From 703286608a220d53584cca5986aad5305eec75ed Mon Sep 17 00:00:00 2001 From: Eric W. Biederman Date: Mon, 8 Aug 2016 14:33:23 -0500 Subject: netns: Add a limit on the number of net namespaces Acked-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- include/net/net_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 0933c7455a30..fc4f757107df 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -60,6 +60,7 @@ struct net { struct list_head exit_list; /* Use only net_mutex */ struct user_namespace *user_ns; /* Owning user namespace */ + struct ucounts *ucounts; spinlock_t nsid_lock; struct idr netns_ids; -- cgit v1.2.3 From 631fee7d70e8eabb642b4bcc58f08bbe880c91aa Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 9 Aug 2016 06:51:06 -0700 Subject: net: Remove fib_local variable After commit 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse") fib_local is set but not used. Remove it. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 - net/ipv4/fib_frontend.c | 7 ------- 2 files changed, 8 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d061ffeb1e71..7adf4386ac8f 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -40,7 +40,6 @@ struct netns_ipv4 { #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; bool fib_has_custom_rules; - struct fib_table __rcu *fib_local; struct fib_table __rcu *fib_main; struct fib_table __rcu *fib_default; #endif diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ef2ebeb89d0f..317c31939732 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -93,9 +93,6 @@ struct fib_table *fib_new_table(struct net *net, u32 id) return NULL; switch (id) { - case RT_TABLE_LOCAL: - rcu_assign_pointer(net->ipv4.fib_local, tb); - break; case RT_TABLE_MAIN: rcu_assign_pointer(net->ipv4.fib_main, tb); break; @@ -137,9 +134,6 @@ static void fib_replace_table(struct net *net, struct fib_table *old, { #ifdef CONFIG_IP_MULTIPLE_TABLES switch (new->tb_id) { - case RT_TABLE_LOCAL: - rcu_assign_pointer(net->ipv4.fib_local, new); - break; case RT_TABLE_MAIN: rcu_assign_pointer(net->ipv4.fib_main, new); break; @@ -1249,7 +1243,6 @@ static void ip_fib_net_exit(struct net *net) rtnl_lock(); #ifdef CONFIG_IP_MULTIPLE_TABLES - RCU_INIT_POINTER(net->ipv4.fib_local, NULL); RCU_INIT_POINTER(net->ipv4.fib_main, NULL); RCU_INIT_POINTER(net->ipv4.fib_default, NULL); #endif -- cgit v1.2.3 From e45a8a9e60ff1dd5ad118c794337a1101b46ab0d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 9 Aug 2016 18:27:08 +0200 Subject: xfrm: constify xfrm_replay structures The xfrm_replay structures are never modified, so declare them as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- net/xfrm/xfrm_replay.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index adfebd6f243c..d2fdd6d70959 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -187,7 +187,7 @@ struct xfrm_state { struct xfrm_replay_state_esn *preplay_esn; /* The functions for replay detection. */ - struct xfrm_replay *repl; + const struct xfrm_replay *repl; /* internal flag that only holds state for delayed aevent at the * moment diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 4fd725a0c500..cdc2e2e71bff 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -558,7 +558,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) x->repl->notify(x, XFRM_REPLAY_UPDATE); } -static struct xfrm_replay xfrm_replay_legacy = { +static const struct xfrm_replay xfrm_replay_legacy = { .advance = xfrm_replay_advance, .check = xfrm_replay_check, .recheck = xfrm_replay_check, @@ -566,7 +566,7 @@ static struct xfrm_replay xfrm_replay_legacy = { .overflow = xfrm_replay_overflow, }; -static struct xfrm_replay xfrm_replay_bmp = { +static const struct xfrm_replay xfrm_replay_bmp = { .advance = xfrm_replay_advance_bmp, .check = xfrm_replay_check_bmp, .recheck = xfrm_replay_check_bmp, @@ -574,7 +574,7 @@ static struct xfrm_replay xfrm_replay_bmp = { .overflow = xfrm_replay_overflow_bmp, }; -static struct xfrm_replay xfrm_replay_esn = { +static const struct xfrm_replay xfrm_replay_esn = { .advance = xfrm_replay_advance_esn, .check = xfrm_replay_check_esn, .recheck = xfrm_replay_recheck_esn, -- cgit v1.2.3 From d737a5805581c6f99dad4caa9fdf80965d617d1a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 9 Aug 2016 12:16:09 +0200 Subject: xfrm: state: don't use lock anymore unless acquire operation is needed push the lock down, after earlier patches we can rely on rcu to make sure state struct won't go away. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 6 +++--- net/xfrm/xfrm_state.c | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 24cd3949a9a4..1ab51d188408 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -38,9 +38,9 @@ struct netns_xfrm { * mode. Also, it can be used by ah/esp icmp error handler to find * offending SA. */ - struct hlist_head *state_bydst; - struct hlist_head *state_bysrc; - struct hlist_head *state_byspi; + struct hlist_head __rcu *state_bydst; + struct hlist_head __rcu *state_bysrc; + struct hlist_head __rcu *state_byspi; unsigned int state_hmask; unsigned int state_num; struct work_struct state_hash_work; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 53e7867f9254..1a15b658a79e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -799,7 +799,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, sequence = read_seqcount_begin(&xfrm_state_hash_generation); - spin_lock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_lock(); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) { if (x->props.family == encap_family && @@ -870,6 +870,7 @@ found: } if (km_query(x, tmpl, pol) == 0) { + spin_lock_bh(&net->xfrm.xfrm_state_lock); x->km.state = XFRM_STATE_ACQ; list_add(&x->km.all, &net->xfrm.state_all); hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); @@ -883,6 +884,7 @@ found: tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); net->xfrm.state_num++; xfrm_hash_grow_check(net, x->bydst.next != NULL); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); } else { x->km.state = XFRM_STATE_DEAD; to_put = x; @@ -899,7 +901,7 @@ out: } else { *err = acquire_in_progress ? -EAGAIN : error; } - spin_unlock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_unlock(); if (to_put) xfrm_state_put(to_put); -- cgit v1.2.3 From 59cc1f61f09c26ce82c308e24b76141e1efe99f8 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 10 Aug 2016 11:05:15 +0200 Subject: net: sched: convert qdisc linked list to hashtable Convert the per-device linked list into a hashtable. The primary motivation for this change is that currently, we're not tracking all the qdiscs in hierarchy (e.g. excluding default qdiscs), as the lookup performed over the linked list by qdisc_match_from_root() is rather expensive. The ultimate goal is to get rid of hidden qdiscs completely, which will bring much more determinism in user experience. Reviewed-by: Cong Wang Signed-off-by: Jiri Kosina Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ include/net/pkt_sched.h | 4 ++-- include/net/sch_generic.h | 2 +- net/core/dev.c | 3 +++ net/sched/sch_api.c | 23 +++++++++++++---------- net/sched/sch_generic.c | 8 +++++--- net/sched/sch_mq.c | 2 +- net/sched/sch_mqprio.c | 2 +- 8 files changed, 30 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 076df5360ba5..96e0b6cd964e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -52,6 +52,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -1800,6 +1801,9 @@ struct net_device { unsigned int num_tx_queues; unsigned int real_num_tx_queues; struct Qdisc *qdisc; +#ifdef CONFIG_NET_SCHED + DECLARE_HASHTABLE (qdisc_hash, 4); +#endif unsigned long tx_queue_len; spinlock_t tx_global_lock; int watchdog_timeo; diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 7caa99b482c6..cd334c9584e9 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -90,8 +90,8 @@ int unregister_qdisc(struct Qdisc_ops *qops); void qdisc_get_default(char *id, size_t len); int qdisc_set_default(const char *id); -void qdisc_list_add(struct Qdisc *q); -void qdisc_list_del(struct Qdisc *q); +void qdisc_hash_add(struct Qdisc *q); +void qdisc_hash_del(struct Qdisc *q); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 909aff2db2b3..0d501779cc68 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -61,7 +61,7 @@ struct Qdisc { u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; - struct list_head list; + struct hlist_node hash; u32 handle; u32 parent; void *u32_node; diff --git a/net/core/dev.c b/net/core/dev.c index 4ce07dc25573..936ea0054f57 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7629,6 +7629,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->all_adj_list.lower); INIT_LIST_HEAD(&dev->ptype_all); INIT_LIST_HEAD(&dev->ptype_specific); +#ifdef CONFIG_NET_SCHED + hash_init(dev->qdisc_hash); +#endif dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 12ebde845523..25aada7b095c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -263,33 +264,33 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) root->handle == handle) return root; - list_for_each_entry_rcu(q, &root->list, list) { + hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) { if (q->handle == handle) return q; } return NULL; } -void qdisc_list_add(struct Qdisc *q) +void qdisc_hash_add(struct Qdisc *q) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { struct Qdisc *root = qdisc_dev(q)->qdisc; WARN_ON_ONCE(root == &noop_qdisc); ASSERT_RTNL(); - list_add_tail_rcu(&q->list, &root->list); + hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); } } -EXPORT_SYMBOL(qdisc_list_add); +EXPORT_SYMBOL(qdisc_hash_add); -void qdisc_list_del(struct Qdisc *q) +void qdisc_hash_del(struct Qdisc *q) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { ASSERT_RTNL(); - list_del_rcu(&q->list); + hash_del_rcu(&q->hash); } } -EXPORT_SYMBOL(qdisc_list_del); +EXPORT_SYMBOL(qdisc_hash_del); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { @@ -998,7 +999,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, goto err_out4; } - qdisc_list_add(sch); + qdisc_hash_add(sch); return sch; } @@ -1435,6 +1436,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, { int ret = 0, q_idx = *q_idx_p; struct Qdisc *q; + int b; if (!root) return 0; @@ -1449,7 +1451,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, goto done; q_idx++; } - list_for_each_entry(q, &root->list, list) { + hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (q_idx < s_q_idx) { q_idx++; continue; @@ -1765,6 +1767,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, int *t_p, int s_t) { struct Qdisc *q; + int b; if (!root) return 0; @@ -1772,7 +1775,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) return -1; - list_for_each_entry(q, &root->list, list) { + hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e95b67cd5718..18faecc3f13e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -423,7 +423,6 @@ struct Qdisc noop_qdisc = { .dequeue = noop_dequeue, .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, - .list = LIST_HEAD_INIT(noop_qdisc.list), .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, .running = SEQCNT_ZERO(noop_qdisc.running), @@ -613,7 +612,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); sch->padded = (char *) sch - (char *) p; } - INIT_LIST_HEAD(&sch->list); skb_queue_head_init(&sch->q); spin_lock_init(&sch->busylock); @@ -700,7 +698,7 @@ void qdisc_destroy(struct Qdisc *qdisc) return; #ifdef CONFIG_NET_SCHED - qdisc_list_del(qdisc); + qdisc_hash_del(qdisc); qdisc_put_stab(rtnl_dereference(qdisc->stab)); #endif @@ -788,6 +786,10 @@ static void attach_default_qdiscs(struct net_device *dev) qdisc->ops->attach(qdisc); } } +#ifdef CONFIG_NET_SCHED + if (dev->qdisc) + qdisc_hash_add(dev->qdisc); +#endif } static void transition_one_qdisc(struct net_device *dev, diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index b9439827c172..2bc8d7f8df16 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -88,7 +88,7 @@ static void mq_attach(struct Qdisc *sch) qdisc_destroy(old); #ifdef CONFIG_NET_SCHED if (ntx < dev->real_num_tx_queues) - qdisc_list_add(qdisc); + qdisc_hash_add(qdisc); #endif } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 549c66359924..b5c502c78143 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -182,7 +182,7 @@ static void mqprio_attach(struct Qdisc *sch) if (old) qdisc_destroy(old); if (ntx < dev->real_num_tx_queues) - qdisc_list_add(qdisc); + qdisc_hash_add(qdisc); } kfree(priv->qdiscs); priv->qdiscs = NULL; -- cgit v1.2.3 From ab10dccb11608b96b43b557c12a5ad867723e503 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 9 Aug 2016 12:38:24 +0800 Subject: rps: Inspect PPTP encapsulated by GRE to get flow hash The PPTP is encapsulated by GRE header with that GRE_VERSION bits must contain one. But current GRE RPS needs the GRE_VERSION must be zero. So RPS does not work for PPTP traffic. In my test environment, there are four MIPS cores, and all traffic are passed through by PPTP. As a result, only one core is 100% busy while other three cores are very idle. After this patch, the usage of four cores are balanced well. Signed-off-by: Gao Feng Reviewed-by: Philip Prindeville Signed-off-by: David S. Miller --- drivers/net/ppp/pptp.c | 36 +------------ include/net/gre.h | 10 +++- include/net/pptp.h | 40 +++++++++++++++ include/uapi/linux/if_tunnel.h | 7 ++- net/core/flow_dissector.c | 113 ++++++++++++++++++++++++++++------------- 5 files changed, 135 insertions(+), 71 deletions(-) create mode 100644 include/net/pptp.h (limited to 'include/net') diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index ae0905ed4a32..3e68dbc0af7f 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -37,6 +37,7 @@ #include #include #include +#include #include @@ -53,41 +54,6 @@ static struct proto pptp_sk_proto __read_mostly; static const struct ppp_channel_ops pptp_chan_ops; static const struct proto_ops pptp_ops; -#define PPP_LCP_ECHOREQ 0x09 -#define PPP_LCP_ECHOREP 0x0A -#define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP) - -#define MISSING_WINDOW 20 -#define WRAPPED(curseq, lastseq)\ - ((((curseq) & 0xffffff00) == 0) &&\ - (((lastseq) & 0xffffff00) == 0xffffff00)) - -#define PPTP_GRE_PROTO 0x880B -#define PPTP_GRE_VER 0x1 - -#define PPTP_GRE_FLAG_C 0x80 -#define PPTP_GRE_FLAG_R 0x40 -#define PPTP_GRE_FLAG_K 0x20 -#define PPTP_GRE_FLAG_S 0x10 -#define PPTP_GRE_FLAG_A 0x80 - -#define PPTP_GRE_IS_C(f) ((f)&PPTP_GRE_FLAG_C) -#define PPTP_GRE_IS_R(f) ((f)&PPTP_GRE_FLAG_R) -#define PPTP_GRE_IS_K(f) ((f)&PPTP_GRE_FLAG_K) -#define PPTP_GRE_IS_S(f) ((f)&PPTP_GRE_FLAG_S) -#define PPTP_GRE_IS_A(f) ((f)&PPTP_GRE_FLAG_A) - -#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header)) -struct pptp_gre_header { - u8 flags; - u8 ver; - __be16 protocol; - __be16 payload_len; - __be16 call_id; - __be32 seq; - __be32 ack; -} __packed; - static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr) { struct pppox_sock *sock; diff --git a/include/net/gre.h b/include/net/gre.h index 7a54a31d1d4c..8962e1e68449 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -7,7 +7,15 @@ struct gre_base_hdr { __be16 flags; __be16 protocol; -}; +} __packed; + +struct gre_full_hdr { + struct gre_base_hdr fixed_header; + __be16 csum; + __be16 reserved1; + __be32 key; + __be32 seq; +} __packed; #define GRE_HEADER_SECTION 4 #define GREPROTO_CISCO 0 diff --git a/include/net/pptp.h b/include/net/pptp.h new file mode 100644 index 000000000000..301d3e2ba1f9 --- /dev/null +++ b/include/net/pptp.h @@ -0,0 +1,40 @@ +#ifndef _NET_PPTP_H +#define _NET_PPTP_H + +#define PPP_LCP_ECHOREQ 0x09 +#define PPP_LCP_ECHOREP 0x0A +#define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP) + +#define MISSING_WINDOW 20 +#define WRAPPED(curseq, lastseq)\ + ((((curseq) & 0xffffff00) == 0) &&\ + (((lastseq) & 0xffffff00) == 0xffffff00)) + +#define PPTP_GRE_PROTO 0x880B +#define PPTP_GRE_VER 0x1 + +#define PPTP_GRE_FLAG_C 0x80 +#define PPTP_GRE_FLAG_R 0x40 +#define PPTP_GRE_FLAG_K 0x20 +#define PPTP_GRE_FLAG_S 0x10 +#define PPTP_GRE_FLAG_A 0x80 + +#define PPTP_GRE_IS_C(f) ((f)&PPTP_GRE_FLAG_C) +#define PPTP_GRE_IS_R(f) ((f)&PPTP_GRE_FLAG_R) +#define PPTP_GRE_IS_K(f) ((f)&PPTP_GRE_FLAG_K) +#define PPTP_GRE_IS_S(f) ((f)&PPTP_GRE_FLAG_S) +#define PPTP_GRE_IS_A(f) ((f)&PPTP_GRE_FLAG_A) + +#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header)) +struct pptp_gre_header { + u8 flags; + u8 ver; + __be16 protocol; + __be16 payload_len; + __be16 call_id; + __be32 seq; + __be32 ack; +} __packed; + + +#endif diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 1046f5515174..60dbb200de60 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -24,9 +24,14 @@ #define GRE_SEQ __cpu_to_be16(0x1000) #define GRE_STRICT __cpu_to_be16(0x0800) #define GRE_REC __cpu_to_be16(0x0700) -#define GRE_FLAGS __cpu_to_be16(0x00F8) +#define GRE_ACK __cpu_to_be16(0x0080) +#define GRE_FLAGS __cpu_to_be16(0x0078) #define GRE_VERSION __cpu_to_be16(0x0007) +#define GRE_VERSION_1 __cpu_to_be16(0x0001) +#define GRE_PROTO_PPP __cpu_to_be16(0x880b) +#define GRE_PPTP_KEY_MASK __cpu_to_be32(0xffff) + struct ip_tunnel_parm { char name[IFNAMSIZ]; int link; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 61ad43f61c5e..91028ae2fb01 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include #include @@ -338,32 +340,42 @@ mpls: ip_proto_again: switch (ip_proto) { case IPPROTO_GRE: { - struct gre_hdr { - __be16 flags; - __be16 proto; - } *hdr, _hdr; + struct gre_base_hdr *hdr, _hdr; + u16 gre_ver; + int offset = 0; hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) goto out_bad; - /* - * Only look inside GRE if version zero and no - * routing - */ - if (hdr->flags & (GRE_VERSION | GRE_ROUTING)) + + /* Only look inside GRE without routing */ + if (hdr->flags & GRE_ROUTING) break; - proto = hdr->proto; - nhoff += 4; + /* Only look inside GRE for version 0 and 1 */ + gre_ver = ntohs(hdr->flags & GRE_VERSION); + if (gre_ver > 1) + break; + + proto = hdr->protocol; + if (gre_ver) { + /* Version1 must be PPTP, and check the flags */ + if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY))) + break; + } + + offset += sizeof(struct gre_base_hdr); + if (hdr->flags & GRE_CSUM) - nhoff += 4; + offset += sizeof(((struct gre_full_hdr *)0)->csum) + + sizeof(((struct gre_full_hdr *)0)->reserved1); + if (hdr->flags & GRE_KEY) { const __be32 *keyid; __be32 _keyid; - keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid), + keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid), data, hlen, &_keyid); - if (!keyid) goto out_bad; @@ -372,32 +384,65 @@ ip_proto_again: key_keyid = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_GRE_KEYID, target_container); - key_keyid->keyid = *keyid; + if (gre_ver == 0) + key_keyid->keyid = *keyid; + else + key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK; } - nhoff += 4; + offset += sizeof(((struct gre_full_hdr *)0)->key); } + if (hdr->flags & GRE_SEQ) - nhoff += 4; - if (proto == htons(ETH_P_TEB)) { - const struct ethhdr *eth; - struct ethhdr _eth; - - eth = __skb_header_pointer(skb, nhoff, - sizeof(_eth), - data, hlen, &_eth); - if (!eth) + offset += sizeof(((struct pptp_gre_header *)0)->seq); + + if (gre_ver == 0) { + if (proto == htons(ETH_P_TEB)) { + const struct ethhdr *eth; + struct ethhdr _eth; + + eth = __skb_header_pointer(skb, nhoff + offset, + sizeof(_eth), + data, hlen, &_eth); + if (!eth) + goto out_bad; + proto = eth->h_proto; + offset += sizeof(*eth); + + /* Cap headers that we access via pointers at the + * end of the Ethernet header as our maximum alignment + * at that point is only 2 bytes. + */ + if (NET_IP_ALIGN) + hlen = (nhoff + offset); + } + } else { /* version 1, must be PPTP */ + u8 _ppp_hdr[PPP_HDRLEN]; + u8 *ppp_hdr; + + if (hdr->flags & GRE_ACK) + offset += sizeof(((struct pptp_gre_header *)0)->ack); + + ppp_hdr = skb_header_pointer(skb, nhoff + offset, + sizeof(_ppp_hdr), _ppp_hdr); + if (!ppp_hdr) goto out_bad; - proto = eth->h_proto; - nhoff += sizeof(*eth); - - /* Cap headers that we access via pointers at the - * end of the Ethernet header as our maximum alignment - * at that point is only 2 bytes. - */ - if (NET_IP_ALIGN) - hlen = nhoff; + + switch (PPP_PROTOCOL(ppp_hdr)) { + case PPP_IP: + proto = htons(ETH_P_IP); + break; + case PPP_IPV6: + proto = htons(ETH_P_IPV6); + break; + default: + /* Could probably catch some more like MPLS */ + break; + } + + offset += PPP_HDRLEN; } + nhoff += offset; key_control->flags |= FLOW_DIS_ENCAPSULATION; if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) goto out_good; -- cgit v1.2.3 From a7c44247f704e385c77579d65c6ee6d002832529 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:56 +0200 Subject: xfrm: policy: make xfrm_policy_lookup_bytype lockless side effect: no longer disables BH (should be fine). Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 2 +- net/xfrm/xfrm_policy.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 1ab51d188408..3ab828a97e68 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -11,7 +11,7 @@ struct ctl_table_header; struct xfrm_policy_hash { - struct hlist_head *table; + struct hlist_head __rcu *table; unsigned int hmask; u8 dbits4; u8 sbits4; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 09f2e2b38246..9302647f20a0 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1123,7 +1123,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (unlikely(!daddr || !saddr)) return NULL; - read_lock_bh(&net->xfrm.xfrm_policy_lock); + rcu_read_lock(); retry: do { sequence = read_seqcount_begin(&xfrm_policy_hash_generation); @@ -1172,7 +1172,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (ret && !xfrm_pol_hold_rcu(ret)) goto retry; fail: - read_unlock_bh(&net->xfrm.xfrm_policy_lock); + rcu_read_unlock(); return ret; } -- cgit v1.2.3 From 9d0380df6217e8dd014118fa1c99dda9974f3613 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 11 Aug 2016 15:17:59 +0200 Subject: xfrm: policy: convert policy_lock to spinlock After earlier patches conversions all spots acquire the writer lock and we can now convert this to a normal spinlock. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 2 +- net/xfrm/xfrm_policy.c | 68 ++++++++++++++++++++++++------------------------ 2 files changed, 35 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 3ab828a97e68..177ed444d7b2 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -73,7 +73,7 @@ struct netns_xfrm { struct dst_ops xfrm6_dst_ops; #endif spinlock_t xfrm_state_lock; - rwlock_t xfrm_policy_lock; + spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; /* flow cache part */ diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 35b85a9a358c..dd01fd2e55fa 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -484,7 +484,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) if (!ndst) return; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); write_seqcount_begin(&xfrm_policy_hash_generation); odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, @@ -500,7 +500,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) net->xfrm.policy_bydst[dir].hmask = nhashmask; write_seqcount_end(&xfrm_policy_hash_generation); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); synchronize_rcu(); @@ -519,7 +519,7 @@ static void xfrm_byidx_resize(struct net *net, int total) if (!nidx) return; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); for (i = hmask; i >= 0; i--) xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); @@ -527,7 +527,7 @@ static void xfrm_byidx_resize(struct net *net, int total) net->xfrm.policy_byidx = nidx; net->xfrm.policy_idx_hmask = nhashmask; - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); } @@ -617,7 +617,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) rbits6 = net->xfrm.policy_hthresh.rbits6; } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); /* reset the bydst and inexact table in all directions */ for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { @@ -659,7 +659,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) hlist_add_head(&policy->bydst, chain); } - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); mutex_unlock(&hash_resize_mutex); } @@ -770,7 +770,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct hlist_head *chain; struct hlist_node *newpos; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; @@ -781,7 +781,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) { - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return -EEXIST; } delpol = pol; @@ -817,7 +817,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (delpol) xfrm_policy_kill(delpol); @@ -837,7 +837,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, struct hlist_head *chain; *err = 0; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, sel, sel->family, dir); ret = NULL; hlist_for_each_entry(pol, chain, bydst) { @@ -850,7 +850,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, *err = security_xfrm_policy_delete( pol->security); if (*err) { - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); @@ -859,7 +859,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, break; } } - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); @@ -878,7 +878,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, return NULL; *err = 0; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; hlist_for_each_entry(pol, chain, byidx) { @@ -889,7 +889,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, *err = security_xfrm_policy_delete( pol->security); if (*err) { - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); @@ -898,7 +898,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, break; } } - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); @@ -956,7 +956,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) { int dir, err = 0, cnt = 0; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); err = xfrm_policy_flush_secctx_check(net, type, task_valid); if (err) @@ -972,14 +972,14 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) if (pol->type != type) continue; __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); goto again1; } @@ -991,13 +991,13 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) if (pol->type != type) continue; __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); goto again2; } } @@ -1006,7 +1006,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) if (!cnt) err = -ESRCH; out: - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; } EXPORT_SYMBOL(xfrm_policy_flush); @@ -1026,7 +1026,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, if (list_empty(&walk->walk.all) && walk->seq != 0) return 0; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); if (list_empty(&walk->walk.all)) x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); else @@ -1054,7 +1054,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, } list_del_init(&walk->walk.all); out: - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); @@ -1073,9 +1073,9 @@ void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net) if (list_empty(&walk->walk.all)) return; - write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */ + spin_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */ list_del(&walk->walk.all); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_policy_walk_done); @@ -1321,9 +1321,9 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir) { struct net *net = xp_net(pol); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); pol = __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (pol) { xfrm_policy_kill(pol); return 0; @@ -1342,7 +1342,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) return -EINVAL; #endif - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); old_pol = rcu_dereference_protected(sk->sk_policy[dir], lockdep_is_held(&net->xfrm.xfrm_policy_lock)); if (pol) { @@ -1360,7 +1360,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) */ xfrm_sk_policy_unlink(old_pol, dir); } - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (old_pol) { xfrm_policy_kill(old_pol); @@ -1390,9 +1390,9 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) newp->type = old->type; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); xfrm_sk_policy_link(newp, dir); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_pol_put(newp); } return newp; @@ -3074,7 +3074,7 @@ static int __net_init xfrm_net_init(struct net *net) /* Initialize the per-net locks here */ spin_lock_init(&net->xfrm.xfrm_state_lock); - rwlock_init(&net->xfrm.xfrm_policy_lock); + spin_lock_init(&net->xfrm.xfrm_policy_lock); mutex_init(&net->xfrm.xfrm_cfg_mutex); return 0; @@ -3206,7 +3206,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * struct hlist_head *chain; u32 priority = ~0U; - read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/ + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && @@ -3230,7 +3230,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * xfrm_pol_hold(ret); - read_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return ret; } -- cgit v1.2.3 From adf0516845bcd0e626323c858ece28ee58c74455 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 12 Aug 2016 13:47:06 +0200 Subject: netfilter: remove ip_conntrack* sysctl compat code This backward compatibility has been around for more than ten years, since Yasuyuki Kozakai introduced IPv6 in conntrack. These days, we have alternate /proc/net/nf_conntrack* entries, the ctnetlink interface and the conntrack utility got adopted by many people in the user community according to what I observed on the netfilter user mailing list. So let's get rid of this. Note that nf_conntrack_htable_size and unsigned int nf_conntrack_max do not need to be exported as symbol anymore. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 8 - include/net/netns/conntrack.h | 8 - net/ipv4/netfilter/Kconfig | 11 - net/ipv4/netfilter/Makefile | 5 - net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 70 --- .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 491 --------------------- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 39 +- net/netfilter/nf_conntrack_core.c | 3 - net/netfilter/nf_conntrack_proto.c | 81 +--- net/netfilter/nf_conntrack_proto_generic.c | 39 +- net/netfilter/nf_conntrack_proto_sctp.c | 85 +--- net/netfilter/nf_conntrack_proto_tcp.c | 127 +----- net/netfilter/nf_conntrack_proto_udp.c | 49 +- 13 files changed, 7 insertions(+), 1009 deletions(-) delete mode 100644 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 1a5fb36f165f..de629f1520df 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -134,14 +134,6 @@ void nf_ct_l4proto_pernet_unregister(struct net *net, int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto); void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto); -static inline void nf_ct_kfree_compat_sysctl_table(struct nf_proto_net *pn) -{ -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - kfree(pn->ctl_compat_table); - pn->ctl_compat_table = NULL; -#endif -} - /* Generic netlink helpers */ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 38b1a80517f0..e469e85de3f9 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -15,10 +15,6 @@ struct nf_proto_net { #ifdef CONFIG_SYSCTL struct ctl_table_header *ctl_table_header; struct ctl_table *ctl_table; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - struct ctl_table_header *ctl_compat_header; - struct ctl_table *ctl_compat_table; -#endif #endif unsigned int users; }; @@ -58,10 +54,6 @@ struct nf_ip_net { struct nf_udp_net udp; struct nf_icmp_net icmp; struct nf_icmp_net icmpv6; -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - struct ctl_table_header *ctl_table_header; - struct ctl_table *ctl_table; -#endif }; struct ct_pcpu { diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index c187c60e3e0c..d613309e3e5d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -25,17 +25,6 @@ config NF_CONNTRACK_IPV4 To compile it as a module, choose M here. If unsure, say N. -config NF_CONNTRACK_PROC_COMPAT - bool "proc/sysctl compatibility with old connection tracking" - depends on NF_CONNTRACK_PROCFS && NF_CONNTRACK_IPV4 - default y - help - This option enables /proc and sysctl compatibility with the old - layer 3 dependent connection tracking. This is needed to keep - old programs that have not been adapted to the new names working. - - If unsure, say Y. - if NF_TABLES config NF_TABLES_IPV4 diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 87b073da14c9..853328f8fd05 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -4,11 +4,6 @@ # objects for l3 independent conntrack nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o -ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) -ifeq ($(CONFIG_PROC_FS),y) -nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o -endif -endif # connection tracking obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index ae1a71a97132..870aebda2932 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -202,47 +202,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { }, }; -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) -static int log_invalid_proto_min = 0; -static int log_invalid_proto_max = 255; - -static struct ctl_table ip_ct_sysctl_table[] = { - { - .procname = "ip_conntrack_max", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_count", - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_buckets", - .maxlen = sizeof(unsigned int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_checksum", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_log_invalid", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &log_invalid_proto_min, - .extra2 = &log_invalid_proto_max, - }, - { } -}; -#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */ - /* Fast function for those who don't want to parse /proc (and I don't blame them). */ /* Reversing the socket's dst/src point of view gives us the reply @@ -350,20 +309,6 @@ static struct nf_sockopt_ops so_getorigdst = { static int ipv4_init_net(struct net *net) { -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - struct nf_ip_net *in = &net->ct.nf_ct_proto; - in->ctl_table = kmemdup(ip_ct_sysctl_table, - sizeof(ip_ct_sysctl_table), - GFP_KERNEL); - if (!in->ctl_table) - return -ENOMEM; - - in->ctl_table[0].data = &nf_conntrack_max; - in->ctl_table[1].data = &net->ct.count; - in->ctl_table[2].data = &nf_conntrack_htable_size; - in->ctl_table[3].data = &net->ct.sysctl_checksum; - in->ctl_table[4].data = &net->ct.sysctl_log_invalid; -#endif return 0; } @@ -379,9 +324,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .nlattr_tuple_size = ipv4_nlattr_tuple_size, .nlattr_to_tuple = ipv4_nlattr_to_tuple, .nla_policy = ipv4_nla_policy, -#endif -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - .ctl_table_path = "net/ipv4/netfilter", #endif .init_net = ipv4_init_net, .me = THIS_MODULE, @@ -492,16 +434,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) goto cleanup_icmpv4; } -#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - ret = nf_conntrack_ipv4_compat_init(); - if (ret < 0) - goto cleanup_proto; -#endif return ret; -#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - cleanup_proto: - nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); -#endif cleanup_icmpv4: nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); cleanup_udp4: @@ -520,9 +453,6 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) static void __exit nf_conntrack_l3proto_ipv4_fini(void) { synchronize_net(); -#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - nf_conntrack_ipv4_compat_fini(); -#endif nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c deleted file mode 100644 index 67bfc69e00bc..000000000000 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ /dev/null @@ -1,491 +0,0 @@ -/* ip_conntrack proc compat - based on ip_conntrack_standalone.c - * - * (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * (C) 2006-2010 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -struct ct_iter_state { - struct seq_net_private p; - struct hlist_nulls_head *hash; - unsigned int htable_size; - unsigned int bucket; -}; - -static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) -{ - struct ct_iter_state *st = seq->private; - struct hlist_nulls_node *n; - - for (st->bucket = 0; - st->bucket < st->htable_size; - st->bucket++) { - n = rcu_dereference( - hlist_nulls_first_rcu(&st->hash[st->bucket])); - if (!is_a_nulls(n)) - return n; - } - return NULL; -} - -static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, - struct hlist_nulls_node *head) -{ - struct ct_iter_state *st = seq->private; - - head = rcu_dereference(hlist_nulls_next_rcu(head)); - while (is_a_nulls(head)) { - if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= st->htable_size) - return NULL; - } - head = rcu_dereference( - hlist_nulls_first_rcu(&st->hash[st->bucket])); - } - return head; -} - -static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos) -{ - struct hlist_nulls_node *head = ct_get_first(seq); - - if (head) - while (pos && (head = ct_get_next(seq, head))) - pos--; - return pos ? NULL : head; -} - -static void *ct_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - struct ct_iter_state *st = seq->private; - - rcu_read_lock(); - - nf_conntrack_get_ht(&st->hash, &st->htable_size); - return ct_get_idx(seq, *pos); -} - -static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - (*pos)++; - return ct_get_next(s, v); -} - -static void ct_seq_stop(struct seq_file *s, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -#ifdef CONFIG_NF_CONNTRACK_SECMARK -static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) -{ - int ret; - u32 len; - char *secctx; - - ret = security_secid_to_secctx(ct->secmark, &secctx, &len); - if (ret) - return; - - seq_printf(s, "secctx=%s ", secctx); - - security_release_secctx(secctx, len); -} -#else -static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) -{ -} -#endif - -static bool ct_seq_should_skip(const struct nf_conn *ct, - const struct net *net, - const struct nf_conntrack_tuple_hash *hash) -{ - /* we only want to print DIR_ORIGINAL */ - if (NF_CT_DIRECTION(hash)) - return true; - - if (nf_ct_l3num(ct) != AF_INET) - return true; - - if (!net_eq(nf_ct_net(ct), net)) - return true; - - return false; -} - -static int ct_seq_show(struct seq_file *s, void *v) -{ - struct nf_conntrack_tuple_hash *hash = v; - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); - const struct nf_conntrack_l3proto *l3proto; - const struct nf_conntrack_l4proto *l4proto; - int ret = 0; - - NF_CT_ASSERT(ct); - if (ct_seq_should_skip(ct, seq_file_net(s), hash)) - return 0; - - if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) - return 0; - - /* check if we raced w. object reuse */ - if (!nf_ct_is_confirmed(ct) || - ct_seq_should_skip(ct, seq_file_net(s), hash)) - goto release; - - l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); - NF_CT_ASSERT(l3proto); - l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); - NF_CT_ASSERT(l4proto); - - ret = -ENOSPC; - seq_printf(s, "%-8s %u %ld ", - l4proto->name, nf_ct_protonum(ct), - nf_ct_expires(ct) / HZ); - - if (l4proto->print_conntrack) - l4proto->print_conntrack(s, ct); - - if (seq_has_overflowed(s)) - goto release; - - print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - l3proto, l4proto); - - if (seq_has_overflowed(s)) - goto release; - - if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) - goto release; - - if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) - seq_printf(s, "[UNREPLIED] "); - - print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, - l3proto, l4proto); - - if (seq_has_overflowed(s)) - goto release; - - if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) - goto release; - - if (test_bit(IPS_ASSURED_BIT, &ct->status)) - seq_printf(s, "[ASSURED] "); - -#ifdef CONFIG_NF_CONNTRACK_MARK - seq_printf(s, "mark=%u ", ct->mark); -#endif - - ct_show_secctx(s, ct); - - seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)); - - if (seq_has_overflowed(s)) - goto release; - - ret = 0; -release: - nf_ct_put(ct); - return ret; -} - -static const struct seq_operations ct_seq_ops = { - .start = ct_seq_start, - .next = ct_seq_next, - .stop = ct_seq_stop, - .show = ct_seq_show -}; - -static int ct_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ct_seq_ops, - sizeof(struct ct_iter_state)); -} - -static const struct file_operations ct_file_ops = { - .owner = THIS_MODULE, - .open = ct_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -/* expects */ -struct ct_expect_iter_state { - struct seq_net_private p; - unsigned int bucket; -}; - -static struct hlist_node *ct_expect_get_first(struct seq_file *seq) -{ - struct ct_expect_iter_state *st = seq->private; - struct hlist_node *n; - - for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference( - hlist_first_rcu(&nf_ct_expect_hash[st->bucket])); - if (n) - return n; - } - return NULL; -} - -static struct hlist_node *ct_expect_get_next(struct seq_file *seq, - struct hlist_node *head) -{ - struct ct_expect_iter_state *st = seq->private; - - head = rcu_dereference(hlist_next_rcu(head)); - while (head == NULL) { - if (++st->bucket >= nf_ct_expect_hsize) - return NULL; - head = rcu_dereference( - hlist_first_rcu(&nf_ct_expect_hash[st->bucket])); - } - return head; -} - -static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) -{ - struct hlist_node *head = ct_expect_get_first(seq); - - if (head) - while (pos && (head = ct_expect_get_next(seq, head))) - pos--; - return pos ? NULL : head; -} - -static void *exp_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - rcu_read_lock(); - return ct_expect_get_idx(seq, *pos); -} - -static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - (*pos)++; - return ct_expect_get_next(seq, v); -} - -static void exp_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -static int exp_seq_show(struct seq_file *s, void *v) -{ - struct nf_conntrack_expect *exp; - const struct hlist_node *n = v; - - exp = hlist_entry(n, struct nf_conntrack_expect, hnode); - - if (!net_eq(nf_ct_net(exp->master), seq_file_net(s))) - return 0; - - if (exp->tuple.src.l3num != AF_INET) - return 0; - - if (exp->timeout.function) - seq_printf(s, "%ld ", timer_pending(&exp->timeout) - ? (long)(exp->timeout.expires - jiffies)/HZ : 0); - else - seq_printf(s, "- "); - - seq_printf(s, "proto=%u ", exp->tuple.dst.protonum); - - print_tuple(s, &exp->tuple, - __nf_ct_l3proto_find(exp->tuple.src.l3num), - __nf_ct_l4proto_find(exp->tuple.src.l3num, - exp->tuple.dst.protonum)); - seq_putc(s, '\n'); - - return 0; -} - -static const struct seq_operations exp_seq_ops = { - .start = exp_seq_start, - .next = exp_seq_next, - .stop = exp_seq_stop, - .show = exp_seq_show -}; - -static int exp_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &exp_seq_ops, - sizeof(struct ct_expect_iter_state)); -} - -static const struct file_operations ip_exp_file_ops = { - .owner = THIS_MODULE, - .open = exp_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) -{ - struct net *net = seq_file_net(seq); - int cpu; - - if (*pos == 0) - return SEQ_START_TOKEN; - - for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { - if (!cpu_possible(cpu)) - continue; - *pos = cpu+1; - return per_cpu_ptr(net->ct.stat, cpu); - } - - return NULL; -} - -static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct net *net = seq_file_net(seq); - int cpu; - - for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { - if (!cpu_possible(cpu)) - continue; - *pos = cpu+1; - return per_cpu_ptr(net->ct.stat, cpu); - } - - return NULL; -} - -static void ct_cpu_seq_stop(struct seq_file *seq, void *v) -{ -} - -static int ct_cpu_seq_show(struct seq_file *seq, void *v) -{ - struct net *net = seq_file_net(seq); - unsigned int nr_conntracks = atomic_read(&net->ct.count); - const struct ip_conntrack_stat *st = v; - - if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); - return 0; - } - - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " - "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", - nr_conntracks, - st->searched, - st->found, - st->new, - st->invalid, - st->ignore, - st->delete, - st->delete_list, - st->insert, - st->insert_failed, - st->drop, - st->early_drop, - st->error, - - st->expect_new, - st->expect_create, - st->expect_delete, - st->search_restart - ); - return 0; -} - -static const struct seq_operations ct_cpu_seq_ops = { - .start = ct_cpu_seq_start, - .next = ct_cpu_seq_next, - .stop = ct_cpu_seq_stop, - .show = ct_cpu_seq_show, -}; - -static int ct_cpu_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ct_cpu_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations ct_cpu_seq_fops = { - .owner = THIS_MODULE, - .open = ct_cpu_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -static int __net_init ip_conntrack_net_init(struct net *net) -{ - struct proc_dir_entry *proc, *proc_exp, *proc_stat; - - proc = proc_create("ip_conntrack", 0440, net->proc_net, &ct_file_ops); - if (!proc) - goto err1; - - proc_exp = proc_create("ip_conntrack_expect", 0440, net->proc_net, - &ip_exp_file_ops); - if (!proc_exp) - goto err2; - - proc_stat = proc_create("ip_conntrack", S_IRUGO, - net->proc_net_stat, &ct_cpu_seq_fops); - if (!proc_stat) - goto err3; - return 0; - -err3: - remove_proc_entry("ip_conntrack_expect", net->proc_net); -err2: - remove_proc_entry("ip_conntrack", net->proc_net); -err1: - return -ENOMEM; -} - -static void __net_exit ip_conntrack_net_exit(struct net *net) -{ - remove_proc_entry("ip_conntrack", net->proc_net_stat); - remove_proc_entry("ip_conntrack_expect", net->proc_net); - remove_proc_entry("ip_conntrack", net->proc_net); -} - -static struct pernet_operations ip_conntrack_net_ops = { - .init = ip_conntrack_net_init, - .exit = ip_conntrack_net_exit, -}; - -int __init nf_conntrack_ipv4_compat_init(void) -{ - return register_pernet_subsys(&ip_conntrack_net_ops); -} - -void __exit nf_conntrack_ipv4_compat_fini(void) -{ - unregister_pernet_subsys(&ip_conntrack_net_ops); -} diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index c567e1b5d799..4b5904bc2614 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -327,17 +327,6 @@ static struct ctl_table icmp_sysctl_table[] = { }, { } }; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table icmp_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_icmp_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -355,40 +344,14 @@ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int icmp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct nf_icmp_net *in) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table, - sizeof(icmp_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &in->timeout; -#endif -#endif - return 0; -} - static int icmp_init_net(struct net *net, u_int16_t proto) { - int ret; struct nf_icmp_net *in = icmp_pernet(net); struct nf_proto_net *pn = &in->pn; in->timeout = nf_ct_icmp_timeout; - ret = icmp_kmemdup_compat_sysctl_table(pn, in); - if (ret < 0) - return ret; - - ret = icmp_kmemdup_sysctl_table(pn, in); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - - return ret; + return icmp_kmemdup_sysctl_table(pn, in); } static struct nf_proto_net *icmp_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index dd2c43abf9e2..22558b7ff7cd 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -161,10 +161,7 @@ static void nf_conntrack_all_unlock(void) } unsigned int nf_conntrack_htable_size __read_mostly; -EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); - unsigned int nf_conntrack_max __read_mostly; -EXPORT_SYMBOL_GPL(nf_conntrack_max); DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index b65d5864b6d9..8d2c7d8c666a 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -159,54 +159,6 @@ static int kill_l4proto(struct nf_conn *i, void *data) nf_ct_l3num(i) == l4proto->l3proto; } -static struct nf_ip_net *nf_ct_l3proto_net(struct net *net, - struct nf_conntrack_l3proto *l3proto) -{ - if (l3proto->l3proto == PF_INET) - return &net->ct.nf_ct_proto; - else - return NULL; -} - -static int nf_ct_l3proto_register_sysctl(struct net *net, - struct nf_conntrack_l3proto *l3proto) -{ - int err = 0; - struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto); - /* nf_conntrack_l3proto_ipv6 doesn't support sysctl */ - if (in == NULL) - return 0; - -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - if (in->ctl_table != NULL) { - err = nf_ct_register_sysctl(net, - &in->ctl_table_header, - l3proto->ctl_table_path, - in->ctl_table); - if (err < 0) { - kfree(in->ctl_table); - in->ctl_table = NULL; - } - } -#endif - return err; -} - -static void nf_ct_l3proto_unregister_sysctl(struct net *net, - struct nf_conntrack_l3proto *l3proto) -{ - struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto); - - if (in == NULL) - return; -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - if (in->ctl_table_header != NULL) - nf_ct_unregister_sysctl(&in->ctl_table_header, - &in->ctl_table, - 0); -#endif -} - int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) { int ret = 0; @@ -241,7 +193,7 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); int nf_ct_l3proto_pernet_register(struct net *net, struct nf_conntrack_l3proto *proto) { - int ret = 0; + int ret; if (proto->init_net) { ret = proto->init_net(net); @@ -249,7 +201,7 @@ int nf_ct_l3proto_pernet_register(struct net *net, return ret; } - return nf_ct_l3proto_register_sysctl(net, proto); + return 0; } EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); @@ -272,8 +224,6 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); void nf_ct_l3proto_pernet_unregister(struct net *net, struct nf_conntrack_l3proto *proto) { - nf_ct_l3proto_unregister_sysctl(net, proto); - /* Remove all contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0); } @@ -312,26 +262,6 @@ int nf_ct_l4proto_register_sysctl(struct net *net, } } } -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_table != NULL) { - if (err < 0) { - nf_ct_kfree_compat_sysctl_table(pn); - goto out; - } - err = nf_ct_register_sysctl(net, - &pn->ctl_compat_header, - "net/ipv4/netfilter", - pn->ctl_compat_table); - if (err == 0) - goto out; - - nf_ct_kfree_compat_sysctl_table(pn); - nf_ct_unregister_sysctl(&pn->ctl_table_header, - &pn->ctl_table, - pn->users); - } -out: -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ return err; } @@ -346,13 +276,6 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net, nf_ct_unregister_sysctl(&pn->ctl_table_header, &pn->ctl_table, pn->users); - -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_header != NULL) - nf_ct_unregister_sysctl(&pn->ctl_compat_header, - &pn->ctl_compat_table, - 0); -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ } diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 86dc752e5349..d5868bad33a7 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -151,17 +151,6 @@ static struct ctl_table generic_sysctl_table[] = { }, { } }; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table generic_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_generic_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -179,40 +168,14 @@ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int generic_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct nf_generic_net *gn) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(generic_compat_sysctl_table, - sizeof(generic_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &gn->timeout; -#endif -#endif - return 0; -} - static int generic_init_net(struct net *net, u_int16_t proto) { - int ret; struct nf_generic_net *gn = generic_pernet(net); struct nf_proto_net *pn = &gn->pn; gn->timeout = nf_ct_generic_timeout; - ret = generic_kmemdup_compat_sysctl_table(pn, gn); - if (ret < 0) - return ret; - - ret = generic_kmemdup_sysctl_table(pn, gn); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - - return ret; + return generic_kmemdup_sysctl_table(pn, gn); } static struct nf_proto_net *generic_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index e769f0561621..982ea62606c7 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -705,54 +705,6 @@ static struct ctl_table sctp_sysctl_table[] = { }, { } }; - -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table sctp_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_sctp_timeout_closed", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_cookie_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_cookie_echoed", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_established", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_shutdown_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_shutdown_recd", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -781,32 +733,8 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct sctp_net *sn) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(sctp_compat_sysctl_table, - sizeof(sctp_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &sn->timeouts[SCTP_CONNTRACK_CLOSED]; - pn->ctl_compat_table[1].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_WAIT]; - pn->ctl_compat_table[2].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_ECHOED]; - pn->ctl_compat_table[3].data = &sn->timeouts[SCTP_CONNTRACK_ESTABLISHED]; - pn->ctl_compat_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT]; - pn->ctl_compat_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD]; - pn->ctl_compat_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT]; -#endif -#endif - return 0; -} - static int sctp_init_net(struct net *net, u_int16_t proto) { - int ret; struct sctp_net *sn = sctp_pernet(net); struct nf_proto_net *pn = &sn->pn; @@ -817,18 +745,7 @@ static int sctp_init_net(struct net *net, u_int16_t proto) sn->timeouts[i] = sctp_timeouts[i]; } - if (proto == AF_INET) { - ret = sctp_kmemdup_compat_sysctl_table(pn, sn); - if (ret < 0) - return ret; - - ret = sctp_kmemdup_sysctl_table(pn, sn); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - } else - ret = sctp_kmemdup_sysctl_table(pn, sn); - - return ret; + return sctp_kmemdup_sysctl_table(pn, sn); } static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 4abe9e1f8909..69f687740c76 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1481,90 +1481,6 @@ static struct ctl_table tcp_sysctl_table[] = { }, { } }; - -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table tcp_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_tcp_timeout_syn_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_syn_sent2", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_syn_recv", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_established", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_fin_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_close_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_last_ack", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_time_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_close", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_max_retrans", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_loose", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_tcp_be_liberal", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_tcp_max_retrans", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -1597,38 +1513,8 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct nf_tcp_net *tn) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table, - sizeof(tcp_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT]; - pn->ctl_compat_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2]; - pn->ctl_compat_table[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV]; - pn->ctl_compat_table[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; - pn->ctl_compat_table[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT]; - pn->ctl_compat_table[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT]; - pn->ctl_compat_table[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK]; - pn->ctl_compat_table[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT]; - pn->ctl_compat_table[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE]; - pn->ctl_compat_table[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS]; - pn->ctl_compat_table[10].data = &tn->tcp_loose; - pn->ctl_compat_table[11].data = &tn->tcp_be_liberal; - pn->ctl_compat_table[12].data = &tn->tcp_max_retrans; -#endif -#endif - return 0; -} - static int tcp_init_net(struct net *net, u_int16_t proto) { - int ret; struct nf_tcp_net *tn = tcp_pernet(net); struct nf_proto_net *pn = &tn->pn; @@ -1643,18 +1529,7 @@ static int tcp_init_net(struct net *net, u_int16_t proto) tn->tcp_max_retrans = nf_ct_tcp_max_retrans; } - if (proto == AF_INET) { - ret = tcp_kmemdup_compat_sysctl_table(pn, tn); - if (ret < 0) - return ret; - - ret = tcp_kmemdup_sysctl_table(pn, tn); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - } else - ret = tcp_kmemdup_sysctl_table(pn, tn); - - return ret; + return tcp_kmemdup_sysctl_table(pn, tn); } static struct nf_proto_net *tcp_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 8a057e1e1247..20f35ed68030 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -218,23 +218,6 @@ static struct ctl_table udp_sysctl_table[] = { }, { } }; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table udp_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_udp_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_udp_timeout_stream", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -254,27 +237,8 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int udp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct nf_udp_net *un) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(udp_compat_sysctl_table, - sizeof(udp_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &un->timeouts[UDP_CT_UNREPLIED]; - pn->ctl_compat_table[1].data = &un->timeouts[UDP_CT_REPLIED]; -#endif -#endif - return 0; -} - static int udp_init_net(struct net *net, u_int16_t proto) { - int ret; struct nf_udp_net *un = udp_pernet(net); struct nf_proto_net *pn = &un->pn; @@ -285,18 +249,7 @@ static int udp_init_net(struct net *net, u_int16_t proto) un->timeouts[i] = udp_timeouts[i]; } - if (proto == AF_INET) { - ret = udp_kmemdup_compat_sysctl_table(pn, un); - if (ret < 0) - return ret; - - ret = udp_kmemdup_sysctl_table(pn, un); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - } else - ret = udp_kmemdup_sysctl_table(pn, un); - - return ret; + return udp_kmemdup_sysctl_table(pn, un); } static struct nf_proto_net *udp_get_net_proto(struct net *net) -- cgit v1.2.3 From 03459345bc00da70a35fa39bcfcf13d779097074 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Sat, 13 Aug 2016 00:30:48 +0800 Subject: pptp: Refactor the struct and macros of PPTP codes 1. Use struct gre_base_hdr directly in pptp_gre_header instead of duplicated members; 2. Use existing macros like GRE_KEY, GRE_SEQ, and so on instead of duplicated macros defined by PPTP; 3. Add new macros like GRE_IS_ACK/SEQ and so on instead of PPTP_GRE_IS_A/S and so on; Signed-off-by: Gao Feng Reviewed-by: Philip Prindeville Signed-off-by: David S. Miller --- drivers/net/ppp/pptp.c | 28 +++++++++++++--------------- include/net/pptp.h | 19 +------------------ include/uapi/linux/if_tunnel.h | 12 ++++++++++-- 3 files changed, 24 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 3e68dbc0af7f..1951b1085cb8 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -206,16 +206,14 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) skb_push(skb, header_len); hdr = (struct pptp_gre_header *)(skb->data); - hdr->flags = PPTP_GRE_FLAG_K; - hdr->ver = PPTP_GRE_VER; - hdr->protocol = htons(PPTP_GRE_PROTO); - hdr->call_id = htons(opt->dst_addr.call_id); + hdr->gre_hd.flags = GRE_KEY | GRE_VERSION_1 | GRE_SEQ; + hdr->gre_hd.protocol = GRE_PROTO_PPP; + hdr->call_id = htons(opt->dst_addr.call_id); - hdr->flags |= PPTP_GRE_FLAG_S; - hdr->seq = htonl(++opt->seq_sent); + hdr->seq = htonl(++opt->seq_sent); if (opt->ack_sent != seq_recv) { /* send ack with this message */ - hdr->ver |= PPTP_GRE_FLAG_A; + hdr->gre_hd.flags |= GRE_ACK; hdr->ack = htonl(seq_recv); opt->ack_sent = seq_recv; } @@ -278,7 +276,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb) headersize = sizeof(*header); /* test if acknowledgement present */ - if (PPTP_GRE_IS_A(header->ver)) { + if (GRE_IS_ACK(header->gre_hd.flags)) { __u32 ack; if (!pskb_may_pull(skb, headersize)) @@ -286,7 +284,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb) header = (struct pptp_gre_header *)(skb->data); /* ack in different place if S = 0 */ - ack = PPTP_GRE_IS_S(header->flags) ? header->ack : header->seq; + ack = GRE_IS_SEQ(header->gre_hd.flags) ? header->ack : header->seq; ack = ntohl(ack); @@ -299,7 +297,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb) headersize -= sizeof(header->ack); } /* test if payload present */ - if (!PPTP_GRE_IS_S(header->flags)) + if (!GRE_IS_SEQ(header->gre_hd.flags)) goto drop; payload_len = ntohs(header->payload_len); @@ -360,11 +358,11 @@ static int pptp_rcv(struct sk_buff *skb) header = (struct pptp_gre_header *)skb->data; - if (ntohs(header->protocol) != PPTP_GRE_PROTO || /* PPTP-GRE protocol for PPTP */ - PPTP_GRE_IS_C(header->flags) || /* flag C should be clear */ - PPTP_GRE_IS_R(header->flags) || /* flag R should be clear */ - !PPTP_GRE_IS_K(header->flags) || /* flag K should be set */ - (header->flags&0xF) != 0) /* routing and recursion ctrl = 0 */ + if (header->gre_hd.protocol != GRE_PROTO_PPP || /* PPTP-GRE protocol for PPTP */ + GRE_IS_CSUM(header->gre_hd.flags) || /* flag CSUM should be clear */ + GRE_IS_ROUTING(header->gre_hd.flags) || /* flag ROUTING should be clear */ + !GRE_IS_KEY(header->gre_hd.flags) || /* flag KEY should be set */ + (header->gre_hd.flags & GRE_FLAGS)) /* flag Recursion Ctrl should be clear */ /* if invalid, discard this packet */ goto drop; diff --git a/include/net/pptp.h b/include/net/pptp.h index 301d3e2ba1f9..92e9f1fe2628 100644 --- a/include/net/pptp.h +++ b/include/net/pptp.h @@ -10,26 +10,9 @@ ((((curseq) & 0xffffff00) == 0) &&\ (((lastseq) & 0xffffff00) == 0xffffff00)) -#define PPTP_GRE_PROTO 0x880B -#define PPTP_GRE_VER 0x1 - -#define PPTP_GRE_FLAG_C 0x80 -#define PPTP_GRE_FLAG_R 0x40 -#define PPTP_GRE_FLAG_K 0x20 -#define PPTP_GRE_FLAG_S 0x10 -#define PPTP_GRE_FLAG_A 0x80 - -#define PPTP_GRE_IS_C(f) ((f)&PPTP_GRE_FLAG_C) -#define PPTP_GRE_IS_R(f) ((f)&PPTP_GRE_FLAG_R) -#define PPTP_GRE_IS_K(f) ((f)&PPTP_GRE_FLAG_K) -#define PPTP_GRE_IS_S(f) ((f)&PPTP_GRE_FLAG_S) -#define PPTP_GRE_IS_A(f) ((f)&PPTP_GRE_FLAG_A) - #define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header)) struct pptp_gre_header { - u8 flags; - u8 ver; - __be16 protocol; + struct gre_base_hdr gre_hd; __be16 payload_len; __be16 call_id; __be32 seq; diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 60dbb200de60..361b9f0f20d7 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -28,8 +28,16 @@ #define GRE_FLAGS __cpu_to_be16(0x0078) #define GRE_VERSION __cpu_to_be16(0x0007) -#define GRE_VERSION_1 __cpu_to_be16(0x0001) -#define GRE_PROTO_PPP __cpu_to_be16(0x880b) +#define GRE_IS_CSUM(f) ((f) & GRE_CSUM) +#define GRE_IS_ROUTING(f) ((f) & GRE_ROUTING) +#define GRE_IS_KEY(f) ((f) & GRE_KEY) +#define GRE_IS_SEQ(f) ((f) & GRE_SEQ) +#define GRE_IS_STRICT(f) ((f) & GRE_STRICT) +#define GRE_IS_REC(f) ((f) & GRE_REC) +#define GRE_IS_ACK(f) ((f) & GRE_ACK) + +#define GRE_VERSION_1 __cpu_to_be16(0x0001) +#define GRE_PROTO_PPP __cpu_to_be16(0x880b) #define GRE_PPTP_KEY_MASK __cpu_to_be32(0xffff) struct ip_tunnel_parm { -- cgit v1.2.3 From 92e47ba8839bacc185db89f3b11cd8036193e6a9 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 13 Aug 2016 22:35:36 +0800 Subject: netfilter: conntrack: simplify the code by using nf_conntrack_get_ht Since commit 64b87639c9cb ("netfilter: conntrack: fix race between nf_conntrack proc read and hash resize") introduce the nf_conntrack_get_ht, so there's no need to check nf_conntrack_generation again and again to get the hash table and hash size. And convert nf_conntrack_get_ht to inline function here. Suggested-by: Pablo Neira Ayuso Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 20 ++++++++++++++ include/net/netfilter/nf_conntrack_core.h | 3 -- net/netfilter/nf_conntrack_core.c | 46 +++++++------------------------ 3 files changed, 30 insertions(+), 39 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 445b019c2078..2a127480d4cc 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -303,9 +303,29 @@ struct kernel_param; int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); int nf_conntrack_hash_resize(unsigned int hashsize); + +extern struct hlist_nulls_head *nf_conntrack_hash; extern unsigned int nf_conntrack_htable_size; +extern seqcount_t nf_conntrack_generation; extern unsigned int nf_conntrack_max; +/* must be called with rcu read lock held */ +static inline void +nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize) +{ + struct hlist_nulls_head *hptr; + unsigned int sequence, hsz; + + do { + sequence = read_seqcount_begin(&nf_conntrack_generation); + hsz = nf_conntrack_htable_size; + hptr = nf_conntrack_hash; + } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + + *hash = hptr; + *hsize = hsz; +} + struct nf_conn *nf_ct_tmpl_alloc(struct net *net, const struct nf_conntrack_zone *zone, gfp_t flags); diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 79d7ac5c9740..62e17d1319ff 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -51,8 +51,6 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto); -void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize); - /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * nf_conntrack_find_get(struct net *net, @@ -83,7 +81,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, #define CONNTRACK_LOCKS 1024 -extern struct hlist_nulls_head *nf_conntrack_hash; extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; void nf_conntrack_lock(spinlock_t *lock); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 22558b7ff7cd..aeba28c5512b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -74,7 +74,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash); static __read_mostly struct kmem_cache *nf_conntrack_cachep; static __read_mostly spinlock_t nf_conntrack_locks_all_lock; -static __read_mostly seqcount_t nf_conntrack_generation; static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; @@ -162,6 +161,7 @@ static void nf_conntrack_all_unlock(void) unsigned int nf_conntrack_htable_size __read_mostly; unsigned int nf_conntrack_max __read_mostly; +seqcount_t nf_conntrack_generation __read_mostly; DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); @@ -478,23 +478,6 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, net_eq(net, nf_ct_net(ct)); } -/* must be called with rcu read lock held */ -void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize) -{ - struct hlist_nulls_head *hptr; - unsigned int sequence, hsz; - - do { - sequence = read_seqcount_begin(&nf_conntrack_generation); - hsz = nf_conntrack_htable_size; - hptr = nf_conntrack_hash; - } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); - - *hash = hptr; - *hsize = hsz; -} -EXPORT_SYMBOL_GPL(nf_conntrack_get_ht); - /* * Warning : * - Caller must take a reference on returned object @@ -507,14 +490,11 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone, struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; struct hlist_nulls_node *n; - unsigned int bucket, sequence; + unsigned int bucket, hsize; begin: - do { - sequence = read_seqcount_begin(&nf_conntrack_generation); - bucket = scale_hash(hash); - ct_hash = nf_conntrack_hash; - } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + nf_conntrack_get_ht(&ct_hash, &hsize); + bucket = reciprocal_scale(hash, hsize); hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) { if (nf_ct_key_equal(h, tuple, zone, net)) { @@ -820,18 +800,15 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; - unsigned int hash, sequence; + unsigned int hash, hsize; struct hlist_nulls_node *n; struct nf_conn *ct; zone = nf_ct_zone(ignored_conntrack); rcu_read_lock(); - do { - sequence = read_seqcount_begin(&nf_conntrack_generation); - hash = hash_conntrack(net, tuple); - ct_hash = nf_conntrack_hash; - } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + nf_conntrack_get_ht(&ct_hash, &hsize); + hash = __hash_conntrack(net, tuple, hsize); hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); @@ -897,14 +874,11 @@ static noinline int early_drop(struct net *net, unsigned int _hash) for (i = 0; i < NF_CT_EVICTION_RANGE; i++) { struct hlist_nulls_head *ct_hash; - unsigned hash, sequence, drops; + unsigned int hash, hsize, drops; rcu_read_lock(); - do { - sequence = read_seqcount_begin(&nf_conntrack_generation); - hash = scale_hash(_hash++); - ct_hash = nf_conntrack_hash; - } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + nf_conntrack_get_ht(&ct_hash, &hsize); + hash = reciprocal_scale(_hash++, hsize); drops = early_drop_list(net, &ct_hash[hash]); rcu_read_unlock(); -- cgit v1.2.3 From 43a0c6751a322847cb6fa0ab8cbf77a1d08bfc0a Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 15 Aug 2016 14:51:01 -0700 Subject: strparser: Stream parser for messages This patch introduces a utility for parsing application layer protocol messages in a TCP stream. This is a generalization of the mechanism implemented of Kernel Connection Multiplexor. The API includes a context structure, a set of callbacks, utility functions, and a data ready function. A stream parser instance is defined by a strparse structure that is bound to a TCP socket. The function to initialize the structure is: int strp_init(struct strparser *strp, struct sock *csk, struct strp_callbacks *cb); csk is the TCP socket being bound to and cb are the parser callbacks. The upper layer calls strp_tcp_data_ready when data is ready on the lower socket for strparser to process. This should be called from a data_ready callback that is set on the socket: void strp_tcp_data_ready(struct strparser *strp); A parser is bound to a TCP socket by setting data_ready function to strp_tcp_data_ready so that all receive indications on the socket go through the parser. This is assumes that sk_user_data is set to the strparser structure. There are four callbacks. - parse_msg is called to parse the message (returns length or error). - rcv_msg is called when a complete message has been received - read_sock_done is called when data_ready function exits - abort_parser is called to abort the parser The input to parse_msg is an skbuff which contains next message under construction. The backend processing of parse_msg will parse the application layer protocol headers to determine the length of the message in the stream. The possible return values are: >0 : indicates length of successfully parsed message 0 : indicates more data must be received to parse the message -ESTRPIPE : current message should not be processed by the kernel, return control of the socket to userspace which can proceed to read the messages itself other < 0 : Error is parsing, give control back to userspace assuming that synchronzation is lost and the stream is unrecoverable (application expected to close TCP socket) In the case of error return (< 0) strparse will stop the parser and report and error to userspace. The application must deal with the error. To handle the error the strparser is unbound from the TCP socket. If the error indicates that the stream TCP socket is at recoverable point (ESTRPIPE) then the application can read the TCP socket to process the stream. Once the application has dealt with the exceptions in the stream, it may again bind the socket to a strparser to continue data operations. Note that ENODATA may be returned to the application. In this case parse_msg returned -ESTRPIPE, however strparser was unable to maintain synchronization of the stream (i.e. some of the message in question was already read by the parser). strp_pause and strp_unpause are used to provide flow control. For instance, if rcv_msg is called but the upper layer can't immediately consume the message it can hold the message and pause strparser. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/strparser.h | 145 ++++++++++++++ net/Kconfig | 1 + net/Makefile | 1 + net/strparser/Kconfig | 4 + net/strparser/Makefile | 1 + net/strparser/strparser.c | 492 ++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 644 insertions(+) create mode 100644 include/net/strparser.h create mode 100644 net/strparser/Kconfig create mode 100644 net/strparser/Makefile create mode 100644 net/strparser/strparser.c (limited to 'include/net') diff --git a/include/net/strparser.h b/include/net/strparser.h new file mode 100644 index 000000000000..fdb3d6746cc4 --- /dev/null +++ b/include/net/strparser.h @@ -0,0 +1,145 @@ +/* + * Stream Parser + * + * Copyright (c) 2016 Tom Herbert + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + +#ifndef __NET_STRPARSER_H_ +#define __NET_STRPARSER_H_ + +#include +#include + +#define STRP_STATS_ADD(stat, count) ((stat) += (count)) +#define STRP_STATS_INCR(stat) ((stat)++) + +struct strp_stats { + unsigned long long rx_msgs; + unsigned long long rx_bytes; + unsigned int rx_mem_fail; + unsigned int rx_need_more_hdr; + unsigned int rx_msg_too_big; + unsigned int rx_msg_timeouts; + unsigned int rx_bad_hdr_len; +}; + +struct strp_aggr_stats { + unsigned long long rx_msgs; + unsigned long long rx_bytes; + unsigned int rx_mem_fail; + unsigned int rx_need_more_hdr; + unsigned int rx_msg_too_big; + unsigned int rx_msg_timeouts; + unsigned int rx_bad_hdr_len; + unsigned int rx_aborts; + unsigned int rx_interrupted; + unsigned int rx_unrecov_intr; +}; + +struct strparser; + +/* Callbacks are called with lock held for the attached socket */ +struct strp_callbacks { + int (*parse_msg)(struct strparser *strp, struct sk_buff *skb); + void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb); + int (*read_sock_done)(struct strparser *strp, int err); + void (*abort_parser)(struct strparser *strp, int err); +}; + +struct strp_rx_msg { + int full_len; + int offset; +}; + +static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb) +{ + return (struct strp_rx_msg *)((void *)skb->cb + + offsetof(struct qdisc_skb_cb, data)); +} + +/* Structure for an attached lower socket */ +struct strparser { + struct sock *sk; + + u32 rx_stopped : 1; + u32 rx_paused : 1; + u32 rx_aborted : 1; + u32 rx_interrupted : 1; + u32 rx_unrecov_intr : 1; + + struct sk_buff **rx_skb_nextp; + struct timer_list rx_msg_timer; + struct sk_buff *rx_skb_head; + unsigned int rx_need_bytes; + struct delayed_work rx_delayed_work; + struct work_struct rx_work; + struct strp_stats stats; + struct strp_callbacks cb; +}; + +/* Must be called with lock held for attached socket */ +static inline void strp_pause(struct strparser *strp) +{ + strp->rx_paused = 1; +} + +/* May be called without holding lock for attached socket */ +static inline void strp_unpause(struct strparser *strp) +{ + strp->rx_paused = 0; +} + +static inline void save_strp_stats(struct strparser *strp, + struct strp_aggr_stats *agg_stats) +{ + /* Save psock statistics in the mux when psock is being unattached. */ + +#define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += \ + strp->stats._stat) + SAVE_PSOCK_STATS(rx_msgs); + SAVE_PSOCK_STATS(rx_bytes); + SAVE_PSOCK_STATS(rx_mem_fail); + SAVE_PSOCK_STATS(rx_need_more_hdr); + SAVE_PSOCK_STATS(rx_msg_too_big); + SAVE_PSOCK_STATS(rx_msg_timeouts); + SAVE_PSOCK_STATS(rx_bad_hdr_len); +#undef SAVE_PSOCK_STATS + + if (strp->rx_aborted) + agg_stats->rx_aborts++; + if (strp->rx_interrupted) + agg_stats->rx_interrupted++; + if (strp->rx_unrecov_intr) + agg_stats->rx_unrecov_intr++; +} + +static inline void aggregate_strp_stats(struct strp_aggr_stats *stats, + struct strp_aggr_stats *agg_stats) +{ +#define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat) + SAVE_PSOCK_STATS(rx_msgs); + SAVE_PSOCK_STATS(rx_bytes); + SAVE_PSOCK_STATS(rx_mem_fail); + SAVE_PSOCK_STATS(rx_need_more_hdr); + SAVE_PSOCK_STATS(rx_msg_too_big); + SAVE_PSOCK_STATS(rx_msg_timeouts); + SAVE_PSOCK_STATS(rx_bad_hdr_len); + SAVE_PSOCK_STATS(rx_aborts); + SAVE_PSOCK_STATS(rx_interrupted); + SAVE_PSOCK_STATS(rx_unrecov_intr); +#undef SAVE_PSOCK_STATS + +} + +void strp_done(struct strparser *strp); +void strp_stop(struct strparser *strp); +void strp_check_rcv(struct strparser *strp); +int strp_init(struct strparser *strp, struct sock *csk, + struct strp_callbacks *cb); +void strp_tcp_data_ready(struct strparser *strp); + +#endif /* __NET_STRPARSER_H_ */ diff --git a/net/Kconfig b/net/Kconfig index c2cdbce629bd..7b6cd340b72b 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -369,6 +369,7 @@ source "net/irda/Kconfig" source "net/bluetooth/Kconfig" source "net/rxrpc/Kconfig" source "net/kcm/Kconfig" +source "net/strparser/Kconfig" config FIB_RULES bool diff --git a/net/Makefile b/net/Makefile index 9bd20bb86cc6..4cafaa2b4667 100644 --- a/net/Makefile +++ b/net/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_BT) += bluetooth/ obj-$(CONFIG_SUNRPC) += sunrpc/ obj-$(CONFIG_AF_RXRPC) += rxrpc/ obj-$(CONFIG_AF_KCM) += kcm/ +obj-$(CONFIG_STREAM_PARSER) += strparser/ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_L2TP) += l2tp/ obj-$(CONFIG_DECNET) += decnet/ diff --git a/net/strparser/Kconfig b/net/strparser/Kconfig new file mode 100644 index 000000000000..6cff3f6d0c3a --- /dev/null +++ b/net/strparser/Kconfig @@ -0,0 +1,4 @@ + +config STREAM_PARSER + tristate + default n diff --git a/net/strparser/Makefile b/net/strparser/Makefile new file mode 100644 index 000000000000..858a126ebaa0 --- /dev/null +++ b/net/strparser/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_STREAM_PARSER) += strparser.o diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c new file mode 100644 index 000000000000..fd688c0a7744 --- /dev/null +++ b/net/strparser/strparser.c @@ -0,0 +1,492 @@ +/* + * Stream Parser + * + * Copyright (c) 2016 Tom Herbert + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct workqueue_struct *strp_wq; + +struct _strp_rx_msg { + /* Internal cb structure. struct strp_rx_msg must be first for passing + * to upper layer. + */ + struct strp_rx_msg strp; + int accum_len; + int early_eaten; +}; + +static inline struct _strp_rx_msg *_strp_rx_msg(struct sk_buff *skb) +{ + return (struct _strp_rx_msg *)((void *)skb->cb + + offsetof(struct qdisc_skb_cb, data)); +} + +/* Lower lock held */ +static void strp_abort_rx_strp(struct strparser *strp, int err) +{ + struct sock *csk = strp->sk; + + /* Unrecoverable error in receive */ + + del_timer(&strp->rx_msg_timer); + + if (strp->rx_stopped) + return; + + strp->rx_stopped = 1; + + /* Report an error on the lower socket */ + csk->sk_err = err; + csk->sk_error_report(csk); +} + +static void strp_start_rx_timer(struct strparser *strp) +{ + if (strp->sk->sk_rcvtimeo) + mod_timer(&strp->rx_msg_timer, strp->sk->sk_rcvtimeo); +} + +/* Lower lock held */ +static void strp_parser_err(struct strparser *strp, int err, + read_descriptor_t *desc) +{ + desc->error = err; + kfree_skb(strp->rx_skb_head); + strp->rx_skb_head = NULL; + strp->cb.abort_parser(strp, err); +} + +/* Lower socket lock held */ +static int strp_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, + unsigned int orig_offset, size_t orig_len) +{ + struct strparser *strp = (struct strparser *)desc->arg.data; + struct _strp_rx_msg *rxm; + struct sk_buff *head, *skb; + size_t eaten = 0, cand_len; + ssize_t extra; + int err; + bool cloned_orig = false; + + if (strp->rx_paused) + return 0; + + head = strp->rx_skb_head; + if (head) { + /* Message already in progress */ + + rxm = _strp_rx_msg(head); + if (unlikely(rxm->early_eaten)) { + /* Already some number of bytes on the receive sock + * data saved in rx_skb_head, just indicate they + * are consumed. + */ + eaten = orig_len <= rxm->early_eaten ? + orig_len : rxm->early_eaten; + rxm->early_eaten -= eaten; + + return eaten; + } + + if (unlikely(orig_offset)) { + /* Getting data with a non-zero offset when a message is + * in progress is not expected. If it does happen, we + * need to clone and pull since we can't deal with + * offsets in the skbs for a message expect in the head. + */ + orig_skb = skb_clone(orig_skb, GFP_ATOMIC); + if (!orig_skb) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = -ENOMEM; + return 0; + } + if (!pskb_pull(orig_skb, orig_offset)) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + kfree_skb(orig_skb); + desc->error = -ENOMEM; + return 0; + } + cloned_orig = true; + orig_offset = 0; + } + + if (!strp->rx_skb_nextp) { + /* We are going to append to the frags_list of head. + * Need to unshare the frag_list. + */ + err = skb_unclone(head, GFP_ATOMIC); + if (err) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = err; + return 0; + } + + if (unlikely(skb_shinfo(head)->frag_list)) { + /* We can't append to an sk_buff that already + * has a frag_list. We create a new head, point + * the frag_list of that to the old head, and + * then are able to use the old head->next for + * appending to the message. + */ + if (WARN_ON(head->next)) { + desc->error = -EINVAL; + return 0; + } + + skb = alloc_skb(0, GFP_ATOMIC); + if (!skb) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = -ENOMEM; + return 0; + } + skb->len = head->len; + skb->data_len = head->len; + skb->truesize = head->truesize; + *_strp_rx_msg(skb) = *_strp_rx_msg(head); + strp->rx_skb_nextp = &head->next; + skb_shinfo(skb)->frag_list = head; + strp->rx_skb_head = skb; + head = skb; + } else { + strp->rx_skb_nextp = + &skb_shinfo(head)->frag_list; + } + } + } + + while (eaten < orig_len) { + /* Always clone since we will consume something */ + skb = skb_clone(orig_skb, GFP_ATOMIC); + if (!skb) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = -ENOMEM; + break; + } + + cand_len = orig_len - eaten; + + head = strp->rx_skb_head; + if (!head) { + head = skb; + strp->rx_skb_head = head; + /* Will set rx_skb_nextp on next packet if needed */ + strp->rx_skb_nextp = NULL; + rxm = _strp_rx_msg(head); + memset(rxm, 0, sizeof(*rxm)); + rxm->strp.offset = orig_offset + eaten; + } else { + /* Unclone since we may be appending to an skb that we + * already share a frag_list with. + */ + err = skb_unclone(skb, GFP_ATOMIC); + if (err) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = err; + break; + } + + rxm = _strp_rx_msg(head); + *strp->rx_skb_nextp = skb; + strp->rx_skb_nextp = &skb->next; + head->data_len += skb->len; + head->len += skb->len; + head->truesize += skb->truesize; + } + + if (!rxm->strp.full_len) { + ssize_t len; + + len = (*strp->cb.parse_msg)(strp, head); + + if (!len) { + /* Need more header to determine length */ + if (!rxm->accum_len) { + /* Start RX timer for new message */ + strp_start_rx_timer(strp); + } + rxm->accum_len += cand_len; + eaten += cand_len; + STRP_STATS_INCR(strp->stats.rx_need_more_hdr); + WARN_ON(eaten != orig_len); + break; + } else if (len < 0) { + if (len == -ESTRPIPE && rxm->accum_len) { + len = -ENODATA; + strp->rx_unrecov_intr = 1; + } else { + strp->rx_interrupted = 1; + } + strp_parser_err(strp, err, desc); + break; + } else if (len > strp->sk->sk_rcvbuf) { + /* Message length exceeds maximum allowed */ + STRP_STATS_INCR(strp->stats.rx_msg_too_big); + strp_parser_err(strp, -EMSGSIZE, desc); + break; + } else if (len <= (ssize_t)head->len - + skb->len - rxm->strp.offset) { + /* Length must be into new skb (and also + * greater than zero) + */ + STRP_STATS_INCR(strp->stats.rx_bad_hdr_len); + strp_parser_err(strp, -EPROTO, desc); + break; + } + + rxm->strp.full_len = len; + } + + extra = (ssize_t)(rxm->accum_len + cand_len) - + rxm->strp.full_len; + + if (extra < 0) { + /* Message not complete yet. */ + if (rxm->strp.full_len - rxm->accum_len > + tcp_inq(strp->sk)) { + /* Don't have the whole messages in the socket + * buffer. Set strp->rx_need_bytes to wait for + * the rest of the message. Also, set "early + * eaten" since we've already buffered the skb + * but don't consume yet per tcp_read_sock. + */ + + if (!rxm->accum_len) { + /* Start RX timer for new message */ + strp_start_rx_timer(strp); + } + + strp->rx_need_bytes = rxm->strp.full_len - + rxm->accum_len; + rxm->accum_len += cand_len; + rxm->early_eaten = cand_len; + STRP_STATS_ADD(strp->stats.rx_bytes, cand_len); + desc->count = 0; /* Stop reading socket */ + break; + } + rxm->accum_len += cand_len; + eaten += cand_len; + WARN_ON(eaten != orig_len); + break; + } + + /* Positive extra indicates ore bytes than needed for the + * message + */ + + WARN_ON(extra > cand_len); + + eaten += (cand_len - extra); + + /* Hurray, we have a new message! */ + del_timer(&strp->rx_msg_timer); + strp->rx_skb_head = NULL; + STRP_STATS_INCR(strp->stats.rx_msgs); + + /* Give skb to upper layer */ + strp->cb.rcv_msg(strp, head); + + if (unlikely(strp->rx_paused)) { + /* Upper layer paused strp */ + break; + } + } + + if (cloned_orig) + kfree_skb(orig_skb); + + STRP_STATS_ADD(strp->stats.rx_bytes, eaten); + + return eaten; +} + +static int default_read_sock_done(struct strparser *strp, int err) +{ + return err; +} + +/* Called with lock held on lower socket */ +static int strp_tcp_read_sock(struct strparser *strp) +{ + read_descriptor_t desc; + + desc.arg.data = strp; + desc.error = 0; + desc.count = 1; /* give more than one skb per call */ + + /* sk should be locked here, so okay to do tcp_read_sock */ + tcp_read_sock(strp->sk, &desc, strp_tcp_recv); + + desc.error = strp->cb.read_sock_done(strp, desc.error); + + return desc.error; +} + +/* Lower sock lock held */ +void strp_tcp_data_ready(struct strparser *strp) +{ + struct sock *csk = strp->sk; + + if (unlikely(strp->rx_stopped)) + return; + + /* This check is needed to synchronize with do_strp_rx_work. + * do_strp_rx_work acquires a process lock (lock_sock) whereas + * the lock held here is bh_lock_sock. The two locks can be + * held by different threads at the same time, but bh_lock_sock + * allows a thread in BH context to safely check if the process + * lock is held. In this case, if the lock is held, queue work. + */ + if (sock_owned_by_user(csk)) { + queue_work(strp_wq, &strp->rx_work); + return; + } + + if (strp->rx_paused) + return; + + if (strp->rx_need_bytes) { + if (tcp_inq(csk) >= strp->rx_need_bytes) + strp->rx_need_bytes = 0; + else + return; + } + + if (strp_tcp_read_sock(strp) == -ENOMEM) + queue_work(strp_wq, &strp->rx_work); +} +EXPORT_SYMBOL_GPL(strp_tcp_data_ready); + +static void do_strp_rx_work(struct strparser *strp) +{ + read_descriptor_t rd_desc; + struct sock *csk = strp->sk; + + /* We need the read lock to synchronize with strp_tcp_data_ready. We + * need the socket lock for calling tcp_read_sock. + */ + lock_sock(csk); + + if (unlikely(csk->sk_user_data != strp)) + goto out; + + if (unlikely(strp->rx_stopped)) + goto out; + + if (strp->rx_paused) + goto out; + + rd_desc.arg.data = strp; + + if (strp_tcp_read_sock(strp) == -ENOMEM) + queue_work(strp_wq, &strp->rx_work); + +out: + release_sock(csk); +} + +static void strp_rx_work(struct work_struct *w) +{ + do_strp_rx_work(container_of(w, struct strparser, rx_work)); +} + +static void strp_rx_msg_timeout(unsigned long arg) +{ + struct strparser *strp = (struct strparser *)arg; + + /* Message assembly timed out */ + STRP_STATS_INCR(strp->stats.rx_msg_timeouts); + lock_sock(strp->sk); + strp->cb.abort_parser(strp, ETIMEDOUT); + release_sock(strp->sk); +} + +int strp_init(struct strparser *strp, struct sock *csk, + struct strp_callbacks *cb) +{ + if (!cb || !cb->rcv_msg || !cb->parse_msg) + return -EINVAL; + + memset(strp, 0, sizeof(*strp)); + + strp->sk = csk; + + setup_timer(&strp->rx_msg_timer, strp_rx_msg_timeout, + (unsigned long)strp); + + INIT_WORK(&strp->rx_work, strp_rx_work); + + strp->cb.rcv_msg = cb->rcv_msg; + strp->cb.parse_msg = cb->parse_msg; + strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done; + strp->cb.abort_parser = cb->abort_parser ? : strp_abort_rx_strp; + + return 0; +} +EXPORT_SYMBOL_GPL(strp_init); + +/* strp must already be stopped so that strp_tcp_recv will no longer be called. + * Note that strp_done is not called with the lower socket held. + */ +void strp_done(struct strparser *strp) +{ + WARN_ON(!strp->rx_stopped); + + del_timer_sync(&strp->rx_msg_timer); + cancel_work_sync(&strp->rx_work); + + if (strp->rx_skb_head) { + kfree_skb(strp->rx_skb_head); + strp->rx_skb_head = NULL; + } +} +EXPORT_SYMBOL_GPL(strp_done); + +void strp_stop(struct strparser *strp) +{ + strp->rx_stopped = 1; +} +EXPORT_SYMBOL_GPL(strp_stop); + +void strp_check_rcv(struct strparser *strp) +{ + queue_work(strp_wq, &strp->rx_work); +} +EXPORT_SYMBOL_GPL(strp_check_rcv); + +static int __init strp_mod_init(void) +{ + strp_wq = create_singlethread_workqueue("kstrp"); + + return 0; +} + +static void __exit strp_mod_exit(void) +{ +} +module_init(strp_mod_init); +module_exit(strp_mod_exit); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 9b73896a81dc68a638a011877b7344b252f92276 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 15 Aug 2016 14:51:02 -0700 Subject: kcm: Use stream parser Adapt KCM to use the stream parser. This mostly involves removing the RX handling and setting up the strparser using the interface. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/kcm.h | 37 +--- net/ipv6/ila/ila_common.c | 1 - net/kcm/Kconfig | 1 + net/kcm/kcmproc.c | 44 +++-- net/kcm/kcmsock.c | 456 ++++++++-------------------------------------- 5 files changed, 116 insertions(+), 423 deletions(-) (limited to 'include/net') diff --git a/include/net/kcm.h b/include/net/kcm.h index 2840b5825dcc..2a8965819db0 100644 --- a/include/net/kcm.h +++ b/include/net/kcm.h @@ -13,6 +13,7 @@ #include #include +#include #include extern unsigned int kcm_net_id; @@ -21,16 +22,8 @@ extern unsigned int kcm_net_id; #define KCM_STATS_INCR(stat) ((stat)++) struct kcm_psock_stats { - unsigned long long rx_msgs; - unsigned long long rx_bytes; unsigned long long tx_msgs; unsigned long long tx_bytes; - unsigned int rx_aborts; - unsigned int rx_mem_fail; - unsigned int rx_need_more_hdr; - unsigned int rx_msg_too_big; - unsigned int rx_msg_timeouts; - unsigned int rx_bad_hdr_len; unsigned long long reserved; unsigned long long unreserved; unsigned int tx_aborts; @@ -64,13 +57,6 @@ struct kcm_tx_msg { struct sk_buff *last_skb; }; -struct kcm_rx_msg { - int full_len; - int accum_len; - int offset; - int early_eaten; -}; - /* Socket structure for KCM client sockets */ struct kcm_sock { struct sock sk; @@ -87,6 +73,7 @@ struct kcm_sock { struct work_struct tx_work; struct list_head wait_psock_list; struct sk_buff *seq_skb; + u32 tx_stopped : 1; /* Don't use bit fields here, these are set under different locks */ bool tx_wait; @@ -104,11 +91,11 @@ struct bpf_prog; /* Structure for an attached lower socket */ struct kcm_psock { struct sock *sk; + struct strparser strp; struct kcm_mux *mux; int index; u32 tx_stopped : 1; - u32 rx_stopped : 1; u32 done : 1; u32 unattaching : 1; @@ -121,18 +108,12 @@ struct kcm_psock { struct kcm_psock_stats stats; /* Receive */ - struct sk_buff *rx_skb_head; - struct sk_buff **rx_skb_nextp; - struct sk_buff *ready_rx_msg; struct list_head psock_ready_list; - struct work_struct rx_work; - struct delayed_work rx_delayed_work; struct bpf_prog *bpf_prog; struct kcm_sock *rx_kcm; unsigned long long saved_rx_bytes; unsigned long long saved_rx_msgs; - struct timer_list rx_msg_timer; - unsigned int rx_need_bytes; + struct sk_buff *ready_rx_msg; /* Transmit */ struct kcm_sock *tx_kcm; @@ -146,6 +127,7 @@ struct kcm_net { struct mutex mutex; struct kcm_psock_stats aggregate_psock_stats; struct kcm_mux_stats aggregate_mux_stats; + struct strp_aggr_stats aggregate_strp_stats; struct list_head mux_list; int count; }; @@ -163,6 +145,7 @@ struct kcm_mux { struct kcm_mux_stats stats; struct kcm_psock_stats aggregate_psock_stats; + struct strp_aggr_stats aggregate_strp_stats; /* Receive */ spinlock_t rx_lock ____cacheline_aligned_in_smp; @@ -190,14 +173,6 @@ static inline void aggregate_psock_stats(struct kcm_psock_stats *stats, /* Save psock statistics in the mux when psock is being unattached. */ #define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat) - SAVE_PSOCK_STATS(rx_msgs); - SAVE_PSOCK_STATS(rx_bytes); - SAVE_PSOCK_STATS(rx_aborts); - SAVE_PSOCK_STATS(rx_mem_fail); - SAVE_PSOCK_STATS(rx_need_more_hdr); - SAVE_PSOCK_STATS(rx_msg_too_big); - SAVE_PSOCK_STATS(rx_msg_timeouts); - SAVE_PSOCK_STATS(rx_bad_hdr_len); SAVE_PSOCK_STATS(tx_msgs); SAVE_PSOCK_STATS(tx_bytes); SAVE_PSOCK_STATS(reserved); diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index ec9efbcdad35..aba0998ddbfb 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -172,6 +172,5 @@ static void __exit ila_fini(void) module_init(ila_init); module_exit(ila_fini); -MODULE_ALIAS_RTNL_LWT(ILA); MODULE_AUTHOR("Tom Herbert "); MODULE_LICENSE("GPL"); diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig index 5db94d940ecc..87fca36e6c47 100644 --- a/net/kcm/Kconfig +++ b/net/kcm/Kconfig @@ -3,6 +3,7 @@ config AF_KCM tristate "KCM sockets" depends on INET select BPF_SYSCALL + select STREAM_PARSER ---help--- KCM (Kernel Connection Multiplexor) sockets provide a method for multiplexing messages of a message based application diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index 16c2e03bd388..47e445364f4f 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -155,8 +155,8 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, seq_printf(seq, " psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ", psock->index, - psock->stats.rx_msgs, - psock->stats.rx_bytes, + psock->strp.stats.rx_msgs, + psock->strp.stats.rx_bytes, psock->stats.tx_msgs, psock->stats.tx_bytes, psock->sk->sk_receive_queue.qlen, @@ -170,9 +170,12 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, if (psock->tx_stopped) seq_puts(seq, "TxStop "); - if (psock->rx_stopped) + if (psock->strp.rx_stopped) seq_puts(seq, "RxStop "); + if (psock->strp.rx_paused) + seq_puts(seq, "RxPause "); + if (psock->tx_kcm) seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index); @@ -275,6 +278,7 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) { struct kcm_psock_stats psock_stats; struct kcm_mux_stats mux_stats; + struct strp_aggr_stats strp_stats; struct kcm_mux *mux; struct kcm_psock *psock; struct net *net = seq->private; @@ -282,20 +286,28 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) memset(&mux_stats, 0, sizeof(mux_stats)); memset(&psock_stats, 0, sizeof(psock_stats)); + memset(&strp_stats, 0, sizeof(strp_stats)); mutex_lock(&knet->mutex); aggregate_mux_stats(&knet->aggregate_mux_stats, &mux_stats); aggregate_psock_stats(&knet->aggregate_psock_stats, &psock_stats); + aggregate_strp_stats(&knet->aggregate_strp_stats, + &strp_stats); list_for_each_entry_rcu(mux, &knet->mux_list, kcm_mux_list) { spin_lock_bh(&mux->lock); aggregate_mux_stats(&mux->stats, &mux_stats); aggregate_psock_stats(&mux->aggregate_psock_stats, &psock_stats); - list_for_each_entry(psock, &mux->psocks, psock_list) + aggregate_strp_stats(&mux->aggregate_strp_stats, + &strp_stats); + list_for_each_entry(psock, &mux->psocks, psock_list) { aggregate_psock_stats(&psock->stats, &psock_stats); + save_strp_stats(&psock->strp, &strp_stats); + } + spin_unlock_bh(&mux->lock); } @@ -328,7 +340,7 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) mux_stats.rx_ready_drops); seq_printf(seq, - "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n", + "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n", "Psock", "RX-Msgs", "RX-Bytes", @@ -337,6 +349,8 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) "Reserved", "Unreserved", "RX-Aborts", + "RX-Intr", + "RX-Unrecov", "RX-MemFail", "RX-NeedMor", "RX-BadLen", @@ -345,20 +359,22 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) "TX-Aborts"); seq_printf(seq, - "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n", + "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n", "", - psock_stats.rx_msgs, - psock_stats.rx_bytes, + strp_stats.rx_msgs, + strp_stats.rx_bytes, psock_stats.tx_msgs, psock_stats.tx_bytes, psock_stats.reserved, psock_stats.unreserved, - psock_stats.rx_aborts, - psock_stats.rx_mem_fail, - psock_stats.rx_need_more_hdr, - psock_stats.rx_bad_hdr_len, - psock_stats.rx_msg_too_big, - psock_stats.rx_msg_timeouts, + strp_stats.rx_aborts, + strp_stats.rx_interrupted, + strp_stats.rx_unrecov_intr, + strp_stats.rx_mem_fail, + strp_stats.rx_need_more_hdr, + strp_stats.rx_bad_hdr_len, + strp_stats.rx_msg_too_big, + strp_stats.rx_msg_timeouts, psock_stats.tx_aborts); return 0; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index cb39e05b166c..eedbe404af35 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1,3 +1,13 @@ +/* + * Kernel Connection Multiplexor + * + * Copyright (c) 2016 Tom Herbert + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + #include #include #include @@ -35,38 +45,12 @@ static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb) return (struct kcm_tx_msg *)skb->cb; } -static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb) -{ - return (struct kcm_rx_msg *)((void *)skb->cb + - offsetof(struct qdisc_skb_cb, data)); -} - static void report_csk_error(struct sock *csk, int err) { csk->sk_err = EPIPE; csk->sk_error_report(csk); } -/* Callback lock held */ -static void kcm_abort_rx_psock(struct kcm_psock *psock, int err, - struct sk_buff *skb) -{ - struct sock *csk = psock->sk; - - /* Unrecoverable error in receive */ - - del_timer(&psock->rx_msg_timer); - - if (psock->rx_stopped) - return; - - psock->rx_stopped = 1; - KCM_STATS_INCR(psock->stats.rx_aborts); - - /* Report an error on the lower socket */ - report_csk_error(csk, err); -} - static void kcm_abort_tx_psock(struct kcm_psock *psock, int err, bool wakeup_kcm) { @@ -109,12 +93,13 @@ static void kcm_abort_tx_psock(struct kcm_psock *psock, int err, static void kcm_update_rx_mux_stats(struct kcm_mux *mux, struct kcm_psock *psock) { - KCM_STATS_ADD(mux->stats.rx_bytes, - psock->stats.rx_bytes - psock->saved_rx_bytes); + STRP_STATS_ADD(mux->stats.rx_bytes, + psock->strp.stats.rx_bytes - + psock->saved_rx_bytes); mux->stats.rx_msgs += - psock->stats.rx_msgs - psock->saved_rx_msgs; - psock->saved_rx_msgs = psock->stats.rx_msgs; - psock->saved_rx_bytes = psock->stats.rx_bytes; + psock->strp.stats.rx_msgs - psock->saved_rx_msgs; + psock->saved_rx_msgs = psock->strp.stats.rx_msgs; + psock->saved_rx_bytes = psock->strp.stats.rx_bytes; } static void kcm_update_tx_mux_stats(struct kcm_mux *mux, @@ -167,11 +152,11 @@ static void kcm_rcv_ready(struct kcm_sock *kcm) */ list_del(&psock->psock_ready_list); psock->ready_rx_msg = NULL; - /* Commit clearing of ready_rx_msg for queuing work */ smp_mb(); - queue_work(kcm_wq, &psock->rx_work); + strp_unpause(&psock->strp); + strp_check_rcv(&psock->strp); } /* Buffer limit is okay now, add to ready list */ @@ -285,6 +270,7 @@ static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock, if (list_empty(&mux->kcm_rx_waiters)) { psock->ready_rx_msg = head; + strp_pause(&psock->strp); list_add_tail(&psock->psock_ready_list, &mux->psocks_ready); spin_unlock_bh(&mux->rx_lock); @@ -353,276 +339,6 @@ static void unreserve_rx_kcm(struct kcm_psock *psock, spin_unlock_bh(&mux->rx_lock); } -static void kcm_start_rx_timer(struct kcm_psock *psock) -{ - if (psock->sk->sk_rcvtimeo) - mod_timer(&psock->rx_msg_timer, psock->sk->sk_rcvtimeo); -} - -/* Macro to invoke filter function. */ -#define KCM_RUN_FILTER(prog, ctx) \ - (*prog->bpf_func)(ctx, prog->insnsi) - -/* Lower socket lock held */ -static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, - unsigned int orig_offset, size_t orig_len) -{ - struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data; - struct kcm_rx_msg *rxm; - struct kcm_sock *kcm; - struct sk_buff *head, *skb; - size_t eaten = 0, cand_len; - ssize_t extra; - int err; - bool cloned_orig = false; - - if (psock->ready_rx_msg) - return 0; - - head = psock->rx_skb_head; - if (head) { - /* Message already in progress */ - - rxm = kcm_rx_msg(head); - if (unlikely(rxm->early_eaten)) { - /* Already some number of bytes on the receive sock - * data saved in rx_skb_head, just indicate they - * are consumed. - */ - eaten = orig_len <= rxm->early_eaten ? - orig_len : rxm->early_eaten; - rxm->early_eaten -= eaten; - - return eaten; - } - - if (unlikely(orig_offset)) { - /* Getting data with a non-zero offset when a message is - * in progress is not expected. If it does happen, we - * need to clone and pull since we can't deal with - * offsets in the skbs for a message expect in the head. - */ - orig_skb = skb_clone(orig_skb, GFP_ATOMIC); - if (!orig_skb) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = -ENOMEM; - return 0; - } - if (!pskb_pull(orig_skb, orig_offset)) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - kfree_skb(orig_skb); - desc->error = -ENOMEM; - return 0; - } - cloned_orig = true; - orig_offset = 0; - } - - if (!psock->rx_skb_nextp) { - /* We are going to append to the frags_list of head. - * Need to unshare the frag_list. - */ - err = skb_unclone(head, GFP_ATOMIC); - if (err) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = err; - return 0; - } - - if (unlikely(skb_shinfo(head)->frag_list)) { - /* We can't append to an sk_buff that already - * has a frag_list. We create a new head, point - * the frag_list of that to the old head, and - * then are able to use the old head->next for - * appending to the message. - */ - if (WARN_ON(head->next)) { - desc->error = -EINVAL; - return 0; - } - - skb = alloc_skb(0, GFP_ATOMIC); - if (!skb) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = -ENOMEM; - return 0; - } - skb->len = head->len; - skb->data_len = head->len; - skb->truesize = head->truesize; - *kcm_rx_msg(skb) = *kcm_rx_msg(head); - psock->rx_skb_nextp = &head->next; - skb_shinfo(skb)->frag_list = head; - psock->rx_skb_head = skb; - head = skb; - } else { - psock->rx_skb_nextp = - &skb_shinfo(head)->frag_list; - } - } - } - - while (eaten < orig_len) { - /* Always clone since we will consume something */ - skb = skb_clone(orig_skb, GFP_ATOMIC); - if (!skb) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = -ENOMEM; - break; - } - - cand_len = orig_len - eaten; - - head = psock->rx_skb_head; - if (!head) { - head = skb; - psock->rx_skb_head = head; - /* Will set rx_skb_nextp on next packet if needed */ - psock->rx_skb_nextp = NULL; - rxm = kcm_rx_msg(head); - memset(rxm, 0, sizeof(*rxm)); - rxm->offset = orig_offset + eaten; - } else { - /* Unclone since we may be appending to an skb that we - * already share a frag_list with. - */ - err = skb_unclone(skb, GFP_ATOMIC); - if (err) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = err; - break; - } - - rxm = kcm_rx_msg(head); - *psock->rx_skb_nextp = skb; - psock->rx_skb_nextp = &skb->next; - head->data_len += skb->len; - head->len += skb->len; - head->truesize += skb->truesize; - } - - if (!rxm->full_len) { - ssize_t len; - - len = KCM_RUN_FILTER(psock->bpf_prog, head); - - if (!len) { - /* Need more header to determine length */ - if (!rxm->accum_len) { - /* Start RX timer for new message */ - kcm_start_rx_timer(psock); - } - rxm->accum_len += cand_len; - eaten += cand_len; - KCM_STATS_INCR(psock->stats.rx_need_more_hdr); - WARN_ON(eaten != orig_len); - break; - } else if (len > psock->sk->sk_rcvbuf) { - /* Message length exceeds maximum allowed */ - KCM_STATS_INCR(psock->stats.rx_msg_too_big); - desc->error = -EMSGSIZE; - psock->rx_skb_head = NULL; - kcm_abort_rx_psock(psock, EMSGSIZE, head); - break; - } else if (len <= (ssize_t)head->len - - skb->len - rxm->offset) { - /* Length must be into new skb (and also - * greater than zero) - */ - KCM_STATS_INCR(psock->stats.rx_bad_hdr_len); - desc->error = -EPROTO; - psock->rx_skb_head = NULL; - kcm_abort_rx_psock(psock, EPROTO, head); - break; - } - - rxm->full_len = len; - } - - extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len; - - if (extra < 0) { - /* Message not complete yet. */ - if (rxm->full_len - rxm->accum_len > - tcp_inq(psock->sk)) { - /* Don't have the whole messages in the socket - * buffer. Set psock->rx_need_bytes to wait for - * the rest of the message. Also, set "early - * eaten" since we've already buffered the skb - * but don't consume yet per tcp_read_sock. - */ - - if (!rxm->accum_len) { - /* Start RX timer for new message */ - kcm_start_rx_timer(psock); - } - - psock->rx_need_bytes = rxm->full_len - - rxm->accum_len; - rxm->accum_len += cand_len; - rxm->early_eaten = cand_len; - KCM_STATS_ADD(psock->stats.rx_bytes, cand_len); - desc->count = 0; /* Stop reading socket */ - break; - } - rxm->accum_len += cand_len; - eaten += cand_len; - WARN_ON(eaten != orig_len); - break; - } - - /* Positive extra indicates ore bytes than needed for the - * message - */ - - WARN_ON(extra > cand_len); - - eaten += (cand_len - extra); - - /* Hurray, we have a new message! */ - del_timer(&psock->rx_msg_timer); - psock->rx_skb_head = NULL; - KCM_STATS_INCR(psock->stats.rx_msgs); - -try_queue: - kcm = reserve_rx_kcm(psock, head); - if (!kcm) { - /* Unable to reserve a KCM, message is held in psock. */ - break; - } - - if (kcm_queue_rcv_skb(&kcm->sk, head)) { - /* Should mean socket buffer full */ - unreserve_rx_kcm(psock, false); - goto try_queue; - } - } - - if (cloned_orig) - kfree_skb(orig_skb); - - KCM_STATS_ADD(psock->stats.rx_bytes, eaten); - - return eaten; -} - -/* Called with lock held on lower socket */ -static int psock_tcp_read_sock(struct kcm_psock *psock) -{ - read_descriptor_t desc; - - desc.arg.data = psock; - desc.error = 0; - desc.count = 1; /* give more than one skb per call */ - - /* sk should be locked here, so okay to do tcp_read_sock */ - tcp_read_sock(psock->sk, &desc, kcm_tcp_recv); - - unreserve_rx_kcm(psock, true); - - return desc.error; -} - /* Lower sock lock held */ static void psock_tcp_data_ready(struct sock *sk) { @@ -631,65 +347,49 @@ static void psock_tcp_data_ready(struct sock *sk) read_lock_bh(&sk->sk_callback_lock); psock = (struct kcm_psock *)sk->sk_user_data; - if (unlikely(!psock || psock->rx_stopped)) - goto out; - - if (psock->ready_rx_msg) - goto out; - - if (psock->rx_need_bytes) { - if (tcp_inq(sk) >= psock->rx_need_bytes) - psock->rx_need_bytes = 0; - else - goto out; - } - - if (psock_tcp_read_sock(psock) == -ENOMEM) - queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); + if (likely(psock)) + strp_tcp_data_ready(&psock->strp); -out: read_unlock_bh(&sk->sk_callback_lock); } -static void do_psock_rx_work(struct kcm_psock *psock) +/* Called with lower sock held */ +static void kcm_rcv_strparser(struct strparser *strp, struct sk_buff *skb) { - read_descriptor_t rd_desc; - struct sock *csk = psock->sk; - - /* We need the read lock to synchronize with psock_tcp_data_ready. We - * need the socket lock for calling tcp_read_sock. - */ - lock_sock(csk); - read_lock_bh(&csk->sk_callback_lock); - - if (unlikely(csk->sk_user_data != psock)) - goto out; - - if (unlikely(psock->rx_stopped)) - goto out; - - if (psock->ready_rx_msg) - goto out; - - rd_desc.arg.data = psock; + struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); + struct kcm_sock *kcm; - if (psock_tcp_read_sock(psock) == -ENOMEM) - queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); +try_queue: + kcm = reserve_rx_kcm(psock, skb); + if (!kcm) { + /* Unable to reserve a KCM, message is held in psock and strp + * is paused. + */ + return; + } -out: - read_unlock_bh(&csk->sk_callback_lock); - release_sock(csk); + if (kcm_queue_rcv_skb(&kcm->sk, skb)) { + /* Should mean socket buffer full */ + unreserve_rx_kcm(psock, false); + goto try_queue; + } } -static void psock_rx_work(struct work_struct *w) +static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb) { - do_psock_rx_work(container_of(w, struct kcm_psock, rx_work)); + struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); + struct bpf_prog *prog = psock->bpf_prog; + + return (*prog->bpf_func)(skb, prog->insnsi); } -static void psock_rx_delayed_work(struct work_struct *w) +static int kcm_read_sock_done(struct strparser *strp, int err) { - do_psock_rx_work(container_of(w, struct kcm_psock, - rx_delayed_work.work)); + struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); + + unreserve_rx_kcm(psock, true); + + return err; } static void psock_tcp_state_change(struct sock *sk) @@ -713,14 +413,13 @@ static void psock_tcp_write_space(struct sock *sk) psock = (struct kcm_psock *)sk->sk_user_data; if (unlikely(!psock)) goto out; - mux = psock->mux; spin_lock_bh(&mux->lock); /* Check if the socket is reserved so someone is waiting for sending. */ kcm = psock->tx_kcm; - if (kcm) + if (kcm && !unlikely(kcm->tx_stopped)) queue_work(kcm_wq, &kcm->tx_work); spin_unlock_bh(&mux->lock); @@ -1411,7 +1110,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, struct kcm_sock *kcm = kcm_sk(sk); int err = 0; long timeo; - struct kcm_rx_msg *rxm; + struct strp_rx_msg *rxm; int copied = 0; struct sk_buff *skb; @@ -1425,7 +1124,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, /* Okay, have a message on the receive queue */ - rxm = kcm_rx_msg(skb); + rxm = strp_rx_msg(skb); if (len > rxm->full_len) len = rxm->full_len; @@ -1481,7 +1180,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, struct sock *sk = sock->sk; struct kcm_sock *kcm = kcm_sk(sk); long timeo; - struct kcm_rx_msg *rxm; + struct strp_rx_msg *rxm; int err = 0; ssize_t copied; struct sk_buff *skb; @@ -1498,7 +1197,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, /* Okay, have a message on the receive queue */ - rxm = kcm_rx_msg(skb); + rxm = strp_rx_msg(skb); if (len > rxm->full_len) len = rxm->full_len; @@ -1674,15 +1373,6 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) spin_unlock_bh(&mux->rx_lock); } -static void kcm_rx_msg_timeout(unsigned long arg) -{ - struct kcm_psock *psock = (struct kcm_psock *)arg; - - /* Message assembly timed out */ - KCM_STATS_INCR(psock->stats.rx_msg_timeouts); - kcm_abort_rx_psock(psock, ETIMEDOUT, NULL); -} - static int kcm_attach(struct socket *sock, struct socket *csock, struct bpf_prog *prog) { @@ -1692,6 +1382,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock, struct kcm_psock *psock = NULL, *tpsock; struct list_head *head; int index = 0; + struct strp_callbacks cb; if (csock->ops->family != PF_INET && csock->ops->family != PF_INET6) @@ -1713,11 +1404,12 @@ static int kcm_attach(struct socket *sock, struct socket *csock, psock->sk = csk; psock->bpf_prog = prog; - setup_timer(&psock->rx_msg_timer, kcm_rx_msg_timeout, - (unsigned long)psock); + cb.rcv_msg = kcm_rcv_strparser; + cb.abort_parser = NULL; + cb.parse_msg = kcm_parse_func_strparser; + cb.read_sock_done = kcm_read_sock_done; - INIT_WORK(&psock->rx_work, psock_rx_work); - INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work); + strp_init(&psock->strp, csk, &cb); sock_hold(csk); @@ -1750,7 +1442,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock, spin_unlock_bh(&mux->lock); /* Schedule RX work in case there are already bytes queued */ - queue_work(kcm_wq, &psock->rx_work); + strp_check_rcv(&psock->strp); return 0; } @@ -1785,6 +1477,7 @@ out: return err; } +/* Lower socket lock held */ static void kcm_unattach(struct kcm_psock *psock) { struct sock *csk = psock->sk; @@ -1798,7 +1491,7 @@ static void kcm_unattach(struct kcm_psock *psock) csk->sk_data_ready = psock->save_data_ready; csk->sk_write_space = psock->save_write_space; csk->sk_state_change = psock->save_state_change; - psock->rx_stopped = 1; + strp_stop(&psock->strp); if (WARN_ON(psock->rx_kcm)) { write_unlock_bh(&csk->sk_callback_lock); @@ -1821,18 +1514,14 @@ static void kcm_unattach(struct kcm_psock *psock) write_unlock_bh(&csk->sk_callback_lock); - del_timer_sync(&psock->rx_msg_timer); - cancel_work_sync(&psock->rx_work); - cancel_delayed_work_sync(&psock->rx_delayed_work); + strp_done(&psock->strp); bpf_prog_put(psock->bpf_prog); - kfree_skb(psock->rx_skb_head); - psock->rx_skb_head = NULL; - spin_lock_bh(&mux->lock); aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats); + save_strp_stats(&psock->strp, &mux->aggregate_strp_stats); KCM_STATS_INCR(mux->stats.psock_unattach); @@ -1915,6 +1604,7 @@ static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info) spin_unlock_bh(&mux->lock); + /* Lower socket lock should already be held */ kcm_unattach(psock); err = 0; @@ -2059,8 +1749,11 @@ static void release_mux(struct kcm_mux *mux) /* Release psocks */ list_for_each_entry_safe(psock, tmp_psock, &mux->psocks, psock_list) { - if (!WARN_ON(psock->unattaching)) + if (!WARN_ON(psock->unattaching)) { + lock_sock(psock->strp.sk); kcm_unattach(psock); + release_sock(psock->strp.sk); + } } if (WARN_ON(mux->psocks_cnt)) @@ -2072,6 +1765,8 @@ static void release_mux(struct kcm_mux *mux) aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats); aggregate_psock_stats(&mux->aggregate_psock_stats, &knet->aggregate_psock_stats); + aggregate_strp_stats(&mux->aggregate_strp_stats, + &knet->aggregate_strp_stats); list_del_rcu(&mux->kcm_mux_list); knet->count--; mutex_unlock(&knet->mutex); @@ -2151,6 +1846,13 @@ static int kcm_release(struct socket *sock) * it will just return. */ __skb_queue_purge(&sk->sk_write_queue); + + /* Set tx_stopped. This is checked when psock is bound to a kcm and we + * get a writespace callback. This prevents further work being queued + * from the callback (unbinding the psock occurs after canceling work. + */ + kcm->tx_stopped = 1; + release_sock(sk); spin_lock_bh(&mux->lock); -- cgit v1.2.3 From f6a66927692e30bdc1792e7a1fc2107d4dfcf42d Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 17 Aug 2016 13:36:11 +0300 Subject: flow_dissector: Get vlan priority in addition to vlan id Add vlan priority check to the flow dissector by adding new flow dissector struct, flow_dissector_key_vlan which includes vlan tag fields. vlan_id and flow_label fields were under the same struct (flow_dissector_key_tags). It was a convenient setting since struct flow_dissector_key_tags is used by struct flow_keys and by setting vlan_id and flow_label under the same struct, we get precisely 24 or 48 bytes in flow_keys from flow_dissector_key_basic. Now, when adding vlan priority support, the code will be cleaner if flow_label and vlan tag won't be under the same struct anymore. Signed-off-by: Hadar Hen Zion Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 1 + include/net/flow_dissector.h | 12 +++++++++--- net/core/flow_dissector.c | 25 ++++++++++++++++--------- 3 files changed, 26 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index a5f6ce6b578c..49d4aef1f789 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -81,6 +81,7 @@ static inline bool is_vlan_dev(const struct net_device *dev) #define skb_vlan_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) #define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) #define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) +#define skb_vlan_tag_get_prio(__skb) ((__skb)->vlan_tci & VLAN_PRIO_MASK) /** * struct vlan_pcpu_stats - VLAN percpu rx/tx stats diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index d3d60dccd19f..f266b512c3bd 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -32,8 +32,13 @@ struct flow_dissector_key_basic { }; struct flow_dissector_key_tags { - u32 vlan_id:12, - flow_label:20; + u32 flow_label; +}; + +struct flow_dissector_key_vlan { + u16 vlan_id:12, + vlan_priority:3; + u16 padding; }; struct flow_dissector_key_keyid { @@ -119,7 +124,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ - FLOW_DISSECTOR_KEY_VLANID, /* struct flow_dissector_key_flow_tags */ + FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */ @@ -148,6 +153,7 @@ struct flow_keys { #define FLOW_KEYS_HASH_START_FIELD basic struct flow_dissector_key_basic basic; struct flow_dissector_key_tags tags; + struct flow_dissector_key_vlan vlan; struct flow_dissector_key_keyid keyid; struct flow_dissector_key_ports ports; struct flow_dissector_key_addrs addrs; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 362d693c003f..a2879c0f6c4c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -118,6 +118,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_ports *key_ports; struct flow_dissector_key_tags *key_tags; + struct flow_dissector_key_vlan *key_vlan; struct flow_dissector_key_keyid *key_keyid; bool skip_vlan = false; u8 ip_proto = 0; @@ -266,16 +267,22 @@ ipv6: skip_vlan = true; if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_VLANID)) { - key_tags = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_VLANID, + FLOW_DISSECTOR_KEY_VLAN)) { + key_vlan = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_VLAN, target_container); - if (skb_vlan_tag_present(skb)) - key_tags->vlan_id = skb_vlan_tag_get_id(skb); - else - key_tags->vlan_id = ntohs(vlan->h_vlan_TCI) & + if (skb_vlan_tag_present(skb)) { + key_vlan->vlan_id = skb_vlan_tag_get_id(skb); + key_vlan->vlan_priority = + (skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT); + } else { + key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) & VLAN_VID_MASK; + key_vlan->vlan_priority = + (ntohs(vlan->h_vlan_TCI) & + VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + } } goto again; @@ -935,8 +942,8 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { .offset = offsetof(struct flow_keys, ports), }, { - .key_id = FLOW_DISSECTOR_KEY_VLANID, - .offset = offsetof(struct flow_keys, tags), + .key_id = FLOW_DISSECTOR_KEY_VLAN, + .offset = offsetof(struct flow_keys, vlan), }, { .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL, -- cgit v1.2.3 From 956af37102b515512331a03c35c958b2a1d8dd87 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Wed, 17 Aug 2016 13:36:14 +0300 Subject: net_sched: act_vlan: Add priority option The current vlan push action supports only vid and protocol options. Add priority option. Example script that adds vlan push action with vid and priority: tc filter add dev veth0 protocol ip parent ffff: \ flower \ indev veth0 \ action vlan push id 100 priority 5 Signed-off-by: Hadar Hen Zion Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/tc_act/tc_vlan.h | 1 + include/uapi/linux/tc_act/tc_vlan.h | 1 + net/sched/act_vlan.c | 13 +++++++++++-- 3 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index e29f52e8bdf1..6b835889ea30 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -20,6 +20,7 @@ struct tcf_vlan { int tcfv_action; u16 tcfv_push_vid; __be16 tcfv_push_proto; + u8 tcfv_push_prio; }; #define to_vlan(a) ((struct tcf_vlan *)a) diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h index 31151ff6264f..be72b6e3843b 100644 --- a/include/uapi/linux/tc_act/tc_vlan.h +++ b/include/uapi/linux/tc_act/tc_vlan.h @@ -29,6 +29,7 @@ enum { TCA_VLAN_PUSH_VLAN_ID, TCA_VLAN_PUSH_VLAN_PROTOCOL, TCA_VLAN_PAD, + TCA_VLAN_PUSH_VLAN_PRIORITY, __TCA_VLAN_MAX, }; #define TCA_VLAN_MAX (__TCA_VLAN_MAX - 1) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 691409de3e1a..59a8d3150ae2 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -43,7 +43,8 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, goto drop; break; case TCA_VLAN_ACT_PUSH: - err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid); + err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid | + (v->tcfv_push_prio << VLAN_PRIO_SHIFT)); if (err) goto drop; break; @@ -65,6 +66,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = { [TCA_VLAN_PARMS] = { .len = sizeof(struct tc_vlan) }, [TCA_VLAN_PUSH_VLAN_ID] = { .type = NLA_U16 }, [TCA_VLAN_PUSH_VLAN_PROTOCOL] = { .type = NLA_U16 }, + [TCA_VLAN_PUSH_VLAN_PRIORITY] = { .type = NLA_U8 }, }; static int tcf_vlan_init(struct net *net, struct nlattr *nla, @@ -78,6 +80,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, int action; __be16 push_vid = 0; __be16 push_proto = 0; + u8 push_prio = 0; bool exists = false; int ret = 0, err; @@ -123,6 +126,9 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, } else { push_proto = htons(ETH_P_8021Q); } + + if (tb[TCA_VLAN_PUSH_VLAN_PRIORITY]) + push_prio = nla_get_u8(tb[TCA_VLAN_PUSH_VLAN_PRIORITY]); break; default: if (exists) @@ -150,6 +156,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, v->tcfv_action = action; v->tcfv_push_vid = push_vid; + v->tcfv_push_prio = push_prio; v->tcfv_push_proto = push_proto; v->tcf_action = parm->action; @@ -181,7 +188,9 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, if (v->tcfv_action == TCA_VLAN_ACT_PUSH && (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) || nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, - v->tcfv_push_proto))) + v->tcfv_push_proto) || + (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY, + v->tcfv_push_prio)))) goto nla_put_failure; tcf_tm_dump(&t, &v->tcf_tm); -- cgit v1.2.3 From 54fd9c2dff144ed287ab3b8189dcdcd4d298d0cc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 18 Aug 2016 01:00:41 +0200 Subject: bpf: get rid of cgroup helper related ifdefs As recently discussed during the task_under_cgroup_hierarchy() addition, we should get rid of the ifdefs surrounding the bpf_skb_under_cgroup() helper. If related functionality is not built-in, the helper cannot be used anyway, which is also in line with what we do for all other helpers. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/net/sock.h | 10 ++++++++++ net/core/filter.c | 6 +----- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index ff5be7e8ddea..2aab9b63bf16 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1114,6 +1114,16 @@ static inline bool sk_stream_is_writeable(const struct sock *sk) sk_stream_memory_free(sk); } +static inline int sk_under_cgroup_hierarchy(struct sock *sk, + struct cgroup *ancestor) +{ +#ifdef CONFIG_SOCK_CGROUP_DATA + return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), + ancestor); +#else + return -ENOTSUPP; +#endif +} static inline bool sk_has_memory_pressure(const struct sock *sk) { diff --git a/net/core/filter.c b/net/core/filter.c index 3b60dfd2ce92..a83766be1ad2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2374,7 +2374,6 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) } } -#ifdef CONFIG_SOCK_CGROUP_DATA static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *)(long)r1; @@ -2395,7 +2394,7 @@ static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) if (unlikely(!cgrp)) return -EAGAIN; - return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp); + return sk_under_cgroup_hierarchy(sk, cgrp); } static const struct bpf_func_proto bpf_skb_under_cgroup_proto = { @@ -2406,7 +2405,6 @@ static const struct bpf_func_proto bpf_skb_under_cgroup_proto = { .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, }; -#endif static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff, unsigned long off, unsigned long len) @@ -2515,10 +2513,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_event_output_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; -#ifdef CONFIG_SOCK_CGROUP_DATA case BPF_FUNC_skb_under_cgroup: return &bpf_skb_under_cgroup_proto; -#endif default: return sk_filter_func_proto(func_id); } -- cgit v1.2.3 From ea825e70d0e0798eda3a57b05c90f21f5a369128 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 18 Aug 2016 15:30:12 -0700 Subject: net: dsa: Export suspend/resume functions In preparation for allowing switch drivers to implement system-wide suspend/resume functions, export dsa_switch_suspend and dsa_switch_resume() such that these are callable from the appropriate driver specific suspend/resume functions. Reviewed-by: Andrew Lunn Tested-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 14 ++++++++++++++ net/dsa/dsa.c | 6 ++++-- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 2217a3f817f8..d00c392bc9f8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -386,4 +386,18 @@ static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) void dsa_unregister_switch(struct dsa_switch *ds); int dsa_register_switch(struct dsa_switch *ds, struct device_node *np); +#ifdef CONFIG_PM_SLEEP +int dsa_switch_suspend(struct dsa_switch *ds); +int dsa_switch_resume(struct dsa_switch *ds); +#else +static inline int dsa_switch_suspend(struct dsa_switch *ds) +{ + return 0; +} +static inline int dsa_switch_resume(struct dsa_switch *ds) +{ + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + #endif diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 7e68bc6bc853..9f5b47200365 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -543,7 +543,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) } #ifdef CONFIG_PM_SLEEP -static int dsa_switch_suspend(struct dsa_switch *ds) +int dsa_switch_suspend(struct dsa_switch *ds) { int i, ret = 0; @@ -562,8 +562,9 @@ static int dsa_switch_suspend(struct dsa_switch *ds) return ret; } +EXPORT_SYMBOL_GPL(dsa_switch_suspend); -static int dsa_switch_resume(struct dsa_switch *ds) +int dsa_switch_resume(struct dsa_switch *ds) { int i, ret = 0; @@ -585,6 +586,7 @@ static int dsa_switch_resume(struct dsa_switch *ds) return 0; } +EXPORT_SYMBOL_GPL(dsa_switch_resume); #endif /* platform driver init and cleanup *****************************************/ -- cgit v1.2.3 From b9a24bb76bf611a5268ceffe04219e6ad264559b Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 19 Aug 2016 12:36:54 -0700 Subject: net_sched: properly handle failure case of tcf_exts_init() After commit 22dc13c837c3 ("net_sched: convert tcf_exts from list to pointer array") we do dynamic allocation in tcf_exts_init(), therefore we need to handle the ENOMEM case properly. Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 6 ++-- net/sched/cls_basic.c | 12 +++++-- net/sched/cls_bpf.c | 27 +++++++++------ net/sched/cls_cgroup.c | 13 +++++-- net/sched/cls_flow.c | 26 ++++++++------ net/sched/cls_flower.c | 11 ++++-- net/sched/cls_fw.c | 18 +++++++--- net/sched/cls_route.c | 14 +++++--- net/sched/cls_rsvp.h | 17 +++++++--- net/sched/cls_tcindex.c | 90 +++++++++++++++++++++++++++++++++++-------------- net/sched/cls_u32.c | 21 ++++++++---- 11 files changed, 181 insertions(+), 74 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index c99508d426cc..a459be5fe1c2 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -69,17 +69,19 @@ struct tcf_exts { int police; }; -static inline void tcf_exts_init(struct tcf_exts *exts, int action, int police) +static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police) { #ifdef CONFIG_NET_CLS_ACT exts->type = 0; exts->nr_actions = 0; exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), GFP_KERNEL); - WARN_ON(!exts->actions); /* TODO: propagate the error to callers */ + if (!exts->actions) + return -ENOMEM; #endif exts->action = action; exts->police = police; + return 0; } /** diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 0b8c3ace671f..eb219b78cd49 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -138,10 +138,12 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct tcf_exts e; struct tcf_ematch_tree t; - tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t); if (err < 0) @@ -189,7 +191,10 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (!fnew) return -ENOBUFS; - tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE); + err = tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE); + if (err < 0) + goto errout; + err = -EINVAL; if (handle) { fnew->handle = handle; @@ -226,6 +231,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index c3002c2c68bb..4742f415ee5b 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -311,17 +311,19 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; - tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); - ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); + ret = tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); if (ret < 0) return ret; + ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); + if (ret < 0) + goto errout; if (tb[TCA_BPF_FLAGS]) { u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]); if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) { - tcf_exts_destroy(&exts); - return -EINVAL; + ret = -EINVAL; + goto errout; } have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT; @@ -331,10 +333,8 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) : cls_bpf_prog_from_efd(tb, prog, tp); - if (ret < 0) { - tcf_exts_destroy(&exts); - return ret; - } + if (ret < 0) + goto errout; if (tb[TCA_BPF_CLASSID]) { prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]); @@ -343,6 +343,10 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, tcf_exts_change(tp, &prog->exts, &exts); return 0; + +errout: + tcf_exts_destroy(&exts); + return ret; } static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, @@ -388,7 +392,9 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (!prog) return -ENOBUFS; - tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE); + ret = tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE); + if (ret < 0) + goto errout; if (oldprog) { if (handle && oldprog->handle != handle) { @@ -420,9 +426,10 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, *arg = (unsigned long) prog; return 0; + errout: + tcf_exts_destroy(&prog->exts); kfree(prog); - return ret; } diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 4c85bd3a750c..85233c470035 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -93,7 +93,9 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (!new) return -ENOBUFS; - tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); + err = tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); + if (err < 0) + goto errout; new->handle = handle; new->tp = tp; err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], @@ -101,10 +103,14 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + err = tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); if (err < 0) goto errout; + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + if (err < 0) { + tcf_exts_destroy(&e); + goto errout; + } err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t); if (err < 0) { @@ -120,6 +126,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, call_rcu(&head->rcu, cls_cgroup_destroy_rcu); return 0; errout: + tcf_exts_destroy(&new->exts); kfree(new); return err; } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index fbfec6a18839..2c1ae549edbf 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -418,10 +418,12 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, return -EOPNOTSUPP; } - tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE); + err = tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE); + if (err < 0) + goto err1; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); if (err < 0) - return err; + goto err1; err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t); if (err < 0) @@ -432,13 +434,15 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (!fnew) goto err2; - tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + if (err < 0) + goto err3; fold = (struct flow_filter *)*arg; if (fold) { err = -EINVAL; if (fold->handle != handle && handle) - goto err2; + goto err3; /* Copy fold into fnew */ fnew->tp = fold->tp; @@ -458,31 +462,31 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) - goto err2; + goto err3; if (mode == FLOW_MODE_HASH) perturb_period = fold->perturb_period; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) - goto err2; + goto err3; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } } else { err = -EINVAL; if (!handle) - goto err2; + goto err3; if (!tb[TCA_FLOW_KEYS]) - goto err2; + goto err3; mode = FLOW_MODE_MAP; if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) - goto err2; + goto err3; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) - goto err2; + goto err3; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } @@ -542,6 +546,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, call_rcu(&fold->rcu, flow_destroy_filter); return 0; +err3: + tcf_exts_destroy(&fnew->exts); err2: tcf_em_tree_destroy(&t); kfree(fnew); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 1e11e57e6947..532ab6751343 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -513,10 +513,12 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, struct tcf_exts e; int err; - tcf_exts_init(&e, TCA_FLOWER_ACT, 0); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_FLOWER_ACT, 0); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; if (tb[TCA_FLOWER_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]); @@ -585,7 +587,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (!fnew) return -ENOBUFS; - tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0); + err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0); + if (err < 0) + goto errout; if (!handle) { handle = fl_grab_new_handle(tp, head); @@ -649,6 +653,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; } diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index f23a3b68bba6..cc0bda945800 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -195,10 +195,12 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, u32 mask; int err; - tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + err = tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + if (err < 0) + goto errout; if (tb[TCA_FW_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); @@ -270,10 +272,15 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, #endif /* CONFIG_NET_CLS_IND */ fnew->tp = f->tp; - tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE); + err = tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE); + if (err < 0) { + kfree(fnew); + return err; + } err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr); if (err < 0) { + tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; } @@ -313,7 +320,9 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (f == NULL) return -ENOBUFS; - tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE); + err = tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE); + if (err < 0) + goto errout; f->id = handle; f->tp = tp; @@ -328,6 +337,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + tcf_exts_destroy(&f->exts); kfree(f); return err; } diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 08a3b0a6f5ab..c91e65d81a48 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -383,17 +383,19 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *est, int new, bool ovr) { - int err; u32 id = 0, to = 0, nhandle = 0x8000; struct route4_filter *fp; unsigned int h1; struct route4_bucket *b; struct tcf_exts e; + int err; - tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; err = -EINVAL; if (tb[TCA_ROUTE4_TO]) { @@ -503,7 +505,10 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (!f) goto errout; - tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); + err = tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); + if (err < 0) + goto errout; + if (fold) { f->id = fold->id; f->iif = fold->iif; @@ -557,6 +562,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + tcf_exts_destroy(&f->exts); kfree(f); return err; } diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index f9c9fc075fe6..4f05a19fb073 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -487,10 +487,12 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (err < 0) return err; - tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + if (err < 0) + goto errout2; f = (struct rsvp_filter *)*arg; if (f) { @@ -506,7 +508,11 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, goto errout2; } - tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); + err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); + if (err < 0) { + kfree(n); + goto errout2; + } if (tb[TCA_RSVP_CLASSID]) { n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); @@ -530,7 +536,9 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (f == NULL) goto errout2; - tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); + err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); + if (err < 0) + goto errout; h2 = 16; if (tb[TCA_RSVP_SRC]) { memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src)); @@ -627,6 +635,7 @@ insert: goto insert; errout: + tcf_exts_destroy(&f->exts); kfree(f); errout2: tcf_exts_destroy(&e); diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 944c8ff45055..d9500709831f 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -219,10 +219,10 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = { [TCA_TCINDEX_CLASSID] = { .type = NLA_U32 }, }; -static void tcindex_filter_result_init(struct tcindex_filter_result *r) +static int tcindex_filter_result_init(struct tcindex_filter_result *r) { memset(r, 0, sizeof(*r)); - tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); } static void __tcindex_partial_destroy(struct rcu_head *head) @@ -233,23 +233,57 @@ static void __tcindex_partial_destroy(struct rcu_head *head) kfree(p); } +static void tcindex_free_perfect_hash(struct tcindex_data *cp) +{ + int i; + + for (i = 0; i < cp->hash; i++) + tcf_exts_destroy(&cp->perfect[i].exts); + kfree(cp->perfect); +} + +static int tcindex_alloc_perfect_hash(struct tcindex_data *cp) +{ + int i, err = 0; + + cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result), + GFP_KERNEL); + if (!cp->perfect) + return -ENOMEM; + + for (i = 0; i < cp->hash; i++) { + err = tcf_exts_init(&cp->perfect[i].exts, + TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + if (err < 0) + goto errout; + } + + return 0; + +errout: + tcindex_free_perfect_hash(cp); + return err; +} + static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, struct tcindex_filter_result *r, struct nlattr **tb, struct nlattr *est, bool ovr) { - int err, balloc = 0; struct tcindex_filter_result new_filter_result, *old_r = r; struct tcindex_filter_result cr; - struct tcindex_data *cp, *oldp; + struct tcindex_data *cp = NULL, *oldp; struct tcindex_filter *f = NULL; /* make gcc behave */ + int err, balloc = 0; struct tcf_exts e; - tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; err = -ENOMEM; /* tcindex_data attributes must look atomic to classifier/lookup so @@ -270,19 +304,20 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (p->perfect) { int i; - cp->perfect = kmemdup(p->perfect, - sizeof(*r) * cp->hash, GFP_KERNEL); - if (!cp->perfect) + if (tcindex_alloc_perfect_hash(cp) < 0) goto errout; for (i = 0; i < cp->hash; i++) - tcf_exts_init(&cp->perfect[i].exts, - TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + cp->perfect[i].res = p->perfect[i].res; balloc = 1; } cp->h = p->h; - tcindex_filter_result_init(&new_filter_result); - tcindex_filter_result_init(&cr); + err = tcindex_filter_result_init(&new_filter_result); + if (err < 0) + goto errout1; + err = tcindex_filter_result_init(&cr); + if (err < 0) + goto errout1; if (old_r) cr.res = r->res; @@ -338,15 +373,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = -ENOMEM; if (!cp->perfect && !cp->h) { if (valid_perfect_hash(cp)) { - int i; - - cp->perfect = kcalloc(cp->hash, sizeof(*r), GFP_KERNEL); - if (!cp->perfect) + if (tcindex_alloc_perfect_hash(cp) < 0) goto errout_alloc; - for (i = 0; i < cp->hash; i++) - tcf_exts_init(&cp->perfect[i].exts, - TCA_TCINDEX_ACT, - TCA_TCINDEX_POLICE); balloc = 1; } else { struct tcindex_filter __rcu **hash; @@ -373,8 +401,12 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (!f) goto errout_alloc; f->key = handle; - tcindex_filter_result_init(&f->result); f->next = NULL; + err = tcindex_filter_result_init(&f->result); + if (err < 0) { + kfree(f); + goto errout_alloc; + } } if (tb[TCA_TCINDEX_CLASSID]) { @@ -387,8 +419,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, else tcf_exts_change(tp, &cr.exts, &e); - if (old_r && old_r != r) - tcindex_filter_result_init(old_r); + if (old_r && old_r != r) { + err = tcindex_filter_result_init(old_r); + if (err < 0) { + kfree(f); + goto errout_alloc; + } + } oldp = p; r->res = cr.res; @@ -415,9 +452,12 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, errout_alloc: if (balloc == 1) - kfree(cp->perfect); + tcindex_free_perfect_hash(cp); else if (balloc == 2) kfree(cp->h); +errout1: + tcf_exts_destroy(&cr.exts); + tcf_exts_destroy(&new_filter_result.exts); errout: kfree(cp); tcf_exts_destroy(&e); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index ffe593efe930..a29263a9d8c1 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -709,13 +709,15 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, struct tc_u_knode *n, struct nlattr **tb, struct nlattr *est, bool ovr) { - int err; struct tcf_exts e; + int err; - tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; err = -EINVAL; if (tb[TCA_U32_LINK]) { @@ -833,7 +835,10 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, new->tp = tp; memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); - tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE); + if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) { + kfree(new); + return NULL; + } return new; } @@ -985,9 +990,12 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, n->handle = handle; n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; n->flags = flags; - tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE); n->tp = tp; + err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE); + if (err < 0) + goto errout; + #ifdef CONFIG_CLS_U32_MARK n->pcpu_success = alloc_percpu(u32); if (!n->pcpu_success) { @@ -1028,9 +1036,10 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, errhw: #ifdef CONFIG_CLS_U32_MARK free_percpu(n->pcpu_success); -errout: #endif +errout: + tcf_exts_destroy(&n->exts); #ifdef CONFIG_CLS_U32_PERF free_percpu(n->pf); #endif -- cgit v1.2.3 From 7b314362a2344feaafbdf6aa8f3d57077728e37a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 22 Aug 2016 16:01:01 +0200 Subject: net: dsa: Allow the DSA driver to indicate the tag protocol DSA drivers may drive different families of switches which need different tag protocol. Rather than hard code the tag protocol in the driver structure, have a callback for the DSA core to call. Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 7 ++++++- drivers/net/dsa/bcm_sf2.c | 7 ++++++- drivers/net/dsa/mv88e6060.c | 7 ++++++- drivers/net/dsa/mv88e6xxx/chip.c | 7 ++++++- include/net/dsa.h | 5 +++-- net/dsa/dsa.c | 5 ++++- net/dsa/dsa2.c | 4 +++- 7 files changed, 34 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 38ee10de7884..65ecb51f99e5 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1373,8 +1373,13 @@ static void b53_br_set_stp_state(struct dsa_switch *ds, int port, b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), reg); } +static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds) +{ + return DSA_TAG_PROTO_NONE; +} + static struct dsa_switch_driver b53_switch_ops = { - .tag_protocol = DSA_TAG_PROTO_NONE, + .get_tag_protocol = b53_get_tag_protocol, .setup = b53_setup, .set_addr = b53_set_addr, .get_strings = b53_get_strings, diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 8e6fe13dbec3..b47a74b37a42 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -136,6 +136,11 @@ static int bcm_sf2_sw_get_sset_count(struct dsa_switch *ds) return BCM_SF2_STATS_SIZE; } +static enum dsa_tag_protocol bcm_sf2_sw_get_tag_protocol(struct dsa_switch *ds) +{ + return DSA_TAG_PROTO_BRCM; +} + static void bcm_sf2_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) { struct bcm_sf2_priv *priv = ds_to_priv(ds); @@ -1577,8 +1582,8 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) } static struct dsa_switch_driver bcm_sf2_switch_driver = { - .tag_protocol = DSA_TAG_PROTO_BRCM, .setup = bcm_sf2_sw_setup, + .get_tag_protocol = bcm_sf2_sw_get_tag_protocol, .set_addr = bcm_sf2_sw_set_addr, .get_phy_flags = bcm_sf2_sw_get_phy_flags, .get_strings = bcm_sf2_sw_get_strings, diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index e36b40886bd8..1fdfbf3a50bc 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -69,6 +69,11 @@ static const char *mv88e6060_get_name(struct mii_bus *bus, int sw_addr) return NULL; } +static enum dsa_tag_protocol mv88e6060_get_tag_protocol(struct dsa_switch *ds) +{ + return DSA_TAG_PROTO_TRAILER; +} + static const char *mv88e6060_drv_probe(struct device *dsa_dev, struct device *host_dev, int sw_addr, void **_priv) @@ -248,7 +253,7 @@ mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) } static struct dsa_switch_driver mv88e6060_switch_driver = { - .tag_protocol = DSA_TAG_PROTO_TRAILER, + .get_tag_protocol = mv88e6060_get_tag_protocol, .probe = mv88e6060_drv_probe, .setup = mv88e6060_setup, .set_addr = mv88e6060_set_addr, diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 014b52bd72f1..63cad6c00bc7 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3924,6 +3924,11 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, return 0; } +static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds) +{ + return DSA_TAG_PROTO_EDSA; +} + static const char *mv88e6xxx_drv_probe(struct device *dsa_dev, struct device *host_dev, int sw_addr, void **priv) @@ -3967,8 +3972,8 @@ free: } static struct dsa_switch_driver mv88e6xxx_switch_driver = { - .tag_protocol = DSA_TAG_PROTO_EDSA, .probe = mv88e6xxx_drv_probe, + .get_tag_protocol = mv88e6xxx_get_tag_protocol, .setup = mv88e6xxx_setup, .set_addr = mv88e6xxx_set_addr, .adjust_link = mv88e6xxx_adjust_link, diff --git a/include/net/dsa.h b/include/net/dsa.h index d00c392bc9f8..8ca2684c5358 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -239,14 +239,15 @@ struct switchdev_obj_port_vlan; struct dsa_switch_driver { struct list_head list; - enum dsa_tag_protocol tag_protocol; - /* * Probing and setup. */ const char *(*probe)(struct device *dsa_dev, struct device *host_dev, int sw_addr, void **priv); + + enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds); + int (*setup)(struct dsa_switch *ds); int (*set_addr)(struct dsa_switch *ds, u8 *addr); u32 (*get_phy_flags)(struct dsa_switch *ds, int port); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 8bda74e595a5..8d3a28d4e99d 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -354,7 +354,10 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) * switch. */ if (dst->cpu_switch == index) { - dst->tag_ops = dsa_resolve_tag_protocol(drv->tag_protocol); + enum dsa_tag_protocol tag_protocol; + + tag_protocol = drv->get_tag_protocol(ds); + dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(dst->tag_ops)) { ret = PTR_ERR(dst->tag_ops); goto out; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index f30bad9678f0..2e343221464c 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -443,6 +443,7 @@ static int dsa_cpu_parse(struct device_node *port, u32 index, struct dsa_switch_tree *dst, struct dsa_switch *ds) { + enum dsa_tag_protocol tag_protocol; struct net_device *ethernet_dev; struct device_node *ethernet; @@ -465,7 +466,8 @@ static int dsa_cpu_parse(struct device_node *port, u32 index, dst->cpu_port = index; } - dst->tag_ops = dsa_resolve_tag_protocol(ds->drv->tag_protocol); + tag_protocol = ds->drv->get_tag_protocol(ds); + dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(dst->tag_ops)) { dev_warn(ds->dev, "No tagger for this switch\n"); return PTR_ERR(dst->tag_ops); -- cgit v1.2.3 From cff6a334e63420e95ec40dc7fcdc0b8258615760 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 23 Aug 2016 11:55:30 -0700 Subject: strparser: Queue work when being unpaused When the upper layer unpauses a stream parser connection we need to queue rx_work to make sure no events are missed. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/strparser.h | 5 +---- net/strparser/strparser.c | 11 +++++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/strparser.h b/include/net/strparser.h index fdb3d6746cc4..91fa0b958426 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -88,10 +88,7 @@ static inline void strp_pause(struct strparser *strp) } /* May be called without holding lock for attached socket */ -static inline void strp_unpause(struct strparser *strp) -{ - strp->rx_paused = 0; -} +void strp_unpause(struct strparser *strp); static inline void save_strp_stats(struct strparser *strp, struct strp_aggr_stats *agg_stats) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 68334b56db1e..4ecfc10cbe6d 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -445,6 +445,17 @@ int strp_init(struct strparser *strp, struct sock *csk, } EXPORT_SYMBOL_GPL(strp_init); +void strp_unpause(struct strparser *strp) +{ + strp->rx_paused = 0; + + /* Sync setting rx_paused with RX work */ + smp_mb(); + + queue_work(strp_wq, &strp->rx_work); +} +EXPORT_SYMBOL_GPL(strp_unpause); + /* strp must already be stopped so that strp_tcp_recv will no longer be called. * Note that strp_done is not called with the lower socket held. */ -- cgit v1.2.3 From cebc5cbab48f5c58512e26aa1965284354227258 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 22 Aug 2016 17:17:54 -0700 Subject: net-tcp: retire TFO_SERVER_WO_SOCKOPT2 config TFO_SERVER_WO_SOCKOPT2 was intended for debugging purposes during Fast Open development. Remove this config option and also update/clean-up the documentation of the Fast Open sysctl. Reported-by: Piotr Jurkiewicz Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 45 +++++++++++++++++----------------- include/net/tcp.h | 3 +-- net/ipv4/af_inet.c | 21 ++++++---------- 3 files changed, 32 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 9ae929395b24..3db8c67d2c8d 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -575,32 +575,33 @@ tcp_syncookies - BOOLEAN unconditionally generation of syncookies. tcp_fastopen - INTEGER - Enable TCP Fast Open feature (draft-ietf-tcpm-fastopen) to send data - in the opening SYN packet. To use this feature, the client application - must use sendmsg() or sendto() with MSG_FASTOPEN flag rather than - connect() to perform a TCP handshake automatically. + Enable TCP Fast Open (RFC7413) to send and accept data in the opening + SYN packet. - The values (bitmap) are - 1: Enables sending data in the opening SYN on the client w/ MSG_FASTOPEN. - 2: Enables TCP Fast Open on the server side, i.e., allowing data in - a SYN packet to be accepted and passed to the application before - 3-way hand shake finishes. - 4: Send data in the opening SYN regardless of cookie availability and - without a cookie option. - 0x100: Accept SYN data w/o validating the cookie. - 0x200: Accept data-in-SYN w/o any cookie option present. - 0x400/0x800: Enable Fast Open on all listeners regardless of the - TCP_FASTOPEN socket option. The two different flags designate two - different ways of setting max_qlen without the TCP_FASTOPEN socket - option. + The client support is enabled by flag 0x1 (on by default). The client + then must use sendmsg() or sendto() with the MSG_FASTOPEN flag, + rather than connect() to send data in SYN. - Default: 1 + The server support is enabled by flag 0x2 (off by default). Then + either enable for all listeners with another flag (0x400) or + enable individual listeners via TCP_FASTOPEN socket option with + the option value being the length of the syn-data backlog. - Note that the client & server side Fast Open flags (1 and 2 - respectively) must be also enabled before the rest of flags can take - effect. + The values (bitmap) are + 0x1: (client) enables sending data in the opening SYN on the client. + 0x2: (server) enables the server support, i.e., allowing data in + a SYN packet to be accepted and passed to the + application before 3-way handshake finishes. + 0x4: (client) send data in the opening SYN regardless of cookie + availability and without a cookie option. + 0x200: (server) accept data-in-SYN w/o any cookie option present. + 0x400: (server) enable all listeners to support Fast Open by + default without explicit TCP_FASTOPEN socket option. + + Default: 0x1 - See include/net/tcp.h and the code for more details. + Note that that additional client or server features are only + effective if the basic support (0x1 and 0x2) are enabled respectively. tcp_syn_retries - INTEGER Number of times initial SYNs for an active TCP connection attempt diff --git a/include/net/tcp.h b/include/net/tcp.h index c00e7d51bb18..25d64f6de69e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -227,10 +227,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TFO_SERVER_COOKIE_NOT_REQD 0x200 /* Force enable TFO on all listeners, i.e., not requiring the - * TCP_FASTOPEN socket option. SOCKOPT1/2 determine how to set max_qlen. + * TCP_FASTOPEN socket option. */ #define TFO_SERVER_WO_SOCKOPT1 0x400 -#define TFO_SERVER_WO_SOCKOPT2 0x800 extern struct inet_timewait_death_row tcp_death_row; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 55513e654d79..989a362814a9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -211,24 +211,19 @@ int inet_listen(struct socket *sock, int backlog) * we can only allow the backlog to be adjusted. */ if (old_state != TCP_LISTEN) { - /* Check special setups for testing purpose to enable TFO w/o - * requiring TCP_FASTOPEN sockopt. + /* Enable TFO w/o requiring TCP_FASTOPEN socket option. * Note that only TCP sockets (SOCK_STREAM) will reach here. - * Also fastopenq may already been allocated because this - * socket was in TCP_LISTEN state previously but was - * shutdown() (rather than close()). + * Also fastopen backlog may already been set via the option + * because the socket was in TCP_LISTEN state previously but + * was shutdown() rather than close(). */ - if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 && + if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && + (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { - if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0) - fastopen_queue_tune(sk, backlog); - else if ((sysctl_tcp_fastopen & - TFO_SERVER_WO_SOCKOPT2) != 0) - fastopen_queue_tune(sk, - ((uint)sysctl_tcp_fastopen) >> 16); - + fastopen_queue_tune(sk, backlog); tcp_fastopen_init_key_once(true); } + err = inet_csk_listen_start(sk, backlog); if (err) goto out; -- cgit v1.2.3 From 5d77dca82839ef016a93ad7acd7058b14d967752 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 23 Aug 2016 21:06:33 -0700 Subject: net: diag: support SOCK_DESTROY for UDP sockets This implements SOCK_DESTROY for UDP sockets similar to what was done for TCP with commit c1e64e298b8ca ("net: diag: Support destroying TCP sockets.") A process with a UDP socket targeted for destroy is awakened and recvmsg fails with ECONNABORTED. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/udp.h | 1 + net/ipv4/udp.c | 15 ++++++++++ net/ipv4/udp_diag.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/udp.c | 1 + 4 files changed, 96 insertions(+) (limited to 'include/net') diff --git a/include/net/udp.h b/include/net/udp.h index 8894d7144189..ea53a87d880f 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -251,6 +251,7 @@ int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, const struct sock *)); void udp_err(struct sk_buff *, u32); +int udp_abort(struct sock *sk, int err); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udp_push_pending_frames(struct sock *sk); void udp_flush_pending_frames(struct sock *sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8f5f7f6026f7..e9ffc27b23d0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2193,6 +2193,20 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) } EXPORT_SYMBOL(udp_poll); +int udp_abort(struct sock *sk, int err) +{ + lock_sock(sk); + + sk->sk_err = err; + sk->sk_error_report(sk); + udp_disconnect(sk, 0); + + release_sock(sk); + + return 0; +} +EXPORT_SYMBOL_GPL(udp_abort); + struct proto udp_prot = { .name = "UDP", .owner = THIS_MODULE, @@ -2224,6 +2238,7 @@ struct proto udp_prot = { .compat_getsockopt = compat_udp_getsockopt, #endif .clear_sk = sk_prot_clear_portaddr_nulls, + .diag_destroy = udp_abort, }; EXPORT_SYMBOL(udp_prot); diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 3d5ccf4b1412..8a9f6e535caa 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -165,12 +165,88 @@ static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, r->idiag_wqueue = sk_wmem_alloc_get(sk); } +#ifdef CONFIG_INET_DIAG_DESTROY +static int __udp_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req, + struct udp_table *tbl) +{ + struct net *net = sock_net(in_skb->sk); + struct sock *sk; + int err; + + rcu_read_lock(); + + if (req->sdiag_family == AF_INET) + sk = __udp4_lib_lookup(net, + req->id.idiag_dst[0], req->id.idiag_dport, + req->id.idiag_src[0], req->id.idiag_sport, + req->id.idiag_if, tbl, NULL); +#if IS_ENABLED(CONFIG_IPV6) + else if (req->sdiag_family == AF_INET6) { + if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && + ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) + sk = __udp4_lib_lookup(net, + req->id.idiag_dst[0], req->id.idiag_dport, + req->id.idiag_src[0], req->id.idiag_sport, + req->id.idiag_if, tbl, NULL); + + else + sk = __udp6_lib_lookup(net, + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + req->id.idiag_if, tbl, NULL); + } +#endif + else { + rcu_read_unlock(); + return -EINVAL; + } + + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + + rcu_read_unlock(); + + if (!sk) + return -ENOENT; + + if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { + sock_put(sk); + return -ENOENT; + } + + err = sock_diag_destroy(sk, ECONNABORTED); + + sock_put(sk); + + return err; +} + +static int udp_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req) +{ + return __udp_diag_destroy(in_skb, req, &udp_table); +} + +static int udplite_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req) +{ + return __udp_diag_destroy(in_skb, req, &udplite_table); +} + +#endif + static const struct inet_diag_handler udp_diag_handler = { .dump = udp_diag_dump, .dump_one = udp_diag_dump_one, .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDP, .idiag_info_size = 0, +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = udp_diag_destroy, +#endif }; static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, @@ -192,6 +268,9 @@ static const struct inet_diag_handler udplite_diag_handler = { .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDPLITE, .idiag_info_size = 0, +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = udplite_diag_destroy, +#endif }; static int __init udp_diag_init(void) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 81e2f98b958d..16512cf06e73 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1467,6 +1467,7 @@ struct proto udpv6_prot = { .compat_getsockopt = compat_udpv6_getsockopt, #endif .clear_sk = udp_v6_clear_sk, + .diag_destroy = udp_abort, }; static struct inet_protosw udpv6_protosw = { -- cgit v1.2.3 From 4cac8204661a6d1a842e47911933f1e90b392c84 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Aug 2016 11:39:27 -0700 Subject: udp: get rid of sk_prot_clear_portaddr_nulls() Since we no longer use SLAB_DESTROY_BY_RCU for UDP, we do not need sk_prot_clear_portaddr_nulls() helper. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 2 -- net/core/sock.c | 18 ------------------ net/ipv4/udp.c | 1 - net/ipv4/udplite.c | 1 - net/ipv6/udplite.c | 1 - 5 files changed, 23 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 2aab9b63bf16..1bc57609f8e1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1242,8 +1242,6 @@ static inline int __sk_prot_rehash(struct sock *sk) return sk->sk_prot->hash(sk); } -void sk_prot_clear_portaddr_nulls(struct sock *sk, int size); - /* About 10 seconds */ #define SOCK_DESTROY_TIME (10*HZ) diff --git a/net/core/sock.c b/net/core/sock.c index 25dab8b60223..2b09c2967e21 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1315,24 +1315,6 @@ static void sock_copy(struct sock *nsk, const struct sock *osk) #endif } -void sk_prot_clear_portaddr_nulls(struct sock *sk, int size) -{ - unsigned long nulls1, nulls2; - - nulls1 = offsetof(struct sock, __sk_common.skc_node.next); - nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next); - if (nulls1 > nulls2) - swap(nulls1, nulls2); - - if (nulls1 != 0) - memset((char *)sk, 0, nulls1); - memset((char *)sk + nulls1 + sizeof(void *), 0, - nulls2 - nulls1 - sizeof(void *)); - memset((char *)sk + nulls2 + sizeof(void *), 0, - size - nulls2 - sizeof(void *)); -} -EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls); - static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, int family) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e9ffc27b23d0..f0ebb0bd1e11 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2237,7 +2237,6 @@ struct proto udp_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif - .clear_sk = sk_prot_clear_portaddr_nulls, .diag_destroy = udp_abort, }; EXPORT_SYMBOL(udp_prot); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 3b3efbda48e1..67fc9d96e67d 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -61,7 +61,6 @@ struct proto udplite_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif - .clear_sk = sk_prot_clear_portaddr_nulls, }; EXPORT_SYMBOL(udplite_prot); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 118057a5b759..5cf0099b86f7 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -56,7 +56,6 @@ struct proto udplitev6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif - .clear_sk = sk_prot_clear_portaddr_nulls, }; static struct inet_protosw udplite6_protosw = { -- cgit v1.2.3 From ba2489b0e0113f68a25fe7a563842c2b591829d7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Aug 2016 11:39:29 -0700 Subject: net: remove clear_sk() method We no longer use this handler, we can delete it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 1 - net/core/sock.c | 8 ++------ 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 1bc57609f8e1..c797c57f4d9f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1020,7 +1020,6 @@ struct proto { void (*unhash)(struct sock *sk); void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); - void (*clear_sk)(struct sock *sk, int size); /* Keeping track of sockets in use */ #ifdef CONFIG_PROC_FS diff --git a/net/core/sock.c b/net/core/sock.c index 2b09c2967e21..51a730485649 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1326,12 +1326,8 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO); if (!sk) return sk; - if (priority & __GFP_ZERO) { - if (prot->clear_sk) - prot->clear_sk(sk, prot->obj_size); - else - sk_prot_clear_nulls(sk, prot->obj_size); - } + if (priority & __GFP_ZERO) + sk_prot_clear_nulls(sk, prot->obj_size); } else sk = kmalloc(prot->obj_size, priority); -- cgit v1.2.3 From 35db57bbc4b7ab810bba6e6d6954a0faf5a842cf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 23 Aug 2016 16:00:12 +0200 Subject: xfrm: state: remove per-netns gc task After commit 5b8ef3415a21f173 ("xfrm: Remove ancient sleeping when the SA is in acquire state") gc does not need any per-netns data anymore. As far as gc is concerned all state structs are the same, so we can use a global work struct for it. Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 2 -- net/xfrm/xfrm_state.c | 18 +++++++++--------- 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 177ed444d7b2..27bb9633c69d 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -44,8 +44,6 @@ struct netns_xfrm { unsigned int state_hmask; unsigned int state_num; struct work_struct state_hash_work; - struct hlist_head state_gc_list; - struct work_struct state_gc_work; struct list_head policy_all; struct hlist_head *policy_byidx; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1a15b658a79e..ba8bf518ba14 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -31,6 +31,8 @@ #define xfrm_state_deref_prot(table, net) \ rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock)) +static void xfrm_state_gc_task(struct work_struct *work); + /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) @@ -41,6 +43,9 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation); +static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task); +static HLIST_HEAD(xfrm_state_gc_list); + static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) { return atomic_inc_not_zero(&x->refcnt); @@ -368,13 +373,12 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) static void xfrm_state_gc_task(struct work_struct *work) { - struct net *net = container_of(work, struct net, xfrm.state_gc_work); struct xfrm_state *x; struct hlist_node *tmp; struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); - hlist_move_list(&net->xfrm.state_gc_list, &gc_list); + hlist_move_list(&xfrm_state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); synchronize_rcu(); @@ -515,14 +519,12 @@ EXPORT_SYMBOL(xfrm_state_alloc); void __xfrm_state_destroy(struct xfrm_state *x) { - struct net *net = xs_net(x); - WARN_ON(x->km.state != XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - hlist_add_head(&x->gclist, &net->xfrm.state_gc_list); + hlist_add_head(&x->gclist, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - schedule_work(&net->xfrm.state_gc_work); + schedule_work(&xfrm_state_gc_work); } EXPORT_SYMBOL(__xfrm_state_destroy); @@ -2134,8 +2136,6 @@ int __net_init xfrm_state_init(struct net *net) net->xfrm.state_num = 0; INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); - INIT_HLIST_HEAD(&net->xfrm.state_gc_list); - INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task); spin_lock_init(&net->xfrm.xfrm_state_lock); return 0; @@ -2153,7 +2153,7 @@ void xfrm_state_fini(struct net *net) flush_work(&net->xfrm.state_hash_work); xfrm_state_flush(net, IPSEC_PROTO_ANY, false); - flush_work(&net->xfrm.state_gc_work); + flush_work(&xfrm_state_gc_work); WARN_ON(!list_empty(&net->xfrm.state_all)); -- cgit v1.2.3 From 9d490b4ee4d7d495a4f4908ea998d2a7355e0807 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 23 Aug 2016 12:38:56 -0400 Subject: net: dsa: rename switch operations structure Now that the dsa_switch_driver structure contains only function pointers as it is supposed to, rename it to the more appropriate dsa_switch_ops, uniformly to any other operations structure in the kernel. No functional changes here, basically just the result of something like: s/dsa_switch_driver *drv/dsa_switch_ops *ops/g However keep the {un,}register_switch_driver functions and their dsa_switch_drivers list as is, since they represent the -- likely to be deprecated soon -- legacy DSA registration framework. In the meantime, also fix the following checks from checkpatch.pl to make it happy with this patch: CHECK: Comparison to NULL could be written "!ops" #403: FILE: net/dsa/dsa.c:470: + if (ops == NULL) { CHECK: Comparison to NULL could be written "ds->ops->get_strings" #773: FILE: net/dsa/slave.c:697: + if (ds->ops->get_strings != NULL) CHECK: Comparison to NULL could be written "ds->ops->get_ethtool_stats" #824: FILE: net/dsa/slave.c:785: + if (ds->ops->get_ethtool_stats != NULL) CHECK: Comparison to NULL could be written "ds->ops->get_sset_count" #835: FILE: net/dsa/slave.c:798: + if (ds->ops->get_sset_count != NULL) total: 0 errors, 0 warnings, 4 checks, 784 lines checked Signed-off-by: Vivien Didelot Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/networking/dsa/dsa.txt | 10 +-- drivers/net/dsa/b53/b53_common.c | 4 +- drivers/net/dsa/bcm_sf2.c | 4 +- drivers/net/dsa/mv88e6060.c | 6 +- drivers/net/dsa/mv88e6xxx/chip.c | 8 +- include/net/dsa.h | 10 +-- net/dsa/dsa.c | 70 ++++++++--------- net/dsa/dsa2.c | 16 ++-- net/dsa/slave.c | 146 +++++++++++++++++------------------ 9 files changed, 137 insertions(+), 137 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt index 9d05ed7f7da5..44ed453ccf66 100644 --- a/Documentation/networking/dsa/dsa.txt +++ b/Documentation/networking/dsa/dsa.txt @@ -227,9 +227,9 @@ to address individual switches in the tree. dsa_switch: structure describing a switch device in the tree, referencing a dsa_switch_tree as a backpointer, slave network devices, master network device, -and a reference to the backing dsa_switch_driver +and a reference to the backing dsa_switch_ops -dsa_switch_driver: structure referencing function pointers, see below for a full +dsa_switch_ops: structure referencing function pointers, see below for a full description. Design limitations @@ -357,10 +357,10 @@ regular HWMON devices in /sys/class/hwmon/. Driver development ================== -DSA switch drivers need to implement a dsa_switch_driver structure which will +DSA switch drivers need to implement a dsa_switch_ops structure which will contain the various members described below. -register_switch_driver() registers this dsa_switch_driver in its internal list +register_switch_driver() registers this dsa_switch_ops in its internal list of drivers to probe for. unregister_switch_driver() does the exact opposite. Unless requested differently by setting the priv_size member accordingly, DSA @@ -379,7 +379,7 @@ Switch configuration buses, return a non-NULL string - setup: setup function for the switch, this function is responsible for setting - up the dsa_switch_driver private structure with all it needs: register maps, + up the dsa_switch_ops private structure with all it needs: register maps, interrupts, mutexes, locks etc.. This function is also expected to properly configure the switch to separate all network interfaces from each other, that is, they should be isolated by the switch hardware itself, typically by creating diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 65ecb51f99e5..6fb77cca78bb 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1378,7 +1378,7 @@ static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds) return DSA_TAG_PROTO_NONE; } -static struct dsa_switch_driver b53_switch_ops = { +static struct dsa_switch_ops b53_switch_ops = { .get_tag_protocol = b53_get_tag_protocol, .setup = b53_setup, .set_addr = b53_set_addr, @@ -1618,7 +1618,7 @@ static int b53_switch_init(struct b53_device *dev) dev->vta_regs[1] = chip->vta_regs[1]; dev->vta_regs[2] = chip->vta_regs[2]; dev->jumbo_pm_reg = chip->jumbo_pm_reg; - ds->drv = &b53_switch_ops; + ds->ops = &b53_switch_ops; dev->cpu_port = chip->cpu_port; dev->num_vlans = chip->vlans; dev->num_arl_entries = chip->arl_entries; diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index b47a74b37a42..220e5d1948ca 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1581,7 +1581,7 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) return 0; } -static struct dsa_switch_driver bcm_sf2_switch_driver = { +static struct dsa_switch_ops bcm_sf2_switch_ops = { .setup = bcm_sf2_sw_setup, .get_tag_protocol = bcm_sf2_sw_get_tag_protocol, .set_addr = bcm_sf2_sw_set_addr, @@ -1632,7 +1632,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) priv = (struct bcm_sf2_priv *)(ds + 1); ds->priv = priv; ds->dev = &pdev->dev; - ds->drv = &bcm_sf2_switch_driver; + ds->ops = &bcm_sf2_switch_ops; dev_set_drvdata(&pdev->dev, ds); diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index 1fdfbf3a50bc..7ff9d373a9ee 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -252,7 +252,7 @@ mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) return reg_write(ds, addr, regnum, val); } -static struct dsa_switch_driver mv88e6060_switch_driver = { +static struct dsa_switch_ops mv88e6060_switch_ops = { .get_tag_protocol = mv88e6060_get_tag_protocol, .probe = mv88e6060_drv_probe, .setup = mv88e6060_setup, @@ -263,14 +263,14 @@ static struct dsa_switch_driver mv88e6060_switch_driver = { static int __init mv88e6060_init(void) { - register_switch_driver(&mv88e6060_switch_driver); + register_switch_driver(&mv88e6060_switch_ops); return 0; } module_init(mv88e6060_init); static void __exit mv88e6060_cleanup(void) { - unregister_switch_driver(&mv88e6060_switch_driver); + unregister_switch_driver(&mv88e6060_switch_ops); } module_exit(mv88e6060_cleanup); diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 82d45165803c..750d01d775e0 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3976,7 +3976,7 @@ free: return NULL; } -static struct dsa_switch_driver mv88e6xxx_switch_driver = { +static struct dsa_switch_ops mv88e6xxx_switch_ops = { .probe = mv88e6xxx_drv_probe, .get_tag_protocol = mv88e6xxx_get_tag_protocol, .setup = mv88e6xxx_setup, @@ -4025,7 +4025,7 @@ static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip, ds->dev = dev; ds->priv = chip; - ds->drv = &mv88e6xxx_switch_driver; + ds->ops = &mv88e6xxx_switch_ops; dev_set_drvdata(dev, ds); @@ -4118,7 +4118,7 @@ static struct mdio_driver mv88e6xxx_driver = { static int __init mv88e6xxx_init(void) { - register_switch_driver(&mv88e6xxx_switch_driver); + register_switch_driver(&mv88e6xxx_switch_ops); return mdio_driver_register(&mv88e6xxx_driver); } module_init(mv88e6xxx_init); @@ -4126,7 +4126,7 @@ module_init(mv88e6xxx_init); static void __exit mv88e6xxx_cleanup(void) { mdio_driver_unregister(&mv88e6xxx_driver); - unregister_switch_driver(&mv88e6xxx_switch_driver); + unregister_switch_driver(&mv88e6xxx_switch_ops); } module_exit(mv88e6xxx_cleanup); diff --git a/include/net/dsa.h b/include/net/dsa.h index 8ca2684c5358..2ebeba44a461 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -165,9 +165,9 @@ struct dsa_switch { struct dsa_chip_data *cd; /* - * The used switch driver. + * The switch operations. */ - struct dsa_switch_driver *drv; + struct dsa_switch_ops *ops; /* * An array of which element [a] indicates which port on this @@ -236,7 +236,7 @@ struct switchdev_obj; struct switchdev_obj_port_fdb; struct switchdev_obj_port_vlan; -struct dsa_switch_driver { +struct dsa_switch_ops { struct list_head list; /* @@ -371,8 +371,8 @@ struct dsa_switch_driver { int (*cb)(struct switchdev_obj *obj)); }; -void register_switch_driver(struct dsa_switch_driver *type); -void unregister_switch_driver(struct dsa_switch_driver *type); +void register_switch_driver(struct dsa_switch_ops *type); +void unregister_switch_driver(struct dsa_switch_ops *type); struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); static inline void *ds_to_priv(struct dsa_switch *ds) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 8d3a28d4e99d..d8d267e9a872 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -61,27 +61,27 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { static DEFINE_MUTEX(dsa_switch_drivers_mutex); static LIST_HEAD(dsa_switch_drivers); -void register_switch_driver(struct dsa_switch_driver *drv) +void register_switch_driver(struct dsa_switch_ops *ops) { mutex_lock(&dsa_switch_drivers_mutex); - list_add_tail(&drv->list, &dsa_switch_drivers); + list_add_tail(&ops->list, &dsa_switch_drivers); mutex_unlock(&dsa_switch_drivers_mutex); } EXPORT_SYMBOL_GPL(register_switch_driver); -void unregister_switch_driver(struct dsa_switch_driver *drv) +void unregister_switch_driver(struct dsa_switch_ops *ops) { mutex_lock(&dsa_switch_drivers_mutex); - list_del_init(&drv->list); + list_del_init(&ops->list); mutex_unlock(&dsa_switch_drivers_mutex); } EXPORT_SYMBOL_GPL(unregister_switch_driver); -static struct dsa_switch_driver * +static struct dsa_switch_ops * dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, const char **_name, void **priv) { - struct dsa_switch_driver *ret; + struct dsa_switch_ops *ret; struct list_head *list; const char *name; @@ -90,13 +90,13 @@ dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, mutex_lock(&dsa_switch_drivers_mutex); list_for_each(list, &dsa_switch_drivers) { - struct dsa_switch_driver *drv; + struct dsa_switch_ops *ops; - drv = list_entry(list, struct dsa_switch_driver, list); + ops = list_entry(list, struct dsa_switch_ops, list); - name = drv->probe(parent, host_dev, sw_addr, priv); + name = ops->probe(parent, host_dev, sw_addr, priv); if (name != NULL) { - ret = drv; + ret = ops; break; } } @@ -117,7 +117,7 @@ static ssize_t temp1_input_show(struct device *dev, struct dsa_switch *ds = dev_get_drvdata(dev); int temp, ret; - ret = ds->drv->get_temp(ds, &temp); + ret = ds->ops->get_temp(ds, &temp); if (ret < 0) return ret; @@ -131,7 +131,7 @@ static ssize_t temp1_max_show(struct device *dev, struct dsa_switch *ds = dev_get_drvdata(dev); int temp, ret; - ret = ds->drv->get_temp_limit(ds, &temp); + ret = ds->ops->get_temp_limit(ds, &temp); if (ret < 0) return ret; @@ -149,7 +149,7 @@ static ssize_t temp1_max_store(struct device *dev, if (ret < 0) return ret; - ret = ds->drv->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000)); + ret = ds->ops->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000)); if (ret < 0) return ret; @@ -164,7 +164,7 @@ static ssize_t temp1_max_alarm_show(struct device *dev, bool alarm; int ret; - ret = ds->drv->get_temp_alarm(ds, &alarm); + ret = ds->ops->get_temp_alarm(ds, &alarm); if (ret < 0) return ret; @@ -184,15 +184,15 @@ static umode_t dsa_hwmon_attrs_visible(struct kobject *kobj, { struct device *dev = container_of(kobj, struct device, kobj); struct dsa_switch *ds = dev_get_drvdata(dev); - struct dsa_switch_driver *drv = ds->drv; + struct dsa_switch_ops *ops = ds->ops; umode_t mode = attr->mode; if (index == 1) { - if (!drv->get_temp_limit) + if (!ops->get_temp_limit) mode = 0; - else if (!drv->set_temp_limit) + else if (!ops->set_temp_limit) mode &= ~S_IWUSR; - } else if (index == 2 && !drv->get_temp_alarm) { + } else if (index == 2 && !ops->get_temp_alarm) { mode = 0; } return mode; @@ -228,8 +228,8 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev, genphy_config_init(phydev); genphy_read_status(phydev); - if (ds->drv->adjust_link) - ds->drv->adjust_link(ds, port, phydev); + if (ds->ops->adjust_link) + ds->ops->adjust_link(ds, port, phydev); } return 0; @@ -303,7 +303,7 @@ void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds) static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) { - struct dsa_switch_driver *drv = ds->drv; + struct dsa_switch_ops *ops = ds->ops; struct dsa_switch_tree *dst = ds->dst; struct dsa_chip_data *cd = ds->cd; bool valid_name_found = false; @@ -356,7 +356,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) if (dst->cpu_switch == index) { enum dsa_tag_protocol tag_protocol; - tag_protocol = drv->get_tag_protocol(ds); + tag_protocol = ops->get_tag_protocol(ds); dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(dst->tag_ops)) { ret = PTR_ERR(dst->tag_ops); @@ -371,15 +371,15 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) /* * Do basic register setup. */ - ret = drv->setup(ds); + ret = ops->setup(ds); if (ret < 0) goto out; - ret = drv->set_addr(ds, dst->master_netdev->dev_addr); + ret = ops->set_addr(ds, dst->master_netdev->dev_addr); if (ret < 0) goto out; - if (!ds->slave_mii_bus && drv->phy_read) { + if (!ds->slave_mii_bus && ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(parent); if (!ds->slave_mii_bus) { ret = -ENOMEM; @@ -426,7 +426,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) * register with hardware monitoring subsystem. * Treat registration error as non-fatal and ignore it. */ - if (drv->get_temp) { + if (ops->get_temp) { const char *netname = netdev_name(dst->master_netdev); char hname[IFNAMSIZ + 1]; int i, j; @@ -457,7 +457,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, struct device *parent, struct device *host_dev) { struct dsa_chip_data *cd = dst->pd->chip + index; - struct dsa_switch_driver *drv; + struct dsa_switch_ops *ops; struct dsa_switch *ds; int ret; const char *name; @@ -466,8 +466,8 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, /* * Probe for switch model. */ - drv = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); - if (drv == NULL) { + ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); + if (!ops) { netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", index); return ERR_PTR(-EINVAL); @@ -486,7 +486,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, ds->dst = dst; ds->index = index; ds->cd = cd; - ds->drv = drv; + ds->ops = ops; ds->priv = priv; ds->dev = parent; @@ -541,7 +541,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) ds->dsa_port_mask |= ~(1 << port); } - if (ds->slave_mii_bus && ds->drv->phy_read) + if (ds->slave_mii_bus && ds->ops->phy_read) mdiobus_unregister(ds->slave_mii_bus); } @@ -560,8 +560,8 @@ int dsa_switch_suspend(struct dsa_switch *ds) return ret; } - if (ds->drv->suspend) - ret = ds->drv->suspend(ds); + if (ds->ops->suspend) + ret = ds->ops->suspend(ds); return ret; } @@ -571,8 +571,8 @@ int dsa_switch_resume(struct dsa_switch *ds) { int i, ret = 0; - if (ds->drv->resume) - ret = ds->drv->resume(ds); + if (ds->ops->resume) + ret = ds->ops->resume(ds); if (ret) return ret; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 2e343221464c..8278385dcd21 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -294,25 +294,25 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) int err; /* Initialize ds->phys_mii_mask before registering the slave MDIO bus - * driver and before drv->setup() has run, since the switch drivers and + * driver and before ops->setup() has run, since the switch drivers and * the slave MDIO bus driver rely on these values for probing PHY * devices or not */ ds->phys_mii_mask = ds->enabled_port_mask; - err = ds->drv->setup(ds); + err = ds->ops->setup(ds); if (err < 0) return err; - err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr); + err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); if (err < 0) return err; - err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr); + err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); if (err < 0) return err; - if (!ds->slave_mii_bus && ds->drv->phy_read) { + if (!ds->slave_mii_bus && ds->ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) return -ENOMEM; @@ -374,7 +374,7 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) dsa_user_port_unapply(port, index, ds); } - if (ds->slave_mii_bus && ds->drv->phy_read) + if (ds->slave_mii_bus && ds->ops->phy_read) mdiobus_unregister(ds->slave_mii_bus); } @@ -466,7 +466,7 @@ static int dsa_cpu_parse(struct device_node *port, u32 index, dst->cpu_port = index; } - tag_protocol = ds->drv->get_tag_protocol(ds); + tag_protocol = ds->ops->get_tag_protocol(ds); dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(dst->tag_ops)) { dev_warn(ds->dev, "No tagger for this switch\n"); @@ -543,7 +543,7 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds) ds->ports[reg].dn = port; - /* Initialize enabled_port_mask now for drv->setup() + /* Initialize enabled_port_mask now for ops->setup() * to have access to a correct value, just like what * net/dsa/dsa.c::dsa_switch_setup_one does. */ diff --git a/net/dsa/slave.c b/net/dsa/slave.c index fc9196745225..9f6c2a20f6ff 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -28,7 +28,7 @@ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) struct dsa_switch *ds = bus->priv; if (ds->phys_mii_mask & (1 << addr)) - return ds->drv->phy_read(ds, addr, reg); + return ds->ops->phy_read(ds, addr, reg); return 0xffff; } @@ -38,7 +38,7 @@ static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) struct dsa_switch *ds = bus->priv; if (ds->phys_mii_mask & (1 << addr)) - return ds->drv->phy_write(ds, addr, reg, val); + return ds->ops->phy_write(ds, addr, reg, val); return 0; } @@ -98,14 +98,14 @@ static int dsa_slave_open(struct net_device *dev) goto clear_allmulti; } - if (ds->drv->port_enable) { - err = ds->drv->port_enable(ds, p->port, p->phy); + if (ds->ops->port_enable) { + err = ds->ops->port_enable(ds, p->port, p->phy); if (err) goto clear_promisc; } - if (ds->drv->port_stp_state_set) - ds->drv->port_stp_state_set(ds, p->port, stp_state); + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, p->port, stp_state); if (p->phy) phy_start(p->phy); @@ -144,11 +144,11 @@ static int dsa_slave_close(struct net_device *dev) if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) dev_uc_del(master, dev->dev_addr); - if (ds->drv->port_disable) - ds->drv->port_disable(ds, p->port, p->phy); + if (ds->ops->port_disable) + ds->ops->port_disable(ds, p->port, p->phy); - if (ds->drv->port_stp_state_set) - ds->drv->port_stp_state_set(ds, p->port, BR_STATE_DISABLED); + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, p->port, BR_STATE_DISABLED); return 0; } @@ -209,13 +209,13 @@ static int dsa_slave_port_vlan_add(struct net_device *dev, struct dsa_switch *ds = p->parent; if (switchdev_trans_ph_prepare(trans)) { - if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add) + if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) return -EOPNOTSUPP; - return ds->drv->port_vlan_prepare(ds, p->port, vlan, trans); + return ds->ops->port_vlan_prepare(ds, p->port, vlan, trans); } - ds->drv->port_vlan_add(ds, p->port, vlan, trans); + ds->ops->port_vlan_add(ds, p->port, vlan, trans); return 0; } @@ -226,10 +226,10 @@ static int dsa_slave_port_vlan_del(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (!ds->drv->port_vlan_del) + if (!ds->ops->port_vlan_del) return -EOPNOTSUPP; - return ds->drv->port_vlan_del(ds, p->port, vlan); + return ds->ops->port_vlan_del(ds, p->port, vlan); } static int dsa_slave_port_vlan_dump(struct net_device *dev, @@ -239,8 +239,8 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->port_vlan_dump) - return ds->drv->port_vlan_dump(ds, p->port, vlan, cb); + if (ds->ops->port_vlan_dump) + return ds->ops->port_vlan_dump(ds, p->port, vlan, cb); return -EOPNOTSUPP; } @@ -253,13 +253,13 @@ static int dsa_slave_port_fdb_add(struct net_device *dev, struct dsa_switch *ds = p->parent; if (switchdev_trans_ph_prepare(trans)) { - if (!ds->drv->port_fdb_prepare || !ds->drv->port_fdb_add) + if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add) return -EOPNOTSUPP; - return ds->drv->port_fdb_prepare(ds, p->port, fdb, trans); + return ds->ops->port_fdb_prepare(ds, p->port, fdb, trans); } - ds->drv->port_fdb_add(ds, p->port, fdb, trans); + ds->ops->port_fdb_add(ds, p->port, fdb, trans); return 0; } @@ -271,8 +271,8 @@ static int dsa_slave_port_fdb_del(struct net_device *dev, struct dsa_switch *ds = p->parent; int ret = -EOPNOTSUPP; - if (ds->drv->port_fdb_del) - ret = ds->drv->port_fdb_del(ds, p->port, fdb); + if (ds->ops->port_fdb_del) + ret = ds->ops->port_fdb_del(ds, p->port, fdb); return ret; } @@ -284,8 +284,8 @@ static int dsa_slave_port_fdb_dump(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->port_fdb_dump) - return ds->drv->port_fdb_dump(ds, p->port, fdb, cb); + if (ds->ops->port_fdb_dump) + return ds->ops->port_fdb_dump(ds, p->port, fdb, cb); return -EOPNOTSUPP; } @@ -308,9 +308,9 @@ static int dsa_slave_stp_state_set(struct net_device *dev, struct dsa_switch *ds = p->parent; if (switchdev_trans_ph_prepare(trans)) - return ds->drv->port_stp_state_set ? 0 : -EOPNOTSUPP; + return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP; - ds->drv->port_stp_state_set(ds, p->port, attr->u.stp_state); + ds->ops->port_stp_state_set(ds, p->port, attr->u.stp_state); return 0; } @@ -326,8 +326,8 @@ static int dsa_slave_vlan_filtering(struct net_device *dev, if (switchdev_trans_ph_prepare(trans)) return 0; - if (ds->drv->port_vlan_filtering) - return ds->drv->port_vlan_filtering(ds, p->port, + if (ds->ops->port_vlan_filtering) + return ds->ops->port_vlan_filtering(ds, p->port, attr->u.vlan_filtering); return 0; @@ -365,8 +365,8 @@ static int dsa_slave_ageing_time(struct net_device *dev, ds->ports[p->port].ageing_time = ageing_time; ageing_time = dsa_fastest_ageing_time(ds, ageing_time); - if (ds->drv->set_ageing_time) - return ds->drv->set_ageing_time(ds, ageing_time); + if (ds->ops->set_ageing_time) + return ds->ops->set_ageing_time(ds, ageing_time); return 0; } @@ -481,8 +481,8 @@ static int dsa_slave_bridge_port_join(struct net_device *dev, p->bridge_dev = br; - if (ds->drv->port_bridge_join) - ret = ds->drv->port_bridge_join(ds, p->port, br); + if (ds->ops->port_bridge_join) + ret = ds->ops->port_bridge_join(ds, p->port, br); return ret == -EOPNOTSUPP ? 0 : ret; } @@ -493,16 +493,16 @@ static void dsa_slave_bridge_port_leave(struct net_device *dev) struct dsa_switch *ds = p->parent; - if (ds->drv->port_bridge_leave) - ds->drv->port_bridge_leave(ds, p->port); + if (ds->ops->port_bridge_leave) + ds->ops->port_bridge_leave(ds, p->port); p->bridge_dev = NULL; /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional */ - if (ds->drv->port_stp_state_set) - ds->drv->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING); + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING); } static int dsa_slave_port_attr_get(struct net_device *dev, @@ -605,8 +605,8 @@ static int dsa_slave_get_regs_len(struct net_device *dev) struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_regs_len) - return ds->drv->get_regs_len(ds, p->port); + if (ds->ops->get_regs_len) + return ds->ops->get_regs_len(ds, p->port); return -EOPNOTSUPP; } @@ -617,8 +617,8 @@ dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_regs) - ds->drv->get_regs(ds, p->port, regs, _p); + if (ds->ops->get_regs) + ds->ops->get_regs(ds, p->port, regs, _p); } static int dsa_slave_nway_reset(struct net_device *dev) @@ -651,8 +651,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev) if (ds->cd && ds->cd->eeprom_len) return ds->cd->eeprom_len; - if (ds->drv->get_eeprom_len) - return ds->drv->get_eeprom_len(ds); + if (ds->ops->get_eeprom_len) + return ds->ops->get_eeprom_len(ds); return 0; } @@ -663,8 +663,8 @@ static int dsa_slave_get_eeprom(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_eeprom) - return ds->drv->get_eeprom(ds, eeprom, data); + if (ds->ops->get_eeprom) + return ds->ops->get_eeprom(ds, eeprom, data); return -EOPNOTSUPP; } @@ -675,8 +675,8 @@ static int dsa_slave_set_eeprom(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->set_eeprom) - return ds->drv->set_eeprom(ds, eeprom, data); + if (ds->ops->set_eeprom) + return ds->ops->set_eeprom(ds, eeprom, data); return -EOPNOTSUPP; } @@ -694,8 +694,8 @@ static void dsa_slave_get_strings(struct net_device *dev, strncpy(data + len, "tx_bytes", len); strncpy(data + 2 * len, "rx_packets", len); strncpy(data + 3 * len, "rx_bytes", len); - if (ds->drv->get_strings != NULL) - ds->drv->get_strings(ds, p->port, data + 4 * len); + if (ds->ops->get_strings) + ds->ops->get_strings(ds, p->port, data + 4 * len); } } @@ -714,8 +714,8 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev, dst->master_ethtool_ops.get_ethtool_stats(dev, stats, data); } - if (ds->drv->get_ethtool_stats) - ds->drv->get_ethtool_stats(ds, cpu_port, data + count); + if (ds->ops->get_ethtool_stats) + ds->ops->get_ethtool_stats(ds, cpu_port, data + count); } static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) @@ -727,8 +727,8 @@ static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) if (dst->master_ethtool_ops.get_sset_count) count += dst->master_ethtool_ops.get_sset_count(dev, sset); - if (sset == ETH_SS_STATS && ds->drv->get_sset_count) - count += ds->drv->get_sset_count(ds); + if (sset == ETH_SS_STATS && ds->ops->get_sset_count) + count += ds->ops->get_sset_count(ds); return count; } @@ -755,14 +755,14 @@ static void dsa_cpu_port_get_strings(struct net_device *dev, dst->master_ethtool_ops.get_strings(dev, stringset, data); } - if (stringset == ETH_SS_STATS && ds->drv->get_strings) { + if (stringset == ETH_SS_STATS && ds->ops->get_strings) { ndata = data + mcount * len; /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle * the output after to prepend our CPU port prefix we * constructed earlier */ - ds->drv->get_strings(ds, cpu_port, ndata); - count = ds->drv->get_sset_count(ds); + ds->ops->get_strings(ds, cpu_port, ndata); + count = ds->ops->get_sset_count(ds); for (i = 0; i < count; i++) { memmove(ndata + (i * len + sizeof(pfx)), ndata + i * len, len - sizeof(pfx)); @@ -782,8 +782,8 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, data[1] = dev->stats.tx_bytes; data[2] = dev->stats.rx_packets; data[3] = dev->stats.rx_bytes; - if (ds->drv->get_ethtool_stats != NULL) - ds->drv->get_ethtool_stats(ds, p->port, data + 4); + if (ds->ops->get_ethtool_stats) + ds->ops->get_ethtool_stats(ds, p->port, data + 4); } static int dsa_slave_get_sset_count(struct net_device *dev, int sset) @@ -795,8 +795,8 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset) int count; count = 4; - if (ds->drv->get_sset_count != NULL) - count += ds->drv->get_sset_count(ds); + if (ds->ops->get_sset_count) + count += ds->ops->get_sset_count(ds); return count; } @@ -809,8 +809,8 @@ static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_wol) - ds->drv->get_wol(ds, p->port, w); + if (ds->ops->get_wol) + ds->ops->get_wol(ds, p->port, w); } static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) @@ -819,8 +819,8 @@ static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) struct dsa_switch *ds = p->parent; int ret = -EOPNOTSUPP; - if (ds->drv->set_wol) - ret = ds->drv->set_wol(ds, p->port, w); + if (ds->ops->set_wol) + ret = ds->ops->set_wol(ds, p->port, w); return ret; } @@ -831,10 +831,10 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) struct dsa_switch *ds = p->parent; int ret; - if (!ds->drv->set_eee) + if (!ds->ops->set_eee) return -EOPNOTSUPP; - ret = ds->drv->set_eee(ds, p->port, p->phy, e); + ret = ds->ops->set_eee(ds, p->port, p->phy, e); if (ret) return ret; @@ -850,10 +850,10 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) struct dsa_switch *ds = p->parent; int ret; - if (!ds->drv->get_eee) + if (!ds->ops->get_eee) return -EOPNOTSUPP; - ret = ds->drv->get_eee(ds, p->port, e); + ret = ds->ops->get_eee(ds, p->port, e); if (ret) return ret; @@ -988,8 +988,8 @@ static void dsa_slave_adjust_link(struct net_device *dev) p->old_pause = p->phy->pause; } - if (ds->drv->adjust_link && status_changed) - ds->drv->adjust_link(ds, p->port, p->phy); + if (ds->ops->adjust_link && status_changed) + ds->ops->adjust_link(ds, p->port, p->phy); if (status_changed) phy_print_status(p->phy); @@ -1004,8 +1004,8 @@ static int dsa_slave_fixed_link_update(struct net_device *dev, if (dev) { p = netdev_priv(dev); ds = p->parent; - if (ds->drv->fixed_link_update) - ds->drv->fixed_link_update(ds, p->port, status); + if (ds->ops->fixed_link_update) + ds->ops->fixed_link_update(ds, p->port, status); } return 0; @@ -1062,8 +1062,8 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, phy_dn = port_dn; } - if (ds->drv->get_phy_flags) - phy_flags = ds->drv->get_phy_flags(ds, p->port); + if (ds->ops->get_phy_flags) + phy_flags = ds->ops->get_phy_flags(ds, p->port); if (phy_dn) { int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn); -- cgit v1.2.3 From eb60a8ddf3c38959cc73821bec5335bed85e0200 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Aug 2016 10:23:34 -0700 Subject: net: minor optimization in qdisc_qstats_cpu_drop() per_cpu_inc() is faster (at least on x86) than per_cpu_ptr(xxx)++; Signed-off-by: Eric Dumazet Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/net/sch_generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 0d501779cc68..52a2015667b4 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -592,7 +592,7 @@ static inline void qdisc_qstats_drop(struct Qdisc *sch) static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch) { - qstats_drop_inc(this_cpu_ptr(sch->cpu_qstats)); + this_cpu_inc(sch->cpu_qstats->drops); } static inline void qdisc_qstats_overlimit(struct Qdisc *sch) -- cgit v1.2.3 From c016c7e45ddfa5085b35b644e659ec014969740d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 24 Aug 2016 12:41:54 +0200 Subject: netfilter: nf_tables: honor NLM_F_EXCL flag in set element insertion If the NLM_F_EXCL flag is set, then new elements that clash with an existing one return EEXIST. In case you try to add an element whose data area differs from what we have, then this returns EBUSY. If no flag is specified at all, then this returns success to userspace. This patch also update the set insert operation so we can fetch the existing element that clashes with the one you want to add, we need this to make sure the element data doesn't differ. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 ++- net/netfilter/nf_tables_api.c | 20 +++++++++++++++----- net/netfilter/nft_set_hash.c | 17 +++++++++++++---- net/netfilter/nft_set_rbtree.c | 12 ++++++++---- 4 files changed, 38 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f2f13399ce44..8972468bc94b 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -251,7 +251,8 @@ struct nft_set_ops { int (*insert)(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem); + const struct nft_set_elem *elem, + struct nft_set_ext **ext); void (*activate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 221d27f09623..bd9715e5ff26 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3483,12 +3483,12 @@ static int nft_setelem_parse_flags(const struct nft_set *set, } static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, - const struct nlattr *attr) + const struct nlattr *attr, u32 nlmsg_flags) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_data_desc d1, d2; struct nft_set_ext_tmpl tmpl; - struct nft_set_ext *ext; + struct nft_set_ext *ext, *ext2; struct nft_set_elem elem; struct nft_set_binding *binding; struct nft_userdata *udata; @@ -3615,9 +3615,19 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, goto err4; ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; - err = set->ops->insert(ctx->net, set, &elem); - if (err < 0) + err = set->ops->insert(ctx->net, set, &elem, &ext2); + if (err) { + if (err == -EEXIST) { + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && + nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && + memcmp(nft_set_ext_data(ext), + nft_set_ext_data(ext2), set->dlen) != 0) + err = -EBUSY; + else if (!(nlmsg_flags & NLM_F_EXCL)) + err = 0; + } goto err5; + } nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); @@ -3673,7 +3683,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) return -ENFILE; - err = nft_add_set_elem(&ctx, set, attr); + err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags); if (err < 0) { atomic_dec(&set->nelems); break; diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 564fa7929ed5..3794cb2fc788 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -126,7 +126,8 @@ err1: } static int nft_hash_insert(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + const struct nft_set_elem *elem, + struct nft_set_ext **ext) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he = elem->priv; @@ -135,9 +136,17 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set, .set = set, .key = elem->key.val.data, }; - - return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, - nft_hash_params); + struct nft_hash_elem *prev; + + prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params); + if (IS_ERR(prev)) + return PTR_ERR(prev); + if (prev) { + *ext = &prev->ext; + return -EEXIST; + } + return 0; } static void nft_hash_activate(const struct net *net, const struct nft_set *set, diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 6473936d05c6..038682d48261 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -94,7 +94,8 @@ out: } static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, - struct nft_rbtree_elem *new) + struct nft_rbtree_elem *new, + struct nft_set_ext **ext) { struct nft_rbtree *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); @@ -122,8 +123,10 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, else if (!nft_rbtree_interval_end(rbe) && nft_rbtree_interval_end(new)) p = &parent->rb_right; - else + else { + *ext = &rbe->ext; return -EEXIST; + } } } } @@ -133,13 +136,14 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, } static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + const struct nft_set_elem *elem, + struct nft_set_ext **ext) { struct nft_rbtree_elem *rbe = elem->priv; int err; spin_lock_bh(&nft_rbtree_lock); - err = __nft_rbtree_insert(net, set, rbe); + err = __nft_rbtree_insert(net, set, rbe, ext); spin_unlock_bh(&nft_rbtree_lock); return err; -- cgit v1.2.3 From 2a313cdf1e6e4cc8cc3f16f976e1abfbdd0626fa Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 25 Aug 2016 16:46:44 +0200 Subject: devlink: remove unused priv_size Remove unused and useless priv_size member from struct devlink_ops. Cc: Jiri Pirko Signed-off-by: Ivan Vecera Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/devlink.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index c99ffe8cef3c..211bd3c37028 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -50,7 +50,6 @@ struct devlink_sb_pool_info { }; struct devlink_ops { - size_t priv_size; int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); int (*port_split)(struct devlink *devlink, unsigned int port_index, -- cgit v1.2.3 From 6bc506b4fb065eac3d89ca1ce37082e174493d9e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 25 Aug 2016 18:42:37 +0200 Subject: bridge: switchdev: Add forward mark support for stacked devices switchdev_port_fwd_mark_set() is used to set the 'offload_fwd_mark' of port netdevs so that packets being flooded by the device won't be flooded twice. It works by assigning a unique identifier (the ifindex of the first bridge port) to bridge ports sharing the same parent ID. This prevents packets from being flooded twice by the same switch, but will flood packets through bridge ports belonging to a different switch. This method is problematic when stacked devices are taken into account, such as VLANs. In such cases, a physical port netdev can have upper devices being members in two different bridges, thus requiring two different 'offload_fwd_mark's to be configured on the port netdev, which is impossible. The main problem is that packet and netdev marking is performed at the physical netdev level, whereas flooding occurs between bridge ports, which are not necessarily port netdevs. Instead, packet and netdev marking should really be done in the bridge driver with the switch driver only telling it which packets it already forwarded. The bridge driver will mark such packets using the mark assigned to the ingress bridge port and will prevent the packet from being forwarded through any bridge port sharing the same mark (i.e. having the same parent ID). Remove the current switchdev 'offload_fwd_mark' implementation and instead implement the proposed method. In addition, make rocker - the sole user of the mark - use the proposed method. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- Documentation/networking/switchdev.txt | 13 ++--- drivers/net/ethernet/rocker/rocker_main.c | 2 +- drivers/net/ethernet/rocker/rocker_ofdpa.c | 4 -- include/linux/netdevice.h | 5 -- include/linux/skbuff.h | 13 ++--- include/net/switchdev.h | 6 --- net/bridge/Makefile | 2 + net/bridge/br_forward.c | 3 +- net/bridge/br_if.c | 10 ++-- net/bridge/br_input.c | 2 + net/bridge/br_private.h | 37 +++++++++++++ net/bridge/br_switchdev.c | 57 ++++++++++++++++++++ net/core/dev.c | 10 ---- net/switchdev/switchdev.c | 85 ------------------------------ 14 files changed, 117 insertions(+), 132 deletions(-) create mode 100644 net/bridge/br_switchdev.c (limited to 'include/net') diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 31c39115834d..44235e83799b 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -283,15 +283,10 @@ be sent to the port netdev for processing by the bridge driver. The bridge should not reflood the packet to the same ports the device flooded, otherwise there will be duplicate packets on the wire. -To avoid duplicate packets, the device/driver should mark a packet as already -forwarded using skb->offload_fwd_mark. The same mark is set on the device -ports in the domain using dev->offload_fwd_mark. If the skb->offload_fwd_mark -is non-zero and matches the forwarding egress port's dev->skb_mark, the kernel -will drop the skb right before transmit on the egress port, with the -understanding that the device already forwarded the packet on same egress port. -The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark -for port's dev->offload_fwd_mark, based on the port's parent ID (switch ID) and -a group ifindex. +To avoid duplicate packets, the switch driver should mark a packet as already +forwarded by setting the skb->offload_fwd_mark bit. The bridge driver will mark +the skb using the ingress bridge port's mark and prevent it from being forwarded +through any bridge port with the same mark. It is possible for the switch device to not handle flooding and push the packets up to the bridge driver for flooding. This is not ideal as the number diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index f0b09b05ed3f..1f0c08602eba 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2412,7 +2412,7 @@ static int rocker_port_rx_proc(const struct rocker *rocker, skb->protocol = eth_type_trans(skb, rocker_port->dev); if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD) - skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark; + skb->offload_fwd_mark = 1; rocker_port->dev->stats.rx_packets++; rocker_port->dev->stats.rx_bytes += skb->len; diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 1ca796316173..fcad907baecf 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2558,7 +2558,6 @@ static int ofdpa_port_init(struct rocker_port *rocker_port) struct ofdpa_port *ofdpa_port = rocker_port->wpriv; int err; - switchdev_port_fwd_mark_set(ofdpa_port->dev, NULL, false); rocker_port_set_learning(rocker_port, !!(ofdpa_port->brport_flags & BR_LEARNING)); @@ -2817,7 +2816,6 @@ static int ofdpa_port_bridge_join(struct ofdpa_port *ofdpa_port, ofdpa_port_internal_vlan_id_get(ofdpa_port, bridge->ifindex); ofdpa_port->bridge_dev = bridge; - switchdev_port_fwd_mark_set(ofdpa_port->dev, bridge, true); return ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0); } @@ -2836,8 +2834,6 @@ static int ofdpa_port_bridge_leave(struct ofdpa_port *ofdpa_port) ofdpa_port_internal_vlan_id_get(ofdpa_port, ofdpa_port->dev->ifindex); - switchdev_port_fwd_mark_set(ofdpa_port->dev, ofdpa_port->bridge_dev, - false); ofdpa_port->bridge_dev = NULL; err = ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 794bb0733799..d122be9345c7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1562,8 +1562,6 @@ enum netdev_priv_flags { * * @xps_maps: XXX: need comments on this one * - * @offload_fwd_mark: Offload device fwding mark - * * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers @@ -1814,9 +1812,6 @@ struct net_device { #ifdef CONFIG_NET_CLS_ACT struct tcf_proto __rcu *egress_cl_list; #endif -#ifdef CONFIG_NET_SWITCHDEV - u32 offload_fwd_mark; -#endif /* These may be needed for future network-power-down code. */ struct timer_list watchdog_timer; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7047448e8129..cfb7219be665 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -612,7 +612,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking - * @offload_fwd_mark: fwding offload mark * @mark: Generic packet mark * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information @@ -730,7 +729,10 @@ struct sk_buff { __u8 ipvs_property:1; __u8 inner_protocol_type:1; __u8 remcsum_offload:1; - /* 3 or 5 bit hole */ +#ifdef CONFIG_NET_SWITCHDEV + __u8 offload_fwd_mark:1; +#endif + /* 2, 4 or 5 bit hole */ #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -757,14 +759,9 @@ struct sk_buff { unsigned int sender_cpu; }; #endif - union { #ifdef CONFIG_NETWORK_SECMARK - __u32 secmark; + __u32 secmark; #endif -#ifdef CONFIG_NET_SWITCHDEV - __u32 offload_fwd_mark; -#endif - }; union { __u32 mark; diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 62f6a967a1b7..82f5e0462021 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -347,12 +347,6 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, return idx; } -static inline void switchdev_port_fwd_mark_set(struct net_device *dev, - struct net_device *group_dev, - bool joining) -{ -} - static inline bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { diff --git a/net/bridge/Makefile b/net/bridge/Makefile index a1cda5d4718d..0aefc011b668 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -20,4 +20,6 @@ bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o +bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o + obj-$(CONFIG_NETFILTER) += netfilter/ diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 63a83d8d7da3..32a02de39cd2 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -29,7 +29,8 @@ static inline int should_deliver(const struct net_bridge_port *p, vg = nbp_vlan_group_rcu(p); return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && - br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING; + br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING && + nbp_switchdev_allowed_egress(p, skb); } int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index f2fede05d32c..1da3221845f1 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -545,6 +545,10 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (err) goto err5; + err = nbp_switchdev_mark_set(p); + if (err) + goto err6; + dev_disable_lro(dev); list_add_rcu(&p->list, &br->port_list); @@ -566,7 +570,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) err = nbp_vlan_init(p); if (err) { netdev_err(dev, "failed to initialize vlan filtering on this port\n"); - goto err6; + goto err7; } spin_lock_bh(&br->lock); @@ -589,12 +593,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return 0; -err6: +err7: list_del_rcu(&p->list); br_fdb_delete_by_port(br, p, 0, 1); nbp_update_port_count(br); +err6: netdev_upper_dev_unlink(dev, br->dev); - err5: dev->priv_flags &= ~IFF_BRIDGE_PORT; netdev_rx_handler_unregister(dev); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 8e486203d133..3132cfc80e9d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -145,6 +145,8 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid)) goto out; + nbp_switchdev_frame_mark(p, skb); + /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; if (p->flags & BR_LEARNING) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index aac2a6e6b008..2379b2b865c9 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -251,6 +251,9 @@ struct net_bridge_port #ifdef CONFIG_BRIDGE_VLAN_FILTERING struct net_bridge_vlan_group __rcu *vlgrp; #endif +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif }; #define br_auto_port(p) ((p)->flags & BR_AUTO_MASK) @@ -359,6 +362,11 @@ struct net_bridge struct timer_list gc_timer; struct kobject *ifobj; u32 auto_cnt; + +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif + #ifdef CONFIG_BRIDGE_VLAN_FILTERING struct net_bridge_vlan_group __rcu *vlgrp; u8 vlan_enabled; @@ -381,6 +389,10 @@ struct br_input_skb_cb { #ifdef CONFIG_BRIDGE_VLAN_FILTERING bool vlan_filtered; #endif + +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif }; #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) @@ -1034,4 +1046,29 @@ static inline int br_sysfs_addbr(struct net_device *dev) { return 0; } static inline void br_sysfs_delbr(struct net_device *dev) { return; } #endif /* CONFIG_SYSFS */ +/* br_switchdev.c */ +#ifdef CONFIG_NET_SWITCHDEV +int nbp_switchdev_mark_set(struct net_bridge_port *p); +void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb); +bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb); +#else +static inline int nbp_switchdev_mark_set(struct net_bridge_port *p) +{ + return 0; +} + +static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb) +{ +} + +static inline bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + return true; +} +#endif /* CONFIG_NET_SWITCHDEV */ + #endif diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c new file mode 100644 index 000000000000..f4097b900de1 --- /dev/null +++ b/net/bridge/br_switchdev.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include +#include + +#include "br_private.h" + +static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) +{ + struct net_bridge_port *p; + + /* dev is yet to be added to the port list. */ + list_for_each_entry(p, &br->port_list, list) { + if (switchdev_port_same_parent_id(dev, p->dev)) + return p->offload_fwd_mark; + } + + return ++br->offload_fwd_mark; +} + +int nbp_switchdev_mark_set(struct net_bridge_port *p) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, + }; + int err; + + ASSERT_RTNL(); + + err = switchdev_port_attr_get(p->dev, &attr); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev); + + return 0; +} + +void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb) +{ + if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark)) + BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark; +} + +bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + return !skb->offload_fwd_mark || + BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark; +} diff --git a/net/core/dev.c b/net/core/dev.c index 7feae74ca928..1d5c6dda1988 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3355,16 +3355,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) else skb_dst_force(skb); -#ifdef CONFIG_NET_SWITCHDEV - /* Don't forward if offload device already forwarded */ - if (skb->offload_fwd_mark && - skb->offload_fwd_mark == dev->offload_fwd_mark) { - consume_skb(skb); - rc = NET_XMIT_SUCCESS; - goto out; - } -#endif - txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 2c683f24d557..1031a0327fff 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1305,88 +1305,3 @@ bool switchdev_port_same_parent_id(struct net_device *a, return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); } EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); - -static u32 switchdev_port_fwd_mark_get(struct net_device *dev, - struct net_device *group_dev) -{ - struct net_device *lower_dev; - struct list_head *iter; - - netdev_for_each_lower_dev(group_dev, lower_dev, iter) { - if (lower_dev == dev) - continue; - if (switchdev_port_same_parent_id(dev, lower_dev)) - return lower_dev->offload_fwd_mark; - return switchdev_port_fwd_mark_get(dev, lower_dev); - } - - return dev->ifindex; -} - -static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, - u32 old_mark, u32 *reset_mark) -{ - struct net_device *lower_dev; - struct list_head *iter; - - netdev_for_each_lower_dev(group_dev, lower_dev, iter) { - if (lower_dev->offload_fwd_mark == old_mark) { - if (!*reset_mark) - *reset_mark = lower_dev->ifindex; - lower_dev->offload_fwd_mark = *reset_mark; - } - switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark); - } -} - -/** - * switchdev_port_fwd_mark_set - Set port offload forwarding mark - * - * @dev: port device - * @group_dev: containing device - * @joining: true if dev is joining group; false if leaving group - * - * An ungrouped port's offload mark is just its ifindex. A grouped - * port's (member of a bridge, for example) offload mark is the ifindex - * of one of the ports in the group with the same parent (switch) ID. - * Ports on the same device in the same group will have the same mark. - * - * Example: - * - * br0 ifindex=9 - * sw1p1 ifindex=2 mark=2 - * sw1p2 ifindex=3 mark=2 - * sw2p1 ifindex=4 mark=5 - * sw2p2 ifindex=5 mark=5 - * - * If sw2p2 leaves the bridge, we'll have: - * - * br0 ifindex=9 - * sw1p1 ifindex=2 mark=2 - * sw1p2 ifindex=3 mark=2 - * sw2p1 ifindex=4 mark=4 - * sw2p2 ifindex=5 mark=5 - */ -void switchdev_port_fwd_mark_set(struct net_device *dev, - struct net_device *group_dev, - bool joining) -{ - u32 mark = dev->ifindex; - u32 reset_mark = 0; - - if (group_dev) { - ASSERT_RTNL(); - if (joining) - mark = switchdev_port_fwd_mark_get(dev, group_dev); - else if (dev->offload_fwd_mark == mark) - /* Ohoh, this port was the mark reference port, - * but it's leaving the group, so reset the - * mark for the remaining ports in the group. - */ - switchdev_port_fwd_mark_reset(group_dev, mark, - &reset_mark); - } - - dev->offload_fwd_mark = mark; -} -EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set); -- cgit v1.2.3 From 0294b625ad5a6d1fb50632d67cf384862d8a4a46 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 28 Aug 2016 14:43:17 -0700 Subject: net: Add read_sock proto_op Add new function in proto_ops structure. This includes moving the typedef got sk_read_actor into net.h and removing the definition from tcp.h. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/net.h | 6 ++++++ include/net/tcp.h | 2 -- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/linux/net.h b/include/linux/net.h index b9f0ff4d489c..cd0c8bd0a1de 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -128,6 +129,9 @@ struct page; struct sockaddr; struct msghdr; struct module; +struct sk_buff; +typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, + unsigned int, size_t); struct proto_ops { int family; @@ -186,6 +190,8 @@ struct proto_ops { struct pipe_inode_info *pipe, size_t len, unsigned int flags); int (*set_peek_off)(struct sock *sk, int val); int (*peek_len)(struct socket *sock); + int (*read_sock)(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor); }; #define DECLARE_SOCKADDR(type, dst, src) \ diff --git a/include/net/tcp.h b/include/net/tcp.h index 25d64f6de69e..d56666ad9249 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -603,8 +603,6 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) void tcp_get_info(struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ -typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, - unsigned int, size_t); int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); -- cgit v1.2.3 From 3203558589a597e0a10a66b258fbc5a4a6659ed0 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 28 Aug 2016 14:43:18 -0700 Subject: tcp: Set read_sock and peek_len proto_ops In inet_stream_ops we set read_sock to tcp_read_sock and peek_len to tcp_peek_len (which is just a stub function that calls tcp_inq). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/af_inet.c | 2 ++ net/ipv4/tcp.c | 6 ++++++ net/ipv6/af_inet6.c | 2 ++ 4 files changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index d56666ad9249..a5af6be3a572 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1848,6 +1848,8 @@ static inline int tcp_inq(struct sock *sk) return answ; } +int tcp_peek_len(struct socket *sock); + static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) { u16 segs_in; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 989a362814a9..e94b47be0019 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -916,6 +916,8 @@ const struct proto_ops inet_stream_ops = { .mmap = sock_no_mmap, .sendpage = inet_sendpage, .splice_read = tcp_splice_read, + .read_sock = tcp_read_sock, + .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f1a9a0a8a1f3..60a438864f32 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1570,6 +1570,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, } EXPORT_SYMBOL(tcp_read_sock); +int tcp_peek_len(struct socket *sock) +{ + return tcp_inq(sock->sk); +} +EXPORT_SYMBOL(tcp_peek_len); + /* * This routine copies from a sock struct into the user buffer. * diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b454055ba625..46ad699937fd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -545,6 +545,8 @@ const struct proto_ops inet6_stream_ops = { .mmap = sock_no_mmap, .sendpage = inet_sendpage, .splice_read = tcp_splice_read, + .read_sock = tcp_read_sock, + .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, -- cgit v1.2.3 From 96a59083478d1ea66684c59c073424a9d4e6ac6d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 28 Aug 2016 14:43:19 -0700 Subject: kcm: Remove TCP specific references from kcm and strparser kcm and strparser need to work with any type of stream socket not just TCP. Eliminate references to TCP and call generic proto_ops functions of read_sock and peek_len. Also in strp_init check if the socket support the proto_ops read_sock and peek_len. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/strparser.h | 2 +- net/kcm/kcmsock.c | 30 +++++++++++++---------------- net/strparser/strparser.c | 48 ++++++++++++++++++++++++++++------------------- 3 files changed, 43 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/include/net/strparser.h b/include/net/strparser.h index 91fa0b958426..0c28ad97c52f 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -137,6 +137,6 @@ void strp_stop(struct strparser *strp); void strp_check_rcv(struct strparser *strp); int strp_init(struct strparser *strp, struct sock *csk, struct strp_callbacks *cb); -void strp_tcp_data_ready(struct strparser *strp); +void strp_data_ready(struct strparser *strp); #endif /* __NET_STRPARSER_H_ */ diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index eb731cacc325..2632ac748371 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -26,7 +26,6 @@ #include #include #include -#include #include unsigned int kcm_net_id; @@ -340,7 +339,7 @@ static void unreserve_rx_kcm(struct kcm_psock *psock, } /* Lower sock lock held */ -static void psock_tcp_data_ready(struct sock *sk) +static void psock_data_ready(struct sock *sk) { struct kcm_psock *psock; @@ -348,7 +347,7 @@ static void psock_tcp_data_ready(struct sock *sk) psock = (struct kcm_psock *)sk->sk_user_data; if (likely(psock)) - strp_tcp_data_ready(&psock->strp); + strp_data_ready(&psock->strp); read_unlock_bh(&sk->sk_callback_lock); } @@ -392,7 +391,7 @@ static int kcm_read_sock_done(struct strparser *strp, int err) return err; } -static void psock_tcp_state_change(struct sock *sk) +static void psock_state_change(struct sock *sk) { /* TCP only does a POLLIN for a half close. Do a POLLHUP here * since application will normally not poll with POLLIN @@ -402,7 +401,7 @@ static void psock_tcp_state_change(struct sock *sk) report_csk_error(sk, EPIPE); } -static void psock_tcp_write_space(struct sock *sk) +static void psock_write_space(struct sock *sk) { struct kcm_psock *psock; struct kcm_mux *mux; @@ -1383,19 +1382,12 @@ static int kcm_attach(struct socket *sock, struct socket *csock, struct list_head *head; int index = 0; struct strp_callbacks cb; - - if (csock->ops->family != PF_INET && - csock->ops->family != PF_INET6) - return -EINVAL; + int err; csk = csock->sk; if (!csk) return -EINVAL; - /* Only support TCP for now */ - if (csk->sk_protocol != IPPROTO_TCP) - return -EINVAL; - psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); if (!psock) return -ENOMEM; @@ -1409,7 +1401,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock, cb.parse_msg = kcm_parse_func_strparser; cb.read_sock_done = kcm_read_sock_done; - strp_init(&psock->strp, csk, &cb); + err = strp_init(&psock->strp, csk, &cb); + if (err) { + kmem_cache_free(kcm_psockp, psock); + return err; + } sock_hold(csk); @@ -1418,9 +1414,9 @@ static int kcm_attach(struct socket *sock, struct socket *csock, psock->save_write_space = csk->sk_write_space; psock->save_state_change = csk->sk_state_change; csk->sk_user_data = psock; - csk->sk_data_ready = psock_tcp_data_ready; - csk->sk_write_space = psock_tcp_write_space; - csk->sk_state_change = psock_tcp_state_change; + csk->sk_data_ready = psock_data_ready; + csk->sk_write_space = psock_write_space; + csk->sk_state_change = psock_state_change; write_unlock_bh(&csk->sk_callback_lock); /* Finished initialization, now add the psock to the MUX. */ diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 4ecfc10cbe6d..5c7549b5b92c 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -26,7 +26,6 @@ #include #include #include -#include static struct workqueue_struct *strp_wq; @@ -80,9 +79,16 @@ static void strp_parser_err(struct strparser *strp, int err, strp->cb.abort_parser(strp, err); } +static inline int strp_peek_len(struct strparser *strp) +{ + struct socket *sock = strp->sk->sk_socket; + + return sock->ops->peek_len(sock); +} + /* Lower socket lock held */ -static int strp_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, - unsigned int orig_offset, size_t orig_len) +static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, + unsigned int orig_offset, size_t orig_len) { struct strparser *strp = (struct strparser *)desc->arg.data; struct _strp_rx_msg *rxm; @@ -266,12 +272,12 @@ static int strp_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, if (extra < 0) { /* Message not complete yet. */ if (rxm->strp.full_len - rxm->accum_len > - tcp_inq(strp->sk)) { + strp_peek_len(strp)) { /* Don't have the whole messages in the socket * buffer. Set strp->rx_need_bytes to wait for * the rest of the message. Also, set "early * eaten" since we've already buffered the skb - * but don't consume yet per tcp_read_sock. + * but don't consume yet per strp_read_sock. */ if (!rxm->accum_len) { @@ -329,16 +335,17 @@ static int default_read_sock_done(struct strparser *strp, int err) } /* Called with lock held on lower socket */ -static int strp_tcp_read_sock(struct strparser *strp) +static int strp_read_sock(struct strparser *strp) { + struct socket *sock = strp->sk->sk_socket; read_descriptor_t desc; desc.arg.data = strp; desc.error = 0; desc.count = 1; /* give more than one skb per call */ - /* sk should be locked here, so okay to do tcp_read_sock */ - tcp_read_sock(strp->sk, &desc, strp_tcp_recv); + /* sk should be locked here, so okay to do read_sock */ + sock->ops->read_sock(strp->sk, &desc, strp_recv); desc.error = strp->cb.read_sock_done(strp, desc.error); @@ -346,10 +353,8 @@ static int strp_tcp_read_sock(struct strparser *strp) } /* Lower sock lock held */ -void strp_tcp_data_ready(struct strparser *strp) +void strp_data_ready(struct strparser *strp) { - struct sock *csk = strp->sk; - if (unlikely(strp->rx_stopped)) return; @@ -360,7 +365,7 @@ void strp_tcp_data_ready(struct strparser *strp) * allows a thread in BH context to safely check if the process * lock is held. In this case, if the lock is held, queue work. */ - if (sock_owned_by_user(csk)) { + if (sock_owned_by_user(strp->sk)) { queue_work(strp_wq, &strp->rx_work); return; } @@ -369,24 +374,24 @@ void strp_tcp_data_ready(struct strparser *strp) return; if (strp->rx_need_bytes) { - if (tcp_inq(csk) >= strp->rx_need_bytes) + if (strp_peek_len(strp) >= strp->rx_need_bytes) strp->rx_need_bytes = 0; else return; } - if (strp_tcp_read_sock(strp) == -ENOMEM) + if (strp_read_sock(strp) == -ENOMEM) queue_work(strp_wq, &strp->rx_work); } -EXPORT_SYMBOL_GPL(strp_tcp_data_ready); +EXPORT_SYMBOL_GPL(strp_data_ready); static void do_strp_rx_work(struct strparser *strp) { read_descriptor_t rd_desc; struct sock *csk = strp->sk; - /* We need the read lock to synchronize with strp_tcp_data_ready. We - * need the socket lock for calling tcp_read_sock. + /* We need the read lock to synchronize with strp_data_ready. We + * need the socket lock for calling strp_read_sock. */ lock_sock(csk); @@ -398,7 +403,7 @@ static void do_strp_rx_work(struct strparser *strp) rd_desc.arg.data = strp; - if (strp_tcp_read_sock(strp) == -ENOMEM) + if (strp_read_sock(strp) == -ENOMEM) queue_work(strp_wq, &strp->rx_work); out: @@ -424,9 +429,14 @@ static void strp_rx_msg_timeout(unsigned long arg) int strp_init(struct strparser *strp, struct sock *csk, struct strp_callbacks *cb) { + struct socket *sock = csk->sk_socket; + if (!cb || !cb->rcv_msg || !cb->parse_msg) return -EINVAL; + if (!sock->ops->read_sock || !sock->ops->peek_len) + return -EAFNOSUPPORT; + memset(strp, 0, sizeof(*strp)); strp->sk = csk; @@ -456,7 +466,7 @@ void strp_unpause(struct strparser *strp) } EXPORT_SYMBOL_GPL(strp_unpause); -/* strp must already be stopped so that strp_tcp_recv will no longer be called. +/* strp must already be stopped so that strp_recv will no longer be called. * Note that strp_done is not called with the lower socket held. */ void strp_done(struct strparser *strp) -- cgit v1.2.3 From c9c3321257e1b95be9b375f811fb250162af8d39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 27 Aug 2016 07:37:54 -0700 Subject: tcp: add tcp_add_backlog() When TCP operates in lossy environments (between 1 and 10 % packet losses), many SACK blocks can be exchanged, and I noticed we could drop them on busy senders, if these SACK blocks have to be queued into the socket backlog. While the main cause is the poor performance of RACK/SACK processing, we can try to avoid these drops of valuable information that can lead to spurious timeouts and retransmits. Cause of the drops is the skb->truesize overestimation caused by : - drivers allocating ~2048 (or more) bytes as a fragment to hold an Ethernet frame. - various pskb_may_pull() calls bringing the headers into skb->head might have pulled all the frame content, but skb->truesize could not be lowered, as the stack has no idea of each fragment truesize. The backlog drops are also more visible on bidirectional flows, since their sk_rmem_alloc can be quite big. Let's add some room for the backlog, as only the socket owner can selectively take action to lower memory needs, like collapsing receive queues or partial ofo pruning. Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 33 +++++++++++++++++++++++++++++---- net/ipv6/tcp_ipv6.c | 5 +---- 3 files changed, 31 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index a5af6be3a572..dd99679e2e51 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1161,6 +1161,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp) } bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); +bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); #undef STATE_TRACE diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ad41e8ecf796..53e80cd004b6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1532,6 +1532,34 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_prequeue); +bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) +{ + u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf; + + /* Only socket owner can try to collapse/prune rx queues + * to reduce memory overhead, so add a little headroom here. + * Few sockets backlog are possibly concurrently non empty. + */ + limit += 64*1024; + + /* In case all data was pulled from skb frags (in __pskb_pull_tail()), + * we can fix skb->truesize to its real value to avoid future drops. + * This is valid because skb is not yet charged to the socket. + * It has been noticed pure SACK packets were sometimes dropped + * (if cooked by drivers without copybreak feature). + */ + if (!skb->data_len) + skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); + + if (unlikely(sk_add_backlog(sk, skb, limit))) { + bh_unlock_sock(sk); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP); + return true; + } + return false; +} +EXPORT_SYMBOL(tcp_add_backlog); + /* * From tcp_input.c */ @@ -1662,10 +1690,7 @@ process: if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); - } else if (unlikely(sk_add_backlog(sk, skb, - sk->sk_rcvbuf + sk->sk_sndbuf))) { - bh_unlock_sock(sk); - __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP); + } else if (tcp_add_backlog(sk, skb)) { goto discard_and_relse; } bh_unlock_sock(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e4f55683af31..5bf460bd299f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1467,10 +1467,7 @@ process: if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); - } else if (unlikely(sk_add_backlog(sk, skb, - sk->sk_rcvbuf + sk->sk_sndbuf))) { - bh_unlock_sock(sk); - __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP); + } else if (tcp_add_backlog(sk, skb)) { goto discard_and_relse; } bh_unlock_sock(sk); -- cgit v1.2.3 From 616b14b46957b52dc7e1f3ec2210d3f9051b1178 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Aug 2016 15:33:30 +0200 Subject: netfilter: don't rely on DYING bit to detect when destroy event was sent The reliable event delivery mode currently (ab)uses the DYING bit to detect which entries on the dying list have to be skipped when re-delivering events from the eache worker in reliable event mode. Currently when we delete the conntrack from main table we only set this bit if we could also deliver the netlink destroy event to userspace. If we fail we move it to the dying list, the ecache worker will reattempt event delivery for all confirmed conntracks on the dying list that do not have the DYING bit set. Once timer is gone, we can no longer use if (del_timer()) to detect when we 'stole' the reference count owned by the timer/hash entry, so we need some other way to avoid racing with other cpu. Pablo suggested to add a marker in the ecache extension that skips entries that have been unhashed from main table but are still waiting for the last reference count to be dropped (e.g. because one skb waiting on nfqueue verdict still holds a reference). We do this by adding a tristate. If we fail to deliver the destroy event, make a note of this in the eache extension. The worker can then skip all entries that are in a different state. Either they never delivered a destroy event, e.g. because the netlink backend was not loaded, or redelivery took place already. Once the conntrack timer is removed we will now be able to replace del_timer() test with test_and_set_bit(DYING, &ct->status) to avoid racing with other cpu that tries to evict the same conntrack. Because DYING will then be set right before we report the destroy event we can no longer skip event reporting when dying bit is set. Suggested-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_ecache.h | 17 ++++++++++++----- net/netfilter/nf_conntrack_ecache.c | 22 ++++++++++++++-------- 2 files changed, 26 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index fa36447371c6..12d967b58726 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -12,12 +12,19 @@ #include #include +enum nf_ct_ecache_state { + NFCT_ECACHE_UNKNOWN, /* destroy event not sent */ + NFCT_ECACHE_DESTROY_FAIL, /* tried but failed to send destroy event */ + NFCT_ECACHE_DESTROY_SENT, /* sent destroy event after failure */ +}; + struct nf_conntrack_ecache { - unsigned long cache; /* bitops want long */ - unsigned long missed; /* missed events */ - u16 ctmask; /* bitmask of ct events to be delivered */ - u16 expmask; /* bitmask of expect events to be delivered */ - u32 portid; /* netlink portid of destroyer */ + unsigned long cache; /* bitops want long */ + unsigned long missed; /* missed events */ + u16 ctmask; /* bitmask of ct events to be delivered */ + u16 expmask; /* bitmask of expect events to be delivered */ + u32 portid; /* netlink portid of destroyer */ + enum nf_ct_ecache_state state; /* ecache state */ }; static inline struct nf_conntrack_ecache * diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index d28011b42845..da9df2d56e66 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -49,8 +49,13 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + struct nf_conntrack_ecache *e; - if (nf_ct_is_dying(ct)) + if (!nf_ct_is_confirmed(ct)) + continue; + + e = nf_ct_ecache_find(ct); + if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL) continue; if (nf_conntrack_event(IPCT_DESTROY, ct)) { @@ -58,8 +63,7 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) break; } - /* we've got the event delivered, now it's dying */ - set_bit(IPS_DYING_BIT, &ct->status); + e->state = NFCT_ECACHE_DESTROY_SENT; refs[evicted] = ct; if (++evicted >= ARRAY_SIZE(refs)) { @@ -130,7 +134,7 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, if (!e) goto out_unlock; - if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) { + if (nf_ct_is_confirmed(ct)) { struct nf_ct_event item = { .ct = ct, .portid = e->portid ? e->portid : portid, @@ -150,11 +154,13 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, * triggered by a process, we store the PORTID * to include it in the retransmission. */ - if (eventmask & (1 << IPCT_DESTROY) && - e->portid == 0 && portid != 0) - e->portid = portid; - else + if (eventmask & (1 << IPCT_DESTROY)) { + if (e->portid == 0 && portid != 0) + e->portid = portid; + e->state = NFCT_ECACHE_DESTROY_FAIL; + } else { e->missed |= eventmask; + } } else { e->missed &= ~missed; } -- cgit v1.2.3 From f330a7fdbe1611104622faff7e614a246a7d20f0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Aug 2016 15:33:31 +0200 Subject: netfilter: conntrack: get rid of conntrack timer With stats enabled this eats 80 bytes on x86_64 per nf_conn entry, as Eric Dumazet pointed out during netfilter workshop 2016. Eric also says: "Another reason was the fact that Thomas was about to change max timer range [..]" (500462a9de657f8, 'timers: Switch to a non-cascading wheel'). Remove the timer and use a 32bit jiffies value containing timestamp until entry is valid. During conntrack lookup, even before doing tuple comparision, check the timeout value and evict the entry in case it is too old. The dying bit is used as a synchronization point to avoid races where multiple cpus try to evict the same entry. Because lookup is always lockless, we need to bump the refcnt once when we evict, else we could try to evict already-dead entry that is being recycled. This is the standard/expected way when conntrack entries are destroyed. Followup patches will introduce garbage colliction via work queue and further places where we can reap obsoleted entries (e.g. during netlink dumps), this is needed to avoid expired conntracks from hanging around for too long when lookup rate is low after a busy period. Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 23 +++++++-- net/netfilter/nf_conntrack_core.c | 91 ++++++++++++++++++++---------------- net/netfilter/nf_conntrack_netlink.c | 14 ++---- net/netfilter/nf_conntrack_pptp.c | 3 +- net/netfilter/nf_nat_core.c | 6 --- 5 files changed, 74 insertions(+), 63 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 2a127480d4cc..7277751128e8 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -42,7 +42,6 @@ union nf_conntrack_expect_proto { #include #include -#include #ifdef CONFIG_NETFILTER_DEBUG #define NF_CT_ASSERT(x) WARN_ON(!(x)) @@ -73,7 +72,7 @@ struct nf_conn_help { #include struct nf_conn { - /* Usage count in here is 1 for hash table/destruct timer, 1 per skb, + /* Usage count in here is 1 for hash table, 1 per skb, * plus 1 for any connection(s) we are `master' for * * Hint, SKB address this struct and refcnt via skb->nfct and @@ -96,8 +95,8 @@ struct nf_conn { /* Have we seen traffic both ways yet? (bitset) */ unsigned long status; - /* Timer function; drops refcnt when it goes off. */ - struct timer_list timeout; + /* jiffies32 when this ct is considered dead */ + u32 timeout; possible_net_t ct_net; @@ -291,14 +290,28 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK; } +#define nfct_time_stamp ((u32)(jiffies)) + /* jiffies until ct expires, 0 if already expired */ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) { - long timeout = (long)ct->timeout.expires - (long)jiffies; + s32 timeout = ct->timeout - nfct_time_stamp; return timeout > 0 ? timeout : 0; } +static inline bool nf_ct_is_expired(const struct nf_conn *ct) +{ + return (__s32)(ct->timeout - nfct_time_stamp) <= 0; +} + +/* use after obtaining a reference count */ +static inline bool nf_ct_should_gc(const struct nf_conn *ct) +{ + return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) && + !nf_ct_is_dying(ct); +} + struct kernel_param; int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 887926aefc72..87ee6dad777c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -371,7 +371,6 @@ destroy_conntrack(struct nf_conntrack *nfct) pr_debug("destroy_conntrack(%p)\n", ct); NF_CT_ASSERT(atomic_read(&nfct->use) == 0); - NF_CT_ASSERT(!timer_pending(&ct->timeout)); if (unlikely(nf_ct_is_template(ct))) { nf_ct_tmpl_free(ct); @@ -434,35 +433,30 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) { struct nf_conn_tstamp *tstamp; + if (test_and_set_bit(IPS_DYING_BIT, &ct->status)) + return false; + tstamp = nf_conn_tstamp_find(ct); if (tstamp && tstamp->stop == 0) tstamp->stop = ktime_get_real_ns(); - if (nf_ct_is_dying(ct)) - goto delete; - if (nf_conntrack_event_report(IPCT_DESTROY, ct, portid, report) < 0) { - /* destroy event was not delivered */ + /* destroy event was not delivered. nf_ct_put will + * be done by event cache worker on redelivery. + */ nf_ct_delete_from_lists(ct); nf_conntrack_ecache_delayed_work(nf_ct_net(ct)); return false; } nf_conntrack_ecache_work(nf_ct_net(ct)); - set_bit(IPS_DYING_BIT, &ct->status); - delete: nf_ct_delete_from_lists(ct); nf_ct_put(ct); return true; } EXPORT_SYMBOL_GPL(nf_ct_delete); -static void death_by_timeout(unsigned long ul_conntrack) -{ - nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0); -} - static inline bool nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, const struct nf_conntrack_tuple *tuple, @@ -480,6 +474,18 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, net_eq(net, nf_ct_net(ct)); } +/* caller must hold rcu readlock and none of the nf_conntrack_locks */ +static void nf_ct_gc_expired(struct nf_conn *ct) +{ + if (!atomic_inc_not_zero(&ct->ct_general.use)) + return; + + if (nf_ct_should_gc(ct)) + nf_ct_kill(ct); + + nf_ct_put(ct); +} + /* * Warning : * - Caller must take a reference on returned object @@ -499,6 +505,17 @@ begin: bucket = reciprocal_scale(hash, hsize); hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) { + struct nf_conn *ct; + + ct = nf_ct_tuplehash_to_ctrack(h); + if (nf_ct_is_expired(ct)) { + nf_ct_gc_expired(ct); + continue; + } + + if (nf_ct_is_dying(ct)) + continue; + if (nf_ct_key_equal(h, tuple, zone, net)) { NF_CT_STAT_INC_ATOMIC(net, found); return h; @@ -597,7 +614,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) zone, net)) goto out; - add_timer(&ct->timeout); smp_wmb(); /* The caller holds a reference to this object */ atomic_set(&ct->ct_general.use, 2); @@ -750,8 +766,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ - ct->timeout.expires += jiffies; - add_timer(&ct->timeout); + ct->timeout += nfct_time_stamp; atomic_inc(&ct->ct_general.use); ct->status |= IPS_CONFIRMED; @@ -815,8 +830,16 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); - if (ct != ignored_conntrack && - nf_ct_key_equal(h, tuple, zone, net)) { + + if (ct == ignored_conntrack) + continue; + + if (nf_ct_is_expired(ct)) { + nf_ct_gc_expired(ct); + continue; + } + + if (nf_ct_key_equal(h, tuple, zone, net)) { NF_CT_STAT_INC_ATOMIC(net, found); rcu_read_unlock(); return 1; @@ -850,6 +873,11 @@ static unsigned int early_drop_list(struct net *net, hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); + if (nf_ct_is_expired(tmp)) { + nf_ct_gc_expired(tmp); + continue; + } + if (test_bit(IPS_ASSURED_BIT, &tmp->status) || !net_eq(nf_ct_net(tmp), net) || nf_ct_is_dying(tmp)) @@ -867,7 +895,6 @@ static unsigned int early_drop_list(struct net *net, */ if (net_eq(nf_ct_net(tmp), net) && nf_ct_is_confirmed(tmp) && - del_timer(&tmp->timeout) && nf_ct_delete(tmp, 0, 0)) drops++; @@ -937,8 +964,6 @@ __nf_conntrack_alloc(struct net *net, /* save hash for reusing when confirming */ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; ct->status = 0; - /* Don't set timer yet: wait for confirmation */ - setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); write_pnet(&ct->ct_net, net); memset(&ct->__nfct_init_offset[0], 0, offsetof(struct nf_conn, proto) - @@ -1312,7 +1337,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, unsigned long extra_jiffies, int do_acct) { - NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); NF_CT_ASSERT(skb); /* Only update if this is not a fixed timeout */ @@ -1320,18 +1344,10 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, goto acct; /* If not in hash table, timer will not be active yet */ - if (!nf_ct_is_confirmed(ct)) { - ct->timeout.expires = extra_jiffies; - } else { - unsigned long newtime = jiffies + extra_jiffies; - - /* Only update the timeout if the new timeout is at least - HZ jiffies from the old timeout. Need del_timer for race - avoidance (may already be dying). */ - if (newtime - ct->timeout.expires >= HZ) - mod_timer_pending(&ct->timeout, newtime); - } + if (nf_ct_is_confirmed(ct)) + extra_jiffies += nfct_time_stamp; + ct->timeout = extra_jiffies; acct: if (do_acct) nf_ct_acct_update(ct, ctinfo, skb->len); @@ -1346,11 +1362,7 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, if (do_acct) nf_ct_acct_update(ct, ctinfo, skb->len); - if (del_timer(&ct->timeout)) { - ct->timeout.function((unsigned long)ct); - return true; - } - return false; + return nf_ct_delete(ct, 0, 0); } EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); @@ -1485,11 +1497,8 @@ void nf_ct_iterate_cleanup(struct net *net, while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { /* Time to push up daises... */ - if (del_timer(&ct->timeout)) - nf_ct_delete(ct, portid, report); - - /* ... else the timer will get him soon. */ + nf_ct_delete(ct, portid, report); nf_ct_put(ct); cond_resched(); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 68800c10a320..81fd34ce0a57 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1144,9 +1144,7 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, } } - if (del_timer(&ct->timeout)) - nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); - + nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_put(ct); return 0; @@ -1514,11 +1512,10 @@ static int ctnetlink_change_timeout(struct nf_conn *ct, { u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); - if (!del_timer(&ct->timeout)) - return -ETIME; + ct->timeout = nfct_time_stamp + timeout * HZ; - ct->timeout.expires = jiffies + timeout * HZ; - add_timer(&ct->timeout); + if (test_bit(IPS_DYING_BIT, &ct->status)) + return -ETIME; return 0; } @@ -1716,9 +1713,8 @@ ctnetlink_create_conntrack(struct net *net, if (!cda[CTA_TIMEOUT]) goto err1; - ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); - ct->timeout.expires = jiffies + ct->timeout.expires * HZ; + ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; rcu_read_lock(); if (cda[CTA_HELP]) { diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 5588c7ae1ac2..f60a4755d71e 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -157,8 +157,7 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, pr_debug("setting timeout of conntrack %p to 0\n", sibling); sibling->proto.gre.timeout = 0; sibling->proto.gre.stream_timeout = 0; - if (del_timer(&sibling->timeout)) - sibling->timeout.function((unsigned long)sibling); + nf_ct_kill(sibling); nf_ct_put(sibling); return 1; } else { diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index de31818417b8..81ae41f85d3a 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -565,16 +565,10 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack() * will delete entry from already-freed table. */ - if (!del_timer(&ct->timeout)) - return 1; - ct->status &= ~IPS_NAT_DONE_MASK; - rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, nf_nat_bysource_params); - add_timer(&ct->timeout); - /* don't delete conntrack. Although that would make things a lot * simpler, we'd end up flushing all conntracks on nat rmmod. */ -- cgit v1.2.3 From ad66713f5a20034b3b2a0cbc184319b2ede93f11 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 25 Aug 2016 15:33:35 +0200 Subject: netfilter: remove __nf_ct_kill_acct helper After timer removal this just calls nf_ct_delete so remove the __ prefix version and make nf_ct_kill a shorthand for nf_ct_delete. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 13 +++---------- net/netfilter/nf_conntrack_core.c | 12 +++++------- 2 files changed, 8 insertions(+), 17 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 7277751128e8..50418052a520 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -219,21 +219,14 @@ static inline void nf_ct_refresh(struct nf_conn *ct, __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0); } -bool __nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, - const struct sk_buff *skb, int do_acct); - /* kill conntrack and do accounting */ -static inline bool nf_ct_kill_acct(struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb) -{ - return __nf_ct_kill_acct(ct, ctinfo, skb, 1); -} +bool nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + const struct sk_buff *skb); /* kill conntrack without accounting */ static inline bool nf_ct_kill(struct nf_conn *ct) { - return __nf_ct_kill_acct(ct, 0, NULL, 0); + return nf_ct_delete(ct, 0, 0); } /* These are for NAT. Icky. */ diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7c66ce401ce9..ac1db4019d5c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1430,17 +1430,15 @@ acct: } EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); -bool __nf_ct_kill_acct(struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb, - int do_acct) +bool nf_ct_kill_acct(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb) { - if (do_acct) - nf_ct_acct_update(ct, ctinfo, skb->len); + nf_ct_acct_update(ct, ctinfo, skb->len); return nf_ct_delete(ct, 0, 0); } -EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); +EXPORT_SYMBOL_GPL(nf_ct_kill_acct); #if IS_ENABLED(CONFIG_NF_CT_NETLINK) -- cgit v1.2.3 From 779994fa3636d46848edb402fe7517968e036e6f Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Mon, 29 Aug 2016 18:25:28 +0800 Subject: netfilter: log: Check param to avoid overflow in nf_log_set The nf_log_set is an interface function, so it should do the strict sanity check of parameters. Convert the return value of nf_log_set as int instead of void. When the pf is invalid, return -EOPNOTSUPP. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_log.h | 3 +-- net/bridge/netfilter/nf_log_bridge.c | 3 +-- net/ipv4/netfilter/nf_log_arp.c | 3 +-- net/ipv4/netfilter/nf_log_ipv4.c | 3 +-- net/ipv6/netfilter/nf_log_ipv6.c | 3 +-- net/netfilter/nf_log.c | 8 +++++--- 6 files changed, 10 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 83d855ba6af1..ee07dc8b0a7b 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -60,8 +60,7 @@ struct nf_logger { int nf_log_register(u_int8_t pf, struct nf_logger *logger); void nf_log_unregister(struct nf_logger *logger); -void nf_log_set(struct net *net, u_int8_t pf, - const struct nf_logger *logger); +int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger); void nf_log_unset(struct net *net, const struct nf_logger *logger); int nf_log_bind_pf(struct net *net, u_int8_t pf, diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c index 5d9953a90929..1663df598545 100644 --- a/net/bridge/netfilter/nf_log_bridge.c +++ b/net/bridge/netfilter/nf_log_bridge.c @@ -50,8 +50,7 @@ static struct nf_logger nf_bridge_logger __read_mostly = { static int __net_init nf_log_bridge_net_init(struct net *net) { - nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger); - return 0; + return nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger); } static void __net_exit nf_log_bridge_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index cf8f2d4e867a..8945c2653814 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -111,8 +111,7 @@ static struct nf_logger nf_arp_logger __read_mostly = { static int __net_init nf_log_arp_net_init(struct net *net) { - nf_log_set(net, NFPROTO_ARP, &nf_arp_logger); - return 0; + return nf_log_set(net, NFPROTO_ARP, &nf_arp_logger); } static void __net_exit nf_log_arp_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 076aadda0473..20f225593a8b 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -347,8 +347,7 @@ static struct nf_logger nf_ip_logger __read_mostly = { static int __net_init nf_log_ipv4_net_init(struct net *net) { - nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger); - return 0; + return nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger); } static void __net_exit nf_log_ipv4_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 8dd869642f45..c1bcf699a23d 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -379,8 +379,7 @@ static struct nf_logger nf_ip6_logger __read_mostly = { static int __net_init nf_log_ipv6_net_init(struct net *net) { - nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger); - return 0; + return nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger); } static void __net_exit nf_log_ipv6_net_exit(struct net *net) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index aa5847a16713..30a17d649a83 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -39,12 +39,12 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) return NULL; } -void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) +int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) { const struct nf_logger *log; - if (pf == NFPROTO_UNSPEC) - return; + if (pf == NFPROTO_UNSPEC || pf >= ARRAY_SIZE(net->nf.nf_loggers)) + return -EOPNOTSUPP; mutex_lock(&nf_log_mutex); log = nft_log_dereference(net->nf.nf_loggers[pf]); @@ -52,6 +52,8 @@ void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) rcu_assign_pointer(net->nf.nf_loggers[pf], logger); mutex_unlock(&nf_log_mutex); + + return 0; } EXPORT_SYMBOL(nf_log_set); -- cgit v1.2.3 From 8324f0bcfbfc645cf248e4b93ab58341b7d3b135 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Aug 2016 09:49:29 +0100 Subject: rxrpc: Provide a way for AFS to ask for the peer address of a call Provide a function so that kernel users, such as AFS, can ask for the peer address of a call: void rxrpc_kernel_get_peer(struct rxrpc_call *call, struct sockaddr_rxrpc *_srx); In the future the kernel service won't get sk_buffs to look inside. Further, this allows us to hide any canonicalisation inside AF_RXRPC for when IPv6 support is added. Also propagate this through to afs_find_server() and issue a warning if we can't handle the address family yet. Signed-off-by: David Howells --- Documentation/networking/rxrpc.txt | 7 +++++++ fs/afs/cmservice.c | 20 +++++++++++--------- fs/afs/internal.h | 5 ++++- fs/afs/rxrpc.c | 2 +- fs/afs/server.c | 11 ++++++++--- include/net/af_rxrpc.h | 2 ++ net/rxrpc/peer_object.c | 15 +++++++++++++++ 7 files changed, 48 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index 70c926ae212d..dfe0b008df74 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -868,6 +868,13 @@ The kernel interface functions are as follows: This is used to allocate a null RxRPC key that can be used to indicate anonymous security for a particular domain. + (*) Get the peer address of a call. + + void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, + struct sockaddr_rxrpc *_srx); + + This is used to find the remote peer address of a call. + ======================= CONFIGURABLE PARAMETERS diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index ca32d891bbc3..77ee481059ac 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -167,9 +167,9 @@ static void SRXAFSCB_CallBack(struct work_struct *work) static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, bool last) { + struct sockaddr_rxrpc srx; struct afs_callback *cb; struct afs_server *server; - struct in_addr addr; __be32 *bp; u32 tmp; int ret, loop; @@ -178,6 +178,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, switch (call->unmarshall) { case 0: + rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); call->offset = 0; call->unmarshall++; @@ -282,8 +283,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - memcpy(&addr, &ip_hdr(skb)->saddr, 4); - server = afs_find_server(&addr); + server = afs_find_server(&srx); if (!server) return -ENOTCONN; call->server = server; @@ -314,12 +314,14 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call, struct sk_buff *skb, bool last) { + struct sockaddr_rxrpc srx; struct afs_server *server; - struct in_addr addr; int ret; _enter(",{%u},%d", skb->len, last); + rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); + ret = afs_data_complete(call, skb, last); if (ret < 0) return ret; @@ -329,8 +331,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call, /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - memcpy(&addr, &ip_hdr(skb)->saddr, 4); - server = afs_find_server(&addr); + server = afs_find_server(&srx); if (!server) return -ENOTCONN; call->server = server; @@ -347,11 +348,13 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call, struct sk_buff *skb, bool last) { + struct sockaddr_rxrpc srx; struct afs_server *server; - struct in_addr addr; _enter(",{%u},%d", skb->len, last); + rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); + /* There are some arguments that we ignore */ afs_data_consumed(call, skb); if (!last) @@ -362,8 +365,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call, /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - memcpy(&addr, &ip_hdr(skb)->saddr, 4); - server = afs_find_server(&addr); + server = afs_find_server(&srx); if (!server) return -ENOTCONN; call->server = server; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index df976b2a7f40..d97552de9c59 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -20,6 +20,7 @@ #include #include #include +#include #include "afs.h" #include "afs_vl.h" @@ -607,6 +608,8 @@ extern void afs_proc_cell_remove(struct afs_cell *); /* * rxrpc.c */ +extern struct socket *afs_socket; + extern int afs_open_socket(void); extern void afs_close_socket(void); extern void afs_data_consumed(struct afs_call *, struct sk_buff *); @@ -654,7 +657,7 @@ do { \ extern struct afs_server *afs_lookup_server(struct afs_cell *, const struct in_addr *); -extern struct afs_server *afs_find_server(const struct in_addr *); +extern struct afs_server *afs_find_server(const struct sockaddr_rxrpc *); extern void afs_put_server(struct afs_server *); extern void __exit afs_purge_servers(void); diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 14d04c848465..a1916750e2f9 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -16,7 +16,7 @@ #include "internal.h" #include "afs_cm.h" -static struct socket *afs_socket; /* my RxRPC socket */ +struct socket *afs_socket; /* my RxRPC socket */ static struct workqueue_struct *afs_async_calls; static atomic_t afs_outstanding_calls; static atomic_t afs_outstanding_skbs; diff --git a/fs/afs/server.c b/fs/afs/server.c index f342acf3547d..d4066ab7dd55 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -178,13 +178,18 @@ server_in_two_cells: /* * look up a server by its IP address */ -struct afs_server *afs_find_server(const struct in_addr *_addr) +struct afs_server *afs_find_server(const struct sockaddr_rxrpc *srx) { struct afs_server *server = NULL; struct rb_node *p; - struct in_addr addr = *_addr; + struct in_addr addr = srx->transport.sin.sin_addr; - _enter("%pI4", &addr.s_addr); + _enter("{%d,%pI4}", srx->transport.family, &addr.s_addr); + + if (srx->transport.family != AF_INET) { + WARN(true, "AFS does not yes support non-IPv4 addresses\n"); + return NULL; + } read_lock(&afs_servers_lock); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 7b0f88699b25..f9224e835d43 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -49,5 +49,7 @@ int rxrpc_kernel_get_error_number(struct sk_buff *); void rxrpc_kernel_free_skb(struct sk_buff *); struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long); int rxrpc_kernel_reject_call(struct socket *); +void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, + struct sockaddr_rxrpc *); #endif /* _NET_RXRPC_H */ diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 538e9831c699..aebc73ac16dc 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -313,3 +313,18 @@ void __rxrpc_put_peer(struct rxrpc_peer *peer) kfree_rcu(peer, rcu); } + +/** + * rxrpc_kernel_get_peer - Get the peer address of a call + * @sock: The socket on which the call is in progress. + * @call: The call to query + * @_srx: Where to place the result + * + * Get the address of the remote peer in a call. + */ +void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, + struct sockaddr_rxrpc *_srx) +{ + *_srx = call->peer->srx; +} +EXPORT_SYMBOL(rxrpc_kernel_get_peer); -- cgit v1.2.3 From 4de48af663d88d8c9a2550e60725f5a5c660970b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Aug 2016 12:00:48 +0100 Subject: rxrpc: Pass struct socket * to more rxrpc kernel interface functions Pass struct socket * to more rxrpc kernel interface functions. They should be starting from this rather than the socket pointer in the rxrpc_call struct if they need to access the socket. I have left: rxrpc_kernel_is_data_last() rxrpc_kernel_get_abort_code() rxrpc_kernel_get_error_number() rxrpc_kernel_free_skb() rxrpc_kernel_data_consumed() unmodified as they're all about to be removed (and, in any case, don't touch the socket). Signed-off-by: David Howells --- Documentation/networking/rxrpc.txt | 11 ++++++++--- fs/afs/rxrpc.c | 26 +++++++++++++++----------- include/net/af_rxrpc.h | 10 +++++++--- net/rxrpc/af_rxrpc.c | 5 +++-- net/rxrpc/output.c | 20 +++++++++++--------- 5 files changed, 44 insertions(+), 28 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index dfe0b008df74..cfc8cb91452f 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -725,7 +725,8 @@ The kernel interface functions are as follows: (*) End a client call. - void rxrpc_kernel_end_call(struct rxrpc_call *call); + void rxrpc_kernel_end_call(struct socket *sock, + struct rxrpc_call *call); This is used to end a previously begun call. The user_call_ID is expunged from AF_RXRPC's knowledge and will not be seen again in association with @@ -733,7 +734,9 @@ The kernel interface functions are as follows: (*) Send data through a call. - int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, + int rxrpc_kernel_send_data(struct socket *sock, + struct rxrpc_call *call, + struct msghdr *msg, size_t len); This is used to supply either the request part of a client call or the @@ -747,7 +750,9 @@ The kernel interface functions are as follows: (*) Abort a call. - void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code); + void rxrpc_kernel_abort_call(struct socket *sock, + struct rxrpc_call *call, + u32 abort_code); This is used to abort a call if it's still in an abortable state. The abort code specified will be placed in the ABORT message sent. diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index a1916750e2f9..7b0d18900f50 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -207,7 +207,7 @@ static void afs_free_call(struct afs_call *call) static void afs_end_call_nofree(struct afs_call *call) { if (call->rxcall) { - rxrpc_kernel_end_call(call->rxcall); + rxrpc_kernel_end_call(afs_socket, call->rxcall); call->rxcall = NULL; } if (call->type->destructor) @@ -325,8 +325,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg, * returns from sending the request */ if (first + loop >= last) call->state = AFS_CALL_AWAIT_REPLY; - ret = rxrpc_kernel_send_data(call->rxcall, msg, - to - offset); + ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, + msg, to - offset); kunmap(pages[loop]); if (ret < 0) break; @@ -406,7 +406,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, * request */ if (!call->send_pages) call->state = AFS_CALL_AWAIT_REPLY; - ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size); + ret = rxrpc_kernel_send_data(afs_socket, rxcall, + &msg, call->request_size); if (ret < 0) goto error_do_abort; @@ -421,7 +422,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, return wait_mode->wait(call); error_do_abort: - rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT); + rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT); while ((skb = skb_dequeue(&call->rx_queue))) afs_free_skb(skb); error_kill_call: @@ -509,7 +510,8 @@ static void afs_deliver_to_call(struct afs_call *call) if (call->state != AFS_CALL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; do_abort: - rxrpc_kernel_abort_call(call->rxcall, + rxrpc_kernel_abort_call(afs_socket, + call->rxcall, abort_code); call->error = ret; call->state = AFS_CALL_ERROR; @@ -605,7 +607,7 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) /* kill the call */ if (call->state < AFS_CALL_COMPLETE) { _debug("call incomplete"); - rxrpc_kernel_abort_call(call->rxcall, RX_CALL_DEAD); + rxrpc_kernel_abort_call(afs_socket, call->rxcall, RX_CALL_DEAD); while ((skb = skb_dequeue(&call->rx_queue))) afs_free_skb(skb); } @@ -823,14 +825,15 @@ void afs_send_empty_reply(struct afs_call *call) msg.msg_flags = 0; call->state = AFS_CALL_AWAIT_ACK; - switch (rxrpc_kernel_send_data(call->rxcall, &msg, 0)) { + switch (rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, 0)) { case 0: _leave(" [replied]"); return; case -ENOMEM: _debug("oom"); - rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT); + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + RX_USER_ABORT); default: afs_end_call(call); _leave(" [error]"); @@ -859,7 +862,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) msg.msg_flags = 0; call->state = AFS_CALL_AWAIT_ACK; - n = rxrpc_kernel_send_data(call->rxcall, &msg, len); + n = rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, len); if (n >= 0) { /* Success */ _leave(" [replied]"); @@ -868,7 +871,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) if (n == -ENOMEM) { _debug("oom"); - rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT); + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + RX_USER_ABORT); } afs_end_call(call); _leave(" [error]"); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index f9224e835d43..f8d8079dc058 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -15,6 +15,9 @@ #include #include +struct key; +struct sock; +struct socket; struct rxrpc_call; /* @@ -39,10 +42,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, struct key *, unsigned long, gfp_t); -int rxrpc_kernel_send_data(struct rxrpc_call *, struct msghdr *, size_t); +int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, + struct msghdr *, size_t); void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); -void rxrpc_kernel_abort_call(struct rxrpc_call *, u32); -void rxrpc_kernel_end_call(struct rxrpc_call *); +void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32); +void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); bool rxrpc_kernel_is_data_last(struct sk_buff *); u32 rxrpc_kernel_get_abort_code(struct sk_buff *); int rxrpc_kernel_get_error_number(struct sk_buff *); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c7cf356b42b8..e07c91acd904 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -279,15 +279,16 @@ EXPORT_SYMBOL(rxrpc_kernel_begin_call); /** * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using + * @sock: The socket the call is on * @call: The call to end * * Allow a kernel service to end a call it was using. The call must be * complete before this is called (the call should be aborted if necessary). */ -void rxrpc_kernel_end_call(struct rxrpc_call *call) +void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); - rxrpc_remove_user_ID(call->socket, call); + rxrpc_remove_user_ID(rxrpc_sk(sock->sk), call); rxrpc_put_call(call); } EXPORT_SYMBOL(rxrpc_kernel_end_call); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 888fa87ed1d6..b1e708a12151 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -239,6 +239,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) /** * rxrpc_kernel_send_data - Allow a kernel service to send data on a call + * @sock: The socket the call is on * @call: The call to send data through * @msg: The data to send * @len: The amount of data to send @@ -248,8 +249,8 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) * nor should an address be supplied. MSG_MORE should be flagged if there's * more data to come, otherwise this data will end the transmission phase. */ -int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, - size_t len) +int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, + struct msghdr *msg, size_t len) { int ret; @@ -258,7 +259,7 @@ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, ASSERTCMP(msg->msg_name, ==, NULL); ASSERTCMP(msg->msg_control, ==, NULL); - lock_sock(&call->socket->sk); + lock_sock(sock->sk); _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); @@ -270,35 +271,36 @@ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, call->state != RXRPC_CALL_SERVER_SEND_REPLY) { ret = -EPROTO; /* request phase complete for this client call */ } else { - ret = rxrpc_send_data(call->socket, call, msg, len); + ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); } - release_sock(&call->socket->sk); + release_sock(sock->sk); _leave(" = %d", ret); return ret; } - EXPORT_SYMBOL(rxrpc_kernel_send_data); /** * rxrpc_kernel_abort_call - Allow a kernel service to abort a call + * @sock: The socket the call is on * @call: The call to be aborted * @abort_code: The abort code to stick into the ABORT packet * * Allow a kernel service to abort a call, if it's still in an abortable state. */ -void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code) +void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, + u32 abort_code) { _enter("{%d},%d", call->debug_id, abort_code); - lock_sock(&call->socket->sk); + lock_sock(sock->sk); _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); rxrpc_send_abort(call, abort_code); - release_sock(&call->socket->sk); + release_sock(sock->sk); _leave(""); } -- cgit v1.2.3 From 14972cbd34ff668c390cbd2e6497323484c9e812 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 24 Aug 2016 20:10:43 -0700 Subject: net: lwtunnel: Handle fragmentation Today mpls iptunnel lwtunnel_output redirect expects the tunnel output function to handle fragmentation. This is ok but can be avoided if we did not do the mpls output redirect too early. ie we could wait until ip fragmentation is done and then call mpls output for each ip fragment. To make this work we will need, 1) the lwtunnel state to carry encap headroom 2) and do the redirect to the encap output handler on the ip fragment (essentially do the output redirect after fragmentation) This patch adds tunnel headroom in lwtstate to make sure we account for tunnel data in mtu calculations during fragmentation and adds new xmit redirect handler to redirect to lwtunnel xmit func after ip fragmentation. This includes IPV6 and some mtu fixes and testing from David Ahern. Signed-off-by: Roopa Prabhu Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/lwtunnel.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ net/core/lwtunnel.c | 35 +++++++++++++++++++++++++++++++++++ net/ipv4/ip_output.c | 8 ++++++++ net/ipv4/route.c | 4 +++- net/ipv6/ip6_output.c | 8 ++++++++ net/ipv6/route.c | 4 +++- net/mpls/mpls_iptunnel.c | 9 +++++---- 7 files changed, 106 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index e9f116e29c22..ea3f80f58fd6 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -13,6 +13,13 @@ /* lw tunnel state flags */ #define LWTUNNEL_STATE_OUTPUT_REDIRECT BIT(0) #define LWTUNNEL_STATE_INPUT_REDIRECT BIT(1) +#define LWTUNNEL_STATE_XMIT_REDIRECT BIT(2) + +enum { + LWTUNNEL_XMIT_DONE, + LWTUNNEL_XMIT_CONTINUE, +}; + struct lwtunnel_state { __u16 type; @@ -21,6 +28,7 @@ struct lwtunnel_state { int (*orig_output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*orig_input)(struct sk_buff *); int len; + __u16 headroom; __u8 data[0]; }; @@ -34,6 +42,7 @@ struct lwtunnel_encap_ops { struct lwtunnel_state *lwtstate); int (*get_encap_size)(struct lwtunnel_state *lwtstate); int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b); + int (*xmit)(struct sk_buff *skb); }; #ifdef CONFIG_LWTUNNEL @@ -75,6 +84,24 @@ static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) return false; } + +static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate) +{ + if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_XMIT_REDIRECT)) + return true; + + return false; +} + +static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, + unsigned int mtu) +{ + if (lwtunnel_xmit_redirect(lwtstate) && lwtstate->headroom < mtu) + return lwtstate->headroom; + + return 0; +} + int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, @@ -90,6 +117,7 @@ struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len); int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b); int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb); int lwtunnel_input(struct sk_buff *skb); +int lwtunnel_xmit(struct sk_buff *skb); #else @@ -117,6 +145,17 @@ static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) return false; } +static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate) +{ + return false; +} + +static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, + unsigned int mtu) +{ + return 0; +} + static inline int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num) { @@ -170,6 +209,11 @@ static inline int lwtunnel_input(struct sk_buff *skb) return -EOPNOTSUPP; } +static inline int lwtunnel_xmit(struct sk_buff *skb) +{ + return -EOPNOTSUPP; +} + #endif /* CONFIG_LWTUNNEL */ #define MODULE_ALIAS_RTNL_LWT(encap_type) MODULE_ALIAS("rtnl-lwt-" __stringify(encap_type)) diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 669ecc9f884e..e5f84c26ba1a 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -251,6 +251,41 @@ drop: } EXPORT_SYMBOL(lwtunnel_output); +int lwtunnel_xmit(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + const struct lwtunnel_encap_ops *ops; + struct lwtunnel_state *lwtstate; + int ret = -EINVAL; + + if (!dst) + goto drop; + + lwtstate = dst->lwtstate; + + if (lwtstate->type == LWTUNNEL_ENCAP_NONE || + lwtstate->type > LWTUNNEL_ENCAP_MAX) + return 0; + + ret = -EOPNOTSUPP; + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[lwtstate->type]); + if (likely(ops && ops->xmit)) + ret = ops->xmit(skb); + rcu_read_unlock(); + + if (ret == -EOPNOTSUPP) + goto drop; + + return ret; + +drop: + kfree_skb(skb); + + return ret; +} +EXPORT_SYMBOL(lwtunnel_xmit); + int lwtunnel_input(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index dde37fb340bf..65569274efb8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include @@ -197,6 +198,13 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s skb = skb2; } + if (lwtunnel_xmit_redirect(dst->lwtstate)) { + int res = lwtunnel_xmit(skb); + + if (res < 0 || res == LWTUNNEL_XMIT_DONE) + return res; + } + rcu_read_lock_bh(); nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr); neigh = __ipv4_neigh_lookup_noref(dev, nexthop); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a1f2830d8110..3e992783c1d0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1246,7 +1246,9 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = 576; } - return min_t(unsigned int, mtu, IP_MAX_MTU); + mtu = min_t(unsigned int, mtu, IP_MAX_MTU); + + return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1dfc402d9ad1..993fd9666f1b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,6 +56,7 @@ #include #include #include +#include static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -104,6 +105,13 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } } + if (lwtunnel_xmit_redirect(dst->lwtstate)) { + int res = lwtunnel_xmit(skb); + + if (res < 0 || res == LWTUNNEL_XMIT_DONE) + return res; + } + rcu_read_lock_bh(); nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 49817555449e..09d43ff11a8d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1604,7 +1604,9 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) rcu_read_unlock(); out: - return min_t(unsigned int, mtu, IP6_MAX_MTU); + mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); + + return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } static struct dst_entry *icmp6_dst_gc_list; diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 644a8da6d4bd..aed872cc05a6 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -37,7 +37,7 @@ static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en) return en->labels * sizeof(struct mpls_shim_hdr); } -static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb) +static int mpls_xmit(struct sk_buff *skb) { struct mpls_iptunnel_encap *tun_encap_info; struct mpls_shim_hdr *hdr; @@ -115,7 +115,7 @@ static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb) net_dbg_ratelimited("%s: packet transmission failed: %d\n", __func__, err); - return 0; + return LWTUNNEL_XMIT_DONE; drop: kfree_skb(skb); @@ -153,7 +153,8 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla, if (ret) goto errout; newts->type = LWTUNNEL_ENCAP_MPLS; - newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; + newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT; + newts->headroom = mpls_encap_size(tun_encap_info); *ts = newts; @@ -209,7 +210,7 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) static const struct lwtunnel_encap_ops mpls_iptun_ops = { .build_state = mpls_build_state, - .output = mpls_output, + .xmit = mpls_xmit, .fill_encap = mpls_fill_encap_info, .get_encap_size = mpls_encap_nlsize, .cmp_encap = mpls_encap_cmp, -- cgit v1.2.3 From 8df3025520aaeba36aba867a4851f8968ac65b4d Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 31 Aug 2016 11:50:03 -0400 Subject: net: dsa: add MDB support Add SWITCHDEV_OBJ_ID_PORT_MDB support to the DSA layer. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- Documentation/networking/dsa/dsa.txt | 23 +++++++++++++++ include/net/dsa.h | 16 +++++++++++ net/dsa/slave.c | 55 ++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+) (limited to 'include/net') diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt index a4e55c76d371..6d6c07cf1a9a 100644 --- a/Documentation/networking/dsa/dsa.txt +++ b/Documentation/networking/dsa/dsa.txt @@ -584,6 +584,29 @@ of DSA, would be the its port-based VLAN, used by the associated bridge device. function that the driver has to call for each MAC address known to be behind the given port. A switchdev object is used to carry the VID and FDB info. +- port_mdb_prepare: bridge layer function invoked when the bridge prepares the + installation of a multicast database entry. If the operation is not supported, + this function should return -EOPNOTSUPP to inform the bridge code to fallback + to a software implementation. No hardware setup must be done in this function. + See port_fdb_add for this and details. + +- port_mdb_add: bridge layer function invoked when the bridge wants to install + a multicast database entry, the switch hardware should be programmed with the + specified address in the specified VLAN ID in the forwarding database + associated with this VLAN ID. + +Note: VLAN ID 0 corresponds to the port private database, which, in the context +of DSA, would be the its port-based VLAN, used by the associated bridge device. + +- port_mdb_del: bridge layer function invoked when the bridge wants to remove a + multicast database entry, the switch hardware should be programmed to delete + the specified MAC address from the specified VLAN ID if it was mapped into + this port forwarding database. + +- port_mdb_dump: bridge layer function invoked with a switchdev callback + function that the driver has to call for each MAC address known to be behind + the given port. A switchdev object is used to carry the VID and MDB info. + TODO ==== diff --git a/include/net/dsa.h b/include/net/dsa.h index 2ebeba44a461..e3eb230b970d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -234,6 +234,7 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds) struct switchdev_trans; struct switchdev_obj; struct switchdev_obj_port_fdb; +struct switchdev_obj_port_mdb; struct switchdev_obj_port_vlan; struct dsa_switch_ops { @@ -369,6 +370,21 @@ struct dsa_switch_ops { int (*port_fdb_dump)(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, int (*cb)(struct switchdev_obj *obj)); + + /* + * Multicast database + */ + int (*port_mdb_prepare)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans); + void (*port_mdb_add)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans); + int (*port_mdb_del)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb); + int (*port_mdb_dump)(struct dsa_switch *ds, int port, + struct switchdev_obj_port_mdb *mdb, + int (*cb)(struct switchdev_obj *obj)); }; void register_switch_driver(struct dsa_switch_ops *type); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 9f6c2a20f6ff..9ecbe787f102 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -290,6 +290,50 @@ static int dsa_slave_port_fdb_dump(struct net_device *dev, return -EOPNOTSUPP; } +static int dsa_slave_port_mdb_add(struct net_device *dev, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (switchdev_trans_ph_prepare(trans)) { + if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) + return -EOPNOTSUPP; + + return ds->ops->port_mdb_prepare(ds, p->port, mdb, trans); + } + + ds->ops->port_mdb_add(ds, p->port, mdb, trans); + + return 0; +} + +static int dsa_slave_port_mdb_del(struct net_device *dev, + const struct switchdev_obj_port_mdb *mdb) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (ds->ops->port_mdb_del) + return ds->ops->port_mdb_del(ds, p->port, mdb); + + return -EOPNOTSUPP; +} + +static int dsa_slave_port_mdb_dump(struct net_device *dev, + struct switchdev_obj_port_mdb *mdb, + switchdev_obj_dump_cb_t *cb) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (ds->ops->port_mdb_dump) + return ds->ops->port_mdb_dump(ds, p->port, mdb, cb); + + return -EOPNOTSUPP; +} + static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -412,6 +456,10 @@ static int dsa_slave_port_obj_add(struct net_device *dev, SWITCHDEV_OBJ_PORT_FDB(obj), trans); break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = dsa_slave_port_mdb_add(dev, SWITCHDEV_OBJ_PORT_MDB(obj), + trans); + break; case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dsa_slave_port_vlan_add(dev, SWITCHDEV_OBJ_PORT_VLAN(obj), @@ -435,6 +483,9 @@ static int dsa_slave_port_obj_del(struct net_device *dev, err = dsa_slave_port_fdb_del(dev, SWITCHDEV_OBJ_PORT_FDB(obj)); break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = dsa_slave_port_mdb_del(dev, SWITCHDEV_OBJ_PORT_MDB(obj)); + break; case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dsa_slave_port_vlan_del(dev, SWITCHDEV_OBJ_PORT_VLAN(obj)); @@ -459,6 +510,10 @@ static int dsa_slave_port_obj_dump(struct net_device *dev, SWITCHDEV_OBJ_PORT_FDB(obj), cb); break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = dsa_slave_port_mdb_dump(dev, SWITCHDEV_OBJ_PORT_MDB(obj), + cb); + break; case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dsa_slave_port_vlan_dump(dev, SWITCHDEV_OBJ_PORT_VLAN(obj), -- cgit v1.2.3 From d001648ec7cf8b21ae9eec8b9ba4a18295adfb14 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 30 Aug 2016 20:42:14 +0100 Subject: rxrpc: Don't expose skbs to in-kernel users [ver #2] Don't expose skbs to in-kernel users, such as the AFS filesystem, but instead provide a notification hook the indicates that a call needs attention and another that indicates that there's a new call to be collected. This makes the following possibilities more achievable: (1) Call refcounting can be made simpler if skbs don't hold refs to calls. (2) skbs referring to non-data events will be able to be freed much sooner rather than being queued for AFS to pick up as rxrpc_kernel_recv_data will be able to consult the call state. (3) We can shortcut the receive phase when a call is remotely aborted because we don't have to go through all the packets to get to the one cancelling the operation. (4) It makes it easier to do encryption/decryption directly between AFS's buffers and sk_buffs. (5) Encryption/decryption can more easily be done in the AFS's thread contexts - usually that of the userspace process that issued a syscall - rather than in one of rxrpc's background threads on a workqueue. (6) AFS will be able to wait synchronously on a call inside AF_RXRPC. To make this work, the following interface function has been added: int rxrpc_kernel_recv_data( struct socket *sock, struct rxrpc_call *call, void *buffer, size_t bufsize, size_t *_offset, bool want_more, u32 *_abort_code); This is the recvmsg equivalent. It allows the caller to find out about the state of a specific call and to transfer received data into a buffer piecemeal. afs_extract_data() and rxrpc_kernel_recv_data() now do all the extraction logic between them. They don't wait synchronously yet because the socket lock needs to be dealt with. Five interface functions have been removed: rxrpc_kernel_is_data_last() rxrpc_kernel_get_abort_code() rxrpc_kernel_get_error_number() rxrpc_kernel_free_skb() rxrpc_kernel_data_consumed() As a temporary hack, sk_buffs going to an in-kernel call are queued on the rxrpc_call struct (->knlrecv_queue) rather than being handed over to the in-kernel user. To process the queue internally, a temporary function, temp_deliver_data() has been added. This will be replaced with common code between the rxrpc_recvmsg() path and the kernel_rxrpc_recv_data() path in a future patch. Signed-off-by: David Howells Signed-off-by: David S. Miller --- Documentation/networking/rxrpc.txt | 72 +++--- fs/afs/cmservice.c | 142 ++++++------ fs/afs/fsclient.c | 148 ++++++------- fs/afs/internal.h | 33 +-- fs/afs/rxrpc.c | 439 +++++++++++++------------------------ fs/afs/vlclient.c | 7 +- include/net/af_rxrpc.h | 35 +-- net/rxrpc/af_rxrpc.c | 29 +-- net/rxrpc/ar-internal.h | 23 +- net/rxrpc/call_accept.c | 13 +- net/rxrpc/call_object.c | 5 +- net/rxrpc/conn_event.c | 1 - net/rxrpc/input.c | 10 +- net/rxrpc/output.c | 2 +- net/rxrpc/recvmsg.c | 191 +++++++++++++--- net/rxrpc/skbuff.c | 1 - 16 files changed, 565 insertions(+), 586 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index cfc8cb91452f..1b63bbc6b94f 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -748,6 +748,37 @@ The kernel interface functions are as follows: The msg must not specify a destination address, control data or any flags other than MSG_MORE. len is the total amount of data to transmit. + (*) Receive data from a call. + + int rxrpc_kernel_recv_data(struct socket *sock, + struct rxrpc_call *call, + void *buf, + size_t size, + size_t *_offset, + bool want_more, + u32 *_abort) + + This is used to receive data from either the reply part of a client call + or the request part of a service call. buf and size specify how much + data is desired and where to store it. *_offset is added on to buf and + subtracted from size internally; the amount copied into the buffer is + added to *_offset before returning. + + want_more should be true if further data will be required after this is + satisfied and false if this is the last item of the receive phase. + + There are three normal returns: 0 if the buffer was filled and want_more + was true; 1 if the buffer was filled, the last DATA packet has been + emptied and want_more was false; and -EAGAIN if the function needs to be + called again. + + If the last DATA packet is processed but the buffer contains less than + the amount requested, EBADMSG is returned. If want_more wasn't set, but + more data was available, EMSGSIZE is returned. + + If a remote ABORT is detected, the abort code received will be stored in + *_abort and ECONNABORTED will be returned. + (*) Abort a call. void rxrpc_kernel_abort_call(struct socket *sock, @@ -825,47 +856,6 @@ The kernel interface functions are as follows: Other errors may be returned if the call had been aborted (-ECONNABORTED) or had timed out (-ETIME). - (*) Record the delivery of a data message. - - void rxrpc_kernel_data_consumed(struct rxrpc_call *call, - struct sk_buff *skb); - - This is used to record a data message as having been consumed and to - update the ACK state for the call. The message must still be passed to - rxrpc_kernel_free_skb() for disposal by the caller. - - (*) Free a message. - - void rxrpc_kernel_free_skb(struct sk_buff *skb); - - This is used to free a non-DATA socket buffer intercepted from an AF_RXRPC - socket. - - (*) Determine if a data message is the last one on a call. - - bool rxrpc_kernel_is_data_last(struct sk_buff *skb); - - This is used to determine if a socket buffer holds the last data message - to be received for a call (true will be returned if it does, false - if not). - - The data message will be part of the reply on a client call and the - request on an incoming call. In the latter case there will be more - messages, but in the former case there will not. - - (*) Get the abort code from an abort message. - - u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb); - - This is used to extract the abort code from a remote abort message. - - (*) Get the error number from a local or network error message. - - int rxrpc_kernel_get_error_number(struct sk_buff *skb); - - This is used to extract the error number from a message indicating either - a local error occurred or a network error occurred. - (*) Allocate a null key for doing anonymous security. struct key *rxrpc_get_null_key(const char *keyname); diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 77ee481059ac..2037e7a77a37 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -17,15 +17,12 @@ #include "internal.h" #include "afs_cm.h" -static int afs_deliver_cb_init_call_back_state(struct afs_call *, - struct sk_buff *, bool); -static int afs_deliver_cb_init_call_back_state3(struct afs_call *, - struct sk_buff *, bool); -static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool); -static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool); -static int afs_deliver_cb_probe_uuid(struct afs_call *, struct sk_buff *, bool); -static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *, - struct sk_buff *, bool); +static int afs_deliver_cb_init_call_back_state(struct afs_call *); +static int afs_deliver_cb_init_call_back_state3(struct afs_call *); +static int afs_deliver_cb_probe(struct afs_call *); +static int afs_deliver_cb_callback(struct afs_call *); +static int afs_deliver_cb_probe_uuid(struct afs_call *); +static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *); static void afs_cm_destructor(struct afs_call *); /* @@ -130,7 +127,7 @@ static void afs_cm_destructor(struct afs_call *call) * received. The step number here must match the final number in * afs_deliver_cb_callback(). */ - if (call->unmarshall == 6) { + if (call->unmarshall == 5) { ASSERT(call->server && call->count && call->request); afs_break_callbacks(call->server, call->count, call->request); } @@ -164,8 +161,7 @@ static void SRXAFSCB_CallBack(struct work_struct *work) /* * deliver request data to a CB.CallBack call */ -static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, - bool last) +static int afs_deliver_cb_callback(struct afs_call *call) { struct sockaddr_rxrpc srx; struct afs_callback *cb; @@ -174,7 +170,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, u32 tmp; int ret, loop; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: @@ -185,7 +181,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, /* extract the FID array and its count in two steps */ case 1: _debug("extract FID count"); - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -202,8 +198,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, case 2: _debug("extract FID array"); - ret = afs_extract_data(call, skb, last, call->buffer, - call->count * 3 * 4); + ret = afs_extract_data(call, call->buffer, + call->count * 3 * 4, true); if (ret < 0) return ret; @@ -229,7 +225,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, /* extract the callback array and its count in two steps */ case 3: _debug("extract CB count"); - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -239,13 +235,11 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, return -EBADMSG; call->offset = 0; call->unmarshall++; - if (tmp == 0) - goto empty_cb_array; case 4: _debug("extract CB array"); - ret = afs_extract_data(call, skb, last, call->request, - call->count * 3 * 4); + ret = afs_extract_data(call, call->buffer, + call->count * 3 * 4, false); if (ret < 0) return ret; @@ -258,15 +252,9 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, cb->type = ntohl(*bp++); } - empty_cb_array: call->offset = 0; call->unmarshall++; - case 5: - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; - /* Record that the message was unmarshalled successfully so * that the call destructor can know do the callback breaking * work, even if the final ACK isn't received. @@ -275,7 +263,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, * updated also. */ call->unmarshall++; - case 6: + case 5: break; } @@ -310,19 +298,17 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work) /* * deliver request data to a CB.InitCallBackState call */ -static int afs_deliver_cb_init_call_back_state(struct afs_call *call, - struct sk_buff *skb, - bool last) +static int afs_deliver_cb_init_call_back_state(struct afs_call *call) { struct sockaddr_rxrpc srx; struct afs_server *server; int ret; - _enter(",{%u},%d", skb->len, last); + _enter(""); rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); - ret = afs_data_complete(call, skb, last); + ret = afs_extract_data(call, NULL, 0, false); if (ret < 0) return ret; @@ -344,21 +330,61 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call, /* * deliver request data to a CB.InitCallBackState3 call */ -static int afs_deliver_cb_init_call_back_state3(struct afs_call *call, - struct sk_buff *skb, - bool last) +static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) { struct sockaddr_rxrpc srx; struct afs_server *server; + struct afs_uuid *r; + unsigned loop; + __be32 *b; + int ret; - _enter(",{%u},%d", skb->len, last); + _enter(""); rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); - /* There are some arguments that we ignore */ - afs_data_consumed(call, skb); - if (!last) - return -EAGAIN; + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->buffer = kmalloc(11 * sizeof(__be32), GFP_KERNEL); + if (!call->buffer) + return -ENOMEM; + call->unmarshall++; + + case 1: + _debug("extract UUID"); + ret = afs_extract_data(call, call->buffer, + 11 * sizeof(__be32), false); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + + _debug("unmarshall UUID"); + call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL); + if (!call->request) + return -ENOMEM; + + b = call->buffer; + r = call->request; + r->time_low = ntohl(b[0]); + r->time_mid = ntohl(b[1]); + r->time_hi_and_version = ntohl(b[2]); + r->clock_seq_hi_and_reserved = ntohl(b[3]); + r->clock_seq_low = ntohl(b[4]); + + for (loop = 0; loop < 6; loop++) + r->node[loop] = ntohl(b[loop + 5]); + + call->offset = 0; + call->unmarshall++; + + case 2: + break; + } /* no unmarshalling required */ call->state = AFS_CALL_REPLYING; @@ -390,14 +416,13 @@ static void SRXAFSCB_Probe(struct work_struct *work) /* * deliver request data to a CB.Probe call */ -static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb, - bool last) +static int afs_deliver_cb_probe(struct afs_call *call) { int ret; - _enter(",{%u},%d", skb->len, last); + _enter(""); - ret = afs_data_complete(call, skb, last); + ret = afs_extract_data(call, NULL, 0, false); if (ret < 0) return ret; @@ -435,19 +460,14 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) /* * deliver request data to a CB.ProbeUuid call */ -static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, - bool last) +static int afs_deliver_cb_probe_uuid(struct afs_call *call) { struct afs_uuid *r; unsigned loop; __be32 *b; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); - - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; + _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: @@ -459,8 +479,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, case 1: _debug("extract UUID"); - ret = afs_extract_data(call, skb, last, call->buffer, - 11 * sizeof(__be32)); + ret = afs_extract_data(call, call->buffer, + 11 * sizeof(__be32), false); switch (ret) { case 0: break; case -EAGAIN: return 0; @@ -487,16 +507,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, call->unmarshall++; case 2: - _debug("trailer"); - if (skb->len != 0) - return -EBADMSG; break; } - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; - call->state = AFS_CALL_REPLYING; INIT_WORK(&call->work, SRXAFSCB_ProbeUuid); @@ -570,14 +583,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) /* * deliver request data to a CB.TellMeAboutYourself call */ -static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call) { int ret; - _enter(",{%u},%d", skb->len, last); + _enter(""); - ret = afs_data_complete(call, skb, last); + ret = afs_extract_data(call, NULL, 0, false); if (ret < 0) return ret; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 9312b92e54be..96f4d764d1a6 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -235,16 +235,15 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp, /* * deliver reply data to an FS.FetchStatus */ -static int afs_deliver_fs_fetch_status(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_fetch_status(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter(",,%u", last); + _enter(""); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -307,8 +306,7 @@ int afs_fs_fetch_file_status(struct afs_server *server, /* * deliver reply data to an FS.FetchData */ -static int afs_deliver_fs_fetch_data(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_fetch_data(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; @@ -316,7 +314,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, void *buffer; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: @@ -332,7 +330,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, * client) */ case 1: _debug("extract data length (MSW)"); - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -347,7 +345,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, /* extract the returned data length */ case 2: _debug("extract data length"); - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -363,10 +361,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, _debug("extract data"); if (call->count > 0) { page = call->reply3; - buffer = kmap_atomic(page); - ret = afs_extract_data(call, skb, last, buffer, - call->count); - kunmap_atomic(buffer); + buffer = kmap(page); + ret = afs_extract_data(call, buffer, + call->count, true); + kunmap(buffer); if (ret < 0) return ret; } @@ -376,8 +374,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, /* extract the metadata */ case 4: - ret = afs_extract_data(call, skb, last, call->buffer, - (21 + 3 + 6) * 4); + ret = afs_extract_data(call, call->buffer, + (21 + 3 + 6) * 4, false); if (ret < 0) return ret; @@ -391,18 +389,15 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, call->unmarshall++; case 5: - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; break; } if (call->count < PAGE_SIZE) { _debug("clear"); page = call->reply3; - buffer = kmap_atomic(page); + buffer = kmap(page); memset(buffer + call->count, 0, PAGE_SIZE - call->count); - kunmap_atomic(buffer); + kunmap(buffer); } _leave(" = 0 [done]"); @@ -515,13 +510,12 @@ int afs_fs_fetch_data(struct afs_server *server, /* * deliver reply data to an FS.GiveUpCallBacks */ -static int afs_deliver_fs_give_up_callbacks(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_give_up_callbacks(struct afs_call *call) { - _enter(",{%u},%d", skb->len, last); + _enter(""); /* shouldn't be any reply data */ - return afs_data_complete(call, skb, last); + return afs_extract_data(call, NULL, 0, false); } /* @@ -599,16 +593,15 @@ int afs_fs_give_up_callbacks(struct afs_server *server, /* * deliver reply data to an FS.CreateFile or an FS.MakeDir */ -static int afs_deliver_fs_create_vnode(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_create_vnode(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -696,16 +689,15 @@ int afs_fs_create(struct afs_server *server, /* * deliver reply data to an FS.RemoveFile or FS.RemoveDir */ -static int afs_deliver_fs_remove(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_remove(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -777,16 +769,15 @@ int afs_fs_remove(struct afs_server *server, /* * deliver reply data to an FS.Link */ -static int afs_deliver_fs_link(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_link(struct afs_call *call) { struct afs_vnode *dvnode = call->reply, *vnode = call->reply2; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -863,16 +854,15 @@ int afs_fs_link(struct afs_server *server, /* * deliver reply data to an FS.Symlink */ -static int afs_deliver_fs_symlink(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_symlink(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -968,16 +958,15 @@ int afs_fs_symlink(struct afs_server *server, /* * deliver reply data to an FS.Rename */ -static int afs_deliver_fs_rename(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_rename(struct afs_call *call) { struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -1072,16 +1061,15 @@ int afs_fs_rename(struct afs_server *server, /* * deliver reply data to an FS.StoreData */ -static int afs_deliver_fs_store_data(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_store_data(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter(",,%u", last); + _enter(""); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -1251,17 +1239,16 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, /* * deliver reply data to an FS.StoreStatus */ -static int afs_deliver_fs_store_status(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_store_status(struct afs_call *call) { afs_dataversion_t *store_version; struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter(",,%u", last); + _enter(""); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -1443,14 +1430,13 @@ int afs_fs_setattr(struct afs_server *server, struct key *key, /* * deliver reply data to an FS.GetVolumeStatus */ -static int afs_deliver_fs_get_volume_status(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_get_volume_status(struct afs_call *call) { const __be32 *bp; char *p; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: @@ -1460,8 +1446,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the returned status record */ case 1: _debug("extract status"); - ret = afs_extract_data(call, skb, last, call->buffer, - 12 * 4); + ret = afs_extract_data(call, call->buffer, + 12 * 4, true); if (ret < 0) return ret; @@ -1472,7 +1458,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the volume name length */ case 2: - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -1487,8 +1473,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 3: _debug("extract volname"); if (call->count > 0) { - ret = afs_extract_data(call, skb, last, call->reply3, - call->count); + ret = afs_extract_data(call, call->reply3, + call->count, true); if (ret < 0) return ret; } @@ -1508,8 +1494,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, call->count = 4 - (call->count & 3); case 4: - ret = afs_extract_data(call, skb, last, call->buffer, - call->count); + ret = afs_extract_data(call, call->buffer, + call->count, true); if (ret < 0) return ret; @@ -1519,7 +1505,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the offline message length */ case 5: - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -1534,8 +1520,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 6: _debug("extract offline"); if (call->count > 0) { - ret = afs_extract_data(call, skb, last, call->reply3, - call->count); + ret = afs_extract_data(call, call->reply3, + call->count, true); if (ret < 0) return ret; } @@ -1555,8 +1541,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, call->count = 4 - (call->count & 3); case 7: - ret = afs_extract_data(call, skb, last, call->buffer, - call->count); + ret = afs_extract_data(call, call->buffer, + call->count, true); if (ret < 0) return ret; @@ -1566,7 +1552,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the message of the day length */ case 8: - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -1581,8 +1567,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 9: _debug("extract motd"); if (call->count > 0) { - ret = afs_extract_data(call, skb, last, call->reply3, - call->count); + ret = afs_extract_data(call, call->reply3, + call->count, true); if (ret < 0) return ret; } @@ -1595,26 +1581,17 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, call->unmarshall++; /* extract the message of the day padding */ - if ((call->count & 3) == 0) { - call->unmarshall++; - goto no_motd_padding; - } - call->count = 4 - (call->count & 3); + call->count = (4 - (call->count & 3)) & 3; case 10: - ret = afs_extract_data(call, skb, last, call->buffer, - call->count); + ret = afs_extract_data(call, call->buffer, + call->count, false); if (ret < 0) return ret; call->offset = 0; call->unmarshall++; - no_motd_padding: - case 11: - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; break; } @@ -1685,15 +1662,14 @@ int afs_fs_get_volume_status(struct afs_server *server, /* * deliver reply data to an FS.SetLock, FS.ExtendLock or FS.ReleaseLock */ -static int afs_deliver_fs_xxxx_lock(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_xxxx_lock(struct afs_call *call) { const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index d97552de9c59..5497c8496055 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -57,7 +56,7 @@ struct afs_mount_params { */ struct afs_wait_mode { /* RxRPC received message notification */ - void (*rx_wakeup)(struct afs_call *call); + rxrpc_notify_rx_t notify_rx; /* synchronous call waiter and call dispatched notification */ int (*wait)(struct afs_call *call); @@ -76,10 +75,8 @@ struct afs_call { const struct afs_call_type *type; /* type of call */ const struct afs_wait_mode *wait_mode; /* completion wait mode */ wait_queue_head_t waitq; /* processes awaiting completion */ - void (*async_workfn)(struct afs_call *call); /* asynchronous work function */ struct work_struct async_work; /* asynchronous work processor */ struct work_struct work; /* actual work processor */ - struct sk_buff_head rx_queue; /* received packets */ struct rxrpc_call *rxcall; /* RxRPC call handle */ struct key *key; /* security for this call */ struct afs_server *server; /* server affected by incoming CM call */ @@ -93,6 +90,7 @@ struct afs_call { void *reply4; /* reply buffer (fourth part) */ pgoff_t first; /* first page in mapping to deal with */ pgoff_t last; /* last page in mapping to deal with */ + size_t offset; /* offset into received data store */ enum { /* call state */ AFS_CALL_REQUESTING, /* request is being sent for outgoing call */ AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */ @@ -100,21 +98,18 @@ struct afs_call { AFS_CALL_AWAIT_REQUEST, /* awaiting request data on incoming call */ AFS_CALL_REPLYING, /* replying to incoming call */ AFS_CALL_AWAIT_ACK, /* awaiting final ACK of incoming call */ - AFS_CALL_COMPLETE, /* successfully completed */ - AFS_CALL_BUSY, /* server was busy */ - AFS_CALL_ABORTED, /* call was aborted */ - AFS_CALL_ERROR, /* call failed due to error */ + AFS_CALL_COMPLETE, /* Completed or failed */ } state; int error; /* error code */ + u32 abort_code; /* Remote abort ID or 0 */ unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ - unsigned reply_size; /* current size of reply */ unsigned first_offset; /* offset into mapping[first] */ unsigned last_to; /* amount of mapping[last] */ - unsigned offset; /* offset into received data store */ unsigned char unmarshall; /* unmarshalling phase */ bool incoming; /* T if incoming call */ bool send_pages; /* T if data from mapping should be sent */ + bool need_attention; /* T if RxRPC poked us */ u16 service_id; /* RxRPC service ID to call */ __be16 port; /* target UDP port */ __be32 operation_ID; /* operation ID for an incoming call */ @@ -129,8 +124,7 @@ struct afs_call_type { /* deliver request or reply data to an call * - returning an error will cause the call to be aborted */ - int (*deliver)(struct afs_call *call, struct sk_buff *skb, - bool last); + int (*deliver)(struct afs_call *call); /* map an abort code to an error number */ int (*abort_to_error)(u32 abort_code); @@ -612,27 +606,18 @@ extern struct socket *afs_socket; extern int afs_open_socket(void); extern void afs_close_socket(void); -extern void afs_data_consumed(struct afs_call *, struct sk_buff *); extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t, const struct afs_wait_mode *); extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *, size_t, size_t); extern void afs_flat_call_destructor(struct afs_call *); -extern int afs_transfer_reply(struct afs_call *, struct sk_buff *, bool); extern void afs_send_empty_reply(struct afs_call *); extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); -extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *, - size_t); +extern int afs_extract_data(struct afs_call *, void *, size_t, bool); -static inline int afs_data_complete(struct afs_call *call, struct sk_buff *skb, - bool last) +static inline int afs_transfer_reply(struct afs_call *call) { - if (skb->len > 0) - return -EBADMSG; - afs_data_consumed(call, skb); - if (!last) - return -EAGAIN; - return 0; + return afs_extract_data(call, call->buffer, call->reply_max, false); } /* diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 7b0d18900f50..244896baf241 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -19,31 +19,31 @@ struct socket *afs_socket; /* my RxRPC socket */ static struct workqueue_struct *afs_async_calls; static atomic_t afs_outstanding_calls; -static atomic_t afs_outstanding_skbs; -static void afs_wake_up_call_waiter(struct afs_call *); +static void afs_free_call(struct afs_call *); +static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); static int afs_wait_for_call_to_complete(struct afs_call *); -static void afs_wake_up_async_call(struct afs_call *); +static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); static int afs_dont_wait_for_call_to_complete(struct afs_call *); -static void afs_process_async_call(struct afs_call *); -static void afs_rx_interceptor(struct sock *, unsigned long, struct sk_buff *); -static int afs_deliver_cm_op_id(struct afs_call *, struct sk_buff *, bool); +static void afs_process_async_call(struct work_struct *); +static void afs_rx_new_call(struct sock *); +static int afs_deliver_cm_op_id(struct afs_call *); /* synchronous call management */ const struct afs_wait_mode afs_sync_call = { - .rx_wakeup = afs_wake_up_call_waiter, + .notify_rx = afs_wake_up_call_waiter, .wait = afs_wait_for_call_to_complete, }; /* asynchronous call management */ const struct afs_wait_mode afs_async_call = { - .rx_wakeup = afs_wake_up_async_call, + .notify_rx = afs_wake_up_async_call, .wait = afs_dont_wait_for_call_to_complete, }; /* asynchronous incoming call management */ static const struct afs_wait_mode afs_async_incoming_call = { - .rx_wakeup = afs_wake_up_async_call, + .notify_rx = afs_wake_up_async_call, }; /* asynchronous incoming call initial processing */ @@ -55,16 +55,8 @@ static const struct afs_call_type afs_RXCMxxxx = { static void afs_collect_incoming_call(struct work_struct *); -static struct sk_buff_head afs_incoming_calls; static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call); -static void afs_async_workfn(struct work_struct *work) -{ - struct afs_call *call = container_of(work, struct afs_call, async_work); - - call->async_workfn(call); -} - static int afs_wait_atomic_t(atomic_t *p) { schedule(); @@ -83,8 +75,6 @@ int afs_open_socket(void) _enter(""); - skb_queue_head_init(&afs_incoming_calls); - ret = -ENOMEM; afs_async_calls = create_singlethread_workqueue("kafsd"); if (!afs_async_calls) @@ -110,12 +100,12 @@ int afs_open_socket(void) if (ret < 0) goto error_2; + rxrpc_kernel_new_call_notification(socket, afs_rx_new_call); + ret = kernel_listen(socket, INT_MAX); if (ret < 0) goto error_2; - rxrpc_kernel_intercept_rx_messages(socket, afs_rx_interceptor); - afs_socket = socket; _leave(" = 0"); return 0; @@ -136,51 +126,19 @@ void afs_close_socket(void) { _enter(""); + _debug("outstanding %u", atomic_read(&afs_outstanding_calls)); wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t, TASK_UNINTERRUPTIBLE); _debug("no outstanding calls"); + flush_workqueue(afs_async_calls); sock_release(afs_socket); _debug("dework"); destroy_workqueue(afs_async_calls); - - ASSERTCMP(atomic_read(&afs_outstanding_skbs), ==, 0); _leave(""); } -/* - * Note that the data in a socket buffer is now consumed. - */ -void afs_data_consumed(struct afs_call *call, struct sk_buff *skb) -{ - if (!skb) { - _debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs)); - dump_stack(); - } else { - _debug("DLVR %p{%u} [%d]", - skb, skb->mark, atomic_read(&afs_outstanding_skbs)); - rxrpc_kernel_data_consumed(call->rxcall, skb); - } -} - -/* - * free a socket buffer - */ -static void afs_free_skb(struct sk_buff *skb) -{ - if (!skb) { - _debug("FREE NULL [%d]", atomic_read(&afs_outstanding_skbs)); - dump_stack(); - } else { - _debug("FREE %p{%u} [%d]", - skb, skb->mark, atomic_read(&afs_outstanding_skbs)); - if (atomic_dec_return(&afs_outstanding_skbs) == -1) - BUG(); - rxrpc_kernel_free_skb(skb); - } -} - /* * free a call */ @@ -191,7 +149,6 @@ static void afs_free_call(struct afs_call *call) ASSERTCMP(call->rxcall, ==, NULL); ASSERT(!work_pending(&call->async_work)); - ASSERT(skb_queue_empty(&call->rx_queue)); ASSERT(call->type->name != NULL); kfree(call->request); @@ -227,7 +184,7 @@ static void afs_end_call(struct afs_call *call) * allocate a call with flat request and reply buffers */ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type, - size_t request_size, size_t reply_size) + size_t request_size, size_t reply_max) { struct afs_call *call; @@ -241,7 +198,7 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type, call->type = type; call->request_size = request_size; - call->reply_max = reply_size; + call->reply_max = reply_max; if (request_size) { call->request = kmalloc(request_size, GFP_NOFS); @@ -249,14 +206,13 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type, goto nomem_free; } - if (reply_size) { - call->buffer = kmalloc(reply_size, GFP_NOFS); + if (reply_max) { + call->buffer = kmalloc(reply_max, GFP_NOFS); if (!call->buffer) goto nomem_free; } init_waitqueue_head(&call->waitq); - skb_queue_head_init(&call->rx_queue); return call; nomem_free: @@ -354,7 +310,6 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, struct msghdr msg; struct kvec iov[1]; int ret; - struct sk_buff *skb; _enter("%x,{%d},", addr->s_addr, ntohs(call->port)); @@ -366,8 +321,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, atomic_read(&afs_outstanding_calls)); call->wait_mode = wait_mode; - call->async_workfn = afs_process_async_call; - INIT_WORK(&call->async_work, afs_async_workfn); + INIT_WORK(&call->async_work, afs_process_async_call); memset(&srx, 0, sizeof(srx)); srx.srx_family = AF_RXRPC; @@ -380,7 +334,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, /* create a call */ rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key, - (unsigned long) call, gfp); + (unsigned long) call, gfp, + wait_mode->notify_rx); call->key = NULL; if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); @@ -423,150 +378,84 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, error_do_abort: rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT); - while ((skb = skb_dequeue(&call->rx_queue))) - afs_free_skb(skb); error_kill_call: afs_end_call(call); _leave(" = %d", ret); return ret; } -/* - * Handles intercepted messages that were arriving in the socket's Rx queue. - * - * Called from the AF_RXRPC call processor in waitqueue process context. For - * each call, it is guaranteed this will be called in order of packet to be - * delivered. - */ -static void afs_rx_interceptor(struct sock *sk, unsigned long user_call_ID, - struct sk_buff *skb) -{ - struct afs_call *call = (struct afs_call *) user_call_ID; - - _enter("%p,,%u", call, skb->mark); - - _debug("ICPT %p{%u} [%d]", - skb, skb->mark, atomic_read(&afs_outstanding_skbs)); - - ASSERTCMP(sk, ==, afs_socket->sk); - atomic_inc(&afs_outstanding_skbs); - - if (!call) { - /* its an incoming call for our callback service */ - skb_queue_tail(&afs_incoming_calls, skb); - queue_work(afs_wq, &afs_collect_incoming_call_work); - } else { - /* route the messages directly to the appropriate call */ - skb_queue_tail(&call->rx_queue, skb); - call->wait_mode->rx_wakeup(call); - } - - _leave(""); -} - /* * deliver messages to a call */ static void afs_deliver_to_call(struct afs_call *call) { - struct sk_buff *skb; - bool last; u32 abort_code; int ret; - _enter(""); - - while ((call->state == AFS_CALL_AWAIT_REPLY || - call->state == AFS_CALL_AWAIT_OP_ID || - call->state == AFS_CALL_AWAIT_REQUEST || - call->state == AFS_CALL_AWAIT_ACK) && - (skb = skb_dequeue(&call->rx_queue))) { - switch (skb->mark) { - case RXRPC_SKB_MARK_DATA: - _debug("Rcv DATA"); - last = rxrpc_kernel_is_data_last(skb); - ret = call->type->deliver(call, skb, last); - switch (ret) { - case -EAGAIN: - if (last) { - _debug("short data"); - goto unmarshal_error; - } - break; - case 0: - ASSERT(last); - if (call->state == AFS_CALL_AWAIT_REPLY) - call->state = AFS_CALL_COMPLETE; - break; - case -ENOTCONN: - abort_code = RX_CALL_DEAD; - goto do_abort; - case -ENOTSUPP: - abort_code = RX_INVALID_OPERATION; - goto do_abort; - default: - unmarshal_error: - abort_code = RXGEN_CC_UNMARSHAL; - if (call->state != AFS_CALL_AWAIT_REPLY) - abort_code = RXGEN_SS_UNMARSHAL; - do_abort: - rxrpc_kernel_abort_call(afs_socket, - call->rxcall, - abort_code); - call->error = ret; - call->state = AFS_CALL_ERROR; - break; + _enter("%s", call->type->name); + + while (call->state == AFS_CALL_AWAIT_REPLY || + call->state == AFS_CALL_AWAIT_OP_ID || + call->state == AFS_CALL_AWAIT_REQUEST || + call->state == AFS_CALL_AWAIT_ACK + ) { + if (call->state == AFS_CALL_AWAIT_ACK) { + size_t offset = 0; + ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall, + NULL, 0, &offset, false, + &call->abort_code); + if (ret == -EINPROGRESS || ret == -EAGAIN) + return; + if (ret == 1) { + call->state = AFS_CALL_COMPLETE; + goto done; } - break; - case RXRPC_SKB_MARK_FINAL_ACK: - _debug("Rcv ACK"); - call->state = AFS_CALL_COMPLETE; - break; - case RXRPC_SKB_MARK_BUSY: - _debug("Rcv BUSY"); - call->error = -EBUSY; - call->state = AFS_CALL_BUSY; - break; - case RXRPC_SKB_MARK_REMOTE_ABORT: - abort_code = rxrpc_kernel_get_abort_code(skb); - call->error = call->type->abort_to_error(abort_code); - call->state = AFS_CALL_ABORTED; - _debug("Rcv ABORT %u -> %d", abort_code, call->error); - break; - case RXRPC_SKB_MARK_LOCAL_ABORT: - abort_code = rxrpc_kernel_get_abort_code(skb); - call->error = call->type->abort_to_error(abort_code); - call->state = AFS_CALL_ABORTED; - _debug("Loc ABORT %u -> %d", abort_code, call->error); - break; - case RXRPC_SKB_MARK_NET_ERROR: - call->error = -rxrpc_kernel_get_error_number(skb); - call->state = AFS_CALL_ERROR; - _debug("Rcv NET ERROR %d", call->error); - break; - case RXRPC_SKB_MARK_LOCAL_ERROR: - call->error = -rxrpc_kernel_get_error_number(skb); - call->state = AFS_CALL_ERROR; - _debug("Rcv LOCAL ERROR %d", call->error); - break; - default: - BUG(); - break; + return; } - afs_free_skb(skb); - } - - /* make sure the queue is empty if the call is done with (we might have - * aborted the call early because of an unmarshalling error) */ - if (call->state >= AFS_CALL_COMPLETE) { - while ((skb = skb_dequeue(&call->rx_queue))) - afs_free_skb(skb); - if (call->incoming) - afs_end_call(call); + ret = call->type->deliver(call); + switch (ret) { + case 0: + if (call->state == AFS_CALL_AWAIT_REPLY) + call->state = AFS_CALL_COMPLETE; + goto done; + case -EINPROGRESS: + case -EAGAIN: + goto out; + case -ENOTCONN: + abort_code = RX_CALL_DEAD; + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + abort_code); + goto do_abort; + case -ENOTSUPP: + abort_code = RX_INVALID_OPERATION; + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + abort_code); + goto do_abort; + case -ENODATA: + case -EBADMSG: + case -EMSGSIZE: + default: + abort_code = RXGEN_CC_UNMARSHAL; + if (call->state != AFS_CALL_AWAIT_REPLY) + abort_code = RXGEN_SS_UNMARSHAL; + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + abort_code); + goto do_abort; + } } +done: + if (call->state == AFS_CALL_COMPLETE && call->incoming) + afs_end_call(call); +out: _leave(""); + return; + +do_abort: + call->error = ret; + call->state = AFS_CALL_COMPLETE; + goto done; } /* @@ -574,7 +463,6 @@ static void afs_deliver_to_call(struct afs_call *call) */ static int afs_wait_for_call_to_complete(struct afs_call *call) { - struct sk_buff *skb; int ret; DECLARE_WAITQUEUE(myself, current); @@ -586,14 +474,15 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) set_current_state(TASK_INTERRUPTIBLE); /* deliver any messages that are in the queue */ - if (!skb_queue_empty(&call->rx_queue)) { + if (call->state < AFS_CALL_COMPLETE && call->need_attention) { + call->need_attention = false; __set_current_state(TASK_RUNNING); afs_deliver_to_call(call); continue; } ret = call->error; - if (call->state >= AFS_CALL_COMPLETE) + if (call->state == AFS_CALL_COMPLETE) break; ret = -EINTR; if (signal_pending(current)) @@ -607,9 +496,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) /* kill the call */ if (call->state < AFS_CALL_COMPLETE) { _debug("call incomplete"); - rxrpc_kernel_abort_call(afs_socket, call->rxcall, RX_CALL_DEAD); - while ((skb = skb_dequeue(&call->rx_queue))) - afs_free_skb(skb); + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + RX_CALL_DEAD); } _debug("call complete"); @@ -621,17 +509,24 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) /* * wake up a waiting call */ -static void afs_wake_up_call_waiter(struct afs_call *call) +static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long call_user_ID) { + struct afs_call *call = (struct afs_call *)call_user_ID; + + call->need_attention = true; wake_up(&call->waitq); } /* * wake up an asynchronous call */ -static void afs_wake_up_async_call(struct afs_call *call) +static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long call_user_ID) { - _enter(""); + struct afs_call *call = (struct afs_call *)call_user_ID; + + call->need_attention = true; queue_work(afs_async_calls, &call->async_work); } @@ -649,8 +544,10 @@ static int afs_dont_wait_for_call_to_complete(struct afs_call *call) /* * delete an asynchronous call */ -static void afs_delete_async_call(struct afs_call *call) +static void afs_delete_async_call(struct work_struct *work) { + struct afs_call *call = container_of(work, struct afs_call, async_work); + _enter(""); afs_free_call(call); @@ -660,17 +557,19 @@ static void afs_delete_async_call(struct afs_call *call) /* * perform processing on an asynchronous call - * - on a multiple-thread workqueue this work item may try to run on several - * CPUs at the same time */ -static void afs_process_async_call(struct afs_call *call) +static void afs_process_async_call(struct work_struct *work) { + struct afs_call *call = container_of(work, struct afs_call, async_work); + _enter(""); - if (!skb_queue_empty(&call->rx_queue)) + if (call->state < AFS_CALL_COMPLETE && call->need_attention) { + call->need_attention = false; afs_deliver_to_call(call); + } - if (call->state >= AFS_CALL_COMPLETE && call->wait_mode) { + if (call->state == AFS_CALL_COMPLETE && call->wait_mode) { if (call->wait_mode->async_complete) call->wait_mode->async_complete(call->reply, call->error); @@ -681,45 +580,13 @@ static void afs_process_async_call(struct afs_call *call) /* we can't just delete the call because the work item may be * queued */ - call->async_workfn = afs_delete_async_call; + call->async_work.func = afs_delete_async_call; queue_work(afs_async_calls, &call->async_work); } _leave(""); } -/* - * Empty a socket buffer into a flat reply buffer. - */ -int afs_transfer_reply(struct afs_call *call, struct sk_buff *skb, bool last) -{ - size_t len = skb->len; - - if (len > call->reply_max - call->reply_size) { - _leave(" = -EBADMSG [%zu > %u]", - len, call->reply_max - call->reply_size); - return -EBADMSG; - } - - if (len > 0) { - if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, - len) < 0) - BUG(); - call->reply_size += len; - } - - afs_data_consumed(call, skb); - if (!last) - return -EAGAIN; - - if (call->reply_size != call->reply_max) { - _leave(" = -EBADMSG [%u != %u]", - call->reply_size, call->reply_max); - return -EBADMSG; - } - return 0; -} - /* * accept the backlog of incoming calls */ @@ -727,14 +594,10 @@ static void afs_collect_incoming_call(struct work_struct *work) { struct rxrpc_call *rxcall; struct afs_call *call = NULL; - struct sk_buff *skb; - - while ((skb = skb_dequeue(&afs_incoming_calls))) { - _debug("new call"); - /* don't need the notification */ - afs_free_skb(skb); + _enter(""); + do { if (!call) { call = kzalloc(sizeof(struct afs_call), GFP_KERNEL); if (!call) { @@ -742,12 +605,10 @@ static void afs_collect_incoming_call(struct work_struct *work) return; } - call->async_workfn = afs_process_async_call; - INIT_WORK(&call->async_work, afs_async_workfn); + INIT_WORK(&call->async_work, afs_process_async_call); call->wait_mode = &afs_async_incoming_call; call->type = &afs_RXCMxxxx; init_waitqueue_head(&call->waitq); - skb_queue_head_init(&call->rx_queue); call->state = AFS_CALL_AWAIT_OP_ID; _debug("CALL %p{%s} [%d]", @@ -757,46 +618,47 @@ static void afs_collect_incoming_call(struct work_struct *work) } rxcall = rxrpc_kernel_accept_call(afs_socket, - (unsigned long) call); + (unsigned long)call, + afs_wake_up_async_call); if (!IS_ERR(rxcall)) { call->rxcall = rxcall; + call->need_attention = true; + queue_work(afs_async_calls, &call->async_work); call = NULL; } - } + } while (!call); if (call) afs_free_call(call); } +/* + * Notification of an incoming call. + */ +static void afs_rx_new_call(struct sock *sk) +{ + queue_work(afs_wq, &afs_collect_incoming_call_work); +} + /* * Grab the operation ID from an incoming cache manager call. The socket * buffer is discarded on error or if we don't yet have sufficient data. */ -static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, - bool last) +static int afs_deliver_cm_op_id(struct afs_call *call) { - size_t len = skb->len; - void *oibuf = (void *) &call->operation_ID; + int ret; - _enter("{%u},{%zu},%d", call->offset, len, last); + _enter("{%zu}", call->offset); ASSERTCMP(call->offset, <, 4); /* the operation ID forms the first four bytes of the request data */ - len = min_t(size_t, len, 4 - call->offset); - if (skb_copy_bits(skb, 0, oibuf + call->offset, len) < 0) - BUG(); - if (!pskb_pull(skb, len)) - BUG(); - call->offset += len; - - if (call->offset < 4) { - afs_data_consumed(call, skb); - _leave(" = -EAGAIN"); - return -EAGAIN; - } + ret = afs_extract_data(call, &call->operation_ID, 4, true); + if (ret < 0) + return ret; call->state = AFS_CALL_AWAIT_REQUEST; + call->offset = 0; /* ask the cache manager to route the call (it'll change the call type * if successful) */ @@ -805,7 +667,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, /* pass responsibility for the remainer of this message off to the * cache manager op */ - return call->type->deliver(call, skb, last); + return call->type->deliver(call); } /* @@ -881,25 +743,40 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) /* * Extract a piece of data from the received data socket buffers. */ -int afs_extract_data(struct afs_call *call, struct sk_buff *skb, - bool last, void *buf, size_t count) +int afs_extract_data(struct afs_call *call, void *buf, size_t count, + bool want_more) { - size_t len = skb->len; + int ret; - _enter("{%u},{%zu},%d,,%zu", call->offset, len, last, count); + _enter("{%s,%zu},,%zu,%d", + call->type->name, call->offset, count, want_more); - ASSERTCMP(call->offset, <, count); + ASSERTCMP(call->offset, <=, count); - len = min_t(size_t, len, count - call->offset); - if (skb_copy_bits(skb, 0, buf + call->offset, len) < 0 || - !pskb_pull(skb, len)) - BUG(); - call->offset += len; + ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall, + buf, count, &call->offset, + want_more, &call->abort_code); + if (ret == 0 || ret == -EAGAIN) + return ret; - if (call->offset < count) { - afs_data_consumed(call, skb); - _leave(" = -EAGAIN"); - return -EAGAIN; + if (ret == 1) { + switch (call->state) { + case AFS_CALL_AWAIT_REPLY: + call->state = AFS_CALL_COMPLETE; + break; + case AFS_CALL_AWAIT_REQUEST: + call->state = AFS_CALL_REPLYING; + break; + default: + break; + } + return 0; } - return 0; + + if (ret == -ECONNABORTED) + call->error = call->type->abort_to_error(call->abort_code); + else + call->error = ret; + call->state = AFS_CALL_COMPLETE; + return ret; } diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index f94d1abdc3eb..94bcd97d22b8 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -58,17 +58,16 @@ static int afs_vl_abort_to_error(u32 abort_code) /* * deliver reply data to a VL.GetEntryByXXX call */ -static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call) { struct afs_cache_vlocation *entry; __be32 *bp; u32 tmp; int loop, ret; - _enter(",,%u", last); + _enter(""); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index f8d8079dc058..b4b6a3664dda 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -12,7 +12,6 @@ #ifndef _NET_RXRPC_H #define _NET_RXRPC_H -#include #include struct key; @@ -20,38 +19,26 @@ struct sock; struct socket; struct rxrpc_call; -/* - * the mark applied to socket buffers that may be intercepted - */ -enum rxrpc_skb_mark { - RXRPC_SKB_MARK_DATA, /* data message */ - RXRPC_SKB_MARK_FINAL_ACK, /* final ACK received message */ - RXRPC_SKB_MARK_BUSY, /* server busy message */ - RXRPC_SKB_MARK_REMOTE_ABORT, /* remote abort message */ - RXRPC_SKB_MARK_LOCAL_ABORT, /* local abort message */ - RXRPC_SKB_MARK_NET_ERROR, /* network error message */ - RXRPC_SKB_MARK_LOCAL_ERROR, /* local error message */ - RXRPC_SKB_MARK_NEW_CALL, /* local error message */ -}; +typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, + unsigned long); +typedef void (*rxrpc_notify_new_call_t)(struct sock *); -typedef void (*rxrpc_interceptor_t)(struct sock *, unsigned long, - struct sk_buff *); -void rxrpc_kernel_intercept_rx_messages(struct socket *, rxrpc_interceptor_t); +void rxrpc_kernel_new_call_notification(struct socket *, + rxrpc_notify_new_call_t); struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, struct sockaddr_rxrpc *, struct key *, unsigned long, - gfp_t); + gfp_t, + rxrpc_notify_rx_t); int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, struct msghdr *, size_t); -void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); +int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, + void *, size_t, size_t *, bool, u32 *); void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); -bool rxrpc_kernel_is_data_last(struct sk_buff *); -u32 rxrpc_kernel_get_abort_code(struct sk_buff *); -int rxrpc_kernel_get_error_number(struct sk_buff *); -void rxrpc_kernel_free_skb(struct sk_buff *); -struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long); +struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long, + rxrpc_notify_rx_t); int rxrpc_kernel_reject_call(struct socket *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index e07c91acd904..32d544995dda 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -231,6 +231,8 @@ static int rxrpc_listen(struct socket *sock, int backlog) * @srx: The address of the peer to contact * @key: The security context to use (defaults to socket setting) * @user_call_ID: The ID to use + * @gfp: The allocation constraints + * @notify_rx: Where to send notifications instead of socket queue * * Allow a kernel service to begin a call on the nominated socket. This just * sets up all the internal tracking structures and allocates connection and @@ -243,7 +245,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, struct sockaddr_rxrpc *srx, struct key *key, unsigned long user_call_ID, - gfp_t gfp) + gfp_t gfp, + rxrpc_notify_rx_t notify_rx) { struct rxrpc_conn_parameters cp; struct rxrpc_call *call; @@ -270,6 +273,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, cp.exclusive = false; cp.service_id = srx->srx_service; call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp); + if (!IS_ERR(call)) + call->notify_rx = notify_rx; release_sock(&rx->sk); _leave(" = %p", call); @@ -289,31 +294,27 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); rxrpc_remove_user_ID(rxrpc_sk(sock->sk), call); + rxrpc_purge_queue(&call->knlrecv_queue); rxrpc_put_call(call); } EXPORT_SYMBOL(rxrpc_kernel_end_call); /** - * rxrpc_kernel_intercept_rx_messages - Intercept received RxRPC messages + * rxrpc_kernel_new_call_notification - Get notifications of new calls * @sock: The socket to intercept received messages on - * @interceptor: The function to pass the messages to + * @notify_new_call: Function to be called when new calls appear * - * Allow a kernel service to intercept messages heading for the Rx queue on an - * RxRPC socket. They get passed to the specified function instead. - * @interceptor should free the socket buffers it is given. @interceptor is - * called with the socket receive queue spinlock held and softirqs disabled - - * this ensures that the messages will be delivered in the right order. + * Allow a kernel service to be given notifications about new calls. */ -void rxrpc_kernel_intercept_rx_messages(struct socket *sock, - rxrpc_interceptor_t interceptor) +void rxrpc_kernel_new_call_notification( + struct socket *sock, + rxrpc_notify_new_call_t notify_new_call) { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - _enter(""); - rx->interceptor = interceptor; + rx->notify_new_call = notify_new_call; } - -EXPORT_SYMBOL(rxrpc_kernel_intercept_rx_messages); +EXPORT_SYMBOL(rxrpc_kernel_new_call_notification); /* * connect an RxRPC socket diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 0c320b2b7b43..4e86d248dc5e 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -39,6 +39,20 @@ struct rxrpc_crypt { struct rxrpc_connection; +/* + * Mark applied to socket buffers. + */ +enum rxrpc_skb_mark { + RXRPC_SKB_MARK_DATA, /* data message */ + RXRPC_SKB_MARK_FINAL_ACK, /* final ACK received message */ + RXRPC_SKB_MARK_BUSY, /* server busy message */ + RXRPC_SKB_MARK_REMOTE_ABORT, /* remote abort message */ + RXRPC_SKB_MARK_LOCAL_ABORT, /* local abort message */ + RXRPC_SKB_MARK_NET_ERROR, /* network error message */ + RXRPC_SKB_MARK_LOCAL_ERROR, /* local error message */ + RXRPC_SKB_MARK_NEW_CALL, /* local error message */ +}; + /* * sk_state for RxRPC sockets */ @@ -57,7 +71,7 @@ enum { struct rxrpc_sock { /* WARNING: sk has to be the first member */ struct sock sk; - rxrpc_interceptor_t interceptor; /* kernel service Rx interceptor function */ + rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */ struct rxrpc_local *local; /* local endpoint */ struct list_head listen_link; /* link in the local endpoint's listen list */ struct list_head secureq; /* calls awaiting connection security clearance */ @@ -367,6 +381,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXPECT_OOS, /* expect out of sequence packets */ RXRPC_CALL_IS_SERVICE, /* Call is service call */ RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ + RXRPC_CALL_RX_NO_MORE, /* Don't indicate MSG_MORE from recvmsg() */ }; /* @@ -441,6 +456,7 @@ struct rxrpc_call { struct timer_list resend_timer; /* Tx resend timer */ struct work_struct destroyer; /* call destroyer */ struct work_struct processor; /* packet processor and ACK generator */ + rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ struct list_head link; /* link in master call list */ struct list_head chan_wait_link; /* Link in conn->waiting_calls */ struct hlist_node error_link; /* link in error distribution list */ @@ -448,6 +464,7 @@ struct rxrpc_call { struct rb_node sock_node; /* node in socket call tree */ struct sk_buff_head rx_queue; /* received packets */ struct sk_buff_head rx_oos_queue; /* packets received out of sequence */ + struct sk_buff_head knlrecv_queue; /* Queue for kernel_recv [TODO: replace this] */ struct sk_buff *tx_pending; /* Tx socket buffer being filled */ wait_queue_head_t waitq; /* Wait queue for channel or Tx */ __be32 crypto_buf[2]; /* Temporary packet crypto buffer */ @@ -512,7 +529,8 @@ extern struct workqueue_struct *rxrpc_workqueue; * call_accept.c */ void rxrpc_accept_incoming_calls(struct rxrpc_local *); -struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long); +struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, + rxrpc_notify_rx_t); int rxrpc_reject_call(struct rxrpc_sock *); /* @@ -874,6 +892,7 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *); /* * skbuff.c */ +void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); void rxrpc_packet_destructor(struct sk_buff *); void rxrpc_new_skb(struct sk_buff *); void rxrpc_see_skb(struct sk_buff *); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 03af88fe798b..68a439e30df1 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -286,7 +286,8 @@ security_mismatch: * - assign the user call ID to the call at the front of the queue */ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, - unsigned long user_call_ID) + unsigned long user_call_ID, + rxrpc_notify_rx_t notify_rx) { struct rxrpc_call *call; struct rb_node *parent, **pp; @@ -340,6 +341,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, } /* formalise the acceptance */ + call->notify_rx = notify_rx; call->user_call_ID = user_call_ID; rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); @@ -437,17 +439,20 @@ out: * rxrpc_kernel_accept_call - Allow a kernel service to accept an incoming call * @sock: The socket on which the impending call is waiting * @user_call_ID: The tag to attach to the call + * @notify_rx: Where to send notifications instead of socket queue * * Allow a kernel service to accept an incoming call, assuming the incoming - * call is still valid. + * call is still valid. The caller should immediately trigger their own + * notification as there must be data waiting. */ struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock, - unsigned long user_call_ID) + unsigned long user_call_ID, + rxrpc_notify_rx_t notify_rx) { struct rxrpc_call *call; _enter(",%lx", user_call_ID); - call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID); + call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID, notify_rx); _leave(" = %p", call); return call; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 104ee8b1de06..516d8ea82f02 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -136,6 +136,7 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) INIT_LIST_HEAD(&call->accept_link); skb_queue_head_init(&call->rx_queue); skb_queue_head_init(&call->rx_oos_queue); + skb_queue_head_init(&call->knlrecv_queue); init_waitqueue_head(&call->waitq); spin_lock_init(&call->lock); rwlock_init(&call->state_lock); @@ -552,8 +553,6 @@ void rxrpc_release_call(struct rxrpc_call *call) spin_lock_bh(&call->lock); } spin_unlock_bh(&call->lock); - - ASSERTCMP(call->state, !=, RXRPC_CALL_COMPLETE); } del_timer_sync(&call->resend_timer); @@ -682,6 +681,7 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); rxrpc_purge_queue(&call->rx_queue); + rxrpc_purge_queue(&call->knlrecv_queue); rxrpc_put_peer(call->peer); kmem_cache_free(rxrpc_call_jar, call); } @@ -737,6 +737,7 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call) rxrpc_purge_queue(&call->rx_queue); ASSERT(skb_queue_empty(&call->rx_oos_queue)); + rxrpc_purge_queue(&call->knlrecv_queue); sock_put(&call->socket->sk); call_rcu(&call->rcu, rxrpc_rcu_destroy_call); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index bc9b05938ff5..9db90f4f768d 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -282,7 +282,6 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, case RXRPC_PACKET_TYPE_DATA: case RXRPC_PACKET_TYPE_ACK: rxrpc_conn_retransmit_call(conn, skb); - rxrpc_free_skb(skb); return 0; case RXRPC_PACKET_TYPE_ABORT: diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 86bea9ad6c3d..72f016cfaaf5 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -90,9 +90,15 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, } /* allow interception by a kernel service */ - if (rx->interceptor) { - rx->interceptor(sk, call->user_call_ID, skb); + if (skb->mark == RXRPC_SKB_MARK_NEW_CALL && + rx->notify_new_call) { spin_unlock_bh(&sk->sk_receive_queue.lock); + skb_queue_tail(&call->knlrecv_queue, skb); + rx->notify_new_call(&rx->sk); + } else if (call->notify_rx) { + spin_unlock_bh(&sk->sk_receive_queue.lock); + skb_queue_tail(&call->knlrecv_queue, skb); + call->notify_rx(&rx->sk, call, call->user_call_ID); } else { _net("post skb %p", skb); __skb_queue_tail(&sk->sk_receive_queue, skb); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index b1e708a12151..817ae801e769 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -190,7 +190,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) if (cmd == RXRPC_CMD_ACCEPT) { if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) return -EINVAL; - call = rxrpc_accept_call(rx, user_call_ID); + call = rxrpc_accept_call(rx, user_call_ID, NULL); if (IS_ERR(call)) return PTR_ERR(call); rxrpc_put_call(call); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index c9b38c7fb448..0ab7b334bab1 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -369,55 +369,178 @@ wait_error: } -/** - * rxrpc_kernel_is_data_last - Determine if data message is last one - * @skb: Message holding data +/* + * Deliver messages to a call. This keeps processing packets until the buffer + * is filled and we find either more DATA (returns 0) or the end of the DATA + * (returns 1). If more packets are required, it returns -EAGAIN. * - * Determine if data message is last one for the parent call. + * TODO: Note that this is hacked in at the moment and will be replaced. */ -bool rxrpc_kernel_is_data_last(struct sk_buff *skb) +static int temp_deliver_data(struct socket *sock, struct rxrpc_call *call, + struct iov_iter *iter, size_t size, + size_t *_offset) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + size_t remain; + int ret, copy; + + _enter("%d", call->debug_id); + +next: + local_bh_disable(); + skb = skb_dequeue(&call->knlrecv_queue); + local_bh_enable(); + if (!skb) { + if (test_bit(RXRPC_CALL_RX_NO_MORE, &call->flags)) + return 1; + _leave(" = -EAGAIN [empty]"); + return -EAGAIN; + } - ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_DATA); + sp = rxrpc_skb(skb); + _debug("dequeued %p %u/%zu", skb, sp->offset, size); - return sp->hdr.flags & RXRPC_LAST_PACKET; -} + switch (skb->mark) { + case RXRPC_SKB_MARK_DATA: + remain = size - *_offset; + if (remain > 0) { + copy = skb->len - sp->offset; + if (copy > remain) + copy = remain; + ret = skb_copy_datagram_iter(skb, sp->offset, iter, + copy); + if (ret < 0) + goto requeue_and_leave; -EXPORT_SYMBOL(rxrpc_kernel_is_data_last); + /* handle piecemeal consumption of data packets */ + sp->offset += copy; + *_offset += copy; + } -/** - * rxrpc_kernel_get_abort_code - Get the abort code from an RxRPC abort message - * @skb: Message indicating an abort - * - * Get the abort code from an RxRPC abort message. - */ -u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + if (sp->offset < skb->len) + goto partially_used_skb; + + /* We consumed the whole packet */ + ASSERTCMP(sp->offset, ==, skb->len); + if (sp->hdr.flags & RXRPC_LAST_PACKET) + set_bit(RXRPC_CALL_RX_NO_MORE, &call->flags); + rxrpc_kernel_data_consumed(call, skb); + rxrpc_free_skb(skb); + goto next; - switch (skb->mark) { - case RXRPC_SKB_MARK_REMOTE_ABORT: - case RXRPC_SKB_MARK_LOCAL_ABORT: - return sp->call->abort_code; default: - BUG(); + rxrpc_free_skb(skb); + goto next; } -} -EXPORT_SYMBOL(rxrpc_kernel_get_abort_code); +partially_used_skb: + ASSERTCMP(*_offset, ==, size); + ret = 0; +requeue_and_leave: + skb_queue_head(&call->knlrecv_queue, skb); + return ret; +} /** - * rxrpc_kernel_get_error - Get the error number from an RxRPC error message - * @skb: Message indicating an error + * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info + * @sock: The socket that the call exists on + * @call: The call to send data through + * @buf: The buffer to receive into + * @size: The size of the buffer, including data already read + * @_offset: The running offset into the buffer. + * @want_more: True if more data is expected to be read + * @_abort: Where the abort code is stored if -ECONNABORTED is returned + * + * Allow a kernel service to receive data and pick up information about the + * state of a call. Returns 0 if got what was asked for and there's more + * available, 1 if we got what was asked for and we're at the end of the data + * and -EAGAIN if we need more data. + * + * Note that we may return -EAGAIN to drain empty packets at the end of the + * data, even if we've already copied over the requested data. * - * Get the error number from an RxRPC error message. + * This function adds the amount it transfers to *_offset, so this should be + * precleared as appropriate. Note that the amount remaining in the buffer is + * taken to be size - *_offset. + * + * *_abort should also be initialised to 0. */ -int rxrpc_kernel_get_error_number(struct sk_buff *skb) +int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, + void *buf, size_t size, size_t *_offset, + bool want_more, u32 *_abort) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct iov_iter iter; + struct kvec iov; + int ret; - return sp->error; -} + _enter("{%d,%s},%zu,%d", + call->debug_id, rxrpc_call_states[call->state], size, want_more); + + ASSERTCMP(*_offset, <=, size); + ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING); -EXPORT_SYMBOL(rxrpc_kernel_get_error_number); + iov.iov_base = buf + *_offset; + iov.iov_len = size - *_offset; + iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset); + + lock_sock(sock->sk); + + switch (call->state) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + case RXRPC_CALL_SERVER_RECV_REQUEST: + case RXRPC_CALL_SERVER_ACK_REQUEST: + ret = temp_deliver_data(sock, call, &iter, size, _offset); + if (ret < 0) + goto out; + + /* We can only reach here with a partially full buffer if we + * have reached the end of the data. We must otherwise have a + * full buffer or have been given -EAGAIN. + */ + if (ret == 1) { + if (*_offset < size) + goto short_data; + if (!want_more) + goto read_phase_complete; + ret = 0; + goto out; + } + + if (!want_more) + goto excess_data; + goto out; + + case RXRPC_CALL_COMPLETE: + goto call_complete; + + default: + *_offset = 0; + ret = -EINPROGRESS; + goto out; + } + +read_phase_complete: + ret = 1; +out: + release_sock(sock->sk); + _leave(" = %d [%zu,%d]", ret, *_offset, *_abort); + return ret; + +short_data: + ret = -EBADMSG; + goto out; +excess_data: + ret = -EMSGSIZE; + goto out; +call_complete: + *_abort = call->abort_code; + ret = call->error; + if (call->completion == RXRPC_CALL_SUCCEEDED) { + ret = 1; + if (size > 0) + ret = -ECONNRESET; + } + goto out; +} +EXPORT_SYMBOL(rxrpc_kernel_recv_data); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 20529205bb8c..9752f8b1fdd0 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -127,7 +127,6 @@ void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb) call->rx_data_recv = sp->hdr.seq; rxrpc_hard_ACK_data(call, skb); } -EXPORT_SYMBOL(rxrpc_kernel_data_consumed); /* * Destroy a packet that has an RxRPC control buffer -- cgit v1.2.3 From 66fdd05e7a85564f86d9b220de946aa98e8bc048 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Wed, 31 Aug 2016 11:16:22 +0800 Subject: rps: flow_dissector: Add the const for the parameter of flow_keys_have_l4 Add the const for the parameter of flow_keys_have_l4 for the readability. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index f266b512c3bd..d9534927d93b 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -183,7 +183,7 @@ struct flow_keys_digest { void make_flow_keys_digest(struct flow_keys_digest *digest, const struct flow_keys *flow); -static inline bool flow_keys_have_l4(struct flow_keys *keys) +static inline bool flow_keys_have_l4(const struct flow_keys *keys) { return (keys->ports.ports || keys->tags.flow_label); } -- cgit v1.2.3 From d297653dd6f07afbe7e6c702a4bcd7615680002e Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 30 Aug 2016 21:56:45 -0700 Subject: rtnetlink: fdb dump: optimize by saving last interface markers fdb dumps spanning multiple skb's currently restart from the first interface again for every skb. This results in unnecessary iterations on the already visited interfaces and their fdb entries. In large scale setups, we have seen this to slow down fdb dumps considerably. On a system with 30k macs we see fdb dumps spanning across more than 300 skbs. To fix the problem, this patch replaces the existing single fdb marker with three markers: netdev hash entries, netdevs and fdb index to continue where we left off instead of restarting from the first netdev. This is consistent with link dumps. In the process of fixing the performance issue, this patch also re-implements fix done by commit 472681d57a5d ("net: ndo_fdb_dump should report -EMSGSIZE to rtnl_fdb_dump") (with an internal fix from Wilson Kok) in the following ways: - change ndo_fdb_dump handlers to return error code instead of the last fdb index - use cb->args strictly for dump frag markers and not error codes. This is consistent with other dump functions. Below results were taken on a system with 1000 netdevs and 35085 fdb entries: before patch: $time bridge fdb show | wc -l 15065 real 1m11.791s user 0m0.070s sys 1m8.395s (existing code does not return all macs) after patch: $time bridge fdb show | wc -l 35085 real 0m2.017s user 0m0.113s sys 0m1.942s Signed-off-by: Roopa Prabhu Signed-off-by: Wilson Kok Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 7 +- drivers/net/vxlan.c | 14 ++- include/linux/netdevice.h | 4 +- include/linux/rtnetlink.h | 2 +- include/net/switchdev.h | 4 +- net/bridge/br_fdb.c | 23 ++--- net/bridge/br_private.h | 2 +- net/core/rtnetlink.c | 105 ++++++++++++++--------- net/switchdev/switchdev.c | 10 +-- 9 files changed, 98 insertions(+), 73 deletions(-) (limited to 'include/net') diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 3ebef27e0964..3ae3968b0edf 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -432,18 +432,19 @@ static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], static int qlcnic_fdb_dump(struct sk_buff *skb, struct netlink_callback *ncb, struct net_device *netdev, - struct net_device *filter_dev, int idx) + struct net_device *filter_dev, int *idx) { struct qlcnic_adapter *adapter = netdev_priv(netdev); + int err = 0; if (!adapter->fdb_mac_learn) return ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx); if ((adapter->flags & QLCNIC_ESWITCH_ENABLED) || qlcnic_sriov_check(adapter)) - idx = ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx); + err = ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx); - return idx; + return err; } static void qlcnic_82xx_cancel_idc_work(struct qlcnic_adapter *adapter) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 3f7e0d2dd21a..f605a3684a7f 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -860,20 +860,20 @@ out: /* Dump forwarding table */ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, - struct net_device *filter_dev, int idx) + struct net_device *filter_dev, int *idx) { struct vxlan_dev *vxlan = netdev_priv(dev); unsigned int h; + int err = 0; for (h = 0; h < FDB_HASH_SIZE; ++h) { struct vxlan_fdb *f; - int err; hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { struct vxlan_rdst *rd; list_for_each_entry_rcu(rd, &f->remotes, list) { - if (idx < cb->args[0]) + if (*idx < cb->args[2]) goto skip; err = vxlan_fdb_info(skb, vxlan, f, @@ -881,17 +881,15 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI, rd); - if (err < 0) { - cb->args[1] = err; + if (err < 0) goto out; - } skip: - ++idx; + *idx += 1; } } } out: - return idx; + return err; } /* Watch incoming packets to learn mapping between Ethernet address diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d122be9345c7..67bb978470dc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1031,7 +1031,7 @@ struct netdev_xdp { * Deletes the FDB entry from dev coresponding to addr. * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, - * int idx) + * int *idx) * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. * @@ -1263,7 +1263,7 @@ struct net_device_ops { struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx); + int *idx); int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 2daece8979f7..57e54847b0b9 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -105,7 +105,7 @@ extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx); + int *idx); extern int ndo_dflt_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 82f5e0462021..6279f2f179ec 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -222,7 +222,7 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], u16 vid); int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, - struct net_device *filter_dev, int idx); + struct net_device *filter_dev, int *idx); void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); @@ -342,7 +342,7 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx) + int *idx) { return idx; } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index cd620fab41b0..6b43c8c88f19 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -710,24 +710,27 @@ int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx) + int *idx) { struct net_bridge *br = netdev_priv(dev); + int err = 0; int i; if (!(dev->priv_flags & IFF_EBRIDGE)) goto out; - if (!filter_dev) - idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); + if (!filter_dev) { + err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); + if (err < 0) + goto out; + } for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { - int err; - if (idx < cb->args[0]) + if (*idx < cb->args[2]) goto skip; if (filter_dev && @@ -750,17 +753,15 @@ int br_fdb_dump(struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI); - if (err < 0) { - cb->args[1] = err; - break; - } + if (err < 0) + goto out; skip: - ++idx; + *idx += 1; } } out: - return idx; + return err; } /* Update (create or replace) forwarding database entry */ diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2379b2b865c9..3d36493f4487 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -508,7 +508,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags); int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, struct net_device *fdev, int idx); + struct net_device *dev, struct net_device *fdev, int *idx); int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 318fc5231b2b..1dfca1c3f8f5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3068,7 +3068,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, seq = cb->nlh->nlmsg_seq; list_for_each_entry(ha, &list->list, list) { - if (*idx < cb->args[0]) + if (*idx < cb->args[2]) goto skip; err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0, @@ -3095,19 +3095,18 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx) + int *idx) { int err; netif_addr_lock_bh(dev); - err = nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->uc); + err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc); if (err) goto out; - nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc); + nlmsg_populate_fdb(skb, cb, dev, idx, &dev->mc); out: netif_addr_unlock_bh(dev); - cb->args[1] = err; - return idx; + return err; } EXPORT_SYMBOL(ndo_dflt_fdb_dump); @@ -3120,9 +3119,13 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) const struct net_device_ops *cops = NULL; struct ifinfomsg *ifm = nlmsg_data(cb->nlh); struct net *net = sock_net(skb->sk); + struct hlist_head *head; int brport_idx = 0; int br_idx = 0; - int idx = 0; + int h, s_h; + int idx = 0, s_idx; + int err = 0; + int fidx = 0; if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy) == 0) { @@ -3140,49 +3143,71 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) ops = br_dev->netdev_ops; } - cb->args[1] = 0; - for_each_netdev(net, dev) { - if (brport_idx && (dev->ifindex != brport_idx)) - continue; + s_h = cb->args[0]; + s_idx = cb->args[1]; - if (!br_idx) { /* user did not specify a specific bridge */ - if (dev->priv_flags & IFF_BRIDGE_PORT) { - br_dev = netdev_master_upper_dev_get(dev); - cops = br_dev->netdev_ops; - } + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry(dev, head, index_hlist) { - } else { - if (dev != br_dev && - !(dev->priv_flags & IFF_BRIDGE_PORT)) + if (brport_idx && (dev->ifindex != brport_idx)) continue; - if (br_dev != netdev_master_upper_dev_get(dev) && - !(dev->priv_flags & IFF_EBRIDGE)) - continue; + if (!br_idx) { /* user did not specify a specific bridge */ + if (dev->priv_flags & IFF_BRIDGE_PORT) { + br_dev = netdev_master_upper_dev_get(dev); + cops = br_dev->netdev_ops; + } + } else { + if (dev != br_dev && + !(dev->priv_flags & IFF_BRIDGE_PORT)) + continue; - cops = ops; - } + if (br_dev != netdev_master_upper_dev_get(dev) && + !(dev->priv_flags & IFF_EBRIDGE)) + continue; + cops = ops; + } - if (dev->priv_flags & IFF_BRIDGE_PORT) { - if (cops && cops->ndo_fdb_dump) - idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev, - idx); - } - if (cb->args[1] == -EMSGSIZE) - break; + if (idx < s_idx) + goto cont; - if (dev->netdev_ops->ndo_fdb_dump) - idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, - idx); - else - idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); - if (cb->args[1] == -EMSGSIZE) - break; + if (dev->priv_flags & IFF_BRIDGE_PORT) { + if (cops && cops->ndo_fdb_dump) { + err = cops->ndo_fdb_dump(skb, cb, + br_dev, dev, + &fidx); + if (err == -EMSGSIZE) + goto out; + } + } - cops = NULL; + if (dev->netdev_ops->ndo_fdb_dump) + err = dev->netdev_ops->ndo_fdb_dump(skb, cb, + dev, NULL, + &fidx); + else + err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, + &fidx); + if (err == -EMSGSIZE) + goto out; + + cops = NULL; + + /* reset fdb offset to 0 for rest of the interfaces */ + cb->args[2] = 0; + fidx = 0; +cont: + idx++; + } } - cb->args[0] = idx; +out: + cb->args[0] = h; + cb->args[1] = idx; + cb->args[2] = fidx; + return skb->len; } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 1031a0327fff..10b819308439 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1042,7 +1042,7 @@ static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj) struct nlmsghdr *nlh; struct ndmsg *ndm; - if (dump->idx < dump->cb->args[0]) + if (dump->idx < dump->cb->args[2]) goto skip; nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, @@ -1089,7 +1089,7 @@ nla_put_failure: */ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, - struct net_device *filter_dev, int idx) + struct net_device *filter_dev, int *idx) { struct switchdev_fdb_dump dump = { .fdb.obj.orig_dev = dev, @@ -1097,14 +1097,14 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, .dev = dev, .skb = skb, .cb = cb, - .idx = idx, + .idx = *idx, }; int err; err = switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb); - cb->args[1] = err; - return dump.idx; + *idx = dump.idx; + return err; } EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); -- cgit v1.2.3 From 04bed1434df256b07e78fa5deae848bba18a90f3 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 31 Aug 2016 18:06:13 -0400 Subject: net: dsa: remove ds_to_priv Access the priv member of the dsa_switch structure directly, instead of having an unnecessary helper. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 42 +++++++++++------------ drivers/net/dsa/bcm_sf2.h | 2 +- drivers/net/dsa/mv88e6060.c | 4 +-- drivers/net/dsa/mv88e6xxx/chip.c | 72 ++++++++++++++++++++-------------------- include/net/dsa.h | 5 --- 5 files changed, 60 insertions(+), 65 deletions(-) (limited to 'include/net') diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 1299104a87d4..0afc2e5a04dc 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -477,7 +477,7 @@ static int b53_fast_age_vlan(struct b53_device *dev, u16 vid) static void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; unsigned int i; u16 pvlan; @@ -495,7 +495,7 @@ static void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) static int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; unsigned int cpu_port = dev->cpu_port; u16 pvlan; @@ -520,7 +520,7 @@ static int b53_enable_port(struct dsa_switch *ds, int port, static void b53_disable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; u8 reg; /* Disable Tx/Rx for the port */ @@ -629,7 +629,7 @@ static int b53_switch_reset(struct b53_device *dev) static int b53_phy_read16(struct dsa_switch *ds, int addr, int reg) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; u16 value = 0; int ret; @@ -644,7 +644,7 @@ static int b53_phy_read16(struct dsa_switch *ds, int addr, int reg) static int b53_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; if (priv->ops->phy_write16) return priv->ops->phy_write16(priv, addr, reg, val); @@ -714,7 +714,7 @@ static unsigned int b53_get_mib_size(struct b53_device *dev) static void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; const struct b53_mib_desc *mibs = b53_get_mib(dev); unsigned int mib_size = b53_get_mib_size(dev); unsigned int i; @@ -727,7 +727,7 @@ static void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data) static void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; const struct b53_mib_desc *mibs = b53_get_mib(dev); unsigned int mib_size = b53_get_mib_size(dev); const struct b53_mib_desc *s; @@ -759,7 +759,7 @@ static void b53_get_ethtool_stats(struct dsa_switch *ds, int port, static int b53_get_sset_count(struct dsa_switch *ds) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; return b53_get_mib_size(dev); } @@ -771,7 +771,7 @@ static int b53_set_addr(struct dsa_switch *ds, u8 *addr) static int b53_setup(struct dsa_switch *ds) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; unsigned int port; int ret; @@ -802,7 +802,7 @@ static int b53_setup(struct dsa_switch *ds) static void b53_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; u8 rgmii_ctrl = 0, reg = 0, off; if (!phy_is_pseudo_fixed_link(phydev)) @@ -936,7 +936,7 @@ static int b53_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; if ((is5325(dev) || is5365(dev)) && vlan->vid_begin == 0) return -EOPNOTSUPP; @@ -953,7 +953,7 @@ static void b53_vlan_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; unsigned int cpu_port = dev->cpu_port; @@ -987,7 +987,7 @@ static void b53_vlan_add(struct dsa_switch *ds, int port, static int b53_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; unsigned int cpu_port = dev->cpu_port; struct b53_vlan *vl; @@ -1033,7 +1033,7 @@ static int b53_vlan_dump(struct dsa_switch *ds, int port, struct switchdev_obj_port_vlan *vlan, int (*cb)(struct switchdev_obj *obj)) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; u16 vid, vid_start = 0, pvid; struct b53_vlan *vl; int err = 0; @@ -1192,7 +1192,7 @@ static int b53_fdb_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; /* 5325 and 5365 require some more massaging, but could * be supported eventually @@ -1207,7 +1207,7 @@ static void b53_fdb_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; if (b53_arl_op(priv, 0, port, fdb->addr, fdb->vid, true)) pr_err("%s: failed to add MAC address\n", __func__); @@ -1216,7 +1216,7 @@ static void b53_fdb_add(struct dsa_switch *ds, int port, static int b53_fdb_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; return b53_arl_op(priv, 0, port, fdb->addr, fdb->vid, false); } @@ -1275,7 +1275,7 @@ static int b53_fdb_dump(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, int (*cb)(struct switchdev_obj *obj)) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; struct net_device *dev = ds->ports[port].netdev; struct b53_arl_entry results[2]; unsigned int count = 0; @@ -1314,7 +1314,7 @@ static int b53_fdb_dump(struct dsa_switch *ds, int port, static int b53_br_join(struct dsa_switch *ds, int port, struct net_device *bridge) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; s8 cpu_port = ds->dst->cpu_port; u16 pvlan, reg; unsigned int i; @@ -1359,7 +1359,7 @@ static int b53_br_join(struct dsa_switch *ds, int port, static void b53_br_leave(struct dsa_switch *ds, int port) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; struct net_device *bridge = dev->ports[port].bridge_dev; struct b53_vlan *vl = &dev->vlans[0]; s8 cpu_port = ds->dst->cpu_port; @@ -1410,7 +1410,7 @@ static void b53_br_leave(struct dsa_switch *ds, int port) static void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; u8 hw_state, cur_hw_state; u8 reg; diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index afe56686b3d7..46c4ea796574 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -101,7 +101,7 @@ struct bcm_sf2_priv { static inline struct bcm_sf2_priv *bcm_sf2_to_priv(struct dsa_switch *ds) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; return dev->priv; } diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index 7ff9d373a9ee..7ce36dbd9b62 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -19,7 +19,7 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg) { - struct mv88e6060_priv *priv = ds_to_priv(ds); + struct mv88e6060_priv *priv = ds->priv; return mdiobus_read_nested(priv->bus, priv->sw_addr + addr, reg); } @@ -37,7 +37,7 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg) static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) { - struct mv88e6060_priv *priv = ds_to_priv(ds); + struct mv88e6060_priv *priv = ds->priv; return mdiobus_write_nested(priv->bus, priv->sw_addr + addr, reg, val); } diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index e2a953e7f5f5..d6b0f78e9598 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -586,7 +586,7 @@ static bool mv88e6xxx_has_fid_reg(struct mv88e6xxx_chip *chip) static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; u32 reg; int ret; @@ -832,7 +832,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip, static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port, uint8_t *data) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_hw_stat *stat; int i, j; @@ -848,7 +848,7 @@ static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port, static int mv88e6xxx_get_sset_count(struct dsa_switch *ds) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_hw_stat *stat; int i, j; @@ -863,7 +863,7 @@ static int mv88e6xxx_get_sset_count(struct dsa_switch *ds) static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_hw_stat *stat; int ret; int i, j; @@ -894,7 +894,7 @@ static int mv88e6xxx_get_regs_len(struct dsa_switch *ds, int port) static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, struct ethtool_regs *regs, void *_p) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; u16 *p = _p; int i; @@ -924,7 +924,7 @@ static int _mv88e6xxx_atu_wait(struct mv88e6xxx_chip *chip) static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; u16 reg; int err; @@ -954,7 +954,7 @@ out: static int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, struct phy_device *phydev, struct ethtool_eee *e) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; u16 reg; int err; @@ -1185,7 +1185,7 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int stp_state; int err; @@ -1434,7 +1434,7 @@ static int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port, struct switchdev_obj_port_vlan *vlan, int (*cb)(struct switchdev_obj *obj)) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_vtu_stu_entry next; u16 pvid; int err; @@ -1803,7 +1803,7 @@ static int _mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid, static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, u16 vid_begin, u16 vid_end) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_vtu_stu_entry vlan; int i, err; @@ -1864,7 +1864,7 @@ static const char * const mv88e6xxx_port_8021q_mode_names[] = { static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; u16 old, new = vlan_filtering ? PORT_CONTROL_2_8021Q_SECURE : PORT_CONTROL_2_8021Q_DISABLED; int ret; @@ -1906,7 +1906,7 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VTU)) @@ -1947,7 +1947,7 @@ static void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; u16 vid; @@ -2009,7 +2009,7 @@ static int _mv88e6xxx_port_vlan_del(struct mv88e6xxx_chip *chip, static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; u16 pvid, vid; int err = 0; @@ -2134,7 +2134,7 @@ static void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; mutex_lock(&chip->reg_lock); if (mv88e6xxx_port_db_load_purge(chip, port, fdb->addr, fdb->vid, @@ -2146,7 +2146,7 @@ static void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; mutex_lock(&chip->reg_lock); @@ -2306,7 +2306,7 @@ static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, int (*cb)(struct switchdev_obj *obj)) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; mutex_lock(&chip->reg_lock); @@ -2319,7 +2319,7 @@ static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port, static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *bridge) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int i, err = 0; mutex_lock(&chip->reg_lock); @@ -2342,7 +2342,7 @@ static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; struct net_device *bridge = chip->ports[port].bridge_dev; int i; @@ -2763,7 +2763,7 @@ static int mv88e6xxx_g1_set_age_time(struct mv88e6xxx_chip *chip, static int mv88e6xxx_set_ageing_time(struct dsa_switch *ds, unsigned int ageing_time) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; mutex_lock(&chip->reg_lock); @@ -3204,7 +3204,7 @@ static int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) static int mv88e6xxx_setup(struct dsa_switch *ds) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; int i; @@ -3244,7 +3244,7 @@ unlock: static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; mutex_lock(&chip->reg_lock); @@ -3352,7 +3352,7 @@ static void mv88e6xxx_mdio_unregister(struct mv88e6xxx_chip *chip) static int mv88e61xx_get_temp(struct dsa_switch *ds, int *temp) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; u16 val; int ret; @@ -3395,7 +3395,7 @@ error: static int mv88e63xx_get_temp(struct dsa_switch *ds, int *temp) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; u16 val; int ret; @@ -3415,7 +3415,7 @@ static int mv88e63xx_get_temp(struct dsa_switch *ds, int *temp) static int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP)) return -EOPNOTSUPP; @@ -3428,7 +3428,7 @@ static int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp) static int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; u16 val; int ret; @@ -3451,7 +3451,7 @@ static int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp) static int mv88e6xxx_set_temp_limit(struct dsa_switch *ds, int temp) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; u16 val; int err; @@ -3474,7 +3474,7 @@ unlock: static int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; u16 val; int ret; @@ -3498,7 +3498,7 @@ static int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm) static int mv88e6xxx_get_eeprom_len(struct dsa_switch *ds) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; return chip->eeprom_len; } @@ -3556,7 +3556,7 @@ static int mv88e6xxx_get_eeprom16(struct mv88e6xxx_chip *chip, static int mv88e6xxx_get_eeprom(struct dsa_switch *ds, struct ethtool_eeprom *eeprom, u8 *data) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; mutex_lock(&chip->reg_lock); @@ -3645,7 +3645,7 @@ static int mv88e6xxx_set_eeprom16(struct mv88e6xxx_chip *chip, static int mv88e6xxx_set_eeprom(struct dsa_switch *ds, struct ethtool_eeprom *eeprom, u8 *data) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; if (eeprom->magic != 0xc3ec4951) @@ -3953,7 +3953,7 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA)) return DSA_TAG_PROTO_EDSA; @@ -4018,7 +4018,7 @@ static void mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb, struct switchdev_trans *trans) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; mutex_lock(&chip->reg_lock); if (mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid, @@ -4030,7 +4030,7 @@ static void mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, static int mv88e6xxx_port_mdb_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; mutex_lock(&chip->reg_lock); @@ -4045,7 +4045,7 @@ static int mv88e6xxx_port_mdb_dump(struct dsa_switch *ds, int port, struct switchdev_obj_port_mdb *mdb, int (*cb)(struct switchdev_obj *obj)) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; mutex_lock(&chip->reg_lock); @@ -4173,7 +4173,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) static void mv88e6xxx_remove(struct mdio_device *mdiodev) { struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; mv88e6xxx_phy_destroy(chip); mv88e6xxx_unregister_switch(chip); diff --git a/include/net/dsa.h b/include/net/dsa.h index e3eb230b970d..9d97c5214341 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -391,11 +391,6 @@ void register_switch_driver(struct dsa_switch_ops *type); void unregister_switch_driver(struct dsa_switch_ops *type); struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); -static inline void *ds_to_priv(struct dsa_switch *ds) -{ - return ds->priv; -} - static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) { return dst->rcv != NULL; -- cgit v1.2.3 From dd19bde36739702bbd9a832b5d4995bc0fa8d6d7 Mon Sep 17 00:00:00 2001 From: Rosen, Rami Date: Fri, 2 Sep 2016 14:11:57 +0300 Subject: switchdev: Fix return value of switchdev_port_fdb_dump(). This patch fixes the retun value of switchdev_port_fdb_dump() when CONFIG_NET_SWITCHDEV is not set. This avoids getting "warning: return makes integer from pointer without a cast [-Wint-conversion]" when building when CONFIG_NET_SWITCHDEV is not set under several compiler versions. This warning is due to commit d297653dd6f07afbe7e6c702a4bcd7615680002e ("rtnetlink: fdb dump: optimize by saving last interface markers"). Signed-off-by: Rami Rosen Acked-by: Roopa Prabhu Reported-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/switchdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 6279f2f179ec..729fe1534160 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -344,7 +344,7 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, struct net_device *filter_dev, int *idx) { - return idx; + return *idx; } static inline bool switchdev_port_same_parent_id(struct net_device *a, -- cgit v1.2.3 From 5a42976d4fe5d7fddce133de995c742c87b1b7e3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 6 Sep 2016 22:19:51 +0100 Subject: rxrpc: Add tracepoint for working out where aborts happen Add a tracepoint for working out where local aborts happen. Each tracepoint call is labelled with a 3-letter code so that they can be distinguished - and the DATA sequence number is added too where available. rxrpc_kernel_abort_call() also takes a 3-letter code so that AFS can indicate the circumstances when it aborts a call. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 17 ++++--- include/net/af_rxrpc.h | 3 +- include/trace/events/rxrpc.h | 29 ++++++++++++ net/rxrpc/ar-internal.h | 14 ++++-- net/rxrpc/call_event.c | 7 +-- net/rxrpc/call_object.c | 2 +- net/rxrpc/conn_event.c | 6 +++ net/rxrpc/input.c | 7 +-- net/rxrpc/insecure.c | 19 ++++---- net/rxrpc/rxkad.c | 108 +++++++++++++++++++------------------------ net/rxrpc/sendmsg.c | 18 ++++---- 11 files changed, 132 insertions(+), 98 deletions(-) (limited to 'include/net') diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 37608be52abd..53750dece80e 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -377,7 +377,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, return wait_mode->wait(call); error_do_abort: - rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT); + rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD"); error_kill_call: afs_end_call(call); _leave(" = %d", ret); @@ -425,12 +425,12 @@ static void afs_deliver_to_call(struct afs_call *call) case -ENOTCONN: abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(afs_socket, call->rxcall, - abort_code); + abort_code, -ret, "KNC"); goto do_abort; case -ENOTSUPP: abort_code = RX_INVALID_OPERATION; rxrpc_kernel_abort_call(afs_socket, call->rxcall, - abort_code); + abort_code, -ret, "KIV"); goto do_abort; case -ENODATA: case -EBADMSG: @@ -440,7 +440,7 @@ static void afs_deliver_to_call(struct afs_call *call) if (call->state != AFS_CALL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(afs_socket, call->rxcall, - abort_code); + abort_code, EBADMSG, "KUM"); goto do_abort; } } @@ -463,6 +463,7 @@ do_abort: */ static int afs_wait_for_call_to_complete(struct afs_call *call) { + const char *abort_why; int ret; DECLARE_WAITQUEUE(myself, current); @@ -481,9 +482,11 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) continue; } + abort_why = "KWC"; ret = call->error; if (call->state == AFS_CALL_COMPLETE) break; + abort_why = "KWI"; ret = -EINTR; if (signal_pending(current)) break; @@ -497,7 +500,7 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) if (call->state < AFS_CALL_COMPLETE) { _debug("call incomplete"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_CALL_DEAD); + RX_CALL_DEAD, -ret, abort_why); } _debug("call complete"); @@ -695,7 +698,7 @@ void afs_send_empty_reply(struct afs_call *call) case -ENOMEM: _debug("oom"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_USER_ABORT); + RX_USER_ABORT, ENOMEM, "KOO"); default: afs_end_call(call); _leave(" [error]"); @@ -734,7 +737,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) if (n == -ENOMEM) { _debug("oom"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_USER_ABORT); + RX_USER_ABORT, ENOMEM, "KOO"); } afs_end_call(call); _leave(" [error]"); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index b4b6a3664dda..08ed8729126c 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -35,7 +35,8 @@ int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, struct msghdr *, size_t); int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, void *, size_t, size_t *, bool, u32 *); -void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32); +void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, + u32, int, const char *); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long, rxrpc_notify_rx_t); diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 30164896f1f6..85ee035774ae 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -84,6 +84,35 @@ TRACE_EVENT(rxrpc_skb, __entry->where) ); +TRACE_EVENT(rxrpc_abort, + TP_PROTO(const char *why, u32 cid, u32 call_id, rxrpc_seq_t seq, + int abort_code, int error), + + TP_ARGS(why, cid, call_id, seq, abort_code, error), + + TP_STRUCT__entry( + __array(char, why, 4 ) + __field(u32, cid ) + __field(u32, call_id ) + __field(rxrpc_seq_t, seq ) + __field(int, abort_code ) + __field(int, error ) + ), + + TP_fast_assign( + memcpy(__entry->why, why, 4); + __entry->cid = cid; + __entry->call_id = call_id; + __entry->abort_code = abort_code; + __entry->error = error; + __entry->seq = seq; + ), + + TP_printk("%08x:%08x s=%u a=%d e=%d %s", + __entry->cid, __entry->call_id, __entry->seq, + __entry->abort_code, __entry->error, __entry->why) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 0353399792b6..dbfb9ed17483 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -155,7 +155,8 @@ struct rxrpc_security { void *); /* verify the security on a received packet */ - int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, u32 *); + int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, + rxrpc_seq_t, u16); /* issue a challenge */ int (*issue_challenge)(struct rxrpc_connection *); @@ -637,9 +638,12 @@ static inline bool rxrpc_call_completed(struct rxrpc_call *call) /* * Record that a call is locally aborted. */ -static inline bool __rxrpc_abort_call(struct rxrpc_call *call, +static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, u32 abort_code, int error) { + trace_rxrpc_abort(why, call->cid, call->call_id, seq, + abort_code, error); if (__rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error)) { @@ -649,13 +653,13 @@ static inline bool __rxrpc_abort_call(struct rxrpc_call *call, return false; } -static inline bool rxrpc_abort_call(struct rxrpc_call *call, - u32 abort_code, int error) +static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, u32 abort_code, int error) { bool ret; write_lock_bh(&call->state_lock); - ret = __rxrpc_abort_call(call, abort_code, error); + ret = __rxrpc_abort_call(why, call, seq, abort_code, error); write_unlock_bh(&call->state_lock); return ret; } diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 8365d3366114..af88ad7d2cf9 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -598,7 +598,8 @@ process_further: /* secured packets must be verified and possibly decrypted */ if (call->conn->security->verify_packet(call, skb, - _abort_code) < 0) + sp->hdr.seq, + sp->hdr.cksum) < 0) goto protocol_error; rxrpc_insert_oos_packet(call, skb); @@ -982,7 +983,7 @@ skip_msg_init: } if (test_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events)) { - rxrpc_abort_call(call, RX_CALL_TIMEOUT, ETIME); + rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME); _debug("post timeout"); if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, @@ -1005,7 +1006,7 @@ skip_msg_init: case -EKEYEXPIRED: case -EKEYREJECTED: case -EPROTO: - rxrpc_abort_call(call, abort_code, -ret); + rxrpc_abort_call("PRO", call, 0, abort_code, -ret); goto kill_ACKs; } } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index be5733d55794..9efd9b0b0bdf 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -563,7 +563,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) if (call->state < RXRPC_CALL_COMPLETE) { _debug("+++ ABORTING STATE %d +++\n", call->state); - __rxrpc_abort_call(call, RX_CALL_DEAD, ECONNRESET); + __rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET); clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 9db90f4f768d..8c7938ba6a84 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -158,6 +158,11 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, lockdep_is_held(&conn->channel_lock)); if (call) { rxrpc_see_call(call); + if (compl == RXRPC_CALL_LOCALLY_ABORTED) + trace_rxrpc_abort("CON", call->cid, + call->call_id, 0, + abort_code, error); + write_lock_bh(&call->state_lock); if (rxrpc_set_call_completion(call, compl, abort_code, error)) { @@ -167,6 +172,7 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, write_unlock_bh(&call->state_lock); if (queue) rxrpc_queue_call(call); + } } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 79f3f585cdc3..8e624109750a 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -429,7 +429,7 @@ protocol_error: _debug("protocol error"); write_lock_bh(&call->state_lock); protocol_error_locked: - if (__rxrpc_abort_call(call, RX_PROTOCOL_ERROR, EPROTO)) + if (__rxrpc_abort_call("FPR", call, 0, RX_PROTOCOL_ERROR, EPROTO)) rxrpc_queue_call(call); free_packet_unlock: write_unlock_bh(&call->state_lock); @@ -495,9 +495,10 @@ static void rxrpc_process_jumbo_packet(struct rxrpc_call *call, protocol_error: _debug("protocol error"); rxrpc_free_skb(part); - rxrpc_free_skb(jumbo); - if (rxrpc_abort_call(call, RX_PROTOCOL_ERROR, EPROTO)) + if (rxrpc_abort_call("PJP", call, sp->hdr.seq, + RX_PROTOCOL_ERROR, EPROTO)) rxrpc_queue_call(call); + rxrpc_free_skb(jumbo); _leave(""); } diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index c21ad213b337..a4aba0246731 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -23,31 +23,32 @@ static int none_prime_packet_security(struct rxrpc_connection *conn) } static int none_secure_packet(struct rxrpc_call *call, - struct sk_buff *skb, - size_t data_size, - void *sechdr) + struct sk_buff *skb, + size_t data_size, + void *sechdr) { return 0; } static int none_verify_packet(struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb, + rxrpc_seq_t seq, + u16 expected_cksum) { return 0; } static int none_respond_to_challenge(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb, + u32 *_abort_code) { *_abort_code = RX_PROTOCOL_ERROR; return -EPROTO; } static int none_verify_response(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb, + u32 *_abort_code) { *_abort_code = RX_PROTOCOL_ERROR; return -EPROTO; diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 89f475febfd7..3777432df10b 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -316,12 +316,10 @@ static int rxkad_secure_packet(struct rxrpc_call *call, /* * decrypt partial encryption on a packet (level 1 security) */ -static int rxkad_verify_packet_auth(const struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) +static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, + rxrpc_seq_t seq) { struct rxkad_level1_hdr sechdr; - struct rxrpc_skb_priv *sp; SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxrpc_crypt iv; struct scatterlist sg[16]; @@ -332,7 +330,10 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, _enter(""); - sp = rxrpc_skb(skb); + if (skb->len < 8) { + rxrpc_abort_call("V1H", call, seq, RXKADSEALEDINCON, EPROTO); + goto protocol_error; + } /* we want to decrypt the skbuff in-place */ nsg = skb_cow_data(skb, 0, &trailer); @@ -351,9 +352,11 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, crypto_skcipher_decrypt(req); skcipher_request_zero(req); - /* remove the decrypted packet length */ - if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) - goto datalen_error; + /* Extract the decrypted packet length */ + if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) { + rxrpc_abort_call("XV1", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } if (!skb_pull(skb, sizeof(sechdr))) BUG(); @@ -361,24 +364,24 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, data_size = buf & 0xffff; check = buf >> 16; - check ^= sp->hdr.seq ^ sp->hdr.callNumber; + check ^= seq ^ call->call_id; check &= 0xffff; if (check != 0) { - *_abort_code = RXKADSEALEDINCON; + rxrpc_abort_call("V1C", call, seq, RXKADSEALEDINCON, EPROTO); goto protocol_error; } /* shorten the packet to remove the padding */ - if (data_size > skb->len) - goto datalen_error; - else if (data_size < skb->len) + if (data_size > skb->len) { + rxrpc_abort_call("V1L", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } + if (data_size < skb->len) skb->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; -datalen_error: - *_abort_code = RXKADDATALEN; protocol_error: _leave(" = -EPROTO"); return -EPROTO; @@ -391,13 +394,11 @@ nomem: /* * wholly decrypt a packet (level 2 security) */ -static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) +static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, + rxrpc_seq_t seq) { const struct rxrpc_key_token *token; struct rxkad_level2_hdr sechdr; - struct rxrpc_skb_priv *sp; SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxrpc_crypt iv; struct scatterlist _sg[4], *sg; @@ -408,7 +409,10 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, _enter(",{%d}", skb->len); - sp = rxrpc_skb(skb); + if (skb->len < 8) { + rxrpc_abort_call("V2H", call, seq, RXKADSEALEDINCON, EPROTO); + goto protocol_error; + } /* we want to decrypt the skbuff in-place */ nsg = skb_cow_data(skb, 0, &trailer); @@ -437,9 +441,11 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, if (sg != _sg) kfree(sg); - /* remove the decrypted packet length */ - if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) - goto datalen_error; + /* Extract the decrypted packet length */ + if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) { + rxrpc_abort_call("XV2", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } if (!skb_pull(skb, sizeof(sechdr))) BUG(); @@ -447,24 +453,23 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, data_size = buf & 0xffff; check = buf >> 16; - check ^= sp->hdr.seq ^ sp->hdr.callNumber; + check ^= seq ^ call->call_id; check &= 0xffff; if (check != 0) { - *_abort_code = RXKADSEALEDINCON; + rxrpc_abort_call("V2C", call, seq, RXKADSEALEDINCON, EPROTO); goto protocol_error; } - /* shorten the packet to remove the padding */ - if (data_size > skb->len) - goto datalen_error; - else if (data_size < skb->len) + if (data_size > skb->len) { + rxrpc_abort_call("V2L", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } + if (data_size < skb->len) skb->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; -datalen_error: - *_abort_code = RXKADDATALEN; protocol_error: _leave(" = -EPROTO"); return -EPROTO; @@ -475,40 +480,30 @@ nomem: } /* - * verify the security on a received packet + * Verify the security on a received packet or subpacket (if part of a + * jumbo packet). */ -static int rxkad_verify_packet(struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) +static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + rxrpc_seq_t seq, u16 expected_cksum) { SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); - struct rxrpc_skb_priv *sp; struct rxrpc_crypt iv; struct scatterlist sg; u16 cksum; u32 x, y; - int ret; - - sp = rxrpc_skb(skb); _enter("{%d{%x}},{#%u}", - call->debug_id, key_serial(call->conn->params.key), sp->hdr.seq); + call->debug_id, key_serial(call->conn->params.key), seq); if (!call->conn->cipher) return 0; - if (sp->hdr.securityIndex != RXRPC_SECURITY_RXKAD) { - *_abort_code = RXKADINCONSISTENCY; - _leave(" = -EPROTO [not rxkad]"); - return -EPROTO; - } - /* continue encrypting from where we left off */ memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); /* validate the security checksum */ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); - x |= sp->hdr.seq & 0x3fffffff; + x |= seq & 0x3fffffff; call->crypto_buf[0] = htonl(call->call_id); call->crypto_buf[1] = htonl(x); @@ -524,29 +519,22 @@ static int rxkad_verify_packet(struct rxrpc_call *call, if (cksum == 0) cksum = 1; /* zero checksums are not permitted */ - if (sp->hdr.cksum != cksum) { - *_abort_code = RXKADSEALEDINCON; + if (cksum != expected_cksum) { + rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO); _leave(" = -EPROTO [csum failed]"); return -EPROTO; } switch (call->conn->params.security_level) { case RXRPC_SECURITY_PLAIN: - ret = 0; - break; + return 0; case RXRPC_SECURITY_AUTH: - ret = rxkad_verify_packet_auth(call, skb, _abort_code); - break; + return rxkad_verify_packet_1(call, skb, seq); case RXRPC_SECURITY_ENCRYPT: - ret = rxkad_verify_packet_encrypt(call, skb, _abort_code); - break; + return rxkad_verify_packet_2(call, skb, seq); default: - ret = -ENOANO; - break; + return -ENOANO; } - - _leave(" = %d", ret); - return ret; } /* diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 2439aff131c7..9a4af992fcdf 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -454,14 +454,15 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, /* * abort a call, sending an ABORT packet to the peer */ -static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code) +static void rxrpc_send_abort(struct rxrpc_call *call, const char *why, + u32 abort_code, int error) { if (call->state >= RXRPC_CALL_COMPLETE) return; write_lock_bh(&call->state_lock); - if (__rxrpc_abort_call(call, abort_code, ECONNABORTED)) { + if (__rxrpc_abort_call(why, call, 0, abort_code, error)) { del_timer_sync(&call->resend_timer); del_timer_sync(&call->ack_timer); clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); @@ -556,7 +557,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) /* it's too late for this call */ ret = -ESHUTDOWN; } else if (cmd == RXRPC_CMD_SEND_ABORT) { - rxrpc_send_abort(call, abort_code); + rxrpc_send_abort(call, "CMD", abort_code, ECONNABORTED); ret = 0; } else if (cmd != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; @@ -626,20 +627,19 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data); * @sock: The socket the call is on * @call: The call to be aborted * @abort_code: The abort code to stick into the ABORT packet + * @error: Local error value + * @why: 3-char string indicating why. * * Allow a kernel service to abort a call, if it's still in an abortable state. */ void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, - u32 abort_code) + u32 abort_code, int error, const char *why) { - _enter("{%d},%d", call->debug_id, abort_code); + _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); lock_sock(sock->sk); - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - - rxrpc_send_abort(call, abort_code); + rxrpc_send_abort(call, why, abort_code, error); release_sock(sock->sk); _leave(""); -- cgit v1.2.3 From 00e907127e6f86d0f9b122d9b4347a8aa09a8b61 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Sep 2016 11:10:12 +0100 Subject: rxrpc: Preallocate peers, conns and calls for incoming service requests Make it possible for the data_ready handler called from the UDP transport socket to completely instantiate an rxrpc_call structure and make it immediately live by preallocating all the memory it might need. The idea is to cut out the background thread usage as much as possible. [Note that the preallocated structs are not actually used in this patch - that will be done in a future patch.] If insufficient resources are available in the preallocation buffers, it will be possible to discard the DATA packet in the data_ready handler or schedule a BUSY packet without the need to schedule an attempt at allocation in a background thread. To this end: (1) Preallocate rxrpc_peer, rxrpc_connection and rxrpc_call structs to a maximum number each of the listen backlog size. The backlog size is limited to a maxmimum of 32. Only this many of each can be in the preallocation buffer. (2) For userspace sockets, the preallocation is charged initially by listen() and will be recharged by accepting or rejecting pending new incoming calls. (3) For kernel services {,re,dis}charging of the preallocation buffers is handled manually. Two notifier callbacks have to be provided before kernel_listen() is invoked: (a) An indication that a new call has been instantiated. This can be used to trigger background recharging. (b) An indication that a call is being discarded. This is used when the socket is being released. A function, rxrpc_kernel_charge_accept() is called by the kernel service to preallocate a single call. It should be passed the user ID to be used for that call and a callback to associate the rxrpc call with the kernel service's side of the ID. (4) Discard the preallocation when the socket is closed. (5) Temporarily bump the refcount on the call allocated in rxrpc_incoming_call() so that rxrpc_release_call() can ditch the preallocation ref on service calls unconditionally. This will no longer be necessary once the preallocation is used. Note that this does not yet control the number of active service calls on a client - that will come in a later patch. A future development would be to provide a setsockopt() call that allows a userspace server to manually charge the preallocation buffer. This would allow user call IDs to be provided in advance and the awkward manual accept stage to be bypassed. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 71 ++++++++++++++- include/net/af_rxrpc.h | 10 ++- net/rxrpc/af_rxrpc.c | 16 +++- net/rxrpc/ar-internal.h | 32 ++++++- net/rxrpc/call_accept.c | 229 +++++++++++++++++++++++++++++++++++++++++++++++ net/rxrpc/call_object.c | 12 ++- net/rxrpc/conn_object.c | 2 + net/rxrpc/conn_service.c | 24 +++++ net/rxrpc/input.c | 2 +- net/rxrpc/proc.c | 8 +- 10 files changed, 391 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 53750dece80e..720ef05a24fe 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -18,6 +18,7 @@ struct socket *afs_socket; /* my RxRPC socket */ static struct workqueue_struct *afs_async_calls; +static struct afs_call *afs_spare_incoming_call; static atomic_t afs_outstanding_calls; static void afs_free_call(struct afs_call *); @@ -26,7 +27,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *); static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); static int afs_dont_wait_for_call_to_complete(struct afs_call *); static void afs_process_async_call(struct work_struct *); -static void afs_rx_new_call(struct sock *); +static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); +static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long); static int afs_deliver_cm_op_id(struct afs_call *); /* synchronous call management */ @@ -54,8 +56,10 @@ static const struct afs_call_type afs_RXCMxxxx = { }; static void afs_collect_incoming_call(struct work_struct *); +static void afs_charge_preallocation(struct work_struct *); static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call); +static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation); static int afs_wait_atomic_t(atomic_t *p) { @@ -100,13 +104,15 @@ int afs_open_socket(void) if (ret < 0) goto error_2; - rxrpc_kernel_new_call_notification(socket, afs_rx_new_call); + rxrpc_kernel_new_call_notification(socket, afs_rx_new_call, + afs_rx_discard_new_call); ret = kernel_listen(socket, INT_MAX); if (ret < 0) goto error_2; afs_socket = socket; + afs_charge_preallocation(NULL); _leave(" = 0"); return 0; @@ -126,6 +132,12 @@ void afs_close_socket(void) { _enter(""); + if (afs_spare_incoming_call) { + atomic_inc(&afs_outstanding_calls); + afs_free_call(afs_spare_incoming_call); + afs_spare_incoming_call = NULL; + } + _debug("outstanding %u", atomic_read(&afs_outstanding_calls)); wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t, TASK_UNINTERRUPTIBLE); @@ -635,12 +647,65 @@ static void afs_collect_incoming_call(struct work_struct *work) afs_free_call(call); } +static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID) +{ + struct afs_call *call = (struct afs_call *)user_call_ID; + + call->rxcall = rxcall; +} + +/* + * Charge the incoming call preallocation. + */ +static void afs_charge_preallocation(struct work_struct *work) +{ + struct afs_call *call = afs_spare_incoming_call; + + for (;;) { + if (!call) { + call = kzalloc(sizeof(struct afs_call), GFP_KERNEL); + if (!call) + break; + + INIT_WORK(&call->async_work, afs_process_async_call); + call->wait_mode = &afs_async_incoming_call; + call->type = &afs_RXCMxxxx; + init_waitqueue_head(&call->waitq); + call->state = AFS_CALL_AWAIT_OP_ID; + } + + if (rxrpc_kernel_charge_accept(afs_socket, + afs_wake_up_async_call, + afs_rx_attach, + (unsigned long)call, + GFP_KERNEL) < 0) + break; + call = NULL; + } + afs_spare_incoming_call = call; +} + +/* + * Discard a preallocated call when a socket is shut down. + */ +static void afs_rx_discard_new_call(struct rxrpc_call *rxcall, + unsigned long user_call_ID) +{ + struct afs_call *call = (struct afs_call *)user_call_ID; + + atomic_inc(&afs_outstanding_calls); + call->rxcall = NULL; + afs_free_call(call); +} + /* * Notification of an incoming call. */ -static void afs_rx_new_call(struct sock *sk) +static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long user_call_ID) { queue_work(afs_wq, &afs_collect_incoming_call_work); + queue_work(afs_wq, &afs_charge_preallocation_work); } /* diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 08ed8729126c..9cf551be916b 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -21,10 +21,14 @@ struct rxrpc_call; typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, unsigned long); -typedef void (*rxrpc_notify_new_call_t)(struct sock *); +typedef void (*rxrpc_notify_new_call_t)(struct sock *, struct rxrpc_call *, + unsigned long); +typedef void (*rxrpc_discard_new_call_t)(struct rxrpc_call *, unsigned long); +typedef void (*rxrpc_user_attach_call_t)(struct rxrpc_call *, unsigned long); void rxrpc_kernel_new_call_notification(struct socket *, - rxrpc_notify_new_call_t); + rxrpc_notify_new_call_t, + rxrpc_discard_new_call_t); struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, struct sockaddr_rxrpc *, struct key *, @@ -43,5 +47,7 @@ struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long, int rxrpc_kernel_reject_call(struct socket *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); +int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, + rxrpc_user_attach_call_t, unsigned long, gfp_t); #endif /* _NET_RXRPC_H */ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index f13cca1e973e..1e8cf3ded81f 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -193,7 +193,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; struct rxrpc_sock *rx = rxrpc_sk(sk); - unsigned int max; + unsigned int max, old; int ret; _enter("%p,%d", rx, backlog); @@ -212,9 +212,13 @@ static int rxrpc_listen(struct socket *sock, int backlog) backlog = max; else if (backlog < 0 || backlog > max) break; + old = sk->sk_max_ack_backlog; sk->sk_max_ack_backlog = backlog; - rx->sk.sk_state = RXRPC_SERVER_LISTENING; - ret = 0; + ret = rxrpc_service_prealloc(rx, GFP_KERNEL); + if (ret == 0) + rx->sk.sk_state = RXRPC_SERVER_LISTENING; + else + sk->sk_max_ack_backlog = old; break; default: ret = -EBUSY; @@ -303,16 +307,19 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call); * rxrpc_kernel_new_call_notification - Get notifications of new calls * @sock: The socket to intercept received messages on * @notify_new_call: Function to be called when new calls appear + * @discard_new_call: Function to discard preallocated calls * * Allow a kernel service to be given notifications about new calls. */ void rxrpc_kernel_new_call_notification( struct socket *sock, - rxrpc_notify_new_call_t notify_new_call) + rxrpc_notify_new_call_t notify_new_call, + rxrpc_discard_new_call_t discard_new_call) { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); rx->notify_new_call = notify_new_call; + rx->discard_new_call = discard_new_call; } EXPORT_SYMBOL(rxrpc_kernel_new_call_notification); @@ -622,6 +629,7 @@ static int rxrpc_release_sock(struct sock *sk) } /* try to flush out this socket */ + rxrpc_discard_prealloc(rx); rxrpc_release_calls_on_socket(rx); flush_workqueue(rxrpc_workqueue); rxrpc_purge_queue(&sk->sk_receive_queue); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 027791261768..45e1c269f90e 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -63,6 +63,27 @@ enum { RXRPC_CLOSE, /* socket is being closed */ }; +/* + * Service backlog preallocation. + * + * This contains circular buffers of preallocated peers, connections and calls + * for incoming service calls and their head and tail pointers. This allows + * calls to be set up in the data_ready handler, thereby avoiding the need to + * shuffle packets around so much. + */ +struct rxrpc_backlog { + unsigned short peer_backlog_head; + unsigned short peer_backlog_tail; + unsigned short conn_backlog_head; + unsigned short conn_backlog_tail; + unsigned short call_backlog_head; + unsigned short call_backlog_tail; +#define RXRPC_BACKLOG_MAX 32 + struct rxrpc_peer *peer_backlog[RXRPC_BACKLOG_MAX]; + struct rxrpc_connection *conn_backlog[RXRPC_BACKLOG_MAX]; + struct rxrpc_call *call_backlog[RXRPC_BACKLOG_MAX]; +}; + /* * RxRPC socket definition */ @@ -70,13 +91,15 @@ struct rxrpc_sock { /* WARNING: sk has to be the first member */ struct sock sk; rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */ + rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */ struct rxrpc_local *local; /* local endpoint */ struct hlist_node listen_link; /* link in the local endpoint's listen list */ struct list_head secureq; /* calls awaiting connection security clearance */ struct list_head acceptq; /* calls awaiting acceptance */ + struct rxrpc_backlog *backlog; /* Preallocation for services */ struct key *key; /* security for this socket */ struct key *securities; /* list of server security descriptors */ - struct rb_root calls; /* outstanding calls on this socket */ + struct rb_root calls; /* User ID -> call mapping */ unsigned long flags; #define RXRPC_SOCK_CONNECTED 0 /* connect_srx is set */ rwlock_t call_lock; /* lock for calls */ @@ -290,6 +313,7 @@ enum rxrpc_conn_cache_state { enum rxrpc_conn_proto_state { RXRPC_CONN_UNUSED, /* Connection not yet attempted */ RXRPC_CONN_CLIENT, /* Client connection */ + RXRPC_CONN_SERVICE_PREALLOC, /* Service connection preallocation */ RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */ RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */ RXRPC_CONN_SERVICE, /* Service secured connection */ @@ -408,6 +432,7 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ RXRPC_CALL_CLIENT_FINAL_ACK, /* - client sending final ACK phase */ + RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */ @@ -534,6 +559,8 @@ extern struct workqueue_struct *rxrpc_workqueue; /* * call_accept.c */ +int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); +void rxrpc_discard_prealloc(struct rxrpc_sock *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, rxrpc_notify_rx_t); @@ -557,6 +584,7 @@ extern struct list_head rxrpc_calls; extern rwlock_t rxrpc_call_lock; struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); +struct rxrpc_call *rxrpc_alloc_call(gfp_t); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, @@ -573,6 +601,7 @@ void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_get_call_for_skb(struct rxrpc_call *, struct sk_buff *); void rxrpc_put_call_for_skb(struct rxrpc_call *, struct sk_buff *); +void rxrpc_cleanup_call(struct rxrpc_call *); void __exit rxrpc_destroy_all_calls(void); static inline bool rxrpc_is_service_call(const struct rxrpc_call *call) @@ -757,6 +786,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *, struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *, struct sockaddr_rxrpc *, struct sk_buff *); +struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t); void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 4c71efcf82ed..cc7194e05a15 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -20,11 +20,209 @@ #include #include #include +#include #include #include #include #include "ar-internal.h" +/* + * Preallocate a single service call, connection and peer and, if possible, + * give them a user ID and attach the user's side of the ID to them. + */ +static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, + struct rxrpc_backlog *b, + rxrpc_notify_rx_t notify_rx, + rxrpc_user_attach_call_t user_attach_call, + unsigned long user_call_ID, gfp_t gfp) +{ + const void *here = __builtin_return_address(0); + struct rxrpc_call *call; + int max, tmp; + unsigned int size = RXRPC_BACKLOG_MAX; + unsigned int head, tail, call_head, call_tail; + + max = rx->sk.sk_max_ack_backlog; + tmp = rx->sk.sk_ack_backlog; + if (tmp >= max) { + _leave(" = -ENOBUFS [full %u]", max); + return -ENOBUFS; + } + max -= tmp; + + /* We don't need more conns and peers than we have calls, but on the + * other hand, we shouldn't ever use more peers than conns or conns + * than calls. + */ + call_head = b->call_backlog_head; + call_tail = READ_ONCE(b->call_backlog_tail); + tmp = CIRC_CNT(call_head, call_tail, size); + if (tmp >= max) { + _leave(" = -ENOBUFS [enough %u]", tmp); + return -ENOBUFS; + } + max = tmp + 1; + + head = b->peer_backlog_head; + tail = READ_ONCE(b->peer_backlog_tail); + if (CIRC_CNT(head, tail, size) < max) { + struct rxrpc_peer *peer = rxrpc_alloc_peer(rx->local, gfp); + if (!peer) + return -ENOMEM; + b->peer_backlog[head] = peer; + smp_store_release(&b->peer_backlog_head, + (head + 1) & (size - 1)); + } + + head = b->conn_backlog_head; + tail = READ_ONCE(b->conn_backlog_tail); + if (CIRC_CNT(head, tail, size) < max) { + struct rxrpc_connection *conn; + + conn = rxrpc_prealloc_service_connection(gfp); + if (!conn) + return -ENOMEM; + b->conn_backlog[head] = conn; + smp_store_release(&b->conn_backlog_head, + (head + 1) & (size - 1)); + } + + /* Now it gets complicated, because calls get registered with the + * socket here, particularly if a user ID is preassigned by the user. + */ + call = rxrpc_alloc_call(gfp); + if (!call) + return -ENOMEM; + call->flags |= (1 << RXRPC_CALL_IS_SERVICE); + call->state = RXRPC_CALL_SERVER_PREALLOC; + + trace_rxrpc_call(call, rxrpc_call_new_service, + atomic_read(&call->usage), + here, (const void *)user_call_ID); + + write_lock(&rx->call_lock); + if (user_attach_call) { + struct rxrpc_call *xcall; + struct rb_node *parent, **pp; + + /* Check the user ID isn't already in use */ + pp = &rx->calls.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + xcall = rb_entry(parent, struct rxrpc_call, sock_node); + if (user_call_ID < call->user_call_ID) + pp = &(*pp)->rb_left; + else if (user_call_ID > call->user_call_ID) + pp = &(*pp)->rb_right; + else + goto id_in_use; + } + + call->user_call_ID = user_call_ID; + call->notify_rx = notify_rx; + rxrpc_get_call(call, rxrpc_call_got); + user_attach_call(call, user_call_ID); + rxrpc_get_call(call, rxrpc_call_got_userid); + rb_link_node(&call->sock_node, parent, pp); + rb_insert_color(&call->sock_node, &rx->calls); + set_bit(RXRPC_CALL_HAS_USERID, &call->flags); + } + + write_unlock(&rx->call_lock); + + write_lock(&rxrpc_call_lock); + list_add_tail(&call->link, &rxrpc_calls); + write_unlock(&rxrpc_call_lock); + + b->call_backlog[call_head] = call; + smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1)); + _leave(" = 0 [%d -> %lx]", call->debug_id, user_call_ID); + return 0; + +id_in_use: + write_unlock(&rx->call_lock); + rxrpc_cleanup_call(call); + _leave(" = -EBADSLT"); + return -EBADSLT; +} + +/* + * Preallocate sufficient service connections, calls and peers to cover the + * entire backlog of a socket. When a new call comes in, if we don't have + * sufficient of each available, the call gets rejected as busy or ignored. + * + * The backlog is replenished when a connection is accepted or rejected. + */ +int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) +{ + struct rxrpc_backlog *b = rx->backlog; + + if (!b) { + b = kzalloc(sizeof(struct rxrpc_backlog), gfp); + if (!b) + return -ENOMEM; + rx->backlog = b; + } + + if (rx->discard_new_call) + return 0; + + while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp) == 0) + ; + + return 0; +} + +/* + * Discard the preallocation on a service. + */ +void rxrpc_discard_prealloc(struct rxrpc_sock *rx) +{ + struct rxrpc_backlog *b = rx->backlog; + unsigned int size = RXRPC_BACKLOG_MAX, head, tail; + + if (!b) + return; + rx->backlog = NULL; + + head = b->peer_backlog_head; + tail = b->peer_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_peer *peer = b->peer_backlog[tail]; + kfree(peer); + tail = (tail + 1) & (size - 1); + } + + head = b->conn_backlog_head; + tail = b->conn_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_connection *conn = b->conn_backlog[tail]; + write_lock(&rxrpc_connection_lock); + list_del(&conn->link); + list_del(&conn->proc_link); + write_unlock(&rxrpc_connection_lock); + kfree(conn); + tail = (tail + 1) & (size - 1); + } + + head = b->call_backlog_head; + tail = b->call_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_call *call = b->call_backlog[tail]; + if (rx->discard_new_call) { + _debug("discard %lx", call->user_call_ID); + rx->discard_new_call(call, call->user_call_ID); + } + rxrpc_call_completed(call); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + tail = (tail + 1) & (size - 1); + } + + kfree(b); +} + /* * generate a connection-level abort */ @@ -450,3 +648,34 @@ int rxrpc_kernel_reject_call(struct socket *sock) return ret; } EXPORT_SYMBOL(rxrpc_kernel_reject_call); + +/* + * rxrpc_kernel_charge_accept - Charge up socket with preallocated calls + * @sock: The socket on which to preallocate + * @notify_rx: Event notification function for the call + * @user_attach_call: Func to attach call to user_call_ID + * @user_call_ID: The tag to attach to the preallocated call + * @gfp: The allocation conditions. + * + * Charge up the socket with preallocated calls, each with a user ID. A + * function should be provided to effect the attachment from the user's side. + * The user is given a ref to hold on the call. + * + * Note that the call may be come connected before this function returns. + */ +int rxrpc_kernel_charge_accept(struct socket *sock, + rxrpc_notify_rx_t notify_rx, + rxrpc_user_attach_call_t user_attach_call, + unsigned long user_call_ID, gfp_t gfp) +{ + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + struct rxrpc_backlog *b = rx->backlog; + + if (sock->sk->sk_state == RXRPC_CLOSE) + return -ESHUTDOWN; + + return rxrpc_service_prealloc_one(rx, b, notify_rx, + user_attach_call, user_call_ID, + gfp); +} +EXPORT_SYMBOL(rxrpc_kernel_charge_accept); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f843397e03b6..d233adc9b5e5 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -31,6 +31,7 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", [RXRPC_CALL_CLIENT_FINAL_ACK] = "ClFnlACK", + [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", [RXRPC_CALL_SERVER_SECURING] = "SvSecure", [RXRPC_CALL_SERVER_ACCEPTING] = "SvAccept", [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", @@ -71,7 +72,6 @@ DEFINE_RWLOCK(rxrpc_call_lock); static void rxrpc_call_life_expired(unsigned long _call); static void rxrpc_ack_time_expired(unsigned long _call); static void rxrpc_resend_time_expired(unsigned long _call); -static void rxrpc_cleanup_call(struct rxrpc_call *call); /* * find an extant server call @@ -113,7 +113,7 @@ found_extant_call: /* * allocate a new call */ -static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) +struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) { struct rxrpc_call *call; @@ -392,6 +392,9 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, if (call_id <= conn->channels[chan].call_counter) goto old_call; /* TODO: Just drop packet */ + /* Temporary: Mirror the backlog prealloc ref (TODO: use prealloc) */ + rxrpc_get_call(candidate, rxrpc_call_got); + /* make the call available */ _debug("new call"); call = candidate; @@ -596,6 +599,9 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) del_timer_sync(&call->ack_timer); del_timer_sync(&call->lifetimer); + /* We have to release the prealloc backlog ref */ + if (rxrpc_is_service_call(call)) + rxrpc_put_call(call, rxrpc_call_put); _leave(""); } @@ -682,7 +688,7 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) /* * clean up a call */ -static void rxrpc_cleanup_call(struct rxrpc_call *call) +void rxrpc_cleanup_call(struct rxrpc_call *call) { _net("DESTROY CALL %d", call->debug_id); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 9c6685b97e70..8da82e3aa00e 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -286,6 +286,8 @@ static void rxrpc_connection_reaper(struct work_struct *work) ASSERTCMP(atomic_read(&conn->usage), >, 0); if (likely(atomic_read(&conn->usage) > 1)) continue; + if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) + continue; idle_timestamp = READ_ONCE(conn->idle_timestamp); _debug("reap CONN %d { u=%d,t=%ld }", diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 316a92107fee..189338a60457 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -118,6 +118,30 @@ replace_old_connection: goto conn_published; } +/* + * Preallocate a service connection. The connection is placed on the proc and + * reap lists so that we don't have to get the lock from BH context. + */ +struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp) +{ + struct rxrpc_connection *conn = rxrpc_alloc_connection(gfp); + + if (conn) { + /* We maintain an extra ref on the connection whilst it is on + * the rxrpc_connections list. + */ + conn->state = RXRPC_CONN_SERVICE_PREALLOC; + atomic_set(&conn->usage, 2); + + write_lock(&rxrpc_connection_lock); + list_add_tail(&conn->link, &rxrpc_connections); + list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); + write_unlock(&rxrpc_connection_lock); + } + + return conn; +} + /* * get a record of an incoming connection */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 6c4b7df05e95..5906579060cd 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -102,7 +102,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, rx->notify_new_call) { spin_unlock_bh(&sk->sk_receive_queue.lock); skb_queue_tail(&call->knlrecv_queue, skb); - rx->notify_new_call(&rx->sk); + rx->notify_new_call(&rx->sk, NULL, 0); } else if (call->notify_rx) { spin_unlock_bh(&sk->sk_receive_queue.lock); skb_queue_tail(&call->knlrecv_queue, skb); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index dfad23821a62..d529d1b4021c 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -17,6 +17,7 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { [RXRPC_CONN_UNUSED] = "Unused ", [RXRPC_CONN_CLIENT] = "Client ", + [RXRPC_CONN_SERVICE_PREALLOC] = "SvPrealc", [RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ", [RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ", [RXRPC_CONN_SERVICE] = "SvSecure", @@ -156,6 +157,11 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) } conn = list_entry(v, struct rxrpc_connection, proc_link); + if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) { + strcpy(lbuff, "no_local"); + strcpy(rbuff, "no_connection"); + goto print; + } sprintf(lbuff, "%pI4:%u", &conn->params.local->srx.transport.sin.sin_addr, @@ -164,7 +170,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) sprintf(rbuff, "%pI4:%u", &conn->params.peer->srx.transport.sin.sin_addr, ntohs(conn->params.peer->srx.transport.sin.sin_port)); - +print: seq_printf(seq, "UDP %-22.22s %-22.22s %4x %08x %s %3u" " %s %08x %08x %08x\n", -- cgit v1.2.3 From 248f219cb8bcbfbd7f132752d44afa2df7c241d1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Sep 2016 11:10:12 +0100 Subject: rxrpc: Rewrite the data and ack handling code Rewrite the data and ack handling code such that: (1) Parsing of received ACK and ABORT packets and the distribution and the filing of DATA packets happens entirely within the data_ready context called from the UDP socket. This allows us to process and discard ACK and ABORT packets much more quickly (they're no longer stashed on a queue for a background thread to process). (2) We avoid calling skb_clone(), pskb_pull() and pskb_trim(). We instead keep track of the offset and length of the content of each packet in the sk_buff metadata. This means we don't do any allocation in the receive path. (3) Jumbo DATA packet parsing is now done in data_ready context. Rather than cloning the packet once for each subpacket and pulling/trimming it, we file the packet multiple times with an annotation for each indicating which subpacket is there. From that we can directly calculate the offset and length. (4) A call's receive queue can be accessed without taking locks (memory barriers do have to be used, though). (5) Incoming calls are set up from preallocated resources and immediately made live. They can than have packets queued upon them and ACKs generated. If insufficient resources exist, DATA packet #1 is given a BUSY reply and other DATA packets are discarded). (6) sk_buffs no longer take a ref on their parent call. To make this work, the following changes are made: (1) Each call's receive buffer is now a circular buffer of sk_buff pointers (rxtx_buffer) rather than a number of sk_buff_heads spread between the call and the socket. This permits each sk_buff to be in the buffer multiple times. The receive buffer is reused for the transmit buffer. (2) A circular buffer of annotations (rxtx_annotations) is kept parallel to the data buffer. Transmission phase annotations indicate whether a buffered packet has been ACK'd or not and whether it needs retransmission. Receive phase annotations indicate whether a slot holds a whole packet or a jumbo subpacket and, if the latter, which subpacket. They also note whether the packet has been decrypted in place. (3) DATA packet window tracking is much simplified. Each phase has just two numbers representing the window (rx_hard_ack/rx_top and tx_hard_ack/tx_top). The hard_ack number is the sequence number before base of the window, representing the last packet the other side says it has consumed. hard_ack starts from 0 and the first packet is sequence number 1. The top number is the sequence number of the highest-numbered packet residing in the buffer. Packets between hard_ack+1 and top are soft-ACK'd to indicate they've been received, but not yet consumed. Four macros, before(), before_eq(), after() and after_eq() are added to compare sequence numbers within the window. This allows for the top of the window to wrap when the hard-ack sequence number gets close to the limit. Two flags, RXRPC_CALL_RX_LAST and RXRPC_CALL_TX_LAST, are added also to indicate when rx_top and tx_top point at the packets with the LAST_PACKET bit set, indicating the end of the phase. (4) Calls are queued on the socket 'receive queue' rather than packets. This means that we don't need have to invent dummy packets to queue to indicate abnormal/terminal states and we don't have to keep metadata packets (such as ABORTs) around (5) The offset and length of a (sub)packet's content are now passed to the verify_packet security op. This is currently expected to decrypt the packet in place and validate it. However, there's now nowhere to store the revised offset and length of the actual data within the decrypted blob (there may be a header and padding to skip) because an sk_buff may represent multiple packets, so a locate_data security op is added to retrieve these details from the sk_buff content when needed. (6) recvmsg() now has to handle jumbo subpackets, where each subpacket is individually secured and needs to be individually decrypted. The code to do this is broken out into rxrpc_recvmsg_data() and shared with the kernel API. It now iterates over the call's receive buffer rather than walking the socket receive queue. Additional changes: (1) The timers are condensed to a single timer that is set for the soonest of three timeouts (delayed ACK generation, DATA retransmission and call lifespan). (2) Transmission of ACK and ABORT packets is effected immediately from process-context socket ops/kernel API calls that cause them instead of them being punted off to a background work item. The data_ready handler still has to defer to the background, though. (3) A shutdown op is added to the AF_RXRPC socket so that the AFS filesystem can shut down the socket and flush its own work items before closing the socket to deal with any in-progress service calls. Future additional changes that will need to be considered: (1) Make sure that a call doesn't hog the front of the queue by receiving data from the network as fast as userspace is consuming it to the exclusion of other calls. (2) Transmit delayed ACKs from within recvmsg() when we've consumed sufficiently more packets to avoid the background work item needing to run. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 51 +- include/net/af_rxrpc.h | 3 - include/rxrpc/packet.h | 7 + net/rxrpc/af_rxrpc.c | 57 +- net/rxrpc/ar-internal.h | 177 +++--- net/rxrpc/call_accept.c | 472 +++++++--------- net/rxrpc/call_event.c | 1357 +++++++--------------------------------------- net/rxrpc/call_object.c | 535 +++++------------- net/rxrpc/conn_event.c | 137 +---- net/rxrpc/conn_object.c | 6 +- net/rxrpc/conn_service.c | 101 +--- net/rxrpc/input.c | 1044 ++++++++++++++++++----------------- net/rxrpc/insecure.c | 13 +- net/rxrpc/local_event.c | 2 +- net/rxrpc/local_object.c | 7 - net/rxrpc/misc.c | 2 +- net/rxrpc/output.c | 125 ++++- net/rxrpc/peer_event.c | 17 +- net/rxrpc/peer_object.c | 82 ++- net/rxrpc/recvmsg.c | 764 ++++++++++++++------------ net/rxrpc/rxkad.c | 108 +++- net/rxrpc/security.c | 10 +- net/rxrpc/sendmsg.c | 126 ++--- net/rxrpc/skbuff.c | 127 ----- 24 files changed, 1993 insertions(+), 3337 deletions(-) (limited to 'include/net') diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 720ef05a24fe..59bdaa7527b6 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -55,10 +55,8 @@ static const struct afs_call_type afs_RXCMxxxx = { .abort_to_error = afs_abort_to_error, }; -static void afs_collect_incoming_call(struct work_struct *); static void afs_charge_preallocation(struct work_struct *); -static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call); static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation); static int afs_wait_atomic_t(atomic_t *p) @@ -143,6 +141,8 @@ void afs_close_socket(void) TASK_UNINTERRUPTIBLE); _debug("no outstanding calls"); + flush_workqueue(afs_async_calls); + kernel_sock_shutdown(afs_socket, SHUT_RDWR); flush_workqueue(afs_async_calls); sock_release(afs_socket); @@ -602,51 +602,6 @@ static void afs_process_async_call(struct work_struct *work) _leave(""); } -/* - * accept the backlog of incoming calls - */ -static void afs_collect_incoming_call(struct work_struct *work) -{ - struct rxrpc_call *rxcall; - struct afs_call *call = NULL; - - _enter(""); - - do { - if (!call) { - call = kzalloc(sizeof(struct afs_call), GFP_KERNEL); - if (!call) { - rxrpc_kernel_reject_call(afs_socket); - return; - } - - INIT_WORK(&call->async_work, afs_process_async_call); - call->wait_mode = &afs_async_incoming_call; - call->type = &afs_RXCMxxxx; - init_waitqueue_head(&call->waitq); - call->state = AFS_CALL_AWAIT_OP_ID; - - _debug("CALL %p{%s} [%d]", - call, call->type->name, - atomic_read(&afs_outstanding_calls)); - atomic_inc(&afs_outstanding_calls); - } - - rxcall = rxrpc_kernel_accept_call(afs_socket, - (unsigned long)call, - afs_wake_up_async_call); - if (!IS_ERR(rxcall)) { - call->rxcall = rxcall; - call->need_attention = true; - queue_work(afs_async_calls, &call->async_work); - call = NULL; - } - } while (!call); - - if (call) - afs_free_call(call); -} - static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID) { struct afs_call *call = (struct afs_call *)user_call_ID; @@ -704,7 +659,7 @@ static void afs_rx_discard_new_call(struct rxrpc_call *rxcall, static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall, unsigned long user_call_ID) { - queue_work(afs_wq, &afs_collect_incoming_call_work); + atomic_inc(&afs_outstanding_calls); queue_work(afs_wq, &afs_charge_preallocation_work); } diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 9cf551be916b..1061a472a3e3 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -42,9 +42,6 @@ int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, u32, int, const char *); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); -struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long, - rxrpc_notify_rx_t); -int rxrpc_kernel_reject_call(struct socket *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h index b0ae5c1a6ce6..fd6eb3a60a8c 100644 --- a/include/rxrpc/packet.h +++ b/include/rxrpc/packet.h @@ -133,6 +133,13 @@ struct rxrpc_ackpacket { } __packed; +/* Some ACKs refer to specific packets and some are general and can be updated. */ +#define RXRPC_ACK_UPDATEABLE ((1 << RXRPC_ACK_REQUESTED) | \ + (1 << RXRPC_ACK_PING_RESPONSE) | \ + (1 << RXRPC_ACK_DELAY) | \ + (1 << RXRPC_ACK_IDLE)) + + /* * ACK packets can have a further piece of information tagged on the end */ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 1e8cf3ded81f..caa226dd436e 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -155,7 +155,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) } if (rx->srx.srx_service) { - write_lock_bh(&local->services_lock); + write_lock(&local->services_lock); hlist_for_each_entry(prx, &local->services, listen_link) { if (prx->srx.srx_service == rx->srx.srx_service) goto service_in_use; @@ -163,7 +163,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) rx->local = local; hlist_add_head_rcu(&rx->listen_link, &local->services); - write_unlock_bh(&local->services_lock); + write_unlock(&local->services_lock); rx->sk.sk_state = RXRPC_SERVER_BOUND; } else { @@ -176,7 +176,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) return 0; service_in_use: - write_unlock_bh(&local->services_lock); + write_unlock(&local->services_lock); rxrpc_put_local(local); ret = -EADDRINUSE; error_unlock: @@ -515,15 +515,16 @@ error: static unsigned int rxrpc_poll(struct file *file, struct socket *sock, poll_table *wait) { - unsigned int mask; struct sock *sk = sock->sk; + struct rxrpc_sock *rx = rxrpc_sk(sk); + unsigned int mask; sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* the socket is readable if there are any messages waiting on the Rx * queue */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!list_empty(&rx->recvmsg_q)) mask |= POLLIN | POLLRDNORM; /* the socket is writable if there is space to add new data to the @@ -575,8 +576,11 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, rx->calls = RB_ROOT; INIT_HLIST_NODE(&rx->listen_link); - INIT_LIST_HEAD(&rx->secureq); - INIT_LIST_HEAD(&rx->acceptq); + spin_lock_init(&rx->incoming_lock); + INIT_LIST_HEAD(&rx->sock_calls); + INIT_LIST_HEAD(&rx->to_be_accepted); + INIT_LIST_HEAD(&rx->recvmsg_q); + rwlock_init(&rx->recvmsg_lock); rwlock_init(&rx->call_lock); memset(&rx->srx, 0, sizeof(rx->srx)); @@ -584,6 +588,39 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, return 0; } +/* + * Kill all the calls on a socket and shut it down. + */ +static int rxrpc_shutdown(struct socket *sock, int flags) +{ + struct sock *sk = sock->sk; + struct rxrpc_sock *rx = rxrpc_sk(sk); + int ret = 0; + + _enter("%p,%d", sk, flags); + + if (flags != SHUT_RDWR) + return -EOPNOTSUPP; + if (sk->sk_state == RXRPC_CLOSE) + return -ESHUTDOWN; + + lock_sock(sk); + + spin_lock_bh(&sk->sk_receive_queue.lock); + if (sk->sk_state < RXRPC_CLOSE) { + sk->sk_state = RXRPC_CLOSE; + sk->sk_shutdown = SHUTDOWN_MASK; + } else { + ret = -ESHUTDOWN; + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + + rxrpc_discard_prealloc(rx); + + release_sock(sk); + return ret; +} + /* * RxRPC socket destructor */ @@ -623,9 +660,9 @@ static int rxrpc_release_sock(struct sock *sk) ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1); if (!hlist_unhashed(&rx->listen_link)) { - write_lock_bh(&rx->local->services_lock); + write_lock(&rx->local->services_lock); hlist_del_rcu(&rx->listen_link); - write_unlock_bh(&rx->local->services_lock); + write_unlock(&rx->local->services_lock); } /* try to flush out this socket */ @@ -678,7 +715,7 @@ static const struct proto_ops rxrpc_rpc_ops = { .poll = rxrpc_poll, .ioctl = sock_no_ioctl, .listen = rxrpc_listen, - .shutdown = sock_no_shutdown, + .shutdown = rxrpc_shutdown, .setsockopt = rxrpc_setsockopt, .getsockopt = sock_no_getsockopt, .sendmsg = rxrpc_sendmsg, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 45e1c269f90e..b1cb79ec4e96 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -94,9 +94,12 @@ struct rxrpc_sock { rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */ struct rxrpc_local *local; /* local endpoint */ struct hlist_node listen_link; /* link in the local endpoint's listen list */ - struct list_head secureq; /* calls awaiting connection security clearance */ - struct list_head acceptq; /* calls awaiting acceptance */ struct rxrpc_backlog *backlog; /* Preallocation for services */ + spinlock_t incoming_lock; /* Incoming call vs service shutdown lock */ + struct list_head sock_calls; /* List of calls owned by this socket */ + struct list_head to_be_accepted; /* calls awaiting acceptance */ + struct list_head recvmsg_q; /* Calls awaiting recvmsg's attention */ + rwlock_t recvmsg_lock; /* Lock for recvmsg_q */ struct key *key; /* security for this socket */ struct key *securities; /* list of server security descriptors */ struct rb_root calls; /* User ID -> call mapping */ @@ -138,13 +141,16 @@ struct rxrpc_host_header { * - max 48 bytes (struct sk_buff::cb) */ struct rxrpc_skb_priv { - struct rxrpc_call *call; /* call with which associated */ - unsigned long resend_at; /* time in jiffies at which to resend */ + union { + unsigned long resend_at; /* time in jiffies at which to resend */ + struct { + u8 nr_jumbo; /* Number of jumbo subpackets */ + }; + }; union { unsigned int offset; /* offset into buffer of next read */ int remain; /* amount of space remaining for next write */ u32 error; /* network error code */ - bool need_resend; /* T if needs resending */ }; struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ @@ -179,7 +185,11 @@ struct rxrpc_security { /* verify the security on a received packet */ int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, - rxrpc_seq_t, u16); + unsigned int, unsigned int, rxrpc_seq_t, u16); + + /* Locate the data in a received packet that has been verified. */ + void (*locate_data)(struct rxrpc_call *, struct sk_buff *, + unsigned int *, unsigned int *); /* issue a challenge */ int (*issue_challenge)(struct rxrpc_connection *); @@ -211,7 +221,6 @@ struct rxrpc_local { struct work_struct processor; struct hlist_head services; /* services listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ - struct sk_buff_head accept_queue; /* incoming calls awaiting acceptance */ struct sk_buff_head reject_queue; /* packets awaiting rejection */ struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */ struct rb_root client_conns; /* Client connections by socket params */ @@ -388,38 +397,21 @@ struct rxrpc_connection { */ enum rxrpc_call_flag { RXRPC_CALL_RELEASED, /* call has been released - no more message to userspace */ - RXRPC_CALL_TERMINAL_MSG, /* call has given the socket its final message */ - RXRPC_CALL_RCVD_LAST, /* all packets received */ - RXRPC_CALL_RUN_RTIMER, /* Tx resend timer started */ - RXRPC_CALL_TX_SOFT_ACK, /* sent some soft ACKs */ - RXRPC_CALL_INIT_ACCEPT, /* acceptance was initiated */ RXRPC_CALL_HAS_USERID, /* has a user ID attached */ - RXRPC_CALL_EXPECT_OOS, /* expect out of sequence packets */ RXRPC_CALL_IS_SERVICE, /* Call is service call */ RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ - RXRPC_CALL_RX_NO_MORE, /* Don't indicate MSG_MORE from recvmsg() */ + RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ + RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ }; /* * Events that can be raised on a call. */ enum rxrpc_call_event { - RXRPC_CALL_EV_RCVD_ACKALL, /* ACKALL or reply received */ - RXRPC_CALL_EV_RCVD_BUSY, /* busy packet received */ - RXRPC_CALL_EV_RCVD_ABORT, /* abort packet received */ - RXRPC_CALL_EV_RCVD_ERROR, /* network error received */ - RXRPC_CALL_EV_ACK_FINAL, /* need to generate final ACK (and release call) */ RXRPC_CALL_EV_ACK, /* need to generate ACK */ - RXRPC_CALL_EV_REJECT_BUSY, /* need to generate busy message */ RXRPC_CALL_EV_ABORT, /* need to generate abort */ - RXRPC_CALL_EV_CONN_ABORT, /* local connection abort generated */ - RXRPC_CALL_EV_RESEND_TIMER, /* Tx resend timer expired */ + RXRPC_CALL_EV_TIMER, /* Timer expired */ RXRPC_CALL_EV_RESEND, /* Tx resend required */ - RXRPC_CALL_EV_DRAIN_RX_OOS, /* drain the Rx out of sequence queue */ - RXRPC_CALL_EV_LIFE_TIMER, /* call's lifetimer ran out */ - RXRPC_CALL_EV_ACCEPTED, /* incoming call accepted by userspace app */ - RXRPC_CALL_EV_SECURED, /* incoming call's connection is now secure */ - RXRPC_CALL_EV_POST_ACCEPT, /* need to post an "accept?" message to the app */ }; /* @@ -431,7 +423,6 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */ RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ - RXRPC_CALL_CLIENT_FINAL_ACK, /* - client sending final ACK phase */ RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */ @@ -448,7 +439,6 @@ enum rxrpc_call_state { */ enum rxrpc_call_completion { RXRPC_CALL_SUCCEEDED, /* - Normal termination */ - RXRPC_CALL_SERVER_BUSY, /* - call rejected by busy server */ RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ @@ -465,24 +455,23 @@ struct rxrpc_call { struct rxrpc_connection *conn; /* connection carrying call */ struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_sock __rcu *socket; /* socket responsible */ - struct timer_list lifetimer; /* lifetime remaining on call */ - struct timer_list ack_timer; /* ACK generation timer */ - struct timer_list resend_timer; /* Tx resend timer */ - struct work_struct processor; /* packet processor and ACK generator */ + unsigned long ack_at; /* When deferred ACK needs to happen */ + unsigned long resend_at; /* When next resend needs to happen */ + unsigned long expire_at; /* When the call times out */ + struct timer_list timer; /* Combined event timer */ + struct work_struct processor; /* Event processor */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ struct list_head link; /* link in master call list */ struct list_head chan_wait_link; /* Link in conn->waiting_calls */ struct hlist_node error_link; /* link in error distribution list */ - struct list_head accept_link; /* calls awaiting acceptance */ - struct rb_node sock_node; /* node in socket call tree */ - struct sk_buff_head rx_queue; /* received packets */ - struct sk_buff_head rx_oos_queue; /* packets received out of sequence */ - struct sk_buff_head knlrecv_queue; /* Queue for kernel_recv [TODO: replace this] */ + struct list_head accept_link; /* Link in rx->acceptq */ + struct list_head recvmsg_link; /* Link in rx->recvmsg_q */ + struct list_head sock_link; /* Link in rx->sock_calls */ + struct rb_node sock_node; /* Node in rx->calls */ struct sk_buff *tx_pending; /* Tx socket buffer being filled */ wait_queue_head_t waitq; /* Wait queue for channel or Tx */ __be32 crypto_buf[2]; /* Temporary packet crypto buffer */ unsigned long user_call_ID; /* user-defined call ID */ - unsigned long creation_jif; /* time of call creation */ unsigned long flags; unsigned long events; spinlock_t lock; @@ -492,40 +481,55 @@ struct rxrpc_call { enum rxrpc_call_state state; /* current state of call */ enum rxrpc_call_completion completion; /* Call completion condition */ atomic_t usage; - atomic_t sequence; /* Tx data packet sequence counter */ u16 service_id; /* service ID */ u8 security_ix; /* Security type */ u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ int debug_id; /* debug ID for printks */ - /* transmission-phase ACK management */ - u8 acks_head; /* offset into window of first entry */ - u8 acks_tail; /* offset into window of last entry */ - u8 acks_winsz; /* size of un-ACK'd window */ - u8 acks_unacked; /* lowest unacked packet in last ACK received */ - int acks_latest; /* serial number of latest ACK received */ - rxrpc_seq_t acks_hard; /* highest definitively ACK'd msg seq */ - unsigned long *acks_window; /* sent packet window - * - elements are pointers with LSB set if ACK'd + /* Rx/Tx circular buffer, depending on phase. + * + * In the Rx phase, packets are annotated with 0 or the number of the + * segment of a jumbo packet each buffer refers to. There can be up to + * 47 segments in a maximum-size UDP packet. + * + * In the Tx phase, packets are annotated with which buffers have been + * acked. + */ +#define RXRPC_RXTX_BUFF_SIZE 64 +#define RXRPC_RXTX_BUFF_MASK (RXRPC_RXTX_BUFF_SIZE - 1) + struct sk_buff **rxtx_buffer; + u8 *rxtx_annotations; +#define RXRPC_TX_ANNO_ACK 0 +#define RXRPC_TX_ANNO_UNACK 1 +#define RXRPC_TX_ANNO_NAK 2 +#define RXRPC_TX_ANNO_RETRANS 3 +#define RXRPC_RX_ANNO_JUMBO 0x3f /* Jumbo subpacket number + 1 if not zero */ +#define RXRPC_RX_ANNO_JLAST 0x40 /* Set if last element of a jumbo packet */ +#define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */ + rxrpc_seq_t tx_hard_ack; /* Dead slot in buffer; the first transmitted but + * not hard-ACK'd packet follows this. + */ + rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ + rxrpc_seq_t rx_hard_ack; /* Dead slot in buffer; the first received but not + * consumed packet follows this. */ + rxrpc_seq_t rx_top; /* Highest Rx slot allocated. */ + rxrpc_seq_t rx_expect_next; /* Expected next packet sequence number */ + u8 rx_winsize; /* Size of Rx window */ + u8 tx_winsize; /* Maximum size of Tx window */ + u8 nr_jumbo_dup; /* Number of jumbo duplicates */ /* receive-phase ACK management */ - rxrpc_seq_t rx_data_expect; /* next data seq ID expected to be received */ - rxrpc_seq_t rx_data_post; /* next data seq ID expected to be posted */ - rxrpc_seq_t rx_data_recv; /* last data seq ID encountered by recvmsg */ - rxrpc_seq_t rx_data_eaten; /* last data seq ID consumed by recvmsg */ - rxrpc_seq_t rx_first_oos; /* first packet in rx_oos_queue (or 0) */ - rxrpc_seq_t ackr_win_top; /* top of ACK window (rx_data_eaten is bottom) */ - rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ u8 ackr_reason; /* reason to ACK */ u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ - atomic_t ackr_not_idle; /* number of packets in Rx queue */ + rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ + unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ + unsigned short rx_pkt_len; /* Current recvmsg packet len */ - /* received packet records, 1 bit per record */ -#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG) - unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1]; + /* transmission-phase ACK management */ + rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ }; enum rxrpc_call_trace { @@ -535,10 +539,8 @@ enum rxrpc_call_trace { rxrpc_call_queued_ref, rxrpc_call_seen, rxrpc_call_got, - rxrpc_call_got_skb, rxrpc_call_got_userid, rxrpc_call_put, - rxrpc_call_put_skb, rxrpc_call_put_userid, rxrpc_call_put_noqueue, rxrpc_call__nr_trace @@ -561,6 +563,9 @@ extern struct workqueue_struct *rxrpc_workqueue; */ int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); void rxrpc_discard_prealloc(struct rxrpc_sock *); +struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, + struct rxrpc_connection *, + struct sk_buff *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, rxrpc_notify_rx_t); @@ -569,8 +574,7 @@ int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool); -void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool); +void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool); void rxrpc_process_call(struct work_struct *); /* @@ -589,9 +593,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, unsigned long, gfp_t); -struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, - struct rxrpc_connection *, - struct sk_buff *); +void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, + struct sk_buff *); void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); bool __rxrpc_queue_call(struct rxrpc_call *); @@ -599,8 +602,6 @@ bool rxrpc_queue_call(struct rxrpc_call *); void rxrpc_see_call(struct rxrpc_call *); void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); -void rxrpc_get_call_for_skb(struct rxrpc_call *, struct sk_buff *); -void rxrpc_put_call_for_skb(struct rxrpc_call *, struct sk_buff *); void rxrpc_cleanup_call(struct rxrpc_call *); void __exit rxrpc_destroy_all_calls(void); @@ -672,13 +673,8 @@ static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, { trace_rxrpc_abort(why, call->cid, call->call_id, seq, abort_code, error); - if (__rxrpc_set_call_completion(call, - RXRPC_CALL_LOCALLY_ABORTED, - abort_code, error)) { - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - return true; - } - return false; + return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, + abort_code, error); } static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, @@ -713,8 +709,6 @@ void __exit rxrpc_destroy_all_client_connections(void); * conn_event.c */ void rxrpc_process_connection(struct work_struct *); -void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *); -void rxrpc_reject_packets(struct rxrpc_local *); /* * conn_object.c @@ -783,18 +777,14 @@ static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn) */ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *, struct sk_buff *); -struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *, - struct sockaddr_rxrpc *, - struct sk_buff *); struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t); +void rxrpc_new_incoming_connection(struct rxrpc_connection *, struct sk_buff *); void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* * input.c */ void rxrpc_data_ready(struct sock *); -int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool, bool); -void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *); /* * insecure.c @@ -868,6 +858,7 @@ extern const char *rxrpc_acks(u8 reason); */ int rxrpc_send_call_packet(struct rxrpc_call *, u8); int rxrpc_send_data_packet(struct rxrpc_connection *, struct sk_buff *); +void rxrpc_reject_packets(struct rxrpc_local *); /* * peer_event.c @@ -883,6 +874,8 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *, struct sockaddr_rxrpc *, gfp_t); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t); +struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *, + struct rxrpc_peer *); static inline struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) { @@ -912,6 +905,7 @@ extern const struct file_operations rxrpc_connection_seq_fops; /* * recvmsg.c */ +void rxrpc_notify_socket(struct rxrpc_call *); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* @@ -961,6 +955,23 @@ static inline void rxrpc_sysctl_exit(void) {} */ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); +static inline bool before(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) < 0; +} +static inline bool before_eq(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) <= 0; +} +static inline bool after(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) > 0; +} +static inline bool after_eq(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) >= 0; +} + /* * debug tracing */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index cc7194e05a15..b8acec0d596e 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -129,6 +129,8 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, set_bit(RXRPC_CALL_HAS_USERID, &call->flags); } + list_add(&call->sock_link, &rx->sock_calls); + write_unlock(&rx->call_lock); write_lock(&rxrpc_call_lock); @@ -186,6 +188,12 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) return; rx->backlog = NULL; + /* Make sure that there aren't any incoming calls in progress before we + * clear the preallocation buffers. + */ + spin_lock_bh(&rx->incoming_lock); + spin_unlock_bh(&rx->incoming_lock); + head = b->peer_backlog_head; tail = b->peer_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { @@ -224,251 +232,179 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) } /* - * generate a connection-level abort + * Allocate a new incoming call from the prealloc pool, along with a connection + * and a peer as necessary. */ -static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, - struct rxrpc_wire_header *whdr) +static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, + struct rxrpc_local *local, + struct rxrpc_connection *conn, + struct sk_buff *skb) { - struct msghdr msg; - struct kvec iov[1]; - size_t len; - int ret; - - _enter("%d,,", local->debug_id); - - whdr->type = RXRPC_PACKET_TYPE_BUSY; - whdr->serial = htonl(1); - - msg.msg_name = &srx->transport.sin; - msg.msg_namelen = sizeof(srx->transport.sin); - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - - iov[0].iov_base = whdr; - iov[0].iov_len = sizeof(*whdr); - - len = iov[0].iov_len; - - _proto("Tx BUSY %%1"); + struct rxrpc_backlog *b = rx->backlog; + struct rxrpc_peer *peer, *xpeer; + struct rxrpc_call *call; + unsigned short call_head, conn_head, peer_head; + unsigned short call_tail, conn_tail, peer_tail; + unsigned short call_count, conn_count; + + /* #calls >= #conns >= #peers must hold true. */ + call_head = smp_load_acquire(&b->call_backlog_head); + call_tail = b->call_backlog_tail; + call_count = CIRC_CNT(call_head, call_tail, RXRPC_BACKLOG_MAX); + conn_head = smp_load_acquire(&b->conn_backlog_head); + conn_tail = b->conn_backlog_tail; + conn_count = CIRC_CNT(conn_head, conn_tail, RXRPC_BACKLOG_MAX); + ASSERTCMP(conn_count, >=, call_count); + peer_head = smp_load_acquire(&b->peer_backlog_head); + peer_tail = b->peer_backlog_tail; + ASSERTCMP(CIRC_CNT(peer_head, peer_tail, RXRPC_BACKLOG_MAX), >=, + conn_count); + + if (call_count == 0) + return NULL; + + if (!conn) { + /* No connection. We're going to need a peer to start off + * with. If one doesn't yet exist, use a spare from the + * preallocation set. We dump the address into the spare in + * anticipation - and to save on stack space. + */ + xpeer = b->peer_backlog[peer_tail]; + if (rxrpc_extract_addr_from_skb(&xpeer->srx, skb) < 0) + return NULL; + + peer = rxrpc_lookup_incoming_peer(local, xpeer); + if (peer == xpeer) { + b->peer_backlog[peer_tail] = NULL; + smp_store_release(&b->peer_backlog_tail, + (peer_tail + 1) & + (RXRPC_BACKLOG_MAX - 1)); + } - ret = kernel_sendmsg(local->socket, &msg, iov, 1, len); - if (ret < 0) { - _leave(" = -EAGAIN [sendmsg failed: %d]", ret); - return -EAGAIN; + /* Now allocate and set up the connection */ + conn = b->conn_backlog[conn_tail]; + b->conn_backlog[conn_tail] = NULL; + smp_store_release(&b->conn_backlog_tail, + (conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); + rxrpc_get_local(local); + conn->params.local = local; + conn->params.peer = peer; + rxrpc_new_incoming_connection(conn, skb); + } else { + rxrpc_get_connection(conn); } - _leave(" = 0"); - return 0; + /* And now we can allocate and set up a new call */ + call = b->call_backlog[call_tail]; + b->call_backlog[call_tail] = NULL; + smp_store_release(&b->call_backlog_tail, + (call_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); + + call->conn = conn; + call->peer = rxrpc_get_peer(conn->params.peer); + return call; } /* - * accept an incoming call that needs peer, transport and/or connection setting - * up + * Set up a new incoming call. Called in BH context with the RCU read lock + * held. + * + * If this is for a kernel service, when we allocate the call, it will have + * three refs on it: (1) the kernel service, (2) the user_call_ID tree, (3) the + * retainer ref obtained from the backlog buffer. Prealloc calls for userspace + * services only have the ref from the backlog buffer. We want to pass this + * ref to non-BH context to dispose of. + * + * If we want to report an error, we mark the skb with the packet type and + * abort code and return NULL. */ -static int rxrpc_accept_incoming_call(struct rxrpc_local *local, - struct rxrpc_sock *rx, - struct sk_buff *skb, - struct sockaddr_rxrpc *srx) +struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, + struct rxrpc_connection *conn, + struct sk_buff *skb) { - struct rxrpc_connection *conn; - struct rxrpc_skb_priv *sp, *nsp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_sock *rx; struct rxrpc_call *call; - struct sk_buff *notification; - int ret; _enter(""); - sp = rxrpc_skb(skb); - - /* get a notification message to send to the server app */ - notification = alloc_skb(0, GFP_NOFS); - if (!notification) { - _debug("no memory"); - ret = -ENOMEM; - goto error_nofree; - } - rxrpc_new_skb(notification); - notification->mark = RXRPC_SKB_MARK_NEW_CALL; - - conn = rxrpc_incoming_connection(local, srx, skb); - if (IS_ERR(conn)) { - _debug("no conn"); - ret = PTR_ERR(conn); - goto error; - } - - call = rxrpc_incoming_call(rx, conn, skb); - rxrpc_put_connection(conn); - if (IS_ERR(call)) { - _debug("no call"); - ret = PTR_ERR(call); - goto error; + /* Get the socket providing the service */ + hlist_for_each_entry_rcu_bh(rx, &local->services, listen_link) { + if (rx->srx.srx_service == sp->hdr.serviceId) + goto found_service; } - /* attach the call to the socket */ - read_lock_bh(&local->services_lock); - if (rx->sk.sk_state == RXRPC_CLOSE) - goto invalid_service; - - write_lock(&rx->call_lock); - if (!test_and_set_bit(RXRPC_CALL_INIT_ACCEPT, &call->flags)) { - rxrpc_get_call(call, rxrpc_call_got); - - spin_lock(&call->conn->state_lock); - if (sp->hdr.securityIndex > 0 && - call->conn->state == RXRPC_CONN_SERVICE_UNSECURED) { - _debug("await conn sec"); - list_add_tail(&call->accept_link, &rx->secureq); - call->conn->state = RXRPC_CONN_SERVICE_CHALLENGING; - set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events); - rxrpc_queue_conn(call->conn); - } else { - _debug("conn ready"); - call->state = RXRPC_CALL_SERVER_ACCEPTING; - list_add_tail(&call->accept_link, &rx->acceptq); - rxrpc_get_call_for_skb(call, notification); - nsp = rxrpc_skb(notification); - nsp->call = call; - - ASSERTCMP(atomic_read(&call->usage), >=, 3); - - _debug("notify"); - spin_lock(&call->lock); - ret = rxrpc_queue_rcv_skb(call, notification, true, - false); - spin_unlock(&call->lock); - notification = NULL; - BUG_ON(ret < 0); - } - spin_unlock(&call->conn->state_lock); + trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_INVALID_OPERATION, EOPNOTSUPP); + skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->priority = RX_INVALID_OPERATION; + _leave(" = NULL [service]"); + return NULL; - _debug("queued"); +found_service: + spin_lock(&rx->incoming_lock); + if (rx->sk.sk_state == RXRPC_CLOSE) { + trace_rxrpc_abort("CLS", sp->hdr.cid, sp->hdr.callNumber, + sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN); + skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->priority = RX_INVALID_OPERATION; + _leave(" = NULL [close]"); + call = NULL; + goto out; } - write_unlock(&rx->call_lock); - _debug("process"); - rxrpc_fast_process_packet(call, skb); - - _debug("done"); - read_unlock_bh(&local->services_lock); - rxrpc_free_skb(notification); - rxrpc_put_call(call, rxrpc_call_put); - _leave(" = 0"); - return 0; - -invalid_service: - _debug("invalid"); - read_unlock_bh(&local->services_lock); - - rxrpc_release_call(rx, call); - rxrpc_put_call(call, rxrpc_call_put); - ret = -ECONNREFUSED; -error: - rxrpc_free_skb(notification); -error_nofree: - _leave(" = %d", ret); - return ret; -} + call = rxrpc_alloc_incoming_call(rx, local, conn, skb); + if (!call) { + skb->mark = RXRPC_SKB_MARK_BUSY; + _leave(" = NULL [busy]"); + call = NULL; + goto out; + } -/* - * accept incoming calls that need peer, transport and/or connection setting up - * - the packets we get are all incoming client DATA packets that have seq == 1 - */ -void rxrpc_accept_incoming_calls(struct rxrpc_local *local) -{ - struct rxrpc_skb_priv *sp; - struct sockaddr_rxrpc srx; - struct rxrpc_sock *rx; - struct rxrpc_wire_header whdr; - struct sk_buff *skb; - int ret; + /* Make the call live. */ + rxrpc_incoming_call(rx, call, skb); + conn = call->conn; - _enter("%d", local->debug_id); + if (rx->notify_new_call) + rx->notify_new_call(&rx->sk, call, call->user_call_ID); - skb = skb_dequeue(&local->accept_queue); - if (!skb) { - _leave("\n"); - return; - } + spin_lock(&conn->state_lock); + switch (conn->state) { + case RXRPC_CONN_SERVICE_UNSECURED: + conn->state = RXRPC_CONN_SERVICE_CHALLENGING; + set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events); + rxrpc_queue_conn(call->conn); + break; - _net("incoming call skb %p", skb); - - rxrpc_see_skb(skb); - sp = rxrpc_skb(skb); - - /* Set up a response packet header in case we need it */ - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.seq = htonl(sp->hdr.seq); - whdr.serial = 0; - whdr.flags = 0; - whdr.type = 0; - whdr.userStatus = 0; - whdr.securityIndex = sp->hdr.securityIndex; - whdr._rsvd = 0; - whdr.serviceId = htons(sp->hdr.serviceId); - - if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) - goto drop; - - /* get the socket providing the service */ - read_lock_bh(&local->services_lock); - hlist_for_each_entry(rx, &local->services, listen_link) { - if (rx->srx.srx_service == sp->hdr.serviceId && - rx->sk.sk_state != RXRPC_CLOSE) - goto found_service; - } - read_unlock_bh(&local->services_lock); - goto invalid_service; + case RXRPC_CONN_SERVICE: + write_lock(&call->state_lock); + if (rx->discard_new_call) + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; + else + call->state = RXRPC_CALL_SERVER_ACCEPTING; + write_unlock(&call->state_lock); + break; -found_service: - _debug("found service %hd", rx->srx.srx_service); - if (sk_acceptq_is_full(&rx->sk)) - goto backlog_full; - sk_acceptq_added(&rx->sk); - read_unlock_bh(&local->services_lock); - - ret = rxrpc_accept_incoming_call(local, rx, skb, &srx); - if (ret < 0) - sk_acceptq_removed(&rx->sk); - switch (ret) { - case -ECONNRESET: /* old calls are ignored */ - case -ECONNABORTED: /* aborted calls are reaborted or ignored */ - case 0: - return; - case -ECONNREFUSED: - goto invalid_service; - case -EBUSY: - goto busy; - case -EKEYREJECTED: - goto security_mismatch; + case RXRPC_CONN_REMOTELY_ABORTED: + rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + conn->remote_abort, ECONNABORTED); + break; + case RXRPC_CONN_LOCALLY_ABORTED: + rxrpc_abort_call("CON", call, sp->hdr.seq, + conn->local_abort, ECONNABORTED); + break; default: BUG(); } + spin_unlock(&conn->state_lock); -backlog_full: - read_unlock_bh(&local->services_lock); -busy: - rxrpc_busy(local, &srx, &whdr); - rxrpc_free_skb(skb); - return; - -drop: - rxrpc_free_skb(skb); - return; + if (call->state == RXRPC_CALL_SERVER_ACCEPTING) + rxrpc_notify_socket(call); -invalid_service: - skb->priority = RX_INVALID_OPERATION; - rxrpc_reject_packet(local, skb); - return; - - /* can't change connection security type mid-flow */ -security_mismatch: - skb->priority = RX_PROTOCOL_ERROR; - rxrpc_reject_packet(local, skb); - return; + _leave(" = %p{%d}", call, call->debug_id); +out: + spin_unlock(&rx->incoming_lock); + return call; } /* @@ -490,11 +426,10 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, write_lock(&rx->call_lock); ret = -ENODATA; - if (list_empty(&rx->acceptq)) + if (list_empty(&rx->to_be_accepted)) goto out; /* check the user ID isn't already in use */ - ret = -EBADSLT; pp = &rx->calls.rb_node; parent = NULL; while (*pp) { @@ -506,11 +441,14 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, else if (user_call_ID > call->user_call_ID) pp = &(*pp)->rb_right; else - goto out; + goto id_in_use; } - /* dequeue the first call and check it's still valid */ - call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link); + /* Dequeue the first call and check it's still valid. We gain + * responsibility for the queue's reference. + */ + call = list_entry(rx->to_be_accepted.next, + struct rxrpc_call, accept_link); list_del_init(&call->accept_link); sk_acceptq_removed(&rx->sk); rxrpc_see_call(call); @@ -528,31 +466,35 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, } /* formalise the acceptance */ - rxrpc_get_call(call, rxrpc_call_got_userid); + rxrpc_get_call(call, rxrpc_call_got); call->notify_rx = notify_rx; call->user_call_ID = user_call_ID; + rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags)) BUG(); - if (test_and_set_bit(RXRPC_CALL_EV_ACCEPTED, &call->events)) - BUG(); write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); - rxrpc_queue_call(call); + rxrpc_notify_socket(call); + rxrpc_service_prealloc(rx, GFP_KERNEL); _leave(" = %p{%d}", call, call->debug_id); return call; out_release: + _debug("release %p", call); write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); - _debug("release %p", call); rxrpc_release_call(rx, call); - _leave(" = %d", ret); - return ERR_PTR(ret); -out: + rxrpc_put_call(call, rxrpc_call_put); + goto out; + +id_in_use: + ret = -EBADSLT; write_unlock(&rx->call_lock); +out: + rxrpc_service_prealloc(rx, GFP_KERNEL); _leave(" = %d", ret); return ERR_PTR(ret); } @@ -564,6 +506,7 @@ out: int rxrpc_reject_call(struct rxrpc_sock *rx) { struct rxrpc_call *call; + bool abort = false; int ret; _enter(""); @@ -572,15 +515,16 @@ int rxrpc_reject_call(struct rxrpc_sock *rx) write_lock(&rx->call_lock); - ret = -ENODATA; - if (list_empty(&rx->acceptq)) { + if (list_empty(&rx->to_be_accepted)) { write_unlock(&rx->call_lock); - _leave(" = -ENODATA"); return -ENODATA; } - /* dequeue the first call and check it's still valid */ - call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link); + /* Dequeue the first call and check it's still valid. We gain + * responsibility for the queue's reference. + */ + call = list_entry(rx->to_be_accepted.next, + struct rxrpc_call, accept_link); list_del_init(&call->accept_link); sk_acceptq_removed(&rx->sk); rxrpc_see_call(call); @@ -588,66 +532,28 @@ int rxrpc_reject_call(struct rxrpc_sock *rx) write_lock_bh(&call->state_lock); switch (call->state) { case RXRPC_CALL_SERVER_ACCEPTING: - __rxrpc_set_call_completion(call, RXRPC_CALL_SERVER_BUSY, - 0, ECONNABORTED); - if (test_and_set_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) - rxrpc_queue_call(call); - ret = 0; - break; + __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, ECONNABORTED); + abort = true; + /* fall through */ case RXRPC_CALL_COMPLETE: ret = call->error; - break; + goto out_discard; default: BUG(); } +out_discard: write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); - rxrpc_release_call(rx, call); - _leave(" = %d", ret); - return ret; -} - -/** - * rxrpc_kernel_accept_call - Allow a kernel service to accept an incoming call - * @sock: The socket on which the impending call is waiting - * @user_call_ID: The tag to attach to the call - * @notify_rx: Where to send notifications instead of socket queue - * - * Allow a kernel service to accept an incoming call, assuming the incoming - * call is still valid. The caller should immediately trigger their own - * notification as there must be data waiting. - */ -struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock, - unsigned long user_call_ID, - rxrpc_notify_rx_t notify_rx) -{ - struct rxrpc_call *call; - - _enter(",%lx", user_call_ID); - call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID, notify_rx); - _leave(" = %p", call); - return call; -} -EXPORT_SYMBOL(rxrpc_kernel_accept_call); - -/** - * rxrpc_kernel_reject_call - Allow a kernel service to reject an incoming call - * @sock: The socket on which the impending call is waiting - * - * Allow a kernel service to reject an incoming call with a BUSY message, - * assuming the incoming call is still valid. - */ -int rxrpc_kernel_reject_call(struct socket *sock) -{ - int ret; - - _enter(""); - ret = rxrpc_reject_call(rxrpc_sk(sock->sk)); + if (abort) { + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + } + rxrpc_service_prealloc(rx, GFP_KERNEL); _leave(" = %d", ret); return ret; } -EXPORT_SYMBOL(rxrpc_kernel_reject_call); /* * rxrpc_kernel_charge_accept - Charge up socket with preallocated calls diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index af88ad7d2cf9..2b976e789562 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -22,1257 +22,286 @@ #include "ar-internal.h" /* - * propose an ACK be sent + * Set the timer */ -void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u16 skew, u32 serial, bool immediate) +static void rxrpc_set_timer(struct rxrpc_call *call) { - unsigned long expiry; - s8 prior = rxrpc_ack_priority[ack_reason]; - - ASSERTCMP(prior, >, 0); - - _enter("{%d},%s,%%%x,%u", - call->debug_id, rxrpc_acks(ack_reason), serial, immediate); + unsigned long t, now = jiffies; - if (prior < rxrpc_ack_priority[call->ackr_reason]) { - if (immediate) - goto cancel_timer; - return; - } - - /* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial - * numbers */ - if (prior == rxrpc_ack_priority[call->ackr_reason]) { - if (prior <= 4) { - call->ackr_skew = skew; - call->ackr_serial = serial; - } - if (immediate) - goto cancel_timer; - return; - } - - call->ackr_reason = ack_reason; - call->ackr_serial = serial; - - switch (ack_reason) { - case RXRPC_ACK_DELAY: - _debug("run delay timer"); - expiry = rxrpc_soft_ack_delay; - goto run_timer; - - case RXRPC_ACK_IDLE: - if (!immediate) { - _debug("run defer timer"); - expiry = rxrpc_idle_ack_delay; - goto run_timer; - } - goto cancel_timer; + _enter("{%ld,%ld,%ld:%ld}", + call->ack_at - now, call->resend_at - now, call->expire_at - now, + call->timer.expires - now); + + read_lock_bh(&call->state_lock); - case RXRPC_ACK_REQUESTED: - expiry = rxrpc_requested_ack_delay; - if (!expiry) - goto cancel_timer; - if (!immediate || serial == 1) { - _debug("run defer timer"); - goto run_timer; + if (call->state < RXRPC_CALL_COMPLETE) { + t = call->ack_at; + if (time_before(call->resend_at, t)) + t = call->resend_at; + if (time_before(call->expire_at, t)) + t = call->expire_at; + if (!timer_pending(&call->timer) || + time_before(t, call->timer.expires)) { + _debug("set timer %ld", t - now); + mod_timer(&call->timer, t); } - - default: - _debug("immediate ACK"); - goto cancel_timer; } - -run_timer: - expiry += jiffies; - if (!timer_pending(&call->ack_timer) || - time_after(call->ack_timer.expires, expiry)) - mod_timer(&call->ack_timer, expiry); - return; - -cancel_timer: - _debug("cancel timer %%%u", serial); - try_to_del_timer_sync(&call->ack_timer); - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) - rxrpc_queue_call(call); read_unlock_bh(&call->state_lock); } /* - * propose an ACK be sent, locking the call structure + * propose an ACK be sent */ -void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u16 skew, u32 serial, bool immediate) +static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, + u16 skew, u32 serial, bool immediate, + bool background) { + unsigned long now, ack_at, expiry = rxrpc_soft_ack_delay; s8 prior = rxrpc_ack_priority[ack_reason]; - if (prior > rxrpc_ack_priority[call->ackr_reason]) { - spin_lock_bh(&call->lock); - __rxrpc_propose_ACK(call, ack_reason, skew, serial, immediate); - spin_unlock_bh(&call->lock); - } -} - -/* - * set the resend timer - */ -static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend, - unsigned long resend_at) -{ - read_lock_bh(&call->state_lock); - if (call->state == RXRPC_CALL_COMPLETE) - resend = 0; - - if (resend & 1) { - _debug("SET RESEND"); - set_bit(RXRPC_CALL_EV_RESEND, &call->events); - } - - if (resend & 2) { - _debug("MODIFY RESEND TIMER"); - set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - mod_timer(&call->resend_timer, resend_at); - } else { - _debug("KILL RESEND TIMER"); - del_timer_sync(&call->resend_timer); - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - } - read_unlock_bh(&call->state_lock); -} - -/* - * resend packets - */ -static void rxrpc_resend(struct rxrpc_call *call) -{ - struct rxrpc_wire_header *whdr; - struct rxrpc_skb_priv *sp; - struct sk_buff *txb; - unsigned long *p_txb, resend_at; - bool stop; - int loop; - u8 resend; - - _enter("{%d,%d,%d,%d},", - call->acks_hard, call->acks_unacked, - atomic_read(&call->sequence), - CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz)); - - stop = false; - resend = 0; - resend_at = 0; - - for (loop = call->acks_tail; - loop != call->acks_head || stop; - loop = (loop + 1) & (call->acks_winsz - 1) - ) { - p_txb = call->acks_window + loop; - smp_read_barrier_depends(); - if (*p_txb & 1) - continue; - - txb = (struct sk_buff *) *p_txb; - sp = rxrpc_skb(txb); - - if (sp->need_resend) { - sp->need_resend = false; - - /* each Tx packet has a new serial number */ - sp->hdr.serial = atomic_inc_return(&call->conn->serial); - - whdr = (struct rxrpc_wire_header *)txb->head; - whdr->serial = htonl(sp->hdr.serial); - - _proto("Tx DATA %%%u { #%d }", - sp->hdr.serial, sp->hdr.seq); - if (rxrpc_send_data_packet(call->conn, txb) < 0) { - stop = true; - sp->resend_at = jiffies + 3; - } else { - if (rxrpc_is_client_call(call)) - rxrpc_expose_client_call(call); - sp->resend_at = - jiffies + rxrpc_resend_timeout; - } - } - - if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = true; - resend |= 1; - } else if (resend & 2) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; - } else { - resend_at = sp->resend_at; - resend |= 2; - } - } - - rxrpc_set_resend(call, resend, resend_at); - _leave(""); -} - -/* - * handle resend timer expiry - */ -static void rxrpc_resend_timer(struct rxrpc_call *call) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *txb; - unsigned long *p_txb, resend_at; - int loop; - u8 resend; - - _enter("%d,%d,%d", - call->acks_tail, call->acks_unacked, call->acks_head); - - if (call->state == RXRPC_CALL_COMPLETE) - return; - - resend = 0; - resend_at = 0; - - for (loop = call->acks_unacked; - loop != call->acks_head; - loop = (loop + 1) & (call->acks_winsz - 1) - ) { - p_txb = call->acks_window + loop; - smp_read_barrier_depends(); - txb = (struct sk_buff *) (*p_txb & ~1); - sp = rxrpc_skb(txb); - - ASSERT(!(*p_txb & 1)); + _enter("{%d},%s,%%%x,%u", + call->debug_id, rxrpc_acks(ack_reason), serial, immediate); - if (sp->need_resend) { - ; - } else if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = true; - resend |= 1; - } else if (resend & 2) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; - } else { - resend_at = sp->resend_at; - resend |= 2; + /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial + * numbers, but we don't alter the timeout. + */ + _debug("prior %u %u vs %u %u", + ack_reason, prior, + call->ackr_reason, rxrpc_ack_priority[call->ackr_reason]); + if (ack_reason == call->ackr_reason) { + if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) { + call->ackr_serial = serial; + call->ackr_skew = skew; } + if (!immediate) + return; + } else if (prior > rxrpc_ack_priority[call->ackr_reason]) { + call->ackr_reason = ack_reason; + call->ackr_serial = serial; + call->ackr_skew = skew; } - rxrpc_set_resend(call, resend, resend_at); - _leave(""); -} - -/* - * process soft ACKs of our transmitted packets - * - these indicate packets the peer has or has not received, but hasn't yet - * given to the consumer, and so can still be discarded and re-requested - */ -static int rxrpc_process_soft_ACKs(struct rxrpc_call *call, - struct rxrpc_ackpacket *ack, - struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *txb; - unsigned long *p_txb, resend_at; - int loop; - u8 sacks[RXRPC_MAXACKS], resend; - - _enter("{%d,%d},{%d},", - call->acks_hard, - CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz), - ack->nAcks); + switch (ack_reason) { + case RXRPC_ACK_REQUESTED: + if (rxrpc_requested_ack_delay < expiry) + expiry = rxrpc_requested_ack_delay; + if (serial == 1) + immediate = false; + break; - if (skb_copy_bits(skb, 0, sacks, ack->nAcks) < 0) - goto protocol_error; + case RXRPC_ACK_DELAY: + if (rxrpc_soft_ack_delay < expiry) + expiry = rxrpc_soft_ack_delay; + break; - resend = 0; - resend_at = 0; - for (loop = 0; loop < ack->nAcks; loop++) { - p_txb = call->acks_window; - p_txb += (call->acks_tail + loop) & (call->acks_winsz - 1); - smp_read_barrier_depends(); - txb = (struct sk_buff *) (*p_txb & ~1); - sp = rxrpc_skb(txb); + case RXRPC_ACK_IDLE: + if (rxrpc_soft_ack_delay < expiry) + expiry = rxrpc_idle_ack_delay; + break; - switch (sacks[loop]) { - case RXRPC_ACK_TYPE_ACK: - sp->need_resend = false; - *p_txb |= 1; - break; - case RXRPC_ACK_TYPE_NACK: - sp->need_resend = true; - *p_txb &= ~1; - resend = 1; - break; - default: - _debug("Unsupported ACK type %d", sacks[loop]); - goto protocol_error; - } + default: + immediate = true; + break; } - smp_mb(); - call->acks_unacked = (call->acks_tail + loop) & (call->acks_winsz - 1); - - /* anything not explicitly ACK'd is implicitly NACK'd, but may just not - * have been received or processed yet by the far end */ - for (loop = call->acks_unacked; - loop != call->acks_head; - loop = (loop + 1) & (call->acks_winsz - 1) - ) { - p_txb = call->acks_window + loop; - smp_read_barrier_depends(); - txb = (struct sk_buff *) (*p_txb & ~1); - sp = rxrpc_skb(txb); - - if (*p_txb & 1) { - /* packet must have been discarded */ - sp->need_resend = true; - *p_txb &= ~1; - resend |= 1; - } else if (sp->need_resend) { - ; - } else if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = true; - resend |= 1; - } else if (resend & 2) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; - } else { - resend_at = sp->resend_at; - resend |= 2; + now = jiffies; + if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) { + _debug("already scheduled"); + } else if (immediate || expiry == 0) { + _debug("immediate ACK %lx", call->events); + if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events) && + background) + rxrpc_queue_call(call); + } else { + ack_at = now + expiry; + _debug("deferred ACK %ld < %ld", expiry, call->ack_at - now); + if (time_before(ack_at, call->ack_at)) { + call->ack_at = ack_at; + rxrpc_set_timer(call); } } - - rxrpc_set_resend(call, resend, resend_at); - _leave(" = 0"); - return 0; - -protocol_error: - _leave(" = -EPROTO"); - return -EPROTO; } /* - * discard hard-ACK'd packets from the Tx window - */ -static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard) -{ - unsigned long _skb; - int tail = call->acks_tail, old_tail; - int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz); - - _enter("{%u,%u},%u", call->acks_hard, win, hard); - - ASSERTCMP(hard - call->acks_hard, <=, win); - - while (call->acks_hard < hard) { - smp_read_barrier_depends(); - _skb = call->acks_window[tail] & ~1; - rxrpc_free_skb((struct sk_buff *) _skb); - old_tail = tail; - tail = (tail + 1) & (call->acks_winsz - 1); - call->acks_tail = tail; - if (call->acks_unacked == old_tail) - call->acks_unacked = tail; - call->acks_hard++; - } - - wake_up(&call->waitq); -} - -/* - * clear the Tx window in the event of a failure + * propose an ACK be sent, locking the call structure */ -static void rxrpc_clear_tx_window(struct rxrpc_call *call) +void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, + u16 skew, u32 serial, bool immediate, bool background) { - rxrpc_rotate_tx_window(call, atomic_read(&call->sequence)); + spin_lock_bh(&call->lock); + __rxrpc_propose_ACK(call, ack_reason, skew, serial, + immediate, background); + spin_unlock_bh(&call->lock); } /* - * drain the out of sequence received packet queue into the packet Rx queue + * Perform retransmission of NAK'd and unack'd packets. */ -static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call) +static void rxrpc_resend(struct rxrpc_call *call) { + struct rxrpc_wire_header *whdr; struct rxrpc_skb_priv *sp; struct sk_buff *skb; - bool terminal; - int ret; + rxrpc_seq_t cursor, seq, top; + unsigned long resend_at, now; + int ix; + u8 annotation; - _enter("{%d,%d}", call->rx_data_post, call->rx_first_oos); + _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); spin_lock_bh(&call->lock); - ret = -ECONNRESET; - if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) - goto socket_unavailable; + cursor = call->tx_hard_ack; + top = call->tx_top; + ASSERT(before_eq(cursor, top)); + if (cursor == top) + goto out_unlock; + + /* Scan the packet list without dropping the lock and decide which of + * the packets in the Tx buffer we're going to resend and what the new + * resend timeout will be. + */ + now = jiffies; + resend_at = now + rxrpc_resend_timeout; + seq = cursor + 1; + do { + ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + if (annotation == RXRPC_TX_ANNO_ACK) + continue; - skb = skb_dequeue(&call->rx_oos_queue); - if (skb) { + skb = call->rxtx_buffer[ix]; rxrpc_see_skb(skb); sp = rxrpc_skb(skb); - _debug("drain OOS packet %d [%d]", - sp->hdr.seq, call->rx_first_oos); - - if (sp->hdr.seq != call->rx_first_oos) { - skb_queue_head(&call->rx_oos_queue, skb); - call->rx_first_oos = rxrpc_skb(skb)->hdr.seq; - _debug("requeue %p {%u}", skb, call->rx_first_oos); - } else { - skb->mark = RXRPC_SKB_MARK_DATA; - terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) && - !(sp->hdr.flags & RXRPC_CLIENT_INITIATED)); - ret = rxrpc_queue_rcv_skb(call, skb, true, terminal); - BUG_ON(ret < 0); - _debug("drain #%u", call->rx_data_post); - call->rx_data_post++; - - /* find out what the next packet is */ - skb = skb_peek(&call->rx_oos_queue); - rxrpc_see_skb(skb); - if (skb) - call->rx_first_oos = rxrpc_skb(skb)->hdr.seq; - else - call->rx_first_oos = 0; - _debug("peek %p {%u}", skb, call->rx_first_oos); - } - } - - ret = 0; -socket_unavailable: - spin_unlock_bh(&call->lock); - _leave(" = %d", ret); - return ret; -} - -/* - * insert an out of sequence packet into the buffer - */ -static void rxrpc_insert_oos_packet(struct rxrpc_call *call, - struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp, *psp; - struct sk_buff *p; - u32 seq; - - sp = rxrpc_skb(skb); - seq = sp->hdr.seq; - _enter(",,{%u}", seq); - - skb->destructor = rxrpc_packet_destructor; - ASSERTCMP(sp->call, ==, NULL); - sp->call = call; - rxrpc_get_call_for_skb(call, skb); - - /* insert into the buffer in sequence order */ - spin_lock_bh(&call->lock); - - skb_queue_walk(&call->rx_oos_queue, p) { - psp = rxrpc_skb(p); - if (psp->hdr.seq > seq) { - _debug("insert oos #%u before #%u", seq, psp->hdr.seq); - skb_insert(p, skb, &call->rx_oos_queue); - goto inserted; - } - } - - _debug("append oos #%u", seq); - skb_queue_tail(&call->rx_oos_queue, skb); -inserted: - - /* we might now have a new front to the queue */ - if (call->rx_first_oos == 0 || seq < call->rx_first_oos) - call->rx_first_oos = seq; - - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - call->rx_data_post == call->rx_first_oos) { - _debug("drain rx oos now"); - set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events); - } - read_unlock(&call->state_lock); - - spin_unlock_bh(&call->lock); - _leave(" [stored #%u]", call->rx_first_oos); -} - -/* - * clear the Tx window on final ACK reception - */ -static void rxrpc_zap_tx_window(struct rxrpc_call *call) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - unsigned long _skb, *acks_window; - u8 winsz = call->acks_winsz; - int tail; - - acks_window = call->acks_window; - call->acks_window = NULL; - - while (CIRC_CNT(call->acks_head, call->acks_tail, winsz) > 0) { - tail = call->acks_tail; - smp_read_barrier_depends(); - _skb = acks_window[tail] & ~1; - smp_mb(); - call->acks_tail = (call->acks_tail + 1) & (winsz - 1); - - skb = (struct sk_buff *) _skb; - sp = rxrpc_skb(skb); - _debug("+++ clear Tx %u", sp->hdr.seq); - rxrpc_free_skb(skb); - } - - kfree(acks_window); -} - -/* - * process the extra information that may be appended to an ACK packet - */ -static void rxrpc_extract_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, - unsigned int latest, int nAcks) -{ - struct rxrpc_ackinfo ackinfo; - struct rxrpc_peer *peer; - unsigned int mtu; - - if (skb_copy_bits(skb, nAcks + 3, &ackinfo, sizeof(ackinfo)) < 0) { - _leave(" [no ackinfo]"); - return; - } - - _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", - latest, - ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU), - ntohl(ackinfo.rwind), ntohl(ackinfo.jumbo_max)); - - mtu = min(ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU)); - - peer = call->peer; - if (mtu < peer->maxdata) { - spin_lock_bh(&peer->lock); - peer->maxdata = mtu; - peer->mtu = mtu + peer->hdrsize; - spin_unlock_bh(&peer->lock); - _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); - } -} - -/* - * process packets in the reception queue - */ -static int rxrpc_process_rx_queue(struct rxrpc_call *call, - u32 *_abort_code) -{ - struct rxrpc_ackpacket ack; - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - bool post_ACK; - int latest; - u32 hard, tx; - - _enter(""); - -process_further: - skb = skb_dequeue(&call->rx_queue); - if (!skb) - return -EAGAIN; - - rxrpc_see_skb(skb); - _net("deferred skb %p", skb); - - sp = rxrpc_skb(skb); - - _debug("process %s [st %d]", rxrpc_pkts[sp->hdr.type], call->state); - - post_ACK = false; - - switch (sp->hdr.type) { - /* data packets that wind up here have been received out of - * order, need security processing or are jumbo packets */ - case RXRPC_PACKET_TYPE_DATA: - _proto("OOSQ DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); - - /* secured packets must be verified and possibly decrypted */ - if (call->conn->security->verify_packet(call, skb, - sp->hdr.seq, - sp->hdr.cksum) < 0) - goto protocol_error; - - rxrpc_insert_oos_packet(call, skb); - goto process_further; - - /* partial ACK to process */ - case RXRPC_PACKET_TYPE_ACK: - if (skb_copy_bits(skb, 0, &ack, sizeof(ack)) < 0) { - _debug("extraction failure"); - goto protocol_error; - } - if (!skb_pull(skb, sizeof(ack))) - BUG(); - - latest = sp->hdr.serial; - hard = ntohl(ack.firstPacket); - tx = atomic_read(&call->sequence); - - _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", - latest, - ntohs(ack.maxSkew), - hard, - ntohl(ack.previousPacket), - ntohl(ack.serial), - rxrpc_acks(ack.reason), - ack.nAcks); - - rxrpc_extract_ackinfo(call, skb, latest, ack.nAcks); - - if (ack.reason == RXRPC_ACK_PING) { - _proto("Rx ACK %%%u PING Request", latest); - rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, - skb->priority, sp->hdr.serial, true); - } - - /* discard any out-of-order or duplicate ACKs */ - if (latest - call->acks_latest <= 0) { - _debug("discard ACK %d <= %d", - latest, call->acks_latest); - goto discard; - } - call->acks_latest = latest; - - if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST && - call->state != RXRPC_CALL_CLIENT_AWAIT_REPLY && - call->state != RXRPC_CALL_SERVER_SEND_REPLY && - call->state != RXRPC_CALL_SERVER_AWAIT_ACK) - goto discard; - - _debug("Tx=%d H=%u S=%d", tx, call->acks_hard, call->state); - - if (hard > 0) { - if (hard - 1 > tx) { - _debug("hard-ACK'd packet %d not transmitted" - " (%d top)", - hard - 1, tx); - goto protocol_error; - } - - if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY || - call->state == RXRPC_CALL_SERVER_AWAIT_ACK) && - hard > tx) { - call->acks_hard = tx; - goto all_acked; + if (annotation == RXRPC_TX_ANNO_UNACK) { + if (time_after(sp->resend_at, now)) { + if (time_before(sp->resend_at, resend_at)) + resend_at = sp->resend_at; + continue; } - - smp_rmb(); - rxrpc_rotate_tx_window(call, hard - 1); - } - - if (ack.nAcks > 0) { - if (hard - 1 + ack.nAcks > tx) { - _debug("soft-ACK'd packet %d+%d not" - " transmitted (%d top)", - hard - 1, ack.nAcks, tx); - goto protocol_error; - } - - if (rxrpc_process_soft_ACKs(call, &ack, skb) < 0) - goto protocol_error; } - goto discard; - /* complete ACK to process */ - case RXRPC_PACKET_TYPE_ACKALL: - goto all_acked; - - /* abort and busy are handled elsewhere */ - case RXRPC_PACKET_TYPE_BUSY: - case RXRPC_PACKET_TYPE_ABORT: - BUG(); - - /* connection level events - also handled elsewhere */ - case RXRPC_PACKET_TYPE_CHALLENGE: - case RXRPC_PACKET_TYPE_RESPONSE: - case RXRPC_PACKET_TYPE_DEBUG: - BUG(); - } - - /* if we've had a hard ACK that covers all the packets we've sent, then - * that ends that phase of the operation */ -all_acked: - write_lock_bh(&call->state_lock); - _debug("ack all %d", call->state); - - switch (call->state) { - case RXRPC_CALL_CLIENT_AWAIT_REPLY: - call->state = RXRPC_CALL_CLIENT_RECV_REPLY; - break; - case RXRPC_CALL_SERVER_AWAIT_ACK: - _debug("srv complete"); - __rxrpc_call_completed(call); - post_ACK = true; - break; - case RXRPC_CALL_CLIENT_SEND_REQUEST: - case RXRPC_CALL_SERVER_RECV_REQUEST: - goto protocol_error_unlock; /* can't occur yet */ - default: - write_unlock_bh(&call->state_lock); - goto discard; /* assume packet left over from earlier phase */ - } - - write_unlock_bh(&call->state_lock); - - /* if all the packets we sent are hard-ACK'd, then we can discard - * whatever we've got left */ - _debug("clear Tx %d", - CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz)); - - del_timer_sync(&call->resend_timer); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - - if (call->acks_window) - rxrpc_zap_tx_window(call); + /* Okay, we need to retransmit a packet. */ + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS; + seq++; + } while (before_eq(seq, top)); + + call->resend_at = resend_at; + + /* Now go through the Tx window and perform the retransmissions. We + * have to drop the lock for each send. If an ACK comes in whilst the + * lock is dropped, it may clear some of the retransmission markers for + * packets that it soft-ACKs. + */ + seq = cursor + 1; + do { + ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + if (annotation != RXRPC_TX_ANNO_RETRANS) + continue; - if (post_ACK) { - /* post the final ACK message for userspace to pick up */ - _debug("post ACK"); - skb->mark = RXRPC_SKB_MARK_FINAL_ACK; - sp->call = call; - rxrpc_get_call_for_skb(call, skb); - spin_lock_bh(&call->lock); - if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0) - BUG(); + skb = call->rxtx_buffer[ix]; + rxrpc_get_skb(skb); spin_unlock_bh(&call->lock); - goto process_further; - } - -discard: - rxrpc_free_skb(skb); - goto process_further; - -protocol_error_unlock: - write_unlock_bh(&call->state_lock); -protocol_error: - rxrpc_free_skb(skb); - _leave(" = -EPROTO"); - return -EPROTO; -} - -/* - * post a message to the socket Rx queue for recvmsg() to pick up - */ -static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error, - bool fatal) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - int ret; - - _enter("{%d,%lx},%u,%u,%d", - call->debug_id, call->flags, mark, error, fatal); - - /* remove timers and things for fatal messages */ - if (fatal) { - del_timer_sync(&call->resend_timer); - del_timer_sync(&call->ack_timer); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - } + sp = rxrpc_skb(skb); - if (mark != RXRPC_SKB_MARK_NEW_CALL && - !test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { - _leave("[no userid]"); - return 0; - } + /* Each Tx packet needs a new serial number */ + sp->hdr.serial = atomic_inc_return(&call->conn->serial); - if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) { - skb = alloc_skb(0, GFP_NOFS); - if (!skb) - return -ENOMEM; + whdr = (struct rxrpc_wire_header *)skb->head; + whdr->serial = htonl(sp->hdr.serial); - rxrpc_new_skb(skb); + if (rxrpc_send_data_packet(call->conn, skb) < 0) { + call->resend_at = now + 2; + rxrpc_free_skb(skb); + return; + } - skb->mark = mark; - - sp = rxrpc_skb(skb); - memset(sp, 0, sizeof(*sp)); - sp->error = error; - sp->call = call; - rxrpc_get_call_for_skb(call, skb); + if (rxrpc_is_client_call(call)) + rxrpc_expose_client_call(call); + sp->resend_at = now + rxrpc_resend_timeout; + rxrpc_free_skb(skb); spin_lock_bh(&call->lock); - ret = rxrpc_queue_rcv_skb(call, skb, true, fatal); - spin_unlock_bh(&call->lock); - BUG_ON(ret < 0); - } - return 0; + /* We need to clear the retransmit state, but there are two + * things we need to be aware of: A new ACK/NAK might have been + * received and the packet might have been hard-ACK'd (in which + * case it will no longer be in the buffer). + */ + if (after(seq, call->tx_hard_ack) && + (call->rxtx_annotations[ix] == RXRPC_TX_ANNO_RETRANS || + call->rxtx_annotations[ix] == RXRPC_TX_ANNO_NAK)) + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; + + if (after(call->tx_hard_ack, seq)) + seq = call->tx_hard_ack; + seq++; + } while (before_eq(seq, top)); + +out_unlock: + spin_unlock_bh(&call->lock); + _leave(""); } /* - * Handle background processing of incoming call packets and ACK / abort - * generation. A ref on the call is donated to us by whoever queued the work - * item. + * Handle retransmission and deferred ACK/abort generation. */ void rxrpc_process_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); - struct rxrpc_wire_header whdr; - struct rxrpc_ackpacket ack; - struct rxrpc_ackinfo ackinfo; - struct msghdr msg; - struct kvec iov[5]; - enum rxrpc_call_event genbit; - unsigned long bits; - __be32 data, pad; - size_t len; - bool requeue = false; - int loop, nbit, ioc, ret, mtu; - u32 serial, abort_code = RX_PROTOCOL_ERROR; - u8 *acks = NULL; + unsigned long now; rxrpc_see_call(call); //printk("\n--------------------\n"); - _enter("{%d,%s,%lx} [%lu]", - call->debug_id, rxrpc_call_states[call->state], call->events, - (jiffies - call->creation_jif) / (HZ / 10)); - - if (call->state >= RXRPC_CALL_COMPLETE) { - rxrpc_put_call(call, rxrpc_call_put); - return; - } - - if (!call->conn) - goto skip_msg_init; - - /* there's a good chance we're going to have to send a message, so set - * one up in advance */ - msg.msg_name = &call->peer->srx.transport; - msg.msg_namelen = call->peer->srx.transport_len; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; + _enter("{%d,%s,%lx}", + call->debug_id, rxrpc_call_states[call->state], call->events); - whdr.epoch = htonl(call->conn->proto.epoch); - whdr.cid = htonl(call->cid); - whdr.callNumber = htonl(call->call_id); - whdr.seq = 0; - whdr.type = RXRPC_PACKET_TYPE_ACK; - whdr.flags = call->conn->out_clientflag; - whdr.userStatus = 0; - whdr.securityIndex = call->conn->security_ix; - whdr._rsvd = 0; - whdr.serviceId = htons(call->service_id); - - memset(iov, 0, sizeof(iov)); - iov[0].iov_base = &whdr; - iov[0].iov_len = sizeof(whdr); -skip_msg_init: - - /* deal with events of a final nature */ - if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) { - enum rxrpc_skb_mark mark; - - clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events); - clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events); - clear_bit(RXRPC_CALL_EV_ABORT, &call->events); - - if (call->completion == RXRPC_CALL_NETWORK_ERROR) { - mark = RXRPC_SKB_MARK_NET_ERROR; - _debug("post net error %d", call->error); - } else { - mark = RXRPC_SKB_MARK_LOCAL_ERROR; - _debug("post net local error %d", call->error); - } - - if (rxrpc_post_message(call, mark, call->error, true) < 0) - goto no_mem; - clear_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events); - goto kill_ACKs; - } - - if (test_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events)) { - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); - - clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events); - clear_bit(RXRPC_CALL_EV_ABORT, &call->events); - - _debug("post conn abort"); - - if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, - call->error, true) < 0) - goto no_mem; - clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events); - goto kill_ACKs; - } - - if (test_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) { - whdr.type = RXRPC_PACKET_TYPE_BUSY; - genbit = RXRPC_CALL_EV_REJECT_BUSY; - goto send_message; - } - - if (test_bit(RXRPC_CALL_EV_ABORT, &call->events)) { - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); - - if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, - call->error, true) < 0) - goto no_mem; - whdr.type = RXRPC_PACKET_TYPE_ABORT; - data = htonl(call->abort_code); - iov[1].iov_base = &data; - iov[1].iov_len = sizeof(data); - genbit = RXRPC_CALL_EV_ABORT; - goto send_message; - } - - if (test_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events)) { - genbit = RXRPC_CALL_EV_ACK_FINAL; - - ack.bufferSpace = htons(8); - ack.maxSkew = 0; - ack.serial = 0; - ack.reason = RXRPC_ACK_IDLE; - ack.nAcks = 0; - call->ackr_reason = 0; - - spin_lock_bh(&call->lock); - ack.serial = htonl(call->ackr_serial); - ack.previousPacket = htonl(call->ackr_prev_seq); - ack.firstPacket = htonl(call->rx_data_eaten + 1); - spin_unlock_bh(&call->lock); - - pad = 0; - - iov[1].iov_base = &ack; - iov[1].iov_len = sizeof(ack); - iov[2].iov_base = &pad; - iov[2].iov_len = 3; - iov[3].iov_base = &ackinfo; - iov[3].iov_len = sizeof(ackinfo); - goto send_ACK; +recheck_state: + if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) { + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + goto recheck_state; } - if (call->events & ((1 << RXRPC_CALL_EV_RCVD_BUSY) | - (1 << RXRPC_CALL_EV_RCVD_ABORT)) - ) { - u32 mark; - - if (test_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events)) - mark = RXRPC_SKB_MARK_REMOTE_ABORT; - else - mark = RXRPC_SKB_MARK_BUSY; - - _debug("post abort/busy"); - rxrpc_clear_tx_window(call); - if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0) - goto no_mem; - - clear_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events); - clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); - goto kill_ACKs; + if (call->state == RXRPC_CALL_COMPLETE) { + del_timer_sync(&call->timer); + goto out_put; } - if (test_and_clear_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events)) { - _debug("do implicit ackall"); - rxrpc_clear_tx_window(call); - } - - if (test_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events)) { + now = jiffies; + if (time_after_eq(now, call->expire_at)) { rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME); - - _debug("post timeout"); - if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, - ETIME, true) < 0) - goto no_mem; - - clear_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events); - goto kill_ACKs; + set_bit(RXRPC_CALL_EV_ABORT, &call->events); } - /* deal with assorted inbound messages */ - if (!skb_queue_empty(&call->rx_queue)) { - ret = rxrpc_process_rx_queue(call, &abort_code); - switch (ret) { - case 0: - case -EAGAIN: - break; - case -ENOMEM: - goto no_mem; - case -EKEYEXPIRED: - case -EKEYREJECTED: - case -EPROTO: - rxrpc_abort_call("PRO", call, 0, abort_code, -ret); - goto kill_ACKs; + if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || + time_after_eq(now, call->ack_at)) { + call->ack_at = call->expire_at; + if (call->ackr_reason) { + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + goto recheck_state; } } - /* handle resending */ - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events)) - rxrpc_resend_timer(call); - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) + if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) || + time_after_eq(now, call->resend_at)) { rxrpc_resend(call); - - /* consider sending an ordinary ACK */ - if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) { - _debug("send ACK: window: %d - %d { %lx }", - call->rx_data_eaten, call->ackr_win_top, - call->ackr_window[0]); - - if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST && - call->ackr_reason != RXRPC_ACK_PING_RESPONSE) { - /* ACK by sending reply DATA packet in this state */ - clear_bit(RXRPC_CALL_EV_ACK, &call->events); - goto maybe_reschedule; - } - - genbit = RXRPC_CALL_EV_ACK; - - acks = kzalloc(call->ackr_win_top - call->rx_data_eaten, - GFP_NOFS); - if (!acks) - goto no_mem; - - //hdr.flags = RXRPC_SLOW_START_OK; - ack.bufferSpace = htons(8); - ack.maxSkew = 0; - - spin_lock_bh(&call->lock); - ack.reason = call->ackr_reason; - ack.serial = htonl(call->ackr_serial); - ack.previousPacket = htonl(call->ackr_prev_seq); - ack.firstPacket = htonl(call->rx_data_eaten + 1); - - ack.nAcks = 0; - for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) { - nbit = loop * BITS_PER_LONG; - for (bits = call->ackr_window[loop]; bits; bits >>= 1 - ) { - _debug("- l=%d n=%d b=%lx", loop, nbit, bits); - if (bits & 1) { - acks[nbit] = RXRPC_ACK_TYPE_ACK; - ack.nAcks = nbit + 1; - } - nbit++; - } - } - call->ackr_reason = 0; - spin_unlock_bh(&call->lock); - - pad = 0; - - iov[1].iov_base = &ack; - iov[1].iov_len = sizeof(ack); - iov[2].iov_base = acks; - iov[2].iov_len = ack.nAcks; - iov[3].iov_base = &pad; - iov[3].iov_len = 3; - iov[4].iov_base = &ackinfo; - iov[4].iov_len = sizeof(ackinfo); - - switch (ack.reason) { - case RXRPC_ACK_REQUESTED: - case RXRPC_ACK_DUPLICATE: - case RXRPC_ACK_OUT_OF_SEQUENCE: - case RXRPC_ACK_EXCEEDS_WINDOW: - case RXRPC_ACK_NOSPACE: - case RXRPC_ACK_PING: - case RXRPC_ACK_PING_RESPONSE: - goto send_ACK_with_skew; - case RXRPC_ACK_DELAY: - case RXRPC_ACK_IDLE: - goto send_ACK; - } + goto recheck_state; } - /* handle completion of security negotiations on an incoming - * connection */ - if (test_and_clear_bit(RXRPC_CALL_EV_SECURED, &call->events)) { - _debug("secured"); - spin_lock_bh(&call->lock); - - if (call->state == RXRPC_CALL_SERVER_SECURING) { - struct rxrpc_sock *rx; - _debug("securing"); - rcu_read_lock(); - rx = rcu_dereference(call->socket); - if (rx) { - write_lock(&rx->call_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags)) { - _debug("not released"); - call->state = RXRPC_CALL_SERVER_ACCEPTING; - list_move_tail(&call->accept_link, - &rx->acceptq); - } - write_unlock(&rx->call_lock); - } - rcu_read_unlock(); - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) - set_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events); - read_unlock(&call->state_lock); - } - - spin_unlock_bh(&call->lock); - if (!test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events)) - goto maybe_reschedule; - } - - /* post a notification of an acceptable connection to the app */ - if (test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events)) { - _debug("post accept"); - if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL, - 0, false) < 0) - goto no_mem; - clear_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events); - goto maybe_reschedule; - } - - /* handle incoming call acceptance */ - if (test_and_clear_bit(RXRPC_CALL_EV_ACCEPTED, &call->events)) { - _debug("accepted"); - ASSERTCMP(call->rx_data_post, ==, 0); - call->rx_data_post = 1; - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) - set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events); - read_unlock_bh(&call->state_lock); - } - - /* drain the out of sequence received packet queue into the packet Rx - * queue */ - if (test_and_clear_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events)) { - while (call->rx_data_post == call->rx_first_oos) - if (rxrpc_drain_rx_oos_queue(call) < 0) - break; - goto maybe_reschedule; - } + rxrpc_set_timer(call); /* other events may have been raised since we started checking */ - goto maybe_reschedule; - -send_ACK_with_skew: - ack.maxSkew = htons(call->ackr_skew); -send_ACK: - mtu = call->peer->if_mtu; - mtu -= call->peer->hdrsize; - ackinfo.maxMTU = htonl(mtu); - ackinfo.rwind = htonl(rxrpc_rx_window_size); - - /* permit the peer to send us jumbo packets if it wants to */ - ackinfo.rxMTU = htonl(rxrpc_rx_mtu); - ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max); - - serial = atomic_inc_return(&call->conn->serial); - whdr.serial = htonl(serial); - _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", - serial, - ntohs(ack.maxSkew), - ntohl(ack.firstPacket), - ntohl(ack.previousPacket), - ntohl(ack.serial), - rxrpc_acks(ack.reason), - ack.nAcks); - - del_timer_sync(&call->ack_timer); - if (ack.nAcks > 0) - set_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags); - goto send_message_2; - -send_message: - _debug("send message"); - - serial = atomic_inc_return(&call->conn->serial); - whdr.serial = htonl(serial); - _proto("Tx %s %%%u", rxrpc_pkts[whdr.type], serial); -send_message_2: - - len = iov[0].iov_len; - ioc = 1; - if (iov[4].iov_len) { - ioc = 5; - len += iov[4].iov_len; - len += iov[3].iov_len; - len += iov[2].iov_len; - len += iov[1].iov_len; - } else if (iov[3].iov_len) { - ioc = 4; - len += iov[3].iov_len; - len += iov[2].iov_len; - len += iov[1].iov_len; - } else if (iov[2].iov_len) { - ioc = 3; - len += iov[2].iov_len; - len += iov[1].iov_len; - } else if (iov[1].iov_len) { - ioc = 2; - len += iov[1].iov_len; - } - - ret = kernel_sendmsg(call->conn->params.local->socket, - &msg, iov, ioc, len); - if (ret < 0) { - _debug("sendmsg failed: %d", ret); - if (call->state < RXRPC_CALL_COMPLETE) - requeue = true; - goto error; - } - - switch (genbit) { - case RXRPC_CALL_EV_ABORT: - clear_bit(genbit, &call->events); - clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); - goto kill_ACKs; - - case RXRPC_CALL_EV_ACK_FINAL: - rxrpc_call_completed(call); - goto kill_ACKs; - - default: - clear_bit(genbit, &call->events); - switch (call->state) { - case RXRPC_CALL_CLIENT_AWAIT_REPLY: - case RXRPC_CALL_CLIENT_RECV_REPLY: - case RXRPC_CALL_SERVER_RECV_REQUEST: - case RXRPC_CALL_SERVER_ACK_REQUEST: - _debug("start ACK timer"); - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, - call->ackr_skew, call->ackr_serial, - false); - default: - break; - } - goto maybe_reschedule; - } - -kill_ACKs: - del_timer_sync(&call->ack_timer); - clear_bit(RXRPC_CALL_EV_ACK, &call->events); - -maybe_reschedule: - if (call->events || !skb_queue_empty(&call->rx_queue)) { - if (call->state < RXRPC_CALL_COMPLETE) - requeue = true; - } - -error: - kfree(acks); - - if ((requeue || call->events) && !work_pending(&call->processor)) { - _debug("jumpstart %x", call->conn->proto.cid); + if (call->events && call->state < RXRPC_CALL_COMPLETE) { __rxrpc_queue_call(call); - } else { - rxrpc_put_call(call, rxrpc_call_put); + goto out; } +out_put: + rxrpc_put_call(call, rxrpc_call_put); +out: _leave(""); - return; - -no_mem: - _debug("out of memory"); - goto maybe_reschedule; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index d233adc9b5e5..18ab13f82f6e 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -30,7 +30,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq", [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", - [RXRPC_CALL_CLIENT_FINAL_ACK] = "ClFnlACK", [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", [RXRPC_CALL_SERVER_SECURING] = "SvSecure", [RXRPC_CALL_SERVER_ACCEPTING] = "SvAccept", @@ -43,7 +42,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = { [RXRPC_CALL_SUCCEEDED] = "Complete", - [RXRPC_CALL_SERVER_BUSY] = "SvBusy ", [RXRPC_CALL_REMOTELY_ABORTED] = "RmtAbort", [RXRPC_CALL_LOCALLY_ABORTED] = "LocAbort", [RXRPC_CALL_LOCAL_ERROR] = "LocError", @@ -57,10 +55,8 @@ const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = { [rxrpc_call_queued_ref] = "QUR", [rxrpc_call_seen] = "SEE", [rxrpc_call_got] = "GOT", - [rxrpc_call_got_skb] = "Gsk", [rxrpc_call_got_userid] = "Gus", [rxrpc_call_put] = "PUT", - [rxrpc_call_put_skb] = "Psk", [rxrpc_call_put_userid] = "Pus", [rxrpc_call_put_noqueue] = "PNQ", }; @@ -69,9 +65,15 @@ struct kmem_cache *rxrpc_call_jar; LIST_HEAD(rxrpc_calls); DEFINE_RWLOCK(rxrpc_call_lock); -static void rxrpc_call_life_expired(unsigned long _call); -static void rxrpc_ack_time_expired(unsigned long _call); -static void rxrpc_resend_time_expired(unsigned long _call); +static void rxrpc_call_timer_expired(unsigned long _call) +{ + struct rxrpc_call *call = (struct rxrpc_call *)_call; + + _enter("%d", call->debug_id); + + if (call->state < RXRPC_CALL_COMPLETE) + rxrpc_queue_call(call); +} /* * find an extant server call @@ -121,27 +123,24 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) if (!call) return NULL; - call->acks_winsz = 16; - call->acks_window = kmalloc(call->acks_winsz * sizeof(unsigned long), + call->rxtx_buffer = kcalloc(RXRPC_RXTX_BUFF_SIZE, + sizeof(struct sk_buff *), gfp); - if (!call->acks_window) { - kmem_cache_free(rxrpc_call_jar, call); - return NULL; - } + if (!call->rxtx_buffer) + goto nomem; - setup_timer(&call->lifetimer, &rxrpc_call_life_expired, - (unsigned long) call); - setup_timer(&call->ack_timer, &rxrpc_ack_time_expired, - (unsigned long) call); - setup_timer(&call->resend_timer, &rxrpc_resend_time_expired, - (unsigned long) call); + call->rxtx_annotations = kcalloc(RXRPC_RXTX_BUFF_SIZE, sizeof(u8), gfp); + if (!call->rxtx_annotations) + goto nomem_2; + + setup_timer(&call->timer, rxrpc_call_timer_expired, + (unsigned long)call); INIT_WORK(&call->processor, &rxrpc_process_call); INIT_LIST_HEAD(&call->link); INIT_LIST_HEAD(&call->chan_wait_link); INIT_LIST_HEAD(&call->accept_link); - skb_queue_head_init(&call->rx_queue); - skb_queue_head_init(&call->rx_oos_queue); - skb_queue_head_init(&call->knlrecv_queue); + INIT_LIST_HEAD(&call->recvmsg_link); + INIT_LIST_HEAD(&call->sock_link); init_waitqueue_head(&call->waitq); spin_lock_init(&call->lock); rwlock_init(&call->state_lock); @@ -150,63 +149,52 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) memset(&call->sock_node, 0xed, sizeof(call->sock_node)); - call->rx_data_expect = 1; - call->rx_data_eaten = 0; - call->rx_first_oos = 0; - call->ackr_win_top = call->rx_data_eaten + 1 + rxrpc_rx_window_size; - call->creation_jif = jiffies; + /* Leave space in the ring to handle a maxed-out jumbo packet */ + call->rx_winsize = RXRPC_RXTX_BUFF_SIZE - 1 - 46; + call->tx_winsize = 16; + call->rx_expect_next = 1; return call; + +nomem_2: + kfree(call->rxtx_buffer); +nomem: + kmem_cache_free(rxrpc_call_jar, call); + return NULL; } /* * Allocate a new client call. */ -static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, - struct sockaddr_rxrpc *srx, +static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_call *call; _enter(""); - ASSERT(rx->local != NULL); - call = rxrpc_alloc_call(gfp); if (!call) return ERR_PTR(-ENOMEM); call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; - call->rx_data_post = 1; call->service_id = srx->srx_service; - rcu_assign_pointer(call->socket, rx); _leave(" = %p", call); return call; } /* - * Begin client call. + * Initiate the call ack/resend/expiry timer. */ -static int rxrpc_begin_client_call(struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) +static void rxrpc_start_call_timer(struct rxrpc_call *call) { - int ret; - - /* Set up or get a connection record and set the protocol parameters, - * including channel number and call ID. - */ - ret = rxrpc_connect_call(call, cp, srx, gfp); - if (ret < 0) - return ret; - - spin_lock(&call->conn->params.peer->lock); - hlist_add_head(&call->error_link, &call->conn->params.peer->error_targets); - spin_unlock(&call->conn->params.peer->lock); - - call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime; - add_timer(&call->lifetimer); - return 0; + unsigned long expire_at; + + expire_at = jiffies + rxrpc_max_call_lifetime; + call->expire_at = expire_at; + call->ack_at = expire_at; + call->resend_at = expire_at; + call->timer.expires = expire_at; + add_timer(&call->timer); } /* @@ -226,7 +214,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, _enter("%p,%lx", rx, user_call_ID); - call = rxrpc_alloc_client_call(rx, srx, gfp); + call = rxrpc_alloc_client_call(srx, gfp); if (IS_ERR(call)) { _leave(" = %ld", PTR_ERR(call)); return call; @@ -255,19 +243,32 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, goto found_user_ID_now_present; } + rcu_assign_pointer(call->socket, rx); rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); + list_add(&call->sock_link, &rx->sock_calls); + write_unlock(&rx->call_lock); - write_lock_bh(&rxrpc_call_lock); + write_lock(&rxrpc_call_lock); list_add_tail(&call->link, &rxrpc_calls); - write_unlock_bh(&rxrpc_call_lock); + write_unlock(&rxrpc_call_lock); - ret = rxrpc_begin_client_call(call, cp, srx, gfp); + /* Set up or get a connection record and set the protocol parameters, + * including channel number and call ID. + */ + ret = rxrpc_connect_call(call, cp, srx, gfp); if (ret < 0) goto error; + spin_lock_bh(&call->conn->params.peer->lock); + hlist_add_head(&call->error_link, + &call->conn->params.peer->error_targets); + spin_unlock_bh(&call->conn->params.peer->lock); + + rxrpc_start_call_timer(call); + _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id); _leave(" = %p [new]", call); @@ -279,9 +280,9 @@ error: write_unlock(&rx->call_lock); rxrpc_put_call(call, rxrpc_call_put_userid); - write_lock_bh(&rxrpc_call_lock); + write_lock(&rxrpc_call_lock); list_del_init(&call->link); - write_unlock_bh(&rxrpc_call_lock); + write_unlock(&rxrpc_call_lock); error_out: __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, @@ -303,142 +304,46 @@ found_user_ID_now_present: } /* - * set up an incoming call - * - called in process context with IRQs enabled + * Set up an incoming call. call->conn points to the connection. + * This is called in BH context and isn't allowed to fail. */ -struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, - struct rxrpc_connection *conn, - struct sk_buff *skb) +void rxrpc_incoming_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct sk_buff *skb) { + struct rxrpc_connection *conn = call->conn; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_call *call, *candidate; - const void *here = __builtin_return_address(0); - u32 call_id, chan; - - _enter(",%d", conn->debug_id); - - ASSERT(rx != NULL); - - candidate = rxrpc_alloc_call(GFP_NOIO); - if (!candidate) - return ERR_PTR(-EBUSY); + u32 chan; - trace_rxrpc_call(candidate, rxrpc_call_new_service, - atomic_read(&candidate->usage), here, NULL); + _enter(",%d", call->conn->debug_id); - chan = sp->hdr.cid & RXRPC_CHANNELMASK; - candidate->conn = conn; - candidate->peer = conn->params.peer; - candidate->cid = sp->hdr.cid; - candidate->call_id = sp->hdr.callNumber; - candidate->security_ix = sp->hdr.securityIndex; - candidate->rx_data_post = 0; - candidate->state = RXRPC_CALL_SERVER_ACCEPTING; - candidate->flags |= (1 << RXRPC_CALL_IS_SERVICE); - if (conn->security_ix > 0) - candidate->state = RXRPC_CALL_SERVER_SECURING; - rcu_assign_pointer(candidate->socket, rx); - - spin_lock(&conn->channel_lock); - - /* set the channel for this call */ - call = rcu_dereference_protected(conn->channels[chan].call, - lockdep_is_held(&conn->channel_lock)); - - _debug("channel[%u] is %p", candidate->cid & RXRPC_CHANNELMASK, call); - if (call && call->call_id == sp->hdr.callNumber) { - /* already set; must've been a duplicate packet */ - _debug("extant call [%d]", call->state); - ASSERTCMP(call->conn, ==, conn); - - read_lock(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_LOCALLY_ABORTED: - if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) - rxrpc_queue_call(call); - case RXRPC_CALL_REMOTELY_ABORTED: - read_unlock(&call->state_lock); - goto aborted_call; - default: - rxrpc_get_call(call, rxrpc_call_got); - read_unlock(&call->state_lock); - goto extant_call; - } - } - - if (call) { - /* it seems the channel is still in use from the previous call - * - ditch the old binding if its call is now complete */ - _debug("CALL: %u { %s }", - call->debug_id, rxrpc_call_states[call->state]); - - if (call->state == RXRPC_CALL_COMPLETE) { - __rxrpc_disconnect_call(conn, call); - } else { - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = -EBUSY"); - return ERR_PTR(-EBUSY); - } - } - - /* check the call number isn't duplicate */ - _debug("check dup"); - call_id = sp->hdr.callNumber; - - /* We just ignore calls prior to the current call ID. Terminated calls - * are handled via the connection. + rcu_assign_pointer(call->socket, rx); + call->call_id = sp->hdr.callNumber; + call->service_id = sp->hdr.serviceId; + call->cid = sp->hdr.cid; + call->state = RXRPC_CALL_SERVER_ACCEPTING; + if (sp->hdr.securityIndex > 0) + call->state = RXRPC_CALL_SERVER_SECURING; + + /* Set the channel for this call. We don't get channel_lock as we're + * only defending against the data_ready handler (which we're called + * from) and the RESPONSE packet parser (which is only really + * interested in call_counter and can cope with a disagreement with the + * call pointer). */ - if (call_id <= conn->channels[chan].call_counter) - goto old_call; /* TODO: Just drop packet */ - - /* Temporary: Mirror the backlog prealloc ref (TODO: use prealloc) */ - rxrpc_get_call(candidate, rxrpc_call_got); - - /* make the call available */ - _debug("new call"); - call = candidate; - candidate = NULL; - conn->channels[chan].call_counter = call_id; + chan = sp->hdr.cid & RXRPC_CHANNELMASK; + conn->channels[chan].call_counter = call->call_id; + conn->channels[chan].call_id = call->call_id; rcu_assign_pointer(conn->channels[chan].call, call); - rxrpc_get_connection(conn); - rxrpc_get_peer(call->peer); - spin_unlock(&conn->channel_lock); spin_lock(&conn->params.peer->lock); hlist_add_head(&call->error_link, &conn->params.peer->error_targets); spin_unlock(&conn->params.peer->lock); - write_lock_bh(&rxrpc_call_lock); - list_add_tail(&call->link, &rxrpc_calls); - write_unlock_bh(&rxrpc_call_lock); - - call->service_id = conn->params.service_id; - _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); - call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime; - add_timer(&call->lifetimer); - _leave(" = %p {%d} [new]", call, call->debug_id); - return call; - -extant_call: - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = %p {%d} [extant]", call, call ? call->debug_id : -1); - return call; - -aborted_call: - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = -ECONNABORTED"); - return ERR_PTR(-ECONNABORTED); - -old_call: - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = -ECONNRESET [old]"); - return ERR_PTR(-ECONNRESET); + rxrpc_start_call_timer(call); + _leave(""); } /* @@ -497,25 +402,17 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) } /* - * Note the addition of a ref on a call for a socket buffer. + * Detach a call from its owning socket. */ -void rxrpc_get_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) +void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) { - const void *here = __builtin_return_address(0); - int n = atomic_inc_return(&call->usage); + struct rxrpc_connection *conn = call->conn; + bool put = false; + int i; - trace_rxrpc_call(call, rxrpc_call_got_skb, n, here, skb); -} + _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); -/* - * detach a call from a socket and set up for release - */ -void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) -{ - _enter("{%d,%d,%d,%d}", - call->debug_id, atomic_read(&call->usage), - atomic_read(&call->ackr_not_idle), - call->rx_first_oos); + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); rxrpc_see_call(call); @@ -524,80 +421,46 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) BUG(); spin_unlock_bh(&call->lock); - /* dissociate from the socket - * - the socket's ref on the call is passed to the death timer - */ - _debug("RELEASE CALL %p (%d)", call, call->debug_id); + del_timer_sync(&call->timer); - if (call->peer) { - spin_lock(&call->peer->lock); - hlist_del_init(&call->error_link); - spin_unlock(&call->peer->lock); - } + /* Make sure we don't get any more notifications */ + write_lock_bh(&rx->recvmsg_lock); - write_lock_bh(&rx->call_lock); - if (!list_empty(&call->accept_link)) { + if (!list_empty(&call->recvmsg_link)) { _debug("unlinking once-pending call %p { e=%lx f=%lx }", call, call->events, call->flags); - ASSERT(!test_bit(RXRPC_CALL_HAS_USERID, &call->flags)); - list_del_init(&call->accept_link); - sk_acceptq_removed(&rx->sk); - } else if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { + list_del(&call->recvmsg_link); + put = true; + } + + /* list_empty() must return false in rxrpc_notify_socket() */ + call->recvmsg_link.next = NULL; + call->recvmsg_link.prev = NULL; + + write_unlock_bh(&rx->recvmsg_lock); + if (put) + rxrpc_put_call(call, rxrpc_call_put); + + write_lock(&rx->call_lock); + + if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { rb_erase(&call->sock_node, &rx->calls); memset(&call->sock_node, 0xdd, sizeof(call->sock_node)); - clear_bit(RXRPC_CALL_HAS_USERID, &call->flags); rxrpc_put_call(call, rxrpc_call_put_userid); } - write_unlock_bh(&rx->call_lock); - - /* free up the channel for reuse */ - if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK) { - clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); - rxrpc_call_completed(call); - } else { - write_lock_bh(&call->state_lock); - - if (call->state < RXRPC_CALL_COMPLETE) { - _debug("+++ ABORTING STATE %d +++\n", call->state); - __rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET); - clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); - rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); - } - - write_unlock_bh(&call->state_lock); - } - if (call->conn) + list_del(&call->sock_link); + write_unlock(&rx->call_lock); + + _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); + + if (conn) rxrpc_disconnect_call(call); - /* clean up the Rx queue */ - if (!skb_queue_empty(&call->rx_queue) || - !skb_queue_empty(&call->rx_oos_queue)) { - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - - _debug("purge Rx queues"); - - spin_lock_bh(&call->lock); - while ((skb = skb_dequeue(&call->rx_queue)) || - (skb = skb_dequeue(&call->rx_oos_queue))) { - spin_unlock_bh(&call->lock); - - sp = rxrpc_skb(skb); - _debug("- zap %s %%%u #%u", - rxrpc_pkts[sp->hdr.type], - sp->hdr.serial, sp->hdr.seq); - rxrpc_free_skb(skb); - spin_lock_bh(&call->lock); - } - spin_unlock_bh(&call->lock); + for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { + rxrpc_free_skb(call->rxtx_buffer[i]); + call->rxtx_buffer[i] = NULL; } - rxrpc_purge_queue(&call->knlrecv_queue); - - del_timer_sync(&call->resend_timer); - del_timer_sync(&call->ack_timer); - del_timer_sync(&call->lifetimer); /* We have to release the prealloc backlog ref */ if (rxrpc_is_service_call(call)) @@ -611,28 +474,19 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) { struct rxrpc_call *call; - struct rb_node *p; _enter("%p", rx); - read_lock_bh(&rx->call_lock); - - /* kill the not-yet-accepted incoming calls */ - list_for_each_entry(call, &rx->secureq, accept_link) { - rxrpc_release_call(rx, call); - } - - list_for_each_entry(call, &rx->acceptq, accept_link) { - rxrpc_release_call(rx, call); - } - - /* mark all the calls as no longer wanting incoming packets */ - for (p = rb_first(&rx->calls); p; p = rb_next(p)) { - call = rb_entry(p, struct rxrpc_call, sock_node); + while (!list_empty(&rx->sock_calls)) { + call = list_entry(rx->sock_calls.next, + struct rxrpc_call, sock_link); + rxrpc_get_call(call, rxrpc_call_got); + rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); } - read_unlock_bh(&rx->call_lock); _leave(""); } @@ -651,23 +505,12 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) ASSERTCMP(n, >=, 0); if (n == 0) { _debug("call %d dead", call->debug_id); - rxrpc_cleanup_call(call); - } -} + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); -/* - * Release a call ref held by a socket buffer. - */ -void rxrpc_put_call_for_skb(struct rxrpc_call *call, struct sk_buff *skb) -{ - const void *here = __builtin_return_address(0); - int n; + write_lock(&rxrpc_call_lock); + list_del_init(&call->link); + write_unlock(&rxrpc_call_lock); - n = atomic_dec_return(&call->usage); - trace_rxrpc_call(call, rxrpc_call_put_skb, n, here, skb); - ASSERTCMP(n, >=, 0); - if (n == 0) { - _debug("call %d dead", call->debug_id); rxrpc_cleanup_call(call); } } @@ -679,9 +522,9 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) { struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); - rxrpc_purge_queue(&call->rx_queue); - rxrpc_purge_queue(&call->knlrecv_queue); rxrpc_put_peer(call->peer); + kfree(call->rxtx_buffer); + kfree(call->rxtx_annotations); kmem_cache_free(rxrpc_call_jar, call); } @@ -690,49 +533,24 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) */ void rxrpc_cleanup_call(struct rxrpc_call *call) { - _net("DESTROY CALL %d", call->debug_id); + int i; - write_lock_bh(&rxrpc_call_lock); - list_del_init(&call->link); - write_unlock_bh(&rxrpc_call_lock); + _net("DESTROY CALL %d", call->debug_id); memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); - del_timer_sync(&call->lifetimer); - del_timer_sync(&call->ack_timer); - del_timer_sync(&call->resend_timer); + del_timer_sync(&call->timer); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); - ASSERT(!work_pending(&call->processor)); ASSERTCMP(call->conn, ==, NULL); - if (call->acks_window) { - _debug("kill Tx window %d", - CIRC_CNT(call->acks_head, call->acks_tail, - call->acks_winsz)); - smp_mb(); - while (CIRC_CNT(call->acks_head, call->acks_tail, - call->acks_winsz) > 0) { - struct rxrpc_skb_priv *sp; - unsigned long _skb; - - _skb = call->acks_window[call->acks_tail] & ~1; - sp = rxrpc_skb((struct sk_buff *)_skb); - _debug("+++ clear Tx %u", sp->hdr.seq); - rxrpc_free_skb((struct sk_buff *)_skb); - call->acks_tail = - (call->acks_tail + 1) & (call->acks_winsz - 1); - } - - kfree(call->acks_window); - } + /* Clean up the Rx/Tx buffer */ + for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) + rxrpc_free_skb(call->rxtx_buffer[i]); rxrpc_free_skb(call->tx_pending); - rxrpc_purge_queue(&call->rx_queue); - ASSERT(skb_queue_empty(&call->rx_oos_queue)); - rxrpc_purge_queue(&call->knlrecv_queue); call_rcu(&call->rcu, rxrpc_rcu_destroy_call); } @@ -747,8 +565,8 @@ void __exit rxrpc_destroy_all_calls(void) if (list_empty(&rxrpc_calls)) return; - - write_lock_bh(&rxrpc_call_lock); + + write_lock(&rxrpc_call_lock); while (!list_empty(&rxrpc_calls)) { call = list_entry(rxrpc_calls.next, struct rxrpc_call, link); @@ -757,74 +575,15 @@ void __exit rxrpc_destroy_all_calls(void) rxrpc_see_call(call); list_del_init(&call->link); - pr_err("Call %p still in use (%d,%d,%s,%lx,%lx)!\n", + pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", call, atomic_read(&call->usage), - atomic_read(&call->ackr_not_idle), rxrpc_call_states[call->state], call->flags, call->events); - if (!skb_queue_empty(&call->rx_queue)) - pr_err("Rx queue occupied\n"); - if (!skb_queue_empty(&call->rx_oos_queue)) - pr_err("OOS queue occupied\n"); - write_unlock_bh(&rxrpc_call_lock); + write_unlock(&rxrpc_call_lock); cond_resched(); - write_lock_bh(&rxrpc_call_lock); + write_lock(&rxrpc_call_lock); } - write_unlock_bh(&rxrpc_call_lock); - _leave(""); -} - -/* - * handle call lifetime being exceeded - */ -static void rxrpc_call_life_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; - - _enter("{%d}", call->debug_id); - - rxrpc_see_call(call); - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - set_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events); - rxrpc_queue_call(call); -} - -/* - * handle resend timer expiry - * - may not take call->state_lock as this can deadlock against del_timer_sync() - */ -static void rxrpc_resend_time_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; - - _enter("{%d}", call->debug_id); - - rxrpc_see_call(call); - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - if (!test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events)) - rxrpc_queue_call(call); -} - -/* - * handle ACK timer expiry - */ -static void rxrpc_ack_time_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; - - _enter("{%d}", call->debug_id); - - rxrpc_see_call(call); - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) - rxrpc_queue_call(call); + write_unlock(&rxrpc_call_lock); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 8c7938ba6a84..0691007cfc02 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -15,10 +15,6 @@ #include #include #include -#include -#include -#include -#include #include #include #include @@ -140,16 +136,10 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, u32 abort_code, int error) { struct rxrpc_call *call; - bool queue; - int i, bit; + int i; _enter("{%d},%x", conn->debug_id, abort_code); - if (compl == RXRPC_CALL_LOCALLY_ABORTED) - bit = RXRPC_CALL_EV_CONN_ABORT; - else - bit = RXRPC_CALL_EV_RCVD_ABORT; - spin_lock(&conn->channel_lock); for (i = 0; i < RXRPC_MAXCALLS; i++) { @@ -157,22 +147,13 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, conn->channels[i].call, lockdep_is_held(&conn->channel_lock)); if (call) { - rxrpc_see_call(call); if (compl == RXRPC_CALL_LOCALLY_ABORTED) trace_rxrpc_abort("CON", call->cid, call->call_id, 0, abort_code, error); - - write_lock_bh(&call->state_lock); - if (rxrpc_set_call_completion(call, compl, abort_code, - error)) { - set_bit(bit, &call->events); - queue = true; - } - write_unlock_bh(&call->state_lock); - if (queue) - rxrpc_queue_call(call); - + if (rxrpc_set_call_completion(call, compl, + abort_code, error)) + rxrpc_notify_socket(call); } } @@ -251,17 +232,18 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, /* * mark a call as being on a now-secured channel - * - must be called with softirqs disabled + * - must be called with BH's disabled. */ static void rxrpc_call_is_secure(struct rxrpc_call *call) { _enter("%p", call); if (call) { - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_SECURED, &call->events)) - rxrpc_queue_call(call); - read_unlock(&call->state_lock); + write_lock_bh(&call->state_lock); + if (call->state == RXRPC_CALL_SERVER_SECURING) { + call->state = RXRPC_CALL_SERVER_ACCEPTING; + rxrpc_notify_socket(call); + } + write_unlock_bh(&call->state_lock); } } @@ -278,7 +260,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, int loop, ret; if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { - kleave(" = -ECONNABORTED [%u]", conn->state); + _leave(" = -ECONNABORTED [%u]", conn->state); return -ECONNABORTED; } @@ -291,14 +273,14 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return 0; case RXRPC_PACKET_TYPE_ABORT: - if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0) + if (skb_copy_bits(skb, sp->offset, &wtmp, sizeof(wtmp)) < 0) return -EPROTO; abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); conn->state = RXRPC_CONN_REMOTELY_ABORTED; - rxrpc_abort_calls(conn, 0, RXRPC_CALL_REMOTELY_ABORTED, - abort_code); + rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, + abort_code, ECONNABORTED); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: @@ -323,14 +305,16 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) { conn->state = RXRPC_CONN_SERVICE; + spin_unlock(&conn->state_lock); for (loop = 0; loop < RXRPC_MAXCALLS; loop++) rxrpc_call_is_secure( rcu_dereference_protected( conn->channels[loop].call, lockdep_is_held(&conn->channel_lock))); + } else { + spin_unlock(&conn->state_lock); } - spin_unlock(&conn->state_lock); spin_unlock(&conn->channel_lock); return 0; @@ -433,88 +417,3 @@ protocol_error: _leave(" [EPROTO]"); goto out; } - -/* - * put a packet up for transport-level abort - */ -void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) -{ - CHECK_SLAB_OKAY(&local->usage); - - skb_queue_tail(&local->reject_queue, skb); - rxrpc_queue_local(local); -} - -/* - * reject packets through the local endpoint - */ -void rxrpc_reject_packets(struct rxrpc_local *local) -{ - union { - struct sockaddr sa; - struct sockaddr_in sin; - } sa; - struct rxrpc_skb_priv *sp; - struct rxrpc_wire_header whdr; - struct sk_buff *skb; - struct msghdr msg; - struct kvec iov[2]; - size_t size; - __be32 code; - - _enter("%d", local->debug_id); - - iov[0].iov_base = &whdr; - iov[0].iov_len = sizeof(whdr); - iov[1].iov_base = &code; - iov[1].iov_len = sizeof(code); - size = sizeof(whdr) + sizeof(code); - - msg.msg_name = &sa; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - - memset(&sa, 0, sizeof(sa)); - sa.sa.sa_family = local->srx.transport.family; - switch (sa.sa.sa_family) { - case AF_INET: - msg.msg_namelen = sizeof(sa.sin); - break; - default: - msg.msg_namelen = 0; - break; - } - - memset(&whdr, 0, sizeof(whdr)); - whdr.type = RXRPC_PACKET_TYPE_ABORT; - - while ((skb = skb_dequeue(&local->reject_queue))) { - rxrpc_see_skb(skb); - sp = rxrpc_skb(skb); - switch (sa.sa.sa_family) { - case AF_INET: - sa.sin.sin_port = udp_hdr(skb)->source; - sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; - code = htonl(skb->priority); - - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.serviceId = htons(sp->hdr.serviceId); - whdr.flags = sp->hdr.flags; - whdr.flags ^= RXRPC_CLIENT_INITIATED; - whdr.flags &= RXRPC_CLIENT_INITIATED; - - kernel_sendmsg(local->socket, &msg, iov, 2, size); - break; - - default: - break; - } - - rxrpc_free_skb(skb); - } - - _leave(""); -} diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 8da82e3aa00e..ffa9addb97b2 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -169,7 +169,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, chan->last_abort = call->abort_code; chan->last_type = RXRPC_PACKET_TYPE_ABORT; } else { - chan->last_seq = call->rx_data_eaten; + chan->last_seq = call->rx_hard_ack; chan->last_type = RXRPC_PACKET_TYPE_ACK; } /* Sync with rxrpc_conn_retransmit(). */ @@ -191,6 +191,10 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; + spin_lock_bh(&conn->params.peer->lock); + hlist_del_init(&call->error_link); + spin_unlock_bh(&conn->params.peer->lock); + if (rxrpc_is_client_call(call)) return rxrpc_disconnect_client_call(call); diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 189338a60457..83d54da4ce8b 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -65,9 +65,8 @@ done: * Insert a service connection into a peer's tree, thereby making it a target * for incoming packets. */ -static struct rxrpc_connection * -rxrpc_publish_service_conn(struct rxrpc_peer *peer, - struct rxrpc_connection *conn) +static void rxrpc_publish_service_conn(struct rxrpc_peer *peer, + struct rxrpc_connection *conn) { struct rxrpc_connection *cursor = NULL; struct rxrpc_conn_proto k = conn->proto; @@ -96,7 +95,7 @@ conn_published: set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags); write_sequnlock_bh(&peer->service_conn_lock); _leave(" = %d [new]", conn->debug_id); - return conn; + return; found_extant_conn: if (atomic_read(&cursor->usage) == 0) @@ -143,106 +142,30 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp) } /* - * get a record of an incoming connection + * Set up an incoming connection. This is called in BH context with the RCU + * read lock held. */ -struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local, - struct sockaddr_rxrpc *srx, - struct sk_buff *skb) +void rxrpc_new_incoming_connection(struct rxrpc_connection *conn, + struct sk_buff *skb) { - struct rxrpc_connection *conn; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_peer *peer; - const char *new = "old"; _enter(""); - peer = rxrpc_lookup_peer(local, srx, GFP_NOIO); - if (!peer) { - _debug("no peer"); - return ERR_PTR(-EBUSY); - } - - ASSERT(sp->hdr.flags & RXRPC_CLIENT_INITIATED); - - rcu_read_lock(); - peer = rxrpc_lookup_peer_rcu(local, srx); - if (peer) { - conn = rxrpc_find_service_conn_rcu(peer, skb); - if (conn) { - if (sp->hdr.securityIndex != conn->security_ix) - goto security_mismatch_rcu; - if (rxrpc_get_connection_maybe(conn)) - goto found_extant_connection_rcu; - - /* The conn has expired but we can't remove it without - * the appropriate lock, so we attempt to replace it - * when we have a new candidate. - */ - } - - if (!rxrpc_get_peer_maybe(peer)) - peer = NULL; - } - rcu_read_unlock(); - - if (!peer) { - peer = rxrpc_lookup_peer(local, srx, GFP_NOIO); - if (!peer) - goto enomem; - } - - /* We don't have a matching record yet. */ - conn = rxrpc_alloc_connection(GFP_NOIO); - if (!conn) - goto enomem_peer; - conn->proto.epoch = sp->hdr.epoch; conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK; - conn->params.local = local; - conn->params.peer = peer; conn->params.service_id = sp->hdr.serviceId; conn->security_ix = sp->hdr.securityIndex; conn->out_clientflag = 0; - conn->state = RXRPC_CONN_SERVICE; - if (conn->params.service_id) + if (conn->security_ix) conn->state = RXRPC_CONN_SERVICE_UNSECURED; - - rxrpc_get_local(local); - - /* We maintain an extra ref on the connection whilst it is on - * the rxrpc_connections list. - */ - atomic_set(&conn->usage, 2); - - write_lock(&rxrpc_connection_lock); - list_add_tail(&conn->link, &rxrpc_connections); - list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); - write_unlock(&rxrpc_connection_lock); + else + conn->state = RXRPC_CONN_SERVICE; /* Make the connection a target for incoming packets. */ - rxrpc_publish_service_conn(peer, conn); - - new = "new"; - -success: - _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->proto.cid); - _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage)); - return conn; - -found_extant_connection_rcu: - rcu_read_unlock(); - goto success; - -security_mismatch_rcu: - rcu_read_unlock(); - _leave(" = -EKEYREJECTED"); - return ERR_PTR(-EKEYREJECTED); + rxrpc_publish_service_conn(conn->params.peer, conn); -enomem_peer: - rxrpc_put_peer(peer); -enomem: - _leave(" = -ENOMEM"); - return ERR_PTR(-ENOMEM); + _net("CONNECTION new %d {%x}", conn->debug_id, conn->proto.cid); } /* diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 5906579060cd..afeba98004b1 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1,6 +1,6 @@ /* RxRPC packet reception * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -27,549 +27,547 @@ #include #include "ar-internal.h" +static void rxrpc_proto_abort(const char *why, + struct rxrpc_call *call, rxrpc_seq_t seq) +{ + if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, EBADMSG)) { + set_bit(RXRPC_CALL_EV_ABORT, &call->events); + rxrpc_queue_call(call); + } +} + /* - * queue a packet for recvmsg to pass to userspace - * - the caller must hold a lock on call->lock - * - must not be called with interrupts disabled (sk_filter() disables BH's) - * - eats the packet whether successful or not - * - there must be just one reference to the packet, which the caller passes to - * this function + * Apply a hard ACK by advancing the Tx window. */ -int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, - bool force, bool terminal) +static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to) { - struct rxrpc_skb_priv *sp; - struct rxrpc_sock *rx; - struct sock *sk; - int ret; + struct sk_buff *skb, *list = NULL; + int ix; - _enter(",,%d,%d", force, terminal); + spin_lock(&call->lock); - ASSERT(!irqs_disabled()); + while (before(call->tx_hard_ack, to)) { + call->tx_hard_ack++; + ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + rxrpc_see_skb(skb); + call->rxtx_buffer[ix] = NULL; + call->rxtx_annotations[ix] = 0; + skb->next = list; + list = skb; + } - sp = rxrpc_skb(skb); - ASSERTCMP(sp->call, ==, call); + spin_unlock(&call->lock); - /* if we've already posted the terminal message for a call, then we - * don't post any more */ - if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) { - _debug("already terminated"); - ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE); + while (list) { + skb = list; + list = skb->next; + skb->next = NULL; rxrpc_free_skb(skb); - return 0; } +} - /* The socket may go away under us */ - ret = 0; - rcu_read_lock(); - rx = rcu_dereference(call->socket); - if (!rx) - goto out; - sk = &rx->sk; - if (sock_flag(sk, SOCK_DEAD)) - goto out; +/* + * End the transmission phase of a call. + * + * This occurs when we get an ACKALL packet, the first DATA packet of a reply, + * or a final ACK packet. + */ +static bool rxrpc_end_tx_phase(struct rxrpc_call *call, const char *abort_why) +{ + _enter(""); - if (!force) { - /* cast skb->rcvbuf to unsigned... It's pointless, but - * reduces number of warnings when compiling with -W - * --ANK */ -// ret = -ENOBUFS; -// if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= -// (unsigned int) sk->sk_rcvbuf) -// goto out; - - ret = sk_filter(sk, skb); - if (ret < 0) - goto out; + switch (call->state) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + return true; + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + case RXRPC_CALL_SERVER_AWAIT_ACK: + break; + default: + rxrpc_proto_abort(abort_why, call, call->tx_top); + return false; } - spin_lock_bh(&sk->sk_receive_queue.lock); - if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags) && - !test_bit(RXRPC_CALL_RELEASED, &call->flags) && - sk->sk_state != RXRPC_CLOSE) { - skb->destructor = rxrpc_packet_destructor; - skb->dev = NULL; - skb->sk = sk; - atomic_add(skb->truesize, &sk->sk_rmem_alloc); - - if (terminal) { - _debug("<<<< TERMINAL MESSAGE >>>>"); - set_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags); - } + rxrpc_rotate_tx_window(call, call->tx_top); - /* allow interception by a kernel service */ - if (skb->mark == RXRPC_SKB_MARK_NEW_CALL && - rx->notify_new_call) { - spin_unlock_bh(&sk->sk_receive_queue.lock); - skb_queue_tail(&call->knlrecv_queue, skb); - rx->notify_new_call(&rx->sk, NULL, 0); - } else if (call->notify_rx) { - spin_unlock_bh(&sk->sk_receive_queue.lock); - skb_queue_tail(&call->knlrecv_queue, skb); - call->notify_rx(&rx->sk, call, call->user_call_ID); - } else { - _net("post skb %p", skb); - __skb_queue_tail(&sk->sk_receive_queue, skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); + write_lock(&call->state_lock); - sk->sk_data_ready(sk); - } - skb = NULL; - } else { - spin_unlock_bh(&sk->sk_receive_queue.lock); + switch (call->state) { + default: + break; + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + call->state = RXRPC_CALL_CLIENT_RECV_REPLY; + break; + case RXRPC_CALL_SERVER_AWAIT_ACK: + __rxrpc_call_completed(call); + rxrpc_notify_socket(call); + break; } - ret = 0; -out: - rxrpc_free_skb(skb); - rcu_read_unlock(); + write_unlock(&call->state_lock); + _leave(" = ok"); + return true; +} + +/* + * Scan a jumbo packet to validate its structure and to work out how many + * subpackets it contains. + * + * A jumbo packet is a collection of consecutive packets glued together with + * little headers between that indicate how to change the initial header for + * each subpacket. + * + * RXRPC_JUMBO_PACKET must be set on all but the last subpacket - and all but + * the last are RXRPC_JUMBO_DATALEN in size. The last subpacket may be of any + * size. + */ +static bool rxrpc_validate_jumbo(struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int offset = sp->offset; + unsigned int len = skb->data_len; + int nr_jumbo = 1; + u8 flags = sp->hdr.flags; + + do { + nr_jumbo++; + if (len - offset < RXRPC_JUMBO_SUBPKTLEN) + goto protocol_error; + if (flags & RXRPC_LAST_PACKET) + goto protocol_error; + offset += RXRPC_JUMBO_DATALEN; + if (skb_copy_bits(skb, offset, &flags, 1) < 0) + goto protocol_error; + offset += sizeof(struct rxrpc_jumbo_header); + } while (flags & RXRPC_JUMBO_PACKET); + + sp->nr_jumbo = nr_jumbo; + return true; - _leave(" = %d", ret); - return ret; +protocol_error: + return false; } /* - * process a DATA packet, posting the packet to the appropriate queue - * - eats the packet if successful + * Handle reception of a duplicate packet. + * + * We have to take care to avoid an attack here whereby we're given a series of + * jumbograms, each with a sequence number one before the preceding one and + * filled up to maximum UDP size. If they never send us the first packet in + * the sequence, they can cause us to have to hold on to around 2MiB of kernel + * space until the call times out. + * + * We limit the space usage by only accepting three duplicate jumbo packets per + * call. After that, we tell the other side we're no longer accepting jumbos + * (that information is encoded in the ACK packet). */ -static int rxrpc_fast_process_data(struct rxrpc_call *call, - struct sk_buff *skb, u32 seq) +static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq, + u8 annotation, bool *_jumbo_dup) { - struct rxrpc_skb_priv *sp; - bool terminal; - int ret, ackbit, ack; - u32 serial; - u16 skew; - u8 flags; + /* Discard normal packets that are duplicates. */ + if (annotation == 0) + return; - _enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq); + /* Skip jumbo subpackets that are duplicates. When we've had three or + * more partially duplicate jumbo packets, we refuse to take any more + * jumbos for this call. + */ + if (!*_jumbo_dup) { + call->nr_jumbo_dup++; + *_jumbo_dup = true; + } +} - sp = rxrpc_skb(skb); - ASSERTCMP(sp->call, ==, NULL); - flags = sp->hdr.flags; - serial = sp->hdr.serial; - skew = skb->priority; +/* + * Process a DATA packet, adding the packet to the Rx ring. + */ +static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, + u16 skew) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int offset = sp->offset; + unsigned int ix; + rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; + rxrpc_seq_t seq = sp->hdr.seq, hard_ack; + bool immediate_ack = false, jumbo_dup = false, queued; + u16 len; + u8 ack = 0, flags, annotation = 0; - spin_lock(&call->lock); + _enter("{%u,%u},{%u,%u}", + call->rx_hard_ack, call->rx_top, skb->data_len, seq); - if (call->state > RXRPC_CALL_COMPLETE) - goto discard; + _proto("Rx DATA %%%u { #%u f=%02x }", + sp->hdr.serial, seq, sp->hdr.flags); - ASSERTCMP(call->rx_data_expect, >=, call->rx_data_post); - ASSERTCMP(call->rx_data_post, >=, call->rx_data_recv); - ASSERTCMP(call->rx_data_recv, >=, call->rx_data_eaten); + if (call->state >= RXRPC_CALL_COMPLETE) + return; - if (seq < call->rx_data_post) { - _debug("dup #%u [-%u]", seq, call->rx_data_post); - ack = RXRPC_ACK_DUPLICATE; - ret = -ENOBUFS; - goto discard_and_ack; - } + /* Received data implicitly ACKs all of the request packets we sent + * when we're acting as a client. + */ + if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY && + !rxrpc_end_tx_phase(call, "ETD")) + return; - /* we may already have the packet in the out of sequence queue */ - ackbit = seq - (call->rx_data_eaten + 1); - ASSERTCMP(ackbit, >=, 0); - if (__test_and_set_bit(ackbit, call->ackr_window)) { - _debug("dup oos #%u [%u,%u]", - seq, call->rx_data_eaten, call->rx_data_post); - ack = RXRPC_ACK_DUPLICATE; - goto discard_and_ack; - } + call->ackr_prev_seq = seq; - if (seq >= call->ackr_win_top) { - _debug("exceed #%u [%u]", seq, call->ackr_win_top); - __clear_bit(ackbit, call->ackr_window); + hard_ack = READ_ONCE(call->rx_hard_ack); + if (after(seq, hard_ack + call->rx_winsize)) { ack = RXRPC_ACK_EXCEEDS_WINDOW; - goto discard_and_ack; + ack_serial = serial; + goto ack; } - if (seq == call->rx_data_expect) { - clear_bit(RXRPC_CALL_EXPECT_OOS, &call->flags); - call->rx_data_expect++; - } else if (seq > call->rx_data_expect) { - _debug("oos #%u [%u]", seq, call->rx_data_expect); - call->rx_data_expect = seq + 1; - if (test_and_set_bit(RXRPC_CALL_EXPECT_OOS, &call->flags)) { - ack = RXRPC_ACK_OUT_OF_SEQUENCE; - goto enqueue_and_ack; + flags = sp->hdr.flags; + if (flags & RXRPC_JUMBO_PACKET) { + if (call->nr_jumbo_dup > 3) { + ack = RXRPC_ACK_NOSPACE; + ack_serial = serial; + goto ack; } - goto enqueue_packet; + annotation = 1; } - if (seq != call->rx_data_post) { - _debug("ahead #%u [%u]", seq, call->rx_data_post); - goto enqueue_packet; +next_subpacket: + queued = false; + ix = seq & RXRPC_RXTX_BUFF_MASK; + len = skb->data_len; + if (flags & RXRPC_JUMBO_PACKET) + len = RXRPC_JUMBO_DATALEN; + + if (flags & RXRPC_LAST_PACKET) { + if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) && + seq != call->rx_top) + return rxrpc_proto_abort("LSN", call, seq); + } else { + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && + after_eq(seq, call->rx_top)) + return rxrpc_proto_abort("LSA", call, seq); } - if (test_bit(RXRPC_CALL_RCVD_LAST, &call->flags)) - goto protocol_error; - - /* if the packet need security things doing to it, then it goes down - * the slow path */ - if (call->security_ix) - goto enqueue_packet; - - sp->call = call; - rxrpc_get_call_for_skb(call, skb); - terminal = ((flags & RXRPC_LAST_PACKET) && - !(flags & RXRPC_CLIENT_INITIATED)); - ret = rxrpc_queue_rcv_skb(call, skb, false, terminal); - if (ret < 0) { - if (ret == -ENOMEM || ret == -ENOBUFS) { - __clear_bit(ackbit, call->ackr_window); - ack = RXRPC_ACK_NOSPACE; - goto discard_and_ack; + if (before_eq(seq, hard_ack)) { + ack = RXRPC_ACK_DUPLICATE; + ack_serial = serial; + goto skip; + } + + if (flags & RXRPC_REQUEST_ACK && !ack) { + ack = RXRPC_ACK_REQUESTED; + ack_serial = serial; + } + + if (call->rxtx_buffer[ix]) { + rxrpc_input_dup_data(call, seq, annotation, &jumbo_dup); + if (ack != RXRPC_ACK_DUPLICATE) { + ack = RXRPC_ACK_DUPLICATE; + ack_serial = serial; } - goto out; + immediate_ack = true; + goto skip; } - skb = NULL; - sp = NULL; - - _debug("post #%u", seq); - ASSERTCMP(call->rx_data_post, ==, seq); - call->rx_data_post++; - - if (flags & RXRPC_LAST_PACKET) - set_bit(RXRPC_CALL_RCVD_LAST, &call->flags); - - /* if we've reached an out of sequence packet then we need to drain - * that queue into the socket Rx queue now */ - if (call->rx_data_post == call->rx_first_oos) { - _debug("drain rx oos now"); - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events)) - rxrpc_queue_call(call); - read_unlock(&call->state_lock); + /* Queue the packet. We use a couple of memory barriers here as need + * to make sure that rx_top is perceived to be set after the buffer + * pointer and that the buffer pointer is set after the annotation and + * the skb data. + * + * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window() + * and also rxrpc_fill_out_ack(). + */ + rxrpc_get_skb(skb); + call->rxtx_annotations[ix] = annotation; + smp_wmb(); + call->rxtx_buffer[ix] = skb; + if (after(seq, call->rx_top)) + smp_store_release(&call->rx_top, seq); + queued = true; + + if (after_eq(seq, call->rx_expect_next)) { + if (after(seq, call->rx_expect_next)) { + _net("OOS %u > %u", seq, call->rx_expect_next); + ack = RXRPC_ACK_OUT_OF_SEQUENCE; + ack_serial = serial; + } + call->rx_expect_next = seq + 1; } - spin_unlock(&call->lock); - atomic_inc(&call->ackr_not_idle); - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, skew, serial, false); - _leave(" = 0 [posted]"); - return 0; +skip: + offset += len; + if (flags & RXRPC_JUMBO_PACKET) { + if (skb_copy_bits(skb, offset, &flags, 1) < 0) + return rxrpc_proto_abort("XJF", call, seq); + offset += sizeof(struct rxrpc_jumbo_header); + seq++; + serial++; + annotation++; + if (flags & RXRPC_JUMBO_PACKET) + annotation |= RXRPC_RX_ANNO_JLAST; + + _proto("Rx DATA Jumbo %%%u", serial); + goto next_subpacket; + } -protocol_error: - ret = -EBADMSG; -out: - spin_unlock(&call->lock); - _leave(" = %d", ret); - return ret; + if (queued && flags & RXRPC_LAST_PACKET && !ack) { + ack = RXRPC_ACK_DELAY; + ack_serial = serial; + } -discard_and_ack: - _debug("discard and ACK packet %p", skb); - __rxrpc_propose_ACK(call, ack, skew, serial, true); -discard: - spin_unlock(&call->lock); - rxrpc_free_skb(skb); - _leave(" = 0 [discarded]"); - return 0; +ack: + if (ack) + rxrpc_propose_ACK(call, ack, skew, ack_serial, + immediate_ack, true); -enqueue_and_ack: - __rxrpc_propose_ACK(call, ack, skew, serial, true); -enqueue_packet: - _net("defer skb %p", skb); - spin_unlock(&call->lock); - skb_queue_tail(&call->rx_queue, skb); - atomic_inc(&call->ackr_not_idle); - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) - rxrpc_queue_call(call); - read_unlock(&call->state_lock); - _leave(" = 0 [queued]"); - return 0; + if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1) + rxrpc_notify_socket(call); + _leave(" [queued]"); } /* - * assume an implicit ACKALL of the transmission phase of a client socket upon - * reception of the first reply packet + * Process the extra information that may be appended to an ACK packet */ -static void rxrpc_assume_implicit_ackall(struct rxrpc_call *call, u32 serial) +static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, + struct rxrpc_ackinfo *ackinfo) { - write_lock_bh(&call->state_lock); - - switch (call->state) { - case RXRPC_CALL_CLIENT_AWAIT_REPLY: - call->state = RXRPC_CALL_CLIENT_RECV_REPLY; - call->acks_latest = serial; - - _debug("implicit ACKALL %%%u", call->acks_latest); - set_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events); - write_unlock_bh(&call->state_lock); + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_peer *peer; + unsigned int mtu; + + _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", + sp->hdr.serial, + ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), + ntohl(ackinfo->rwind), ntohl(ackinfo->jumbo_max)); + + if (call->tx_winsize > ntohl(ackinfo->rwind)) + call->tx_winsize = ntohl(ackinfo->rwind); + + mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU)); + + peer = call->peer; + if (mtu < peer->maxdata) { + spin_lock_bh(&peer->lock); + peer->maxdata = mtu; + peer->mtu = mtu + peer->hdrsize; + spin_unlock_bh(&peer->lock); + _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); + } +} - if (try_to_del_timer_sync(&call->resend_timer) >= 0) { - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - clear_bit(RXRPC_CALL_EV_RESEND, &call->events); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); +/* + * Process individual soft ACKs. + * + * Each ACK in the array corresponds to one packet and can be either an ACK or + * a NAK. If we get find an explicitly NAK'd packet we resend immediately; + * packets that lie beyond the end of the ACK list are scheduled for resend by + * the timer on the basis that the peer might just not have processed them at + * the time the ACK was sent. + */ +static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, + rxrpc_seq_t seq, int nr_acks) +{ + bool resend = false; + int ix; + + for (; nr_acks > 0; nr_acks--, seq++) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + switch (*acks) { + case RXRPC_ACK_TYPE_ACK: + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_ACK; + break; + case RXRPC_ACK_TYPE_NACK: + if (call->rxtx_annotations[ix] == RXRPC_TX_ANNO_NAK) + continue; + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_NAK; + resend = true; + break; + default: + return rxrpc_proto_abort("SFT", call, 0); } - break; - - default: - write_unlock_bh(&call->state_lock); - break; } + + if (resend && + !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + rxrpc_queue_call(call); } /* - * post an incoming packet to the nominated call to deal with - * - must get rid of the sk_buff, either by freeing it or by queuing it + * Process an ACK packet. + * + * ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet + * in the ACK array. Anything before that is hard-ACK'd and may be discarded. + * + * A hard-ACK means that a packet has been processed and may be discarded; a + * soft-ACK means that the packet may be discarded and retransmission + * requested. A phase is complete when all packets are hard-ACK'd. */ -void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) +static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, + u16 skew) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - __be32 wtmp; - u32 abort_code; - - _enter("%p,%p", call, skb); - - ASSERT(!irqs_disabled()); - -#if 0 // INJECT RX ERROR - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) { - static int skip = 0; - if (++skip == 3) { - printk("DROPPED 3RD PACKET!!!!!!!!!!!!!\n"); - skip = 0; - goto free_packet; - } + union { + struct rxrpc_ackpacket ack; + struct rxrpc_ackinfo info; + u8 acks[RXRPC_MAXACKS]; + } buf; + rxrpc_seq_t first_soft_ack, hard_ack; + int nr_acks, offset; + + _enter(""); + + if (skb_copy_bits(skb, sp->offset, &buf.ack, sizeof(buf.ack)) < 0) { + _debug("extraction failure"); + return rxrpc_proto_abort("XAK", call, 0); } -#endif - - /* request ACK generation for any ACK or DATA packet that requests - * it */ - if (sp->hdr.flags & RXRPC_REQUEST_ACK) { - _proto("ACK Requested on %%%u", sp->hdr.serial); + sp->offset += sizeof(buf.ack); + + first_soft_ack = ntohl(buf.ack.firstPacket); + hard_ack = first_soft_ack - 1; + nr_acks = buf.ack.nAcks; + + _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", + sp->hdr.serial, + ntohs(buf.ack.maxSkew), + first_soft_ack, + ntohl(buf.ack.previousPacket), + ntohl(buf.ack.serial), + rxrpc_acks(buf.ack.reason), + buf.ack.nAcks); + + if (buf.ack.reason == RXRPC_ACK_PING) { + _proto("Rx ACK %%%u PING Request", sp->hdr.serial); + rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, + skew, sp->hdr.serial, true, true); + } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, - skb->priority, sp->hdr.serial, false); + skew, sp->hdr.serial, true, true); } - switch (sp->hdr.type) { - case RXRPC_PACKET_TYPE_ABORT: - _debug("abort"); - - if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0) - goto protocol_error; - - abort_code = ntohl(wtmp); - _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); - - if (__rxrpc_set_call_completion(call, - RXRPC_CALL_REMOTELY_ABORTED, - abort_code, ECONNABORTED)) { - set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); - rxrpc_queue_call(call); - } - goto free_packet; - - case RXRPC_PACKET_TYPE_BUSY: - _proto("Rx BUSY %%%u", sp->hdr.serial); - - if (rxrpc_is_service_call(call)) - goto protocol_error; + offset = sp->offset + nr_acks + 3; + if (skb->data_len >= offset + sizeof(buf.info)) { + if (skb_copy_bits(skb, offset, &buf.info, sizeof(buf.info)) < 0) + return rxrpc_proto_abort("XAI", call, 0); + rxrpc_input_ackinfo(call, skb, &buf.info); + } - write_lock_bh(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_CLIENT_SEND_REQUEST: - __rxrpc_set_call_completion(call, - RXRPC_CALL_SERVER_BUSY, - 0, EBUSY); - set_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events); - rxrpc_queue_call(call); - case RXRPC_CALL_SERVER_BUSY: - goto free_packet_unlock; - default: - goto protocol_error_locked; - } + if (first_soft_ack == 0) + return rxrpc_proto_abort("AK0", call, 0); + /* Ignore ACKs unless we are or have just been transmitting. */ + switch (call->state) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + case RXRPC_CALL_SERVER_SEND_REPLY: + case RXRPC_CALL_SERVER_AWAIT_ACK: + break; default: - _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial); - goto protocol_error; - - case RXRPC_PACKET_TYPE_DATA: - _proto("Rx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); - - if (sp->hdr.seq == 0) - goto protocol_error; - - call->ackr_prev_seq = sp->hdr.seq; + return; + } - /* received data implicitly ACKs all of the request packets we - * sent when we're acting as a client */ - if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) - rxrpc_assume_implicit_ackall(call, sp->hdr.serial); + /* Discard any out-of-order or duplicate ACKs. */ + if ((int)sp->hdr.serial - (int)call->acks_latest <= 0) { + _debug("discard ACK %d <= %d", + sp->hdr.serial, call->acks_latest); + return; + } + call->acks_latest = sp->hdr.serial; - switch (rxrpc_fast_process_data(call, skb, sp->hdr.seq)) { - case 0: - skb = NULL; - goto done; + if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) && + hard_ack == call->tx_top) { + rxrpc_end_tx_phase(call, "ETA"); + return; + } - default: - BUG(); + if (before(hard_ack, call->tx_hard_ack) || + after(hard_ack, call->tx_top)) + return rxrpc_proto_abort("AKW", call, 0); - /* data packet received beyond the last packet */ - case -EBADMSG: - goto protocol_error; - } + if (after(hard_ack, call->tx_hard_ack)) + rxrpc_rotate_tx_window(call, hard_ack); - case RXRPC_PACKET_TYPE_ACKALL: - case RXRPC_PACKET_TYPE_ACK: - /* ACK processing is done in process context */ - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) { - skb_queue_tail(&call->rx_queue, skb); - rxrpc_queue_call(call); - skb = NULL; - } - read_unlock_bh(&call->state_lock); - goto free_packet; - } + if (after(first_soft_ack, call->tx_top)) + return; -protocol_error: - _debug("protocol error"); - write_lock_bh(&call->state_lock); -protocol_error_locked: - if (__rxrpc_abort_call("FPR", call, 0, RX_PROTOCOL_ERROR, EPROTO)) - rxrpc_queue_call(call); -free_packet_unlock: - write_unlock_bh(&call->state_lock); -free_packet: - rxrpc_free_skb(skb); -done: - _leave(""); + if (nr_acks > call->tx_top - first_soft_ack + 1) + nr_acks = first_soft_ack - call->tx_top + 1; + if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0) + return rxrpc_proto_abort("XSA", call, 0); + rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks); } /* - * split up a jumbo data packet + * Process an ACKALL packet. */ -static void rxrpc_process_jumbo_packet(struct rxrpc_call *call, - struct sk_buff *jumbo) +static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) { - struct rxrpc_jumbo_header jhdr; - struct rxrpc_skb_priv *sp; - struct sk_buff *part; - - _enter(",{%u,%u}", jumbo->data_len, jumbo->len); - - sp = rxrpc_skb(jumbo); - - do { - sp->hdr.flags &= ~RXRPC_JUMBO_PACKET; - - /* make a clone to represent the first subpacket in what's left - * of the jumbo packet */ - part = skb_clone(jumbo, GFP_ATOMIC); - if (!part) { - /* simply ditch the tail in the event of ENOMEM */ - pskb_trim(jumbo, RXRPC_JUMBO_DATALEN); - break; - } - rxrpc_new_skb(part); - - pskb_trim(part, RXRPC_JUMBO_DATALEN); - - if (!pskb_pull(jumbo, RXRPC_JUMBO_DATALEN)) - goto protocol_error; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - if (skb_copy_bits(jumbo, 0, &jhdr, sizeof(jhdr)) < 0) - goto protocol_error; - if (!pskb_pull(jumbo, sizeof(jhdr))) - BUG(); + _proto("Rx ACKALL %%%u", sp->hdr.serial); - sp->hdr.seq += 1; - sp->hdr.serial += 1; - sp->hdr.flags = jhdr.flags; - sp->hdr._rsvd = ntohs(jhdr._rsvd); + rxrpc_end_tx_phase(call, "ETL"); +} - _proto("Rx DATA Jumbo %%%u", sp->hdr.serial - 1); +/* + * Process an ABORT packet. + */ +static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + __be32 wtmp; + u32 abort_code = RX_CALL_DEAD; - rxrpc_fast_process_packet(call, part); - part = NULL; + _enter(""); - } while (sp->hdr.flags & RXRPC_JUMBO_PACKET); + if (skb->len >= 4 && + skb_copy_bits(skb, sp->offset, &wtmp, sizeof(wtmp)) >= 0) + abort_code = ntohl(wtmp); - rxrpc_fast_process_packet(call, jumbo); - _leave(""); - return; + _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); -protocol_error: - _debug("protocol error"); - rxrpc_free_skb(part); - if (rxrpc_abort_call("PJP", call, sp->hdr.seq, - RX_PROTOCOL_ERROR, EPROTO)) - rxrpc_queue_call(call); - rxrpc_free_skb(jumbo); - _leave(""); + if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + abort_code, ECONNABORTED)) + rxrpc_notify_socket(call); } /* - * post an incoming packet to the appropriate call/socket to deal with - * - must get rid of the sk_buff, either by freeing it or by queuing it + * Process an incoming call packet. */ -static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn, - struct rxrpc_call *call, - struct sk_buff *skb) +static void rxrpc_input_call_packet(struct rxrpc_call *call, + struct sk_buff *skb, u16 skew) { - struct rxrpc_skb_priv *sp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); _enter("%p,%p", call, skb); - sp = rxrpc_skb(skb); - - _debug("extant call [%d]", call->state); - - read_lock(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_COMPLETE: - switch (call->completion) { - case RXRPC_CALL_LOCALLY_ABORTED: - if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, - &call->events)) { - rxrpc_queue_call(call); - goto free_unlock; - } - default: - goto dead_call; - case RXRPC_CALL_SUCCEEDED: - if (rxrpc_is_service_call(call)) - goto dead_call; - goto resend_final_ack; - } - - case RXRPC_CALL_CLIENT_FINAL_ACK: - goto resend_final_ack; + switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_DATA: + rxrpc_input_data(call, skb, skew); + break; - default: + case RXRPC_PACKET_TYPE_ACK: + rxrpc_input_ack(call, skb, skew); break; - } - read_unlock(&call->state_lock); + case RXRPC_PACKET_TYPE_BUSY: + _proto("Rx BUSY %%%u", sp->hdr.serial); - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && - sp->hdr.flags & RXRPC_JUMBO_PACKET) - rxrpc_process_jumbo_packet(call, skb); - else - rxrpc_fast_process_packet(call, skb); + /* Just ignore BUSY packets from the server; the retry and + * lifespan timers will take care of business. BUSY packets + * from the client don't make sense. + */ + break; - goto done; + case RXRPC_PACKET_TYPE_ABORT: + rxrpc_input_abort(call, skb); + break; -resend_final_ack: - _debug("final ack again"); - set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); - rxrpc_queue_call(call); - goto free_unlock; + case RXRPC_PACKET_TYPE_ACKALL: + rxrpc_input_ackall(call, skb); + break; -dead_call: - if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { - skb->priority = RX_CALL_DEAD; - rxrpc_reject_packet(conn->params.local, skb); - goto unlock; + default: + _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial); + break; } -free_unlock: - rxrpc_free_skb(skb); -unlock: - read_unlock(&call->state_lock); -done: + _leave(""); } @@ -600,6 +598,17 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local, rxrpc_queue_local(local); } +/* + * put a packet up for transport-level abort + */ +static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) +{ + CHECK_SLAB_OKAY(&local->usage); + + skb_queue_tail(&local->reject_queue, skb); + rxrpc_queue_local(local); +} + /* * Extract the wire header from a packet and translate the byte order. */ @@ -611,8 +620,6 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) /* dig out the RxRPC connection details */ if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) return -EBADMSG; - if (!pskb_pull(skb, sizeof(whdr))) - BUG(); memset(sp, 0, sizeof(*sp)); sp->hdr.epoch = ntohl(whdr.epoch); @@ -626,6 +633,7 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) sp->hdr.securityIndex = whdr.securityIndex; sp->hdr._rsvd = ntohs(whdr._rsvd); sp->hdr.serviceId = ntohs(whdr.serviceId); + sp->offset = sizeof(whdr); return 0; } @@ -637,19 +645,22 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) * shut down and the local endpoint from going away, thus sk_user_data will not * be cleared until this function returns. */ -void rxrpc_data_ready(struct sock *sk) +void rxrpc_data_ready(struct sock *udp_sk) { struct rxrpc_connection *conn; + struct rxrpc_channel *chan; + struct rxrpc_call *call; struct rxrpc_skb_priv *sp; - struct rxrpc_local *local = sk->sk_user_data; + struct rxrpc_local *local = udp_sk->sk_user_data; struct sk_buff *skb; + unsigned int channel; int ret, skew; - _enter("%p", sk); + _enter("%p", udp_sk); ASSERT(!irqs_disabled()); - skb = skb_recv_datagram(sk, 0, 1, &ret); + skb = skb_recv_datagram(udp_sk, 0, 1, &ret); if (!skb) { if (ret == -EAGAIN) return; @@ -695,111 +706,122 @@ void rxrpc_data_ready(struct sock *sk) goto bad_message; } - if (sp->hdr.type == RXRPC_PACKET_TYPE_VERSION) { + switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_VERSION: rxrpc_post_packet_to_local(local, skb); goto out; - } - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && - (sp->hdr.callNumber == 0 || sp->hdr.seq == 0)) - goto bad_message; + case RXRPC_PACKET_TYPE_BUSY: + if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) + goto discard; + + case RXRPC_PACKET_TYPE_DATA: + if (sp->hdr.callNumber == 0) + goto bad_message; + if (sp->hdr.flags & RXRPC_JUMBO_PACKET && + !rxrpc_validate_jumbo(skb)) + goto bad_message; + break; + } rcu_read_lock(); conn = rxrpc_find_connection_rcu(local, skb); - if (!conn) { - skb->priority = 0; - goto cant_route_call; - } + if (conn) { + if (sp->hdr.securityIndex != conn->security_ix) + goto wrong_security; - /* Note the serial number skew here */ - skew = (int)sp->hdr.serial - (int)conn->hi_serial; - if (skew >= 0) { - if (skew > 0) - conn->hi_serial = sp->hdr.serial; - skb->priority = 0; - } else { - skew = -skew; - skb->priority = min(skew, 65535); - } + if (sp->hdr.callNumber == 0) { + /* Connection-level packet */ + _debug("CONN %p {%d}", conn, conn->debug_id); + rxrpc_post_packet_to_conn(conn, skb); + goto out_unlock; + } + + /* Note the serial number skew here */ + skew = (int)sp->hdr.serial - (int)conn->hi_serial; + if (skew >= 0) { + if (skew > 0) + conn->hi_serial = sp->hdr.serial; + } else { + skew = -skew; + skew = min(skew, 65535); + } - if (sp->hdr.callNumber == 0) { - /* Connection-level packet */ - _debug("CONN %p {%d}", conn, conn->debug_id); - rxrpc_post_packet_to_conn(conn, skb); - goto out_unlock; - } else { /* Call-bound packets are routed by connection channel. */ - unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK; - struct rxrpc_channel *chan = &conn->channels[channel]; - struct rxrpc_call *call; + channel = sp->hdr.cid & RXRPC_CHANNELMASK; + chan = &conn->channels[channel]; /* Ignore really old calls */ if (sp->hdr.callNumber < chan->last_call) goto discard_unlock; if (sp->hdr.callNumber == chan->last_call) { - /* For the previous service call, if completed - * successfully, we discard all further packets. + /* For the previous service call, if completed successfully, we + * discard all further packets. */ if (rxrpc_conn_is_service(conn) && (chan->last_type == RXRPC_PACKET_TYPE_ACK || sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)) goto discard_unlock; - /* But otherwise we need to retransmit the final packet - * from data cached in the connection record. + /* But otherwise we need to retransmit the final packet from + * data cached in the connection record. */ rxrpc_post_packet_to_conn(conn, skb); goto out_unlock; } call = rcu_dereference(chan->call); - if (!call || atomic_read(&call->usage) == 0) - goto cant_route_call; + } else { + skew = 0; + call = NULL; + } - rxrpc_see_call(call); - rxrpc_post_packet_to_call(conn, call, skb); - goto out_unlock; + if (!call || atomic_read(&call->usage) == 0) { + if (!(sp->hdr.type & RXRPC_CLIENT_INITIATED) || + sp->hdr.callNumber == 0 || + sp->hdr.type != RXRPC_PACKET_TYPE_DATA) + goto bad_message_unlock; + if (sp->hdr.seq != 1) + goto discard_unlock; + call = rxrpc_new_incoming_call(local, conn, skb); + if (!call) { + rcu_read_unlock(); + goto reject_packet; + } } + rxrpc_input_call_packet(call, skb, skew); + goto discard_unlock; + discard_unlock: - rxrpc_free_skb(skb); -out_unlock: rcu_read_unlock(); +discard: + rxrpc_free_skb(skb); out: trace_rxrpc_rx_done(0, 0); return; -cant_route_call: +out_unlock: rcu_read_unlock(); + goto out; - _debug("can't route call"); - if (sp->hdr.flags & RXRPC_CLIENT_INITIATED && - sp->hdr.type == RXRPC_PACKET_TYPE_DATA) { - if (sp->hdr.seq == 1) { - _debug("first packet"); - skb_queue_tail(&local->accept_queue, skb); - rxrpc_queue_work(&local->processor); - _leave(" [incoming]"); - goto out; - } - skb->priority = RX_INVALID_OPERATION; - } else { - skb->priority = RX_CALL_DEAD; - } - - if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { - _debug("reject type %d",sp->hdr.type); - goto reject_packet; - } else { - rxrpc_free_skb(skb); - } - _leave(" [no call]"); - return; +wrong_security: + rcu_read_unlock(); + trace_rxrpc_abort("SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RXKADINCONSISTENCY, EBADMSG); + skb->priority = RXKADINCONSISTENCY; + goto post_abort; +bad_message_unlock: + rcu_read_unlock(); bad_message: + trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_PROTOCOL_ERROR, EBADMSG); skb->priority = RX_PROTOCOL_ERROR; +post_abort: + skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; reject_packet: trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_reject_packet(local, skb); diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index a4aba0246731..7d4375e557e6 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -30,14 +30,18 @@ static int none_secure_packet(struct rxrpc_call *call, return 0; } -static int none_verify_packet(struct rxrpc_call *call, - struct sk_buff *skb, - rxrpc_seq_t seq, - u16 expected_cksum) +static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, + rxrpc_seq_t seq, u16 expected_cksum) { return 0; } +static void none_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ +} + static int none_respond_to_challenge(struct rxrpc_connection *conn, struct sk_buff *skb, u32 *_abort_code) @@ -79,6 +83,7 @@ const struct rxrpc_security rxrpc_no_security = { .prime_packet_security = none_prime_packet_security, .secure_packet = none_secure_packet, .verify_packet = none_verify_packet, + .locate_data = none_locate_data, .respond_to_challenge = none_respond_to_challenge, .verify_response = none_verify_response, .clear = none_clear, diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index bcc6593b4cdb..cdd58e6e9fbd 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -98,7 +98,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local) switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: - if (skb_copy_bits(skb, 0, &v, 1) < 0) + if (skb_copy_bits(skb, sp->offset, &v, 1) < 0) return; _proto("Rx VERSION { %02x }", v); if (v == 0) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 610916f4ae34..782b9adf67cb 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -77,7 +77,6 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx) INIT_WORK(&local->processor, rxrpc_local_processor); INIT_HLIST_HEAD(&local->services); init_rwsem(&local->defrag_sem); - skb_queue_head_init(&local->accept_queue); skb_queue_head_init(&local->reject_queue); skb_queue_head_init(&local->event_queue); local->client_conns = RB_ROOT; @@ -308,7 +307,6 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) /* At this point, there should be no more packets coming in to the * local endpoint. */ - rxrpc_purge_queue(&local->accept_queue); rxrpc_purge_queue(&local->reject_queue); rxrpc_purge_queue(&local->event_queue); @@ -332,11 +330,6 @@ static void rxrpc_local_processor(struct work_struct *work) if (atomic_read(&local->usage) == 0) return rxrpc_local_destroyer(local); - if (!skb_queue_empty(&local->accept_queue)) { - rxrpc_accept_incoming_calls(local); - again = true; - } - if (!skb_queue_empty(&local->reject_queue)) { rxrpc_reject_packets(local); again = true; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 39e7cc37c392..fd096f742e4b 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -50,7 +50,7 @@ unsigned int rxrpc_idle_ack_delay = 0.5 * HZ; * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further * packets. */ -unsigned int rxrpc_rx_window_size = 32; +unsigned int rxrpc_rx_window_size = RXRPC_RXTX_BUFF_SIZE - 46; /* * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 8756d74fd74b..719a4c23f09d 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include #include "ar-internal.h" @@ -38,20 +40,38 @@ struct rxrpc_pkt_buffer { static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, struct rxrpc_pkt_buffer *pkt) { + rxrpc_seq_t hard_ack, top, seq; + int ix; u32 mtu, jmax; u8 *ackp = pkt->acks; + /* Barrier against rxrpc_input_data(). */ + hard_ack = READ_ONCE(call->rx_hard_ack); + top = smp_load_acquire(&call->rx_top); + pkt->ack.bufferSpace = htons(8); - pkt->ack.maxSkew = htons(0); - pkt->ack.firstPacket = htonl(call->rx_data_eaten + 1); + pkt->ack.maxSkew = htons(call->ackr_skew); + pkt->ack.firstPacket = htonl(hard_ack + 1); pkt->ack.previousPacket = htonl(call->ackr_prev_seq); pkt->ack.serial = htonl(call->ackr_serial); - pkt->ack.reason = RXRPC_ACK_IDLE; - pkt->ack.nAcks = 0; + pkt->ack.reason = call->ackr_reason; + pkt->ack.nAcks = top - hard_ack; + + if (after(top, hard_ack)) { + seq = hard_ack + 1; + do { + ix = seq & RXRPC_RXTX_BUFF_MASK; + if (call->rxtx_buffer[ix]) + *ackp++ = RXRPC_ACK_TYPE_ACK; + else + *ackp++ = RXRPC_ACK_TYPE_NACK; + seq++; + } while (before_eq(seq, top)); + } - mtu = call->peer->if_mtu; - mtu -= call->peer->hdrsize; - jmax = rxrpc_rx_jumbo_max; + mtu = call->conn->params.peer->if_mtu; + mtu -= call->conn->params.peer->hdrsize; + jmax = (call->nr_jumbo_dup > 3) ? 1 : rxrpc_rx_jumbo_max; pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); pkt->ackinfo.maxMTU = htonl(mtu); pkt->ackinfo.rwind = htonl(rxrpc_rx_window_size); @@ -60,11 +80,11 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, *ackp++ = 0; *ackp++ = 0; *ackp++ = 0; - return 3; + return top - hard_ack + 3; } /* - * Send a final ACK or ABORT call packet. + * Send an ACK or ABORT call packet. */ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) { @@ -158,6 +178,19 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); + if (ret < 0 && call->state < RXRPC_CALL_COMPLETE) { + switch (pkt->whdr.type) { + case RXRPC_PACKET_TYPE_ACK: + rxrpc_propose_ACK(call, pkt->ack.reason, + ntohs(pkt->ack.maxSkew), + ntohl(pkt->ack.serial), + true, true); + break; + case RXRPC_PACKET_TYPE_ABORT: + break; + } + } + out: rxrpc_put_connection(conn); kfree(pkt); @@ -233,3 +266,77 @@ send_fragmentable: _leave(" = %d [frag %u]", ret, conn->params.peer->maxdata); return ret; } + +/* + * reject packets through the local endpoint + */ +void rxrpc_reject_packets(struct rxrpc_local *local) +{ + union { + struct sockaddr sa; + struct sockaddr_in sin; + } sa; + struct rxrpc_skb_priv *sp; + struct rxrpc_wire_header whdr; + struct sk_buff *skb; + struct msghdr msg; + struct kvec iov[2]; + size_t size; + __be32 code; + + _enter("%d", local->debug_id); + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = &code; + iov[1].iov_len = sizeof(code); + size = sizeof(whdr) + sizeof(code); + + msg.msg_name = &sa; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + memset(&sa, 0, sizeof(sa)); + sa.sa.sa_family = local->srx.transport.family; + switch (sa.sa.sa_family) { + case AF_INET: + msg.msg_namelen = sizeof(sa.sin); + break; + default: + msg.msg_namelen = 0; + break; + } + + memset(&whdr, 0, sizeof(whdr)); + whdr.type = RXRPC_PACKET_TYPE_ABORT; + + while ((skb = skb_dequeue(&local->reject_queue))) { + rxrpc_see_skb(skb); + sp = rxrpc_skb(skb); + switch (sa.sa.sa_family) { + case AF_INET: + sa.sin.sin_port = udp_hdr(skb)->source; + sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; + code = htonl(skb->priority); + + whdr.epoch = htonl(sp->hdr.epoch); + whdr.cid = htonl(sp->hdr.cid); + whdr.callNumber = htonl(sp->hdr.callNumber); + whdr.serviceId = htons(sp->hdr.serviceId); + whdr.flags = sp->hdr.flags; + whdr.flags ^= RXRPC_CLIENT_INITIATED; + whdr.flags &= RXRPC_CLIENT_INITIATED; + + kernel_sendmsg(local->socket, &msg, iov, 2, size); + break; + + default: + break; + } + + rxrpc_free_skb(skb); + } + + _leave(""); +} diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 27b9ecad007e..c8948936c6fc 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -129,15 +129,14 @@ void rxrpc_error_report(struct sock *sk) _leave("UDP socket errqueue empty"); return; } + rxrpc_new_skb(skb); serr = SKB_EXT_ERR(skb); if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { _leave("UDP empty message"); - kfree_skb(skb); + rxrpc_free_skb(skb); return; } - rxrpc_new_skb(skb); - rcu_read_lock(); peer = rxrpc_lookup_peer_icmp_rcu(local, skb); if (peer && !rxrpc_get_peer_maybe(peer)) @@ -249,7 +248,6 @@ void rxrpc_peer_error_distributor(struct work_struct *work) container_of(work, struct rxrpc_peer, error_distributor); struct rxrpc_call *call; enum rxrpc_call_completion compl; - bool queue; int error; _enter(""); @@ -272,15 +270,8 @@ void rxrpc_peer_error_distributor(struct work_struct *work) hlist_del_init(&call->error_link); rxrpc_see_call(call); - queue = false; - write_lock(&call->state_lock); - if (__rxrpc_set_call_completion(call, compl, 0, error)) { - set_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events); - queue = true; - } - write_unlock(&call->state_lock); - if (queue) - rxrpc_queue_call(call); + if (rxrpc_set_call_completion(call, compl, 0, error)) + rxrpc_notify_socket(call); } spin_unlock_bh(&peer->lock); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index aebc73ac16dc..2efe29a4c232 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -198,6 +198,32 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) return peer; } +/* + * Initialise peer record. + */ +static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) +{ + rxrpc_assess_MTU_size(peer); + peer->mtu = peer->if_mtu; + + if (peer->srx.transport.family == AF_INET) { + peer->hdrsize = sizeof(struct iphdr); + switch (peer->srx.transport_type) { + case SOCK_DGRAM: + peer->hdrsize += sizeof(struct udphdr); + break; + default: + BUG(); + break; + } + } else { + BUG(); + } + + peer->hdrsize += sizeof(struct rxrpc_wire_header); + peer->maxdata = peer->mtu - peer->hdrsize; +} + /* * Set up a new peer. */ @@ -214,29 +240,39 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, if (peer) { peer->hash_key = hash_key; memcpy(&peer->srx, srx, sizeof(*srx)); + rxrpc_init_peer(peer, hash_key); + } - rxrpc_assess_MTU_size(peer); - peer->mtu = peer->if_mtu; - - if (srx->transport.family == AF_INET) { - peer->hdrsize = sizeof(struct iphdr); - switch (srx->transport_type) { - case SOCK_DGRAM: - peer->hdrsize += sizeof(struct udphdr); - break; - default: - BUG(); - break; - } - } else { - BUG(); - } + _leave(" = %p", peer); + return peer; +} - peer->hdrsize += sizeof(struct rxrpc_wire_header); - peer->maxdata = peer->mtu - peer->hdrsize; +/* + * Set up a new incoming peer. The address is prestored in the preallocated + * peer. + */ +struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local, + struct rxrpc_peer *prealloc) +{ + struct rxrpc_peer *peer; + unsigned long hash_key; + + hash_key = rxrpc_peer_hash_key(local, &prealloc->srx); + prealloc->local = local; + rxrpc_init_peer(prealloc, hash_key); + + spin_lock(&rxrpc_peer_hash_lock); + + /* Need to check that we aren't racing with someone else */ + peer = __rxrpc_lookup_peer_rcu(local, &prealloc->srx, hash_key); + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (!peer) { + peer = prealloc; + hash_add_rcu(rxrpc_peer_hash, &peer->hash_link, hash_key); } - _leave(" = %p", peer); + spin_unlock(&rxrpc_peer_hash_lock); return peer; } @@ -272,7 +308,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, return NULL; } - spin_lock(&rxrpc_peer_hash_lock); + spin_lock_bh(&rxrpc_peer_hash_lock); /* Need to check that we aren't racing with someone else */ peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); @@ -282,7 +318,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, hash_add_rcu(rxrpc_peer_hash, &candidate->hash_link, hash_key); - spin_unlock(&rxrpc_peer_hash_lock); + spin_unlock_bh(&rxrpc_peer_hash_lock); if (peer) kfree(candidate); @@ -307,9 +343,9 @@ void __rxrpc_put_peer(struct rxrpc_peer *peer) { ASSERT(hlist_empty(&peer->error_targets)); - spin_lock(&rxrpc_peer_hash_lock); + spin_lock_bh(&rxrpc_peer_hash_lock); hash_del_rcu(&peer->hash_link); - spin_unlock(&rxrpc_peer_hash_lock); + spin_unlock_bh(&rxrpc_peer_hash_lock); kfree_rcu(peer, rcu); } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 6876ffb3b410..20d0b5c6f81b 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -19,319 +19,479 @@ #include "ar-internal.h" /* - * receive a message from an RxRPC socket - * - we need to be careful about two or more threads calling recvmsg - * simultaneously + * Post a call for attention by the socket or kernel service. Further + * notifications are suppressed by putting recvmsg_link on a dummy queue. */ -int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - int flags) +void rxrpc_notify_socket(struct rxrpc_call *call) { - struct rxrpc_skb_priv *sp; - struct rxrpc_call *call = NULL, *continue_call = NULL; - struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - struct sk_buff *skb; - long timeo; - int copy, ret, ullen, offset, copied = 0; - u32 abort_code; + struct rxrpc_sock *rx; + struct sock *sk; - DEFINE_WAIT(wait); + _enter("%d", call->debug_id); - _enter(",,,%zu,%d", len, flags); + if (!list_empty(&call->recvmsg_link)) + return; + + rcu_read_lock(); + + rx = rcu_dereference(call->socket); + sk = &rx->sk; + if (rx && sk->sk_state < RXRPC_CLOSE) { + if (call->notify_rx) { + call->notify_rx(sk, call, call->user_call_ID); + } else { + write_lock_bh(&rx->recvmsg_lock); + if (list_empty(&call->recvmsg_link)) { + rxrpc_get_call(call, rxrpc_call_got); + list_add_tail(&call->recvmsg_link, &rx->recvmsg_q); + } + write_unlock_bh(&rx->recvmsg_lock); - if (flags & (MSG_OOB | MSG_TRUNC)) - return -EOPNOTSUPP; + if (!sock_flag(sk, SOCK_DEAD)) { + _debug("call %ps", sk->sk_data_ready); + sk->sk_data_ready(sk); + } + } + } - ullen = msg->msg_flags & MSG_CMSG_COMPAT ? 4 : sizeof(unsigned long); + rcu_read_unlock(); + _leave(""); +} - timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT); - msg->msg_flags |= MSG_MORE; +/* + * Pass a call terminating message to userspace. + */ +static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) +{ + u32 tmp = 0; + int ret; - lock_sock(&rx->sk); + switch (call->completion) { + case RXRPC_CALL_SUCCEEDED: + ret = 0; + if (rxrpc_is_service_call(call)) + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &tmp); + break; + case RXRPC_CALL_REMOTELY_ABORTED: + tmp = call->abort_code; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); + break; + case RXRPC_CALL_LOCALLY_ABORTED: + tmp = call->abort_code; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); + break; + case RXRPC_CALL_NETWORK_ERROR: + tmp = call->error; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp); + break; + case RXRPC_CALL_LOCAL_ERROR: + tmp = call->error; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp); + break; + default: + pr_err("Invalid terminal call state %u\n", call->state); + BUG(); + break; + } - for (;;) { - /* return immediately if a client socket has no outstanding - * calls */ - if (RB_EMPTY_ROOT(&rx->calls)) { - if (copied) - goto out; - if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) { - release_sock(&rx->sk); - if (continue_call) - rxrpc_put_call(continue_call, - rxrpc_call_put); - return -ENODATA; - } - } + return ret; +} - /* get the next message on the Rx queue */ - skb = skb_peek(&rx->sk.sk_receive_queue); - if (!skb) { - /* nothing remains on the queue */ - if (copied && - (flags & MSG_PEEK || timeo == 0)) - goto out; +/* + * Pass back notification of a new call. The call is added to the + * to-be-accepted list. This means that the next call to be accepted might not + * be the last call seen awaiting acceptance, but unless we leave this on the + * front of the queue and block all other messages until someone gives us a + * user_ID for it, there's not a lot we can do. + */ +static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct msghdr *msg, int flags) +{ + int tmp = 0, ret; - /* wait for a message to turn up */ - release_sock(&rx->sk); - prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait, - TASK_INTERRUPTIBLE); - ret = sock_error(&rx->sk); - if (ret) - goto wait_error; - - if (skb_queue_empty(&rx->sk.sk_receive_queue)) { - if (signal_pending(current)) - goto wait_interrupted; - timeo = schedule_timeout(timeo); - } - finish_wait(sk_sleep(&rx->sk), &wait); - lock_sock(&rx->sk); - continue; - } + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &tmp); - peek_next_packet: - rxrpc_see_skb(skb); - sp = rxrpc_skb(skb); - call = sp->call; - ASSERT(call != NULL); - rxrpc_see_call(call); - - _debug("next pkt %s", rxrpc_pkts[sp->hdr.type]); - - /* make sure we wait for the state to be updated in this call */ - spin_lock_bh(&call->lock); - spin_unlock_bh(&call->lock); - - if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) { - _debug("packet from released call"); - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); - continue; - } + if (ret == 0 && !(flags & MSG_PEEK)) { + _debug("to be accepted"); + write_lock_bh(&rx->recvmsg_lock); + list_del_init(&call->recvmsg_link); + write_unlock_bh(&rx->recvmsg_lock); - /* determine whether to continue last data receive */ - if (continue_call) { - _debug("maybe cont"); - if (call != continue_call || - skb->mark != RXRPC_SKB_MARK_DATA) { - release_sock(&rx->sk); - rxrpc_put_call(continue_call, rxrpc_call_put); - _leave(" = %d [noncont]", copied); - return copied; - } - } + write_lock(&rx->call_lock); + list_add_tail(&call->accept_link, &rx->to_be_accepted); + write_unlock(&rx->call_lock); + } - rxrpc_get_call(call, rxrpc_call_got); + return ret; +} - /* copy the peer address and timestamp */ - if (!continue_call) { - if (msg->msg_name) { - size_t len = - sizeof(call->conn->params.peer->srx); - memcpy(msg->msg_name, - &call->conn->params.peer->srx, len); - msg->msg_namelen = len; - } - sock_recv_timestamp(msg, &rx->sk, skb); - } +/* + * End the packet reception phase. + */ +static void rxrpc_end_rx_phase(struct rxrpc_call *call) +{ + _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); - /* receive the message */ - if (skb->mark != RXRPC_SKB_MARK_DATA) - goto receive_non_data_message; + if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + } else { + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, false); + } - _debug("recvmsg DATA #%u { %d, %d }", - sp->hdr.seq, skb->len, sp->offset); + write_lock_bh(&call->state_lock); - if (!continue_call) { - /* only set the control data once per recvmsg() */ - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, - ullen, &call->user_call_ID); - if (ret < 0) - goto copy_error; - ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags)); - } + switch (call->state) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + __rxrpc_call_completed(call); + break; - ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv); - ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1); - call->rx_data_recv = sp->hdr.seq; + case RXRPC_CALL_SERVER_RECV_REQUEST: + call->state = RXRPC_CALL_SERVER_ACK_REQUEST; + break; + default: + break; + } - ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten); + write_unlock_bh(&call->state_lock); +} - offset = sp->offset; - copy = skb->len - offset; - if (copy > len - copied) - copy = len - copied; +/* + * Discard a packet we've used up and advance the Rx window by one. + */ +static void rxrpc_rotate_rx_window(struct rxrpc_call *call) +{ + struct sk_buff *skb; + rxrpc_seq_t hard_ack, top; + int ix; + + _enter("%d", call->debug_id); + + hard_ack = call->rx_hard_ack; + top = smp_load_acquire(&call->rx_top); + ASSERT(before(hard_ack, top)); + + hard_ack++; + ix = hard_ack & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + rxrpc_see_skb(skb); + call->rxtx_buffer[ix] = NULL; + call->rxtx_annotations[ix] = 0; + /* Barrier against rxrpc_input_data(). */ + smp_store_release(&call->rx_hard_ack, hard_ack); - ret = skb_copy_datagram_msg(skb, offset, msg, copy); + rxrpc_free_skb(skb); + _debug("%u,%u,%lx", hard_ack, top, call->flags); + if (hard_ack == top && test_bit(RXRPC_CALL_RX_LAST, &call->flags)) + rxrpc_end_rx_phase(call); +} + +/* + * Decrypt and verify a (sub)packet. The packet's length may be changed due to + * padding, but if this is the case, the packet length will be resident in the + * socket buffer. Note that we can't modify the master skb info as the skb may + * be the home to multiple subpackets. + */ +static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + u8 annotation, + unsigned int offset, unsigned int len) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + rxrpc_seq_t seq = sp->hdr.seq; + u16 cksum = sp->hdr.cksum; + + _enter(""); + + /* For all but the head jumbo subpacket, the security checksum is in a + * jumbo header immediately prior to the data. + */ + if ((annotation & RXRPC_RX_ANNO_JUMBO) > 1) { + __be16 tmp; + if (skb_copy_bits(skb, offset - 2, &tmp, 2) < 0) + BUG(); + cksum = ntohs(tmp); + seq += (annotation & RXRPC_RX_ANNO_JUMBO) - 1; + } + + return call->conn->security->verify_packet(call, skb, offset, len, + seq, cksum); +} + +/* + * Locate the data within a packet. This is complicated by: + * + * (1) An skb may contain a jumbo packet - so we have to find the appropriate + * subpacket. + * + * (2) The (sub)packets may be encrypted and, if so, the encrypted portion + * contains an extra header which includes the true length of the data, + * excluding any encrypted padding. + */ +static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + u8 *_annotation, + unsigned int *_offset, unsigned int *_len) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int offset = *_offset; + unsigned int len = *_len; + int ret; + u8 annotation = *_annotation; + + if (offset > 0) + return 0; + + /* Locate the subpacket */ + offset = sp->offset; + len = skb->len - sp->offset; + if ((annotation & RXRPC_RX_ANNO_JUMBO) > 0) { + offset += (((annotation & RXRPC_RX_ANNO_JUMBO) - 1) * + RXRPC_JUMBO_SUBPKTLEN); + len = (annotation & RXRPC_RX_ANNO_JLAST) ? + skb->len - offset : RXRPC_JUMBO_SUBPKTLEN; + } + + if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) { + ret = rxrpc_verify_packet(call, skb, annotation, offset, len); if (ret < 0) - goto copy_error; + return ret; + *_annotation |= RXRPC_RX_ANNO_VERIFIED; + } - /* handle piecemeal consumption of data packets */ - _debug("copied %d+%d", copy, copied); + *_offset = offset; + *_len = len; + call->conn->security->locate_data(call, skb, _offset, _len); + return 0; +} - offset += copy; - copied += copy; +/* + * Deliver messages to a call. This keeps processing packets until the buffer + * is filled and we find either more DATA (returns 0) or the end of the DATA + * (returns 1). If more packets are required, it returns -EAGAIN. + */ +static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, + struct msghdr *msg, struct iov_iter *iter, + size_t len, int flags, size_t *_offset) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + rxrpc_seq_t hard_ack, top, seq; + size_t remain; + bool last; + unsigned int rx_pkt_offset, rx_pkt_len; + int ix, copy, ret = 0; + + _enter(""); + + rx_pkt_offset = call->rx_pkt_offset; + rx_pkt_len = call->rx_pkt_len; + + /* Barriers against rxrpc_input_data(). */ + hard_ack = call->rx_hard_ack; + top = smp_load_acquire(&call->rx_top); + for (seq = hard_ack + 1; before_eq(seq, top); seq++) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + if (!skb) + break; + smp_rmb(); + rxrpc_see_skb(skb); + sp = rxrpc_skb(skb); - if (!(flags & MSG_PEEK)) - sp->offset = offset; + if (msg) + sock_recv_timestamp(msg, sock->sk, skb); + + ret = rxrpc_locate_data(call, skb, &call->rxtx_annotations[ix], + &rx_pkt_offset, &rx_pkt_len); + _debug("recvmsg %x DATA #%u { %d, %d }", + sp->hdr.callNumber, seq, rx_pkt_offset, rx_pkt_len); + + /* We have to handle short, empty and used-up DATA packets. */ + remain = len - *_offset; + copy = rx_pkt_len; + if (copy > remain) + copy = remain; + if (copy > 0) { + ret = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, + copy); + if (ret < 0) + goto out; + + /* handle piecemeal consumption of data packets */ + _debug("copied %d @%zu", copy, *_offset); + + rx_pkt_offset += copy; + rx_pkt_len -= copy; + *_offset += copy; + } - if (sp->offset < skb->len) { + if (rx_pkt_len > 0) { _debug("buffer full"); - ASSERTCMP(copied, ==, len); + ASSERTCMP(*_offset, ==, len); break; } - /* we transferred the whole data packet */ + /* The whole packet has been transferred. */ + last = sp->hdr.flags & RXRPC_LAST_PACKET; if (!(flags & MSG_PEEK)) - rxrpc_kernel_data_consumed(call, skb); - - if (sp->hdr.flags & RXRPC_LAST_PACKET) { - _debug("last"); - if (rxrpc_conn_is_client(call->conn)) { - /* last byte of reply received */ - ret = copied; - goto terminal_message; - } + rxrpc_rotate_rx_window(call); + rx_pkt_offset = 0; + rx_pkt_len = 0; - /* last bit of request received */ - if (!(flags & MSG_PEEK)) { - _debug("eat packet"); - if (skb_dequeue(&rx->sk.sk_receive_queue) != - skb) - BUG(); - rxrpc_free_skb(skb); - } - msg->msg_flags &= ~MSG_MORE; - break; - } + ASSERTIFCMP(last, seq, ==, top); + } - /* move on to the next data message */ - _debug("next"); - if (!continue_call) - continue_call = sp->call; - else - rxrpc_put_call(call, rxrpc_call_put); - call = NULL; - - if (flags & MSG_PEEK) { - _debug("peek next"); - skb = skb->next; - if (skb == (struct sk_buff *) &rx->sk.sk_receive_queue) - break; - goto peek_next_packet; - } + if (after(seq, top)) { + ret = -EAGAIN; + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags)) + ret = 1; + } +out: + if (!(flags & MSG_PEEK)) { + call->rx_pkt_offset = rx_pkt_offset; + call->rx_pkt_len = rx_pkt_len; + } + _leave(" = %d [%u/%u]", ret, seq, top); + return ret; +} - _debug("eat packet"); - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); +/* + * Receive a message from an RxRPC socket + * - we need to be careful about two or more threads calling recvmsg + * simultaneously + */ +int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) +{ + struct rxrpc_call *call; + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + struct list_head *l; + size_t copied = 0; + long timeo; + int ret; + + DEFINE_WAIT(wait); + + _enter(",,,%zu,%d", len, flags); + + if (flags & (MSG_OOB | MSG_TRUNC)) + return -EOPNOTSUPP; + + timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT); + +try_again: + lock_sock(&rx->sk); + + /* Return immediately if a client socket has no outstanding calls */ + if (RB_EMPTY_ROOT(&rx->calls) && + list_empty(&rx->recvmsg_q) && + rx->sk.sk_state != RXRPC_SERVER_LISTENING) { + release_sock(&rx->sk); + return -ENODATA; } - /* end of non-terminal data packet reception for the moment */ - _debug("end rcv data"); -out: - release_sock(&rx->sk); - if (call) - rxrpc_put_call(call, rxrpc_call_put); - if (continue_call) - rxrpc_put_call(continue_call, rxrpc_call_put); - _leave(" = %d [data]", copied); - return copied; - - /* handle non-DATA messages such as aborts, incoming connections and - * final ACKs */ -receive_non_data_message: - _debug("non-data"); - - if (skb->mark == RXRPC_SKB_MARK_NEW_CALL) { - _debug("RECV NEW CALL"); - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &abort_code); - if (ret < 0) - goto copy_error; - if (!(flags & MSG_PEEK)) { - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); + if (list_empty(&rx->recvmsg_q)) { + ret = -EWOULDBLOCK; + if (timeo == 0) + goto error_no_call; + + release_sock(&rx->sk); + + /* Wait for something to happen */ + prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait, + TASK_INTERRUPTIBLE); + ret = sock_error(&rx->sk); + if (ret) + goto wait_error; + + if (list_empty(&rx->recvmsg_q)) { + if (signal_pending(current)) + goto wait_interrupted; + timeo = schedule_timeout(timeo); } - goto out; + finish_wait(sk_sleep(&rx->sk), &wait); + goto try_again; } - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, - ullen, &call->user_call_ID); - if (ret < 0) - goto copy_error; - ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags)); + /* Find the next call and dequeue it if we're not just peeking. If we + * do dequeue it, that comes with a ref that we will need to release. + */ + write_lock_bh(&rx->recvmsg_lock); + l = rx->recvmsg_q.next; + call = list_entry(l, struct rxrpc_call, recvmsg_link); + if (!(flags & MSG_PEEK)) + list_del_init(&call->recvmsg_link); + else + rxrpc_get_call(call, rxrpc_call_got); + write_unlock_bh(&rx->recvmsg_lock); - switch (skb->mark) { - case RXRPC_SKB_MARK_DATA: + _debug("recvmsg call %p", call); + + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); - case RXRPC_SKB_MARK_FINAL_ACK: - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &abort_code); - break; - case RXRPC_SKB_MARK_BUSY: - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_BUSY, 0, &abort_code); - break; - case RXRPC_SKB_MARK_REMOTE_ABORT: - abort_code = call->abort_code; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code); - break; - case RXRPC_SKB_MARK_LOCAL_ABORT: - abort_code = call->abort_code; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code); - if (call->error) { - abort_code = call->error; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, - &abort_code); + + if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { + if (flags & MSG_CMSG_COMPAT) { + unsigned int id32 = call->user_call_ID; + + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, + sizeof(unsigned int), &id32); + } else { + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, + sizeof(unsigned long), + &call->user_call_ID); } + if (ret < 0) + goto error; + } + + if (msg->msg_name) { + size_t len = sizeof(call->conn->params.peer->srx); + memcpy(msg->msg_name, &call->conn->params.peer->srx, len); + msg->msg_namelen = len; + } + + switch (call->state) { + case RXRPC_CALL_SERVER_ACCEPTING: + ret = rxrpc_recvmsg_new_call(rx, call, msg, flags); break; - case RXRPC_SKB_MARK_NET_ERROR: - _debug("RECV NET ERROR %d", sp->error); - abort_code = sp->error; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &abort_code); - break; - case RXRPC_SKB_MARK_LOCAL_ERROR: - _debug("RECV LOCAL ERROR %d", sp->error); - abort_code = sp->error; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, - &abort_code); + case RXRPC_CALL_CLIENT_RECV_REPLY: + case RXRPC_CALL_SERVER_RECV_REQUEST: + case RXRPC_CALL_SERVER_ACK_REQUEST: + ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len, + flags, &copied); + if (ret == -EAGAIN) + ret = 0; break; default: - pr_err("Unknown packet mark %u\n", skb->mark); - BUG(); + ret = 0; break; } if (ret < 0) - goto copy_error; - -terminal_message: - _debug("terminal"); - msg->msg_flags &= ~MSG_MORE; - msg->msg_flags |= MSG_EOR; + goto error; - if (!(flags & MSG_PEEK)) { - _net("free terminal skb %p", skb); - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); - rxrpc_release_call(rx, call); + if (call->state == RXRPC_CALL_COMPLETE) { + ret = rxrpc_recvmsg_term(call, msg); + if (ret < 0) + goto error; + if (!(flags & MSG_PEEK)) + rxrpc_release_call(rx, call); + msg->msg_flags |= MSG_EOR; + ret = 1; } - release_sock(&rx->sk); - rxrpc_put_call(call, rxrpc_call_put); - if (continue_call) - rxrpc_put_call(continue_call, rxrpc_call_put); - _leave(" = %d", ret); - return ret; + if (ret == 0) + msg->msg_flags |= MSG_MORE; + else + msg->msg_flags &= ~MSG_MORE; + ret = copied; -copy_error: - _debug("copy error"); - release_sock(&rx->sk); +error: rxrpc_put_call(call, rxrpc_call_put); - if (continue_call) - rxrpc_put_call(continue_call, rxrpc_call_put); +error_no_call: + release_sock(&rx->sk); _leave(" = %d", ret); return ret; @@ -339,85 +499,8 @@ wait_interrupted: ret = sock_intr_errno(timeo); wait_error: finish_wait(sk_sleep(&rx->sk), &wait); - if (continue_call) - rxrpc_put_call(continue_call, rxrpc_call_put); - if (copied) - copied = ret; - _leave(" = %d [waitfail %d]", copied, ret); - return copied; - -} - -/* - * Deliver messages to a call. This keeps processing packets until the buffer - * is filled and we find either more DATA (returns 0) or the end of the DATA - * (returns 1). If more packets are required, it returns -EAGAIN. - * - * TODO: Note that this is hacked in at the moment and will be replaced. - */ -static int temp_deliver_data(struct socket *sock, struct rxrpc_call *call, - struct iov_iter *iter, size_t size, - size_t *_offset) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - size_t remain; - int ret, copy; - - _enter("%d", call->debug_id); - -next: - local_bh_disable(); - skb = skb_dequeue(&call->knlrecv_queue); - local_bh_enable(); - if (!skb) { - if (test_bit(RXRPC_CALL_RX_NO_MORE, &call->flags)) - return 1; - _leave(" = -EAGAIN [empty]"); - return -EAGAIN; - } - - sp = rxrpc_skb(skb); - _debug("dequeued %p %u/%zu", skb, sp->offset, size); - - switch (skb->mark) { - case RXRPC_SKB_MARK_DATA: - remain = size - *_offset; - if (remain > 0) { - copy = skb->len - sp->offset; - if (copy > remain) - copy = remain; - ret = skb_copy_datagram_iter(skb, sp->offset, iter, - copy); - if (ret < 0) - goto requeue_and_leave; - - /* handle piecemeal consumption of data packets */ - sp->offset += copy; - *_offset += copy; - } - - if (sp->offset < skb->len) - goto partially_used_skb; - - /* We consumed the whole packet */ - ASSERTCMP(sp->offset, ==, skb->len); - if (sp->hdr.flags & RXRPC_LAST_PACKET) - set_bit(RXRPC_CALL_RX_NO_MORE, &call->flags); - rxrpc_kernel_data_consumed(call, skb); - rxrpc_free_skb(skb); - goto next; - - default: - rxrpc_free_skb(skb); - goto next; - } - -partially_used_skb: - ASSERTCMP(*_offset, ==, size); - ret = 0; -requeue_and_leave: - skb_queue_head(&call->knlrecv_queue, skb); + release_sock(&rx->sk); + _leave(" = %d [wait]", ret); return ret; } @@ -453,8 +536,9 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, struct kvec iov; int ret; - _enter("{%d,%s},%zu,%d", - call->debug_id, rxrpc_call_states[call->state], size, want_more); + _enter("{%d,%s},%zu/%zu,%d", + call->debug_id, rxrpc_call_states[call->state], + *_offset, size, want_more); ASSERTCMP(*_offset, <=, size); ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING); @@ -469,7 +553,8 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, case RXRPC_CALL_CLIENT_RECV_REPLY: case RXRPC_CALL_SERVER_RECV_REQUEST: case RXRPC_CALL_SERVER_ACK_REQUEST: - ret = temp_deliver_data(sock, call, &iter, size, _offset); + ret = rxrpc_recvmsg_data(sock, call, NULL, &iter, size, 0, + _offset); if (ret < 0) goto out; @@ -494,7 +579,6 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, goto call_complete; default: - *_offset = 0; ret = -EINPROGRESS; goto out; } diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 3777432df10b..ae392558829d 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -317,6 +317,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, * decrypt partial encryption on a packet (level 1 security) */ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, rxrpc_seq_t seq) { struct rxkad_level1_hdr sechdr; @@ -330,18 +331,20 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, _enter(""); - if (skb->len < 8) { + if (len < 8) { rxrpc_abort_call("V1H", call, seq, RXKADSEALEDINCON, EPROTO); goto protocol_error; } - /* we want to decrypt the skbuff in-place */ + /* Decrypt the skbuff in-place. TODO: We really want to decrypt + * directly into the target buffer. + */ nsg = skb_cow_data(skb, 0, &trailer); if (nsg < 0 || nsg > 16) goto nomem; sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, 8); + skb_to_sgvec(skb, sg, offset, 8); /* start the decryption afresh */ memset(&iv, 0, sizeof(iv)); @@ -353,12 +356,12 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, skcipher_request_zero(req); /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) { + if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { rxrpc_abort_call("XV1", call, seq, RXKADDATALEN, EPROTO); goto protocol_error; } - if (!skb_pull(skb, sizeof(sechdr))) - BUG(); + offset += sizeof(sechdr); + len -= sizeof(sechdr); buf = ntohl(sechdr.data_size); data_size = buf & 0xffff; @@ -371,18 +374,16 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, goto protocol_error; } - /* shorten the packet to remove the padding */ - if (data_size > skb->len) { + if (data_size > len) { rxrpc_abort_call("V1L", call, seq, RXKADDATALEN, EPROTO); goto protocol_error; } - if (data_size < skb->len) - skb->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; protocol_error: + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); _leave(" = -EPROTO"); return -EPROTO; @@ -395,6 +396,7 @@ nomem: * wholly decrypt a packet (level 2 security) */ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, rxrpc_seq_t seq) { const struct rxrpc_key_token *token; @@ -409,12 +411,14 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, _enter(",{%d}", skb->len); - if (skb->len < 8) { + if (len < 8) { rxrpc_abort_call("V2H", call, seq, RXKADSEALEDINCON, EPROTO); goto protocol_error; } - /* we want to decrypt the skbuff in-place */ + /* Decrypt the skbuff in-place. TODO: We really want to decrypt + * directly into the target buffer. + */ nsg = skb_cow_data(skb, 0, &trailer); if (nsg < 0) goto nomem; @@ -427,7 +431,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, } sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, skb->len); + skb_to_sgvec(skb, sg, offset, len); /* decrypt from the session key */ token = call->conn->params.key->payload.data[0]; @@ -435,19 +439,19 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, skcipher_request_set_tfm(req, call->conn->cipher); skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg, sg, skb->len, iv.x); + skcipher_request_set_crypt(req, sg, sg, len, iv.x); crypto_skcipher_decrypt(req); skcipher_request_zero(req); if (sg != _sg) kfree(sg); /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) { + if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { rxrpc_abort_call("XV2", call, seq, RXKADDATALEN, EPROTO); goto protocol_error; } - if (!skb_pull(skb, sizeof(sechdr))) - BUG(); + offset += sizeof(sechdr); + len -= sizeof(sechdr); buf = ntohl(sechdr.data_size); data_size = buf & 0xffff; @@ -460,17 +464,16 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, goto protocol_error; } - if (data_size > skb->len) { + if (data_size > len) { rxrpc_abort_call("V2L", call, seq, RXKADDATALEN, EPROTO); goto protocol_error; } - if (data_size < skb->len) - skb->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; protocol_error: + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); _leave(" = -EPROTO"); return -EPROTO; @@ -484,6 +487,7 @@ nomem: * jumbo packet). */ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, rxrpc_seq_t seq, u16 expected_cksum) { SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); @@ -521,6 +525,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, if (cksum != expected_cksum) { rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); _leave(" = -EPROTO [csum failed]"); return -EPROTO; } @@ -529,14 +534,60 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, case RXRPC_SECURITY_PLAIN: return 0; case RXRPC_SECURITY_AUTH: - return rxkad_verify_packet_1(call, skb, seq); + return rxkad_verify_packet_1(call, skb, offset, len, seq); case RXRPC_SECURITY_ENCRYPT: - return rxkad_verify_packet_2(call, skb, seq); + return rxkad_verify_packet_2(call, skb, offset, len, seq); default: return -ENOANO; } } +/* + * Locate the data contained in a packet that was partially encrypted. + */ +static void rxkad_locate_data_1(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + struct rxkad_level1_hdr sechdr; + + if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0) + BUG(); + *_offset += sizeof(sechdr); + *_len = ntohl(sechdr.data_size) & 0xffff; +} + +/* + * Locate the data contained in a packet that was completely encrypted. + */ +static void rxkad_locate_data_2(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + struct rxkad_level2_hdr sechdr; + + if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0) + BUG(); + *_offset += sizeof(sechdr); + *_len = ntohl(sechdr.data_size) & 0xffff; +} + +/* + * Locate the data contained in an already decrypted packet. + */ +static void rxkad_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + switch (call->conn->params.security_level) { + case RXRPC_SECURITY_AUTH: + rxkad_locate_data_1(call, skb, _offset, _len); + return; + case RXRPC_SECURITY_ENCRYPT: + rxkad_locate_data_2(call, skb, _offset, _len); + return; + default: + return; + } +} + /* * issue a challenge */ @@ -704,7 +755,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, struct rxkad_challenge challenge; struct rxkad_response resp __attribute__((aligned(8))); /* must be aligned for crypto */ - struct rxrpc_skb_priv *sp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); u32 version, nonce, min_level, abort_code; int ret; @@ -722,8 +773,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, } abort_code = RXKADPACKETSHORT; - sp = rxrpc_skb(skb); - if (skb_copy_bits(skb, 0, &challenge, sizeof(challenge)) < 0) + if (skb_copy_bits(skb, sp->offset, &challenge, sizeof(challenge)) < 0) goto protocol_error; version = ntohl(challenge.version); @@ -969,7 +1019,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, { struct rxkad_response response __attribute__((aligned(8))); /* must be aligned for crypto */ - struct rxrpc_skb_priv *sp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt session_key; time_t expiry; void *ticket; @@ -980,7 +1030,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, 0, &response, sizeof(response)) < 0) + if (skb_copy_bits(skb, sp->offset, &response, sizeof(response)) < 0) goto protocol_error; if (!pskb_pull(skb, sizeof(response))) BUG(); @@ -988,7 +1038,6 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, version = ntohl(response.version); ticket_len = ntohl(response.ticket_len); kvno = ntohl(response.kvno); - sp = rxrpc_skb(skb); _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }", sp->hdr.serial, version, kvno, ticket_len); @@ -1010,7 +1059,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, return -ENOMEM; abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, 0, ticket, ticket_len) < 0) + if (skb_copy_bits(skb, sp->offset, ticket, ticket_len) < 0) goto protocol_error_free; ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key, @@ -1135,6 +1184,7 @@ const struct rxrpc_security rxkad = { .prime_packet_security = rxkad_prime_packet_security, .secure_packet = rxkad_secure_packet, .verify_packet = rxkad_verify_packet, + .locate_data = rxkad_locate_data, .issue_challenge = rxkad_issue_challenge, .respond_to_challenge = rxkad_respond_to_challenge, .verify_response = rxkad_verify_response, diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 5d79d5a9c944..82d8134e9287 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -130,20 +130,20 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn) } /* find the service */ - read_lock_bh(&local->services_lock); + read_lock(&local->services_lock); hlist_for_each_entry(rx, &local->services, listen_link) { if (rx->srx.srx_service == conn->params.service_id) goto found_service; } /* the service appears to have died */ - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); _leave(" = -ENOENT"); return -ENOENT; found_service: if (!rx->securities) { - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); _leave(" = -ENOKEY"); return -ENOKEY; } @@ -152,13 +152,13 @@ found_service: kref = keyring_search(make_key_ref(rx->securities, 1UL), &key_type_rxrpc_s, kdesc); if (IS_ERR(kref)) { - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); _leave(" = %ld [search]", PTR_ERR(kref)); return PTR_ERR(kref); } key = key_ref_to_ptr(kref); - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); conn->server_key = key; conn->security = sec; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 9a4af992fcdf..cba236575073 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include "ar-internal.h" @@ -38,19 +37,20 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, DECLARE_WAITQUEUE(myself, current); int ret; - _enter(",{%d},%ld", - CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail), - call->acks_winsz), - *timeo); + _enter(",{%u,%u,%u}", + call->tx_hard_ack, call->tx_top, call->tx_winsize); add_wait_queue(&call->waitq, &myself); for (;;) { set_current_state(TASK_INTERRUPTIBLE); ret = 0; - if (CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail), - call->acks_winsz) > 0) + if (call->tx_top - call->tx_hard_ack < call->tx_winsize) break; + if (call->state >= RXRPC_CALL_COMPLETE) { + ret = -call->error; + break; + } if (signal_pending(current)) { ret = sock_intr_errno(*timeo); break; @@ -68,36 +68,44 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, } /* - * attempt to schedule an instant Tx resend + * Schedule an instant Tx resend. */ -static inline void rxrpc_instant_resend(struct rxrpc_call *call) +static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix) { - read_lock_bh(&call->state_lock); - if (try_to_del_timer_sync(&call->resend_timer) >= 0) { - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events)) + spin_lock_bh(&call->lock); + + if (call->state < RXRPC_CALL_COMPLETE) { + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS; + if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) rxrpc_queue_call(call); } - read_unlock_bh(&call->state_lock); + + spin_unlock_bh(&call->lock); } /* - * queue a packet for transmission, set the resend timer and attempt - * to send the packet immediately + * Queue a DATA packet for transmission, set the resend timeout and send the + * packet immediately */ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, bool last) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - int ret; + rxrpc_seq_t seq = sp->hdr.seq; + int ret, ix; + + _net("queue skb %p [%d]", skb, seq); - _net("queue skb %p [%d]", skb, call->acks_head); + ASSERTCMP(seq, ==, call->tx_top + 1); - ASSERT(call->acks_window != NULL); - call->acks_window[call->acks_head] = (unsigned long) skb; + ix = seq & RXRPC_RXTX_BUFF_MASK; + rxrpc_get_skb(skb); + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK; smp_wmb(); - call->acks_head = (call->acks_head + 1) & (call->acks_winsz - 1); + call->rxtx_buffer[ix] = skb; + call->tx_top = seq; + if (last) + set_bit(RXRPC_CALL_TX_LAST, &call->flags); if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { _debug("________awaiting reply/ACK__________"); @@ -121,34 +129,17 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, _proto("Tx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); - sp->need_resend = false; - sp->resend_at = jiffies + rxrpc_resend_timeout; - if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) { - _debug("run timer"); - call->resend_timer.expires = sp->resend_at; - add_timer(&call->resend_timer); - } - - /* attempt to cancel the rx-ACK timer, deferring reply transmission if - * we're ACK'ing the request phase of an incoming call */ - ret = -EAGAIN; - if (try_to_del_timer_sync(&call->ack_timer) >= 0) { - /* the packet may be freed by rxrpc_process_call() before this - * returns */ - if (rxrpc_is_client_call(call)) - rxrpc_expose_client_call(call); - ret = rxrpc_send_data_packet(call->conn, skb); - _net("sent skb %p", skb); - } else { - _debug("failed to delete ACK timer"); - } + if (seq == 1 && rxrpc_is_client_call(call)) + rxrpc_expose_client_call(call); + sp->resend_at = jiffies + rxrpc_resend_timeout; + ret = rxrpc_send_data_packet(call->conn, skb); if (ret < 0) { _debug("need instant resend %d", ret); - sp->need_resend = true; - rxrpc_instant_resend(call); + rxrpc_instant_resend(call, ix); } + rxrpc_free_skb(skb); _leave(""); } @@ -212,9 +203,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, _debug("alloc"); - if (CIRC_SPACE(call->acks_head, - ACCESS_ONCE(call->acks_tail), - call->acks_winsz) <= 0) { + if (call->tx_top - call->tx_hard_ack >= + call->tx_winsize) { ret = -EAGAIN; if (msg->msg_flags & MSG_DONTWAIT) goto maybe_error; @@ -313,7 +303,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, memset(skb_put(skb, pad), 0, pad); } - seq = atomic_inc_return(&call->sequence); + seq = call->tx_top + 1; sp->hdr.epoch = conn->proto.epoch; sp->hdr.cid = call->cid; @@ -329,9 +319,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, sp->hdr.flags = conn->out_clientflag; if (msg_data_left(msg) == 0 && !more) sp->hdr.flags |= RXRPC_LAST_PACKET; - else if (CIRC_SPACE(call->acks_head, - ACCESS_ONCE(call->acks_tail), - call->acks_winsz) > 1) + else if (call->tx_top - call->tx_hard_ack < + call->tx_winsize) sp->hdr.flags |= RXRPC_MORE_PACKETS; if (more && seq & 1) sp->hdr.flags |= RXRPC_REQUEST_ACK; @@ -358,7 +347,7 @@ out: call_terminated: rxrpc_free_skb(skb); _leave(" = %d", -call->error); - return ret; + return -call->error; maybe_error: if (copied) @@ -451,29 +440,6 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, return 0; } -/* - * abort a call, sending an ABORT packet to the peer - */ -static void rxrpc_send_abort(struct rxrpc_call *call, const char *why, - u32 abort_code, int error) -{ - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - write_lock_bh(&call->state_lock); - - if (__rxrpc_abort_call(why, call, 0, abort_code, error)) { - del_timer_sync(&call->resend_timer); - del_timer_sync(&call->ack_timer); - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - clear_bit(RXRPC_CALL_EV_ACK, &call->events); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - rxrpc_queue_call(call); - } - - write_unlock_bh(&call->state_lock); -} - /* * Create a new client call for sendmsg(). */ @@ -549,7 +515,6 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) return PTR_ERR(call); } - rxrpc_see_call(call); _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); @@ -557,8 +522,10 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) /* it's too late for this call */ ret = -ESHUTDOWN; } else if (cmd == RXRPC_CMD_SEND_ABORT) { - rxrpc_send_abort(call, "CMD", abort_code, ECONNABORTED); ret = 0; + if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED)) + ret = rxrpc_send_call_packet(call, + RXRPC_PACKET_TYPE_ABORT); } else if (cmd != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; } else if (rxrpc_is_client_call(call) && @@ -639,7 +606,8 @@ void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, lock_sock(sock->sk); - rxrpc_send_abort(call, why, abort_code, error); + if (rxrpc_abort_call(why, call, 0, abort_code, error)) + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); release_sock(sock->sk); _leave(""); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 9b8f8456d3bf..620d9ccaf3c1 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -18,133 +18,6 @@ #include #include "ar-internal.h" -/* - * set up for the ACK at the end of the receive phase when we discard the final - * receive phase data packet - * - called with softirqs disabled - */ -static void rxrpc_request_final_ACK(struct rxrpc_call *call) -{ - /* the call may be aborted before we have a chance to ACK it */ - write_lock(&call->state_lock); - - switch (call->state) { - case RXRPC_CALL_CLIENT_RECV_REPLY: - call->state = RXRPC_CALL_CLIENT_FINAL_ACK; - _debug("request final ACK"); - - set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); - if (try_to_del_timer_sync(&call->ack_timer) >= 0) - rxrpc_queue_call(call); - break; - - case RXRPC_CALL_SERVER_RECV_REQUEST: - call->state = RXRPC_CALL_SERVER_ACK_REQUEST; - default: - break; - } - - write_unlock(&call->state_lock); -} - -/* - * drop the bottom ACK off of the call ACK window and advance the window - */ -static void rxrpc_hard_ACK_data(struct rxrpc_call *call, struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - int loop; - u32 seq; - - spin_lock_bh(&call->lock); - - _debug("hard ACK #%u", sp->hdr.seq); - - for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) { - call->ackr_window[loop] >>= 1; - call->ackr_window[loop] |= - call->ackr_window[loop + 1] << (BITS_PER_LONG - 1); - } - - seq = sp->hdr.seq; - ASSERTCMP(seq, ==, call->rx_data_eaten + 1); - call->rx_data_eaten = seq; - - if (call->ackr_win_top < UINT_MAX) - call->ackr_win_top++; - - ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE, - call->rx_data_post, >=, call->rx_data_recv); - ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE, - call->rx_data_recv, >=, call->rx_data_eaten); - - if (sp->hdr.flags & RXRPC_LAST_PACKET) { - rxrpc_request_final_ACK(call); - } else if (atomic_dec_and_test(&call->ackr_not_idle) && - test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) { - /* We previously soft-ACK'd some received packets that have now - * been consumed, so send a hard-ACK if no more packets are - * immediately forthcoming to allow the transmitter to free up - * its Tx bufferage. - */ - _debug("send Rx idle ACK"); - __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, - skb->priority, sp->hdr.serial, false); - } - - spin_unlock_bh(&call->lock); -} - -/** - * rxrpc_kernel_data_consumed - Record consumption of data message - * @call: The call to which the message pertains. - * @skb: Message holding data - * - * Record the consumption of a data message and generate an ACK if appropriate. - * The call state is shifted if this was the final packet. The caller must be - * in process context with no spinlocks held. - * - * TODO: Actually generate the ACK here rather than punting this to the - * workqueue. - */ -void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - _enter("%d,%p{%u}", call->debug_id, skb, sp->hdr.seq); - - ASSERTCMP(sp->call, ==, call); - ASSERTCMP(sp->hdr.type, ==, RXRPC_PACKET_TYPE_DATA); - - /* TODO: Fix the sequence number tracking */ - ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv); - ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1); - ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten); - - call->rx_data_recv = sp->hdr.seq; - rxrpc_hard_ACK_data(call, skb); -} - -/* - * Destroy a packet that has an RxRPC control buffer - */ -void rxrpc_packet_destructor(struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_call *call = sp->call; - - _enter("%p{%p}", skb, call); - - if (call) { - rxrpc_put_call_for_skb(call, skb); - sp->call = NULL; - } - - if (skb->sk) - sock_rfree(skb); - _leave(""); -} - /* * Note the existence of a new-to-us socket buffer (allocated or dequeued). */ -- cgit v1.2.3 From 9f5afeae51526b3ad7b7cb21ee8b145ce6ea7a7a Mon Sep 17 00:00:00 2001 From: Yaogong Wang Date: Wed, 7 Sep 2016 14:49:28 -0700 Subject: tcp: use an RB tree for ooo receive queue Over the years, TCP BDP has increased by several orders of magnitude, and some people are considering to reach the 2 Gbytes limit. Even with current window scale limit of 14, ~1 Gbytes maps to ~740,000 MSS. In presence of packet losses (or reorders), TCP stores incoming packets into an out of order queue, and number of skbs sitting there waiting for the missing packets to be received can be in the 10^5 range. Most packets are appended to the tail of this queue, and when packets can finally be transferred to receive queue, we scan the queue from its head. However, in presence of heavy losses, we might have to find an arbitrary point in this queue, involving a linear scan for every incoming packet, throwing away cpu caches. This patch converts it to a RB tree, to get bounded latencies. Yaogong wrote a preliminary patch about 2 years ago. Eric did the rebase, added ofo_last_skb cache, polishing and tests. Tested with network dropping between 1 and 10 % packets, with good success (about 30 % increase of throughput in stress tests) Next step would be to also use an RB tree for the write queue at sender side ;) Signed-off-by: Yaogong Wang Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Ilpo Järvinen Acked-By: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 + include/linux/tcp.h | 7 +- include/net/tcp.h | 2 +- net/core/skbuff.c | 19 +++ net/ipv4/tcp.c | 4 +- net/ipv4/tcp_input.c | 330 +++++++++++++++++++++++++++-------------------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 1 - 8 files changed, 218 insertions(+), 149 deletions(-) (limited to 'include/net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cfb7219be665..4c5662f05bda 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2402,6 +2402,8 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } +void skb_rbtree_purge(struct rb_root *root); + void *netdev_alloc_frag(unsigned int fragsz); struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 7be9b1242354..c723a465125d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -281,10 +281,9 @@ struct tcp_sock { struct sk_buff* lost_skb_hint; struct sk_buff *retransmit_skb_hint; - /* OOO segments go in this list. Note that socket lock must be held, - * as we do not use sk_buff_head lock. - */ - struct sk_buff_head out_of_order_queue; + /* OOO segments go in this rbtree. Socket lock must be held. */ + struct rb_root out_of_order_queue; + struct sk_buff *ooo_last_skb; /* cache rb_last(out_of_order_queue) */ /* SACKs data, these 2 need to be together (see tcp_options_write) */ struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ diff --git a/include/net/tcp.h b/include/net/tcp.h index d6ae36512429..fdfbedd61c67 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -640,7 +640,7 @@ static inline void tcp_fast_path_check(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - if (skb_queue_empty(&tp->out_of_order_queue) && + if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && tp->rcv_wnd && atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && !tp->urg_data) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3864b4b68fa1..1e329d411242 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2444,6 +2444,25 @@ void skb_queue_purge(struct sk_buff_head *list) } EXPORT_SYMBOL(skb_queue_purge); +/** + * skb_rbtree_purge - empty a skb rbtree + * @root: root of the rbtree to empty + * + * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from + * the list and one reference dropped. This function does not take + * any lock. Synchronization should be handled by the caller (e.g., TCP + * out-of-order queue is protected by the socket lock). + */ +void skb_rbtree_purge(struct rb_root *root) +{ + struct sk_buff *skb, *next; + + rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode) + kfree_skb(skb); + + *root = RB_ROOT; +} + /** * skb_queue_head - queue a buffer at the list head * @list: list to use diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 77311a92275c..a13fcb369f52 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -380,7 +380,7 @@ void tcp_init_sock(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - __skb_queue_head_init(&tp->out_of_order_queue); + tp->out_of_order_queue = RB_ROOT; tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); INIT_LIST_HEAD(&tp->tsq_node); @@ -2243,7 +2243,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); tcp_write_queue_purge(sk); - __skb_queue_purge(&tp->out_of_order_queue); + skb_rbtree_purge(&tp->out_of_order_queue); inet->inet_dport = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8cd02c0b056c..a5934c4c8cd4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4108,7 +4108,7 @@ void tcp_fin(struct sock *sk) /* It _is_ possible, that we have something out-of-order _after_ FIN. * Probably, we should reset in this case. For now drop them. */ - __skb_queue_purge(&tp->out_of_order_queue); + skb_rbtree_purge(&tp->out_of_order_queue); if (tcp_is_sack(tp)) tcp_sack_reset(&tp->rx_opt); sk_mem_reclaim(sk); @@ -4268,7 +4268,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) int this_sack; /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */ - if (skb_queue_empty(&tp->out_of_order_queue)) { + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) { tp->rx_opt.num_sacks = 0; return; } @@ -4344,10 +4344,13 @@ static void tcp_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); __u32 dsack_high = tp->rcv_nxt; + bool fin, fragstolen, eaten; struct sk_buff *skb, *tail; - bool fragstolen, eaten; + struct rb_node *p; - while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) { + p = rb_first(&tp->out_of_order_queue); + while (p) { + skb = rb_entry(p, struct sk_buff, rbnode); if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) break; @@ -4357,9 +4360,10 @@ static void tcp_ofo_queue(struct sock *sk) dsack_high = TCP_SKB_CB(skb)->end_seq; tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack); } + p = rb_next(p); + rb_erase(&skb->rbnode, &tp->out_of_order_queue); - __skb_unlink(skb, &tp->out_of_order_queue); - if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { + if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { SOCK_DEBUG(sk, "ofo packet was already received\n"); tcp_drop(sk, skb); continue; @@ -4371,12 +4375,19 @@ static void tcp_ofo_queue(struct sock *sk) tail = skb_peek_tail(&sk->sk_receive_queue); eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); + fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; if (!eaten) __skb_queue_tail(&sk->sk_receive_queue, skb); - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) - tcp_fin(sk); - if (eaten) + else kfree_skb_partial(skb, fragstolen); + + if (unlikely(fin)) { + tcp_fin(sk); + /* tcp_fin() purges tp->out_of_order_queue, + * so we must end this loop right now. + */ + break; + } } } @@ -4403,8 +4414,10 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + struct rb_node **p, *q, *parent; struct sk_buff *skb1; u32 seq, end_seq; + bool fragstolen; tcp_ecn_check_ce(tp, skb); @@ -4419,88 +4432,85 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) inet_csk_schedule_ack(sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); + seq = TCP_SKB_CB(skb)->seq; + end_seq = TCP_SKB_CB(skb)->end_seq; SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", - tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); + tp->rcv_nxt, seq, end_seq); - skb1 = skb_peek_tail(&tp->out_of_order_queue); - if (!skb1) { + p = &tp->out_of_order_queue.rb_node; + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) { /* Initial out of order segment, build 1 SACK. */ if (tcp_is_sack(tp)) { tp->rx_opt.num_sacks = 1; - tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; - tp->selective_acks[0].end_seq = - TCP_SKB_CB(skb)->end_seq; + tp->selective_acks[0].start_seq = seq; + tp->selective_acks[0].end_seq = end_seq; } - __skb_queue_head(&tp->out_of_order_queue, skb); + rb_link_node(&skb->rbnode, NULL, p); + rb_insert_color(&skb->rbnode, &tp->out_of_order_queue); + tp->ooo_last_skb = skb; goto end; } - seq = TCP_SKB_CB(skb)->seq; - end_seq = TCP_SKB_CB(skb)->end_seq; - - if (seq == TCP_SKB_CB(skb1)->end_seq) { - bool fragstolen; - - if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); - } else { - tcp_grow_window(sk, skb); - kfree_skb_partial(skb, fragstolen); - skb = NULL; + /* In the typical case, we are adding an skb to the end of the list. + * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. + */ + if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) { +coalesce_done: + tcp_grow_window(sk, skb); + kfree_skb_partial(skb, fragstolen); + skb = NULL; + goto add_sack; + } + + /* Find place to insert this segment. Handle overlaps on the way. */ + parent = NULL; + while (*p) { + parent = *p; + skb1 = rb_entry(parent, struct sk_buff, rbnode); + if (before(seq, TCP_SKB_CB(skb1)->seq)) { + p = &parent->rb_left; + continue; } - - if (!tp->rx_opt.num_sacks || - tp->selective_acks[0].end_seq != seq) - goto add_sack; - - /* Common case: data arrive in order after hole. */ - tp->selective_acks[0].end_seq = end_seq; - goto end; - } - - /* Find place to insert this segment. */ - while (1) { - if (!after(TCP_SKB_CB(skb1)->seq, seq)) - break; - if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) { - skb1 = NULL; - break; + if (before(seq, TCP_SKB_CB(skb1)->end_seq)) { + if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { + /* All the bits are present. Drop. */ + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPOFOMERGE); + __kfree_skb(skb); + skb = NULL; + tcp_dsack_set(sk, seq, end_seq); + goto add_sack; + } + if (after(seq, TCP_SKB_CB(skb1)->seq)) { + /* Partial overlap. */ + tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq); + } else { + /* skb's seq == skb1's seq and skb covers skb1. + * Replace skb1 with skb. + */ + rb_replace_node(&skb1->rbnode, &skb->rbnode, + &tp->out_of_order_queue); + tcp_dsack_extend(sk, + TCP_SKB_CB(skb1)->seq, + TCP_SKB_CB(skb1)->end_seq); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPOFOMERGE); + __kfree_skb(skb1); + goto add_sack; + } + } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { + goto coalesce_done; } - skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1); + p = &parent->rb_right; } - /* Do skb overlap to previous one? */ - if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { - if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { - /* All the bits are present. Drop. */ - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - tcp_drop(sk, skb); - skb = NULL; - tcp_dsack_set(sk, seq, end_seq); - goto add_sack; - } - if (after(seq, TCP_SKB_CB(skb1)->seq)) { - /* Partial overlap. */ - tcp_dsack_set(sk, seq, - TCP_SKB_CB(skb1)->end_seq); - } else { - if (skb_queue_is_first(&tp->out_of_order_queue, - skb1)) - skb1 = NULL; - else - skb1 = skb_queue_prev( - &tp->out_of_order_queue, - skb1); - } - } - if (!skb1) - __skb_queue_head(&tp->out_of_order_queue, skb); - else - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); + /* Insert segment into RB tree. */ + rb_link_node(&skb->rbnode, parent, p); + rb_insert_color(&skb->rbnode, &tp->out_of_order_queue); - /* And clean segments covered by new one as whole. */ - while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) { - skb1 = skb_queue_next(&tp->out_of_order_queue, skb); + /* Remove other segments covered by skb. */ + while ((q = rb_next(&skb->rbnode)) != NULL) { + skb1 = rb_entry(q, struct sk_buff, rbnode); if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) break; @@ -4509,12 +4519,15 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) end_seq); break; } - __skb_unlink(skb1, &tp->out_of_order_queue); + rb_erase(&skb1->rbnode, &tp->out_of_order_queue); tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); tcp_drop(sk, skb1); } + /* If there is no skb after us, we are the last_skb ! */ + if (!q) + tp->ooo_last_skb = skb; add_sack: if (tcp_is_sack(tp)) @@ -4651,13 +4664,13 @@ queue_and_out: if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) tcp_fin(sk); - if (!skb_queue_empty(&tp->out_of_order_queue)) { + if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) { tcp_ofo_queue(sk); /* RFC2581. 4.2. SHOULD send immediate ACK, when * gap in queue is filled. */ - if (skb_queue_empty(&tp->out_of_order_queue)) + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) inet_csk(sk)->icsk_ack.pingpong = 0; } @@ -4711,48 +4724,76 @@ drop: tcp_data_queue_ofo(sk, skb); } +static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list) +{ + if (list) + return !skb_queue_is_last(list, skb) ? skb->next : NULL; + + return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode); +} + static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, - struct sk_buff_head *list) + struct sk_buff_head *list, + struct rb_root *root) { - struct sk_buff *next = NULL; + struct sk_buff *next = tcp_skb_next(skb, list); - if (!skb_queue_is_last(list, skb)) - next = skb_queue_next(list, skb); + if (list) + __skb_unlink(skb, list); + else + rb_erase(&skb->rbnode, root); - __skb_unlink(skb, list); __kfree_skb(skb); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); return next; } +/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */ +static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct sk_buff *skb1; + + while (*p) { + parent = *p; + skb1 = rb_entry(parent, struct sk_buff, rbnode); + if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq)) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + rb_link_node(&skb->rbnode, parent, p); + rb_insert_color(&skb->rbnode, root); +} + /* Collapse contiguous sequence of skbs head..tail with * sequence numbers start..end. * - * If tail is NULL, this means until the end of the list. + * If tail is NULL, this means until the end of the queue. * * Segments with FIN/SYN are not collapsed (only because this * simplifies code) */ static void -tcp_collapse(struct sock *sk, struct sk_buff_head *list, - struct sk_buff *head, struct sk_buff *tail, - u32 start, u32 end) +tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root, + struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end) { - struct sk_buff *skb, *n; + struct sk_buff *skb = head, *n; + struct sk_buff_head tmp; bool end_of_skbs; /* First, check that queue is collapsible and find - * the point where collapsing can be useful. */ - skb = head; + * the point where collapsing can be useful. + */ restart: - end_of_skbs = true; - skb_queue_walk_from_safe(list, skb, n) { - if (skb == tail) - break; + for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) { + n = tcp_skb_next(skb, list); + /* No new bits? It is possible on ofo queue. */ if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - skb = tcp_collapse_one(sk, skb, list); + skb = tcp_collapse_one(sk, skb, list, root); if (!skb) break; goto restart; @@ -4770,13 +4811,10 @@ restart: break; } - if (!skb_queue_is_last(list, skb)) { - struct sk_buff *next = skb_queue_next(list, skb); - if (next != tail && - TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) { - end_of_skbs = false; - break; - } + if (n && n != tail && + TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) { + end_of_skbs = false; + break; } /* Decided to skip this, advance start seq. */ @@ -4786,17 +4824,22 @@ restart: (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) return; + __skb_queue_head_init(&tmp); + while (before(start, end)) { int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start); struct sk_buff *nskb; nskb = alloc_skb(copy, GFP_ATOMIC); if (!nskb) - return; + break; memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; - __skb_queue_before(list, skb, nskb); + if (list) + __skb_queue_before(list, skb, nskb); + else + __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */ skb_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. */ @@ -4814,14 +4857,17 @@ restart: start += size; } if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - skb = tcp_collapse_one(sk, skb, list); + skb = tcp_collapse_one(sk, skb, list, root); if (!skb || skb == tail || (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) - return; + goto end; } } } +end: + skb_queue_walk_safe(&tmp, skb, n) + tcp_rbtree_insert(root, skb); } /* Collapse ofo queue. Algorithm: select contiguous sequence of skbs @@ -4830,43 +4876,43 @@ restart: static void tcp_collapse_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb = skb_peek(&tp->out_of_order_queue); - struct sk_buff *head; + struct sk_buff *skb, *head; + struct rb_node *p; u32 start, end; - if (!skb) + p = rb_first(&tp->out_of_order_queue); + skb = rb_entry_safe(p, struct sk_buff, rbnode); +new_range: + if (!skb) { + p = rb_last(&tp->out_of_order_queue); + /* Note: This is possible p is NULL here. We do not + * use rb_entry_safe(), as ooo_last_skb is valid only + * if rbtree is not empty. + */ + tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode); return; - + } start = TCP_SKB_CB(skb)->seq; end = TCP_SKB_CB(skb)->end_seq; - head = skb; - - for (;;) { - struct sk_buff *next = NULL; - if (!skb_queue_is_last(&tp->out_of_order_queue, skb)) - next = skb_queue_next(&tp->out_of_order_queue, skb); - skb = next; + for (head = skb;;) { + skb = tcp_skb_next(skb, NULL); - /* Segment is terminated when we see gap or when - * we are at the end of all the queue. */ + /* Range is terminated when we see a gap or when + * we are at the queue end. + */ if (!skb || after(TCP_SKB_CB(skb)->seq, end) || before(TCP_SKB_CB(skb)->end_seq, start)) { - tcp_collapse(sk, &tp->out_of_order_queue, + tcp_collapse(sk, NULL, &tp->out_of_order_queue, head, skb, start, end); - head = skb; - if (!skb) - break; - /* Start new segment */ + goto new_range; + } + + if (unlikely(before(TCP_SKB_CB(skb)->seq, start))) start = TCP_SKB_CB(skb)->seq; + if (after(TCP_SKB_CB(skb)->end_seq, end)) end = TCP_SKB_CB(skb)->end_seq; - } else { - if (before(TCP_SKB_CB(skb)->seq, start)) - start = TCP_SKB_CB(skb)->seq; - if (after(TCP_SKB_CB(skb)->end_seq, end)) - end = TCP_SKB_CB(skb)->end_seq; - } } } @@ -4883,20 +4929,24 @@ static void tcp_collapse_ofo_queue(struct sock *sk) static bool tcp_prune_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; + struct rb_node *node, *prev; - if (skb_queue_empty(&tp->out_of_order_queue)) + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) return false; NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); - - while ((skb = __skb_dequeue_tail(&tp->out_of_order_queue)) != NULL) { - tcp_drop(sk, skb); + node = &tp->ooo_last_skb->rbnode; + do { + prev = rb_prev(node); + rb_erase(node, &tp->out_of_order_queue); + tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode)); sk_mem_reclaim(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && !tcp_under_memory_pressure(sk)) break; - } + node = prev; + } while (node); + tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode); /* Reset SACK state. A conforming SACK implementation will * do the same at a timeout based retransmit. When a connection @@ -4930,7 +4980,7 @@ static int tcp_prune_queue(struct sock *sk) tcp_collapse_ofo_queue(sk); if (!skb_queue_empty(&sk->sk_receive_queue)) - tcp_collapse(sk, &sk->sk_receive_queue, + tcp_collapse(sk, &sk->sk_receive_queue, NULL, skb_peek(&sk->sk_receive_queue), NULL, tp->copied_seq, tp->rcv_nxt); @@ -5035,7 +5085,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) /* We ACK each frame or... */ tcp_in_quickack_mode(sk) || /* We have out of order data. */ - (ofo_possible && skb_peek(&tp->out_of_order_queue))) { + (ofo_possible && !RB_EMPTY_ROOT(&tp->out_of_order_queue))) { /* Then ack it now */ tcp_send_ack(sk); } else { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a75bf48d7950..04b989328558 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1845,7 +1845,7 @@ void tcp_v4_destroy_sock(struct sock *sk) tcp_write_queue_purge(sk); /* Cleans up our, hopefully empty, out_of_order_queue. */ - __skb_queue_purge(&tp->out_of_order_queue); + skb_rbtree_purge(&tp->out_of_order_queue); #ifdef CONFIG_TCP_MD5SIG /* Clean up the MD5 key list, if any */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4b95ec4ed2c8..f63c73dc0acb 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -488,7 +488,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->snd_cwnd_cnt = 0; tcp_init_xmit_timers(newsk); - __skb_queue_head_init(&newtp->out_of_order_queue); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; newtp->rx_opt.saw_tstamp = 0; -- cgit v1.2.3 From 3c15b8e112d7351b2586c649a759318548523364 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 6 Sep 2016 22:35:47 +0800 Subject: netfilter: nf_conntrack: remove unused ctl_table_path member in nf_conntrack_l3proto After commit adf0516845bc ("netfilter: remove ip_conntrack* sysctl compat code"), ctl_table_path member in struct nf_conntrack_l3proto{} is not used anymore, remove it. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index cdc920b4c4c2..8992e4229da9 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -63,10 +63,6 @@ struct nf_conntrack_l3proto { size_t nla_size; -#ifdef CONFIG_SYSCTL - const char *ctl_table_path; -#endif /* CONFIG_SYSCTL */ - /* Init l3proto pernet data */ int (*init_net)(struct net *net); -- cgit v1.2.3 From d817f432c2ab7639a4f69de73eafdc55e57c45ad Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 8 Sep 2016 16:23:45 +0300 Subject: net/ip_tunnels: Introduce tunnel_id_to_key32() and key32_to_tunnel_id() Add utility functions to convert a 32 bits key into a 64 bits tunnel and vice versa. These functions will be used instead of cloning code in GRE and VXLAN, and in tc act_iptunnel which will be introduced in a following patch in this patchset. Signed-off-by: Amir Vadai Signed-off-by: Hadar Hen Zion Reviewed-by: Shmulik Ladkani Acked-by: Jiri Benc Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 4 ++-- include/net/ip_tunnels.h | 19 +++++++++++++++++++ include/net/vxlan.h | 18 ------------------ net/ipv4/ip_gre.c | 23 ++--------------------- 4 files changed, 23 insertions(+), 41 deletions(-) (limited to 'include/net') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 199dec033cf8..4bfeb9765c55 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1291,7 +1291,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) struct metadata_dst *tun_dst; tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY, - vxlan_vni_to_tun_id(vni), sizeof(*md)); + key32_to_tunnel_id(vni), sizeof(*md)); if (!tun_dst) goto drop; @@ -1945,7 +1945,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto drop; } dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; - vni = vxlan_tun_id_to_vni(info->key.tun_id); + vni = tunnel_id_to_key32(info->key.tun_id); remote_ip.sa.sa_family = ip_tunnel_info_af(info); if (remote_ip.sa.sa_family == AF_INET) { remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index a5e7035fb93f..e598c639aa6f 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -222,6 +222,25 @@ static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET; } +static inline __be64 key32_to_tunnel_id(__be32 key) +{ +#ifdef __BIG_ENDIAN + return (__force __be64)key; +#else + return (__force __be64)((__force u64)key << 32); +#endif +} + +/* Returns the least-significant 32 bits of a __be64. */ +static inline __be32 tunnel_id_to_key32(__be64 tun_id) +{ +#ifdef __BIG_ENDIAN + return (__force __be32)tun_id; +#else + return (__force __be32)((__force u64)tun_id >> 32); +#endif +} + #ifdef CONFIG_INET int ip_tunnel_init(struct net_device *dev); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index b96d0360c095..0255613a54a4 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -350,24 +350,6 @@ static inline __be32 vxlan_vni_field(__be32 vni) #endif } -static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id) -{ -#if defined(__BIG_ENDIAN) - return (__force __be32)tun_id; -#else - return (__force __be32)((__force u64)tun_id >> 32); -#endif -} - -static inline __be64 vxlan_vni_to_tun_id(__be32 vni) -{ -#if defined(__BIG_ENDIAN) - return (__force __be64)vni; -#else - return (__force __be64)((u64)(__force u32)vni << 32); -#endif -} - static inline size_t vxlan_rco_start(__be32 vni_field) { return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 113cc43df789..576f705d8180 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -246,25 +246,6 @@ static void gre_err(struct sk_buff *skb, u32 info) ipgre_err(skb, info, &tpi); } -static __be64 key_to_tunnel_id(__be32 key) -{ -#ifdef __BIG_ENDIAN - return (__force __be64)((__force u32)key); -#else - return (__force __be64)((__force u64)key << 32); -#endif -} - -/* Returns the least-significant 32 bits of a __be64. */ -static __be32 tunnel_id_to_key(__be64 x) -{ -#ifdef __BIG_ENDIAN - return (__force __be32)x; -#else - return (__force __be32)((__force u64)x >> 32); -#endif -} - static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct ip_tunnel_net *itn, int hdr_len, bool raw_proto) { @@ -290,7 +271,7 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, __be64 tun_id; flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY); - tun_id = key_to_tunnel_id(tpi->key); + tun_id = key32_to_tunnel_id(tpi->key); tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0); if (!tun_dst) return PACKET_REJECT; @@ -446,7 +427,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); gre_build_header(skb, tunnel_hlen, flags, proto, - tunnel_id_to_key(tun_info->key.tun_id), 0); + tunnel_id_to_key32(tun_info->key.tun_id), 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; -- cgit v1.2.3 From 2ff378b7474feac1ec665d01e4dfc6907cccc11c Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 8 Sep 2016 16:23:46 +0300 Subject: net/dst: Utility functions to build dst_metadata without supplying an skb Extract __ip_tun_set_dst() and __ipv6_tun_set_dst() out of ip_tun_rx_dst() and ipv6_tun_rx_dst(), to be used without supplying an skb. Signed-off-by: Amir Vadai Signed-off-by: Hadar Hen Zion Acked-by: Jiri Pirko Reviewed-by: Shmulik Ladkani Signed-off-by: David S. Miller --- include/net/dst_metadata.h | 52 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 5db9f5910428..6965c8f68ade 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -112,12 +112,13 @@ static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb return &dst->u.tun_info; } -static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, - __be16 flags, - __be64 tunnel_id, - int md_size) +static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, + __be32 daddr, + __u8 tos, __u8 ttl, + __be16 flags, + __be64 tunnel_id, + int md_size) { - const struct iphdr *iph = ip_hdr(skb); struct metadata_dst *tun_dst; tun_dst = tun_rx_dst(md_size); @@ -125,17 +126,30 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, return NULL; ip_tunnel_key_init(&tun_dst->u.tun_info.key, - iph->saddr, iph->daddr, iph->tos, iph->ttl, + saddr, daddr, tos, ttl, 0, 0, 0, tunnel_id, flags); return tun_dst; } -static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, +static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, __be16 flags, __be64 tunnel_id, int md_size) { - const struct ipv6hdr *ip6h = ipv6_hdr(skb); + const struct iphdr *iph = ip_hdr(skb); + + return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, + flags, tunnel_id, md_size); +} + +static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u8 tos, __u8 ttl, + __be32 label, + __be16 flags, + __be64 tunnel_id, + int md_size) +{ struct metadata_dst *tun_dst; struct ip_tunnel_info *info; @@ -150,14 +164,26 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, info->key.tp_src = 0; info->key.tp_dst = 0; - info->key.u.ipv6.src = ip6h->saddr; - info->key.u.ipv6.dst = ip6h->daddr; + info->key.u.ipv6.src = *saddr; + info->key.u.ipv6.dst = *daddr; - info->key.tos = ipv6_get_dsfield(ip6h); - info->key.ttl = ip6h->hop_limit; - info->key.label = ip6_flowlabel(ip6h); + info->key.tos = tos; + info->key.ttl = ttl; + info->key.label = label; return tun_dst; } +static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, + __be16 flags, + __be64 tunnel_id, + int md_size) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + + return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr, + ipv6_get_dsfield(ip6h), ip6h->hop_limit, + ip6_flowlabel(ip6h), flags, tunnel_id, + md_size); +} #endif /* __NET_DST_METADATA_H */ -- cgit v1.2.3 From d0f6dd8a914f42c6f1a3a8c08caa16559d3d9a1b Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 8 Sep 2016 16:23:48 +0300 Subject: net/sched: Introduce act_tunnel_key This action could be used before redirecting packets to a shared tunnel device, or when redirecting packets arriving from a such a device. The action will release the metadata created by the tunnel device (decap), or set the metadata with the specified values for encap operation. For example, the following flower filter will forward all ICMP packets destined to 11.11.11.2 through the shared vxlan device 'vxlan0'. Before redirecting, a metadata for the vxlan tunnel is created using the tunnel_key action and it's arguments: $ tc filter add dev net0 protocol ip parent ffff: \ flower \ ip_proto 1 \ dst_ip 11.11.11.2 \ action tunnel_key set \ src_ip 11.11.0.1 \ dst_ip 11.11.0.2 \ id 11 \ action mirred egress redirect dev vxlan0 Signed-off-by: Amir Vadai Signed-off-by: Hadar Hen Zion Reviewed-by: Shmulik Ladkani Acked-by: Jamal Hadi Salim Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tc_act/tc_tunnel_key.h | 30 +++ include/uapi/linux/tc_act/tc_tunnel_key.h | 41 ++++ net/sched/Kconfig | 11 + net/sched/Makefile | 1 + net/sched/act_tunnel_key.c | 351 ++++++++++++++++++++++++++++++ 5 files changed, 434 insertions(+) create mode 100644 include/net/tc_act/tc_tunnel_key.h create mode 100644 include/uapi/linux/tc_act/tc_tunnel_key.h create mode 100644 net/sched/act_tunnel_key.c (limited to 'include/net') diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h new file mode 100644 index 000000000000..253f8da6c2a6 --- /dev/null +++ b/include/net/tc_act/tc_tunnel_key.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2016, Amir Vadai + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __NET_TC_TUNNEL_KEY_H +#define __NET_TC_TUNNEL_KEY_H + +#include + +struct tcf_tunnel_key_params { + struct rcu_head rcu; + int tcft_action; + int action; + struct metadata_dst *tcft_enc_metadata; +}; + +struct tcf_tunnel_key { + struct tc_action common; + struct tcf_tunnel_key_params __rcu *params; +}; + +#define to_tunnel_key(a) ((struct tcf_tunnel_key *)a) + +#endif /* __NET_TC_TUNNEL_KEY_H */ diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h new file mode 100644 index 000000000000..890106ff16e6 --- /dev/null +++ b/include/uapi/linux/tc_act/tc_tunnel_key.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016, Amir Vadai + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __LINUX_TC_TUNNEL_KEY_H +#define __LINUX_TC_TUNNEL_KEY_H + +#include + +#define TCA_ACT_TUNNEL_KEY 17 + +#define TCA_TUNNEL_KEY_ACT_SET 1 +#define TCA_TUNNEL_KEY_ACT_RELEASE 2 + +struct tc_tunnel_key { + tc_gen; + int t_action; +}; + +enum { + TCA_TUNNEL_KEY_UNSPEC, + TCA_TUNNEL_KEY_TM, + TCA_TUNNEL_KEY_PARMS, + TCA_TUNNEL_KEY_ENC_IPV4_SRC, /* be32 */ + TCA_TUNNEL_KEY_ENC_IPV4_DST, /* be32 */ + TCA_TUNNEL_KEY_ENC_IPV6_SRC, /* struct in6_addr */ + TCA_TUNNEL_KEY_ENC_IPV6_DST, /* struct in6_addr */ + TCA_TUNNEL_KEY_ENC_KEY_ID, /* be64 */ + TCA_TUNNEL_KEY_PAD, + __TCA_TUNNEL_KEY_MAX, +}; + +#define TCA_TUNNEL_KEY_MAX (__TCA_TUNNEL_KEY_MAX - 1) + +#endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index ccf931b3b94c..72e3426fa48f 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -761,6 +761,17 @@ config NET_ACT_IFE To compile this code as a module, choose M here: the module will be called act_ife. +config NET_ACT_TUNNEL_KEY + tristate "IP tunnel metadata manipulation" + depends on NET_CLS_ACT + ---help--- + Say Y here to set/release ip tunnel metadata. + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_tunnel_key. + config NET_IFE_SKBMARK tristate "Support to encoding decoding skb mark on IFE action" depends on NET_ACT_IFE diff --git a/net/sched/Makefile b/net/sched/Makefile index ae088a5a9d95..b9d046b9535a 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o obj-$(CONFIG_NET_ACT_IFE) += act_ife.o obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o +obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c new file mode 100644 index 000000000000..dceff7412dc3 --- /dev/null +++ b/net/sched/act_tunnel_key.c @@ -0,0 +1,351 @@ +/* + * Copyright (c) 2016, Amir Vadai + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define TUNNEL_KEY_TAB_MASK 15 + +static int tunnel_key_net_id; +static struct tc_action_ops act_tunnel_key_ops; + +static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + int action; + + rcu_read_lock(); + + params = rcu_dereference(t->params); + + tcf_lastuse_update(&t->tcf_tm); + bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb); + action = params->action; + + switch (params->tcft_action) { + case TCA_TUNNEL_KEY_ACT_RELEASE: + skb_dst_drop(skb); + break; + case TCA_TUNNEL_KEY_ACT_SET: + skb_dst_drop(skb); + skb_dst_set(skb, dst_clone(¶ms->tcft_enc_metadata->dst)); + break; + default: + WARN_ONCE(1, "Bad tunnel_key action %d.\n", + params->tcft_action); + break; + } + + rcu_read_unlock(); + + return action; +} + +static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { + [TCA_TUNNEL_KEY_PARMS] = { .len = sizeof(struct tc_tunnel_key) }, + [TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 }, + [TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NLA_U32 }, + [TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, + [TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 }, +}; + +static int tunnel_key_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, + int ovr, int bind) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; + struct tcf_tunnel_key_params *params_old; + struct tcf_tunnel_key_params *params_new; + struct metadata_dst *metadata = NULL; + struct tc_tunnel_key *parm; + struct tcf_tunnel_key *t; + bool exists = false; + __be64 key_id; + int ret = 0; + int err; + + if (!nla) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy); + if (err < 0) + return err; + + if (!tb[TCA_TUNNEL_KEY_PARMS]) + return -EINVAL; + + parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]); + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + switch (parm->t_action) { + case TCA_TUNNEL_KEY_ACT_RELEASE: + break; + case TCA_TUNNEL_KEY_ACT_SET: + if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) { + ret = -EINVAL; + goto err_out; + } + + key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID])); + + if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] && + tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) { + __be32 saddr; + __be32 daddr; + + saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]); + daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]); + + metadata = __ip_tun_set_dst(saddr, daddr, 0, 0, + TUNNEL_KEY, key_id, 0); + } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] && + tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) { + struct in6_addr saddr; + struct in6_addr daddr; + + saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]); + daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]); + + metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0, + TUNNEL_KEY, key_id, 0); + } + + if (!metadata) { + ret = -EINVAL; + goto err_out; + } + + metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX; + break; + default: + goto err_out; + } + + if (!exists) { + ret = tcf_hash_create(tn, parm->index, est, a, + &act_tunnel_key_ops, bind, true); + if (ret) + return ret; + + ret = ACT_P_CREATED; + } else { + tcf_hash_release(*a, bind); + if (!ovr) + return -EEXIST; + } + + t = to_tunnel_key(*a); + + ASSERT_RTNL(); + params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); + if (unlikely(!params_new)) { + if (ret == ACT_P_CREATED) + tcf_hash_release(*a, bind); + return -ENOMEM; + } + + params_old = rtnl_dereference(t->params); + + params_new->action = parm->action; + params_new->tcft_action = parm->t_action; + params_new->tcft_enc_metadata = metadata; + + rcu_assign_pointer(t->params, params_new); + + if (params_old) + kfree_rcu(params_old, rcu); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(tn, *a); + + return ret; + +err_out: + if (exists) + tcf_hash_release(*a, bind); + return ret; +} + +static void tunnel_key_release(struct tc_action *a, int bind) +{ + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + + rcu_read_lock(); + params = rcu_dereference(t->params); + + if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) + dst_release(¶ms->tcft_enc_metadata->dst); + + kfree_rcu(params, rcu); + + rcu_read_unlock(); +} + +static int tunnel_key_dump_addresses(struct sk_buff *skb, + const struct ip_tunnel_info *info) +{ + unsigned short family = ip_tunnel_info_af(info); + + if (family == AF_INET) { + __be32 saddr = info->key.u.ipv4.src; + __be32 daddr = info->key.u.ipv4.dst; + + if (!nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_SRC, saddr) && + !nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_DST, daddr)) + return 0; + } + + if (family == AF_INET6) { + const struct in6_addr *saddr6 = &info->key.u.ipv6.src; + const struct in6_addr *daddr6 = &info->key.u.ipv6.dst; + + if (!nla_put_in6_addr(skb, + TCA_TUNNEL_KEY_ENC_IPV6_SRC, saddr6) && + !nla_put_in6_addr(skb, + TCA_TUNNEL_KEY_ENC_IPV6_DST, daddr6)) + return 0; + } + + return -EINVAL; +} + +static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + struct tc_tunnel_key opt = { + .index = t->tcf_index, + .refcnt = t->tcf_refcnt - ref, + .bindcnt = t->tcf_bindcnt - bind, + }; + struct tcf_t tm; + int ret = -1; + + rcu_read_lock(); + params = rcu_dereference(t->params); + + opt.t_action = params->tcft_action; + opt.action = params->action; + + if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) { + struct ip_tunnel_key *key = + ¶ms->tcft_enc_metadata->u.tun_info.key; + __be32 key_id = tunnel_id_to_key32(key->tun_id); + + if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) || + tunnel_key_dump_addresses(skb, + ¶ms->tcft_enc_metadata->u.tun_info)) + goto nla_put_failure; + } + + tcf_tm_dump(&tm, &t->tcf_tm); + if (nla_put_64bit(skb, TCA_TUNNEL_KEY_TM, sizeof(tm), + &tm, TCA_TUNNEL_KEY_PAD)) + goto nla_put_failure; + + ret = skb->len; + goto out; + +nla_put_failure: + nlmsg_trim(skb, b); +out: + rcu_read_unlock(); + + return ret; +} + +static int tunnel_key_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tcf_generic_walker(tn, skb, cb, type, ops); +} + +static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tcf_hash_search(tn, a, index); +} + +static struct tc_action_ops act_tunnel_key_ops = { + .kind = "tunnel_key", + .type = TCA_ACT_TUNNEL_KEY, + .owner = THIS_MODULE, + .act = tunnel_key_act, + .dump = tunnel_key_dump, + .init = tunnel_key_init, + .cleanup = tunnel_key_release, + .walk = tunnel_key_walker, + .lookup = tunnel_key_search, + .size = sizeof(struct tcf_tunnel_key), +}; + +static __net_init int tunnel_key_init_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tc_action_net_init(tn, &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK); +} + +static void __net_exit tunnel_key_exit_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + tc_action_net_exit(tn); +} + +static struct pernet_operations tunnel_key_net_ops = { + .init = tunnel_key_init_net, + .exit = tunnel_key_exit_net, + .id = &tunnel_key_net_id, + .size = sizeof(struct tc_action_net), +}; + +static int __init tunnel_key_init_module(void) +{ + return tcf_register_action(&act_tunnel_key_ops, &tunnel_key_net_ops); +} + +static void __exit tunnel_key_cleanup_module(void) +{ + tcf_unregister_action(&act_tunnel_key_ops, &tunnel_key_net_ops); +} + +module_init(tunnel_key_init_module); +module_exit(tunnel_key_cleanup_module); + +MODULE_AUTHOR("Amir Vadai "); +MODULE_DESCRIPTION("ip tunnel manipulation actions"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 9ee0034b8f49aaaa7e7c2da8db1038915db99c19 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:52 -0700 Subject: net: flow: Add l3mdev flow update Add l3mdev hook to set FLOWI_FLAG_SKIP_NH_OIF flag and update oif/iif in flow struct if its oif or iif points to a device enslaved to an L3 Master device. Only 1 needs to be converted to match the l3mdev FIB rule. This moves the flow adjustment for l3mdev to a single point catching all lookups. It is redundant for existing hooks (those are removed in later patches) but is needed for missed lookups such as PMTU updates. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 6 ++++++ net/ipv4/fib_rules.c | 3 +++ net/ipv6/fib6_rules.c | 3 +++ net/l3mdev/l3mdev.c | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+) (limited to 'include/net') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index e90095091aa0..81e175e80537 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -49,6 +49,8 @@ struct l3mdev_ops { int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg); +void l3mdev_update_flow(struct net *net, struct flowi *fl); + int l3mdev_master_ifindex_rcu(const struct net_device *dev); static inline int l3mdev_master_ifindex(struct net_device *dev) { @@ -290,6 +292,10 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, { return 1; } +static inline +void l3mdev_update_flow(struct net *net, struct flowi *fl) +{ +} #endif #endif /* _NET_L3MDEV_H_ */ diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 6e9ea69e5f75..770bebed6b28 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -56,6 +56,9 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, }; int err; + /* update flow if oif or iif point to device enslaved to l3mdev */ + l3mdev_update_flow(net, flowi4_to_flowi(flp)); + err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg); #ifdef CONFIG_IP_ROUTE_CLASSID if (arg.rule) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 5857c1fc8b67..eea23b57c6a5 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -38,6 +38,9 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, .flags = FIB_LOOKUP_NOREF, }; + /* update flow if oif or iif point to device enslaved to l3mdev */ + l3mdev_update_flow(net, flowi6_to_flowi(fl6)); + fib_rules_lookup(net->ipv6.fib6_rules_ops, flowi6_to_flowi(fl6), flags, &arg); diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index c4a1c3e84e12..43610e5acc4e 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -222,3 +222,38 @@ out: return rc; } + +void l3mdev_update_flow(struct net *net, struct flowi *fl) +{ + struct net_device *dev; + int ifindex; + + rcu_read_lock(); + + if (fl->flowi_oif) { + dev = dev_get_by_index_rcu(net, fl->flowi_oif); + if (dev) { + ifindex = l3mdev_master_ifindex_rcu(dev); + if (ifindex) { + fl->flowi_oif = ifindex; + fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; + goto out; + } + } + } + + if (fl->flowi_iif) { + dev = dev_get_by_index_rcu(net, fl->flowi_iif); + if (dev) { + ifindex = l3mdev_master_ifindex_rcu(dev); + if (ifindex) { + fl->flowi_iif = ifindex; + fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; + } + } + } + +out: + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(l3mdev_update_flow); -- cgit v1.2.3 From a8e3e1a9f02094145580ea7920c6a1d9aabd5539 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:53 -0700 Subject: net: l3mdev: Add hook to output path This patch adds the infrastructure to the output path to pass an skb to an l3mdev device if it has a hook registered. This is the Tx parallel to l3mdev_ip{6}_rcv in the receive path and is the basis for removing the existing hook that returns the vrf dst on the fib lookup. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/ip_output.c | 8 ++++++++ net/ipv6/ip6_output.c | 8 ++++++++ net/ipv6/output_core.c | 7 +++++++ net/ipv6/raw.c | 7 +++++++ 5 files changed, 78 insertions(+) (limited to 'include/net') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 81e175e80537..53d5274920e3 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -11,6 +11,7 @@ #ifndef _NET_L3MDEV_H_ #define _NET_L3MDEV_H_ +#include #include /** @@ -18,6 +19,10 @@ * * @l3mdev_fib_table: Get FIB table id to use for lookups * + * @l3mdev_l3_rcv: Hook in L3 receive path + * + * @l3mdev_l3_out: Hook in L3 output path + * * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device * * @l3mdev_get_saddr: Get source address for a flow @@ -29,6 +34,9 @@ struct l3mdev_ops { u32 (*l3mdev_fib_table)(const struct net_device *dev); struct sk_buff * (*l3mdev_l3_rcv)(struct net_device *dev, struct sk_buff *skb, u16 proto); + struct sk_buff * (*l3mdev_l3_out)(struct net_device *dev, + struct sock *sk, struct sk_buff *skb, + u16 proto); /* IPv4 ops */ struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, @@ -201,6 +209,34 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) return l3mdev_l3_rcv(skb, AF_INET6); } +static inline +struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto) +{ + struct net_device *dev = skb_dst(skb)->dev; + + if (netif_is_l3_slave(dev)) { + struct net_device *master; + + master = netdev_master_upper_dev_get_rcu(dev); + if (master && master->l3mdev_ops->l3mdev_l3_out) + skb = master->l3mdev_ops->l3mdev_l3_out(master, sk, + skb, proto); + } + + return skb; +} + +static inline +struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb) +{ + return l3mdev_l3_out(sk, skb, AF_INET); +} + +static inline +struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb) +{ + return l3mdev_l3_out(sk, skb, AF_INET6); +} #else static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev) @@ -286,6 +322,18 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) return skb; } +static inline +struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb) +{ + return skb; +} + +static inline +struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb) +{ + return skb; +} + static inline int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b913f5bf0757..41e10e34769c 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -99,6 +99,14 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) iph->tot_len = htons(skb->len); ip_send_check(iph); + + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip_out(sk, skb); + if (unlikely(!skb)) + return 0; + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 993fd9666f1b..6ea6caace3a8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -236,6 +236,14 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); + + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_out((struct sock *)sk, skb); + if (unlikely(!skb)) + return 0; + /* hooks should never assume socket lock is held. * we promote our socket to non const */ diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 462f2a76b5c2..7cca8ac66fe9 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -148,6 +148,13 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_out(sk, skb); + if (unlikely(!skb)) + return 0; + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 590dd1f7746f..54404f08efcc 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -653,6 +653,13 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, if (err) goto error_fault; + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_out(sk, skb); + if (unlikely(!skb)) + return 0; + IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, dst_output); -- cgit v1.2.3 From 5f02ce24c2696fec33f2a5dfcf753996f5fdd211 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:54 -0700 Subject: net: l3mdev: Allow the l3mdev to be a loopback Allow an L3 master device to act as the loopback for that L3 domain. For IPv4 the device can also have the address 127.0.0.1. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 6 +++--- net/ipv4/route.c | 8 ++++++-- net/ipv6/route.c | 12 ++++++++++-- 3 files changed, 19 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 53d5274920e3..3ee110518584 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -90,7 +90,7 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) } static inline -const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) +struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) { /* netdev_master_upper_dev_get_rcu calls * list_first_or_null_rcu to walk the upper dev list. @@ -99,7 +99,7 @@ const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) * typecast to remove the const */ struct net_device *dev = (struct net_device *)_dev; - const struct net_device *master; + struct net_device *master; if (!dev) return NULL; @@ -254,7 +254,7 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) } static inline -const struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) +struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) { return NULL; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3e992783c1d0..f49b2c534e92 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2018,7 +2018,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, return ERR_PTR(-EINVAL); if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) - if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK)) + if (ipv4_is_loopback(fl4->saddr) && + !(dev_out->flags & IFF_LOOPBACK) && + !netif_is_l3_master(dev_out)) return ERR_PTR(-EINVAL); if (ipv4_is_lbcast(fl4->daddr)) @@ -2302,7 +2304,9 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, else fl4->saddr = fl4->daddr; } - dev_out = net->loopback_dev; + + /* L3 master device is the loopback for that domain */ + dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev; fl4->flowi4_oif = dev_out->ifindex; flags |= RTCF_LOCAL; goto make_route; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 09d43ff11a8d..2c681113c055 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2558,8 +2558,16 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { u32 tb_id; struct net *net = dev_net(idev->dev); - struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, - DST_NOCOUNT); + struct net_device *dev = net->loopback_dev; + struct rt6_info *rt; + + /* use L3 Master device as loopback for host routes if device + * is enslaved and address is not link local or multicast + */ + if (!rt6_need_strict(addr)) + dev = l3mdev_master_dev_rcu(idev->dev) ? : dev; + + rt = ip6_dst_alloc(net, dev, DST_NOCOUNT); if (!rt) return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From 4c1feac58e06270321cc500b85c2d94a11495775 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:56 -0700 Subject: net: vrf: Flip IPv6 output path from FIB lookup hook to out hook Flip the IPv6 output path to use the l3mdev tx out hook. The VRF dst is not returned on the first FIB lookup. Instead, the dst on the skb is switched at the beginning of the IPv6 output processing to send the packet to the VRF driver on xmit. Link scope addresses (linklocal and multicast) need special handling: specifically the oif the flow struct can not be changed because we want the lookup tied to the enslaved interface. ie., the source address and the returned route MUST point to the interface scope passed in. Convert the existing vrf_get_rt6_dst to handle only link scope addresses. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 124 ++++++++++++++++++++++++++++++++++----------------- include/net/l3mdev.h | 8 ++-- net/ipv6/route.c | 11 +++-- net/l3mdev/l3mdev.c | 15 +++---- 4 files changed, 100 insertions(+), 58 deletions(-) (limited to 'include/net') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 08540b96ec18..f5372edf6edc 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -137,6 +137,20 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, } #if IS_ENABLED(CONFIG_IPV6) +static int vrf_ip6_local_out(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + int err; + + err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, + sk, skb, NULL, skb_dst(skb)->dev, dst_output); + + if (likely(err == 1)) + err = dst_output(net, sk, skb); + + return err; +} + static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, struct net_device *dev) { @@ -207,7 +221,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, /* strip the ethernet header added for pass through VRF device */ __skb_pull(skb, skb_network_offset(skb)); - ret = ip6_local_out(net, skb->sk, skb); + ret = vrf_ip6_local_out(net, skb->sk, skb); if (unlikely(net_xmit_eval(ret))) dev->stats.tx_errors++; else @@ -391,6 +405,43 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb) !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } +/* set dst on skb to send packet to us via dev_xmit path. Allows + * packet to go through device based features such as qdisc, netfilter + * hooks and packet sockets with skb->dev set to vrf device. + */ +static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + struct net_vrf *vrf = netdev_priv(vrf_dev); + struct dst_entry *dst = NULL; + struct rt6_info *rt6; + + /* don't divert link scope packets */ + if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) + return skb; + + rcu_read_lock(); + + rt6 = rcu_dereference(vrf->rt6); + if (likely(rt6)) { + dst = &rt6->dst; + dst_hold(dst); + } + + rcu_read_unlock(); + + if (unlikely(!dst)) { + vrf_tx_error(vrf_dev, skb); + return NULL; + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + return skb; +} + /* holding rtnl */ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { @@ -477,6 +528,13 @@ out: return rc; } #else +static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + return skb; +} + static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { } @@ -587,6 +645,8 @@ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev, switch (proto) { case AF_INET: return vrf_ip_out(vrf_dev, sk, skb); + case AF_INET6: + return vrf_ip6_out(vrf_dev, sk, skb); } return skb; @@ -1031,53 +1091,33 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev, } #if IS_ENABLED(CONFIG_IPV6) -static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, - struct flowi6 *fl6) +/* send to link-local or multicast address via interface enslaved to + * VRF device. Force lookup to VRF table without changing flow struct + */ +static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, + struct flowi6 *fl6) { - bool need_strict = rt6_need_strict(&fl6->daddr); - struct net_vrf *vrf = netdev_priv(dev); struct net *net = dev_net(dev); + int flags = RT6_LOOKUP_F_IFACE; struct dst_entry *dst = NULL; struct rt6_info *rt; - /* send to link-local or multicast address */ - if (need_strict) { - int flags = RT6_LOOKUP_F_IFACE; - - /* VRF device does not have a link-local address and - * sending packets to link-local or mcast addresses over - * a VRF device does not make sense - */ - if (fl6->flowi6_oif == dev->ifindex) { - struct dst_entry *dst = &net->ipv6.ip6_null_entry->dst; - - dst_hold(dst); - return dst; - } - - if (!ipv6_addr_any(&fl6->saddr)) - flags |= RT6_LOOKUP_F_HAS_SADDR; - - rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags); - if (rt) - dst = &rt->dst; - - } else if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) { - - rcu_read_lock(); - - rt = rcu_dereference(vrf->rt6); - if (likely(rt)) { - dst = &rt->dst; - dst_hold(dst); - } - - rcu_read_unlock(); + /* VRF device does not have a link-local address and + * sending packets to link-local or mcast addresses over + * a VRF device does not make sense + */ + if (fl6->flowi6_oif == dev->ifindex) { + dst = &net->ipv6.ip6_null_entry->dst; + dst_hold(dst); + return dst; } - /* make sure oif is set to VRF device for lookup */ - if (!need_strict) - fl6->flowi6_oif = dev->ifindex; + if (!ipv6_addr_any(&fl6->saddr)) + flags |= RT6_LOOKUP_F_HAS_SADDR; + + rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags); + if (rt) + dst = &rt->dst; return dst; } @@ -1130,7 +1170,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_l3_rcv = vrf_l3_rcv, .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) - .l3mdev_get_rt6_dst = vrf_get_rt6_dst, + .l3mdev_link_scope_lookup = vrf_link_scope_lookup, .l3mdev_get_saddr6 = vrf_get_saddr6, #endif }; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 3ee110518584..51aab20a4d0a 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -27,7 +27,7 @@ * * @l3mdev_get_saddr: Get source address for a flow * - * @l3mdev_get_rt6_dst: Get cached IPv6 rt6_info (dst_entry) for device + * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations */ struct l3mdev_ops { @@ -45,7 +45,7 @@ struct l3mdev_ops { struct flowi4 *fl4); /* IPv6 ops */ - struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev, + struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, struct flowi6 *fl6); int (*l3mdev_get_saddr6)(struct net_device *dev, const struct sock *sk, @@ -177,7 +177,7 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6); +struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); int l3mdev_get_saddr6(struct net *net, const struct sock *sk, struct flowi6 *fl6); @@ -299,7 +299,7 @@ static inline int l3mdev_get_saddr(struct net *net, int ifindex, } static inline -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6) +struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { return NULL; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2c681113c055..87e0a01ce744 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1188,12 +1188,15 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags) { - struct dst_entry *dst; bool any_src; - dst = l3mdev_get_rt6_dst(net, fl6); - if (dst) - return dst; + if (rt6_need_strict(&fl6->daddr)) { + struct dst_entry *dst; + + dst = l3mdev_link_scope_lookup(net, fl6); + if (dst) + return dst; + } fl6->flowi6_iif = LOOPBACK_IFINDEX; diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index 43610e5acc4e..ac9d928d0a9e 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -100,15 +100,14 @@ u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index); /** - * l3mdev_get_rt6_dst - IPv6 route lookup based on flow. Returns - * cached route for L3 master device if relevant - * to flow + * l3mdev_link_scope_lookup - IPv6 route lookup based on flow for link + * local and multicast addresses * @net: network namespace for device index lookup * @fl6: IPv6 flow struct for lookup */ -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, - struct flowi6 *fl6) +struct dst_entry *l3mdev_link_scope_lookup(struct net *net, + struct flowi6 *fl6) { struct dst_entry *dst = NULL; struct net_device *dev; @@ -121,15 +120,15 @@ struct dst_entry *l3mdev_get_rt6_dst(struct net *net, dev = netdev_master_upper_dev_get_rcu(dev); if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_get_rt6_dst) - dst = dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6); + dev->l3mdev_ops->l3mdev_link_scope_lookup) + dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6); rcu_read_unlock(); } return dst; } -EXPORT_SYMBOL_GPL(l3mdev_get_rt6_dst); +EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup); /** * l3mdev_get_saddr - get source address for a flow based on an interface -- cgit v1.2.3 From d66f6c0a8f3c0bcc4ee7a9b1da4b0ebe7ee555a3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:58 -0700 Subject: net: ipv4: Remove l3mdev_get_saddr No longer needed Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 38 -------------------------------------- include/net/l3mdev.h | 12 ------------ include/net/route.h | 10 ---------- net/ipv4/raw.c | 6 ------ net/ipv4/udp.c | 6 ------ net/l3mdev/l3mdev.c | 31 ------------------------------- 6 files changed, 103 deletions(-) (limited to 'include/net') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index f5372edf6edc..9ad2a169485f 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -863,43 +863,6 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev, return rth; } -/* called under rcu_read_lock */ -static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4) -{ - struct fib_result res = { .tclassid = 0 }; - struct net *net = dev_net(dev); - u32 orig_tos = fl4->flowi4_tos; - u8 flags = fl4->flowi4_flags; - u8 scope = fl4->flowi4_scope; - u8 tos = RT_FL_TOS(fl4); - int rc; - - if (unlikely(!fl4->daddr)) - return 0; - - fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF; - fl4->flowi4_iif = LOOPBACK_IFINDEX; - /* make sure oif is set to VRF device for lookup */ - fl4->flowi4_oif = dev->ifindex; - fl4->flowi4_tos = tos & IPTOS_RT_MASK; - fl4->flowi4_scope = ((tos & RTO_ONLINK) ? - RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); - - rc = fib_lookup(net, fl4, &res, 0); - if (!rc) { - if (res.type == RTN_LOCAL) - fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr; - else - fib_select_path(net, &res, fl4, -1); - } - - fl4->flowi4_flags = flags; - fl4->flowi4_tos = orig_tos; - fl4->flowi4_scope = scope; - - return rc; -} - static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { return 0; @@ -1166,7 +1129,6 @@ static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk, static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, .l3mdev_get_rtable = vrf_get_rtable, - .l3mdev_get_saddr = vrf_get_saddr, .l3mdev_l3_rcv = vrf_l3_rcv, .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 51aab20a4d0a..1129e1d8cd6e 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -25,8 +25,6 @@ * * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device * - * @l3mdev_get_saddr: Get source address for a flow - * * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations */ @@ -41,8 +39,6 @@ struct l3mdev_ops { /* IPv4 ops */ struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, const struct flowi4 *fl4); - int (*l3mdev_get_saddr)(struct net_device *dev, - struct flowi4 *fl4); /* IPv6 ops */ struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, @@ -175,8 +171,6 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) return rc; } -int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); - struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); int l3mdev_get_saddr6(struct net *net, const struct sock *sk, struct flowi6 *fl6); @@ -292,12 +286,6 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) return false; } -static inline int l3mdev_get_saddr(struct net *net, int ifindex, - struct flowi4 *fl4) -{ - return 0; -} - static inline struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { diff --git a/include/net/route.h b/include/net/route.h index ad777d79af94..0429d47cad25 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -285,15 +284,6 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, ip_route_connect_init(fl4, dst, src, tos, oif, protocol, sport, dport, sk); - if (!src && oif) { - int rc; - - rc = l3mdev_get_saddr(net, oif, fl4); - if (rc < 0) - return ERR_PTR(rc); - - src = fl4->saddr; - } if (!dst || !src) { rt = __ip_route_output_key(net, fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 438f50c1a676..90a85c955872 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -606,12 +606,6 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0); - if (!saddr && ipc.oif) { - err = l3mdev_get_saddr(net, ipc.oif, &fl4); - if (err < 0) - goto done; - } - if (!inet->hdrincl) { rfv.msg = msg; rfv.hlen = 0; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 058c31286ce1..7d96dc2d3d08 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1021,12 +1021,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flow_flags, faddr, saddr, dport, inet->inet_sport); - if (!saddr && ipc.oif) { - err = l3mdev_get_saddr(net, ipc.oif, fl4); - if (err < 0) - goto out; - } - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) { diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index ac9d928d0a9e..be40df60703c 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -130,37 +130,6 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net, } EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup); -/** - * l3mdev_get_saddr - get source address for a flow based on an interface - * enslaved to an L3 master device - * @net: network namespace for device index lookup - * @ifindex: Interface index - * @fl4: IPv4 flow struct - */ - -int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4) -{ - struct net_device *dev; - int rc = 0; - - if (ifindex) { - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, ifindex); - if (dev && netif_is_l3_slave(dev)) - dev = netdev_master_upper_dev_get_rcu(dev); - - if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_get_saddr) - rc = dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4); - - rcu_read_unlock(); - } - - return rc; -} -EXPORT_SYMBOL_GPL(l3mdev_get_saddr); - int l3mdev_get_saddr6(struct net *net, const struct sock *sk, struct flowi6 *fl6) { -- cgit v1.2.3 From 8a966fc016b67d2a8ab4a83d22ded8cde032a0eb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:09:59 -0700 Subject: net: ipv6: Remove l3mdev_get_saddr6 No longer needed Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 41 ----------------------------------------- include/net/l3mdev.h | 11 ----------- net/ipv6/ip6_output.c | 9 +-------- net/l3mdev/l3mdev.c | 24 ------------------------ 4 files changed, 1 insertion(+), 84 deletions(-) (limited to 'include/net') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 9ad2a169485f..3a34f547c578 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1084,46 +1084,6 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, return dst; } - -/* called under rcu_read_lock */ -static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk, - struct flowi6 *fl6) -{ - struct net *net = dev_net(dev); - struct dst_entry *dst; - struct rt6_info *rt; - int err; - - if (rt6_need_strict(&fl6->daddr)) { - rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, - RT6_LOOKUP_F_IFACE); - if (unlikely(!rt)) - return 0; - - dst = &rt->dst; - } else { - __u8 flags = fl6->flowi6_flags; - - fl6->flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; - fl6->flowi6_flags |= FLOWI_FLAG_SKIP_NH_OIF; - - dst = ip6_route_output(net, sk, fl6); - rt = (struct rt6_info *)dst; - - fl6->flowi6_flags = flags; - } - - err = dst->error; - if (!err) { - err = ip6_route_get_saddr(net, rt, &fl6->daddr, - sk ? inet6_sk(sk)->srcprefs : 0, - &fl6->saddr); - } - - dst_release(dst); - - return err; -} #endif static const struct l3mdev_ops vrf_l3mdev_ops = { @@ -1133,7 +1093,6 @@ static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) .l3mdev_link_scope_lookup = vrf_link_scope_lookup, - .l3mdev_get_saddr6 = vrf_get_saddr6, #endif }; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 1129e1d8cd6e..a5e506eb51de 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -43,9 +43,6 @@ struct l3mdev_ops { /* IPv6 ops */ struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, struct flowi6 *fl6); - int (*l3mdev_get_saddr6)(struct net_device *dev, - const struct sock *sk, - struct flowi6 *fl6); }; #ifdef CONFIG_NET_L3_MASTER_DEV @@ -172,8 +169,6 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) } struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); -int l3mdev_get_saddr6(struct net *net, const struct sock *sk, - struct flowi6 *fl6); static inline struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) @@ -292,12 +287,6 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) return NULL; } -static inline int l3mdev_get_saddr6(struct net *net, const struct sock *sk, - struct flowi6 *fl6) -{ - return 0; -} - static inline struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1cb41b365048..6001e781164e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -926,13 +926,6 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, int err; int flags = 0; - if (ipv6_addr_any(&fl6->saddr) && fl6->flowi6_oif && - (!*dst || !(*dst)->error)) { - err = l3mdev_get_saddr6(net, sk, fl6); - if (err) - goto out_err; - } - /* The correct way to handle this would be to do * ip6_route_get_saddr, and then ip6_route_output; however, * the route-specific preferred source forces the @@ -1024,7 +1017,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, out_err_release: dst_release(*dst); *dst = NULL; -out_err: + if (err == -ENETUNREACH) IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); return err; diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index be40df60703c..8da86ceca33d 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -130,30 +130,6 @@ struct dst_entry *l3mdev_link_scope_lookup(struct net *net, } EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup); -int l3mdev_get_saddr6(struct net *net, const struct sock *sk, - struct flowi6 *fl6) -{ - struct net_device *dev; - int rc = 0; - - if (fl6->flowi6_oif) { - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); - if (dev && netif_is_l3_slave(dev)) - dev = netdev_master_upper_dev_get_rcu(dev); - - if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_get_saddr6) - rc = dev->l3mdev_ops->l3mdev_get_saddr6(dev, sk, fl6); - - rcu_read_unlock(); - } - - return rc; -} -EXPORT_SYMBOL_GPL(l3mdev_get_saddr6); - /** * l3mdev_fib_rule_match - Determine if flowi references an * L3 master device -- cgit v1.2.3 From ca28b8f2b8f316b9973693c72770c98da3e9500e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:10:00 -0700 Subject: net: l3mdev: Remove l3mdev_fib_oif No longer used Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'include/net') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index a5e506eb51de..a586035c97cb 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -107,26 +107,6 @@ struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) return master; } -/* get index of an interface to use for FIB lookups. For devices - * enslaved to an L3 master device FIB lookups are based on the - * master index - */ -static inline int l3mdev_fib_oif_rcu(struct net_device *dev) -{ - return l3mdev_master_ifindex_rcu(dev) ? : dev->ifindex; -} - -static inline int l3mdev_fib_oif(struct net_device *dev) -{ - int oif; - - rcu_read_lock(); - oif = l3mdev_fib_oif_rcu(dev); - rcu_read_unlock(); - - return oif; -} - u32 l3mdev_fib_table_rcu(const struct net_device *dev); u32 l3mdev_fib_table_by_index(struct net *net, int ifindex); static inline u32 l3mdev_fib_table(const struct net_device *dev) @@ -248,15 +228,6 @@ struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) return NULL; } -static inline int l3mdev_fib_oif_rcu(struct net_device *dev) -{ - return dev ? dev->ifindex : 0; -} -static inline int l3mdev_fib_oif(struct net_device *dev) -{ - return dev ? dev->ifindex : 0; -} - static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev) { return 0; -- cgit v1.2.3 From afb460fe0ef0af6d98ed51006153acb01278df2d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:10:01 -0700 Subject: net: l3mdev: remove get_rtable method No longer used Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 21 --------------------- include/net/l3mdev.h | 21 --------------------- 2 files changed, 42 deletions(-) (limited to 'include/net') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 3a34f547c578..ccce59fbb2b3 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -843,26 +843,6 @@ static u32 vrf_fib_table(const struct net_device *dev) return vrf->tb_id; } -static struct rtable *vrf_get_rtable(const struct net_device *dev, - const struct flowi4 *fl4) -{ - struct rtable *rth = NULL; - - if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) { - struct net_vrf *vrf = netdev_priv(dev); - - rcu_read_lock(); - - rth = rcu_dereference(vrf->rth); - if (likely(rth)) - dst_hold(&rth->dst); - - rcu_read_unlock(); - } - - return rth; -} - static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { return 0; @@ -1088,7 +1068,6 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, - .l3mdev_get_rtable = vrf_get_rtable, .l3mdev_l3_rcv = vrf_l3_rcv, .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index a586035c97cb..3832099289c5 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -23,8 +23,6 @@ * * @l3mdev_l3_out: Hook in L3 output path * - * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device - * * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations */ @@ -36,10 +34,6 @@ struct l3mdev_ops { struct sock *sk, struct sk_buff *skb, u16 proto); - /* IPv4 ops */ - struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, - const struct flowi4 *fl4); - /* IPv6 ops */ struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, struct flowi6 *fl6); @@ -120,15 +114,6 @@ static inline u32 l3mdev_fib_table(const struct net_device *dev) return tb_id; } -static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev, - const struct flowi4 *fl4) -{ - if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rtable) - return dev->l3mdev_ops->l3mdev_get_rtable(dev, fl4); - - return NULL; -} - static inline bool netif_index_is_l3_master(struct net *net, int ifindex) { struct net_device *dev; @@ -241,12 +226,6 @@ static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) return 0; } -static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev, - const struct flowi4 *fl4) -{ - return NULL; -} - static inline bool netif_index_is_l3_master(struct net *net, int ifindex) { return false; -- cgit v1.2.3 From c71ad3d45a5e928e617ca436f3ce88bb773fb766 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 10 Sep 2016 12:10:02 -0700 Subject: net: flow: Remove FLOWI_FLAG_L3MDEV_SRC flag No longer used Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 5 ++--- include/net/flow.h | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index ccce59fbb2b3..55674b0e65b7 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -165,7 +165,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, .flowlabel = ip6_flowinfo(iph), .flowi6_mark = skb->mark, .flowi6_proto = iph->nexthdr, - .flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF, + .flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF, }; int ret = NET_XMIT_DROP; struct dst_entry *dst; @@ -265,8 +265,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, .flowi4_oif = vrf_dev->ifindex, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_tos = RT_TOS(ip4h->tos), - .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_L3MDEV_SRC | - FLOWI_FLAG_SKIP_NH_OIF, + .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF, .daddr = ip4h->daddr, }; struct net *net = dev_net(vrf_dev); diff --git a/include/net/flow.h b/include/net/flow.h index d47ef4bb5423..035aa7716967 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -34,8 +34,7 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 -#define FLOWI_FLAG_L3MDEV_SRC 0x04 -#define FLOWI_FLAG_SKIP_NH_OIF 0x08 +#define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; }; -- cgit v1.2.3 From 5a1f044b5048e834f936fbb33a93e5d8410779ec Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 29 Aug 2016 23:25:14 +0300 Subject: cfg80211: clarify the requirements of .disconnect() cfg80211 expects the .disconnect() handler to call cfg80211_disconnect() when done. Make this requirement more explicit. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9c23f4d33e06..d5e7f690bad9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2423,7 +2423,8 @@ struct cfg80211_qos_map { * cases, the result of roaming is indicated with a call to * cfg80211_roamed() or cfg80211_roamed_bss(). * (invoked with the wireless_dev mutex held) - * @disconnect: Disconnect from the BSS/ESS. + * @disconnect: Disconnect from the BSS/ESS. Once done, call + * cfg80211_disconnected(). * (invoked with the wireless_dev mutex held) * * @join_ibss: Join the specified IBSS (or create if necessary). Once done, call -- cgit v1.2.3 From 480dd46b9d6812e5fb7172c305ee0f1154c26eed Mon Sep 17 00:00:00 2001 From: Maxim Altshul Date: Mon, 22 Aug 2016 17:14:04 +0300 Subject: mac80211: RX BA support for sta max_rx_aggregation_subframes The ability to change the max_rx_aggregation frames is useful in cases of IOP. There exist some devices (latest mobile phones and some AP's) that tend to not respect a BA sessions maximum size (in Kbps). These devices won't respect the AMPDU size that was negotiated during association (even though they do respect the maximal number of packets). This violation is characterized by a valid number of packets in a single AMPDU. Even so, the total size will exceed the size negotiated during association. Eventually, this will cause some undefined behavior, which in turn causes the hw to drop packets, causing the throughput to plummet. This patch will make the subframe limitation to be held by each station, instead of being held only by hw. Signed-off-by: Maxim Altshul Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ net/mac80211/agg-rx.c | 7 +++++-- net/mac80211/sta_info.c | 3 +++ 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cca510a585c3..a1457ca2a30c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1735,6 +1735,9 @@ struct ieee80211_sta_rates { * @supp_rates: Bitmap of supported rates (per band) * @ht_cap: HT capabilities of this STA; restricted to our own capabilities * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities + * @max_rx_aggregation_subframes: maximal amount of frames in a single AMPDU + * that this station is allowed to transmit to us. + * Can be modified by driver. * @wme: indicates whether the STA supports QoS/WME (if local devices does, * otherwise always false) * @drv_priv: data area for driver use, will always be aligned to @@ -1775,6 +1778,7 @@ struct ieee80211_sta { u16 aid; struct ieee80211_sta_ht_cap ht_cap; struct ieee80211_sta_vht_cap vht_cap; + u8 max_rx_aggregation_subframes; bool wme; u8 uapsd_queues; u8 max_sp; diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index a9aff6079c42..282e99bdb301 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -298,10 +298,13 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, buf_size = IEEE80211_MAX_AMPDU_BUF; /* make sure the size doesn't exceed the maximum supported by the hw */ - if (buf_size > local->hw.max_rx_aggregation_subframes) - buf_size = local->hw.max_rx_aggregation_subframes; + if (buf_size > sta->sta.max_rx_aggregation_subframes) + buf_size = sta->sta.max_rx_aggregation_subframes; params.buf_size = buf_size; + ht_dbg(sta->sdata, "AddBA Req buf_size=%d for %pM\n", + buf_size, sta->sta.addr); + /* examine state machine */ mutex_lock(&sta->ampdu_mlme.mtx); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 19f14c907d74..5e70fa52e1ff 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -340,6 +340,9 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, memcpy(sta->addr, addr, ETH_ALEN); memcpy(sta->sta.addr, addr, ETH_ALEN); + sta->sta.max_rx_aggregation_subframes = + local->hw.max_rx_aggregation_subframes; + sta->local = local; sta->sdata = sdata; sta->rx_stats.last_rx = jiffies; -- cgit v1.2.3 From 99ee7cae3bf3ce04e90d7b193d9f4f59a7044d91 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Aug 2016 23:25:17 +0300 Subject: mac80211: add support for radiotap timestamp field Use the existing device timestamp from the RX status information to add support for the new radiotap timestamp field. Currently only 32-bit counters are supported, but we also add the radiotap mactime where applicable. This new field allows more flexibility in where the timestamp is taken etc. The non-timestamp data in the field is taken from a new field in the hw struct. Signed-off-by: Johannes Berg --- include/net/ieee80211_radiotap.h | 21 +++++++++++++++++++++ include/net/mac80211.h | 12 ++++++++++++ net/mac80211/main.c | 3 +++ net/mac80211/rx.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+) (limited to 'include/net') diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index b0fd9476c538..ba07b9d8ed63 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -190,6 +190,10 @@ struct ieee80211_radiotap_header { * IEEE80211_RADIOTAP_VHT u16, u8, u8, u8[4], u8, u8, u16 * * Contains VHT information about this frame. + * + * IEEE80211_RADIOTAP_TIMESTAMP u64, u16, u8, u8 variable + * + * Contains timestamp information for this frame. */ enum ieee80211_radiotap_type { IEEE80211_RADIOTAP_TSFT = 0, @@ -214,6 +218,7 @@ enum ieee80211_radiotap_type { IEEE80211_RADIOTAP_MCS = 19, IEEE80211_RADIOTAP_AMPDU_STATUS = 20, IEEE80211_RADIOTAP_VHT = 21, + IEEE80211_RADIOTAP_TIMESTAMP = 22, /* valid in every it_present bitmap, even vendor namespaces */ IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE = 29, @@ -321,6 +326,22 @@ enum ieee80211_radiotap_type { #define IEEE80211_RADIOTAP_CODING_LDPC_USER2 0x04 #define IEEE80211_RADIOTAP_CODING_LDPC_USER3 0x08 +/* For IEEE80211_RADIOTAP_TIMESTAMP */ +#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MASK 0x000F +#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MS 0x0000 +#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_US 0x0001 +#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_NS 0x0003 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_MASK 0x00F0 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_BEGIN_MDPU 0x0000 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU 0x0010 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_PPDU 0x0020 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ 0x0030 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_UNKNOWN 0x00F0 + +#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_64BIT 0x00 +#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT 0x01 +#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY 0x02 + /* helpers */ static inline int ieee80211_get_radiotap_len(unsigned char *data) { diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a1457ca2a30c..08bac23c8de1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2145,6 +2145,14 @@ enum ieee80211_hw_flags { * the default is _GI | _BANDWIDTH. * Use the %IEEE80211_RADIOTAP_VHT_KNOWN_* values. * + * @radiotap_timestamp: Information for the radiotap timestamp field; if the + * 'units_pos' member is set to a non-negative value it must be set to + * a combination of a IEEE80211_RADIOTAP_TIMESTAMP_UNIT_* and a + * IEEE80211_RADIOTAP_TIMESTAMP_SPOS_* value, and then the timestamp + * field will be added and populated from the &struct ieee80211_rx_status + * device_timestamp. If the 'accuracy' member is non-negative, it's put + * into the accuracy radiotap field and the accuracy known flag is set. + * * @netdev_features: netdev features to be set in each netdev created * from this HW. Note that not all features are usable with mac80211, * other features will be rejected during HW registration. @@ -2188,6 +2196,10 @@ struct ieee80211_hw { u8 offchannel_tx_hw_queue; u8 radiotap_mcs_details; u16 radiotap_vht_details; + struct { + int units_pos; + s16 accuracy; + } radiotap_timestamp; netdev_features_t netdev_features; u8 uapsd_queues; u8 uapsd_max_sp_len; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index d00ea9b13f49..ac053a9df36d 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -660,6 +660,9 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, ieee80211_roc_setup(local); + local->hw.radiotap_timestamp.units_pos = -1; + local->hw.radiotap_timestamp.accuracy = -1; + return &local->hw; err_free: wiphy_free(wiphy); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6a265aa73a46..284f0f25e22e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -180,6 +180,11 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, len += 12; } + if (local->hw.radiotap_timestamp.units_pos >= 0) { + len = ALIGN(len, 8); + len += 12; + } + if (status->chains) { /* antenna and antenna signal fields */ len += 2 * hweight8(status->chains); @@ -447,6 +452,31 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos += 2; } + if (local->hw.radiotap_timestamp.units_pos >= 0) { + u16 accuracy = 0; + u8 flags = IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT; + + rthdr->it_present |= + cpu_to_le32(1 << IEEE80211_RADIOTAP_TIMESTAMP); + + /* ensure 8 byte alignment */ + while ((pos - (u8 *)rthdr) & 7) + pos++; + + put_unaligned_le64(status->device_timestamp, pos); + pos += sizeof(u64); + + if (local->hw.radiotap_timestamp.accuracy >= 0) { + accuracy = local->hw.radiotap_timestamp.accuracy; + flags |= IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY; + } + put_unaligned_le16(accuracy, pos); + pos += sizeof(u16); + + *pos++ = local->hw.radiotap_timestamp.units_pos; + *pos++ = flags; + } + for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) { *pos++ = status->chain_signal[chain]; *pos++ = chain; -- cgit v1.2.3 From beac5afa2d78605b70f40cf5ab5601ab10659c7f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 12:42:49 +0200 Subject: netfilter: nf_tables: ensure proper initialization of nft_pktinfo fields This patch introduces nft_set_pktinfo_unspec() that ensures proper initialization all of pktinfo fields for non-IP traffic. This is used by the bridge, netdev and arp families. This new function relies on nft_set_pktinfo_proto_unspec() to set a new tprot_set field that indicates if transport protocol information is available. Remain fields are zeroed. The meta expression has been also updated to check to tprot_set in first place given that zero is a valid tprot value. Even a handcrafted packet may come with the IPPROTO_RAW (255) protocol number so we can't rely on this value as tprot unset. Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 18 ++++++++++++++++++ include/net/netfilter/nf_tables_ipv4.h | 1 + include/net/netfilter/nf_tables_ipv6.h | 1 + net/bridge/netfilter/nf_tables_bridge.c | 6 +++--- net/ipv4/netfilter/nf_tables_arp.c | 2 +- net/netfilter/nf_tables_netdev.c | 4 +++- net/netfilter/nft_meta.c | 2 ++ 7 files changed, 29 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 8972468bc94b..a7a7cebc8d07 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -19,6 +19,7 @@ struct nft_pktinfo { const struct net_device *out; u8 pf; u8 hook; + bool tprot_set; u8 tprot; /* for x_tables compatibility */ struct xt_action_param xt; @@ -36,6 +37,23 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, pkt->pf = pkt->xt.family = state->pf; } +static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt, + struct sk_buff *skb) +{ + pkt->tprot_set = false; + pkt->tprot = 0; + pkt->xt.thoff = 0; + pkt->xt.fragoff = 0; +} + +static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + nft_set_pktinfo(pkt, skb, state); + nft_set_pktinfo_proto_unspec(pkt, skb); +} + /** * struct nft_verdict - nf_tables verdict * diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index ca6ef6bf775e..af952f7843ee 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -14,6 +14,7 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, nft_set_pktinfo(pkt, skb, state); ip = ip_hdr(pkt->skb); + pkt->tprot_set = true; pkt->tprot = ip->protocol; pkt->xt.thoff = ip_hdrlen(pkt->skb); pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET; diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 8ad39a6a5fe1..6aeee47b1b5e 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -19,6 +19,7 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, if (protohdr < 0) return -1; + pkt->tprot_set = true; pkt->tprot = protohdr; pkt->xt.thoff = thoff; pkt->xt.fragoff = frag_off; diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index a78c4e2826e5..29899887163e 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -71,7 +71,7 @@ static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt, if (nft_bridge_iphdr_validate(skb)) nft_set_pktinfo_ipv4(pkt, skb, state); else - nft_set_pktinfo(pkt, skb, state); + nft_set_pktinfo_unspec(pkt, skb, state); } static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, @@ -83,7 +83,7 @@ static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, nft_set_pktinfo_ipv6(pkt, skb, state) == 0) return; #endif - nft_set_pktinfo(pkt, skb, state); + nft_set_pktinfo_unspec(pkt, skb, state); } static unsigned int @@ -101,7 +101,7 @@ nft_do_chain_bridge(void *priv, nft_bridge_set_pktinfo_ipv6(&pkt, skb, state); break; default: - nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb, state); break; } diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index cd84d4295a20..058c034be376 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -21,7 +21,7 @@ nft_do_chain_arp(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb, state); return nft_do_chain(&pkt, priv); } diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 5eefe4a355c6..8de502b0c37b 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -41,6 +41,7 @@ nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt, else if (len < thoff) return; + pkt->tprot_set = true; pkt->tprot = iph->protocol; pkt->xt.thoff = thoff; pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET; @@ -74,6 +75,7 @@ __nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, if (protohdr < 0) return; + pkt->tprot_set = true; pkt->tprot = protohdr; pkt->xt.thoff = thoff; pkt->xt.fragoff = frag_off; @@ -102,7 +104,7 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb, nft_netdev_set_pktinfo_ipv6(&pkt, skb, state); break; default: - nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb, state); break; } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 2863f3493038..14264edf2d77 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -52,6 +52,8 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = pkt->pf; break; case NFT_META_L4PROTO: + if (!pkt->tprot_set) + goto err; *dest = pkt->tprot; break; case NFT_META_PRIORITY: -- cgit v1.2.3 From 8df9e32e7ed2978f90cce780ce6a27513044158a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 12:42:50 +0200 Subject: netfilter: nf_tables_ipv6: setup pktinfo transport field on failure to parse Make sure the pktinfo protocol fields are initialized if this fails to parse the transport header. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv6.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 6aeee47b1b5e..1e0ffd5aea47 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -15,9 +15,10 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, nft_set_pktinfo(pkt, skb, state); protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); - /* If malformed, drop it */ - if (protohdr < 0) + if (protohdr < 0) { + nft_set_pktinfo_proto_unspec(pkt, skb); return -1; + } pkt->tprot_set = true; pkt->tprot = protohdr; -- cgit v1.2.3 From ddc8b6027ad08d145a6d7a6a6abc00e43f315bd1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 12:42:51 +0200 Subject: netfilter: introduce nft_set_pktinfo_{ipv4, ipv6}_validate() These functions are extracted from the netdev family, they initialize the pktinfo structure and validate that the IPv4 and IPv6 headers are well-formed given that these functions are called from a path where layer 3 sanitization did not happen yet. These functions are placed in include/net/netfilter/nf_tables_ipv{4,6}.h so they can be reused by a follow up patch to use them from the bridge family too. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv4.h | 42 ++++++++++++++++++ include/net/netfilter/nf_tables_ipv6.h | 49 +++++++++++++++++++++ net/netfilter/nf_tables_netdev.c | 79 +--------------------------------- 3 files changed, 93 insertions(+), 77 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index af952f7843ee..968f00b82fb5 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -20,6 +20,48 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET; } +static inline int +__nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct iphdr *iph, _iph; + u32 len, thoff; + + iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph), + &_iph); + if (!iph) + return -1; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return -1; + + len = ntohs(iph->tot_len); + thoff = iph->ihl * 4; + if (skb->len < len) + return -1; + else if (len < thoff) + return -1; + + pkt->tprot_set = true; + pkt->tprot = iph->protocol; + pkt->xt.thoff = thoff; + pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET; + + return 0; +} + +static inline void +nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + nft_set_pktinfo(pkt, skb, state); + if (__nft_set_pktinfo_ipv4_validate(pkt, skb, state) < 0) + nft_set_pktinfo_proto_unspec(pkt, skb); +} + extern struct nft_af_info nft_af_ipv4; #endif diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 1e0ffd5aea47..39b7b717b540 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -28,6 +28,55 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, return 0; } +static inline int +__nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6hdr *ip6h, _ip6h; + unsigned int thoff = 0; + unsigned short frag_off; + int protohdr; + u32 pkt_len; + + ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h), + &_ip6h); + if (!ip6h) + return -1; + + if (ip6h->version != 6) + return -1; + + pkt_len = ntohs(ip6h->payload_len); + if (pkt_len + sizeof(*ip6h) > skb->len) + return -1; + + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); + if (protohdr < 0) + return -1; + + pkt->tprot_set = true; + pkt->tprot = protohdr; + pkt->xt.thoff = thoff; + pkt->xt.fragoff = frag_off; + + return 0; +#else + return -1; +#endif +} + +static inline void +nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + nft_set_pktinfo(pkt, skb, state); + if (__nft_set_pktinfo_ipv6_validate(pkt, skb, state) < 0) + nft_set_pktinfo_proto_unspec(pkt, skb); +} + extern struct nft_af_info nft_af_ipv6; #endif diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 8de502b0c37b..3e5475a833a5 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -15,81 +15,6 @@ #include #include -static inline void -nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct iphdr *iph, _iph; - u32 len, thoff; - - nft_set_pktinfo(pkt, skb, state); - - iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph), - &_iph); - if (!iph) - return; - - iph = ip_hdr(skb); - if (iph->ihl < 5 || iph->version != 4) - return; - - len = ntohs(iph->tot_len); - thoff = iph->ihl * 4; - if (skb->len < len) - return; - else if (len < thoff) - return; - - pkt->tprot_set = true; - pkt->tprot = iph->protocol; - pkt->xt.thoff = thoff; - pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET; -} - -static inline void -__nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ -#if IS_ENABLED(CONFIG_IPV6) - struct ipv6hdr *ip6h, _ip6h; - unsigned int thoff = 0; - unsigned short frag_off; - int protohdr; - u32 pkt_len; - - ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h), - &_ip6h); - if (!ip6h) - return; - - if (ip6h->version != 6) - return; - - pkt_len = ntohs(ip6h->payload_len); - if (pkt_len + sizeof(*ip6h) > skb->len) - return; - - protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); - if (protohdr < 0) - return; - - pkt->tprot_set = true; - pkt->tprot = protohdr; - pkt->xt.thoff = thoff; - pkt->xt.fragoff = frag_off; -#endif -} - -static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - nft_set_pktinfo(pkt, skb, state); - __nft_netdev_set_pktinfo_ipv6(pkt, skb, state); -} - static unsigned int nft_do_chain_netdev(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -98,10 +23,10 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb, switch (skb->protocol) { case htons(ETH_P_IP): - nft_netdev_set_pktinfo_ipv4(&pkt, skb, state); + nft_set_pktinfo_ipv4_validate(&pkt, skb, state); break; case htons(ETH_P_IPV6): - nft_netdev_set_pktinfo_ipv6(&pkt, skb, state); + nft_set_pktinfo_ipv6_validate(&pkt, skb, state); break; default: nft_set_pktinfo_unspec(&pkt, skb, state); -- cgit v1.2.3 From 10151d7b03e23afce76a59f717f2616a10ddef86 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 12:42:52 +0200 Subject: netfilter: nf_tables_bridge: use nft_set_pktinfo_ipv{4, 6}_validate Consolidate pktinfo setup and validation by using the new generic functions so we converge to the netdev family codebase. We only need a linear IPv4 and IPv6 header from the reject expression, so move nft_bridge_iphdr_validate() and nft_bridge_ip6hdr_validate() to net/bridge/netfilter/nft_reject_bridge.c. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_bridge.h | 7 ---- net/bridge/netfilter/nf_tables_bridge.c | 72 +------------------------------- net/bridge/netfilter/nft_reject_bridge.c | 44 ++++++++++++++++++- 3 files changed, 45 insertions(+), 78 deletions(-) delete mode 100644 include/net/netfilter/nf_tables_bridge.h (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables_bridge.h b/include/net/netfilter/nf_tables_bridge.h deleted file mode 100644 index 511fb79f6dad..000000000000 --- a/include/net/netfilter/nf_tables_bridge.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _NET_NF_TABLES_BRIDGE_H -#define _NET_NF_TABLES_BRIDGE_H - -int nft_bridge_iphdr_validate(struct sk_buff *skb); -int nft_bridge_ip6hdr_validate(struct sk_buff *skb); - -#endif /* _NET_NF_TABLES_BRIDGE_H */ diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 29899887163e..06f0f81456a0 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -13,79 +13,11 @@ #include #include #include -#include #include #include #include #include -int nft_bridge_iphdr_validate(struct sk_buff *skb) -{ - struct iphdr *iph; - u32 len; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - return 0; - - iph = ip_hdr(skb); - if (iph->ihl < 5 || iph->version != 4) - return 0; - - len = ntohs(iph->tot_len); - if (skb->len < len) - return 0; - else if (len < (iph->ihl*4)) - return 0; - - if (!pskb_may_pull(skb, iph->ihl*4)) - return 0; - - return 1; -} -EXPORT_SYMBOL_GPL(nft_bridge_iphdr_validate); - -int nft_bridge_ip6hdr_validate(struct sk_buff *skb) -{ - struct ipv6hdr *hdr; - u32 pkt_len; - - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - return 0; - - hdr = ipv6_hdr(skb); - if (hdr->version != 6) - return 0; - - pkt_len = ntohs(hdr->payload_len); - if (pkt_len + sizeof(struct ipv6hdr) > skb->len) - return 0; - - return 1; -} -EXPORT_SYMBOL_GPL(nft_bridge_ip6hdr_validate); - -static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - if (nft_bridge_iphdr_validate(skb)) - nft_set_pktinfo_ipv4(pkt, skb, state); - else - nft_set_pktinfo_unspec(pkt, skb, state); -} - -static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ -#if IS_ENABLED(CONFIG_IPV6) - if (nft_bridge_ip6hdr_validate(skb) && - nft_set_pktinfo_ipv6(pkt, skb, state) == 0) - return; -#endif - nft_set_pktinfo_unspec(pkt, skb, state); -} - static unsigned int nft_do_chain_bridge(void *priv, struct sk_buff *skb, @@ -95,10 +27,10 @@ nft_do_chain_bridge(void *priv, switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): - nft_bridge_set_pktinfo_ipv4(&pkt, skb, state); + nft_set_pktinfo_ipv4_validate(&pkt, skb, state); break; case htons(ETH_P_IPV6): - nft_bridge_set_pktinfo_ipv6(&pkt, skb, state); + nft_set_pktinfo_ipv6_validate(&pkt, skb, state); break; default: nft_set_pktinfo_unspec(&pkt, skb, state); diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 0b77ffbc27d6..4b3df6b0e3b9 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -37,6 +36,30 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, skb_pull(nskb, ETH_HLEN); } +static int nft_bridge_iphdr_validate(struct sk_buff *skb) +{ + struct iphdr *iph; + u32 len; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return 0; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return 0; + + len = ntohs(iph->tot_len); + if (skb->len < len) + return 0; + else if (len < (iph->ihl*4)) + return 0; + + if (!pskb_may_pull(skb, iph->ihl*4)) + return 0; + + return 1; +} + /* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT) * or the bridge port (NF_BRIDGE PREROUTING). */ @@ -143,6 +166,25 @@ static void nft_reject_br_send_v4_unreach(struct net *net, br_forward(br_port_get_rcu(dev), nskb, false, true); } +static int nft_bridge_ip6hdr_validate(struct sk_buff *skb) +{ + struct ipv6hdr *hdr; + u32 pkt_len; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + return 0; + + hdr = ipv6_hdr(skb); + if (hdr->version != 6) + return 0; + + pkt_len = ntohs(hdr->payload_len); + if (pkt_len + sizeof(struct ipv6hdr) > skb->len) + return 0; + + return 1; +} + static void nft_reject_br_send_v6_tcp_reset(struct net *net, struct sk_buff *oldskb, const struct net_device *dev, -- cgit v1.2.3 From 71212c9b04eba76faa4dca26ccd1552d6bb300c1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 12:42:53 +0200 Subject: netfilter: nf_tables: don't drop IPv6 packets that cannot parse transport This is overly conservative and not flexible at all, so better let them go through and let the filtering policy decide what to do with them. We use skb_header_pointer() all over the place so we would just fail to match when trying to access fields from malformed traffic. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv6.h | 6 ++---- net/ipv6/netfilter/nf_tables_ipv6.c | 4 +--- net/ipv6/netfilter/nft_chain_route_ipv6.c | 4 +--- 3 files changed, 4 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 39b7b717b540..d150b5066201 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -4,7 +4,7 @@ #include #include -static inline int +static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, struct sk_buff *skb, const struct nf_hook_state *state) @@ -17,15 +17,13 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); if (protohdr < 0) { nft_set_pktinfo_proto_unspec(pkt, skb); - return -1; + return; } pkt->tprot_set = true; pkt->tprot = protohdr; pkt->xt.thoff = thoff; pkt->xt.fragoff = frag_off; - - return 0; } static inline int diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 30b22f4dff55..05d05926962a 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -22,9 +22,7 @@ static unsigned int nft_do_chain_ipv6(void *priv, { struct nft_pktinfo pkt; - /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) - return NF_DROP; + nft_set_pktinfo_ipv6(&pkt, skb, state); return nft_do_chain(&pkt, priv); } diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 71d995ff3108..01eb0f658366 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -32,9 +32,7 @@ static unsigned int nf_route_table_hook(void *priv, u_int8_t hop_limit; u32 mark, flowlabel; - /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) - return NF_DROP; + nft_set_pktinfo_ipv6(&pkt, skb, state); /* save source/dest address, mark, hoplimit, flowlabel, priority */ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); -- cgit v1.2.3 From 6bd14303a908833e10ac731a3308eba938305269 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 11 Sep 2016 22:05:27 +0800 Subject: netfilter: nf_queue: get rid of dependency on IP6_NF_IPTABLES hash_v6 is used by both nftables and ip6tables, so depend on IP6_NF_IPTABLES is not properly. Actually, it only parses ipv6hdr and computes a hash value, so even if IPV6 is disabled, there's no side effect too, remove it. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 0dbce55437f2..cc8a11f7e306 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -54,7 +54,6 @@ static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval) (__force u32)iph->saddr, iph->protocol, jhash_initval); } -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); @@ -77,7 +76,6 @@ static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval) return jhash_3words(a, b, c, jhash_initval); } -#endif static inline u32 nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, @@ -85,10 +83,8 @@ nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, { if (family == NFPROTO_IPV4) queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32; -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) else if (family == NFPROTO_IPV6) queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32; -#endif return queue; } -- cgit v1.2.3 From c7e9dbcf09bddd01568113103d62423d8894eabd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 10:03:00 +0200 Subject: mac80211: remove sta_remove_debugfs driver callback No drivers implement this, relying either on the recursive directory removal to remove their debugfs, or not having any to start with. Remove the dead driver callback. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 11 ++--------- net/mac80211/debugfs_sta.c | 4 ---- net/mac80211/driver-ops.h | 15 --------------- 3 files changed, 2 insertions(+), 28 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 08bac23c8de1..d9c8ccd6b4e6 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3101,11 +3101,8 @@ enum ieee80211_reconfig_type { * * @sta_add_debugfs: Drivers can use this callback to add debugfs files * when a station is added to mac80211's station list. This callback - * and @sta_remove_debugfs should be within a CONFIG_MAC80211_DEBUGFS - * conditional. This callback can sleep. - * - * @sta_remove_debugfs: Remove the debugfs files which were added using - * @sta_add_debugfs. This callback can sleep. + * should be within a CONFIG_MAC80211_DEBUGFS conditional. This + * callback can sleep. * * @sta_notify: Notifies low level driver about power state transition of an * associated station, AP, IBSS/WDS/mesh peer etc. For a VIF operating @@ -3501,10 +3498,6 @@ struct ieee80211_ops { struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct dentry *dir); - void (*sta_remove_debugfs)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - struct dentry *dir); #endif void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index fb2693582e40..a2fcdb47a0e6 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -544,10 +544,6 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) void ieee80211_sta_debugfs_remove(struct sta_info *sta) { - struct ieee80211_local *local = sta->local; - struct ieee80211_sub_if_data *sdata = sta->sdata; - - drv_sta_remove_debugfs(local, sdata, &sta->sta, sta->debugfs_dir); debugfs_remove_recursive(sta->debugfs_dir); sta->debugfs_dir = NULL; } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index c39f93b48791..fe35a1c0dc86 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -499,21 +499,6 @@ static inline void drv_sta_add_debugfs(struct ieee80211_local *local, local->ops->sta_add_debugfs(&local->hw, &sdata->vif, sta, dir); } - -static inline void drv_sta_remove_debugfs(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, - struct dentry *dir) -{ - might_sleep(); - - sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); - - if (local->ops->sta_remove_debugfs) - local->ops->sta_remove_debugfs(&local->hw, &sdata->vif, - sta, dir); -} #endif static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local, -- cgit v1.2.3 From e8a24cd4b87247beedb1addc7b683422092047e5 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Wed, 14 Sep 2016 12:48:32 +0530 Subject: mac80211: allow driver to handle packet-loss mechanism Based on consecutive msdu failures, mac80211 triggers CQM packet-loss mechanism. Drivers like ath10k that have its own connection monitoring algorithm, offloaded to firmware for triggering station kickout. In case of station kickout, driver will report low ack status by mac80211 API (ieee80211_report_low_ack). This flag will enable the driver to completely rely on firmware events for station kickout and bypass mac80211 packet loss mechanism. Signed-off-by: Rajkumar Manoharan Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ net/mac80211/debugfs.c | 1 + net/mac80211/status.c | 6 ++++++ 3 files changed, 13 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d9c8ccd6b4e6..5296100f3889 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2018,6 +2018,11 @@ struct ieee80211_txq { * @IEEE80211_HW_TX_FRAG_LIST: Hardware (or driver) supports sending frag_list * skbs, needed for zero-copy software A-MSDU. * + * @IEEE80211_HW_REPORTS_LOW_ACK: The driver (or firmware) reports low ack event + * by ieee80211_report_low_ack() based on its own algorithm. For such + * drivers, mac80211 packet loss mechanism will not be triggered and driver + * is completely depending on firmware event for station kickout. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2058,6 +2063,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_USES_RSS, IEEE80211_HW_TX_AMSDU, IEEE80211_HW_TX_FRAG_LIST, + IEEE80211_HW_REPORTS_LOW_ACK, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 5bbb470f335f..8ca62b6bb02a 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -201,6 +201,7 @@ static const char *hw_flag_names[] = { FLAG(USES_RSS), FLAG(TX_AMSDU), FLAG(TX_FRAG_LIST), + FLAG(REPORTS_LOW_ACK), #undef FLAG }; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index fabd9ff710d9..ea39f8a7baf3 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -557,6 +557,12 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, static void ieee80211_lost_packet(struct sta_info *sta, struct ieee80211_tx_info *info) { + /* If driver relies on its own algorithm for station kickout, skip + * mac80211 packet loss mechanism. + */ + if (ieee80211_hw_check(&sta->local->hw, REPORTS_LOW_ACK)) + return; + /* This packet was aggregated but doesn't carry status info */ if ((info->flags & IEEE80211_TX_CTL_AMPDU) && !(info->flags & IEEE80211_TX_STAT_AMPDU)) -- cgit v1.2.3 From 86da71b57383d40993cb90baafb3735cffe5d800 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 12 Sep 2016 20:13:09 -0400 Subject: net_sched: Introduce skbmod action This action is intended to be an upgrade from a usability perspective from pedit (as well as operational debugability). Compare this: sudo tc filter add dev $ETH parent 1: protocol ip prio 10 \ u32 match ip protocol 1 0xff flowid 1:2 \ action pedit munge offset -14 u8 set 0x02 \ munge offset -13 u8 set 0x15 \ munge offset -12 u8 set 0x15 \ munge offset -11 u8 set 0x15 \ munge offset -10 u16 set 0x1515 \ pipe to: sudo tc filter add dev $ETH parent 1: protocol ip prio 10 \ u32 match ip protocol 1 0xff flowid 1:2 \ action skbmod dmac 02:15:15:15:15:15 Also try to do a MAC address swap with pedit or worse try to debug a policy with destination mac, source mac and etherype. Then make few rules out of those and you'll get my point. In the future common use cases on pedit can be migrated to this action (as an example different fields in ip v4/6, transports like tcp/udp/sctp etc). For this first cut, this allows modifying basic ethernet header. The most important ethernet use case at the moment is when redirecting or mirroring packets to a remote machine. The dst mac address needs a re-write so that it doesnt get dropped or confuse an interconnecting (learning) switch or dropped by a target machine (which looks at the dst mac). And at times when flipping back the packet a swap of the MAC addresses is needed. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_skbmod.h | 30 ++++ include/uapi/linux/tc_act/tc_skbmod.h | 39 +++++ net/sched/Kconfig | 11 ++ net/sched/Makefile | 1 + net/sched/act_skbmod.c | 301 ++++++++++++++++++++++++++++++++++ 5 files changed, 382 insertions(+) create mode 100644 include/net/tc_act/tc_skbmod.h create mode 100644 include/uapi/linux/tc_act/tc_skbmod.h create mode 100644 net/sched/act_skbmod.c (limited to 'include/net') diff --git a/include/net/tc_act/tc_skbmod.h b/include/net/tc_act/tc_skbmod.h new file mode 100644 index 000000000000..644a2116b47b --- /dev/null +++ b/include/net/tc_act/tc_skbmod.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2016, Jamal Hadi Salim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +*/ + +#ifndef __NET_TC_SKBMOD_H +#define __NET_TC_SKBMOD_H + +#include +#include + +struct tcf_skbmod_params { + struct rcu_head rcu; + u64 flags; /*up to 64 types of operations; extend if needed */ + u8 eth_dst[ETH_ALEN]; + u16 eth_type; + u8 eth_src[ETH_ALEN]; +}; + +struct tcf_skbmod { + struct tc_action common; + struct tcf_skbmod_params __rcu *skbmod_p; +}; +#define to_skbmod(a) ((struct tcf_skbmod *)a) + +#endif /* __NET_TC_SKBMOD_H */ diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h new file mode 100644 index 000000000000..10fc07da6c69 --- /dev/null +++ b/include/uapi/linux/tc_act/tc_skbmod.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2016, Jamal Hadi Salim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +*/ + +#ifndef __LINUX_TC_SKBMOD_H +#define __LINUX_TC_SKBMOD_H + +#include + +#define TCA_ACT_SKBMOD 15 + +#define SKBMOD_F_DMAC 0x1 +#define SKBMOD_F_SMAC 0x2 +#define SKBMOD_F_ETYPE 0x4 +#define SKBMOD_F_SWAPMAC 0x8 + +struct tc_skbmod { + tc_gen; + __u64 flags; +}; + +enum { + TCA_SKBMOD_UNSPEC, + TCA_SKBMOD_TM, + TCA_SKBMOD_PARMS, + TCA_SKBMOD_DMAC, + TCA_SKBMOD_SMAC, + TCA_SKBMOD_ETYPE, + TCA_SKBMOD_PAD, + __TCA_SKBMOD_MAX +}; +#define TCA_SKBMOD_MAX (__TCA_SKBMOD_MAX - 1) + +#endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 72e3426fa48f..7795d5a3f79a 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -749,6 +749,17 @@ config NET_ACT_CONNMARK To compile this code as a module, choose M here: the module will be called act_connmark. +config NET_ACT_SKBMOD + tristate "skb data modification action" + depends on NET_CLS_ACT + ---help--- + Say Y here to allow modification of skb data + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_skbmod. + config NET_ACT_IFE tristate "Inter-FE action based on IETF ForCES InterFE LFB" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index b9d046b9535a..148ae0d5ac2c 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o +obj-$(CONFIG_NET_ACT_SKBMOD) += act_skbmod.o obj-$(CONFIG_NET_ACT_IFE) += act_ife.o obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c new file mode 100644 index 000000000000..e7d96381c908 --- /dev/null +++ b/net/sched/act_skbmod.c @@ -0,0 +1,301 @@ +/* + * net/sched/act_skbmod.c skb data modifier + * + * Copyright (c) 2016 Jamal Hadi Salim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define SKBMOD_TAB_MASK 15 + +static int skbmod_net_id; +static struct tc_action_ops act_skbmod_ops; + +#define MAX_EDIT_LEN ETH_HLEN +static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_skbmod *d = to_skbmod(a); + int action; + struct tcf_skbmod_params *p; + u64 flags; + int err; + + tcf_lastuse_update(&d->tcf_tm); + bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); + + /* XXX: if you are going to edit more fields beyond ethernet header + * (example when you add IP header replacement or vlan swap) + * then MAX_EDIT_LEN needs to change appropriately + */ + err = skb_ensure_writable(skb, MAX_EDIT_LEN); + if (unlikely(err)) { /* best policy is to drop on the floor */ + qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); + return TC_ACT_SHOT; + } + + rcu_read_lock(); + action = READ_ONCE(d->tcf_action); + if (unlikely(action == TC_ACT_SHOT)) { + qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); + rcu_read_unlock(); + return action; + } + + p = rcu_dereference(d->skbmod_p); + flags = p->flags; + if (flags & SKBMOD_F_DMAC) + ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst); + if (flags & SKBMOD_F_SMAC) + ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src); + if (flags & SKBMOD_F_ETYPE) + eth_hdr(skb)->h_proto = p->eth_type; + rcu_read_unlock(); + + if (flags & SKBMOD_F_SWAPMAC) { + u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */ + /*XXX: I am sure we can come up with more efficient swapping*/ + ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest); + ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source); + ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr); + } + + return action; +} + +static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { + [TCA_SKBMOD_PARMS] = { .len = sizeof(struct tc_skbmod) }, + [TCA_SKBMOD_DMAC] = { .len = ETH_ALEN }, + [TCA_SKBMOD_SMAC] = { .len = ETH_ALEN }, + [TCA_SKBMOD_ETYPE] = { .type = NLA_U16 }, +}; + +static int tcf_skbmod_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, + int ovr, int bind) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + struct nlattr *tb[TCA_SKBMOD_MAX + 1]; + struct tcf_skbmod_params *p, *p_old; + struct tc_skbmod *parm; + struct tcf_skbmod *d; + bool exists = false; + u8 *daddr = NULL; + u8 *saddr = NULL; + u16 eth_type = 0; + u32 lflags = 0; + int ret = 0, err; + + if (!nla) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy); + if (err < 0) + return err; + + if (!tb[TCA_SKBMOD_PARMS]) + return -EINVAL; + + if (tb[TCA_SKBMOD_DMAC]) { + daddr = nla_data(tb[TCA_SKBMOD_DMAC]); + lflags |= SKBMOD_F_DMAC; + } + + if (tb[TCA_SKBMOD_SMAC]) { + saddr = nla_data(tb[TCA_SKBMOD_SMAC]); + lflags |= SKBMOD_F_SMAC; + } + + if (tb[TCA_SKBMOD_ETYPE]) { + eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]); + lflags |= SKBMOD_F_ETYPE; + } + + parm = nla_data(tb[TCA_SKBMOD_PARMS]); + if (parm->flags & SKBMOD_F_SWAPMAC) + lflags = SKBMOD_F_SWAPMAC; + + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + if (!lflags) + return -EINVAL; + + if (!exists) { + ret = tcf_hash_create(tn, parm->index, est, a, + &act_skbmod_ops, bind, true); + if (ret) + return ret; + + ret = ACT_P_CREATED; + } else { + tcf_hash_release(*a, bind); + if (!ovr) + return -EEXIST; + } + + d = to_skbmod(*a); + + ASSERT_RTNL(); + p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); + if (unlikely(!p)) { + if (ovr) + tcf_hash_release(*a, bind); + return -ENOMEM; + } + + p->flags = lflags; + d->tcf_action = parm->action; + + p_old = rtnl_dereference(d->skbmod_p); + + if (ovr) + spin_lock_bh(&d->tcf_lock); + + if (lflags & SKBMOD_F_DMAC) + ether_addr_copy(p->eth_dst, daddr); + if (lflags & SKBMOD_F_SMAC) + ether_addr_copy(p->eth_src, saddr); + if (lflags & SKBMOD_F_ETYPE) + p->eth_type = htons(eth_type); + + rcu_assign_pointer(d->skbmod_p, p); + if (ovr) + spin_unlock_bh(&d->tcf_lock); + + if (p_old) + kfree_rcu(p_old, rcu); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(tn, *a); + return ret; +} + +static void tcf_skbmod_cleanup(struct tc_action *a, int bind) +{ + struct tcf_skbmod *d = to_skbmod(a); + struct tcf_skbmod_params *p; + + p = rcu_dereference_protected(d->skbmod_p, 1); + kfree_rcu(p, rcu); +} + +static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + struct tcf_skbmod *d = to_skbmod(a); + unsigned char *b = skb_tail_pointer(skb); + struct tcf_skbmod_params *p = rtnl_dereference(d->skbmod_p); + struct tc_skbmod opt = { + .index = d->tcf_index, + .refcnt = d->tcf_refcnt - ref, + .bindcnt = d->tcf_bindcnt - bind, + .action = d->tcf_action, + }; + struct tcf_t t; + + opt.flags = p->flags; + if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_DMAC) && + nla_put(skb, TCA_SKBMOD_DMAC, ETH_ALEN, p->eth_dst)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_SMAC) && + nla_put(skb, TCA_SKBMOD_SMAC, ETH_ALEN, p->eth_src)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_ETYPE) && + nla_put_u16(skb, TCA_SKBMOD_ETYPE, ntohs(p->eth_type))) + goto nla_put_failure; + + tcf_tm_dump(&t, &d->tcf_tm); + if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD)) + goto nla_put_failure; + + return skb->len; +nla_put_failure: + rcu_read_unlock(); + nlmsg_trim(skb, b); + return -1; +} + +static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tcf_generic_walker(tn, skb, cb, type, ops); +} + +static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tcf_hash_search(tn, a, index); +} + +static struct tc_action_ops act_skbmod_ops = { + .kind = "skbmod", + .type = TCA_ACT_SKBMOD, + .owner = THIS_MODULE, + .act = tcf_skbmod_run, + .dump = tcf_skbmod_dump, + .init = tcf_skbmod_init, + .cleanup = tcf_skbmod_cleanup, + .walk = tcf_skbmod_walker, + .lookup = tcf_skbmod_search, + .size = sizeof(struct tcf_skbmod), +}; + +static __net_init int skbmod_init_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK); +} + +static void __net_exit skbmod_exit_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + tc_action_net_exit(tn); +} + +static struct pernet_operations skbmod_net_ops = { + .init = skbmod_init_net, + .exit = skbmod_exit_net, + .id = &skbmod_net_id, + .size = sizeof(struct tc_action_net), +}; + +MODULE_AUTHOR("Jamal Hadi Salim, "); +MODULE_DESCRIPTION("SKB data mod-ing"); +MODULE_LICENSE("GPL"); + +static int __init skbmod_init_module(void) +{ + return tcf_register_action(&act_skbmod_ops, &skbmod_net_ops); +} + +static void __exit skbmod_cleanup_module(void) +{ + tcf_unregister_action(&act_skbmod_ops, &skbmod_net_ops); +} + +module_init(skbmod_init_module); +module_exit(skbmod_cleanup_module); -- cgit v1.2.3 From cafdc45c949b9963cbfb8fe3a68d0ab16b0208ce Mon Sep 17 00:00:00 2001 From: John Crispin Date: Thu, 15 Sep 2016 16:26:40 +0200 Subject: net-next: dsa: add Qualcomm tag RX/TX handler Add support for the 2-bytes Qualcomm tag that gigabit switches such as the QCA8337/N might insert when receiving packets, or that we need to insert while targeting specific switch ports. The tag is inserted directly behind the ethernet header. Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: John Crispin Signed-off-by: David S. Miller --- include/net/dsa.h | 1 + net/dsa/Kconfig | 3 ++ net/dsa/Makefile | 1 + net/dsa/dsa.c | 3 ++ net/dsa/dsa_priv.h | 2 + net/dsa/tag_qca.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 148 insertions(+) create mode 100644 net/dsa/tag_qca.c (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 9d97c5214341..7556646db2d3 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -26,6 +26,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_TRAILER, DSA_TAG_PROTO_EDSA, DSA_TAG_PROTO_BRCM, + DSA_TAG_PROTO_QCA, DSA_TAG_LAST, /* MUST BE LAST */ }; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index ff7736f7ff42..96e47c539bee 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -38,4 +38,7 @@ config NET_DSA_TAG_EDSA config NET_DSA_TAG_TRAILER bool +config NET_DSA_TAG_QCA + bool + endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 8af4ded70f1c..a3380ed0e0be 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -7,3 +7,4 @@ dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o +dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index d8d267e9a872..66e31acfcad8 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -53,6 +53,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { #endif #ifdef CONFIG_NET_DSA_TAG_BRCM [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops, +#endif +#ifdef CONFIG_NET_DSA_TAG_QCA + [DSA_TAG_PROTO_QCA] = &qca_netdev_ops, #endif [DSA_TAG_PROTO_NONE] = &none_ops, }; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 00077a9c97f4..6cfd7388834e 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -81,5 +81,7 @@ extern const struct dsa_device_ops trailer_netdev_ops; /* tag_brcm.c */ extern const struct dsa_device_ops brcm_netdev_ops; +/* tag_qca.c */ +extern const struct dsa_device_ops qca_netdev_ops; #endif diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c new file mode 100644 index 000000000000..0c90cacee7aa --- /dev/null +++ b/net/dsa/tag_qca.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include "dsa_priv.h" + +#define QCA_HDR_LEN 2 +#define QCA_HDR_VERSION 0x2 + +#define QCA_HDR_RECV_VERSION_MASK GENMASK(15, 14) +#define QCA_HDR_RECV_VERSION_S 14 +#define QCA_HDR_RECV_PRIORITY_MASK GENMASK(13, 11) +#define QCA_HDR_RECV_PRIORITY_S 11 +#define QCA_HDR_RECV_TYPE_MASK GENMASK(10, 6) +#define QCA_HDR_RECV_TYPE_S 6 +#define QCA_HDR_RECV_FRAME_IS_TAGGED BIT(3) +#define QCA_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) + +#define QCA_HDR_XMIT_VERSION_MASK GENMASK(15, 14) +#define QCA_HDR_XMIT_VERSION_S 14 +#define QCA_HDR_XMIT_PRIORITY_MASK GENMASK(13, 11) +#define QCA_HDR_XMIT_PRIORITY_S 11 +#define QCA_HDR_XMIT_CONTROL_MASK GENMASK(10, 8) +#define QCA_HDR_XMIT_CONTROL_S 8 +#define QCA_HDR_XMIT_FROM_CPU BIT(7) +#define QCA_HDR_XMIT_DP_BIT_MASK GENMASK(6, 0) + +static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + u16 *phdr, hdr; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + if (skb_cow_head(skb, 0) < 0) + goto out_free; + + skb_push(skb, QCA_HDR_LEN); + + memmove(skb->data, skb->data + QCA_HDR_LEN, 2 * ETH_ALEN); + phdr = (u16 *)(skb->data + 2 * ETH_ALEN); + + /* Set the version field, and set destination port information */ + hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S | + QCA_HDR_XMIT_FROM_CPU | + BIT(p->port); + + *phdr = htons(hdr); + + return skb; + +out_free: + kfree_skb(skb); + return NULL; +} + +static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds; + u8 ver; + int port; + __be16 *phdr, hdr; + + if (unlikely(!dst)) + goto out_drop; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + goto out; + + if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN))) + goto out_drop; + + /* The QCA header is added by the switch between src addr and Ethertype + * At this point, skb->data points to ethertype so header should be + * right before + */ + phdr = (__be16 *)(skb->data - 2); + hdr = ntohs(*phdr); + + /* Make sure the version is correct */ + ver = (hdr & QCA_HDR_RECV_VERSION_MASK) >> QCA_HDR_RECV_VERSION_S; + if (unlikely(ver != QCA_HDR_VERSION)) + goto out_drop; + + /* Remove QCA tag and recalculate checksum */ + skb_pull_rcsum(skb, QCA_HDR_LEN); + memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN, + ETH_HLEN - QCA_HDR_LEN); + + /* This protocol doesn't support cascading multiple switches so it's + * safe to assume the switch is first in the tree + */ + ds = dst->ds[0]; + if (!ds) + goto out_drop; + + /* Get source port information */ + port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK); + if (!ds->ports[port].netdev) + goto out_drop; + + /* Update skb & forward the frame accordingly */ + skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; + skb->dev = ds->ports[port].netdev; + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; + +out_drop: + kfree_skb(skb); +out: + return 0; +} + +const struct dsa_device_ops qca_netdev_ops = { + .xmit = qca_tag_xmit, + .rcv = qca_tag_rcv, +}; -- cgit v1.2.3 From fbd05e4a6e82fd573d3aa79e284e424b8d78c149 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 15 Sep 2016 18:15:09 +0300 Subject: cfg80211: add helper to find an IE that matches a byte-array There are a few places where an IE that matches not only the EID, but also other bytes inside the element, needs to be found. To simplify that and reduce the amount of similar code, implement a new helper function to match the EID and an extra array of bytes. Additionally, simplify cfg80211_find_vendor_ie() by using the new match function. Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 33 +++++++++++++++++++++++++++- net/wireless/scan.c | 58 +++++++++++++++++++++++--------------------------- 2 files changed, 59 insertions(+), 32 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d5e7f690bad9..533cb6410678 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3946,6 +3946,34 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, unsigned int cfg80211_classify8021d(struct sk_buff *skb, struct cfg80211_qos_map *qos_map); +/** + * cfg80211_find_ie_match - match information element and byte array in data + * + * @eid: element ID + * @ies: data consisting of IEs + * @len: length of data + * @match: byte array to match + * @match_len: number of bytes in the match array + * @match_offset: offset in the IE where the byte array should match. + * If match_len is zero, this must also be set to zero. + * Otherwise this must be set to 2 or more, because the first + * byte is the element id, which is already compared to eid, and + * the second byte is the IE length. + * + * Return: %NULL if the element ID could not be found or if + * the element is invalid (claims to be longer than the given + * data) or if the byte array doesn't match, or a pointer to the first + * byte of the requested element, that is the byte containing the + * element ID. + * + * Note: There are no checks on the element length other than + * having to fit into the given data and being large enough for the + * byte array to match. + */ +const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len, + const u8 *match, int match_len, + int match_offset); + /** * cfg80211_find_ie - find information element in data * @@ -3961,7 +3989,10 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, * Note: There are no checks on the element length other than * having to fit into the given data. */ -const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len); +static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) +{ + return cfg80211_find_ie_match(eid, ies, len, NULL, 0, 0); +} /** * cfg80211_find_vendor_ie - find vendor specific information element in data diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 0358e12be54b..b5bd58d0f731 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -352,52 +352,48 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *rdev) __cfg80211_bss_expire(rdev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE); } -const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) +const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len, + const u8 *match, int match_len, + int match_offset) { - while (len > 2 && ies[0] != eid) { + /* match_offset can't be smaller than 2, unless match_len is + * zero, in which case match_offset must be zero as well. + */ + if (WARN_ON((match_len && match_offset < 2) || + (!match_len && match_offset))) + return NULL; + + while (len >= 2 && len >= ies[1] + 2) { + if ((ies[0] == eid) && + (ies[1] + 2 >= match_offset + match_len) && + !memcmp(ies + match_offset, match, match_len)) + return ies; + len -= ies[1] + 2; ies += ies[1] + 2; } - if (len < 2) - return NULL; - if (len < 2 + ies[1]) - return NULL; - return ies; + + return NULL; } -EXPORT_SYMBOL(cfg80211_find_ie); +EXPORT_SYMBOL(cfg80211_find_ie_match); const u8 *cfg80211_find_vendor_ie(unsigned int oui, int oui_type, const u8 *ies, int len) { - struct ieee80211_vendor_ie *ie; - const u8 *pos = ies, *end = ies + len; - int ie_oui; + const u8 *ie; + u8 match[] = { oui >> 16, oui >> 8, oui, oui_type }; + int match_len = (oui_type < 0) ? 3 : sizeof(match); if (WARN_ON(oui_type > 0xff)) return NULL; - while (pos < end) { - pos = cfg80211_find_ie(WLAN_EID_VENDOR_SPECIFIC, pos, - end - pos); - if (!pos) - return NULL; - - ie = (struct ieee80211_vendor_ie *)pos; - - /* make sure we can access ie->len */ - BUILD_BUG_ON(offsetof(struct ieee80211_vendor_ie, len) != 1); + ie = cfg80211_find_ie_match(WLAN_EID_VENDOR_SPECIFIC, ies, len, + match, match_len, 2); - if (ie->len < sizeof(*ie)) - goto cont; + if (ie && (ie[1] < 4)) + return NULL; - ie_oui = ie->oui[0] << 16 | ie->oui[1] << 8 | ie->oui[2]; - if (ie_oui == oui && - (oui_type < 0 || ie->oui_type == oui_type)) - return pos; -cont: - pos += 2 + ie->len; - } - return NULL; + return ie; } EXPORT_SYMBOL(cfg80211_find_vendor_ie); -- cgit v1.2.3 From 19664c6a000956290cce84c6924b13488ab794d6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 15 Sep 2016 10:18:45 -0700 Subject: net: l3mdev: Remove netif_index_is_l3_master No longer used after e0d56fdd73422 ("net: l3mdev: remove redundant calls") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include/net') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 3832099289c5..b220dabeab45 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -114,25 +114,6 @@ static inline u32 l3mdev_fib_table(const struct net_device *dev) return tb_id; } -static inline bool netif_index_is_l3_master(struct net *net, int ifindex) -{ - struct net_device *dev; - bool rc = false; - - if (ifindex == 0) - return false; - - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, ifindex); - if (dev) - rc = netif_is_l3_master(dev); - - rcu_read_unlock(); - - return rc; -} - struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); static inline @@ -226,11 +207,6 @@ static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) return 0; } -static inline bool netif_index_is_l3_master(struct net *net, int ifindex) -{ - return false; -} - static inline struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { -- cgit v1.2.3 From cfc7381b3002756b1dcada32979e942aa3126e31 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 15 Sep 2016 13:00:29 -0700 Subject: ip_tunnel: add collect_md mode to IPIP tunnel Similar to gre, vxlan, geneve tunnels allow IPIP tunnels to operate in 'collect metadata' mode. bpf_skb_[gs]et_tunnel_key() helpers can make use of it right away. ovs can use it as well in the future (once appropriate ovs-vport abstractions and user apis are added). Note that just like in other tunnels we cannot cache the dst, since tunnel_info metadata can be different for every packet. Signed-off-by: Alexei Starovoitov Acked-by: Thomas Graf Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 2 ++ include/uapi/linux/if_tunnel.h | 1 + net/ipv4/ip_tunnel.c | 76 ++++++++++++++++++++++++++++++++++++++++++ net/ipv4/ipip.c | 35 +++++++++++++++---- 4 files changed, 108 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index e598c639aa6f..59557c07904b 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -255,6 +255,8 @@ void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); +void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + const u8 proto); int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 9865c8caedde..18d5dc13985d 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -73,6 +73,7 @@ enum { IFLA_IPTUN_ENCAP_FLAGS, IFLA_IPTUN_ENCAP_SPORT, IFLA_IPTUN_ENCAP_DPORT, + IFLA_IPTUN_COLLECT_METADATA, __IFLA_IPTUN_MAX, }; #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 95649ebd2874..5719d6ba0824 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -55,6 +55,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_IPV6) #include @@ -546,6 +547,81 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, return 0; } +void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + u32 headroom = sizeof(struct iphdr); + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + const struct iphdr *inner_iph; + struct rtable *rt; + struct flowi4 fl4; + __be16 df = 0; + u8 tos, ttl; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET)) + goto tx_error; + key = &tun_info->key; + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + tos = key->tos; + if (tos == 1) { + if (skb->protocol == htons(ETH_P_IP)) + tos = inner_iph->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); + } + init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0, + RT_TOS(tos), tunnel->parms.link); + if (tunnel->encap.type != TUNNEL_ENCAP_NONE) + goto tx_error; + rt = ip_route_output_key(tunnel->net, &fl4); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; + } + if (rt->dst.dev == dev) { + ip_rt_put(rt); + dev->stats.collisions++; + goto tx_error; + } + tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); + ttl = key->ttl; + if (ttl == 0) { + if (skb->protocol == htons(ETH_P_IP)) + ttl = inner_iph->ttl; + else if (skb->protocol == htons(ETH_P_IPV6)) + ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; + else + ttl = ip4_dst_hoplimit(&rt->dst); + } + if (key->tun_flags & TUNNEL_DONT_FRAGMENT) + df = htons(IP_DF); + else if (skb->protocol == htons(ETH_P_IP)) + df = inner_iph->frag_off & htons(IP_DF); + headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; + if (headroom > dev->needed_headroom) + dev->needed_headroom = headroom; + + if (skb_cow_head(skb, dev->needed_headroom)) { + ip_rt_put(rt); + goto tx_dropped; + } + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos, + key->ttl, df, !net_eq(tunnel->net, dev_net(dev))); + return; +tx_error: + dev->stats.tx_errors++; + goto kfree; +tx_dropped: + dev->stats.tx_dropped++; +kfree: + kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit); + void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, u8 protocol) { diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 4ae3f8e6c6cc..c9392589c415 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -115,6 +115,7 @@ #include #include #include +#include static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); @@ -193,6 +194,7 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto) { struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); + struct metadata_dst *tun_dst = NULL; struct ip_tunnel *tunnel; const struct iphdr *iph; @@ -216,7 +218,12 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto) tpi = &ipip_tpi; if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; - return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error); + if (tunnel->collect_md) { + tun_dst = ip_tun_rx_dst(skb, 0, 0, 0); + if (!tun_dst) + return 0; + } + return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); } return -1; @@ -270,7 +277,10 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, skb_set_inner_ipproto(skb, ipproto); - ip_tunnel_xmit(skb, dev, tiph, ipproto); + if (tunnel->collect_md) + ip_md_tunnel_xmit(skb, dev, ipproto); + else + ip_tunnel_xmit(skb, dev, tiph, ipproto); return NETDEV_TX_OK; tx_error: @@ -380,13 +390,14 @@ static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) } static void ipip_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms) + struct ip_tunnel_parm *parms, bool *collect_md) { memset(parms, 0, sizeof(*parms)); parms->iph.version = 4; parms->iph.protocol = IPPROTO_IPIP; parms->iph.ihl = 5; + *collect_md = false; if (!data) return; @@ -414,6 +425,9 @@ static void ipip_netlink_parms(struct nlattr *data[], if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC])) parms->iph.frag_off = htons(IP_DF); + + if (data[IFLA_IPTUN_COLLECT_METADATA]) + *collect_md = true; } /* This function returns true when ENCAP attributes are present in the nl msg */ @@ -453,18 +467,18 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[], static int ipip_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { + struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; if (ipip_netlink_encap_parms(data, &ipencap)) { - struct ip_tunnel *t = netdev_priv(dev); int err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) return err; } - ipip_netlink_parms(data, &p); + ipip_netlink_parms(data, &p, &t->collect_md); return ip_tunnel_newlink(dev, tb, &p); } @@ -473,6 +487,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], { struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; + bool collect_md; if (ipip_netlink_encap_parms(data, &ipencap)) { struct ip_tunnel *t = netdev_priv(dev); @@ -482,7 +497,9 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], return err; } - ipip_netlink_parms(data, &p); + ipip_netlink_parms(data, &p, &collect_md); + if (collect_md) + return -EINVAL; if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) || (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) @@ -516,6 +533,8 @@ static size_t ipip_get_size(const struct net_device *dev) nla_total_size(2) + /* IFLA_IPTUN_ENCAP_DPORT */ nla_total_size(2) + + /* IFLA_IPTUN_COLLECT_METADATA */ + nla_total_size(0) + 0; } @@ -544,6 +563,9 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) tunnel->encap.flags)) goto nla_put_failure; + if (tunnel->collect_md) + if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA)) + goto nla_put_failure; return 0; nla_put_failure: @@ -562,6 +584,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, + [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG }, }; static struct rtnl_link_ops ipip_link_ops __read_mostly = { -- cgit v1.2.3 From 8d79266bc48c6ab6477d04e159cabf1e7809cb72 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 15 Sep 2016 13:00:30 -0700 Subject: ip6_tunnel: add collect_md mode to IPv6 tunnels Similar to gre, vxlan, geneve tunnels allow IPIP6 and IP6IP6 tunnels to operate in 'collect metadata' mode. Unlike ipv4 code here it's possible to reuse ip6_tnl_xmit() function for both collect_md and traditional tunnels. bpf_skb_[gs]et_tunnel_key() helpers and ovs (in the future) are the users. Signed-off-by: Alexei Starovoitov Acked-by: Thomas Graf Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 1 + net/ipv6/ip6_tunnel.c | 178 +++++++++++++++++++++++++++++++++++------------ 2 files changed, 134 insertions(+), 45 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 43a5a0e4524c..20ed9699fcd4 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -23,6 +23,7 @@ struct __ip6_tnl_parm { __u8 proto; /* tunnel protocol */ __u8 encap_limit; /* encapsulation limit for tunnel */ __u8 hop_limit; /* hop limit for tunnel */ + bool collect_md; __be32 flowinfo; /* traffic class and flowlabel for tunnel */ __u32 flags; /* tunnel flags */ struct in6_addr laddr; /* local tunnel end-point address */ diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5c5779720ef1..6a66adba0c22 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -57,6 +57,7 @@ #include #include #include +#include MODULE_AUTHOR("Ville Nuorvala"); MODULE_DESCRIPTION("IPv6 tunneling device"); @@ -90,6 +91,7 @@ struct ip6_tnl_net { struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE]; struct ip6_tnl __rcu *tnls_wc[1]; struct ip6_tnl __rcu **tnls[2]; + struct ip6_tnl __rcu *collect_md_tun; }; static struct net_device_stats *ip6_get_stats(struct net_device *dev) @@ -166,6 +168,10 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ return t; } + t = rcu_dereference(ip6n->collect_md_tun); + if (t) + return t; + t = rcu_dereference(ip6n->tnls_wc[0]); if (t && (t->dev->flags & IFF_UP)) return t; @@ -209,6 +215,8 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms); + if (t->parms.collect_md) + rcu_assign_pointer(ip6n->collect_md_tun, t); rcu_assign_pointer(t->next , rtnl_dereference(*tp)); rcu_assign_pointer(*tp, t); } @@ -224,6 +232,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) struct ip6_tnl __rcu **tp; struct ip6_tnl *iter; + if (t->parms.collect_md) + rcu_assign_pointer(ip6n->collect_md_tun, NULL); + for (tp = ip6_tnl_bucket(ip6n, &t->parms); (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { @@ -829,6 +840,9 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); + if (tun_dst) + skb_dst_set(skb, (struct dst_entry *)tun_dst); + gro_cells_receive(&tunnel->gro_cells, skb); return 0; @@ -865,6 +879,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, { struct ip6_tnl *t; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct metadata_dst *tun_dst = NULL; int ret = -1; rcu_read_lock(); @@ -881,7 +896,12 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, goto drop; if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; - ret = __ip6_tnl_rcv(t, skb, tpi, NULL, dscp_ecn_decapsulate, + if (t->parms.collect_md) { + tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0); + if (!tun_dst) + return 0; + } + ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate, log_ecn_error); } @@ -1012,8 +1032,16 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, int mtu; unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; unsigned int max_headroom = psh_hlen; + u8 hop_limit; int err = -1; + if (t->parms.collect_md) { + hop_limit = skb_tunnel_info(skb)->key.ttl; + goto route_lookup; + } else { + hop_limit = t->parms.hop_limit; + } + /* NBMA tunnel */ if (ipv6_addr_any(&t->parms.raddr)) { struct in6_addr *addr6; @@ -1043,6 +1071,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, goto tx_err_link_failure; if (!dst) { +route_lookup: dst = ip6_route_output(net, NULL, fl6); if (dst->error) @@ -1053,6 +1082,10 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, dst = NULL; goto tx_err_link_failure; } + if (t->parms.collect_md && + ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, + &fl6->daddr, 0, &fl6->saddr)) + goto tx_err_link_failure; ndst = dst; } @@ -1071,7 +1104,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, } if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (skb_dst(skb)) + if (skb_dst(skb) && !t->parms.collect_md) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len > mtu && !skb_is_gso(skb)) { *pmtu = mtu; @@ -1111,8 +1144,13 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, skb = new_skb; } - if (!fl6->flowi6_mark && ndst) - dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); + if (t->parms.collect_md) { + if (t->encap.type != TUNNEL_ENCAP_NONE) + goto tx_err_dst_release; + } else { + if (!fl6->flowi6_mark && ndst) + dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); + } skb_dst_set(skb, dst); if (encap_limit >= 0) { @@ -1137,7 +1175,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, ipv6h = ipv6_hdr(skb); ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); - ipv6h->hop_limit = t->parms.hop_limit; + ipv6h->hop_limit = hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; @@ -1170,19 +1208,34 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (tproto != IPPROTO_IPIP && tproto != 0) return -1; - if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - encap_limit = t->parms.encap_limit; + dsfield = ipv4_get_dsfield(iph); - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPIP; + if (t->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; - dsfield = ipv4_get_dsfield(iph); + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET6)) + return -1; + key = &tun_info->key; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; + fl6.daddr = key->u.ipv6.dst; + fl6.flowlabel = key->label; + } else { + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) - & IPV6_TCLASS_MASK; - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) - fl6.flowi6_mark = skb->mark; + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) + & IPV6_TCLASS_MASK; + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + } if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; @@ -1220,29 +1273,47 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) ip6_tnl_addr_conflict(t, ipv6h)) return -1; - offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); - if (offset > 0) { - struct ipv6_tlv_tnl_enc_lim *tel; - tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; - if (tel->encap_limit == 0) { - icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2); + dsfield = ipv6_get_dsfield(ipv6h); + + if (t->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET6)) return -1; + key = &tun_info->key; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPV6; + fl6.daddr = key->u.ipv6.dst; + fl6.flowlabel = key->label; + } else { + offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + if (offset > 0) { + struct ipv6_tlv_tnl_enc_lim *tel; + + tel = (void *)&skb_network_header(skb)[offset]; + if (tel->encap_limit == 0) { + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); + return -1; + } + encap_limit = tel->encap_limit - 1; + } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) { + encap_limit = t->parms.encap_limit; } - encap_limit = tel->encap_limit - 1; - } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - encap_limit = t->parms.encap_limit; - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPV6; + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPV6; - dsfield = ipv6_get_dsfield(ipv6h); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - fl6.flowlabel |= ip6_flowlabel(ipv6h); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) - fl6.flowi6_mark = skb->mark; + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6.flowlabel |= ip6_flowlabel(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + } if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; @@ -1741,6 +1812,10 @@ static int ip6_tnl_dev_init(struct net_device *dev) if (err) return err; ip6_tnl_link_config(t); + if (t->parms.collect_md) { + dev->features |= NETIF_F_NETNS_LOCAL; + netif_keep_dst(dev); + } return 0; } @@ -1811,6 +1886,9 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], if (data[IFLA_IPTUN_PROTO]) parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); + + if (data[IFLA_IPTUN_COLLECT_METADATA]) + parms->collect_md = true; } static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[], @@ -1850,6 +1928,7 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net *net = dev_net(dev); + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct ip6_tnl *nt, *t; struct ip_tunnel_encap ipencap; @@ -1864,9 +1943,14 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, ip6_tnl_netlink_parms(data, &nt->parms); - t = ip6_tnl_locate(net, &nt->parms, 0); - if (!IS_ERR(t)) - return -EEXIST; + if (nt->parms.collect_md) { + if (rtnl_dereference(ip6n->collect_md_tun)) + return -EEXIST; + } else { + t = ip6_tnl_locate(net, &nt->parms, 0); + if (!IS_ERR(t)) + return -EEXIST; + } return ip6_tnl_create2(dev); } @@ -1890,6 +1974,8 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], return err; } ip6_tnl_netlink_parms(data, &p); + if (p.collect_md) + return -EINVAL; t = ip6_tnl_locate(net, &p, 0); if (!IS_ERR(t)) { @@ -1937,6 +2023,8 @@ static size_t ip6_tnl_get_size(const struct net_device *dev) nla_total_size(2) + /* IFLA_IPTUN_ENCAP_DPORT */ nla_total_size(2) + + /* IFLA_IPTUN_COLLECT_METADATA */ + nla_total_size(0) + 0; } @@ -1955,16 +2043,15 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto)) goto nla_put_failure; - if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, - tunnel->encap.type) || - nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, - tunnel->encap.sport) || - nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, - tunnel->encap.dport) || - nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, - tunnel->encap.flags)) + if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) || + nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags)) goto nla_put_failure; + if (parm->collect_md) + if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA)) + goto nla_put_failure; return 0; nla_put_failure: @@ -1992,6 +2079,7 @@ static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, + [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG }, }; static struct rtnl_link_ops ip6_link_ops __read_mostly = { -- cgit v1.2.3 From b61c654f9b3f1a271217e46c893f80565b1f754d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Sep 2016 02:04:20 +0800 Subject: sctp: free msg->chunks when sctp_primitive_SEND return err Last patch "sctp: do not return the transmit err back to sctp_sendmsg" made sctp_primitive_SEND return err only when asoc state is unavailable. In this case, chunks are not enqueued, they have no chance to be freed if we don't take care of them later. This Patch is actually to revert commit 1cd4d5c4326a ("sctp: remove the unused sctp_datamsg_free()"), commit 69b5777f2e57 ("sctp: hold the chunks only after the chunk is enqueued in outq") and commit 8b570dc9f7b6 ("sctp: only drop the reference on the datamsg after sending a msg"), to use sctp_datamsg_free to free the chunks of current msg. Fixes: 8b570dc9f7b6 ("sctp: only drop the reference on the datamsg after sending a msg") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + net/sctp/chunk.c | 13 +++++++++++++ net/sctp/outqueue.c | 1 - net/sctp/socket.c | 8 ++++++-- 4 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ce93c4b10d26..f61fb7c87e53 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -537,6 +537,7 @@ struct sctp_datamsg { struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *, struct sctp_sndrcvinfo *, struct iov_iter *); +void sctp_datamsg_free(struct sctp_datamsg *); void sctp_datamsg_put(struct sctp_datamsg *); void sctp_chunk_fail(struct sctp_chunk *, int error); int sctp_chunk_abandoned(struct sctp_chunk *); diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index a55e54738b81..af9cc8055465 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -70,6 +70,19 @@ static struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) return msg; } +void sctp_datamsg_free(struct sctp_datamsg *msg) +{ + struct sctp_chunk *chunk; + + /* This doesn't have to be a _safe vairant because + * sctp_chunk_free() only drops the refs. + */ + list_for_each_entry(chunk, &msg->chunks, frag_list) + sctp_chunk_free(chunk); + + sctp_datamsg_put(msg); +} + /* Final destructruction of datamsg memory. */ static void sctp_datamsg_destroy(struct sctp_datamsg *msg) { diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index da2418b64c86..6c109b0f8495 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -304,7 +304,6 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : "illegal chunk"); - sctp_chunk_hold(chunk); sctp_outq_tail_data(q, chunk); if (chunk->asoc->prsctp_enable && SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9fc417a8b476..6cdc61c21438 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1958,6 +1958,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) /* Now send the (possibly) fragmented message. */ list_for_each_entry(chunk, &datamsg->chunks, frag_list) { + sctp_chunk_hold(chunk); + /* Do accounting for the write space. */ sctp_set_owner_w(chunk); @@ -1970,13 +1972,15 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) * breaks. */ err = sctp_primitive_SEND(net, asoc, datamsg); - sctp_datamsg_put(datamsg); /* Did the lower layer accept the chunk? */ - if (err) + if (err) { + sctp_datamsg_free(datamsg); goto out_free; + } pr_debug("%s: we sent primitively\n", __func__); + sctp_datamsg_put(datamsg); err = msg_len; if (unlikely(wait_connect)) { -- cgit v1.2.3 From 83dbc3d4a38411ef38f680d7045c8478cc9c5a56 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Sep 2016 02:04:22 +0800 Subject: sctp: make sctp_outq_flush/tail/uncork return void sctp_outq_flush return value is meaningless now, this patch is to make sctp_outq_flush return void, as well as sctp_outq_fail and sctp_outq_uncork. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++-- net/sctp/outqueue.c | 19 +++++++------------ net/sctp/sm_sideeffect.c | 9 ++++----- 3 files changed, 13 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f61fb7c87e53..8693dc452a7f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1077,7 +1077,7 @@ struct sctp_outq { void sctp_outq_init(struct sctp_association *, struct sctp_outq *); void sctp_outq_teardown(struct sctp_outq *); void sctp_outq_free(struct sctp_outq*); -int sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t); +void sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t); int sctp_outq_sack(struct sctp_outq *, struct sctp_chunk *); int sctp_outq_is_empty(const struct sctp_outq *); void sctp_outq_restart(struct sctp_outq *); @@ -1085,7 +1085,7 @@ void sctp_outq_restart(struct sctp_outq *); void sctp_retransmit(struct sctp_outq *, struct sctp_transport *, sctp_retransmit_reason_t); void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8); -int sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); +void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); void sctp_prsctp_prune(struct sctp_association *asoc, struct sctp_sndrcvinfo *sinfo, int msg_len); /* Uncork and flush an outqueue. */ diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 052a4796a457..8c3f446d965c 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -68,7 +68,7 @@ static void sctp_mark_missing(struct sctp_outq *q, static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn); -static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp); +static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp); /* Add data to the front of the queue. */ static inline void sctp_outq_head_data(struct sctp_outq *q, @@ -285,10 +285,9 @@ void sctp_outq_free(struct sctp_outq *q) } /* Put a new chunk in an sctp_outq. */ -int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) +void sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) { struct net *net = sock_net(q->asoc->base.sk); - int error = 0; pr_debug("%s: outq:%p, chunk:%p[%s]\n", __func__, q, chunk, chunk && chunk->chunk_hdr ? @@ -318,9 +317,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) } if (!q->cork) - error = sctp_outq_flush(q, 0, gfp); - - return error; + sctp_outq_flush(q, 0, gfp); } /* Insert a chunk into the sorted list based on the TSNs. The retransmit list @@ -748,12 +745,12 @@ redo: } /* Cork the outqueue so queued chunks are really queued. */ -int sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp) +void sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp) { if (q->cork) q->cork = 0; - return sctp_outq_flush(q, 0, gfp); + sctp_outq_flush(q, 0, gfp); } @@ -766,7 +763,7 @@ int sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp) * locking concerns must be made. Today we use the sock lock to protect * this function. */ -static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) +static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) { struct sctp_packet *packet; struct sctp_packet singleton; @@ -891,7 +888,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) error = sctp_packet_transmit(&singleton, gfp); if (error < 0) { asoc->base.sk->sk_err = -error; - return 0; + return; } break; @@ -1175,8 +1172,6 @@ sctp_flush_out: /* Clear the burst limited state, if any */ sctp_transport_burst_reset(t); } - - return 0; } /* Update unack_data based on the incoming SACK chunk */ diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index cf6e4f0de729..c345bf153bed 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1421,8 +1421,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, local_cork = 1; } /* Send a chunk to our peer. */ - error = sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk, - gfp); + sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk, gfp); break; case SCTP_CMD_SEND_PKT: @@ -1676,7 +1675,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_FORCE_PRIM_RETRAN: t = asoc->peer.retran_path; asoc->peer.retran_path = asoc->peer.primary_path; - error = sctp_outq_uncork(&asoc->outqueue, gfp); + sctp_outq_uncork(&asoc->outqueue, gfp); local_cork = 0; asoc->peer.retran_path = t; break; @@ -1733,9 +1732,9 @@ out: */ if (asoc && SCTP_EVENT_T_CHUNK == event_type && chunk) { if (chunk->end_of_packet || chunk->singleton) - error = sctp_outq_uncork(&asoc->outqueue, gfp); + sctp_outq_uncork(&asoc->outqueue, gfp); } else if (local_cork) - error = sctp_outq_uncork(&asoc->outqueue, gfp); + sctp_outq_uncork(&asoc->outqueue, gfp); if (sp->data_ready_signalled) sp->data_ready_signalled = 0; -- cgit v1.2.3 From d409b84768037ad03d1d73538d99fb902adf7365 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Fri, 16 Sep 2016 12:59:08 -0700 Subject: ipv6: Export p6_route_input_lookup symbol Make ip6_route_input_lookup available outside of ipv6 the module similar to ip_route_input_noref in the IPv4 world. Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- include/net/ip6_route.h | 3 +++ net/ipv6/route.c | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index d97305d0e71f..e0cd318d5103 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -64,6 +64,9 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) } void ip6_route_input(struct sk_buff *skb); +struct dst_entry *ip6_route_input_lookup(struct net *net, + struct net_device *dev, + struct flowi6 *fl6, int flags); struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad4a7ff301fc..4dab585f7642 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1147,15 +1147,16 @@ static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table * return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); } -static struct dst_entry *ip6_route_input_lookup(struct net *net, - struct net_device *dev, - struct flowi6 *fl6, int flags) +struct dst_entry *ip6_route_input_lookup(struct net *net, + struct net_device *dev, + struct flowi6 *fl6, int flags) { if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input); } +EXPORT_SYMBOL_GPL(ip6_route_input_lookup); void ip6_route_input(struct sk_buff *skb) { -- cgit v1.2.3 From ec323368793b8570c02e723127611a8d906a9b3f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Sep 2016 00:57:32 +0200 Subject: sched: remove qdisc arg from __qdisc_dequeue_head Moves qdisc stat accouting to qdisc_dequeue_head. The only direct caller of the __qdisc_dequeue_head version open-codes this now. This allows us to later use __qdisc_dequeue_head as a replacement of __skb_dequeue() (which operates on sk_buff_head list). Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/sch_generic.h | 15 ++++++++------- net/sched/sch_generic.c | 7 ++++++- 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 52a2015667b4..0741ed41575b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -614,11 +614,17 @@ static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) return __qdisc_enqueue_tail(skb, sch, &sch->q); } -static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch, - struct sk_buff_head *list) +static inline struct sk_buff *__qdisc_dequeue_head(struct sk_buff_head *list) { struct sk_buff *skb = __skb_dequeue(list); + return skb; +} + +static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) +{ + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); + if (likely(skb != NULL)) { qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); @@ -627,11 +633,6 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch, return skb; } -static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) -{ - return __qdisc_dequeue_head(sch, &sch->q); -} - /* Instead of calling kfree_skb() while root qdisc lock is held, * queue the skb for future freeing at end of __dev_xmit_skb() */ diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5e63bf638350..73877d9c2bcb 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -506,7 +506,12 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) if (likely(band >= 0)) { struct sk_buff_head *list = band2list(priv, band); - struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list); + struct sk_buff *skb = __qdisc_dequeue_head(list); + + if (likely(skb != NULL)) { + qdisc_qstats_backlog_dec(qdisc, skb); + qdisc_bstats_update(qdisc, skb); + } qdisc->q.qlen--; if (skb_queue_empty(list)) -- cgit v1.2.3 From 48da34b7a74201f15315cb1fc40bb9a7bd2b4940 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 18 Sep 2016 00:57:34 +0200 Subject: sched: add and use qdisc_skb_head helpers This change replaces sk_buff_head struct in Qdiscs with new qdisc_skb_head. Its similar to the skb_buff_head api, but does not use skb->prev pointers. Qdiscs will commonly enqueue at the tail of a list and dequeue at head. While skb_buff_head works fine for this, enqueue/dequeue needs to also adjust the prev pointer of next element. The ->prev pointer is not required for qdiscs so we can just leave it undefined and avoid one cacheline write access for en/dequeue. Suggested-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/sch_generic.h | 63 ++++++++++++++++++++++++++++++++++++++--------- net/sched/sch_generic.c | 21 ++++++++-------- net/sched/sch_htb.c | 24 +++++++++++++++--- net/sched/sch_netem.c | 14 +++++++++-- 4 files changed, 94 insertions(+), 28 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 0741ed41575b..e6aa0a249672 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -36,6 +36,14 @@ struct qdisc_size_table { u16 data[]; }; +/* similar to sk_buff_head, but skb->prev pointer is undefined. */ +struct qdisc_skb_head { + struct sk_buff *head; + struct sk_buff *tail; + __u32 qlen; + spinlock_t lock; +}; + struct Qdisc { int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, @@ -76,7 +84,7 @@ struct Qdisc { * For performance sake on SMP, we put highly modified fields at the end */ struct sk_buff *gso_skb ____cacheline_aligned_in_smp; - struct sk_buff_head q; + struct qdisc_skb_head q; struct gnet_stats_basic_packed bstats; seqcount_t running; struct gnet_stats_queue qstats; @@ -600,10 +608,27 @@ static inline void qdisc_qstats_overlimit(struct Qdisc *sch) sch->qstats.overlimits++; } +static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh) +{ + qh->head = NULL; + qh->tail = NULL; + qh->qlen = 0; +} + static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff_head *list) + struct qdisc_skb_head *qh) { - __skb_queue_tail(list, skb); + struct sk_buff *last = qh->tail; + + if (last) { + skb->next = NULL; + last->next = skb; + qh->tail = skb; + } else { + qh->tail = skb; + qh->head = skb; + } + qh->qlen++; qdisc_qstats_backlog_inc(sch, skb); return NET_XMIT_SUCCESS; @@ -614,9 +639,17 @@ static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) return __qdisc_enqueue_tail(skb, sch, &sch->q); } -static inline struct sk_buff *__qdisc_dequeue_head(struct sk_buff_head *list) +static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh) { - struct sk_buff *skb = __skb_dequeue(list); + struct sk_buff *skb = qh->head; + + if (likely(skb != NULL)) { + qh->head = skb->next; + qh->qlen--; + if (qh->head == NULL) + qh->tail = NULL; + skb->next = NULL; + } return skb; } @@ -643,10 +676,10 @@ static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free) } static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch, - struct sk_buff_head *list, + struct qdisc_skb_head *qh, struct sk_buff **to_free) { - struct sk_buff *skb = __skb_dequeue(list); + struct sk_buff *skb = __qdisc_dequeue_head(qh); if (likely(skb != NULL)) { unsigned int len = qdisc_pkt_len(skb); @@ -667,7 +700,9 @@ static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch, static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) { - return skb_peek(&sch->q); + const struct qdisc_skb_head *qh = &sch->q; + + return qh->head; } /* generic pseudo peek method for non-work-conserving qdisc */ @@ -702,15 +737,19 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) return skb; } -static inline void __qdisc_reset_queue(struct sk_buff_head *list) +static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh) { /* * We do not know the backlog in bytes of this list, it * is up to the caller to correct it */ - if (!skb_queue_empty(list)) { - rtnl_kfree_skbs(list->next, list->prev); - __skb_queue_head_init(list); + ASSERT_RTNL(); + if (qh->qlen) { + rtnl_kfree_skbs(qh->head, qh->tail); + + qh->head = NULL; + qh->tail = NULL; + qh->qlen = 0; } } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 73877d9c2bcb..6cfb6e9038c2 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -466,7 +466,7 @@ static const u8 prio2band[TC_PRIO_MAX + 1] = { */ struct pfifo_fast_priv { u32 bitmap; - struct sk_buff_head q[PFIFO_FAST_BANDS]; + struct qdisc_skb_head q[PFIFO_FAST_BANDS]; }; /* @@ -477,7 +477,7 @@ struct pfifo_fast_priv { */ static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0}; -static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, +static inline struct qdisc_skb_head *band2list(struct pfifo_fast_priv *priv, int band) { return priv->q + band; @@ -489,7 +489,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) { int band = prio2band[skb->priority & TC_PRIO_MAX]; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); - struct sk_buff_head *list = band2list(priv, band); + struct qdisc_skb_head *list = band2list(priv, band); priv->bitmap |= (1 << band); qdisc->q.qlen++; @@ -505,8 +505,8 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) int band = bitmap2band[priv->bitmap]; if (likely(band >= 0)) { - struct sk_buff_head *list = band2list(priv, band); - struct sk_buff *skb = __qdisc_dequeue_head(list); + struct qdisc_skb_head *qh = band2list(priv, band); + struct sk_buff *skb = __qdisc_dequeue_head(qh); if (likely(skb != NULL)) { qdisc_qstats_backlog_dec(qdisc, skb); @@ -514,7 +514,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) } qdisc->q.qlen--; - if (skb_queue_empty(list)) + if (qh->qlen == 0) priv->bitmap &= ~(1 << band); return skb; @@ -529,9 +529,9 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc) int band = bitmap2band[priv->bitmap]; if (band >= 0) { - struct sk_buff_head *list = band2list(priv, band); + struct qdisc_skb_head *qh = band2list(priv, band); - return skb_peek(list); + return qh->head; } return NULL; @@ -569,7 +569,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) struct pfifo_fast_priv *priv = qdisc_priv(qdisc); for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - __skb_queue_head_init(band2list(priv, prio)); + qdisc_skb_head_init(band2list(priv, prio)); /* Can by-pass the queue discipline */ qdisc->flags |= TCQ_F_CAN_BYPASS; @@ -617,7 +617,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); sch->padded = (char *) sch - (char *) p; } - skb_queue_head_init(&sch->q); + qdisc_skb_head_init(&sch->q); + spin_lock_init(&sch->q.lock); spin_lock_init(&sch->busylock); lockdep_set_class(&sch->busylock, diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 53dbfa187870..c798d0de8a9d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -162,7 +162,7 @@ struct htb_sched { struct work_struct work; /* non shaped skbs; let them go directly thru */ - struct sk_buff_head direct_queue; + struct qdisc_skb_head direct_queue; long direct_pkts; struct qdisc_watchdog watchdog; @@ -570,6 +570,22 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) list_del_init(&cl->un.leaf.drop_list); } +static void htb_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, + struct qdisc_skb_head *qh) +{ + struct sk_buff *last = qh->tail; + + if (last) { + skb->next = NULL; + last->next = skb; + qh->tail = skb; + } else { + qh->tail = skb; + qh->head = skb; + } + qh->qlen++; +} + static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -580,7 +596,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (cl == HTB_DIRECT) { /* enqueue to helper queue */ if (q->direct_queue.qlen < q->direct_qlen) { - __skb_queue_tail(&q->direct_queue, skb); + htb_enqueue_tail(skb, sch, &q->direct_queue); q->direct_pkts++; } else { return qdisc_drop(skb, sch, to_free); @@ -888,7 +904,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) unsigned long start_at; /* try to dequeue direct packets as high prio (!) to minimize cpu work */ - skb = __skb_dequeue(&q->direct_queue); + skb = __qdisc_dequeue_head(&q->direct_queue); if (skb != NULL) { ok: qdisc_bstats_update(sch, skb); @@ -1019,7 +1035,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); INIT_WORK(&q->work, htb_work_func); - __skb_queue_head_init(&q->direct_queue); + qdisc_skb_head_init(&q->direct_queue); if (tb[TCA_HTB_DIRECT_QLEN]) q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 0a964b35f8c7..9f7b380cf0a3 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -413,6 +413,16 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch, return segs; } +static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb) +{ + skb->next = qh->head; + + if (!qh->head) + qh->tail = skb; + qh->head = skb; + qh->qlen++; +} + /* * Insert one skb into qdisc. * Note: parent depends on return value to account for queue length. @@ -523,7 +533,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff *last; if (sch->q.qlen) - last = skb_peek_tail(&sch->q); + last = sch->q.tail; else last = netem_rb_to_skb(rb_last(&q->t_root)); if (last) { @@ -552,7 +562,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, cb->time_to_send = psched_get_time(); q->counter = 0; - __skb_queue_head(&sch->q, skb); + netem_enqueue_skb_head(&sch->q, skb); sch->qstats.requeues++; } -- cgit v1.2.3 From 53f863a66904542b03204f2b115d050b04c11ba5 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 21 Jul 2016 14:12:40 +0200 Subject: Bluetooth: Put led_trigger field behind CONFIG_BT_LEDS The led_trigger field in hci_dev should be conditional based on if CONFIG_BT_LEDS is set or not. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index ee7fc47680a1..b8d43bd9c71b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -399,7 +399,9 @@ struct hci_dev { struct delayed_work rpa_expired; bdaddr_t rpa; +#if IS_ENABLED(CONFIG_BT_LEDS) struct led_trigger *power_led; +#endif int (*open)(struct hci_dev *hdev); int (*close)(struct hci_dev *hdev); -- cgit v1.2.3 From 65010e68efbeda4275845240869138c0c4587422 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 12 Aug 2016 17:01:27 -0700 Subject: Bluetooth: Add HCI device identifier for Qualcomm SMD This patch assigns the next free HCI device identifier to Bluetooth devices based on the Qualcomm Shared Memory channels. Signed-off-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 003b25283407..0aac123b5eee 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -63,6 +63,7 @@ #define HCI_SDIO 6 #define HCI_SPI 7 #define HCI_I2C 8 +#define HCI_SMD 9 /* HCI controller types */ #define HCI_PRIMARY 0x00 -- cgit v1.2.3 From 1aabbbcefe8e62fbffaaa01ca8bdd4cd6ed1625b Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Fri, 29 Jul 2016 13:28:25 +0200 Subject: Bluetooth: add printf format attribute to hci_set_[fh]w_info() Commit 5177a83827cd ("Bluetooth: Add debugfs fields for hardware and firmware info") introduced hci_set_hw_info() and hci_set_fw_info(). These functions use kvasprintf_const() but are not marked with a __printf attribute. Adding such an attribute helps detecting issues related to printf-formatting at build time. Signed-off-by: Nicolas Iooss Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index b8d43bd9c71b..cc349f633570 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1028,8 +1028,8 @@ int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb); -void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...); -void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...); +__printf(2, 3) void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...); +__printf(2, 3) void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...); int hci_dev_open(__u16 dev); int hci_dev_close(__u16 dev); int hci_dev_do_close(struct hci_dev *hdev); -- cgit v1.2.3 From 70ecce91e3a2d7e332fe56fd065c67d404b8fccf Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:38 +0200 Subject: Bluetooth: Store control socket cookie and comm information To further allow unique identification and tracking of control socket, store cookie and comm information when binding the socket. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 1 + net/bluetooth/hci_sock.c | 31 ++++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index bfd1590821d6..69b5174168b7 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -371,6 +371,7 @@ void hci_sock_set_flag(struct sock *sk, int nr); void hci_sock_clear_flag(struct sock *sk, int nr); int hci_sock_test_flag(struct sock *sk, int nr); unsigned short hci_sock_get_channel(struct sock *sk); +u32 hci_sock_get_cookie(struct sock *sk); int hci_sock_init(void); void hci_sock_cleanup(void); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 99dd1503ef56..4dce6dfdb0f2 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -38,6 +39,8 @@ static LIST_HEAD(mgmt_chan_list); static DEFINE_MUTEX(mgmt_chan_list_lock); +static DEFINE_IDA(sock_cookie_ida); + static atomic_t monitor_promisc = ATOMIC_INIT(0); /* ----- HCI socket interface ----- */ @@ -52,6 +55,8 @@ struct hci_pinfo { __u32 cmsg_mask; unsigned short channel; unsigned long flags; + __u32 cookie; + char comm[TASK_COMM_LEN]; }; void hci_sock_set_flag(struct sock *sk, int nr) @@ -74,6 +79,11 @@ unsigned short hci_sock_get_channel(struct sock *sk) return hci_pi(sk)->channel; } +u32 hci_sock_get_cookie(struct sock *sk) +{ + return hci_pi(sk)->cookie; +} + static inline int hci_test_bit(int nr, const void *addr) { return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31)); @@ -585,6 +595,7 @@ static int hci_sock_release(struct socket *sock) { struct sock *sk = sock->sk; struct hci_dev *hdev; + int id; BT_DBG("sock %p sk %p", sock, sk); @@ -593,8 +604,17 @@ static int hci_sock_release(struct socket *sock) hdev = hci_pi(sk)->hdev; - if (hci_pi(sk)->channel == HCI_CHANNEL_MONITOR) + switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_MONITOR: atomic_dec(&monitor_promisc); + break; + case HCI_CHANNEL_CONTROL: + id = hci_pi(sk)->cookie; + + hci_pi(sk)->cookie = 0xffffffff; + ida_simple_remove(&sock_cookie_ida, id); + break; + } bt_sock_unlink(&hci_sk_list, sk); @@ -957,6 +977,15 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, * are changes to settings, class of device, name etc. */ if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { + int id; + + id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); + if (id < 0) + id = 0xffffffff; + + hci_pi(sk)->cookie = id; + get_task_comm(hci_pi(sk)->comm, current); + hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_GENERIC_EVENTS); -- cgit v1.2.3 From 03c979c4717c7fa0c058fafe76ac4d6acdd1fb0d Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:39 +0200 Subject: Bluetooth: Introduce helper to pack mgmt version information The mgmt version information will be also needed for the control changell tracing feature. This provides a helper to pack them. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/mgmt.c | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index cc349f633570..9f181b583b96 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1451,6 +1451,7 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c); #define DISCOV_BREDR_INQUIRY_LEN 0x08 #define DISCOV_LE_RESTART_DELAY msecs_to_jiffies(200) /* msec */ +void mgmt_fill_version_info(void *ver); int mgmt_new_settings(struct hci_dev *hdev); void mgmt_index_added(struct hci_dev *hdev); void mgmt_index_removed(struct hci_dev *hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7639290b6de3..9071886df194 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -278,6 +278,14 @@ static u8 le_addr_type(u8 mgmt_addr_type) return ADDR_LE_DEV_RANDOM; } +void mgmt_fill_version_info(void *ver) +{ + struct mgmt_rp_read_version *rp = ver; + + rp->version = MGMT_VERSION; + rp->revision = cpu_to_le16(MGMT_REVISION); +} + static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { @@ -285,8 +293,7 @@ static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("sock %p", sk); - rp.version = MGMT_VERSION; - rp.revision = cpu_to_le16(MGMT_REVISION); + mgmt_fill_version_info(&rp); return mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_VERSION, 0, &rp, sizeof(rp)); -- cgit v1.2.3 From 249fa1699f8642c73eb43e61b321969f0549ab2c Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:40 +0200 Subject: Bluetooth: Add support for sending MGMT open and close to monitor This sends new notifications to the monitor support whenever a management channel has been opened or closed. This allows tracing of control channels really easily. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_mon.h | 2 + net/bluetooth/hci_sock.c | 95 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index 587d0131b349..9640790cbbcc 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -45,6 +45,8 @@ struct hci_mon_hdr { #define HCI_MON_VENDOR_DIAG 11 #define HCI_MON_SYSTEM_NOTE 12 #define HCI_MON_USER_LOGGING 13 +#define HCI_MON_CTRL_OPEN 14 +#define HCI_MON_CTRL_CLOSE 15 struct hci_mon_new_index { __u8 type; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 4dce6dfdb0f2..2d8725006838 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -394,6 +394,59 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) return skb; } +static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + u16 format = 0x0002; + u8 ver[3]; + u32 flags; + + skb = bt_skb_alloc(14 + TASK_COMM_LEN , GFP_ATOMIC); + if (!skb) + return NULL; + + mgmt_fill_version_info(ver); + flags = hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) ? 0x1 : 0x0; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + put_unaligned_le16(format, skb_put(skb, 2)); + memcpy(skb_put(skb, sizeof(ver)), ver, sizeof(ver)); + put_unaligned_le32(flags, skb_put(skb, 4)); + *skb_put(skb, 1) = TASK_COMM_LEN; + memcpy(skb_put(skb, TASK_COMM_LEN), hci_pi(sk)->comm, TASK_COMM_LEN); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_OPEN); + hdr->index = cpu_to_le16(HCI_DEV_NONE); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + +static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(4, GFP_ATOMIC); + if (!skb) + return NULL; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_CLOSE); + hdr->index = cpu_to_le16(HCI_DEV_NONE); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + static void __printf(2, 3) send_monitor_note(struct sock *sk, const char *fmt, ...) { @@ -468,6 +521,29 @@ static void send_monitor_replay(struct sock *sk) read_unlock(&hci_dev_list_lock); } +static void send_monitor_control_replay(struct sock *mon_sk) +{ + struct sock *sk; + + read_lock(&hci_sk_list.lock); + + sk_for_each(sk, &hci_sk_list.head) { + struct sk_buff *skb; + + if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL) + continue; + + skb = create_monitor_ctrl_open(sk); + if (!skb) + continue; + + if (sock_queue_rcv_skb(mon_sk, skb)) + kfree_skb(skb); + } + + read_unlock(&hci_sk_list.lock); +} + /* Generate internal stack event */ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) { @@ -595,6 +671,7 @@ static int hci_sock_release(struct socket *sock) { struct sock *sk = sock->sk; struct hci_dev *hdev; + struct sk_buff *skb; int id; BT_DBG("sock %p sk %p", sock, sk); @@ -611,6 +688,14 @@ static int hci_sock_release(struct socket *sock) case HCI_CHANNEL_CONTROL: id = hci_pi(sk)->cookie; + /* Send event to monitor */ + skb = create_monitor_ctrl_close(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + hci_pi(sk)->cookie = 0xffffffff; ida_simple_remove(&sock_cookie_ida, id); break; @@ -931,6 +1016,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, send_monitor_note(sk, "Bluetooth subsystem version %s", BT_SUBSYS_VERSION); send_monitor_replay(sk); + send_monitor_control_replay(sk); atomic_inc(&monitor_promisc); break; @@ -977,6 +1063,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, * are changes to settings, class of device, name etc. */ if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { + struct sk_buff *skb; int id; id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); @@ -986,6 +1073,14 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, hci_pi(sk)->cookie = id; get_task_comm(hci_pi(sk)->comm, current); + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_GENERIC_EVENTS); -- cgit v1.2.3 From 38ceaa00d02dceb22c6bdd5268f5a44d5c00e123 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 27 Aug 2016 20:23:41 +0200 Subject: Bluetooth: Add support for sending MGMT commands and events to monitor This adds support for tracing all management commands and events via the monitor interface. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 3 ++ include/net/bluetooth/hci_mon.h | 2 + net/bluetooth/hci_sock.c | 94 ++++++++++++++++++++++++++++++++++++++++ net/bluetooth/mgmt_util.c | 66 ++++++++++++++++++++++++++-- 4 files changed, 162 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9f181b583b96..a48f71d73dc8 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1406,6 +1406,9 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb); void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, int flag, struct sock *skip_sk); void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb); +void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, + void *data, u16 data_len, ktime_t tstamp, + int flag, struct sock *skip_sk); void hci_sock_dev_event(struct hci_dev *hdev, int event); diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index 9640790cbbcc..240786b04a46 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -47,6 +47,8 @@ struct hci_mon_hdr { #define HCI_MON_USER_LOGGING 13 #define HCI_MON_CTRL_OPEN 14 #define HCI_MON_CTRL_CLOSE 15 +#define HCI_MON_CTRL_COMMAND 16 +#define HCI_MON_CTRL_EVENT 17 struct hci_mon_new_index { __u8 type; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 2d8725006838..576ea48631b9 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -315,6 +315,60 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb_copy); } +void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, + void *data, u16 data_len, ktime_t tstamp, + int flag, struct sock *skip_sk) +{ + struct sock *sk; + __le16 index; + + if (hdev) + index = cpu_to_le16(hdev->id); + else + index = cpu_to_le16(MGMT_INDEX_NONE); + + read_lock(&hci_sk_list.lock); + + sk_for_each(sk, &hci_sk_list.head) { + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL) + continue; + + /* Ignore socket without the flag set */ + if (!hci_sock_test_flag(sk, flag)) + continue; + + /* Skip the original socket */ + if (sk == skip_sk) + continue; + + skb = bt_skb_alloc(6 + data_len, GFP_ATOMIC); + if (!skb) + continue; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + put_unaligned_le16(event, skb_put(skb, 2)); + + if (data) + memcpy(skb_put(skb, data_len), data, data_len); + + skb->tstamp = tstamp; + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT); + hdr->index = index; + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + + read_unlock(&hci_sk_list.lock); +} + static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) { struct hci_mon_hdr *hdr; @@ -447,6 +501,33 @@ static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) return skb; } +static struct sk_buff *create_monitor_ctrl_command(struct sock *sk, u16 index, + u16 opcode, u16 len, + const void *buf) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(6 + len, GFP_ATOMIC); + if (!skb) + return NULL; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + put_unaligned_le16(opcode, skb_put(skb, 2)); + + if (buf) + memcpy(skb_put(skb, len), buf, len); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_COMMAND); + hdr->index = cpu_to_le16(index); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + static void __printf(2, 3) send_monitor_note(struct sock *sk, const char *fmt, ...) { @@ -1257,6 +1338,19 @@ static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk, goto done; } + if (chan->channel == HCI_CHANNEL_CONTROL) { + struct sk_buff *skb; + + /* Send event to monitor */ + skb = create_monitor_ctrl_command(sk, index, opcode, len, + buf + sizeof(*hdr)); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } + if (opcode >= chan->handler_count || chan->handlers[opcode].func == NULL) { BT_DBG("Unknown op %u", opcode); diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index 8c30c7eb8bef..c933bd08c1fe 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -21,12 +21,41 @@ SOFTWARE IS DISCLAIMED. */ +#include + #include #include +#include #include #include "mgmt_util.h" +static struct sk_buff *create_monitor_ctrl_event(__le16 index, u32 cookie, + u16 opcode, u16 len, void *buf) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(6 + len, GFP_ATOMIC); + if (!skb) + return NULL; + + put_unaligned_le32(cookie, skb_put(skb, 4)); + put_unaligned_le16(opcode, skb_put(skb, 2)); + + if (buf) + memcpy(skb_put(skb, len), buf, len); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT); + hdr->index = index; + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, void *data, u16 data_len, int flag, struct sock *skip_sk) { @@ -52,14 +81,18 @@ int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, __net_timestamp(skb); hci_send_to_channel(channel, skb, flag, skip_sk); - kfree_skb(skb); + if (channel == HCI_CHANNEL_CONTROL) + hci_send_monitor_ctrl_event(hdev, event, data, data_len, + skb_get_ktime(skb), flag, skip_sk); + + kfree_skb(skb); return 0; } int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) { - struct sk_buff *skb; + struct sk_buff *skb, *mskb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_status *ev; int err; @@ -80,17 +113,30 @@ int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) ev->status = status; ev->opcode = cpu_to_le16(cmd); + mskb = create_monitor_ctrl_event(hdr->index, hci_sock_get_cookie(sk), + MGMT_EV_CMD_STATUS, sizeof(*ev), ev); + if (mskb) + skb->tstamp = mskb->tstamp; + else + __net_timestamp(skb); + err = sock_queue_rcv_skb(sk, skb); if (err < 0) kfree_skb(skb); + if (mskb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, mskb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(mskb); + } + return err; } int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, void *rp, size_t rp_len) { - struct sk_buff *skb; + struct sk_buff *skb, *mskb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_complete *ev; int err; @@ -114,10 +160,24 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, if (rp) memcpy(ev->data, rp, rp_len); + mskb = create_monitor_ctrl_event(hdr->index, hci_sock_get_cookie(sk), + MGMT_EV_CMD_COMPLETE, + sizeof(*ev) + rp_len, ev); + if (mskb) + skb->tstamp = mskb->tstamp; + else + __net_timestamp(skb); + err = sock_queue_rcv_skb(sk, skb); if (err < 0) kfree_skb(skb); + if (mskb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, mskb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(mskb); + } + return err; } -- cgit v1.2.3 From 5504c3a31061704512707bb23bd7835e8a5281e4 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 29 Aug 2016 06:19:46 +0200 Subject: Bluetooth: Use individual flags for certain management events Instead of hiding everything behind a general managment events flag, introduce indivdual flags that allow fine control over which events are send to a given management channel. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 5 ++++- net/bluetooth/hci_sock.c | 5 ++++- net/bluetooth/mgmt.c | 32 +++++++++++++------------------- 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 0aac123b5eee..ddb9accac3a5 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -208,7 +208,10 @@ enum { HCI_MGMT_INDEX_EVENTS, HCI_MGMT_UNCONF_INDEX_EVENTS, HCI_MGMT_EXT_INDEX_EVENTS, - HCI_MGMT_GENERIC_EVENTS, + HCI_MGMT_OPTION_EVENTS, + HCI_MGMT_SETTING_EVENTS, + HCI_MGMT_DEV_CLASS_EVENTS, + HCI_MGMT_LOCAL_NAME_EVENTS, HCI_MGMT_OOB_DATA_EVENTS, }; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 576ea48631b9..d37c2243157b 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1164,7 +1164,10 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); - hci_sock_set_flag(sk, HCI_MGMT_GENERIC_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_OPTION_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_SETTING_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS); } break; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f9af5f7c2ea2..469f5cc3109b 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -256,13 +256,6 @@ static int mgmt_limited_event(u16 event, struct hci_dev *hdev, void *data, flag, skip_sk); } -static int mgmt_generic_event(u16 event, struct hci_dev *hdev, void *data, - u16 len, struct sock *skip_sk) -{ - return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len, - HCI_MGMT_GENERIC_EVENTS, skip_sk); -} - static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 len, struct sock *skip_sk) { @@ -579,8 +572,8 @@ static int new_options(struct hci_dev *hdev, struct sock *skip) { __le32 options = get_missing_options(hdev); - return mgmt_generic_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options, - sizeof(options), skip); + return mgmt_limited_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options, + sizeof(options), HCI_MGMT_OPTION_EVENTS, skip); } static int send_options_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) @@ -1007,8 +1000,8 @@ static int new_settings(struct hci_dev *hdev, struct sock *skip) { __le32 ev = cpu_to_le32(get_current_settings(hdev)); - return mgmt_generic_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, - sizeof(ev), skip); + return mgmt_limited_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, + sizeof(ev), HCI_MGMT_SETTING_EVENTS, skip); } int mgmt_new_settings(struct hci_dev *hdev) @@ -3000,8 +2993,8 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, if (err < 0) goto failed; - err = mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, - data, len, sk); + err = mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, data, + len, HCI_MGMT_LOCAL_NAME_EVENTS, sk); goto failed; } @@ -6502,8 +6495,9 @@ void __mgmt_power_off(struct hci_dev *hdev) mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) - mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, - zero_cod, sizeof(zero_cod), NULL); + mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, + zero_cod, sizeof(zero_cod), + HCI_MGMT_DEV_CLASS_EVENTS, NULL); new_settings(hdev, match.sk); @@ -7100,8 +7094,8 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); if (!status) - mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, - dev_class, 3, NULL); + mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, + 3, HCI_MGMT_DEV_CLASS_EVENTS, NULL); if (match.sk) sock_put(match.sk); @@ -7130,8 +7124,8 @@ void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) return; } - mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), - cmd ? cmd->sk : NULL); + mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), + HCI_MGMT_LOCAL_NAME_EVENTS, cmd ? cmd->sk : NULL); } static inline bool has_uuid(u8 *uuid, u16 uuid_count, u8 (*uuids)[16]) -- cgit v1.2.3 From 9e8305b39bfa23a83b932007654097f4676c2ba2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 30 Aug 2016 05:00:35 +0200 Subject: Bluetooth: Use numbers for subsystem version string Instead of keeping a version string around, use version and revision numbers and then stringify them for use as module parameter. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 3 ++- net/bluetooth/af_bluetooth.c | 10 +++++++--- net/bluetooth/hci_sock.c | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 69b5174168b7..d705bcf40710 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -29,7 +29,8 @@ #include #include -#define BT_SUBSYS_VERSION "2.21" +#define BT_SUBSYS_VERSION 2 +#define BT_SUBSYS_REVISION 21 #ifndef AF_BLUETOOTH #define AF_BLUETOOTH 31 diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 1d96ff3a8d87..1aff2da9bc74 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -713,13 +714,16 @@ static struct net_proto_family bt_sock_family_ops = { struct dentry *bt_debugfs; EXPORT_SYMBOL_GPL(bt_debugfs); +#define VERSION __stringify(BT_SUBSYS_VERSION) "." \ + __stringify(BT_SUBSYS_REVISION) + static int __init bt_init(void) { int err; sock_skb_cb_check_size(sizeof(struct bt_skb_cb)); - BT_INFO("Core ver %s", BT_SUBSYS_VERSION); + BT_INFO("Core ver %s", VERSION); err = bt_selftest(); if (err < 0) @@ -797,7 +801,7 @@ subsys_initcall(bt_init); module_exit(bt_exit); MODULE_AUTHOR("Marcel Holtmann "); -MODULE_DESCRIPTION("Bluetooth Core ver " BT_SUBSYS_VERSION); -MODULE_VERSION(BT_SUBSYS_VERSION); +MODULE_DESCRIPTION("Bluetooth Core ver " VERSION); +MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_BLUETOOTH); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 804208d48368..a4227c777d16 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1117,8 +1117,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, send_monitor_note(sk, "Linux version %s (%s)", init_utsname()->release, init_utsname()->machine); - send_monitor_note(sk, "Bluetooth subsystem version %s", - BT_SUBSYS_VERSION); + send_monitor_note(sk, "Bluetooth subsystem version %u.%u", + BT_SUBSYS_VERSION, BT_SUBSYS_REVISION); send_monitor_replay(sk); send_monitor_control_replay(sk); -- cgit v1.2.3 From 321c6feed2519a2691f65e41c4d62332d6ee3d52 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 1 Sep 2016 16:46:23 +0200 Subject: Bluetooth: Add framework for Extended Controller Information This command is used to retrieve the current state and basic information of a controller. It is typically used right after getting the response to the Read Controller Index List command or an Index Added event (or its extended counterparts). When any of the values in the EIR_Data field changes, the event Extended Controller Information Changed will be used to inform clients about the updated information. Signed-off-by: Marcel Holtmann Signed-off-by: MichaÅ‚ Narajowski --- include/net/bluetooth/hci.h | 1 + include/net/bluetooth/mgmt.h | 18 +++++++++++++ net/bluetooth/mgmt.c | 62 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 79 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index ddb9accac3a5..99aa5e5e3100 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -208,6 +208,7 @@ enum { HCI_MGMT_INDEX_EVENTS, HCI_MGMT_UNCONF_INDEX_EVENTS, HCI_MGMT_EXT_INDEX_EVENTS, + HCI_MGMT_EXT_INFO_EVENTS, HCI_MGMT_OPTION_EVENTS, HCI_MGMT_SETTING_EVENTS, HCI_MGMT_DEV_CLASS_EVENTS, diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 7647964b1efa..611b243713ea 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -586,6 +586,18 @@ struct mgmt_rp_get_adv_size_info { #define MGMT_OP_START_LIMITED_DISCOVERY 0x0041 +#define MGMT_OP_READ_EXT_INFO 0x0042 +#define MGMT_READ_EXT_INFO_SIZE 0 +struct mgmt_rp_read_ext_info { + bdaddr_t bdaddr; + __u8 version; + __le16 manufacturer; + __le32 supported_settings; + __le32 current_settings; + __le16 eir_len; + __u8 eir[0]; +} __packed; + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; @@ -800,3 +812,9 @@ struct mgmt_ev_advertising_added { struct mgmt_ev_advertising_removed { __u8 instance; } __packed; + +#define MGMT_EV_EXT_INFO_CHANGED 0x0025 +struct mgmt_ev_ext_info_changed { + __le16 eir_len; + __u8 eir[0]; +} __packed; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 47efdb4a669a..69001f415efa 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -104,6 +104,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_REMOVE_ADVERTISING, MGMT_OP_GET_ADV_SIZE_INFO, MGMT_OP_START_LIMITED_DISCOVERY, + MGMT_OP_READ_EXT_INFO, }; static const u16 mgmt_events[] = { @@ -141,6 +142,7 @@ static const u16 mgmt_events[] = { MGMT_EV_LOCAL_OOB_DATA_UPDATED, MGMT_EV_ADVERTISING_ADDED, MGMT_EV_ADVERTISING_REMOVED, + MGMT_EV_EXT_INFO_CHANGED, }; static const u16 mgmt_untrusted_commands[] = { @@ -149,6 +151,7 @@ static const u16 mgmt_untrusted_commands[] = { MGMT_OP_READ_UNCONF_INDEX_LIST, MGMT_OP_READ_CONFIG_INFO, MGMT_OP_READ_EXT_INDEX_LIST, + MGMT_OP_READ_EXT_INFO, }; static const u16 mgmt_untrusted_events[] = { @@ -162,6 +165,7 @@ static const u16 mgmt_untrusted_events[] = { MGMT_EV_NEW_CONFIG_OPTIONS, MGMT_EV_EXT_INDEX_ADDED, MGMT_EV_EXT_INDEX_REMOVED, + MGMT_EV_EXT_INFO_CHANGED, }; #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) @@ -862,6 +866,52 @@ static int read_controller_info(struct sock *sk, struct hci_dev *hdev, sizeof(rp)); } +static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_rp_read_ext_info rp; + + BT_DBG("sock %p %s", sk, hdev->name); + + hci_dev_lock(hdev); + + memset(&rp, 0, sizeof(rp)); + + bacpy(&rp.bdaddr, &hdev->bdaddr); + + rp.version = hdev->hci_ver; + rp.manufacturer = cpu_to_le16(hdev->manufacturer); + + rp.supported_settings = cpu_to_le32(get_supported_settings(hdev)); + rp.current_settings = cpu_to_le32(get_current_settings(hdev)); + + rp.eir_len = cpu_to_le16(0); + + hci_dev_unlock(hdev); + + /* If this command is called at least once, then the events + * for class of device and local name changes are disabled + * and only the new extended controller information event + * is used. + */ + hci_sock_set_flag(sk, HCI_MGMT_EXT_INFO_EVENTS); + hci_sock_clear_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS); + hci_sock_clear_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS); + + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_EXT_INFO, 0, &rp, + sizeof(rp)); +} + +static int ext_info_changed(struct hci_dev *hdev, struct sock *skip) +{ + struct mgmt_ev_ext_info_changed ev; + + ev.eir_len = cpu_to_le16(0); + + return mgmt_limited_event(MGMT_EV_EXT_INFO_CHANGED, hdev, &ev, + sizeof(ev), HCI_MGMT_EXT_INFO_EVENTS, skip); +} + static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) { __le32 settings = cpu_to_le32(get_current_settings(hdev)); @@ -2995,6 +3045,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, err = mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, data, len, HCI_MGMT_LOCAL_NAME_EVENTS, sk); + ext_info_changed(hdev, sk); goto failed; } @@ -6356,6 +6407,8 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { remove_advertising, MGMT_REMOVE_ADVERTISING_SIZE }, { get_adv_size_info, MGMT_GET_ADV_SIZE_INFO_SIZE }, { start_limited_discovery, MGMT_START_DISCOVERY_SIZE }, + { read_ext_controller_info,MGMT_READ_EXT_INFO_SIZE, + HCI_MGMT_UNTRUSTED }, }; void mgmt_index_added(struct hci_dev *hdev) @@ -6494,10 +6547,12 @@ void __mgmt_power_off(struct hci_dev *hdev) mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); - if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) + if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, zero_cod, sizeof(zero_cod), HCI_MGMT_DEV_CLASS_EVENTS, NULL); + ext_info_changed(hdev, NULL); + } new_settings(hdev, match.sk); @@ -7093,9 +7148,11 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match); mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); - if (!status) + if (!status) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, 3, HCI_MGMT_DEV_CLASS_EVENTS, NULL); + ext_info_changed(hdev, NULL); + } if (match.sk) sock_put(match.sk); @@ -7126,6 +7183,7 @@ void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), HCI_MGMT_LOCAL_NAME_EVENTS, cmd ? cmd->sk : NULL); + ext_info_changed(hdev, cmd ? cmd->sk : NULL); } static inline bool has_uuid(u8 *uuid, u16 uuid_count, u8 (*uuids)[16]) -- cgit v1.2.3 From 4037a7747d7b5a3e5bb4d10fb9ea6e2fd8a23c3b Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 8 Sep 2016 18:07:07 +0200 Subject: Bluetooth: Increase the subsystem minor version number While the subsystem version information are purely informational, increase the minor number due to the addition of user channel and management control monitoring suppport. It is helpful for debugging purposes to see the version numbers change. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index d705bcf40710..0a1e21d7bce1 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -30,7 +30,7 @@ #include #define BT_SUBSYS_VERSION 2 -#define BT_SUBSYS_REVISION 21 +#define BT_SUBSYS_REVISION 22 #ifndef AF_BLUETOOTH #define AF_BLUETOOTH 31 -- cgit v1.2.3 From c4960ecf2b09210930964ef2c05ce2590802ccf4 Mon Sep 17 00:00:00 2001 From: MichaÅ‚ Narajowski Date: Sun, 18 Sep 2016 12:50:03 +0200 Subject: Bluetooth: Add support for appearance in scan rsp This patch enables prepending appearance value to scan response data. It also adds support for setting appearance value through mgmt command. If currently advertised instance has apperance flag set it is expired immediately. Signed-off-by: MichaÅ‚ Narajowski Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + include/net/bluetooth/mgmt.h | 6 ++++++ net/bluetooth/hci_request.c | 8 ++++++++ net/bluetooth/mgmt.c | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 52 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a48f71d73dc8..f00bf667ec33 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -211,6 +211,7 @@ struct hci_dev { __u8 dev_name[HCI_MAX_NAME_LENGTH]; __u8 short_name[HCI_MAX_SHORT_NAME_LENGTH]; __u8 eir[HCI_MAX_EIR_LENGTH]; + __u16 appearance; __u8 dev_class[3]; __u8 major_class; __u8 minor_class; diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 611b243713ea..72a456bbbcd5 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -598,6 +598,12 @@ struct mgmt_rp_read_ext_info { __u8 eir[0]; } __packed; +#define MGMT_OP_SET_APPEARANCE 0x0043 +struct mgmt_cp_set_appearance { + __u16 appearance; +} __packed; +#define MGMT_SET_APPEARANCE_SIZE 2 + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 0ce6cdd278b2..c8135680c43e 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1015,6 +1015,14 @@ static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, instance_flags = adv_instance->flags; + if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) { + ptr[0] = 3; + ptr[1] = EIR_APPEARANCE; + put_unaligned_le16(hdev->appearance, ptr + 2); + scan_rsp_len += 4; + ptr += 4; + } + memcpy(ptr, adv_instance->scan_rsp_data, adv_instance->scan_rsp_len); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 89954bb19222..78d708851208 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -105,6 +105,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_GET_ADV_SIZE_INFO, MGMT_OP_START_LIMITED_DISCOVERY, MGMT_OP_READ_EXT_INFO, + MGMT_OP_SET_APPEARANCE, }; static const u16 mgmt_events[] = { @@ -3143,6 +3144,34 @@ failed: return err; } +static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data, + u16 len) +{ + struct mgmt_cp_set_appearance *cp = data; + u16 apperance; + int err; + + BT_DBG(""); + + apperance = le16_to_cpu(cp->appearance); + + hci_dev_lock(hdev); + + if (hdev->appearance != apperance) { + hdev->appearance = apperance; + + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + adv_expire(hdev, MGMT_ADV_FLAG_APPEARANCE); + } + + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_APPEARANCE, 0, NULL, + 0); + + hci_dev_unlock(hdev); + + return err; +} + static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) { @@ -5918,6 +5947,7 @@ static u32 get_supported_adv_flags(struct hci_dev *hdev) flags |= MGMT_ADV_FLAG_DISCOV; flags |= MGMT_ADV_FLAG_LIMITED_DISCOV; flags |= MGMT_ADV_FLAG_MANAGED_FLAGS; + flags |= MGMT_ADV_FLAG_APPEARANCE; flags |= MGMT_ADV_FLAG_LOCAL_NAME; if (hdev->adv_tx_power != HCI_TX_POWER_INVALID) @@ -5999,6 +6029,9 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, /* at least 1 byte of name should fit in */ if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) max_len -= 3; + + if (adv_flags & MGMT_ADV_FLAG_APPEARANCE) + max_len -= 4; } if (len > max_len) @@ -6335,6 +6368,9 @@ static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) /* at least 1 byte of name should fit in */ if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) max_len -= 3; + + if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE)) + max_len -= 4; } return max_len; @@ -6470,6 +6506,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { start_limited_discovery, MGMT_START_DISCOVERY_SIZE }, { read_ext_controller_info,MGMT_READ_EXT_INFO_SIZE, HCI_MGMT_UNTRUSTED }, + { set_appearance, MGMT_SET_APPEARANCE_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) -- cgit v1.2.3 From 6a5d58b67e205f2ffc62d0a9ee4ef7d237e9a7fb Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 18 Sep 2016 07:31:42 -0400 Subject: net sched ife action: add 16 bit helpers encoder and checker for 16 bits metadata Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_ife.h | 2 ++ net/sched/act_ife.c | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'include/net') diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index 5164bd7a38fb..9fd2bea0a6e0 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -50,9 +50,11 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi); +int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi); int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi); int ife_validate_meta_u32(void *val, int len); int ife_validate_meta_u16(void *val, int len); +int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi); void ife_release_meta_gen(struct tcf_meta_info *mi); int register_ife_op(struct tcf_meta_ops *mops); int unregister_ife_op(struct tcf_meta_ops *mops); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index e87cd81315e1..ccf7b4b655fe 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -63,6 +63,23 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval) } EXPORT_SYMBOL_GPL(ife_tlv_meta_encode); +int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi) +{ + u16 edata = 0; + + if (mi->metaval) + edata = *(u16 *)mi->metaval; + else if (metaval) + edata = metaval; + + if (!edata) /* will not encode */ + return 0; + + edata = htons(edata); + return ife_tlv_meta_encode(skbdata, mi->metaid, 2, &edata); +} +EXPORT_SYMBOL_GPL(ife_encode_meta_u16); + int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi) { if (mi->metaval) @@ -81,6 +98,15 @@ int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi) } EXPORT_SYMBOL_GPL(ife_check_meta_u32); +int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi) +{ + if (metaval || mi->metaval) + return 8; /* T+L+(V) == 2+2+(2+2bytepad) */ + + return 0; +} +EXPORT_SYMBOL_GPL(ife_check_meta_u16); + int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi) { u32 edata = metaval; -- cgit v1.2.3 From 6403389211e1f4d40ed963fe47a96fce1a3ba7a9 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:10 -0400 Subject: tcp: use windowed min filter library for TCP min_rtt estimation Refactor the TCP min_rtt code to reuse the new win_minmax library in lib/win_minmax.c to simplify the TCP code. This is a pure refactor: the functionality is exactly the same. We just moved the windowed min code to make TCP easier to read and maintain, and to allow other parts of the kernel to use the windowed min/max filter code. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 5 ++-- include/net/tcp.h | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 64 ++++-------------------------------------------- net/ipv4/tcp_minisocks.c | 2 +- 5 files changed, 10 insertions(+), 65 deletions(-) (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c723a465125d..6433cc8b4667 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -19,6 +19,7 @@ #include +#include #include #include #include @@ -234,9 +235,7 @@ struct tcp_sock { u32 mdev_max_us; /* maximal mdev for the last rtt period */ u32 rttvar_us; /* smoothed mdev_max */ u32 rtt_seq; /* sequence number to update rttvar */ - struct rtt_meas { - u32 rtt, ts; /* RTT in usec and sampling time in jiffies. */ - } rtt_min[3]; + struct minmax rtt_min; u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ diff --git a/include/net/tcp.h b/include/net/tcp.h index fdfbedd61c67..2f1648af4d12 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -671,7 +671,7 @@ static inline bool tcp_ca_dst_locked(const struct dst_entry *dst) /* Minimum RTT in usec. ~0 means not available. */ static inline u32 tcp_min_rtt(const struct tcp_sock *tp) { - return tp->rtt_min[0].rtt; + return minmax_get(&tp->rtt_min); } /* Compute the actual receive window we are currently advertising. diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7dae800092e6..e79ed17ccfd6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -387,7 +387,7 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); - tp->rtt_min[0].rtt = ~0U; + minmax_reset(&tp->rtt_min, tcp_time_stamp, ~0U); /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dad3e7eeed94..6886f386464f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2879,67 +2879,13 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, *rexmit = REXMIT_LOST; } -/* Kathleen Nichols' algorithm for tracking the minimum value of - * a data stream over some fixed time interval. (E.g., the minimum - * RTT over the past five minutes.) It uses constant space and constant - * time per update yet almost always delivers the same minimum as an - * implementation that has to keep all the data in the window. - * - * The algorithm keeps track of the best, 2nd best & 3rd best min - * values, maintaining an invariant that the measurement time of the - * n'th best >= n-1'th best. It also makes sure that the three values - * are widely separated in the time window since that bounds the worse - * case error when that data is monotonically increasing over the window. - * - * Upon getting a new min, we can forget everything earlier because it - * has no value - the new min is <= everything else in the window by - * definition and it's the most recent. So we restart fresh on every new min - * and overwrites 2nd & 3rd choices. The same property holds for 2nd & 3rd - * best. - */ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) { - const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ; - struct rtt_meas *m = tcp_sk(sk)->rtt_min; - struct rtt_meas rttm = { - .rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1), - .ts = now, - }; - u32 elapsed; - - /* Check if the new measurement updates the 1st, 2nd, or 3rd choices */ - if (unlikely(rttm.rtt <= m[0].rtt)) - m[0] = m[1] = m[2] = rttm; - else if (rttm.rtt <= m[1].rtt) - m[1] = m[2] = rttm; - else if (rttm.rtt <= m[2].rtt) - m[2] = rttm; - - elapsed = now - m[0].ts; - if (unlikely(elapsed > wlen)) { - /* Passed entire window without a new min so make 2nd choice - * the new min & 3rd choice the new 2nd. So forth and so on. - */ - m[0] = m[1]; - m[1] = m[2]; - m[2] = rttm; - if (now - m[0].ts > wlen) { - m[0] = m[1]; - m[1] = rttm; - if (now - m[0].ts > wlen) - m[0] = rttm; - } - } else if (m[1].ts == m[0].ts && elapsed > wlen / 4) { - /* Passed a quarter of the window without a new min so - * take 2nd choice from the 2nd quarter of the window. - */ - m[2] = m[1] = rttm; - } else if (m[2].ts == m[1].ts && elapsed > wlen / 2) { - /* Passed half the window without a new min so take the 3rd - * choice from the last half of the window. - */ - m[2] = rttm; - } + struct tcp_sock *tp = tcp_sk(sk); + u32 wlen = sysctl_tcp_min_rtt_wlen * HZ; + + minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp, + rtt_us ? : jiffies_to_usecs(1)); } static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f63c73dc0acb..568947110b60 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -464,7 +464,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->srtt_us = 0; newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); - newtp->rtt_min[0].rtt = ~0U; + minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U); newicsk->icsk_rto = TCP_TIMEOUT_INIT; newtp->packets_out = 0; -- cgit v1.2.3 From b9f64820fb226a4e8ab10591f46cecd91ca56b30 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 19 Sep 2016 23:39:14 -0400 Subject: tcp: track data delivery rate for a TCP connection This patch generates data delivery rate (throughput) samples on a per-ACK basis. These rate samples can be used by congestion control modules, and specifically will be used by TCP BBR in later patches in this series. Key state: tp->delivered: Tracks the total number of data packets (original or not) delivered so far. This is an already-existing field. tp->delivered_mstamp: the last time tp->delivered was updated. Algorithm: A rate sample is calculated as (d1 - d0)/(t1 - t0) on a per-ACK basis: d1: the current tp->delivered after processing the ACK t1: the current time after processing the ACK d0: the prior tp->delivered when the acked skb was transmitted t0: the prior tp->delivered_mstamp when the acked skb was transmitted When an skb is transmitted, we snapshot d0 and t0 in its control block in tcp_rate_skb_sent(). When an ACK arrives, it may SACK and ACK some skbs. For each SACKed or ACKed skb, tcp_rate_skb_delivered() updates the rate_sample struct to reflect the latest (d0, t0). Finally, tcp_rate_gen() generates a rate sample by storing (d1 - d0) in rs->delivered and (t1 - t0) in rs->interval_us. One caveat: if an skb was sent with no packets in flight, then tp->delivered_mstamp may be either invalid (if the connection is starting) or outdated (if the connection was idle). In that case, we'll re-stamp tp->delivered_mstamp. At first glance it seems t0 should always be the time when an skb was transmitted, but actually this could over-estimate the rate due to phase mismatch between transmit and ACK events. To track the delivery rate, we ensure that if packets are in flight then t0 and and t1 are times at which packets were marked delivered. If the initial and final RTTs are different then one may be corrupted by some sort of noise. The noise we see most often is sending gaps caused by delayed, compressed, or stretched acks. This either affects both RTTs equally or artificially reduces the final RTT. We approach this by recording the info we need to compute the initial RTT (duration of the "send phase" of the window) when we recorded the associated inflight. Then, for a filter to avoid bandwidth overestimates, we generalize the per-sample bandwidth computation from: bw = delivered / ack_phase_rtt to the following: bw = delivered / max(send_phase_rtt, ack_phase_rtt) In large-scale experiments, this filtering approach incorporating send_phase_rtt is effective at avoiding bandwidth overestimates due to ACK compression or stretched ACKs. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 + include/net/tcp.h | 35 +++++++++++- net/ipv4/Makefile | 2 +- net/ipv4/tcp_input.c | 46 +++++++++++----- net/ipv4/tcp_output.c | 4 ++ net/ipv4/tcp_rate.c | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 222 insertions(+), 16 deletions(-) create mode 100644 net/ipv4/tcp_rate.c (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 38590fbc0ac5..c50e6aec005a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -268,6 +268,8 @@ struct tcp_sock { u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ u32 lost; /* Total data packets lost incl. rexmits */ + struct skb_mstamp first_tx_mstamp; /* start of window send phase */ + struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 2f1648af4d12..b261c892605a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -763,8 +763,14 @@ struct tcp_skb_cb { __u32 ack_seq; /* Sequence number ACK'd */ union { struct { - /* There is space for up to 20 bytes */ + /* There is space for up to 24 bytes */ __u32 in_flight;/* Bytes in flight when packet sent */ + /* pkts S/ACKed so far upon tx of skb, incl retrans: */ + __u32 delivered; + /* start of send pipeline phase */ + struct skb_mstamp first_tx_mstamp; + /* when we reached the "delivered" count */ + struct skb_mstamp delivered_mstamp; } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; @@ -860,6 +866,26 @@ struct ack_sample { u32 in_flight; }; +/* A rate sample measures the number of (original/retransmitted) data + * packets delivered "delivered" over an interval of time "interval_us". + * The tcp_rate.c code fills in the rate sample, and congestion + * control modules that define a cong_control function to run at the end + * of ACK processing can optionally chose to consult this sample when + * setting cwnd and pacing rate. + * A sample is invalid if "delivered" or "interval_us" is negative. + */ +struct rate_sample { + struct skb_mstamp prior_mstamp; /* starting timestamp for interval */ + u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ + s32 delivered; /* number of packets delivered over interval */ + long interval_us; /* time for tp->delivered to incr "delivered" */ + long rtt_us; /* RTT of last (S)ACKed packet (or -1) */ + int losses; /* number of packets marked lost upon ACK */ + u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ + u32 prior_in_flight; /* in flight before this ACK */ + bool is_retrans; /* is sample from retransmission? */ +}; + struct tcp_congestion_ops { struct list_head list; u32 key; @@ -946,6 +972,13 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) icsk->icsk_ca_ops->cwnd_event(sk, event); } +/* From tcp_rate.c */ +void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); +void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, + struct rate_sample *rs); +void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, + struct skb_mstamp *now, struct rate_sample *rs); + /* These functions determine how the current flow behaves in respect of SACK * handling. SACK is negotiated with the peer, and therefore it can vary * between different flows. diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 24629b6f57cc..9cfff1a0bf71 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ - tcp_recovery.o \ + tcp_rate.o tcp_recovery.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9413288c2778..d9ed4bb96f74 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1112,6 +1112,7 @@ struct tcp_sacktag_state { */ struct skb_mstamp first_sackt; struct skb_mstamp last_sackt; + struct rate_sample *rate; int flag; }; @@ -1279,6 +1280,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, start_seq, end_seq, dup_sack, pcount, &skb->skb_mstamp); + tcp_rate_skb_delivered(sk, skb, state->rate); if (skb == tp->lost_skb_hint) tp->lost_cnt_hint += pcount; @@ -1329,6 +1331,9 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tcp_advance_highest_sack(sk, skb); tcp_skb_collapse_tstamp(prev, skb); + if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp.v64)) + TCP_SKB_CB(prev)->tx.delivered_mstamp.v64 = 0; + tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); @@ -1558,6 +1563,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, dup_sack, tcp_skb_pcount(skb), &skb->skb_mstamp); + tcp_rate_skb_delivered(sk, skb, state->rate); if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) @@ -1640,8 +1646,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, num_sacks, prior_snd_una); - if (found_dup_sack) + if (found_dup_sack) { state->flag |= FLAG_DSACKING_ACK; + tp->delivered++; /* A spurious retransmission is delivered */ + } /* Eliminate too old ACKs, but take into * account more or less fresh ones, they can @@ -3071,10 +3079,11 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, */ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, u32 prior_snd_una, int *acked, - struct tcp_sacktag_state *sack) + struct tcp_sacktag_state *sack, + struct skb_mstamp *now) { const struct inet_connection_sock *icsk = inet_csk(sk); - struct skb_mstamp first_ackt, last_ackt, now; + struct skb_mstamp first_ackt, last_ackt; struct tcp_sock *tp = tcp_sk(sk); u32 prior_sacked = tp->sacked_out; u32 reord = tp->packets_out; @@ -3106,7 +3115,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, acked_pcount = tcp_tso_acked(sk, skb); if (!acked_pcount) break; - fully_acked = false; } else { /* Speedup tcp_unlink_write_queue() and next loop */ @@ -3142,6 +3150,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->packets_out -= acked_pcount; pkts_acked += acked_pcount; + tcp_rate_skb_delivered(sk, skb, sack->rate); /* Initial outgoing SYN's get put onto the write_queue * just like anything else we transmit. It is not @@ -3174,16 +3183,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; - skb_mstamp_get(&now); if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) { - seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt); - ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt); + seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt); + ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt); } if (sack->first_sackt.v64) { - sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt); - ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt); + sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt); + ca_rtt_us = skb_mstamp_us_delta(now, &sack->last_sackt); } - + sack->rate->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet, or -1 */ rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us, ca_rtt_us); @@ -3211,7 +3219,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->fackets_out -= min(pkts_acked, tp->fackets_out); } else if (skb && rtt_update && sack_rtt_us >= 0 && - sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { + sack_rtt_us > skb_mstamp_us_delta(now, &skb->skb_mstamp)) { /* Do not re-arm RTO if the sack RTT is measured from data sent * after when the head was last (re)transmitted. Otherwise the * timeout may continue to extend in loss recovery. @@ -3548,17 +3556,21 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct tcp_sacktag_state sack_state; + struct rate_sample rs = { .prior_delivered = 0 }; u32 prior_snd_una = tp->snd_una; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; u32 prior_fackets; int prior_packets = tp->packets_out; - u32 prior_delivered = tp->delivered; + u32 delivered = tp->delivered; + u32 lost = tp->lost; int acked = 0; /* Number of packets newly acked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ + struct skb_mstamp now; sack_state.first_sackt.v64 = 0; + sack_state.rate = &rs; /* We very likely will need to access write queue head. */ prefetchw(sk->sk_write_queue.next); @@ -3581,6 +3593,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, tp->snd_nxt)) goto invalid_ack; + skb_mstamp_get(&now); + if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); @@ -3591,6 +3605,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) } prior_fackets = tp->fackets_out; + rs.prior_in_flight = tcp_packets_in_flight(tp); /* ts_recent update must be made after we are sure that the packet * is in window. @@ -3646,7 +3661,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, - &sack_state); + &sack_state, &now); if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); @@ -3663,7 +3678,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); - tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag); + delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ + lost = tp->lost - lost; /* freshly marked lost */ + tcp_rate_gen(sk, delivered, lost, &now, &rs); + tcp_cong_control(sk, ack, delivered, flag); tcp_xmit_recovery(sk, rexmit); return 1; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8b45794eb6b2..e02c8ebf3ed4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -918,6 +918,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_mstamp_get(&skb->skb_mstamp); TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq - tp->snd_una; + tcp_rate_skb_sent(sk, skb); if (unlikely(skb_cloned(skb))) skb = pskb_copy(skb, gfp_mask); @@ -1213,6 +1214,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, tcp_set_skb_tso_segs(skb, mss_now); tcp_set_skb_tso_segs(buff, mss_now); + /* Update delivered info for the new segment */ + TCP_SKB_CB(buff)->tx = TCP_SKB_CB(skb)->tx; + /* If this packet has been sent out already, we must * adjust the various packet counters. */ diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c new file mode 100644 index 000000000000..1daed6af6e80 --- /dev/null +++ b/net/ipv4/tcp_rate.c @@ -0,0 +1,149 @@ +#include + +/* The bandwidth estimator estimates the rate at which the network + * can currently deliver outbound data packets for this flow. At a high + * level, it operates by taking a delivery rate sample for each ACK. + * + * A rate sample records the rate at which the network delivered packets + * for this flow, calculated over the time interval between the transmission + * of a data packet and the acknowledgment of that packet. + * + * Specifically, over the interval between each transmit and corresponding ACK, + * the estimator generates a delivery rate sample. Typically it uses the rate + * at which packets were acknowledged. However, the approach of using only the + * acknowledgment rate faces a challenge under the prevalent ACK decimation or + * compression: packets can temporarily appear to be delivered much quicker + * than the bottleneck rate. Since it is physically impossible to do that in a + * sustained fashion, when the estimator notices that the ACK rate is faster + * than the transmit rate, it uses the latter: + * + * send_rate = #pkts_delivered/(last_snd_time - first_snd_time) + * ack_rate = #pkts_delivered/(last_ack_time - first_ack_time) + * bw = min(send_rate, ack_rate) + * + * Notice the estimator essentially estimates the goodput, not always the + * network bottleneck link rate when the sending or receiving is limited by + * other factors like applications or receiver window limits. The estimator + * deliberately avoids using the inter-packet spacing approach because that + * approach requires a large number of samples and sophisticated filtering. + */ + + +/* Snapshot the current delivery information in the skb, to generate + * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). + */ +void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + + /* In general we need to start delivery rate samples from the + * time we received the most recent ACK, to ensure we include + * the full time the network needs to deliver all in-flight + * packets. If there are no packets in flight yet, then we + * know that any ACKs after now indicate that the network was + * able to deliver those packets completely in the sampling + * interval between now and the next ACK. + * + * Note that we use packets_out instead of tcp_packets_in_flight(tp) + * because the latter is a guess based on RTO and loss-marking + * heuristics. We don't want spurious RTOs or loss markings to cause + * a spuriously small time interval, causing a spuriously high + * bandwidth estimate. + */ + if (!tp->packets_out) { + tp->first_tx_mstamp = skb->skb_mstamp; + tp->delivered_mstamp = skb->skb_mstamp; + } + + TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp; + TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; + TCP_SKB_CB(skb)->tx.delivered = tp->delivered; +} + +/* When an skb is sacked or acked, we fill in the rate sample with the (prior) + * delivery information when the skb was last transmitted. + * + * If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is + * called multiple times. We favor the information from the most recently + * sent skb, i.e., the skb with the highest prior_delivered count. + */ +void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, + struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + + if (!scb->tx.delivered_mstamp.v64) + return; + + if (!rs->prior_delivered || + after(scb->tx.delivered, rs->prior_delivered)) { + rs->prior_delivered = scb->tx.delivered; + rs->prior_mstamp = scb->tx.delivered_mstamp; + rs->is_retrans = scb->sacked & TCPCB_RETRANS; + + /* Find the duration of the "send phase" of this window: */ + rs->interval_us = skb_mstamp_us_delta( + &skb->skb_mstamp, + &scb->tx.first_tx_mstamp); + + /* Record send time of most recently ACKed packet: */ + tp->first_tx_mstamp = skb->skb_mstamp; + } + /* Mark off the skb delivered once it's sacked to avoid being + * used again when it's cumulatively acked. For acked packets + * we don't need to reset since it'll be freed soon. + */ + if (scb->sacked & TCPCB_SACKED_ACKED) + scb->tx.delivered_mstamp.v64 = 0; +} + +/* Update the connection delivery information and generate a rate sample. */ +void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, + struct skb_mstamp *now, struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 snd_us, ack_us; + + /* TODO: there are multiple places throughout tcp_ack() to get + * current time. Refactor the code using a new "tcp_acktag_state" + * to carry current time, flags, stats like "tcp_sacktag_state". + */ + if (delivered) + tp->delivered_mstamp = *now; + + rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ + rs->losses = lost; /* freshly marked lost */ + /* Return an invalid sample if no timing information is available. */ + if (!rs->prior_mstamp.v64) { + rs->delivered = -1; + rs->interval_us = -1; + return; + } + rs->delivered = tp->delivered - rs->prior_delivered; + + /* Model sending data and receiving ACKs as separate pipeline phases + * for a window. Usually the ACK phase is longer, but with ACK + * compression the send phase can be longer. To be safe we use the + * longer phase. + */ + snd_us = rs->interval_us; /* send phase */ + ack_us = skb_mstamp_us_delta(now, &rs->prior_mstamp); /* ack phase */ + rs->interval_us = max(snd_us, ack_us); + + /* Normally we expect interval_us >= min-rtt. + * Note that rate may still be over-estimated when a spuriously + * retransmistted skb was first (s)acked because "interval_us" + * is under-estimated (up to an RTT). However continuously + * measuring the delivery rate during loss recovery is crucial + * for connections suffer heavy or prolonged losses. + */ + if (unlikely(rs->interval_us < tcp_min_rtt(tp))) { + rs->interval_us = -1; + if (!rs->is_retrans) + pr_debug("tcp rate: %ld %d %u %u %u\n", + rs->interval_us, rs->delivered, + inet_csk(sk)->icsk_ca_state, + tp->rx_opt.sack_ok, tcp_min_rtt(tp)); + } +} -- cgit v1.2.3 From d7722e8570fc0f1e003cee7cf37694041828918b Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Mon, 19 Sep 2016 23:39:15 -0400 Subject: tcp: track application-limited rate samples This commit adds code to track whether the delivery rate represented by each rate_sample was limited by the application. Upon each transmit, we store in the is_app_limited field in the skb a boolean bit indicating whether there is a known "bubble in the pipe": a point in the rate sample interval where the sender was application-limited, and did not transmit even though the cwnd and pacing rate allowed it. This logic marks the flow app-limited on a write if *all* of the following are true: 1) There is less than 1 MSS of unsent data in the write queue available to transmit. 2) There is no packet in the sender's queues (e.g. in fq or the NIC tx queue). 3) The connection is not limited by cwnd. 4) There are no lost packets to retransmit. The tcp_rate_check_app_limited() code in tcp_rate.c determines whether the connection is application-limited at the moment. If the flow is application-limited, it sets the tp->app_limited field. If the flow is application-limited then that means there is effectively a "bubble" of silence in the pipe now, and this silence will be reflected in a lower bandwidth sample for any rate samples from now until we get an ACK indicating this bubble has exited the pipe: specifically, until we get an ACK for the next packet we transmit. When we send every skb we record in scb->tx.is_app_limited whether the resulting rate sample will be application-limited. The code in tcp_rate_gen() checks to see when it is safe to mark all known application-limited bubbles of silence as having exited the pipe. It does this by checking to see when the delivered count moves past the tp->app_limited marker. At this point it zeroes the tp->app_limited marker, as all known bubbles are out of the pipe. We make room for the tx.is_app_limited bit in the skb by borrowing a bit from the in_flight field used by NV to record the number of bytes in flight. The receive window in the TCP header is 16 bits, and the max receive window scaling shift factor is 14 (RFC 1323). So the max receive window offered by the TCP protocol is 2^(16+14) = 2^30. So we only need 30 bits for the tx.in_flight used by NV. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + include/net/tcp.h | 6 +++++- net/ipv4/tcp.c | 8 ++++++++ net/ipv4/tcp_minisocks.c | 3 +++ net/ipv4/tcp_rate.c | 29 ++++++++++++++++++++++++++++- 5 files changed, 45 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c50e6aec005a..fdcd00ffcb66 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -268,6 +268,7 @@ struct tcp_sock { u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ u32 lost; /* Total data packets lost incl. rexmits */ + u32 app_limited; /* limited until "delivered" reaches this val */ struct skb_mstamp first_tx_mstamp; /* start of window send phase */ struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ diff --git a/include/net/tcp.h b/include/net/tcp.h index b261c892605a..a69ed7f0030c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -764,7 +764,9 @@ struct tcp_skb_cb { union { struct { /* There is space for up to 24 bytes */ - __u32 in_flight;/* Bytes in flight when packet sent */ + __u32 in_flight:30,/* Bytes in flight at transmit */ + is_app_limited:1, /* cwnd not fully used? */ + unused:1; /* pkts S/ACKed so far upon tx of skb, incl retrans: */ __u32 delivered; /* start of send pipeline phase */ @@ -883,6 +885,7 @@ struct rate_sample { int losses; /* number of packets marked lost upon ACK */ u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ u32 prior_in_flight; /* in flight before this ACK */ + bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ }; @@ -978,6 +981,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, struct skb_mstamp *now, struct rate_sample *rs); +void tcp_rate_check_app_limited(struct sock *sk); /* These functions determine how the current flow behaves in respect of SACK * handling. SACK is negotiated with the peer, and therefore it can vary diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index de02fb4b1349..2250f891f931 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -396,6 +396,9 @@ void tcp_init_sock(struct sock *sk) */ tp->snd_cwnd = TCP_INIT_CWND; + /* There's a bubble in the pipe until at least the first ACK. */ + tp->app_limited = ~0U; + /* See draft-stevens-tcpca-spec-01 for discussion of the * initialization of these values. */ @@ -1014,6 +1017,9 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, flags); lock_sock(sk); + + tcp_rate_check_app_limited(sk); /* is sending application-limited? */ + res = do_tcp_sendpages(sk, page, offset, size, flags); release_sock(sk); return res; @@ -1115,6 +1121,8 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + tcp_rate_check_app_limited(sk); /* is sending application-limited? */ + /* Wait for a connection to finish. One exception is TCP Fast Open * (passive side) where data is allowed to be sent before a connection * is fully established. diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 568947110b60..6234ebaa7db1 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -487,6 +487,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; + /* There's a bubble in the pipe until at least the first ACK. */ + newtp->app_limited = ~0U; + tcp_init_xmit_timers(newsk); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 1daed6af6e80..52ff84be59ab 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -26,9 +26,13 @@ * other factors like applications or receiver window limits. The estimator * deliberately avoids using the inter-packet spacing approach because that * approach requires a large number of samples and sophisticated filtering. + * + * TCP flows can often be application-limited in request/response workloads. + * The estimator marks a bandwidth sample as application-limited if there + * was some moment during the sampled window of packets when there was no data + * ready to send in the write queue. */ - /* Snapshot the current delivery information in the skb, to generate * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). */ @@ -58,6 +62,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp; TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; TCP_SKB_CB(skb)->tx.delivered = tp->delivered; + TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0; } /* When an skb is sacked or acked, we fill in the rate sample with the (prior) @@ -80,6 +85,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, after(scb->tx.delivered, rs->prior_delivered)) { rs->prior_delivered = scb->tx.delivered; rs->prior_mstamp = scb->tx.delivered_mstamp; + rs->is_app_limited = scb->tx.is_app_limited; rs->is_retrans = scb->sacked & TCPCB_RETRANS; /* Find the duration of the "send phase" of this window: */ @@ -105,6 +111,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, struct tcp_sock *tp = tcp_sk(sk); u32 snd_us, ack_us; + /* Clear app limited if bubble is acked and gone. */ + if (tp->app_limited && after(tp->delivered, tp->app_limited)) + tp->app_limited = 0; + /* TODO: there are multiple places throughout tcp_ack() to get * current time. Refactor the code using a new "tcp_acktag_state" * to carry current time, flags, stats like "tcp_sacktag_state". @@ -147,3 +157,20 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, tp->rx_opt.sack_ok, tcp_min_rtt(tp)); } } + +/* If a gap is detected between sends, mark the socket application-limited. */ +void tcp_rate_check_app_limited(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (/* We have less than one packet to send. */ + tp->write_seq - tp->snd_nxt < tp->mss_cache && + /* Nothing in sending host's qdisc queues or NIC tx queue. */ + sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) && + /* We are not limited by CWND. */ + tcp_packets_in_flight(tp) < tp->snd_cwnd && + /* All lost packets have been retransmitted. */ + tp->lost_out <= tp->retrans_out) + tp->app_limited = + (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; +} -- cgit v1.2.3 From ed6e7268b930e0a9a65d895d368eac79a438d992 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:17 -0400 Subject: tcp: allow congestion control module to request TSO skb segment count Add the tso_segs_goal() function in tcp_congestion_ops to allow the congestion control module to specify the number of segments that should be in a TSO skb sent by tcp_write_xmit() and tcp_xmit_retransmit_queue(). The congestion control module can either request a particular number of segments in TSO skb that we transmit, or return 0 if it doesn't care. This allows the upcoming BBR congestion control module to select small TSO skb sizes if the module detects that the bottleneck bandwidth is very low, or that the connection is policed to a low rate. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_output.c | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index a69ed7f0030c..f8f581fd05f5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -913,6 +913,8 @@ struct tcp_congestion_ops { u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); + /* suggest number of segments for each skb to transmit (optional) */ + u32 (*tso_segs_goal)(struct sock *sk); /* get info for inet_diag (optional) */ size_t (*get_info)(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e02c8ebf3ed4..01379567a732 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1566,6 +1566,17 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now) return min_t(u32, segs, sk->sk_gso_max_segs); } +/* Return the number of segments we want in the skb we are transmitting. + * See if congestion control module wants to decide; otherwise, autosize. + */ +static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) +{ + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; + + return tso_segs ? : tcp_tso_autosize(sk, mss_now); +} + /* Returns the portion of skb which can be sent right away */ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, @@ -2061,7 +2072,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } } - max_segs = tcp_tso_autosize(sk, mss_now); + max_segs = tcp_tso_segs(sk, mss_now); while ((skb = tcp_send_head(sk))) { unsigned int limit; @@ -2778,7 +2789,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) last_lost = tp->snd_una; } - max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk)); + max_segs = tcp_tso_segs(sk, tcp_current_mss(sk)); tcp_for_write_queue_from(skb, sk) { __u8 sacked; int segs; -- cgit v1.2.3 From 1b3878ca1551f3baab2c408d1e703b5ef785a1b2 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:18 -0400 Subject: tcp: export tcp_tso_autosize() and parameterize minimum number of TSO segments To allow congestion control modules to use the default TSO auto-sizing algorithm as one of the ingredients in their own decision about TSO sizing: 1) Export tcp_tso_autosize() so that CC modules can use it. 2) Change tcp_tso_autosize() to allow callers to specify a minimum number of segments per TSO skb, in case the congestion control module has a different notion of the best floor for TSO skbs for the connection right now. For very low-rate paths or policed connections it can be appropriate to use smaller TSO skbs. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_output.c | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index f8f581fd05f5..349204130d84 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -533,6 +533,8 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss); #endif /* tcp_output.c */ +u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, + int min_tso_segs); void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); bool tcp_may_send_now(struct sock *sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 01379567a732..0bf3d481fa85 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1549,7 +1549,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp, /* Return how many segs we'd like on a TSO packet, * to send one TSO packet per ms */ -static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now) +u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, + int min_tso_segs) { u32 bytes, segs; @@ -1561,10 +1562,11 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now) * This preserves ACK clocking and is consistent * with tcp_tso_should_defer() heuristic. */ - segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs); + segs = max_t(u32, bytes / mss_now, min_tso_segs); return min_t(u32, segs, sk->sk_gso_max_segs); } +EXPORT_SYMBOL(tcp_tso_autosize); /* Return the number of segments we want in the skb we are transmitting. * See if congestion control module wants to decide; otherwise, autosize. @@ -1574,7 +1576,8 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; - return tso_segs ? : tcp_tso_autosize(sk, mss_now); + return tso_segs ? : + tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs); } /* Returns the portion of skb which can be sent right away */ -- cgit v1.2.3 From 77bfc174c38e558a3425d3b069aa2762b2fedfdd Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 19 Sep 2016 23:39:20 -0400 Subject: tcp: allow congestion control to expand send buffer differently Currently the TCP send buffer expands to twice cwnd, in order to allow limited transmits in the CA_Recovery state. This assumes that cwnd does not increase in the CA_Recovery. For some congestion control algorithms, like the upcoming BBR module, if the losses in recovery do not indicate congestion then we may continue to raise cwnd multiplicatively in recovery. In such cases the current multiplier will falsely limit the sending rate, much as if it were limited by the application. This commit adds an optional congestion control callback to use a different multiplier to expand the TCP send buffer. For congestion control modules that do not specificy this callback, TCP continues to use the previous default of 2. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/tcp_input.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 349204130d84..1aa9628ae608 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -917,6 +917,8 @@ struct tcp_congestion_ops { void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); /* suggest number of segments for each skb to transmit (optional) */ u32 (*tso_segs_goal)(struct sock *sk); + /* returns the multiplier used in tcp_sndbuf_expand (optional) */ + u32 (*sndbuf_expand)(struct sock *sk); /* get info for inet_diag (optional) */ size_t (*get_info)(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d9ed4bb96f74..13a2e70141f5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -289,6 +289,7 @@ static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr static void tcp_sndbuf_expand(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; int sndmem, per_mss; u32 nr_segs; @@ -309,7 +310,8 @@ static void tcp_sndbuf_expand(struct sock *sk) * Cubic needs 1.7 factor, rounded to 2 to include * extra cushion (application might react slowly to POLLOUT) */ - sndmem = 2 * nr_segs * per_mss; + sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2; + sndmem *= nr_segs * per_mss; if (sk->sk_sndbuf < sndmem) sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); -- cgit v1.2.3 From c0402760f565ae066621ebf8720a32fba074d538 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 19 Sep 2016 23:39:21 -0400 Subject: tcp: new CC hook to set sending rate with rate_sample in any CA state This commit introduces an optional new "omnipotent" hook, cong_control(), for congestion control modules. The cong_control() function is called at the end of processing an ACK (i.e., after updating sequence numbers, the SACK scoreboard, and loss detection). At that moment we have precise delivery rate information the congestion control module can use to control the sending behavior (using cwnd, TSO skb size, and pacing rate) in any CA state. This function can also be used by a congestion control that prefers not to use the default cwnd reduction approach (i.e., the PRR algorithm) during CA_Recovery to control the cwnd and sending rate during loss recovery. We take advantage of the fact that recent changes defer the retransmission or transmission of new data (e.g. by F-RTO) in recovery until the new tcp_cong_control() function is run. With this commit, we only run tcp_update_pacing_rate() if the congestion control is not using this new API. New congestion controls which use the new API do not want the TCP stack to run the default pacing rate calculation and overwrite whatever pacing rate they have chosen at initialization time. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++++ net/ipv4/tcp_cong.c | 2 +- net/ipv4/tcp_input.c | 17 ++++++++++++++--- 3 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 1aa9628ae608..f83b7f220a65 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -919,6 +919,10 @@ struct tcp_congestion_ops { u32 (*tso_segs_goal)(struct sock *sk); /* returns the multiplier used in tcp_sndbuf_expand (optional) */ u32 (*sndbuf_expand)(struct sock *sk); + /* call when packets are delivered to update cwnd and pacing rate, + * after all the ca_state processing. (optional) + */ + void (*cong_control)(struct sock *sk, const struct rate_sample *rs); /* get info for inet_diag (optional) */ size_t (*get_info)(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 882caa4e72bc..1294af4e0127 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -69,7 +69,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) int ret = 0; /* all algorithms must implement ssthresh and cong_avoid ops */ - if (!ca->ssthresh || !ca->cong_avoid) { + if (!ca->ssthresh || !(ca->cong_avoid || ca->cong_control)) { pr_err("%s does not implement required ops\n", ca->name); return -EINVAL; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 13a2e70141f5..980a83edfa63 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2536,6 +2536,9 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + if (inet_csk(sk)->icsk_ca_ops->cong_control) + return; + /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { @@ -3312,8 +3315,15 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) * information. All transmission or retransmission are delayed afterwards. */ static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked, - int flag) + int flag, const struct rate_sample *rs) { + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->cong_control) { + icsk->icsk_ca_ops->cong_control(sk, rs); + return; + } + if (tcp_in_cwnd_reduction(sk)) { /* Reduce cwnd if state mandates */ tcp_cwnd_reduction(sk, acked_sacked, flag); @@ -3683,7 +3693,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ tcp_rate_gen(sk, delivered, lost, &now, &rs); - tcp_cong_control(sk, ack, delivered, flag); + tcp_cong_control(sk, ack, delivered, flag, &rs); tcp_xmit_recovery(sk, rexmit); return 1; @@ -5982,7 +5992,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) } else tcp_init_metrics(sk); - tcp_update_pacing_rate(sk); + if (!inet_csk(sk)->icsk_ca_ops->cong_control) + tcp_update_pacing_rate(sk); /* Prevent spurious tcp_cwnd_restart() on first data packet */ tp->lsndtime = tcp_time_stamp; -- cgit v1.2.3 From 7e744171386ae6da1248d3d27d10b6dbdc54f0ff Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Sep 2016 23:39:22 -0400 Subject: tcp: increase ICSK_CA_PRIV_SIZE from 64 bytes to 88 The TCP CUBIC module already uses 64 bytes. The upcoming TCP BBR module uses 88 bytes. Signed-off-by: Van Jacobson Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 49dcad4fe99e..197a30d221e9 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -134,8 +134,8 @@ struct inet_connection_sock { } icsk_mtup; u32 icsk_user_timeout; - u64 icsk_ca_priv[64 / sizeof(u64)]; -#define ICSK_CA_PRIV_SIZE (8 * sizeof(u64)) + u64 icsk_ca_priv[88 / sizeof(u64)]; +#define ICSK_CA_PRIV_SIZE (11 * sizeof(u64)) }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ -- cgit v1.2.3 From 332ae8e2f6ecda5e50c5c62ed62894963e3a83f5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:53 +0100 Subject: net: cls_bpf: add hardware offload This patch adds hardware offload capability to cls_bpf classifier, similar to what have been done with U32 and flower. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ include/net/pkt_cls.h | 14 ++++++++++ net/sched/cls_bpf.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+) (limited to 'include/net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a10d8d18ce19..69f242c71865 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -789,6 +789,7 @@ enum { TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, TC_SETUP_MATCHALL, + TC_SETUP_CLSBPF, }; struct tc_cls_u32_offload; @@ -800,6 +801,7 @@ struct tc_to_netdev { struct tc_cls_u32_offload *cls_u32; struct tc_cls_flower_offload *cls_flower; struct tc_cls_matchall_offload *cls_mall; + struct tc_cls_bpf_offload *cls_bpf; }; }; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index a459be5fe1c2..41e8071dff87 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -486,4 +486,18 @@ struct tc_cls_matchall_offload { unsigned long cookie; }; +enum tc_clsbpf_command { + TC_CLSBPF_ADD, + TC_CLSBPF_REPLACE, + TC_CLSBPF_DESTROY, +}; + +struct tc_cls_bpf_offload { + enum tc_clsbpf_command command; + struct tcf_exts *exts; + struct bpf_prog *prog; + const char *name; + bool exts_integrated; +}; + #endif diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index c6f7a47541eb..6523c5b4c0a5 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -39,6 +39,7 @@ struct cls_bpf_prog { struct list_head link; struct tcf_result res; bool exts_integrated; + bool offloaded; struct tcf_exts exts; u32 handle; union { @@ -138,6 +139,71 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog) return !prog->bpf_ops; } +static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, + enum tc_clsbpf_command cmd) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_bpf_offload bpf_offload = {}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSBPF; + offload.cls_bpf = &bpf_offload; + + bpf_offload.command = cmd; + bpf_offload.exts = &prog->exts; + bpf_offload.prog = prog->filter; + bpf_offload.name = prog->bpf_name; + bpf_offload.exts_integrated = prog->exts_integrated; + + return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); +} + +static void cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, + struct cls_bpf_prog *oldprog) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct cls_bpf_prog *obj = prog; + enum tc_clsbpf_command cmd; + + if (oldprog && oldprog->offloaded) { + if (tc_should_offload(dev, tp, 0)) { + cmd = TC_CLSBPF_REPLACE; + } else { + obj = oldprog; + cmd = TC_CLSBPF_DESTROY; + } + } else { + if (!tc_should_offload(dev, tp, 0)) + return; + cmd = TC_CLSBPF_ADD; + } + + if (cls_bpf_offload_cmd(tp, obj, cmd)) + return; + + obj->offloaded = true; + if (oldprog) + oldprog->offloaded = false; +} + +static void cls_bpf_stop_offload(struct tcf_proto *tp, + struct cls_bpf_prog *prog) +{ + int err; + + if (!prog->offloaded) + return; + + err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); + if (err) { + pr_err("Stopping hardware offload failed: %d\n", err); + return; + } + + prog->offloaded = false; +} + static int cls_bpf_init(struct tcf_proto *tp) { struct cls_bpf_head *head; @@ -177,6 +243,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) { struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg; + cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); call_rcu(&prog->rcu, __cls_bpf_delete_prog); @@ -193,6 +260,7 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force) return false; list_for_each_entry_safe(prog, tmp, &head->plist, link) { + cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); call_rcu(&prog->rcu, __cls_bpf_delete_prog); @@ -415,6 +483,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (ret < 0) goto errout; + cls_bpf_offload(tp, prog, oldprog); + if (oldprog) { list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); -- cgit v1.2.3 From 0d01d45f1b251448590c710baa32f722e43c62c7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:43:54 +0100 Subject: net: cls_bpf: limit hardware offload by software-only flag Add cls_bpf support for the TCA_CLS_FLAGS_SKIP_HW flag. Unlike U32 and flower cls_bpf already has some netlink flags defined. Create a new attribute to be able to use the same flag values as the above. Unlike U32 and flower reject unknown flags. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 1 + include/uapi/linux/pkt_cls.h | 1 + net/sched/cls_bpf.c | 22 ++++++++++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 41e8071dff87..57af9f3032ff 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -498,6 +498,7 @@ struct tc_cls_bpf_offload { struct bpf_prog *prog; const char *name; bool exts_integrated; + u32 gen_flags; }; #endif diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 8915b61bbf83..8fd715f806a2 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -396,6 +396,7 @@ enum { TCA_BPF_FD, TCA_BPF_NAME, TCA_BPF_FLAGS, + TCA_BPF_FLAGS_GEN, __TCA_BPF_MAX, }; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 6523c5b4c0a5..ebf01f7c1470 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -27,6 +27,8 @@ MODULE_AUTHOR("Daniel Borkmann "); MODULE_DESCRIPTION("TC BPF based classifier"); #define CLS_BPF_NAME_LEN 256 +#define CLS_BPF_SUPPORTED_GEN_FLAGS \ + TCA_CLS_FLAGS_SKIP_HW struct cls_bpf_head { struct list_head plist; @@ -40,6 +42,7 @@ struct cls_bpf_prog { struct tcf_result res; bool exts_integrated; bool offloaded; + u32 gen_flags; struct tcf_exts exts; u32 handle; union { @@ -55,6 +58,7 @@ struct cls_bpf_prog { static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { [TCA_BPF_CLASSID] = { .type = NLA_U32 }, [TCA_BPF_FLAGS] = { .type = NLA_U32 }, + [TCA_BPF_FLAGS_GEN] = { .type = NLA_U32 }, [TCA_BPF_FD] = { .type = NLA_U32 }, [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN }, @@ -154,6 +158,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, bpf_offload.prog = prog->filter; bpf_offload.name = prog->bpf_name; bpf_offload.exts_integrated = prog->exts_integrated; + bpf_offload.gen_flags = prog->gen_flags; return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &offload); @@ -167,14 +172,14 @@ static void cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, enum tc_clsbpf_command cmd; if (oldprog && oldprog->offloaded) { - if (tc_should_offload(dev, tp, 0)) { + if (tc_should_offload(dev, tp, prog->gen_flags)) { cmd = TC_CLSBPF_REPLACE; } else { obj = oldprog; cmd = TC_CLSBPF_DESTROY; } } else { - if (!tc_should_offload(dev, tp, 0)) + if (!tc_should_offload(dev, tp, prog->gen_flags)) return; cmd = TC_CLSBPF_ADD; } @@ -370,6 +375,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, { bool is_bpf, is_ebpf, have_exts = false; struct tcf_exts exts; + u32 gen_flags = 0; int ret; is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS]; @@ -394,8 +400,17 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT; } + if (tb[TCA_BPF_FLAGS_GEN]) { + gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]); + if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS || + !tc_flags_valid(gen_flags)) { + ret = -EINVAL; + goto errout; + } + } prog->exts_integrated = have_exts; + prog->gen_flags = gen_flags; ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) : cls_bpf_prog_from_efd(tb, prog, tp); @@ -568,6 +583,9 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT; if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags)) goto nla_put_failure; + if (prog->gen_flags && + nla_put_u32(skb, TCA_BPF_FLAGS_GEN, prog->gen_flags)) + goto nla_put_failure; nla_nest_end(skb, nest); -- cgit v1.2.3 From 68d640630d4ef2a4bf3f68b5073dec5e4c4f878b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 21 Sep 2016 11:44:02 +0100 Subject: net: cls_bpf: allow offloaded filters to update stats Call into offloaded filters to update stats. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 1 + net/sched/cls_bpf.c | 11 +++++++++++ 2 files changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 57af9f3032ff..5ccaa4be7d96 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -490,6 +490,7 @@ enum tc_clsbpf_command { TC_CLSBPF_ADD, TC_CLSBPF_REPLACE, TC_CLSBPF_DESTROY, + TC_CLSBPF_STATS, }; struct tc_cls_bpf_offload { diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 1becc2fe1bc5..bb1d5a487081 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -221,6 +221,15 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp, prog->offloaded = false; } +static void cls_bpf_offload_update_stats(struct tcf_proto *tp, + struct cls_bpf_prog *prog) +{ + if (!prog->offloaded) + return; + + cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS); +} + static int cls_bpf_init(struct tcf_proto *tp) { struct cls_bpf_head *head; @@ -577,6 +586,8 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, tm->tcm_handle = prog->handle; + cls_bpf_offload_update_stats(tp, prog); + nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; -- cgit v1.2.3 From e2f036a97271cf5811ee754bf321a29a814577f9 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 21 Sep 2016 08:45:55 -0300 Subject: sctp: rename WORD_TRUNC/ROUND macros To something more meaningful these days, specially because this is working on packet headers or lengths and which are not tied to any CPU arch but to the protocol itself. So, WORD_TRUNC becomes SCTP_TRUNC4 and WORD_ROUND becomes SCTP_PAD4. Reported-by: David Laight Reported-by: David Miller Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 10 +++++----- net/netfilter/xt_sctp.c | 2 +- net/sctp/associola.c | 2 +- net/sctp/chunk.c | 6 +++--- net/sctp/input.c | 8 ++++---- net/sctp/inqueue.c | 2 +- net/sctp/output.c | 12 ++++++------ net/sctp/sm_make_chunk.c | 28 ++++++++++++++-------------- net/sctp/sm_statefuns.c | 6 +++--- net/sctp/transport.c | 4 ++-- net/sctp/ulpevent.c | 4 ++-- 11 files changed, 42 insertions(+), 42 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 632e205ca54b..87a7f42e7639 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -83,9 +83,9 @@ #endif /* Round an int up to the next multiple of 4. */ -#define WORD_ROUND(s) (((s)+3)&~3) +#define SCTP_PAD4(s) (((s)+3)&~3) /* Truncate to the previous multiple of 4. */ -#define WORD_TRUNC(s) ((s)&~3) +#define SCTP_TRUNC4(s) ((s)&~3) /* * Function declarations. @@ -433,7 +433,7 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) if (asoc->user_frag) frag = min_t(int, frag, asoc->user_frag); - frag = WORD_TRUNC(min_t(int, frag, SCTP_MAX_CHUNK_LEN)); + frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN)); return frag; } @@ -462,7 +462,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) for (pos.v = chunk->member;\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ - pos.v += WORD_ROUND(ntohs(pos.p->length))) + pos.v += SCTP_PAD4(ntohs(pos.p->length))) #define sctp_walk_errors(err, chunk_hdr)\ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) @@ -472,7 +472,7 @@ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ sizeof(sctp_chunkhdr_t));\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ - err = (sctp_errhdr_t *)((void *)err + WORD_ROUND(ntohs(err->length)))) + err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length)))) #define sctp_walk_fwdtsn(pos, chunk)\ _sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk)) diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index ef36a56a02c6..4dedb96d1a06 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -68,7 +68,7 @@ match_packet(const struct sk_buff *skb, ++i, offset, sch->type, htons(sch->length), sch->flags); #endif - offset += WORD_ROUND(ntohs(sch->length)); + offset += SCTP_PAD4(ntohs(sch->length)); pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 1c23060c41a6..f10d3397f917 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1408,7 +1408,7 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) transports) { if (t->pmtu_pending && t->dst) { sctp_transport_update_pmtu(sk, t, - WORD_TRUNC(dst_mtu(t->dst))); + SCTP_TRUNC4(dst_mtu(t->dst))); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index af9cc8055465..76eae828ec89 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -208,8 +208,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc); if (hmac_desc) - max_data -= WORD_ROUND(sizeof(sctp_auth_chunk_t) + - hmac_desc->hmac_len); + max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) + + hmac_desc->hmac_len); } /* Now, check if we need to reduce our max */ @@ -229,7 +229,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, asoc->outqueue.out_qlen == 0 && list_empty(&asoc->outqueue.retransmit) && msg_len > max) - max_data -= WORD_ROUND(sizeof(sctp_sack_chunk_t)); + max_data -= SCTP_PAD4(sizeof(sctp_sack_chunk_t)); /* Encourage Cookie-ECHO bundling. */ if (asoc->state < SCTP_STATE_COOKIE_ECHOED) diff --git a/net/sctp/input.c b/net/sctp/input.c index 69444d32ecda..a1d85065bfc0 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -605,7 +605,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) /* PMTU discovery (RFC1191) */ if (ICMP_FRAG_NEEDED == code) { sctp_icmp_frag_needed(sk, asoc, transport, - WORD_TRUNC(info)); + SCTP_TRUNC4(info)); goto out_unlock; } else { if (ICMP_PROT_UNREACH == code) { @@ -673,7 +673,7 @@ static int sctp_rcv_ootb(struct sk_buff *skb) if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) break; - ch_end = offset + WORD_ROUND(ntohs(ch->length)); + ch_end = offset + SCTP_PAD4(ntohs(ch->length)); if (ch_end > skb->len) break; @@ -1121,7 +1121,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) break; - ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); if (ch_end > skb_tail_pointer(skb)) break; @@ -1190,7 +1190,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, * that the chunk length doesn't cause overflow. Otherwise, we'll * walk off the end. */ - if (WORD_ROUND(ntohs(ch->length)) > skb->len) + if (SCTP_PAD4(ntohs(ch->length)) > skb->len) return NULL; /* If this is INIT/INIT-ACK look inside the chunk too. */ diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 6437aa97cfd7..f731de3e8428 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -213,7 +213,7 @@ new_skb: } chunk->chunk_hdr = ch; - chunk->chunk_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + chunk->chunk_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t)); chunk->subh.v = NULL; /* Subheader is no longer valid. */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 0c605ec74dc4..2a5c1896d18f 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -297,7 +297,7 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, struct sctp_chunk *chunk) { sctp_xmit_t retval = SCTP_XMIT_OK; - __u16 chunk_len = WORD_ROUND(ntohs(chunk->chunk_hdr->length)); + __u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length)); /* Check to see if this chunk will fit into the packet */ retval = sctp_packet_will_fit(packet, chunk, chunk_len); @@ -508,7 +508,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) if (gso) { pkt_size = packet->overhead; list_for_each_entry(chunk, &packet->chunk_list, list) { - int padded = WORD_ROUND(chunk->skb->len); + int padded = SCTP_PAD4(chunk->skb->len); if (pkt_size + padded > tp->pathmtu) break; @@ -538,7 +538,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) * included in the chunk length field. The sender should * never pad with more than 3 bytes. * - * [This whole comment explains WORD_ROUND() below.] + * [This whole comment explains SCTP_PAD4() below.] */ pkt_size -= packet->overhead; @@ -560,7 +560,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) has_data = 1; } - padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len; + padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len; if (padding) memset(skb_put(chunk->skb, padding), 0, padding); @@ -587,7 +587,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) * acknowledged or have failed. * Re-queue auth chunks if needed. */ - pkt_size -= WORD_ROUND(chunk->skb->len); + pkt_size -= SCTP_PAD4(chunk->skb->len); if (!sctp_chunk_is_data(chunk) && chunk != packet->auth) sctp_chunk_free(chunk); @@ -911,7 +911,7 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, */ maxsize = pmtu - packet->overhead; if (packet->auth) - maxsize -= WORD_ROUND(packet->auth->skb->len); + maxsize -= SCTP_PAD4(packet->auth->skb->len); if (chunk_len > maxsize) retval = SCTP_XMIT_PMTU_FULL; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 8c77b87a8565..79dd66079dd7 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -253,7 +253,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, num_types = sp->pf->supported_addrs(sp, types); chunksize = sizeof(init) + addrs_len; - chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types)); + chunksize += SCTP_PAD4(SCTP_SAT_LEN(num_types)); chunksize += sizeof(ecap_param); if (asoc->prsctp_enable) @@ -283,14 +283,14 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* Add HMACS parameter length if any were defined */ auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); + chunksize += SCTP_PAD4(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; /* Add CHUNKS parameter length */ auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += WORD_ROUND(ntohs(auth_chunks->length)); + chunksize += SCTP_PAD4(ntohs(auth_chunks->length)); else auth_chunks = NULL; @@ -300,8 +300,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* If we have any extensions to report, account for that */ if (num_ext) - chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + + num_ext); /* RFC 2960 3.3.2 Initiation (INIT) (1) * @@ -443,13 +443,13 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); + chunksize += SCTP_PAD4(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += WORD_ROUND(ntohs(auth_chunks->length)); + chunksize += SCTP_PAD4(ntohs(auth_chunks->length)); else auth_chunks = NULL; @@ -458,8 +458,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, } if (num_ext) - chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + + num_ext); /* Now allocate and fill out the chunk. */ retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp); @@ -1390,7 +1390,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, struct sock *sk; /* No need to allocate LL here, as this is only a chunk. */ - skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen), gfp); + skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp); if (!skb) goto nodata; @@ -1482,7 +1482,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) void *target; void *padding; int chunklen = ntohs(chunk->chunk_hdr->length); - int padlen = WORD_ROUND(chunklen) - chunklen; + int padlen = SCTP_PAD4(chunklen) - chunklen; padding = skb_put(chunk->skb, padlen); target = skb_put(chunk->skb, len); @@ -1900,7 +1900,7 @@ static int sctp_process_missing_param(const struct sctp_association *asoc, struct __sctp_missing report; __u16 len; - len = WORD_ROUND(sizeof(report)); + len = SCTP_PAD4(sizeof(report)); /* Make an ERROR chunk, preparing enough room for * returning multiple unknown parameters. @@ -2098,9 +2098,9 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, if (*errp) { if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, - WORD_ROUND(ntohs(param.p->length)))) + SCTP_PAD4(ntohs(param.p->length)))) sctp_addto_chunk_fixed(*errp, - WORD_ROUND(ntohs(param.p->length)), + SCTP_PAD4(ntohs(param.p->length)), param.v); } else { /* If there is no memory for generating the ERROR diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index d88bb2b0b699..026e3bca4a94 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3454,7 +3454,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, } /* Report violation if chunk len overflows */ - ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); if (ch_end > skb_tail_pointer(skb)) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -4185,7 +4185,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length)), + SCTP_PAD4(ntohs(hdr->length)), 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, @@ -4203,7 +4203,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length)), + SCTP_PAD4(ntohs(hdr->length)), 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 81b86678be4d..ce54dce13ddb 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -233,7 +233,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) } if (transport->dst) { - transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst)); + transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst)); } else transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } @@ -287,7 +287,7 @@ void sctp_transport_route(struct sctp_transport *transport, return; } if (transport->dst) { - transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst)); + transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst)); /* Initialize sk->sk_rcv_saddr, if the transport is the * association's active path for getsockname(). diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index d85b803da11d..bea00058ce35 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -383,7 +383,7 @@ sctp_ulpevent_make_remote_error(const struct sctp_association *asoc, ch = (sctp_errhdr_t *)(chunk->skb->data); cause = ch->cause; - elen = WORD_ROUND(ntohs(ch->length)) - sizeof(sctp_errhdr_t); + elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(sctp_errhdr_t); /* Pull off the ERROR header. */ skb_pull(chunk->skb, sizeof(sctp_errhdr_t)); @@ -688,7 +688,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, * MUST ignore the padding bytes. */ len = ntohs(chunk->chunk_hdr->length); - padding = WORD_ROUND(len) - len; + padding = SCTP_PAD4(len) - len; /* Fixup cloned skb with just this chunks data. */ skb_trim(skb, chunk->chunk_end - padding - skb->data); -- cgit v1.2.3 From 36b701fae12ac763a568037e4e7c96b5727a8b3e Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana Date: Wed, 14 Sep 2016 15:00:02 +0200 Subject: netfilter: nf_tables: validate maximum value of u32 netlink attributes Fetch value and validate u32 netlink attribute. This validation is usually required when the u32 netlink attributes are being stored in a field whose size is smaller. This patch revisits 4da449ae1df9 ("netfilter: nft_exthdr: Add size check on u8 nft_exthdr attributes"). Fixes: 96518518cc41 ("netfilter: add nftables") Suggested-by: Pablo Neira Ayuso Signed-off-by: Laura Garcia Liebana Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 25 +++++++++++++++++++++++++ net/netfilter/nft_bitwise.c | 8 +++++++- net/netfilter/nft_byteorder.c | 15 +++++++++++++-- net/netfilter/nft_cmp.c | 3 +++ net/netfilter/nft_exthdr.c | 12 +++++++----- net/netfilter/nft_immediate.c | 4 ++++ 7 files changed, 60 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a7a7cebc8d07..5031e072567b 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -145,6 +145,7 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE; } +unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); unsigned int nft_parse_register(const struct nlattr *attr); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index bd9715e5ff26..b70d3ea1430e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4409,6 +4409,31 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, return 0; } +/** + * nft_parse_u32_check - fetch u32 attribute and check for maximum value + * + * @attr: netlink attribute to fetch value from + * @max: maximum value to be stored in dest + * @dest: pointer to the variable + * + * Parse, check and store a given u32 netlink attribute into variable. + * This function returns -ERANGE if the value goes over maximum value. + * Otherwise a 0 is returned and the attribute value is stored in the + * destination variable. + */ +unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) +{ + int val; + + val = ntohl(nla_get_be32(attr)); + if (val > max) + return -ERANGE; + + *dest = val; + return 0; +} +EXPORT_SYMBOL_GPL(nft_parse_u32_check); + /** * nft_parse_register - parse a register value from a netlink attribute * diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index d71cc18fa35d..31c15ed2e5fc 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -52,6 +52,7 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, { struct nft_bitwise *priv = nft_expr_priv(expr); struct nft_data_desc d1, d2; + u32 len; int err; if (tb[NFTA_BITWISE_SREG] == NULL || @@ -61,7 +62,12 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, tb[NFTA_BITWISE_XOR] == NULL) return -EINVAL; - priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN])); + err = nft_parse_u32_check(tb[NFTA_BITWISE_LEN], U8_MAX, &len); + if (err < 0) + return err; + + priv->len = len; + priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]); err = nft_validate_register_load(priv->sreg, priv->len); if (err < 0) diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index b78c28ba465f..ee63d981268d 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -99,6 +99,7 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_byteorder *priv = nft_expr_priv(expr); + u32 size, len; int err; if (tb[NFTA_BYTEORDER_SREG] == NULL || @@ -117,7 +118,12 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, return -EINVAL; } - priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE])); + err = nft_parse_u32_check(tb[NFTA_BYTEORDER_SIZE], U8_MAX, &size); + if (err < 0) + return err; + + priv->size = size; + switch (priv->size) { case 2: case 4: @@ -128,7 +134,12 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, } priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]); - priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN])); + err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len); + if (err < 0) + return err; + + priv->len = len; + err = nft_validate_register_load(priv->sreg, priv->len); if (err < 0) return err; diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index e25b35d70e4d..2e53739812b1 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -84,6 +84,9 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (err < 0) return err; + if (desc.len > U8_MAX) + return -ERANGE; + priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); priv->len = desc.len; return 0; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 82c264e40278..a84cf3d66056 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -59,7 +59,7 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); - u32 offset, len; + u32 offset, len, err; if (tb[NFTA_EXTHDR_DREG] == NULL || tb[NFTA_EXTHDR_TYPE] == NULL || @@ -67,11 +67,13 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, tb[NFTA_EXTHDR_LEN] == NULL) return -EINVAL; - offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET])); - len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN])); + err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset); + if (err < 0) + return err; - if (offset > U8_MAX || len > U8_MAX) - return -ERANGE; + err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len); + if (err < 0) + return err; priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index db3b746858e3..d17018ff54e6 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -53,6 +53,10 @@ static int nft_immediate_init(const struct nft_ctx *ctx, tb[NFTA_IMMEDIATE_DATA]); if (err < 0) return err; + + if (desc.len > U8_MAX) + return -ERANGE; + priv->dlen = desc.len; priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]); -- cgit v1.2.3 From 2462f3f4a7e079192b78f36900c34f18dad824a7 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Thu, 15 Sep 2016 20:50:16 +0800 Subject: netfilter: nf_queue: improve queue range support for bridge family After commit ac2863445686 ("netfilter: bridge: add nf_afinfo to enable queuing to userspace"), we can queue packets to the user space in bridge family. But when the user specify the queue range, packets will be only delivered to the first queue num. Because in nfqueue_hash, we only support ipv4 and ipv6 family. Now add support for bridge family too. Suggested-by: Pablo Neira Ayuso Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 56 ++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index cc8a11f7e306..903dca050fb6 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -41,22 +41,19 @@ static inline void init_hashrandom(u32 *jhash_initval) *jhash_initval = prandom_u32(); } -static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval) +static inline u32 hash_v4(const struct iphdr *iph, u32 initval) { - const struct iphdr *iph = ip_hdr(skb); - /* packets in either direction go into same queue */ if ((__force u32)iph->saddr < (__force u32)iph->daddr) return jhash_3words((__force u32)iph->saddr, - (__force u32)iph->daddr, iph->protocol, jhash_initval); + (__force u32)iph->daddr, iph->protocol, initval); return jhash_3words((__force u32)iph->daddr, - (__force u32)iph->saddr, iph->protocol, jhash_initval); + (__force u32)iph->saddr, iph->protocol, initval); } -static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval) +static inline u32 hash_v6(const struct ipv6hdr *ip6h, u32 initval) { - const struct ipv6hdr *ip6h = ipv6_hdr(skb); u32 a, b, c; if ((__force u32)ip6h->saddr.s6_addr32[3] < @@ -74,17 +71,50 @@ static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval) else c = (__force u32) ip6h->daddr.s6_addr32[1]; - return jhash_3words(a, b, c, jhash_initval); + return jhash_3words(a, b, c, initval); +} + +static inline u32 hash_bridge(const struct sk_buff *skb, u32 initval) +{ + struct ipv6hdr *ip6h, _ip6h; + struct iphdr *iph, _iph; + + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + iph = skb_header_pointer(skb, skb_network_offset(skb), + sizeof(*iph), &_iph); + if (iph) + return hash_v4(iph, initval); + break; + case htons(ETH_P_IPV6): + ip6h = skb_header_pointer(skb, skb_network_offset(skb), + sizeof(*ip6h), &_ip6h); + if (ip6h) + return hash_v6(ip6h, initval); + break; + } + + return 0; } static inline u32 nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, - u32 jhash_initval) + u32 initval) { - if (family == NFPROTO_IPV4) - queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32; - else if (family == NFPROTO_IPV6) - queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32; + switch (family) { + case NFPROTO_IPV4: + queue += reciprocal_scale(hash_v4(ip_hdr(skb), initval), + queues_total); + break; + case NFPROTO_IPV6: + queue += reciprocal_scale(hash_v6(ipv6_hdr(skb), initval), + queues_total); + break; + case NFPROTO_BRIDGE: + queue += reciprocal_scale(hash_bridge(skb, initval), + queues_total); + break; + } return queue; } -- cgit v1.2.3 From 182691d0998400f35ad304718024e60feaa864aa Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 20 Sep 2016 18:19:14 -0300 Subject: sctp: improve how SSN, TSN and ASCONF serial are compared Make it similar to time_before() macros: - easier to understand - make use of typecheck() to avoid working on unexpected variable types (made the issue on previous patch visible) - for _[lg]te versions, slighly faster, as the compiler used to generate a sequence of cmp/je/cmp/js instructions and now it's sub/test/jle (for _lte): Before, for sctp_outq_sack: if (primary->cacc.changeover_active) { 1f01: 80 b9 84 02 00 00 00 cmpb $0x0,0x284(%rcx) 1f08: 74 6e je 1f78 u8 clear_cycling = 0; if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) { 1f0a: 8b 81 80 02 00 00 mov 0x280(%rcx),%eax return ((s) - (t)) & TSN_SIGN_BIT; } static inline int TSN_lte(__u32 s, __u32 t) { return ((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT); 1f10: 8b 7d bc mov -0x44(%rbp),%edi 1f13: 39 c7 cmp %eax,%edi 1f15: 74 25 je 1f3c 1f17: 39 f8 cmp %edi,%eax 1f19: 78 21 js 1f3c primary->cacc.changeover_active = 0; After: if (primary->cacc.changeover_active) { 1ee7: 80 b9 84 02 00 00 00 cmpb $0x0,0x284(%rcx) 1eee: 74 73 je 1f63 u8 clear_cycling = 0; if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) { 1ef0: 8b 81 80 02 00 00 mov 0x280(%rcx),%eax 1ef6: 2b 45 b4 sub -0x4c(%rbp),%eax 1ef9: 85 c0 test %eax,%eax 1efb: 7e 26 jle 1f23 primary->cacc.changeover_active = 0; *_lt() generated pretty much the same code. Tested with gcc (GCC) 6.1.1 20160621. This patch also removes SSN_lte as it is not used and cleanups some comments. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sm.h | 94 ++++++++++----------------------------------------- 1 file changed, 18 insertions(+), 76 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index bafe2a0ab908..ca6c971dd74a 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -307,85 +307,27 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) } /* Compare two TSNs */ +#define TSN_lt(a,b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((a) - (b)) < 0)) -/* RFC 1982 - Serial Number Arithmetic - * - * 2. Comparison - * Then, s1 is said to be equal to s2 if and only if i1 is equal to i2, - * in all other cases, s1 is not equal to s2. - * - * s1 is said to be less than s2 if, and only if, s1 is not equal to s2, - * and - * - * (i1 < i2 and i2 - i1 < 2^(SERIAL_BITS - 1)) or - * (i1 > i2 and i1 - i2 > 2^(SERIAL_BITS - 1)) - * - * s1 is said to be greater than s2 if, and only if, s1 is not equal to - * s2, and - * - * (i1 < i2 and i2 - i1 > 2^(SERIAL_BITS - 1)) or - * (i1 > i2 and i1 - i2 < 2^(SERIAL_BITS - 1)) - */ - -/* - * RFC 2960 - * 1.6 Serial Number Arithmetic - * - * Comparisons and arithmetic on TSNs in this document SHOULD use Serial - * Number Arithmetic as defined in [RFC1982] where SERIAL_BITS = 32. - */ - -enum { - TSN_SIGN_BIT = (1<<31) -}; - -static inline int TSN_lt(__u32 s, __u32 t) -{ - return ((s) - (t)) & TSN_SIGN_BIT; -} - -static inline int TSN_lte(__u32 s, __u32 t) -{ - return ((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT); -} +#define TSN_lte(a,b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((a) - (b)) <= 0)) /* Compare two SSNs */ - -/* - * RFC 2960 - * 1.6 Serial Number Arithmetic - * - * Comparisons and arithmetic on Stream Sequence Numbers in this document - * SHOULD use Serial Number Arithmetic as defined in [RFC1982] where - * SERIAL_BITS = 16. - */ -enum { - SSN_SIGN_BIT = (1<<15) -}; - -static inline int SSN_lt(__u16 s, __u16 t) -{ - return ((s) - (t)) & SSN_SIGN_BIT; -} - -static inline int SSN_lte(__u16 s, __u16 t) -{ - return ((s) == (t)) || (((s) - (t)) & SSN_SIGN_BIT); -} - -/* - * ADDIP 3.1.1 - * The valid range of Serial Number is from 0 to 4294967295 (2**32 - 1). Serial - * Numbers wrap back to 0 after reaching 4294967295. - */ -enum { - ADDIP_SERIAL_SIGN_BIT = (1<<31) -}; - -static inline int ADDIP_SERIAL_gte(__u32 s, __u32 t) -{ - return ((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT); -} +#define SSN_lt(a,b) \ + (typecheck(__u16, a) && \ + typecheck(__u16, b) && \ + ((__s16)((a) - (b)) < 0)) + +/* ADDIP 3.1.1 */ +#define ADDIP_SERIAL_gte(a,b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((b) - (a)) <= 0)) /* Check VTAG of the packet matches the sender's own tag. */ static inline int -- cgit v1.2.3 From 53e89941ba2a969c483aa29b907de9a823179297 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 22 Sep 2016 20:01:41 +0300 Subject: net_sched: act_vlan: add helper inlines to access tcf_vlan info Needed e.g for offloading drivers to pick the relevant attributes. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/net/tc_act/tc_vlan.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/net') diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index 6b835889ea30..48cca321ee6c 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -11,6 +11,7 @@ #define __NET_TC_VLAN_H #include +#include #define VLAN_F_POP 0x1 #define VLAN_F_PUSH 0x2 @@ -24,4 +25,28 @@ struct tcf_vlan { }; #define to_vlan(a) ((struct tcf_vlan *)a) +static inline bool is_tcf_vlan(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->type == TCA_ACT_VLAN) + return true; +#endif + return false; +} + +static inline u32 tcf_vlan_action(const struct tc_action *a) +{ + return to_vlan(a)->tcfv_action; +} + +static inline u16 tcf_vlan_push_vid(const struct tc_action *a) +{ + return to_vlan(a)->tcfv_push_vid; +} + +static inline __be16 tcf_vlan_push_proto(const struct tc_action *a) +{ + return to_vlan(a)->tcfv_push_proto; +} + #endif /* __NET_TC_VLAN_H */ -- cgit v1.2.3 From 732f794c1baf58e1eb2be4431635829c3da655bd Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 22 Sep 2016 16:49:22 -0400 Subject: net: dsa: add port fast ageing Today the DSA drivers are in charge of flushing the MAC addresses associated to a port when its STP state changes from Learning or Forwarding, to Disabled or Blocking or Listening. This makes the drivers more complex and hides the generic switch logic. Introduce a new optional port_fast_age operation to dsa_switch_ops, to move this logic to the DSA layer and keep drivers simple. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 2 ++ net/dsa/slave.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 7556646db2d3..b122196d5a1f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -143,6 +143,7 @@ struct dsa_port { struct net_device *netdev; struct device_node *dn; unsigned int ageing_time; + u8 stp_state; }; struct dsa_switch { @@ -339,6 +340,7 @@ struct dsa_switch_ops { void (*port_bridge_leave)(struct dsa_switch *ds, int port); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); + void (*port_fast_age)(struct dsa_switch *ds, int port); /* * VLAN support diff --git a/net/dsa/slave.c b/net/dsa/slave.c index fd78d4c9b197..6b1282c006b1 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -71,8 +71,26 @@ static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p) static void dsa_port_set_stp_state(struct dsa_switch *ds, int port, u8 state) { + struct dsa_port *dp = &ds->ports[port]; + if (ds->ops->port_stp_state_set) ds->ops->port_stp_state_set(ds, port, state); + + if (ds->ops->port_fast_age) { + /* Fast age FDB entries or flush appropriate forwarding database + * for the given port, if we are moving it from Learning or + * Forwarding state, to Disabled or Blocking or Listening state. + */ + + if ((dp->stp_state == BR_STATE_LEARNING || + dp->stp_state == BR_STATE_FORWARDING) && + (state == BR_STATE_DISABLED || + state == BR_STATE_BLOCKING || + state == BR_STATE_LISTENING)) + ds->ops->port_fast_age(ds, port); + } + + dp->stp_state = state; } static int dsa_slave_open(struct net_device *dev) -- cgit v1.2.3 From c5136b15ea364124299c8a9ba96b300e96061e3a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Sep 2016 11:35:01 -0400 Subject: netfilter: bridge: add and use br_nf_hook_thresh This replaces the last uses of NF_HOOK_THRESH(). Followup patch will remove it and rename nf_hook_thresh. The reason is that inet (non-bridge) netfilter no longer invokes the hooks from hooks, so we do no longer need the thresh value to skip hooks with a lower priority. The bridge netfilter however may need to do this. br_nf_hook_thresh is a wrapper that is supposed to do this, i.e. only call hooks with a priority that exceeds NF_BR_PRI_BRNF. It's used only in the recursion cases of br_netfilter. It invokes nf_hook_slow while holding an rcu read-side critical section to make a future cleanup simpler. Signed-off-by: Florian Westphal Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/br_netfilter.h | 6 ++++ net/bridge/br_netfilter_hooks.c | 60 ++++++++++++++++++++++++++++++------ net/bridge/br_netfilter_ipv6.c | 12 +++----- 3 files changed, 62 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index e8d1448425a7..0b0c35c37125 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -15,6 +15,12 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) void nf_bridge_update_protocol(struct sk_buff *skb); +int br_nf_hook_thresh(unsigned int hook, struct net *net, struct sock *sk, + struct sk_buff *skb, struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct net *, struct sock *, + struct sk_buff *)); + static inline struct nf_bridge_info * nf_bridge_info_get(const struct sk_buff *skb) { diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 77e7f69bf80d..6029af47377d 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -395,11 +396,10 @@ bridged_dnat: skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, - NF_BR_PRE_ROUTING, - net, sk, skb, skb->dev, NULL, - br_nf_pre_routing_finish_bridge, - 1); + br_nf_hook_thresh(NF_BR_PRE_ROUTING, + net, sk, skb, skb->dev, + NULL, + br_nf_pre_routing_finish); return 0; } ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); @@ -417,10 +417,8 @@ bridged_dnat: skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb, - skb->dev, NULL, - br_handle_frame_finish, 1); - + br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, + br_handle_frame_finish); return 0; } @@ -992,6 +990,50 @@ static struct notifier_block brnf_notifier __read_mostly = { .notifier_call = brnf_device_event, }; +/* recursively invokes nf_hook_slow (again), skipping already-called + * hooks (< NF_BR_PRI_BRNF). + * + * Called with rcu read lock held. + */ +int br_nf_hook_thresh(unsigned int hook, struct net *net, + struct sock *sk, struct sk_buff *skb, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct net *, struct sock *, + struct sk_buff *)) +{ + struct nf_hook_ops *elem; + struct nf_hook_state state; + struct list_head *head; + int ret; + + head = &net->nf.hooks[NFPROTO_BRIDGE][hook]; + + list_for_each_entry_rcu(elem, head, list) { + struct nf_hook_ops *next; + + next = list_entry_rcu(list_next_rcu(&elem->list), + struct nf_hook_ops, list); + if (next->priority <= NF_BR_PRI_BRNF) + continue; + } + + if (&elem->list == head) + return okfn(net, sk, skb); + + /* We may already have this, but read-locks nest anyway */ + rcu_read_lock(); + nf_hook_state_init(&state, head, hook, NF_BR_PRI_BRNF + 1, + NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); + + ret = nf_hook_slow(skb, &state); + rcu_read_unlock(); + if (ret == 1) + ret = okfn(net, sk, skb); + + return ret; +} + #ifdef CONFIG_SYSCTL static int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 5e59a8457e7b..5989661c659f 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -187,10 +187,9 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - net, sk, skb, skb->dev, NULL, - br_nf_pre_routing_finish_bridge, - 1); + br_nf_hook_thresh(NF_BR_PRE_ROUTING, + net, sk, skb, skb->dev, NULL, + br_nf_pre_routing_finish_bridge); return 0; } ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); @@ -207,9 +206,8 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb, - skb->dev, NULL, - br_handle_frame_finish, 1); + br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, + skb->dev, NULL, br_handle_frame_finish); return 0; } -- cgit v1.2.3 From 54f17bbc52f71e2d313721046627c383d6c5c7da Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 21 Sep 2016 11:35:06 -0400 Subject: netfilter: nf_queue: whitespace cleanup A future patch will modify the hook drop and outfn functions. This will cause the line lengths to take up too much space. This is simply a readability change. Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 903dca050fb6..8fe85b98b5c8 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -22,10 +22,10 @@ struct nf_queue_entry { /* Packet queuing */ struct nf_queue_handler { - int (*outfn)(struct nf_queue_entry *entry, - unsigned int queuenum); - void (*nf_hook_drop)(struct net *net, - struct nf_hook_ops *ops); + int (*outfn)(struct nf_queue_entry *entry, + unsigned int queuenum); + void (*nf_hook_drop)(struct net *net, + struct nf_hook_ops *ops); }; void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); -- cgit v1.2.3 From e3b37f11e6e4e6b6f02cc762f182ce233d2c1c9d Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 21 Sep 2016 11:35:07 -0400 Subject: netfilter: replace list_head with single linked list The netfilter hook list never uses the prev pointer, and so can be trimmed to be a simple singly-linked list. In addition to having a more light weight structure for hook traversal, struct net becomes 5568 bytes (down from 6400) and struct net_device becomes 2176 bytes (down from 2240). Signed-off-by: Aaron Conole Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netdevice.h | 2 +- include/linux/netfilter.h | 63 +++++++++-------- include/linux/netfilter_ingress.h | 17 +++-- include/net/netfilter/nf_queue.h | 3 +- include/net/netns/netfilter.h | 2 +- net/bridge/br_netfilter_hooks.c | 19 ++--- net/netfilter/core.c | 141 +++++++++++++++++++++++++------------- net/netfilter/nf_internals.h | 10 +-- net/netfilter/nf_queue.c | 18 ++--- net/netfilter/nfnetlink_queue.c | 8 ++- 10 files changed, 167 insertions(+), 116 deletions(-) (limited to 'include/net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 67bb978470dc..41f49f5ab62a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1783,7 +1783,7 @@ struct net_device { #endif struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS - struct list_head nf_hooks_ingress; + struct nf_hook_entry __rcu *nf_hooks_ingress; #endif unsigned char broadcast[MAX_ADDR_LEN]; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ad444f0b4ed0..44e20dac98a9 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -55,12 +55,34 @@ struct nf_hook_state { struct net_device *out; struct sock *sk; struct net *net; - struct list_head *hook_list; + struct nf_hook_entry __rcu *hook_entries; int (*okfn)(struct net *, struct sock *, struct sk_buff *); }; +typedef unsigned int nf_hookfn(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state); +struct nf_hook_ops { + struct list_head list; + + /* User fills in from here down. */ + nf_hookfn *hook; + struct net_device *dev; + void *priv; + u_int8_t pf; + unsigned int hooknum; + /* Hooks are ordered in ascending priority. */ + int priority; +}; + +struct nf_hook_entry { + struct nf_hook_entry __rcu *next; + struct nf_hook_ops ops; + const struct nf_hook_ops *orig_ops; +}; + static inline void nf_hook_state_init(struct nf_hook_state *p, - struct list_head *hook_list, + struct nf_hook_entry *hook_entry, unsigned int hook, int thresh, u_int8_t pf, struct net_device *indev, @@ -76,26 +98,11 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->out = outdev; p->sk = sk; p->net = net; - p->hook_list = hook_list; + RCU_INIT_POINTER(p->hook_entries, hook_entry); p->okfn = okfn; } -typedef unsigned int nf_hookfn(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state); - -struct nf_hook_ops { - struct list_head list; - /* User fills in from here down. */ - nf_hookfn *hook; - struct net_device *dev; - void *priv; - u_int8_t pf; - unsigned int hooknum; - /* Hooks are ordered in ascending priority. */ - int priority; -}; struct nf_sockopt_ops { struct list_head list; @@ -161,7 +168,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, int (*okfn)(struct net *, struct sock *, struct sk_buff *), int thresh) { - struct list_head *hook_list; + struct nf_hook_entry *hook_head; + int ret = 1; #ifdef HAVE_JUMP_LABEL if (__builtin_constant_p(pf) && @@ -170,22 +178,19 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, return 1; #endif - hook_list = &net->nf.hooks[pf][hook]; - - if (!list_empty(hook_list)) { + rcu_read_lock(); + hook_head = rcu_dereference(net->nf.hooks[pf][hook]); + if (hook_head) { struct nf_hook_state state; - int ret; - /* We may already have this, but read-locks nest anyway */ - rcu_read_lock(); - nf_hook_state_init(&state, hook_list, hook, thresh, + nf_hook_state_init(&state, hook_head, hook, thresh, pf, indev, outdev, sk, net, okfn); ret = nf_hook_slow(skb, &state); - rcu_read_unlock(); - return ret; } - return 1; + rcu_read_unlock(); + + return ret; } static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 6965ba09eba7..33e37fb41d5d 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -11,23 +11,30 @@ static inline bool nf_hook_ingress_active(const struct sk_buff *skb) if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) return false; #endif - return !list_empty(&skb->dev->nf_hooks_ingress); + return rcu_access_pointer(skb->dev->nf_hooks_ingress); } /* caller must hold rcu_read_lock */ static inline int nf_hook_ingress(struct sk_buff *skb) { + struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress); struct nf_hook_state state; - nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress, - NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, - skb->dev, NULL, NULL, dev_net(skb->dev), NULL); + /* Must recheck the ingress hook head, in the event it became NULL + * after the check in nf_hook_ingress_active evaluated to true. + */ + if (unlikely(!e)) + return 0; + + nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, INT_MIN, + NFPROTO_NETDEV, skb->dev, NULL, NULL, + dev_net(skb->dev), NULL); return nf_hook_slow(skb, &state); } static inline void nf_hook_ingress_init(struct net_device *dev) { - INIT_LIST_HEAD(&dev->nf_hooks_ingress); + RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); } #else /* CONFIG_NETFILTER_INGRESS */ static inline int nf_hook_ingress_active(struct sk_buff *skb) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 8fe85b98b5c8..2280cfe86c56 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -11,7 +11,6 @@ struct nf_queue_entry { struct sk_buff *skb; unsigned int id; - struct nf_hook_ops *elem; struct nf_hook_state state; u16 size; /* sizeof(entry) + saved route keys */ @@ -25,7 +24,7 @@ struct nf_queue_handler { int (*outfn)(struct nf_queue_entry *entry, unsigned int queuenum); void (*nf_hook_drop)(struct net *net, - struct nf_hook_ops *ops); + const struct nf_hook_entry *hooks); }; void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 36d723579af2..58487b1cc99a 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -16,6 +16,6 @@ struct netns_nf { #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; #endif - struct list_head hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; + struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; }; #endif diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 6029af47377d..2fe9345c1407 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1002,28 +1002,21 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { - struct nf_hook_ops *elem; + struct nf_hook_entry *elem; struct nf_hook_state state; - struct list_head *head; int ret; - head = &net->nf.hooks[NFPROTO_BRIDGE][hook]; + elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]); - list_for_each_entry_rcu(elem, head, list) { - struct nf_hook_ops *next; + while (elem && (elem->ops.priority <= NF_BR_PRI_BRNF)) + elem = rcu_dereference(elem->next); - next = list_entry_rcu(list_next_rcu(&elem->list), - struct nf_hook_ops, list); - if (next->priority <= NF_BR_PRI_BRNF) - continue; - } - - if (&elem->list == head) + if (!elem) return okfn(net, sk, skb); /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); - nf_hook_state_init(&state, head, hook, NF_BR_PRI_BRNF + 1, + nf_hook_state_init(&state, elem, hook, NF_BR_PRI_BRNF + 1, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); ret = nf_hook_slow(skb, &state); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 67b74287535d..72fc514ec676 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -61,33 +62,50 @@ EXPORT_SYMBOL(nf_hooks_needed); #endif static DEFINE_MUTEX(nf_hook_mutex); +#define nf_entry_dereference(e) \ + rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex)) -static struct list_head *nf_find_hook_list(struct net *net, - const struct nf_hook_ops *reg) +static struct nf_hook_entry *nf_hook_entry_head(struct net *net, + const struct nf_hook_ops *reg) { - struct list_head *hook_list = NULL; + struct nf_hook_entry *hook_head = NULL; if (reg->pf != NFPROTO_NETDEV) - hook_list = &net->nf.hooks[reg->pf][reg->hooknum]; + hook_head = nf_entry_dereference(net->nf.hooks[reg->pf] + [reg->hooknum]); else if (reg->hooknum == NF_NETDEV_INGRESS) { #ifdef CONFIG_NETFILTER_INGRESS if (reg->dev && dev_net(reg->dev) == net) - hook_list = ®->dev->nf_hooks_ingress; + hook_head = + nf_entry_dereference( + reg->dev->nf_hooks_ingress); #endif } - return hook_list; + return hook_head; } -struct nf_hook_entry { - const struct nf_hook_ops *orig_ops; - struct nf_hook_ops ops; -}; +/* must hold nf_hook_mutex */ +static void nf_set_hooks_head(struct net *net, const struct nf_hook_ops *reg, + struct nf_hook_entry *entry) +{ + switch (reg->pf) { + case NFPROTO_NETDEV: + /* We already checked in nf_register_net_hook() that this is + * used from ingress. + */ + rcu_assign_pointer(reg->dev->nf_hooks_ingress, entry); + break; + default: + rcu_assign_pointer(net->nf.hooks[reg->pf][reg->hooknum], + entry); + break; + } +} int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct list_head *hook_list; + struct nf_hook_entry *hooks_entry; struct nf_hook_entry *entry; - struct nf_hook_ops *elem; if (reg->pf == NFPROTO_NETDEV && (reg->hooknum != NF_NETDEV_INGRESS || @@ -100,19 +118,30 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) entry->orig_ops = reg; entry->ops = *reg; + entry->next = NULL; + + mutex_lock(&nf_hook_mutex); + hooks_entry = nf_hook_entry_head(net, reg); - hook_list = nf_find_hook_list(net, reg); - if (!hook_list) { - kfree(entry); - return -ENOENT; + if (hooks_entry && hooks_entry->orig_ops->priority > reg->priority) { + /* This is the case where we need to insert at the head */ + entry->next = hooks_entry; + hooks_entry = NULL; } - mutex_lock(&nf_hook_mutex); - list_for_each_entry(elem, hook_list, list) { - if (reg->priority < elem->priority) - break; + while (hooks_entry && + reg->priority >= hooks_entry->orig_ops->priority && + nf_entry_dereference(hooks_entry->next)) { + hooks_entry = nf_entry_dereference(hooks_entry->next); + } + + if (hooks_entry) { + entry->next = nf_entry_dereference(hooks_entry->next); + rcu_assign_pointer(hooks_entry->next, entry); + } else { + nf_set_hooks_head(net, reg, entry); } - list_add_rcu(&entry->ops.list, elem->list.prev); + mutex_unlock(&nf_hook_mutex); #ifdef CONFIG_NETFILTER_INGRESS if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) @@ -127,24 +156,33 @@ EXPORT_SYMBOL(nf_register_net_hook); void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct list_head *hook_list; - struct nf_hook_entry *entry; - struct nf_hook_ops *elem; - - hook_list = nf_find_hook_list(net, reg); - if (!hook_list) - return; + struct nf_hook_entry *hooks_entry; mutex_lock(&nf_hook_mutex); - list_for_each_entry(elem, hook_list, list) { - entry = container_of(elem, struct nf_hook_entry, ops); - if (entry->orig_ops == reg) { - list_del_rcu(&entry->ops.list); - break; + hooks_entry = nf_hook_entry_head(net, reg); + if (hooks_entry->orig_ops == reg) { + nf_set_hooks_head(net, reg, + nf_entry_dereference(hooks_entry->next)); + goto unlock; + } + while (hooks_entry && nf_entry_dereference(hooks_entry->next)) { + struct nf_hook_entry *next = + nf_entry_dereference(hooks_entry->next); + struct nf_hook_entry *nnext; + + if (next->orig_ops != reg) { + hooks_entry = next; + continue; } + nnext = nf_entry_dereference(next->next); + rcu_assign_pointer(hooks_entry->next, nnext); + hooks_entry = next; + break; } + +unlock: mutex_unlock(&nf_hook_mutex); - if (&elem->list == hook_list) { + if (!hooks_entry) { WARN(1, "nf_unregister_net_hook: hook not found!\n"); return; } @@ -156,10 +194,10 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); - nf_queue_nf_hook_drop(net, &entry->ops); + nf_queue_nf_hook_drop(net, hooks_entry); /* other cpu might still process nfqueue verdict that used reg */ synchronize_net(); - kfree(entry); + kfree(hooks_entry); } EXPORT_SYMBOL(nf_unregister_net_hook); @@ -258,10 +296,9 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) } EXPORT_SYMBOL(nf_unregister_hooks); -unsigned int nf_iterate(struct list_head *head, - struct sk_buff *skb, +unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, - struct nf_hook_ops **elemp) + struct nf_hook_entry **entryp) { unsigned int verdict; @@ -269,20 +306,23 @@ unsigned int nf_iterate(struct list_head *head, * The caller must not block between calls to this * function because of risk of continuing from deleted element. */ - list_for_each_entry_continue_rcu((*elemp), head, list) { - if (state->thresh > (*elemp)->priority) + while (*entryp) { + if (state->thresh > (*entryp)->ops.priority) { + *entryp = rcu_dereference((*entryp)->next); continue; + } /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ repeat: - verdict = (*elemp)->hook((*elemp)->priv, skb, state); + verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT)) { NFDEBUG("Evil return from %p(%u).\n", - (*elemp)->hook, state->hook); + (*entryp)->ops.hook, state->hook); + *entryp = rcu_dereference((*entryp)->next); continue; } #endif @@ -290,6 +330,7 @@ repeat: return verdict; goto repeat; } + *entryp = rcu_dereference((*entryp)->next); } return NF_ACCEPT; } @@ -299,13 +340,13 @@ repeat: * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) { - struct nf_hook_ops *elem; + struct nf_hook_entry *entry; unsigned int verdict; int ret = 0; - elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list); + entry = rcu_dereference(state->hook_entries); next_hook: - verdict = nf_iterate(state->hook_list, skb, state, &elem); + verdict = nf_iterate(skb, state, &entry); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { @@ -314,8 +355,10 @@ next_hook: if (ret == 0) ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { - int err = nf_queue(skb, elem, state, - verdict >> NF_VERDICT_QBITS); + int err; + + RCU_INIT_POINTER(state->hook_entries, entry); + err = nf_queue(skb, state, verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ESRCH && (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) @@ -442,7 +485,7 @@ static int __net_init netfilter_net_init(struct net *net) for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) { for (h = 0; h < NF_MAX_HOOKS; h++) - INIT_LIST_HEAD(&net->nf.hooks[i][h]); + RCU_INIT_POINTER(net->nf.hooks[i][h], NULL); } #ifdef CONFIG_PROC_FS diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 065522564ac6..e0adb5959342 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -13,13 +13,13 @@ /* core.c */ -unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, - struct nf_hook_state *state, struct nf_hook_ops **elemp); +unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, + struct nf_hook_entry **entryp); /* nf_queue.c */ -int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, - struct nf_hook_state *state, unsigned int queuenum); -void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops); +int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, + unsigned int queuenum); +void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry); int __init netfilter_queue_init(void); /* nf_log.c */ diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index b19ad20a705c..96964a0070e1 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -96,14 +96,14 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry) } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); -void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops) +void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry) { const struct nf_queue_handler *qh; rcu_read_lock(); qh = rcu_dereference(net->nf.queue_handler); if (qh) - qh->nf_hook_drop(net, ops); + qh->nf_hook_drop(net, entry); rcu_read_unlock(); } @@ -112,7 +112,6 @@ void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops) * through nf_reinject(). */ int nf_queue(struct sk_buff *skb, - struct nf_hook_ops *elem, struct nf_hook_state *state, unsigned int queuenum) { @@ -141,7 +140,6 @@ int nf_queue(struct sk_buff *skb, *entry = (struct nf_queue_entry) { .skb = skb, - .elem = elem, .state = *state, .size = sizeof(*entry) + afinfo->route_key_size, }; @@ -165,11 +163,15 @@ err: void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { + struct nf_hook_entry *hook_entry; struct sk_buff *skb = entry->skb; - struct nf_hook_ops *elem = entry->elem; const struct nf_afinfo *afinfo; + struct nf_hook_ops *elem; int err; + hook_entry = rcu_dereference(entry->state.hook_entries); + elem = &hook_entry->ops; + nf_queue_entry_release_refs(entry); /* Continue traversal iff userspace said ok... */ @@ -186,8 +188,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) if (verdict == NF_ACCEPT) { next_hook: - verdict = nf_iterate(entry->state.hook_list, - skb, &entry->state, &elem); + verdict = nf_iterate(skb, &entry->state, &hook_entry); } switch (verdict & NF_VERDICT_MASK) { @@ -198,7 +199,8 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) local_bh_enable(); break; case NF_QUEUE: - err = nf_queue(skb, elem, &entry->state, + RCU_INIT_POINTER(entry->state.hook_entries, hook_entry); + err = nf_queue(skb, &entry->state, verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ESRCH && diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7caa8b082c41..af832c526048 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -917,12 +917,14 @@ static struct notifier_block nfqnl_dev_notifier = { .notifier_call = nfqnl_rcv_dev_event, }; -static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long ops_ptr) +static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr) { - return entry->elem == (struct nf_hook_ops *)ops_ptr; + return rcu_access_pointer(entry->state.hook_entries) == + (struct nf_hook_entry *)entry_ptr; } -static void nfqnl_nf_hook_drop(struct net *net, struct nf_hook_ops *hook) +static void nfqnl_nf_hook_drop(struct net *net, + const struct nf_hook_entry *hook) { struct nfnl_queue_net *q = nfnl_queue_pernet(net); int i; -- cgit v1.2.3 From 0f3cd9b3697708c86a825ae3cedabf7be6fd3e72 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 23 Sep 2016 15:23:33 +0200 Subject: netfilter: nf_tables: add range expression Inverse ranges != [a,b] are not currently possible because rules are composites of && operations, and we need to express this: data < a || data > b This patch adds a new range expression. Positive ranges can be already through two cmp expressions: cmp(sreg, data, >=) cmp(sreg, data, <=) This new range expression provides an alternative way to express this. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 3 + include/uapi/linux/netfilter/nf_tables.h | 29 +++++++ net/netfilter/Makefile | 3 +- net/netfilter/nf_tables_core.c | 7 +- net/netfilter/nft_range.c | 138 +++++++++++++++++++++++++++++++ 5 files changed, 178 insertions(+), 2 deletions(-) create mode 100644 net/netfilter/nft_range.c (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index a9060dd99db7..00f4f6b1b1ba 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -28,6 +28,9 @@ extern const struct nft_expr_ops nft_cmp_fast_ops; int nft_cmp_module_init(void); void nft_cmp_module_exit(void); +int nft_range_module_init(void); +void nft_range_module_exit(void); + int nft_lookup_module_init(void); void nft_lookup_module_exit(void); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 1cf41dd838b2..c6c4477c136b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -546,6 +546,35 @@ enum nft_cmp_attributes { }; #define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) +/** + * enum nft_range_ops - nf_tables range operator + * + * @NFT_RANGE_EQ: equal + * @NFT_RANGE_NEQ: not equal + */ +enum nft_range_ops { + NFT_RANGE_EQ, + NFT_RANGE_NEQ, +}; + +/** + * enum nft_range_attributes - nf_tables range expression netlink attributes + * + * @NFTA_RANGE_SREG: source register of data to compare (NLA_U32: nft_registers) + * @NFTA_RANGE_OP: cmp operation (NLA_U32: nft_cmp_ops) + * @NFTA_RANGE_FROM_DATA: data range from (NLA_NESTED: nft_data_attributes) + * @NFTA_RANGE_TO_DATA: data range to (NLA_NESTED: nft_data_attributes) + */ +enum nft_range_attributes { + NFTA_RANGE_UNSPEC, + NFTA_RANGE_SREG, + NFTA_RANGE_OP, + NFTA_RANGE_FROM_DATA, + NFTA_RANGE_TO_DATA, + __NFTA_RANGE_MAX +}; +#define NFTA_RANGE_MAX (__NFTA_RANGE_MAX - 1) + enum nft_lookup_flags { NFT_LOOKUP_F_INV = (1 << 0), }; diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 0c8581100ac6..c23c3c84416f 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -71,8 +71,9 @@ obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o # nf_tables nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o -nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o +nf_tables-objs += nft_immediate.o nft_cmp.o nft_range.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o +nf_tables-objs += nft_lookup.o nft_dynset.o obj-$(CONFIG_NF_TABLES) += nf_tables.o obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 67259cefef06..7c94ce0080d5 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -263,8 +263,13 @@ int __init nf_tables_core_module_init(void) if (err < 0) goto err7; - return 0; + err = nft_range_module_init(); + if (err < 0) + goto err8; + return 0; +err8: + nft_dynset_module_exit(); err7: nft_payload_module_exit(); err6: diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c new file mode 100644 index 000000000000..c6d5358482d1 --- /dev/null +++ b/net/netfilter/nft_range.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2016 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_range_expr { + struct nft_data data_from; + struct nft_data data_to; + enum nft_registers sreg:8; + u8 len; + enum nft_range_ops op:8; +}; + +static void nft_range_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_range_expr *priv = nft_expr_priv(expr); + bool mismatch; + int d1, d2; + + d1 = memcmp(®s->data[priv->sreg], &priv->data_from, priv->len); + d2 = memcmp(®s->data[priv->sreg], &priv->data_to, priv->len); + switch (priv->op) { + case NFT_RANGE_EQ: + mismatch = (d1 < 0 || d2 > 0); + break; + case NFT_RANGE_NEQ: + mismatch = (d1 >= 0 && d2 <= 0); + break; + } + + if (mismatch) + regs->verdict.code = NFT_BREAK; +} + +static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = { + [NFTA_RANGE_SREG] = { .type = NLA_U32 }, + [NFTA_RANGE_OP] = { .type = NLA_U32 }, + [NFTA_RANGE_FROM_DATA] = { .type = NLA_NESTED }, + [NFTA_RANGE_TO_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_range_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc_from, desc_to; + int err; + + err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from), + &desc_from, tb[NFTA_RANGE_FROM_DATA]); + if (err < 0) + return err; + + err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to), + &desc_to, tb[NFTA_RANGE_TO_DATA]); + if (err < 0) + goto err1; + + if (desc_from.len != desc_to.len) { + err = -EINVAL; + goto err2; + } + + priv->sreg = nft_parse_register(tb[NFTA_RANGE_SREG]); + err = nft_validate_register_load(priv->sreg, desc_from.len); + if (err < 0) + goto err2; + + priv->op = ntohl(nla_get_be32(tb[NFTA_RANGE_OP])); + priv->len = desc_from.len; + return 0; +err2: + nft_data_uninit(&priv->data_to, desc_to.type); +err1: + nft_data_uninit(&priv->data_from, desc_from.type); + return err; +} + +static int nft_range_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_range_expr *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_RANGE_SREG, priv->sreg)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_RANGE_OP, htonl(priv->op))) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_RANGE_FROM_DATA, &priv->data_from, + NFT_DATA_VALUE, priv->len) < 0 || + nft_data_dump(skb, NFTA_RANGE_TO_DATA, &priv->data_to, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_range_type; +static const struct nft_expr_ops nft_range_ops = { + .type = &nft_range_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_range_expr)), + .eval = nft_range_eval, + .init = nft_range_init, + .dump = nft_range_dump, +}; + +static struct nft_expr_type nft_range_type __read_mostly = { + .name = "range", + .ops = &nft_range_ops, + .policy = nft_range_policy, + .maxattr = NFTA_RANGE_MAX, + .owner = THIS_MODULE, +}; + +int __init nft_range_module_init(void) +{ + return nft_register_expr(&nft_range_type); +} + +void nft_range_module_exit(void) +{ + nft_unregister_expr(&nft_range_type); +} -- cgit v1.2.3 From ff107d27761ff4b644c82c209e004ec9c8fbbc22 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 25 Sep 2016 16:35:56 +0800 Subject: netfilter: nft_log: complete NFTA_LOG_FLAGS attr support NFTA_LOG_FLAGS attribute is already supported, but the related NF_LOG_XXX flags are not exposed to the userspace. So we cannot explicitly enable log flags to log uid, tcp sequence, ip options and so on, i.e. such rule "nft add rule filter output log uid" is not supported yet. So move NF_LOG_XXX macro definitions to the uapi/../nf_log.h. In order to keep consistent with other modules, change NF_LOG_MASK to refer to all supported log flags. On the other hand, add a new NF_LOG_DEFAULT_MASK to refer to the original default log flags. Finally, if user specify the unsupported log flags or NFTA_LOG_GROUP and NFTA_LOG_FLAGS are set at the same time, report EINVAL to the userspace. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_log.h | 11 +++-------- include/uapi/linux/netfilter/nf_log.h | 12 ++++++++++++ net/bridge/netfilter/ebt_log.c | 2 +- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv4/netfilter/nf_log_arp.c | 2 +- net/ipv4/netfilter/nf_log_ipv4.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 2 +- net/ipv6/netfilter/nf_log_ipv6.c | 4 ++-- net/netfilter/nf_tables_core.c | 2 +- net/netfilter/nft_log.c | 9 ++++++++- 10 files changed, 32 insertions(+), 18 deletions(-) create mode 100644 include/uapi/linux/netfilter/nf_log.h (limited to 'include/net') diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index ee07dc8b0a7b..309cd267be4f 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -2,15 +2,10 @@ #define _NF_LOG_H #include +#include -/* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will - * disappear once iptables is replaced with pkttables. Please DO NOT use them - * for any new code! */ -#define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ -#define NF_LOG_TCPOPT 0x02 /* Log TCP options */ -#define NF_LOG_IPOPT 0x04 /* Log IP options */ -#define NF_LOG_UID 0x08 /* Log UID owning local socket */ -#define NF_LOG_MASK 0x0f +/* Log tcp sequence, tcp options, ip options and uid owning local socket */ +#define NF_LOG_DEFAULT_MASK 0x0f /* This flag indicates that copy_len field in nf_loginfo is set */ #define NF_LOG_F_COPY_LEN 0x1 diff --git a/include/uapi/linux/netfilter/nf_log.h b/include/uapi/linux/netfilter/nf_log.h new file mode 100644 index 000000000000..8be21e02387d --- /dev/null +++ b/include/uapi/linux/netfilter/nf_log.h @@ -0,0 +1,12 @@ +#ifndef _NETFILTER_NF_LOG_H +#define _NETFILTER_NF_LOG_H + +#define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ +#define NF_LOG_TCPOPT 0x02 /* Log TCP options */ +#define NF_LOG_IPOPT 0x04 /* Log IP options */ +#define NF_LOG_UID 0x08 /* Log UID owning local socket */ +#define NF_LOG_NFLOG 0x10 /* Unsupported, don't reuse */ +#define NF_LOG_MACDECODE 0x20 /* Decode MAC header */ +#define NF_LOG_MASK 0x2f + +#endif /* _NETFILTER_NF_LOG_H */ diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 152300d164ac..9a11086ba6ff 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -91,7 +91,7 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, if (loginfo->type == NF_LOG_TYPE_LOG) bitmask = loginfo->u.log.logflags; else - bitmask = NF_LOG_MASK; + bitmask = NF_LOG_DEFAULT_MASK; if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto == htons(ETH_P_IP)) { diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f993545a3373..7c00ce90adb8 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -156,7 +156,7 @@ static struct nf_loginfo trace_loginfo = { .u = { .log = { .level = 4, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index 8945c2653814..b24795e2ee6d 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -30,7 +30,7 @@ static struct nf_loginfo default_loginfo = { .u = { .log = { .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 20f225593a8b..5b571e1b5f15 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -29,7 +29,7 @@ static struct nf_loginfo default_loginfo = { .u = { .log = { .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; @@ -46,7 +46,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m, if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; else - logflags = NF_LOG_MASK; + logflags = NF_LOG_DEFAULT_MASK; ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); if (ih == NULL) { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 552fac2f390a..55aacea24396 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -190,7 +190,7 @@ static struct nf_loginfo trace_loginfo = { .u = { .log = { .level = LOGLEVEL_WARNING, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index c1bcf699a23d..f6aee2895fee 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -30,7 +30,7 @@ static struct nf_loginfo default_loginfo = { .u = { .log = { .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; @@ -52,7 +52,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; else - logflags = NF_LOG_MASK; + logflags = NF_LOG_DEFAULT_MASK; ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); if (ih == NULL) { diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 7c94ce0080d5..0dd5c695482f 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -34,7 +34,7 @@ static struct nf_loginfo trace_loginfo = { .u = { .log = { .level = LOGLEVEL_WARNING, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 24a73bb26e94..1b01404bb33f 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -58,8 +58,11 @@ static int nft_log_init(const struct nft_ctx *ctx, if (tb[NFTA_LOG_LEVEL] != NULL && tb[NFTA_LOG_GROUP] != NULL) return -EINVAL; - if (tb[NFTA_LOG_GROUP] != NULL) + if (tb[NFTA_LOG_GROUP] != NULL) { li->type = NF_LOG_TYPE_ULOG; + if (tb[NFTA_LOG_FLAGS] != NULL) + return -EINVAL; + } nla = tb[NFTA_LOG_PREFIX]; if (nla != NULL) { @@ -87,6 +90,10 @@ static int nft_log_init(const struct nft_ctx *ctx, if (tb[NFTA_LOG_FLAGS] != NULL) { li->u.log.logflags = ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS])); + if (li->u.log.logflags & ~NF_LOG_MASK) { + err = -EINVAL; + goto err1; + } } break; case NF_LOG_TYPE_ULOG: -- cgit v1.2.3 From a7c7fbff6a408d00431c705bbe3dfc5f51e3f1c4 Mon Sep 17 00:00:00 2001 From: Purushottam Kushwaha Date: Wed, 14 Sep 2016 17:38:44 +0530 Subject: cfg80211: Add support to configure a beacon data rate This allows an option to configure a single beacon tx rate for an AP. Signed-off-by: Purushottam Kushwaha Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 25 +-- net/wireless/nl80211.c | 510 ++++++++++++++++++++++++++++--------------------- 2 files changed, 302 insertions(+), 233 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index bd26cc6e2d79..e0949c8bc2d1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -676,6 +676,18 @@ struct cfg80211_acl_data { struct mac_address mac_addrs[]; }; +/* + * cfg80211_bitrate_mask - masks for bitrate control + */ +struct cfg80211_bitrate_mask { + struct { + u32 legacy; + u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN]; + u16 vht_mcs[NL80211_VHT_NSS_MAX]; + enum nl80211_txrate_gi gi; + } control[NUM_NL80211_BANDS]; +}; + /** * struct cfg80211_ap_settings - AP configuration * @@ -700,6 +712,7 @@ struct cfg80211_acl_data { * MAC address based access control * @pbss: If set, start as a PCP instead of AP. Relevant for DMG * networks. + * @beacon_rate: masks for setting user configured beacon tx rate. */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -719,6 +732,7 @@ struct cfg80211_ap_settings { bool p2p_opp_ps; const struct cfg80211_acl_data *acl; bool pbss; + struct cfg80211_bitrate_mask beacon_rate; }; /** @@ -2010,17 +2024,6 @@ enum wiphy_params_flags { WIPHY_PARAM_DYN_ACK = 1 << 5, }; -/* - * cfg80211_bitrate_mask - masks for bitrate control - */ -struct cfg80211_bitrate_mask { - struct { - u32 legacy; - u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN]; - u16 vht_mcs[NL80211_VHT_NSS_MAX]; - enum nl80211_txrate_gi gi; - } control[NUM_NL80211_BANDS]; -}; /** * struct cfg80211_pmksa - PMK Security Association * diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 887c4c114206..a10484da60c0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3340,6 +3340,279 @@ static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) return err; } +static u32 rateset_to_mask(struct ieee80211_supported_band *sband, + u8 *rates, u8 rates_len) +{ + u8 i; + u32 mask = 0; + + for (i = 0; i < rates_len; i++) { + int rate = (rates[i] & 0x7f) * 5; + int ridx; + + for (ridx = 0; ridx < sband->n_bitrates; ridx++) { + struct ieee80211_rate *srate = + &sband->bitrates[ridx]; + if (rate == srate->bitrate) { + mask |= 1 << ridx; + break; + } + } + if (ridx == sband->n_bitrates) + return 0; /* rate not found */ + } + + return mask; +} + +static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, + u8 *rates, u8 rates_len, + u8 mcs[IEEE80211_HT_MCS_MASK_LEN]) +{ + u8 i; + + memset(mcs, 0, IEEE80211_HT_MCS_MASK_LEN); + + for (i = 0; i < rates_len; i++) { + int ridx, rbit; + + ridx = rates[i] / 8; + rbit = BIT(rates[i] % 8); + + /* check validity */ + if ((ridx < 0) || (ridx >= IEEE80211_HT_MCS_MASK_LEN)) + return false; + + /* check availability */ + if (sband->ht_cap.mcs.rx_mask[ridx] & rbit) + mcs[ridx] |= rbit; + else + return false; + } + + return true; +} + +static u16 vht_mcs_map_to_mcs_mask(u8 vht_mcs_map) +{ + u16 mcs_mask = 0; + + switch (vht_mcs_map) { + case IEEE80211_VHT_MCS_NOT_SUPPORTED: + break; + case IEEE80211_VHT_MCS_SUPPORT_0_7: + mcs_mask = 0x00FF; + break; + case IEEE80211_VHT_MCS_SUPPORT_0_8: + mcs_mask = 0x01FF; + break; + case IEEE80211_VHT_MCS_SUPPORT_0_9: + mcs_mask = 0x03FF; + break; + default: + break; + } + + return mcs_mask; +} + +static void vht_build_mcs_mask(u16 vht_mcs_map, + u16 vht_mcs_mask[NL80211_VHT_NSS_MAX]) +{ + u8 nss; + + for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) { + vht_mcs_mask[nss] = vht_mcs_map_to_mcs_mask(vht_mcs_map & 0x03); + vht_mcs_map >>= 2; + } +} + +static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband, + struct nl80211_txrate_vht *txrate, + u16 mcs[NL80211_VHT_NSS_MAX]) +{ + u16 tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); + u16 tx_mcs_mask[NL80211_VHT_NSS_MAX] = {}; + u8 i; + + if (!sband->vht_cap.vht_supported) + return false; + + memset(mcs, 0, sizeof(u16) * NL80211_VHT_NSS_MAX); + + /* Build vht_mcs_mask from VHT capabilities */ + vht_build_mcs_mask(tx_mcs_map, tx_mcs_mask); + + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { + if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i]) + mcs[i] = txrate->mcs[i]; + else + return false; + } + + return true; +} + +static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { + [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_RATES }, + [NL80211_TXRATE_HT] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_HT_RATES }, + [NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)}, + [NL80211_TXRATE_GI] = { .type = NLA_U8 }, +}; + +static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, + struct cfg80211_bitrate_mask *mask) +{ + struct nlattr *tb[NL80211_TXRATE_MAX + 1]; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + int rem, i; + struct nlattr *tx_rates; + struct ieee80211_supported_band *sband; + u16 vht_tx_mcs_map; + + memset(mask, 0, sizeof(*mask)); + /* Default to all rates enabled */ + for (i = 0; i < NUM_NL80211_BANDS; i++) { + sband = rdev->wiphy.bands[i]; + + if (!sband) + continue; + + mask->control[i].legacy = (1 << sband->n_bitrates) - 1; + memcpy(mask->control[i].ht_mcs, + sband->ht_cap.mcs.rx_mask, + sizeof(mask->control[i].ht_mcs)); + + if (!sband->vht_cap.vht_supported) + continue; + + vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); + vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs); + } + + /* if no rates are given set it back to the defaults */ + if (!info->attrs[NL80211_ATTR_TX_RATES]) + goto out; + + /* The nested attribute uses enum nl80211_band as the index. This maps + * directly to the enum nl80211_band values used in cfg80211. + */ + BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8); + nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) { + enum nl80211_band band = nla_type(tx_rates); + int err; + + if (band < 0 || band >= NUM_NL80211_BANDS) + return -EINVAL; + sband = rdev->wiphy.bands[band]; + if (sband == NULL) + return -EINVAL; + err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates), + nla_len(tx_rates), nl80211_txattr_policy); + if (err) + return err; + if (tb[NL80211_TXRATE_LEGACY]) { + mask->control[band].legacy = rateset_to_mask( + sband, + nla_data(tb[NL80211_TXRATE_LEGACY]), + nla_len(tb[NL80211_TXRATE_LEGACY])); + if ((mask->control[band].legacy == 0) && + nla_len(tb[NL80211_TXRATE_LEGACY])) + return -EINVAL; + } + if (tb[NL80211_TXRATE_HT]) { + if (!ht_rateset_to_mask( + sband, + nla_data(tb[NL80211_TXRATE_HT]), + nla_len(tb[NL80211_TXRATE_HT]), + mask->control[band].ht_mcs)) + return -EINVAL; + } + if (tb[NL80211_TXRATE_VHT]) { + if (!vht_set_mcs_mask( + sband, + nla_data(tb[NL80211_TXRATE_VHT]), + mask->control[band].vht_mcs)) + return -EINVAL; + } + if (tb[NL80211_TXRATE_GI]) { + mask->control[band].gi = + nla_get_u8(tb[NL80211_TXRATE_GI]); + if (mask->control[band].gi > NL80211_TXRATE_FORCE_LGI) + return -EINVAL; + } + + if (mask->control[band].legacy == 0) { + /* don't allow empty legacy rates if HT or VHT + * are not even supported. + */ + if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported || + rdev->wiphy.bands[band]->vht_cap.vht_supported)) + return -EINVAL; + + for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) + if (mask->control[band].ht_mcs[i]) + goto out; + + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) + if (mask->control[band].vht_mcs[i]) + goto out; + + /* legacy and mcs rates may not be both empty */ + return -EINVAL; + } + } + +out: + return 0; +} + +static int validate_beacon_tx_rate(struct cfg80211_ap_settings *params) +{ + u32 rate, count_ht, count_vht, i; + enum nl80211_band band; + + band = params->chandef.chan->band; + rate = params->beacon_rate.control[band].legacy; + + /* Allow only one rate */ + if (hweight32(rate) > 1) + return -EINVAL; + + count_ht = 0; + for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) { + if (hweight8(params->beacon_rate.control[band].ht_mcs[i]) > 1) { + return -EINVAL; + } else if (params->beacon_rate.control[band].ht_mcs[i]) { + count_ht++; + if (count_ht > 1) + return -EINVAL; + } + if (count_ht && rate) + return -EINVAL; + } + + count_vht = 0; + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { + if (hweight16(params->beacon_rate.control[band].vht_mcs[i]) > 1) { + return -EINVAL; + } else if (params->beacon_rate.control[band].vht_mcs[i]) { + count_vht++; + if (count_vht > 1) + return -EINVAL; + } + if (count_vht && rate) + return -EINVAL; + } + + if ((count_ht && count_vht) || (!rate && !count_ht && !count_vht)) + return -EINVAL; + + return 0; +} + static int nl80211_parse_beacon(struct nlattr *attrs[], struct cfg80211_beacon_data *bcn) { @@ -3569,6 +3842,16 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->iftype)) return -EINVAL; + if (info->attrs[NL80211_ATTR_TX_RATES]) { + err = nl80211_parse_tx_bitrate_mask(info, ¶ms.beacon_rate); + if (err) + return err; + + err = validate_beacon_tx_rate(¶ms); + if (err) + return err; + } + if (info->attrs[NL80211_ATTR_SMPS_MODE]) { params.smps_mode = nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]); @@ -8641,238 +8924,21 @@ static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, return rdev_cancel_remain_on_channel(rdev, wdev, cookie); } -static u32 rateset_to_mask(struct ieee80211_supported_band *sband, - u8 *rates, u8 rates_len) -{ - u8 i; - u32 mask = 0; - - for (i = 0; i < rates_len; i++) { - int rate = (rates[i] & 0x7f) * 5; - int ridx; - - for (ridx = 0; ridx < sband->n_bitrates; ridx++) { - struct ieee80211_rate *srate = - &sband->bitrates[ridx]; - if (rate == srate->bitrate) { - mask |= 1 << ridx; - break; - } - } - if (ridx == sband->n_bitrates) - return 0; /* rate not found */ - } - - return mask; -} - -static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, - u8 *rates, u8 rates_len, - u8 mcs[IEEE80211_HT_MCS_MASK_LEN]) -{ - u8 i; - - memset(mcs, 0, IEEE80211_HT_MCS_MASK_LEN); - - for (i = 0; i < rates_len; i++) { - int ridx, rbit; - - ridx = rates[i] / 8; - rbit = BIT(rates[i] % 8); - - /* check validity */ - if ((ridx < 0) || (ridx >= IEEE80211_HT_MCS_MASK_LEN)) - return false; - - /* check availability */ - if (sband->ht_cap.mcs.rx_mask[ridx] & rbit) - mcs[ridx] |= rbit; - else - return false; - } - - return true; -} - -static u16 vht_mcs_map_to_mcs_mask(u8 vht_mcs_map) -{ - u16 mcs_mask = 0; - - switch (vht_mcs_map) { - case IEEE80211_VHT_MCS_NOT_SUPPORTED: - break; - case IEEE80211_VHT_MCS_SUPPORT_0_7: - mcs_mask = 0x00FF; - break; - case IEEE80211_VHT_MCS_SUPPORT_0_8: - mcs_mask = 0x01FF; - break; - case IEEE80211_VHT_MCS_SUPPORT_0_9: - mcs_mask = 0x03FF; - break; - default: - break; - } - - return mcs_mask; -} - -static void vht_build_mcs_mask(u16 vht_mcs_map, - u16 vht_mcs_mask[NL80211_VHT_NSS_MAX]) -{ - u8 nss; - - for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) { - vht_mcs_mask[nss] = vht_mcs_map_to_mcs_mask(vht_mcs_map & 0x03); - vht_mcs_map >>= 2; - } -} - -static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband, - struct nl80211_txrate_vht *txrate, - u16 mcs[NL80211_VHT_NSS_MAX]) -{ - u16 tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); - u16 tx_mcs_mask[NL80211_VHT_NSS_MAX] = {}; - u8 i; - - if (!sband->vht_cap.vht_supported) - return false; - - memset(mcs, 0, sizeof(u16) * NL80211_VHT_NSS_MAX); - - /* Build vht_mcs_mask from VHT capabilities */ - vht_build_mcs_mask(tx_mcs_map, tx_mcs_mask); - - for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { - if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i]) - mcs[i] = txrate->mcs[i]; - else - return false; - } - - return true; -} - -static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { - [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, - .len = NL80211_MAX_SUPP_RATES }, - [NL80211_TXRATE_HT] = { .type = NLA_BINARY, - .len = NL80211_MAX_SUPP_HT_RATES }, - [NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)}, - [NL80211_TXRATE_GI] = { .type = NLA_U8 }, -}; - static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *tb[NL80211_TXRATE_MAX + 1]; - struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct cfg80211_bitrate_mask mask; - int rem, i; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - struct nlattr *tx_rates; - struct ieee80211_supported_band *sband; - u16 vht_tx_mcs_map; + int err; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; - memset(&mask, 0, sizeof(mask)); - /* Default to all rates enabled */ - for (i = 0; i < NUM_NL80211_BANDS; i++) { - sband = rdev->wiphy.bands[i]; - - if (!sband) - continue; - - mask.control[i].legacy = (1 << sband->n_bitrates) - 1; - memcpy(mask.control[i].ht_mcs, - sband->ht_cap.mcs.rx_mask, - sizeof(mask.control[i].ht_mcs)); - - if (!sband->vht_cap.vht_supported) - continue; - - vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); - vht_build_mcs_mask(vht_tx_mcs_map, mask.control[i].vht_mcs); - } - - /* if no rates are given set it back to the defaults */ - if (!info->attrs[NL80211_ATTR_TX_RATES]) - goto out; - - /* - * The nested attribute uses enum nl80211_band as the index. This maps - * directly to the enum nl80211_band values used in cfg80211. - */ - BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8); - nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) { - enum nl80211_band band = nla_type(tx_rates); - int err; - - if (band < 0 || band >= NUM_NL80211_BANDS) - return -EINVAL; - sband = rdev->wiphy.bands[band]; - if (sband == NULL) - return -EINVAL; - err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates), - nla_len(tx_rates), nl80211_txattr_policy); - if (err) - return err; - if (tb[NL80211_TXRATE_LEGACY]) { - mask.control[band].legacy = rateset_to_mask( - sband, - nla_data(tb[NL80211_TXRATE_LEGACY]), - nla_len(tb[NL80211_TXRATE_LEGACY])); - if ((mask.control[band].legacy == 0) && - nla_len(tb[NL80211_TXRATE_LEGACY])) - return -EINVAL; - } - if (tb[NL80211_TXRATE_HT]) { - if (!ht_rateset_to_mask( - sband, - nla_data(tb[NL80211_TXRATE_HT]), - nla_len(tb[NL80211_TXRATE_HT]), - mask.control[band].ht_mcs)) - return -EINVAL; - } - if (tb[NL80211_TXRATE_VHT]) { - if (!vht_set_mcs_mask( - sband, - nla_data(tb[NL80211_TXRATE_VHT]), - mask.control[band].vht_mcs)) - return -EINVAL; - } - if (tb[NL80211_TXRATE_GI]) { - mask.control[band].gi = - nla_get_u8(tb[NL80211_TXRATE_GI]); - if (mask.control[band].gi > NL80211_TXRATE_FORCE_LGI) - return -EINVAL; - } - - if (mask.control[band].legacy == 0) { - /* don't allow empty legacy rates if HT or VHT - * are not even supported. - */ - if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported || - rdev->wiphy.bands[band]->vht_cap.vht_supported)) - return -EINVAL; - - for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) - if (mask.control[band].ht_mcs[i]) - goto out; - - for (i = 0; i < NL80211_VHT_NSS_MAX; i++) - if (mask.control[band].vht_mcs[i]) - goto out; - - /* legacy and mcs rates may not be both empty */ - return -EINVAL; - } - } + err = nl80211_parse_tx_bitrate_mask(info, &mask); + if (err) + return err; -out: return rdev_set_bitrate_mask(rdev, dev, NULL, &mask); } -- cgit v1.2.3 From 8564e38206de2ff005a27c8d7c2ce3869a44f0dd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 19 Sep 2016 09:44:44 +0200 Subject: cfg80211: add checks for beacon rate, extend to mesh The previous commit added support for specifying the beacon rate for AP mode. Add features checks to this, and extend it to also support the rate configuration for mesh networks. For IBSS it's not as simple due to joining etc., so that's not yet supported. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 +++- include/uapi/linux/nl80211.h | 17 +++++++++++++++- net/wireless/nl80211.c | 46 +++++++++++++++++++++++++++++++++----------- 3 files changed, 54 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e0949c8bc2d1..ed37304fa09d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -712,7 +712,7 @@ struct cfg80211_bitrate_mask { * MAC address based access control * @pbss: If set, start as a PCP instead of AP. Relevant for DMG * networks. - * @beacon_rate: masks for setting user configured beacon tx rate. + * @beacon_rate: bitrate to be used for beacons */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -1365,6 +1365,7 @@ struct mesh_config { * @beacon_interval: beacon interval to use * @mcast_rate: multicat rate for Mesh Node [6Mbps is the default for 802.11a] * @basic_rates: basic rates to use when creating the mesh + * @beacon_rate: bitrate to be used for beacons * * These parameters are fixed when the mesh is created. */ @@ -1385,6 +1386,7 @@ struct mesh_setup { u16 beacon_interval; int mcast_rate[NUM_NL80211_BANDS]; u32 basic_rates; + struct cfg80211_bitrate_mask beacon_rate; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 220694151434..ec10d1b2838f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1343,7 +1343,13 @@ enum nl80211_commands { * enum nl80211_band value is used as the index (nla_type() of the nested * data. If a band is not included, it will be configured to allow all * rates based on negotiated supported rates information. This attribute - * is used with %NL80211_CMD_SET_TX_BITRATE_MASK. + * is used with %NL80211_CMD_SET_TX_BITRATE_MASK and with starting AP, + * and joining mesh networks (not IBSS yet). In the later case, it must + * specify just a single bitrate, which is to be used for the beacon. + * The driver must also specify support for this with the extended + * features NL80211_EXT_FEATURE_BEACON_RATE_LEGACY, + * NL80211_EXT_FEATURE_BEACON_RATE_HT and + * NL80211_EXT_FEATURE_BEACON_RATE_VHT. * * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain * at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME. @@ -4551,6 +4557,12 @@ enum nl80211_feature_flags { * (if available). * @NL80211_EXT_FEATURE_SET_SCAN_DWELL: This driver supports configuration of * channel dwell time. + * @NL80211_EXT_FEATURE_BEACON_RATE_LEGACY: Driver supports beacon rate + * configuration (AP/mesh), supporting a legacy (non HT/VHT) rate. + * @NL80211_EXT_FEATURE_BEACON_RATE_HT: Driver supports beacon rate + * configuration (AP/mesh) with HT rates. + * @NL80211_EXT_FEATURE_BEACON_RATE_VHT: Driver supports beacon rate + * configuration (AP/mesh) with VHT rates. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. @@ -4562,6 +4574,9 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SCAN_START_TIME, NL80211_EXT_FEATURE_BSS_PARENT_TSF, NL80211_EXT_FEATURE_SET_SCAN_DWELL, + NL80211_EXT_FEATURE_BEACON_RATE_LEGACY, + NL80211_EXT_FEATURE_BEACON_RATE_HT, + NL80211_EXT_FEATURE_BEACON_RATE_VHT, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a10484da60c0..b8441e60b0f6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3569,13 +3569,12 @@ out: return 0; } -static int validate_beacon_tx_rate(struct cfg80211_ap_settings *params) +static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev, + enum nl80211_band band, + struct cfg80211_bitrate_mask *beacon_rate) { - u32 rate, count_ht, count_vht, i; - enum nl80211_band band; - - band = params->chandef.chan->band; - rate = params->beacon_rate.control[band].legacy; + u32 count_ht, count_vht, i; + u32 rate = beacon_rate->control[band].legacy; /* Allow only one rate */ if (hweight32(rate) > 1) @@ -3583,9 +3582,9 @@ static int validate_beacon_tx_rate(struct cfg80211_ap_settings *params) count_ht = 0; for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) { - if (hweight8(params->beacon_rate.control[band].ht_mcs[i]) > 1) { + if (hweight8(beacon_rate->control[band].ht_mcs[i]) > 1) { return -EINVAL; - } else if (params->beacon_rate.control[band].ht_mcs[i]) { + } else if (beacon_rate->control[band].ht_mcs[i]) { count_ht++; if (count_ht > 1) return -EINVAL; @@ -3596,9 +3595,9 @@ static int validate_beacon_tx_rate(struct cfg80211_ap_settings *params) count_vht = 0; for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { - if (hweight16(params->beacon_rate.control[band].vht_mcs[i]) > 1) { + if (hweight16(beacon_rate->control[band].vht_mcs[i]) > 1) { return -EINVAL; - } else if (params->beacon_rate.control[band].vht_mcs[i]) { + } else if (beacon_rate->control[band].vht_mcs[i]) { count_vht++; if (count_vht > 1) return -EINVAL; @@ -3610,6 +3609,19 @@ static int validate_beacon_tx_rate(struct cfg80211_ap_settings *params) if ((count_ht && count_vht) || (!rate && !count_ht && !count_vht)) return -EINVAL; + if (rate && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) + return -EINVAL; + if (count_ht && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_RATE_HT)) + return -EINVAL; + if (count_vht && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_RATE_VHT)) + return -EINVAL; + return 0; } @@ -3847,7 +3859,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (err) return err; - err = validate_beacon_tx_rate(¶ms); + err = validate_beacon_tx_rate(rdev, params.chandef.chan->band, + ¶ms.beacon_rate); if (err) return err; } @@ -9406,6 +9419,17 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) return err; } + if (info->attrs[NL80211_ATTR_TX_RATES]) { + err = nl80211_parse_tx_bitrate_mask(info, &setup.beacon_rate); + if (err) + return err; + + err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band, + &setup.beacon_rate); + if (err) + return err; + } + return cfg80211_join_mesh(rdev, dev, &setup, &cfg); } -- cgit v1.2.3 From 4ad41c1e2616a64c9e789d7069b1cb3402d2af3a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 3 Sep 2016 19:37:25 -0400 Subject: bonding: quit messing with IOCTL The only remaining users are issuing SIOCGMIIPHY and SIOCGMIIREG, neither of which deals with userland pointers. Simply calling ->ndo_do_ioctl() is fine; no messing with set_fs() is needed. It used to mess with SIOCETHTOOL, which would've needed set_fs(), but that has been killed in "[NET] ethtool ops are the only way" 9 years ago... Signed-off-by: Al Viro --- drivers/net/bonding/bond_main.c | 4 ++-- include/net/bonding.h | 12 ------------ 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 9599ed6f1213..3892811079d0 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -471,9 +471,9 @@ static int bond_check_dev_link(struct bonding *bond, /* Yes, the mii is overlaid on the ifreq.ifr_ifru */ strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ); mii = if_mii(&ifr); - if (IOCTL(slave_dev, &ifr, SIOCGMIIPHY) == 0) { + if (ioctl(slave_dev, &ifr, SIOCGMIIPHY) == 0) { mii->reg_num = MII_BMSR; - if (IOCTL(slave_dev, &ifr, SIOCGMIIREG) == 0) + if (ioctl(slave_dev, &ifr, SIOCGMIIREG) == 0) return mii->val_out & BMSR_LSTATUS; } } diff --git a/include/net/bonding.h b/include/net/bonding.h index 6360c259da6d..f32f7ef8a23a 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -37,18 +37,6 @@ #ifndef __long_aligned #define __long_aligned __attribute__((aligned((sizeof(long))))) #endif -/* - * Less bad way to call ioctl from within the kernel; this needs to be - * done some other way to get the call out of interrupt context. - * Needs "ioctl" variable to be supplied by calling context. - */ -#define IOCTL(dev, arg, cmd) ({ \ - int res = 0; \ - mm_segment_t fs = get_fs(); \ - set_fs(get_ds()); \ - res = ioctl(dev, arg, cmd); \ - set_fs(fs); \ - res; }) #define BOND_MODE(bond) ((bond)->params.mode) -- cgit v1.2.3 From b90eb754949931b2e4481b1df9a03f84d4be66ba Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:29 +0200 Subject: fib: introduce FIB notification infrastructure This allows to pass information about added/deleted FIB entries/rules to whoever is interested. This is done in a very similar way as devinet notifies address additions/removals. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip_fib.h | 34 ++++++++++++++++++++++++--- net/ipv4/fib_frontend.c | 16 ++++++------- net/ipv4/fib_rules.c | 10 ++++++++ net/ipv4/fib_trie.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 108 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 7d4a72e75f33..116a9c0eb455 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -22,6 +22,7 @@ #include #include #include +#include struct fib_config { u8 fc_dst_len; @@ -185,6 +186,33 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); #define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? : \ FIB_RES_SADDR(net, res)) +struct fib_notifier_info { + struct net *net; +}; + +struct fib_entry_notifier_info { + struct fib_notifier_info info; /* must be first */ + u32 dst; + int dst_len; + struct fib_info *fi; + u8 tos; + u8 type; + u32 tb_id; + u32 nlflags; +}; + +enum fib_event_type { + FIB_EVENT_ENTRY_ADD, + FIB_EVENT_ENTRY_DEL, + FIB_EVENT_RULE_ADD, + FIB_EVENT_RULE_DEL, +}; + +int register_fib_notifier(struct notifier_block *nb); +int unregister_fib_notifier(struct notifier_block *nb); +int call_fib_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info); + struct fib_table { struct hlist_node tb_hlist; u32 tb_id; @@ -196,11 +224,11 @@ struct fib_table { int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags); -int fib_table_insert(struct fib_table *, struct fib_config *); -int fib_table_delete(struct fib_table *, struct fib_config *); +int fib_table_insert(struct net *, struct fib_table *, struct fib_config *); +int fib_table_delete(struct net *, struct fib_table *, struct fib_config *); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); -int fib_table_flush(struct fib_table *table); +int fib_table_flush(struct net *net, struct fib_table *table); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4e56a4c20a3c..86c43dc9a60e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -182,7 +182,7 @@ static void fib_flush(struct net *net) struct fib_table *tb; hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) - flushed += fib_table_flush(tb); + flushed += fib_table_flush(net, tb); } if (flushed) @@ -590,13 +590,13 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (cmd == SIOCDELRT) { tb = fib_get_table(net, cfg.fc_table); if (tb) - err = fib_table_delete(tb, &cfg); + err = fib_table_delete(net, tb, &cfg); else err = -ESRCH; } else { tb = fib_new_table(net, cfg.fc_table); if (tb) - err = fib_table_insert(tb, &cfg); + err = fib_table_insert(net, tb, &cfg); else err = -ENOBUFS; } @@ -719,7 +719,7 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) goto errout; } - err = fib_table_delete(tb, &cfg); + err = fib_table_delete(net, tb, &cfg); errout: return err; } @@ -741,7 +741,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) goto errout; } - err = fib_table_insert(tb, &cfg); + err = fib_table_insert(net, tb, &cfg); errout: return err; } @@ -828,9 +828,9 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad cfg.fc_scope = RT_SCOPE_HOST; if (cmd == RTM_NEWROUTE) - fib_table_insert(tb, &cfg); + fib_table_insert(net, tb, &cfg); else - fib_table_delete(tb, &cfg); + fib_table_delete(net, tb, &cfg); } void fib_add_ifaddr(struct in_ifaddr *ifa) @@ -1254,7 +1254,7 @@ static void ip_fib_net_exit(struct net *net) hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { hlist_del(&tb->tb_hlist); - fib_table_flush(tb); + fib_table_flush(net, tb); fib_free_table(tb); } } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 770bebed6b28..ebadf6b99499 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -164,6 +164,14 @@ static struct fib_table *fib_empty_table(struct net *net) return NULL; } +static int call_fib_rule_notifiers(struct net *net, + enum fib_event_type event_type) +{ + struct fib_notifier_info info; + + return call_fib_notifiers(net, event_type, &info); +} + static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { FRA_GENERIC_POLICY, [FRA_FLOW] = { .type = NLA_U32 }, @@ -221,6 +229,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, net->ipv4.fib_has_custom_rules = true; fib_flush_external(rule->fr_net); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD); err = 0; errout: @@ -243,6 +252,7 @@ static int fib4_rule_delete(struct fib_rule *rule) #endif net->ipv4.fib_has_custom_rules = true; fib_flush_external(rule->fr_net); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL); errout: return err; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 241f27bbd7ad..51a4537eb145 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include @@ -84,6 +85,44 @@ #include #include "fib_lookup.h" +static BLOCKING_NOTIFIER_HEAD(fib_chain); + +int register_fib_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&fib_chain, nb); +} +EXPORT_SYMBOL(register_fib_notifier); + +int unregister_fib_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&fib_chain, nb); +} +EXPORT_SYMBOL(unregister_fib_notifier); + +int call_fib_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->net = net; + return blocking_notifier_call_chain(&fib_chain, event_type, info); +} + +static int call_fib_entry_notifiers(struct net *net, + enum fib_event_type event_type, u32 dst, + int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id, u32 nlflags) +{ + struct fib_entry_notifier_info info = { + .dst = dst, + .dst_len = dst_len, + .fi = fi, + .tos = tos, + .type = type, + .tb_id = tb_id, + .nlflags = nlflags, + }; + return call_fib_notifiers(net, event_type, &info.info); +} + #define MAX_STAT_DEPTH 32 #define KEYLENGTH (8*sizeof(t_key)) @@ -1076,7 +1115,8 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp, } /* Caller must hold RTNL. */ -int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) +int fib_table_insert(struct net *net, struct fib_table *tb, + struct fib_config *cfg) { struct trie *t = (struct trie *)tb->tb_data; struct fib_alias *fa, *new_fa; @@ -1193,6 +1233,11 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) rt_cache_flush(cfg->fc_nlinfo.nl_net); + + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, + key, plen, fi, + new_fa->fa_tos, cfg->fc_type, + tb->tb_id, cfg->fc_nlflags); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, nlflags); @@ -1245,6 +1290,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) tb->tb_num_default++; rt_cache_flush(cfg->fc_nlinfo.nl_net); + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, key, plen, fi, tos, + cfg->fc_type, tb->tb_id, cfg->fc_nlflags); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, &cfg->fc_nlinfo, nlflags); succeeded: @@ -1490,7 +1537,8 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp, } /* Caller must hold RTNL. */ -int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) +int fib_table_delete(struct net *net, struct fib_table *tb, + struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; struct fib_alias *fa, *fa_to_delete; @@ -1546,6 +1594,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, cfg->fc_type, tb->tb_id); + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, + fa_to_delete->fa_info, tos, cfg->fc_type, + tb->tb_id, 0); rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -1809,7 +1860,7 @@ void fib_table_flush_external(struct fib_table *tb) } /* Caller must hold RTNL. */ -int fib_table_flush(struct fib_table *tb) +int fib_table_flush(struct net *net, struct fib_table *tb) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *pn = t->kv; @@ -1861,6 +1912,11 @@ int fib_table_flush(struct fib_table *tb) switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, fi, fa->fa_tos, fa->fa_type, tb->tb_id); + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, + n->key, + KEYLENGTH - fa->fa_slen, + fi, fa->fa_tos, fa->fa_type, + tb->tb_id, 0); hlist_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); -- cgit v1.2.3 From c98501879b1b1af90c7325574f2672e9efca592c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:30 +0200 Subject: fib: introduce FIB info offload flag helpers These helpers are to be used in case someone offloads the FIB entry. The result is that if the entry is offloaded to at least one device, the offload flag is set. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/ip_fib.h | 13 +++++++++++++ net/switchdev/switchdev.c | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 116a9c0eb455..ffccf1787914 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -123,6 +123,7 @@ struct fib_info { #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_weight; #endif + unsigned int fib_offload_cnt; struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev @@ -174,6 +175,18 @@ struct fib_result_nl { __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); +static inline void fib_info_offload_inc(struct fib_info *fi) +{ + fi->fib_offload_cnt++; + fi->fib_flags |= RTNH_F_OFFLOAD; +} + +static inline void fib_info_offload_dec(struct fib_info *fi) +{ + if (--fi->fib_offload_cnt == 0) + fi->fib_flags &= ~RTNH_F_OFFLOAD; +} + #define FIB_RES_SADDR(net, res) \ ((FIB_RES_NH(res).nh_saddr_genid == \ atomic_read(&(net)->ipv4.dev_addr_genid)) ? \ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 10b819308439..abd8d2a38a7d 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1216,7 +1216,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, ipv4_fib.obj.orig_dev = dev; err = switchdev_port_obj_add(dev, &ipv4_fib.obj); if (!err) - fi->fib_flags |= RTNH_F_OFFLOAD; + fib_info_offload_inc(fi); return err == -EOPNOTSUPP ? 0 : err; } @@ -1260,7 +1260,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, ipv4_fib.obj.orig_dev = dev; err = switchdev_port_obj_del(dev, &ipv4_fib.obj); if (!err) - fi->fib_flags &= ~RTNH_F_OFFLOAD; + fib_info_offload_dec(fi); return err == -EOPNOTSUPP ? 0 : err; } -- cgit v1.2.3 From 347e3b28c1ba24c1ae2f30290d8247480ab9ce14 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 26 Sep 2016 12:52:33 +0200 Subject: switchdev: remove FIB offload infrastructure Since this is now taken care of by FIB notifier, remove the code, with all unused dependencies. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 - include/net/switchdev.h | 40 ---------- net/ipv4/fib_frontend.c | 13 ---- net/ipv4/fib_rules.c | 2 - net/ipv4/fib_trie.c | 104 +------------------------- net/switchdev/switchdev.c | 181 ---------------------------------------------- 6 files changed, 1 insertion(+), 341 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ffccf1787914..b9314b48e39f 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -243,7 +243,6 @@ int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); int fib_table_flush(struct net *net, struct fib_table *table); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); -void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -356,7 +355,6 @@ static inline int fib_num_tclassid_users(struct net *net) } #endif int fib_unmerge(struct net *net); -void fib_flush_external(struct net *net); /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 729fe1534160..eba80c4fc56f 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -68,7 +68,6 @@ struct switchdev_attr { enum switchdev_obj_id { SWITCHDEV_OBJ_ID_UNDEFINED, SWITCHDEV_OBJ_ID_PORT_VLAN, - SWITCHDEV_OBJ_ID_IPV4_FIB, SWITCHDEV_OBJ_ID_PORT_FDB, SWITCHDEV_OBJ_ID_PORT_MDB, }; @@ -92,21 +91,6 @@ struct switchdev_obj_port_vlan { #define SWITCHDEV_OBJ_PORT_VLAN(obj) \ container_of(obj, struct switchdev_obj_port_vlan, obj) -/* SWITCHDEV_OBJ_ID_IPV4_FIB */ -struct switchdev_obj_ipv4_fib { - struct switchdev_obj obj; - u32 dst; - int dst_len; - struct fib_info *fi; - u8 tos; - u8 type; - u32 nlflags; - u32 tb_id; -}; - -#define SWITCHDEV_OBJ_IPV4_FIB(obj) \ - container_of(obj, struct switchdev_obj_ipv4_fib, obj) - /* SWITCHDEV_OBJ_ID_PORT_FDB */ struct switchdev_obj_port_fdb { struct switchdev_obj obj; @@ -209,11 +193,6 @@ int switchdev_port_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); int switchdev_port_bridge_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); -int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, u32 tb_id); -int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id); -void switchdev_fib_ipv4_abort(struct fib_info *fi); int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlm_flags); @@ -304,25 +283,6 @@ static inline int switchdev_port_bridge_dellink(struct net_device *dev, return -EOPNOTSUPP; } -static inline int switchdev_fib_ipv4_add(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 nlflags, u32 tb_id) -{ - return 0; -} - -static inline int switchdev_fib_ipv4_del(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) -{ - return 0; -} - -static inline void switchdev_fib_ipv4_abort(struct fib_info *fi) -{ -} - static inline int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 86c43dc9a60e..c3b80478226e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -189,19 +189,6 @@ static void fib_flush(struct net *net) rt_cache_flush(net); } -void fib_flush_external(struct net *net) -{ - struct fib_table *tb; - struct hlist_head *head; - unsigned int h; - - for (h = 0; h < FIB_TABLE_HASHSZ; h++) { - head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry(tb, head, tb_hlist) - fib_table_flush_external(tb); - } -} - /* * Find address type as if only "dev" was present in the system. If * on_dev is NULL then all interfaces are taken into consideration. diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index ebadf6b99499..2e50062f642d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -228,7 +228,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->tos = frh->tos; net->ipv4.fib_has_custom_rules = true; - fib_flush_external(rule->fr_net); call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD); err = 0; @@ -251,7 +250,6 @@ static int fib4_rule_delete(struct fib_rule *rule) net->ipv4.fib_num_tclassid_users--; #endif net->ipv4.fib_has_custom_rules = true; - fib_flush_external(rule->fr_net); call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL); errout: return err; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 51a4537eb145..31cef3602585 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -81,7 +81,6 @@ #include #include #include -#include #include #include "fib_lookup.h" @@ -1215,17 +1214,6 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; - err = switchdev_fib_ipv4_add(key, plen, fi, - new_fa->fa_tos, - cfg->fc_type, - cfg->fc_nlflags, - tb->tb_id); - if (err) { - switchdev_fib_ipv4_abort(fi); - kmem_cache_free(fn_alias_kmem, new_fa); - goto out; - } - hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list); alias_free_mem_rcu(fa); @@ -1273,18 +1261,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; - /* (Optionally) offload fib entry to switch hardware. */ - err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type, - cfg->fc_nlflags, tb->tb_id); - if (err) { - switchdev_fib_ipv4_abort(fi); - goto out_free_new_fa; - } - /* Insert new entry to the list. */ err = fib_insert_alias(t, tp, l, new_fa, fa, key); if (err) - goto out_sw_fib_del; + goto out_free_new_fa; if (!plen) tb->tb_num_default++; @@ -1297,8 +1277,6 @@ int fib_table_insert(struct net *net, struct fib_table *tb, succeeded: return 0; -out_sw_fib_del: - switchdev_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); out_free_new_fa: kmem_cache_free(fn_alias_kmem, new_fa); out: @@ -1591,9 +1569,6 @@ int fib_table_delete(struct net *net, struct fib_table *tb, if (!fa_to_delete) return -ESRCH; - switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, - cfg->fc_type, tb->tb_id); - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, fa_to_delete->fa_info, tos, cfg->fc_type, tb->tb_id, 0); @@ -1785,80 +1760,6 @@ out: return NULL; } -/* Caller must hold RTNL */ -void fib_table_flush_external(struct fib_table *tb) -{ - struct trie *t = (struct trie *)tb->tb_data; - struct key_vector *pn = t->kv; - unsigned long cindex = 1; - struct hlist_node *tmp; - struct fib_alias *fa; - - /* walk trie in reverse order */ - for (;;) { - unsigned char slen = 0; - struct key_vector *n; - - if (!(cindex--)) { - t_key pkey = pn->key; - - /* cannot resize the trie vector */ - if (IS_TRIE(pn)) - break; - - /* resize completed node */ - pn = resize(t, pn); - cindex = get_index(pkey, pn); - - continue; - } - - /* grab the next available node */ - n = get_child(pn, cindex); - if (!n) - continue; - - if (IS_TNODE(n)) { - /* record pn and cindex for leaf walking */ - pn = n; - cindex = 1ul << n->bits; - - continue; - } - - hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { - struct fib_info *fi = fa->fa_info; - - /* if alias was cloned to local then we just - * need to remove the local copy from main - */ - if (tb->tb_id != fa->tb_id) { - hlist_del_rcu(&fa->fa_list); - alias_free_mem_rcu(fa); - continue; - } - - /* record local slen */ - slen = fa->fa_slen; - - if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD)) - continue; - - switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, fa->fa_type, - tb->tb_id); - } - - /* update leaf slen */ - n->slen = slen; - - if (hlist_empty(&n->leaf)) { - put_child_root(pn, n->key, NULL); - node_free(n); - } - } -} - /* Caller must hold RTNL. */ int fib_table_flush(struct net *net, struct fib_table *tb) { @@ -1909,9 +1810,6 @@ int fib_table_flush(struct net *net, struct fib_table *tb) continue; } - switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, fa->fa_type, - tb->tb_id); call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, n->key, KEYLENGTH - fa->fa_slen, diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index abd8d2a38a7d..02beb35f577f 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -21,7 +21,6 @@ #include #include #include -#include #include /** @@ -344,8 +343,6 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj) switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: return sizeof(struct switchdev_obj_port_vlan); - case SWITCHDEV_OBJ_ID_IPV4_FIB: - return sizeof(struct switchdev_obj_ipv4_fib); case SWITCHDEV_OBJ_ID_PORT_FDB: return sizeof(struct switchdev_obj_port_fdb); case SWITCHDEV_OBJ_ID_PORT_MDB: @@ -1108,184 +1105,6 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, } EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); -static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) -{ - const struct switchdev_ops *ops = dev->switchdev_ops; - struct net_device *lower_dev; - struct net_device *port_dev; - struct list_head *iter; - - /* Recusively search down until we find a sw port dev. - * (A sw port dev supports switchdev_port_attr_get). - */ - - if (ops && ops->switchdev_port_attr_get) - return dev; - - netdev_for_each_lower_dev(dev, lower_dev, iter) { - port_dev = switchdev_get_lowest_dev(lower_dev); - if (port_dev) - return port_dev; - } - - return NULL; -} - -static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) -{ - struct switchdev_attr attr = { - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - }; - struct switchdev_attr prev_attr; - struct net_device *dev = NULL; - int nhsel; - - ASSERT_RTNL(); - - /* For this route, all nexthop devs must be on the same switch. */ - - for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { - const struct fib_nh *nh = &fi->fib_nh[nhsel]; - - if (!nh->nh_dev) - return NULL; - - dev = switchdev_get_lowest_dev(nh->nh_dev); - if (!dev) - return NULL; - - attr.orig_dev = dev; - if (switchdev_port_attr_get(dev, &attr)) - return NULL; - - if (nhsel > 0 && - !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid)) - return NULL; - - prev_attr = attr; - } - - return dev; -} - -/** - * switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry - * - * @dst: route's IPv4 destination address - * @dst_len: destination address length (prefix length) - * @fi: route FIB info structure - * @tos: route TOS - * @type: route type - * @nlflags: netlink flags passed in (NLM_F_*) - * @tb_id: route table ID - * - * Add/modify switch IPv4 route entry. - */ -int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, u32 tb_id) -{ - struct switchdev_obj_ipv4_fib ipv4_fib = { - .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, - .dst = dst, - .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .nlflags = nlflags, - .tb_id = tb_id, - }; - struct net_device *dev; - int err = 0; - - /* Don't offload route if using custom ip rules or if - * IPv4 FIB offloading has been disabled completely. - */ - -#ifdef CONFIG_IP_MULTIPLE_TABLES - if (fi->fib_net->ipv4.fib_has_custom_rules) - return 0; -#endif - - if (fi->fib_net->ipv4.fib_offload_disabled) - return 0; - - dev = switchdev_get_dev_by_nhs(fi); - if (!dev) - return 0; - - ipv4_fib.obj.orig_dev = dev; - err = switchdev_port_obj_add(dev, &ipv4_fib.obj); - if (!err) - fib_info_offload_inc(fi); - - return err == -EOPNOTSUPP ? 0 : err; -} -EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add); - -/** - * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch - * - * @dst: route's IPv4 destination address - * @dst_len: destination address length (prefix length) - * @fi: route FIB info structure - * @tos: route TOS - * @type: route type - * @tb_id: route table ID - * - * Delete IPv4 route entry from switch device. - */ -int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) -{ - struct switchdev_obj_ipv4_fib ipv4_fib = { - .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, - .dst = dst, - .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .nlflags = 0, - .tb_id = tb_id, - }; - struct net_device *dev; - int err = 0; - - if (!(fi->fib_flags & RTNH_F_OFFLOAD)) - return 0; - - dev = switchdev_get_dev_by_nhs(fi); - if (!dev) - return 0; - - ipv4_fib.obj.orig_dev = dev; - err = switchdev_port_obj_del(dev, &ipv4_fib.obj); - if (!err) - fib_info_offload_dec(fi); - - return err == -EOPNOTSUPP ? 0 : err; -} -EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del); - -/** - * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation - * - * @fi: route FIB info structure - */ -void switchdev_fib_ipv4_abort(struct fib_info *fi) -{ - /* There was a problem installing this route to the offload - * device. For now, until we come up with more refined - * policy handling, abruptly end IPv4 fib offloading for - * for entire net by flushing offload device(s) of all - * IPv4 routes, and mark IPv4 fib offloading broken from - * this point forward. - */ - - fib_flush_external(fi->fib_net); - fi->fib_net->ipv4.fib_offload_disabled = true; -} -EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); - bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { -- cgit v1.2.3 From fa5effe7664b1606dfd639dff58686f6dbb119d7 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Tue, 27 Sep 2016 11:09:51 +0300 Subject: net/sched: pkt_cls: change tc actions order to be as the user sets Currently the created tc actions list is reversed against the order set by the user. Change the actions list order to be the same as was set by the user. This patch doesn't affect dump actions behavior. For dumping, action->order parameter is used so the list order doesn't matter. Signed-off-by: Hadar Hen Zion Acked-by: Jamal Hadi Salim Acked-by: Cong Wang Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 5ccaa4be7d96..767b03a3fe67 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -123,7 +123,7 @@ static inline void tcf_exts_to_list(const struct tcf_exts *exts, for (i = 0; i < exts->nr_actions; i++) { struct tc_action *a = exts->actions[i]; - list_add(&a->list, actions); + list_add_tail(&a->list, actions); } #endif } -- cgit v1.2.3 From 6348ef2dbbd96c4488a1ee83cc0f0f3d9a314a2f Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 30 Sep 2016 11:28:58 +0800 Subject: net:snmp: Introduce generic interfaces for snmp_get_cpu_field{, 64} This is to introduce the generic interfaces for snmp_get_cpu_field{,64}. It exchanges the two for-loops for collecting the percpu statistics data. This can aggregate the data by going through all the items of each cpu sequentially. Signed-off-by: Jia He Suggested-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/ip.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 9742b92dc933..bc43c0fcae12 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -219,6 +219,29 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o } #endif +#define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; stats_list[i].name; i++) \ + buff64[i] += snmp_get_cpu_field64( \ + mib_statistic, \ + c, stats_list[i].entry, \ + offset); \ + } \ +} + +#define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; stats_list[i].name; i++) \ + buff[i] += snmp_get_cpu_field( \ + mib_statistic, \ + c, stats_list[i].entry); \ + } \ +} + void inet_get_local_port_range(struct net *net, int *low, int *high); #ifdef CONFIG_SYSCTL -- cgit v1.2.3 From bd11f0741fa5a2c296629898ad07759dd12b35bb Mon Sep 17 00:00:00 2001 From: Maciej Å»enczykowski Date: Tue, 27 Sep 2016 23:57:58 -0700 Subject: ipv6 addrconf: implement RFC7559 router solicitation backoff This implements: https://tools.ietf.org/html/rfc7559 Backoff is performed according to RFC3315 section 14: https://tools.ietf.org/html/rfc3315#section-14 We allow setting /proc/sys/net/ipv6/conf/*/router_solicitations to a negative value meaning an unlimited number of retransmits, and we make this the new default (inline with the RFC). We also add a new setting: /proc/sys/net/ipv6/conf/*/router_solicitation_max_interval defaulting to 1 hour (per RFC recommendation). Signed-off-by: Maciej Å»enczykowski Acked-by: Erik Kline Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + include/net/addrconf.h | 3 ++- include/net/if_inet6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 51 ++++++++++++++++++++++++++++++++++++++++------- 5 files changed, 49 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c6dbcd84a2c7..7e9a789be5e0 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -18,6 +18,7 @@ struct ipv6_devconf { __s32 dad_transmits; __s32 rtr_solicits; __s32 rtr_solicit_interval; + __s32 rtr_solicit_max_interval; __s32 rtr_solicit_delay; __s32 force_mld_version; __s32 mldv1_unsolicited_report_interval; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 9826d3a9464c..f2d072787947 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -1,8 +1,9 @@ #ifndef _ADDRCONF_H #define _ADDRCONF_H -#define MAX_RTR_SOLICITATIONS 3 +#define MAX_RTR_SOLICITATIONS -1 /* unlimited */ #define RTR_SOLICITATION_INTERVAL (4*HZ) +#define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */ #define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 1c8b6820b694..515352c6280a 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -201,6 +201,7 @@ struct inet6_dev { struct ipv6_devstat stats; struct timer_list rs_timer; + __s32 rs_interval; /* in jiffies */ __u8 rs_probes; __u8 addr_gen_mode; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 395876060f50..8c2772340c3f 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -177,6 +177,7 @@ enum { DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, DEVCONF_DROP_UNSOLICITED_NA, DEVCONF_KEEP_ADDR_ON_DOWN, + DEVCONF_RTR_SOLICIT_MAX_INTERVAL, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 35d4baa55c9d..87183983724d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -112,6 +112,27 @@ static inline u32 cstamp_delta(unsigned long cstamp) return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; } +static inline s32 rfc3315_s14_backoff_init(s32 irt) +{ + /* multiply 'initial retransmission time' by 0.9 .. 1.1 */ + u64 tmp = (900000 + prandom_u32() % 200001) * (u64)irt; + do_div(tmp, 1000000); + return (s32)tmp; +} + +static inline s32 rfc3315_s14_backoff_update(s32 rt, s32 mrt) +{ + /* multiply 'retransmission timeout' by 1.9 .. 2.1 */ + u64 tmp = (1900000 + prandom_u32() % 200001) * (u64)rt; + do_div(tmp, 1000000); + if ((s32)tmp > mrt) { + /* multiply 'maximum retransmission time' by 0.9 .. 1.1 */ + tmp = (900000 + prandom_u32() % 200001) * (u64)mrt; + do_div(tmp, 1000000); + } + return (s32)tmp; +} + #ifdef CONFIG_SYSCTL static int addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct inet6_dev *idev); @@ -187,6 +208,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .dad_transmits = 1, .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, + .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, @@ -232,6 +254,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .dad_transmits = 1, .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, + .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, @@ -3687,7 +3710,7 @@ static void addrconf_rs_timer(unsigned long data) if (idev->if_flags & IF_RA_RCVD) goto out; - if (idev->rs_probes++ < idev->cnf.rtr_solicits) { + if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) { write_unlock(&idev->lock); if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) ndisc_send_rs(dev, &lladdr, @@ -3696,11 +3719,13 @@ static void addrconf_rs_timer(unsigned long data) goto put; write_lock(&idev->lock); + idev->rs_interval = rfc3315_s14_backoff_update( + idev->rs_interval, idev->cnf.rtr_solicit_max_interval); /* The wait after the last probe can be shorter */ addrconf_mod_rs_timer(idev, (idev->rs_probes == idev->cnf.rtr_solicits) ? idev->cnf.rtr_solicit_delay : - idev->cnf.rtr_solicit_interval); + idev->rs_interval); } else { /* * Note: we do not support deprecated "all on-link" @@ -3949,7 +3974,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp); send_rs = send_mld && ipv6_accept_ra(ifp->idev) && - ifp->idev->cnf.rtr_solicits > 0 && + ifp->idev->cnf.rtr_solicits != 0 && (dev->flags&IFF_LOOPBACK) == 0; read_unlock_bh(&ifp->idev->lock); @@ -3971,10 +3996,11 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) write_lock_bh(&ifp->idev->lock); spin_lock(&ifp->lock); + ifp->idev->rs_interval = rfc3315_s14_backoff_init( + ifp->idev->cnf.rtr_solicit_interval); ifp->idev->rs_probes = 1; ifp->idev->if_flags |= IF_RS_SENT; - addrconf_mod_rs_timer(ifp->idev, - ifp->idev->cnf.rtr_solicit_interval); + addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval); spin_unlock(&ifp->lock); write_unlock_bh(&ifp->idev->lock); } @@ -4891,6 +4917,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits; array[DEVCONF_RTR_SOLICIT_INTERVAL] = jiffies_to_msecs(cnf->rtr_solicit_interval); + array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] = + jiffies_to_msecs(cnf->rtr_solicit_max_interval); array[DEVCONF_RTR_SOLICIT_DELAY] = jiffies_to_msecs(cnf->rtr_solicit_delay); array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; @@ -5099,7 +5127,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) return -EINVAL; if (!ipv6_accept_ra(idev)) return -EINVAL; - if (idev->cnf.rtr_solicits <= 0) + if (idev->cnf.rtr_solicits == 0) return -EINVAL; write_lock_bh(&idev->lock); @@ -5128,8 +5156,10 @@ update_lft: if (update_rs) { idev->if_flags |= IF_RS_SENT; + idev->rs_interval = rfc3315_s14_backoff_init( + idev->cnf.rtr_solicit_interval); idev->rs_probes = 1; - addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval); + addrconf_mod_rs_timer(idev, idev->rs_interval); } /* Well, that's kinda nasty ... */ @@ -5777,6 +5807,13 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "router_solicitation_max_interval", + .data = &ipv6_devconf.rtr_solicit_max_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { .procname = "router_solicitation_delay", .data = &ipv6_devconf.rtr_solicit_delay, -- cgit v1.2.3 From b8676221f00dd5b6018f0fd88cd278f93e11143a Mon Sep 17 00:00:00 2001 From: David Spinadel Date: Thu, 22 Sep 2016 23:16:50 +0300 Subject: cfg80211: Add support for static WEP in the driver Add support for drivers that implement static WEP internally, i.e. expose connection keys to the driver in connect flow and don't upload the keys after the connection. Signed-off-by: David Spinadel Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 +++++++++++- net/wireless/core.h | 4 ++-- net/wireless/ibss.c | 5 +++-- net/wireless/sme.c | 6 +++++- net/wireless/util.c | 2 +- net/wireless/wext-compat.c | 2 +- net/wireless/wext-sme.c | 2 +- 7 files changed, 24 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ed37304fa09d..68dca3d93b85 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5,7 +5,7 @@ * * Copyright 2006-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2015 Intel Deutschland GmbH + * Copyright 2015-2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -593,6 +593,8 @@ struct survey_info { s8 noise; }; +#define CFG80211_MAX_WEP_KEYS 4 + /** * struct cfg80211_crypto_settings - Crypto settings * @wpa_versions: indicates which, if any, WPA versions are enabled @@ -610,6 +612,9 @@ struct survey_info { * allowed through even on unauthorized ports * @control_port_no_encrypt: TRUE to prevent encryption of control port * protocol frames. + * @wep_keys: static WEP keys, if not NULL points to an array of + * CFG80211_MAX_WEP_KEYS WEP keys + * @wep_tx_key: key index (0..3) of the default TX static WEP key */ struct cfg80211_crypto_settings { u32 wpa_versions; @@ -621,6 +626,8 @@ struct cfg80211_crypto_settings { bool control_port; __be16 control_port_ethertype; bool control_port_no_encrypt; + struct key_params *wep_keys; + int wep_tx_key; }; /** @@ -2905,6 +2912,8 @@ struct cfg80211_ops { * @WIPHY_FLAG_SUPPORTS_5_10_MHZ: Device supports 5 MHz and 10 MHz channels. * @WIPHY_FLAG_HAS_CHANNEL_SWITCH: Device supports channel switch in * beaconing mode (AP, IBSS, Mesh, ...). + * @WIPHY_FLAG_HAS_STATIC_WEP: The device supports static WEP key installation + * before connection. */ enum wiphy_flags { /* use hole at 0 */ @@ -2930,6 +2939,7 @@ enum wiphy_flags { WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL = BIT(21), WIPHY_FLAG_SUPPORTS_5_10_MHZ = BIT(22), WIPHY_FLAG_HAS_CHANNEL_SWITCH = BIT(23), + WIPHY_FLAG_HAS_STATIC_WEP = BIT(24), }; /** diff --git a/net/wireless/core.h b/net/wireless/core.h index 5555e3c13ae9..554f87d0f991 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -249,8 +249,8 @@ struct cfg80211_event { }; struct cfg80211_cached_keys { - struct key_params params[4]; - u8 data[4][WLAN_KEY_LEN_WEP104]; + struct key_params params[CFG80211_MAX_WEP_KEYS]; + u8 data[CFG80211_MAX_WEP_KEYS][WLAN_KEY_LEN_WEP104]; int def; }; diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index eafdfa5798ae..364f900a3dc4 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -43,7 +43,8 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); - cfg80211_upload_connect_keys(wdev); + if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP)) + cfg80211_upload_connect_keys(wdev); nl80211_send_ibss_bssid(wiphy_to_rdev(wdev->wiphy), dev, bssid, GFP_KERNEL); @@ -296,7 +297,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; - for (i = 0; i < 4; i++) + for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) ck->params[i].key = ck->data[i]; } err = __cfg80211_join_ibss(rdev, wdev->netdev, diff --git a/net/wireless/sme.c b/net/wireless/sme.c index c08a3b57dca1..a77db333927e 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -726,7 +726,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, wdev->current_bss = bss_from_pub(bss); - cfg80211_upload_connect_keys(wdev); + if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP)) + cfg80211_upload_connect_keys(wdev); rcu_read_lock(); country_ie = ieee80211_bss_get_ie(bss, WLAN_EID_COUNTRY); @@ -1043,6 +1044,9 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, connect->crypto.ciphers_pairwise[0] = cipher; } } + + connect->crypto.wep_keys = connkeys->params; + connect->crypto.wep_tx_key = connkeys->def; } else { if (WARN_ON(connkeys)) return -EINVAL; diff --git a/net/wireless/util.c b/net/wireless/util.c index 9e6e2aaa7766..e02141d66b69 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -912,7 +912,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) if (!wdev->connect_keys) return; - for (i = 0; i < 4; i++) { + for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) { if (!wdev->connect_keys->params[i].cipher) continue; if (rdev_add_key(rdev, dev, i, false, NULL, diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 2b096c02eb85..a220156cf217 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -415,7 +415,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, GFP_KERNEL); if (!wdev->wext.keys) return -ENOMEM; - for (i = 0; i < 4; i++) + for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) wdev->wext.keys->params[i].key = wdev->wext.keys->data[i]; } diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 88f1f6931ab8..995163830a61 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -46,7 +46,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; - for (i = 0; i < 4; i++) + for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) ck->params[i].key = ck->data[i]; } -- cgit v1.2.3 From cb3b7d87652aeb37cfb5295a6157a3280dae10cb Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:13 +0300 Subject: cfg80211: add start / stop NAN commands This allows user space to start/stop NAN interface. A NAN interface is like P2P device in a few aspects: it doesn't have a netdev associated to it. Add the new interface type and prevent operations that can't be executed on NAN interface like scan. Define several attributes that may be configured by user space when starting NAN functionality (master preference and dual band operation) Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 21 +++++++++- include/uapi/linux/nl80211.h | 47 +++++++++++++++++++++++ net/mac80211/cfg.c | 2 + net/mac80211/chan.c | 3 ++ net/mac80211/iface.c | 4 ++ net/mac80211/offchannel.c | 1 + net/mac80211/rx.c | 3 ++ net/mac80211/util.c | 1 + net/wireless/chan.c | 2 + net/wireless/core.c | 34 +++++++++++++++++ net/wireless/core.h | 3 ++ net/wireless/mlme.c | 1 + net/wireless/nl80211.c | 91 ++++++++++++++++++++++++++++++++++++++++++-- net/wireless/rdev-ops.h | 20 ++++++++++ net/wireless/trace.h | 27 +++++++++++++ net/wireless/util.c | 6 ++- 16 files changed, 260 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 68dca3d93b85..9898e1f883e2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2313,6 +2313,19 @@ struct cfg80211_qos_map { struct cfg80211_dscp_range up[8]; }; +/** + * struct cfg80211_nan_conf - NAN configuration + * + * This struct defines NAN configuration parameters + * + * @master_pref: master preference (1 - 255) + * @dual: dual band operation mode, see &enum nl80211_nan_dual_band_conf + */ +struct cfg80211_nan_conf { + u8 master_pref; + u8 dual; +}; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -2601,6 +2614,8 @@ struct cfg80211_qos_map { * and returning to the base channel for communication with the AP. * @tdls_cancel_channel_switch: Stop channel-switching with a TDLS peer. Both * peers must be on the base channel when the call completes. + * @start_nan: Start the NAN interface. + * @stop_nan: Stop the NAN interface. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2866,6 +2881,9 @@ struct cfg80211_ops { void (*tdls_cancel_channel_switch)(struct wiphy *wiphy, struct net_device *dev, const u8 *addr); + int (*start_nan)(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf); + void (*stop_nan)(struct wiphy *wiphy, struct wireless_dev *wdev); }; /* @@ -3626,6 +3644,7 @@ struct cfg80211_cached_keys; * beacons, 0 when not valid * @address: The address for this device, valid only if @netdev is %NULL * @p2p_started: true if this is a P2P Device that has been started + * @nan_started: true if this is a NAN interface that has been started * @cac_started: true if DFS channel availability check has been started * @cac_start_time: timestamp (jiffies) when the dfs state was entered. * @cac_time_ms: CAC time in ms @@ -3657,7 +3676,7 @@ struct wireless_dev { struct mutex mtx; - bool use_4addr, p2p_started; + bool use_4addr, p2p_started, nan_started; u8 address[ETH_ALEN] __aligned(sizeof(u16)); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index ec10d1b2838f..98fd3ec8598d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -838,6 +838,16 @@ * not running. The driver indicates the status of the scan through * cfg80211_scan_done(). * + * @NL80211_CMD_START_NAN: Start NAN operation, identified by its + * %NL80211_ATTR_WDEV interface. This interface must have been previously + * created with %NL80211_CMD_NEW_INTERFACE. After it has been started, the + * NAN interface will create or join a cluster. This command must have a + * valid %NL80211_ATTR_NAN_MASTER_PREF attribute and optional + * %NL80211_ATTR_NAN_DUAL attributes. + * After this command NAN functions can be added. + * @NL80211_CMD_STOP_NAN: Stop the NAN operation, identified by + * its %NL80211_ATTR_WDEV interface. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1026,6 +1036,9 @@ enum nl80211_commands { NL80211_CMD_ABORT_SCAN, + NL80211_CMD_START_NAN, + NL80211_CMD_STOP_NAN, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1739,6 +1752,12 @@ enum nl80211_commands { * regulatory indoor configuration would be owned by the netlink socket * that configured the indoor setting, and the indoor operation would be * cleared when the socket is closed. + * If set during NAN interface creation, the interface will be destroyed + * if the socket is closed just like any other interface. Moreover, only + * the netlink socket that created the interface will be allowed to add + * and remove functions. NAN notifications will be sent in unicast to that + * socket. Without this attribute, any socket can add functions and the + * notifications will be sent to the %NL80211_MCGRP_NAN multicast group. * * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is * the TDLS link initiator. @@ -1873,6 +1892,14 @@ enum nl80211_commands { * @NL80211_ATTR_MESH_PEER_AID: Association ID for the mesh peer (u16). This is * used to pull the stored data for mesh peer in power save state. * + * @NL80211_ATTR_NAN_MASTER_PREF: the master preference to be used by + * %NL80211_CMD_START_NAN. Its type is u8 and it can't be 0. + * Also, values 1 and 255 are reserved for certification purposes and + * should not be used during a normal device operation. + * @NL80211_ATTR_NAN_DUAL: NAN dual band operation config (see + * &enum nl80211_nan_dual_band_conf). This attribute is used with + * %NL80211_CMD_START_NAN. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2267,6 +2294,9 @@ enum nl80211_attrs { NL80211_ATTR_MESH_PEER_AID, + NL80211_ATTR_NAN_MASTER_PREF, + NL80211_ATTR_NAN_DUAL, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2345,6 +2375,7 @@ enum nl80211_attrs { * commands to create and destroy one * @NL80211_IF_TYPE_OCB: Outside Context of a BSS * This mode corresponds to the MIB variable dot11OCBActivated=true + * @NL80211_IFTYPE_NAN: NAN device interface type (not a netdev) * @NL80211_IFTYPE_MAX: highest interface type number currently defined * @NUM_NL80211_IFTYPES: number of defined interface types * @@ -2365,6 +2396,7 @@ enum nl80211_iftype { NL80211_IFTYPE_P2P_GO, NL80211_IFTYPE_P2P_DEVICE, NL80211_IFTYPE_OCB, + NL80211_IFTYPE_NAN, /* keep last */ NUM_NL80211_IFTYPES, @@ -4870,4 +4902,19 @@ enum nl80211_bss_select_attr { NL80211_BSS_SELECT_ATTR_MAX = __NL80211_BSS_SELECT_ATTR_AFTER_LAST - 1 }; +/** + * enum nl80211_nan_dual_band_conf - NAN dual band configuration + * + * Defines the NAN dual band mode of operation + * + * @NL80211_NAN_BAND_DEFAULT: device default mode + * @NL80211_NAN_BAND_2GHZ: 2.4GHz mode + * @NL80211_NAN_BAND_5GHZ: 5GHz mode + */ +enum nl80211_nan_dual_band_conf { + NL80211_NAN_BAND_DEFAULT = 1 << 0, + NL80211_NAN_BAND_2GHZ = 1 << 1, + NL80211_NAN_BAND_5GHZ = 1 << 2, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e29ff5749944..a74027f887bc 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -257,6 +257,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: @@ -2036,6 +2037,7 @@ static int ieee80211_scan(struct wiphy *wiphy, !(req->flags & NL80211_SCAN_FLAG_AP))) return -EOPNOTSUPP; break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 74142d07ad31..d035801569eb 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -274,6 +274,7 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, ieee80211_get_max_required_bw(sdata)); break; case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: continue; case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_WDS: @@ -718,6 +719,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, switch (sdata->vif.type) { case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: continue; case NL80211_IFTYPE_STATION: if (!sdata->u.mgd.associated) @@ -980,6 +982,7 @@ ieee80211_vif_chanctx_reservation_complete(struct ieee80211_sub_if_data *sdata) case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: case NUM_NL80211_IFTYPES: WARN_ON(1); break; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b0abddc714ef..e694ca2baad0 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -545,6 +545,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_OCB: + case NL80211_IFTYPE_NAN: /* no special treatment */ break; case NL80211_IFTYPE_UNSPECIFIED: @@ -660,6 +661,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: break; default: /* not reached */ @@ -948,6 +950,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, /* relies on synchronize_rcu() below */ RCU_INIT_POINTER(local->p2p_sdata, NULL); /* fall through */ + case NL80211_IFTYPE_NAN: default: cancel_work_sync(&sdata->work); /* @@ -1457,6 +1460,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, break; case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_UNSPECIFIED: diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 55a9c5b94ce1..75d5c960ce67 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -838,6 +838,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, case NL80211_IFTYPE_P2P_DEVICE: need_offchan = true; break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e796060b7c5e..c9489a86e6d6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3586,6 +3586,9 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) ieee80211_is_probe_req(hdr->frame_control) || ieee80211_is_probe_resp(hdr->frame_control) || ieee80211_is_beacon(hdr->frame_control); + case NL80211_IFTYPE_NAN: + /* Currently no frames on NAN interface are allowed */ + return false; default: break; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b6865d884487..2c78541f695c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1975,6 +1975,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: /* nothing to do */ break; case NL80211_IFTYPE_UNSPECIFIED: diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 0f506220a3bd..5497d022fada 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -372,6 +372,7 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: @@ -946,6 +947,7 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: /* these interface types don't really have a channel */ return; case NL80211_IFTYPE_UNSPECIFIED: diff --git a/net/wireless/core.c b/net/wireless/core.c index 4911cd997b9a..013987243c0b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -225,6 +225,23 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, } } +void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + ASSERT_RTNL(); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN)) + return; + + if (!wdev->nan_started) + return; + + rdev_stop_nan(rdev, wdev); + wdev->nan_started = false; + + rdev->opencount--; +} + void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); @@ -242,6 +259,9 @@ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) case NL80211_IFTYPE_P2P_DEVICE: cfg80211_stop_p2p_device(rdev, wdev); break; + case NL80211_IFTYPE_NAN: + cfg80211_stop_nan(rdev, wdev); + break; default: break; } @@ -537,6 +557,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) c->limits[j].max > 1)) return -EINVAL; + /* Only a single NAN can be allowed */ + if (WARN_ON(types & BIT(NL80211_IFTYPE_NAN) && + c->limits[j].max > 1)) + return -EINVAL; + cnt += c->limits[j].max; /* * Don't advertise an unsupported type @@ -579,6 +604,10 @@ int wiphy_register(struct wiphy *wiphy) !rdev->ops->tdls_cancel_channel_switch))) return -EINVAL; + if (WARN_ON((wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN)) && + (!rdev->ops->start_nan || !rdev->ops->stop_nan))) + return -EINVAL; + /* * if a wiphy has unsupported modes for regulatory channel enforcement, * opt-out of enforcement checking @@ -589,6 +618,7 @@ int wiphy_register(struct wiphy *wiphy) BIT(NL80211_IFTYPE_P2P_GO) | BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_P2P_DEVICE) | + BIT(NL80211_IFTYPE_NAN) | BIT(NL80211_IFTYPE_AP_VLAN) | BIT(NL80211_IFTYPE_MONITOR))) wiphy->regulatory_flags |= REGULATORY_IGNORE_STALE_KICKOFF; @@ -916,6 +946,9 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) cfg80211_mlme_purge_registrations(wdev); cfg80211_stop_p2p_device(rdev, wdev); break; + case NL80211_IFTYPE_NAN: + cfg80211_stop_nan(rdev, wdev); + break; default: WARN_ON_ONCE(1); break; @@ -979,6 +1012,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, /* must be handled by mac80211/driver, has no APIs */ break; case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: /* cannot happen, has no netdev */ break; case NL80211_IFTYPE_AP_VLAN: diff --git a/net/wireless/core.h b/net/wireless/core.h index 554f87d0f991..08d2e948c9ad 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -488,6 +488,9 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); +void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); + #define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index d6abb0704db5..cbb48e26a871 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -634,6 +634,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, * fall through, P2P device only supports * public action frames */ + case NL80211_IFTYPE_NAN: default: err = -EOPNOTSUPP; break; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b8441e60b0f6..9e9fb37087fc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -409,6 +409,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { .len = VHT_MUMIMO_GROUPS_DATA_LEN }, [NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = { .len = ETH_ALEN }, + [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 }, + [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 }, }; /* policy for the key attributes */ @@ -934,6 +936,7 @@ static int nl80211_key_allowed(struct wireless_dev *wdev) case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_WDS: case NUM_NL80211_IFTYPES: @@ -2819,7 +2822,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) !(rdev->wiphy.interface_modes & (1 << type))) return -EOPNOTSUPP; - if ((type == NL80211_IFTYPE_P2P_DEVICE || + if ((type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN || rdev->wiphy.features & NL80211_FEATURE_MAC_ON_CREATE) && info->attrs[NL80211_ATTR_MAC]) { nla_memcpy(params.macaddr, info->attrs[NL80211_ATTR_MAC], @@ -2875,9 +2878,10 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) wdev->mesh_id_up_len); wdev_unlock(wdev); break; + case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_P2P_DEVICE: /* - * P2P Device doesn't have a netdev, so doesn't go + * P2P Device and NAN do not have a netdev, so don't go * through the netdev notifier and must be added here */ mutex_init(&wdev->mtx); @@ -6434,6 +6438,9 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) wiphy = &rdev->wiphy; + if (wdev->iftype == NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + if (!rdev->ops->scan) return -EOPNOTSUPP; @@ -8977,6 +8984,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_P2P_DEVICE: break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } @@ -9022,6 +9030,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_GO: break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } @@ -9138,6 +9147,7 @@ static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *in case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_P2P_DEVICE: break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } @@ -10504,6 +10514,58 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) return 0; } +static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + struct cfg80211_nan_conf conf = {}; + int err; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (wdev->nan_started) + return -EEXIST; + + if (rfkill_blocked(rdev->rfkill)) + return -ERFKILL; + + if (!info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_NAN_DUAL]) + return -EINVAL; + + conf.master_pref = + nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]); + if (!conf.master_pref) + return -EINVAL; + + conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]); + + err = rdev_start_nan(rdev, wdev, &conf); + if (err) + return err; + + wdev->nan_started = true; + rdev->opencount++; + + return 0; +} + +static int nl80211_stop_nan(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + cfg80211_stop_nan(rdev, wdev); + + return 0; +} + static int nl80211_get_protocol_features(struct sk_buff *skb, struct genl_info *info) { @@ -11205,7 +11267,14 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, dev_hold(dev); } else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) { - if (!wdev->p2p_started) { + if (wdev->iftype == NL80211_IFTYPE_P2P_DEVICE && + !wdev->p2p_started) { + if (rtnl) + rtnl_unlock(); + return -ENETDOWN; + } + if (wdev->iftype == NL80211_IFTYPE_NAN && + !wdev->nan_started) { if (rtnl) rtnl_unlock(); return -ENETDOWN; @@ -11838,6 +11907,22 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_START_NAN, + .doit = nl80211_start_nan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_STOP_NAN, + .doit = nl80211_stop_nan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, { .cmd = NL80211_CMD_SET_MCAST_RATE, .doit = nl80211_set_mcast_rate, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 85ff30bee2b9..afb68a8428b9 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -887,6 +887,26 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev, trace_rdev_return_void(&rdev->wiphy); } +static inline int rdev_start_nan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf) +{ + int ret; + + trace_rdev_start_nan(&rdev->wiphy, wdev, conf); + ret = rdev->ops->start_nan(&rdev->wiphy, wdev, conf); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline void rdev_stop_nan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + trace_rdev_stop_nan(&rdev->wiphy, wdev); + rdev->ops->stop_nan(&rdev->wiphy, wdev); + trace_rdev_return_void(&rdev->wiphy); +} + static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_acl_data *params) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 72b5255cefe2..5f3370f4c6a2 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1889,6 +1889,33 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_p2p_device, TP_ARGS(wiphy, wdev) ); +TRACE_EVENT(rdev_start_nan, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf), + TP_ARGS(wiphy, wdev, conf), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u8, master_pref) + __field(u8, dual); + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->master_pref = conf->master_pref; + __entry->dual = conf->dual; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT + ", master preference: %u, dual: %d", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref, + __entry->dual) +); + +DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_nan, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), + TP_ARGS(wiphy, wdev) +); + TRACE_EVENT(rdev_set_mac_acl, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_acl_data *params), diff --git a/net/wireless/util.c b/net/wireless/util.c index e02141d66b69..7a2d46b0058a 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1008,8 +1008,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, if (otype == NL80211_IFTYPE_AP_VLAN) return -EOPNOTSUPP; - /* cannot change into P2P device type */ - if (ntype == NL80211_IFTYPE_P2P_DEVICE) + /* cannot change into P2P device or NAN */ + if (ntype == NL80211_IFTYPE_P2P_DEVICE || + ntype == NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (!rdev->ops->change_virtual_intf || @@ -1088,6 +1089,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, /* not happening */ break; case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: WARN_ON(1); break; } -- cgit v1.2.3 From 708d50edb149fe488c7c96f59ba9a89a64985cf2 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:14 +0300 Subject: mac80211: add boilerplate code for start / stop NAN This code doesn't do much besides allowing to start and stop the vif. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Ayala Beker Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 9 +++++++++ net/mac80211/cfg.c | 36 ++++++++++++++++++++++++++++++++++ net/mac80211/chan.c | 3 +++ net/mac80211/driver-ops.h | 27 +++++++++++++++++++++++++ net/mac80211/iface.c | 8 ++++++-- net/mac80211/main.c | 5 +++++ net/mac80211/offchannel.c | 3 ++- net/mac80211/trace.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/util.c | 3 ++- 9 files changed, 140 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5296100f3889..df9b5cff300c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3420,6 +3420,9 @@ enum ieee80211_reconfig_type { * synchronization which is needed in case driver has in its RSS queues * pending frames that were received prior to the control path action * currently taken (e.g. disassociation) but are not processed yet. + * + * @start_nan: join an existing NAN cluster, or create a new one. + * @stop_nan: leave the NAN cluster. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -3655,6 +3658,12 @@ struct ieee80211_ops { void (*wake_tx_queue)(struct ieee80211_hw *hw, struct ieee80211_txq *txq); void (*sync_rx_queues)(struct ieee80211_hw *hw); + + int (*start_nan)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct cfg80211_nan_conf *conf); + int (*stop_nan)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a74027f887bc..9aabb0932d24 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3,6 +3,7 @@ * * Copyright 2006-2010 Johannes Berg * Copyright 2013-2015 Intel Mobile Communications GmbH + * Copyright (C) 2015-2016 Intel Deutschland GmbH * * This file is GPLv2 as found in COPYING. */ @@ -152,6 +153,39 @@ static void ieee80211_stop_p2p_device(struct wiphy *wiphy, ieee80211_sdata_stop(IEEE80211_WDEV_TO_SUB_IF(wdev)); } +static int ieee80211_start_nan(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + int ret; + + mutex_lock(&sdata->local->chanctx_mtx); + ret = ieee80211_check_combinations(sdata, NULL, 0, 0); + mutex_unlock(&sdata->local->chanctx_mtx); + if (ret < 0) + return ret; + + ret = ieee80211_do_open(wdev, true); + if (ret) + return ret; + + ret = drv_start_nan(sdata->local, sdata, conf); + if (ret) + ieee80211_sdata_stop(sdata); + + return ret; +} + +static void ieee80211_stop_nan(struct wiphy *wiphy, + struct wireless_dev *wdev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + + drv_stop_nan(sdata->local, sdata); + ieee80211_sdata_stop(sdata); +} + static int ieee80211_set_noack_map(struct wiphy *wiphy, struct net_device *dev, u16 noack_map) @@ -3464,4 +3498,6 @@ const struct cfg80211_ops mac80211_config_ops = { .set_ap_chanwidth = ieee80211_set_ap_chanwidth, .add_tx_ts = ieee80211_add_tx_ts, .del_tx_ts = ieee80211_del_tx_ts, + .start_nan = ieee80211_start_nan, + .stop_nan = ieee80211_stop_nan, }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index d035801569eb..e75cbf6ecc26 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -647,6 +647,9 @@ static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, struct ieee80211_chanctx *curr_ctx = NULL; int ret = 0; + if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN)) + return -ENOTSUPP; + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index fe35a1c0dc86..e52cfb855bd9 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -162,6 +162,7 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, return; if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || + sdata->vif.type == NL80211_IFTYPE_NAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && !sdata->vif.mu_mimo_owner))) return; @@ -1165,4 +1166,30 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, local->ops->wake_tx_queue(&local->hw, &txq->txq); } +static inline int drv_start_nan(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_conf *conf) +{ + int ret; + + might_sleep(); + check_sdata_in_driver(sdata); + + trace_drv_start_nan(local, sdata, conf); + ret = local->ops->start_nan(&local->hw, &sdata->vif, conf); + trace_drv_return_int(local, ret); + return ret; +} + +static inline void drv_stop_nan(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); + check_sdata_in_driver(sdata); + + trace_drv_stop_nan(local, sdata); + local->ops->stop_nan(&local->hw, &sdata->vif); + trace_drv_return_void(local); +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index e694ca2baad0..507f46a8eb1c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -327,6 +327,9 @@ static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata, int n_queues = sdata->local->hw.queues; int i; + if (iftype == NL80211_IFTYPE_NAN) + return 0; + if (iftype != NL80211_IFTYPE_P2P_DEVICE) { for (i = 0; i < IEEE80211_NUM_ACS; i++) { if (WARN_ON_ONCE(sdata->vif.hw_queue[i] == @@ -647,7 +650,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) local->fif_probe_req++; } - if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_NAN) changed |= ieee80211_reset_erp_info(sdata); ieee80211_bss_info_change_notify(sdata, changed); @@ -1726,7 +1730,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ASSERT_RTNL(); - if (type == NL80211_IFTYPE_P2P_DEVICE) { + if (type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN) { struct wireless_dev *wdev; sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ac053a9df36d..b5cf2c5cc166 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -821,6 +821,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) !local->ops->tdls_recv_channel_switch)) return -EOPNOTSUPP; + if (WARN_ON(local->hw.wiphy->interface_modes & + BIT(NL80211_IFTYPE_NAN) && + (!local->ops->start_nan || !local->ops->stop_nan))) + return -EINVAL; + #ifdef CONFIG_PM if (hw->wiphy->wowlan && (!local->ops->suspend || !local->ops->resume)) return -EINVAL; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 75d5c960ce67..c3f610bba3fe 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -128,7 +128,8 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) if (!ieee80211_sdata_running(sdata)) continue; - if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) + if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || + sdata->vif.type == NL80211_IFTYPE_NAN) continue; if (sdata->vif.type != NL80211_IFTYPE_MONITOR) diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 77e4c53baefb..deefbfb9f6fb 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1700,6 +1700,56 @@ TRACE_EVENT(drv_get_expected_throughput, ) ); +TRACE_EVENT(drv_start_nan, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_conf *conf), + + TP_ARGS(local, sdata, conf), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, master_pref) + __field(u8, dual) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->master_pref = conf->master_pref; + __entry->dual = conf->dual; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT + ", master preference: %u, dual: %d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref, + __entry->dual + ) +); + +TRACE_EVENT(drv_stop_nan, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + + TP_ARGS(local, sdata), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG + ) +); + /* * Tracing for API calls that drivers call. */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 2c78541f695c..5b57fcaaec9b 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1209,7 +1209,8 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, } if (sdata->vif.type != NL80211_IFTYPE_MONITOR && - sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) { + sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_NAN) { sdata->vif.bss_conf.qos = enable_qos; if (bss_notify) ieee80211_bss_info_change_notify(sdata, -- cgit v1.2.3 From a442b761b24b6886f9a4e2ff5f8cb4824c96526b Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:15 +0300 Subject: cfg80211: add add_nan_func / del_nan_func A NAN function can be either publish, subscribe or follow up. Make all the necessary verifications and just pass the request to the driver. Allow the user space application that starts NAN to forbid any other socket to add or remove functions. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Ayala Beker Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 91 +++++++++++ include/uapi/linux/nl80211.h | 150 ++++++++++++++++++ net/wireless/core.c | 3 +- net/wireless/nl80211.c | 369 +++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 21 +++ net/wireless/trace.h | 39 +++++ net/wireless/util.c | 22 +++ 7 files changed, 694 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9898e1f883e2..2f35ccf6da83 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2326,6 +2326,73 @@ struct cfg80211_nan_conf { u8 dual; }; +/** + * struct cfg80211_nan_func_filter - a NAN function Rx / Tx filter + * + * @filter: the content of the filter + * @len: the length of the filter + */ +struct cfg80211_nan_func_filter { + const u8 *filter; + u8 len; +}; + +/** + * struct cfg80211_nan_func - a NAN function + * + * @type: &enum nl80211_nan_function_type + * @service_id: the service ID of the function + * @publish_type: &nl80211_nan_publish_type + * @close_range: if true, the range should be limited. Threshold is + * implementation specific. + * @publish_bcast: if true, the solicited publish should be broadcasted + * @subscribe_active: if true, the subscribe is active + * @followup_id: the instance ID for follow up + * @followup_reqid: the requestor instance ID for follow up + * @followup_dest: MAC address of the recipient of the follow up + * @ttl: time to live counter in DW. + * @serv_spec_info: Service Specific Info + * @serv_spec_info_len: Service Specific Info length + * @srf_include: if true, SRF is inclusive + * @srf_bf: Bloom Filter + * @srf_bf_len: Bloom Filter length + * @srf_bf_idx: Bloom Filter index + * @srf_macs: SRF MAC addresses + * @srf_num_macs: number of MAC addresses in SRF + * @rx_filters: rx filters that are matched with corresponding peer's tx_filter + * @tx_filters: filters that should be transmitted in the SDF. + * @num_rx_filters: length of &rx_filters. + * @num_tx_filters: length of &tx_filters. + * @instance_id: driver allocated id of the function. + * @cookie: unique NAN function identifier. + */ +struct cfg80211_nan_func { + enum nl80211_nan_function_type type; + u8 service_id[NL80211_NAN_FUNC_SERVICE_ID_LEN]; + u8 publish_type; + bool close_range; + bool publish_bcast; + bool subscribe_active; + u8 followup_id; + u8 followup_reqid; + struct mac_address followup_dest; + u32 ttl; + const u8 *serv_spec_info; + u8 serv_spec_info_len; + bool srf_include; + const u8 *srf_bf; + u8 srf_bf_len; + u8 srf_bf_idx; + struct mac_address *srf_macs; + int srf_num_macs; + struct cfg80211_nan_func_filter *rx_filters; + struct cfg80211_nan_func_filter *tx_filters; + u8 num_tx_filters; + u8 num_rx_filters; + u8 instance_id; + u64 cookie; +}; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -2616,6 +2683,14 @@ struct cfg80211_nan_conf { * peers must be on the base channel when the call completes. * @start_nan: Start the NAN interface. * @stop_nan: Stop the NAN interface. + * @add_nan_func: Add a NAN function. Returns negative value on failure. + * On success @nan_func ownership is transferred to the driver and + * it may access it outside of the scope of this function. The driver + * should free the @nan_func when no longer needed by calling + * cfg80211_free_nan_func(). + * On success the driver should assign an instance_id in the + * provided @nan_func. + * @del_nan_func: Delete a NAN function. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2884,6 +2959,10 @@ struct cfg80211_ops { int (*start_nan)(struct wiphy *wiphy, struct wireless_dev *wdev, struct cfg80211_nan_conf *conf); void (*stop_nan)(struct wiphy *wiphy, struct wireless_dev *wdev); + int (*add_nan_func)(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_func *nan_func); + void (*del_nan_func)(struct wiphy *wiphy, struct wireless_dev *wdev, + u64 cookie); }; /* @@ -3335,6 +3414,8 @@ struct wiphy_iftype_ext_capab { * @bss_select_support: bitmask indicating the BSS selection criteria supported * by the driver in the .connect() callback. The bit position maps to the * attribute indices defined in &enum nl80211_bss_select_attr. + * + * @cookie_counter: unique generic cookie counter, used to identify objects. */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -3464,6 +3545,8 @@ struct wiphy { u32 bss_select_support; + u64 cookie_counter; + char priv[0] __aligned(NETDEV_ALIGN); }; @@ -5584,6 +5667,14 @@ wiphy_ext_feature_isset(struct wiphy *wiphy, return (ft_byte & BIT(ftidx % 8)) != 0; } +/** + * cfg80211_free_nan_func - free NAN function + * @f: NAN function that should be freed + * + * Frees all the NAN function and all it's allocated members. + */ +void cfg80211_free_nan_func(struct cfg80211_nan_func *f); + /* ethtool helper */ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 98fd3ec8598d..e4935d963061 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -847,6 +847,21 @@ * After this command NAN functions can be added. * @NL80211_CMD_STOP_NAN: Stop the NAN operation, identified by * its %NL80211_ATTR_WDEV interface. + * @NL80211_CMD_ADD_NAN_FUNCTION: Add a NAN function. The function is defined + * with %NL80211_ATTR_NAN_FUNC nested attribute. When called, this + * operation returns the strictly positive and unique instance id + * (%NL80211_ATTR_NAN_FUNC_INST_ID) and a cookie (%NL80211_ATTR_COOKIE) + * of the function upon success. + * Since instance ID's can be re-used, this cookie is the right + * way to identify the function. This will avoid races when a termination + * event is handled by the user space after it has already added a new + * function that got the same instance id from the kernel as the one + * which just terminated. + * This cookie may be used in NAN events even before the command + * returns, so userspace shouldn't process NAN events until it processes + * the response to this command. + * Look at %NL80211_ATTR_SOCKET_OWNER as well. + * @NL80211_CMD_DEL_NAN_FUNCTION: Delete a NAN function by cookie. * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use @@ -1038,6 +1053,8 @@ enum nl80211_commands { NL80211_CMD_START_NAN, NL80211_CMD_STOP_NAN, + NL80211_CMD_ADD_NAN_FUNCTION, + NL80211_CMD_DEL_NAN_FUNCTION, /* add new commands above here */ @@ -1899,6 +1916,9 @@ enum nl80211_commands { * @NL80211_ATTR_NAN_DUAL: NAN dual band operation config (see * &enum nl80211_nan_dual_band_conf). This attribute is used with * %NL80211_CMD_START_NAN. + * @NL80211_ATTR_NAN_FUNC: a function that can be added to NAN. See + * &enum nl80211_nan_func_attributes for description of this nested + * attribute. * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -2296,6 +2316,7 @@ enum nl80211_attrs { NL80211_ATTR_NAN_MASTER_PREF, NL80211_ATTR_NAN_DUAL, + NL80211_ATTR_NAN_FUNC, /* add attributes here, update the policy in nl80211.c */ @@ -4917,4 +4938,133 @@ enum nl80211_nan_dual_band_conf { NL80211_NAN_BAND_5GHZ = 1 << 2, }; +/** + * enum nl80211_nan_function_type - NAN function type + * + * Defines the function type of a NAN function + * + * @NL80211_NAN_FUNC_PUBLISH: function is publish + * @NL80211_NAN_FUNC_SUBSCRIBE: function is subscribe + * @NL80211_NAN_FUNC_FOLLOW_UP: function is follow-up + */ +enum nl80211_nan_function_type { + NL80211_NAN_FUNC_PUBLISH, + NL80211_NAN_FUNC_SUBSCRIBE, + NL80211_NAN_FUNC_FOLLOW_UP, + + /* keep last */ + __NL80211_NAN_FUNC_TYPE_AFTER_LAST, + NL80211_NAN_FUNC_MAX_TYPE = __NL80211_NAN_FUNC_TYPE_AFTER_LAST - 1, +}; + +/** + * enum nl80211_nan_publish_type - NAN publish tx type + * + * Defines how to send publish Service Discovery Frames + * + * @NL80211_NAN_SOLICITED_PUBLISH: publish function is solicited + * @NL80211_NAN_UNSOLICITED_PUBLISH: publish function is unsolicited + */ +enum nl80211_nan_publish_type { + NL80211_NAN_SOLICITED_PUBLISH = 1 << 0, + NL80211_NAN_UNSOLICITED_PUBLISH = 1 << 1, +}; + +#define NL80211_NAN_FUNC_SERVICE_ID_LEN 6 +#define NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN 0xff +#define NL80211_NAN_FUNC_SRF_MAX_LEN 0xff + +/** + * enum nl80211_nan_func_attributes - NAN function attributes + * @__NL80211_NAN_FUNC_INVALID: invalid + * @NL80211_NAN_FUNC_TYPE: &enum nl80211_nan_function_type (u8). + * @NL80211_NAN_FUNC_SERVICE_ID: 6 bytes of the service ID hash as + * specified in NAN spec. This is a binary attribute. + * @NL80211_NAN_FUNC_PUBLISH_TYPE: relevant if the function's type is + * publish. Defines the transmission type for the publish Service Discovery + * Frame, see &enum nl80211_nan_publish_type. Its type is u8. + * @NL80211_NAN_FUNC_PUBLISH_BCAST: relevant if the function is a solicited + * publish. Should the solicited publish Service Discovery Frame be sent to + * the NAN Broadcast address. This is a flag. + * @NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE: relevant if the function's type is + * subscribe. Is the subscribe active. This is a flag. + * @NL80211_NAN_FUNC_FOLLOW_UP_ID: relevant if the function's type is follow up. + * The instance ID for the follow up Service Discovery Frame. This is u8. + * @NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID: relevant if the function's type + * is follow up. This is a u8. + * The requestor instance ID for the follow up Service Discovery Frame. + * @NL80211_NAN_FUNC_FOLLOW_UP_DEST: the MAC address of the recipient of the + * follow up Service Discovery Frame. This is a binary attribute. + * @NL80211_NAN_FUNC_CLOSE_RANGE: is this function limited for devices in a + * close range. The range itself (RSSI) is defined by the device. + * This is a flag. + * @NL80211_NAN_FUNC_TTL: strictly positive number of DWs this function should + * stay active. If not present infinite TTL is assumed. This is a u32. + * @NL80211_NAN_FUNC_SERVICE_INFO: array of bytes describing the service + * specific info. This is a binary attribute. + * @NL80211_NAN_FUNC_SRF: Service Receive Filter. This is a nested attribute. + * See &enum nl80211_nan_srf_attributes. + * @NL80211_NAN_FUNC_RX_MATCH_FILTER: Receive Matching filter. This is a nested + * attribute. It is a list of binary values. + * @NL80211_NAN_FUNC_TX_MATCH_FILTER: Transmit Matching filter. This is a + * nested attribute. It is a list of binary values. + * @NL80211_NAN_FUNC_INSTANCE_ID: The instance ID of the function. + * Its type is u8 and it cannot be 0. + * @NL80211_NAN_FUNC_TERM_REASON: NAN function termination reason. + * See &enum nl80211_nan_func_term_reason. + * + * @NUM_NL80211_NAN_FUNC_ATTR: internal + * @NL80211_NAN_FUNC_ATTR_MAX: highest NAN function attribute + */ +enum nl80211_nan_func_attributes { + __NL80211_NAN_FUNC_INVALID, + NL80211_NAN_FUNC_TYPE, + NL80211_NAN_FUNC_SERVICE_ID, + NL80211_NAN_FUNC_PUBLISH_TYPE, + NL80211_NAN_FUNC_PUBLISH_BCAST, + NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE, + NL80211_NAN_FUNC_FOLLOW_UP_ID, + NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID, + NL80211_NAN_FUNC_FOLLOW_UP_DEST, + NL80211_NAN_FUNC_CLOSE_RANGE, + NL80211_NAN_FUNC_TTL, + NL80211_NAN_FUNC_SERVICE_INFO, + NL80211_NAN_FUNC_SRF, + NL80211_NAN_FUNC_RX_MATCH_FILTER, + NL80211_NAN_FUNC_TX_MATCH_FILTER, + NL80211_NAN_FUNC_INSTANCE_ID, + NL80211_NAN_FUNC_TERM_REASON, + + /* keep last */ + NUM_NL80211_NAN_FUNC_ATTR, + NL80211_NAN_FUNC_ATTR_MAX = NUM_NL80211_NAN_FUNC_ATTR - 1 +}; + +/** + * enum nl80211_nan_srf_attributes - NAN Service Response filter attributes + * @__NL80211_NAN_SRF_INVALID: invalid + * @NL80211_NAN_SRF_INCLUDE: present if the include bit of the SRF set. + * This is a flag. + * @NL80211_NAN_SRF_BF: Bloom Filter. Present if and only if + * &NL80211_NAN_SRF_MAC_ADDRS isn't present. This attribute is binary. + * @NL80211_NAN_SRF_BF_IDX: index of the Bloom Filter. Mandatory if + * &NL80211_NAN_SRF_BF is present. This is a u8. + * @NL80211_NAN_SRF_MAC_ADDRS: list of MAC addresses for the SRF. Present if + * and only if &NL80211_NAN_SRF_BF isn't present. This is a nested + * attribute. Each nested attribute is a MAC address. + * @NUM_NL80211_NAN_SRF_ATTR: internal + * @NL80211_NAN_SRF_ATTR_MAX: highest NAN SRF attribute + */ +enum nl80211_nan_srf_attributes { + __NL80211_NAN_SRF_INVALID, + NL80211_NAN_SRF_INCLUDE, + NL80211_NAN_SRF_BF, + NL80211_NAN_SRF_BF_IDX, + NL80211_NAN_SRF_MAC_ADDRS, + + /* keep last */ + NUM_NL80211_NAN_SRF_ATTR, + NL80211_NAN_SRF_ATTR_MAX = NUM_NL80211_NAN_SRF_ATTR - 1, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 013987243c0b..8201e6d7449e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -605,7 +605,8 @@ int wiphy_register(struct wiphy *wiphy) return -EINVAL; if (WARN_ON((wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN)) && - (!rdev->ops->start_nan || !rdev->ops->stop_nan))) + (!rdev->ops->start_nan || !rdev->ops->stop_nan || + !rdev->ops->add_nan_func || !rdev->ops->del_nan_func))) return -EINVAL; /* diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9e9fb37087fc..0eca59ccd685 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -411,6 +411,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = { .len = ETH_ALEN }, [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 }, [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 }, + [NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED }, }; /* policy for the key attributes */ @@ -504,6 +505,39 @@ nl80211_bss_select_policy[NL80211_BSS_SELECT_ATTR_MAX + 1] = { }, }; +/* policy for NAN function attributes */ +static const struct nla_policy +nl80211_nan_func_policy[NL80211_NAN_FUNC_ATTR_MAX + 1] = { + [NL80211_NAN_FUNC_TYPE] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_SERVICE_ID] = { .type = NLA_BINARY, + .len = NL80211_NAN_FUNC_SERVICE_ID_LEN }, + [NL80211_NAN_FUNC_PUBLISH_TYPE] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_PUBLISH_BCAST] = { .type = NLA_FLAG }, + [NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE] = { .type = NLA_FLAG }, + [NL80211_NAN_FUNC_FOLLOW_UP_ID] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_FOLLOW_UP_DEST] = { .len = ETH_ALEN }, + [NL80211_NAN_FUNC_CLOSE_RANGE] = { .type = NLA_FLAG }, + [NL80211_NAN_FUNC_TTL] = { .type = NLA_U32 }, + [NL80211_NAN_FUNC_SERVICE_INFO] = { .type = NLA_BINARY, + .len = NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN }, + [NL80211_NAN_FUNC_SRF] = { .type = NLA_NESTED }, + [NL80211_NAN_FUNC_RX_MATCH_FILTER] = { .type = NLA_NESTED }, + [NL80211_NAN_FUNC_TX_MATCH_FILTER] = { .type = NLA_NESTED }, + [NL80211_NAN_FUNC_INSTANCE_ID] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_TERM_REASON] = { .type = NLA_U8 }, +}; + +/* policy for Service Response Filter attributes */ +static const struct nla_policy +nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = { + [NL80211_NAN_SRF_INCLUDE] = { .type = NLA_FLAG }, + [NL80211_NAN_SRF_BF] = { .type = NLA_BINARY, + .len = NL80211_NAN_FUNC_SRF_MAX_LEN }, + [NL80211_NAN_SRF_BF_IDX] = { .type = NLA_U8 }, + [NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED }, +}; + static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct netlink_callback *cb, struct cfg80211_registered_device **rdev, @@ -10566,6 +10600,325 @@ static int nl80211_stop_nan(struct sk_buff *skb, struct genl_info *info) return 0; } +static int validate_nan_filter(struct nlattr *filter_attr) +{ + struct nlattr *attr; + int len = 0, n_entries = 0, rem; + + nla_for_each_nested(attr, filter_attr, rem) { + len += nla_len(attr); + n_entries++; + } + + if (len >= U8_MAX) + return -EINVAL; + + return n_entries; +} + +static int handle_nan_filter(struct nlattr *attr_filter, + struct cfg80211_nan_func *func, + bool tx) +{ + struct nlattr *attr; + int n_entries, rem, i; + struct cfg80211_nan_func_filter *filter; + + n_entries = validate_nan_filter(attr_filter); + if (n_entries < 0) + return n_entries; + + BUILD_BUG_ON(sizeof(*func->rx_filters) != sizeof(*func->tx_filters)); + + filter = kcalloc(n_entries, sizeof(*func->rx_filters), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + i = 0; + nla_for_each_nested(attr, attr_filter, rem) { + filter[i].filter = kmemdup(nla_data(attr), nla_len(attr), + GFP_KERNEL); + filter[i].len = nla_len(attr); + i++; + } + if (tx) { + func->num_tx_filters = n_entries; + func->tx_filters = filter; + } else { + func->num_rx_filters = n_entries; + func->rx_filters = filter; + } + + return 0; +} + +static int nl80211_nan_add_func(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + struct nlattr *tb[NUM_NL80211_NAN_FUNC_ATTR], *func_attr; + struct cfg80211_nan_func *func; + struct sk_buff *msg = NULL; + void *hdr = NULL; + int err = 0; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!wdev->nan_started) + return -ENOTCONN; + + if (!info->attrs[NL80211_ATTR_NAN_FUNC]) + return -EINVAL; + + if (wdev->owner_nlportid && + wdev->owner_nlportid != info->snd_portid) + return -ENOTCONN; + + err = nla_parse(tb, NL80211_NAN_FUNC_ATTR_MAX, + nla_data(info->attrs[NL80211_ATTR_NAN_FUNC]), + nla_len(info->attrs[NL80211_ATTR_NAN_FUNC]), + nl80211_nan_func_policy); + if (err) + return err; + + func = kzalloc(sizeof(*func), GFP_KERNEL); + if (!func) + return -ENOMEM; + + func->cookie = wdev->wiphy->cookie_counter++; + + if (!tb[NL80211_NAN_FUNC_TYPE] || + nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]) > NL80211_NAN_FUNC_MAX_TYPE) { + err = -EINVAL; + goto out; + } + + + func->type = nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]); + + if (!tb[NL80211_NAN_FUNC_SERVICE_ID]) { + err = -EINVAL; + goto out; + } + + memcpy(func->service_id, nla_data(tb[NL80211_NAN_FUNC_SERVICE_ID]), + sizeof(func->service_id)); + + func->close_range = + nla_get_flag(tb[NL80211_NAN_FUNC_CLOSE_RANGE]); + + if (tb[NL80211_NAN_FUNC_SERVICE_INFO]) { + func->serv_spec_info_len = + nla_len(tb[NL80211_NAN_FUNC_SERVICE_INFO]); + func->serv_spec_info = + kmemdup(nla_data(tb[NL80211_NAN_FUNC_SERVICE_INFO]), + func->serv_spec_info_len, + GFP_KERNEL); + if (!func->serv_spec_info) { + err = -ENOMEM; + goto out; + } + } + + if (tb[NL80211_NAN_FUNC_TTL]) + func->ttl = nla_get_u32(tb[NL80211_NAN_FUNC_TTL]); + + switch (func->type) { + case NL80211_NAN_FUNC_PUBLISH: + if (!tb[NL80211_NAN_FUNC_PUBLISH_TYPE]) { + err = -EINVAL; + goto out; + } + + func->publish_type = + nla_get_u8(tb[NL80211_NAN_FUNC_PUBLISH_TYPE]); + func->publish_bcast = + nla_get_flag(tb[NL80211_NAN_FUNC_PUBLISH_BCAST]); + + if ((!(func->publish_type & NL80211_NAN_SOLICITED_PUBLISH)) && + func->publish_bcast) { + err = -EINVAL; + goto out; + } + break; + case NL80211_NAN_FUNC_SUBSCRIBE: + func->subscribe_active = + nla_get_flag(tb[NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE]); + break; + case NL80211_NAN_FUNC_FOLLOW_UP: + if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] || + !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) { + err = -EINVAL; + goto out; + } + + func->followup_id = + nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_ID]); + func->followup_reqid = + nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]); + memcpy(func->followup_dest.addr, + nla_data(tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]), + sizeof(func->followup_dest.addr)); + if (func->ttl) { + err = -EINVAL; + goto out; + } + break; + default: + err = -EINVAL; + goto out; + } + + if (tb[NL80211_NAN_FUNC_SRF]) { + struct nlattr *srf_tb[NUM_NL80211_NAN_SRF_ATTR]; + + err = nla_parse(srf_tb, NL80211_NAN_SRF_ATTR_MAX, + nla_data(tb[NL80211_NAN_FUNC_SRF]), + nla_len(tb[NL80211_NAN_FUNC_SRF]), NULL); + if (err) + goto out; + + func->srf_include = + nla_get_flag(srf_tb[NL80211_NAN_SRF_INCLUDE]); + + if (srf_tb[NL80211_NAN_SRF_BF]) { + if (srf_tb[NL80211_NAN_SRF_MAC_ADDRS] || + !srf_tb[NL80211_NAN_SRF_BF_IDX]) { + err = -EINVAL; + goto out; + } + + func->srf_bf_len = + nla_len(srf_tb[NL80211_NAN_SRF_BF]); + func->srf_bf = + kmemdup(nla_data(srf_tb[NL80211_NAN_SRF_BF]), + func->srf_bf_len, GFP_KERNEL); + if (!func->srf_bf) { + err = -ENOMEM; + goto out; + } + + func->srf_bf_idx = + nla_get_u8(srf_tb[NL80211_NAN_SRF_BF_IDX]); + } else { + struct nlattr *attr, *mac_attr = + srf_tb[NL80211_NAN_SRF_MAC_ADDRS]; + int n_entries, rem, i = 0; + + if (!mac_attr) { + err = -EINVAL; + goto out; + } + + n_entries = validate_acl_mac_addrs(mac_attr); + if (n_entries <= 0) { + err = -EINVAL; + goto out; + } + + func->srf_num_macs = n_entries; + func->srf_macs = + kzalloc(sizeof(*func->srf_macs) * n_entries, + GFP_KERNEL); + if (!func->srf_macs) { + err = -ENOMEM; + goto out; + } + + nla_for_each_nested(attr, mac_attr, rem) + memcpy(func->srf_macs[i++].addr, nla_data(attr), + sizeof(*func->srf_macs)); + } + } + + if (tb[NL80211_NAN_FUNC_TX_MATCH_FILTER]) { + err = handle_nan_filter(tb[NL80211_NAN_FUNC_TX_MATCH_FILTER], + func, true); + if (err) + goto out; + } + + if (tb[NL80211_NAN_FUNC_RX_MATCH_FILTER]) { + err = handle_nan_filter(tb[NL80211_NAN_FUNC_RX_MATCH_FILTER], + func, false); + if (err) + goto out; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out; + } + + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, + NL80211_CMD_ADD_NAN_FUNCTION); + /* This can't really happen - we just allocated 4KB */ + if (WARN_ON(!hdr)) { + err = -ENOMEM; + goto out; + } + + err = rdev_add_nan_func(rdev, wdev, func); +out: + if (err < 0) { + cfg80211_free_nan_func(func); + nlmsg_free(msg); + return err; + } + + /* propagate the instance id and cookie to userspace */ + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, func->cookie, + NL80211_ATTR_PAD)) + goto nla_put_failure; + + func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC); + if (!func_attr) + goto nla_put_failure; + + if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, + func->instance_id)) + goto nla_put_failure; + + nla_nest_end(msg, func_attr); + + genlmsg_end(msg, hdr); + return genlmsg_reply(msg, info); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} + +static int nl80211_nan_del_func(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + u64 cookie; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!wdev->nan_started) + return -ENOTCONN; + + if (!info->attrs[NL80211_ATTR_COOKIE]) + return -EINVAL; + + if (wdev->owner_nlportid && + wdev->owner_nlportid != info->snd_portid) + return -ENOTCONN; + + cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); + + rdev_del_nan_func(rdev, wdev, cookie); + + return 0; +} + static int nl80211_get_protocol_features(struct sk_buff *skb, struct genl_info *info) { @@ -11923,6 +12276,22 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_ADD_NAN_FUNCTION, + .doit = nl80211_nan_add_func, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_DEL_NAN_FUNCTION, + .doit = nl80211_nan_del_func, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, { .cmd = NL80211_CMD_SET_MCAST_RATE, .doit = nl80211_set_mcast_rate, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index afb68a8428b9..98c4c3bdcb11 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -907,6 +907,27 @@ static inline void rdev_stop_nan(struct cfg80211_registered_device *rdev, trace_rdev_return_void(&rdev->wiphy); } +static inline int +rdev_add_nan_func(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_func *nan_func) +{ + int ret; + + trace_rdev_add_nan_func(&rdev->wiphy, wdev, nan_func); + ret = rdev->ops->add_nan_func(&rdev->wiphy, wdev, nan_func); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline void rdev_del_nan_func(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, u64 cookie) +{ + trace_rdev_del_nan_func(&rdev->wiphy, wdev, cookie); + rdev->ops->del_nan_func(&rdev->wiphy, wdev, cookie); + trace_rdev_return_void(&rdev->wiphy); +} + static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_acl_data *params) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 5f3370f4c6a2..56089843d619 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1916,6 +1916,45 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_nan, TP_ARGS(wiphy, wdev) ); +TRACE_EVENT(rdev_add_nan_func, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + const struct cfg80211_nan_func *func), + TP_ARGS(wiphy, wdev, func), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u8, func_type) + __field(u64, cookie) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->func_type = func->type; + __entry->cookie = func->cookie + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", type=%u, cookie=%llu", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->func_type, + __entry->cookie) +); + +TRACE_EVENT(rdev_del_nan_func, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + u64 cookie), + TP_ARGS(wiphy, wdev, cookie), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u64, cookie) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->cookie = cookie; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie=%llu", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->cookie) +); + TRACE_EVENT(rdev_set_mac_acl, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_acl_data *params), diff --git a/net/wireless/util.c b/net/wireless/util.c index 7a2d46b0058a..8edce22d1b93 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1762,6 +1762,28 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, } EXPORT_SYMBOL(cfg80211_get_station); +void cfg80211_free_nan_func(struct cfg80211_nan_func *f) +{ + int i; + + if (!f) + return; + + kfree(f->serv_spec_info); + kfree(f->srf_bf); + kfree(f->srf_macs); + for (i = 0; i < f->num_rx_filters; i++) + kfree(f->rx_filters[i].filter); + + for (i = 0; i < f->num_tx_filters; i++) + kfree(f->tx_filters[i].filter); + + kfree(f->rx_filters); + kfree(f->tx_filters); + kfree(f); +} +EXPORT_SYMBOL(cfg80211_free_nan_func); + /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ /* Ethernet-II snap header (RFC1042 for most EtherTypes) */ const unsigned char rfc1042_header[] __aligned(2) = -- cgit v1.2.3 From a5a9dcf291e1e541243878eed2d73a74006fa1f1 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:16 +0300 Subject: cfg80211: allow the user space to change current NAN configuration Some NAN configuration paramaters may change during the operation of the NAN device. For example, a user may want to update master preference value when the device gets plugged/unplugged to the power. Add API that allows to do so. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 19 +++++++++++++++++++ include/uapi/linux/nl80211.h | 11 +++++++++-- net/wireless/nl80211.c | 42 ++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 17 +++++++++++++++++ net/wireless/trace.h | 24 ++++++++++++++++++++++++ 5 files changed, 111 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2f35ccf6da83..8574a57e19ba 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2326,6 +2326,18 @@ struct cfg80211_nan_conf { u8 dual; }; +/** + * enum cfg80211_nan_conf_changes - indicates changed fields in NAN + * configuration + * + * @CFG80211_NAN_CONF_CHANGED_PREF: master preference + * @CFG80211_NAN_CONF_CHANGED_DUAL: dual band operation + */ +enum cfg80211_nan_conf_changes { + CFG80211_NAN_CONF_CHANGED_PREF = BIT(0), + CFG80211_NAN_CONF_CHANGED_DUAL = BIT(1), +}; + /** * struct cfg80211_nan_func_filter - a NAN function Rx / Tx filter * @@ -2691,6 +2703,9 @@ struct cfg80211_nan_func { * On success the driver should assign an instance_id in the * provided @nan_func. * @del_nan_func: Delete a NAN function. + * @nan_change_conf: changes NAN configuration. The changed parameters must + * be specified in @changes (using &enum cfg80211_nan_conf_changes); + * All other parameters must be ignored. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2963,6 +2978,10 @@ struct cfg80211_ops { struct cfg80211_nan_func *nan_func); void (*del_nan_func)(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie); + int (*nan_change_conf)(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf, + u32 changes); }; /* diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e4935d963061..9c9c0c352873 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -862,6 +862,10 @@ * the response to this command. * Look at %NL80211_ATTR_SOCKET_OWNER as well. * @NL80211_CMD_DEL_NAN_FUNCTION: Delete a NAN function by cookie. + * @NL80211_CMD_CHANGE_NAN_CONFIG: Change current NAN configuration. NAN + * must be operational (%NL80211_CMD_START_NAN was executed). + * It must contain at least one of the following attributes: + * %NL80211_ATTR_NAN_MASTER_PREF, %NL80211_ATTR_NAN_DUAL. * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use @@ -1055,6 +1059,7 @@ enum nl80211_commands { NL80211_CMD_STOP_NAN, NL80211_CMD_ADD_NAN_FUNCTION, NL80211_CMD_DEL_NAN_FUNCTION, + NL80211_CMD_CHANGE_NAN_CONFIG, /* add new commands above here */ @@ -1910,12 +1915,14 @@ enum nl80211_commands { * used to pull the stored data for mesh peer in power save state. * * @NL80211_ATTR_NAN_MASTER_PREF: the master preference to be used by - * %NL80211_CMD_START_NAN. Its type is u8 and it can't be 0. + * %NL80211_CMD_START_NAN and optionally with + * %NL80211_CMD_CHANGE_NAN_CONFIG. Its type is u8 and it can't be 0. * Also, values 1 and 255 are reserved for certification purposes and * should not be used during a normal device operation. * @NL80211_ATTR_NAN_DUAL: NAN dual band operation config (see * &enum nl80211_nan_dual_band_conf). This attribute is used with - * %NL80211_CMD_START_NAN. + * %NL80211_CMD_START_NAN and optionally with + * %NL80211_CMD_CHANGE_NAN_CONFIG. * @NL80211_ATTR_NAN_FUNC: a function that can be added to NAN. See * &enum nl80211_nan_func_attributes for description of this nested * attribute. diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0eca59ccd685..c0b5ae4af2d8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10919,6 +10919,40 @@ static int nl80211_nan_del_func(struct sk_buff *skb, return 0; } +static int nl80211_nan_change_config(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + struct cfg80211_nan_conf conf = {}; + u32 changed = 0; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!wdev->nan_started) + return -ENOTCONN; + + if (info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) { + conf.master_pref = + nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]); + if (conf.master_pref <= 1 || conf.master_pref == 255) + return -EINVAL; + + changed |= CFG80211_NAN_CONF_CHANGED_PREF; + } + + if (info->attrs[NL80211_ATTR_NAN_DUAL]) { + conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]); + changed |= CFG80211_NAN_CONF_CHANGED_DUAL; + } + + if (!changed) + return -EINVAL; + + return rdev_nan_change_conf(rdev, wdev, &conf, changed); +} + static int nl80211_get_protocol_features(struct sk_buff *skb, struct genl_info *info) { @@ -12292,6 +12326,14 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_CHANGE_NAN_CONFIG, + .doit = nl80211_nan_change_config, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, { .cmd = NL80211_CMD_SET_MCAST_RATE, .doit = nl80211_set_mcast_rate, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 98c4c3bdcb11..11cf83c8ad4f 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -928,6 +928,23 @@ static inline void rdev_del_nan_func(struct cfg80211_registered_device *rdev, trace_rdev_return_void(&rdev->wiphy); } +static inline int +rdev_nan_change_conf(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf, u32 changes) +{ + int ret; + + trace_rdev_nan_change_conf(&rdev->wiphy, wdev, conf, changes); + if (rdev->ops->nan_change_conf) + ret = rdev->ops->nan_change_conf(&rdev->wiphy, wdev, conf, + changes); + else + ret = -ENOTSUPP; + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_acl_data *params) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 56089843d619..a3d0a91b1e09 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1911,6 +1911,30 @@ TRACE_EVENT(rdev_start_nan, __entry->dual) ); +TRACE_EVENT(rdev_nan_change_conf, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf, u32 changes), + TP_ARGS(wiphy, wdev, conf, changes), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u8, master_pref) + __field(u8, dual); + __field(u32, changes); + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->master_pref = conf->master_pref; + __entry->dual = conf->dual; + __entry->changes = changes; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT + ", master preference: %u, dual: %d, changes: %x", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref, + __entry->dual, __entry->changes) +); + DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_nan, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) -- cgit v1.2.3 From 50bcd31d9992e99c231820f5276e70346cbfbc51 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:17 +0300 Subject: cfg80211: provide a function to report a match for NAN Provide a function the driver can call to report a match. This will send the event to the user space. If the NAN instance is tied to the owner, the notifications will be sent to the socket that started the NAN interface only. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 37 ++++++++++++++++++++ include/uapi/linux/nl80211.h | 31 +++++++++++++++++ net/wireless/nl80211.c | 80 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 148 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8574a57e19ba..5481664b5389 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5694,6 +5694,43 @@ wiphy_ext_feature_isset(struct wiphy *wiphy, */ void cfg80211_free_nan_func(struct cfg80211_nan_func *f); +/** + * struct cfg80211_nan_match_params - NAN match parameters + * @type: the type of the function that triggered a match. If it is + * %NL80211_NAN_FUNC_SUBSCRIBE it means that we replied to a subscriber. + * If it is %NL80211_NAN_FUNC_PUBLISH, it means that we got a discovery + * result. + * If it is %NL80211_NAN_FUNC_FOLLOW_UP, we received a follow up. + * @inst_id: the local instance id + * @peer_inst_id: the instance id of the peer's function + * @addr: the MAC address of the peer + * @info_len: the length of the &info + * @info: the Service Specific Info from the peer (if any) + * @cookie: unique identifier of the corresponding function + */ +struct cfg80211_nan_match_params { + enum nl80211_nan_function_type type; + u8 inst_id; + u8 peer_inst_id; + const u8 *addr; + u8 info_len; + const u8 *info; + u64 cookie; +}; + +/** + * cfg80211_nan_match - report a match for a NAN function. + * @wdev: the wireless device reporting the match + * @match: match notification parameters + * @gfp: allocation flags + * + * This function reports that the a NAN function had a match. This + * can be a subscribe that had a match or a solicited publish that + * was sent. It can also be a follow up that was received. + */ +void cfg80211_nan_match(struct wireless_dev *wdev, + struct cfg80211_nan_match_params *match, gfp_t gfp); + /* ethtool helper */ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9c9c0c352873..995bf802d604 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -48,6 +48,7 @@ #define NL80211_MULTICAST_GROUP_REG "regulatory" #define NL80211_MULTICAST_GROUP_MLME "mlme" #define NL80211_MULTICAST_GROUP_VENDOR "vendor" +#define NL80211_MULTICAST_GROUP_NAN "nan" #define NL80211_MULTICAST_GROUP_TESTMODE "testmode" /** @@ -866,6 +867,9 @@ * must be operational (%NL80211_CMD_START_NAN was executed). * It must contain at least one of the following attributes: * %NL80211_ATTR_NAN_MASTER_PREF, %NL80211_ATTR_NAN_DUAL. + * @NL80211_CMD_NAN_FUNC_MATCH: Notification sent when a match is reported. + * This will contain a %NL80211_ATTR_NAN_MATCH nested attribute and + * %NL80211_ATTR_COOKIE. * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use @@ -1060,6 +1064,7 @@ enum nl80211_commands { NL80211_CMD_ADD_NAN_FUNCTION, NL80211_CMD_DEL_NAN_FUNCTION, NL80211_CMD_CHANGE_NAN_CONFIG, + NL80211_CMD_NAN_MATCH, /* add new commands above here */ @@ -1926,6 +1931,8 @@ enum nl80211_commands { * @NL80211_ATTR_NAN_FUNC: a function that can be added to NAN. See * &enum nl80211_nan_func_attributes for description of this nested * attribute. + * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute. + * See &enum nl80211_nan_match_attributes. * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -2324,6 +2331,7 @@ enum nl80211_attrs { NL80211_ATTR_NAN_MASTER_PREF, NL80211_ATTR_NAN_DUAL, NL80211_ATTR_NAN_FUNC, + NL80211_ATTR_NAN_MATCH, /* add attributes here, update the policy in nl80211.c */ @@ -5074,4 +5082,27 @@ enum nl80211_nan_srf_attributes { NL80211_NAN_SRF_ATTR_MAX = NUM_NL80211_NAN_SRF_ATTR - 1, }; +/** + * enum nl80211_nan_match_attributes - NAN match attributes + * @__NL80211_NAN_MATCH_INVALID: invalid + * @NL80211_NAN_MATCH_FUNC_LOCAL: the local function that had the + * match. This is a nested attribute. + * See &enum nl80211_nan_func_attributes. + * @NL80211_NAN_MATCH_FUNC_PEER: the peer function + * that caused the match. This is a nested attribute. + * See &enum nl80211_nan_func_attributes. + * + * @NUM_NL80211_NAN_MATCH_ATTR: internal + * @NL80211_NAN_MATCH_ATTR_MAX: highest NAN match attribute + */ +enum nl80211_nan_match_attributes { + __NL80211_NAN_MATCH_INVALID, + NL80211_NAN_MATCH_FUNC_LOCAL, + NL80211_NAN_MATCH_FUNC_PEER, + + /* keep last */ + NUM_NL80211_NAN_MATCH_ATTR, + NL80211_NAN_MATCH_ATTR_MAX = NUM_NL80211_NAN_MATCH_ATTR - 1 +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c0b5ae4af2d8..0bbd9ed28318 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -56,6 +56,7 @@ enum nl80211_multicast_groups { NL80211_MCGRP_REGULATORY, NL80211_MCGRP_MLME, NL80211_MCGRP_VENDOR, + NL80211_MCGRP_NAN, NL80211_MCGRP_TESTMODE /* keep last - ifdef! */ }; @@ -65,6 +66,7 @@ static const struct genl_multicast_group nl80211_mcgrps[] = { [NL80211_MCGRP_REGULATORY] = { .name = NL80211_MULTICAST_GROUP_REG }, [NL80211_MCGRP_MLME] = { .name = NL80211_MULTICAST_GROUP_MLME }, [NL80211_MCGRP_VENDOR] = { .name = NL80211_MULTICAST_GROUP_VENDOR }, + [NL80211_MCGRP_NAN] = { .name = NL80211_MULTICAST_GROUP_NAN }, #ifdef CONFIG_NL80211_TESTMODE [NL80211_MCGRP_TESTMODE] = { .name = NL80211_MULTICAST_GROUP_TESTMODE } #endif @@ -10953,6 +10955,84 @@ static int nl80211_nan_change_config(struct sk_buff *skb, return rdev_nan_change_conf(rdev, wdev, &conf, changed); } +void cfg80211_nan_match(struct wireless_dev *wdev, + struct cfg80211_nan_match_params *match, gfp_t gfp) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct nlattr *match_attr, *local_func_attr, *peer_func_attr; + struct sk_buff *msg; + void *hdr; + + if (WARN_ON(!match->inst_id || !match->peer_inst_id || !match->addr)) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NAN_MATCH); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, match->cookie, + NL80211_ATTR_PAD) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, match->addr)) + goto nla_put_failure; + + match_attr = nla_nest_start(msg, NL80211_ATTR_NAN_MATCH); + if (!match_attr) + goto nla_put_failure; + + local_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_LOCAL); + if (!local_func_attr) + goto nla_put_failure; + + if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->inst_id)) + goto nla_put_failure; + + nla_nest_end(msg, local_func_attr); + + peer_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_PEER); + if (!peer_func_attr) + goto nla_put_failure; + + if (nla_put_u8(msg, NL80211_NAN_FUNC_TYPE, match->type) || + nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->peer_inst_id)) + goto nla_put_failure; + + if (match->info && match->info_len && + nla_put(msg, NL80211_NAN_FUNC_SERVICE_INFO, match->info_len, + match->info)) + goto nla_put_failure; + + nla_nest_end(msg, peer_func_attr); + nla_nest_end(msg, match_attr); + genlmsg_end(msg, hdr); + + if (!wdev->owner_nlportid) + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), + msg, 0, NL80211_MCGRP_NAN, gfp); + else + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, + wdev->owner_nlportid); + + return; + +nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_nan_match); + static int nl80211_get_protocol_features(struct sk_buff *skb, struct genl_info *info) { -- cgit v1.2.3 From 368e5a7b4ecb71b3d347799cb9351b0dce5dec70 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:18 +0300 Subject: cfg80211: Provide an API to report NAN function termination Provide a function that reports NAN DE function termination. The function may be terminated due to one of the following reasons: user request, ttl expiration or failure. If the NAN instance is tied to the owner, the notification will be sent to the socket that started the NAN interface only Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 16 ++++++++++++ include/uapi/linux/nl80211.h | 18 +++++++++++++ net/wireless/nl80211.c | 60 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5481664b5389..fe78f02a242e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5731,6 +5731,22 @@ struct cfg80211_nan_match_params { void cfg80211_nan_match(struct wireless_dev *wdev, struct cfg80211_nan_match_params *match, gfp_t gfp); +/** + * cfg80211_nan_func_terminated - notify about NAN function termination. + * + * @wdev: the wireless device reporting the match + * @inst_id: the local instance id + * @reason: termination reason (one of the NL80211_NAN_FUNC_TERM_REASON_*) + * @cookie: unique NAN function identifier + * @gfp: allocation flags + * + * This function reports that the a NAN function is terminated. + */ +void cfg80211_nan_func_terminated(struct wireless_dev *wdev, + u8 inst_id, + enum nl80211_nan_func_term_reason reason, + u64 cookie, gfp_t gfp); + /* ethtool helper */ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 995bf802d604..56368e9b4622 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -863,6 +863,9 @@ * the response to this command. * Look at %NL80211_ATTR_SOCKET_OWNER as well. * @NL80211_CMD_DEL_NAN_FUNCTION: Delete a NAN function by cookie. + * This command is also used as a notification sent when a NAN function is + * terminated. This will contain a %NL80211_ATTR_NAN_FUNC_INST_ID + * and %NL80211_ATTR_COOKIE attributes. * @NL80211_CMD_CHANGE_NAN_CONFIG: Change current NAN configuration. NAN * must be operational (%NL80211_CMD_START_NAN was executed). * It must contain at least one of the following attributes: @@ -4985,6 +4988,21 @@ enum nl80211_nan_publish_type { NL80211_NAN_UNSOLICITED_PUBLISH = 1 << 1, }; +/** + * enum nl80211_nan_func_term_reason - NAN functions termination reason + * + * Defines termination reasons of a NAN function + * + * @NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST: requested by user + * @NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED: timeout + * @NL80211_NAN_FUNC_TERM_REASON_ERROR: errored + */ +enum nl80211_nan_func_term_reason { + NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST, + NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED, + NL80211_NAN_FUNC_TERM_REASON_ERROR, +}; + #define NL80211_NAN_FUNC_SERVICE_ID_LEN 6 #define NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN 0xff #define NL80211_NAN_FUNC_SRF_MAX_LEN 0xff diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0bbd9ed28318..92eb6f0b9f3d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11033,6 +11033,66 @@ nla_put_failure: } EXPORT_SYMBOL(cfg80211_nan_match); +void cfg80211_nan_func_terminated(struct wireless_dev *wdev, + u8 inst_id, + enum nl80211_nan_func_term_reason reason, + u64 cookie, gfp_t gfp) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + struct nlattr *func_attr; + void *hdr; + + if (WARN_ON(!inst_id)) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DEL_NAN_FUNCTION); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, + NL80211_ATTR_PAD)) + goto nla_put_failure; + + func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC); + if (!func_attr) + goto nla_put_failure; + + if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, inst_id) || + nla_put_u8(msg, NL80211_NAN_FUNC_TERM_REASON, reason)) + goto nla_put_failure; + + nla_nest_end(msg, func_attr); + genlmsg_end(msg, hdr); + + if (!wdev->owner_nlportid) + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), + msg, 0, NL80211_MCGRP_NAN, gfp); + else + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, + wdev->owner_nlportid); + + return; + +nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_nan_func_terminated); + static int nl80211_get_protocol_features(struct sk_buff *skb, struct genl_info *info) { -- cgit v1.2.3 From 5953ff6d6a3e92dd4f8d9d8e8a9359d7e180ae93 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:19 +0300 Subject: mac80211: implement nan_change_conf Implement nan_change_conf callback which allows to change current NAN configuration (master preference and dual band operation). Store the current NAN configuration in sdata, so it can be used both to provide the driver the updated configuration with changes and also it will be used in hw reconfig flows in next patches. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 9 +++++++++ net/mac80211/cfg.c | 31 +++++++++++++++++++++++++++++++ net/mac80211/driver-ops.h | 21 +++++++++++++++++++++ net/mac80211/ieee80211_i.h | 10 ++++++++++ net/mac80211/trace.h | 31 +++++++++++++++++++++++++++++++ 5 files changed, 102 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index df9b5cff300c..ef8d02a2ce1a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3423,6 +3423,12 @@ enum ieee80211_reconfig_type { * * @start_nan: join an existing NAN cluster, or create a new one. * @stop_nan: leave the NAN cluster. + * @nan_change_conf: change NAN configuration. The data in cfg80211_nan_conf + * contains full new configuration and changes specify which parameters + * are changed with respect to the last NAN config. + * The driver gets both full configuration and the changed parameters since + * some devices may need the full configuration while others need only the + * changed parameters. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -3664,6 +3670,9 @@ struct ieee80211_ops { struct cfg80211_nan_conf *conf); int (*stop_nan)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); + int (*nan_change_conf)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct cfg80211_nan_conf *conf, u32 changes); }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9aabb0932d24..38fdb539cab3 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -186,6 +186,36 @@ static void ieee80211_stop_nan(struct wiphy *wiphy, ieee80211_sdata_stop(sdata); } +static int ieee80211_nan_change_conf(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf, + u32 changes) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct cfg80211_nan_conf new_conf; + int ret = 0; + + if (sdata->vif.type != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!ieee80211_sdata_running(sdata)) + return -ENETDOWN; + + new_conf = sdata->u.nan.conf; + + if (changes & CFG80211_NAN_CONF_CHANGED_PREF) + new_conf.master_pref = conf->master_pref; + + if (changes & CFG80211_NAN_CONF_CHANGED_DUAL) + new_conf.dual = conf->dual; + + ret = drv_nan_change_conf(sdata->local, sdata, &new_conf, changes); + if (!ret) + sdata->u.nan.conf = new_conf; + + return ret; +} + static int ieee80211_set_noack_map(struct wiphy *wiphy, struct net_device *dev, u16 noack_map) @@ -3500,4 +3530,5 @@ const struct cfg80211_ops mac80211_config_ops = { .del_tx_ts = ieee80211_del_tx_ts, .start_nan = ieee80211_start_nan, .stop_nan = ieee80211_stop_nan, + .nan_change_conf = ieee80211_nan_change_conf, }; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index e52cfb855bd9..daaa409bec6f 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1192,4 +1192,25 @@ static inline void drv_stop_nan(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline int drv_nan_change_conf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_conf *conf, + u32 changes) +{ + int ret; + + might_sleep(); + check_sdata_in_driver(sdata); + + if (!local->ops->nan_change_conf) + return -EOPNOTSUPP; + + trace_drv_nan_change_conf(local, sdata, conf, changes); + ret = local->ops->nan_change_conf(&local->hw, &sdata->vif, conf, + changes); + trace_drv_return_int(local, ret); + + return ret; +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c71c73594790..712b20b05660 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -830,6 +830,15 @@ struct ieee80211_if_mntr { u8 mu_follow_addr[ETH_ALEN] __aligned(2); }; +/** + * struct ieee80211_if_nan - NAN state + * + * @conf: current NAN configuration + */ +struct ieee80211_if_nan { + struct cfg80211_nan_conf conf; +}; + struct ieee80211_sub_if_data { struct list_head list; @@ -929,6 +938,7 @@ struct ieee80211_sub_if_data { struct ieee80211_if_mesh mesh; struct ieee80211_if_ocb ocb; struct ieee80211_if_mntr mntr; + struct ieee80211_if_nan nan; } u; #ifdef CONFIG_MAC80211_DEBUGFS diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index deefbfb9f6fb..0bafe1159d01 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1750,6 +1750,37 @@ TRACE_EVENT(drv_stop_nan, ) ); +TRACE_EVENT(drv_nan_change_conf, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_conf *conf, + u32 changes), + + TP_ARGS(local, sdata, conf, changes), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, master_pref) + __field(u8, dual) + __field(u32, changes) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->master_pref = conf->master_pref; + __entry->dual = conf->dual; + __entry->changes = changes; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT + ", master preference: %u, dual: %d, changes: 0x%x", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref, + __entry->dual, __entry->changes + ) +); + /* * Tracing for API calls that drivers call. */ -- cgit v1.2.3 From 167e33f4f68cc8e4e3bdaf6d43641176c51f2d79 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:20 +0300 Subject: mac80211: Implement add_nan_func and rm_nan_func Implement add/rm_nan_func functions and handle NAN function termination notifications. Handle instance_id allocation for NAN functions and implement the reconfig flow. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 31 ++++++++++++ net/mac80211/cfg.c | 114 +++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/driver-ops.h | 32 +++++++++++++ net/mac80211/ieee80211_i.h | 7 +++ net/mac80211/iface.c | 20 +++++++- net/mac80211/main.c | 3 ++ net/mac80211/trace.h | 52 +++++++++++++++++++++ net/mac80211/util.c | 48 ++++++++++++++++++- 8 files changed, 304 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ef8d02a2ce1a..d4ddf476dc76 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2177,6 +2177,8 @@ enum ieee80211_hw_flags { * @n_cipher_schemes: a size of an array of cipher schemes definitions. * @cipher_schemes: a pointer to an array of cipher scheme definitions * supported by HW. + * @max_nan_de_entries: maximum number of NAN DE functions supported by the + * device. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -2211,6 +2213,7 @@ struct ieee80211_hw { u8 uapsd_max_sp_len; u8 n_cipher_schemes; const struct ieee80211_cipher_scheme *cipher_schemes; + u8 max_nan_de_entries; }; static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw, @@ -3429,6 +3432,12 @@ enum ieee80211_reconfig_type { * The driver gets both full configuration and the changed parameters since * some devices may need the full configuration while others need only the * changed parameters. + * @add_nan_func: Add a NAN function. Returns 0 on success. The data in + * cfg80211_nan_func must not be referenced outside the scope of + * this call. + * @del_nan_func: Remove a NAN function. The driver must call + * ieee80211_nan_func_terminated() with + * NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST reason code upon removal. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -3673,6 +3682,12 @@ struct ieee80211_ops { int (*nan_change_conf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct cfg80211_nan_conf *conf, u32 changes); + int (*add_nan_func)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + const struct cfg80211_nan_func *nan_func); + void (*del_nan_func)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u8 instance_id); }; /** @@ -5746,4 +5761,20 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, void ieee80211_txq_get_depth(struct ieee80211_txq *txq, unsigned long *frame_cnt, unsigned long *byte_cnt); + +/** + * ieee80211_nan_func_terminated - notify about NAN function termination. + * + * This function is used to notify mac80211 about NAN function termination. + * Note that this function can't be called from hard irq. + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @inst_id: the local instance id + * @reason: termination reason (one of the NL80211_NAN_FUNC_TERM_REASON_*) + * @gfp: allocation flags + */ +void ieee80211_nan_func_terminated(struct ieee80211_vif *vif, + u8 inst_id, + enum nl80211_nan_func_term_reason reason, + gfp_t gfp); #endif /* MAC80211_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 38fdb539cab3..72ddb4379319 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -174,6 +174,8 @@ static int ieee80211_start_nan(struct wiphy *wiphy, if (ret) ieee80211_sdata_stop(sdata); + sdata->u.nan.conf = *conf; + return ret; } @@ -216,6 +218,84 @@ static int ieee80211_nan_change_conf(struct wiphy *wiphy, return ret; } +static int ieee80211_add_nan_func(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_func *nan_func) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + int ret; + + if (sdata->vif.type != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!ieee80211_sdata_running(sdata)) + return -ENETDOWN; + + spin_lock_bh(&sdata->u.nan.func_lock); + + ret = idr_alloc(&sdata->u.nan.function_inst_ids, + nan_func, 1, sdata->local->hw.max_nan_de_entries + 1, + GFP_ATOMIC); + spin_unlock_bh(&sdata->u.nan.func_lock); + + if (ret < 0) + return ret; + + nan_func->instance_id = ret; + + WARN_ON(nan_func->instance_id == 0); + + ret = drv_add_nan_func(sdata->local, sdata, nan_func); + if (ret) { + spin_lock_bh(&sdata->u.nan.func_lock); + idr_remove(&sdata->u.nan.function_inst_ids, + nan_func->instance_id); + spin_unlock_bh(&sdata->u.nan.func_lock); + } + + return ret; +} + +static struct cfg80211_nan_func * +ieee80211_find_nan_func_by_cookie(struct ieee80211_sub_if_data *sdata, + u64 cookie) +{ + struct cfg80211_nan_func *func; + int id; + + lockdep_assert_held(&sdata->u.nan.func_lock); + + idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) { + if (func->cookie == cookie) + return func; + } + + return NULL; +} + +static void ieee80211_del_nan_func(struct wiphy *wiphy, + struct wireless_dev *wdev, u64 cookie) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct cfg80211_nan_func *func; + u8 instance_id = 0; + + if (sdata->vif.type != NL80211_IFTYPE_NAN || + !ieee80211_sdata_running(sdata)) + return; + + spin_lock_bh(&sdata->u.nan.func_lock); + + func = ieee80211_find_nan_func_by_cookie(sdata, cookie); + if (func) + instance_id = func->instance_id; + + spin_unlock_bh(&sdata->u.nan.func_lock); + + if (instance_id) + drv_del_nan_func(sdata->local, sdata, instance_id); +} + static int ieee80211_set_noack_map(struct wiphy *wiphy, struct net_device *dev, u16 noack_map) @@ -3443,6 +3523,38 @@ static int ieee80211_del_tx_ts(struct wiphy *wiphy, struct net_device *dev, return -ENOENT; } +void ieee80211_nan_func_terminated(struct ieee80211_vif *vif, + u8 inst_id, + enum nl80211_nan_func_term_reason reason, + gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct cfg80211_nan_func *func; + u64 cookie; + + if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) + return; + + spin_lock_bh(&sdata->u.nan.func_lock); + + func = idr_find(&sdata->u.nan.function_inst_ids, inst_id); + if (WARN_ON(!func)) { + spin_unlock_bh(&sdata->u.nan.func_lock); + return; + } + + cookie = func->cookie; + idr_remove(&sdata->u.nan.function_inst_ids, inst_id); + + spin_unlock_bh(&sdata->u.nan.func_lock); + + cfg80211_free_nan_func(func); + + cfg80211_nan_func_terminated(ieee80211_vif_to_wdev(vif), inst_id, + reason, cookie, gfp); +} +EXPORT_SYMBOL(ieee80211_nan_func_terminated); + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -3531,4 +3643,6 @@ const struct cfg80211_ops mac80211_config_ops = { .start_nan = ieee80211_start_nan, .stop_nan = ieee80211_stop_nan, .nan_change_conf = ieee80211_nan_change_conf, + .add_nan_func = ieee80211_add_nan_func, + .del_nan_func = ieee80211_del_nan_func, }; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index daaa409bec6f..dea92c33b2ca 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1213,4 +1213,36 @@ static inline int drv_nan_change_conf(struct ieee80211_local *local, return ret; } +static inline int drv_add_nan_func(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + const struct cfg80211_nan_func *nan_func) +{ + int ret; + + might_sleep(); + check_sdata_in_driver(sdata); + + if (!local->ops->add_nan_func) + return -EOPNOTSUPP; + + trace_drv_add_nan_func(local, sdata, nan_func); + ret = local->ops->add_nan_func(&local->hw, &sdata->vif, nan_func); + trace_drv_return_int(local, ret); + + return ret; +} + +static inline void drv_del_nan_func(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u8 instance_id) +{ + might_sleep(); + check_sdata_in_driver(sdata); + + trace_drv_del_nan_func(local, sdata, instance_id); + if (local->ops->del_nan_func) + local->ops->del_nan_func(&local->hw, &sdata->vif, instance_id); + trace_drv_return_void(local); +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 712b20b05660..2b391f242e58 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -86,6 +86,8 @@ struct ieee80211_local; #define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */) +#define IEEE80211_MAX_NAN_INSTANCE_ID 255 + struct ieee80211_fragment_entry { struct sk_buff_head skb_list; unsigned long first_frag_time; @@ -834,9 +836,14 @@ struct ieee80211_if_mntr { * struct ieee80211_if_nan - NAN state * * @conf: current NAN configuration + * @func_ids: a bitmap of available instance_id's */ struct ieee80211_if_nan { struct cfg80211_nan_conf conf; + + /* protects function_inst_ids */ + spinlock_t func_lock; + struct idr function_inst_ids; }; struct ieee80211_sub_if_data { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 507f46a8eb1c..638ec0759078 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -798,6 +798,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, struct ps_data *ps; struct cfg80211_chan_def chandef; bool cancel_scan; + struct cfg80211_nan_func *func; clear_bit(SDATA_STATE_RUNNING, &sdata->state); @@ -950,11 +951,22 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_adjust_monitor_flags(sdata, -1); break; + case NL80211_IFTYPE_NAN: + /* clean all the functions */ + spin_lock_bh(&sdata->u.nan.func_lock); + + idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, i) { + idr_remove(&sdata->u.nan.function_inst_ids, i); + cfg80211_free_nan_func(func); + } + idr_destroy(&sdata->u.nan.function_inst_ids); + + spin_unlock_bh(&sdata->u.nan.func_lock); + break; case NL80211_IFTYPE_P2P_DEVICE: /* relies on synchronize_rcu() below */ RCU_INIT_POINTER(local->p2p_sdata, NULL); /* fall through */ - case NL80211_IFTYPE_NAN: default: cancel_work_sync(&sdata->work); /* @@ -1462,9 +1474,13 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_WDS: sdata->vif.bss_conf.bssid = NULL; break; + case NL80211_IFTYPE_NAN: + idr_init(&sdata->u.nan.function_inst_ids); + spin_lock_init(&sdata->u.nan.func_lock); + sdata->vif.bss_conf.bssid = sdata->vif.addr; + break; case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_DEVICE: - case NL80211_IFTYPE_NAN: sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_UNSPECIFIED: diff --git a/net/mac80211/main.c b/net/mac80211/main.c index b5cf2c5cc166..1075ac24c8c5 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1063,6 +1063,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->dynamic_ps_forced_timeout = -1; + if (!local->hw.max_nan_de_entries) + local->hw.max_nan_de_entries = IEEE80211_MAX_NAN_INSTANCE_ID; + result = ieee80211_wep_init(local); if (result < 0) wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n", diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 0bafe1159d01..37891fa67e9a 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1781,6 +1781,58 @@ TRACE_EVENT(drv_nan_change_conf, ) ); +TRACE_EVENT(drv_add_nan_func, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + const struct cfg80211_nan_func *func), + + TP_ARGS(local, sdata, func), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, type) + __field(u8, inst_id) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->type = func->type; + __entry->inst_id = func->instance_id; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT + ", type: %u, inst_id: %u", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->type, __entry->inst_id + ) +); + +TRACE_EVENT(drv_del_nan_func, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u8 instance_id), + + TP_ARGS(local, sdata, instance_id), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, instance_id) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->instance_id = instance_id; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT + ", instance_id: %u", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->instance_id + ) +); + /* * Tracing for API calls that drivers call. */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 5b57fcaaec9b..91754c8dafb2 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1749,6 +1749,46 @@ static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->sta_mtx); } +static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) +{ + struct cfg80211_nan_func *func, **funcs; + int res, id, i = 0; + + res = drv_start_nan(sdata->local, sdata, + &sdata->u.nan.conf); + if (WARN_ON(res)) + return res; + + funcs = kzalloc((sdata->local->hw.max_nan_de_entries + 1) * + sizeof(*funcs), GFP_KERNEL); + if (!funcs) + return -ENOMEM; + + /* Add all the functions: + * This is a little bit ugly. We need to call a potentially sleeping + * callback for each NAN function, so we can't hold the spinlock. + */ + spin_lock_bh(&sdata->u.nan.func_lock); + + idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) + funcs[i++] = func; + + spin_unlock_bh(&sdata->u.nan.func_lock); + + for (i = 0; funcs[i]; i++) { + res = drv_add_nan_func(sdata->local, sdata, funcs[i]); + if (WARN_ON(res)) + ieee80211_nan_func_terminated(&sdata->vif, + funcs[i]->instance_id, + NL80211_NAN_FUNC_TERM_REASON_ERROR, + GFP_KERNEL); + } + + kfree(funcs); + + return 0; +} + int ieee80211_reconfig(struct ieee80211_local *local) { struct ieee80211_hw *hw = &local->hw; @@ -1972,11 +2012,17 @@ int ieee80211_reconfig(struct ieee80211_local *local) ieee80211_bss_info_change_notify(sdata, changed); } break; + case NL80211_IFTYPE_NAN: + res = ieee80211_reconfig_nan(sdata); + if (res < 0) { + ieee80211_handle_reconfig_failure(local); + return res; + } + break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: - case NL80211_IFTYPE_NAN: /* nothing to do */ break; case NL80211_IFTYPE_UNSPECIFIED: -- cgit v1.2.3 From 92bc43bce2849c814cece258694f167d28524fbd Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 20 Sep 2016 17:31:21 +0300 Subject: mac80211: Add API to report NAN function match Provide an API to report NAN function match. Mac80211 will lookup the corresponding cookie and report the match to cfg80211. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 16 ++++++++++++++++ net/mac80211/cfg.c | 25 +++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d4ddf476dc76..fc589ba90a48 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5777,4 +5777,20 @@ void ieee80211_nan_func_terminated(struct ieee80211_vif *vif, u8 inst_id, enum nl80211_nan_func_term_reason reason, gfp_t gfp); + +/** + * ieee80211_nan_func_match - notify about NAN function match event. + * + * This function is used to notify mac80211 about NAN function match. The + * cookie inside the match struct will be assigned by mac80211. + * Note that this function can't be called from hard irq. + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @match: match event information + * @gfp: allocation flags + */ +void ieee80211_nan_func_match(struct ieee80211_vif *vif, + struct cfg80211_nan_match_params *match, + gfp_t gfp); + #endif /* MAC80211_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 72ddb4379319..fd6541f3ade3 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3555,6 +3555,31 @@ void ieee80211_nan_func_terminated(struct ieee80211_vif *vif, } EXPORT_SYMBOL(ieee80211_nan_func_terminated); +void ieee80211_nan_func_match(struct ieee80211_vif *vif, + struct cfg80211_nan_match_params *match, + gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct cfg80211_nan_func *func; + + if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) + return; + + spin_lock_bh(&sdata->u.nan.func_lock); + + func = idr_find(&sdata->u.nan.function_inst_ids, match->inst_id); + if (WARN_ON(!func)) { + spin_unlock_bh(&sdata->u.nan.func_lock); + return; + } + match->cookie = func->cookie; + + spin_unlock_bh(&sdata->u.nan.func_lock); + + cfg80211_nan_match(ieee80211_vif_to_wdev(vif), match, gfp); +} +EXPORT_SYMBOL(ieee80211_nan_func_match); + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, -- cgit v1.2.3 From 097b065b5cbfa3fd57b47f3c86d6baa96c30bf31 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Fri, 23 Sep 2016 21:59:09 +0200 Subject: fq.h: Port memory limit mechanism from fq_codel The reusable fairness queueing implementation (fq.h) lacks the memory usage limit that the fq_codel qdisc has. This means that small devices (e.g. WiFi routers) can run out of memory when flooded with a large number of packets. This ports the memory limit feature from fq_codel to fq.h. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- include/net/fq.h | 3 +++ include/net/fq_impl.h | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/fq.h b/include/net/fq.h index 268b49049c37..6d8521a30c5c 100644 --- a/include/net/fq.h +++ b/include/net/fq.h @@ -72,9 +72,12 @@ struct fq { u32 flows_cnt; u32 perturbation; u32 limit; + u32 memory_limit; + u32 memory_usage; u32 quantum; u32 backlog; u32 overlimit; + u32 overmemory; u32 collisions; }; diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index 163f3ed0f05a..4e6131cd3f43 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -29,6 +29,7 @@ static struct sk_buff *fq_flow_dequeue(struct fq *fq, tin->backlog_packets--; flow->backlog -= skb->len; fq->backlog--; + fq->memory_usage -= skb->truesize; if (flow->backlog == 0) { list_del_init(&flow->backlogchain); @@ -154,6 +155,7 @@ static void fq_tin_enqueue(struct fq *fq, flow->backlog += skb->len; tin->backlog_bytes += skb->len; tin->backlog_packets++; + fq->memory_usage += skb->truesize; fq->backlog++; fq_recalc_backlog(fq, tin, flow); @@ -166,7 +168,7 @@ static void fq_tin_enqueue(struct fq *fq, __skb_queue_tail(&flow->queue, skb); - if (fq->backlog > fq->limit) { + if (fq->backlog > fq->limit || fq->memory_usage > fq->memory_limit) { flow = list_first_entry_or_null(&fq->backlogs, struct fq_flow, backlogchain); @@ -181,6 +183,8 @@ static void fq_tin_enqueue(struct fq *fq, flow->tin->overlimit++; fq->overlimit++; + if (fq->memory_usage > fq->memory_limit) + fq->overmemory++; } } @@ -251,6 +255,7 @@ static int fq_init(struct fq *fq, int flows_cnt) fq->perturbation = prandom_u32(); fq->quantum = 300; fq->limit = 8192; + fq->memory_limit = 16 << 20; /* 16 MBytes */ fq->flows = kcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL); if (!fq->flows) -- cgit v1.2.3 From 354d381baf1126c45d03b5c0d87d22caf938b86b Mon Sep 17 00:00:00 2001 From: Pedersen, Thomas Date: Wed, 28 Sep 2016 16:56:28 -0700 Subject: mac80211: add offset_tsf driver op and use it for mesh This allows the mesh sync (and debugfs) code to make incremental TSF adjustments, avoiding any uncertainty introduced by delay in programming absolute TSF. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- include/net/mac80211.h | 8 ++++++++ net/mac80211/debugfs_netdev.c | 12 +++++++++--- net/mac80211/driver-ops.c | 15 +++++++++++++++ net/mac80211/driver-ops.h | 3 +++ net/mac80211/mesh_sync.c | 10 +++++++--- net/mac80211/trace.h | 26 ++++++++++++++++++++++++++ 6 files changed, 68 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index fc589ba90a48..c9f39538ac17 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3169,6 +3169,12 @@ enum ieee80211_reconfig_type { * required function. * The callback can sleep. * + * @offset_tsf: Offset the TSF timer by the specified value in the + * firmware/hardware. Preferred to set_tsf as it avoids delay between + * calling set_tsf() and hardware getting programmed, which will show up + * as TSF delay. Is not a required function. + * The callback can sleep. + * * @reset_tsf: Reset the TSF timer and allow firmware/hardware to synchronize * with other STAs in the IBSS. This is only used in IBSS mode. This * function is optional if the firmware/hardware takes full care of @@ -3549,6 +3555,8 @@ struct ieee80211_ops { u64 (*get_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); void (*set_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u64 tsf); + void (*offset_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + s64 offset); void (*reset_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int (*tx_last_beacon)(struct ieee80211_hw *hw); int (*ampdu_action)(struct ieee80211_hw *hw, diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 5d35c0f37bb7..bcec1240f41d 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -556,9 +556,15 @@ static ssize_t ieee80211_if_parse_tsf( ret = kstrtoull(buf, 10, &tsf); if (ret < 0) return ret; - if (tsf_is_delta) - tsf = drv_get_tsf(local, sdata) + tsf_is_delta * tsf; - if (local->ops->set_tsf) { + if (tsf_is_delta && local->ops->offset_tsf) { + drv_offset_tsf(local, sdata, tsf_is_delta * tsf); + wiphy_info(local->hw.wiphy, + "debugfs offset TSF by %018lld\n", + tsf_is_delta * tsf); + } else if (local->ops->set_tsf) { + if (tsf_is_delta) + tsf = drv_get_tsf(local, sdata) + + tsf_is_delta * tsf; drv_set_tsf(local, sdata, tsf); wiphy_info(local->hw.wiphy, "debugfs set TSF to %#018llx\n", tsf); diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index c701b6438bd9..bb886e7db47f 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -215,6 +215,21 @@ void drv_set_tsf(struct ieee80211_local *local, trace_drv_return_void(local); } +void drv_offset_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + s64 offset) +{ + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + return; + + trace_drv_offset_tsf(local, sdata, offset); + if (local->ops->offset_tsf) + local->ops->offset_tsf(&local->hw, &sdata->vif, offset); + trace_drv_return_void(local); +} + void drv_reset_tsf(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index dea92c33b2ca..09f77e4a8a79 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -569,6 +569,9 @@ u64 drv_get_tsf(struct ieee80211_local *local, void drv_set_tsf(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u64 tsf); +void drv_offset_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + s64 offset); void drv_reset_tsf(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 64bc22ad9496..22ca43c500e4 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -70,9 +70,13 @@ void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata) } spin_unlock_bh(&ifmsh->sync_offset_lock); - tsf = drv_get_tsf(local, sdata); - if (tsf != -1ULL) - drv_set_tsf(local, sdata, tsf + tsfdelta); + if (local->ops->offset_tsf) { + drv_offset_tsf(local, sdata, tsfdelta); + } else { + tsf = drv_get_tsf(local, sdata); + if (tsf != -1ULL) + drv_set_tsf(local, sdata, tsf + tsfdelta); + } } static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 37891fa67e9a..92a47afaa989 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -984,6 +984,32 @@ TRACE_EVENT(drv_set_tsf, ) ); +TRACE_EVENT(drv_offset_tsf, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + s64 offset), + + TP_ARGS(local, sdata, offset), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(s64, tsf_offset) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->tsf_offset = offset; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT " tsf offset:%lld", + LOCAL_PR_ARG, VIF_PR_ARG, + (unsigned long long)__entry->tsf_offset + ) +); + DEFINE_EVENT(local_sdata_evt, drv_reset_tsf, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), -- cgit v1.2.3 From bb42f2d13ffcd0baed7547b37d05add51fcd50e1 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Thu, 22 Sep 2016 19:04:20 +0200 Subject: mac80211: Move reorder-sensitive TX handlers to after TXQ dequeue The TXQ intermediate queues can cause packet reordering when more than one flow is active to a single station. Since some of the wifi-specific packet handling (notably sequence number and encryption handling) is sensitive to re-ordering, things break if they are applied before the TXQ. This splits up the TX handlers and fast_xmit logic into two parts: An early part and a late part. The former is applied before TXQ enqueue, and the latter after dequeue. The non-TXQ path just applies both parts at once. Because fragments shouldn't be split up or reordered, the fragmentation handler is run after dequeue. Any fragments are then kept in the TXQ and on subsequent dequeues they take precedence over dequeueing from the FQ structure. This approach avoids having to scatter special cases all over the place for when TXQ is enabled, at the cost of making the fast_xmit and TX handler code slightly more complex. Signed-off-by: Toke Høiland-Jørgensen [fix a few code-style nits, make ieee80211_xmit_fast_finish void, remove a useless txq->sta check] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 + net/mac80211/ieee80211_i.h | 9 ++ net/mac80211/rx.c | 4 +- net/mac80211/sta_info.c | 10 +- net/mac80211/tx.c | 284 ++++++++++++++++++++++++++++++++------------- net/mac80211/util.c | 11 +- 6 files changed, 230 insertions(+), 90 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c9f39538ac17..a810dfcb83c2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -715,6 +715,7 @@ enum mac80211_tx_info_flags { * frame (PS-Poll or uAPSD). * @IEEE80211_TX_CTRL_RATE_INJECT: This frame is injected with rate information * @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame + * @IEEE80211_TX_CTRL_FAST_XMIT: This frame is going through the fast_xmit path * * These flags are used in tx_info->control.flags. */ @@ -723,6 +724,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_PS_RESPONSE = BIT(1), IEEE80211_TX_CTRL_RATE_INJECT = BIT(2), IEEE80211_TX_CTRL_AMSDU = BIT(3), + IEEE80211_TX_CTRL_FAST_XMIT = BIT(4), }; /* diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2b391f242e58..8f8bddd5c8d8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -815,12 +815,14 @@ enum txq_info_flags { * @def_flow: used as a fallback flow when a packet destined to @tin hashes to * a fq_flow which is already owned by a different tin * @def_cvars: codel vars for @def_flow + * @frags: used to keep fragments created after dequeue */ struct txq_info { struct fq_tin tin; struct fq_flow def_flow; struct codel_vars def_cvars; struct codel_stats cstats; + struct sk_buff_head frags; unsigned long flags; /* keep last! */ @@ -1498,6 +1500,13 @@ static inline struct txq_info *to_txq_info(struct ieee80211_txq *txq) return container_of(txq, struct txq_info, txq); } +static inline bool txq_has_queue(struct ieee80211_txq *txq) +{ + struct txq_info *txqi = to_txq_info(txq); + + return !(skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets); +} + static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) { return ether_addr_equal(raddr, addr) || diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c9489a86e6d6..b2fe725881dc 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1323,9 +1323,7 @@ static void sta_ps_start(struct sta_info *sta) return; for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { - struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); - - if (txqi->tin.backlog_packets) + if (txq_has_queue(sta->sta.txq[tid])) set_bit(tid, &sta->txq_buffered_tids); else clear_bit(tid, &sta->txq_buffered_tids); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 1b1b28ff4fdb..167bff078bdd 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1212,12 +1212,10 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) if (sta->sta.txq[0]) { for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { - struct txq_info *txqi = to_txq_info(sta->sta.txq[i]); - - if (!txqi->tin.backlog_packets) + if (!txq_has_queue(sta->sta.txq[i])) continue; - drv_wake_tx_queue(local, txqi); + drv_wake_tx_queue(local, to_txq_info(sta->sta.txq[i])); } } @@ -1649,9 +1647,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, return; for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { - struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); - - if (!(tids & BIT(tid)) || txqi->tin.backlog_packets) + if (!(tids & BIT(tid)) || txq_has_queue(sta->sta.txq[tid])) continue; sta_info_recalc_tim(sta); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 378a7a6b6dbe..0ea1b0d02186 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -853,8 +853,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) tid = *qc & IEEE80211_QOS_CTL_TID_MASK; tx->sta->tx_stats.msdu[tid]++; - if (!tx->sta->sta.txq[0]) - hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); + hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); return TX_CONTINUE; } @@ -1404,6 +1403,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, fq_flow_init(&txqi->def_flow); codel_vars_init(&txqi->def_cvars); codel_stats_init(&txqi->cstats); + __skb_queue_head_init(&txqi->frags); txqi->txq.vif = &sdata->vif; @@ -1426,6 +1426,7 @@ void ieee80211_txq_purge(struct ieee80211_local *local, struct fq_tin *tin = &txqi->tin; fq_tin_reset(fq, tin, fq_skb_free_func); + ieee80211_purge_tx_queue(&local->hw, &txqi->frags); } int ieee80211_txq_setup_flows(struct ieee80211_local *local) @@ -1495,6 +1496,47 @@ void ieee80211_txq_teardown_flows(struct ieee80211_local *local) spin_unlock_bh(&fq->lock); } +static bool ieee80211_queue_skb(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct fq *fq = &local->fq; + struct ieee80211_vif *vif; + struct txq_info *txqi; + struct ieee80211_sta *pubsta; + + if (!local->ops->wake_tx_queue || + sdata->vif.type == NL80211_IFTYPE_MONITOR) + return false; + + if (sta && sta->uploaded) + pubsta = &sta->sta; + else + pubsta = NULL; + + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, u.ap); + + vif = &sdata->vif; + txqi = ieee80211_get_txq(local, vif, pubsta, skb); + + if (!txqi) + return false; + + info->control.vif = vif; + + spin_lock_bh(&fq->lock); + ieee80211_txq_enqueue(local, txqi, skb); + spin_unlock_bh(&fq->lock); + + drv_wake_tx_queue(local, txqi); + + return true; +} + static bool ieee80211_tx_frags(struct ieee80211_local *local, struct ieee80211_vif *vif, struct ieee80211_sta *sta, @@ -1502,9 +1544,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, bool txpending) { struct ieee80211_tx_control control = {}; - struct fq *fq = &local->fq; struct sk_buff *skb, *tmp; - struct txq_info *txqi; unsigned long flags; skb_queue_walk_safe(skbs, skb, tmp) { @@ -1519,21 +1559,6 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, } #endif - txqi = ieee80211_get_txq(local, vif, sta, skb); - if (txqi) { - info->control.vif = vif; - - __skb_unlink(skb, skbs); - - spin_lock_bh(&fq->lock); - ieee80211_txq_enqueue(local, txqi, skb); - spin_unlock_bh(&fq->lock); - - drv_wake_tx_queue(local, txqi); - - continue; - } - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); if (local->queue_stop_reasons[q] || (!txpending && !skb_queue_empty(&local->pending[q]))) { @@ -1654,10 +1679,13 @@ static bool __ieee80211_tx(struct ieee80211_local *local, /* * Invoke TX handlers, return 0 on success and non-zero if the * frame was dropped or queued. + * + * The handlers are split into an early and late part. The latter is everything + * that can be sensitive to reordering, and will be deferred to after packets + * are dequeued from the intermediate queues (when they are enabled). */ -static int invoke_tx_handlers(struct ieee80211_tx_data *tx) +static int invoke_tx_handlers_early(struct ieee80211_tx_data *tx) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); ieee80211_tx_result res = TX_DROP; #define CALL_TXH(txh) \ @@ -1675,6 +1703,31 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL)) CALL_TXH(ieee80211_tx_h_rate_ctrl); + txh_done: + if (unlikely(res == TX_DROP)) { + I802_DEBUG_INC(tx->local->tx_handlers_drop); + if (tx->skb) + ieee80211_free_txskb(&tx->local->hw, tx->skb); + else + ieee80211_purge_tx_queue(&tx->local->hw, &tx->skbs); + return -1; + } else if (unlikely(res == TX_QUEUED)) { + I802_DEBUG_INC(tx->local->tx_handlers_queued); + return -1; + } + + return 0; +} + +/* + * Late handlers can be called while the sta lock is held. Handlers that can + * cause packets to be generated will cause deadlock! + */ +static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + ieee80211_tx_result res = TX_CONTINUE; + if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) { __skb_queue_tail(&tx->skbs, tx->skb); tx->skb = NULL; @@ -1707,6 +1760,15 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) return 0; } +static int invoke_tx_handlers(struct ieee80211_tx_data *tx) +{ + int r = invoke_tx_handlers_early(tx); + + if (r) + return r; + return invoke_tx_handlers_late(tx); +} + bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct sk_buff *skb, int band, struct ieee80211_sta **sta) @@ -1781,7 +1843,13 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; - if (!invoke_tx_handlers(&tx)) + if (invoke_tx_handlers_early(&tx)) + return false; + + if (ieee80211_queue_skb(local, sdata, tx.sta, tx.skb)) + return true; + + if (!invoke_tx_handlers_late(&tx)) result = __ieee80211_tx(local, &tx.skbs, led_len, tx.sta, txpending); @@ -3125,8 +3193,71 @@ out: return ret; } +/* + * Can be called while the sta lock is held. Anything that can cause packets to + * be generated will cause deadlock! + */ +static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, u8 pn_offs, + struct ieee80211_key *key, + struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *)skb->data; + u8 tid = IEEE80211_NUM_TIDS; + + if (key) + info->control.hw_key = &key->conf; + + ieee80211_tx_stats(skb->dev, skb->len); + + if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + *ieee80211_get_qos_ctl(hdr) = tid; + hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); + } else { + info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; + hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number); + sdata->sequence_number += 0x10; + } + + if (skb_shinfo(skb)->gso_size) + sta->tx_stats.msdu[tid] += + DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size); + else + sta->tx_stats.msdu[tid]++; + + info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; + + /* statistics normally done by ieee80211_tx_h_stats (but that + * has to consider fragmentation, so is more complex) + */ + sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; + sta->tx_stats.packets[skb_get_queue_mapping(skb)]++; + + if (pn_offs) { + u64 pn; + u8 *crypto_hdr = skb->data + pn_offs; + + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn = atomic64_inc_return(&key->conf.tx_pn); + crypto_hdr[0] = pn; + crypto_hdr[1] = pn >> 8; + crypto_hdr[4] = pn >> 16; + crypto_hdr[5] = pn >> 24; + crypto_hdr[6] = pn >> 32; + crypto_hdr[7] = pn >> 40; + break; + } + } +} + static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, - struct net_device *dev, struct sta_info *sta, + struct sta_info *sta, struct ieee80211_fast_tx *fast_tx, struct sk_buff *skb) { @@ -3177,8 +3308,6 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, return true; } - ieee80211_tx_stats(dev, skb->len + extra_head); - if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) && ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb)) return true; @@ -3207,24 +3336,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT | IEEE80211_TX_CTL_DONTFRAG | (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0); - - if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { - *ieee80211_get_qos_ctl(hdr) = tid; - if (!sta->sta.txq[0]) - hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); - } else { - info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; - hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number); - sdata->sequence_number += 0x10; - } - - if (skb_shinfo(skb)->gso_size) - sta->tx_stats.msdu[tid] += - DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size); - else - sta->tx_stats.msdu[tid]++; - - info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; + info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT; __skb_queue_head_init(&tx.skbs); @@ -3234,9 +3346,6 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, tx.sta = sta; tx.key = fast_tx->key; - if (fast_tx->key) - info->control.hw_key = &fast_tx->key->conf; - if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { tx.skb = skb; r = ieee80211_tx_h_rate_ctrl(&tx); @@ -3250,31 +3359,11 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, } } - /* statistics normally done by ieee80211_tx_h_stats (but that - * has to consider fragmentation, so is more complex) - */ - sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; - sta->tx_stats.packets[skb_get_queue_mapping(skb)]++; + if (ieee80211_queue_skb(local, sdata, sta, skb)) + return true; - if (fast_tx->pn_offs) { - u64 pn; - u8 *crypto_hdr = skb->data + fast_tx->pn_offs; - - switch (fast_tx->key->conf.cipher) { - case WLAN_CIPHER_SUITE_CCMP: - case WLAN_CIPHER_SUITE_CCMP_256: - case WLAN_CIPHER_SUITE_GCMP: - case WLAN_CIPHER_SUITE_GCMP_256: - pn = atomic64_inc_return(&fast_tx->key->conf.tx_pn); - crypto_hdr[0] = pn; - crypto_hdr[1] = pn >> 8; - crypto_hdr[4] = pn >> 16; - crypto_hdr[5] = pn >> 24; - crypto_hdr[6] = pn >> 32; - crypto_hdr[7] = pn >> 40; - break; - } - } + ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs, + fast_tx->key, skb); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, @@ -3294,12 +3383,21 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, struct sk_buff *skb = NULL; struct fq *fq = &local->fq; struct fq_tin *tin = &txqi->tin; + struct ieee80211_tx_info *info; + struct ieee80211_tx_data tx; + ieee80211_tx_result r; spin_lock_bh(&fq->lock); if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags)) goto out; + /* Make sure fragments stay together. */ + skb = __skb_dequeue(&txqi->frags); + if (skb) + goto out; + +begin: skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func); if (!skb) goto out; @@ -3307,16 +3405,46 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, ieee80211_set_skb_vif(skb, txqi); hdr = (struct ieee80211_hdr *)skb->data; - if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) { + info = IEEE80211_SKB_CB(skb); + + memset(&tx, 0, sizeof(tx)); + __skb_queue_head_init(&tx.skbs); + tx.local = local; + tx.skb = skb; + tx.sdata = vif_to_sdata(info->control.vif); + + if (txq->sta) + tx.sta = container_of(txq->sta, struct sta_info, sta); + + /* + * The key can be removed while the packet was queued, so need to call + * this here to get the current key. + */ + r = ieee80211_tx_h_select_key(&tx); + if (r != TX_CONTINUE) { + ieee80211_free_txskb(&local->hw, skb); + goto begin; + } + + if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) { struct sta_info *sta = container_of(txq->sta, struct sta_info, sta); - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + u8 pn_offs = 0; - hdr->seq_ctrl = ieee80211_tx_next_seq(sta, txq->tid); - if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags)) - info->flags |= IEEE80211_TX_CTL_AMPDU; - else - info->flags &= ~IEEE80211_TX_CTL_AMPDU; + if (tx.key && + (tx.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) + pn_offs = ieee80211_hdrlen(hdr->frame_control); + + ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs, + tx.key, skb); + } else { + if (invoke_tx_handlers_late(&tx)) + goto begin; + + skb = __skb_dequeue(&tx.skbs); + + if (!skb_queue_empty(&tx.skbs)) + skb_queue_splice_tail(&tx.skbs, &txqi->frags); } out: @@ -3354,7 +3482,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, fast_tx = rcu_dereference(sta->fast_tx); if (fast_tx && - ieee80211_xmit_fast(sdata, dev, sta, fast_tx, skb)) + ieee80211_xmit_fast(sdata, sta, fast_tx, skb)) goto out; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 91754c8dafb2..545c79a42a77 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3441,11 +3441,18 @@ void ieee80211_txq_get_depth(struct ieee80211_txq *txq, unsigned long *byte_cnt) { struct txq_info *txqi = to_txq_info(txq); + u32 frag_cnt = 0, frag_bytes = 0; + struct sk_buff *skb; + + skb_queue_walk(&txqi->frags, skb) { + frag_cnt++; + frag_bytes += skb->len; + } if (frame_cnt) - *frame_cnt = txqi->tin.backlog_packets; + *frame_cnt = txqi->tin.backlog_packets + frag_cnt; if (byte_cnt) - *byte_cnt = txqi->tin.backlog_bytes; + *byte_cnt = txqi->tin.backlog_bytes + frag_bytes; } EXPORT_SYMBOL(ieee80211_txq_get_depth); -- cgit v1.2.3 From 9095e10edd28e1e4a10ba5ca61fb54d9f74f8968 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 30 Sep 2016 19:08:06 +0200 Subject: mpls: move mpls_hdr to a common location This will be also used by openvswitch. Signed-off-by: Jiri Benc Acked-by: David Ahern Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/mpls.h | 9 +++++++++ net/mpls/internal.h | 10 +--------- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/mpls.h b/include/net/mpls.h index 5b3b5addfb08..3ebbc0bb57ff 100644 --- a/include/net/mpls.h +++ b/include/net/mpls.h @@ -19,12 +19,21 @@ #define MPLS_HLEN 4 +struct mpls_shim_hdr { + __be32 label_stack_entry; +}; + static inline bool eth_p_mpls(__be16 eth_type) { return eth_type == htons(ETH_P_MPLS_UC) || eth_type == htons(ETH_P_MPLS_MC); } +static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) +{ + return (struct mpls_shim_hdr *)skb_network_header(skb); +} + /* * For non-MPLS skbs this will correspond to the network header. * For MPLS skbs it will be before the network_header as the MPLS diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 732a5c17e986..bdfef6c3271a 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -1,9 +1,6 @@ #ifndef MPLS_INTERNAL_H #define MPLS_INTERNAL_H - -struct mpls_shim_hdr { - __be32 label_stack_entry; -}; +#include struct mpls_entry_decoded { u32 label; @@ -93,11 +90,6 @@ struct mpls_route { /* next hop label forwarding entry */ #define endfor_nexthops(rt) } -static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) -{ - return (struct mpls_shim_hdr *)skb_network_header(skb); -} - static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos) { struct mpls_shim_hdr result; -- cgit v1.2.3 From 85de4a2101acb85c3b1dde465e84596ccca99f2c Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 30 Sep 2016 19:08:07 +0200 Subject: openvswitch: use mpls_hdr skb_mpls_header is equivalent to mpls_hdr now. Use the existing helper instead. Signed-off-by: Jiri Benc Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/mpls.h | 12 ------------ net/openvswitch/actions.c | 24 ++++++++++++------------ 2 files changed, 12 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/mpls.h b/include/net/mpls.h index 3ebbc0bb57ff..1dbc669b770e 100644 --- a/include/net/mpls.h +++ b/include/net/mpls.h @@ -33,16 +33,4 @@ static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) { return (struct mpls_shim_hdr *)skb_network_header(skb); } - -/* - * For non-MPLS skbs this will correspond to the network header. - * For MPLS skbs it will be before the network_header as the MPLS - * label stack lies between the end of the mac header and the network - * header. That is, for MPLS skbs the end of the mac header - * is the top of the MPLS label stack. - */ -static inline unsigned char *skb_mpls_header(struct sk_buff *skb) -{ - return skb_mac_header(skb) + skb->mac_len; -} #endif diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 863e992dfbc0..4e03f64709bc 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -160,7 +160,7 @@ static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_action_push_mpls *mpls) { - __be32 *new_mpls_lse; + struct mpls_shim_hdr *new_mpls_lse; /* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */ if (skb->encapsulation) @@ -180,8 +180,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, skb_reset_mac_header(skb); skb_set_network_header(skb, skb->mac_len); - new_mpls_lse = (__be32 *)skb_mpls_header(skb); - *new_mpls_lse = mpls->mpls_lse; + new_mpls_lse = mpls_hdr(skb); + new_mpls_lse->label_stack_entry = mpls->mpls_lse; skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); @@ -202,7 +202,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, if (unlikely(err)) return err; - skb_postpull_rcsum(skb, skb_mpls_header(skb), MPLS_HLEN); + skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN); memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), skb->mac_len); @@ -211,10 +211,10 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, skb_reset_mac_header(skb); skb_set_network_header(skb, skb->mac_len); - /* skb_mpls_header() is used to locate the ethertype - * field correctly in the presence of VLAN tags. + /* mpls_hdr() is used to locate the ethertype field correctly in the + * presence of VLAN tags. */ - hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN); + hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN); update_ethertype(skb, hdr, ethertype); if (eth_p_mpls(skb->protocol)) skb->protocol = ethertype; @@ -226,7 +226,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, const __be32 *mpls_lse, const __be32 *mask) { - __be32 *stack; + struct mpls_shim_hdr *stack; __be32 lse; int err; @@ -234,16 +234,16 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, if (unlikely(err)) return err; - stack = (__be32 *)skb_mpls_header(skb); - lse = OVS_MASKED(*stack, *mpls_lse, *mask); + stack = mpls_hdr(skb); + lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask); if (skb->ip_summed == CHECKSUM_COMPLETE) { - __be32 diff[] = { ~(*stack), lse }; + __be32 diff[] = { ~(stack->label_stack_entry), lse }; skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); } - *stack = lse; + stack->label_stack_entry = lse; flow_key->mpls.top_lse = lse; return 0; } -- cgit v1.2.3 From c0cd1ba4f8bd8b5fef43bc51a2983673b8f086ff Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Oct 2016 11:25:53 +1100 Subject: net/ncsi: Introduce ncsi_stop_dev() This introduces ncsi_stop_dev(), as counterpart to ncsi_start_dev(), to stop the NCSI device so that it can be reenabled in future. This API should be called when the network device driver is going to shutdown the device. There are 3 things done in the function: Stop the channel monitoring; Reset channels to inactive state; Report NCSI link down. Signed-off-by: Gavin Shan Reviewed-by: Joel Stanley Signed-off-by: David S. Miller --- include/net/ncsi.h | 5 +++++ net/ncsi/ncsi-manage.c | 37 ++++++++++++++++++++++++------------- 2 files changed, 29 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/ncsi.h b/include/net/ncsi.h index 1dbf42f79750..68680baac0fd 100644 --- a/include/net/ncsi.h +++ b/include/net/ncsi.h @@ -31,6 +31,7 @@ struct ncsi_dev { struct ncsi_dev *ncsi_register_dev(struct net_device *dev, void (*notifier)(struct ncsi_dev *nd)); int ncsi_start_dev(struct ncsi_dev *nd); +void ncsi_stop_dev(struct ncsi_dev *nd); void ncsi_unregister_dev(struct ncsi_dev *nd); #else /* !CONFIG_NET_NCSI */ static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev, @@ -44,6 +45,10 @@ static inline int ncsi_start_dev(struct ncsi_dev *nd) return -ENOTTY; } +static void ncsi_stop_dev(struct ncsi_dev *nd) +{ +} + static inline void ncsi_unregister_dev(struct ncsi_dev *nd) { } diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 4742c7c6c748..5e509e547c2d 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1187,11 +1187,7 @@ EXPORT_SYMBOL_GPL(ncsi_register_dev); int ncsi_start_dev(struct ncsi_dev *nd) { struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); - struct ncsi_package *np; - struct ncsi_channel *nc; - unsigned long flags; - bool chained; - int old_state, ret; + int ret; if (nd->state != ncsi_dev_state_registered && nd->state != ncsi_dev_state_functional) @@ -1203,9 +1199,29 @@ int ncsi_start_dev(struct ncsi_dev *nd) return 0; } - /* Reset channel's state and start over */ + if (ndp->flags & NCSI_DEV_HWA) + ret = ncsi_enable_hwa(ndp); + else + ret = ncsi_choose_active_channel(ndp); + + return ret; +} +EXPORT_SYMBOL_GPL(ncsi_start_dev); + +void ncsi_stop_dev(struct ncsi_dev *nd) +{ + struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); + struct ncsi_package *np; + struct ncsi_channel *nc; + bool chained; + int old_state; + unsigned long flags; + + /* Stop the channel monitor and reset channel's state */ NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { + ncsi_stop_channel_monitor(nc); + spin_lock_irqsave(&nc->lock, flags); chained = !list_empty(&nc->link); old_state = nc->state; @@ -1217,14 +1233,9 @@ int ncsi_start_dev(struct ncsi_dev *nd) } } - if (ndp->flags & NCSI_DEV_HWA) - ret = ncsi_enable_hwa(ndp); - else - ret = ncsi_choose_active_channel(ndp); - - return ret; + ncsi_report_link(ndp, true); } -EXPORT_SYMBOL_GPL(ncsi_start_dev); +EXPORT_SYMBOL_GPL(ncsi_stop_dev); void ncsi_unregister_dev(struct ncsi_dev *nd) { -- cgit v1.2.3 From a44c984f1ef16229cdfd92326ad10118b1940ff9 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 13 Sep 2016 10:08:58 +1000 Subject: netfilter: merge fixup for "nf_tables_netdev: remove redundant ip_hdr assignment" Signed-off-by: Stephen Rothwell Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/net/netfilter/nf_tables_ipv4.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index 968f00b82fb5..25e33aee91e7 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -33,7 +33,6 @@ __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, if (!iph) return -1; - iph = ip_hdr(skb); if (iph->ihl < 5 || iph->version != 4) return -1; -- cgit v1.2.3 From 819bf593767c0966f320c51c6ed3f3835062abc8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 Oct 2016 14:56:53 +0200 Subject: docs-rst: sphinxify 802.11 documentation This is just a very basic conversion, I've split up the original multi-book template, and also split up the multi-part mac80211 part in the original book; neither of those were handled by the automatic pandoc conversion. Fix errors that showed up, resulting in a much nicer rendering, at least for the interface combinations documentation. Signed-off-by: Johannes Berg Signed-off-by: Jonathan Corbet --- Documentation/80211/cfg80211.rst | 345 ++++++++++++++++++ Documentation/80211/conf.py | 5 + Documentation/80211/index.rst | 17 + Documentation/80211/introduction.rst | 17 + Documentation/80211/mac80211-advanced.rst | 295 +++++++++++++++ Documentation/80211/mac80211.rst | 216 +++++++++++ Documentation/DocBook/80211.tmpl | 584 ------------------------------ Documentation/DocBook/Makefile | 2 +- Documentation/index.rst | 1 + include/net/cfg80211.h | 77 ++-- 10 files changed, 939 insertions(+), 620 deletions(-) create mode 100644 Documentation/80211/cfg80211.rst create mode 100644 Documentation/80211/conf.py create mode 100644 Documentation/80211/index.rst create mode 100644 Documentation/80211/introduction.rst create mode 100644 Documentation/80211/mac80211-advanced.rst create mode 100644 Documentation/80211/mac80211.rst delete mode 100644 Documentation/DocBook/80211.tmpl (limited to 'include/net') diff --git a/Documentation/80211/cfg80211.rst b/Documentation/80211/cfg80211.rst new file mode 100644 index 000000000000..b1e149ea6fee --- /dev/null +++ b/Documentation/80211/cfg80211.rst @@ -0,0 +1,345 @@ +================== +cfg80211 subsystem +================== + +Device registration +=================== + +.. kernel-doc:: include/net/cfg80211.h + :doc: Device registration + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_channel_flags + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_channel + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_rate_flags + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_rate + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_sta_ht_cap + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_supported_band + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_signal_type + +.. kernel-doc:: include/net/cfg80211.h + :functions: wiphy_params_flags + +.. kernel-doc:: include/net/cfg80211.h + :functions: wiphy_flags + +.. kernel-doc:: include/net/cfg80211.h + :functions: wiphy + +.. kernel-doc:: include/net/cfg80211.h + :functions: wireless_dev + +.. kernel-doc:: include/net/cfg80211.h + :functions: wiphy_new + +.. kernel-doc:: include/net/cfg80211.h + :functions: wiphy_register + +.. kernel-doc:: include/net/cfg80211.h + :functions: wiphy_unregister + +.. kernel-doc:: include/net/cfg80211.h + :functions: wiphy_free + +.. kernel-doc:: include/net/cfg80211.h + :functions: wiphy_name + +.. kernel-doc:: include/net/cfg80211.h + :functions: wiphy_dev + +.. kernel-doc:: include/net/cfg80211.h + :functions: wiphy_priv + +.. kernel-doc:: include/net/cfg80211.h + :functions: priv_to_wiphy + +.. kernel-doc:: include/net/cfg80211.h + :functions: set_wiphy_dev + +.. kernel-doc:: include/net/cfg80211.h + :functions: wdev_priv + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_iface_limit + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_iface_combination + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_check_combinations + +Actions and configuration +========================= + +.. kernel-doc:: include/net/cfg80211.h + :doc: Actions and configuration + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_ops + +.. kernel-doc:: include/net/cfg80211.h + :functions: vif_params + +.. kernel-doc:: include/net/cfg80211.h + :functions: key_params + +.. kernel-doc:: include/net/cfg80211.h + :functions: survey_info_flags + +.. kernel-doc:: include/net/cfg80211.h + :functions: survey_info + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_beacon_data + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_ap_settings + +.. kernel-doc:: include/net/cfg80211.h + :functions: station_parameters + +.. kernel-doc:: include/net/cfg80211.h + :functions: rate_info_flags + +.. kernel-doc:: include/net/cfg80211.h + :functions: rate_info + +.. kernel-doc:: include/net/cfg80211.h + :functions: station_info + +.. kernel-doc:: include/net/cfg80211.h + :functions: monitor_flags + +.. kernel-doc:: include/net/cfg80211.h + :functions: mpath_info_flags + +.. kernel-doc:: include/net/cfg80211.h + :functions: mpath_info + +.. kernel-doc:: include/net/cfg80211.h + :functions: bss_parameters + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_txq_params + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_crypto_settings + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_auth_request + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_assoc_request + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_deauth_request + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_disassoc_request + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_ibss_params + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_connect_params + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_pmksa + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_rx_mlme_mgmt + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_auth_timeout + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_rx_assoc_resp + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_assoc_timeout + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_tx_mlme_mgmt + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_ibss_joined + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_connect_result + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_connect_bss + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_connect_timeout + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_roamed + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_disconnected + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_ready_on_channel + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_remain_on_channel_expired + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_new_sta + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_rx_mgmt + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_mgmt_tx_status + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_cqm_rssi_notify + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_cqm_pktloss_notify + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_michael_mic_failure + +Scanning and BSS list handling +============================== + +.. kernel-doc:: include/net/cfg80211.h + :doc: Scanning and BSS list handling + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_ssid + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_scan_request + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_scan_done + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_bss + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_inform_bss + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_inform_bss_frame_data + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_inform_bss_data + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_unlink_bss + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_find_ie + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_bss_get_ie + +Utility functions +================= + +.. kernel-doc:: include/net/cfg80211.h + :doc: Utility functions + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_channel_to_frequency + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_frequency_to_channel + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_get_channel + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_get_response_rate + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_hdrlen + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_get_hdrlen_from_skb + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_radiotap_iterator + +Data path helpers +================= + +.. kernel-doc:: include/net/cfg80211.h + :doc: Data path helpers + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_data_to_8023 + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_data_from_8023 + +.. kernel-doc:: include/net/cfg80211.h + :functions: ieee80211_amsdu_to_8023s + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_classify8021d + +Regulatory enforcement infrastructure +===================================== + +.. kernel-doc:: include/net/cfg80211.h + :doc: Regulatory enforcement infrastructure + +.. kernel-doc:: include/net/cfg80211.h + :functions: regulatory_hint + +.. kernel-doc:: include/net/cfg80211.h + :functions: wiphy_apply_custom_regulatory + +.. kernel-doc:: include/net/cfg80211.h + :functions: freq_reg_info + +RFkill integration +================== + +.. kernel-doc:: include/net/cfg80211.h + :doc: RFkill integration + +.. kernel-doc:: include/net/cfg80211.h + :functions: wiphy_rfkill_set_hw_state + +.. kernel-doc:: include/net/cfg80211.h + :functions: wiphy_rfkill_start_polling + +.. kernel-doc:: include/net/cfg80211.h + :functions: wiphy_rfkill_stop_polling + +Test mode +========= + +.. kernel-doc:: include/net/cfg80211.h + :doc: Test mode + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_testmode_alloc_reply_skb + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_testmode_reply + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_testmode_alloc_event_skb + +.. kernel-doc:: include/net/cfg80211.h + :functions: cfg80211_testmode_event diff --git a/Documentation/80211/conf.py b/Documentation/80211/conf.py new file mode 100644 index 000000000000..20c7c275ef4a --- /dev/null +++ b/Documentation/80211/conf.py @@ -0,0 +1,5 @@ +# -*- coding: utf-8; mode: python -*- + +project = "Linux 802.11 Driver Developer's Guide" + +tags.add("subproject") diff --git a/Documentation/80211/index.rst b/Documentation/80211/index.rst new file mode 100644 index 000000000000..90bba476f442 --- /dev/null +++ b/Documentation/80211/index.rst @@ -0,0 +1,17 @@ +===================================== +Linux 802.11 Driver Developer's Guide +===================================== + +.. toctree:: + + introduction + cfg80211 + mac80211 + mac80211-advanced + +.. only:: subproject + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/80211/introduction.rst b/Documentation/80211/introduction.rst new file mode 100644 index 000000000000..4938fa87691c --- /dev/null +++ b/Documentation/80211/introduction.rst @@ -0,0 +1,17 @@ +============ +Introduction +============ + +Explaining wireless 802.11 networking in the Linux kernel + +Copyright 2007-2009 Johannes Berg + +These books attempt to give a description of the various subsystems +that play a role in 802.11 wireless networking in Linux. Since these +books are for kernel developers they attempts to document the +structures and functions used in the kernel as well as giving a +higher-level overview. + +The reader is expected to be familiar with the 802.11 standard as +published by the IEEE in 802.11-2007 (or possibly later versions). +References to this standard will be given as "802.11-2007 8.1.5". diff --git a/Documentation/80211/mac80211-advanced.rst b/Documentation/80211/mac80211-advanced.rst new file mode 100644 index 000000000000..70a89b2163c2 --- /dev/null +++ b/Documentation/80211/mac80211-advanced.rst @@ -0,0 +1,295 @@ +============================= +mac80211 subsystem (advanced) +============================= + +Information contained within this part of the book is of interest only +for advanced interaction of mac80211 with drivers to exploit more +hardware capabilities and improve performance. + +LED support +=========== + +Mac80211 supports various ways of blinking LEDs. Wherever possible, +device LEDs should be exposed as LED class devices and hooked up to the +appropriate trigger, which will then be triggered appropriately by +mac80211. + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_get_tx_led_name + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_get_rx_led_name + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_get_assoc_led_name + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_get_radio_led_name + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_tpt_blink + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_tpt_led_trigger_flags + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_create_tpt_led_trigger + +Hardware crypto acceleration +============================ + +.. kernel-doc:: include/net/mac80211.h + :doc: Hardware crypto acceleration + +.. kernel-doc:: include/net/mac80211.h + :functions: set_key_cmd + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_key_conf + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_key_flags + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_get_tkip_p1k + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_get_tkip_p1k_iv + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_get_tkip_p2k + +Powersave support +================= + +.. kernel-doc:: include/net/mac80211.h + :doc: Powersave support + +Beacon filter support +===================== + +.. kernel-doc:: include/net/mac80211.h + :doc: Beacon filter support + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_beacon_loss + +Multiple queues and QoS support +=============================== + +TBD + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_tx_queue_params + +Access point mode support +========================= + +TBD + +Some parts of the if_conf should be discussed here instead + +Insert notes about VLAN interfaces with hw crypto here or in the hw +crypto chapter. + +support for powersaving clients +------------------------------- + +.. kernel-doc:: include/net/mac80211.h + :doc: AP support for powersaving clients + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_get_buffered_bc + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_beacon_get + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_sta_eosp + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_frame_release_type + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_sta_ps_transition + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_sta_ps_transition_ni + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_sta_set_buffered + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_sta_block_awake + +Supporting multiple virtual interfaces +====================================== + +TBD + +Note: WDS with identical MAC address should almost always be OK + +Insert notes about having multiple virtual interfaces with different MAC +addresses here, note which configurations are supported by mac80211, add +notes about supporting hw crypto with it. + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_iterate_active_interfaces + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_iterate_active_interfaces_atomic + +Station handling +================ + +TODO + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_sta + +.. kernel-doc:: include/net/mac80211.h + :functions: sta_notify_cmd + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_find_sta + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_find_sta_by_ifaddr + +Hardware scan offload +===================== + +TBD + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_scan_completed + +Aggregation +=========== + +TX A-MPDU aggregation +--------------------- + +.. kernel-doc:: net/mac80211/agg-tx.c + :doc: TX A-MPDU aggregation + +.. WARNING: DOCPROC directive not supported: !Cnet/mac80211/agg-tx.c + +RX A-MPDU aggregation +--------------------- + +.. kernel-doc:: net/mac80211/agg-rx.c + :doc: RX A-MPDU aggregation + +.. WARNING: DOCPROC directive not supported: !Cnet/mac80211/agg-rx.c + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_ampdu_mlme_action + +Spatial Multiplexing Powersave (SMPS) +===================================== + +.. kernel-doc:: include/net/mac80211.h + :doc: Spatial multiplexing power save + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_request_smps + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_smps_mode + +TBD + +This part of the book describes the rate control algorithm interface and +how it relates to mac80211 and drivers. + +Rate Control API +================ + +TBD + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_start_tx_ba_session + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_start_tx_ba_cb_irqsafe + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_stop_tx_ba_session + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_stop_tx_ba_cb_irqsafe + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_rate_control_changed + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_tx_rate_control + +.. kernel-doc:: include/net/mac80211.h + :functions: rate_control_send_low + +TBD + +This part of the book describes mac80211 internals. + +Key handling +============ + +Key handling basics +------------------- + +.. kernel-doc:: net/mac80211/key.c + :doc: Key handling basics + +MORE TBD +-------- + +TBD + +Receive processing +================== + +TBD + +Transmit processing +=================== + +TBD + +Station info handling +===================== + +Programming information +----------------------- + +.. kernel-doc:: net/mac80211/sta_info.h + :functions: sta_info + +.. kernel-doc:: net/mac80211/sta_info.h + :functions: ieee80211_sta_info_flags + +STA information lifetime rules +------------------------------ + +.. kernel-doc:: net/mac80211/sta_info.c + :doc: STA information lifetime rules + +Aggregation +=========== + +.. kernel-doc:: net/mac80211/sta_info.h + :functions: sta_ampdu_mlme + +.. kernel-doc:: net/mac80211/sta_info.h + :functions: tid_ampdu_tx + +.. kernel-doc:: net/mac80211/sta_info.h + :functions: tid_ampdu_rx + +Synchronisation +=============== + +TBD + +Locking, lots of RCU diff --git a/Documentation/80211/mac80211.rst b/Documentation/80211/mac80211.rst new file mode 100644 index 000000000000..85a8335e80b6 --- /dev/null +++ b/Documentation/80211/mac80211.rst @@ -0,0 +1,216 @@ +=========================== +mac80211 subsystem (basics) +=========================== + +You should read and understand the information contained within this +part of the book while implementing a mac80211 driver. In some chapters, +advanced usage is noted, those may be skipped if this isn't needed. + +This part of the book only covers station and monitor mode +functionality, additional information required to implement the other +modes is covered in the second part of the book. + +Basic hardware handling +======================= + +TBD + +This chapter shall contain information on getting a hw struct allocated +and registered with mac80211. + +Since it is required to allocate rates/modes before registering a hw +struct, this chapter shall also contain information on setting up the +rate/mode structs. + +Additionally, some discussion about the callbacks and the general +programming model should be in here, including the definition of +ieee80211_ops which will be referred to a lot. + +Finally, a discussion of hardware capabilities should be done with +references to other parts of the book. + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_hw + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_hw_flags + +.. kernel-doc:: include/net/mac80211.h + :functions: SET_IEEE80211_DEV + +.. kernel-doc:: include/net/mac80211.h + :functions: SET_IEEE80211_PERM_ADDR + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_ops + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_alloc_hw + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_register_hw + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_unregister_hw + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_free_hw + +PHY configuration +================= + +TBD + +This chapter should describe PHY handling including start/stop callbacks +and the various structures used. + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_conf + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_conf_flags + +Virtual interfaces +================== + +TBD + +This chapter should describe virtual interface basics that are relevant +to the driver (VLANs, MGMT etc are not.) It should explain the use of +the add_iface/remove_iface callbacks as well as the interface +configuration callbacks. + +Things related to AP mode should be discussed there. + +Things related to supporting multiple interfaces should be in the +appropriate chapter, a BIG FAT note should be here about this though and +the recommendation to allow only a single interface in STA mode at +first! + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_vif + +Receive and transmit processing +=============================== + +what should be here +------------------- + +TBD + +This should describe the receive and transmit paths in mac80211/the +drivers as well as transmit status handling. + +Frame format +------------ + +.. kernel-doc:: include/net/mac80211.h + :doc: Frame format + +Packet alignment +---------------- + +.. kernel-doc:: net/mac80211/rx.c + :doc: Packet alignment + +Calling into mac80211 from interrupts +------------------------------------- + +.. kernel-doc:: include/net/mac80211.h + :doc: Calling mac80211 from interrupts + +functions/definitions +--------------------- + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_rx_status + +.. kernel-doc:: include/net/mac80211.h + :functions: mac80211_rx_flags + +.. kernel-doc:: include/net/mac80211.h + :functions: mac80211_tx_info_flags + +.. kernel-doc:: include/net/mac80211.h + :functions: mac80211_tx_control_flags + +.. kernel-doc:: include/net/mac80211.h + :functions: mac80211_rate_control_flags + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_tx_rate + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_tx_info + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_tx_info_clear_status + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_rx + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_rx_ni + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_rx_irqsafe + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_tx_status + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_tx_status_ni + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_tx_status_irqsafe + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_rts_get + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_rts_duration + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_ctstoself_get + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_ctstoself_duration + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_generic_frame_duration + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_wake_queue + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_stop_queue + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_wake_queues + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_stop_queues + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_queue_stopped + +Frame filtering +=============== + +.. kernel-doc:: include/net/mac80211.h + :doc: Frame filtering + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_filter_flags + +The mac80211 workqueue +====================== + +.. kernel-doc:: include/net/mac80211.h + :doc: mac80211 workqueue + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_queue_work + +.. kernel-doc:: include/net/mac80211.h + :functions: ieee80211_queue_delayed_work diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl deleted file mode 100644 index 800fe7a9024c..000000000000 --- a/Documentation/DocBook/80211.tmpl +++ /dev/null @@ -1,584 +0,0 @@ - - - - - The 802.11 subsystems – for kernel developers - - Explaining wireless 802.11 networking in the Linux kernel - - - - 2007-2009 - Johannes Berg - - - - - Johannes - Berg - -
johannes@sipsolutions.net
-
-
-
- - - - This documentation is free software; you can redistribute - it and/or modify it under the terms of the GNU General Public - License version 2 as published by the Free Software Foundation. - - - This documentation is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied - warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - - You should have received a copy of the GNU General Public - License along with this documentation; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307 USA - - - For more details see the file COPYING in the source - distribution of Linux. - - - - - - These books attempt to give a description of the - various subsystems that play a role in 802.11 wireless - networking in Linux. Since these books are for kernel - developers they attempts to document the structures - and functions used in the kernel as well as giving a - higher-level overview. - - - The reader is expected to be familiar with the 802.11 - standard as published by the IEEE in 802.11-2007 (or - possibly later versions). References to this standard - will be given as "802.11-2007 8.1.5". - - -
- - - The cfg80211 subsystem - - -!Pinclude/net/cfg80211.h Introduction - - - - Device registration -!Pinclude/net/cfg80211.h Device registration -!Finclude/net/cfg80211.h ieee80211_channel_flags -!Finclude/net/cfg80211.h ieee80211_channel -!Finclude/net/cfg80211.h ieee80211_rate_flags -!Finclude/net/cfg80211.h ieee80211_rate -!Finclude/net/cfg80211.h ieee80211_sta_ht_cap -!Finclude/net/cfg80211.h ieee80211_supported_band -!Finclude/net/cfg80211.h cfg80211_signal_type -!Finclude/net/cfg80211.h wiphy_params_flags -!Finclude/net/cfg80211.h wiphy_flags -!Finclude/net/cfg80211.h wiphy -!Finclude/net/cfg80211.h wireless_dev -!Finclude/net/cfg80211.h wiphy_new -!Finclude/net/cfg80211.h wiphy_register -!Finclude/net/cfg80211.h wiphy_unregister -!Finclude/net/cfg80211.h wiphy_free - -!Finclude/net/cfg80211.h wiphy_name -!Finclude/net/cfg80211.h wiphy_dev -!Finclude/net/cfg80211.h wiphy_priv -!Finclude/net/cfg80211.h priv_to_wiphy -!Finclude/net/cfg80211.h set_wiphy_dev -!Finclude/net/cfg80211.h wdev_priv -!Finclude/net/cfg80211.h ieee80211_iface_limit -!Finclude/net/cfg80211.h ieee80211_iface_combination -!Finclude/net/cfg80211.h cfg80211_check_combinations - - - Actions and configuration -!Pinclude/net/cfg80211.h Actions and configuration -!Finclude/net/cfg80211.h cfg80211_ops -!Finclude/net/cfg80211.h vif_params -!Finclude/net/cfg80211.h key_params -!Finclude/net/cfg80211.h survey_info_flags -!Finclude/net/cfg80211.h survey_info -!Finclude/net/cfg80211.h cfg80211_beacon_data -!Finclude/net/cfg80211.h cfg80211_ap_settings -!Finclude/net/cfg80211.h station_parameters -!Finclude/net/cfg80211.h rate_info_flags -!Finclude/net/cfg80211.h rate_info -!Finclude/net/cfg80211.h station_info -!Finclude/net/cfg80211.h monitor_flags -!Finclude/net/cfg80211.h mpath_info_flags -!Finclude/net/cfg80211.h mpath_info -!Finclude/net/cfg80211.h bss_parameters -!Finclude/net/cfg80211.h ieee80211_txq_params -!Finclude/net/cfg80211.h cfg80211_crypto_settings -!Finclude/net/cfg80211.h cfg80211_auth_request -!Finclude/net/cfg80211.h cfg80211_assoc_request -!Finclude/net/cfg80211.h cfg80211_deauth_request -!Finclude/net/cfg80211.h cfg80211_disassoc_request -!Finclude/net/cfg80211.h cfg80211_ibss_params -!Finclude/net/cfg80211.h cfg80211_connect_params -!Finclude/net/cfg80211.h cfg80211_pmksa -!Finclude/net/cfg80211.h cfg80211_rx_mlme_mgmt -!Finclude/net/cfg80211.h cfg80211_auth_timeout -!Finclude/net/cfg80211.h cfg80211_rx_assoc_resp -!Finclude/net/cfg80211.h cfg80211_assoc_timeout -!Finclude/net/cfg80211.h cfg80211_tx_mlme_mgmt -!Finclude/net/cfg80211.h cfg80211_ibss_joined -!Finclude/net/cfg80211.h cfg80211_connect_result -!Finclude/net/cfg80211.h cfg80211_connect_bss -!Finclude/net/cfg80211.h cfg80211_connect_timeout -!Finclude/net/cfg80211.h cfg80211_roamed -!Finclude/net/cfg80211.h cfg80211_disconnected -!Finclude/net/cfg80211.h cfg80211_ready_on_channel -!Finclude/net/cfg80211.h cfg80211_remain_on_channel_expired -!Finclude/net/cfg80211.h cfg80211_new_sta -!Finclude/net/cfg80211.h cfg80211_rx_mgmt -!Finclude/net/cfg80211.h cfg80211_mgmt_tx_status -!Finclude/net/cfg80211.h cfg80211_cqm_rssi_notify -!Finclude/net/cfg80211.h cfg80211_cqm_pktloss_notify -!Finclude/net/cfg80211.h cfg80211_michael_mic_failure - - - Scanning and BSS list handling -!Pinclude/net/cfg80211.h Scanning and BSS list handling -!Finclude/net/cfg80211.h cfg80211_ssid -!Finclude/net/cfg80211.h cfg80211_scan_request -!Finclude/net/cfg80211.h cfg80211_scan_done -!Finclude/net/cfg80211.h cfg80211_bss -!Finclude/net/cfg80211.h cfg80211_inform_bss -!Finclude/net/cfg80211.h cfg80211_inform_bss_frame_data -!Finclude/net/cfg80211.h cfg80211_inform_bss_data -!Finclude/net/cfg80211.h cfg80211_unlink_bss -!Finclude/net/cfg80211.h cfg80211_find_ie -!Finclude/net/cfg80211.h ieee80211_bss_get_ie - - - Utility functions -!Pinclude/net/cfg80211.h Utility functions -!Finclude/net/cfg80211.h ieee80211_channel_to_frequency -!Finclude/net/cfg80211.h ieee80211_frequency_to_channel -!Finclude/net/cfg80211.h ieee80211_get_channel -!Finclude/net/cfg80211.h ieee80211_get_response_rate -!Finclude/net/cfg80211.h ieee80211_hdrlen -!Finclude/net/cfg80211.h ieee80211_get_hdrlen_from_skb -!Finclude/net/cfg80211.h ieee80211_radiotap_iterator - - - Data path helpers -!Pinclude/net/cfg80211.h Data path helpers -!Finclude/net/cfg80211.h ieee80211_data_to_8023 -!Finclude/net/cfg80211.h ieee80211_data_from_8023 -!Finclude/net/cfg80211.h ieee80211_amsdu_to_8023s -!Finclude/net/cfg80211.h cfg80211_classify8021d - - - Regulatory enforcement infrastructure -!Pinclude/net/cfg80211.h Regulatory enforcement infrastructure -!Finclude/net/cfg80211.h regulatory_hint -!Finclude/net/cfg80211.h wiphy_apply_custom_regulatory -!Finclude/net/cfg80211.h freq_reg_info - - - RFkill integration -!Pinclude/net/cfg80211.h RFkill integration -!Finclude/net/cfg80211.h wiphy_rfkill_set_hw_state -!Finclude/net/cfg80211.h wiphy_rfkill_start_polling -!Finclude/net/cfg80211.h wiphy_rfkill_stop_polling - - - Test mode -!Pinclude/net/cfg80211.h Test mode -!Finclude/net/cfg80211.h cfg80211_testmode_alloc_reply_skb -!Finclude/net/cfg80211.h cfg80211_testmode_reply -!Finclude/net/cfg80211.h cfg80211_testmode_alloc_event_skb -!Finclude/net/cfg80211.h cfg80211_testmode_event - - - - - The mac80211 subsystem - -!Pinclude/net/mac80211.h Introduction -!Pinclude/net/mac80211.h Warning - - - - - - - - - The basic mac80211 driver interface - - - You should read and understand the information contained - within this part of the book while implementing a driver. - In some chapters, advanced usage is noted, that may be - skipped at first. - - - This part of the book only covers station and monitor mode - functionality, additional information required to implement - the other modes is covered in the second part of the book. - - - - - Basic hardware handling - TBD - - This chapter shall contain information on getting a hw - struct allocated and registered with mac80211. - - - Since it is required to allocate rates/modes before registering - a hw struct, this chapter shall also contain information on setting - up the rate/mode structs. - - - Additionally, some discussion about the callbacks and - the general programming model should be in here, including - the definition of ieee80211_ops which will be referred to - a lot. - - - Finally, a discussion of hardware capabilities should be done - with references to other parts of the book. - - -!Finclude/net/mac80211.h ieee80211_hw -!Finclude/net/mac80211.h ieee80211_hw_flags -!Finclude/net/mac80211.h SET_IEEE80211_DEV -!Finclude/net/mac80211.h SET_IEEE80211_PERM_ADDR -!Finclude/net/mac80211.h ieee80211_ops -!Finclude/net/mac80211.h ieee80211_alloc_hw -!Finclude/net/mac80211.h ieee80211_register_hw -!Finclude/net/mac80211.h ieee80211_unregister_hw -!Finclude/net/mac80211.h ieee80211_free_hw - - - - PHY configuration - TBD - - This chapter should describe PHY handling including - start/stop callbacks and the various structures used. - -!Finclude/net/mac80211.h ieee80211_conf -!Finclude/net/mac80211.h ieee80211_conf_flags - - - - Virtual interfaces - TBD - - This chapter should describe virtual interface basics - that are relevant to the driver (VLANs, MGMT etc are not.) - It should explain the use of the add_iface/remove_iface - callbacks as well as the interface configuration callbacks. - - Things related to AP mode should be discussed there. - - Things related to supporting multiple interfaces should be - in the appropriate chapter, a BIG FAT note should be here about - this though and the recommendation to allow only a single - interface in STA mode at first! - -!Finclude/net/mac80211.h ieee80211_vif - - - - Receive and transmit processing - - what should be here - TBD - - This should describe the receive and transmit - paths in mac80211/the drivers as well as - transmit status handling. - - - - Frame format -!Pinclude/net/mac80211.h Frame format - - - Packet alignment -!Pnet/mac80211/rx.c Packet alignment - - - Calling into mac80211 from interrupts -!Pinclude/net/mac80211.h Calling mac80211 from interrupts - - - functions/definitions -!Finclude/net/mac80211.h ieee80211_rx_status -!Finclude/net/mac80211.h mac80211_rx_flags -!Finclude/net/mac80211.h mac80211_tx_info_flags -!Finclude/net/mac80211.h mac80211_tx_control_flags -!Finclude/net/mac80211.h mac80211_rate_control_flags -!Finclude/net/mac80211.h ieee80211_tx_rate -!Finclude/net/mac80211.h ieee80211_tx_info -!Finclude/net/mac80211.h ieee80211_tx_info_clear_status -!Finclude/net/mac80211.h ieee80211_rx -!Finclude/net/mac80211.h ieee80211_rx_ni -!Finclude/net/mac80211.h ieee80211_rx_irqsafe -!Finclude/net/mac80211.h ieee80211_tx_status -!Finclude/net/mac80211.h ieee80211_tx_status_ni -!Finclude/net/mac80211.h ieee80211_tx_status_irqsafe -!Finclude/net/mac80211.h ieee80211_rts_get -!Finclude/net/mac80211.h ieee80211_rts_duration -!Finclude/net/mac80211.h ieee80211_ctstoself_get -!Finclude/net/mac80211.h ieee80211_ctstoself_duration -!Finclude/net/mac80211.h ieee80211_generic_frame_duration -!Finclude/net/mac80211.h ieee80211_wake_queue -!Finclude/net/mac80211.h ieee80211_stop_queue -!Finclude/net/mac80211.h ieee80211_wake_queues -!Finclude/net/mac80211.h ieee80211_stop_queues -!Finclude/net/mac80211.h ieee80211_queue_stopped - - - - - Frame filtering -!Pinclude/net/mac80211.h Frame filtering -!Finclude/net/mac80211.h ieee80211_filter_flags - - - - The mac80211 workqueue -!Pinclude/net/mac80211.h mac80211 workqueue -!Finclude/net/mac80211.h ieee80211_queue_work -!Finclude/net/mac80211.h ieee80211_queue_delayed_work - - - - - Advanced driver interface - - - Information contained within this part of the book is - of interest only for advanced interaction of mac80211 - with drivers to exploit more hardware capabilities and - improve performance. - - - - - LED support - - Mac80211 supports various ways of blinking LEDs. Wherever possible, - device LEDs should be exposed as LED class devices and hooked up to - the appropriate trigger, which will then be triggered appropriately - by mac80211. - -!Finclude/net/mac80211.h ieee80211_get_tx_led_name -!Finclude/net/mac80211.h ieee80211_get_rx_led_name -!Finclude/net/mac80211.h ieee80211_get_assoc_led_name -!Finclude/net/mac80211.h ieee80211_get_radio_led_name -!Finclude/net/mac80211.h ieee80211_tpt_blink -!Finclude/net/mac80211.h ieee80211_tpt_led_trigger_flags -!Finclude/net/mac80211.h ieee80211_create_tpt_led_trigger - - - - Hardware crypto acceleration -!Pinclude/net/mac80211.h Hardware crypto acceleration - -!Finclude/net/mac80211.h set_key_cmd -!Finclude/net/mac80211.h ieee80211_key_conf -!Finclude/net/mac80211.h ieee80211_key_flags -!Finclude/net/mac80211.h ieee80211_get_tkip_p1k -!Finclude/net/mac80211.h ieee80211_get_tkip_p1k_iv -!Finclude/net/mac80211.h ieee80211_get_tkip_p2k - - - - Powersave support -!Pinclude/net/mac80211.h Powersave support - - - - Beacon filter support -!Pinclude/net/mac80211.h Beacon filter support -!Finclude/net/mac80211.h ieee80211_beacon_loss - - - - Multiple queues and QoS support - TBD -!Finclude/net/mac80211.h ieee80211_tx_queue_params - - - - Access point mode support - TBD - Some parts of the if_conf should be discussed here instead - - Insert notes about VLAN interfaces with hw crypto here or - in the hw crypto chapter. - -
- support for powersaving clients -!Pinclude/net/mac80211.h AP support for powersaving clients -!Finclude/net/mac80211.h ieee80211_get_buffered_bc -!Finclude/net/mac80211.h ieee80211_beacon_get -!Finclude/net/mac80211.h ieee80211_sta_eosp -!Finclude/net/mac80211.h ieee80211_frame_release_type -!Finclude/net/mac80211.h ieee80211_sta_ps_transition -!Finclude/net/mac80211.h ieee80211_sta_ps_transition_ni -!Finclude/net/mac80211.h ieee80211_sta_set_buffered -!Finclude/net/mac80211.h ieee80211_sta_block_awake -
-
- - - Supporting multiple virtual interfaces - TBD - - Note: WDS with identical MAC address should almost always be OK - - - Insert notes about having multiple virtual interfaces with - different MAC addresses here, note which configurations are - supported by mac80211, add notes about supporting hw crypto - with it. - -!Finclude/net/mac80211.h ieee80211_iterate_active_interfaces -!Finclude/net/mac80211.h ieee80211_iterate_active_interfaces_atomic - - - - Station handling - TODO -!Finclude/net/mac80211.h ieee80211_sta -!Finclude/net/mac80211.h sta_notify_cmd -!Finclude/net/mac80211.h ieee80211_find_sta -!Finclude/net/mac80211.h ieee80211_find_sta_by_ifaddr - - - - Hardware scan offload - TBD -!Finclude/net/mac80211.h ieee80211_scan_completed - - - - Aggregation - - TX A-MPDU aggregation -!Pnet/mac80211/agg-tx.c TX A-MPDU aggregation -!Cnet/mac80211/agg-tx.c - - - RX A-MPDU aggregation -!Pnet/mac80211/agg-rx.c RX A-MPDU aggregation -!Cnet/mac80211/agg-rx.c -!Finclude/net/mac80211.h ieee80211_ampdu_mlme_action - - - - - Spatial Multiplexing Powersave (SMPS) -!Pinclude/net/mac80211.h Spatial multiplexing power save -!Finclude/net/mac80211.h ieee80211_request_smps -!Finclude/net/mac80211.h ieee80211_smps_mode - -
- - - Rate control interface - - TBD - - This part of the book describes the rate control algorithm - interface and how it relates to mac80211 and drivers. - - - - Rate Control API - TBD -!Finclude/net/mac80211.h ieee80211_start_tx_ba_session -!Finclude/net/mac80211.h ieee80211_start_tx_ba_cb_irqsafe -!Finclude/net/mac80211.h ieee80211_stop_tx_ba_session -!Finclude/net/mac80211.h ieee80211_stop_tx_ba_cb_irqsafe -!Finclude/net/mac80211.h ieee80211_rate_control_changed -!Finclude/net/mac80211.h ieee80211_tx_rate_control -!Finclude/net/mac80211.h rate_control_send_low - - - - - Internals - - TBD - - This part of the book describes mac80211 internals. - - - - - Key handling - - Key handling basics -!Pnet/mac80211/key.c Key handling basics - - - MORE TBD - TBD - - - - - Receive processing - TBD - - - - Transmit processing - TBD - - - - Station info handling - - Programming information -!Fnet/mac80211/sta_info.h sta_info -!Fnet/mac80211/sta_info.h ieee80211_sta_info_flags - - - STA information lifetime rules -!Pnet/mac80211/sta_info.c STA information lifetime rules - - - - - Aggregation -!Fnet/mac80211/sta_info.h sta_ampdu_mlme -!Fnet/mac80211/sta_info.h tid_ampdu_tx -!Fnet/mac80211/sta_info.h tid_ampdu_rx - - - - Synchronisation - TBD - Locking, lots of RCU - - -
-
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 736f5916daea..fdf8232d0eeb 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -12,7 +12,7 @@ DOCBOOKS := z8530book.xml \ kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ - 80211.xml debugobjects.xml sh.xml regulator.xml \ + debugobjects.xml sh.xml regulator.xml \ alsa-driver-api.xml writing-an-alsa-driver.xml \ tracepoint.xml w1.xml \ writing_musb_glue_layer.xml crypto-API.xml iio.xml diff --git a/Documentation/index.rst b/Documentation/index.rst index d9ccb94fca95..c53d089455a4 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -17,6 +17,7 @@ Contents: driver-api/index media/index gpu/index + 80211/index Indices and tables ================== diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9c23f4d33e06..02b6690d7162 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -775,9 +775,9 @@ enum station_parameters_apply_mask { * (or NULL for no change) * @supported_rates_len: number of supported rates * @sta_flags_mask: station flags that changed - * (bitmask of BIT(NL80211_STA_FLAG_...)) + * (bitmask of BIT(%NL80211_STA_FLAG_...)) * @sta_flags_set: station flags values - * (bitmask of BIT(NL80211_STA_FLAG_...)) + * (bitmask of BIT(%NL80211_STA_FLAG_...)) * @listen_interval: listen interval or -1 for no change * @aid: AID or zero for no change * @peer_aid: mesh peer AID or zero for no change @@ -2947,47 +2947,54 @@ struct ieee80211_iface_limit { * * 1. Allow #STA <= 1, #AP <= 1, matching BI, channels = 1, 2 total: * - * struct ieee80211_iface_limit limits1[] = { - * { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), }, - * { .max = 1, .types = BIT(NL80211_IFTYPE_AP}, }, - * }; - * struct ieee80211_iface_combination combination1 = { - * .limits = limits1, - * .n_limits = ARRAY_SIZE(limits1), - * .max_interfaces = 2, - * .beacon_int_infra_match = true, - * }; + * .. code-block:: c + * + * struct ieee80211_iface_limit limits1[] = { + * { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), }, + * { .max = 1, .types = BIT(NL80211_IFTYPE_AP}, }, + * }; + * struct ieee80211_iface_combination combination1 = { + * .limits = limits1, + * .n_limits = ARRAY_SIZE(limits1), + * .max_interfaces = 2, + * .beacon_int_infra_match = true, + * }; * * * 2. Allow #{AP, P2P-GO} <= 8, channels = 1, 8 total: * - * struct ieee80211_iface_limit limits2[] = { - * { .max = 8, .types = BIT(NL80211_IFTYPE_AP) | - * BIT(NL80211_IFTYPE_P2P_GO), }, - * }; - * struct ieee80211_iface_combination combination2 = { - * .limits = limits2, - * .n_limits = ARRAY_SIZE(limits2), - * .max_interfaces = 8, - * .num_different_channels = 1, - * }; + * .. code-block:: c + * + * struct ieee80211_iface_limit limits2[] = { + * { .max = 8, .types = BIT(NL80211_IFTYPE_AP) | + * BIT(NL80211_IFTYPE_P2P_GO), }, + * }; + * struct ieee80211_iface_combination combination2 = { + * .limits = limits2, + * .n_limits = ARRAY_SIZE(limits2), + * .max_interfaces = 8, + * .num_different_channels = 1, + * }; * * * 3. Allow #STA <= 1, #{P2P-client,P2P-GO} <= 3 on two channels, 4 total. * - * This allows for an infrastructure connection and three P2P connections. - * - * struct ieee80211_iface_limit limits3[] = { - * { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), }, - * { .max = 3, .types = BIT(NL80211_IFTYPE_P2P_GO) | - * BIT(NL80211_IFTYPE_P2P_CLIENT), }, - * }; - * struct ieee80211_iface_combination combination3 = { - * .limits = limits3, - * .n_limits = ARRAY_SIZE(limits3), - * .max_interfaces = 4, - * .num_different_channels = 2, - * }; + * This allows for an infrastructure connection and three P2P connections. + * + * .. code-block:: c + * + * struct ieee80211_iface_limit limits3[] = { + * { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), }, + * { .max = 3, .types = BIT(NL80211_IFTYPE_P2P_GO) | + * BIT(NL80211_IFTYPE_P2P_CLIENT), }, + * }; + * struct ieee80211_iface_combination combination3 = { + * .limits = limits3, + * .n_limits = ARRAY_SIZE(limits3), + * .max_interfaces = 4, + * .num_different_channels = 2, + * }; + * */ struct ieee80211_iface_combination { const struct ieee80211_iface_limit *limits; -- cgit v1.2.3 From 7f6990c830f3e8d703f13d7963acf51936c52ad2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Oct 2016 15:29:49 +0200 Subject: cfg80211: let ieee80211_amsdu_to_8023s() take only header-less SKB There's only a single case where has_80211_header is passed as true, which is in mac80211. Given that there's only simple code that needs to be done before calling it, export that function from cfg80211 instead and let mac80211 call it itself. Signed-off-by: Johannes Berg --- .../net/wireless/marvell/mwifiex/11n_rxreorder.c | 2 +- include/net/cfg80211.h | 31 +++++++++++++++------- net/mac80211/rx.c | 8 +++++- net/wireless/util.c | 24 ++++------------- 4 files changed, 35 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index 94480123efa3..e9f462e3730b 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -45,7 +45,7 @@ static int mwifiex_11n_dispatch_amsdu_pkt(struct mwifiex_private *priv, skb_trim(skb, le16_to_cpu(local_rx_pd->rx_pkt_length)); ieee80211_amsdu_to_8023s(skb, &list, priv->curr_addr, - priv->wdev.iftype, 0, false); + priv->wdev.iftype, 0); while (!skb_queue_empty(&list)) { struct rx_packet_hdr *rx_hdr; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fe78f02a242e..f372eadd1963 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4039,6 +4039,18 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); * that do not do the 802.11/802.3 conversion on the device. */ +/** + * ieee80211_data_to_8023_exthdr - convert an 802.11 data frame to 802.3 + * @skb: the 802.11 data frame + * @ehdr: pointer to a &struct ethhdr that will get the header, instead + * of it being pushed into the SKB + * @addr: the device MAC address + * @iftype: the virtual interface type + * Return: 0 on success. Non-zero on error. + */ +int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, + const u8 *addr, enum nl80211_iftype iftype); + /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 * @skb: the 802.11 data frame @@ -4046,8 +4058,11 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); * @iftype: the virtual interface type * Return: 0 on success. Non-zero on error. */ -int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype); +static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, + enum nl80211_iftype iftype) +{ + return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype); +} /** * ieee80211_data_from_8023 - convert an 802.3 frame to 802.11 @@ -4065,22 +4080,20 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, /** * ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame * - * Decode an IEEE 802.11n A-MSDU frame and convert it to a list of - * 802.3 frames. The @list will be empty if the decode fails. The - * @skb is consumed after the function returns. + * Decode an IEEE 802.11 A-MSDU and convert it to a list of 802.3 frames. + * The @list will be empty if the decode fails. The @skb must be fully + * header-less before being passed in here; it is freed in this function. * - * @skb: The input IEEE 802.11n A-MSDU frame. + * @skb: The input A-MSDU frame without any headers. * @list: The output list of 802.3 frames. It must be allocated and * initialized by by the caller. * @addr: The device MAC address. * @iftype: The device interface type. * @extra_headroom: The hardware extra headroom for SKBs in the @list. - * @has_80211_header: Set it true if SKB is with IEEE 802.11 header. */ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, - const unsigned int extra_headroom, - bool has_80211_header); + const unsigned int extra_headroom); /** * cfg80211_classify8021d - determine the 802.1p/1d tag for a data frame diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3ac2f1cba317..de2d75fa5c51 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2298,6 +2298,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) __le16 fc = hdr->frame_control; struct sk_buff_head frame_list; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); + struct ethhdr ethhdr; if (unlikely(!ieee80211_is_data(fc))) return RX_CONTINUE; @@ -2329,9 +2330,14 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) skb->dev = dev; __skb_queue_head_init(&frame_list); + if (ieee80211_data_to_8023_exthdr(skb, ðhdr, + rx->sdata->vif.addr, + rx->sdata->vif.type)) + return RX_DROP_UNUSABLE; + ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, rx->sdata->vif.type, - rx->local->hw.extra_tx_headroom, true); + rx->local->hw.extra_tx_headroom); while (!skb_queue_empty(&frame_list)) { rx->skb = __skb_dequeue(&frame_list); diff --git a/net/wireless/util.c b/net/wireless/util.c index 8edce22d1b93..e36ede840b88 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -420,8 +420,8 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) } EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen); -static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr, - const u8 *addr, enum nl80211_iftype iftype) +int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, + const u8 *addr, enum nl80211_iftype iftype) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct { @@ -525,13 +525,7 @@ static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr, return 0; } - -int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype) -{ - return __ieee80211_data_to_8023(skb, NULL, addr, iftype); -} -EXPORT_SYMBOL(ieee80211_data_to_8023); +EXPORT_SYMBOL(ieee80211_data_to_8023_exthdr); int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, enum nl80211_iftype iftype, @@ -745,25 +739,18 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen, void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, - const unsigned int extra_headroom, - bool has_80211_header) + const unsigned int extra_headroom) { unsigned int hlen = ALIGN(extra_headroom, 4); struct sk_buff *frame = NULL; u16 ethertype; u8 *payload; - int offset = 0, remaining, err; + int offset = 0, remaining; struct ethhdr eth; bool reuse_frag = skb->head_frag && !skb_has_frag_list(skb); bool reuse_skb = false; bool last = false; - if (has_80211_header) { - err = __ieee80211_data_to_8023(skb, ð, addr, iftype); - if (err) - goto out; - } - while (!last) { unsigned int subframe_len; int len; @@ -819,7 +806,6 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, purge: __skb_queue_purge(list); - out: dev_kfree_skb(skb); } EXPORT_SYMBOL(ieee80211_amsdu_to_8023s); -- cgit v1.2.3 From 8b935ee2ea17db720d70f6420f77f594c0c93f75 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Oct 2016 16:17:01 +0200 Subject: cfg80211: add ability to check DA/SA in A-MSDU decapsulation We should not accept arbitrary DA/SA inside A-MSDUs, it could be used to circumvent protections, like allowing a station to send frames and make them seem to come from somewhere else. Add the necessary infrastructure in cfg80211 to allow such checks, in further patches we'll start using them. Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c | 2 +- include/net/cfg80211.h | 5 ++++- net/mac80211/rx.c | 3 ++- net/wireless/util.c | 14 ++++++++++++-- 4 files changed, 19 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index e9f462e3730b..274dd5a1574a 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -45,7 +45,7 @@ static int mwifiex_11n_dispatch_amsdu_pkt(struct mwifiex_private *priv, skb_trim(skb, le16_to_cpu(local_rx_pd->rx_pkt_length)); ieee80211_amsdu_to_8023s(skb, &list, priv->curr_addr, - priv->wdev.iftype, 0); + priv->wdev.iftype, 0, NULL, NULL); while (!skb_queue_empty(&list)) { struct rx_packet_hdr *rx_hdr; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f372eadd1963..7df600c463eb 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4090,10 +4090,13 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, * @addr: The device MAC address. * @iftype: The device interface type. * @extra_headroom: The hardware extra headroom for SKBs in the @list. + * @check_da: DA to check in the inner ethernet header, or NULL + * @check_sa: SA to check in the inner ethernet header, or NULL */ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, - const unsigned int extra_headroom); + const unsigned int extra_headroom, + const u8 *check_da, const u8 *check_sa); /** * cfg80211_classify8021d - determine the 802.1p/1d tag for a data frame diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index de2d75fa5c51..cf53fe1a0aa2 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2337,7 +2337,8 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, rx->sdata->vif.type, - rx->local->hw.extra_tx_headroom); + rx->local->hw.extra_tx_headroom, + NULL, NULL); while (!skb_queue_empty(&frame_list)) { rx->skb = __skb_dequeue(&frame_list); diff --git a/net/wireless/util.c b/net/wireless/util.c index e36ede840b88..5ea12afc7706 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -739,7 +739,8 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen, void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, - const unsigned int extra_headroom) + const unsigned int extra_headroom, + const u8 *check_da, const u8 *check_sa) { unsigned int hlen = ALIGN(extra_headroom, 4); struct sk_buff *frame = NULL; @@ -767,8 +768,17 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, goto purge; offset += sizeof(struct ethhdr); - /* reuse skb for the last subframe */ last = remaining <= subframe_len + padding; + + /* FIXME: should we really accept multicast DA? */ + if ((check_da && !is_multicast_ether_addr(eth.h_dest) && + !ether_addr_equal(check_da, eth.h_dest)) || + (check_sa && !ether_addr_equal(check_sa, eth.h_source))) { + offset += len + padding; + continue; + } + + /* reuse skb for the last subframe */ if (!skb_is_nonlinear(skb) && !reuse_frag && last) { skb_pull(skb, offset); frame = skb; -- cgit v1.2.3 From e227300c8395dffaa7614ce7c7666a82180ebc60 Mon Sep 17 00:00:00 2001 From: Purushottam Kushwaha Date: Wed, 12 Oct 2016 18:25:35 +0530 Subject: cfg80211: pass struct to interface combination check/iter Move the growing parameter list to a structure for the interface combination check and iteration functions in cfg80211 and mac80211 to make the code easier to understand. Signed-off-by: Purushottam Kushwaha [edit commit message] Signed-off-by: Johannes Berg --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 22 ++++++----- include/net/cfg80211.h | 46 +++++++++++----------- net/mac80211/util.c | 44 ++++++++++----------- net/wireless/util.c | 28 ++++++------- 4 files changed, 68 insertions(+), 72 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index b777e1b2f87a..2295336355df 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -414,23 +414,24 @@ static int brcmf_vif_change_validate(struct brcmf_cfg80211_info *cfg, struct brcmf_cfg80211_vif *vif, enum nl80211_iftype new_type) { - int iftype_num[NUM_NL80211_IFTYPES]; struct brcmf_cfg80211_vif *pos; bool check_combos = false; int ret = 0; + struct iface_combination_params params = { + .num_different_channels = 1, + }; - memset(&iftype_num[0], 0, sizeof(iftype_num)); list_for_each_entry(pos, &cfg->vif_list, list) if (pos == vif) { - iftype_num[new_type]++; + params.iftype_num[new_type]++; } else { /* concurrent interfaces so need check combinations */ check_combos = true; - iftype_num[pos->wdev.iftype]++; + params.iftype_num[pos->wdev.iftype]++; } if (check_combos) - ret = cfg80211_check_combinations(cfg->wiphy, 1, 0, iftype_num); + ret = cfg80211_check_combinations(cfg->wiphy, ¶ms); return ret; } @@ -438,15 +439,16 @@ static int brcmf_vif_change_validate(struct brcmf_cfg80211_info *cfg, static int brcmf_vif_add_validate(struct brcmf_cfg80211_info *cfg, enum nl80211_iftype new_type) { - int iftype_num[NUM_NL80211_IFTYPES]; struct brcmf_cfg80211_vif *pos; + struct iface_combination_params params = { + .num_different_channels = 1, + }; - memset(&iftype_num[0], 0, sizeof(iftype_num)); list_for_each_entry(pos, &cfg->vif_list, list) - iftype_num[pos->wdev.iftype]++; + params.iftype_num[pos->wdev.iftype]++; - iftype_num[new_type]++; - return cfg80211_check_combinations(cfg->wiphy, 1, 0, iftype_num); + params.iftype_num[new_type]++; + return cfg80211_check_combinations(cfg->wiphy, ¶ms); } static void convert_key_from_CPU(struct brcmf_wsec_key *key, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fe78f02a242e..ea108541e1e0 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -771,6 +771,26 @@ struct cfg80211_csa_settings { u8 count; }; +/** + * struct iface_combination_params - input parameters for interface combinations + * + * Used to pass interface combination parameters + * + * @num_different_channels: the number of different channels we want + * to use for verification + * @radar_detect: a bitmap where each bit corresponds to a channel + * width where radar detection is needed, as in the definition of + * &struct ieee80211_iface_combination.@radar_detect_widths + * @iftype_num: array with the number of interfaces of each interface + * type. The index is the interface type as specified in &enum + * nl80211_iftype. + */ +struct iface_combination_params { + int num_different_channels; + u8 radar_detect; + int iftype_num[NUM_NL80211_IFTYPES]; +}; + /** * enum station_parameters_apply_mask - station parameter values to apply * @STATION_PARAM_APPLY_UAPSD: apply new uAPSD parameters (uapsd_queues, max_sp) @@ -5575,36 +5595,20 @@ unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy); * cfg80211_check_combinations - check interface combinations * * @wiphy: the wiphy - * @num_different_channels: the number of different channels we want - * to use for verification - * @radar_detect: a bitmap where each bit corresponds to a channel - * width where radar detection is needed, as in the definition of - * &struct ieee80211_iface_combination.@radar_detect_widths - * @iftype_num: array with the numbers of interfaces of each interface - * type. The index is the interface type as specified in &enum - * nl80211_iftype. + * @params: the interface combinations parameter * * This function can be called by the driver to check whether a * combination of interfaces and their types are allowed according to * the interface combinations. */ int cfg80211_check_combinations(struct wiphy *wiphy, - const int num_different_channels, - const u8 radar_detect, - const int iftype_num[NUM_NL80211_IFTYPES]); + struct iface_combination_params *params); /** * cfg80211_iter_combinations - iterate over matching combinations * * @wiphy: the wiphy - * @num_different_channels: the number of different channels we want - * to use for verification - * @radar_detect: a bitmap where each bit corresponds to a channel - * width where radar detection is needed, as in the definition of - * &struct ieee80211_iface_combination.@radar_detect_widths - * @iftype_num: array with the numbers of interfaces of each interface - * type. The index is the interface type as specified in &enum - * nl80211_iftype. + * @params: the interface combinations parameter * @iter: function to call for each matching combination * @data: pointer to pass to iter function * @@ -5613,9 +5617,7 @@ int cfg80211_check_combinations(struct wiphy *wiphy, * purposes. */ int cfg80211_iter_combinations(struct wiphy *wiphy, - const int num_different_channels, - const u8 radar_detect, - const int iftype_num[NUM_NL80211_IFTYPES], + struct iface_combination_params *params, void (*iter)(const struct ieee80211_iface_combination *c, void *data), void *data); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 545c79a42a77..031273a61d27 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3308,10 +3308,11 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *sdata_iter; enum nl80211_iftype iftype = sdata->wdev.iftype; - int num[NUM_NL80211_IFTYPES]; struct ieee80211_chanctx *ctx; - int num_different_channels = 0; int total = 1; + struct iface_combination_params params = { + .radar_detect = radar_detect, + }; lockdep_assert_held(&local->chanctx_mtx); @@ -3322,9 +3323,6 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, !chandef->chan)) return -EINVAL; - if (chandef) - num_different_channels = 1; - if (WARN_ON(iftype >= NUM_NL80211_IFTYPES)) return -EINVAL; @@ -3335,24 +3333,26 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, return 0; } - memset(num, 0, sizeof(num)); + if (chandef) + params.num_different_channels = 1; if (iftype != NL80211_IFTYPE_UNSPECIFIED) - num[iftype] = 1; + params.iftype_num[iftype] = 1; list_for_each_entry(ctx, &local->chanctx_list, list) { if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) continue; - radar_detect |= ieee80211_chanctx_radar_detect(local, ctx); + params.radar_detect |= + ieee80211_chanctx_radar_detect(local, ctx); if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) { - num_different_channels++; + params.num_different_channels++; continue; } if (chandef && chanmode == IEEE80211_CHANCTX_SHARED && cfg80211_chandef_compatible(chandef, &ctx->conf.def)) continue; - num_different_channels++; + params.num_different_channels++; } list_for_each_entry_rcu(sdata_iter, &local->interfaces, list) { @@ -3365,16 +3365,14 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype)) continue; - num[wdev_iter->iftype]++; + params.iftype_num[wdev_iter->iftype]++; total++; } - if (total == 1 && !radar_detect) + if (total == 1 && !params.radar_detect) return 0; - return cfg80211_check_combinations(local->hw.wiphy, - num_different_channels, - radar_detect, num); + return cfg80211_check_combinations(local->hw.wiphy, ¶ms); } static void @@ -3390,12 +3388,10 @@ ieee80211_iter_max_chans(const struct ieee80211_iface_combination *c, int ieee80211_max_num_channels(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - int num[NUM_NL80211_IFTYPES] = {}; struct ieee80211_chanctx *ctx; - int num_different_channels = 0; - u8 radar_detect = 0; u32 max_num_different_channels = 1; int err; + struct iface_combination_params params = {0}; lockdep_assert_held(&local->chanctx_mtx); @@ -3403,17 +3399,17 @@ int ieee80211_max_num_channels(struct ieee80211_local *local) if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) continue; - num_different_channels++; + params.num_different_channels++; - radar_detect |= ieee80211_chanctx_radar_detect(local, ctx); + params.radar_detect |= + ieee80211_chanctx_radar_detect(local, ctx); } list_for_each_entry_rcu(sdata, &local->interfaces, list) - num[sdata->wdev.iftype]++; + params.iftype_num[sdata->wdev.iftype]++; - err = cfg80211_iter_combinations(local->hw.wiphy, - num_different_channels, radar_detect, - num, ieee80211_iter_max_chans, + err = cfg80211_iter_combinations(local->hw.wiphy, ¶ms, + ieee80211_iter_max_chans, &max_num_different_channels); if (err < 0) return err; diff --git a/net/wireless/util.c b/net/wireless/util.c index 8edce22d1b93..0d69b257793d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1580,9 +1580,7 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, } int cfg80211_iter_combinations(struct wiphy *wiphy, - const int num_different_channels, - const u8 radar_detect, - const int iftype_num[NUM_NL80211_IFTYPES], + struct iface_combination_params *params, void (*iter)(const struct ieee80211_iface_combination *c, void *data), void *data) @@ -1593,7 +1591,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, int num_interfaces = 0; u32 used_iftypes = 0; - if (radar_detect) { + if (params->radar_detect) { rcu_read_lock(); regdom = rcu_dereference(cfg80211_regdomain); if (regdom) @@ -1602,8 +1600,8 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, } for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) { - num_interfaces += iftype_num[iftype]; - if (iftype_num[iftype] > 0 && + num_interfaces += params->iftype_num[iftype]; + if (params->iftype_num[iftype] > 0 && !(wiphy->software_iftypes & BIT(iftype))) used_iftypes |= BIT(iftype); } @@ -1617,7 +1615,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, if (num_interfaces > c->max_interfaces) continue; - if (num_different_channels > c->num_different_channels) + if (params->num_different_channels > c->num_different_channels) continue; limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits, @@ -1632,16 +1630,17 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, all_iftypes |= limits[j].types; if (!(limits[j].types & BIT(iftype))) continue; - if (limits[j].max < iftype_num[iftype]) + if (limits[j].max < params->iftype_num[iftype]) goto cont; - limits[j].max -= iftype_num[iftype]; + limits[j].max -= params->iftype_num[iftype]; } } - if (radar_detect != (c->radar_detect_widths & radar_detect)) + if (params->radar_detect != + (c->radar_detect_widths & params->radar_detect)) goto cont; - if (radar_detect && c->radar_detect_regions && + if (params->radar_detect && c->radar_detect_regions && !(c->radar_detect_regions & BIT(region))) goto cont; @@ -1675,14 +1674,11 @@ cfg80211_iter_sum_ifcombs(const struct ieee80211_iface_combination *c, } int cfg80211_check_combinations(struct wiphy *wiphy, - const int num_different_channels, - const u8 radar_detect, - const int iftype_num[NUM_NL80211_IFTYPES]) + struct iface_combination_params *params) { int err, num = 0; - err = cfg80211_iter_combinations(wiphy, num_different_channels, - radar_detect, iftype_num, + err = cfg80211_iter_combinations(wiphy, params, cfg80211_iter_sum_ifcombs, &num); if (err) return err; -- cgit v1.2.3 From 0c317a02ca982ca093e71bf07cb562265ba40032 Mon Sep 17 00:00:00 2001 From: Purushottam Kushwaha Date: Wed, 12 Oct 2016 18:26:51 +0530 Subject: cfg80211: support virtual interfaces with different beacon intervals This commit provides a mechanism for the host drivers to advertise the support for different beacon intervals among the respective interface combinations in a group, through NL80211_IFACE_COMB_BI_MIN_GCD (u32). This value will be compared against GCD of all beaconing interfaces of matching combinations. If the driver doesn't advertise this value, the old behaviour where all beacon intervals must be identical is retained. If it is specified, then any beacon interval for an interface in the interface combination as well as the GCD of all active beacon intervals in the combination must be greater or equal to this value. Signed-off-by: Purushottam Kushwaha [change commit message, some variable names, small other things] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 15 +++++++++++++++ include/uapi/linux/nl80211.h | 8 ++++++-- net/wireless/core.h | 2 +- net/wireless/nl80211.c | 14 +++++++++++--- net/wireless/util.c | 43 +++++++++++++++++++++++++++++++++++++------ 5 files changed, 70 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ea108541e1e0..5000ec758eb3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -784,11 +784,19 @@ struct cfg80211_csa_settings { * @iftype_num: array with the number of interfaces of each interface * type. The index is the interface type as specified in &enum * nl80211_iftype. + * @beacon_int_gcd: a value specifying GCD of all beaconing interfaces, + * the GCD of a single value is considered the value itself, so for + * a single interface this should be set to that interface's beacon + * interval + * @beacon_int_different: a flag indicating whether or not all beacon + * intervals (of beaconing interfaces) are different or not. */ struct iface_combination_params { int num_different_channels; u8 radar_detect; int iftype_num[NUM_NL80211_IFTYPES]; + u32 beacon_int_gcd; + bool beacon_int_different; }; /** @@ -3100,6 +3108,12 @@ struct ieee80211_iface_limit { * only in special cases. * @radar_detect_widths: bitmap of channel widths supported for radar detection * @radar_detect_regions: bitmap of regions supported for radar detection + * @beacon_int_min_gcd: This interface combination supports different + * beacon intervals. + * = 0 - all beacon intervals for different interface must be same. + * > 0 - any beacon interval for the interface part of this combination AND + * *GCD* of all beacon intervals from beaconing interfaces of this + * combination must be greater or equal to this value. * * With this structure the driver can describe which interface * combinations it supports concurrently. @@ -3158,6 +3172,7 @@ struct ieee80211_iface_combination { bool beacon_int_infra_match; u8 radar_detect_widths; u8 radar_detect_regions; + u32 beacon_int_min_gcd; }; struct ieee80211_txrx_stypes { diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 56368e9b4622..1362d24957b5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4280,6 +4280,9 @@ enum nl80211_iface_limit_attrs { * of supported channel widths for radar detection. * @NL80211_IFACE_COMB_RADAR_DETECT_REGIONS: u32 attribute containing the bitmap * of supported regulatory regions for radar detection. + * @NL80211_IFACE_COMB_BI_MIN_GCD: u32 attribute specifying the minimum GCD of + * different beacon intervals supported by all the interface combinations + * in this group (if not present, all beacon intervals be identical). * @NUM_NL80211_IFACE_COMB: number of attributes * @MAX_NL80211_IFACE_COMB: highest attribute number * @@ -4287,8 +4290,8 @@ enum nl80211_iface_limit_attrs { * limits = [ #{STA} <= 1, #{AP} <= 1 ], matching BI, channels = 1, max = 2 * => allows an AP and a STA that must match BIs * - * numbers = [ #{AP, P2P-GO} <= 8 ], channels = 1, max = 8 - * => allows 8 of AP/GO + * numbers = [ #{AP, P2P-GO} <= 8 ], BI min gcd, channels = 1, max = 8, + * => allows 8 of AP/GO that can have BI gcd >= min gcd * * numbers = [ #{STA} <= 2 ], channels = 2, max = 2 * => allows two STAs on different channels @@ -4314,6 +4317,7 @@ enum nl80211_if_combination_attrs { NL80211_IFACE_COMB_NUM_CHANNELS, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, + NL80211_IFACE_COMB_BI_MIN_GCD, /* keep last */ NUM_NL80211_IFACE_COMB, diff --git a/net/wireless/core.h b/net/wireless/core.h index 08d2e948c9ad..21e31888cfa9 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -475,7 +475,7 @@ int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, u32 *mask); int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, - u32 beacon_int); + enum nl80211_iftype iftype, u32 beacon_int); void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c510810f0b7c..903cd5a5d1ce 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1075,6 +1075,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, c->radar_detect_regions))) goto nla_put_failure; + if (c->beacon_int_min_gcd && + nla_put_u32(msg, NL80211_IFACE_COMB_BI_MIN_GCD, + c->beacon_int_min_gcd)) + goto nla_put_failure; nla_nest_end(msg, nl_combi); } @@ -3803,7 +3807,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params.dtim_period = nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]); - err = cfg80211_validate_beacon_int(rdev, params.beacon_interval); + err = cfg80211_validate_beacon_int(rdev, dev->ieee80211_ptr->iftype, + params.beacon_interval); if (err) return err; @@ -8152,7 +8157,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) ibss.beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); - err = cfg80211_validate_beacon_int(rdev, ibss.beacon_interval); + err = cfg80211_validate_beacon_int(rdev, NL80211_IFTYPE_ADHOC, + ibss.beacon_interval); if (err) return err; @@ -9417,7 +9423,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) setup.beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); - err = cfg80211_validate_beacon_int(rdev, setup.beacon_interval); + err = cfg80211_validate_beacon_int(rdev, + NL80211_IFTYPE_MESH_POINT, + setup.beacon_interval); if (err) return err; } diff --git a/net/wireless/util.c b/net/wireless/util.c index 0d69b257793d..d2ea1f152d17 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1559,24 +1559,46 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef, EXPORT_SYMBOL(ieee80211_chandef_to_operating_class); int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, - u32 beacon_int) + enum nl80211_iftype iftype, u32 beacon_int) { struct wireless_dev *wdev; - int res = 0; + struct iface_combination_params params = { + .beacon_int_gcd = beacon_int, /* GCD(n) = n */ + }; if (beacon_int < 10 || beacon_int > 10000) return -EINVAL; + params.iftype_num[iftype] = 1; list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->beacon_interval) continue; - if (wdev->beacon_interval != beacon_int) { - res = -EINVAL; - break; + + params.iftype_num[wdev->iftype]++; + } + + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { + u32 bi_prev = wdev->beacon_interval; + + if (!wdev->beacon_interval) + continue; + + /* slight optimisation - skip identical BIs */ + if (wdev->beacon_interval == beacon_int) + continue; + + params.beacon_int_different = true; + + /* Get the GCD */ + while (bi_prev != 0) { + u32 tmp_bi = bi_prev; + + bi_prev = params.beacon_int_gcd % bi_prev; + params.beacon_int_gcd = tmp_bi; } } - return res; + return cfg80211_check_combinations(&rdev->wiphy, ¶ms); } int cfg80211_iter_combinations(struct wiphy *wiphy, @@ -1652,6 +1674,15 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, if ((all_iftypes & used_iftypes) != used_iftypes) goto cont; + if (params->beacon_int_gcd) { + if (c->beacon_int_min_gcd && + params->beacon_int_gcd < c->beacon_int_min_gcd) + return -EINVAL; + if (!c->beacon_int_min_gcd && + params->beacon_int_different) + goto cont; + } + /* This combination covered all interface types and * supported the requested numbers, so we're good. */ -- cgit v1.2.3 From cc6ac9bccf6b9814d37932e86a92f8e6a92960dc Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 8 Oct 2016 11:36:05 +0800 Subject: sctp: reuse sent_count to avoid retransmitted chunks for RTT measurements Now sctp uses chunk->resent to record if a chunk is retransmitted, for RTT measurements with retransmitted DATA chunks. chunk->sent_count was introduced to record how many times one chunk has been sent for prsctp RTX policy before. We actually can know if one chunk is retransmitted by checking chunk->sent_count is greater than 1. This patch is to remove resent from sctp_chunk and reuse sent_count to avoid retransmitted chunks for RTT measurements. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 +- net/sctp/output.c | 3 ++- net/sctp/outqueue.c | 4 +--- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 11c3bf262a85..27a933e4c90e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -641,7 +641,6 @@ struct sctp_chunk { #define SCTP_NEED_FRTX 0x1 #define SCTP_DONT_FRTX 0x2 __u16 rtt_in_progress:1, /* This chunk used for RTT calc? */ - resent:1, /* Has this chunk ever been resent. */ has_tsn:1, /* Does this chunk have a TSN yet? */ has_ssn:1, /* Does this chunk have a SSN yet? */ singleton:1, /* Only chunk in the packet? */ @@ -656,6 +655,7 @@ struct sctp_chunk { fast_retransmit:2; /* Is this chunk fast retransmitted? */ }; +#define sctp_chunk_retransmitted(chunk) (chunk->sent_count > 1) void sctp_chunk_hold(struct sctp_chunk *); void sctp_chunk_put(struct sctp_chunk *); int sctp_user_addto_chunk(struct sctp_chunk *chunk, int len, diff --git a/net/sctp/output.c b/net/sctp/output.c index 2a5c1896d18f..84f66d560b02 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -552,7 +552,8 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) * for a given destination transport address. */ - if (!chunk->resent && !tp->rto_pending) { + if (!sctp_chunk_retransmitted(chunk) && + !tp->rto_pending) { chunk->rtt_in_progress = 1; tp->rto_pending = 1; } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 582585393d35..e54082699520 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -507,8 +507,6 @@ void sctp_retransmit_mark(struct sctp_outq *q, transport->rto_pending = 0; } - chunk->resent = 1; - /* Move the chunk to the retransmit queue. The chunks * on the retransmit queue are always kept in order. */ @@ -1439,7 +1437,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * instance). */ if (!tchunk->tsn_gap_acked && - !tchunk->resent && + !sctp_chunk_retransmitted(tchunk) && tchunk->rtt_in_progress) { tchunk->rtt_in_progress = 0; rtt = jiffies - tchunk->sent_at; -- cgit v1.2.3 From 8ae808eb853e3789b81b8a502cdf22bb01b76880 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 8 Oct 2016 11:40:16 +0800 Subject: sctp: remove the old ttl expires policy The prsctp polices include ttl expires policy already, we should remove the old ttl expires codes, and just adjust the new polices' codes to be compatible with the old one for users. This patch is to remove all the old expires codes, and if prsctp polices are not set, it will still set msg's expires_at and check the expires in sctp_check_abandoned. Note that asoc->prsctp_enable is set by default, so users can't feel any difference even if they use the old expires api in userspace. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 - net/sctp/chunk.c | 32 ++++++++------------------------ net/sctp/output.c | 3 --- 3 files changed, 8 insertions(+), 28 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 27a933e4c90e..bd4a3ded7c87 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -530,7 +530,6 @@ struct sctp_datamsg { /* Did the messenge fail to send? */ int send_error; u8 send_failed:1, - can_abandon:1, /* can chunks from this message can be abandoned. */ can_delay; /* should this message be Nagle delayed */ }; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 7a1cdf43e49d..615f0ddd41df 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -52,7 +52,6 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) atomic_set(&msg->refcnt, 1); msg->send_failed = 0; msg->send_error = 0; - msg->can_abandon = 0; msg->can_delay = 1; msg->expires_at = 0; INIT_LIST_HEAD(&msg->chunks); @@ -182,20 +181,11 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, /* Note: Calculate this outside of the loop, so that all fragments * have the same expiration. */ - if (sinfo->sinfo_timetolive) { - /* sinfo_timetolive is in milliseconds */ + if (asoc->peer.prsctp_capable && sinfo->sinfo_timetolive && + (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags) || + !SCTP_PR_POLICY(sinfo->sinfo_flags))) msg->expires_at = jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); - msg->can_abandon = 1; - - pr_debug("%s: msg:%p expires_at:%ld jiffies:%ld\n", __func__, - msg, msg->expires_at, jiffies); - } - - if (asoc->peer.prsctp_capable && - SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags)) - msg->expires_at = - jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); /* This is the biggest possible DATA chunk that can fit into * the packet @@ -354,18 +344,8 @@ errout: /* Check whether this message has expired. */ int sctp_chunk_abandoned(struct sctp_chunk *chunk) { - if (!chunk->asoc->peer.prsctp_capable || - !SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) { - struct sctp_datamsg *msg = chunk->msg; - - if (!msg->can_abandon) - return 0; - - if (time_after(jiffies, msg->expires_at)) - return 1; - + if (!chunk->asoc->peer.prsctp_capable) return 0; - } if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && time_after(jiffies, chunk->msg->expires_at)) { @@ -378,6 +358,10 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) chunk->sent_count > chunk->sinfo.sinfo_timetolive) { chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; return 1; + } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && + chunk->msg->expires_at && + time_after(jiffies, chunk->msg->expires_at)) { + return 1; } /* PRIO policy is processed by sendmsg, not here */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 84f66d560b02..4282b488985b 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -866,9 +866,6 @@ static void sctp_packet_append_data(struct sctp_packet *packet, rwnd = 0; asoc->peer.rwnd = rwnd; - /* Has been accepted for transmission. */ - if (!asoc->peer.prsctp_capable) - chunk->msg->can_abandon = 0; sctp_chunk_assign_tsn(chunk); sctp_chunk_assign_ssn(chunk); } -- cgit v1.2.3 From 6104e112f4a613506ba1ea9d4b974279f888006b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 12 Oct 2016 13:20:11 -0700 Subject: net: ipv4: Do not drop to make_route if oif is l3mdev Commit e0d56fdd7342 was a bit aggressive removing l3mdev calls in the IPv4 stack. If the fib_lookup fails we do not want to drop to make_route if the oif is an l3mdev device. Also reverts 19664c6a0009 ("net: l3mdev: Remove netif_index_is_l3_master") which removed netif_index_is_l3_master. Fixes: e0d56fdd7342 ("net: l3mdev: remove redundant calls") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 24 ++++++++++++++++++++++++ net/ipv4/route.c | 3 ++- 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index b220dabeab45..3832099289c5 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -114,6 +114,25 @@ static inline u32 l3mdev_fib_table(const struct net_device *dev) return tb_id; } +static inline bool netif_index_is_l3_master(struct net *net, int ifindex) +{ + struct net_device *dev; + bool rc = false; + + if (ifindex == 0) + return false; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (dev) + rc = netif_is_l3_master(dev); + + rcu_read_unlock(); + + return rc; +} + struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); static inline @@ -207,6 +226,11 @@ static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) return 0; } +static inline bool netif_index_is_l3_master(struct net *net, int ifindex) +{ + return false; +} + static inline struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f2be689a6c85..62d4d90c1389 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2265,7 +2265,8 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, if (err) { res.fi = NULL; res.table = NULL; - if (fl4->flowi4_oif) { + if (fl4->flowi4_oif && + !netif_index_is_l3_master(net, fl4->flowi4_oif)) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. -- cgit v1.2.3 From 165779231ff9e9c4ac7baaee84eff91d589f3e22 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Thu, 13 Oct 2016 09:06:41 +0300 Subject: net/sched: act_mirred: Rename tcfm_ok_push to tcfm_mac_header_xmit and make it a bool 'tcfm_ok_push' specifies whether a mac_len sized push is needed upon egress to the target device (if action is performed at ingress). Rename it to 'tcfm_mac_header_xmit' as this is actually an attribute of the target device (and use a bool instead of int). This allows to decouple the attribute from the action to be taken. Signed-off-by: Shmulik Ladkani Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_mirred.h | 2 +- net/sched/act_mirred.c | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 62770add15bd..95431092c4b6 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -8,7 +8,7 @@ struct tcf_mirred { struct tc_action common; int tcfm_eaction; int tcfm_ifindex; - int tcfm_ok_push; + bool tcfm_mac_header_xmit; struct net_device __rcu *tcfm_dev; struct list_head tcfm_list; }; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 667dc382df82..16e17a887bd6 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -60,11 +60,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, mirred_net_id); struct nlattr *tb[TCA_MIRRED_MAX + 1]; + bool mac_header_xmit = false; struct tc_mirred *parm; struct tcf_mirred *m; struct net_device *dev; - int ret, ok_push = 0; bool exists = false; + int ret; if (nla == NULL) return -EINVAL; @@ -102,10 +103,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, case ARPHRD_IPGRE: case ARPHRD_VOID: case ARPHRD_NONE: - ok_push = 0; + mac_header_xmit = false; break; default: - ok_push = 1; + mac_header_xmit = true; break; } } else { @@ -136,7 +137,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, dev_put(rcu_dereference_protected(m->tcfm_dev, 1)); dev_hold(dev); rcu_assign_pointer(m->tcfm_dev, dev); - m->tcfm_ok_push = ok_push; + m->tcfm_mac_header_xmit = mac_header_xmit; } if (ret == ACT_P_CREATED) { @@ -181,7 +182,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, goto out; if (!(at & AT_EGRESS)) { - if (m->tcfm_ok_push) + if (m->tcfm_mac_header_xmit) skb_push_rcsum(skb2, skb->mac_len); } -- cgit v1.2.3 From 5724b8b5694794829a071c6da7dd0bc146df0756 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Thu, 13 Oct 2016 09:06:43 +0300 Subject: net/sched: tc_mirred: Rename public predicates 'is_tcf_mirred_redirect' and 'is_tcf_mirred_mirror' These accessors are used in various drivers that support tc offloading, to detect properties of a given 'tc_action'. 'is_tcf_mirred_redirect' tests that the action is TCA_EGRESS_REDIR. 'is_tcf_mirred_mirror' tests that the action is TCA_EGRESS_MIRROR. As a prep towards supporting INGRESS redir/mirror, rename these predicates to reflect their true meaning: s/is_tcf_mirred_redirect/is_tcf_mirred_egress_redirect/ s/is_tcf_mirred_mirror/is_tcf_mirred_egress_mirror/ Signed-off-by: Shmulik Ladkani Cc: Hariprasad S Cc: Jeff Kirsher Cc: Saeed Mahameed Cc: Jiri Pirko Cc: Ido Schimmel Cc: Jakub Kicinski Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 +++- drivers/net/ethernet/netronome/nfp/nfp_net_offload.c | 2 +- include/net/tc_act/tc_mirred.h | 4 ++-- 6 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index 49d2debb334e..52af62e0ecb6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -113,7 +113,7 @@ static int fill_action_fields(struct adapter *adap, } /* Re-direct to specified port in hardware. */ - if (is_tcf_mirred_redirect(a)) { + if (is_tcf_mirred_egress_redirect(a)) { struct net_device *n_dev; unsigned int i, index; bool found = false; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index a244d9a67264..784b0b98ab2f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8410,7 +8410,7 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter, } /* Redirect to a VF or a offloaded macvlan */ - if (is_tcf_mirred_redirect(a)) { + if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); err = handle_redirect_action(adapter, ifindex, queue, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index ce8c54d18906..135a95bcc392 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -404,7 +404,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, continue; } - if (is_tcf_mirred_redirect(a)) { + if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev; struct mlx5e_priv *out_priv; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 1ec0a4ce3c46..43a5eddc2c11 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1237,8 +1237,10 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, tcf_exts_to_list(cls->exts, &actions); list_for_each_entry(a, &actions, list) { - if (!is_tcf_mirred_mirror(a) || protocol != htons(ETH_P_ALL)) + if (!is_tcf_mirred_egress_mirror(a) || + protocol != htons(ETH_P_ALL)) { return -ENOTSUPP; + } err = mlxsw_sp_port_add_cls_matchall_mirror(mlxsw_sp_port, cls, a, ingress); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c index 8acfb631a0ea..cfed40c0e310 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -128,7 +128,7 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) if (is_tcf_gact_shot(a)) return NN_ACT_TC_DROP; - if (is_tcf_mirred_redirect(a) && + if (is_tcf_mirred_egress_redirect(a) && tcf_mirred_ifindex(a) == nn->netdev->ifindex) return NN_ACT_TC_REDIR; } diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index 95431092c4b6..604bc31e23ab 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -14,7 +14,7 @@ struct tcf_mirred { }; #define to_mirred(a) ((struct tcf_mirred *)a) -static inline bool is_tcf_mirred_redirect(const struct tc_action *a) +static inline bool is_tcf_mirred_egress_redirect(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT if (a->ops && a->ops->type == TCA_ACT_MIRRED) @@ -23,7 +23,7 @@ static inline bool is_tcf_mirred_redirect(const struct tc_action *a) return false; } -static inline bool is_tcf_mirred_mirror(const struct tc_action *a) +static inline bool is_tcf_mirred_egress_mirror(const struct tc_action *a) { #ifdef CONFIG_NET_CLS_ACT if (a->ops && a->ops->type == TCA_ACT_MIRRED) -- cgit v1.2.3 From 9d6280da39c04832d6abf5f4ecc8175c9aad91c0 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 13 Oct 2016 18:50:02 +0200 Subject: IPv6: Drop the temporary address regen_timer The randomized interface identifier (rndid) was periodically updated from the regen_timer timer. Simplify the code by updating the rndid only when needed by ipv6_try_regen_rndid(). This makes the follow-up DESYNC_FACTOR fix much simpler. Also it fixes a reference counting error in this error path, where an in6_dev_put was missing: err = addrconf_sysctl_register(ndev); if (err) { ipv6_mc_destroy_dev(ndev); - del_timer(&ndev->regen_timer); snmp6_unregister_dev(ndev); goto err_release; Signed-off-by: Jiri Bohac Signed-off-by: David S. Miller --- include/net/if_inet6.h | 1 - net/ipv6/addrconf.c | 61 ++++++++------------------------------------------ 2 files changed, 9 insertions(+), 53 deletions(-) (limited to 'include/net') diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 515352c6280a..ae707352f463 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -191,7 +191,6 @@ struct inet6_dev { int dead; u8 rndid[8]; - struct timer_list regen_timer; struct list_head tempaddr_list; struct in6_addr token; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9faafe58516a..58c3d73403a5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -147,9 +147,8 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) } #endif -static void __ipv6_regen_rndid(struct inet6_dev *idev); -static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); -static void ipv6_regen_rndid(unsigned long data); +static void ipv6_regen_rndid(struct inet6_dev *idev); +static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static int ipv6_count_addresses(struct inet6_dev *idev); @@ -409,9 +408,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) goto err_release; } - /* One reference from device. We must do this before - * we invoke __ipv6_regen_rndid(). - */ + /* One reference from device. */ in6_dev_hold(ndev); if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) @@ -425,17 +422,14 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) #endif INIT_LIST_HEAD(&ndev->tempaddr_list); - setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); if ((dev->flags&IFF_LOOPBACK) || dev->type == ARPHRD_TUNNEL || dev->type == ARPHRD_TUNNEL6 || dev->type == ARPHRD_SIT || dev->type == ARPHRD_NONE) { ndev->cnf.use_tempaddr = -1; - } else { - in6_dev_hold(ndev); - ipv6_regen_rndid((unsigned long) ndev); - } + } else + ipv6_regen_rndid(ndev); ndev->token = in6addr_any; @@ -447,7 +441,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) err = addrconf_sysctl_register(ndev); if (err) { ipv6_mc_destroy_dev(ndev); - del_timer(&ndev->regen_timer); snmp6_unregister_dev(ndev); goto err_release; } @@ -1222,7 +1215,7 @@ retry: } in6_ifa_hold(ifp); memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); - __ipv6_try_regen_rndid(idev, tmpaddr); + ipv6_try_regen_rndid(idev, tmpaddr); memcpy(&addr.s6_addr[8], idev->rndid, 8); age = (now - ifp->tstamp) / HZ; tmp_valid_lft = min_t(__u32, @@ -2150,7 +2143,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) } /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ -static void __ipv6_regen_rndid(struct inet6_dev *idev) +static void ipv6_regen_rndid(struct inet6_dev *idev) { regen: get_random_bytes(idev->rndid, sizeof(idev->rndid)); @@ -2179,43 +2172,10 @@ regen: } } -static void ipv6_regen_rndid(unsigned long data) -{ - struct inet6_dev *idev = (struct inet6_dev *) data; - unsigned long expires; - - rcu_read_lock_bh(); - write_lock_bh(&idev->lock); - - if (idev->dead) - goto out; - - __ipv6_regen_rndid(idev); - - expires = jiffies + - idev->cnf.temp_prefered_lft * HZ - - idev->cnf.regen_max_retry * idev->cnf.dad_transmits * - NEIGH_VAR(idev->nd_parms, RETRANS_TIME) - - idev->cnf.max_desync_factor * HZ; - if (time_before(expires, jiffies)) { - pr_warn("%s: too short regeneration interval; timer disabled for %s\n", - __func__, idev->dev->name); - goto out; - } - - if (!mod_timer(&idev->regen_timer, expires)) - in6_dev_hold(idev); - -out: - write_unlock_bh(&idev->lock); - rcu_read_unlock_bh(); - in6_dev_put(idev); -} - -static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) +static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) { if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) - __ipv6_regen_rndid(idev); + ipv6_regen_rndid(idev); } /* @@ -3594,9 +3554,6 @@ restart: if (!how) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); - if (how && del_timer(&idev->regen_timer)) - in6_dev_put(idev); - /* Step 3: clear tempaddr list */ while (!list_empty(&idev->tempaddr_list)) { ifa = list_first_entry(&idev->tempaddr_list, -- cgit v1.2.3 From 76506a986dc31394fd1f2741db037d29c7e57843 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 13 Oct 2016 18:52:15 +0200 Subject: IPv6: fix DESYNC_FACTOR The IPv6 temporary address generation uses a variable called DESYNC_FACTOR to prevent hosts updating the addresses at the same time. Quoting RFC 4941: ... The value DESYNC_FACTOR is a random value (different for each client) that ensures that clients don't synchronize with each other and generate new addresses at exactly the same time ... DESYNC_FACTOR is defined as: DESYNC_FACTOR -- A random value within the range 0 - MAX_DESYNC_FACTOR. It is computed once at system start (rather than each time it is used) and must never be greater than (TEMP_VALID_LIFETIME - REGEN_ADVANCE). First, I believe the RFC has a typo in it and meant to say: "and must never be greater than (TEMP_PREFERRED_LIFETIME - REGEN_ADVANCE)" The reason is that at various places in the RFC, DESYNC_FACTOR is used in a calculation like (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR) or (TEMP_PREFERRED_LIFETIME - REGEN_ADVANCE - DESYNC_FACTOR). It needs to be smaller than (TEMP_PREFERRED_LIFETIME - REGEN_ADVANCE) for the result of these calculations to be larger than zero. It's never used in a calculation together with TEMP_VALID_LIFETIME. I already submitted an errata to the rfc-editor: https://www.rfc-editor.org/errata_search.php?rfc=4941 The Linux implementation of DESYNC_FACTOR is very wrong: max_desync_factor is used in places DESYNC_FACTOR should be used. max_desync_factor is initialized to the RFC-recommended value for MAX_DESYNC_FACTOR (600) but the whole point is to get a _random_ value. And nothing ensures that the value used is not greater than (TEMP_PREFERRED_LIFETIME - REGEN_ADVANCE), which leads to underflows. The effect can easily be observed when setting the temp_prefered_lft sysctl e.g. to 60. The preferred lifetime of the temporary addresses will be bogus. TEMP_PREFERRED_LIFETIME and REGEN_ADVANCE are not constants and can be influenced by these three sysctls: regen_max_retry, dad_transmits and temp_prefered_lft. Thus, the upper bound for desync_factor needs to be re-calculated each time a new address is generated and if desync_factor is larger than the new upper bound, a new random value needs to be re-generated. And since we already have max_desync_factor configurable per interface, we also need to calculate and store desync_factor per interface. Signed-off-by: Jiri Bohac Signed-off-by: David S. Miller --- include/net/if_inet6.h | 1 + net/ipv6/addrconf.c | 39 +++++++++++++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index ae707352f463..b0576cb2ab25 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -190,6 +190,7 @@ struct inet6_dev { __u32 if_flags; int dead; + u32 desync_factor; u8 rndid[8]; struct list_head tempaddr_list; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 58c3d73403a5..cc7c26d05f45 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -422,6 +422,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) #endif INIT_LIST_HEAD(&ndev->tempaddr_list); + ndev->desync_factor = U32_MAX; if ((dev->flags&IFF_LOOPBACK) || dev->type == ARPHRD_TUNNEL || dev->type == ARPHRD_TUNNEL6 || @@ -1183,6 +1184,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i int ret = 0; u32 addr_flags; unsigned long now = jiffies; + long max_desync_factor; write_lock_bh(&idev->lock); if (ift) { @@ -1218,20 +1220,41 @@ retry: ipv6_try_regen_rndid(idev, tmpaddr); memcpy(&addr.s6_addr[8], idev->rndid, 8); age = (now - ifp->tstamp) / HZ; + + regen_advance = idev->cnf.regen_max_retry * + idev->cnf.dad_transmits * + NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ; + + /* recalculate max_desync_factor each time and update + * idev->desync_factor if it's larger + */ + max_desync_factor = min_t(__u32, + idev->cnf.max_desync_factor, + idev->cnf.temp_prefered_lft - regen_advance); + + if (unlikely(idev->desync_factor > max_desync_factor)) { + if (max_desync_factor > 0) { + get_random_bytes(&idev->desync_factor, + sizeof(idev->desync_factor)); + idev->desync_factor %= max_desync_factor; + } else { + idev->desync_factor = 0; + } + } + tmp_valid_lft = min_t(__u32, ifp->valid_lft, idev->cnf.temp_valid_lft + age); - tmp_prefered_lft = min_t(__u32, - ifp->prefered_lft, - idev->cnf.temp_prefered_lft + age - - idev->cnf.max_desync_factor); + tmp_prefered_lft = idev->cnf.temp_prefered_lft + age - + idev->desync_factor; + /* guard against underflow in case of concurrent updates to cnf */ + if (unlikely(tmp_prefered_lft < 0)) + tmp_prefered_lft = 0; + tmp_prefered_lft = min_t(__u32, ifp->prefered_lft, tmp_prefered_lft); tmp_plen = ifp->prefix_len; tmp_tstamp = ifp->tstamp; spin_unlock_bh(&ifp->lock); - regen_advance = idev->cnf.regen_max_retry * - idev->cnf.dad_transmits * - NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ; write_unlock_bh(&idev->lock); /* A temporary address is created only if this calculated Preferred @@ -2316,7 +2339,7 @@ static void manage_tempaddrs(struct inet6_dev *idev, max_valid = 0; max_prefered = idev->cnf.temp_prefered_lft - - idev->cnf.max_desync_factor - age; + idev->desync_factor - age; if (max_prefered < 0) max_prefered = 0; -- cgit v1.2.3 From 1104d9ba443a3972052ea4eaa01e51f9ee084652 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 14 Oct 2016 11:25:36 -0700 Subject: lwtunnel: Add destroy state operation Users of lwt tunnels may set up some secondary state in build_state function. Add a corresponding destroy_state function to allow users to clean up state. This destroy state function is called from lwstate_free. Also, we now free lwstate using kfree_rcu so user can assume structure is not freed before rcu. Acked-by: Roopa Prabhu Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/lwtunnel.h | 7 +++---- net/core/lwtunnel.c | 13 +++++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index ea3f80f58fd6..67d235f43202 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -29,6 +29,7 @@ struct lwtunnel_state { int (*orig_input)(struct sk_buff *); int len; __u16 headroom; + struct rcu_head rcu; __u8 data[0]; }; @@ -36,6 +37,7 @@ struct lwtunnel_encap_ops { int (*build_state)(struct net_device *dev, struct nlattr *encap, unsigned int family, const void *cfg, struct lwtunnel_state **ts); + void (*destroy_state)(struct lwtunnel_state *lws); int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*input)(struct sk_buff *skb); int (*fill_encap)(struct sk_buff *skb, @@ -46,10 +48,7 @@ struct lwtunnel_encap_ops { }; #ifdef CONFIG_LWTUNNEL -static inline void lwtstate_free(struct lwtunnel_state *lws) -{ - kfree(lws); -} +void lwtstate_free(struct lwtunnel_state *lws); static inline struct lwtunnel_state * lwtstate_get(struct lwtunnel_state *lws) diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index e5f84c26ba1a..88fd64250b02 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -130,6 +130,19 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type, } EXPORT_SYMBOL(lwtunnel_build_state); +void lwtstate_free(struct lwtunnel_state *lws) +{ + const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type]; + + if (ops->destroy_state) { + ops->destroy_state(lws); + kfree_rcu(lws, rcu); + } else { + kfree(lws); + } +} +EXPORT_SYMBOL(lwtstate_free); + int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { const struct lwtunnel_encap_ops *ops; -- cgit v1.2.3 From a3e2f4b6ed9de85086850fe49801f9b00adb6ae1 Mon Sep 17 00:00:00 2001 From: Michael Braun Date: Sat, 15 Oct 2016 13:28:19 +0200 Subject: mac80211: fix A-MSDU outer SA/DA According to IEEE 802.11-2012 section 8.3.2 table 8-19, the outer SA/DA of A-MSDU frames need to be changed depending on FromDS/ToDS values. Signed-off-by: Michael Braun [use ether_addr_copy and add alignment annotations] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 +- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/tx.c | 23 +++++++++++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a810dfcb83c2..e50c9e02889a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1438,7 +1438,7 @@ enum ieee80211_vif_flags { struct ieee80211_vif { enum nl80211_iftype type; struct ieee80211_bss_conf bss_conf; - u8 addr[ETH_ALEN]; + u8 addr[ETH_ALEN] __aligned(2); bool p2p; bool csa_active; bool mu_mimo_owner; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 103187ca9474..42d194a04e1a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -443,7 +443,7 @@ struct ieee80211_if_managed { struct ieee80211_mgd_auth_data *auth_data; struct ieee80211_mgd_assoc_data *assoc_data; - u8 bssid[ETH_ALEN]; + u8 bssid[ETH_ALEN] __aligned(2); u16 aid; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 9661f5441686..772e36909fa3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3064,6 +3064,7 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata, int subframe_len = skb->len - hdr_len; void *data; u8 *qc, *h_80211_src, *h_80211_dst; + const u8 *bssid; if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) return false; @@ -3087,6 +3088,28 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata, ether_addr_copy(amsdu_hdr->h_source, h_80211_src); ether_addr_copy(amsdu_hdr->h_dest, h_80211_dst); + /* according to IEEE 802.11-2012 8.3.2 table 8-19, the outer SA/DA + * fields needs to be changed to BSSID for A-MSDU frames depending + * on FromDS/ToDS values. + */ + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + bssid = sdata->u.mgd.bssid; + break; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + bssid = sdata->vif.addr; + break; + default: + bssid = NULL; + } + + if (bssid && ieee80211_has_fromds(hdr->frame_control)) + ether_addr_copy(h_80211_src, bssid); + + if (bssid && ieee80211_has_tods(hdr->frame_control)) + ether_addr_copy(h_80211_dst, bssid); + qc = ieee80211_get_qos_ctl(hdr); *qc |= IEEE80211_QOS_CTL_A_MSDU_PRESENT; -- cgit v1.2.3 From a04a480d4392ea6efd117be2de564117b2a009c0 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 16 Oct 2016 20:02:52 -0700 Subject: net: Require exact match for TCP socket lookups if dif is l3mdev Currently, socket lookups for l3mdev (vrf) use cases can match a socket that is bound to a port but not a device (ie., a global socket). If the sysctl tcp_l3mdev_accept is not set this leads to ack packets going out based on the main table even though the packet came in from an L3 domain. The end result is that the connection does not establish creating confusion for users since the service is running and a socket shows in ss output. Fix by requiring an exact dif to sk_bound_dev_if match if the skb came through an interface enslaved to an l3mdev device and the tcp_l3mdev_accept is not set. skb's through an l3mdev interface are marked by setting a flag in inet{6}_skb_parm. The IPv6 variant is already set; this patch adds the flag for IPv4. Using an skb flag avoids a device lookup on the dif. The flag is set in the VRF driver using the IP{6}CB macros. For IPv4, the inet_skb_parm struct is moved in the cb per commit 971f10eca186, so the match function in the TCP stack needs to use TCP_SKB_CB. For IPv6, the move is done after the socket lookup, so IP6CB is used. The flags field in inet_skb_parm struct needs to be increased to add another flag. There is currently a 1-byte hole following the flags, so it can be expanded to u16 without increasing the size of the struct. Fixes: 193125dbd8eb ("net: Introduce VRF device driver") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 2 ++ include/linux/ipv6.h | 17 ++++++++++++++--- include/net/ip.h | 8 +++++++- include/net/tcp.h | 13 ++++++++++++- net/ipv4/inet_hashtables.c | 8 +++++--- net/ipv6/inet6_hashtables.c | 7 ++++--- 6 files changed, 44 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 85c271c70d42..820de6a9ddde 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -956,6 +956,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, if (skb->pkt_type == PACKET_LOOPBACK) { skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; + IP6CB(skb)->flags |= IP6SKB_L3SLAVE; skb->pkt_type = PACKET_HOST; goto out; } @@ -996,6 +997,7 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, { skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; + IPCB(skb)->flags |= IPSKB_L3SLAVE; /* loopback traffic; do not push through packet taps again. * Reset pkt_type for upper layers to process skb diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7e9a789be5e0..ca1ad9ebbc92 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -123,12 +123,12 @@ struct inet6_skb_parm { }; #if defined(CONFIG_NET_L3_MASTER_DEV) -static inline bool skb_l3mdev_slave(__u16 flags) +static inline bool ipv6_l3mdev_skb(__u16 flags) { return flags & IP6SKB_L3SLAVE; } #else -static inline bool skb_l3mdev_slave(__u16 flags) +static inline bool ipv6_l3mdev_skb(__u16 flags) { return false; } @@ -139,11 +139,22 @@ static inline bool skb_l3mdev_slave(__u16 flags) static inline int inet6_iif(const struct sk_buff *skb) { - bool l3_slave = skb_l3mdev_slave(IP6CB(skb)->flags); + bool l3_slave = ipv6_l3mdev_skb(IP6CB(skb)->flags); return l3_slave ? skb->skb_iif : IP6CB(skb)->iif; } +/* can not be used in TCP layer after tcp_v6_fill_cb */ +static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb) +{ +#if defined(CONFIG_NET_L3_MASTER_DEV) + if (!net->ipv4.sysctl_tcp_l3mdev_accept && + ipv6_l3mdev_skb(IP6CB(skb)->flags)) + return true; +#endif + return false; +} + struct tcp6_request_sock { struct tcp_request_sock tcp6rsk_tcp; }; diff --git a/include/net/ip.h b/include/net/ip.h index bc43c0fcae12..c9d07988911e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -38,7 +38,7 @@ struct sock; struct inet_skb_parm { int iif; struct ip_options opt; /* Compiled IP options */ - unsigned char flags; + u16 flags; #define IPSKB_FORWARDED BIT(0) #define IPSKB_XFRM_TUNNEL_SIZE BIT(1) @@ -48,10 +48,16 @@ struct inet_skb_parm { #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) #define IPSKB_FRAG_SEGS BIT(7) +#define IPSKB_L3SLAVE BIT(8) u16 frag_max_size; }; +static inline bool ipv4_l3mdev_skb(u16 flags) +{ + return !!(flags & IPSKB_L3SLAVE); +} + static inline unsigned int ip_hdrlen(const struct sk_buff *skb) { return ip_hdr(skb)->ihl * 4; diff --git a/include/net/tcp.h b/include/net/tcp.h index f83b7f220a65..5b82d4d94834 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -794,12 +794,23 @@ struct tcp_skb_cb { */ static inline int tcp_v6_iif(const struct sk_buff *skb) { - bool l3_slave = skb_l3mdev_slave(TCP_SKB_CB(skb)->header.h6.flags); + bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif; } #endif +/* TCP_SKB_CB reference means this can not be used from early demux */ +static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (!net->ipv4.sysctl_tcp_l3mdev_accept && + ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) + return true; +#endif + return false; +} + /* Due to TSO, an SKB can be composed of multiple actual * packets. To keep these tracked properly, we use this. */ diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 77c20a489218..ca97835bfec4 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -25,6 +25,7 @@ #include #include #include +#include #include static u32 inet_ehashfn(const struct net *net, const __be32 laddr, @@ -172,7 +173,7 @@ EXPORT_SYMBOL_GPL(__inet_inherit_port); static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const __be32 daddr, - const int dif) + const int dif, bool exact_dif) { int score = -1; struct inet_sock *inet = inet_sk(sk); @@ -186,7 +187,7 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score += 4; } - if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if || exact_dif) { if (sk->sk_bound_dev_if != dif) return -1; score += 4; @@ -215,11 +216,12 @@ struct sock *__inet_lookup_listener(struct net *net, unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; int score, hiscore = 0, matches = 0, reuseport = 0; + bool exact_dif = inet_exact_dif_match(net, skb); struct sock *sk, *result = NULL; u32 phash = 0; sk_for_each_rcu(sk, &ilb->head) { - score = compute_score(sk, net, hnum, daddr, dif); + score = compute_score(sk, net, hnum, daddr, dif, exact_dif); if (score > hiscore) { reuseport = sk->sk_reuseport; if (reuseport) { diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 00cf28ad4565..2fd0374a35b1 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -96,7 +96,7 @@ EXPORT_SYMBOL(__inet6_lookup_established); static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const struct in6_addr *daddr, - const int dif) + const int dif, bool exact_dif) { int score = -1; @@ -109,7 +109,7 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score++; } - if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if || exact_dif) { if (sk->sk_bound_dev_if != dif) return -1; score++; @@ -131,11 +131,12 @@ struct sock *inet6_lookup_listener(struct net *net, unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; int score, hiscore = 0, matches = 0, reuseport = 0; + bool exact_dif = inet6_exact_dif_match(net, skb); struct sock *sk, *result = NULL; u32 phash = 0; sk_for_each(sk, &ilb->head) { - score = compute_score(sk, net, hnum, daddr, dif); + score = compute_score(sk, net, hnum, daddr, dif, exact_dif); if (score > hiscore) { reuseport = sk->sk_reuseport; if (reuseport) { -- cgit v1.2.3 From a1264c3d6c04f0e4e9d447caaa249d6288b01520 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 18 Oct 2016 23:12:07 +0300 Subject: wireless: radiotap: fix timestamp sampling position values The values don't match the radiotap spec, corrected that. Reported-by: Oz Shalev Signed-off-by: Johannes Berg --- include/net/ieee80211_radiotap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index ba07b9d8ed63..d0e7e3f8e67a 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -333,9 +333,9 @@ enum ieee80211_radiotap_type { #define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_NS 0x0003 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_MASK 0x00F0 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_BEGIN_MDPU 0x0000 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU 0x0010 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ 0x0010 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_PPDU 0x0020 -#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ 0x0030 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU 0x0030 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_UNKNOWN 0x00F0 #define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_64BIT 0x00 -- cgit v1.2.3 From 0aa419ec6e7b98d485f6c66a62a90965eda3c1bb Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 18 Oct 2016 23:12:10 +0300 Subject: mac80211: allow the driver not to pass the tid to ieee80211_sta_uapsd_trigger iwlwifi will check internally that the tid maps to an AC that is trigger enabled, but can't know what tid exactly. Allow the driver to pass a generic tid and make mac80211 assume that a trigger frame was received. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ net/mac80211/rx.c | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e50c9e02889a..f3dbadafe16e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4087,6 +4087,10 @@ void ieee80211_sta_pspoll(struct ieee80211_sta *sta); * This must be used in conjunction with ieee80211_sta_ps_transition() * and possibly ieee80211_sta_pspoll(); calls to all three must be * serialized. + * %IEEE80211_NUM_TIDS can be passed as the tid if the tid is unknown. + * In this case, mac80211 will not check that this tid maps to an AC + * that is trigger enabled and assume that the caller did the proper + * checks. */ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *sta, u8 tid); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index fc08a50b3ebd..837d56261bb2 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1394,13 +1394,14 @@ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *pubsta, u8 tid) u8 ac = ieee802_1d_to_ac[tid & 7]; /* - * If this AC is not trigger-enabled do nothing. + * If this AC is not trigger-enabled do nothing unless the + * driver is calling us after it already checked. * * NB: This could/should check a separate bitmap of trigger- * enabled queues, but for now we only implement uAPSD w/o * TSPEC changes to the ACs, so they're always the same. */ - if (!(sta->sta.uapsd_queues & BIT(ac))) + if (!(sta->sta.uapsd_queues & BIT(ac)) && tid != IEEE80211_NUM_TIDS) return; /* if we are in a service period, do nothing */ -- cgit v1.2.3 From f3fe4e93dd6346c01fd4070ae02ec746fbae73bb Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 18 Oct 2016 23:12:11 +0300 Subject: mac80211: add a HW flag for supporting HW TX fragmentation Currently mac80211 determines whether HW does fragmentation by checking whether the set_frag_threshold callback is set or not. However, some drivers may want to set the HW fragmentation capability depending on HW generation. Allow this by checking a HW flag instead of checking the callback. Signed-off-by: Sara Sharon [added the flag to ath10k and wlcore] Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/mac.c | 1 + drivers/net/wireless/ti/wlcore/main.c | 1 + include/net/mac80211.h | 10 ++++++++-- net/mac80211/debugfs.c | 1 + net/mac80211/main.c | 4 ++++ net/mac80211/tx.c | 4 ++-- net/mac80211/wpa.c | 2 +- 7 files changed, 18 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 76297d69f1ed..e322b6df0ebc 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -7881,6 +7881,7 @@ int ath10k_mac_register(struct ath10k *ar) ieee80211_hw_set(ar->hw, WANT_MONITOR_VIF); ieee80211_hw_set(ar->hw, CHANCTX_STA_CSA); ieee80211_hw_set(ar->hw, QUEUE_CONTROL); + ieee80211_hw_set(ar->hw, SUPPORTS_TX_FRAG); if (!test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) ieee80211_hw_set(ar->hw, SW_CRYPTO_CONTROL); diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 471521a0db7b..9f39c6cf98fb 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -6086,6 +6086,7 @@ static int wl1271_init_ieee80211(struct wl1271 *wl) ieee80211_hw_set(wl->hw, SUPPORTS_DYNAMIC_PS); ieee80211_hw_set(wl->hw, SIGNAL_DBM); ieee80211_hw_set(wl->hw, SUPPORTS_PS); + ieee80211_hw_set(wl->hw, SUPPORTS_TX_FRAG); wl->hw->wiphy->cipher_suites = cipher_suites; wl->hw->wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f3dbadafe16e..a1a27021f452 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2025,6 +2025,10 @@ struct ieee80211_txq { * drivers, mac80211 packet loss mechanism will not be triggered and driver * is completely depending on firmware event for station kickout. * + * @IEEE80211_HW_SUPPORTS_TX_FRAG: Hardware does fragmentation by itself. + * The stack will not do fragmentation. + * The callback for @set_frag_threshold should be set as well. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2066,6 +2070,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_TX_AMSDU, IEEE80211_HW_TX_FRAG_LIST, IEEE80211_HW_REPORTS_LOW_ACK, + IEEE80211_HW_SUPPORTS_TX_FRAG, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -3093,8 +3098,9 @@ enum ieee80211_reconfig_type { * The callback must be atomic. * * @set_frag_threshold: Configuration of fragmentation threshold. Assign this - * if the device does fragmentation by itself; if this callback is - * implemented then the stack will not do fragmentation. + * if the device does fragmentation by itself. Note that to prevent the + * stack from doing fragmentation IEEE80211_HW_SUPPORTS_TX_FRAG + * should be set as well. * The callback can sleep. * * @set_rts_threshold: Configuration of RTS threshold (if device needs it) diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index f56e2f487d09..e02ba42ca827 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -210,6 +210,7 @@ static const char *hw_flag_names[] = { FLAG(TX_AMSDU), FLAG(TX_FRAG_LIST), FLAG(REPORTS_LOW_ACK), + FLAG(SUPPORTS_TX_FRAG), #undef FLAG }; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 1075ac24c8c5..0d9163c16dda 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -821,6 +821,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) !local->ops->tdls_recv_channel_switch)) return -EOPNOTSUPP; + if (WARN_ON(ieee80211_hw_check(hw, SUPPORTS_TX_FRAG) && + !local->ops->set_frag_threshold)) + return -EINVAL; + if (WARN_ON(local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN) && (!local->ops->start_nan || !local->ops->stop_nan))) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 772e36909fa3..62ccaf6f585d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -934,7 +934,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) if (info->flags & IEEE80211_TX_CTL_DONTFRAG) return TX_CONTINUE; - if (tx->local->ops->set_frag_threshold) + if (ieee80211_hw_check(&tx->local->hw, SUPPORTS_TX_FRAG)) return TX_CONTINUE; /* @@ -2800,7 +2800,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) /* fast-xmit doesn't handle fragmentation at all */ if (local->hw.wiphy->frag_threshold != (u32)-1 && - !local->ops->set_frag_threshold) + !ieee80211_hw_check(&local->hw, SUPPORTS_TX_FRAG)) goto out; rcu_read_lock(); diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index b48c1e13e281..c24934544f9c 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -57,7 +57,7 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) if (info->control.hw_key && (info->flags & IEEE80211_TX_CTL_DONTFRAG || - tx->local->ops->set_frag_threshold) && + ieee80211_hw_check(&tx->local->hw, SUPPORTS_TX_FRAG)) && !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { /* hwaccel - with no need for SW-generated MMIC */ return TX_CONTINUE; -- cgit v1.2.3 From f438ceb81d424cb90a5a1aad569056bd7c2ab4c5 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 18 Oct 2016 23:12:12 +0300 Subject: mac80211: uapsd_queues is in QoS IE order The uapsd_queue field is in QoS IE order and not in IEEE80211_AC_*'s order. This means that mac80211 would get confused between BK and BE which is certainly not such a big deal but needs to be fixed. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 ++- include/net/mac80211.h | 3 ++- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/mlme.c | 2 +- net/mac80211/rx.c | 3 ++- net/mac80211/sta_info.c | 13 ++++++++----- net/mac80211/util.c | 7 +++++++ 7 files changed, 24 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5000ec758eb3..10a26f0fbafe 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4574,7 +4574,8 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr); * moves to cfg80211 in this call * @buf: authentication frame (header + body) * @len: length of the frame data - * @uapsd_queues: bitmap of ACs configured to uapsd. -1 if n/a. + * @uapsd_queues: bitmap of queues configured for uapsd. Same format + * as the AC bitmap in the QoS info field * * After being asked to associate via cfg80211_ops::assoc() the driver must * call either this function or cfg80211_auth_timeout(). diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a1a27021f452..b9b24abd9103 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1745,7 +1745,8 @@ struct ieee80211_sta_rates { * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *), size is determined in hw information. * @uapsd_queues: bitmap of queues configured for uapsd. Only valid - * if wme is supported. + * if wme is supported. The bits order is like in + * IEEE80211_WMM_IE_STA_QOSINFO_AC_*. * @max_sp: max Service Period. Only valid if wme is supported. * @bandwidth: current bandwidth the station can receive with * @rx_nss: in HT/VHT, the maximum number of spatial streams the diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 42d194a04e1a..b4e2b6cf4099 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -84,6 +84,8 @@ struct ieee80211_local; #define IEEE80211_DEFAULT_MAX_SP_LEN \ IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL +extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS]; + #define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */) #define IEEE80211_MAX_NAN_INSTANCE_ID 255 diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7486f2dab4ba..c8d3a9b02fb6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3193,7 +3193,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, uapsd_queues = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) if (sdata->tx_conf[ac].uapsd) - uapsd_queues |= BIT(ac); + uapsd_queues |= ieee80211_ac_to_qos_mask[ac]; } cfg80211_rx_assoc_resp(sdata->dev, bss, (u8 *)mgmt, len, uapsd_queues); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 837d56261bb2..21a8947651e1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1401,7 +1401,8 @@ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *pubsta, u8 tid) * enabled queues, but for now we only implement uAPSD w/o * TSPEC changes to the ACs, so they're always the same. */ - if (!(sta->sta.uapsd_queues & BIT(ac)) && tid != IEEE80211_NUM_TIDS) + if (!(sta->sta.uapsd_queues & ieee80211_ac_to_qos_mask[ac]) && + tid != IEEE80211_NUM_TIDS) return; /* if we are in a service period, do nothing */ diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ab91e1e1b8ec..236d47e76ced 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -709,7 +709,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { unsigned long tids; - if (ignore_for_tim & BIT(ac)) + if (ignore_for_tim & ieee80211_ac_to_qos_mask[ac]) continue; indicate_tim |= !skb_queue_empty(&sta->tx_filtered[ac]) || @@ -1389,7 +1389,7 @@ ieee80211_sta_ps_more_data(struct sta_info *sta, u8 ignored_acs, return true; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - if (ignored_acs & BIT(ac)) + if (ignored_acs & ieee80211_ac_to_qos_mask[ac]) continue; if (!skb_queue_empty(&sta->tx_filtered[ac]) || @@ -1414,7 +1414,7 @@ ieee80211_sta_ps_get_frames(struct sta_info *sta, int n_frames, u8 ignored_acs, for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { unsigned long tids; - if (ignored_acs & BIT(ac)) + if (ignored_acs & ieee80211_ac_to_qos_mask[ac]) continue; tids = ieee80211_tids_for_ac(ac); @@ -1482,7 +1482,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, BIT(find_highest_prio_tid(driver_release_tids)); if (skb_queue_empty(&frames) && !driver_release_tids) { - int tid; + int tid, ac; /* * For PS-Poll, this can only happen due to a race condition @@ -1500,7 +1500,10 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, */ /* This will evaluate to 1, 3, 5 or 7. */ - tid = 7 - ((ffs(~ignored_acs) - 1) << 1); + for (ac = IEEE80211_AC_VO; ac < IEEE80211_NUM_ACS; ac++) + if (ignored_acs & BIT(ac)) + continue; + tid = 7 - 2 * ac; ieee80211_send_null_response(sta, tid, reason, true, false); } else if (!driver_release_tids) { diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 031273a61d27..7f24bdc68f71 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3452,3 +3452,10 @@ void ieee80211_txq_get_depth(struct ieee80211_txq *txq, *byte_cnt = txqi->tin.backlog_bytes + frag_bytes; } EXPORT_SYMBOL(ieee80211_txq_get_depth); + +const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS] = { + IEEE80211_WMM_IE_STA_QOSINFO_AC_VO, + IEEE80211_WMM_IE_STA_QOSINFO_AC_VI, + IEEE80211_WMM_IE_STA_QOSINFO_AC_BE, + IEEE80211_WMM_IE_STA_QOSINFO_AC_BK +}; -- cgit v1.2.3 From 0711d638786941ec02551dd9b4aa0d8341f7db5b Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Tue, 18 Oct 2016 23:12:13 +0300 Subject: cfg80211: allow aborting in-progress connection atttempts On a disconnect request from userspace, cfg80211 currently calls called rdev_disconnect() only in case that 'current_bss' was set, i.e. connection had been established. Change this to allow the userspace call to succeed and call the driver's disconnect() method also while the connection attempt is in progress, to be able to abort attempts. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho [change commit subject/message] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 7 ++++--- net/wireless/sme.c | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 10a26f0fbafe..2bbbcc3eecac 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2564,9 +2564,10 @@ struct cfg80211_nan_func { * cases, the result of roaming is indicated with a call to * cfg80211_roamed() or cfg80211_roamed_bss(). * (invoked with the wireless_dev mutex held) - * @disconnect: Disconnect from the BSS/ESS. Once done, call - * cfg80211_disconnected(). - * (invoked with the wireless_dev mutex held) + * @disconnect: Disconnect from the BSS/ESS or stop connection attempts if + * connection is in progress. Once done, call cfg80211_disconnected() in + * case connection was already established (invoked with the + * wireless_dev mutex held), otherwise call cfg80211_connect_timeout(). * * @join_ibss: Join the specified IBSS (or create if necessary). Once done, call * cfg80211_ibss_joined(), also call that function when changing BSSID due diff --git a/net/wireless/sme.c b/net/wireless/sme.c index a77db333927e..2b5bb380414b 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1088,7 +1088,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, err = cfg80211_sme_disconnect(wdev, reason); else if (!rdev->ops->disconnect) cfg80211_mlme_down(rdev, dev); - else if (wdev->current_bss) + else if (wdev->ssid_len) err = rdev_disconnect(rdev, dev, reason); return err; -- cgit v1.2.3 From 286c72deabaa240b7eebbd99496ed3324d69f3c0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Oct 2016 09:39:40 -0700 Subject: udp: must lock the socket in udp_disconnect() Baozeng Ding reported KASAN traces showing uses after free in udp_lib_get_port() and other related UDP functions. A CONFIG_DEBUG_PAGEALLOC=y kernel would eventually crash. I could write a reproducer with two threads doing : static int sock_fd; static void *thr1(void *arg) { for (;;) { connect(sock_fd, (const struct sockaddr *)arg, sizeof(struct sockaddr_in)); } } static void *thr2(void *arg) { struct sockaddr_in unspec; for (;;) { memset(&unspec, 0, sizeof(unspec)); connect(sock_fd, (const struct sockaddr *)&unspec, sizeof(unspec)); } } Problem is that udp_disconnect() could run without holding socket lock, and this was causing list corruptions. Signed-off-by: Eric Dumazet Reported-by: Baozeng Ding Signed-off-by: David S. Miller --- include/net/udp.h | 1 + net/ipv4/ping.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 13 +++++++++++-- net/ipv6/ping.c | 2 +- net/ipv6/raw.c | 2 +- net/l2tp/l2tp_ip.c | 2 +- net/l2tp/l2tp_ip6.c | 2 +- 8 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/udp.h b/include/net/udp.h index ea53a87d880f..4948790d393d 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -258,6 +258,7 @@ void udp_flush_pending_frames(struct sock *sk); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); +int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait); struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 7cf7d6e380c2..205e2000d395 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -994,7 +994,7 @@ struct proto ping_prot = { .init = ping_init_sock, .close = ping_close, .connect = ip4_datagram_connect, - .disconnect = udp_disconnect, + .disconnect = __udp_disconnect, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, .sendmsg = ping_v4_sendmsg, diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 90a85c955872..ecbe5a7c2d6d 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -918,7 +918,7 @@ struct proto raw_prot = { .close = raw_close, .destroy = raw_destroy, .connect = ip4_datagram_connect, - .disconnect = udp_disconnect, + .disconnect = __udp_disconnect, .ioctl = raw_ioctl, .init = raw_init, .setsockopt = raw_setsockopt, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7d96dc2d3d08..311613e413cb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1345,7 +1345,7 @@ csum_copy_err: goto try_again; } -int udp_disconnect(struct sock *sk, int flags) +int __udp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); /* @@ -1367,6 +1367,15 @@ int udp_disconnect(struct sock *sk, int flags) sk_dst_reset(sk); return 0; } +EXPORT_SYMBOL(__udp_disconnect); + +int udp_disconnect(struct sock *sk, int flags) +{ + lock_sock(sk); + __udp_disconnect(sk, flags); + release_sock(sk); + return 0; +} EXPORT_SYMBOL(udp_disconnect); void udp_lib_unhash(struct sock *sk) @@ -2193,7 +2202,7 @@ int udp_abort(struct sock *sk, int err) sk->sk_err = err; sk->sk_error_report(sk); - udp_disconnect(sk, 0); + __udp_disconnect(sk, 0); release_sock(sk); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 0e983b694ee8..66e2d9dfc43a 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -180,7 +180,7 @@ struct proto pingv6_prot = { .init = ping_init_sock, .close = ping_close, .connect = ip6_datagram_connect_v6_only, - .disconnect = udp_disconnect, + .disconnect = __udp_disconnect, .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .sendmsg = ping_v6_sendmsg, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 54404f08efcc..054a1d84fc5e 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1241,7 +1241,7 @@ struct proto rawv6_prot = { .close = rawv6_close, .destroy = raw6_destroy, .connect = ip6_datagram_connect_v6_only, - .disconnect = udp_disconnect, + .disconnect = __udp_disconnect, .ioctl = rawv6_ioctl, .init = rawv6_init_sk, .setsockopt = rawv6_setsockopt, diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 42de4ccd159f..fce25afb652a 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -338,7 +338,7 @@ static int l2tp_ip_disconnect(struct sock *sk, int flags) if (sock_flag(sk, SOCK_ZAPPED)) return 0; - return udp_disconnect(sk, flags); + return __udp_disconnect(sk, flags); } static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr, diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index ea2ae6664cc8..ad3468c32b53 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -410,7 +410,7 @@ static int l2tp_ip6_disconnect(struct sock *sk, int flags) if (sock_flag(sk, SOCK_ZAPPED)) return 0; - return udp_disconnect(sk, flags); + return __udp_disconnect(sk, flags); } static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, -- cgit v1.2.3 From 8651be8f14a12d24f203f283601d9b0418c389ff Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 19 Oct 2016 23:35:12 -0700 Subject: ipv6: fix a potential deadlock in do_ipv6_setsockopt() Baozeng reported this deadlock case: CPU0 CPU1 ---- ---- lock([ 165.136033] sk_lock-AF_INET6); lock([ 165.136033] rtnl_mutex); lock([ 165.136033] sk_lock-AF_INET6); lock([ 165.136033] rtnl_mutex); Similar to commit 87e9f0315952 ("ipv4: fix a potential deadlock in mcast getsockopt() path") this is due to we still have a case, ipv6_sock_mc_close(), where we acquire sk_lock before rtnl_lock. Close this deadlock with the similar solution, that is always acquire rtnl lock first. Fixes: baf606d9c9b1 ("ipv4,ipv6: grab rtnl before locking the socket") Reported-by: Baozeng Ding Tested-by: Baozeng Ding Cc: Marcelo Ricardo Leitner Signed-off-by: Cong Wang Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/addrconf.h | 1 + net/ipv6/ipv6_sockglue.c | 3 ++- net/ipv6/mcast.c | 17 ++++++++++++----- 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f2d072787947..8f998afc1384 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -174,6 +174,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr); int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); +void __ipv6_sock_mc_close(struct sock *sk); void ipv6_sock_mc_close(struct sock *sk); bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, const struct in6_addr *src_addr); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 5330262ab673..636ec56f5f50 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -120,6 +120,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, static bool setsockopt_needs_rtnl(int optname) { switch (optname) { + case IPV6_ADDRFORM: case IPV6_ADD_MEMBERSHIP: case IPV6_DROP_MEMBERSHIP: case IPV6_JOIN_ANYCAST: @@ -198,7 +199,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } fl6_free_socklist(sk); - ipv6_sock_mc_close(sk); + __ipv6_sock_mc_close(sk); /* * Sock is moving from IPv6 to IPv4 (sk_prot), so diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 75c1fc54f188..14a3903f1c82 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -276,16 +276,14 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, return idev; } -void ipv6_sock_mc_close(struct sock *sk) +void __ipv6_sock_mc_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc_lst; struct net *net = sock_net(sk); - if (!rcu_access_pointer(np->ipv6_mc_list)) - return; + ASSERT_RTNL(); - rtnl_lock(); while ((mc_lst = rtnl_dereference(np->ipv6_mc_list)) != NULL) { struct net_device *dev; @@ -303,8 +301,17 @@ void ipv6_sock_mc_close(struct sock *sk) atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); kfree_rcu(mc_lst, rcu); - } +} + +void ipv6_sock_mc_close(struct sock *sk) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + + if (!rcu_access_pointer(np->ipv6_mc_list)) + return; + rtnl_lock(); + __ipv6_sock_mc_close(sk); rtnl_unlock(); } -- cgit v1.2.3 From f8c3bf00d440df2bc2c3f669d460868d9ba67226 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 21 Oct 2016 13:55:45 +0200 Subject: net/socket: factor out helpers for memory and queue manipulation Basic sock operations that udp code can use with its own memory accounting schema. No functional change is introduced in the existing APIs. v4 -> v5: - avoid whitespace changes v2 -> v4: - avoid exporting __sock_enqueue_skb v1 -> v2: - avoid export sock_rmem_free Acked-by: Hannes Frederic Sowa Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++++ net/core/datagram.c | 36 ++++++++++++++++++------------ net/core/sock.c | 64 +++++++++++++++++++++++++++++++++++++---------------- 3 files changed, 71 insertions(+), 33 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index ebf75db08e06..276489553338 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1274,7 +1274,9 @@ static inline struct inode *SOCK_INODE(struct socket *socket) /* * Functions for memory accounting */ +int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind); int __sk_mem_schedule(struct sock *sk, int size, int kind); +void __sk_mem_reduce_allocated(struct sock *sk, int amount); void __sk_mem_reclaim(struct sock *sk, int amount); #define SK_MEM_QUANTUM ((int)PAGE_SIZE) @@ -1950,6 +1952,8 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer, void sk_stop_timer(struct sock *sk, struct timer_list *timer); +int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, + unsigned int flags); int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); diff --git a/net/core/datagram.c b/net/core/datagram.c index b7de71f8d5d3..bfb973aebb5b 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -323,6 +323,27 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len) } EXPORT_SYMBOL(__skb_free_datagram_locked); +int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb, + unsigned int flags) +{ + int err = 0; + + if (flags & MSG_PEEK) { + err = -ENOENT; + spin_lock_bh(&sk->sk_receive_queue.lock); + if (skb == skb_peek(&sk->sk_receive_queue)) { + __skb_unlink(skb, &sk->sk_receive_queue); + atomic_dec(&skb->users); + err = 0; + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + } + + atomic_inc(&sk->sk_drops); + return err; +} +EXPORT_SYMBOL(__sk_queue_drop_skb); + /** * skb_kill_datagram - Free a datagram skbuff forcibly * @sk: socket @@ -346,23 +367,10 @@ EXPORT_SYMBOL(__skb_free_datagram_locked); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) { - int err = 0; - - if (flags & MSG_PEEK) { - err = -ENOENT; - spin_lock_bh(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { - __skb_unlink(skb, &sk->sk_receive_queue); - atomic_dec(&skb->users); - err = 0; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - } + int err = __sk_queue_drop_skb(sk, skb, flags); kfree_skb(skb); - atomic_inc(&sk->sk_drops); sk_mem_reclaim_partial(sk); - return err; } EXPORT_SYMBOL(skb_kill_datagram); diff --git a/net/core/sock.c b/net/core/sock.c index c73e28fc9c2a..d8e4532e89e7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2091,24 +2091,18 @@ int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb) EXPORT_SYMBOL(sk_wait_data); /** - * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated + * __sk_mem_raise_allocated - increase memory_allocated * @sk: socket * @size: memory size to allocate + * @amt: pages to allocate * @kind: allocation type * - * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means - * rmem allocation. This function assumes that protocols which have - * memory_pressure use sk_wmem_queued as write buffer accounting. + * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc */ -int __sk_mem_schedule(struct sock *sk, int size, int kind) +int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { struct proto *prot = sk->sk_prot; - int amt = sk_mem_pages(size); - long allocated; - - sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; - - allocated = sk_memory_allocated_add(sk, amt); + long allocated = sk_memory_allocated_add(sk, amt); if (mem_cgroup_sockets_enabled && sk->sk_memcg && !mem_cgroup_charge_skmem(sk->sk_memcg, amt)) @@ -2169,9 +2163,6 @@ suppress_allocation: trace_sock_exceed_buf_limit(sk, prot, allocated); - /* Alas. Undo changes. */ - sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; - sk_memory_allocated_sub(sk, amt); if (mem_cgroup_sockets_enabled && sk->sk_memcg) @@ -2179,18 +2170,40 @@ suppress_allocation: return 0; } +EXPORT_SYMBOL(__sk_mem_raise_allocated); + +/** + * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated + * @sk: socket + * @size: memory size to allocate + * @kind: allocation type + * + * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means + * rmem allocation. This function assumes that protocols which have + * memory_pressure use sk_wmem_queued as write buffer accounting. + */ +int __sk_mem_schedule(struct sock *sk, int size, int kind) +{ + int ret, amt = sk_mem_pages(size); + + sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT; + ret = __sk_mem_raise_allocated(sk, size, amt, kind); + if (!ret) + sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT; + return ret; +} EXPORT_SYMBOL(__sk_mem_schedule); /** - * __sk_mem_reclaim - reclaim memory_allocated + * __sk_mem_reduce_allocated - reclaim memory_allocated * @sk: socket - * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple) + * @amount: number of quanta + * + * Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc */ -void __sk_mem_reclaim(struct sock *sk, int amount) +void __sk_mem_reduce_allocated(struct sock *sk, int amount) { - amount >>= SK_MEM_QUANTUM_SHIFT; sk_memory_allocated_sub(sk, amount); - sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; if (mem_cgroup_sockets_enabled && sk->sk_memcg) mem_cgroup_uncharge_skmem(sk->sk_memcg, amount); @@ -2199,6 +2212,19 @@ void __sk_mem_reclaim(struct sock *sk, int amount) (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) sk_leave_memory_pressure(sk); } +EXPORT_SYMBOL(__sk_mem_reduce_allocated); + +/** + * __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated + * @sk: socket + * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple) + */ +void __sk_mem_reclaim(struct sock *sk, int amount) +{ + amount >>= SK_MEM_QUANTUM_SHIFT; + sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; + __sk_mem_reduce_allocated(sk, amount); +} EXPORT_SYMBOL(__sk_mem_reclaim); int sk_set_peek_off(struct sock *sk, int val) -- cgit v1.2.3 From f970bd9e3a06f06df8d8ecf1f8ad2c8615cc17eb Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 21 Oct 2016 13:55:46 +0200 Subject: udp: implement memory accounting helpers Avoid using the generic helpers. Use the receive queue spin lock to protect the memory accounting operation, both on enqueue and on dequeue. On dequeue perform partial memory reclaiming, trying to leave a quantum of forward allocated memory. On enqueue use a custom helper, to allow some optimizations: - use a plain spin_lock() variant instead of the slightly costly spin_lock_irqsave(), - avoid dst_force check, since the calling code has already dropped the skb dst - avoid orphaning the skb, since skb_steal_sock() already did the work for us The above needs custom memory reclaiming on shutdown, provided by the udp_destruct_sock(). v5 -> v6: - don't orphan the skb on enqueue v4 -> v5: - replace the mem_lock with the receive queue spin lock - ensure that the bh is always allowed to enqueue at least a skb, even if sk_rcvbuf is exceeded v3 -> v4: - reworked memory accunting, simplifying the schema - provide an helper for both memory scheduling and enqueuing v1 -> v2: - use a udp specific destrctor to perform memory reclaiming - remove a couple of helpers, unneeded after the above cleanup - do not reclaim memory on dequeue if not under memory pressure - reworked the fwd accounting schema to avoid potential integer overflow Acked-by: Hannes Frederic Sowa Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/udp.h | 4 +++ net/ipv4/udp.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) (limited to 'include/net') diff --git a/include/net/udp.h b/include/net/udp.h index ea53a87d880f..18f1e6b91927 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -246,6 +246,9 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb, } /* net/ipv4/udp.c */ +void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); +int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); + void udp_v4_early_demux(struct sk_buff *skb); int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, @@ -258,6 +261,7 @@ void udp_flush_pending_frames(struct sock *sk); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); +int udp_init_sock(struct sock *sk); int udp_disconnect(struct sock *sk, int flags); unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait); struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7d96dc2d3d08..be3c3312df8d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1172,6 +1172,112 @@ out: return ret; } +static void udp_rmem_release(struct sock *sk, int size, int partial) +{ + int amt; + + atomic_sub(size, &sk->sk_rmem_alloc); + + spin_lock_bh(&sk->sk_receive_queue.lock); + sk->sk_forward_alloc += size; + amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1); + sk->sk_forward_alloc -= amt; + spin_unlock_bh(&sk->sk_receive_queue.lock); + + if (amt) + __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT); +} + +static void udp_rmem_free(struct sk_buff *skb) +{ + udp_rmem_release(skb->sk, skb->truesize, 1); +} + +int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff_head *list = &sk->sk_receive_queue; + int rmem, delta, amt, err = -ENOMEM; + int size = skb->truesize; + + /* try to avoid the costly atomic add/sub pair when the receive + * queue is full; always allow at least a packet + */ + rmem = atomic_read(&sk->sk_rmem_alloc); + if (rmem && (rmem + size > sk->sk_rcvbuf)) + goto drop; + + /* we drop only if the receive buf is full and the receive + * queue contains some other skb + */ + rmem = atomic_add_return(size, &sk->sk_rmem_alloc); + if ((rmem > sk->sk_rcvbuf) && (rmem > size)) + goto uncharge_drop; + + spin_lock(&list->lock); + if (size >= sk->sk_forward_alloc) { + amt = sk_mem_pages(size); + delta = amt << SK_MEM_QUANTUM_SHIFT; + if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) { + err = -ENOBUFS; + spin_unlock(&list->lock); + goto uncharge_drop; + } + + sk->sk_forward_alloc += delta; + } + + sk->sk_forward_alloc -= size; + + /* the skb owner in now the udp socket */ + skb->sk = sk; + skb->destructor = udp_rmem_free; + skb->dev = NULL; + sock_skb_set_dropcount(sk, skb); + + __skb_queue_tail(list, skb); + spin_unlock(&list->lock); + + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk); + + return 0; + +uncharge_drop: + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + +drop: + atomic_inc(&sk->sk_drops); + return err; +} +EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); + +static void udp_destruct_sock(struct sock *sk) +{ + /* reclaim completely the forward allocated memory */ + __skb_queue_purge(&sk->sk_receive_queue); + udp_rmem_release(sk, 0, 0); + inet_sock_destruct(sk); +} + +int udp_init_sock(struct sock *sk) +{ + sk->sk_destruct = udp_destruct_sock; + return 0; +} +EXPORT_SYMBOL_GPL(udp_init_sock); + +void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) +{ + if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) { + bool slow = lock_sock_fast(sk); + + sk_peek_offset_bwd(sk, len); + unlock_sock_fast(sk, slow); + } + consume_skb(skb); +} +EXPORT_SYMBOL_GPL(skb_consume_udp); + /** * first_packet_length - return length of first packet in receive queue * @sk: socket -- cgit v1.2.3 From f76a9db351f8beb3259c4ba38058de0058ab8000 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 21 Oct 2016 16:10:22 +0200 Subject: lwt: Remove unused len field The field is initialized by ILA and MPLS but never used. Remove it. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/lwtunnel.h | 3 +-- net/ipv6/ila/ila_lwt.c | 4 +--- net/mpls/mpls_iptunnel.c | 5 +---- 3 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 67d235f43202..82e76fe1c1f7 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -24,11 +24,10 @@ enum { struct lwtunnel_state { __u16 type; __u16 flags; + __u16 headroom; atomic_t refcnt; int (*orig_output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*orig_input)(struct sk_buff *); - int len; - __u16 headroom; struct rcu_head rcu; __u8 data[0]; }; diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index c7a39d07bde8..a7bc54ab46e2 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -122,7 +122,6 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, struct ila_lwt *ilwt; struct ila_params *p; struct nlattr *tb[ILA_ATTR_MAX + 1]; - size_t encap_len = sizeof(*ilwt); struct lwtunnel_state *newts; const struct fib6_config *cfg6 = cfg; struct ila_addr *iaddr; @@ -155,7 +154,7 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, if (!tb[ILA_ATTR_LOCATOR]) return -EINVAL; - newts = lwtunnel_state_alloc(encap_len); + newts = lwtunnel_state_alloc(sizeof(*ilwt)); if (!newts) return -ENOMEM; @@ -166,7 +165,6 @@ static int ila_build_state(struct net_device *dev, struct nlattr *nla, return ret; } - newts->len = encap_len; p = ila_params_lwtunnel(newts); p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index cf52cf30ac4b..2f7ccd934416 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -133,7 +133,6 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla, struct mpls_iptunnel_encap *tun_encap_info; struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1]; struct lwtunnel_state *newts; - int tun_encap_info_len; int ret; ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla, @@ -144,13 +143,11 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla, if (!tb[MPLS_IPTUNNEL_DST]) return -EINVAL; - tun_encap_info_len = sizeof(*tun_encap_info); - newts = lwtunnel_state_alloc(tun_encap_info_len); + newts = lwtunnel_state_alloc(sizeof(*tun_encap_info)); if (!newts) return -ENOMEM; - newts->len = tun_encap_info_len; tun_encap_info = mpls_lwtunnel_encap(newts); ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS, &tun_encap_info->labels, tun_encap_info->label); -- cgit v1.2.3 From 432490f9d455fb842d70219f22d9d2c812371676 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 21 Oct 2016 13:03:44 +0300 Subject: net: ip, diag -- Add diag interface for raw sockets In criu we are actively using diag interface to collect sockets present in the system when dumping applications. And while for unix, tcp, udp[lite], packet, netlink it works as expected, the raw sockets do not have. Thus add it. v2: - add missing sock_put calls in raw_diag_dump_one (by eric.dumazet@) - implement @destroy for diag requests (by dsa@) v3: - add export of raw_abort for IPv6 (by dsa@) - pass net-admin flag into inet_sk_diag_fill due to changes in net-next branch (by dsa@) v4: - use @pad in struct inet_diag_req_v2 for raw socket protocol specification: raw module carries sockets which may have custom protocol passed from socket() syscall and sole @sdiag_protocol is not enough to match underlied ones - start reporting protocol specifed in socket() call when sockets are raw ones for the same reason: user space tools like ss may parse this attribute and use it for socket matching v5 (by eric.dumazet@): - use sock_hold in raw_sock_get instead of atomic_inc, we're holding (raw_v4_hashinfo|raw_v6_hashinfo)->lock when looking up so counter won't be zero here. v6: - use sdiag_raw_protocol() helper which will access @pad structure used for raw sockets protocol specification: we can't simply rename this member without breaking uapi v7: - sine sdiag_raw_protocol() helper is not suitable for uapi lets rather make an alias structure with proper names. __check_inet_diag_req_raw helper will catch if any of structure unintentionally changed. CC: David S. Miller CC: Eric Dumazet CC: David Ahern CC: Alexey Kuznetsov CC: James Morris CC: Hideaki YOSHIFUJI CC: Patrick McHardy CC: Andrey Vagin CC: Stephen Hemminger Signed-off-by: Cyrill Gorcunov Signed-off-by: David S. Miller --- include/net/raw.h | 6 + include/net/rawv6.h | 7 ++ include/uapi/linux/inet_diag.h | 17 +++ net/ipv4/Kconfig | 8 ++ net/ipv4/Makefile | 1 + net/ipv4/inet_diag.c | 9 ++ net/ipv4/raw.c | 21 +++- net/ipv4/raw_diag.c | 261 +++++++++++++++++++++++++++++++++++++++++ net/ipv6/raw.c | 7 +- 9 files changed, 333 insertions(+), 4 deletions(-) create mode 100644 net/ipv4/raw_diag.c (limited to 'include/net') diff --git a/include/net/raw.h b/include/net/raw.h index 3e789008394d..57c33dd22ec4 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -23,6 +23,12 @@ extern struct proto raw_prot; +extern struct raw_hashinfo raw_v4_hashinfo; +struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, + unsigned short num, __be32 raddr, + __be32 laddr, int dif); + +int raw_abort(struct sock *sk, int err); void raw_icmp_error(struct sk_buff *, int, u32); int raw_local_deliver(struct sk_buff *, int); diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 87783dea0791..cbe4e9de1894 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -3,6 +3,13 @@ #include +extern struct raw_hashinfo raw_v6_hashinfo; +struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, + unsigned short num, const struct in6_addr *loc_addr, + const struct in6_addr *rmt_addr, int dif); + +int raw_abort(struct sock *sk, int err); + void raw6_icmp_error(struct sk_buff *, int nexthdr, u8 type, u8 code, int inner_offset, __be32); bool raw6_local_deliver(struct sk_buff *, int); diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 509cd961068d..bbe201047df6 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -43,6 +43,23 @@ struct inet_diag_req_v2 { struct inet_diag_sockid id; }; +/* + * SOCK_RAW sockets require the underlied protocol to be + * additionally specified so we can use @pad member for + * this, but we can't rename it because userspace programs + * still may depend on this name. Instead lets use another + * structure definition as an alias for struct + * @inet_diag_req_v2. + */ +struct inet_diag_req_raw { + __u8 sdiag_family; + __u8 sdiag_protocol; + __u8 idiag_ext; + __u8 sdiag_raw_protocol; + __u32 idiag_states; + struct inet_diag_sockid id; +}; + enum { INET_DIAG_REQ_NONE, INET_DIAG_REQ_BYTECODE, diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 300b06888fdf..28e051a8e847 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -430,6 +430,14 @@ config INET_UDP_DIAG Support for UDP socket monitoring interface used by the ss tool. If unsure, say Y. +config INET_RAW_DIAG + tristate "RAW: socket monitoring interface" + depends on INET_DIAG && (IPV6 || IPV6=n) + default n + ---help--- + Support for RAW socket monitoring interface used by the ss tool. + If unsure, say Y. + config INET_DIAG_DESTROY bool "INET: allow privileged process to administratively close sockets" depends on INET_DIAG diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index bc6a6c8b9bcd..48af58a5686e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o +obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 0a1d4a896a26..3b34024202d8 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -200,6 +200,15 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO)) goto errout; + /* + * RAW sockets might have user-defined protocols assigned, + * so report the one supplied on socket creation. + */ + if (sk->sk_type == SOCK_RAW) { + if (nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol)) + goto errout; + } + if (!icsk) { handler->idiag_get_info(sk, r, NULL); goto out; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 90a85c955872..03618ed03532 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -89,9 +89,10 @@ struct raw_frag_vec { int hlen; }; -static struct raw_hashinfo raw_v4_hashinfo = { +struct raw_hashinfo raw_v4_hashinfo = { .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), }; +EXPORT_SYMBOL_GPL(raw_v4_hashinfo); int raw_hash_sk(struct sock *sk) { @@ -120,7 +121,7 @@ void raw_unhash_sk(struct sock *sk) } EXPORT_SYMBOL_GPL(raw_unhash_sk); -static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, +struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif) { sk_for_each_from(sk) { @@ -136,6 +137,7 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, found: return sk; } +EXPORT_SYMBOL_GPL(__raw_v4_lookup); /* * 0 - deliver @@ -912,6 +914,20 @@ static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg } #endif +int raw_abort(struct sock *sk, int err) +{ + lock_sock(sk); + + sk->sk_err = err; + sk->sk_error_report(sk); + udp_disconnect(sk, 0); + + release_sock(sk); + + return 0; +} +EXPORT_SYMBOL_GPL(raw_abort); + struct proto raw_prot = { .name = "RAW", .owner = THIS_MODULE, @@ -937,6 +953,7 @@ struct proto raw_prot = { .compat_getsockopt = compat_raw_getsockopt, .compat_ioctl = compat_raw_ioctl, #endif + .diag_destroy = raw_abort, }; #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c new file mode 100644 index 000000000000..ef3bea061b75 --- /dev/null +++ b/net/ipv4/raw_diag.c @@ -0,0 +1,261 @@ +#include + +#include +#include + +#include +#include + +#ifdef pr_fmt +# undef pr_fmt +#endif + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +static struct raw_hashinfo * +raw_get_hashinfo(const struct inet_diag_req_v2 *r) +{ + if (r->sdiag_family == AF_INET) { + return &raw_v4_hashinfo; +#if IS_ENABLED(CONFIG_IPV6) + } else if (r->sdiag_family == AF_INET6) { + return &raw_v6_hashinfo; +#endif + } else { + pr_warn_once("Unexpected inet family %d\n", + r->sdiag_family); + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); + } +} + +/* + * Due to requirement of not breaking user API we can't simply + * rename @pad field in inet_diag_req_v2 structure, instead + * use helper to figure it out. + */ + +static struct sock *raw_lookup(struct net *net, struct sock *from, + const struct inet_diag_req_v2 *req) +{ + struct inet_diag_req_raw *r = (void *)req; + struct sock *sk = NULL; + + if (r->sdiag_family == AF_INET) + sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol, + r->id.idiag_dst[0], + r->id.idiag_src[0], + r->id.idiag_if); +#if IS_ENABLED(CONFIG_IPV6) + else + sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol, + (const struct in6_addr *)r->id.idiag_src, + (const struct in6_addr *)r->id.idiag_dst, + r->id.idiag_if); +#endif + return sk; +} + +static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r) +{ + struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); + struct sock *sk = NULL, *s; + int slot; + + if (IS_ERR(hashinfo)) + return ERR_CAST(hashinfo); + + read_lock(&hashinfo->lock); + for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) { + sk_for_each(s, &hashinfo->ht[slot]) { + sk = raw_lookup(net, s, r); + if (sk) { + /* + * Grab it and keep until we fill + * diag meaage to be reported, so + * caller should call sock_put then. + * We can do that because we're keeping + * hashinfo->lock here. + */ + sock_hold(sk); + break; + } + } + } + read_unlock(&hashinfo->lock); + + return sk ? sk : ERR_PTR(-ENOENT); +} + +static int raw_diag_dump_one(struct sk_buff *in_skb, + const struct nlmsghdr *nlh, + const struct inet_diag_req_v2 *r) +{ + struct net *net = sock_net(in_skb->sk); + struct sk_buff *rep; + struct sock *sk; + int err; + + sk = raw_sock_get(net, r); + if (IS_ERR(sk)) + return PTR_ERR(sk); + + rep = nlmsg_new(sizeof(struct inet_diag_msg) + + sizeof(struct inet_diag_meminfo) + 64, + GFP_KERNEL); + if (!rep) { + sock_put(sk); + return -ENOMEM; + } + + err = inet_sk_diag_fill(sk, NULL, rep, r, + sk_user_ns(NETLINK_CB(in_skb).sk), + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0, nlh, + netlink_net_capable(in_skb, CAP_NET_ADMIN)); + sock_put(sk); + + if (err < 0) { + kfree_skb(rep); + return err; + } + + err = netlink_unicast(net->diag_nlsk, rep, + NETLINK_CB(in_skb).portid, + MSG_DONTWAIT); + if (err > 0) + err = 0; + return err; +} + +static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, + struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, + struct nlattr *bc, bool net_admin) +{ + if (!inet_diag_bc_sk(bc, sk)) + return 0; + + return inet_sk_diag_fill(sk, NULL, skb, r, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + cb->nlh, net_admin); +} + +static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *r, struct nlattr *bc) +{ + bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); + struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); + struct net *net = sock_net(skb->sk); + int num, s_num, slot, s_slot; + struct sock *sk = NULL; + + if (IS_ERR(hashinfo)) + return; + + s_slot = cb->args[0]; + num = s_num = cb->args[1]; + + read_lock(&hashinfo->lock); + for (slot = s_slot; slot < RAW_HTABLE_SIZE; s_num = 0, slot++) { + num = 0; + + sk_for_each(sk, &hashinfo->ht[slot]) { + struct inet_sock *inet = inet_sk(sk); + + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) + goto next; + if (sk->sk_family != r->sdiag_family) + goto next; + if (r->id.idiag_sport != inet->inet_sport && + r->id.idiag_sport) + goto next; + if (r->id.idiag_dport != inet->inet_dport && + r->id.idiag_dport) + goto next; + if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) + goto out_unlock; +next: + num++; + } + } + +out_unlock: + read_unlock(&hashinfo->lock); + + cb->args[0] = slot; + cb->args[1] = num; +} + +static void raw_diag_get_info(struct sock *sk, struct inet_diag_msg *r, + void *info) +{ + r->idiag_rqueue = sk_rmem_alloc_get(sk); + r->idiag_wqueue = sk_wmem_alloc_get(sk); +} + +#ifdef CONFIG_INET_DIAG_DESTROY +static int raw_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *r) +{ + struct net *net = sock_net(in_skb->sk); + struct sock *sk; + + sk = raw_sock_get(net, r); + if (IS_ERR(sk)) + return PTR_ERR(sk); + return sock_diag_destroy(sk, ECONNABORTED); +} +#endif + +static const struct inet_diag_handler raw_diag_handler = { + .dump = raw_diag_dump, + .dump_one = raw_diag_dump_one, + .idiag_get_info = raw_diag_get_info, + .idiag_type = IPPROTO_RAW, + .idiag_info_size = 0, +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = raw_diag_destroy, +#endif +}; + +static void __always_unused __check_inet_diag_req_raw(void) +{ + /* + * Make sure the two structures are identical, + * except the @pad field. + */ +#define __offset_mismatch(m1, m2) \ + (offsetof(struct inet_diag_req_v2, m1) != \ + offsetof(struct inet_diag_req_raw, m2)) + + BUILD_BUG_ON(sizeof(struct inet_diag_req_v2) != + sizeof(struct inet_diag_req_raw)); + BUILD_BUG_ON(__offset_mismatch(sdiag_family, sdiag_family)); + BUILD_BUG_ON(__offset_mismatch(sdiag_protocol, sdiag_protocol)); + BUILD_BUG_ON(__offset_mismatch(idiag_ext, idiag_ext)); + BUILD_BUG_ON(__offset_mismatch(pad, sdiag_raw_protocol)); + BUILD_BUG_ON(__offset_mismatch(idiag_states, idiag_states)); + BUILD_BUG_ON(__offset_mismatch(id, id)); +#undef __offset_mismatch +} + +static int __init raw_diag_init(void) +{ + return inet_diag_register(&raw_diag_handler); +} + +static void __exit raw_diag_exit(void) +{ + inet_diag_unregister(&raw_diag_handler); +} + +module_init(raw_diag_init); +module_exit(raw_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-255 /* AF_INET - IPPROTO_RAW */); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10-255 /* AF_INET6 - IPPROTO_RAW */); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 54404f08efcc..d7e8b955ade8 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -65,11 +65,12 @@ #define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */ -static struct raw_hashinfo raw_v6_hashinfo = { +struct raw_hashinfo raw_v6_hashinfo = { .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock), }; +EXPORT_SYMBOL_GPL(raw_v6_hashinfo); -static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, +struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, const struct in6_addr *rmt_addr, int dif) { @@ -102,6 +103,7 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, found: return sk; } +EXPORT_SYMBOL_GPL(__raw_v6_lookup); /* * 0 - deliver @@ -1259,6 +1261,7 @@ struct proto rawv6_prot = { .compat_getsockopt = compat_rawv6_getsockopt, .compat_ioctl = compat_rawv6_ioctl, #endif + .diag_destroy = raw_abort, }; #ifdef CONFIG_PROC_FS -- cgit v1.2.3 From b4f7f4ad425a84fd5d40da21aff8681997b25ff9 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 21 Oct 2016 15:57:23 +0300 Subject: mac80211: fix some sphinx warnings Signed-off-by: Jani Nikula Signed-off-by: Johannes Berg --- include/net/mac80211.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a810dfcb83c2..e2dba93e374f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -811,14 +811,18 @@ enum mac80211_rate_control_flags { * in the control information, and it will be filled by the rate * control algorithm according to what should be sent. For example, * if this array contains, in the format { , } the - * information + * information:: + * * { 3, 2 }, { 2, 2 }, { 1, 4 }, { -1, 0 }, { -1, 0 } + * * then this means that the frame should be transmitted * up to twice at rate 3, up to twice at rate 2, and up to four * times at rate 1 if it doesn't get acknowledged. Say it gets * acknowledged by the peer after the fifth attempt, the status - * information should then contain + * information should then contain:: + * * { 3, 2 }, { 2, 2 }, { 1, 1 }, { -1, 0 } ... + * * since it was transmitted twice at rate 3, twice at rate 2 * and once at rate 1 after which we received an acknowledgement. */ @@ -1168,8 +1172,8 @@ enum mac80211_rx_vht_flags { * @rate_idx: index of data rate into band's supported rates or MCS index if * HT or VHT is used (%RX_FLAG_HT/%RX_FLAG_VHT) * @vht_nss: number of streams (VHT only) - * @flag: %RX_FLAG_* - * @vht_flag: %RX_VHT_FLAG_* + * @flag: %RX_FLAG_\* + * @vht_flag: %RX_VHT_FLAG_\* * @rx_flags: internal RX flags for mac80211 * @ampdu_reference: A-MPDU reference number, must be a different value for * each A-MPDU but the same for each subframe within one A-MPDU @@ -1432,7 +1436,7 @@ enum ieee80211_vif_flags { * @probe_req_reg: probe requests should be reported to mac80211 for this * interface. * @drv_priv: data area for driver use, will always be aligned to - * sizeof(void *). + * sizeof(void \*). * @txq: the multicast data TX queue (if driver uses the TXQ abstraction) */ struct ieee80211_vif { @@ -1743,7 +1747,7 @@ struct ieee80211_sta_rates { * @wme: indicates whether the STA supports QoS/WME (if local devices does, * otherwise always false) * @drv_priv: data area for driver use, will always be aligned to - * sizeof(void *), size is determined in hw information. + * sizeof(void \*), size is determined in hw information. * @uapsd_queues: bitmap of queues configured for uapsd. Only valid * if wme is supported. * @max_sp: max Service Period. Only valid if wme is supported. @@ -2146,12 +2150,12 @@ enum ieee80211_hw_flags { * * @radiotap_mcs_details: lists which MCS information can the HW * reports, by default it is set to _MCS, _GI and _BW but doesn't - * include _FMT. Use %IEEE80211_RADIOTAP_MCS_HAVE_* values, only + * include _FMT. Use %IEEE80211_RADIOTAP_MCS_HAVE_\* values, only * adding _BW is supported today. * * @radiotap_vht_details: lists which VHT MCS information the HW reports, * the default is _GI | _BANDWIDTH. - * Use the %IEEE80211_RADIOTAP_VHT_KNOWN_* values. + * Use the %IEEE80211_RADIOTAP_VHT_KNOWN_\* values. * * @radiotap_timestamp: Information for the radiotap timestamp field; if the * 'units_pos' member is set to a non-negative value it must be set to @@ -2486,6 +2490,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * in the software stack cares about, we will, in the future, have mac80211 * tell the driver which information elements are interesting in the sense * that we want to see changes in them. This will include + * * - a list of information element IDs * - a list of OUIs for the vendor information element * -- cgit v1.2.3 From 293de7dee4f9602676846dbeb84b1580123306b4 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 23 Oct 2016 09:28:29 -0700 Subject: doc: update docbook annotations for socket and skb The skbuff and sock structure both had missing parameter annotation values. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + include/net/sock.h | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 601258f6e621..32810f279f8e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -936,6 +936,7 @@ struct sk_buff_fclones { /** * skb_fclone_busy - check if fclone is busy + * @sk: socket * @skb: buffer * * Returns true if skb is a fast clone, and its clone is not freed. diff --git a/include/net/sock.h b/include/net/sock.h index ebf75db08e06..73c6b008f1b7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -252,6 +252,7 @@ struct sock_common { * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler) * @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE) * @sk_sndbuf: size of send buffer in bytes + * @sk_padding: unused element for alignment * @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets * @sk_no_check_rx: allow zero checksum in RX packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) @@ -302,7 +303,8 @@ struct sock_common { * @sk_backlog_rcv: callback to process the backlog * @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0 * @sk_reuseport_cb: reuseport group container - */ + * @sk_rcu: used during RCU grace period + */ struct sock { /* * Now struct inet_timewait_sock also uses sock_common, so please just -- cgit v1.2.3 From 10df8e6152c6c400a563a673e9956320bfce1871 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Oct 2016 18:03:06 -0700 Subject: udp: fix IP_CHECKSUM handling First bug was added in commit ad6f939ab193 ("ip: Add offset parameter to ip_cmsg_recv") : Tom missed that ipv4 udp messages could be received on AF_INET6 socket. ip_cmsg_recv(msg, skb) should have been replaced by ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr)); Then commit e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") forgot to adjust the offsets now UDP headers are pulled before skb are put in receive queue. Fixes: ad6f939ab193 ("ip: Add offset parameter to ip_cmsg_recv") Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") Signed-off-by: Eric Dumazet Cc: Sam Kumar Cc: Willem de Bruijn Tested-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/ip.h | 4 ++-- net/ipv4/ip_sockglue.c | 11 ++++++----- net/ipv4/udp.c | 2 +- net/ipv6/udp.c | 3 ++- 4 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index c9d07988911e..5413883ac47f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -578,7 +578,7 @@ int ip_options_rcv_srr(struct sk_buff *skb); */ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); -void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int offset); +void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, @@ -600,7 +600,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { - ip_cmsg_recv_offset(msg, skb, 0); + ip_cmsg_recv_offset(msg, skb, 0, 0); } bool icmp_global_allow(void); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index af4919792b6a..b8a2d63d1fb8 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -98,7 +98,7 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) } static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, - int offset) + int tlen, int offset) { __wsum csum = skb->csum; @@ -106,8 +106,9 @@ static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, return; if (offset != 0) - csum = csum_sub(csum, csum_partial(skb_transport_header(skb), - offset, 0)); + csum = csum_sub(csum, + csum_partial(skb_transport_header(skb) + tlen, + offset, 0)); put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum); } @@ -153,7 +154,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) } void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, - int offset) + int tlen, int offset) { struct inet_sock *inet = inet_sk(skb->sk); unsigned int flags = inet->cmsg_flags; @@ -216,7 +217,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, } if (flags & IP_CMSG_CHECKSUM) - ip_cmsg_recv_checksum(msg, skb, offset); + ip_cmsg_recv_checksum(msg, skb, tlen, offset); } EXPORT_SYMBOL(ip_cmsg_recv_offset); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 311613e413cb..d123d68f4d1d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1322,7 +1322,7 @@ try_again: *addr_len = sizeof(*sin); } if (inet->cmsg_flags) - ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr) + off); + ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr), off); err = copied; if (flags & MSG_TRUNC) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9aa7c1c7a9ce..b2ef061e6836 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -427,7 +427,8 @@ try_again: if (is_udp4) { if (inet->cmsg_flags) - ip_cmsg_recv(msg, skb); + ip_cmsg_recv_offset(msg, skb, + sizeof(struct udphdr), off); } else { if (np->rxopt.all) ip6_datagram_recv_specific_ctl(sk, msg, skb); -- cgit v1.2.3 From 73c7da3dae1e7cd8febeab13767b2698b84dfa15 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Thu, 20 Oct 2016 20:08:22 +0100 Subject: cfg80211: add generic helper to check interface is running Add a helper using wdev to check if interface is running. This deals with both non-netdev and netdev interfaces. In struct wireless_dev replace 'p2p_started' and 'nan_started' by 'is_running' as those are mutually exclusive anyway, and unify all the code to use wdev_running(). Signed-off-by: Arend van Spriel Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 13 ++++++++++--- net/wireless/core.c | 8 ++++---- net/wireless/nl80211.c | 53 +++++++++++++++----------------------------------- 3 files changed, 30 insertions(+), 44 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2bbbcc3eecac..ec39f891b7a3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3781,8 +3781,8 @@ struct cfg80211_cached_keys; * @beacon_interval: beacon interval used on this device for transmitting * beacons, 0 when not valid * @address: The address for this device, valid only if @netdev is %NULL - * @p2p_started: true if this is a P2P Device that has been started - * @nan_started: true if this is a NAN interface that has been started + * @is_running: true if this is a non-netdev device that has been started, e.g. + * the P2P Device. * @cac_started: true if DFS channel availability check has been started * @cac_start_time: timestamp (jiffies) when the dfs state was entered. * @cac_time_ms: CAC time in ms @@ -3814,7 +3814,7 @@ struct wireless_dev { struct mutex mtx; - bool use_4addr, p2p_started, nan_started; + bool use_4addr, is_running; u8 address[ETH_ALEN] __aligned(sizeof(u16)); @@ -3871,6 +3871,13 @@ static inline u8 *wdev_address(struct wireless_dev *wdev) return wdev->address; } +static inline bool wdev_running(struct wireless_dev *wdev) +{ + if (wdev->netdev) + return netif_running(wdev->netdev); + return wdev->is_running; +} + /** * wdev_priv - return wiphy priv from wireless_dev * diff --git a/net/wireless/core.c b/net/wireless/core.c index f433f15f9222..725adff6c7d4 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -210,11 +210,11 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)) return; - if (!wdev->p2p_started) + if (!wdev_running(wdev)) return; rdev_stop_p2p_device(rdev, wdev); - wdev->p2p_started = false; + wdev->is_running = false; rdev->opencount--; @@ -233,11 +233,11 @@ void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN)) return; - if (!wdev->nan_started) + if (!wdev_running(wdev)) return; rdev_stop_nan(rdev, wdev); - wdev->nan_started = false; + wdev->is_running = false; rdev->opencount--; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fcf5b4f3f555..46cd48993ce9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10528,7 +10528,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE) return -EOPNOTSUPP; - if (wdev->p2p_started) + if (wdev_running(wdev)) return 0; if (rfkill_blocked(rdev->rfkill)) @@ -10538,7 +10538,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) if (err) return err; - wdev->p2p_started = true; + wdev->is_running = true; rdev->opencount++; return 0; @@ -10570,7 +10570,7 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info) if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; - if (wdev->nan_started) + if (!wdev_running(wdev)) return -EEXIST; if (rfkill_blocked(rdev->rfkill)) @@ -10593,7 +10593,7 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info) if (err) return err; - wdev->nan_started = true; + wdev->is_running = true; rdev->opencount++; return 0; @@ -10678,7 +10678,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb, if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; - if (!wdev->nan_started) + if (!wdev_running(wdev)) return -ENOTCONN; if (!info->attrs[NL80211_ATTR_NAN_FUNC]) @@ -10915,7 +10915,7 @@ static int nl80211_nan_del_func(struct sk_buff *skb, if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; - if (!wdev->nan_started) + if (!wdev_running(wdev)) return -ENOTCONN; if (!info->attrs[NL80211_ATTR_COOKIE]) @@ -10943,7 +10943,7 @@ static int nl80211_nan_change_config(struct sk_buff *skb, if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; - if (!wdev->nan_started) + if (!wdev_running(wdev)) return -ENOTCONN; if (info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) { @@ -11255,11 +11255,7 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info) return -EINVAL; if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { - if (wdev->netdev && - !netif_running(wdev->netdev)) - return -ENETDOWN; - if (!wdev->netdev && !wdev->p2p_started && - !wdev->nan_started) + if (!wdev_running(wdev)) return -ENETDOWN; } @@ -11422,10 +11418,7 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, return -EINVAL; if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { - if (wdev->netdev && - !netif_running(wdev->netdev)) - return -ENETDOWN; - if (!wdev->netdev && !wdev->p2p_started) + if (!wdev_running(wdev)) return -ENETDOWN; } } @@ -11796,29 +11789,15 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, info->user_ptr[1] = wdev; } - if (dev) { - if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && - !netif_running(dev)) { - if (rtnl) - rtnl_unlock(); - return -ENETDOWN; - } + if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && + !wdev_running(wdev)) { + if (rtnl) + rtnl_unlock(); + return -ENETDOWN; + } + if (dev) dev_hold(dev); - } else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) { - if (wdev->iftype == NL80211_IFTYPE_P2P_DEVICE && - !wdev->p2p_started) { - if (rtnl) - rtnl_unlock(); - return -ENETDOWN; - } - if (wdev->iftype == NL80211_IFTYPE_NAN && - !wdev->nan_started) { - if (rtnl) - rtnl_unlock(); - return -ENETDOWN; - } - } info->user_ptr[0] = rdev; } -- cgit v1.2.3 From 4c8dea638c16141adb046fd2e0cab51dfe43650c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 21 Oct 2016 14:25:13 +0200 Subject: cfg80211: validate beacon int as part of iface combinations Remove the pointless checking against interface combinations in the initial basic beacon interval validation, that currently isn't taking into account radar detection or channels properly. Instead, just validate the basic range there, and then delay real checking to the interface combination validation that drivers must do. This means that drivers wanting to use the beacon_int_min_gcd will now have to pass the new_beacon_int when validating the AP/mesh start. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 +++----- net/wireless/util.c | 83 +++++++++++++++++++++++++++++++------------------- 2 files changed, 56 insertions(+), 39 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ec39f891b7a3..d1ffbc3a8e55 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -784,19 +784,15 @@ struct cfg80211_csa_settings { * @iftype_num: array with the number of interfaces of each interface * type. The index is the interface type as specified in &enum * nl80211_iftype. - * @beacon_int_gcd: a value specifying GCD of all beaconing interfaces, - * the GCD of a single value is considered the value itself, so for - * a single interface this should be set to that interface's beacon - * interval - * @beacon_int_different: a flag indicating whether or not all beacon - * intervals (of beaconing interfaces) are different or not. + * @new_beacon_int: set this to the beacon interval of a new interface + * that's not operating yet, if such is to be checked as part of + * the verification */ struct iface_combination_params { int num_different_channels; u8 radar_detect; int iftype_num[NUM_NL80211_IFTYPES]; - u32 beacon_int_gcd; - bool beacon_int_different; + u32 new_beacon_int; }; /** diff --git a/net/wireless/util.c b/net/wireless/util.c index 78bf53705466..7681b65c4a3b 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "core.h" #include "rdev-ops.h" @@ -1558,47 +1559,53 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef, } EXPORT_SYMBOL(ieee80211_chandef_to_operating_class); -int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, - enum nl80211_iftype iftype, u32 beacon_int) +static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int, + u32 *beacon_int_gcd, + bool *beacon_int_different) { struct wireless_dev *wdev; - struct iface_combination_params params = { - .beacon_int_gcd = beacon_int, /* GCD(n) = n */ - }; - if (beacon_int < 10 || beacon_int > 10000) - return -EINVAL; + *beacon_int_gcd = 0; + *beacon_int_different = false; - params.iftype_num[iftype] = 1; - list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { + list_for_each_entry(wdev, &wiphy->wdev_list, list) { if (!wdev->beacon_interval) continue; - params.iftype_num[wdev->iftype]++; - } - - list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { - u32 bi_prev = wdev->beacon_interval; - - if (!wdev->beacon_interval) + if (!*beacon_int_gcd) { + *beacon_int_gcd = wdev->beacon_interval; continue; + } - /* slight optimisation - skip identical BIs */ - if (wdev->beacon_interval == beacon_int) + if (wdev->beacon_interval == *beacon_int_gcd) continue; - params.beacon_int_different = true; - - /* Get the GCD */ - while (bi_prev != 0) { - u32 tmp_bi = bi_prev; + *beacon_int_different = true; + *beacon_int_gcd = gcd(*beacon_int_gcd, wdev->beacon_interval); + } - bi_prev = params.beacon_int_gcd % bi_prev; - params.beacon_int_gcd = tmp_bi; - } + if (new_beacon_int && *beacon_int_gcd != new_beacon_int) { + if (*beacon_int_gcd) + *beacon_int_different = true; + *beacon_int_gcd = gcd(*beacon_int_gcd, new_beacon_int); } +} - return cfg80211_check_combinations(&rdev->wiphy, ¶ms); +int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, + enum nl80211_iftype iftype, u32 beacon_int) +{ + /* + * This is just a basic pre-condition check; if interface combinations + * are possible the driver must already be checking those with a call + * to cfg80211_check_combinations(), in which case we'll validate more + * through the cfg80211_calculate_bi_data() call and code in + * cfg80211_iter_combinations(). + */ + + if (beacon_int < 10 || beacon_int > 10000) + return -EINVAL; + + return 0; } int cfg80211_iter_combinations(struct wiphy *wiphy, @@ -1612,6 +1619,21 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, int i, j, iftype; int num_interfaces = 0; u32 used_iftypes = 0; + u32 beacon_int_gcd; + bool beacon_int_different; + + /* + * This is a bit strange, since the iteration used to rely only on + * the data given by the driver, but here it now relies on context, + * in form of the currently operating interfaces. + * This is OK for all current users, and saves us from having to + * push the GCD calculations into all the drivers. + * In the future, this should probably rely more on data that's in + * cfg80211 already - the only thing not would appear to be any new + * interfaces (while being brought up) and channel/radar data. + */ + cfg80211_calculate_bi_data(wiphy, params->new_beacon_int, + &beacon_int_gcd, &beacon_int_different); if (params->radar_detect) { rcu_read_lock(); @@ -1674,12 +1696,11 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, if ((all_iftypes & used_iftypes) != used_iftypes) goto cont; - if (params->beacon_int_gcd) { + if (beacon_int_gcd) { if (c->beacon_int_min_gcd && - params->beacon_int_gcd < c->beacon_int_min_gcd) + beacon_int_gcd < c->beacon_int_min_gcd) goto cont; - if (!c->beacon_int_min_gcd && - params->beacon_int_different) + if (!c->beacon_int_min_gcd && beacon_int_different) goto cont; } -- cgit v1.2.3 From 11b6b5a4ced2f2c76073b97ee08ca0eab8358fde Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 27 Oct 2016 00:41:58 +0300 Subject: cfg80211: Rename SAE_DATA to more generic AUTH_DATA This adds defines and nl80211 extensions to allow FILS Authentication to be implemented similarly to SAE. FILS does not need the special rules for the Authentication transaction number and Status code fields, but it does need to add non-IE fields. The previously used NL80211_ATTR_SAE_DATA can be reused for this to avoid having to duplicate that implementation. Rename that attribute to more generic NL80211_ATTR_AUTH_DATA (with backwards compatibility define for NL80211_SAE_DATA). Also document the special rules related to the Authentication transaction number and Status code fiels. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 +++++++----- include/uapi/linux/nl80211.h | 15 ++++++++++++--- net/mac80211/mlme.c | 12 ++++++------ net/wireless/core.h | 2 +- net/wireless/mlme.c | 6 +++--- net/wireless/nl80211.c | 18 +++++++++--------- 6 files changed, 38 insertions(+), 27 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d1ffbc3a8e55..dffc265a4fd6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1785,9 +1785,11 @@ const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie); * @key_len: length of WEP key for shared key authentication * @key_idx: index of WEP key for shared key authentication * @key: WEP key for shared key authentication - * @sae_data: Non-IE data to use with SAE or %NULL. This starts with - * Authentication transaction sequence number field. - * @sae_data_len: Length of sae_data buffer in octets + * @auth_data: Fields and elements in Authentication frames. This contains + * the authentication frame body (non-IE and IE data), excluding the + * Authentication algorithm number, i.e., starting at the Authentication + * transaction sequence number field. + * @auth_data_len: Length of auth_data buffer in octets */ struct cfg80211_auth_request { struct cfg80211_bss *bss; @@ -1796,8 +1798,8 @@ struct cfg80211_auth_request { enum nl80211_auth_type auth_type; const u8 *key; u8 key_len, key_idx; - const u8 *sae_data; - size_t sae_data_len; + const u8 *auth_data; + size_t auth_data_len; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1362d24957b5..18bcf44899aa 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1638,8 +1638,16 @@ enum nl80211_commands { * the connection request from a station. nl80211_connect_failed_reason * enum has different reasons of connection failure. * - * @NL80211_ATTR_SAE_DATA: SAE elements in Authentication frames. This starts - * with the Authentication transaction sequence number field. + * @NL80211_ATTR_AUTH_DATA: Fields and elements in Authentication frames. + * This contains the authentication frame body (non-IE and IE data), + * excluding the Authentication algorithm number, i.e., starting at the + * Authentication transaction sequence number field. It is used with + * authentication algorithms that need special fields to be added into + * the frames (SAE and FILS). Currently, only the SAE cases use the + * initial two fields (Authentication transaction sequence number and + * Status code). However, those fields are included in the attribute data + * for all authentication algorithms to keep the attribute definition + * consistent. * * @NL80211_ATTR_VHT_CAPABILITY: VHT Capability information element (from * association request when used with NL80211_CMD_NEW_STATION) @@ -2195,7 +2203,7 @@ enum nl80211_attrs { NL80211_ATTR_CONN_FAILED_REASON, - NL80211_ATTR_SAE_DATA, + NL80211_ATTR_AUTH_DATA, NL80211_ATTR_VHT_CAPABILITY, @@ -2347,6 +2355,7 @@ enum nl80211_attrs { #define NL80211_ATTR_SCAN_GENERATION NL80211_ATTR_GENERATION #define NL80211_ATTR_MESH_PARAMS NL80211_ATTR_MESH_CONFIG #define NL80211_ATTR_IFACE_SOCKET_OWNER NL80211_ATTR_SOCKET_OWNER +#define NL80211_ATTR_SAE_DATA NL80211_ATTR_AUTH_DATA /* * Allow user space programs to use #ifdef on new attributes by defining them diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c8d3a9b02fb6..32fd29581d4d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4483,20 +4483,20 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, return -EOPNOTSUPP; } - auth_data = kzalloc(sizeof(*auth_data) + req->sae_data_len + + auth_data = kzalloc(sizeof(*auth_data) + req->auth_data_len + req->ie_len, GFP_KERNEL); if (!auth_data) return -ENOMEM; auth_data->bss = req->bss; - if (req->sae_data_len >= 4) { - __le16 *pos = (__le16 *) req->sae_data; + if (req->auth_data_len >= 4) { + __le16 *pos = (__le16 *) req->auth_data; auth_data->sae_trans = le16_to_cpu(pos[0]); auth_data->sae_status = le16_to_cpu(pos[1]); - memcpy(auth_data->data, req->sae_data + 4, - req->sae_data_len - 4); - auth_data->data_len += req->sae_data_len - 4; + memcpy(auth_data->data, req->auth_data + 4, + req->auth_data_len - 4); + auth_data->data_len += req->auth_data_len - 4; } if (req->ie && req->ie_len) { diff --git a/net/wireless/core.h b/net/wireless/core.h index 21e31888cfa9..fb2fcd5581fe 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -345,7 +345,7 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len); + const u8 *auth_data, int auth_data_len); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index cbb48e26a871..bd1f7a159d6a 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -204,14 +204,14 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len) + const u8 *auth_data, int auth_data_len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_auth_request req = { .ie = ie, .ie_len = ie_len, - .sae_data = sae_data, - .sae_data_len = sae_data_len, + .auth_data = auth_data, + .auth_data_len = auth_data_len, .auth_type = auth_type, .key = key, .key_len = key_len, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7e746c90156e..704851142eed 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -357,7 +357,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_BG_SCAN_PERIOD] = { .type = NLA_U16 }, [NL80211_ATTR_WDEV] = { .type = NLA_U64 }, [NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 }, - [NL80211_ATTR_SAE_DATA] = { .type = NLA_BINARY, }, + [NL80211_ATTR_AUTH_DATA] = { .type = NLA_BINARY, }, [NL80211_ATTR_VHT_CAPABILITY] = { .len = NL80211_VHT_CAPABILITY_LEN }, [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 }, @@ -7729,8 +7729,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct ieee80211_channel *chan; - const u8 *bssid, *ssid, *ie = NULL, *sae_data = NULL; - int err, ssid_len, ie_len = 0, sae_data_len = 0; + const u8 *bssid, *ssid, *ie = NULL, *auth_data = NULL; + int err, ssid_len, ie_len = 0, auth_data_len = 0; enum nl80211_auth_type auth_type; struct key_parse key; bool local_state_change; @@ -7811,16 +7811,16 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) return -EINVAL; if (auth_type == NL80211_AUTHTYPE_SAE && - !info->attrs[NL80211_ATTR_SAE_DATA]) + !info->attrs[NL80211_ATTR_AUTH_DATA]) return -EINVAL; - if (info->attrs[NL80211_ATTR_SAE_DATA]) { + if (info->attrs[NL80211_ATTR_AUTH_DATA]) { if (auth_type != NL80211_AUTHTYPE_SAE) return -EINVAL; - sae_data = nla_data(info->attrs[NL80211_ATTR_SAE_DATA]); - sae_data_len = nla_len(info->attrs[NL80211_ATTR_SAE_DATA]); + auth_data = nla_data(info->attrs[NL80211_ATTR_AUTH_DATA]); + auth_data_len = nla_len(info->attrs[NL80211_ATTR_AUTH_DATA]); /* need to include at least Auth Transaction and Status Code */ - if (sae_data_len < 4) + if (auth_data_len < 4) return -EINVAL; } @@ -7837,7 +7837,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, ssid, ssid_len, ie, ie_len, key.p.key, key.p.key_len, key.idx, - sae_data, sae_data_len); + auth_data, auth_data_len); wdev_unlock(dev->ieee80211_ptr); return err; } -- cgit v1.2.3 From 3f817fe718c6cb3ddcc2ab04ba86faecc20ef8fe Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 27 Oct 2016 00:42:01 +0300 Subject: cfg80211: Define IEEE P802.11ai (FILS) information elements Define the Element IDs and Element ID Extensions from IEEE P802.11ai/D11.0. In addition, add a new cfg80211_find_ext_ie() wrapper to make it easier to find information elements that used the Element ID Extension field. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 20 ++++++++++++++++++++ include/net/cfg80211.h | 21 +++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'include/net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a80516fd65c8..d428adf51446 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1960,6 +1960,26 @@ enum ieee80211_eid { WLAN_EID_VENDOR_SPECIFIC = 221, WLAN_EID_QOS_PARAMETER = 222, + WLAN_EID_CAG_NUMBER = 237, + WLAN_EID_AP_CSN = 239, + WLAN_EID_FILS_INDICATION = 240, + WLAN_EID_DILS = 241, + WLAN_EID_FRAGMENT = 242, + WLAN_EID_EXTENSION = 255 +}; + +/* Element ID Extensions for Element ID 255 */ +enum ieee80211_eid_ext { + WLAN_EID_EXT_ASSOC_DELAY_INFO = 1, + WLAN_EID_EXT_FILS_REQ_PARAMS = 2, + WLAN_EID_EXT_FILS_KEY_CONFIRM = 3, + WLAN_EID_EXT_FILS_SESSION = 4, + WLAN_EID_EXT_FILS_HLP_CONTAINER = 5, + WLAN_EID_EXT_FILS_IP_ADDR_ASSIGN = 6, + WLAN_EID_EXT_KEY_DELIVERY = 7, + WLAN_EID_EXT_FILS_WRAPPED_DATA = 8, + WLAN_EID_EXT_FILS_PUBLIC_KEY = 12, + WLAN_EID_EXT_FILS_NONCE = 13, }; /* Action category code */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index dffc265a4fd6..8ca2e9f354f7 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4180,6 +4180,27 @@ static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) return cfg80211_find_ie_match(eid, ies, len, NULL, 0, 0); } +/** + * cfg80211_find_ext_ie - find information element with EID Extension in data + * + * @ext_eid: element ID Extension + * @ies: data consisting of IEs + * @len: length of data + * + * Return: %NULL if the extended element ID could not be found or if + * the element is invalid (claims to be longer than the given + * data), or a pointer to the first byte of the requested + * element, that is the byte containing the element ID. + * + * Note: There are no checks on the element length other than + * having to fit into the given data. + */ +static inline const u8 *cfg80211_find_ext_ie(u8 ext_eid, const u8 *ies, int len) +{ + return cfg80211_find_ie_match(WLAN_EID_EXTENSION, ies, len, + &ext_eid, 1, 2); +} + /** * cfg80211_find_vendor_ie - find vendor specific information element in data * -- cgit v1.2.3 From 348bd456699801920a309c66e382380809fbdf41 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 27 Oct 2016 00:42:03 +0300 Subject: cfg80211: Add KEK/nonces for FILS association frames The new nl80211 attributes can be used to provide KEK and nonces to allow the driver to encrypt and decrypt FILS (Re)Association Request/Response frames in station mode. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +++ include/net/cfg80211.h | 9 +++++++++ include/uapi/linux/nl80211.h | 8 ++++++++ net/wireless/nl80211.c | 12 ++++++++++++ 4 files changed, 32 insertions(+) (limited to 'include/net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 793a0174ba29..fe849329511a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2096,6 +2096,9 @@ enum ieee80211_key_len { #define IEEE80211_GCMP_MIC_LEN 16 #define IEEE80211_GCMP_PN_LEN 6 +#define FILS_NONCE_LEN 16 +#define FILS_MAX_KEK_LEN 64 + /* Public action codes */ enum ieee80211_pub_actioncode { WLAN_PUB_ACTION_EXT_CHANSW_ANN = 4, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8ca2e9f354f7..738b4d8a4666 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1840,6 +1840,12 @@ enum cfg80211_assoc_req_flags { * @ht_capa_mask: The bits of ht_capa which are to be used. * @vht_capa: VHT capability override * @vht_capa_mask: VHT capability mask indicating which fields to use + * @fils_kek: FILS KEK for protecting (Re)Association Request/Response frame or + * %NULL if FILS is not used. + * @fils_kek_len: Length of fils_kek in octets + * @fils_nonces: FILS nonces (part of AAD) for protecting (Re)Association + * Request/Response frame or %NULL if FILS is not used. This field starts + * with 16 octets of STA Nonce followed by 16 octets of AP Nonce. */ struct cfg80211_assoc_request { struct cfg80211_bss *bss; @@ -1851,6 +1857,9 @@ struct cfg80211_assoc_request { struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; struct ieee80211_vht_cap vht_capa, vht_capa_mask; + const u8 *fils_kek; + size_t fils_kek_len; + const u8 *fils_nonces; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 4dc21265cd12..a268a009528a 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1944,6 +1944,11 @@ enum nl80211_commands { * attribute. * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute. * See &enum nl80211_nan_match_attributes. + * @NL80211_ATTR_FILS_KEK: KEK for FILS (Re)Association Request/Response frame + * protection. + * @NL80211_ATTR_FILS_NONCES: Nonces (part of AAD) for FILS (Re)Association + * Request/Response frame protection. This attribute contains the 16 octet + * STA Nonce followed by 16 octets of AP Nonce. * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -2344,6 +2349,9 @@ enum nl80211_attrs { NL80211_ATTR_NAN_FUNC, NL80211_ATTR_NAN_MATCH, + NL80211_ATTR_FILS_KEK, + NL80211_ATTR_FILS_NONCES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ff798620e929..667d5f719c22 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -414,6 +414,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 }, [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 }, [NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED }, + [NL80211_ATTR_FILS_KEK] = { .type = NLA_BINARY, + .len = FILS_MAX_KEK_LEN }, + [NL80211_ATTR_FILS_NONCES] = { .len = 2 * FILS_NONCE_LEN }, }; /* policy for the key attributes */ @@ -8033,6 +8036,15 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) req.flags |= ASSOC_REQ_USE_RRM; } + if (info->attrs[NL80211_ATTR_FILS_KEK]) { + req.fils_kek = nla_data(info->attrs[NL80211_ATTR_FILS_KEK]); + req.fils_kek_len = nla_len(info->attrs[NL80211_ATTR_FILS_KEK]); + if (!info->attrs[NL80211_ATTR_FILS_NONCES]) + return -EINVAL; + req.fils_nonces = + nla_data(info->attrs[NL80211_ATTR_FILS_NONCES]); + } + err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); if (!err) { wdev_lock(dev->ieee80211_ptr); -- cgit v1.2.3 From ce0ce13a1c89ff8b94b7f8fb32eb4c43e111c82e Mon Sep 17 00:00:00 2001 From: Michael Braun Date: Mon, 10 Oct 2016 19:12:22 +0200 Subject: cfg80211: configure multicast to unicast for AP interfaces Add the ability to configure if an AP (and associated VLANs) will do multicast-to-unicast conversion for ARP, IPv4 and IPv6 frames (possibly within 802.1Q). If enabled, such frames are to be sent to each station separately, with the DA replaced by their own MAC address rather than the group address. Note that this may break certain expectations of the receiver, such as the ability to drop unicast IP packets received within multicast L2 frames, or the ability to not send ICMP destination unreachable messages for packets received in L2 multicast (which is required, but the receiver can't tell the difference if this new option is enabled.) This also doesn't implement the 802.11 DMS (directed multicast service). Signed-off-by: Michael Braun [fix disabling, add better documentation & commit message] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ++++++ include/uapi/linux/nl80211.h | 21 +++++++++++++++++++++ net/wireless/nl80211.c | 35 +++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 12 ++++++++++++ net/wireless/trace.h | 19 +++++++++++++++++++ 5 files changed, 93 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 738b4d8a4666..41ae3f500957 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2742,6 +2742,8 @@ struct cfg80211_nan_func { * @nan_change_conf: changes NAN configuration. The changed parameters must * be specified in @changes (using &enum cfg80211_nan_conf_changes); * All other parameters must be ignored. + * + * @set_multicast_to_unicast: configure multicast to unicast conversion for BSS */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -3018,6 +3020,10 @@ struct cfg80211_ops { struct wireless_dev *wdev, struct cfg80211_nan_conf *conf, u32 changes); + + int (*set_multicast_to_unicast)(struct wiphy *wiphy, + struct net_device *dev, + const bool enabled); }; /* diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index a268a009528a..e21d23dcb588 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -600,6 +600,20 @@ * * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface. * + * @NL80211_CMD_SET_MULTICAST_TO_UNICAST: Configure if this AP should perform + * multicast to unicast conversion. When enabled, all multicast packets + * with ethertype ARP, IPv4 or IPv6 (possibly within an 802.1Q header) + * will be sent out to each station once with the destination (multicast) + * MAC address replaced by the station's MAC address. Note that this may + * break certain expectations of the receiver, e.g. the ability to drop + * unicast IP packets encapsulated in multicast L2 frames, or the ability + * to not send destination unreachable messages in such cases. + * This can only be toggled per BSS. Configure this on an interface of + * type %NL80211_IFTYPE_AP. It applies to all its VLAN interfaces + * (%NL80211_IFTYPE_AP_VLAN), except for those in 4addr (WDS) mode. + * If %NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED is not present with this + * command, the feature is disabled. + * * @NL80211_CMD_JOIN_MESH: Join a mesh. The mesh ID must be given, and initial * mesh config parameters may be given. * @NL80211_CMD_LEAVE_MESH: Leave the mesh network -- no special arguments, the @@ -1069,6 +1083,8 @@ enum nl80211_commands { NL80211_CMD_CHANGE_NAN_CONFIG, NL80211_CMD_NAN_MATCH, + NL80211_CMD_SET_MULTICAST_TO_UNICAST, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1950,6 +1966,9 @@ enum nl80211_commands { * Request/Response frame protection. This attribute contains the 16 octet * STA Nonce followed by 16 octets of AP Nonce. * + * @NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED: Indicates whether or not multicast + * packets should be send out as unicast to all stations (flag attribute). + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2352,6 +2371,8 @@ enum nl80211_attrs { NL80211_ATTR_FILS_KEK, NL80211_ATTR_FILS_NONCES, + NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 667d5f719c22..0c9d00c9cef1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -417,6 +417,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_FILS_KEK] = { .type = NLA_BINARY, .len = FILS_MAX_KEK_LEN }, [NL80211_ATTR_FILS_NONCES] = { .len = 2 * FILS_NONCE_LEN }, + [NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED] = { .type = NLA_FLAG, }, }; /* policy for the key attributes */ @@ -1659,6 +1660,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (rdev->wiphy.features & NL80211_FEATURE_SUPPORTS_WMM_ADMISSION) CMD(add_tx_ts, ADD_TX_TS); + CMD(set_multicast_to_unicast, SET_MULTICAST_TO_UNICAST); } #undef CMD @@ -11766,6 +11768,31 @@ static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb, return 0; } +static int nl80211_set_multicast_to_unicast(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + const struct nlattr *nla; + bool enabled; + + if (netif_running(dev)) + return -EBUSY; + + if (!rdev->ops->set_multicast_to_unicast) + return -EOPNOTSUPP; + + if (wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO) + return -EOPNOTSUPP; + + nla = info->attrs[NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED]; + enabled = nla_get_flag(nla); + + return rdev_set_multicast_to_unicast(rdev, dev, enabled); +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -12625,6 +12652,14 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST, + .doit = nl80211_set_multicast_to_unicast, + .policy = nl80211_policy, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL, + }, }; /* notification functions */ diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 11cf83c8ad4f..e9ecf23e7942 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -562,6 +562,18 @@ static inline int rdev_set_wds_peer(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_multicast_to_unicast(struct cfg80211_registered_device *rdev, + struct net_device *dev, + const bool enabled) +{ + int ret; + trace_rdev_set_multicast_to_unicast(&rdev->wiphy, dev, enabled); + ret = rdev->ops->set_multicast_to_unicast(&rdev->wiphy, dev, enabled); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline void rdev_rfkill_poll(struct cfg80211_registered_device *rdev) { trace_rdev_rfkill_poll(&rdev->wiphy); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index a3d0a91b1e09..25f8318b3e0b 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3030,6 +3030,25 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_abort_scan, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), TP_ARGS(wiphy, wdev) ); + +TRACE_EVENT(rdev_set_multicast_to_unicast, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + const bool enabled), + TP_ARGS(wiphy, netdev, enabled), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(bool, enabled) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->enabled = enabled; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", unicast: %s", + WIPHY_PR_ARG, NETDEV_PR_ARG, + BOOL_TO_STR(__entry->enabled)) +); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 088e8df82f91a24728d49d9532cab7ebdee5117f Mon Sep 17 00:00:00 2001 From: vamsi krishna Date: Thu, 27 Oct 2016 16:51:11 +0300 Subject: cfg80211: Add support to update connection parameters Add functionality to update the connection parameters when in connected state, so that driver/firmware uses the updated parameters for subsequent roaming. This is for drivers that support internal BSS selection and roaming. The new command does not change the current association state, i.e., it can be used to update IE contents for future (re)associations without causing an immediate disassociation or reassociation with the current BSS. This commit implements the required functionality for updating IEs for (Re)Association Request frame only. Other parameters can be added in future when required. Signed-off-by: vamsi krishna Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 24 ++++++++++++++++++++++++ include/uapi/linux/nl80211.h | 8 ++++++++ net/wireless/nl80211.c | 40 ++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 12 ++++++++++++ net/wireless/trace.h | 18 ++++++++++++++++++ 5 files changed, 102 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 41ae3f500957..c575583b50fb 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2050,6 +2050,18 @@ struct cfg80211_connect_params { const u8 *prev_bssid; }; +/** + * enum cfg80211_connect_params_changed - Connection parameters being updated + * + * This enum provides information of all connect parameters that + * have to be updated as part of update_connect_params() call. + * + * @UPDATE_ASSOC_IES: Indicates whether association request IEs are updated + */ +enum cfg80211_connect_params_changed { + UPDATE_ASSOC_IES = BIT(0), +}; + /** * enum wiphy_params_flags - set_wiphy_params bitfield values * @WIPHY_PARAM_RETRY_SHORT: wiphy->retry_short has changed @@ -2571,6 +2583,14 @@ struct cfg80211_nan_func { * cases, the result of roaming is indicated with a call to * cfg80211_roamed() or cfg80211_roamed_bss(). * (invoked with the wireless_dev mutex held) + * @update_connect_params: Update the connect parameters while connected to a + * BSS. The updated parameters can be used by driver/firmware for + * subsequent BSS selection (roaming) decisions and to form the + * Authentication/(Re)Association Request frames. This call does not + * request an immediate disassociation or reassociation with the current + * BSS, i.e., this impacts only subsequent (re)associations. The bits in + * changed are defined in &enum cfg80211_connect_params_changed. + * (invoked with the wireless_dev mutex held) * @disconnect: Disconnect from the BSS/ESS or stop connection attempts if * connection is in progress. Once done, call cfg80211_disconnected() in * case connection was already established (invoked with the @@ -2858,6 +2878,10 @@ struct cfg80211_ops { int (*connect)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_connect_params *sme); + int (*update_connect_params)(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_connect_params *sme, + u32 changed); int (*disconnect)(struct wiphy *wiphy, struct net_device *dev, u16 reason_code); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e21d23dcb588..259c9c77fdc1 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -888,6 +888,12 @@ * This will contain a %NL80211_ATTR_NAN_MATCH nested attribute and * %NL80211_ATTR_COOKIE. * + * @NL80211_CMD_UPDATE_CONNECT_PARAMS: Update one or more connect parameters + * for subsequent roaming cases if the driver or firmware uses internal + * BSS selection. This command can be issued only while connected and it + * does not result in a change for the current association. Currently, + * only the %NL80211_ATTR_IE data is used and updated with this command. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1085,6 +1091,8 @@ enum nl80211_commands { NL80211_CMD_SET_MULTICAST_TO_UNICAST, + NL80211_CMD_UPDATE_CONNECT_PARAMS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0c9d00c9cef1..bb30fa1969f9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1661,6 +1661,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, NL80211_FEATURE_SUPPORTS_WMM_ADMISSION) CMD(add_tx_ts, ADD_TX_TS); CMD(set_multicast_to_unicast, SET_MULTICAST_TO_UNICAST); + CMD(update_connect_params, UPDATE_CONNECT_PARAMS); } #undef CMD @@ -8779,6 +8780,37 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_update_connect_params(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_connect_params connect = {}; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + u32 changed = 0; + int ret; + + if (!rdev->ops->update_connect_params) + return -EOPNOTSUPP; + + if (info->attrs[NL80211_ATTR_IE]) { + if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) + return -EINVAL; + connect.ie = nla_data(info->attrs[NL80211_ATTR_IE]); + connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + changed |= UPDATE_ASSOC_IES; + } + + wdev_lock(dev->ieee80211_ptr); + if (!wdev->current_bss) + ret = -ENOLINK; + else + ret = rdev_update_connect_params(rdev, dev, &connect, changed); + wdev_unlock(dev->ieee80211_ptr); + + return ret; +} + static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -12231,6 +12263,14 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS, + .doit = nl80211_update_connect_params, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, { .cmd = NL80211_CMD_DISCONNECT, .doit = nl80211_disconnect, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index e9ecf23e7942..2f425075ada8 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -490,6 +490,18 @@ static inline int rdev_connect(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_update_connect_params(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_connect_params *sme, u32 changed) +{ + int ret; + trace_rdev_update_connect_params(&rdev->wiphy, dev, sme, changed); + ret = rdev->ops->update_connect_params(&rdev->wiphy, dev, sme, changed); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline int rdev_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason_code) { diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 25f8318b3e0b..ea1b47e04fa4 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1281,6 +1281,24 @@ TRACE_EVENT(rdev_connect, __entry->wpa_versions, __entry->flags, MAC_PR_ARG(prev_bssid)) ); +TRACE_EVENT(rdev_update_connect_params, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_connect_params *sme, u32 changed), + TP_ARGS(wiphy, netdev, sme, changed), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u32, changed) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->changed = changed; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", parameters changed: %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->changed) +); + TRACE_EVENT(rdev_set_cqm_rssi_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, s32 rssi_thold, -- cgit v1.2.3 From 61f9e2924f4981d626b3a931fed935f2fa3cb4de Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 22 Oct 2016 18:51:25 +0800 Subject: netfilter: nf_tables: fix *leak* when expr clone fail When nft_expr_clone failed, a series of problems will happen: 1. module refcnt will leak, we call __module_get at the beginning but we forget to put it back if ops->clone returns fail 2. memory will be leaked, if clone fail, we just return NULL and forget to free the alloced element 3. set->nelems will become incorrect when set->size is specified. If clone fail, we should decrease the set->nelems Now this patch fixes these problems. And fortunately, clone fail will only happen on counter expression when memory is exhausted. Fixes: 086f332167d6 ("netfilter: nf_tables: add clone interface to expression operations") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 ++++-- net/netfilter/nf_tables_api.c | 11 ++++++----- net/netfilter/nft_dynset.c | 16 ++++++++++------ net/netfilter/nft_set_hash.c | 4 ++-- net/netfilter/nft_set_rbtree.c | 2 +- 5 files changed, 23 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5031e072567b..741dcded5b4f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -542,7 +542,8 @@ void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *data, u64 timeout, gfp_t gfp); -void nft_set_elem_destroy(const struct nft_set *set, void *elem); +void nft_set_elem_destroy(const struct nft_set *set, void *elem, + bool destroy_expr); /** * struct nft_set_gc_batch_head - nf_tables set garbage collection batch @@ -693,7 +694,6 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) { int err; - __module_get(src->ops->type->owner); if (src->ops->clone) { dst->ops = src->ops; err = src->ops->clone(dst, src); @@ -702,6 +702,8 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) } else { memcpy(dst, src, src->ops->size); } + + __module_get(src->ops->type->owner); return 0; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 24db22257586..86e48aeb20be 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3452,14 +3452,15 @@ void *nft_set_elem_init(const struct nft_set *set, return elem; } -void nft_set_elem_destroy(const struct nft_set *set, void *elem) +void nft_set_elem_destroy(const struct nft_set *set, void *elem, + bool destroy_expr) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_uninit(nft_set_ext_data(ext), set->dtype); - if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) + if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); kfree(elem); @@ -3812,7 +3813,7 @@ void nft_set_gc_batch_release(struct rcu_head *rcu) gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu); for (i = 0; i < gcb->head.cnt; i++) - nft_set_elem_destroy(gcb->head.set, gcb->elems[i]); + nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true); kfree(gcb); } EXPORT_SYMBOL_GPL(nft_set_gc_batch_release); @@ -4030,7 +4031,7 @@ static void nf_tables_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), - nft_trans_elem(trans).priv); + nft_trans_elem(trans).priv, true); break; } kfree(trans); @@ -4171,7 +4172,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), - nft_trans_elem(trans).priv); + nft_trans_elem(trans).priv, true); break; } kfree(trans); diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index bfdb689664b0..31ca94793aa9 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -44,18 +44,22 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr, ®s->data[priv->sreg_key], ®s->data[priv->sreg_data], timeout, GFP_ATOMIC); - if (elem == NULL) { - if (set->size) - atomic_dec(&set->nelems); - return NULL; - } + if (elem == NULL) + goto err1; ext = nft_set_elem_ext(set, elem); if (priv->expr != NULL && nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0) - return NULL; + goto err2; return elem; + +err2: + nft_set_elem_destroy(set, elem, false); +err1: + if (set->size) + atomic_dec(&set->nelems); + return NULL; } static void nft_dynset_eval(const struct nft_expr *expr, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 3794cb2fc788..88d9fc8343e7 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -120,7 +120,7 @@ out: return true; err2: - nft_set_elem_destroy(set, he); + nft_set_elem_destroy(set, he, true); err1: return false; } @@ -332,7 +332,7 @@ static int nft_hash_init(const struct nft_set *set, static void nft_hash_elem_destroy(void *ptr, void *arg) { - nft_set_elem_destroy((const struct nft_set *)arg, ptr); + nft_set_elem_destroy((const struct nft_set *)arg, ptr, true); } static void nft_hash_destroy(const struct nft_set *set) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 38b5bda242f8..36493a7cae88 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -266,7 +266,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_set_elem_destroy(set, rbe); + nft_set_elem_destroy(set, rbe, true); } } -- cgit v1.2.3 From f1d505bb762e30bf316ff5d3b604914649d6aed3 Mon Sep 17 00:00:00 2001 From: John W. Linville Date: Tue, 25 Oct 2016 15:56:39 -0400 Subject: netfilter: nf_tables: fix type mismatch with error return from nft_parse_u32_check Commit 36b701fae12ac ("netfilter: nf_tables: validate maximum value of u32 netlink attributes") introduced nft_parse_u32_check with a return value of "unsigned int", yet on error it returns "-ERANGE". This patch corrects the mismatch by changing the return value to "int", which happens to match the actual users of nft_parse_u32_check already. Found by Coverity, CID 1373930. Note that commit 21a9e0f1568ea ("netfilter: nft_exthdr: fix error handling in nft_exthdr_init()) attempted to address the issue, but did not address the return type of nft_parse_u32_check. Signed-off-by: John W. Linville Cc: Laura Garcia Liebana Cc: Pablo Neira Ayuso Cc: Dan Carpenter Fixes: 36b701fae12ac ("netfilter: nf_tables: validate maximum value...") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 741dcded5b4f..d79d1e9b9546 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -145,7 +145,7 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE; } -unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); +int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); unsigned int nft_parse_register(const struct nlattr *attr); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 86e48aeb20be..365d31b86816 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4422,7 +4422,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, * Otherwise a 0 is returned and the attribute value is stored in the * destination variable. */ -unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) +int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) { u32 val; -- cgit v1.2.3 From cdb436d181d21af4d273b49ec7734eecd6a37fe9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Oct 2016 23:46:17 +0200 Subject: netfilter: conntrack: avoid excess memory allocation This is now a fixed-size extension, so we don't need to pass a variable alloc size. This (harmless) error results in allocating 32 instead of the needed 16 bytes for this extension as the size gets passed twice. Fixes: 23014011ba420 ("netfilter: conntrack: support a fixed size of 128 distinct labels") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_labels.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index 498814626e28..1723a67c0b0a 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -30,8 +30,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) if (net->ct.labels_used == 0) return NULL; - return nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, - sizeof(struct nf_conn_labels), GFP_ATOMIC); + return nf_ct_ext_add(ct, NF_CT_EXT_LABELS, GFP_ATOMIC); #else return NULL; #endif -- cgit v1.2.3 From 4fe77d82ef80c77031c9c6f8554cd0dee2aa423a Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Mon, 24 Oct 2016 20:32:57 +0800 Subject: skbedit: allow the user to specify bitmask for mark The user may want to use only some bits of the skb mark in his skbedit rules because the remaining part might be used by something else. Introduce the "mask" parameter to the skbedit actor in order to implement such functionality. When the mask is specified, only those bits selected by the latter are altered really changed by the actor, while the rest is left untouched. Signed-off-by: Antonio Quartulli Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_skbedit.h | 1 + include/uapi/linux/tc_act/tc_skbedit.h | 2 ++ net/sched/act_skbedit.c | 21 ++++++++++++++++++--- 3 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index 5767e9dbcf92..19cd3d345804 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -27,6 +27,7 @@ struct tcf_skbedit { u32 flags; u32 priority; u32 mark; + u32 mask; u16 queue_mapping; u16 ptype; }; diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h index a4d00c608d8f..2884425738ce 100644 --- a/include/uapi/linux/tc_act/tc_skbedit.h +++ b/include/uapi/linux/tc_act/tc_skbedit.h @@ -28,6 +28,7 @@ #define SKBEDIT_F_QUEUE_MAPPING 0x2 #define SKBEDIT_F_MARK 0x4 #define SKBEDIT_F_PTYPE 0x8 +#define SKBEDIT_F_MASK 0x10 struct tc_skbedit { tc_gen; @@ -42,6 +43,7 @@ enum { TCA_SKBEDIT_MARK, TCA_SKBEDIT_PAD, TCA_SKBEDIT_PTYPE, + TCA_SKBEDIT_MASK, __TCA_SKBEDIT_MAX }; #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index a133dcb82132..024f3a3afeff 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -46,8 +46,10 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, if (d->flags & SKBEDIT_F_QUEUE_MAPPING && skb->dev->real_num_tx_queues > d->queue_mapping) skb_set_queue_mapping(skb, d->queue_mapping); - if (d->flags & SKBEDIT_F_MARK) - skb->mark = d->mark; + if (d->flags & SKBEDIT_F_MARK) { + skb->mark &= ~d->mask; + skb->mark |= d->mark & d->mask; + } if (d->flags & SKBEDIT_F_PTYPE) skb->pkt_type = d->ptype; @@ -61,6 +63,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) }, [TCA_SKBEDIT_MARK] = { .len = sizeof(u32) }, [TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) }, + [TCA_SKBEDIT_MASK] = { .len = sizeof(u32) }, }; static int tcf_skbedit_init(struct net *net, struct nlattr *nla, @@ -71,7 +74,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; struct tc_skbedit *parm; struct tcf_skbedit *d; - u32 flags = 0, *priority = NULL, *mark = NULL; + u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL; u16 *queue_mapping = NULL, *ptype = NULL; bool exists = false; int ret = 0, err; @@ -108,6 +111,11 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, mark = nla_data(tb[TCA_SKBEDIT_MARK]); } + if (tb[TCA_SKBEDIT_MASK] != NULL) { + flags |= SKBEDIT_F_MASK; + mask = nla_data(tb[TCA_SKBEDIT_MASK]); + } + parm = nla_data(tb[TCA_SKBEDIT_PARMS]); exists = tcf_hash_check(tn, parm->index, a, bind); @@ -145,6 +153,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, d->mark = *mark; if (flags & SKBEDIT_F_PTYPE) d->ptype = *ptype; + /* default behaviour is to use all the bits */ + d->mask = 0xffffffff; + if (flags & SKBEDIT_F_MASK) + d->mask = *mask; d->tcf_action = parm->action; @@ -182,6 +194,9 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, if ((d->flags & SKBEDIT_F_PTYPE) && nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype)) goto nla_put_failure; + if ((d->flags & SKBEDIT_F_MASK) && + nla_put_u32(skb, TCA_SKBEDIT_MASK, d->mask)) + goto nla_put_failure; tcf_tm_dump(&t, &d->tcf_tm); if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD)) -- cgit v1.2.3 From c90c39dab3e02ce45427a214746711f33ad13be6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Oct 2016 14:40:01 +0200 Subject: genetlink: introduce and use genl_family_attrbuf() This helper function allows family implementations to access their family's attrbuf. This gets rid of the attrbuf usage in families, and also adds locking validation, since it's not valid to use the attrbuf with parallel_ops or outside of the dumpit callback. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/genetlink.h | 2 ++ net/ieee802154/nl802154.c | 7 ++++--- net/netlink/genetlink.c | 19 ++++++++++++++++++ net/nfc/netlink.c | 9 ++++----- net/tipc/netlink.c | 2 +- net/wireless/nl80211.c | 51 +++++++++++++++++++++++------------------------ 6 files changed, 55 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 8d4608ce8716..ef9defb3f5bc 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -73,6 +73,8 @@ struct genl_family { struct module *module; }; +struct nlattr **genl_family_attrbuf(struct genl_family *family); + /** * struct genl_info - receiving information * @snd_seq: sending sequence number diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index d90a4ed5b8a0..21aabadccd0e 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -263,13 +263,14 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb, if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, - nl802154_fam.attrbuf, nl802154_fam.maxattr, + genl_family_attrbuf(&nl802154_fam), + nl802154_fam.maxattr, nl802154_policy); if (err) goto out_unlock; *wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk), - nl802154_fam.attrbuf); + genl_family_attrbuf(&nl802154_fam)); if (IS_ERR(*wpan_dev)) { err = PTR_ERR(*wpan_dev); goto out_unlock; @@ -575,7 +576,7 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb, struct netlink_callback *cb, struct nl802154_dump_wpan_phy_state *state) { - struct nlattr **tb = nl802154_fam.attrbuf; + struct nlattr **tb = genl_family_attrbuf(&nl802154_fam); int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, tb, nl802154_fam.maxattr, nl802154_policy); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 23cc12639ba7..01291b7a27bb 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1096,6 +1096,25 @@ problem: subsys_initcall(genl_init); +/** + * genl_family_attrbuf - return family's attrbuf + * @family: the family + * + * Return the family's attrbuf, while validating that it's + * actually valid to access it. + * + * You cannot use this function with a family that has parallel_ops + * and you can only use it within (pre/post) doit/dumpit callbacks. + */ +struct nlattr **genl_family_attrbuf(struct genl_family *family) +{ + if (!WARN_ON(family->parallel_ops)) + lockdep_assert_held(&genl_mutex); + + return family->attrbuf; +} +EXPORT_SYMBOL(genl_family_attrbuf); + static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, gfp_t flags) { diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index ea023b35f1c2..79786bf62b88 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -120,21 +120,20 @@ nla_put_failure: static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb) { + struct nlattr **attrbuf = genl_family_attrbuf(&nfc_genl_family); struct nfc_dev *dev; int rc; u32 idx; rc = nlmsg_parse(cb->nlh, GENL_HDRLEN + nfc_genl_family.hdrsize, - nfc_genl_family.attrbuf, - nfc_genl_family.maxattr, - nfc_genl_policy); + attrbuf, nfc_genl_family.maxattr, nfc_genl_policy); if (rc < 0) return ERR_PTR(rc); - if (!nfc_genl_family.attrbuf[NFC_ATTR_DEVICE_INDEX]) + if (!attrbuf[NFC_ATTR_DEVICE_INDEX]) return ERR_PTR(-EINVAL); - idx = nla_get_u32(nfc_genl_family.attrbuf[NFC_ATTR_DEVICE_INDEX]); + idx = nla_get_u32(attrbuf[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); if (!dev) diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 3200059d14b2..4b94f3cfe3af 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -262,7 +262,7 @@ int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) { u32 maxattr = tipc_genl_family.maxattr; - *attr = tipc_genl_family.attrbuf; + *attr = genl_family_attrbuf(&tipc_genl_family); if (!*attr) return -EOPNOTSUPP; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c510810f0b7c..7d8cb3330c86 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -551,13 +551,14 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - nl80211_fam.attrbuf, nl80211_fam.maxattr, - nl80211_policy); + genl_family_attrbuf(&nl80211_fam), + nl80211_fam.maxattr, nl80211_policy); if (err) goto out_unlock; - *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), - nl80211_fam.attrbuf); + *wdev = __cfg80211_wdev_from_attrs( + sock_net(skb->sk), + genl_family_attrbuf(&nl80211_fam)); if (IS_ERR(*wdev)) { err = PTR_ERR(*wdev); goto out_unlock; @@ -1881,7 +1882,7 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, struct netlink_callback *cb, struct nl80211_dump_wiphy_state *state) { - struct nlattr **tb = nl80211_fam.attrbuf; + struct nlattr **tb = genl_family_attrbuf(&nl80211_fam); int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, tb, nl80211_fam.maxattr, nl80211_policy); /* ignore parse errors for backward compatibility */ @@ -7643,6 +7644,7 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) { + struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); struct survey_info survey; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; @@ -7655,7 +7657,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) return res; /* prepare_wdev_dump parsed the attributes */ - radio_stats = nl80211_fam.attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS]; + radio_stats = attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS]; if (!wdev->netdev) { res = -EINVAL; @@ -8478,14 +8480,14 @@ static int nl80211_testmode_dump(struct sk_buff *skb, */ phy_idx = cb->args[0] - 1; } else { + struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); + err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - nl80211_fam.attrbuf, nl80211_fam.maxattr, - nl80211_policy); + attrbuf, nl80211_fam.maxattr, nl80211_policy); if (err) goto out_err; - rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), - nl80211_fam.attrbuf); + rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf); if (IS_ERR(rdev)) { err = PTR_ERR(rdev); goto out_err; @@ -8493,9 +8495,8 @@ static int nl80211_testmode_dump(struct sk_buff *skb, phy_idx = rdev->wiphy_idx; rdev = NULL; - if (nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA]) - cb->args[1] = - (long)nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA]; + if (attrbuf[NL80211_ATTR_TESTDATA]) + cb->args[1] = (long)attrbuf[NL80211_ATTR_TESTDATA]; } if (cb->args[1]) { @@ -11277,6 +11278,7 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, struct cfg80211_registered_device **rdev, struct wireless_dev **wdev) { + struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); u32 vid, subcmd; unsigned int i; int vcmd_idx = -1; @@ -11312,31 +11314,28 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, } err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - nl80211_fam.attrbuf, nl80211_fam.maxattr, - nl80211_policy); + attrbuf, nl80211_fam.maxattr, nl80211_policy); if (err) goto out_unlock; - if (!nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID] || - !nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) { + if (!attrbuf[NL80211_ATTR_VENDOR_ID] || + !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) { err = -EINVAL; goto out_unlock; } - *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), - nl80211_fam.attrbuf); + *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf); if (IS_ERR(*wdev)) *wdev = NULL; - *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), - nl80211_fam.attrbuf); + *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf); if (IS_ERR(*rdev)) { err = PTR_ERR(*rdev); goto out_unlock; } - vid = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID]); - subcmd = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]); + vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]); + subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]); for (i = 0; i < (*rdev)->wiphy.n_vendor_commands; i++) { const struct wiphy_vendor_command *vcmd; @@ -11360,9 +11359,9 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, goto out_unlock; } - if (nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]) { - data = nla_data(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]); - data_len = nla_len(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]); + if (attrbuf[NL80211_ATTR_VENDOR_DATA]) { + data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]); + data_len = nla_len(attrbuf[NL80211_ATTR_VENDOR_DATA]); } /* 0 is the first index - add 1 to parse only once */ -- cgit v1.2.3 From a07ea4d9941af5a0c6f0be2a71b51ac9c083c5e5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Oct 2016 14:40:02 +0200 Subject: genetlink: no longer support using static family IDs Static family IDs have never really been used, the only use case was the workaround I introduced for those users that assumed their family ID was also their multicast group ID. Additionally, because static family IDs would never be reserved by the generic netlink code, using a relatively low ID would only work for built-in families that can be registered immediately after generic netlink is started, which is basically only the control family (apart from the workaround code, which I also had to add code for so it would reserve those IDs) Thus, anything other than GENL_ID_GENERATE is flawed and luckily not used except in the cases I mentioned. Move those workarounds into a few lines of code, and then get rid of GENL_ID_GENERATE entirely, making it more robust. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- drivers/acpi/event.c | 1 - drivers/net/gtp.c | 1 - drivers/net/macsec.c | 1 - drivers/net/team/team.c | 1 - drivers/net/wireless/mac80211_hwsim.c | 1 - drivers/scsi/pmcraid.c | 6 ------ drivers/target/target_core_user.c | 1 - drivers/thermal/thermal_core.c | 1 - fs/dlm/netlink.c | 1 - fs/quota/netlink.c | 7 ------- include/linux/genl_magic_func.h | 1 - include/net/genetlink.h | 7 ++----- include/uapi/linux/genetlink.h | 1 - kernel/taskstats.c | 1 - net/batman-adv/netlink.c | 1 - net/core/devlink.c | 1 - net/core/drop_monitor.c | 1 - net/hsr/hsr_netlink.c | 1 - net/ieee802154/netlink.c | 1 - net/ieee802154/nl802154.c | 1 - net/ipv4/fou.c | 1 - net/ipv4/tcp_metrics.c | 1 - net/ipv6/ila/ila_xlat.c | 1 - net/irda/irnetlink.c | 1 - net/l2tp/l2tp_netlink.c | 1 - net/netfilter/ipvs/ip_vs_ctl.c | 1 - net/netlabel/netlabel_calipso.c | 1 - net/netlabel/netlabel_cipso_v4.c | 1 - net/netlabel/netlabel_mgmt.c | 1 - net/netlabel/netlabel_unlabeled.c | 1 - net/netlink/genetlink.c | 37 +++++++++++++++++++++-------------- net/nfc/netlink.c | 1 - net/openvswitch/datapath.c | 4 ---- net/tipc/netlink.c | 1 - net/tipc/netlink_compat.c | 1 - net/wimax/stack.c | 1 - net/wireless/nl80211.c | 1 - 37 files changed, 24 insertions(+), 69 deletions(-) (limited to 'include/net') diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c index e24ea4e796e4..8dfca3d53131 100644 --- a/drivers/acpi/event.c +++ b/drivers/acpi/event.c @@ -83,7 +83,6 @@ static const struct genl_multicast_group acpi_event_mcgrps[] = { }; static struct genl_family acpi_event_genl_family = { - .id = GENL_ID_GENERATE, .name = ACPI_GENL_FAMILY_NAME, .version = ACPI_GENL_VERSION, .maxattr = ACPI_GENL_ATTR_MAX, diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 97e0cbca0a08..f66737ba1299 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1095,7 +1095,6 @@ static int gtp_genl_del_pdp(struct sk_buff *skb, struct genl_info *info) } static struct genl_family gtp_genl_family = { - .id = GENL_ID_GENERATE, .name = "gtp", .version = 0, .hdrsize = 0, diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 1a134cb2d52c..a5309b81a786 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1422,7 +1422,6 @@ static void clear_tx_sa(struct macsec_tx_sa *tx_sa) } static struct genl_family macsec_fam = { - .id = GENL_ID_GENERATE, .name = MACSEC_GENL_NAME, .hdrsize = 0, .version = MACSEC_GENL_VERSION, diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index a380649bf6b5..0b50205764ff 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2151,7 +2151,6 @@ static struct rtnl_link_ops team_link_ops __read_mostly = { ***********************************/ static struct genl_family team_nl_family = { - .id = GENL_ID_GENERATE, .name = TEAM_GENL_NAME, .version = TEAM_GENL_VERSION, .maxattr = TEAM_ATTR_MAX, diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index e95b79bccf9b..54b6cd62676e 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -589,7 +589,6 @@ struct hwsim_radiotap_ack_hdr { /* MAC80211_HWSIM netlinf family */ static struct genl_family hwsim_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = "MAC80211_HWSIM", .version = 1, diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index 68a5c347fae9..cc50eb87b28a 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -1369,12 +1369,6 @@ static struct genl_multicast_group pmcraid_mcgrps[] = { }; static struct genl_family pmcraid_event_family = { - /* - * Due to prior multicast group abuse (the code having assumed that - * the family ID can be used as a multicast group ID) we need to - * statically allocate a family (and thus group) ID. - */ - .id = GENL_ID_PMCRAID, .name = "pmcraid", .version = 1, .maxattr = PMCRAID_AEN_ATTR_MAX, diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 62bf4fe5704a..313a0ef3cda7 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -148,7 +148,6 @@ static const struct genl_multicast_group tcmu_mcgrps[] = { /* Our generic netlink family */ static struct genl_family tcmu_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = "TCM-USER", .version = 1, diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 226b0b4aced6..68d7503f6417 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -2164,7 +2164,6 @@ static const struct genl_multicast_group thermal_event_mcgrps[] = { }; static struct genl_family thermal_event_genl_family = { - .id = GENL_ID_GENERATE, .name = THERMAL_GENL_FAMILY_NAME, .version = THERMAL_GENL_VERSION, .maxattr = THERMAL_GENL_ATTR_MAX, diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 1e6e227134d7..00d226956264 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -17,7 +17,6 @@ static uint32_t dlm_nl_seqnum; static uint32_t listener_nlportid; static struct genl_family family = { - .id = GENL_ID_GENERATE, .name = DLM_GENL_NAME, .version = DLM_GENL_VERSION, }; diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index 8b252673d454..3965a5cdfaa2 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c @@ -13,13 +13,6 @@ static const struct genl_multicast_group quota_mcgrps[] = { /* Netlink family structure for quota */ static struct genl_family quota_genl_family = { - /* - * Needed due to multicast group ID abuse - old code assumed - * the family ID was also a valid multicast group ID (which - * isn't true) and userspace might thus rely on it. Assign a - * static ID for this group to make dealing with that easier. - */ - .id = GENL_ID_VFS_DQUOT, .hdrsize = 0, .name = "VFS_DQUOT", .version = 1, diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 667c31101b8b..7c070c1fe457 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -260,7 +260,6 @@ static struct genl_ops ZZZ_genl_ops[] __read_mostly = { */ #define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family) static struct genl_family ZZZ_genl_family __read_mostly = { - .id = GENL_ID_GENERATE, .name = __stringify(GENL_MAGIC_FAMILY), .version = GENL_MAGIC_VERSION, #ifdef GENL_MAGIC_FAMILY_HDRSZ diff --git a/include/net/genetlink.h b/include/net/genetlink.h index ef9defb3f5bc..43a5c3975a2f 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -20,7 +20,7 @@ struct genl_info; /** * struct genl_family - generic netlink family - * @id: protocol family idenfitier + * @id: protocol family identifier (private) * @hdrsize: length of user specific header in bytes * @name: name of family * @version: protocol version @@ -48,7 +48,7 @@ struct genl_info; * @n_ops: number of operations supported by this family (private) */ struct genl_family { - unsigned int id; + unsigned int id; /* private */ unsigned int hdrsize; char name[GENL_NAMSIZ]; unsigned int version; @@ -149,9 +149,6 @@ static inline int genl_register_family(struct genl_family *family) * Registers the specified family and operations from the specified table. * Only one family may be registered with the same family name or identifier. * - * The family id may equal GENL_ID_GENERATE causing an unique id to - * be automatically generated and assigned. - * * Either a doit or dumpit callback must be specified for every registered * operation or the function will fail. Only one operation structure per * command identifier may be registered. diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index 5512c90af7e3..d9b2db4a29c6 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -26,7 +26,6 @@ struct genlmsghdr { /* * List of reserved static generic netlink identifiers: */ -#define GENL_ID_GENERATE 0 #define GENL_ID_CTRL NLMSG_MIN_TYPE #define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) #define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index b3f05ee20d18..d7a1a9461a10 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -42,7 +42,6 @@ static int family_registered; struct kmem_cache *taskstats_cache; static struct genl_family family = { - .id = GENL_ID_GENERATE, .name = TASKSTATS_GENL_NAME, .version = TASKSTATS_GENL_VERSION, .maxattr = TASKSTATS_CMD_ATTR_MAX, diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 64cb6acbe0a6..a03b0ed7e8dd 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -49,7 +49,6 @@ #include "translation-table.h" struct genl_family batadv_netlink_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = BATADV_NL_NAME, .version = 1, diff --git a/net/core/devlink.c b/net/core/devlink.c index d2fd736de6a2..3008d9c33875 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -342,7 +342,6 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, } static struct genl_family devlink_nl_family = { - .id = GENL_ID_GENERATE, .name = DEVLINK_GENL_NAME, .version = DEVLINK_GENL_VERSION, .maxattr = DEVLINK_ATTR_MAX, diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 72cfb0c61125..a5320dfcd978 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -60,7 +60,6 @@ struct dm_hw_stat_delta { }; static struct genl_family net_drop_monitor_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = "NET_DM", .version = 2, diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index d4d1617f43a8..2ad039492bee 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -132,7 +132,6 @@ static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = { }; static struct genl_family hsr_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = "HSR", .version = 1, diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index c8133c07ceee..19144158b696 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -29,7 +29,6 @@ static unsigned int ieee802154_seq_num; static DEFINE_SPINLOCK(ieee802154_seq_lock); struct genl_family nl802154_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = IEEE802154_NL_NAME, .version = 1, diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 21aabadccd0e..182299858f1d 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -34,7 +34,6 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb, /* the netlink family */ static struct genl_family nl802154_fam = { - .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ .name = NL802154_GENL_NAME, /* have users key off the name instead */ .hdrsize = 0, /* no private header */ .version = 1, /* no particular meaning now */ diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index cf50f7e2b012..e3fc527c5d37 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -623,7 +623,6 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg) } static struct genl_family fou_nl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = FOU_GENL_NAME, .version = FOU_GENL_VERSION, diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index bf1f3b2b29d1..3da305127b32 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -743,7 +743,6 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, } static struct genl_family tcp_metrics_nl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = TCP_METRICS_GENL_NAME, .version = TCP_METRICS_GENL_VERSION, diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index e604013dd814..0d57e27d1cdd 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -119,7 +119,6 @@ static const struct rhashtable_params rht_params = { }; static struct genl_family ila_nl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = ILA_GENL_NAME, .version = ILA_GENL_VERSION, diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index e15c40e86660..f23b81aa91fe 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -25,7 +25,6 @@ static struct genl_family irda_nl_family = { - .id = GENL_ID_GENERATE, .name = IRDA_NL_NAME, .hdrsize = 0, .version = IRDA_NL_VERSION, diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index bf3117771822..4fbf1f41ac52 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -32,7 +32,6 @@ static struct genl_family l2tp_nl_family = { - .id = GENL_ID_GENERATE, .name = L2TP_GENL_NAME, .version = L2TP_GENL_VERSION, .hdrsize = 0, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c3c809b2e712..ceed66cdd03e 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2841,7 +2841,6 @@ static struct nf_sockopt_ops ip_vs_sockopts = { /* IPVS genetlink family */ static struct genl_family ip_vs_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = IPVS_GENL_NAME, .version = IPVS_GENL_VERSION, diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index 2ec93c5e77bb..152e503b8c5d 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -61,7 +61,6 @@ struct netlbl_domhsh_walk_arg { /* NetLabel Generic NETLINK CALIPSO family */ static struct genl_family netlbl_calipso_gnl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = NETLBL_NLTYPE_CALIPSO_NAME, .version = NETLBL_PROTO_VERSION, diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 7fd1104ba900..755b284e7ad4 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -60,7 +60,6 @@ struct netlbl_domhsh_walk_arg { /* NetLabel Generic NETLINK CIPSOv4 family */ static struct genl_family netlbl_cipsov4_gnl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = NETLBL_NLTYPE_CIPSOV4_NAME, .version = NETLBL_PROTO_VERSION, diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index f85d0e07af2d..3b00f2368fcd 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -61,7 +61,6 @@ struct netlbl_domhsh_walk_arg { /* NetLabel Generic NETLINK CIPSOv4 family */ static struct genl_family netlbl_mgmt_gnl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = NETLBL_NLTYPE_MGMT_NAME, .version = NETLBL_PROTO_VERSION, diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 4528cff9138b..c2ea8d1f653a 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -124,7 +124,6 @@ static u8 netlabel_unlabel_acceptflg; /* NetLabel Generic NETLINK unlabeled family */ static struct genl_family netlbl_unlabel_gnl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = NETLBL_NLTYPE_UNLABELED_NAME, .version = NETLBL_PROTO_VERSION, diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 01291b7a27bb..f19ec969edee 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -349,8 +349,6 @@ static int genl_validate_ops(const struct genl_family *family) * * Registers the specified family after validating it first. Only one * family may be registered with the same family name or identifier. - * The family id may equal GENL_ID_GENERATE causing an unique id to - * be automatically generated and assigned. * * The family's ops array must already be assigned, you can use the * genl_register_family_with_ops() helper function. @@ -359,13 +357,7 @@ static int genl_validate_ops(const struct genl_family *family) */ int __genl_register_family(struct genl_family *family) { - int err = -EINVAL, i; - - if (family->id && family->id < GENL_MIN_ID) - goto errout; - - if (family->id > GENL_MAX_ID) - goto errout; + int err, i; err = genl_validate_ops(family); if (err) @@ -378,8 +370,27 @@ int __genl_register_family(struct genl_family *family) goto errout_locked; } - if (family->id == GENL_ID_GENERATE) { - u16 newid = genl_generate_id(); + if (family == &genl_ctrl) { + family->id = GENL_ID_CTRL; + } else { + u16 newid; + + /* this should be left zero in the struct */ + WARN_ON(family->id); + + /* + * Sadly, a few cases need to be special-cased + * due to them having previously abused the API + * and having used their family ID also as their + * multicast group ID, so we use reserved IDs + * for both to be sure we can do that mapping. + */ + if (strcmp(family->name, "pmcraid") == 0) + newid = GENL_ID_PMCRAID; + else if (strcmp(family->name, "VFS_DQUOT") == 0) + newid = GENL_ID_VFS_DQUOT; + else + newid = genl_generate_id(); if (!newid) { err = -ENOMEM; @@ -387,9 +398,6 @@ int __genl_register_family(struct genl_family *family) } family->id = newid; - } else if (genl_family_find_byid(family->id)) { - err = -EEXIST; - goto errout_locked; } if (family->maxattr && !family->parallel_ops) { @@ -419,7 +427,6 @@ int __genl_register_family(struct genl_family *family) errout_locked: genl_unlock_all(); -errout: return err; } EXPORT_SYMBOL(__genl_register_family); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 79786bf62b88..c230403e066c 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -39,7 +39,6 @@ static const struct genl_multicast_group nfc_genl_mcgrps[] = { }; static struct genl_family nfc_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = NFC_GENL_NAME, .version = NFC_GENL_VERSION, diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 194435aa1165..f9fef7dfba15 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -671,7 +671,6 @@ static const struct genl_ops dp_packet_genl_ops[] = { }; static struct genl_family dp_packet_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), .name = OVS_PACKET_FAMILY, .version = OVS_PACKET_VERSION, @@ -1436,7 +1435,6 @@ static const struct genl_ops dp_flow_genl_ops[] = { }; static struct genl_family dp_flow_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), .name = OVS_FLOW_FAMILY, .version = OVS_FLOW_VERSION, @@ -1822,7 +1820,6 @@ static const struct genl_ops dp_datapath_genl_ops[] = { }; static struct genl_family dp_datapath_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), .name = OVS_DATAPATH_FAMILY, .version = OVS_DATAPATH_VERSION, @@ -2244,7 +2241,6 @@ static const struct genl_ops dp_vport_genl_ops[] = { }; struct genl_family dp_vport_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), .name = OVS_VPORT_FAMILY, .version = OVS_VPORT_VERSION, diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 4b94f3cfe3af..383b8fedabc7 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -136,7 +136,6 @@ const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { * so we have a separate genl handling for the new API. */ struct genl_family tipc_genl_family = { - .id = GENL_ID_GENERATE, .name = TIPC_GENL_V2_NAME, .version = TIPC_GENL_V2_VERSION, .hdrsize = 0, diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 1fd464764765..f04428e4c8e5 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1216,7 +1216,6 @@ send: } static struct genl_family tipc_genl_compat_family = { - .id = GENL_ID_GENERATE, .name = TIPC_GENL_NAME, .version = TIPC_GENL_VERSION, .hdrsize = TIPC_GENL_HDRLEN, diff --git a/net/wimax/stack.c b/net/wimax/stack.c index 3f816e2971ee..8ac83a41585f 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -573,7 +573,6 @@ size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); struct genl_family wimax_gnl_family = { - .id = GENL_ID_GENERATE, .name = "WiMAX", .version = WIMAX_GNL_VERSION, .hdrsize = 0, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7d8cb3330c86..714beafe05e0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -39,7 +39,6 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, /* the netlink family */ static struct genl_family nl80211_fam = { - .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ .name = NL80211_GENL_NAME, /* have users key off the name instead */ .hdrsize = 0, /* no private header */ .version = 1, /* no particular meaning now */ -- cgit v1.2.3 From 489111e5c25b93be80340c3113d71903d7c82136 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Oct 2016 14:40:03 +0200 Subject: genetlink: statically initialize families Instead of providing macros/inline functions to initialize the families, make all users initialize them statically and get rid of the macros. This reduces the kernel code size by about 1.6k on x86-64 (with allyesconfig). Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- drivers/acpi/event.c | 1 + drivers/net/gtp.c | 21 +++++++---- drivers/net/macsec.c | 21 +++++++---- drivers/net/team/team.c | 22 +++++++---- drivers/net/wireless/mac80211_hwsim.c | 26 +++++++------ drivers/scsi/pmcraid.c | 1 + drivers/target/target_core_user.c | 1 + drivers/thermal/thermal_core.c | 1 + fs/dlm/netlink.c | 15 +++++--- fs/quota/netlink.c | 1 + include/linux/drbd_genl.h | 2 +- include/linux/genl_magic_func.h | 28 ++++++++------ include/net/genetlink.h | 71 ++++++----------------------------- kernel/taskstats.c | 17 ++++++--- net/batman-adv/netlink.c | 25 +++++++----- net/core/devlink.c | 27 +++++++------ net/core/drop_monitor.c | 20 ++++++---- net/hsr/hsr_netlink.c | 22 +++++++---- net/ieee802154/netlink.c | 23 +++++++----- net/ieee802154/nl802154.c | 34 ++++++++--------- net/ipv4/fou.c | 22 ++++++----- net/ipv4/tcp_metrics.c | 22 ++++++----- net/ipv6/ila/ila_xlat.c | 24 +++++++----- net/irda/irnetlink.c | 19 ++++++---- net/l2tp/l2tp_netlink.c | 25 +++++++----- net/netfilter/ipvs/ip_vs_ctl.c | 22 ++++++----- net/netlabel/netlabel_calipso.c | 20 ++++++---- net/netlabel/netlabel_cipso_v4.c | 21 ++++++----- net/netlabel/netlabel_mgmt.c | 20 ++++++---- net/netlabel/netlabel_unlabeled.c | 20 ++++++---- net/netlink/genetlink.c | 35 +++++++++-------- net/nfc/netlink.c | 24 +++++++----- net/openvswitch/datapath.c | 4 ++ net/tipc/netlink.c | 22 ++++++----- net/tipc/netlink_compat.c | 20 +++++----- net/wimax/stack.c | 19 +++++----- net/wireless/nl80211.c | 33 ++++++++-------- 37 files changed, 414 insertions(+), 337 deletions(-) (limited to 'include/net') diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c index 8dfca3d53131..1ab12ad7d5ba 100644 --- a/drivers/acpi/event.c +++ b/drivers/acpi/event.c @@ -83,6 +83,7 @@ static const struct genl_multicast_group acpi_event_mcgrps[] = { }; static struct genl_family acpi_event_genl_family = { + .module = THIS_MODULE, .name = ACPI_GENL_FAMILY_NAME, .version = ACPI_GENL_VERSION, .maxattr = ACPI_GENL_ATTR_MAX, diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index f66737ba1299..0604fd78f826 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1094,13 +1094,7 @@ static int gtp_genl_del_pdp(struct sk_buff *skb, struct genl_info *info) return 0; } -static struct genl_family gtp_genl_family = { - .name = "gtp", - .version = 0, - .hdrsize = 0, - .maxattr = GTPA_MAX, - .netnsok = true, -}; +static struct genl_family gtp_genl_family; static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq, u32 type, struct pdp_ctx *pctx) @@ -1296,6 +1290,17 @@ static const struct genl_ops gtp_genl_ops[] = { }, }; +static struct genl_family gtp_genl_family = { + .name = "gtp", + .version = 0, + .hdrsize = 0, + .maxattr = GTPA_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = gtp_genl_ops, + .n_ops = ARRAY_SIZE(gtp_genl_ops), +}; + static int __net_init gtp_net_init(struct net *net) { struct gtp_net *gn = net_generic(net, gtp_net_id); @@ -1335,7 +1340,7 @@ static int __init gtp_init(void) if (err < 0) goto error_out; - err = genl_register_family_with_ops(>p_genl_family, gtp_genl_ops); + err = genl_register_family(>p_genl_family); if (err < 0) goto unreg_rtnl_link; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index a5309b81a786..63ca7a3c77cf 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1421,13 +1421,7 @@ static void clear_tx_sa(struct macsec_tx_sa *tx_sa) macsec_txsa_put(tx_sa); } -static struct genl_family macsec_fam = { - .name = MACSEC_GENL_NAME, - .hdrsize = 0, - .version = MACSEC_GENL_VERSION, - .maxattr = MACSEC_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family macsec_fam; static struct net_device *get_dev_from_nl(struct net *net, struct nlattr **attrs) @@ -2654,6 +2648,17 @@ static const struct genl_ops macsec_genl_ops[] = { }, }; +static struct genl_family macsec_fam = { + .name = MACSEC_GENL_NAME, + .hdrsize = 0, + .version = MACSEC_GENL_VERSION, + .maxattr = MACSEC_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = macsec_genl_ops, + .n_ops = ARRAY_SIZE(macsec_genl_ops), +}; + static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -3461,7 +3466,7 @@ static int __init macsec_init(void) if (err) goto notifier; - err = genl_register_family_with_ops(&macsec_fam, macsec_genl_ops); + err = genl_register_family(&macsec_fam); if (err) goto rtnl; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 0b50205764ff..46bf7c1216c0 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2150,12 +2150,7 @@ static struct rtnl_link_ops team_link_ops __read_mostly = { * Generic netlink custom interface ***********************************/ -static struct genl_family team_nl_family = { - .name = TEAM_GENL_NAME, - .version = TEAM_GENL_VERSION, - .maxattr = TEAM_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family team_nl_family; static const struct nla_policy team_nl_policy[TEAM_ATTR_MAX + 1] = { [TEAM_ATTR_UNSPEC] = { .type = NLA_UNSPEC, }, @@ -2745,6 +2740,18 @@ static const struct genl_multicast_group team_nl_mcgrps[] = { { .name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME, }, }; +static struct genl_family team_nl_family = { + .name = TEAM_GENL_NAME, + .version = TEAM_GENL_VERSION, + .maxattr = TEAM_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = team_nl_ops, + .n_ops = ARRAY_SIZE(team_nl_ops), + .mcgrps = team_nl_mcgrps, + .n_mcgrps = ARRAY_SIZE(team_nl_mcgrps), +}; + static int team_nl_send_multicast(struct sk_buff *skb, struct team *team, u32 portid) { @@ -2768,8 +2775,7 @@ static int team_nl_send_event_port_get(struct team *team, static int team_nl_init(void) { - return genl_register_family_with_ops_groups(&team_nl_family, team_nl_ops, - team_nl_mcgrps); + return genl_register_family(&team_nl_family); } static void team_nl_fini(void) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 54b6cd62676e..5d4637e586e8 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -587,14 +587,8 @@ struct hwsim_radiotap_ack_hdr { __le16 rt_chbitmask; } __packed; -/* MAC80211_HWSIM netlinf family */ -static struct genl_family hwsim_genl_family = { - .hdrsize = 0, - .name = "MAC80211_HWSIM", - .version = 1, - .maxattr = HWSIM_ATTR_MAX, - .netnsok = true, -}; +/* MAC80211_HWSIM netlink family */ +static struct genl_family hwsim_genl_family; enum hwsim_multicast_groups { HWSIM_MCGRP_CONFIG, @@ -3234,6 +3228,18 @@ static const struct genl_ops hwsim_ops[] = { }, }; +static struct genl_family hwsim_genl_family = { + .name = "MAC80211_HWSIM", + .version = 1, + .maxattr = HWSIM_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = hwsim_ops, + .n_ops = ARRAY_SIZE(hwsim_ops), + .mcgrps = hwsim_mcgrps, + .n_mcgrps = ARRAY_SIZE(hwsim_mcgrps), +}; + static void destroy_radio(struct work_struct *work) { struct mac80211_hwsim_data *data = @@ -3287,9 +3293,7 @@ static int hwsim_init_netlink(void) printk(KERN_INFO "mac80211_hwsim: initializing netlink\n"); - rc = genl_register_family_with_ops_groups(&hwsim_genl_family, - hwsim_ops, - hwsim_mcgrps); + rc = genl_register_family(&hwsim_genl_family); if (rc) goto failure; diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index cc50eb87b28a..c0ab7bb8c3ce 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -1369,6 +1369,7 @@ static struct genl_multicast_group pmcraid_mcgrps[] = { }; static struct genl_family pmcraid_event_family = { + .module = THIS_MODULE, .name = "pmcraid", .version = 1, .maxattr = PMCRAID_AEN_ATTR_MAX, diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 313a0ef3cda7..3483372f5562 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -148,6 +148,7 @@ static const struct genl_multicast_group tcmu_mcgrps[] = { /* Our generic netlink family */ static struct genl_family tcmu_genl_family = { + .module = THIS_MODULE, .hdrsize = 0, .name = "TCM-USER", .version = 1, diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 68d7503f6417..93b6caab2d9f 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -2164,6 +2164,7 @@ static const struct genl_multicast_group thermal_event_mcgrps[] = { }; static struct genl_family thermal_event_genl_family = { + .module = THIS_MODULE, .name = THERMAL_GENL_FAMILY_NAME, .version = THERMAL_GENL_VERSION, .maxattr = THERMAL_GENL_ATTR_MAX, diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 00d226956264..04042d69573c 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -16,10 +16,7 @@ static uint32_t dlm_nl_seqnum; static uint32_t listener_nlportid; -static struct genl_family family = { - .name = DLM_GENL_NAME, - .version = DLM_GENL_VERSION, -}; +static struct genl_family family; static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size) { @@ -75,9 +72,17 @@ static struct genl_ops dlm_nl_ops[] = { }, }; +static struct genl_family family = { + .name = DLM_GENL_NAME, + .version = DLM_GENL_VERSION, + .ops = dlm_nl_ops, + .n_ops = ARRAY_SIZE(dlm_nl_ops), + .module = THIS_MODULE, +}; + int __init dlm_netlink_init(void) { - return genl_register_family_with_ops(&family, dlm_nl_ops); + return genl_register_family(&family); } void dlm_netlink_exit(void) diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index 3965a5cdfaa2..9457c7b0dfa2 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c @@ -13,6 +13,7 @@ static const struct genl_multicast_group quota_mcgrps[] = { /* Netlink family structure for quota */ static struct genl_family quota_genl_family = { + .module = THIS_MODULE, .hdrsize = 0, .name = "VFS_DQUOT", .version = 1, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index c934d3a96b5e..2896f93808ae 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -67,7 +67,7 @@ * genl_magic_func.h * generates an entry in the static genl_ops array, * and static register/unregister functions to - * genl_register_family_with_ops(). + * genl_register_family(). * * flags and handler: * GENL_op_init( .doit = x, .dumpit = y, .flags = something) diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 7c070c1fe457..40c2e39362c8 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -259,15 +259,7 @@ static struct genl_ops ZZZ_genl_ops[] __read_mostly = { * {{{2 */ #define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family) -static struct genl_family ZZZ_genl_family __read_mostly = { - .name = __stringify(GENL_MAGIC_FAMILY), - .version = GENL_MAGIC_VERSION, -#ifdef GENL_MAGIC_FAMILY_HDRSZ - .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), -#endif - .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, -}; - +static struct genl_family ZZZ_genl_family; /* * Magic: define multicast groups * Magic: define multicast group registration helper @@ -301,11 +293,23 @@ static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ #undef GENL_mc_group #define GENL_mc_group(group) +static struct genl_family ZZZ_genl_family __read_mostly = { + .name = __stringify(GENL_MAGIC_FAMILY), + .version = GENL_MAGIC_VERSION, +#ifdef GENL_MAGIC_FAMILY_HDRSZ + .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), +#endif + .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, + .ops = ZZZ_genl_ops, + .n_ops = ARRAY_SIZE(ZZZ_genl_ops), + .mcgrps = ZZZ_genl_mcgrps, + .n_mcgrps = ARRAY_SIZE(ZZZ_genl_mcgrps), + .module = THIS_MODULE, +}; + int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void) { - return genl_register_family_with_ops_groups(&ZZZ_genl_family, \ - ZZZ_genl_ops, \ - ZZZ_genl_mcgrps); + return genl_register_family(&ZZZ_genl_family); } void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 43a5c3975a2f..2298b50cee34 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -39,13 +39,14 @@ struct genl_info; * Note that unbind() will not be called symmetrically if the * generic netlink family is removed while there are still open * sockets. - * @attrbuf: buffer to store parsed attributes - * @family_list: family list - * @mcgrps: multicast groups used by this family (private) - * @n_mcgrps: number of multicast groups (private) + * @attrbuf: buffer to store parsed attributes (private) + * @family_list: family list (private) + * @mcgrps: multicast groups used by this family + * @n_mcgrps: number of multicast groups * @mcgrp_offset: starting number of multicast group IDs in this family - * @ops: the operations supported by this family (private) - * @n_ops: number of operations supported by this family (private) + * (private) + * @ops: the operations supported by this family + * @n_ops: number of operations supported by this family */ struct genl_family { unsigned int id; /* private */ @@ -64,10 +65,10 @@ struct genl_family { int (*mcast_bind)(struct net *net, int group); void (*mcast_unbind)(struct net *net, int group); struct nlattr ** attrbuf; /* private */ - const struct genl_ops * ops; /* private */ - const struct genl_multicast_group *mcgrps; /* private */ - unsigned int n_ops; /* private */ - unsigned int n_mcgrps; /* private */ + const struct genl_ops * ops; + const struct genl_multicast_group *mcgrps; + unsigned int n_ops; + unsigned int n_mcgrps; unsigned int mcgrp_offset; /* private */ struct list_head family_list; /* private */ struct module *module; @@ -132,55 +133,7 @@ struct genl_ops { u8 flags; }; -int __genl_register_family(struct genl_family *family); - -static inline int genl_register_family(struct genl_family *family) -{ - family->module = THIS_MODULE; - return __genl_register_family(family); -} - -/** - * genl_register_family_with_ops - register a generic netlink family with ops - * @family: generic netlink family - * @ops: operations to be registered - * @n_ops: number of elements to register - * - * Registers the specified family and operations from the specified table. - * Only one family may be registered with the same family name or identifier. - * - * Either a doit or dumpit callback must be specified for every registered - * operation or the function will fail. Only one operation structure per - * command identifier may be registered. - * - * See include/net/genetlink.h for more documenation on the operations - * structure. - * - * Return 0 on success or a negative error code. - */ -static inline int -_genl_register_family_with_ops_grps(struct genl_family *family, - const struct genl_ops *ops, size_t n_ops, - const struct genl_multicast_group *mcgrps, - size_t n_mcgrps) -{ - family->module = THIS_MODULE; - family->ops = ops; - family->n_ops = n_ops; - family->mcgrps = mcgrps; - family->n_mcgrps = n_mcgrps; - return __genl_register_family(family); -} - -#define genl_register_family_with_ops(family, ops) \ - _genl_register_family_with_ops_grps((family), \ - (ops), ARRAY_SIZE(ops), \ - NULL, 0) -#define genl_register_family_with_ops_groups(family, ops, grps) \ - _genl_register_family_with_ops_grps((family), \ - (ops), ARRAY_SIZE(ops), \ - (grps), ARRAY_SIZE(grps)) - +int genl_register_family(struct genl_family *family); int genl_unregister_family(struct genl_family *family); void genl_notify(struct genl_family *family, struct sk_buff *skb, struct genl_info *info, u32 group, gfp_t flags); diff --git a/kernel/taskstats.c b/kernel/taskstats.c index d7a1a9461a10..4075ece592f2 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -41,11 +41,7 @@ static DEFINE_PER_CPU(__u32, taskstats_seqnum); static int family_registered; struct kmem_cache *taskstats_cache; -static struct genl_family family = { - .name = TASKSTATS_GENL_NAME, - .version = TASKSTATS_GENL_VERSION, - .maxattr = TASKSTATS_CMD_ATTR_MAX, -}; +static struct genl_family family; static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = { [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, @@ -650,6 +646,15 @@ static const struct genl_ops taskstats_ops[] = { }, }; +static struct genl_family family = { + .name = TASKSTATS_GENL_NAME, + .version = TASKSTATS_GENL_VERSION, + .maxattr = TASKSTATS_CMD_ATTR_MAX, + .module = THIS_MODULE, + .ops = taskstats_ops, + .n_ops = ARRAY_SIZE(taskstats_ops), +}; + /* Needed early in initialization */ void __init taskstats_init_early(void) { @@ -666,7 +671,7 @@ static int __init taskstats_init(void) { int rc; - rc = genl_register_family_with_ops(&family, taskstats_ops); + rc = genl_register_family(&family); if (rc) return rc; diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index a03b0ed7e8dd..e28cec34a016 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -48,13 +48,7 @@ #include "tp_meter.h" #include "translation-table.h" -struct genl_family batadv_netlink_family = { - .hdrsize = 0, - .name = BATADV_NL_NAME, - .version = 1, - .maxattr = BATADV_ATTR_MAX, - .netnsok = true, -}; +struct genl_family batadv_netlink_family; /* multicast groups */ enum batadv_netlink_multicast_groups { @@ -609,6 +603,19 @@ static struct genl_ops batadv_netlink_ops[] = { }; +struct genl_family batadv_netlink_family = { + .hdrsize = 0, + .name = BATADV_NL_NAME, + .version = 1, + .maxattr = BATADV_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = batadv_netlink_ops, + .n_ops = ARRAY_SIZE(batadv_netlink_ops), + .mcgrps = batadv_netlink_mcgrps, + .n_mcgrps = ARRAY_SIZE(batadv_netlink_mcgrps), +}; + /** * batadv_netlink_register - register batadv genl netlink family */ @@ -616,9 +623,7 @@ void __init batadv_netlink_register(void) { int ret; - ret = genl_register_family_with_ops_groups(&batadv_netlink_family, - batadv_netlink_ops, - batadv_netlink_mcgrps); + ret = genl_register_family(&batadv_netlink_family); if (ret) pr_warn("unable to register netlink family"); } diff --git a/net/core/devlink.c b/net/core/devlink.c index 3008d9c33875..063da8091aef 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -341,14 +341,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, mutex_unlock(&devlink_mutex); } -static struct genl_family devlink_nl_family = { - .name = DEVLINK_GENL_NAME, - .version = DEVLINK_GENL_VERSION, - .maxattr = DEVLINK_ATTR_MAX, - .netnsok = true, - .pre_doit = devlink_nl_pre_doit, - .post_doit = devlink_nl_post_doit, -}; +static struct genl_family devlink_nl_family; enum devlink_multicast_groups { DEVLINK_MCGRP_CONFIG, @@ -1619,6 +1612,20 @@ static const struct genl_ops devlink_nl_ops[] = { }, }; +static struct genl_family devlink_nl_family = { + .name = DEVLINK_GENL_NAME, + .version = DEVLINK_GENL_VERSION, + .maxattr = DEVLINK_ATTR_MAX, + .netnsok = true, + .pre_doit = devlink_nl_pre_doit, + .post_doit = devlink_nl_post_doit, + .module = THIS_MODULE, + .ops = devlink_nl_ops, + .n_ops = ARRAY_SIZE(devlink_nl_ops), + .mcgrps = devlink_nl_mcgrps, + .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps), +}; + /** * devlink_alloc - Allocate new devlink instance resources * @@ -1841,9 +1848,7 @@ EXPORT_SYMBOL_GPL(devlink_sb_unregister); static int __init devlink_module_init(void) { - return genl_register_family_with_ops_groups(&devlink_nl_family, - devlink_nl_ops, - devlink_nl_mcgrps); + return genl_register_family(&devlink_nl_family); } static void __exit devlink_module_exit(void) diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index a5320dfcd978..80c002794ff6 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -59,11 +59,7 @@ struct dm_hw_stat_delta { unsigned long last_drop_val; }; -static struct genl_family net_drop_monitor_family = { - .hdrsize = 0, - .name = "NET_DM", - .version = 2, -}; +static struct genl_family net_drop_monitor_family; static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); @@ -350,6 +346,17 @@ static const struct genl_ops dropmon_ops[] = { }, }; +static struct genl_family net_drop_monitor_family = { + .hdrsize = 0, + .name = "NET_DM", + .version = 2, + .module = THIS_MODULE, + .ops = dropmon_ops, + .n_ops = ARRAY_SIZE(dropmon_ops), + .mcgrps = dropmon_mcgrps, + .n_mcgrps = ARRAY_SIZE(dropmon_mcgrps), +}; + static struct notifier_block dropmon_net_notifier = { .notifier_call = dropmon_net_event }; @@ -366,8 +373,7 @@ static int __init init_net_drop_monitor(void) return -ENOSPC; } - rc = genl_register_family_with_ops_groups(&net_drop_monitor_family, - dropmon_ops, dropmon_mcgrps); + rc = genl_register_family(&net_drop_monitor_family); if (rc) { pr_err("Could not create drop monitor netlink family\n"); return rc; diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 2ad039492bee..aab34c7f6f89 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -131,12 +131,7 @@ static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = { [HSR_A_IF2_SEQ] = { .type = NLA_U16 }, }; -static struct genl_family hsr_genl_family = { - .hdrsize = 0, - .name = "HSR", - .version = 1, - .maxattr = HSR_A_MAX, -}; +static struct genl_family hsr_genl_family; static const struct genl_multicast_group hsr_mcgrps[] = { { .name = "hsr-network", }, @@ -466,6 +461,18 @@ static const struct genl_ops hsr_ops[] = { }, }; +static struct genl_family hsr_genl_family = { + .hdrsize = 0, + .name = "HSR", + .version = 1, + .maxattr = HSR_A_MAX, + .module = THIS_MODULE, + .ops = hsr_ops, + .n_ops = ARRAY_SIZE(hsr_ops), + .mcgrps = hsr_mcgrps, + .n_mcgrps = ARRAY_SIZE(hsr_mcgrps), +}; + int __init hsr_netlink_init(void) { int rc; @@ -474,8 +481,7 @@ int __init hsr_netlink_init(void) if (rc) goto fail_rtnl_link_register; - rc = genl_register_family_with_ops_groups(&hsr_genl_family, hsr_ops, - hsr_mcgrps); + rc = genl_register_family(&hsr_genl_family); if (rc) goto fail_genl_register_family; diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 19144158b696..08e62470bac2 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -28,13 +28,6 @@ static unsigned int ieee802154_seq_num; static DEFINE_SPINLOCK(ieee802154_seq_lock); -struct genl_family nl802154_family = { - .hdrsize = 0, - .name = IEEE802154_NL_NAME, - .version = 1, - .maxattr = IEEE802154_ATTR_MAX, -}; - /* Requests to userspace */ struct sk_buff *ieee802154_nl_create(int flags, u8 req) { @@ -138,11 +131,21 @@ static const struct genl_multicast_group ieee802154_mcgrps[] = { [IEEE802154_BEACON_MCGRP] = { .name = IEEE802154_MCAST_BEACON_NAME, }, }; +struct genl_family nl802154_family = { + .hdrsize = 0, + .name = IEEE802154_NL_NAME, + .version = 1, + .maxattr = IEEE802154_ATTR_MAX, + .module = THIS_MODULE, + .ops = ieee8021154_ops, + .n_ops = ARRAY_SIZE(ieee8021154_ops), + .mcgrps = ieee802154_mcgrps, + .n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps), +}; + int __init ieee802154_nl_init(void) { - return genl_register_family_with_ops_groups(&nl802154_family, - ieee8021154_ops, - ieee802154_mcgrps); + return genl_register_family(&nl802154_family); } void ieee802154_nl_exit(void) diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 182299858f1d..f7e75578aedd 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -26,22 +26,8 @@ #include "rdev-ops.h" #include "core.h" -static int nl802154_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, - struct genl_info *info); - -static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb, - struct genl_info *info); - /* the netlink family */ -static struct genl_family nl802154_fam = { - .name = NL802154_GENL_NAME, /* have users key off the name instead */ - .hdrsize = 0, /* no private header */ - .version = 1, /* no particular meaning now */ - .maxattr = NL802154_ATTR_MAX, - .netnsok = true, - .pre_doit = nl802154_pre_doit, - .post_doit = nl802154_post_doit, -}; +static struct genl_family nl802154_fam; /* multicast groups */ enum nl802154_multicast_groups { @@ -2476,11 +2462,25 @@ static const struct genl_ops nl802154_ops[] = { #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ }; +static struct genl_family nl802154_fam = { + .name = NL802154_GENL_NAME, /* have users key off the name instead */ + .hdrsize = 0, /* no private header */ + .version = 1, /* no particular meaning now */ + .maxattr = NL802154_ATTR_MAX, + .netnsok = true, + .pre_doit = nl802154_pre_doit, + .post_doit = nl802154_post_doit, + .module = THIS_MODULE, + .ops = nl802154_ops, + .n_ops = ARRAY_SIZE(nl802154_ops), + .mcgrps = nl802154_mcgrps, + .n_mcgrps = ARRAY_SIZE(nl802154_mcgrps), +}; + /* initialisation/exit functions */ int nl802154_init(void) { - return genl_register_family_with_ops_groups(&nl802154_fam, nl802154_ops, - nl802154_mcgrps); + return genl_register_family(&nl802154_fam); } void nl802154_exit(void) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index e3fc527c5d37..5b5226a2434f 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -622,13 +622,7 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg) return err; } -static struct genl_family fou_nl_family = { - .hdrsize = 0, - .name = FOU_GENL_NAME, - .version = FOU_GENL_VERSION, - .maxattr = FOU_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family fou_nl_family; static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { [FOU_ATTR_PORT] = { .type = NLA_U16, }, @@ -830,6 +824,17 @@ static const struct genl_ops fou_nl_ops[] = { }, }; +static struct genl_family fou_nl_family = { + .hdrsize = 0, + .name = FOU_GENL_NAME, + .version = FOU_GENL_VERSION, + .maxattr = FOU_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = fou_nl_ops, + .n_ops = ARRAY_SIZE(fou_nl_ops), +}; + size_t fou_encap_hlen(struct ip_tunnel_encap *e) { return sizeof(struct udphdr); @@ -1085,8 +1090,7 @@ static int __init fou_init(void) if (ret) goto exit; - ret = genl_register_family_with_ops(&fou_nl_family, - fou_nl_ops); + ret = genl_register_family(&fou_nl_family); if (ret < 0) goto unregister; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 3da305127b32..bba3c72c4a39 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -742,13 +742,7 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, rcu_read_unlock(); } -static struct genl_family tcp_metrics_nl_family = { - .hdrsize = 0, - .name = TCP_METRICS_GENL_NAME, - .version = TCP_METRICS_GENL_VERSION, - .maxattr = TCP_METRICS_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family tcp_metrics_nl_family; static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = { [TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, }, @@ -1115,6 +1109,17 @@ static const struct genl_ops tcp_metrics_nl_ops[] = { }, }; +static struct genl_family tcp_metrics_nl_family = { + .hdrsize = 0, + .name = TCP_METRICS_GENL_NAME, + .version = TCP_METRICS_GENL_VERSION, + .maxattr = TCP_METRICS_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = tcp_metrics_nl_ops, + .n_ops = ARRAY_SIZE(tcp_metrics_nl_ops), +}; + static unsigned int tcpmhash_entries; static int __init set_tcpmhash_entries(char *str) { @@ -1178,8 +1183,7 @@ void __init tcp_metrics_init(void) if (ret < 0) panic("Could not allocate the tcp_metrics hash table\n"); - ret = genl_register_family_with_ops(&tcp_metrics_nl_family, - tcp_metrics_nl_ops); + ret = genl_register_family(&tcp_metrics_nl_family); if (ret < 0) panic("Could not register tcp_metrics generic netlink\n"); } diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 0d57e27d1cdd..97f7b0cc4675 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -118,14 +118,7 @@ static const struct rhashtable_params rht_params = { .obj_cmpfn = ila_cmpfn, }; -static struct genl_family ila_nl_family = { - .hdrsize = 0, - .name = ILA_GENL_NAME, - .version = ILA_GENL_VERSION, - .maxattr = ILA_ATTR_MAX, - .netnsok = true, - .parallel_ops = true, -}; +static struct genl_family ila_nl_family; static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, @@ -560,6 +553,18 @@ static const struct genl_ops ila_nl_ops[] = { }, }; +static struct genl_family ila_nl_family = { + .hdrsize = 0, + .name = ILA_GENL_NAME, + .version = ILA_GENL_VERSION, + .maxattr = ILA_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .module = THIS_MODULE, + .ops = ila_nl_ops, + .n_ops = ARRAY_SIZE(ila_nl_ops), +}; + #define ILA_HASH_TABLE_SIZE 1024 static __net_init int ila_init_net(struct net *net) @@ -630,8 +635,7 @@ int ila_xlat_init(void) if (ret) goto exit; - ret = genl_register_family_with_ops(&ila_nl_family, - ila_nl_ops); + ret = genl_register_family(&ila_nl_family); if (ret < 0) goto unregister; diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index f23b81aa91fe..07877347c2f7 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -24,12 +24,7 @@ -static struct genl_family irda_nl_family = { - .name = IRDA_NL_NAME, - .hdrsize = 0, - .version = IRDA_NL_VERSION, - .maxattr = IRDA_NL_CMD_MAX, -}; +static struct genl_family irda_nl_family; static struct net_device * ifname_to_netdev(struct net *net, struct genl_info *info) { @@ -146,9 +141,19 @@ static const struct genl_ops irda_nl_ops[] = { }; +static struct genl_family irda_nl_family = { + .name = IRDA_NL_NAME, + .hdrsize = 0, + .version = IRDA_NL_VERSION, + .maxattr = IRDA_NL_CMD_MAX, + .module = THIS_MODULE, + .ops = irda_nl_ops, + .n_ops = ARRAY_SIZE(irda_nl_ops), +}; + int irda_nl_register(void) { - return genl_register_family_with_ops(&irda_nl_family, irda_nl_ops); + return genl_register_family(&irda_nl_family); } void irda_nl_unregister(void) diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 4fbf1f41ac52..e4e8c0769a6b 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -31,13 +31,7 @@ #include "l2tp_core.h" -static struct genl_family l2tp_nl_family = { - .name = L2TP_GENL_NAME, - .version = L2TP_GENL_VERSION, - .hdrsize = 0, - .maxattr = L2TP_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family l2tp_nl_family; static const struct genl_multicast_group l2tp_multicast_group[] = { { @@ -976,6 +970,19 @@ static const struct genl_ops l2tp_nl_ops[] = { }, }; +static struct genl_family l2tp_nl_family = { + .name = L2TP_GENL_NAME, + .version = L2TP_GENL_VERSION, + .hdrsize = 0, + .maxattr = L2TP_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = l2tp_nl_ops, + .n_ops = ARRAY_SIZE(l2tp_nl_ops), + .mcgrps = l2tp_multicast_group, + .n_mcgrps = ARRAY_SIZE(l2tp_multicast_group), +}; + int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops) { int ret; @@ -1012,9 +1019,7 @@ EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops); static int l2tp_nl_init(void) { pr_info("L2TP netlink interface\n"); - return genl_register_family_with_ops_groups(&l2tp_nl_family, - l2tp_nl_ops, - l2tp_multicast_group); + return genl_register_family(&l2tp_nl_family); } static void l2tp_nl_cleanup(void) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ceed66cdd03e..ea3e8aed063f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2840,13 +2840,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = { */ /* IPVS genetlink family */ -static struct genl_family ip_vs_genl_family = { - .hdrsize = 0, - .name = IPVS_GENL_NAME, - .version = IPVS_GENL_VERSION, - .maxattr = IPVS_CMD_MAX, - .netnsok = true, /* Make ipvsadm to work on netns */ -}; +static struct genl_family ip_vs_genl_family; /* Policy used for first-level command attributes */ static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { @@ -3871,10 +3865,20 @@ static const struct genl_ops ip_vs_genl_ops[] = { }, }; +static struct genl_family ip_vs_genl_family = { + .hdrsize = 0, + .name = IPVS_GENL_NAME, + .version = IPVS_GENL_VERSION, + .maxattr = IPVS_CMD_MAX, + .netnsok = true, /* Make ipvsadm to work on netns */ + .module = THIS_MODULE, + .ops = ip_vs_genl_ops, + .n_ops = ARRAY_SIZE(ip_vs_genl_ops), +}; + static int __init ip_vs_genl_register(void) { - return genl_register_family_with_ops(&ip_vs_genl_family, - ip_vs_genl_ops); + return genl_register_family(&ip_vs_genl_family); } static void ip_vs_genl_unregister(void) diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index 152e503b8c5d..ca7c9c411a5c 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -60,12 +60,7 @@ struct netlbl_domhsh_walk_arg { }; /* NetLabel Generic NETLINK CALIPSO family */ -static struct genl_family netlbl_calipso_gnl_family = { - .hdrsize = 0, - .name = NETLBL_NLTYPE_CALIPSO_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = NLBL_CALIPSO_A_MAX, -}; +static struct genl_family netlbl_calipso_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy calipso_genl_policy[NLBL_CALIPSO_A_MAX + 1] = { @@ -354,6 +349,16 @@ static const struct genl_ops netlbl_calipso_ops[] = { }, }; +static struct genl_family netlbl_calipso_gnl_family = { + .hdrsize = 0, + .name = NETLBL_NLTYPE_CALIPSO_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_CALIPSO_A_MAX, + .module = THIS_MODULE, + .ops = netlbl_calipso_ops, + .n_ops = ARRAY_SIZE(netlbl_calipso_ops), +}; + /* NetLabel Generic NETLINK Protocol Functions */ @@ -367,8 +372,7 @@ static const struct genl_ops netlbl_calipso_ops[] = { */ int __init netlbl_calipso_genl_init(void) { - return genl_register_family_with_ops(&netlbl_calipso_gnl_family, - netlbl_calipso_ops); + return genl_register_family(&netlbl_calipso_gnl_family); } static const struct netlbl_calipso_ops *calipso_ops; diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 755b284e7ad4..a665eae91245 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -59,13 +59,7 @@ struct netlbl_domhsh_walk_arg { }; /* NetLabel Generic NETLINK CIPSOv4 family */ -static struct genl_family netlbl_cipsov4_gnl_family = { - .hdrsize = 0, - .name = NETLBL_NLTYPE_CIPSOV4_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = NLBL_CIPSOV4_A_MAX, -}; - +static struct genl_family netlbl_cipsov4_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = { [NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 }, @@ -766,6 +760,16 @@ static const struct genl_ops netlbl_cipsov4_ops[] = { }, }; +static struct genl_family netlbl_cipsov4_gnl_family = { + .hdrsize = 0, + .name = NETLBL_NLTYPE_CIPSOV4_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_CIPSOV4_A_MAX, + .module = THIS_MODULE, + .ops = netlbl_cipsov4_ops, + .n_ops = ARRAY_SIZE(netlbl_cipsov4_ops), +}; + /* * NetLabel Generic NETLINK Protocol Functions */ @@ -780,6 +784,5 @@ static const struct genl_ops netlbl_cipsov4_ops[] = { */ int __init netlbl_cipsov4_genl_init(void) { - return genl_register_family_with_ops(&netlbl_cipsov4_gnl_family, - netlbl_cipsov4_ops); + return genl_register_family(&netlbl_cipsov4_gnl_family); } diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 3b00f2368fcd..ecfe8eb149db 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -60,12 +60,7 @@ struct netlbl_domhsh_walk_arg { }; /* NetLabel Generic NETLINK CIPSOv4 family */ -static struct genl_family netlbl_mgmt_gnl_family = { - .hdrsize = 0, - .name = NETLBL_NLTYPE_MGMT_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = NLBL_MGMT_A_MAX, -}; +static struct genl_family netlbl_mgmt_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { @@ -833,6 +828,16 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = { }, }; +static struct genl_family netlbl_mgmt_gnl_family = { + .hdrsize = 0, + .name = NETLBL_NLTYPE_MGMT_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_MGMT_A_MAX, + .module = THIS_MODULE, + .ops = netlbl_mgmt_genl_ops, + .n_ops = ARRAY_SIZE(netlbl_mgmt_genl_ops), +}; + /* * NetLabel Generic NETLINK Protocol Functions */ @@ -847,6 +852,5 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = { */ int __init netlbl_mgmt_genl_init(void) { - return genl_register_family_with_ops(&netlbl_mgmt_gnl_family, - netlbl_mgmt_genl_ops); + return genl_register_family(&netlbl_mgmt_gnl_family); } diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index c2ea8d1f653a..5dbbad41114f 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -123,12 +123,7 @@ static struct netlbl_unlhsh_iface __rcu *netlbl_unlhsh_def; static u8 netlabel_unlabel_acceptflg; /* NetLabel Generic NETLINK unlabeled family */ -static struct genl_family netlbl_unlabel_gnl_family = { - .hdrsize = 0, - .name = NETLBL_NLTYPE_UNLABELED_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = NLBL_UNLABEL_A_MAX, -}; +static struct genl_family netlbl_unlabel_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { @@ -1377,6 +1372,16 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = { }, }; +static struct genl_family netlbl_unlabel_gnl_family = { + .hdrsize = 0, + .name = NETLBL_NLTYPE_UNLABELED_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_UNLABEL_A_MAX, + .module = THIS_MODULE, + .ops = netlbl_unlabel_genl_ops, + .n_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops), +}; + /* * NetLabel Generic NETLINK Protocol Functions */ @@ -1391,8 +1396,7 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = { */ int __init netlbl_unlabel_genl_init(void) { - return genl_register_family_with_ops(&netlbl_unlabel_gnl_family, - netlbl_unlabel_genl_ops); + return genl_register_family(&netlbl_unlabel_gnl_family); } /* diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f19ec969edee..ca582ee4ae05 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -344,18 +344,18 @@ static int genl_validate_ops(const struct genl_family *family) } /** - * __genl_register_family - register a generic netlink family + * genl_register_family - register a generic netlink family * @family: generic netlink family * * Registers the specified family after validating it first. Only one * family may be registered with the same family name or identifier. * - * The family's ops array must already be assigned, you can use the - * genl_register_family_with_ops() helper function. + * The family's ops, multicast groups and module pointer must already + * be assigned. * * Return 0 on success or a negative error code. */ -int __genl_register_family(struct genl_family *family) +int genl_register_family(struct genl_family *family) { int err, i; @@ -429,7 +429,7 @@ errout_locked: genl_unlock_all(); return err; } -EXPORT_SYMBOL(__genl_register_family); +EXPORT_SYMBOL(genl_register_family); /** * genl_unregister_family - unregister generic netlink family @@ -452,7 +452,6 @@ int genl_unregister_family(struct genl_family *family) genl_unregister_mc_groups(family); list_del(&rc->family_list); - family->n_ops = 0; up_write(&cb_lock); wait_event(genl_sk_destructing_waitq, atomic_read(&genl_sk_destructing_cnt) == 0); @@ -681,13 +680,7 @@ static void genl_rcv(struct sk_buff *skb) * Controller **************************************************************************/ -static struct genl_family genl_ctrl = { - .id = GENL_ID_CTRL, - .name = "nlctrl", - .version = 0x2, - .maxattr = CTRL_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family genl_ctrl; static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) @@ -997,6 +990,19 @@ static const struct genl_multicast_group genl_ctrl_groups[] = { { .name = "notify", }, }; +static struct genl_family genl_ctrl = { + .module = THIS_MODULE, + .ops = genl_ctrl_ops, + .n_ops = ARRAY_SIZE(genl_ctrl_ops), + .mcgrps = genl_ctrl_groups, + .n_mcgrps = ARRAY_SIZE(genl_ctrl_groups), + .id = GENL_ID_CTRL, + .name = "nlctrl", + .version = 0x2, + .maxattr = CTRL_ATTR_MAX, + .netnsok = true, +}; + static int genl_bind(struct net *net, int group) { int i, err = -ENOENT; @@ -1086,8 +1092,7 @@ static int __init genl_init(void) for (i = 0; i < GENL_FAM_TAB_SIZE; i++) INIT_LIST_HEAD(&family_ht[i]); - err = genl_register_family_with_ops_groups(&genl_ctrl, genl_ctrl_ops, - genl_ctrl_groups); + err = genl_register_family(&genl_ctrl); if (err < 0) goto problem; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index c230403e066c..450b1e5144cc 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -38,13 +38,7 @@ static const struct genl_multicast_group nfc_genl_mcgrps[] = { { .name = NFC_GENL_MCAST_EVENT_NAME, }, }; -static struct genl_family nfc_genl_family = { - .hdrsize = 0, - .name = NFC_GENL_NAME, - .version = NFC_GENL_VERSION, - .maxattr = NFC_ATTR_MAX, -}; - +static struct genl_family nfc_genl_family; static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_DEVICE_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING, @@ -1752,6 +1746,18 @@ static const struct genl_ops nfc_genl_ops[] = { }, }; +static struct genl_family nfc_genl_family = { + .hdrsize = 0, + .name = NFC_GENL_NAME, + .version = NFC_GENL_VERSION, + .maxattr = NFC_ATTR_MAX, + .module = THIS_MODULE, + .ops = nfc_genl_ops, + .n_ops = ARRAY_SIZE(nfc_genl_ops), + .mcgrps = nfc_genl_mcgrps, + .n_mcgrps = ARRAY_SIZE(nfc_genl_mcgrps), +}; + struct urelease_work { struct work_struct w; @@ -1837,9 +1843,7 @@ int __init nfc_genl_init(void) { int rc; - rc = genl_register_family_with_ops_groups(&nfc_genl_family, - nfc_genl_ops, - nfc_genl_mcgrps); + rc = genl_register_family(&nfc_genl_family); if (rc) return rc; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f9fef7dfba15..ad6a111a0014 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -679,6 +679,7 @@ static struct genl_family dp_packet_genl_family = { .parallel_ops = true, .ops = dp_packet_genl_ops, .n_ops = ARRAY_SIZE(dp_packet_genl_ops), + .module = THIS_MODULE, }; static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, @@ -1445,6 +1446,7 @@ static struct genl_family dp_flow_genl_family = { .n_ops = ARRAY_SIZE(dp_flow_genl_ops), .mcgrps = &ovs_dp_flow_multicast_group, .n_mcgrps = 1, + .module = THIS_MODULE, }; static size_t ovs_dp_cmd_msg_size(void) @@ -1830,6 +1832,7 @@ static struct genl_family dp_datapath_genl_family = { .n_ops = ARRAY_SIZE(dp_datapath_genl_ops), .mcgrps = &ovs_dp_datapath_multicast_group, .n_mcgrps = 1, + .module = THIS_MODULE, }; /* Called with ovs_mutex or RCU read lock. */ @@ -2251,6 +2254,7 @@ struct genl_family dp_vport_genl_family = { .n_ops = ARRAY_SIZE(dp_vport_genl_ops), .mcgrps = &ovs_dp_vport_multicast_group, .n_mcgrps = 1, + .module = THIS_MODULE, }; static struct genl_family * const dp_genl_families[] = { diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 383b8fedabc7..74a405bf107b 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -135,14 +135,6 @@ const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { /* Users of the legacy API (tipc-config) can't handle that we add operations, * so we have a separate genl handling for the new API. */ -struct genl_family tipc_genl_family = { - .name = TIPC_GENL_V2_NAME, - .version = TIPC_GENL_V2_VERSION, - .hdrsize = 0, - .maxattr = TIPC_NLA_MAX, - .netnsok = true, -}; - static const struct genl_ops tipc_genl_v2_ops[] = { { .cmd = TIPC_NL_BEARER_DISABLE, @@ -257,6 +249,17 @@ static const struct genl_ops tipc_genl_v2_ops[] = { #endif }; +struct genl_family tipc_genl_family = { + .name = TIPC_GENL_V2_NAME, + .version = TIPC_GENL_V2_VERSION, + .hdrsize = 0, + .maxattr = TIPC_NLA_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = tipc_genl_v2_ops, + .n_ops = ARRAY_SIZE(tipc_genl_v2_ops), +}; + int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) { u32 maxattr = tipc_genl_family.maxattr; @@ -272,8 +275,7 @@ int tipc_netlink_start(void) { int res; - res = genl_register_family_with_ops(&tipc_genl_family, - tipc_genl_v2_ops); + res = genl_register_family(&tipc_genl_family); if (res) { pr_err("Failed to register netlink interface\n"); return res; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index f04428e4c8e5..07b19931e458 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1215,27 +1215,29 @@ send: return err; } +static struct genl_ops tipc_genl_compat_ops[] = { + { + .cmd = TIPC_GENL_CMD, + .doit = tipc_nl_compat_recv, + }, +}; + static struct genl_family tipc_genl_compat_family = { .name = TIPC_GENL_NAME, .version = TIPC_GENL_VERSION, .hdrsize = TIPC_GENL_HDRLEN, .maxattr = 0, .netnsok = true, -}; - -static struct genl_ops tipc_genl_compat_ops[] = { - { - .cmd = TIPC_GENL_CMD, - .doit = tipc_nl_compat_recv, - }, + .module = THIS_MODULE, + .ops = tipc_genl_compat_ops, + .n_ops = ARRAY_SIZE(tipc_genl_compat_ops), }; int tipc_netlink_compat_start(void) { int res; - res = genl_register_family_with_ops(&tipc_genl_compat_family, - tipc_genl_compat_ops); + res = genl_register_family(&tipc_genl_compat_family); if (res) { pr_err("Failed to register legacy compat interface\n"); return res; diff --git a/net/wimax/stack.c b/net/wimax/stack.c index 8ac83a41585f..587e1627681f 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -572,15 +572,20 @@ struct d_level D_LEVEL[] = { size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); +static const struct genl_multicast_group wimax_gnl_mcgrps[] = { + { .name = "msg", }, +}; + struct genl_family wimax_gnl_family = { .name = "WiMAX", .version = WIMAX_GNL_VERSION, .hdrsize = 0, .maxattr = WIMAX_GNL_ATTR_MAX, -}; - -static const struct genl_multicast_group wimax_gnl_mcgrps[] = { - { .name = "msg", }, + .module = THIS_MODULE, + .ops = wimax_gnl_ops, + .n_ops = ARRAY_SIZE(wimax_gnl_ops), + .mcgrps = wimax_gnl_mcgrps, + .n_mcgrps = ARRAY_SIZE(wimax_gnl_mcgrps), }; @@ -595,11 +600,7 @@ int __init wimax_subsys_init(void) d_parse_params(D_LEVEL, D_LEVEL_SIZE, wimax_debug_params, "wimax.debug"); - snprintf(wimax_gnl_family.name, sizeof(wimax_gnl_family.name), - "WiMAX"); - result = genl_register_family_with_ops_groups(&wimax_gnl_family, - wimax_gnl_ops, - wimax_gnl_mcgrps); + result = genl_register_family(&wimax_gnl_family); if (unlikely(result < 0)) { pr_err("cannot register generic netlink family: %d\n", result); goto error_register_family; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 714beafe05e0..8e5ca3c47593 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -32,21 +32,8 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, struct cfg80211_crypto_settings *settings, int cipher_limit); -static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, - struct genl_info *info); -static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, - struct genl_info *info); - /* the netlink family */ -static struct genl_family nl80211_fam = { - .name = NL80211_GENL_NAME, /* have users key off the name instead */ - .hdrsize = 0, /* no private header */ - .version = 1, /* no particular meaning now */ - .maxattr = NL80211_ATTR_MAX, - .netnsok = true, - .pre_doit = nl80211_pre_doit, - .post_doit = nl80211_post_doit, -}; +static struct genl_family nl80211_fam; /* multicast groups */ enum nl80211_multicast_groups { @@ -12599,6 +12586,21 @@ static const struct genl_ops nl80211_ops[] = { }, }; +static struct genl_family nl80211_fam = { + .name = NL80211_GENL_NAME, /* have users key off the name instead */ + .hdrsize = 0, /* no private header */ + .version = 1, /* no particular meaning now */ + .maxattr = NL80211_ATTR_MAX, + .netnsok = true, + .pre_doit = nl80211_pre_doit, + .post_doit = nl80211_post_doit, + .module = THIS_MODULE, + .ops = nl80211_ops, + .n_ops = ARRAY_SIZE(nl80211_ops), + .mcgrps = nl80211_mcgrps, + .n_mcgrps = ARRAY_SIZE(nl80211_mcgrps), +}; + /* notification functions */ void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, @@ -14565,8 +14567,7 @@ int nl80211_init(void) { int err; - err = genl_register_family_with_ops_groups(&nl80211_fam, nl80211_ops, - nl80211_mcgrps); + err = genl_register_family(&nl80211_fam); if (err) return err; -- cgit v1.2.3 From 2ae0f17df1cd52aafd1ab0415ea1f1dd56dc0e2a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Oct 2016 14:40:04 +0200 Subject: genetlink: use idr to track families Since generic netlink family IDs are small integers, allocated densely, IDR is an ideal match for lookups. Replace the existing hand-written hash-table with IDR for allocation and lookup. This lets the families only be written to once, during register, since the list_head can be removed and removal of a family won't cause any writes. It also slightly reduces the code size (by about 1.3k on x86-64). Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/genetlink.h | 31 +++-- include/uapi/linux/genetlink.h | 2 + net/netlink/genetlink.c | 271 ++++++++++++++++------------------------- 3 files changed, 123 insertions(+), 181 deletions(-) (limited to 'include/net') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 2298b50cee34..3ec87bacc0f5 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -40,7 +40,6 @@ struct genl_info; * generic netlink family is removed while there are still open * sockets. * @attrbuf: buffer to store parsed attributes (private) - * @family_list: family list (private) * @mcgrps: multicast groups used by this family * @n_mcgrps: number of multicast groups * @mcgrp_offset: starting number of multicast group IDs in this family @@ -70,11 +69,10 @@ struct genl_family { unsigned int n_ops; unsigned int n_mcgrps; unsigned int mcgrp_offset; /* private */ - struct list_head family_list; /* private */ struct module *module; }; -struct nlattr **genl_family_attrbuf(struct genl_family *family); +struct nlattr **genl_family_attrbuf(const struct genl_family *family); /** * struct genl_info - receiving information @@ -134,12 +132,12 @@ struct genl_ops { }; int genl_register_family(struct genl_family *family); -int genl_unregister_family(struct genl_family *family); -void genl_notify(struct genl_family *family, struct sk_buff *skb, +int genl_unregister_family(const struct genl_family *family); +void genl_notify(const struct genl_family *family, struct sk_buff *skb, struct genl_info *info, u32 group, gfp_t flags); void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, - struct genl_family *family, int flags, u8 cmd); + const struct genl_family *family, int flags, u8 cmd); /** * genlmsg_nlhdr - Obtain netlink header from user specified header @@ -148,8 +146,8 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, * * Returns pointer to netlink header. */ -static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr, - struct genl_family *family) +static inline struct nlmsghdr * +genlmsg_nlhdr(void *user_hdr, const struct genl_family *family) { return (struct nlmsghdr *)((char *)user_hdr - family->hdrsize - @@ -185,7 +183,7 @@ static inline int genlmsg_parse(const struct nlmsghdr *nlh, */ static inline void genl_dump_check_consistent(struct netlink_callback *cb, void *user_hdr, - struct genl_family *family) + const struct genl_family *family) { nl_dump_check_consistent(cb, genlmsg_nlhdr(user_hdr, family)); } @@ -202,7 +200,7 @@ static inline void genl_dump_check_consistent(struct netlink_callback *cb, */ static inline void *genlmsg_put_reply(struct sk_buff *skb, struct genl_info *info, - struct genl_family *family, + const struct genl_family *family, int flags, u8 cmd) { return genlmsg_put(skb, info->snd_portid, info->snd_seq, family, @@ -239,7 +237,7 @@ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr) * @group: offset of multicast group in groups array * @flags: allocation flags */ -static inline int genlmsg_multicast_netns(struct genl_family *family, +static inline int genlmsg_multicast_netns(const struct genl_family *family, struct net *net, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { @@ -257,7 +255,7 @@ static inline int genlmsg_multicast_netns(struct genl_family *family, * @group: offset of multicast group in groups array * @flags: allocation flags */ -static inline int genlmsg_multicast(struct genl_family *family, +static inline int genlmsg_multicast(const struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { @@ -275,7 +273,7 @@ static inline int genlmsg_multicast(struct genl_family *family, * * This function must hold the RTNL or rcu_read_lock(). */ -int genlmsg_multicast_allns(struct genl_family *family, +int genlmsg_multicast_allns(const struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags); @@ -359,8 +357,9 @@ static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags) * This function returns the number of broadcast listeners that have set the * NETLINK_RECV_NO_ENOBUFS socket option. */ -static inline int genl_set_err(struct genl_family *family, struct net *net, - u32 portid, u32 group, int code) +static inline int genl_set_err(const struct genl_family *family, + struct net *net, u32 portid, + u32 group, int code) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; @@ -368,7 +367,7 @@ static inline int genl_set_err(struct genl_family *family, struct net *net, return netlink_set_err(net->genl_sock, portid, group, code); } -static inline int genl_has_listeners(struct genl_family *family, +static inline int genl_has_listeners(const struct genl_family *family, struct net *net, unsigned int group) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index d9b2db4a29c6..adc899381e0d 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -29,6 +29,8 @@ struct genlmsghdr { #define GENL_ID_CTRL NLMSG_MIN_TYPE #define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) #define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2) +/* must be last reserved + 1 */ +#define GENL_START_ALLOC (NLMSG_MIN_TYPE + 3) /************************************************************************** * Controller diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index ca582ee4ae05..85659921e7b2 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -58,10 +59,8 @@ static void genl_unlock_all(void) up_write(&cb_lock); } -#define GENL_FAM_TAB_SIZE 16 -#define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1) +static DEFINE_IDR(genl_fam_idr); -static struct list_head family_ht[GENL_FAM_TAB_SIZE]; /* * Bitmap of multicast groups that are currently in use. * @@ -86,45 +85,29 @@ static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) | static unsigned long *mc_groups = &mc_group_start; static unsigned long mc_groups_longs = 1; -static int genl_ctrl_event(int event, struct genl_family *family, +static int genl_ctrl_event(int event, const struct genl_family *family, const struct genl_multicast_group *grp, int grp_id); -static inline unsigned int genl_family_hash(unsigned int id) +static const struct genl_family *genl_family_find_byid(unsigned int id) { - return id & GENL_FAM_TAB_MASK; + return idr_find(&genl_fam_idr, id); } -static inline struct list_head *genl_family_chain(unsigned int id) +static const struct genl_family *genl_family_find_byname(char *name) { - return &family_ht[genl_family_hash(id)]; -} - -static struct genl_family *genl_family_find_byid(unsigned int id) -{ - struct genl_family *f; + const struct genl_family *family; + unsigned int id; - list_for_each_entry(f, genl_family_chain(id), family_list) - if (f->id == id) - return f; + idr_for_each_entry(&genl_fam_idr, family, id) + if (strcmp(family->name, name) == 0) + return family; return NULL; } -static struct genl_family *genl_family_find_byname(char *name) -{ - struct genl_family *f; - int i; - - for (i = 0; i < GENL_FAM_TAB_SIZE; i++) - list_for_each_entry(f, genl_family_chain(i), family_list) - if (strcmp(f->name, name) == 0) - return f; - - return NULL; -} - -static const struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family) +static const struct genl_ops *genl_get_cmd(u8 cmd, + const struct genl_family *family) { int i; @@ -135,26 +118,6 @@ static const struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family) return NULL; } -/* Of course we are going to have problems once we hit - * 2^16 alive types, but that can only happen by year 2K -*/ -static u16 genl_generate_id(void) -{ - static u16 id_gen_idx = GENL_MIN_ID; - int i; - - for (i = 0; i <= GENL_MAX_ID - GENL_MIN_ID; i++) { - if (id_gen_idx != GENL_ID_VFS_DQUOT && - id_gen_idx != GENL_ID_PMCRAID && - !genl_family_find_byid(id_gen_idx)) - return id_gen_idx; - if (++id_gen_idx > GENL_MAX_ID) - id_gen_idx = GENL_MIN_ID; - } - - return 0; -} - static int genl_allocate_reserve_groups(int n_groups, int *first_id) { unsigned long *new_groups; @@ -295,7 +258,7 @@ static int genl_validate_assign_mc_groups(struct genl_family *family) return err; } -static void genl_unregister_mc_groups(struct genl_family *family) +static void genl_unregister_mc_groups(const struct genl_family *family) { struct net *net; int i; @@ -358,6 +321,7 @@ static int genl_validate_ops(const struct genl_family *family) int genl_register_family(struct genl_family *family) { int err, i; + int start = GENL_START_ALLOC, end = GENL_MAX_ID; err = genl_validate_ops(family); if (err) @@ -370,34 +334,20 @@ int genl_register_family(struct genl_family *family) goto errout_locked; } + /* + * Sadly, a few cases need to be special-cased + * due to them having previously abused the API + * and having used their family ID also as their + * multicast group ID, so we use reserved IDs + * for both to be sure we can do that mapping. + */ if (family == &genl_ctrl) { - family->id = GENL_ID_CTRL; - } else { - u16 newid; - - /* this should be left zero in the struct */ - WARN_ON(family->id); - - /* - * Sadly, a few cases need to be special-cased - * due to them having previously abused the API - * and having used their family ID also as their - * multicast group ID, so we use reserved IDs - * for both to be sure we can do that mapping. - */ - if (strcmp(family->name, "pmcraid") == 0) - newid = GENL_ID_PMCRAID; - else if (strcmp(family->name, "VFS_DQUOT") == 0) - newid = GENL_ID_VFS_DQUOT; - else - newid = genl_generate_id(); - - if (!newid) { - err = -ENOMEM; - goto errout_locked; - } - - family->id = newid; + /* and this needs to be special for initial family lookups */ + start = end = GENL_ID_CTRL; + } else if (strcmp(family->name, "pmcraid") == 0) { + start = end = GENL_ID_PMCRAID; + } else if (strcmp(family->name, "VFS_DQUOT") == 0) { + start = end = GENL_ID_VFS_DQUOT; } if (family->maxattr && !family->parallel_ops) { @@ -410,11 +360,15 @@ int genl_register_family(struct genl_family *family) } else family->attrbuf = NULL; + family->id = idr_alloc(&genl_fam_idr, family, + start, end + 1, GFP_KERNEL); + if (!family->id) + goto errout_locked; + err = genl_validate_assign_mc_groups(family); if (err) - goto errout_locked; + goto errout_remove; - list_add_tail(&family->family_list, genl_family_chain(family->id)); genl_unlock_all(); /* send all events */ @@ -425,6 +379,8 @@ int genl_register_family(struct genl_family *family) return 0; +errout_remove: + idr_remove(&genl_fam_idr, family->id); errout_locked: genl_unlock_all(); return err; @@ -439,32 +395,29 @@ EXPORT_SYMBOL(genl_register_family); * * Returns 0 on success or a negative error code. */ -int genl_unregister_family(struct genl_family *family) +int genl_unregister_family(const struct genl_family *family) { - struct genl_family *rc; - genl_lock_all(); - list_for_each_entry(rc, genl_family_chain(family->id), family_list) { - if (family->id != rc->id || strcmp(rc->name, family->name)) - continue; + if (genl_family_find_byid(family->id)) { + genl_unlock_all(); + return -ENOENT; + } - genl_unregister_mc_groups(family); + genl_unregister_mc_groups(family); - list_del(&rc->family_list); - up_write(&cb_lock); - wait_event(genl_sk_destructing_waitq, - atomic_read(&genl_sk_destructing_cnt) == 0); - genl_unlock(); + idr_remove(&genl_fam_idr, family->id); - kfree(family->attrbuf); - genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); - return 0; - } + up_write(&cb_lock); + wait_event(genl_sk_destructing_waitq, + atomic_read(&genl_sk_destructing_cnt) == 0); + genl_unlock(); - genl_unlock_all(); + kfree(family->attrbuf); - return -ENOENT; + genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); + + return 0; } EXPORT_SYMBOL(genl_unregister_family); @@ -480,7 +433,7 @@ EXPORT_SYMBOL(genl_unregister_family); * Returns pointer to user specific header */ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, - struct genl_family *family, int flags, u8 cmd) + const struct genl_family *family, int flags, u8 cmd) { struct nlmsghdr *nlh; struct genlmsghdr *hdr; @@ -539,7 +492,7 @@ static int genl_lock_done(struct netlink_callback *cb) return rc; } -static int genl_family_rcv_msg(struct genl_family *family, +static int genl_family_rcv_msg(const struct genl_family *family, struct sk_buff *skb, struct nlmsghdr *nlh) { @@ -651,7 +604,7 @@ out: static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct genl_family *family; + const struct genl_family *family; int err; family = genl_family_find_byid(nlh->nlmsg_type); @@ -682,7 +635,7 @@ static void genl_rcv(struct sk_buff *skb) static struct genl_family genl_ctrl; -static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, +static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { void *hdr; @@ -769,7 +722,7 @@ nla_put_failure: return -EMSGSIZE; } -static int ctrl_fill_mcgrp_info(struct genl_family *family, +static int ctrl_fill_mcgrp_info(const struct genl_family *family, const struct genl_multicast_group *grp, int grp_id, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) @@ -812,37 +765,30 @@ nla_put_failure: static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) { - - int i, n = 0; + int n = 0; struct genl_family *rt; struct net *net = sock_net(skb->sk); - int chains_to_skip = cb->args[0]; - int fams_to_skip = cb->args[1]; - - for (i = chains_to_skip; i < GENL_FAM_TAB_SIZE; i++) { - n = 0; - list_for_each_entry(rt, genl_family_chain(i), family_list) { - if (!rt->netnsok && !net_eq(net, &init_net)) - continue; - if (++n < fams_to_skip) - continue; - if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - skb, CTRL_CMD_NEWFAMILY) < 0) - goto errout; - } + int fams_to_skip = cb->args[0]; + unsigned int id; - fams_to_skip = 0; - } + idr_for_each_entry(&genl_fam_idr, rt, id) { + if (!rt->netnsok && !net_eq(net, &init_net)) + continue; + + if (n++ < fams_to_skip) + continue; -errout: - cb->args[0] = i; - cb->args[1] = n; + if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + skb, CTRL_CMD_NEWFAMILY) < 0) + break; + } + cb->args[0] = n; return skb->len; } -static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, +static struct sk_buff *ctrl_build_family_msg(const struct genl_family *family, u32 portid, int seq, u8 cmd) { struct sk_buff *skb; @@ -862,7 +808,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, } static struct sk_buff * -ctrl_build_mcgrp_msg(struct genl_family *family, +ctrl_build_mcgrp_msg(const struct genl_family *family, const struct genl_multicast_group *grp, int grp_id, u32 portid, int seq, u8 cmd) { @@ -892,7 +838,7 @@ static const struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] = { static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; - struct genl_family *res = NULL; + const struct genl_family *res = NULL; int err = -EINVAL; if (info->attrs[CTRL_ATTR_FAMILY_ID]) { @@ -936,7 +882,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) return genlmsg_reply(msg, info); } -static int genl_ctrl_event(int event, struct genl_family *family, +static int genl_ctrl_event(int event, const struct genl_family *family, const struct genl_multicast_group *grp, int grp_id) { @@ -1005,25 +951,24 @@ static struct genl_family genl_ctrl = { static int genl_bind(struct net *net, int group) { - int i, err = -ENOENT; + struct genl_family *f; + int err = -ENOENT; + unsigned int id; down_read(&cb_lock); - for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { - struct genl_family *f; - - list_for_each_entry(f, genl_family_chain(i), family_list) { - if (group >= f->mcgrp_offset && - group < f->mcgrp_offset + f->n_mcgrps) { - int fam_grp = group - f->mcgrp_offset; - - if (!f->netnsok && net != &init_net) - err = -ENOENT; - else if (f->mcast_bind) - err = f->mcast_bind(net, fam_grp); - else - err = 0; - break; - } + + idr_for_each_entry(&genl_fam_idr, f, id) { + if (group >= f->mcgrp_offset && + group < f->mcgrp_offset + f->n_mcgrps) { + int fam_grp = group - f->mcgrp_offset; + + if (!f->netnsok && net != &init_net) + err = -ENOENT; + else if (f->mcast_bind) + err = f->mcast_bind(net, fam_grp); + else + err = 0; + break; } } up_read(&cb_lock); @@ -1033,21 +978,19 @@ static int genl_bind(struct net *net, int group) static void genl_unbind(struct net *net, int group) { - int i; + struct genl_family *f; + unsigned int id; down_read(&cb_lock); - for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { - struct genl_family *f; - list_for_each_entry(f, genl_family_chain(i), family_list) { - if (group >= f->mcgrp_offset && - group < f->mcgrp_offset + f->n_mcgrps) { - int fam_grp = group - f->mcgrp_offset; + idr_for_each_entry(&genl_fam_idr, f, id) { + if (group >= f->mcgrp_offset && + group < f->mcgrp_offset + f->n_mcgrps) { + int fam_grp = group - f->mcgrp_offset; - if (f->mcast_unbind) - f->mcast_unbind(net, fam_grp); - break; - } + if (f->mcast_unbind) + f->mcast_unbind(net, fam_grp); + break; } } up_read(&cb_lock); @@ -1087,10 +1030,7 @@ static struct pernet_operations genl_pernet_ops = { static int __init genl_init(void) { - int i, err; - - for (i = 0; i < GENL_FAM_TAB_SIZE; i++) - INIT_LIST_HEAD(&family_ht[i]); + int err; err = genl_register_family(&genl_ctrl); if (err < 0) @@ -1118,7 +1058,7 @@ subsys_initcall(genl_init); * You cannot use this function with a family that has parallel_ops * and you can only use it within (pre/post) doit/dumpit callbacks. */ -struct nlattr **genl_family_attrbuf(struct genl_family *family) +struct nlattr **genl_family_attrbuf(const struct genl_family *family) { if (!WARN_ON(family->parallel_ops)) lockdep_assert_held(&genl_mutex); @@ -1156,8 +1096,9 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, return err; } -int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb, - u32 portid, unsigned int group, gfp_t flags) +int genlmsg_multicast_allns(const struct genl_family *family, + struct sk_buff *skb, u32 portid, + unsigned int group, gfp_t flags) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; @@ -1166,7 +1107,7 @@ int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb, } EXPORT_SYMBOL(genlmsg_multicast_allns); -void genl_notify(struct genl_family *family, struct sk_buff *skb, +void genl_notify(const struct genl_family *family, struct sk_buff *skb, struct genl_info *info, u32 group, gfp_t flags) { struct net *net = genl_info_net(info); -- cgit v1.2.3 From 830218c1add1da16519b71909e5cf21522b7d062 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 24 Oct 2016 10:52:35 -0700 Subject: net: ipv6: Fix processing of RAs in presence of VRF rt6_add_route_info and rt6_add_dflt_router were updated to pull the FIB table from the device index, but the corresponding rt6_get_route_info and rt6_get_dflt_router functions were not leading to the failure to process RA's: ICMPv6: RA: ndisc_router_discovery failed to add default route Fix the 'get' functions by using the table id associated with the device when applicable. Also, now that default routes can be added to tables other than the default table, rt6_purge_dflt_routers needs to be updated as well to look at all tables. To handle that efficiently, add a flag to the table denoting if it is has a default route via RA. Fixes: ca254490c8dfd ("net: Add VRF support to IPv6 stack") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 ++ net/ipv6/route.c | 68 ++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 50 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index fb961a576abe..a74e2aa40ef4 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -230,6 +230,8 @@ struct fib6_table { rwlock_t tb6_lock; struct fib6_node tb6_root; struct inet_peer_base tb6_peers; + unsigned int flags; +#define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; #define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC diff --git a/net/ipv6/route.c b/net/ipv6/route.c index bdbc38e8bf29..3ac19eb81a86 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -102,11 +102,13 @@ static int rt6_score_route(struct rt6_info *rt, int oif, int strict); #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex, + const struct in6_addr *gwaddr, + struct net_device *dev, unsigned int pref); static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex); + const struct in6_addr *gwaddr, + struct net_device *dev); #endif struct uncached_list { @@ -803,7 +805,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, rt = rt6_get_dflt_router(gwaddr, dev); else rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, - gwaddr, dev->ifindex); + gwaddr, dev); if (rt && !lifetime) { ip6_del_rt(rt); @@ -811,8 +813,8 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } if (!rt && lifetime) - rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, - pref); + rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, + dev, pref); else if (rt) rt->rt6i_flags = RTF_ROUTEINFO | (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); @@ -2325,13 +2327,16 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort) #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex) + const struct in6_addr *gwaddr, + struct net_device *dev) { + u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO; + int ifindex = dev->ifindex; struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; - table = fib6_get_table(net, RT6_TABLE_INFO); + table = fib6_get_table(net, tb_id); if (!table) return NULL; @@ -2357,12 +2362,13 @@ out: static struct rt6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex, + const struct in6_addr *gwaddr, + struct net_device *dev, unsigned int pref) { struct fib6_config cfg = { .fc_metric = IP6_RT_PRIO_USER, - .fc_ifindex = ifindex, + .fc_ifindex = dev->ifindex, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), @@ -2371,7 +2377,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_nlinfo.nl_net = net, }; - cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO; + cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO, cfg.fc_dst = *prefix; cfg.fc_gateway = *gwaddr; @@ -2381,16 +2387,17 @@ static struct rt6_info *rt6_add_route_info(struct net *net, ip6_route_add(&cfg); - return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); + return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev); } #endif struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) { + u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT; struct rt6_info *rt; struct fib6_table *table; - table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); + table = fib6_get_table(dev_net(dev), tb_id); if (!table) return NULL; @@ -2424,20 +2431,20 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, cfg.fc_gateway = *gwaddr; - ip6_route_add(&cfg); + if (!ip6_route_add(&cfg)) { + struct fib6_table *table; + + table = fib6_get_table(dev_net(dev), cfg.fc_table); + if (table) + table->flags |= RT6_TABLE_HAS_DFLT_ROUTER; + } return rt6_get_dflt_router(gwaddr, dev); } -void rt6_purge_dflt_routers(struct net *net) +static void __rt6_purge_dflt_routers(struct fib6_table *table) { struct rt6_info *rt; - struct fib6_table *table; - - /* NOTE: Keep consistent with rt6_get_dflt_router */ - table = fib6_get_table(net, RT6_TABLE_DFLT); - if (!table) - return; restart: read_lock_bh(&table->tb6_lock); @@ -2451,6 +2458,27 @@ restart: } } read_unlock_bh(&table->tb6_lock); + + table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER; +} + +void rt6_purge_dflt_routers(struct net *net) +{ + struct fib6_table *table; + struct hlist_head *head; + unsigned int h; + + rcu_read_lock(); + + for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { + head = &net->ipv6.fib_table_hash[h]; + hlist_for_each_entry_rcu(table, head, tb6_hlist) { + if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER) + __rt6_purge_dflt_routers(table); + } + } + + rcu_read_unlock(); } static void rtmsg_to_fib6_config(struct net *net, -- cgit v1.2.3 From d5d32e4b76687f4df9ad3ba8d3702b7347f51fa6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 24 Oct 2016 12:27:23 -0700 Subject: net: ipv6: Do not consider link state for nexthop validation Similar to IPv4, do not consider link state when validating next hops. Currently, if the link is down default routes can fail to insert: $ ip -6 ro add vrf blue default via 2100:2::64 dev eth2 RTNETLINK answers: No route to host With this patch the command succeeds. Fixes: 8c14586fc320 ("net: ipv6: Use passed in table for nexthop lookups") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_route.h | 1 + net/ipv6/route.c | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index e0cd318d5103..f83e78d071a3 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -32,6 +32,7 @@ struct route_info { #define RT6_LOOKUP_F_SRCPREF_TMP 0x00000008 #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 +#define RT6_LOOKUP_F_IGNORE_LINKSTATE 0x00000040 /* We do not (yet ?) support IPv6 jumbograms (RFC 2675) * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3ac19eb81a86..947ed1ded026 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -658,7 +658,8 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, struct net_device *dev = rt->dst.dev; if (dev && !netif_carrier_ok(dev) && - idev->cnf.ignore_routes_with_linkdown) + idev->cnf.ignore_routes_with_linkdown && + !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) goto out; if (rt6_check_expired(rt)) @@ -1052,6 +1053,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int strict = 0; strict |= flags & RT6_LOOKUP_F_IFACE; + strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; if (net->ipv6.devconf_all->forwarding == 0) strict |= RT6_LOOKUP_F_REACHABLE; @@ -1791,7 +1793,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, }; struct fib6_table *table; struct rt6_info *rt; - int flags = RT6_LOOKUP_F_IFACE; + int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE; table = fib6_get_table(net, cfg->fc_table); if (!table) -- cgit v1.2.3 From b15ca182ed136087f6a2cb9ffe880c923f36a56e Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 26 Oct 2016 10:53:16 +0200 Subject: netlink: Add nla_memdup() to wrap kmemdup() use on nlattr Wrap several common instances of: kmemdup(nla_data(attr), nla_len(attr), GFP_KERNEL); Signed-off-by: Thomas Graf Acked-by: Johannes Berg Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/netlink.h | 10 ++++++++++ net/sched/act_bpf.c | 4 +--- net/sched/cls_bpf.c | 4 +--- net/wireless/nl80211.c | 3 +-- 4 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index 254a0fc01800..a34f53acb6d6 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -1190,6 +1190,16 @@ static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla) return tmp; } +/** + * nla_memdup - duplicate attribute memory (kmemdup) + * @src: netlink attribute to duplicate from + * @gfp: GFP mask + */ +static inline void *nla_memdup(const struct nlattr *src, gfp_t gfp) +{ + return kmemdup(nla_data(src), nla_len(src), gfp); +} + /** * nla_nest_start - Start a new level of nested attributes * @skb: socket buffer to add attributes to diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 1d3960033f61..9ff06cfbcdec 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -226,9 +226,7 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) return PTR_ERR(fp); if (tb[TCA_ACT_BPF_NAME]) { - name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]), - nla_len(tb[TCA_ACT_BPF_NAME]), - GFP_KERNEL); + name = nla_memdup(tb[TCA_ACT_BPF_NAME], GFP_KERNEL); if (!name) { bpf_prog_put(fp); return -ENOMEM; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index bb1d5a487081..52dc85acca7d 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -369,9 +369,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, return PTR_ERR(fp); if (tb[TCA_BPF_NAME]) { - name = kmemdup(nla_data(tb[TCA_BPF_NAME]), - nla_len(tb[TCA_BPF_NAME]), - GFP_KERNEL); + name = nla_memdup(tb[TCA_BPF_NAME], GFP_KERNEL); if (!name) { bpf_prog_put(fp); return -ENOMEM; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 271707dacfea..0d3ab4bfeacf 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10625,8 +10625,7 @@ static int handle_nan_filter(struct nlattr *attr_filter, i = 0; nla_for_each_nested(attr, attr_filter, rem) { - filter[i].filter = kmemdup(nla_data(attr), nla_len(attr), - GFP_KERNEL); + filter[i].filter = nla_memdup(attr, GFP_KERNEL); filter[i].len = nla_len(attr); i++; } -- cgit v1.2.3 From 5ea8ea2cb7f1d0db15762c9b0bb9e7330425a071 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Oct 2016 09:27:57 -0700 Subject: tcp/dccp: drop SYN packets if accept queue is full Per listen(fd, backlog) rules, there is really no point accepting a SYN, sending a SYNACK, and dropping the following ACK packet if accept queue is full, because application is not draining accept queue fast enough. This behavior is fooling TCP clients that believe they established a flow, while there is nothing at server side. They might then send about 10 MSS (if using IW10) that will be dropped anyway while server is under stress. Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 5 ----- net/dccp/ipv4.c | 8 +------- net/dccp/ipv6.c | 2 +- net/ipv4/tcp_input.c | 8 +------- 4 files changed, 3 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 197a30d221e9..146054ceea8e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -289,11 +289,6 @@ static inline int inet_csk_reqsk_queue_len(const struct sock *sk) return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); } -static inline int inet_csk_reqsk_queue_young(const struct sock *sk) -{ - return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); -} - static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) { return inet_csk_reqsk_queue_len(sk) >= sk->sk_max_ack_backlog; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 345a3aeb8c7e..a957acac2337 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -588,13 +588,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (inet_csk_reqsk_queue_is_full(sk)) goto drop; - /* - * Accept backlog is full. If we have already queued enough - * of warm entries in syn queue, drop request. It is better than - * clogging syn queue with openreqs with exponentially increasing - * timeout. - */ - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + if (sk_acceptq_is_full(sk)) goto drop; req = inet_reqsk_alloc(&dccp_request_sock_ops, sk, true); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 3828f94b234c..32f9f1a189f8 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -325,7 +325,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (inet_csk_reqsk_queue_is_full(sk)) goto drop; - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + if (sk_acceptq_is_full(sk)) goto drop; req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a27b9c0e27c0..f2c59c8e57ff 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6298,13 +6298,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop; } - - /* Accept backlog is full. If we have already queued enough - * of warm entries in syn queue, drop request. It is better than - * clogging syn queue with openreqs with exponentially increasing - * timeout. - */ - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { + if (sk_acceptq_is_full(sk)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); goto drop; } -- cgit v1.2.3 From c6fcc4fc5f8b592600c7409e769ab68da0fb1eca Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Fri, 28 Oct 2016 09:59:15 -0700 Subject: vxlan: avoid using stale vxlan socket. When vxlan device is closed vxlan socket is freed. This operation can race with vxlan-xmit function which dereferences vxlan socket. Following patch uses RCU mechanism to avoid this situation. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 80 +++++++++++++++++++++++++++++++++-------------------- include/net/vxlan.h | 4 +-- 2 files changed, 52 insertions(+), 32 deletions(-) (limited to 'include/net') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c1639a3e95a4..f3c2fa3ab0d5 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -943,17 +943,20 @@ static bool vxlan_snoop(struct net_device *dev, static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev) { struct vxlan_dev *vxlan; + struct vxlan_sock *sock4; + struct vxlan_sock *sock6 = NULL; unsigned short family = dev->default_dst.remote_ip.sa.sa_family; + sock4 = rtnl_dereference(dev->vn4_sock); + /* The vxlan_sock is only used by dev, leaving group has * no effect on other vxlan devices. */ - if (family == AF_INET && dev->vn4_sock && - atomic_read(&dev->vn4_sock->refcnt) == 1) + if (family == AF_INET && sock4 && atomic_read(&sock4->refcnt) == 1) return false; #if IS_ENABLED(CONFIG_IPV6) - if (family == AF_INET6 && dev->vn6_sock && - atomic_read(&dev->vn6_sock->refcnt) == 1) + sock6 = rtnl_dereference(dev->vn6_sock); + if (family == AF_INET6 && sock6 && atomic_read(&sock6->refcnt) == 1) return false; #endif @@ -961,10 +964,12 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev) if (!netif_running(vxlan->dev) || vxlan == dev) continue; - if (family == AF_INET && vxlan->vn4_sock != dev->vn4_sock) + if (family == AF_INET && + rtnl_dereference(vxlan->vn4_sock) != sock4) continue; #if IS_ENABLED(CONFIG_IPV6) - if (family == AF_INET6 && vxlan->vn6_sock != dev->vn6_sock) + if (family == AF_INET6 && + rtnl_dereference(vxlan->vn6_sock) != sock6) continue; #endif @@ -1005,22 +1010,25 @@ static bool __vxlan_sock_release_prep(struct vxlan_sock *vs) static void vxlan_sock_release(struct vxlan_dev *vxlan) { - bool ipv4 = __vxlan_sock_release_prep(vxlan->vn4_sock); + struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock); #if IS_ENABLED(CONFIG_IPV6) - bool ipv6 = __vxlan_sock_release_prep(vxlan->vn6_sock); + struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); + + rcu_assign_pointer(vxlan->vn6_sock, NULL); #endif + rcu_assign_pointer(vxlan->vn4_sock, NULL); synchronize_net(); - if (ipv4) { - udp_tunnel_sock_release(vxlan->vn4_sock->sock); - kfree(vxlan->vn4_sock); + if (__vxlan_sock_release_prep(sock4)) { + udp_tunnel_sock_release(sock4->sock); + kfree(sock4); } #if IS_ENABLED(CONFIG_IPV6) - if (ipv6) { - udp_tunnel_sock_release(vxlan->vn6_sock->sock); - kfree(vxlan->vn6_sock); + if (__vxlan_sock_release_prep(sock6)) { + udp_tunnel_sock_release(sock6->sock); + kfree(sock6); } #endif } @@ -1036,18 +1044,21 @@ static int vxlan_igmp_join(struct vxlan_dev *vxlan) int ret = -EINVAL; if (ip->sa.sa_family == AF_INET) { + struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock); struct ip_mreqn mreq = { .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr, .imr_ifindex = ifindex, }; - sk = vxlan->vn4_sock->sock->sk; + sk = sock4->sock->sk; lock_sock(sk); ret = ip_mc_join_group(sk, &mreq); release_sock(sk); #if IS_ENABLED(CONFIG_IPV6) } else { - sk = vxlan->vn6_sock->sock->sk; + struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); + + sk = sock6->sock->sk; lock_sock(sk); ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex, &ip->sin6.sin6_addr); @@ -1067,18 +1078,21 @@ static int vxlan_igmp_leave(struct vxlan_dev *vxlan) int ret = -EINVAL; if (ip->sa.sa_family == AF_INET) { + struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock); struct ip_mreqn mreq = { .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr, .imr_ifindex = ifindex, }; - sk = vxlan->vn4_sock->sock->sk; + sk = sock4->sock->sk; lock_sock(sk); ret = ip_mc_leave_group(sk, &mreq); release_sock(sk); #if IS_ENABLED(CONFIG_IPV6) } else { - sk = vxlan->vn6_sock->sock->sk; + struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); + + sk = sock6->sock->sk; lock_sock(sk); ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex, &ip->sin6.sin6_addr); @@ -1828,11 +1842,15 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, struct dst_cache *dst_cache, const struct ip_tunnel_info *info) { + struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock); bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct dst_entry *ndst; struct flowi6 fl6; int err; + if (!sock6) + return ERR_PTR(-EIO); + if (tos && !info) use_cache = false; if (use_cache) { @@ -1850,7 +1868,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, fl6.flowi6_proto = IPPROTO_UDP; err = ipv6_stub->ipv6_dst_lookup(vxlan->net, - vxlan->vn6_sock->sock->sk, + sock6->sock->sk, &ndst, &fl6); if (err < 0) return ERR_PTR(err); @@ -1995,9 +2013,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } if (dst->sa.sa_family == AF_INET) { - if (!vxlan->vn4_sock) + struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); + + if (!sock4) goto drop; - sk = vxlan->vn4_sock->sock->sk; + sk = sock4->sock->sk; rt = vxlan_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, tos, @@ -2050,12 +2070,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, src_port, dst_port, xnet, !udp_sum); #if IS_ENABLED(CONFIG_IPV6) } else { + struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock); struct dst_entry *ndst; u32 rt6i_flags; - if (!vxlan->vn6_sock) + if (!sock6) goto drop; - sk = vxlan->vn6_sock->sock->sk; + sk = sock6->sock->sk; ndst = vxlan6_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, tos, @@ -2415,9 +2436,10 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) dport = info->key.tp_dst ? : vxlan->cfg.dst_port; if (ip_tunnel_info_af(info) == AF_INET) { + struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); struct rtable *rt; - if (!vxlan->vn4_sock) + if (!sock4) return -EINVAL; rt = vxlan_get_route(vxlan, skb, 0, info->key.tos, info->key.u.ipv4.dst, @@ -2429,8 +2451,6 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) #if IS_ENABLED(CONFIG_IPV6) struct dst_entry *ndst; - if (!vxlan->vn6_sock) - return -EINVAL; ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos, info->key.label, &info->key.u.ipv6.dst, &info->key.u.ipv6.src, NULL, info); @@ -2740,10 +2760,10 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6) return PTR_ERR(vs); #if IS_ENABLED(CONFIG_IPV6) if (ipv6) - vxlan->vn6_sock = vs; + rcu_assign_pointer(vxlan->vn6_sock, vs); else #endif - vxlan->vn4_sock = vs; + rcu_assign_pointer(vxlan->vn4_sock, vs); vxlan_vs_add_dev(vs, vxlan); return 0; } @@ -2754,9 +2774,9 @@ static int vxlan_sock_add(struct vxlan_dev *vxlan) bool metadata = vxlan->flags & VXLAN_F_COLLECT_METADATA; int ret = 0; - vxlan->vn4_sock = NULL; + RCU_INIT_POINTER(vxlan->vn4_sock, NULL); #if IS_ENABLED(CONFIG_IPV6) - vxlan->vn6_sock = NULL; + RCU_INIT_POINTER(vxlan->vn6_sock, NULL); if (ipv6 || metadata) ret = __vxlan_sock_add(vxlan, true); #endif diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 0255613a54a4..308adc4154f4 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -225,9 +225,9 @@ struct vxlan_config { struct vxlan_dev { struct hlist_node hlist; /* vni hash table */ struct list_head next; /* vxlan's per namespace list */ - struct vxlan_sock *vn4_sock; /* listening socket for IPv4 */ + struct vxlan_sock __rcu *vn4_sock; /* listening socket for IPv4 */ #if IS_ENABLED(CONFIG_IPV6) - struct vxlan_sock *vn6_sock; /* listening socket for IPv6 */ + struct vxlan_sock __rcu *vn6_sock; /* listening socket for IPv6 */ #endif struct net_device *dev; struct net *net; /* netns for packet i/o */ -- cgit v1.2.3 From dae399d7fdee84d8f5227a9711d95bb4e9a05d4e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 31 Oct 2016 20:32:33 +0800 Subject: sctp: hold transport instead of assoc when lookup assoc in rx path Prior to this patch, in rx path, before calling lock_sock, it needed to hold assoc when got it by __sctp_lookup_association, in case other place would free/put assoc. But in __sctp_lookup_association, it lookup and hold transport, then got assoc by transport->assoc, then hold assoc and put transport. It means it didn't hold transport, yet it was returned and later on directly assigned to chunk->transport. Without the protection of sock lock, the transport may be freed/put by other places, which would cause a use-after-free issue. This patch is to fix this issue by holding transport instead of assoc. As holding transport can make sure to access assoc is also safe, and actually it looks up assoc by searching transport rhashtable, to hold transport here makes more sense. Note that the function will be renamed later on on another patch. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 2 +- net/sctp/input.c | 32 ++++++++++++++++---------------- net/sctp/ipv6.c | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 87a7f42e7639..31acc3f4f132 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -152,7 +152,7 @@ void sctp_unhash_endpoint(struct sctp_endpoint *); struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *, struct sctphdr *, struct sctp_association **, struct sctp_transport **); -void sctp_err_finish(struct sock *, struct sctp_association *); +void sctp_err_finish(struct sock *, struct sctp_transport *); void sctp_icmp_frag_needed(struct sock *, struct sctp_association *, struct sctp_transport *t, __u32 pmtu); void sctp_icmp_redirect(struct sock *, struct sctp_transport *, diff --git a/net/sctp/input.c b/net/sctp/input.c index 8e0bc58eec20..a01a56ec8b8c 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -181,9 +181,10 @@ int sctp_rcv(struct sk_buff *skb) * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB */ if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) { - if (asoc) { - sctp_association_put(asoc); + if (transport) { + sctp_transport_put(transport); asoc = NULL; + transport = NULL; } else { sctp_endpoint_put(ep); ep = NULL; @@ -269,8 +270,8 @@ int sctp_rcv(struct sk_buff *skb) bh_unlock_sock(sk); /* Release the asoc/ep ref we took in the lookup calls. */ - if (asoc) - sctp_association_put(asoc); + if (transport) + sctp_transport_put(transport); else sctp_endpoint_put(ep); @@ -283,8 +284,8 @@ discard_it: discard_release: /* Release the asoc/ep ref we took in the lookup calls. */ - if (asoc) - sctp_association_put(asoc); + if (transport) + sctp_transport_put(transport); else sctp_endpoint_put(ep); @@ -300,6 +301,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; struct sctp_inq *inqueue = &chunk->rcvr->inqueue; + struct sctp_transport *t = chunk->transport; struct sctp_ep_common *rcvr = NULL; int backloged = 0; @@ -351,7 +353,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) done: /* Release the refs we took in sctp_add_backlog */ if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) - sctp_association_put(sctp_assoc(rcvr)); + sctp_transport_put(t); else if (SCTP_EP_TYPE_SOCKET == rcvr->type) sctp_endpoint_put(sctp_ep(rcvr)); else @@ -363,6 +365,7 @@ done: static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) { struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; + struct sctp_transport *t = chunk->transport; struct sctp_ep_common *rcvr = chunk->rcvr; int ret; @@ -373,7 +376,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) * from us */ if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type) - sctp_association_hold(sctp_assoc(rcvr)); + sctp_transport_hold(t); else if (SCTP_EP_TYPE_SOCKET == rcvr->type) sctp_endpoint_hold(sctp_ep(rcvr)); else @@ -537,15 +540,15 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, return sk; out: - sctp_association_put(asoc); + sctp_transport_put(transport); return NULL; } /* Common cleanup code for icmp/icmpv6 error handler. */ -void sctp_err_finish(struct sock *sk, struct sctp_association *asoc) +void sctp_err_finish(struct sock *sk, struct sctp_transport *t) { bh_unlock_sock(sk); - sctp_association_put(asoc); + sctp_transport_put(t); } /* @@ -641,7 +644,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) } out_unlock: - sctp_err_finish(sk, asoc); + sctp_err_finish(sk, transport); } /* @@ -952,11 +955,8 @@ static struct sctp_association *__sctp_lookup_association( goto out; asoc = t->asoc; - sctp_association_hold(asoc); *pt = t; - sctp_transport_put(t); - out: return asoc; } @@ -986,7 +986,7 @@ int sctp_has_association(struct net *net, struct sctp_transport *transport; if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) { - sctp_association_put(asoc); + sctp_transport_put(transport); return 1; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index f473779e8b1c..176af3080a2b 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -198,7 +198,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } out_unlock: - sctp_err_finish(sk, asoc); + sctp_err_finish(sk, transport); out: if (likely(idev != NULL)) in6_dev_put(idev); -- cgit v1.2.3 From bd68a2a854ad5a85f0c8d0a9c8048ca3f6391efb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 Oct 2016 13:32:55 -0700 Subject: net: set SK_MEM_QUANTUM to 4096 Systems with large pages (64KB pages for example) do not always have huge quantity of memory. A big SK_MEM_QUANTUM value leads to fewer interactions with the global counters (like tcp_memory_allocated) but might trigger memory pressure much faster, giving suboptimal TCP performance since windows are lowered to ridiculous values. Note that sysctl_mem units being in pages and in ABI, we also need to change sk_prot_mem_limits() accordingly. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index f13ac87a8015..93331a1492db 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1162,11 +1162,6 @@ static inline void sk_enter_memory_pressure(struct sock *sk) sk->sk_prot->enter_memory_pressure(sk); } -static inline long sk_prot_mem_limits(const struct sock *sk, int index) -{ - return sk->sk_prot->sysctl_mem[index]; -} - static inline long sk_memory_allocated(const struct sock *sk) { @@ -1281,11 +1276,27 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind); void __sk_mem_reduce_allocated(struct sock *sk, int amount); void __sk_mem_reclaim(struct sock *sk, int amount); -#define SK_MEM_QUANTUM ((int)PAGE_SIZE) +/* We used to have PAGE_SIZE here, but systems with 64KB pages + * do not necessarily have 16x time more memory than 4KB ones. + */ +#define SK_MEM_QUANTUM 4096 #define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM) #define SK_MEM_SEND 0 #define SK_MEM_RECV 1 +/* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */ +static inline long sk_prot_mem_limits(const struct sock *sk, int index) +{ + long val = sk->sk_prot->sysctl_mem[index]; + +#if PAGE_SIZE > SK_MEM_QUANTUM + val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT; +#elif PAGE_SIZE < SK_MEM_QUANTUM + val >>= SK_MEM_QUANTUM_SHIFT - PAGE_SHIFT; +#endif + return val; +} + static inline int sk_mem_pages(int amt) { return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT; -- cgit v1.2.3 From f6d0cbcf09c506b9b022df8f9d7693a7cec3c732 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 24 Oct 2016 16:56:40 +0200 Subject: netfilter: nf_tables: add fib expression Add FIB expression, supported for ipv4, ipv6 and inet family (the latter just dispatches to ipv4 or ipv6 one based on nfproto). Currently supports fetching output interface index/name and the rtm_type associated with an address. This can be used for adding path filtering. rtm_type is useful to e.g. enforce a strong-end host model where packets are only accepted if daddr is configured on the interface the packet arrived on. The fib expression is a native nftables alternative to the xtables addrtype and rp_filter matches. FIB result order for oif/oifname retrieval is as follows: - if packet is local (skb has rtable, RTF_LOCAL set, this will also catch looped-back multicast packets), set oif to the loopback interface. - if fib lookup returns an error, or result points to local, store zero result. This means '--local' option of -m rpfilter is not supported. It is possible to use 'fib type local' or add explicit saddr/daddr matching rules to create exceptions if this is really needed. - store result in the destination register. In case of multiple routes, search set for desired oif in case strict matching is requested. ipv4 and ipv6 behave fib expressions are supposed to behave the same. [ I have collapsed Arnd Bergmann's ("netfilter: nf_tables: fib warnings") http://patchwork.ozlabs.org/patch/688615/ to address fallout from this patch after rebasing nf-next, that was posted to address compilation warnings. --pablo ] Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nft_fib.h | 31 ++++ include/uapi/linux/netfilter/nf_tables.h | 36 ++++ net/ipv4/netfilter/Kconfig | 8 + net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/nft_fib_ipv4.c | 238 ++++++++++++++++++++++++++ net/ipv6/netfilter/Kconfig | 8 + net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/nft_fib_ipv6.c | 275 +++++++++++++++++++++++++++++++ net/netfilter/Kconfig | 13 ++ net/netfilter/Makefile | 2 + net/netfilter/nft_fib.c | 159 ++++++++++++++++++ net/netfilter/nft_fib_inet.c | 82 +++++++++ 12 files changed, 854 insertions(+) create mode 100644 include/net/netfilter/nft_fib.h create mode 100644 net/ipv4/netfilter/nft_fib_ipv4.c create mode 100644 net/ipv6/netfilter/nft_fib_ipv6.c create mode 100644 net/netfilter/nft_fib.c create mode 100644 net/netfilter/nft_fib_inet.c (limited to 'include/net') diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h new file mode 100644 index 000000000000..cbedda077db2 --- /dev/null +++ b/include/net/netfilter/nft_fib.h @@ -0,0 +1,31 @@ +#ifndef _NFT_FIB_H_ +#define _NFT_FIB_H_ + +struct nft_fib { + enum nft_registers dreg:8; + u8 result; + u32 flags; +}; + +extern const struct nla_policy nft_fib_policy[]; + +int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr); +int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]); +int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data); + + +void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); +void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); + +void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); +void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt); + +void nft_fib_store_result(void *reg, enum nft_fib_result r, + const struct nft_pktinfo *pkt, int index); +#endif diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index c6c4477c136b..a054ad2c8853 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1109,6 +1109,42 @@ enum nft_gen_attributes { }; #define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1) +/* + * enum nft_fib_attributes - nf_tables fib expression netlink attributes + * + * @NFTA_FIB_DREG: destination register (NLA_U32) + * @NFTA_FIB_RESULT: desired result (NLA_U32) + * @NFTA_FIB_FLAGS: flowi fields to initialize when querying the FIB (NLA_U32) + * + * The FIB expression performs a route lookup according + * to the packet data. + */ +enum nft_fib_attributes { + NFTA_FIB_UNSPEC, + NFTA_FIB_DREG, + NFTA_FIB_RESULT, + NFTA_FIB_FLAGS, + __NFTA_FIB_MAX +}; +#define NFTA_FIB_MAX (__NFTA_FIB_MAX - 1) + +enum nft_fib_result { + NFT_FIB_RESULT_UNSPEC, + NFT_FIB_RESULT_OIF, + NFT_FIB_RESULT_OIFNAME, + NFT_FIB_RESULT_ADDRTYPE, + __NFT_FIB_RESULT_MAX +}; +#define NFT_FIB_RESULT_MAX (__NFT_FIB_RESULT_MAX - 1) + +enum nft_fib_flags { + NFTA_FIB_F_SADDR = 1 << 0, /* look up src */ + NFTA_FIB_F_DADDR = 1 << 1, /* look up dst */ + NFTA_FIB_F_MARK = 1 << 2, /* use skb->mark */ + NFTA_FIB_F_IIF = 1 << 3, /* restrict to iif */ + NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */ +}; + /** * enum nft_trace_attributes - nf_tables trace netlink attributes * diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index d613309e3e5d..f80bb8f457c8 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -54,6 +54,14 @@ config NFT_DUP_IPV4 help This module enables IPv4 packet duplication support for nf_tables. +config NFT_FIB_IPV4 + select NFT_FIB + tristate "nf_tables fib / ip route lookup support" + help + This module enables IPv4 FIB lookups, e.g. for reverse path filtering. + It also allows query of the FIB for the route type, e.g. local, unicast, + multicast or blackhole. + endif # NF_TABLES_IPV4 config NF_TABLES_ARP diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 853328f8fd05..59d7786ddce1 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o +obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c new file mode 100644 index 000000000000..db91fd42db67 --- /dev/null +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -0,0 +1,238 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* don't try to find route from mcast/bcast/zeronet */ +static __be32 get_saddr(__be32 addr) +{ + if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) || + ipv4_is_zeronet(addr)) + return 0; + return addr; +} + +static bool fib4_is_local(const struct sk_buff *skb) +{ + const struct rtable *rt = skb_rtable(skb); + + return rt && (rt->rt_flags & RTCF_LOCAL); +} + +#define DSCP_BITS 0xfc + +void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + u32 *dst = ®s->data[priv->dreg]; + const struct net_device *dev = NULL; + const struct iphdr *iph; + __be32 addr; + + if (priv->flags & NFTA_FIB_F_IIF) + dev = pkt->in; + else if (priv->flags & NFTA_FIB_F_OIF) + dev = pkt->out; + + iph = ip_hdr(pkt->skb); + if (priv->flags & NFTA_FIB_F_DADDR) + addr = iph->daddr; + else + addr = iph->saddr; + + *dst = inet_dev_addr_type(pkt->net, dev, addr); +} +EXPORT_SYMBOL_GPL(nft_fib4_eval_type); + +static int get_ifindex(const struct net_device *dev) +{ + return dev ? dev->ifindex : 0; +} + +void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + u32 *dest = ®s->data[priv->dreg]; + const struct iphdr *iph; + struct fib_result res; + struct flowi4 fl4 = { + .flowi4_scope = RT_SCOPE_UNIVERSE, + .flowi4_iif = LOOPBACK_IFINDEX, + }; + const struct net_device *oif; + struct net_device *found; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + int i; +#endif + + /* + * Do not set flowi4_oif, it restricts results (for example, asking + * for oif 3 will get RTN_UNICAST result even if the daddr exits + * on another interface. + * + * Search results for the desired outinterface instead. + */ + if (priv->flags & NFTA_FIB_F_OIF) + oif = pkt->out; + else if (priv->flags & NFTA_FIB_F_IIF) + oif = pkt->in; + else + oif = NULL; + + if (pkt->hook == NF_INET_PRE_ROUTING && fib4_is_local(pkt->skb)) { + nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX); + return; + } + + iph = ip_hdr(pkt->skb); + if (ipv4_is_multicast(iph->daddr) && + ipv4_is_zeronet(iph->saddr) && + ipv4_is_local_multicast(iph->daddr)) { + nft_fib_store_result(dest, priv->result, pkt, + get_ifindex(pkt->skb->dev)); + return; + } + + if (priv->flags & NFTA_FIB_F_MARK) + fl4.flowi4_mark = pkt->skb->mark; + + fl4.flowi4_tos = iph->tos & DSCP_BITS; + + if (priv->flags & NFTA_FIB_F_DADDR) { + fl4.daddr = iph->daddr; + fl4.saddr = get_saddr(iph->saddr); + } else { + fl4.daddr = iph->saddr; + fl4.saddr = get_saddr(iph->daddr); + } + + if (fib_lookup(pkt->net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) + return; + + switch (res.type) { + case RTN_UNICAST: + break; + case RTN_LOCAL: /* should not appear here, see fib4_is_local() above */ + return; + default: + break; + } + + if (!oif) { + found = FIB_RES_DEV(res); + goto ok; + } + +#ifdef CONFIG_IP_ROUTE_MULTIPATH + for (i = 0; i < res.fi->fib_nhs; i++) { + struct fib_nh *nh = &res.fi->fib_nh[i]; + + if (nh->nh_dev == oif) { + found = nh->nh_dev; + goto ok; + } + } + return; +#else + found = FIB_RES_DEV(res); + if (found != oif) + return; +#endif +ok: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + *dest = found->ifindex; + break; + case NFT_FIB_RESULT_OIFNAME: + strncpy((char *)dest, found->name, IFNAMSIZ); + break; + default: + WARN_ON_ONCE(1); + break; + } +} +EXPORT_SYMBOL_GPL(nft_fib4_eval); + +static struct nft_expr_type nft_fib4_type; + +static const struct nft_expr_ops nft_fib4_type_ops = { + .type = &nft_fib4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib4_eval_type, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static const struct nft_expr_ops nft_fib4_ops = { + .type = &nft_fib4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib4_eval, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static const struct nft_expr_ops * +nft_fib4_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + enum nft_fib_result result; + + if (!tb[NFTA_FIB_RESULT]) + return ERR_PTR(-EINVAL); + + result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT])); + + switch (result) { + case NFT_FIB_RESULT_OIF: + return &nft_fib4_ops; + case NFT_FIB_RESULT_OIFNAME: + return &nft_fib4_ops; + case NFT_FIB_RESULT_ADDRTYPE: + return &nft_fib4_type_ops; + default: + return ERR_PTR(-EOPNOTSUPP); + } +} + +static struct nft_expr_type nft_fib4_type __read_mostly = { + .name = "fib", + .select_ops = &nft_fib4_select_ops, + .policy = nft_fib_policy, + .maxattr = NFTA_FIB_MAX, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, +}; + +static int __init nft_fib4_module_init(void) +{ + return nft_register_expr(&nft_fib4_type); +} + +static void __exit nft_fib4_module_exit(void) +{ + nft_unregister_expr(&nft_fib4_type); +} + +module_init(nft_fib4_module_init); +module_exit(nft_fib4_module_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal "); +MODULE_ALIAS_NFT_AF_EXPR(2, "fib"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index e10a04c9cdc7..144f1de33836 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -54,6 +54,14 @@ config NFT_DUP_IPV6 help This module enables IPv6 packet duplication support for nf_tables. +config NFT_FIB_IPV6 + tristate "nf_tables fib / ipv6 route lookup support" + select NFT_FIB + help + This module enables IPv6 FIB lookups, e.g. for reverse path filtering. + It also allows query of the FIB for the route type, e.g. local, unicast, + multicast or blackhole. + endif # NF_TABLES_IPV6 endif # NF_TABLES diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index b4f7d0b4e2af..7984f3071f05 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o +obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c new file mode 100644 index 000000000000..ff1f1b6b4a4a --- /dev/null +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -0,0 +1,275 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static bool fib6_is_local(const struct sk_buff *skb) +{ + const struct rt6_info *rt = (const void *)skb_dst(skb); + + return rt && (rt->rt6i_flags & RTF_LOCAL); +} + +static int get_ifindex(const struct net_device *dev) +{ + return dev ? dev->ifindex : 0; +} + +static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, + const struct nft_pktinfo *pkt, + const struct net_device *dev) +{ + const struct ipv6hdr *iph = ipv6_hdr(pkt->skb); + int lookup_flags = 0; + + if (priv->flags & NFTA_FIB_F_DADDR) { + fl6->daddr = iph->daddr; + fl6->saddr = iph->saddr; + } else { + fl6->daddr = iph->saddr; + fl6->saddr = iph->daddr; + } + + if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) { + lookup_flags |= RT6_LOOKUP_F_IFACE; + fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev); + } + + if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST) + lookup_flags |= RT6_LOOKUP_F_HAS_SADDR; + + if (priv->flags & NFTA_FIB_F_MARK) + fl6->flowi6_mark = pkt->skb->mark; + + fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK; + + return lookup_flags; +} + +static u32 __nft_fib6_eval_type(const struct nft_fib *priv, + const struct nft_pktinfo *pkt) +{ + const struct net_device *dev = NULL; + const struct nf_ipv6_ops *v6ops; + const struct nf_afinfo *afinfo; + int route_err, addrtype; + struct rt6_info *rt; + struct flowi6 fl6 = { + .flowi6_iif = LOOPBACK_IFINDEX, + .flowi6_proto = pkt->tprot, + }; + u32 ret = 0; + + afinfo = nf_get_afinfo(NFPROTO_IPV6); + if (!afinfo) + return RTN_UNREACHABLE; + + if (priv->flags & NFTA_FIB_F_IIF) + dev = pkt->in; + else if (priv->flags & NFTA_FIB_F_OIF) + dev = pkt->out; + + nft_fib6_flowi_init(&fl6, priv, pkt, dev); + + v6ops = nf_get_ipv6_ops(); + if (dev && v6ops && v6ops->chk_addr(pkt->net, &fl6.daddr, dev, true)) + ret = RTN_LOCAL; + + route_err = afinfo->route(pkt->net, (struct dst_entry **)&rt, + flowi6_to_flowi(&fl6), false); + if (route_err) + goto err; + + if (rt->rt6i_flags & RTF_REJECT) { + route_err = rt->dst.error; + dst_release(&rt->dst); + goto err; + } + + if (ipv6_anycast_destination((struct dst_entry *)rt, &fl6.daddr)) + ret = RTN_ANYCAST; + else if (!dev && rt->rt6i_flags & RTF_LOCAL) + ret = RTN_LOCAL; + + dst_release(&rt->dst); + + if (ret) + return ret; + + addrtype = ipv6_addr_type(&fl6.daddr); + + if (addrtype & IPV6_ADDR_MULTICAST) + return RTN_MULTICAST; + if (addrtype & IPV6_ADDR_UNICAST) + return RTN_UNICAST; + + return RTN_UNSPEC; + err: + switch (route_err) { + case -EINVAL: + return RTN_BLACKHOLE; + case -EACCES: + return RTN_PROHIBIT; + case -EAGAIN: + return RTN_THROW; + default: + break; + } + + return RTN_UNREACHABLE; +} + +void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + u32 *dest = ®s->data[priv->dreg]; + + *dest = __nft_fib6_eval_type(priv, pkt); +} +EXPORT_SYMBOL_GPL(nft_fib6_eval_type); + +void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + const struct net_device *oif = NULL; + u32 *dest = ®s->data[priv->dreg]; + struct flowi6 fl6 = { + .flowi6_iif = LOOPBACK_IFINDEX, + .flowi6_proto = pkt->tprot, + }; + struct rt6_info *rt; + int lookup_flags; + + if (priv->flags & NFTA_FIB_F_IIF) + oif = pkt->in; + else if (priv->flags & NFTA_FIB_F_OIF) + oif = pkt->out; + + lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif); + + if (pkt->hook == NF_INET_PRE_ROUTING && fib6_is_local(pkt->skb)) { + nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX); + return; + } + + *dest = 0; + again: + rt = (void *)ip6_route_lookup(pkt->net, &fl6, lookup_flags); + if (rt->dst.error) + goto put_rt_err; + + /* Should not see RTF_LOCAL here */ + if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) + goto put_rt_err; + + if (oif && oif != rt->rt6i_idev->dev) { + /* multipath route? Try again with F_IFACE */ + if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) { + lookup_flags |= RT6_LOOKUP_F_IFACE; + fl6.flowi6_oif = oif->ifindex; + ip6_rt_put(rt); + goto again; + } + } + + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + *dest = rt->rt6i_idev->dev->ifindex; + break; + case NFT_FIB_RESULT_OIFNAME: + strncpy((char *)dest, rt->rt6i_idev->dev->name, IFNAMSIZ); + break; + default: + WARN_ON_ONCE(1); + break; + } + + put_rt_err: + ip6_rt_put(rt); +} +EXPORT_SYMBOL_GPL(nft_fib6_eval); + +static struct nft_expr_type nft_fib6_type; + +static const struct nft_expr_ops nft_fib6_type_ops = { + .type = &nft_fib6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib6_eval_type, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static const struct nft_expr_ops nft_fib6_ops = { + .type = &nft_fib6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib6_eval, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static const struct nft_expr_ops * +nft_fib6_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + enum nft_fib_result result; + + if (!tb[NFTA_FIB_RESULT]) + return ERR_PTR(-EINVAL); + + result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT])); + + switch (result) { + case NFT_FIB_RESULT_OIF: + return &nft_fib6_ops; + case NFT_FIB_RESULT_OIFNAME: + return &nft_fib6_ops; + case NFT_FIB_RESULT_ADDRTYPE: + return &nft_fib6_type_ops; + default: + return ERR_PTR(-EOPNOTSUPP); + } +} + +static struct nft_expr_type nft_fib6_type __read_mostly = { + .name = "fib", + .select_ops = &nft_fib6_select_ops, + .policy = nft_fib_policy, + .maxattr = NFTA_FIB_MAX, + .family = NFPROTO_IPV6, + .owner = THIS_MODULE, +}; + +static int __init nft_fib6_module_init(void) +{ + return nft_register_expr(&nft_fib6_type); +} + +static void __exit nft_fib6_module_exit(void) +{ + nft_unregister_expr(&nft_fib6_type); +} +module_init(nft_fib6_module_init); +module_exit(nft_fib6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal "); +MODULE_ALIAS_NFT_AF_EXPR(10, "fib"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index e8d56d9a4df2..9bcf899ce16e 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -581,6 +581,19 @@ config NFT_HASH This option adds the "hash" expression that you can use to perform a hash operation on registers. +config NFT_FIB + tristate + +config NFT_FIB_INET + depends on NF_TABLES_INET + depends on NFT_FIB_IPV4 + depends on NFT_FIB_IPV6 + tristate "Netfilter nf_tables fib inet support" + help + This option allows using the FIB expression from the inet table. + The lookup will be delegated to the IPv4 or IPv6 FIB depending + on the protocol of the packet. + if NF_TABLES_NETDEV config NF_DUP_NETDEV diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index c23c3c84416f..8faa36c0686d 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -96,6 +96,8 @@ obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o obj-$(CONFIG_NFT_REDIR) += nft_redir.o obj-$(CONFIG_NFT_HASH) += nft_hash.o +obj-$(CONFIG_NFT_FIB) += nft_fib.o +obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o # nf_tables netdev obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c new file mode 100644 index 000000000000..4944a8b7f7a7 --- /dev/null +++ b/net/netfilter/nft_fib.c @@ -0,0 +1,159 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Generic part shared by ipv4 and ipv6 backends. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = { + [NFTA_FIB_DREG] = { .type = NLA_U32 }, + [NFTA_FIB_RESULT] = { .type = NLA_U32 }, + [NFTA_FIB_FLAGS] = { .type = NLA_U32 }, +}; +EXPORT_SYMBOL(nft_fib_policy); + +#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \ + NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF) + +int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + unsigned int hooks; + + switch (priv->result) { + case NFT_FIB_RESULT_OIF: /* fallthrough */ + case NFT_FIB_RESULT_OIFNAME: + hooks = (1 << NF_INET_PRE_ROUTING); + break; + case NFT_FIB_RESULT_ADDRTYPE: + if (priv->flags & NFTA_FIB_F_IIF) + hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD); + else if (priv->flags & NFTA_FIB_F_OIF) + hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_FORWARD); + else + hooks = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING); + + break; + default: + return -EINVAL; + } + + return nft_chain_validate_hooks(ctx->chain, hooks); +} +EXPORT_SYMBOL_GPL(nft_fib_validate); + +int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_fib *priv = nft_expr_priv(expr); + unsigned int len; + int err; + + if (!tb[NFTA_FIB_DREG] || !tb[NFTA_FIB_RESULT] || !tb[NFTA_FIB_FLAGS]) + return -EINVAL; + + priv->flags = ntohl(nla_get_be32(tb[NFTA_FIB_FLAGS])); + + if (priv->flags == 0 || (priv->flags & ~NFTA_FIB_F_ALL)) + return -EINVAL; + + if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == + (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) + return -EINVAL; + if ((priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) == + (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) + return -EINVAL; + if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == 0) + return -EINVAL; + + priv->result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT])); + priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]); + + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + if (priv->flags & NFTA_FIB_F_OIF) + return -EINVAL; + len = sizeof(int); + break; + case NFT_FIB_RESULT_OIFNAME: + if (priv->flags & NFTA_FIB_F_OIF) + return -EINVAL; + len = IFNAMSIZ; + break; + case NFT_FIB_RESULT_ADDRTYPE: + len = sizeof(u32); + break; + default: + return -EINVAL; + } + + err = nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, len); + if (err < 0) + return err; + + return nft_fib_validate(ctx, expr, NULL); +} +EXPORT_SYMBOL_GPL(nft_fib_init); + +int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_FIB_DREG, priv->dreg)) + return -1; + + if (nla_put_be32(skb, NFTA_FIB_RESULT, htonl(priv->result))) + return -1; + + if (nla_put_be32(skb, NFTA_FIB_FLAGS, htonl(priv->flags))) + return -1; + + return 0; +} +EXPORT_SYMBOL_GPL(nft_fib_dump); + +void nft_fib_store_result(void *reg, enum nft_fib_result r, + const struct nft_pktinfo *pkt, int index) +{ + struct net_device *dev; + u32 *dreg = reg; + + switch (r) { + case NFT_FIB_RESULT_OIF: + *dreg = index; + break; + case NFT_FIB_RESULT_OIFNAME: + dev = dev_get_by_index_rcu(pkt->net, index); + strncpy(reg, dev ? dev->name : "", IFNAMSIZ); + break; + default: + WARN_ON_ONCE(1); + *dreg = 0; + break; + } +} +EXPORT_SYMBOL_GPL(nft_fib_store_result); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal "); diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c new file mode 100644 index 000000000000..fe8943b572b7 --- /dev/null +++ b/net/netfilter/nft_fib_inet.c @@ -0,0 +1,82 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static void nft_fib_inet_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + + switch (pkt->pf) { + case NFPROTO_IPV4: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + case NFT_FIB_RESULT_OIFNAME: + return nft_fib4_eval(expr, regs, pkt); + case NFT_FIB_RESULT_ADDRTYPE: + return nft_fib4_eval_type(expr, regs, pkt); + } + break; + case NFPROTO_IPV6: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + case NFT_FIB_RESULT_OIFNAME: + return nft_fib6_eval(expr, regs, pkt); + case NFT_FIB_RESULT_ADDRTYPE: + return nft_fib6_eval_type(expr, regs, pkt); + } + break; + } + + regs->verdict.code = NF_DROP; +} + +static struct nft_expr_type nft_fib_inet_type; +static const struct nft_expr_ops nft_fib_inet_ops = { + .type = &nft_fib_inet_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib_inet_eval, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static struct nft_expr_type nft_fib_inet_type __read_mostly = { + .family = NFPROTO_INET, + .name = "fib", + .ops = &nft_fib_inet_ops, + .policy = nft_fib_policy, + .maxattr = NFTA_FIB_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_fib_inet_module_init(void) +{ + return nft_register_expr(&nft_fib_inet_type); +} + +static void __exit nft_fib_inet_module_exit(void) +{ + nft_unregister_expr(&nft_fib_inet_type); +} + +module_init(nft_fib_inet_module_init); +module_exit(nft_fib_inet_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal "); +MODULE_ALIAS_NFT_AF_EXPR(1, "fib"); -- cgit v1.2.3 From 1fddf4bad0ac9f4d32c74af286fc1eec2a03c82c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 27 Oct 2016 19:49:42 +0100 Subject: netfilter: nf_log: add packet logging for netdev family Move layer 2 packet logging into nf_log_l2packet() that resides in nf_log_common.c, so this can be shared by both bridge and netdev families. This patch adds the boiler plate code to register the netdev logging family. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_log.h | 5 +++ net/bridge/netfilter/Kconfig | 1 + net/bridge/netfilter/nf_log_bridge.c | 16 +------- net/netfilter/Kconfig | 4 ++ net/netfilter/Makefile | 3 ++ net/netfilter/nf_log_common.c | 27 ++++++++++++ net/netfilter/nf_log_netdev.c | 80 ++++++++++++++++++++++++++++++++++++ 7 files changed, 121 insertions(+), 15 deletions(-) create mode 100644 net/netfilter/nf_log_netdev.c (limited to 'include/net') diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 309cd267be4f..a559aa41253c 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -109,5 +109,10 @@ void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix); +void nf_log_l2packet(struct net *net, u_int8_t pf, unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, const char *prefix); #endif /* _NF_LOG_H */ diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 9cebf47ac840..e7ef1a1ef3a6 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -22,6 +22,7 @@ config NFT_BRIDGE_REJECT config NF_LOG_BRIDGE tristate "Bridge packet logging" + select NF_LOG_COMMON endif # NF_TABLES_BRIDGE diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c index 1663df598545..c197b1f844ee 100644 --- a/net/bridge/netfilter/nf_log_bridge.c +++ b/net/bridge/netfilter/nf_log_bridge.c @@ -24,21 +24,7 @@ static void nf_log_bridge_packet(struct net *net, u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { - switch (eth_hdr(skb)->h_proto) { - case htons(ETH_P_IP): - nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - case htons(ETH_P_IPV6): - nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - case htons(ETH_P_ARP): - case htons(ETH_P_RARP): - nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out, - loginfo, "%s", prefix); - break; - } + nf_log_l2packet(net, pf, hooknum, skb, in, out, loginfo, prefix); } static struct nf_logger nf_bridge_logger __read_mostly = { diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 9bcf899ce16e..854dadb196a1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -57,6 +57,10 @@ config NF_CONNTRACK config NF_LOG_COMMON tristate +config NF_LOG_NETDEV + tristate "Netdev packet logging" + select NF_LOG_COMMON + if NF_CONNTRACK config NF_CONNTRACK_MARK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 8faa36c0686d..8edd791743fe 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -48,6 +48,9 @@ nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ # generic transport layer logging obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o +# packet logging for netdev family +obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o + obj-$(CONFIG_NF_NAT) += nf_nat.o obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index 119fe1cb1ea9..ed9b80815fa0 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -175,6 +175,33 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, } EXPORT_SYMBOL_GPL(nf_log_dump_packet_common); +/* bridge and netdev logging families share this code. */ +void nf_log_l2packet(struct net *net, u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + case htons(ETH_P_IPV6): + nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + case htons(ETH_P_ARP): + case htons(ETH_P_RARP): + nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out, + loginfo, "%s", prefix); + break; + } +} +EXPORT_SYMBOL_GPL(nf_log_l2packet); + static int __init nf_log_common_init(void) { return 0; diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c new file mode 100644 index 000000000000..1f645949f3d8 --- /dev/null +++ b/net/netfilter/nf_log_netdev.c @@ -0,0 +1,80 @@ +/* + * (C) 2016 by Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include +#include + +static void nf_log_netdev_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + nf_log_l2packet(net, pf, hooknum, skb, in, out, loginfo, prefix); +} + +static struct nf_logger nf_netdev_logger __read_mostly = { + .name = "nf_log_netdev", + .type = NF_LOG_TYPE_LOG, + .logfn = nf_log_netdev_packet, + .me = THIS_MODULE, +}; + +static int __net_init nf_log_netdev_net_init(struct net *net) +{ + return nf_log_set(net, NFPROTO_NETDEV, &nf_netdev_logger); +} + +static void __net_exit nf_log_netdev_net_exit(struct net *net) +{ + nf_log_unset(net, &nf_netdev_logger); +} + +static struct pernet_operations nf_log_netdev_net_ops = { + .init = nf_log_netdev_net_init, + .exit = nf_log_netdev_net_exit, +}; + +static int __init nf_log_netdev_init(void) +{ + int ret; + + /* Request to load the real packet loggers. */ + nf_logger_request_module(NFPROTO_IPV4, NF_LOG_TYPE_LOG); + nf_logger_request_module(NFPROTO_IPV6, NF_LOG_TYPE_LOG); + nf_logger_request_module(NFPROTO_ARP, NF_LOG_TYPE_LOG); + + ret = register_pernet_subsys(&nf_log_netdev_net_ops); + if (ret < 0) + return ret; + + nf_log_register(NFPROTO_NETDEV, &nf_netdev_logger); + return 0; +} + +static void __exit nf_log_netdev_exit(void) +{ + unregister_pernet_subsys(&nf_log_netdev_net_ops); + nf_log_unregister(&nf_netdev_logger); +} + +module_init(nf_log_netdev_init); +module_exit(nf_log_netdev_exit); + +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_DESCRIPTION("Netfilter netdev packet logging"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NF_LOGGER(5, 0); /* NFPROTO_NETDEV */ -- cgit v1.2.3 From 8db4c5be88f62ffd7a552f70687a10c614dc697b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 27 Oct 2016 19:49:48 +0100 Subject: netfilter: move socket lookup infrastructure to nf_socket_ipv{4,6}.c We need this split to reuse existing codebase for the upcoming nf_tables socket expression. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_socket.h | 27 ++++ net/ipv4/netfilter/Kconfig | 6 + net/ipv4/netfilter/Makefile | 2 + net/ipv4/netfilter/nf_socket_ipv4.c | 163 +++++++++++++++++++ net/ipv6/netfilter/Kconfig | 6 + net/ipv6/netfilter/Makefile | 2 + net/ipv6/netfilter/nf_socket_ipv6.c | 151 ++++++++++++++++++ net/netfilter/Kconfig | 3 +- net/netfilter/xt_socket.c | 305 ++---------------------------------- 9 files changed, 369 insertions(+), 296 deletions(-) create mode 100644 include/net/netfilter/nf_socket.h create mode 100644 net/ipv4/netfilter/nf_socket_ipv4.c create mode 100644 net/ipv6/netfilter/nf_socket_ipv6.c (limited to 'include/net') diff --git a/include/net/netfilter/nf_socket.h b/include/net/netfilter/nf_socket.h new file mode 100644 index 000000000000..f2fc39c97d43 --- /dev/null +++ b/include/net/netfilter/nf_socket.h @@ -0,0 +1,27 @@ +#ifndef _NF_SOCK_H_ +#define _NF_SOCK_H_ + +struct net_device; +struct sk_buff; +struct sock; +struct net; + +static inline bool nf_sk_is_transparent(struct sock *sk) +{ + switch (sk->sk_state) { + case TCP_TIME_WAIT: + return inet_twsk(sk)->tw_transparent; + case TCP_NEW_SYN_RECV: + return inet_rsk(inet_reqsk(sk))->no_srccheck; + default: + return inet_sk(sk)->transparent; + } +} + +struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, + const struct net_device *indev); + +struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, + const struct net_device *indev); + +#endif diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index f80bb8f457c8..c11eb1744ab1 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV4 To compile it as a module, choose M here. If unsure, say N. +config NF_SOCKET_IPV4 + tristate "IPv4 socket lookup support" + help + This option enables the IPv4 socket lookup infrastructure. This is + is required by the iptables socket match. + if NF_TABLES config NF_TABLES_IPV4 diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 59d7786ddce1..f462fee66ac8 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -14,6 +14,8 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o # defrag obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o +obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o + # logging obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c new file mode 100644 index 000000000000..a83d558e1aae --- /dev/null +++ b/net/ipv4/netfilter/nf_socket_ipv4.c @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2007-2008 BalaBit IT Ltd. + * Author: Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include +#endif + +static int +extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol, + __be32 *raddr, __be32 *laddr, + __be16 *rport, __be16 *lport) +{ + unsigned int outside_hdrlen = ip_hdrlen(skb); + struct iphdr *inside_iph, _inside_iph; + struct icmphdr *icmph, _icmph; + __be16 *ports, _ports[2]; + + icmph = skb_header_pointer(skb, outside_hdrlen, + sizeof(_icmph), &_icmph); + if (icmph == NULL) + return 1; + + switch (icmph->type) { + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_REDIRECT: + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + break; + default: + return 1; + } + + inside_iph = skb_header_pointer(skb, outside_hdrlen + + sizeof(struct icmphdr), + sizeof(_inside_iph), &_inside_iph); + if (inside_iph == NULL) + return 1; + + if (inside_iph->protocol != IPPROTO_TCP && + inside_iph->protocol != IPPROTO_UDP) + return 1; + + ports = skb_header_pointer(skb, outside_hdrlen + + sizeof(struct icmphdr) + + (inside_iph->ihl << 2), + sizeof(_ports), &_ports); + if (ports == NULL) + return 1; + + /* the inside IP packet is the one quoted from our side, thus + * its saddr is the local address */ + *protocol = inside_iph->protocol; + *laddr = inside_iph->saddr; + *lport = ports[0]; + *raddr = inside_iph->daddr; + *rport = ports[1]; + + return 0; +} + +static struct sock * +nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, + const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in) +{ + switch (protocol) { + case IPPROTO_TCP: + return inet_lookup(net, &tcp_hashinfo, skb, doff, + saddr, sport, daddr, dport, + in->ifindex); + case IPPROTO_UDP: + return udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + } + return NULL; +} + +struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, + const struct net_device *indev) +{ + __be32 uninitialized_var(daddr), uninitialized_var(saddr); + __be16 uninitialized_var(dport), uninitialized_var(sport); + const struct iphdr *iph = ip_hdr(skb); + struct sk_buff *data_skb = NULL; + u8 uninitialized_var(protocol); +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + enum ip_conntrack_info ctinfo; + struct nf_conn const *ct; +#endif + int doff = 0; + + if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { + struct udphdr _hdr, *hp; + + hp = skb_header_pointer(skb, ip_hdrlen(skb), + sizeof(_hdr), &_hdr); + if (hp == NULL) + return NULL; + + protocol = iph->protocol; + saddr = iph->saddr; + sport = hp->source; + daddr = iph->daddr; + dport = hp->dest; + data_skb = (struct sk_buff *)skb; + doff = iph->protocol == IPPROTO_TCP ? + ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) : + ip_hdrlen(skb) + sizeof(*hp); + + } else if (iph->protocol == IPPROTO_ICMP) { + if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, + &sport, &dport)) + return NULL; + } else { + return NULL; + } + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + /* Do the lookup with the original socket address in + * case this is a reply packet of an established + * SNAT-ted connection. + */ + ct = nf_ct_get(skb, &ctinfo); + if (ct && !nf_ct_is_untracked(ct) && + ((iph->protocol != IPPROTO_ICMP && + ctinfo == IP_CT_ESTABLISHED_REPLY) || + (iph->protocol == IPPROTO_ICMP && + ctinfo == IP_CT_RELATED_REPLY)) && + (ct->status & IPS_SRC_NAT_DONE)) { + + daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; + dport = (iph->protocol == IPPROTO_TCP) ? + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; + } +#endif + + return nf_socket_get_sock_v4(net, data_skb, doff, protocol, saddr, + daddr, sport, dport, indev); +} +EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v4); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); +MODULE_DESCRIPTION("Netfilter IPv4 socket lookup infrastructure"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 144f1de33836..6acb2eecd986 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. +config NF_SOCKET_IPV6 + tristate "IPv6 socket lookup support" + help + This option enables the IPv6 socket lookup infrastructure. This + is used by the ip6tables socket match. + if NF_TABLES config NF_TABLES_IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 7984f3071f05..fe180c96040e 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -24,6 +24,8 @@ obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o +obj-$(CONFIG_NF_SOCKET_IPV6) += nf_socket_ipv6.o + # logging obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c new file mode 100644 index 000000000000..ebb2bf84232a --- /dev/null +++ b/net/ipv6/netfilter/nf_socket_ipv6.c @@ -0,0 +1,151 @@ +/* + * Copyright (C) 2007-2008 BalaBit IT Ltd. + * Author: Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include +#endif + +static int +extract_icmp6_fields(const struct sk_buff *skb, + unsigned int outside_hdrlen, + int *protocol, + const struct in6_addr **raddr, + const struct in6_addr **laddr, + __be16 *rport, + __be16 *lport, + struct ipv6hdr *ipv6_var) +{ + const struct ipv6hdr *inside_iph; + struct icmp6hdr *icmph, _icmph; + __be16 *ports, _ports[2]; + u8 inside_nexthdr; + __be16 inside_fragoff; + int inside_hdrlen; + + icmph = skb_header_pointer(skb, outside_hdrlen, + sizeof(_icmph), &_icmph); + if (icmph == NULL) + return 1; + + if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK) + return 1; + + inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), + sizeof(*ipv6_var), ipv6_var); + if (inside_iph == NULL) + return 1; + inside_nexthdr = inside_iph->nexthdr; + + inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + + sizeof(*ipv6_var), + &inside_nexthdr, &inside_fragoff); + if (inside_hdrlen < 0) + return 1; /* hjm: Packet has no/incomplete transport layer headers. */ + + if (inside_nexthdr != IPPROTO_TCP && + inside_nexthdr != IPPROTO_UDP) + return 1; + + ports = skb_header_pointer(skb, inside_hdrlen, + sizeof(_ports), &_ports); + if (ports == NULL) + return 1; + + /* the inside IP packet is the one quoted from our side, thus + * its saddr is the local address */ + *protocol = inside_nexthdr; + *laddr = &inside_iph->saddr; + *lport = ports[0]; + *raddr = &inside_iph->daddr; + *rport = ports[1]; + + return 0; +} + +static struct sock * +nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff, + const u8 protocol, + const struct in6_addr *saddr, const struct in6_addr *daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in) +{ + switch (protocol) { + case IPPROTO_TCP: + return inet6_lookup(net, &tcp_hashinfo, skb, doff, + saddr, sport, daddr, dport, + in->ifindex); + case IPPROTO_UDP: + return udp6_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + } + + return NULL; +} + +struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, + const struct net_device *indev) +{ + __be16 uninitialized_var(dport), uninitialized_var(sport); + const struct in6_addr *daddr = NULL, *saddr = NULL; + struct ipv6hdr *iph = ipv6_hdr(skb); + struct sk_buff *data_skb = NULL; + int doff = 0; + int thoff = 0, tproto; + + tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); + if (tproto < 0) { + pr_debug("unable to find transport header in IPv6 packet, dropping\n"); + return NULL; + } + + if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) { + struct udphdr _hdr, *hp; + + hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); + if (hp == NULL) + return NULL; + + saddr = &iph->saddr; + sport = hp->source; + daddr = &iph->daddr; + dport = hp->dest; + data_skb = (struct sk_buff *)skb; + doff = tproto == IPPROTO_TCP ? + thoff + __tcp_hdrlen((struct tcphdr *)hp) : + thoff + sizeof(*hp); + + } else if (tproto == IPPROTO_ICMPV6) { + struct ipv6hdr ipv6_var; + + if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, + &sport, &dport, &ipv6_var)) + return NULL; + } else { + return NULL; + } + + return nf_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr, + sport, dport, indev); +} +EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v6); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); +MODULE_DESCRIPTION("Netfilter IPv6 socket lookup infrastructure"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 854dadb196a1..ed00ec114ea2 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1426,9 +1426,10 @@ config NETFILTER_XT_MATCH_SOCKET tristate '"socket" match support' depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED - depends on !NF_CONNTRACK || NF_CONNTRACK depends on IPV6 || IPV6=n depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n + depends on NF_SOCKET_IPV4 + depends on NF_SOCKET_IPV6 select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n help diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index b10ade272b50..018c369c9f0d 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -22,76 +22,14 @@ #include #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) -#define XT_SOCKET_HAVE_IPV6 1 #include #include #include #endif +#include #include -#if IS_ENABLED(CONFIG_NF_CONNTRACK) -#define XT_SOCKET_HAVE_CONNTRACK 1 -#include -#endif - -static int -extract_icmp4_fields(const struct sk_buff *skb, - u8 *protocol, - __be32 *raddr, - __be32 *laddr, - __be16 *rport, - __be16 *lport) -{ - unsigned int outside_hdrlen = ip_hdrlen(skb); - struct iphdr *inside_iph, _inside_iph; - struct icmphdr *icmph, _icmph; - __be16 *ports, _ports[2]; - - icmph = skb_header_pointer(skb, outside_hdrlen, - sizeof(_icmph), &_icmph); - if (icmph == NULL) - return 1; - - switch (icmph->type) { - case ICMP_DEST_UNREACH: - case ICMP_SOURCE_QUENCH: - case ICMP_REDIRECT: - case ICMP_TIME_EXCEEDED: - case ICMP_PARAMETERPROB: - break; - default: - return 1; - } - - inside_iph = skb_header_pointer(skb, outside_hdrlen + - sizeof(struct icmphdr), - sizeof(_inside_iph), &_inside_iph); - if (inside_iph == NULL) - return 1; - - if (inside_iph->protocol != IPPROTO_TCP && - inside_iph->protocol != IPPROTO_UDP) - return 1; - - ports = skb_header_pointer(skb, outside_hdrlen + - sizeof(struct icmphdr) + - (inside_iph->ihl << 2), - sizeof(_ports), &_ports); - if (ports == NULL) - return 1; - - /* the inside IP packet is the one quoted from our side, thus - * its saddr is the local address */ - *protocol = inside_iph->protocol; - *laddr = inside_iph->saddr; - *lport = ports[0]; - *raddr = inside_iph->daddr; - *rport = ports[1]; - - return 0; -} - /* "socket" match based redirection (no specific rule) * =================================================== * @@ -111,104 +49,6 @@ extract_icmp4_fields(const struct sk_buff *skb, * then local services could intercept traffic going through the * box. */ -static struct sock * -xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, - const u8 protocol, - const __be32 saddr, const __be32 daddr, - const __be16 sport, const __be16 dport, - const struct net_device *in) -{ - switch (protocol) { - case IPPROTO_TCP: - return inet_lookup(net, &tcp_hashinfo, skb, doff, - saddr, sport, daddr, dport, - in->ifindex); - case IPPROTO_UDP: - return udp4_lib_lookup(net, saddr, sport, daddr, dport, - in->ifindex); - } - return NULL; -} - -static bool xt_socket_sk_is_transparent(struct sock *sk) -{ - switch (sk->sk_state) { - case TCP_TIME_WAIT: - return inet_twsk(sk)->tw_transparent; - - case TCP_NEW_SYN_RECV: - return inet_rsk(inet_reqsk(sk))->no_srccheck; - - default: - return inet_sk(sk)->transparent; - } -} - -static struct sock *xt_socket_lookup_slow_v4(struct net *net, - const struct sk_buff *skb, - const struct net_device *indev) -{ - const struct iphdr *iph = ip_hdr(skb); - struct sk_buff *data_skb = NULL; - int doff = 0; - __be32 uninitialized_var(daddr), uninitialized_var(saddr); - __be16 uninitialized_var(dport), uninitialized_var(sport); - u8 uninitialized_var(protocol); -#ifdef XT_SOCKET_HAVE_CONNTRACK - struct nf_conn const *ct; - enum ip_conntrack_info ctinfo; -#endif - - if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { - struct udphdr _hdr, *hp; - - hp = skb_header_pointer(skb, ip_hdrlen(skb), - sizeof(_hdr), &_hdr); - if (hp == NULL) - return NULL; - - protocol = iph->protocol; - saddr = iph->saddr; - sport = hp->source; - daddr = iph->daddr; - dport = hp->dest; - data_skb = (struct sk_buff *)skb; - doff = iph->protocol == IPPROTO_TCP ? - ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) : - ip_hdrlen(skb) + sizeof(*hp); - - } else if (iph->protocol == IPPROTO_ICMP) { - if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, - &sport, &dport)) - return NULL; - } else { - return NULL; - } - -#ifdef XT_SOCKET_HAVE_CONNTRACK - /* Do the lookup with the original socket address in - * case this is a reply packet of an established - * SNAT-ted connection. - */ - ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct) && - ((iph->protocol != IPPROTO_ICMP && - ctinfo == IP_CT_ESTABLISHED_REPLY) || - (iph->protocol == IPPROTO_ICMP && - ctinfo == IP_CT_RELATED_REPLY)) && - (ct->status & IPS_SRC_NAT_DONE)) { - - daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; - dport = (iph->protocol == IPPROTO_TCP) ? - ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : - ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; - } -#endif - - return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr, - daddr, sport, dport, indev); -} - static bool socket_match(const struct sk_buff *skb, struct xt_action_param *par, const struct xt_socket_mtinfo1 *info) @@ -217,7 +57,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, struct sock *sk = skb->sk; if (!sk) - sk = xt_socket_lookup_slow_v4(par->net, skb, par->in); + sk = nf_sk_lookup_slow_v4(par->net, skb, par->in); if (sk) { bool wildcard; bool transparent = true; @@ -233,7 +73,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, * if XT_SOCKET_TRANSPARENT is used */ if (info->flags & XT_SOCKET_TRANSPARENT) - transparent = xt_socket_sk_is_transparent(sk); + transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent) @@ -265,132 +105,7 @@ socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) return socket_match(skb, par, par->matchinfo); } -#ifdef XT_SOCKET_HAVE_IPV6 - -static int -extract_icmp6_fields(const struct sk_buff *skb, - unsigned int outside_hdrlen, - int *protocol, - const struct in6_addr **raddr, - const struct in6_addr **laddr, - __be16 *rport, - __be16 *lport, - struct ipv6hdr *ipv6_var) -{ - const struct ipv6hdr *inside_iph; - struct icmp6hdr *icmph, _icmph; - __be16 *ports, _ports[2]; - u8 inside_nexthdr; - __be16 inside_fragoff; - int inside_hdrlen; - - icmph = skb_header_pointer(skb, outside_hdrlen, - sizeof(_icmph), &_icmph); - if (icmph == NULL) - return 1; - - if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK) - return 1; - - inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), - sizeof(*ipv6_var), ipv6_var); - if (inside_iph == NULL) - return 1; - inside_nexthdr = inside_iph->nexthdr; - - inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + - sizeof(*ipv6_var), - &inside_nexthdr, &inside_fragoff); - if (inside_hdrlen < 0) - return 1; /* hjm: Packet has no/incomplete transport layer headers. */ - - if (inside_nexthdr != IPPROTO_TCP && - inside_nexthdr != IPPROTO_UDP) - return 1; - - ports = skb_header_pointer(skb, inside_hdrlen, - sizeof(_ports), &_ports); - if (ports == NULL) - return 1; - - /* the inside IP packet is the one quoted from our side, thus - * its saddr is the local address */ - *protocol = inside_nexthdr; - *laddr = &inside_iph->saddr; - *lport = ports[0]; - *raddr = &inside_iph->daddr; - *rport = ports[1]; - - return 0; -} - -static struct sock * -xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff, - const u8 protocol, - const struct in6_addr *saddr, const struct in6_addr *daddr, - const __be16 sport, const __be16 dport, - const struct net_device *in) -{ - switch (protocol) { - case IPPROTO_TCP: - return inet6_lookup(net, &tcp_hashinfo, skb, doff, - saddr, sport, daddr, dport, - in->ifindex); - case IPPROTO_UDP: - return udp6_lib_lookup(net, saddr, sport, daddr, dport, - in->ifindex); - } - - return NULL; -} - -static struct sock *xt_socket_lookup_slow_v6(struct net *net, - const struct sk_buff *skb, - const struct net_device *indev) -{ - __be16 uninitialized_var(dport), uninitialized_var(sport); - const struct in6_addr *daddr = NULL, *saddr = NULL; - struct ipv6hdr *iph = ipv6_hdr(skb); - struct sk_buff *data_skb = NULL; - int doff = 0; - int thoff = 0, tproto; - - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); - if (tproto < 0) { - pr_debug("unable to find transport header in IPv6 packet, dropping\n"); - return NULL; - } - - if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) { - struct udphdr _hdr, *hp; - - hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); - if (hp == NULL) - return NULL; - - saddr = &iph->saddr; - sport = hp->source; - daddr = &iph->daddr; - dport = hp->dest; - data_skb = (struct sk_buff *)skb; - doff = tproto == IPPROTO_TCP ? - thoff + __tcp_hdrlen((struct tcphdr *)hp) : - thoff + sizeof(*hp); - - } else if (tproto == IPPROTO_ICMPV6) { - struct ipv6hdr ipv6_var; - - if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, - &sport, &dport, &ipv6_var)) - return NULL; - } else { - return NULL; - } - - return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr, - sport, dport, indev); -} - +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static bool socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) { @@ -399,7 +114,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) struct sock *sk = skb->sk; if (!sk) - sk = xt_socket_lookup_slow_v6(par->net, skb, par->in); + sk = nf_sk_lookup_slow_v6(par->net, skb, par->in); if (sk) { bool wildcard; bool transparent = true; @@ -415,7 +130,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) * if XT_SOCKET_TRANSPARENT is used */ if (info->flags & XT_SOCKET_TRANSPARENT) - transparent = xt_socket_sk_is_transparent(sk); + transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent) @@ -488,7 +203,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 1, @@ -512,7 +227,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 2, @@ -536,7 +251,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 3, @@ -554,7 +269,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { static int __init socket_mt_init(void) { nf_defrag_ipv4_enable(); -#ifdef XT_SOCKET_HAVE_IPV6 +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) nf_defrag_ipv6_enable(); #endif -- cgit v1.2.3 From 23f4ffedb7d751c7e298732ba91ca75d224bc1a6 Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Tue, 1 Nov 2016 23:45:12 +0800 Subject: ip6_tunnel: Clear IP6CB in ip6tunnel_xmit() skb->cb may contain data from previous layers. In the observed scenario, the garbage data were misinterpreted as IP6CB(skb)->frag_max_size, so that small packets sent through the tunnel are mistakenly fragmented. This patch unconditionally clears the control buffer in ip6tunnel_xmit(), which affects ip6_tunnel, ip6_udp_tunnel and ip6_gre. Currently none of these tunnels set IP6CB(skb)->flags, otherwise it needs to be done earlier. Cc: stable@vger.kernel.org Signed-off-by: Eli Cooper Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 20ed9699fcd4..1b1cf33cbfb0 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -146,6 +146,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, { int pkt_len, err; + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); pkt_len = skb->len - skb_inner_network_offset(skb); err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); if (unlikely(net_xmit_eval(err))) -- cgit v1.2.3 From 613dbd95723aee7abd16860745691b6c7bda20dc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 3 Nov 2016 10:56:21 +0100 Subject: netfilter: x_tables: move hook state into xt_action_param structure Place pointer to hook state in xt_action_param structure instead of copying the fields that we need. After this change xt_action_param fits into one cacheline. This patch also adds a set of new wrapper functions to fetch relevant hook state structure fields. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 48 +++++++++++++++++++++++------- include/net/netfilter/nf_tables.h | 11 +++---- net/bridge/netfilter/ebt_arpreply.c | 3 +- net/bridge/netfilter/ebt_log.c | 11 +++---- net/bridge/netfilter/ebt_nflog.c | 6 ++-- net/bridge/netfilter/ebt_redirect.c | 6 ++-- net/bridge/netfilter/ebtables.c | 6 +--- net/ipv4/netfilter/arp_tables.c | 6 +--- net/ipv4/netfilter/ip_tables.c | 6 +--- net/ipv4/netfilter/ipt_MASQUERADE.c | 3 +- net/ipv4/netfilter/ipt_REJECT.c | 4 +-- net/ipv4/netfilter/ipt_SYNPROXY.c | 4 +-- net/ipv4/netfilter/ipt_rpfilter.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 6 +--- net/ipv6/netfilter/ip6t_MASQUERADE.c | 2 +- net/ipv6/netfilter/ip6t_REJECT.c | 23 ++++++++------ net/ipv6/netfilter/ip6t_SYNPROXY.c | 4 +-- net/ipv6/netfilter/ip6t_rpfilter.c | 3 +- net/netfilter/ipset/ip_set_core.c | 6 ++-- net/netfilter/ipset/ip_set_hash_netiface.c | 2 +- net/netfilter/xt_AUDIT.c | 10 +++---- net/netfilter/xt_LOG.c | 6 ++-- net/netfilter/xt_NETMAP.c | 20 ++++++------- net/netfilter/xt_NFLOG.c | 6 ++-- net/netfilter/xt_NFQUEUE.c | 4 +-- net/netfilter/xt_REDIRECT.c | 4 +-- net/netfilter/xt_TCPMSS.c | 4 +-- net/netfilter/xt_TEE.c | 4 +-- net/netfilter/xt_TPROXY.c | 16 +++++----- net/netfilter/xt_addrtype.c | 10 +++---- net/netfilter/xt_cluster.c | 2 +- net/netfilter/xt_connlimit.c | 8 ++--- net/netfilter/xt_conntrack.c | 8 ++--- net/netfilter/xt_devgroup.c | 4 +-- net/netfilter/xt_dscp.c | 2 +- net/netfilter/xt_ipvs.c | 4 +-- net/netfilter/xt_nfacct.c | 2 +- net/netfilter/xt_osf.c | 10 +++---- net/netfilter/xt_owner.c | 2 +- net/netfilter/xt_pkttype.c | 4 +-- net/netfilter/xt_policy.c | 4 +-- net/netfilter/xt_recent.c | 10 +++---- net/netfilter/xt_set.c | 26 ++++++++-------- net/netfilter/xt_socket.c | 4 +-- net/sched/act_ipt.c | 12 ++++---- net/sched/em_ipset.c | 17 ++++++----- 46 files changed, 196 insertions(+), 169 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 2ad1a2b289b5..cd4eaf8df445 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -4,6 +4,7 @@ #include #include +#include #include /* Test a struct->invflags and a boolean for inequality */ @@ -17,14 +18,9 @@ * @target: the target extension * @matchinfo: per-match data * @targetinfo: per-target data - * @net network namespace through which the action was invoked - * @in: input netdevice - * @out: output netdevice + * @state: pointer to hook state this packet came from * @fragoff: packet is a fragment, this is the data offset * @thoff: position of transport header relative to skb->data - * @hook: hook number given packet came from - * @family: Actual NFPROTO_* through which the function is invoked - * (helpful when match->family == NFPROTO_UNSPEC) * * Fields written to by extensions: * @@ -38,15 +34,47 @@ struct xt_action_param { union { const void *matchinfo, *targinfo; }; - struct net *net; - const struct net_device *in, *out; + const struct nf_hook_state *state; int fragoff; unsigned int thoff; - unsigned int hooknum; - u_int8_t family; bool hotdrop; }; +static inline struct net *xt_net(const struct xt_action_param *par) +{ + return par->state->net; +} + +static inline struct net_device *xt_in(const struct xt_action_param *par) +{ + return par->state->in; +} + +static inline const char *xt_inname(const struct xt_action_param *par) +{ + return par->state->in->name; +} + +static inline struct net_device *xt_out(const struct xt_action_param *par) +{ + return par->state->out; +} + +static inline const char *xt_outname(const struct xt_action_param *par) +{ + return par->state->out->name; +} + +static inline unsigned int xt_hooknum(const struct xt_action_param *par) +{ + return par->state->hook; +} + +static inline u_int8_t xt_family(const struct xt_action_param *par) +{ + return par->state->pf; +} + /** * struct xt_mtchk_param - parameters for match extensions' * checkentry functions diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5031e072567b..44060344f958 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -30,11 +30,12 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_state *state) { pkt->skb = skb; - pkt->net = pkt->xt.net = state->net; - pkt->in = pkt->xt.in = state->in; - pkt->out = pkt->xt.out = state->out; - pkt->hook = pkt->xt.hooknum = state->hook; - pkt->pf = pkt->xt.family = state->pf; + pkt->net = state->net; + pkt->in = state->in; + pkt->out = state->out; + pkt->hook = state->hook; + pkt->pf = state->pf; + pkt->xt.state = state; } static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt, diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 070cf134a22f..5929309beaa1 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -51,7 +51,8 @@ ebt_arpreply_tg(struct sk_buff *skb, const struct xt_action_param *par) if (diptr == NULL) return EBT_DROP; - arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)par->in, + arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, + (struct net_device *)xt_in(par), *diptr, shp, info->mac, shp); return info->target; diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 9a11086ba6ff..e88bd4827ac1 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -179,7 +179,7 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; - struct net *net = par->net; + struct net *net = xt_net(par); li.type = NF_LOG_TYPE_LOG; li.u.log.level = info->loglevel; @@ -190,11 +190,12 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) * nf_log_packet() with NFT_LOG_TYPE_LOG here. --Pablo */ if (info->bitmask & EBT_LOG_NFLOG) - nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, - par->in, par->out, &li, "%s", info->prefix); + nf_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb, + xt_in(par), xt_out(par), &li, "%s", + info->prefix); else - ebt_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, info->prefix); + ebt_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb, + xt_in(par), xt_out(par), &li, info->prefix); return EBT_CONTINUE; } diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 54816150608e..c1dc48686200 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -23,16 +23,16 @@ static unsigned int ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nflog_info *info = par->targinfo; + struct net *net = xt_net(par); struct nf_loginfo li; - struct net *net = par->net; li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, "%s", info->prefix); + nf_log_packet(net, PF_BRIDGE, xt_hooknum(par), skb, xt_in(par), + xt_out(par), &li, "%s", info->prefix); return EBT_CONTINUE; } diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 2e7c4f974340..8d2a85e0594e 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -23,12 +23,12 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) if (!skb_make_writable(skb, 0)) return EBT_DROP; - if (par->hooknum != NF_BR_BROUTING) + if (xt_hooknum(par) != NF_BR_BROUTING) /* rcu_read_lock()ed by nf_hook_thresh */ ether_addr_copy(eth_hdr(skb)->h_dest, - br_port_get_rcu(par->in)->br->dev->dev_addr); + br_port_get_rcu(xt_in(par))->br->dev->dev_addr); else - ether_addr_copy(eth_hdr(skb)->h_dest, par->in->dev_addr); + ether_addr_copy(eth_hdr(skb)->h_dest, xt_in(par)->dev_addr); skb->pkt_type = PACKET_HOST; return info->target; } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index f5c11bbe27db..1ab6014cf0f8 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -194,12 +194,8 @@ unsigned int ebt_do_table(struct sk_buff *skb, const struct ebt_table_info *private; struct xt_action_param acpar; - acpar.family = NFPROTO_BRIDGE; - acpar.net = state->net; - acpar.in = state->in; - acpar.out = state->out; + acpar.state = state; acpar.hotdrop = false; - acpar.hooknum = hook; read_lock_bh(&table->lock); private = table->private; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index b31df597fd37..e76ab23a2deb 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -217,11 +217,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, */ e = get_entry(table_base, private->hook_entry[hook]); - acpar.net = state->net; - acpar.in = state->in; - acpar.out = state->out; - acpar.hooknum = hook; - acpar.family = NFPROTO_ARP; + acpar.state = state; acpar.hotdrop = false; arp = arp_hdr(skb); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 7c00ce90adb8..de4fa03f46f3 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -261,11 +261,7 @@ ipt_do_table(struct sk_buff *skb, acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; acpar.thoff = ip_hdrlen(skb); acpar.hotdrop = false; - acpar.net = state->net; - acpar.in = state->in; - acpar.out = state->out; - acpar.family = NFPROTO_IPV4; - acpar.hooknum = hook; + acpar.state = state; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); local_bh_disable(); diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index da7f02a0b868..34cfb9b0bc0a 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -55,7 +55,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) range.min_proto = mr->range[0].min; range.max_proto = mr->range[0].max; - return nf_nat_masquerade_ipv4(skb, par->hooknum, &range, par->out); + return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range, + xt_out(par)); } static struct xt_target masquerade_tg_reg __read_mostly = { diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 1d16c0f28df0..8bd0d7b26632 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -34,7 +34,7 @@ static unsigned int reject_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipt_reject_info *reject = par->targinfo; - int hook = par->hooknum; + int hook = xt_hooknum(par); switch (reject->with) { case IPT_ICMP_NET_UNREACHABLE: @@ -59,7 +59,7 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par) nf_send_unreach(skb, ICMP_PKT_FILTERED, hook); break; case IPT_TCP_RESET: - nf_send_reset(par->net, skb, hook); + nf_send_reset(xt_net(par), skb, hook); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index db5b87509446..361411688221 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -263,12 +263,12 @@ static unsigned int synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct net *net = par->net; + struct net *net = xt_net(par); struct synproxy_net *snet = synproxy_pernet(net); struct synproxy_options opts = {}; struct tcphdr *th, _th; - if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP)) + if (nf_ip_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP)) return NF_DROP; th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th); diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 78cc64eddfc1..59b49945b481 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -95,7 +95,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) flow.flowi4_tos = RT_TOS(iph->tos); flow.flowi4_scope = RT_SCOPE_UNIVERSE; - return rpfilter_lookup_reverse(par->net, &flow, par->in, info->flags) ^ invert; + return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert; } static int rpfilter_check(const struct xt_mtchk_param *par) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 55aacea24396..7eac01d5d621 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -291,11 +291,7 @@ ip6t_do_table(struct sk_buff *skb, * rule is also a fragment-specific rule, non-fragments won't * match it. */ acpar.hotdrop = false; - acpar.net = state->net; - acpar.in = state->in; - acpar.out = state->out; - acpar.family = NFPROTO_IPV6; - acpar.hooknum = hook; + acpar.state = state; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c index 7f9f45d829d2..2b1a15846f9a 100644 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -24,7 +24,7 @@ static unsigned int masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par) { - return nf_nat_masquerade_ipv6(skb, par->targinfo, par->out); + return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par)); } static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index db29bbf41b59..fa51a205918d 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -39,35 +39,40 @@ static unsigned int reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_reject_info *reject = par->targinfo; - struct net *net = par->net; + struct net *net = xt_net(par); switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: - nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOROUTE, xt_hooknum(par)); break; case IP6T_ICMP6_ADM_PROHIBITED: - nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, + xt_hooknum(par)); break; case IP6T_ICMP6_NOT_NEIGHBOUR: - nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, + xt_hooknum(par)); break; case IP6T_ICMP6_ADDR_UNREACH: - nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, + xt_hooknum(par)); break; case IP6T_ICMP6_PORT_UNREACH: - nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, + xt_hooknum(par)); break; case IP6T_ICMP6_ECHOREPLY: /* Do nothing */ break; case IP6T_TCP_RESET: - nf_send_reset6(net, skb, par->hooknum); + nf_send_reset6(net, skb, xt_hooknum(par)); break; case IP6T_ICMP6_POLICY_FAIL: - nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, xt_hooknum(par)); break; case IP6T_ICMP6_REJECT_ROUTE: - nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE, + xt_hooknum(par)); break; } diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 06bed74cf5ee..99a1216287c8 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -277,12 +277,12 @@ static unsigned int synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct net *net = par->net; + struct net *net = xt_net(par); struct synproxy_net *snet = synproxy_pernet(net); struct synproxy_options opts = {}; struct tcphdr *th, _th; - if (nf_ip6_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP)) + if (nf_ip6_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP)) return NF_DROP; th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th); diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 1ee1b25df096..d5263dc364a9 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -93,7 +93,8 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) if (unlikely(saddrtype == IPV6_ADDR_ANY)) return true ^ invert; /* not routable: forward path will drop it */ - return rpfilter_lookup_reverse6(par->net, skb, par->in, info->flags) ^ invert; + return rpfilter_lookup_reverse6(xt_net(par), skb, xt_in(par), + info->flags) ^ invert; } static int rpfilter_check(const struct xt_mtchk_param *par) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index a748b0c2c981..3f1b945a24d5 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -541,7 +541,7 @@ int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(par->net, index); + struct ip_set *set = ip_set_rcu_get(xt_net(par), index); int ret = 0; BUG_ON(!set); @@ -579,7 +579,7 @@ int ip_set_add(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(par->net, index); + struct ip_set *set = ip_set_rcu_get(xt_net(par), index); int ret; BUG_ON(!set); @@ -601,7 +601,7 @@ int ip_set_del(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(par->net, index); + struct ip_set *set = ip_set_rcu_get(xt_net(par), index); int ret = 0; BUG_ON(!set); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index f0f688db6213..aa1a776613b9 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -170,7 +170,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); e.ip &= ip_set_netmask(e.cidr); -#define IFACE(dir) (par->dir ? par->dir->name : "") +#define IFACE(dir) (par->state->dir ? par->state->dir->name : "") #define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC) if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index 4973cbddc446..19247a17e511 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -132,9 +132,9 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) goto errout; audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s", - info->type, par->hooknum, skb->len, - par->in ? par->in->name : "?", - par->out ? par->out->name : "?"); + info->type, xt_hooknum(par), skb->len, + xt_in(par) ? xt_inname(par) : "?", + xt_out(par) ? xt_outname(par) : "?"); if (skb->mark) audit_log_format(ab, " mark=%#x", skb->mark); @@ -144,7 +144,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, ntohs(eth_hdr(skb)->h_proto)); - if (par->family == NFPROTO_BRIDGE) { + if (xt_family(par) == NFPROTO_BRIDGE) { switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): audit_ip4(ab, skb); @@ -157,7 +157,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) } } - switch (par->family) { + switch (xt_family(par)) { case NFPROTO_IPV4: audit_ip4(ab, skb); break; diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index 1763ab82bcd7..c3b2017ebe41 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -32,15 +32,15 @@ static unsigned int log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_log_info *loginfo = par->targinfo; + struct net *net = xt_net(par); struct nf_loginfo li; - struct net *net = par->net; li.type = NF_LOG_TYPE_LOG; li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - nf_log_packet(net, par->family, par->hooknum, skb, par->in, par->out, - &li, "%s", loginfo->prefix); + nf_log_packet(net, xt_family(par), xt_hooknum(par), skb, xt_in(par), + xt_out(par), &li, "%s", loginfo->prefix); return XT_CONTINUE; } diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c index b253e07cb1c5..94d0b5411192 100644 --- a/net/netfilter/xt_NETMAP.c +++ b/net/netfilter/xt_NETMAP.c @@ -33,8 +33,8 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par) netmask.ip6[i] = ~(range->min_addr.ip6[i] ^ range->max_addr.ip6[i]); - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT) + if (xt_hooknum(par) == NF_INET_PRE_ROUTING || + xt_hooknum(par) == NF_INET_LOCAL_OUT) new_addr.in6 = ipv6_hdr(skb)->daddr; else new_addr.in6 = ipv6_hdr(skb)->saddr; @@ -51,7 +51,7 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par) newrange.min_proto = range->min_proto; newrange.max_proto = range->max_proto; - return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par))); } static int netmap_tg6_checkentry(const struct xt_tgchk_param *par) @@ -72,16 +72,16 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par) const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; struct nf_nat_range newrange; - NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_POST_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT || - par->hooknum == NF_INET_LOCAL_IN); + NF_CT_ASSERT(xt_hooknum(par) == NF_INET_PRE_ROUTING || + xt_hooknum(par) == NF_INET_POST_ROUTING || + xt_hooknum(par) == NF_INET_LOCAL_OUT || + xt_hooknum(par) == NF_INET_LOCAL_IN); ct = nf_ct_get(skb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT) + if (xt_hooknum(par) == NF_INET_PRE_ROUTING || + xt_hooknum(par) == NF_INET_LOCAL_OUT) new_ip = ip_hdr(skb)->daddr & ~netmask; else new_ip = ip_hdr(skb)->saddr & ~netmask; @@ -96,7 +96,7 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par) newrange.max_proto = mr->range[0].max; /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par))); } static int netmap_tg4_check(const struct xt_tgchk_param *par) diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 8668a5c18dc3..c7f8958cea4a 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -25,8 +25,8 @@ static unsigned int nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_nflog_info *info = par->targinfo; + struct net *net = xt_net(par); struct nf_loginfo li; - struct net *net = par->net; li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; @@ -37,8 +37,8 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) if (info->flags & XT_NFLOG_F_COPY_LEN) li.u.ulog.flags |= NF_LOG_F_COPY_LEN; - nfulnl_log_packet(net, par->family, par->hooknum, skb, par->in, - par->out, &li, info->prefix); + nfulnl_log_packet(net, xt_family(par), xt_hooknum(par), skb, + xt_in(par), xt_out(par), &li, info->prefix); return XT_CONTINUE; } diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 8f1779ff7e30..a360b99a958a 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -43,7 +43,7 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) if (info->queues_total > 1) { queue = nfqueue_hash(skb, queue, info->queues_total, - par->family, jhash_initval); + xt_family(par), jhash_initval); } return NF_QUEUE_NR(queue); } @@ -98,7 +98,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) queue = info->queuenum + cpu % info->queues_total; } else { queue = nfqueue_hash(skb, queue, info->queues_total, - par->family, jhash_initval); + xt_family(par), jhash_initval); } } diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c index 03f0b370e178..651dce65a30b 100644 --- a/net/netfilter/xt_REDIRECT.c +++ b/net/netfilter/xt_REDIRECT.c @@ -31,7 +31,7 @@ static unsigned int redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par) { - return nf_nat_redirect_ipv6(skb, par->targinfo, par->hooknum); + return nf_nat_redirect_ipv6(skb, par->targinfo, xt_hooknum(par)); } static int redirect_tg6_checkentry(const struct xt_tgchk_param *par) @@ -62,7 +62,7 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par) static unsigned int redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par) { - return nf_nat_redirect_ipv4(skb, par->targinfo, par->hooknum); + return nf_nat_redirect_ipv4(skb, par->targinfo, xt_hooknum(par)); } static struct xt_target redirect_tg_reg[] __read_mostly = { diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 872db2d0e2a9..27241a767f17 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -108,7 +108,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { - struct net *net = par->net; + struct net *net = xt_net(par); unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu); @@ -172,7 +172,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, * length IPv6 header of 60, ergo the default MSS value is 1220 * Since no MSS was provided, we must use the default values */ - if (par->family == NFPROTO_IPV4) + if (xt_family(par) == NFPROTO_IPV4) newmss = min(newmss, (u16)536); else newmss = min(newmss, (u16)1220); diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 0471db4032c5..1c57ace75ae6 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -33,7 +33,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) const struct xt_tee_tginfo *info = par->targinfo; int oif = info->priv ? info->priv->oif : 0; - nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, oif); + nf_dup_ipv4(xt_net(par), skb, xt_hooknum(par), &info->gw.in, oif); return XT_CONTINUE; } @@ -45,7 +45,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) const struct xt_tee_tginfo *info = par->targinfo; int oif = info->priv ? info->priv->oif : 0; - nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, oif); + nf_dup_ipv6(xt_net(par), skb, xt_hooknum(par), &info->gw.in6, oif); return XT_CONTINUE; } diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 663c4c3c9072..dbd72cc40e42 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -364,7 +364,8 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info *tgi = par->targinfo; - return tproxy_tg4(par->net, skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); + return tproxy_tg4(xt_net(par), skb, tgi->laddr, tgi->lport, + tgi->mark_mask, tgi->mark_value); } static unsigned int @@ -372,7 +373,8 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; - return tproxy_tg4(par->net, skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); + return tproxy_tg4(xt_net(par), skb, tgi->laddr.ip, tgi->lport, + tgi->mark_mask, tgi->mark_value); } #ifdef XT_TPROXY_HAVE_IPV6 @@ -442,7 +444,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, * to a listener socket if there's one */ struct sock *sk2; - sk2 = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto, + sk2 = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto, &iph->saddr, tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr), hp->source, @@ -485,10 +487,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ - sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto, + sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto, &iph->saddr, &iph->daddr, hp->source, hp->dest, - par->in, NFT_LOOKUP_ESTABLISHED); + xt_in(par), NFT_LOOKUP_ESTABLISHED); laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr); lport = tgi->lport ? tgi->lport : hp->dest; @@ -500,10 +502,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) else if (!sk) /* no there's no established connection, check if * there's a listener on the redirected addr/port */ - sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, + sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto, &iph->saddr, laddr, hp->source, lport, - par->in, NFT_LOOKUP_LISTENER); + xt_in(par), NFT_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ if (sk && tproxy_sk_is_transparent(sk)) { diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 11d6091991a4..e329dabde35f 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -125,7 +125,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev, static bool addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; + struct net *net = xt_net(par); const struct xt_addrtype_info *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); bool ret = true; @@ -143,19 +143,19 @@ addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) static bool addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; + struct net *net = xt_net(par); const struct xt_addrtype_info_v1 *info = par->matchinfo; const struct iphdr *iph; const struct net_device *dev = NULL; bool ret = true; if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) - dev = par->in; + dev = xt_in(par); else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) - dev = par->out; + dev = xt_out(par); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - if (par->family == NFPROTO_IPV6) + if (xt_family(par) == NFPROTO_IPV6) return addrtype_mt6(net, dev, skb, info); #endif iph = ip_hdr(skb); diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 96fa26b20b67..9a9884a39c0e 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -112,7 +112,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par) * know, matches should not alter packets, but we are doing this here * because we would need to add a PKTTYPE target for this sole purpose. */ - if (!xt_cluster_is_multicast_addr(skb, par->family) && + if (!xt_cluster_is_multicast_addr(skb, xt_family(par)) && skb->pkt_type == PACKET_MULTICAST) { pskb->pkt_type = PACKET_HOST; } diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index b6dc322593a3..bb3845339efd 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -317,7 +317,7 @@ static int count_them(struct net *net, static bool connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; + struct net *net = xt_net(par); const struct xt_connlimit_info *info = par->matchinfo; union nf_inet_addr addr; struct nf_conntrack_tuple tuple; @@ -332,11 +332,11 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; zone = nf_ct_zone(ct); } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - par->family, net, &tuple)) { + xt_family(par), net, &tuple)) { goto hotdrop; } - if (par->family == NFPROTO_IPV6) { + if (xt_family(par) == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ? &iph->daddr : &iph->saddr, sizeof(addr.ip6)); @@ -347,7 +347,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) } connections = count_them(net, info->data, tuple_ptr, &addr, - &info->mask, par->family, zone); + &info->mask, xt_family(par), zone); if (connections == 0) /* kmalloc failed, drop it entirely */ goto hotdrop; diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index a3b8f697cfc5..2dea15ebc55b 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -200,22 +200,22 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, return false; if (info->match_flags & XT_CONNTRACK_ORIGSRC) - if (conntrack_mt_origsrc(ct, info, par->family) ^ + if (conntrack_mt_origsrc(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_ORIGSRC)) return false; if (info->match_flags & XT_CONNTRACK_ORIGDST) - if (conntrack_mt_origdst(ct, info, par->family) ^ + if (conntrack_mt_origdst(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_ORIGDST)) return false; if (info->match_flags & XT_CONNTRACK_REPLSRC) - if (conntrack_mt_replsrc(ct, info, par->family) ^ + if (conntrack_mt_replsrc(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_REPLSRC)) return false; if (info->match_flags & XT_CONNTRACK_REPLDST) - if (conntrack_mt_repldst(ct, info, par->family) ^ + if (conntrack_mt_repldst(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_REPLDST)) return false; diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c index d9202cdd25c9..96ebe1cdefec 100644 --- a/net/netfilter/xt_devgroup.c +++ b/net/netfilter/xt_devgroup.c @@ -24,12 +24,12 @@ static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_devgroup_info *info = par->matchinfo; if (info->flags & XT_DEVGROUP_MATCH_SRC && - (((info->src_group ^ par->in->group) & info->src_mask ? 1 : 0) ^ + (((info->src_group ^ xt_in(par)->group) & info->src_mask ? 1 : 0) ^ ((info->flags & XT_DEVGROUP_INVERT_SRC) ? 1 : 0))) return false; if (info->flags & XT_DEVGROUP_MATCH_DST && - (((info->dst_group ^ par->out->group) & info->dst_mask ? 1 : 0) ^ + (((info->dst_group ^ xt_out(par)->group) & info->dst_mask ? 1 : 0) ^ ((info->flags & XT_DEVGROUP_INVERT_DST) ? 1 : 0))) return false; diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 64670fc5d0e1..236ac8008909 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -58,7 +58,7 @@ static bool tos_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_tos_match_info *info = par->matchinfo; - if (par->family == NFPROTO_IPV4) + if (xt_family(par) == NFPROTO_IPV4) return ((ip_hdr(skb)->tos & info->tos_mask) == info->tos_value) ^ !!info->invert; else diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 71a9d95e0a81..0fdc89064488 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -48,9 +48,9 @@ static bool ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_ipvs_mtinfo *data = par->matchinfo; - struct netns_ipvs *ipvs = net_ipvs(par->net); + struct netns_ipvs *ipvs = net_ipvs(xt_net(par)); /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */ - const u_int8_t family = par->family; + const u_int8_t family = xt_family(par); struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index cf327593852a..cc0518fe598e 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -26,7 +26,7 @@ static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par) nfnl_acct_update(skb, info->nfacct); - overquota = nfnl_acct_overquota(par->net, skb, info->nfacct); + overquota = nfnl_acct_overquota(xt_net(par), skb, info->nfacct); return overquota == NFACCT_UNDERQUOTA ? false : true; } diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 2455b69b5810..c05fefcec238 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -201,7 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) unsigned char opts[MAX_IPOPTLEN]; const struct xt_osf_finger *kf; const struct xt_osf_user_finger *f; - struct net *net = p->net; + struct net *net = xt_net(p); if (!info) return false; @@ -326,8 +326,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) fcount++; if (info->flags & XT_OSF_LOG) - nf_log_packet(net, p->family, p->hooknum, skb, - p->in, p->out, NULL, + nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, + xt_in(p), xt_out(p), NULL, "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", f->genre, f->version, f->subtype, &ip->saddr, ntohs(tcp->source), @@ -341,8 +341,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) rcu_read_unlock(); if (!fcount && (info->flags & XT_OSF_LOG)) - nf_log_packet(net, p->family, p->hooknum, skb, p->in, - p->out, NULL, + nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p), + xt_out(p), NULL, "Remote OS is not known: %pI4:%u -> %pI4:%u\n", &ip->saddr, ntohs(tcp->source), &ip->daddr, ntohs(tcp->dest)); diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index a20e731b5b6c..16477df45b3b 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -63,7 +63,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_owner_match_info *info = par->matchinfo; const struct file *filp; struct sock *sk = skb_to_full_sk(skb); - struct net *net = par->net; + struct net *net = xt_net(par); if (sk == NULL || sk->sk_socket == NULL) return (info->match ^ info->invert) == 0; diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index 5b645cb598fc..57efb703ff18 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -30,10 +30,10 @@ pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par) if (skb->pkt_type != PACKET_LOOPBACK) type = skb->pkt_type; - else if (par->family == NFPROTO_IPV4 && + else if (xt_family(par) == NFPROTO_IPV4 && ipv4_is_multicast(ip_hdr(skb)->daddr)) type = PACKET_MULTICAST; - else if (par->family == NFPROTO_IPV6 && + else if (xt_family(par) == NFPROTO_IPV6 && ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF) type = PACKET_MULTICAST; else diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index f23e97bb42d7..2b4ab189bba7 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -116,9 +116,9 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par) int ret; if (info->flags & XT_POLICY_MATCH_IN) - ret = match_policy_in(skb, info, par->family); + ret = match_policy_in(skb, info, xt_family(par)); else - ret = match_policy_out(skb, info, par->family); + ret = match_policy_out(skb, info, xt_family(par)); if (ret < 0) ret = info->flags & XT_POLICY_MATCH_NONE ? true : false; diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index e3b7a09b103e..bf250000e084 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -236,7 +236,7 @@ static void recent_table_flush(struct recent_table *t) static bool recent_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; + struct net *net = xt_net(par); struct recent_net *recent_net = recent_pernet(net); const struct xt_recent_mtinfo_v1 *info = par->matchinfo; struct recent_table *t; @@ -245,7 +245,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par) u_int8_t ttl; bool ret = info->invert; - if (par->family == NFPROTO_IPV4) { + if (xt_family(par) == NFPROTO_IPV4) { const struct iphdr *iph = ip_hdr(skb); if (info->side == XT_RECENT_DEST) @@ -266,7 +266,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par) } /* use TTL as seen before forwarding */ - if (par->out != NULL && skb->sk == NULL) + if (xt_out(par) != NULL && skb->sk == NULL) ttl++; spin_lock_bh(&recent_lock); @@ -274,12 +274,12 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par) nf_inet_addr_mask(&addr, &addr_mask, &t->mask); - e = recent_entry_lookup(t, &addr_mask, par->family, + e = recent_entry_lookup(t, &addr_mask, xt_family(par), (info->check_set & XT_RECENT_TTL) ? ttl : 0); if (e == NULL) { if (!(info->check_set & XT_RECENT_SET)) goto out; - e = recent_entry_init(t, &addr_mask, par->family, ttl); + e = recent_entry_init(t, &addr_mask, xt_family(par), ttl); if (e == NULL) par->hotdrop = true; ret = !ret; diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 5669e5b453f4..1bfede7be418 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -55,7 +55,7 @@ set_match_v0(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v0 *info = par->matchinfo; - ADT_OPT(opt, par->family, info->match_set.u.compat.dim, + ADT_OPT(opt, xt_family(par), info->match_set.u.compat.dim, info->match_set.u.compat.flags, 0, UINT_MAX); return match_set(info->match_set.index, skb, par, &opt, @@ -118,7 +118,7 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v1 *info = par->matchinfo; - ADT_OPT(opt, par->family, info->match_set.dim, + ADT_OPT(opt, xt_family(par), info->match_set.dim, info->match_set.flags, 0, UINT_MAX); if (opt.flags & IPSET_RETURN_NOMATCH) @@ -184,7 +184,7 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_set_info_match_v3 *info = par->matchinfo; int ret; - ADT_OPT(opt, par->family, info->match_set.dim, + ADT_OPT(opt, xt_family(par), info->match_set.dim, info->match_set.flags, info->flags, UINT_MAX); if (info->packets.op != IPSET_COUNTER_NONE || @@ -231,7 +231,7 @@ set_match_v4(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_set_info_match_v4 *info = par->matchinfo; int ret; - ADT_OPT(opt, par->family, info->match_set.dim, + ADT_OPT(opt, xt_family(par), info->match_set.dim, info->match_set.flags, info->flags, UINT_MAX); if (info->packets.op != IPSET_COUNTER_NONE || @@ -259,9 +259,9 @@ set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v0 *info = par->targinfo; - ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim, + ADT_OPT(add_opt, xt_family(par), info->add_set.u.compat.dim, info->add_set.u.compat.flags, 0, UINT_MAX); - ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim, + ADT_OPT(del_opt, xt_family(par), info->del_set.u.compat.dim, info->del_set.u.compat.flags, 0, UINT_MAX); if (info->add_set.index != IPSET_INVALID_ID) @@ -332,9 +332,9 @@ set_target_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v1 *info = par->targinfo; - ADT_OPT(add_opt, par->family, info->add_set.dim, + ADT_OPT(add_opt, xt_family(par), info->add_set.dim, info->add_set.flags, 0, UINT_MAX); - ADT_OPT(del_opt, par->family, info->del_set.dim, + ADT_OPT(del_opt, xt_family(par), info->del_set.dim, info->del_set.flags, 0, UINT_MAX); if (info->add_set.index != IPSET_INVALID_ID) @@ -401,9 +401,9 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v2 *info = par->targinfo; - ADT_OPT(add_opt, par->family, info->add_set.dim, + ADT_OPT(add_opt, xt_family(par), info->add_set.dim, info->add_set.flags, info->flags, info->timeout); - ADT_OPT(del_opt, par->family, info->del_set.dim, + ADT_OPT(del_opt, xt_family(par), info->del_set.dim, info->del_set.flags, 0, UINT_MAX); /* Normalize to fit into jiffies */ @@ -429,11 +429,11 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) const struct xt_set_info_target_v3 *info = par->targinfo; int ret; - ADT_OPT(add_opt, par->family, info->add_set.dim, + ADT_OPT(add_opt, xt_family(par), info->add_set.dim, info->add_set.flags, info->flags, info->timeout); - ADT_OPT(del_opt, par->family, info->del_set.dim, + ADT_OPT(del_opt, xt_family(par), info->del_set.dim, info->del_set.flags, 0, UINT_MAX); - ADT_OPT(map_opt, par->family, info->map_set.dim, + ADT_OPT(map_opt, xt_family(par), info->map_set.dim, info->map_set.flags, 0, UINT_MAX); /* Normalize to fit into jiffies */ diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 018c369c9f0d..2198914707f5 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -57,7 +57,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, struct sock *sk = skb->sk; if (!sk) - sk = nf_sk_lookup_slow_v4(par->net, skb, par->in); + sk = nf_sk_lookup_slow_v4(xt_net(par), skb, xt_in(par)); if (sk) { bool wildcard; bool transparent = true; @@ -114,7 +114,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) struct sock *sk = skb->sk; if (!sk) - sk = nf_sk_lookup_slow_v6(par->net, skb, par->in); + sk = nf_sk_lookup_slow_v6(xt_net(par), skb, xt_in(par)); if (sk) { bool wildcard; bool transparent = true; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 378c1c976058..ce7ea6c1c50d 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -213,6 +213,12 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, int ret = 0, result = 0; struct tcf_ipt *ipt = to_ipt(a); struct xt_action_param par; + struct nf_hook_state state = { + .net = dev_net(skb->dev), + .in = skb->dev, + .hook = ipt->tcfi_hook, + .pf = NFPROTO_IPV4, + }; if (skb_unclone(skb, GFP_ATOMIC)) return TC_ACT_UNSPEC; @@ -226,13 +232,9 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, * worry later - danger - this API seems to have changed * from earlier kernels */ - par.net = dev_net(skb->dev); - par.in = skb->dev; - par.out = NULL; - par.hooknum = ipt->tcfi_hook; + par.state = &state; par.target = ipt->tcfi_t->u.kernel.target; par.targinfo = ipt->tcfi_t->data; - par.family = NFPROTO_IPV4; ret = par.target->target(skb, &par); switch (ret) { diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index c66ca9400ab4..c1b23e3060b8 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -57,17 +57,20 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, struct xt_action_param acpar; const struct xt_set_info *set = (const void *) em->data; struct net_device *dev, *indev = NULL; + struct nf_hook_state state = { + .net = em->net, + }; int ret, network_offset; switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): - acpar.family = NFPROTO_IPV4; + state.pf = NFPROTO_IPV4; if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) return 0; acpar.thoff = ip_hdrlen(skb); break; case htons(ETH_P_IPV6): - acpar.family = NFPROTO_IPV6; + state.pf = NFPROTO_IPV6; if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) return 0; /* doesn't call ipv6_find_hdr() because ipset doesn't use thoff, yet */ @@ -77,9 +80,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, return 0; } - acpar.hooknum = 0; - - opt.family = acpar.family; + opt.family = state.pf; opt.dim = set->dim; opt.flags = set->flags; opt.cmdflags = 0; @@ -95,9 +96,9 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, if (skb->skb_iif) indev = dev_get_by_index_rcu(em->net, skb->skb_iif); - acpar.net = em->net; - acpar.in = indev ? indev : dev; - acpar.out = dev; + state.in = indev ? indev : dev; + state.out = dev; + acpar.state = &state; ret = ip_set_test(set->index, skb, &acpar, &opt); -- cgit v1.2.3 From 0e5a1c7eb3fc705c4cc6c1e058e81d1f2e721c72 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 3 Nov 2016 10:56:26 +0100 Subject: netfilter: nf_tables: use hook state from xt_action_param structure Don't copy relevant fields from hook state structure, instead use the one that is already available in struct xt_action_param. This patch also adds a set of new wrapper functions to fetch relevant hook state structure fields. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 35 +++++++++++++++++++++++--------- net/bridge/netfilter/nft_meta_bridge.c | 2 +- net/bridge/netfilter/nft_reject_bridge.c | 30 ++++++++++++++++----------- net/ipv4/netfilter/nft_dup_ipv4.c | 2 +- net/ipv4/netfilter/nft_fib_ipv4.c | 14 ++++++------- net/ipv4/netfilter/nft_masq_ipv4.c | 4 ++-- net/ipv4/netfilter/nft_redir_ipv4.c | 3 +-- net/ipv4/netfilter/nft_reject_ipv4.c | 4 ++-- net/ipv6/netfilter/nft_dup_ipv6.c | 2 +- net/ipv6/netfilter/nft_fib_ipv6.c | 16 +++++++-------- net/ipv6/netfilter/nft_masq_ipv6.c | 3 ++- net/ipv6/netfilter/nft_redir_ipv6.c | 3 ++- net/ipv6/netfilter/nft_reject_ipv6.c | 6 +++--- net/netfilter/nf_dup_netdev.c | 2 +- net/netfilter/nf_tables_core.c | 10 ++++----- net/netfilter/nf_tables_trace.c | 8 ++++---- net/netfilter/nft_fib.c | 2 +- net/netfilter/nft_fib_inet.c | 2 +- net/netfilter/nft_log.c | 5 +++-- net/netfilter/nft_lookup.c | 5 ++--- net/netfilter/nft_meta.c | 6 +++--- net/netfilter/nft_queue.c | 2 +- net/netfilter/nft_reject_inet.c | 18 ++++++++-------- net/netfilter/nft_rt.c | 4 ++-- 24 files changed, 105 insertions(+), 83 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 44060344f958..3295fb85bff6 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -14,27 +14,42 @@ struct nft_pktinfo { struct sk_buff *skb; - struct net *net; - const struct net_device *in; - const struct net_device *out; - u8 pf; - u8 hook; bool tprot_set; u8 tprot; /* for x_tables compatibility */ struct xt_action_param xt; }; +static inline struct net *nft_net(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->net; +} + +static inline unsigned int nft_hook(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->hook; +} + +static inline u8 nft_pf(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->pf; +} + +static inline const struct net_device *nft_in(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->in; +} + +static inline const struct net_device *nft_out(const struct nft_pktinfo *pkt) +{ + return pkt->xt.state->out; +} + static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, struct sk_buff *skb, const struct nf_hook_state *state) { pkt->skb = skb; - pkt->net = state->net; - pkt->in = state->in; - pkt->out = state->out; - pkt->hook = state->hook; - pkt->pf = state->pf; pkt->xt.state = state; } diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index ad47a921b701..5974dbc1ea24 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -23,7 +23,7 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { const struct nft_meta *priv = nft_expr_priv(expr); - const struct net_device *in = pkt->in, *out = pkt->out; + const struct net_device *in = nft_in(pkt), *out = nft_out(pkt); u32 *dest = ®s->data[priv->dreg]; const struct net_bridge_port *p; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 4b3df6b0e3b9..206dc266ecd2 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -315,17 +315,20 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IP): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v4_unreach(pkt->net, pkt->skb, - pkt->in, pkt->hook, + nft_reject_br_send_v4_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v4_tcp_reset(pkt->net, pkt->skb, - pkt->in, pkt->hook); + nft_reject_br_send_v4_tcp_reset(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt)); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v4_unreach(pkt->net, pkt->skb, - pkt->in, pkt->hook, + nft_reject_br_send_v4_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), nft_reject_icmp_code(priv->icmp_code)); break; } @@ -333,17 +336,20 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IPV6): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v6_unreach(pkt->net, pkt->skb, - pkt->in, pkt->hook, + nft_reject_br_send_v6_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v6_tcp_reset(pkt->net, pkt->skb, - pkt->in, pkt->hook); + nft_reject_br_send_v6_tcp_reset(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt)); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v6_unreach(pkt->net, pkt->skb, - pkt->in, pkt->hook, + nft_reject_br_send_v6_unreach(nft_net(pkt), pkt->skb, + nft_in(pkt), + nft_hook(pkt), nft_reject_icmpv6_code(priv->icmp_code)); break; } diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index bf855e64fc45..7ab544fbc382 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -30,7 +30,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr, }; int oif = regs->data[priv->sreg_dev]; - nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif); + nf_dup_ipv4(nft_net(pkt), pkt->skb, nft_hook(pkt), &gw, oif); } static int nft_dup_ipv4_init(const struct nft_ctx *ctx, diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index db91fd42db67..1b49966484b3 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -45,9 +45,9 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, __be32 addr; if (priv->flags & NFTA_FIB_F_IIF) - dev = pkt->in; + dev = nft_in(pkt); else if (priv->flags & NFTA_FIB_F_OIF) - dev = pkt->out; + dev = nft_out(pkt); iph = ip_hdr(pkt->skb); if (priv->flags & NFTA_FIB_F_DADDR) @@ -55,7 +55,7 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, else addr = iph->saddr; - *dst = inet_dev_addr_type(pkt->net, dev, addr); + *dst = inet_dev_addr_type(nft_net(pkt), dev, addr); } EXPORT_SYMBOL_GPL(nft_fib4_eval_type); @@ -89,13 +89,13 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, * Search results for the desired outinterface instead. */ if (priv->flags & NFTA_FIB_F_OIF) - oif = pkt->out; + oif = nft_out(pkt); else if (priv->flags & NFTA_FIB_F_IIF) - oif = pkt->in; + oif = nft_in(pkt); else oif = NULL; - if (pkt->hook == NF_INET_PRE_ROUTING && fib4_is_local(pkt->skb)) { + if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib4_is_local(pkt->skb)) { nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX); return; } @@ -122,7 +122,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, fl4.saddr = get_saddr(iph->daddr); } - if (fib_lookup(pkt->net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) + if (fib_lookup(nft_net(pkt), &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) return; switch (res.type) { diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index 51ced81b616c..4f697e431811 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -31,8 +31,8 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, range.max_proto.all = *(__be16 *)®s->data[priv->sreg_proto_max]; } - regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->hook, - &range, pkt->out); + regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt), + &range, nft_out(pkt)); } static struct nft_expr_type nft_masq_ipv4_type; diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index c09d4381427e..16df0493c5ce 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -35,8 +35,7 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr, mr.range[0].flags |= priv->flags; - regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, - pkt->hook); + regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt)); } static struct nft_expr_type nft_redir_ipv4_type; diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index 2c2553b9026c..517ce93699de 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -27,10 +27,10 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr, switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach(pkt->skb, priv->icmp_code, pkt->hook); + nf_send_unreach(pkt->skb, priv->icmp_code, nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: - nf_send_reset(pkt->net, pkt->skb, pkt->hook); + nf_send_reset(nft_net(pkt), pkt->skb, nft_hook(pkt)); break; default: break; diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 8bfd470cbe72..26074a8bada7 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -28,7 +28,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr, struct in6_addr *gw = (struct in6_addr *)®s->data[priv->sreg_addr]; int oif = regs->data[priv->sreg_dev]; - nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif); + nf_dup_ipv6(nft_net(pkt), pkt->skb, nft_hook(pkt), gw, oif); } static int nft_dup_ipv6_init(const struct nft_ctx *ctx, diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index ff1f1b6b4a4a..d526bb594956 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -80,17 +80,17 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, return RTN_UNREACHABLE; if (priv->flags & NFTA_FIB_F_IIF) - dev = pkt->in; + dev = nft_in(pkt); else if (priv->flags & NFTA_FIB_F_OIF) - dev = pkt->out; + dev = nft_out(pkt); nft_fib6_flowi_init(&fl6, priv, pkt, dev); v6ops = nf_get_ipv6_ops(); - if (dev && v6ops && v6ops->chk_addr(pkt->net, &fl6.daddr, dev, true)) + if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) ret = RTN_LOCAL; - route_err = afinfo->route(pkt->net, (struct dst_entry **)&rt, + route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt, flowi6_to_flowi(&fl6), false); if (route_err) goto err; @@ -158,20 +158,20 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, int lookup_flags; if (priv->flags & NFTA_FIB_F_IIF) - oif = pkt->in; + oif = nft_in(pkt); else if (priv->flags & NFTA_FIB_F_OIF) - oif = pkt->out; + oif = nft_out(pkt); lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif); - if (pkt->hook == NF_INET_PRE_ROUTING && fib6_is_local(pkt->skb)) { + if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib6_is_local(pkt->skb)) { nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX); return; } *dest = 0; again: - rt = (void *)ip6_route_lookup(pkt->net, &fl6, lookup_flags); + rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags); if (rt->dst.error) goto put_rt_err; diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 9597ffb74077..a2aff1277b40 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -32,7 +32,8 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr, range.max_proto.all = *(__be16 *)®s->data[priv->sreg_proto_max]; } - regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); + regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, + nft_out(pkt)); } static struct nft_expr_type nft_masq_ipv6_type; diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index aca44e89a881..bfcd5af6bc15 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -35,7 +35,8 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr, range.flags |= priv->flags; - regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->hook); + regs->verdict.code = + nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt)); } static struct nft_expr_type nft_redir_ipv6_type; diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index 92bda9908bb9..057deeaff1cb 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -27,11 +27,11 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr, switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code, - pkt->hook); + nf_send_unreach6(nft_net(pkt), pkt->skb, priv->icmp_code, + nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(pkt->net, pkt->skb, pkt->hook); + nf_send_reset6(nft_net(pkt), pkt->skb, nft_hook(pkt)); break; default: break; diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index 7ec69723940f..44ae986c383f 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -19,7 +19,7 @@ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif) struct net_device *dev; struct sk_buff *skb; - dev = dev_get_by_index_rcu(pkt->net, oif); + dev = dev_get_by_index_rcu(nft_net(pkt), oif); if (dev == NULL) return; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 70de32a6d5c0..b63b1edb76a6 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -53,10 +53,10 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info, nft_trace_notify(info); - nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, - pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", - chain->table->name, chain->name, comments[type], - rulenum); + nf_log_trace(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb, + nft_in(pkt), nft_out(pkt), &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + chain->table->name, chain->name, comments[type], rulenum); } static inline void nft_trace_packet(struct nft_traceinfo *info, @@ -124,7 +124,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv) { const struct nft_chain *chain = priv, *basechain = chain; - const struct net *net = pkt->net; + const struct net *net = nft_net(pkt); const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_regs regs; diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index ab695f8e2d29..12eb9041dca2 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -171,7 +171,7 @@ void nft_trace_notify(struct nft_traceinfo *info) unsigned int size; int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE; - if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTRACE)) + if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE)) return; size = nlmsg_total_size(sizeof(struct nfgenmsg)) + @@ -207,7 +207,7 @@ void nft_trace_notify(struct nft_traceinfo *info) nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; - if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(pkt->pf))) + if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt)))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type))) @@ -249,7 +249,7 @@ void nft_trace_notify(struct nft_traceinfo *info) goto nla_put_failure; if (!info->packet_dumped) { - if (nf_trace_fill_dev_info(skb, pkt->in, pkt->out)) + if (nf_trace_fill_dev_info(skb, nft_in(pkt), nft_out(pkt))) goto nla_put_failure; if (nf_trace_fill_pkt_info(skb, pkt)) @@ -258,7 +258,7 @@ void nft_trace_notify(struct nft_traceinfo *info) } nlmsg_end(skb, nlh); - nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC); + nfnetlink_send(skb, nft_net(pkt), 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC); return; nla_put_failure: diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 4944a8b7f7a7..249c9b80c150 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -144,7 +144,7 @@ void nft_fib_store_result(void *reg, enum nft_fib_result r, *dreg = index; break; case NFT_FIB_RESULT_OIFNAME: - dev = dev_get_by_index_rcu(pkt->net, index); + dev = dev_get_by_index_rcu(nft_net(pkt), index); strncpy(reg, dev ? dev->name : "", IFNAMSIZ); break; default: diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c index fe8943b572b7..9120fc7228f4 100644 --- a/net/netfilter/nft_fib_inet.c +++ b/net/netfilter/nft_fib_inet.c @@ -21,7 +21,7 @@ static void nft_fib_inet_eval(const struct nft_expr *expr, { const struct nft_fib *priv = nft_expr_priv(expr); - switch (pkt->pf) { + switch (nft_pf(pkt)) { case NFPROTO_IPV4: switch (priv->result) { case NFT_FIB_RESULT_OIF: diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 1b01404bb33f..6271e40a3dd6 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -32,8 +32,9 @@ static void nft_log_eval(const struct nft_expr *expr, { const struct nft_log *priv = nft_expr_priv(expr); - nf_log_packet(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, - pkt->out, &priv->loginfo, "%s", priv->prefix); + nf_log_packet(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb, + nft_in(pkt), nft_out(pkt), &priv->loginfo, "%s", + priv->prefix); } static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 8166b6994cc7..a8ce49bcda80 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -35,9 +35,8 @@ static void nft_lookup_eval(const struct nft_expr *expr, const struct nft_set_ext *ext; bool found; - found = set->ops->lookup(pkt->net, set, ®s->data[priv->sreg], &ext) ^ - priv->invert; - + found = set->ops->lookup(nft_net(pkt), set, ®s->data[priv->sreg], + &ext) ^ priv->invert; if (!found) { regs->verdict.code = NFT_BREAK; return; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 64994023bf81..66c7f4b4c49b 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -36,7 +36,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, { const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; - const struct net_device *in = pkt->in, *out = pkt->out; + const struct net_device *in = nft_in(pkt), *out = nft_out(pkt); struct sock *sk; u32 *dest = ®s->data[priv->dreg]; @@ -49,7 +49,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, *(__be16 *)dest = skb->protocol; break; case NFT_META_NFPROTO: - *dest = pkt->pf; + *dest = nft_pf(pkt); break; case NFT_META_L4PROTO: if (!pkt->tprot_set) @@ -146,7 +146,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, break; } - switch (pkt->pf) { + switch (nft_pf(pkt)) { case NFPROTO_IPV4: if (ipv4_is_multicast(ip_hdr(skb)->daddr)) *dest = PACKET_MULTICAST; diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 393d359a1889..3e19fa1230dc 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -43,7 +43,7 @@ static void nft_queue_eval(const struct nft_expr *expr, queue = priv->queuenum + cpu % priv->queues_total; } else { queue = nfqueue_hash(pkt->skb, queue, - priv->queues_total, pkt->pf, + priv->queues_total, nft_pf(pkt), jhash_initval); } } diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index e79d9ca2ffee..9e90a02cb104 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -23,36 +23,36 @@ static void nft_reject_inet_eval(const struct nft_expr *expr, { struct nft_reject *priv = nft_expr_priv(expr); - switch (pkt->pf) { + switch (nft_pf(pkt)) { case NFPROTO_IPV4: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: nf_send_unreach(pkt->skb, priv->icmp_code, - pkt->hook); + nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: - nf_send_reset(pkt->net, pkt->skb, pkt->hook); + nf_send_reset(nft_net(pkt), pkt->skb, nft_hook(pkt)); break; case NFT_REJECT_ICMPX_UNREACH: nf_send_unreach(pkt->skb, nft_reject_icmp_code(priv->icmp_code), - pkt->hook); + nft_hook(pkt)); break; } break; case NFPROTO_IPV6: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code, - pkt->hook); + nf_send_unreach6(nft_net(pkt), pkt->skb, + priv->icmp_code, nft_hook(pkt)); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(pkt->net, pkt->skb, pkt->hook); + nf_send_reset6(nft_net(pkt), pkt->skb, nft_hook(pkt)); break; case NFT_REJECT_ICMPX_UNREACH: - nf_send_unreach6(pkt->net, pkt->skb, + nf_send_unreach6(nft_net(pkt), pkt->skb, nft_reject_icmpv6_code(priv->icmp_code), - pkt->hook); + nft_hook(pkt)); break; } break; diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index 9e5ec1f67020..d3eb640bc784 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -43,14 +43,14 @@ void nft_rt_get_eval(const struct nft_expr *expr, break; #endif case NFT_RT_NEXTHOP4: - if (pkt->pf != NFPROTO_IPV4) + if (nft_pf(pkt) != NFPROTO_IPV4) goto err; *dest = rt_nexthop((const struct rtable *)dst, ip_hdr(skb)->daddr); break; case NFT_RT_NEXTHOP6: - if (pkt->pf != NFPROTO_IPV6) + if (nft_pf(pkt) != NFPROTO_IPV6) goto err; memcpy(dest, rt6_nexthop((struct rt6_info *)dst, -- cgit v1.2.3 From 01886bd91f1ba418ce669dfe97a06ca9504e482a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 3 Nov 2016 10:56:35 +0100 Subject: netfilter: remove hook_entries field from nf_hook_state This field is only useful for nf_queue, so store it in the nf_queue_entry structure instead, away from the core path. Pass hook_head to nf_hook_slow(). Since we always have a valid entry on the first iteration in nf_iterate(), we can use 'do { ... } while (entry)' loop instead. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 10 ++++------ include/linux/netfilter_ingress.h | 4 ++-- include/net/netfilter/nf_queue.h | 1 + net/bridge/br_netfilter_hooks.c | 4 ++-- net/bridge/netfilter/ebtable_broute.c | 2 +- net/netfilter/core.c | 9 ++++----- net/netfilter/nf_queue.c | 13 +++++-------- net/netfilter/nfnetlink_queue.c | 2 +- 8 files changed, 20 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index e0d000f6c9bf..69230140215b 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -54,7 +54,6 @@ struct nf_hook_state { struct net_device *out; struct sock *sk; struct net *net; - struct nf_hook_entry __rcu *hook_entries; int (*okfn)(struct net *, struct sock *, struct sk_buff *); }; @@ -81,7 +80,6 @@ struct nf_hook_entry { }; static inline void nf_hook_state_init(struct nf_hook_state *p, - struct nf_hook_entry *hook_entry, unsigned int hook, u_int8_t pf, struct net_device *indev, @@ -96,7 +94,6 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->out = outdev; p->sk = sk; p->net = net; - RCU_INIT_POINTER(p->hook_entries, hook_entry); p->okfn = okfn; } @@ -150,7 +147,8 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg); extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #endif -int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); +int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, + struct nf_hook_entry *entry); /** * nf_hook - call a netfilter hook @@ -179,10 +177,10 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, if (hook_head) { struct nf_hook_state state; - nf_hook_state_init(&state, hook_head, hook, pf, indev, outdev, + nf_hook_state_init(&state, hook, pf, indev, outdev, sk, net, okfn); - ret = nf_hook_slow(skb, &state); + ret = nf_hook_slow(skb, &state, hook_head); } rcu_read_unlock(); diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index fd44e4131710..2dc3b49b804a 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -26,10 +26,10 @@ static inline int nf_hook_ingress(struct sk_buff *skb) if (unlikely(!e)) return 0; - nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, + nf_hook_state_init(&state, NF_NETDEV_INGRESS, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); - return nf_hook_slow(skb, &state); + return nf_hook_slow(skb, &state, e); } static inline void nf_hook_ingress_init(struct net_device *dev) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 2280cfe86c56..09948d10e38e 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -12,6 +12,7 @@ struct nf_queue_entry { unsigned int id; struct nf_hook_state state; + struct nf_hook_entry *hook; u16 size; /* sizeof(entry) + saved route keys */ /* extra space to store route keys */ diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 7e3645fa6339..8155bd2a5138 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1018,10 +1018,10 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); - nf_hook_state_init(&state, elem, hook, NFPROTO_BRIDGE, indev, outdev, + nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); - ret = nf_hook_slow(skb, &state); + ret = nf_hook_slow(skb, &state, elem); rcu_read_unlock(); if (ret == 1) ret = okfn(net, sk, skb); diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 599679e3498d..8fe36dc3aab2 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -53,7 +53,7 @@ static int ebt_broute(struct sk_buff *skb) struct nf_hook_state state; int ret; - nf_hook_state_init(&state, NULL, NF_BR_BROUTING, + nf_hook_state_init(&state, NF_BR_BROUTING, NFPROTO_BRIDGE, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 64623374bc5f..ebece48b8392 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -308,7 +308,7 @@ unsigned int nf_iterate(struct sk_buff *skb, { unsigned int verdict; - while (*entryp) { + do { repeat: verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state); if (verdict != NF_ACCEPT) { @@ -317,20 +317,19 @@ repeat: goto repeat; } *entryp = rcu_dereference((*entryp)->next); - } + } while (*entryp); return NF_ACCEPT; } /* Returns 1 if okfn() needs to be executed by the caller, * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */ -int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) +int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, + struct nf_hook_entry *entry) { - struct nf_hook_entry *entry; unsigned int verdict; int ret; - entry = rcu_dereference(state->hook_entries); next_hook: verdict = nf_iterate(skb, state, &entry); switch (verdict & NF_VERDICT_MASK) { diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 0fb38966e5bf..2e39e38ae1c7 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -108,7 +108,7 @@ void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry) } static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int queuenum) + struct nf_hook_entry *hook_entry, unsigned int queuenum) { int status = -ENOENT; struct nf_queue_entry *entry = NULL; @@ -136,6 +136,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, *entry = (struct nf_queue_entry) { .skb = skb, .state = *state, + .hook = hook_entry, .size = sizeof(*entry) + afinfo->route_key_size, }; @@ -163,8 +164,7 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, struct nf_hook_entry *entry = *entryp; int ret; - RCU_INIT_POINTER(state->hook_entries, entry); - ret = __nf_queue(skb, state, verdict >> NF_VERDICT_QBITS); + ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS); if (ret < 0) { if (ret == -ESRCH && (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) { @@ -179,15 +179,12 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { - struct nf_hook_entry *hook_entry; + struct nf_hook_entry *hook_entry = entry->hook; + struct nf_hook_ops *elem = &hook_entry->ops; struct sk_buff *skb = entry->skb; const struct nf_afinfo *afinfo; - struct nf_hook_ops *elem; int err; - hook_entry = rcu_dereference(entry->state.hook_entries); - elem = &hook_entry->ops; - nf_queue_entry_release_refs(entry); /* Continue traversal iff userspace said ok... */ diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 5379f788a372..1e33115b399f 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -919,7 +919,7 @@ static struct notifier_block nfqnl_dev_notifier = { static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr) { - return rcu_access_pointer(entry->state.hook_entries) == + return rcu_access_pointer(entry->hook) == (struct nf_hook_entry *)entry_ptr; } -- cgit v1.2.3 From 70ecc24841326396a827deb55c3fefac582a729d Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 2 Nov 2016 11:02:16 -0400 Subject: ipv4: add IP_RECVFRAGSIZE cmsg The IP stack records the largest fragment of a reassembled packet in IPCB(skb)->frag_max_size. When reading a datagram or raw packet that arrived fragmented, expose the value to allow applications to estimate receive path MTU. Tested: Sent data over a veth pair of which the source has a small mtu. Sent data using netcat, received using a dedicated process. Verified that the cmsg IP_RECVFRAGSIZE is returned only when data arrives fragmented, and in that cases matches the veth mtu. ip link add veth0 type veth peer name veth1 ip netns add from ip netns add to ip link set dev veth1 netns to ip netns exec to ip addr add dev veth1 192.168.10.1/24 ip netns exec to ip link set dev veth1 up ip link set dev veth0 netns from ip netns exec from ip addr add dev veth0 192.168.10.2/24 ip netns exec from ip link set dev veth0 up ip netns exec from ip link set dev veth0 mtu 1300 ip netns exec from ethtool -K veth0 ufo off dd if=/dev/zero bs=1 count=1400 2>/dev/null > payload ip netns exec to ./recv_cmsg_recvfragsize -4 -u -p 6000 & ip netns exec from nc -q 1 -u 192.168.10.1 6000 < payload using github.com/wdebruij/kerneltools/blob/master/tests/recvfragsize.c Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 1 + include/uapi/linux/in.h | 1 + net/ipv4/ip_sockglue.c | 26 ++++++++++++++++++++++++++ 3 files changed, 28 insertions(+) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 236a81034fef..c9cff977a7fb 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -228,6 +228,7 @@ struct inet_sock { #define IP_CMSG_PASSSEC BIT(5) #define IP_CMSG_ORIGDSTADDR BIT(6) #define IP_CMSG_CHECKSUM BIT(7) +#define IP_CMSG_RECVFRAGSIZE BIT(8) /** * sk_to_full_sk - Access to a full socket diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index eaf94919291a..4e557f4e9553 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -117,6 +117,7 @@ struct in_addr { #define IP_NODEFRAG 22 #define IP_CHECKSUM 23 #define IP_BIND_ADDRESS_NO_PORT 24 +#define IP_RECVFRAGSIZE 25 /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b8a2d63d1fb8..ecbaae200131 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -97,6 +97,17 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data); } +static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb) +{ + int val; + + if (IPCB(skb)->frag_max_size == 0) + return; + + val = IPCB(skb)->frag_max_size; + put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val); +} + static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, int tlen, int offset) { @@ -218,6 +229,9 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, if (flags & IP_CMSG_CHECKSUM) ip_cmsg_recv_checksum(msg, skb, tlen, offset); + + if (flags & IP_CMSG_RECVFRAGSIZE) + ip_cmsg_recv_fragsize(msg, skb); } EXPORT_SYMBOL(ip_cmsg_recv_offset); @@ -614,6 +628,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_MULTICAST_LOOP: case IP_RECVORIGDSTADDR: case IP_CHECKSUM: + case IP_RECVFRAGSIZE: if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; @@ -726,6 +741,14 @@ static int do_ip_setsockopt(struct sock *sk, int level, } } break; + case IP_RECVFRAGSIZE: + if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM) + goto e_inval; + if (val) + inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE; + else + inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE; + break; case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; @@ -1357,6 +1380,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_CHECKSUM: val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0; break; + case IP_RECVFRAGSIZE: + val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0; + break; case IP_TOS: val = inet->tos; break; -- cgit v1.2.3 From da96786e26c3ae47316db2b92046b11268c4379c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 2 Nov 2016 12:08:25 -0700 Subject: net: tcp: check skb is non-NULL for exact match on lookups Andrey reported the following error report while running the syzkaller fuzzer: general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 648 Comm: syz-executor Not tainted 4.9.0-rc3+ #333 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff8800398c4480 task.stack: ffff88003b468000 RIP: 0010:[] [< inline >] inet_exact_dif_match include/net/tcp.h:808 RIP: 0010:[] [] __inet_lookup_listener+0xb6/0x500 net/ipv4/inet_hashtables.c:219 RSP: 0018:ffff88003b46f270 EFLAGS: 00010202 RAX: 0000000000000004 RBX: 0000000000004242 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ffffc90000e3c000 RDI: 0000000000000054 RBP: ffff88003b46f2d8 R08: 0000000000004000 R09: ffffffff830910e7 R10: 0000000000000000 R11: 000000000000000a R12: ffffffff867fa0c0 R13: 0000000000004242 R14: 0000000000000003 R15: dffffc0000000000 FS: 00007fb135881700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020cc3000 CR3: 000000006d56a000 CR4: 00000000000006f0 Stack: 0000000000000000 000000000601a8c0 0000000000000000 ffffffff00004242 424200003b9083c2 ffff88003def4041 ffffffff84e7e040 0000000000000246 ffff88003a0911c0 0000000000000000 ffff88003a091298 ffff88003b9083ae Call Trace: [] tcp_v4_send_reset+0x584/0x1700 net/ipv4/tcp_ipv4.c:643 [] tcp_v4_rcv+0x198b/0x2e50 net/ipv4/tcp_ipv4.c:1718 [] ip_local_deliver_finish+0x332/0xad0 net/ipv4/ip_input.c:216 ... MD5 has a code path that calls __inet_lookup_listener with a null skb, so inet{6}_exact_dif_match needs to check skb against null before pulling the flag. Fixes: a04a480d4392 ("net: Require exact match for TCP socket lookups if dif is l3mdev") Reported-by: Andrey Konovalov Signed-off-by: David Ahern Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- include/net/tcp.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ca1ad9ebbc92..a0649973ee5b 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -149,7 +149,7 @@ static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb) { #if defined(CONFIG_NET_L3_MASTER_DEV) if (!net->ipv4.sysctl_tcp_l3mdev_accept && - ipv6_l3mdev_skb(IP6CB(skb)->flags)) + skb && ipv6_l3mdev_skb(IP6CB(skb)->flags)) return true; #endif return false; diff --git a/include/net/tcp.h b/include/net/tcp.h index 5b82d4d94834..304a8e17bc87 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -805,7 +805,7 @@ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) if (!net->ipv4.sysctl_tcp_l3mdev_accept && - ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) + skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) return true; #endif return false; -- cgit v1.2.3 From 9ee6c5dc816aa8256257f2cd4008a9291ec7e985 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Wed, 2 Nov 2016 16:36:17 -0400 Subject: ipv4: allow local fragmentation in ip_finish_output_gso() Some configurations (e.g. geneve interface with default MTU of 1500 over an ethernet interface with 1500 MTU) result in the transmission of packets that exceed the configured MTU. While this should be considered to be a "bad" configuration, it is still allowed and should not result in the sending of packets that exceed the configured MTU. Fix by dropping the assumption in ip_finish_output_gso() that locally originated gso packets will never need fragmentation. Basic testing using iperf (observing CPU usage and bandwidth) have shown no measurable performance impact for traffic not requiring fragmentation. Fixes: c7ba65d7b649 ("net: ip: push gso skb forwarding handling down the stack") Reported-by: Jan Tluka Signed-off-by: Lance Richardson Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip.h | 3 +-- net/ipv4/ip_forward.c | 2 +- net/ipv4/ip_output.c | 6 ++---- net/ipv4/ip_tunnel_core.c | 11 ----------- net/ipv4/ipmr.c | 2 +- 5 files changed, 5 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 5413883ac47f..d3a107850a41 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -47,8 +47,7 @@ struct inet_skb_parm { #define IPSKB_REROUTED BIT(4) #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) -#define IPSKB_FRAG_SEGS BIT(7) -#define IPSKB_L3SLAVE BIT(8) +#define IPSKB_L3SLAVE BIT(7) u16 frag_max_size; }; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 8b4ffd216839..9f0a7b96646f 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb) if (opt->is_strictroute && rt->rt_uses_gateway) goto sr_failed; - IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS; + IPCB(skb)->flags |= IPSKB_FORWARDED; mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); if (ip_exceeds_mtu(skb, mtu)) { IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 03e7f7310423..49714010ac2e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -239,11 +239,9 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, struct sk_buff *segs; int ret = 0; - /* common case: fragmentation of segments is not allowed, - * or seglen is <= mtu + /* common case: seglen is <= mtu */ - if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) || - skb_gso_validate_mtu(skb, mtu)) + if (skb_gso_validate_mtu(skb, mtu)) return ip_finish_output2(net, sk, skb); /* Slowpath - GSO segment length is exceeding the dst MTU. diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 777bc1883870..fed3d29f9eb3 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -63,7 +63,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, int pkt_len = skb->len - skb_inner_network_offset(skb); struct net *net = dev_net(rt->dst.dev); struct net_device *dev = skb->dev; - int skb_iif = skb->skb_iif; struct iphdr *iph; int err; @@ -73,16 +72,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - if (skb_iif && !(df & htons(IP_DF))) { - /* Arrived from an ingress interface, got encapsulated, with - * fragmentation of encapulating frames allowed. - * If skb is gso, the resulting encapsulated network segments - * may exceed dst mtu. - * Allow IP Fragmentation of segments. - */ - IPCB(skb)->flags |= IPSKB_FRAG_SEGS; - } - /* Push down and install the IP header. */ skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5f006e13de56..27089f5ebbb1 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1749,7 +1749,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, vif->dev->stats.tx_bytes += skb->len; } - IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS; + IPCB(skb)->flags |= IPSKB_FORWARDED; /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally * not only before forwarding, but after forwarding on all output -- cgit v1.2.3 From c3f24cfb3e508c70c26ee8569d537c8ca67a36c6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Nov 2016 17:14:41 -0700 Subject: dccp: do not release listeners too soon Andrey Konovalov reported following error while fuzzing with syzkaller : IPv4: Attempt to release alive inet socket ffff880068e98940 kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Modules linked in: CPU: 1 PID: 3905 Comm: a.out Not tainted 4.9.0-rc3+ #333 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88006b9e0000 task.stack: ffff880068770000 RIP: 0010:[] [] selinux_socket_sock_rcv_skb+0xff/0x6a0 security/selinux/hooks.c:4639 RSP: 0018:ffff8800687771c8 EFLAGS: 00010202 RAX: ffff88006b9e0000 RBX: 1ffff1000d0eee3f RCX: 1ffff1000d1d312a RDX: 1ffff1000d1d31a6 RSI: dffffc0000000000 RDI: 0000000000000010 RBP: ffff880068777360 R08: 0000000000000000 R09: 0000000000000002 R10: dffffc0000000000 R11: 0000000000000006 R12: ffff880068e98940 R13: 0000000000000002 R14: ffff880068777338 R15: 0000000000000000 FS: 00007f00ff760700(0000) GS:ffff88006cd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020008000 CR3: 000000006a308000 CR4: 00000000000006e0 Stack: ffff8800687771e0 ffffffff812508a5 ffff8800686f3168 0000000000000007 ffff88006ac8cdfc ffff8800665ea500 0000000041b58ab3 ffffffff847b5480 ffffffff819eac60 ffff88006b9e0860 ffff88006b9e0868 ffff88006b9e07f0 Call Trace: [] security_sock_rcv_skb+0x75/0xb0 security/security.c:1317 [] sk_filter_trim_cap+0x67/0x10e0 net/core/filter.c:81 [] __sk_receive_skb+0x30/0xa00 net/core/sock.c:460 [] dccp_v4_rcv+0xdb2/0x1910 net/dccp/ipv4.c:873 [] ip_local_deliver_finish+0x332/0xad0 net/ipv4/ip_input.c:216 [< inline >] NF_HOOK_THRESH ./include/linux/netfilter.h:232 [< inline >] NF_HOOK ./include/linux/netfilter.h:255 [] ip_local_deliver+0x1c2/0x4b0 net/ipv4/ip_input.c:257 [< inline >] dst_input ./include/net/dst.h:507 [] ip_rcv_finish+0x750/0x1c40 net/ipv4/ip_input.c:396 [< inline >] NF_HOOK_THRESH ./include/linux/netfilter.h:232 [< inline >] NF_HOOK ./include/linux/netfilter.h:255 [] ip_rcv+0x96f/0x12f0 net/ipv4/ip_input.c:487 [] __netif_receive_skb_core+0x1897/0x2a50 net/core/dev.c:4213 [] __netif_receive_skb+0x2a/0x170 net/core/dev.c:4251 [] netif_receive_skb_internal+0x1b3/0x390 net/core/dev.c:4279 [] netif_receive_skb+0x48/0x250 net/core/dev.c:4303 [] tun_get_user+0xbd5/0x28a0 drivers/net/tun.c:1308 [] tun_chr_write_iter+0xda/0x190 drivers/net/tun.c:1332 [< inline >] new_sync_write fs/read_write.c:499 [] __vfs_write+0x334/0x570 fs/read_write.c:512 [] vfs_write+0x17b/0x500 fs/read_write.c:560 [< inline >] SYSC_write fs/read_write.c:607 [] SyS_write+0xd4/0x1a0 fs/read_write.c:599 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 It turns out DCCP calls __sk_receive_skb(), and this broke when lookups no longer took a reference on listeners. Fix this issue by adding a @refcounted parameter to __sk_receive_skb(), so that sock_put() is used only when needed. Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++-- net/core/sock.c | 5 +++-- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 3 ++- 4 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 73c6b008f1b7..92b269709b9a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1596,11 +1596,11 @@ static inline void sock_put(struct sock *sk) void sock_gen_put(struct sock *sk); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, - unsigned int trim_cap); + unsigned int trim_cap, bool refcounted); static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) { - return __sk_receive_skb(sk, skb, nested, 1); + return __sk_receive_skb(sk, skb, nested, 1, true); } static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) diff --git a/net/core/sock.c b/net/core/sock.c index df171acfe232..5e3ca414357e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) EXPORT_SYMBOL(sock_queue_rcv_skb); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, - const int nested, unsigned int trim_cap) + const int nested, unsigned int trim_cap, bool refcounted) { int rc = NET_RX_SUCCESS; @@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, bh_unlock_sock(sk); out: - sock_put(sk); + if (refcounted) + sock_put(sk); return rc; discard_and_relse: kfree_skb(skb); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 345a3aeb8c7e..dff7cfab1da4 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -868,7 +868,7 @@ lookup: goto discard_and_relse; nf_reset(skb); - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4); + return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted); no_dccp_socket: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 3828f94b234c..09c4e19aa285 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -738,7 +738,8 @@ lookup: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0; + return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, + refcounted) ? -1 : 0; no_dccp_socket: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) -- cgit v1.2.3 From 86741ec25462e4c8cdce6df2f41ead05568c7d5e Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 4 Nov 2016 02:23:41 +0900 Subject: net: core: Add a UID field to struct sock. Protocol sockets (struct sock) don't have UIDs, but most of the time, they map 1:1 to userspace sockets (struct socket) which do. Various operations such as the iptables xt_owner match need access to the "UID of a socket", and do so by following the backpointer to the struct socket. This involves taking sk_callback_lock and doesn't work when there is no socket because userspace has already called close(). Simplify this by adding a sk_uid field to struct sock whose value matches the UID of the corresponding struct socket. The semantics are as follows: 1. Whenever sk_socket is non-null: sk_uid is the same as the UID in sk_socket, i.e., matches the return value of sock_i_uid. Specifically, the UID is set when userspace calls socket(), fchown(), or accept(). 2. When sk_socket is NULL, sk_uid is defined as follows: - For a socket that no longer has a sk_socket because userspace has called close(): the previous UID. - For a cloned socket (e.g., an incoming connection that is established but on which userspace has not yet called accept): the UID of the socket it was cloned from. - For a socket that has never had an sk_socket: UID 0 inside the user namespace corresponding to the network namespace the socket belongs to. Kernel sockets created by sock_create_kern are a special case of #1 and sk_uid is the user that created them. For kernel sockets created at network namespace creation time, such as the per-processor ICMP and TCP sockets, this is the user that created the network namespace. Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- include/net/sock.h | 7 +++++++ net/core/sock.c | 5 ++++- net/socket.c | 14 ++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 93331a1492db..cf617ee16723 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -419,6 +419,7 @@ struct sock { u32 sk_max_ack_backlog; __u32 sk_priority; __u32 sk_mark; + kuid_t sk_uid; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; long sk_rcvtimeo; @@ -1664,6 +1665,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) sk->sk_wq = parent->wq; parent->sk = sk; sk_set_socket(sk, parent); + sk->sk_uid = SOCK_INODE(parent)->i_uid; security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } @@ -1671,6 +1673,11 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) kuid_t sock_i_uid(struct sock *sk); unsigned long sock_i_ino(struct sock *sk); +static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) +{ + return sk ? sk->sk_uid : make_kuid(net->user_ns, 0); +} + static inline u32 net_tx_rndhash(void) { u32 v = prandom_u32(); diff --git a/net/core/sock.c b/net/core/sock.c index d8e4532e89e7..40dbc13453f9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2460,8 +2460,11 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_type = sock->type; sk->sk_wq = sock->wq; sock->sk = sk; - } else + sk->sk_uid = SOCK_INODE(sock)->i_uid; + } else { sk->sk_wq = NULL; + sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0); + } rwlock_init(&sk->sk_callback_lock); lockdep_set_class_and_name(&sk->sk_callback_lock, diff --git a/net/socket.c b/net/socket.c index 970a7ea3fc4a..4ce33c35e606 100644 --- a/net/socket.c +++ b/net/socket.c @@ -518,8 +518,22 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, return used; } +int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) +{ + int err = simple_setattr(dentry, iattr); + + if (!err) { + struct socket *sock = SOCKET_I(d_inode(dentry)); + + sock->sk->sk_uid = iattr->ia_uid; + } + + return err; +} + static const struct inode_operations sockfs_inode_ops = { .listxattr = sockfs_listxattr, + .setattr = sockfs_setattr, }; /** -- cgit v1.2.3 From 622ec2c9d52405973c9f1ca5116eb1c393adfc7d Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 4 Nov 2016 02:23:42 +0900 Subject: net: core: add UID to flows, rules, and routes - Define a new FIB rule attributes, FRA_UID_RANGE, to describe a range of UIDs. - Define a RTA_UID attribute for per-UID route lookups and dumps. - Support passing these attributes to and from userspace via rtnetlink. The value INVALID_UID indicates no UID was specified. - Add a UID field to the flow structures. Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- include/net/fib_rules.h | 9 ++++- include/net/flow.h | 5 +++ include/uapi/linux/fib_rules.h | 6 ++++ include/uapi/linux/rtnetlink.h | 1 + net/core/fib_rules.c | 74 ++++++++++++++++++++++++++++++++++++++++-- net/ipv4/fib_frontend.c | 1 + net/ipv4/route.c | 11 +++++++ net/ipv6/route.c | 7 ++++ 8 files changed, 111 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 456e4a6006ab..8dbfdf728cd8 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -8,6 +8,11 @@ #include #include +struct fib_kuid_range { + kuid_t start; + kuid_t end; +}; + struct fib_rule { struct list_head list; int iifindex; @@ -30,6 +35,7 @@ struct fib_rule { int suppress_prefixlen; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; + struct fib_kuid_range uid_range; struct rcu_head rcu; }; @@ -92,7 +98,8 @@ struct fib_rules_ops { [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ [FRA_GOTO] = { .type = NLA_U32 }, \ - [FRA_L3MDEV] = { .type = NLA_U8 } + [FRA_L3MDEV] = { .type = NLA_U8 }, \ + [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) } static inline void fib_rule_get(struct fib_rule *rule) { diff --git a/include/net/flow.h b/include/net/flow.h index 035aa7716967..51373f3a5e31 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -11,6 +11,7 @@ #include #include #include +#include /* * ifindex generation is per-net namespace, and loopback is @@ -37,6 +38,7 @@ struct flowi_common { #define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; + kuid_t flowic_uid; }; union flowi_uli { @@ -74,6 +76,7 @@ struct flowi4 { #define flowi4_flags __fl_common.flowic_flags #define flowi4_secid __fl_common.flowic_secid #define flowi4_tun_key __fl_common.flowic_tun_key +#define flowi4_uid __fl_common.flowic_uid /* (saddr,daddr) must be grouped, same order as in IP header */ __be32 saddr; @@ -131,6 +134,7 @@ struct flowi6 { #define flowi6_flags __fl_common.flowic_flags #define flowi6_secid __fl_common.flowic_secid #define flowi6_tun_key __fl_common.flowic_tun_key +#define flowi6_uid __fl_common.flowic_uid struct in6_addr daddr; struct in6_addr saddr; /* Note: flowi6_tos is encoded in flowlabel, too. */ @@ -176,6 +180,7 @@ struct flowi { #define flowi_flags u.__fl_common.flowic_flags #define flowi_secid u.__fl_common.flowic_secid #define flowi_tun_key u.__fl_common.flowic_tun_key +#define flowi_uid u.__fl_common.flowic_uid } __attribute__((__aligned__(BITS_PER_LONG/8))); static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 14404b3ebb89..bbf02a63a011 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -29,6 +29,11 @@ struct fib_rule_hdr { __u32 flags; }; +struct fib_rule_uid_range { + __u32 start; + __u32 end; +}; + enum { FRA_UNSPEC, FRA_DST, /* destination address */ @@ -51,6 +56,7 @@ enum { FRA_OIFNAME, FRA_PAD, FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ + FRA_UID_RANGE, /* UID range */ __FRA_MAX }; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 5a78be518101..e14377f2ec27 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -318,6 +318,7 @@ enum rtattr_type_t { RTA_ENCAP, RTA_EXPIRES, RTA_PAD, + RTA_UID, __RTA_MAX }; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index be4629c344a6..5de436a73be2 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -18,6 +18,11 @@ #include #include +static const struct fib_kuid_range fib_kuid_range_unset = { + KUIDT_INIT(0), + KUIDT_INIT(~0), +}; + int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table, u32 flags) { @@ -33,6 +38,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->table = table; r->flags = flags; r->fr_net = ops->fro_net; + r->uid_range = fib_kuid_range_unset; r->suppress_prefixlen = -1; r->suppress_ifgroup = -1; @@ -172,6 +178,34 @@ void fib_rules_unregister(struct fib_rules_ops *ops) } EXPORT_SYMBOL_GPL(fib_rules_unregister); +static int uid_range_set(struct fib_kuid_range *range) +{ + return uid_valid(range->start) && uid_valid(range->end); +} + +static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb) +{ + struct fib_rule_uid_range *in; + struct fib_kuid_range out; + + in = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]); + + out.start = make_kuid(current_user_ns(), in->start); + out.end = make_kuid(current_user_ns(), in->end); + + return out; +} + +static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range) +{ + struct fib_rule_uid_range out = { + from_kuid_munged(current_user_ns(), range->start), + from_kuid_munged(current_user_ns(), range->end) + }; + + return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out); +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) @@ -193,6 +227,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg)) goto out; + if (uid_lt(fl->flowi_uid, rule->uid_range.start) || + uid_gt(fl->flowi_uid, rule->uid_range.end)) + goto out; + ret = ops->match(rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; @@ -429,6 +467,21 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (rule->l3mdev && rule->table) goto errout_free; + if (tb[FRA_UID_RANGE]) { + if (current_user_ns() != net->user_ns) { + err = -EPERM; + goto errout_free; + } + + rule->uid_range = nla_get_kuid_range(tb); + + if (!uid_range_set(&rule->uid_range) || + !uid_lte(rule->uid_range.start, rule->uid_range.end)) + goto errout_free; + } else { + rule->uid_range = fib_kuid_range_unset; + } + if ((nlh->nlmsg_flags & NLM_F_EXCL) && rule_exists(ops, frh, tb, rule)) { err = -EEXIST; @@ -497,6 +550,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *tmp; struct nlattr *tb[FRA_MAX+1]; + struct fib_kuid_range range; int err = -EINVAL; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) @@ -516,6 +570,14 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (err < 0) goto errout; + if (tb[FRA_UID_RANGE]) { + range = nla_get_kuid_range(tb); + if (!uid_range_set(&range)) + goto errout; + } else { + range = fib_kuid_range_unset; + } + list_for_each_entry(rule, &ops->rules_list, list) { if (frh->action && (frh->action != rule->action)) continue; @@ -552,6 +614,11 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV]))) continue; + if (uid_range_set(&range) && + (!uid_eq(rule->uid_range.start, range.start) || + !uid_eq(rule->uid_range.end, range.end))) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -619,7 +686,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */ + nla_total_size(4) /* FRA_FWMARK */ + nla_total_size(4) /* FRA_FWMASK */ - + nla_total_size_64bit(8); /* FRA_TUN_ID */ + + nla_total_size_64bit(8) /* FRA_TUN_ID */ + + nla_total_size(sizeof(struct fib_kuid_range)); if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -679,7 +747,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, (rule->tun_id && nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) || (rule->l3mdev && - nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev))) + nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) || + (uid_range_set(&rule->uid_range) && + nla_put_uid_range(skb, &rule->uid_range))) goto nla_put_failure; if (rule->suppress_ifgroup != -1) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c3b80478226e..d93eea8e2409 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -610,6 +610,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_FLOW] = { .type = NLA_U32 }, [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4392db83d540..92e59a638d3b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2504,6 +2504,11 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) goto nla_put_failure; + if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && + nla_put_u32(skb, RTA_UID, + from_kuid_munged(current_user_ns(), fl4->flowi4_uid))) + goto nla_put_failure; + error = rt->dst.error; if (rt_is_input_route(rt)) { @@ -2556,6 +2561,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) int mark; struct sk_buff *skb; u32 table_id = RT_TABLE_MAIN; + kuid_t uid; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); if (err < 0) @@ -2583,6 +2589,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; + if (tb[RTA_UID]) + uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); + else + uid = (iif ? INVALID_UID : current_uid()); memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst; @@ -2590,6 +2600,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) fl4.flowi4_tos = rtm->rtm_tos; fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; + fl4.flowi4_uid = uid; if (iif) { struct net_device *dev; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 947ed1ded026..fdb9c87137bd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2797,6 +2797,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_EXPIRES] = { .type = NLA_U32 }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -3371,6 +3372,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) if (tb[RTA_MARK]) fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); + if (tb[RTA_UID]) + fl6.flowi6_uid = make_kuid(current_user_ns(), + nla_get_u32(tb[RTA_UID])); + else + fl6.flowi6_uid = iif ? INVALID_UID : current_uid(); + if (iif) { struct net_device *dev; int flags = 0; -- cgit v1.2.3 From e2d118a1cb5e60d077131a09db1d81b90a5295fe Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Fri, 4 Nov 2016 02:23:43 +0900 Subject: net: inet: Support UID-based routing in IP protocols. - Use the UID in routing lookups made by protocol connect() and sendmsg() functions. - Make sure that routing lookups triggered by incoming packets (e.g., Path MTU discovery) take the UID of the socket into account. - For packets not associated with a userspace socket, (e.g., ping replies) use UID 0 inside the user namespace corresponding to the network namespace the socket belongs to. This allows all namespaces to apply routing and iptables rules to kernel-originated traffic in that namespaces by matching UID 0. This is better than using the UID of the kernel socket that is sending the traffic, because the UID of kernel sockets created at namespace creation time (e.g., the per-processor ICMP and TCP sockets) is the UID of the user that created the socket, which might not be mapped in the namespace. Tested: compiles allnoconfig, allyesconfig, allmodconfig Tested: https://android-review.googlesource.com/253302 Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- include/net/flow.h | 4 +++- include/net/ip.h | 1 + include/net/ip6_route.h | 5 +++-- include/net/route.h | 5 +++-- net/ipv4/icmp.c | 2 ++ net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/ip_output.c | 3 ++- net/ipv4/ping.c | 3 ++- net/ipv4/raw.c | 2 +- net/ipv4/route.c | 26 +++++++++++++++----------- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_ipv4.c | 9 ++++++--- net/ipv4/udp.c | 3 ++- net/ipv6/af_inet6.c | 1 + net/ipv6/ah6.c | 5 +++-- net/ipv6/datagram.c | 1 + net/ipv6/esp6.c | 5 +++-- net/ipv6/icmp.c | 7 +++++-- net/ipv6/inet6_connection_sock.c | 2 ++ net/ipv6/ip6_gre.c | 4 ++++ net/ipv6/ip6_tunnel.c | 4 ++++ net/ipv6/ip6_vti.c | 5 +++-- net/ipv6/ipcomp6.c | 5 +++-- net/ipv6/netfilter.c | 1 + net/ipv6/ping.c | 1 + net/ipv6/raw.c | 1 + net/ipv6/route.c | 13 +++++++++---- net/ipv6/syncookies.c | 1 + net/ipv6/tcp_ipv6.c | 2 ++ net/ipv6/udp.c | 1 + net/l2tp/l2tp_ip6.c | 1 + 31 files changed, 89 insertions(+), 40 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index 51373f3a5e31..6bbbca8af8e3 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -96,7 +96,8 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, __u8 proto, __u8 flags, __be32 daddr, __be32 saddr, - __be16 dport, __be16 sport) + __be16 dport, __be16 sport, + kuid_t uid) { fl4->flowi4_oif = oif; fl4->flowi4_iif = LOOPBACK_IFINDEX; @@ -107,6 +108,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_flags = flags; fl4->flowi4_secid = 0; fl4->flowi4_tun_key.tun_id = 0; + fl4->flowi4_uid = uid; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; diff --git a/include/net/ip.h b/include/net/ip.h index 5413883ac47f..55cdaac02957 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -179,6 +179,7 @@ struct ip_reply_arg { /* -1 if not needed */ int bound_dev_if; u8 tos; + kuid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f83e78d071a3..9dc2c182a263 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -140,9 +140,10 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr); void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, - u32 mark); + u32 mark, kuid_t uid); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); -void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark); +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, + kuid_t uid); void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, u32 mark); void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk); diff --git a/include/net/route.h b/include/net/route.h index 0429d47cad25..c0874c87c173 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -153,7 +153,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport); + daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -269,7 +269,8 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_ANYSRC; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport); + protocol, flow_flags, dst, src, dport, sport, + sk->sk_uid); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 38abe70e595f..53a890b605fc 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -425,6 +425,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_mark = mark; + fl4.flowi4_uid = sock_net_uid(net, NULL); fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev); @@ -473,6 +474,7 @@ static struct rtable *icmp_route_lookup(struct net *net, param->replyopts.opt.opt.faddr : iph->saddr); fl4->saddr = saddr; fl4->flowi4_mark = mark; + fl4->flowi4_uid = sock_net_uid(net, NULL); fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 61a9deec2993..d5d3ead0a6c3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -415,7 +415,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num)); + htons(ireq->ir_num), sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -452,7 +452,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, - htons(ireq->ir_num)); + htons(ireq->ir_num), sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 03e7f7310423..37dfacd340af 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1587,7 +1587,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, ip_reply_arg_flowi_flags(arg), daddr, saddr, - tcp_hdr(skb)->source, tcp_hdr(skb)->dest); + tcp_hdr(skb)->source, tcp_hdr(skb)->dest, + arg->uid); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 205e2000d395..d11129f1178d 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -789,7 +789,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk), faddr, saddr, 0, 0); + inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, + sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 6a0bd68a565b..838ea5e33253 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -606,7 +606,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), - daddr, saddr, 0, 0); + daddr, saddr, 0, 0, sk->sk_uid); if (!inet->hdrincl) { rfv.msg = msg; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 92e59a638d3b..2355883e1025 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -507,7 +507,8 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) } EXPORT_SYMBOL(__ip_select_ident); -static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, +static void __build_flow_key(const struct net *net, struct flowi4 *fl4, + const struct sock *sk, const struct iphdr *iph, int oif, u8 tos, u8 prot, u32 mark, int flow_flags) @@ -523,7 +524,8 @@ static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, flowi4_init_output(fl4, oif, mark, tos, RT_SCOPE_UNIVERSE, prot, flow_flags, - iph->daddr, iph->saddr, 0, 0); + iph->daddr, iph->saddr, 0, 0, + sock_net_uid(net, sk)); } static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, @@ -535,7 +537,7 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, u8 prot = iph->protocol; u32 mark = skb->mark; - __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(sock_net(sk), fl4, sk, iph, oif, tos, prot, mark, 0); } static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) @@ -552,7 +554,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk), - daddr, inet->inet_saddr, 0, 0); + daddr, inet->inet_saddr, 0, 0, sk->sk_uid); rcu_read_unlock(); } @@ -800,7 +802,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf rt = (struct rtable *) dst; - __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0); + __build_flow_key(sock_net(sk), &fl4, sk, iph, oif, tos, prot, mark, 0); __ip_do_redirect(rt, skb, &fl4, true); } @@ -1018,7 +1020,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, if (!mark) mark = IP4_REPLY_MARK(net, skb->mark); - __build_flow_key(&fl4, NULL, iph, oif, + __build_flow_key(net, &fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1034,7 +1036,7 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct flowi4 fl4; struct rtable *rt; - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0); if (!fl4.flowi4_mark) fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); @@ -1053,6 +1055,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) struct rtable *rt; struct dst_entry *odst = NULL; bool new = false; + struct net *net = sock_net(sk); bh_lock_sock(sk); @@ -1066,7 +1069,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) goto out; } - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); rt = (struct rtable *)odst; if (odst->obsolete && !odst->ops->check(odst, 0)) { @@ -1106,7 +1109,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, struct flowi4 fl4; struct rtable *rt; - __build_flow_key(&fl4, NULL, iph, oif, + __build_flow_key(net, &fl4, NULL, iph, oif, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1121,9 +1124,10 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; + struct net *net = sock_net(sk); - __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); - rt = __ip_route_output_key(sock_net(sk), &fl4); + __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); + rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_do_redirect(rt, skb, &fl4, false); ip_rt_put(rt); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index e3c4043c27de..0dc6286272aa 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -372,7 +372,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), opt->srr ? opt->faddr : ireq->ir_rmt_addr, - ireq->ir_loc_addr, th->source, th->dest); + ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b9b8282633d4..6491b7c1f975 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -691,6 +691,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) offsetof(struct inet_timewait_sock, tw_bound_dev_if)); arg.tos = ip_hdr(skb)->tos; + arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -711,7 +712,7 @@ out: outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(struct net *net, +static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, @@ -726,6 +727,7 @@ static void tcp_v4_send_ack(struct net *net, #endif ]; } rep; + struct net *net = sock_net(sk); struct ip_reply_arg arg; memset(&rep.th, 0, sizeof(struct tcphdr)); @@ -775,6 +777,7 @@ static void tcp_v4_send_ack(struct net *net, if (oif) arg.bound_dev_if = oif; arg.tos = tos; + arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -790,7 +793,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(sock_net(sk), skb, + tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, @@ -818,7 +821,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, * exception of segments, MUST be right-shifted by * Rcv.Wind.Shift bits: */ - tcp_v4_send_ack(sock_net(sk), skb, seq, + tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 195992e0440d..a32a890294b1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1019,7 +1019,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, flow_flags, - faddr, saddr, dport, inet->inet_sport); + faddr, saddr, dport, inet->inet_sport, + sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 46ad699937fd..c86911b63f8a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -678,6 +678,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); rcu_read_lock(); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 0630a4d5daaa..189eb10b742d 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -662,9 +662,10 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 620c79a0130a..c5d76d2edd26 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -54,6 +54,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk) fl6->fl6_dport = inet->inet_dport; fl6->fl6_sport = inet->inet_sport; fl6->flowlabel = np->flow_label; + fl6->flowi6_uid = sk->sk_uid; if (!fl6->flowi6_oif) fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 060a60b2f8a6..218f0cba231c 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -474,9 +474,10 @@ static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index bd59c343d35f..ab249fee616b 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -92,9 +92,10 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); if (type == ICMPV6_PKT_TOOBIG) - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); else if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) @@ -484,6 +485,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; + fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); @@ -658,6 +660,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; + fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 532c3ef282c5..1c86c478f578 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -88,6 +88,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); + fl6->flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(fl6)); dst = ip6_dst_lookup_flow(sk, fl6, final_p); @@ -136,6 +137,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->flowi6_mark = sk->sk_mark; fl6->fl6_sport = inet->inet_sport; fl6->fl6_dport = inet->inet_dport; + fl6->flowi6_uid = sk->sk_uid; security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); rcu_read_lock(); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d7d6d3ae0b3b..710bc79f9113 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -548,6 +548,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); if (err) return -1; @@ -602,6 +604,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM))) return -1; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 03e050d22508..bfa889c2a87b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1240,6 +1240,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_mark = skb->mark; } + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; @@ -1318,6 +1320,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl6.flowi6_mark = skb->mark; } + fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 35c5b2d8c401..af3f0e011265 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -608,9 +608,10 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 1b9316e1386a..54d165b9845a 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -74,9 +74,10 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; if (type == NDISC_REDIRECT) - ip6_redirect(skb, net, skb->dev->ifindex, 0); + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); else - ip6_update_pmtu(skb, net, info, 0, 0); + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d11c46833d61..39970e212ad5 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -26,6 +26,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) struct flowi6 fl6 = { .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, .flowi6_mark = skb->mark, + .flowi6_uid = sock_net_uid(net, skb->sk), .daddr = iph->daddr, .saddr = iph->saddr, }; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 66e2d9dfc43a..e1f8b34d7a2e 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -113,6 +113,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.daddr = *daddr; fl6.flowi6_oif = oif; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 610e09354b2e..291ebc260e70 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -776,6 +776,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; ipc6.hlimit = -1; ipc6.tclass = -1; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fdb9c87137bd..6aa014eedccd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1405,7 +1405,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, - int oif, u32 mark) + int oif, u32 mark, kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1417,6 +1417,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_uid = uid; dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) @@ -1430,7 +1431,7 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) struct dst_entry *dst; ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark); + sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid); dst = __sk_dst_get(sk); if (!dst || !dst->obsolete || @@ -1522,7 +1523,8 @@ static struct dst_entry *ip6_route_redirect(struct net *net, flags, __ip6_route_redirect); } -void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, + kuid_t uid) { const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; struct dst_entry *dst; @@ -1535,6 +1537,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_uid = uid; dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr); rt6_do_redirect(dst, NULL, skb); @@ -1556,6 +1559,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, fl6.flowi6_mark = mark; fl6.daddr = msg->dest; fl6.saddr = iph->daddr; + fl6.flowi6_uid = sock_net_uid(net, NULL); dst = ip6_route_redirect(net, &fl6, &iph->saddr); rt6_do_redirect(dst, NULL, skb); @@ -1564,7 +1568,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) { - ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); + ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark, + sk->sk_uid); } EXPORT_SYMBOL_GPL(ip6_sk_redirect); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 59c483937aec..97830a6a9cbb 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -227,6 +227,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; + fl6.flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5a27ab4eab39..aece1b15e744 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -233,6 +233,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sk->sk_uid; opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); @@ -824,6 +825,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; + fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a7700bbf6788..9103c5c755db 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1138,6 +1138,7 @@ do_udp_sendmsg: fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index ad3468c32b53..1cea54feab27 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -519,6 +519,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sk->sk_uid; ipc6.hlimit = -1; ipc6.tclass = -1; -- cgit v1.2.3 From ad959036a70890bea121403c6a4e373dff5b7311 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 4 Nov 2016 11:28:58 +0100 Subject: net/sock: add an explicit sk argument for ip_cmsg_recv_offset() So that we can use it even after orphaining the skbuff. Suggested-by: Eric Dumazet Signed-off-by: Paolo Abeni Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 5 +++-- net/ipv4/ip_sockglue.c | 6 +++--- net/ipv4/udp.c | 2 +- net/ipv6/udp.c | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 55cdaac02957..f48c67cab222 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -579,7 +579,8 @@ int ip_options_rcv_srr(struct sk_buff *skb); */ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); -void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int tlen, int offset); +void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, @@ -601,7 +602,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { - ip_cmsg_recv_offset(msg, skb, 0, 0); + ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0); } bool icmp_global_allow(void); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ecbaae200131..8b13881ed064 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -164,10 +164,10 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); } -void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, - int tlen, int offset) +void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb, int tlen, int offset) { - struct inet_sock *inet = inet_sk(skb->sk); + struct inet_sock *inet = inet_sk(sk); unsigned int flags = inet->cmsg_flags; /* Ordered by supposed usage frequency */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a32a890294b1..28a0165cb848 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1421,7 +1421,7 @@ try_again: *addr_len = sizeof(*sin); } if (inet->cmsg_flags) - ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr), off); + ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off); err = copied; if (flags & MSG_TRUNC) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9103c5c755db..b5a23ce8981d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -425,7 +425,7 @@ try_again: if (is_udp4) { if (inet->cmsg_flags) - ip_cmsg_recv_offset(msg, skb, + ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off); } else { if (np->rxopt.all) -- cgit v1.2.3 From 7c13f97ffde63cc792c49ec1513f3974f2f05229 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 4 Nov 2016 11:28:59 +0100 Subject: udp: do fwd memory scheduling on dequeue A new argument is added to __skb_recv_datagram to provide an explicit skb destructor, invoked under the receive queue lock. The UDP protocol uses such argument to perform memory reclaiming on dequeue, so that the UDP protocol does not set anymore skb->desctructor. Instead explicit memory reclaiming is performed at close() time and when skbs are removed from the receive queue. The in kernel UDP protocol users now need to call a skb_recv_udp() variant instead of skb_recv_datagram() to properly perform memory accounting on dequeue. Overall, this allows acquiring only once the receive queue lock on dequeue. Tested using pktgen with random src port, 64 bytes packet, wire-speed on a 10G link as sender and udp_sink as the receiver, using an l4 tuple rxhash to stress the contention, and one or more udp_sink instances with reuseport. nr sinks vanilla patched 1 440 560 3 2150 2300 6 3650 3800 9 4450 4600 12 6250 6450 v1 -> v2: - do rmem and allocated memory scheduling under the receive lock - do bulk scheduling in first_packet_length() and in udp_destruct_sock() - avoid the typdef for the dequeue callback Suggested-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++++ include/net/udp.h | 15 +++++++++++++++ net/core/datagram.c | 17 ++++++++++++----- net/ipv4/udp.c | 42 ++++++++++++++++++++++++------------------ net/ipv6/udp.c | 3 +-- net/rxrpc/input.c | 7 +++---- net/sunrpc/svcsock.c | 2 +- net/sunrpc/xprtsock.c | 2 +- net/unix/af_unix.c | 4 ++-- 9 files changed, 63 insertions(+), 33 deletions(-) (limited to 'include/net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cc6e23eaac91..a4aeeca7e805 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3033,9 +3033,13 @@ static inline void skb_frag_list_init(struct sk_buff *skb) int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, const struct sk_buff *skb); struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); diff --git a/include/net/udp.h b/include/net/udp.h index 6134f37ba3ab..e6e4e19be387 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -248,6 +248,21 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb, /* net/ipv4/udp.c */ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); +void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); +static inline struct sk_buff * +__skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *peeked, + int *off, int *err) +{ + return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), + udp_skb_destructor, peeked, off, err); +} +static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, + int noblock, int *err) +{ + int peeked, off = 0; + + return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err); +} void udp_v4_early_demux(struct sk_buff *skb); int udp_get_port(struct sock *sk, unsigned short snum, diff --git a/net/core/datagram.c b/net/core/datagram.c index bfb973aebb5b..49816af8586b 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -165,6 +165,7 @@ done: * __skb_try_recv_datagram - Receive a datagram skbuff * @sk: socket * @flags: MSG_ flags + * @destructor: invoked under the receive lock on successful dequeue * @peeked: returns non-zero if this packet has been seen before * @off: an offset in bytes to peek skb from. Returns an offset * within an skb where data actually starts @@ -197,6 +198,8 @@ done: * the standard around please. */ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err, struct sk_buff **last) { @@ -241,9 +244,11 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, } atomic_inc(&skb->users); - } else + } else { __skb_unlink(skb, queue); - + if (destructor) + destructor(sk, skb); + } spin_unlock_irqrestore(&queue->lock, cpu_flags); *off = _off; return skb; @@ -262,6 +267,8 @@ no_packet: EXPORT_SYMBOL(__skb_try_recv_datagram); struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err) { struct sk_buff *skb, *last; @@ -270,8 +277,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { - skb = __skb_try_recv_datagram(sk, flags, peeked, off, err, - &last); + skb = __skb_try_recv_datagram(sk, flags, destructor, peeked, + off, err, &last); if (skb) return skb; @@ -290,7 +297,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int peeked, off = 0; return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &off, err); + NULL, &peeked, &off, err); } EXPORT_SYMBOL(skb_recv_datagram); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 28a0165cb848..097b70628631 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1173,26 +1173,26 @@ out: return ret; } +/* fully reclaim rmem/fwd memory allocated for skb */ static void udp_rmem_release(struct sock *sk, int size, int partial) { int amt; atomic_sub(size, &sk->sk_rmem_alloc); - - spin_lock_bh(&sk->sk_receive_queue.lock); sk->sk_forward_alloc += size; amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1); sk->sk_forward_alloc -= amt; - spin_unlock_bh(&sk->sk_receive_queue.lock); if (amt) __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT); } -static void udp_rmem_free(struct sk_buff *skb) +/* Note: called with sk_receive_queue.lock held */ +void udp_skb_destructor(struct sock *sk, struct sk_buff *skb) { - udp_rmem_release(skb->sk, skb->truesize, 1); + udp_rmem_release(sk, skb->truesize, 1); } +EXPORT_SYMBOL(udp_skb_destructor); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) { @@ -1229,9 +1229,9 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) sk->sk_forward_alloc -= size; - /* the skb owner in now the udp socket */ - skb->sk = sk; - skb->destructor = udp_rmem_free; + /* no need to setup a destructor, we will explicitly release the + * forward allocated memory on dequeue + */ skb->dev = NULL; sock_skb_set_dropcount(sk, skb); @@ -1255,8 +1255,15 @@ EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); static void udp_destruct_sock(struct sock *sk) { /* reclaim completely the forward allocated memory */ - __skb_queue_purge(&sk->sk_receive_queue); - udp_rmem_release(sk, 0, 0); + unsigned int total = 0; + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + total += skb->truesize; + kfree_skb(skb); + } + udp_rmem_release(sk, total, 0); + inet_sock_destruct(sk); } @@ -1288,12 +1295,11 @@ EXPORT_SYMBOL_GPL(skb_consume_udp); */ static int first_packet_length(struct sock *sk) { - struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; + struct sk_buff_head *rcvq = &sk->sk_receive_queue; struct sk_buff *skb; + int total = 0; int res; - __skb_queue_head_init(&list_kill); - spin_lock_bh(&rcvq->lock); while ((skb = skb_peek(rcvq)) != NULL && udp_lib_checksum_complete(skb)) { @@ -1303,12 +1309,13 @@ static int first_packet_length(struct sock *sk) IS_UDPLITE(sk)); atomic_inc(&sk->sk_drops); __skb_unlink(skb, rcvq); - __skb_queue_tail(&list_kill, skb); + total += skb->truesize; + kfree_skb(skb); } res = skb ? skb->len : -1; + if (total) + udp_rmem_release(sk, total, 1); spin_unlock_bh(&rcvq->lock); - - __skb_queue_purge(&list_kill); return res; } @@ -1363,8 +1370,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, try_again: peeking = off = sk_peek_offset(sk, flags); - skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &off, &err); + skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b5a23ce8981d..5313818b7485 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -343,8 +343,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, try_again: peeking = off = sk_peek_offset(sk, flags); - skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &off, &err); + skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 44fb8d893c7d..1d87b5453ef7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1053,7 +1053,7 @@ void rxrpc_data_ready(struct sock *udp_sk) ASSERT(!irqs_disabled()); - skb = skb_recv_datagram(udp_sk, 0, 1, &ret); + skb = skb_recv_udp(udp_sk, 0, 1, &ret); if (!skb) { if (ret == -EAGAIN) return; @@ -1075,10 +1075,9 @@ void rxrpc_data_ready(struct sock *udp_sk) __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0); - /* The socket buffer we have is owned by UDP, with UDP's data all over - * it, but we really want our own data there. + /* The UDP protocol already released all skb resources; + * we are free to add our own data there. */ - skb_orphan(skb); sp = rxrpc_skb(skb); /* dig out the RxRPC connection details */ diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index e2a55dc787e6..78da4aee3543 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -547,7 +547,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, 0, 0, MSG_PEEK | MSG_DONTWAIT); if (err >= 0) - skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err); + skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err); if (skb == NULL) { if (err != -EAGAIN) { diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 1758665d609c..7178d0aa7861 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1080,7 +1080,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport) if (sk == NULL) goto out; for (;;) { - skb = skb_recv_datagram(sk, 0, 1, &err); + skb = skb_recv_udp(sk, 0, 1, &err); if (skb != NULL) { xs_udp_data_read_skb(&transport->xprt, sk, skb); consume_skb(skb); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 145082e2ba36..87620183910e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2113,8 +2113,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, mutex_lock(&u->iolock); skip = sk_peek_offset(sk, flags); - skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err, - &last); + skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip, + &err, &last); if (skb) break; -- cgit v1.2.3 From d0a81f67cd6286d32f42a167d19c7a387c23db79 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 3 Nov 2016 14:56:01 +0100 Subject: net: make default TX queue length a defined constant The default TX queue length of Ethernet devices have been a magic constant of 1000, ever since the initial git import. Looking back in historical trees[1][2] the value used to be 100, with the same comment "Ethernet wants good queues". The commit[3] that changed this from 100 to 1000 didn't describe why, but from conversations with Robert Olsson it seems that it was changed when Ethernet devices went from 100Mbit/s to 1Gbit/s, because the link speed increased x10 the queue size were also adjusted. This value later caused much heartache for the bufferbloat community. This patch merely moves the value into a defined constant. [1] https://git.kernel.org/cgit/linux/kernel/git/davem/netdev-vger-cvs.git/ [2] https://git.kernel.org/cgit/linux/kernel/git/tglx/history.git/ [3] https://git.kernel.org/tglx/history/c/98921832c232 Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 2 ++ net/ethernet/eth.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index cd334c9584e9..f1b76b8e6d2d 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -6,6 +6,8 @@ #include #include +#define DEFAULT_TX_QUEUE_LEN 1000 + struct qdisc_walker { int stop; int skip; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index d9e2fe1da724..8c5a479681ca 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -62,6 +62,7 @@ #include #include #include +#include __setup("ether=", netdev_boot_setup); @@ -359,7 +360,7 @@ void ether_setup(struct net_device *dev) dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = ETH_DATA_LEN; dev->addr_len = ETH_ALEN; - dev->tx_queue_len = 1000; /* Ethernet wants good queues */ + dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; dev->flags = IFF_BROADCAST|IFF_MULTICAST; dev->priv_flags |= IFF_TX_SKB_SHARING; -- cgit v1.2.3 From 9ce183b4c4d24559467d7712e313f2b3f9277437 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Mon, 7 Nov 2016 15:14:36 +0200 Subject: net/sched: act_tunnel_key: add helper inlines to access tcf_tunnel_key Needed for drivers to pick the relevant action when offloading tunnel key act. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/net/tc_act/tc_tunnel_key.h | 37 +++++++++++++++++++++++++++++++++++++ net/sched/act_tunnel_key.c | 1 - 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index 253f8da6c2a6..efef0b4b1b2b 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -12,6 +12,8 @@ #define __NET_TC_TUNNEL_KEY_H #include +#include +#include struct tcf_tunnel_key_params { struct rcu_head rcu; @@ -27,4 +29,39 @@ struct tcf_tunnel_key { #define to_tunnel_key(a) ((struct tcf_tunnel_key *)a) +static inline bool is_tcf_tunnel_set(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + + if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY) + return params->tcft_action == TCA_TUNNEL_KEY_ACT_SET; +#endif + return false; +} + +static inline bool is_tcf_tunnel_release(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + + if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY) + return params->tcft_action == TCA_TUNNEL_KEY_ACT_RELEASE; +#endif + return false; +} + +static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params = rtnl_dereference(t->params); + + return ¶ms->tcft_enc_metadata->u.tun_info; +#else + return NULL; +#endif +} #endif /* __NET_TC_TUNNEL_KEY_H */ diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index af47bdf2f483..cab1fd5712d7 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 9ba6a9a9f7a42673e9fc08ff3594f64caae64d3c Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Mon, 7 Nov 2016 15:14:37 +0200 Subject: flow_dissector: Add enums for encapsulation keys New encapsulation keys were added to the flower classifier, which allow classification according to outer (encapsulation) headers attributes such as key and IP addresses. In order to expose those attributes outside flower, add corresponding enums in the flow dissector. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index d9534927d93b..4e7cf38a7750 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -128,6 +128,10 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */ + FLOW_DISSECTOR_KEY_ENC_KEYID, /* struct flow_dissector_key_keyid */ + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ + FLOW_DISSECTOR_KEY_ENC_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_MAX, }; -- cgit v1.2.3 From f4d997fd613001e612543339e0275c037f94ffe9 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Mon, 7 Nov 2016 15:14:39 +0200 Subject: net/sched: cls_flower: Add UDP port to tunnel parameters The current IP tunneling classification supports only IP addresses and key. Enhance UDP based IP tunneling classification parameters by adding UDP src and dst port. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 1 + include/uapi/linux/pkt_cls.h | 5 +++++ net/sched/cls_flower.c | 29 ++++++++++++++++++++++++++++- 3 files changed, 34 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 4e7cf38a7750..c4f31666afd2 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -132,6 +132,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_ENC_CONTROL, /* struct flow_dissector_key_control */ + FLOW_DISSECTOR_KEY_ENC_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index eb94781757ee..86786d45ee66 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -452,6 +452,11 @@ enum { TCA_FLOWER_KEY_SCTP_SRC, /* be16 */ TCA_FLOWER_KEY_SCTP_DST, /* be16 */ + + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, /* be16 */ __TCA_FLOWER_MAX, }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 6369b74c8f5b..e8dd09af0d0c 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -43,6 +43,7 @@ struct fl_flow_key { struct flow_dissector_key_ipv4_addrs enc_ipv4; struct flow_dissector_key_ipv6_addrs enc_ipv6; }; + struct flow_dissector_key_ports enc_tp; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -155,6 +156,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, } skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id); + skb_key.enc_tp.src = key->tp_src; + skb_key.enc_tp.dst = key->tp_dst; } skb_key.indev_ifindex = skb->skb_iif; @@ -348,6 +351,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_SCTP_DST_MASK] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_SCTP_SRC] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_SCTP_DST] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK] = { .type = NLA_U16 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -500,6 +507,14 @@ static int fl_set_key(struct net *net, struct nlattr **tb, &mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC, sizeof(key->enc_key_id.keyid)); + fl_set_key_val(tb, &key->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, + &mask->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, + sizeof(key->enc_tp.src)); + + fl_set_key_val(tb, &key->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT, + &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, + sizeof(key->enc_tp.dst)); + return 0; } @@ -577,6 +592,8 @@ static void fl_init_dissector(struct cls_fl_head *head, FL_KEY_IS_MASKED(&mask->key, enc_ipv6)) FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp); skb_flow_dissector_init(&head->dissector, keys, cnt); } @@ -951,7 +968,17 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID, &mask->enc_key_id, TCA_FLOWER_UNSPEC, - sizeof(key->enc_key_id))) + sizeof(key->enc_key_id)) || + fl_dump_key_val(skb, &key->enc_tp.src, + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, + &mask->enc_tp.src, + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, + sizeof(key->enc_tp.src)) || + fl_dump_key_val(skb, &key->enc_tp.dst, + TCA_FLOWER_KEY_ENC_UDP_DST_PORT, + &mask->enc_tp.dst, + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, + sizeof(key->enc_tp.dst))) goto nla_put_failure; nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags); -- cgit v1.2.3 From 24ba898d43e87f9ac87353c7a13eef4ee726cab7 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Mon, 7 Nov 2016 15:14:40 +0200 Subject: net/dst: Add dst port to dst_metadata utility functions Add dst port parameter to __ip_tun_set_dst and __ipv6_tun_set_dst utility functions. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/net/dst_metadata.h | 10 ++++++---- net/sched/act_tunnel_key.c | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 6965c8f68ade..701fc814d0af 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -115,6 +115,7 @@ static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, __be32 daddr, __u8 tos, __u8 ttl, + __be16 tp_dst, __be16 flags, __be64 tunnel_id, int md_size) @@ -127,7 +128,7 @@ static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, ip_tunnel_key_init(&tun_dst->u.tun_info.key, saddr, daddr, tos, ttl, - 0, 0, 0, tunnel_id, flags); + 0, 0, tp_dst, tunnel_id, flags); return tun_dst; } @@ -139,12 +140,13 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, const struct iphdr *iph = ip_hdr(skb); return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, - flags, tunnel_id, md_size); + 0, flags, tunnel_id, md_size); } static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr, const struct in6_addr *daddr, __u8 tos, __u8 ttl, + __be16 tp_dst, __be32 label, __be16 flags, __be64 tunnel_id, @@ -162,7 +164,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad info->key.tun_flags = flags; info->key.tun_id = tunnel_id; info->key.tp_src = 0; - info->key.tp_dst = 0; + info->key.tp_dst = tp_dst; info->key.u.ipv6.src = *saddr; info->key.u.ipv6.dst = *daddr; @@ -183,7 +185,7 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr, ipv6_get_dsfield(ip6h), ip6h->hop_limit, - ip6_flowlabel(ip6h), flags, tunnel_id, + 0, ip6_flowlabel(ip6h), flags, tunnel_id, md_size); } #endif /* __NET_DST_METADATA_H */ diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index cab1fd5712d7..bd2f63e9cf5c 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -119,7 +119,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]); metadata = __ip_tun_set_dst(saddr, daddr, 0, 0, - TUNNEL_KEY, key_id, 0); + 0, TUNNEL_KEY, key_id, 0); } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] && tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) { struct in6_addr saddr; @@ -129,7 +129,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]); metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0, - TUNNEL_KEY, key_id, 0); + 0, TUNNEL_KEY, key_id, 0); } if (!metadata) { -- cgit v1.2.3 From 4e24877e61e8507c0843e4bddbc6ecccbfd2e87d Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 6 Nov 2016 21:15:51 +0800 Subject: netfilter: nf_tables: simplify the basic expressions' init routine Some basic expressions are built into nf_tables.ko, such as nft_cmp, nft_lookup, nft_range and so on. But these basic expressions' init routine is a little ugly, too many goto errX labels, and we forget to call nft_range_module_exit in the exit routine, although it is harmless. Acctually, the init and exit routines of these basic expressions are same, i.e. do nft_register_expr in the init routine and do nft_unregister_expr in the exit routine. So it's better to arrange them into an array and deal with them together. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 33 ++++---------- net/netfilter/nf_tables_core.c | 80 +++++++++++----------------------- net/netfilter/nft_bitwise.c | 13 +----- net/netfilter/nft_byteorder.c | 13 +----- net/netfilter/nft_cmp.c | 13 +----- net/netfilter/nft_dynset.c | 13 +----- net/netfilter/nft_immediate.c | 13 +----- net/netfilter/nft_lookup.c | 13 +----- net/netfilter/nft_payload.c | 13 +----- net/netfilter/nft_range.c | 13 +----- 10 files changed, 43 insertions(+), 174 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 00f4f6b1b1ba..862373d4ea9d 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -1,12 +1,18 @@ #ifndef _NET_NF_TABLES_CORE_H #define _NET_NF_TABLES_CORE_H +extern struct nft_expr_type nft_imm_type; +extern struct nft_expr_type nft_cmp_type; +extern struct nft_expr_type nft_lookup_type; +extern struct nft_expr_type nft_bitwise_type; +extern struct nft_expr_type nft_byteorder_type; +extern struct nft_expr_type nft_payload_type; +extern struct nft_expr_type nft_dynset_type; +extern struct nft_expr_type nft_range_type; + int nf_tables_core_module_init(void); void nf_tables_core_module_exit(void); -int nft_immediate_module_init(void); -void nft_immediate_module_exit(void); - struct nft_cmp_fast_expr { u32 data; enum nft_registers sreg:8; @@ -25,24 +31,6 @@ static inline u32 nft_cmp_fast_mask(unsigned int len) extern const struct nft_expr_ops nft_cmp_fast_ops; -int nft_cmp_module_init(void); -void nft_cmp_module_exit(void); - -int nft_range_module_init(void); -void nft_range_module_exit(void); - -int nft_lookup_module_init(void); -void nft_lookup_module_exit(void); - -int nft_dynset_module_init(void); -void nft_dynset_module_exit(void); - -int nft_bitwise_module_init(void); -void nft_bitwise_module_exit(void); - -int nft_byteorder_module_init(void); -void nft_byteorder_module_exit(void); - struct nft_payload { enum nft_payload_bases base:8; u8 offset; @@ -62,7 +50,4 @@ struct nft_payload_set { extern const struct nft_expr_ops nft_payload_fast_ops; extern struct static_key_false nft_trace_enabled; -int nft_payload_module_init(void); -void nft_payload_module_exit(void); - #endif /* _NET_NF_TABLES_CORE_H */ diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index b63b1edb76a6..65dbeadcb118 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -232,68 +232,40 @@ next_rule: } EXPORT_SYMBOL_GPL(nft_do_chain); +static struct nft_expr_type *nft_basic_types[] = { + &nft_imm_type, + &nft_cmp_type, + &nft_lookup_type, + &nft_bitwise_type, + &nft_byteorder_type, + &nft_payload_type, + &nft_dynset_type, + &nft_range_type, +}; + int __init nf_tables_core_module_init(void) { - int err; - - err = nft_immediate_module_init(); - if (err < 0) - goto err1; - - err = nft_cmp_module_init(); - if (err < 0) - goto err2; - - err = nft_lookup_module_init(); - if (err < 0) - goto err3; - - err = nft_bitwise_module_init(); - if (err < 0) - goto err4; + int err, i; - err = nft_byteorder_module_init(); - if (err < 0) - goto err5; - - err = nft_payload_module_init(); - if (err < 0) - goto err6; - - err = nft_dynset_module_init(); - if (err < 0) - goto err7; - - err = nft_range_module_init(); - if (err < 0) - goto err8; + for (i = 0; i < ARRAY_SIZE(nft_basic_types); i++) { + err = nft_register_expr(nft_basic_types[i]); + if (err) + goto err; + } return 0; -err8: - nft_dynset_module_exit(); -err7: - nft_payload_module_exit(); -err6: - nft_byteorder_module_exit(); -err5: - nft_bitwise_module_exit(); -err4: - nft_lookup_module_exit(); -err3: - nft_cmp_module_exit(); -err2: - nft_immediate_module_exit(); -err1: + +err: + while (i-- > 0) + nft_unregister_expr(nft_basic_types[i]); return err; } void nf_tables_core_module_exit(void) { - nft_dynset_module_exit(); - nft_payload_module_exit(); - nft_byteorder_module_exit(); - nft_bitwise_module_exit(); - nft_lookup_module_exit(); - nft_cmp_module_exit(); - nft_immediate_module_exit(); + int i; + + i = ARRAY_SIZE(nft_basic_types); + while (i-- > 0) + nft_unregister_expr(nft_basic_types[i]); } diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 31c15ed2e5fc..877d9acd91ef 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -121,7 +121,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_bitwise_type; static const struct nft_expr_ops nft_bitwise_ops = { .type = &nft_bitwise_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), @@ -130,20 +129,10 @@ static const struct nft_expr_ops nft_bitwise_ops = { .dump = nft_bitwise_dump, }; -static struct nft_expr_type nft_bitwise_type __read_mostly = { +struct nft_expr_type nft_bitwise_type __read_mostly = { .name = "bitwise", .ops = &nft_bitwise_ops, .policy = nft_bitwise_policy, .maxattr = NFTA_BITWISE_MAX, .owner = THIS_MODULE, }; - -int __init nft_bitwise_module_init(void) -{ - return nft_register_expr(&nft_bitwise_type); -} - -void nft_bitwise_module_exit(void) -{ - nft_unregister_expr(&nft_bitwise_type); -} diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index ee63d981268d..13d4e421a6b3 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -169,7 +169,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_byteorder_type; static const struct nft_expr_ops nft_byteorder_ops = { .type = &nft_byteorder_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), @@ -178,20 +177,10 @@ static const struct nft_expr_ops nft_byteorder_ops = { .dump = nft_byteorder_dump, }; -static struct nft_expr_type nft_byteorder_type __read_mostly = { +struct nft_expr_type nft_byteorder_type __read_mostly = { .name = "byteorder", .ops = &nft_byteorder_ops, .policy = nft_byteorder_policy, .maxattr = NFTA_BYTEORDER_MAX, .owner = THIS_MODULE, }; - -int __init nft_byteorder_module_init(void) -{ - return nft_register_expr(&nft_byteorder_type); -} - -void nft_byteorder_module_exit(void) -{ - nft_unregister_expr(&nft_byteorder_type); -} diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index e25b35d70e4d..2b96effeadc1 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -107,7 +107,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_cmp_type; static const struct nft_expr_ops nft_cmp_ops = { .type = &nft_cmp_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)), @@ -208,20 +207,10 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) return &nft_cmp_ops; } -static struct nft_expr_type nft_cmp_type __read_mostly = { +struct nft_expr_type nft_cmp_type __read_mostly = { .name = "cmp", .select_ops = nft_cmp_select_ops, .policy = nft_cmp_policy, .maxattr = NFTA_CMP_MAX, .owner = THIS_MODULE, }; - -int __init nft_cmp_module_init(void) -{ - return nft_register_expr(&nft_cmp_type); -} - -void nft_cmp_module_exit(void) -{ - nft_unregister_expr(&nft_cmp_type); -} diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 517f08767a3c..4339e3f1c4b1 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -261,7 +261,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_dynset_type; static const struct nft_expr_ops nft_dynset_ops = { .type = &nft_dynset_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_dynset)), @@ -271,20 +270,10 @@ static const struct nft_expr_ops nft_dynset_ops = { .dump = nft_dynset_dump, }; -static struct nft_expr_type nft_dynset_type __read_mostly = { +struct nft_expr_type nft_dynset_type __read_mostly = { .name = "dynset", .ops = &nft_dynset_ops, .policy = nft_dynset_policy, .maxattr = NFTA_DYNSET_MAX, .owner = THIS_MODULE, }; - -int __init nft_dynset_module_init(void) -{ - return nft_register_expr(&nft_dynset_type); -} - -void nft_dynset_module_exit(void) -{ - nft_unregister_expr(&nft_dynset_type); -} diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 4528adea7ede..728baf88295a 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -102,7 +102,6 @@ static int nft_immediate_validate(const struct nft_ctx *ctx, return 0; } -static struct nft_expr_type nft_imm_type; static const struct nft_expr_ops nft_imm_ops = { .type = &nft_imm_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), @@ -113,20 +112,10 @@ static const struct nft_expr_ops nft_imm_ops = { .validate = nft_immediate_validate, }; -static struct nft_expr_type nft_imm_type __read_mostly = { +struct nft_expr_type nft_imm_type __read_mostly = { .name = "immediate", .ops = &nft_imm_ops, .policy = nft_immediate_policy, .maxattr = NFTA_IMMEDIATE_MAX, .owner = THIS_MODULE, }; - -int __init nft_immediate_module_init(void) -{ - return nft_register_expr(&nft_imm_type); -} - -void nft_immediate_module_exit(void) -{ - nft_unregister_expr(&nft_imm_type); -} diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index a8ce49bcda80..d4f97fa7e21d 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -154,7 +154,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_lookup_type; static const struct nft_expr_ops nft_lookup_ops = { .type = &nft_lookup_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), @@ -164,20 +163,10 @@ static const struct nft_expr_ops nft_lookup_ops = { .dump = nft_lookup_dump, }; -static struct nft_expr_type nft_lookup_type __read_mostly = { +struct nft_expr_type nft_lookup_type __read_mostly = { .name = "lookup", .ops = &nft_lookup_ops, .policy = nft_lookup_policy, .maxattr = NFTA_LOOKUP_MAX, .owner = THIS_MODULE, }; - -int __init nft_lookup_module_init(void) -{ - return nft_register_expr(&nft_lookup_type); -} - -void nft_lookup_module_exit(void) -{ - nft_unregister_expr(&nft_lookup_type); -} diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index b2f88617611a..98fb5d7b8087 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -148,7 +148,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_payload_type; static const struct nft_expr_ops nft_payload_ops = { .type = &nft_payload_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)), @@ -320,20 +319,10 @@ nft_payload_select_ops(const struct nft_ctx *ctx, return &nft_payload_ops; } -static struct nft_expr_type nft_payload_type __read_mostly = { +struct nft_expr_type nft_payload_type __read_mostly = { .name = "payload", .select_ops = nft_payload_select_ops, .policy = nft_payload_policy, .maxattr = NFTA_PAYLOAD_MAX, .owner = THIS_MODULE, }; - -int __init nft_payload_module_init(void) -{ - return nft_register_expr(&nft_payload_type); -} - -void nft_payload_module_exit(void) -{ - nft_unregister_expr(&nft_payload_type); -} diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index fbc88009ca2e..009062606697 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -122,7 +122,6 @@ nla_put_failure: return -1; } -static struct nft_expr_type nft_range_type; static const struct nft_expr_ops nft_range_ops = { .type = &nft_range_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_range_expr)), @@ -131,20 +130,10 @@ static const struct nft_expr_ops nft_range_ops = { .dump = nft_range_dump, }; -static struct nft_expr_type nft_range_type __read_mostly = { +struct nft_expr_type nft_range_type __read_mostly = { .name = "range", .ops = &nft_range_ops, .policy = nft_range_policy, .maxattr = NFTA_RANGE_MAX, .owner = THIS_MODULE, }; - -int __init nft_range_module_init(void) -{ - return nft_register_expr(&nft_range_type); -} - -void nft_range_module_exit(void) -{ - nft_unregister_expr(&nft_range_type); -} -- cgit v1.2.3 From a4fc1bfc42062e8bc7b2271a90d17403b096ce5d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 3 Nov 2016 15:50:05 +0100 Subject: net/flowcache: Convert to hotplug state machine Install the callbacks via the state machine. Use multi state support to avoid custom list handling for the multiple instances. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: Steffen Klassert Cc: Herbert Xu Cc: netdev@vger.kernel.org Cc: rt@linutronix.de Cc: "David S. Miller" Link: http://lkml.kernel.org/r/20161103145021.28528-10-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + include/net/flow.h | 1 + include/net/flowcache.h | 2 +- net/core/flow.c | 60 ++++++++++++++++++++-------------------------- net/xfrm/xfrm_policy.c | 1 + 5 files changed, 30 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 394eb7ed53be..86b940f19df8 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -56,6 +56,7 @@ enum cpuhp_state { CPUHP_ARM_SHMOBILE_SCU_PREPARE, CPUHP_SH_SH3X_PREPARE, CPUHP_BLK_MQ_PREPARE, + CPUHP_NET_FLOW_PREPARE, CPUHP_TIMERS_DEAD, CPUHP_NOTF_ERR_INJ_PREPARE, CPUHP_MIPS_SOC_PREPARE, diff --git a/include/net/flow.h b/include/net/flow.h index 035aa7716967..2e386bd6ee63 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -239,6 +239,7 @@ struct flow_cache_object *flow_cache_lookup(struct net *net, void *ctx); int flow_cache_init(struct net *net); void flow_cache_fini(struct net *net); +void flow_cache_hp_init(void); void flow_cache_flush(struct net *net); void flow_cache_flush_deferred(struct net *net); diff --git a/include/net/flowcache.h b/include/net/flowcache.h index c8f665ec6e0d..9caf3bfc8d2d 100644 --- a/include/net/flowcache.h +++ b/include/net/flowcache.h @@ -17,7 +17,7 @@ struct flow_cache_percpu { struct flow_cache { u32 hash_shift; struct flow_cache_percpu __percpu *percpu; - struct notifier_block hotcpu_notifier; + struct hlist_node node; int low_watermark; int high_watermark; struct timer_list rnd_timer; diff --git a/net/core/flow.c b/net/core/flow.c index 3937b1b68d5b..841fd7f87b30 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -419,28 +419,20 @@ static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) return 0; } -static int flow_cache_cpu(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node) { - struct flow_cache *fc = container_of(nfb, struct flow_cache, - hotcpu_notifier); - int res, cpu = (unsigned long) hcpu; + struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node); + + return flow_cache_cpu_prepare(fc, cpu); +} + +static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node) +{ + struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node); struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - res = flow_cache_cpu_prepare(fc, cpu); - if (res) - return notifier_from_errno(res); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - __flow_cache_shrink(fc, fcp, 0); - break; - } - return NOTIFY_OK; + __flow_cache_shrink(fc, fcp, 0); + return 0; } int flow_cache_init(struct net *net) @@ -467,18 +459,8 @@ int flow_cache_init(struct net *net) if (!fc->percpu) return -ENOMEM; - cpu_notifier_register_begin(); - - for_each_online_cpu(i) { - if (flow_cache_cpu_prepare(fc, i)) - goto err; - } - fc->hotcpu_notifier = (struct notifier_block){ - .notifier_call = flow_cache_cpu, - }; - __register_hotcpu_notifier(&fc->hotcpu_notifier); - - cpu_notifier_register_done(); + if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node)) + goto err; setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, (unsigned long) fc); @@ -494,8 +476,6 @@ err: fcp->hash_table = NULL; } - cpu_notifier_register_done(); - free_percpu(fc->percpu); fc->percpu = NULL; @@ -509,7 +489,8 @@ void flow_cache_fini(struct net *net) struct flow_cache *fc = &net->xfrm.flow_cache_global; del_timer_sync(&fc->rnd_timer); - unregister_hotcpu_notifier(&fc->hotcpu_notifier); + + cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node); for_each_possible_cpu(i) { struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); @@ -521,3 +502,14 @@ void flow_cache_fini(struct net *net) fc->percpu = NULL; } EXPORT_SYMBOL(flow_cache_fini); + +void __init flow_cache_hp_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE, + "net/flow:prepare", + flow_cache_cpu_up_prep, + flow_cache_cpu_dead); + WARN_ON(ret < 0); +} diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index fd6986634e6f..4a8eff11bdad 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3111,6 +3111,7 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { void __init xfrm_init(void) { + flow_cache_hp_init(); register_pernet_subsys(&xfrm_net_ops); seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); -- cgit v1.2.3 From 0e54d2179f650bac80d89a9def429dbdbed58c11 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 7 Nov 2016 18:31:17 +0100 Subject: netfilter: conntrack: simplify init/uninit of L4 protocol trackers modify registration and deregistration of layer-4 protocol trackers to facilitate inclusion of new elements into the current list of builtin protocols. Both builtin (TCP, UDP, ICMP) and non-builtin (DCCP, GRE, SCTP, UDPlite) layer-4 protocol trackers usually register/deregister themselves using consecutive calls to nf_ct_l4proto_{,pernet}_{,un}register(...). This sequence is interrupted and rolled back in case of error; in order to simplify addition of builtin protocols, the input of the above functions has been modified to allow registering/unregistering multiple protocols. Signed-off-by: Davide Caratti Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 18 ++++-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 76 +++++++---------------- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 78 ++++++++--------------- net/netfilter/nf_conntrack_proto.c | 85 ++++++++++++++++++++++---- net/netfilter/nf_conntrack_proto_dccp.c | 48 ++++----------- net/netfilter/nf_conntrack_proto_gre.c | 11 ++-- net/netfilter/nf_conntrack_proto_sctp.c | 50 ++++----------- net/netfilter/nf_conntrack_proto_udplite.c | 50 ++++----------- 8 files changed, 179 insertions(+), 237 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index de629f1520df..2152b70626d5 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -125,14 +125,24 @@ struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p); /* Protocol pernet registration. */ +int nf_ct_l4proto_pernet_register_one(struct net *net, + struct nf_conntrack_l4proto *proto); +void nf_ct_l4proto_pernet_unregister_one(struct net *net, + struct nf_conntrack_l4proto *proto); int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *proto); + struct nf_conntrack_l4proto *proto[], + unsigned int num_proto); void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *proto); + struct nf_conntrack_l4proto *proto[], + unsigned int num_proto); /* Protocol global registration. */ -int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto); -void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto); +int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto); +void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *proto); +int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[], + unsigned int num_proto); +void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[], + unsigned int num_proto); /* Generic netlink helpers */ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 713c09a74b90..7130ed5dc1fa 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -336,47 +336,34 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); MODULE_ALIAS("ip_conntrack"); MODULE_LICENSE("GPL"); +static struct nf_conntrack_l4proto *builtin_l4proto4[] = { + &nf_conntrack_l4proto_tcp4, + &nf_conntrack_l4proto_udp4, + &nf_conntrack_l4proto_icmp, +}; + static int ipv4_net_init(struct net *net) { int ret = 0; - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4); - if (ret < 0) { - pr_err("nf_conntrack_tcp4: pernet registration failed\n"); - goto out_tcp; - } - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4); - if (ret < 0) { - pr_err("nf_conntrack_udp4: pernet registration failed\n"); - goto out_udp; - } - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp); - if (ret < 0) { - pr_err("nf_conntrack_icmp4: pernet registration failed\n"); - goto out_icmp; - } + ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto4, + ARRAY_SIZE(builtin_l4proto4)); + if (ret < 0) + return ret; ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4); if (ret < 0) { pr_err("nf_conntrack_ipv4: pernet registration failed\n"); - goto out_ipv4; + nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4, + ARRAY_SIZE(builtin_l4proto4)); } - return 0; -out_ipv4: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp); -out_icmp: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4); -out_udp: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4); -out_tcp: return ret; } static void ipv4_net_exit(struct net *net) { nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4); + nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4, + ARRAY_SIZE(builtin_l4proto4)); } static struct pernet_operations ipv4_net_ops = { @@ -410,37 +397,21 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) goto cleanup_pernet; } - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4); - if (ret < 0) { - pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n"); + ret = nf_ct_l4proto_register(builtin_l4proto4, + ARRAY_SIZE(builtin_l4proto4)); + if (ret < 0) goto cleanup_hooks; - } - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4); - if (ret < 0) { - pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n"); - goto cleanup_tcp4; - } - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp); - if (ret < 0) { - pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n"); - goto cleanup_udp4; - } ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4); if (ret < 0) { pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n"); - goto cleanup_icmpv4; + goto cleanup_l4proto; } return ret; - cleanup_icmpv4: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); - cleanup_udp4: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); - cleanup_tcp4: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4); +cleanup_l4proto: + nf_ct_l4proto_unregister(builtin_l4proto4, + ARRAY_SIZE(builtin_l4proto4)); cleanup_hooks: nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); cleanup_pernet: @@ -454,9 +425,8 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) { synchronize_net(); nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4); + nf_ct_l4proto_unregister(builtin_l4proto4, + ARRAY_SIZE(builtin_l4proto4)); nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); unregister_pernet_subsys(&ipv4_net_ops); nf_unregister_sockopt(&so_getorigdst); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 963ee3848675..500be28ff563 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -336,47 +336,35 @@ static struct nf_sockopt_ops so_getorigdst6 = { .owner = THIS_MODULE, }; +static struct nf_conntrack_l4proto *builtin_l4proto6[] = { + &nf_conntrack_l4proto_tcp6, + &nf_conntrack_l4proto_udp6, + &nf_conntrack_l4proto_icmpv6, +}; + static int ipv6_net_init(struct net *net) { int ret = 0; - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp6); - if (ret < 0) { - pr_err("nf_conntrack_tcp6: pernet registration failed\n"); - goto out; - } - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp6); - if (ret < 0) { - pr_err("nf_conntrack_udp6: pernet registration failed\n"); - goto cleanup_tcp6; - } - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmpv6); - if (ret < 0) { - pr_err("nf_conntrack_icmp6: pernet registration failed\n"); - goto cleanup_udp6; - } + ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); + if (ret < 0) + return ret; + ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6); if (ret < 0) { pr_err("nf_conntrack_ipv6: pernet registration failed.\n"); - goto cleanup_icmpv6; + nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); } - return 0; - cleanup_icmpv6: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6); - cleanup_udp6: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6); - cleanup_tcp6: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6); - out: return ret; } static void ipv6_net_exit(struct net *net) { nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6); + nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); } static struct pernet_operations ipv6_net_ops = { @@ -409,37 +397,20 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) goto cleanup_pernet; } - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp6); - if (ret < 0) { - pr_err("nf_conntrack_ipv6: can't register tcp6 proto.\n"); + ret = nf_ct_l4proto_register(builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); + if (ret < 0) goto cleanup_hooks; - } - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp6); - if (ret < 0) { - pr_err("nf_conntrack_ipv6: can't register udp6 proto.\n"); - goto cleanup_tcp6; - } - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmpv6); - if (ret < 0) { - pr_err("nf_conntrack_ipv6: can't register icmpv6 proto.\n"); - goto cleanup_udp6; - } ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6); if (ret < 0) { pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n"); - goto cleanup_icmpv6; + goto cleanup_l4proto; } return ret; - - cleanup_icmpv6: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); - cleanup_udp6: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6); - cleanup_tcp6: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6); +cleanup_l4proto: + nf_ct_l4proto_unregister(builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); cleanup_hooks: nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); cleanup_pernet: @@ -453,9 +424,8 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void) { synchronize_net(); nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); + nf_ct_l4proto_unregister(builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); unregister_pernet_subsys(&ipv6_net_ops); nf_unregister_sockopt(&so_getorigdst6); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 8d2c7d8c666a..9bd34647225a 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -281,15 +281,15 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net, /* FIXME: Allow NULL functions and sub in pointers to generic for them. --RR */ -int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto) +int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto) { int ret = 0; if (l4proto->l3proto >= PF_MAX) return -EBUSY; - if ((l4proto->to_nlattr && !l4proto->nlattr_size) - || (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size)) + if ((l4proto->to_nlattr && !l4proto->nlattr_size) || + (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size)) return -EINVAL; mutex_lock(&nf_ct_proto_mutex); @@ -307,7 +307,8 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto) } for (i = 0; i < MAX_NF_CT_PROTO; i++) - RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic); + RCU_INIT_POINTER(proto_array[i], + &nf_conntrack_l4proto_generic); /* Before making proto_array visible to lockless readers, * we must make sure its content is committed to memory. @@ -335,10 +336,10 @@ out_unlock: mutex_unlock(&nf_ct_proto_mutex); return ret; } -EXPORT_SYMBOL_GPL(nf_ct_l4proto_register); +EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one); -int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *l4proto) +int nf_ct_l4proto_pernet_register_one(struct net *net, + struct nf_conntrack_l4proto *l4proto) { int ret = 0; struct nf_proto_net *pn = NULL; @@ -361,9 +362,9 @@ int nf_ct_l4proto_pernet_register(struct net *net, out: return ret; } -EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); +EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one); -void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) +void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) { BUG_ON(l4proto->l3proto >= PF_MAX); @@ -378,10 +379,10 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) synchronize_rcu(); } -EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); +EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one); -void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *l4proto) +void nf_ct_l4proto_pernet_unregister_one(struct net *net, + struct nf_conntrack_l4proto *l4proto) { struct nf_proto_net *pn = NULL; @@ -395,6 +396,66 @@ void nf_ct_l4proto_pernet_unregister(struct net *net, /* Remove all contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0); } +EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one); + +int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[], + unsigned int num_proto) +{ + int ret = -EINVAL, ver; + unsigned int i; + + for (i = 0; i < num_proto; i++) { + ret = nf_ct_l4proto_register_one(l4proto[i]); + if (ret < 0) + break; + } + if (i != num_proto) { + ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4; + pr_err("nf_conntrack_ipv%d: can't register %s%d proto.\n", + ver, l4proto[i]->name, ver); + nf_ct_l4proto_unregister(l4proto, i); + } + return ret; +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_register); + +int nf_ct_l4proto_pernet_register(struct net *net, + struct nf_conntrack_l4proto *l4proto[], + unsigned int num_proto) +{ + int ret = -EINVAL; + unsigned int i; + + for (i = 0; i < num_proto; i++) { + ret = nf_ct_l4proto_pernet_register_one(net, l4proto[i]); + if (ret < 0) + break; + } + if (i != num_proto) { + pr_err("nf_conntrack_%s%d: pernet registration failed\n", + l4proto[i]->name, + l4proto[i]->l3proto == PF_INET6 ? 6 : 4); + nf_ct_l4proto_pernet_unregister(net, l4proto, i); + } + return ret; +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); + +void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[], + unsigned int num_proto) +{ + while (num_proto-- != 0) + nf_ct_l4proto_unregister_one(l4proto[num_proto]); +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); + +void nf_ct_l4proto_pernet_unregister(struct net *net, + struct nf_conntrack_l4proto *l4proto[], + unsigned int num_proto) +{ + while (num_proto-- != 0) + nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]); +} EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister); int nf_conntrack_proto_pernet_init(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index a45bee52dccc..ac8976964975 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -936,30 +936,21 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { .init_net = dccp_init_net, }; +static struct nf_conntrack_l4proto *dccp_proto[] = { + &dccp_proto4, + &dccp_proto6, +}; + static __net_init int dccp_net_init(struct net *net) { - int ret = 0; - ret = nf_ct_l4proto_pernet_register(net, &dccp_proto4); - if (ret < 0) { - pr_err("nf_conntrack_dccp4: pernet registration failed.\n"); - goto out; - } - ret = nf_ct_l4proto_pernet_register(net, &dccp_proto6); - if (ret < 0) { - pr_err("nf_conntrack_dccp6: pernet registration failed.\n"); - goto cleanup_dccp4; - } - return 0; -cleanup_dccp4: - nf_ct_l4proto_pernet_unregister(net, &dccp_proto4); -out: - return ret; + return nf_ct_l4proto_pernet_register(net, dccp_proto, + ARRAY_SIZE(dccp_proto)); } static __net_exit void dccp_net_exit(struct net *net) { - nf_ct_l4proto_pernet_unregister(net, &dccp_proto6); - nf_ct_l4proto_pernet_unregister(net, &dccp_proto4); + nf_ct_l4proto_pernet_unregister(net, dccp_proto, + ARRAY_SIZE(dccp_proto)); } static struct pernet_operations dccp_net_ops = { @@ -975,29 +966,16 @@ static int __init nf_conntrack_proto_dccp_init(void) ret = register_pernet_subsys(&dccp_net_ops); if (ret < 0) - goto out_pernet; - - ret = nf_ct_l4proto_register(&dccp_proto4); - if (ret < 0) - goto out_dccp4; - - ret = nf_ct_l4proto_register(&dccp_proto6); + return ret; + ret = nf_ct_l4proto_register(dccp_proto, ARRAY_SIZE(dccp_proto)); if (ret < 0) - goto out_dccp6; - - return 0; -out_dccp6: - nf_ct_l4proto_unregister(&dccp_proto4); -out_dccp4: - unregister_pernet_subsys(&dccp_net_ops); -out_pernet: + unregister_pernet_subsys(&dccp_net_ops); return ret; } static void __exit nf_conntrack_proto_dccp_fini(void) { - nf_ct_l4proto_unregister(&dccp_proto6); - nf_ct_l4proto_unregister(&dccp_proto4); + nf_ct_l4proto_unregister(dccp_proto, ARRAY_SIZE(dccp_proto)); unregister_pernet_subsys(&dccp_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 9a715f88b2f1..ff405c9183f1 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -396,7 +396,9 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { static int proto_gre_net_init(struct net *net) { int ret = 0; - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_gre4); + + ret = nf_ct_l4proto_pernet_register_one(net, + &nf_conntrack_l4proto_gre4); if (ret < 0) pr_err("nf_conntrack_gre4: pernet registration failed.\n"); return ret; @@ -404,7 +406,7 @@ static int proto_gre_net_init(struct net *net) static void proto_gre_net_exit(struct net *net) { - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_gre4); + nf_ct_l4proto_pernet_unregister_one(net, &nf_conntrack_l4proto_gre4); nf_ct_gre_keymap_flush(net); } @@ -422,8 +424,7 @@ static int __init nf_ct_proto_gre_init(void) ret = register_pernet_subsys(&proto_gre_net_ops); if (ret < 0) goto out_pernet; - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4); + ret = nf_ct_l4proto_register_one(&nf_conntrack_l4proto_gre4); if (ret < 0) goto out_gre4; @@ -436,7 +437,7 @@ out_pernet: static void __exit nf_ct_proto_gre_fini(void) { - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4); + nf_ct_l4proto_unregister_one(&nf_conntrack_l4proto_gre4); unregister_pernet_subsys(&proto_gre_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 982ea62606c7..17c0ade23fd8 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -816,32 +816,21 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .init_net = sctp_init_net, }; +static struct nf_conntrack_l4proto *sctp_proto[] = { + &nf_conntrack_l4proto_sctp4, + &nf_conntrack_l4proto_sctp6, +}; + static int sctp_net_init(struct net *net) { - int ret = 0; - - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp4); - if (ret < 0) { - pr_err("nf_conntrack_sctp4: pernet registration failed.\n"); - goto out; - } - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp6); - if (ret < 0) { - pr_err("nf_conntrack_sctp6: pernet registration failed.\n"); - goto cleanup_sctp4; - } - return 0; - -cleanup_sctp4: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4); -out: - return ret; + return nf_ct_l4proto_pernet_register(net, sctp_proto, + ARRAY_SIZE(sctp_proto)); } static void sctp_net_exit(struct net *net) { - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp6); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4); + nf_ct_l4proto_pernet_unregister(net, sctp_proto, + ARRAY_SIZE(sctp_proto)); } static struct pernet_operations sctp_net_ops = { @@ -857,29 +846,16 @@ static int __init nf_conntrack_proto_sctp_init(void) ret = register_pernet_subsys(&sctp_net_ops); if (ret < 0) - goto out_pernet; - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4); - if (ret < 0) - goto out_sctp4; - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp6); + return ret; + ret = nf_ct_l4proto_register(sctp_proto, ARRAY_SIZE(sctp_proto)); if (ret < 0) - goto out_sctp6; - - return 0; -out_sctp6: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); -out_sctp4: - unregister_pernet_subsys(&sctp_net_ops); -out_pernet: + unregister_pernet_subsys(&sctp_net_ops); return ret; } static void __exit nf_conntrack_proto_sctp_fini(void) { - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); + nf_ct_l4proto_unregister(sctp_proto, ARRAY_SIZE(sctp_proto)); unregister_pernet_subsys(&sctp_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 029206e8dec4..8cdb4b1bf933 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -336,32 +336,21 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .init_net = udplite_init_net, }; +static struct nf_conntrack_l4proto *udplite_proto[] = { + &nf_conntrack_l4proto_udplite4, + &nf_conntrack_l4proto_udplite6, +}; + static int udplite_net_init(struct net *net) { - int ret = 0; - - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite4); - if (ret < 0) { - pr_err("nf_conntrack_udplite4: pernet registration failed.\n"); - goto out; - } - ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite6); - if (ret < 0) { - pr_err("nf_conntrack_udplite6: pernet registration failed.\n"); - goto cleanup_udplite4; - } - return 0; - -cleanup_udplite4: - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4); -out: - return ret; + return nf_ct_l4proto_pernet_register(net, udplite_proto, + ARRAY_SIZE(udplite_proto)); } static void udplite_net_exit(struct net *net) { - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite6); - nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4); + nf_ct_l4proto_pernet_unregister(net, udplite_proto, + ARRAY_SIZE(udplite_proto)); } static struct pernet_operations udplite_net_ops = { @@ -377,29 +366,16 @@ static int __init nf_conntrack_proto_udplite_init(void) ret = register_pernet_subsys(&udplite_net_ops); if (ret < 0) - goto out_pernet; - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4); - if (ret < 0) - goto out_udplite4; - - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite6); + return ret; + ret = nf_ct_l4proto_register(udplite_proto, ARRAY_SIZE(udplite_proto)); if (ret < 0) - goto out_udplite6; - - return 0; -out_udplite6: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); -out_udplite4: - unregister_pernet_subsys(&udplite_net_ops); -out_pernet: + unregister_pernet_subsys(&udplite_net_ops); return ret; } static void __exit nf_conntrack_proto_udplite_exit(void) { - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6); - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); + nf_ct_l4proto_unregister(udplite_proto, ARRAY_SIZE(udplite_proto)); unregister_pernet_subsys(&udplite_net_ops); } -- cgit v1.2.3 From 1ababeba4a21f3dba3da3523c670b207fb2feb62 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 8 Nov 2016 14:57:39 +0100 Subject: ipv6: implement dataplane support for rthdr type 4 (Segment Routing Header) Implement minimal support for processing of SR-enabled packets as described in https://tools.ietf.org/html/draft-ietf-6man-segment-routing-header-02. This patch implements the following operations: - Intermediate segment endpoint: incrementation of active segment and rerouting. - Egress for SR-encapsulated packets: decapsulation of outer IPv6 header + SRH and routing of inner packet. - Cleanup flag support for SR-inlined packets: removal of SRH if we are the penultimate segment endpoint. A per-interface sysctl seg6_enabled is provided, to accept/deny SR-enabled packets. Default is deny. This patch does not provide support for HMAC-signed packets. Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + include/linux/seg6.h | 6 ++ include/net/seg6.h | 36 ++++++++++ include/uapi/linux/ipv6.h | 2 + include/uapi/linux/seg6.h | 54 ++++++++++++++ net/ipv6/addrconf.c | 10 +++ net/ipv6/exthdrs.c | 175 ++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 284 insertions(+) create mode 100644 include/linux/seg6.h create mode 100644 include/net/seg6.h create mode 100644 include/uapi/linux/seg6.h (limited to 'include/net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 1afb6e8d35c3..68d3f71f0abf 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -64,6 +64,7 @@ struct ipv6_devconf { } stable_secret; __s32 use_oif_addrs_only; __s32 keep_addr_on_down; + __s32 seg6_enabled; struct ctl_table_header *sysctl_header; }; diff --git a/include/linux/seg6.h b/include/linux/seg6.h new file mode 100644 index 000000000000..7a66d2b4c5a6 --- /dev/null +++ b/include/linux/seg6.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_H +#define _LINUX_SEG6_H + +#include + +#endif diff --git a/include/net/seg6.h b/include/net/seg6.h new file mode 100644 index 000000000000..4dd52a7e95f1 --- /dev/null +++ b/include/net/seg6.h @@ -0,0 +1,36 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _NET_SEG6_H +#define _NET_SEG6_H + +static inline void update_csum_diff4(struct sk_buff *skb, __be32 from, + __be32 to) +{ + __be32 diff[] = { ~from, to }; + + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); +} + +static inline void update_csum_diff16(struct sk_buff *skb, __be32 *from, + __be32 *to) +{ + __be32 diff[] = { + ~from[0], ~from[1], ~from[2], ~from[3], + to[0], to[1], to[2], to[3], + }; + + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); +} + +#endif diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 8c2772340c3f..7ff1d654e333 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -39,6 +39,7 @@ struct in6_ifreq { #define IPV6_SRCRT_STRICT 0x01 /* Deprecated; will be removed */ #define IPV6_SRCRT_TYPE_0 0 /* Deprecated; will be removed */ #define IPV6_SRCRT_TYPE_2 2 /* IPv6 type 2 Routing Header */ +#define IPV6_SRCRT_TYPE_4 4 /* Segment Routing with IPv6 */ /* * routing header @@ -178,6 +179,7 @@ enum { DEVCONF_DROP_UNSOLICITED_NA, DEVCONF_KEEP_ADDR_ON_DOWN, DEVCONF_RTR_SOLICIT_MAX_INTERVAL, + DEVCONF_SEG6_ENABLED, DEVCONF_MAX }; diff --git a/include/uapi/linux/seg6.h b/include/uapi/linux/seg6.h new file mode 100644 index 000000000000..c396a8052f73 --- /dev/null +++ b/include/uapi/linux/seg6.h @@ -0,0 +1,54 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_SEG6_H +#define _UAPI_LINUX_SEG6_H + +/* + * SRH + */ +struct ipv6_sr_hdr { + __u8 nexthdr; + __u8 hdrlen; + __u8 type; + __u8 segments_left; + __u8 first_segment; + __u8 flag_1; + __u8 flag_2; + __u8 reserved; + + struct in6_addr segments[0]; +}; + +#define SR6_FLAG1_CLEANUP (1 << 7) +#define SR6_FLAG1_PROTECTED (1 << 6) +#define SR6_FLAG1_OAM (1 << 5) +#define SR6_FLAG1_ALERT (1 << 4) +#define SR6_FLAG1_HMAC (1 << 3) + +#define SR6_TLV_INGRESS 1 +#define SR6_TLV_EGRESS 2 +#define SR6_TLV_OPAQUE 3 +#define SR6_TLV_PADDING 4 +#define SR6_TLV_HMAC 5 + +#define sr_has_cleanup(srh) ((srh)->flag_1 & SR6_FLAG1_CLEANUP) +#define sr_has_hmac(srh) ((srh)->flag_1 & SR6_FLAG1_HMAC) + +struct sr6_tlv { + __u8 type; + __u8 len; + __u8 data[0]; +}; + +#endif diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 060dd9922018..2ac6cb460af0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -238,6 +238,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, + .seg6_enabled = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -284,6 +285,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, + .seg6_enabled = 0, }; /* Check if a valid qdisc is available */ @@ -4944,6 +4946,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down; + array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled; } static inline size_t inet6_ifla6_size(void) @@ -6035,6 +6038,13 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = proc_dointvec, }, + { + .procname = "seg6_enabled", + .data = &ipv6_devconf.seg6_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { /* sentinel */ } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 139ceb68bd37..b8ba3961ff8a 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -47,6 +47,8 @@ #if IS_ENABLED(CONFIG_IPV6_MIP6) #include #endif +#include +#include #include @@ -286,6 +288,175 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) return -1; } +static void seg6_update_csum(struct sk_buff *skb) +{ + struct ipv6_sr_hdr *hdr; + struct in6_addr *addr; + __be32 from, to; + + /* srh is at transport offset and seg_left is already decremented + * but daddr is not yet updated with next segment + */ + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + addr = hdr->segments + hdr->segments_left; + + hdr->segments_left++; + from = *(__be32 *)hdr; + + hdr->segments_left--; + to = *(__be32 *)hdr; + + /* update skb csum with diff resulting from seg_left decrement */ + + update_csum_diff4(skb, from, to); + + /* compute csum diff between current and next segment and update */ + + update_csum_diff16(skb, (__be32 *)(&ipv6_hdr(skb)->daddr), + (__be32 *)addr); +} + +static int ipv6_srh_rcv(struct sk_buff *skb) +{ + struct inet6_skb_parm *opt = IP6CB(skb); + struct net *net = dev_net(skb->dev); + struct ipv6_sr_hdr *hdr; + struct inet6_dev *idev; + struct in6_addr *addr; + bool cleanup = false; + int accept_seg6; + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + idev = __in6_dev_get(skb->dev); + + accept_seg6 = net->ipv6.devconf_all->seg6_enabled; + if (accept_seg6 > idev->cnf.seg6_enabled) + accept_seg6 = idev->cnf.seg6_enabled; + + if (!accept_seg6) { + kfree_skb(skb); + return -1; + } + +looped_back: + if (hdr->segments_left > 0) { + if (hdr->nexthdr != NEXTHDR_IPV6 && hdr->segments_left == 1 && + sr_has_cleanup(hdr)) + cleanup = true; + } else { + if (hdr->nexthdr == NEXTHDR_IPV6) { + int offset = (hdr->hdrlen + 1) << 3; + + skb_postpull_rcsum(skb, skb_network_header(skb), + skb_network_header_len(skb)); + + if (!pskb_pull(skb, offset)) { + kfree_skb(skb); + return -1; + } + skb_postpull_rcsum(skb, skb_transport_header(skb), + offset); + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->encapsulation = 0; + + __skb_tunnel_rx(skb, skb->dev, net); + + netif_rx(skb); + return -1; + } + + opt->srcrt = skb_network_header_len(skb); + opt->lastopt = opt->srcrt; + skb->transport_header += (hdr->hdrlen + 1) << 3; + opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb); + + return 1; + } + + if (hdr->segments_left >= (hdr->hdrlen >> 1)) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, + ((&hdr->segments_left) - + skb_network_header(skb))); + kfree_skb(skb); + return -1; + } + + if (skb_cloned(skb)) { + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + return -1; + } + } + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + hdr->segments_left--; + addr = hdr->segments + hdr->segments_left; + + skb_push(skb, sizeof(struct ipv6hdr)); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + seg6_update_csum(skb); + + ipv6_hdr(skb)->daddr = *addr; + + if (cleanup) { + int srhlen = (hdr->hdrlen + 1) << 3; + int nh = hdr->nexthdr; + + skb_pull_rcsum(skb, sizeof(struct ipv6hdr) + srhlen); + memmove(skb_network_header(skb) + srhlen, + skb_network_header(skb), + (unsigned char *)hdr - skb_network_header(skb)); + skb->network_header += srhlen; + ipv6_hdr(skb)->nexthdr = nh; + ipv6_hdr(skb)->payload_len = htons(skb->len - + sizeof(struct ipv6hdr)); + skb_push_rcsum(skb, sizeof(struct ipv6hdr)); + } + + skb_dst_drop(skb); + + ip6_route_input(skb); + + if (skb_dst(skb)->error) { + dst_input(skb); + return -1; + } + + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { + if (ipv6_hdr(skb)->hop_limit <= 1) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); + icmpv6_send(skb, ICMPV6_TIME_EXCEED, + ICMPV6_EXC_HOPLIMIT, 0); + kfree_skb(skb); + return -1; + } + ipv6_hdr(skb)->hop_limit--; + + /* be sure that srh is still present before reinjecting */ + if (!cleanup) { + skb_pull(skb, sizeof(struct ipv6hdr)); + goto looped_back; + } + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + } + + dst_input(skb); + + return -1; +} + /******************************** Routing header. ********************************/ @@ -326,6 +497,10 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) return -1; } + /* segment routing */ + if (hdr->type == IPV6_SRCRT_TYPE_4) + return ipv6_srh_rcv(skb); + looped_back: if (hdr->segments_left == 0) { switch (hdr->type) { -- cgit v1.2.3 From 915d7e5e5930b4f01d0971d93b9b25ed17d221aa Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 8 Nov 2016 14:57:40 +0100 Subject: ipv6: sr: add code base for control plane support of SR-IPv6 This patch adds the necessary hooks and structures to provide support for SR-IPv6 control plane, essentially the Generic Netlink commands that will be used for userspace control over the Segment Routing kernel structures. The genetlink commands provide control over two different structures: tunnel source and HMAC data. The tunnel source is the source address that will be used by default when encapsulating packets into an outer IPv6 header + SRH. If the tunnel source is set to :: then an address of the outgoing interface will be selected as the source. The HMAC commands currently just return ENOTSUPP and will be implemented in a future patch. Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/linux/seg6_genl.h | 6 ++ include/net/netns/ipv6.h | 1 + include/net/seg6.h | 16 +++ include/uapi/linux/seg6_genl.h | 32 ++++++ net/ipv6/Makefile | 2 +- net/ipv6/af_inet6.c | 9 +- net/ipv6/seg6.c | 214 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 278 insertions(+), 2 deletions(-) create mode 100644 include/linux/seg6_genl.h create mode 100644 include/uapi/linux/seg6_genl.h create mode 100644 net/ipv6/seg6.c (limited to 'include/net') diff --git a/include/linux/seg6_genl.h b/include/linux/seg6_genl.h new file mode 100644 index 000000000000..d6c3fb4f3734 --- /dev/null +++ b/include/linux/seg6_genl.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_GENL_H +#define _LINUX_SEG6_GENL_H + +#include + +#endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 10d0848f5b8a..de7745e2edcc 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -85,6 +85,7 @@ struct netns_ipv6 { #endif atomic_t dev_addr_genid; atomic_t fib6_sernum; + struct seg6_pernet_data *seg6_data; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) diff --git a/include/net/seg6.h b/include/net/seg6.h index 4dd52a7e95f1..7c7b8ed39661 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -14,6 +14,9 @@ #ifndef _NET_SEG6_H #define _NET_SEG6_H +#include +#include + static inline void update_csum_diff4(struct sk_buff *skb, __be32 from, __be32 to) { @@ -33,4 +36,17 @@ static inline void update_csum_diff16(struct sk_buff *skb, __be32 *from, skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); } +struct seg6_pernet_data { + struct mutex lock; + struct in6_addr __rcu *tun_src; +}; + +static inline struct seg6_pernet_data *seg6_pernet(struct net *net) +{ + return net->ipv6.seg6_data; +} + +extern int seg6_init(void); +extern void seg6_exit(void); + #endif diff --git a/include/uapi/linux/seg6_genl.h b/include/uapi/linux/seg6_genl.h new file mode 100644 index 000000000000..fcf1c60d7df3 --- /dev/null +++ b/include/uapi/linux/seg6_genl.h @@ -0,0 +1,32 @@ +#ifndef _UAPI_LINUX_SEG6_GENL_H +#define _UAPI_LINUX_SEG6_GENL_H + +#define SEG6_GENL_NAME "SEG6" +#define SEG6_GENL_VERSION 0x1 + +enum { + SEG6_ATTR_UNSPEC, + SEG6_ATTR_DST, + SEG6_ATTR_DSTLEN, + SEG6_ATTR_HMACKEYID, + SEG6_ATTR_SECRET, + SEG6_ATTR_SECRETLEN, + SEG6_ATTR_ALGID, + SEG6_ATTR_HMACINFO, + __SEG6_ATTR_MAX, +}; + +#define SEG6_ATTR_MAX (__SEG6_ATTR_MAX - 1) + +enum { + SEG6_CMD_UNSPEC, + SEG6_CMD_SETHMAC, + SEG6_CMD_DUMPHMAC, + SEG6_CMD_SET_TUNSRC, + SEG6_CMD_GET_TUNSRC, + __SEG6_CMD_MAX, +}; + +#define SEG6_CMD_MAX (__SEG6_CMD_MAX - 1) + +#endif diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index c174ccb340a1..c92010d62afc 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -9,7 +9,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ - udp_offload.o + udp_offload.o seg6.o ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c86911b63f8a..d424f3a3737a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -61,6 +61,7 @@ #include #endif #include +#include #include #include @@ -991,6 +992,10 @@ static int __init inet6_init(void) if (err) goto calipso_fail; + err = seg6_init(); + if (err) + goto seg6_fail; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) @@ -1001,8 +1006,10 @@ out: #ifdef CONFIG_SYSCTL sysctl_fail: - calipso_exit(); + seg6_exit(); #endif +seg6_fail: + calipso_exit(); calipso_fail: pingv6_exit(); pingv6_fail: diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c new file mode 100644 index 000000000000..e246b0ba12ac --- /dev/null +++ b/net/ipv6/seg6.c @@ -0,0 +1,214 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +static struct genl_family seg6_genl_family; + +static const struct nla_policy seg6_genl_policy[SEG6_ATTR_MAX + 1] = { + [SEG6_ATTR_DST] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, + [SEG6_ATTR_DSTLEN] = { .type = NLA_S32, }, + [SEG6_ATTR_HMACKEYID] = { .type = NLA_U32, }, + [SEG6_ATTR_SECRET] = { .type = NLA_BINARY, }, + [SEG6_ATTR_SECRETLEN] = { .type = NLA_U8, }, + [SEG6_ATTR_ALGID] = { .type = NLA_U8, }, + [SEG6_ATTR_HMACINFO] = { .type = NLA_NESTED, }, +}; + +static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info) +{ + return -ENOTSUPP; +} + +static int seg6_genl_set_tunsrc(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct in6_addr *val, *t_old, *t_new; + struct seg6_pernet_data *sdata; + + sdata = seg6_pernet(net); + + if (!info->attrs[SEG6_ATTR_DST]) + return -EINVAL; + + val = nla_data(info->attrs[SEG6_ATTR_DST]); + t_new = kmemdup(val, sizeof(*val), GFP_KERNEL); + + mutex_lock(&sdata->lock); + + t_old = sdata->tun_src; + rcu_assign_pointer(sdata->tun_src, t_new); + + mutex_unlock(&sdata->lock); + + synchronize_net(); + kfree(t_old); + + return 0; +} + +static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct in6_addr *tun_src; + struct sk_buff *msg; + void *hdr; + + msg = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, + &seg6_genl_family, 0, SEG6_CMD_GET_TUNSRC); + if (!hdr) + goto free_msg; + + rcu_read_lock(); + tun_src = rcu_dereference(seg6_pernet(net)->tun_src); + + if (nla_put(msg, SEG6_ATTR_DST, sizeof(struct in6_addr), tun_src)) + goto nla_put_failure; + + rcu_read_unlock(); + + genlmsg_end(msg, hdr); + genlmsg_reply(msg, info); + + return 0; + +nla_put_failure: + rcu_read_unlock(); + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -ENOMEM; +} + +static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb) +{ + return -ENOTSUPP; +} + +static int __net_init seg6_net_init(struct net *net) +{ + struct seg6_pernet_data *sdata; + + sdata = kzalloc(sizeof(*sdata), GFP_KERNEL); + if (!sdata) + return -ENOMEM; + + mutex_init(&sdata->lock); + + sdata->tun_src = kzalloc(sizeof(*sdata->tun_src), GFP_KERNEL); + if (!sdata->tun_src) { + kfree(sdata); + return -ENOMEM; + } + + net->ipv6.seg6_data = sdata; + + return 0; +} + +static void __net_exit seg6_net_exit(struct net *net) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + + kfree(sdata->tun_src); + kfree(sdata); +} + +static struct pernet_operations ip6_segments_ops = { + .init = seg6_net_init, + .exit = seg6_net_exit, +}; + +static const struct genl_ops seg6_genl_ops[] = { + { + .cmd = SEG6_CMD_SETHMAC, + .doit = seg6_genl_sethmac, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = SEG6_CMD_DUMPHMAC, + .dumpit = seg6_genl_dumphmac, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = SEG6_CMD_SET_TUNSRC, + .doit = seg6_genl_set_tunsrc, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = SEG6_CMD_GET_TUNSRC, + .doit = seg6_genl_get_tunsrc, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, +}; + +static struct genl_family seg6_genl_family __ro_after_init = { + .hdrsize = 0, + .name = SEG6_GENL_NAME, + .version = SEG6_GENL_VERSION, + .maxattr = SEG6_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = seg6_genl_ops, + .n_ops = ARRAY_SIZE(seg6_genl_ops), + .module = THIS_MODULE, +}; + +int __init seg6_init(void) +{ + int err = -ENOMEM; + + err = genl_register_family(&seg6_genl_family); + if (err) + goto out; + + err = register_pernet_subsys(&ip6_segments_ops); + if (err) + goto out_unregister_genl; + + pr_info("Segment Routing with IPv6\n"); + +out: + return err; +out_unregister_genl: + genl_unregister_family(&seg6_genl_family); + goto out; +} + +void seg6_exit(void) +{ + unregister_pernet_subsys(&ip6_segments_ops); + genl_unregister_family(&seg6_genl_family); +} -- cgit v1.2.3 From 6c8702c60b88651072460f3f4026c7dfe2521d12 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 8 Nov 2016 14:57:41 +0100 Subject: ipv6: sr: add support for SRH encapsulation and injection with lwtunnels This patch creates a new type of interfaceless lightweight tunnel (SEG6), enabling the encapsulation and injection of SRH within locally emitted packets and forwarded packets. >From a configuration viewpoint, a seg6 tunnel would be configured as follows: ip -6 ro ad fc00::1/128 encap seg6 mode encap segs fc42::1,fc42::2,fc42::3 dev eth0 Any packet whose destination address is fc00::1 would thus be encapsulated within an outer IPv6 header containing the SRH with three segments, and would actually be routed to the first segment of the list. If `mode inline' was specified instead of `mode encap', then the SRH would be directly inserted after the IPv6 header without outer encapsulation. The inline mode is only available if CONFIG_IPV6_SEG6_INLINE is enabled. This feature was made configurable because direct header insertion may break several mechanisms such as PMTUD or IPSec AH. Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/linux/seg6_iptunnel.h | 6 + include/net/seg6.h | 6 + include/uapi/linux/lwtunnel.h | 1 + include/uapi/linux/seg6_iptunnel.h | 44 ++++ net/core/lwtunnel.c | 2 + net/ipv6/Kconfig | 12 ++ net/ipv6/Makefile | 2 +- net/ipv6/seg6.c | 44 ++++ net/ipv6/seg6_iptunnel.c | 410 +++++++++++++++++++++++++++++++++++++ 9 files changed, 526 insertions(+), 1 deletion(-) create mode 100644 include/linux/seg6_iptunnel.h create mode 100644 include/uapi/linux/seg6_iptunnel.h create mode 100644 net/ipv6/seg6_iptunnel.c (limited to 'include/net') diff --git a/include/linux/seg6_iptunnel.h b/include/linux/seg6_iptunnel.h new file mode 100644 index 000000000000..5377cf6a5a02 --- /dev/null +++ b/include/linux/seg6_iptunnel.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_IPTUNNEL_H +#define _LINUX_SEG6_IPTUNNEL_H + +#include + +#endif diff --git a/include/net/seg6.h b/include/net/seg6.h index 7c7b8ed39661..ff5da0ce83e9 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -16,6 +16,8 @@ #include #include +#include +#include static inline void update_csum_diff4(struct sk_buff *skb, __be32 from, __be32 to) @@ -48,5 +50,9 @@ static inline struct seg6_pernet_data *seg6_pernet(struct net *net) extern int seg6_init(void); extern void seg6_exit(void); +extern int seg6_iptunnel_init(void); +extern void seg6_iptunnel_exit(void); + +extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); #endif diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index a478fe80e203..453cc6215bfd 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -9,6 +9,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_IP, LWTUNNEL_ENCAP_ILA, LWTUNNEL_ENCAP_IP6, + LWTUNNEL_ENCAP_SEG6, __LWTUNNEL_ENCAP_MAX, }; diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h new file mode 100644 index 000000000000..0f7dbd280a9c --- /dev/null +++ b/include/uapi/linux/seg6_iptunnel.h @@ -0,0 +1,44 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_SEG6_IPTUNNEL_H +#define _UAPI_LINUX_SEG6_IPTUNNEL_H + +enum { + SEG6_IPTUNNEL_UNSPEC, + SEG6_IPTUNNEL_SRH, + __SEG6_IPTUNNEL_MAX, +}; +#define SEG6_IPTUNNEL_MAX (__SEG6_IPTUNNEL_MAX - 1) + +struct seg6_iptunnel_encap { + int mode; + struct ipv6_sr_hdr srh[0]; +}; + +#define SEG6_IPTUN_ENCAP_SIZE(x) ((sizeof(*x)) + (((x)->srh->hdrlen + 1) << 3)) + +enum { + SEG6_IPTUN_MODE_INLINE, + SEG6_IPTUN_MODE_ENCAP, +}; + +static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) +{ + int encap = (tuninfo->mode == SEG6_IPTUN_MODE_ENCAP); + + return ((tuninfo->srh->hdrlen + 1) << 3) + + (encap * sizeof(struct ipv6hdr)); +} + +#endif diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 88fd64250b02..03976e939818 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -39,6 +39,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) return "MPLS"; case LWTUNNEL_ENCAP_ILA: return "ILA"; + case LWTUNNEL_ENCAP_SEG6: + return "SEG6"; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: case LWTUNNEL_ENCAP_NONE: diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 2343e4f2e0bf..1123a001d729 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -289,4 +289,16 @@ config IPV6_PIMSM_V2 Support for IPv6 PIM multicast routing protocol PIM-SMv2. If unsure, say N. +config IPV6_SEG6_INLINE + bool "IPv6: direct Segment Routing Header insertion " + depends on IPV6 + ---help--- + Support for direct insertion of the Segment Routing Header, + also known as inline mode. Be aware that direct insertion of + extension headers (as opposed to encapsulation) may break + multiple mechanisms such as PMTUD or IPSec AH. Use this feature + only if you know exactly what you are doing. + + If unsure, say N. + endif # IPV6 diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index c92010d62afc..59ee92fb3689 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -9,7 +9,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ - udp_offload.o seg6.o + udp_offload.o seg6.o seg6_iptunnel.o ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index e246b0ba12ac..9c78053e67e0 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -26,6 +26,43 @@ #include #include +bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len) +{ + int trailing; + unsigned int tlv_offset; + + if (srh->type != IPV6_SRCRT_TYPE_4) + return false; + + if (((srh->hdrlen + 1) << 3) != len) + return false; + + if (srh->segments_left != srh->first_segment) + return false; + + tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4); + + trailing = len - tlv_offset; + if (trailing < 0) + return false; + + while (trailing) { + struct sr6_tlv *tlv; + unsigned int tlv_len; + + tlv = (struct sr6_tlv *)((unsigned char *)srh + tlv_offset); + tlv_len = sizeof(*tlv) + tlv->len; + + trailing -= tlv_len; + if (trailing < 0) + return false; + + tlv_offset += tlv_len; + } + + return true; +} + static struct genl_family seg6_genl_family; static const struct nla_policy seg6_genl_policy[SEG6_ATTR_MAX + 1] = { @@ -198,10 +235,16 @@ int __init seg6_init(void) if (err) goto out_unregister_genl; + err = seg6_iptunnel_init(); + if (err) + goto out_unregister_pernet; + pr_info("Segment Routing with IPv6\n"); out: return err; +out_unregister_pernet: + unregister_pernet_subsys(&ip6_segments_ops); out_unregister_genl: genl_unregister_family(&seg6_genl_family); goto out; @@ -209,6 +252,7 @@ out_unregister_genl: void seg6_exit(void) { + seg6_iptunnel_exit(); unregister_pernet_subsys(&ip6_segments_ops); genl_unregister_family(&seg6_genl_family); } diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c new file mode 100644 index 000000000000..39762b2fa7a2 --- /dev/null +++ b/net/ipv6/seg6_iptunnel.c @@ -0,0 +1,410 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_DST_CACHE +#include +#endif + +struct seg6_lwt { +#ifdef CONFIG_DST_CACHE + struct dst_cache cache; +#endif + struct seg6_iptunnel_encap tuninfo[0]; +}; + +static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt) +{ + return (struct seg6_lwt *)lwt->data; +} + +static inline struct seg6_iptunnel_encap * +seg6_encap_lwtunnel(struct lwtunnel_state *lwt) +{ + return seg6_lwt_lwtunnel(lwt)->tuninfo; +} + +static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = { + [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY }, +}; + +int nla_put_srh(struct sk_buff *skb, int attrtype, + struct seg6_iptunnel_encap *tuninfo) +{ + struct seg6_iptunnel_encap *data; + struct nlattr *nla; + int len; + + len = SEG6_IPTUN_ENCAP_SIZE(tuninfo); + + nla = nla_reserve(skb, attrtype, len); + if (!nla) + return -EMSGSIZE; + + data = nla_data(nla); + memcpy(data, tuninfo, len); + + return 0; +} + +static void set_tun_src(struct net *net, struct net_device *dev, + struct in6_addr *daddr, struct in6_addr *saddr) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct in6_addr *tun_src; + + rcu_read_lock(); + + tun_src = rcu_dereference(sdata->tun_src); + + if (!ipv6_addr_any(tun_src)) { + memcpy(saddr, tun_src, sizeof(struct in6_addr)); + } else { + ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC, + saddr); + } + + rcu_read_unlock(); +} + +/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ +static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +{ + struct net *net = dev_net(skb_dst(skb)->dev); + struct ipv6hdr *hdr, *inner_hdr; + struct ipv6_sr_hdr *isrh; + int hdrlen, tot_len, err; + + hdrlen = (osrh->hdrlen + 1) << 3; + tot_len = hdrlen + sizeof(*hdr); + + err = pskb_expand_head(skb, tot_len, 0, GFP_ATOMIC); + if (unlikely(err)) + return err; + + inner_hdr = ipv6_hdr(skb); + + skb_push(skb, tot_len); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + hdr = ipv6_hdr(skb); + + /* inherit tc, flowlabel and hlim + * hlim will be decremented in ip6_forward() afterwards and + * decapsulation will overwrite inner hlim with outer hlim + */ + ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), + ip6_flowlabel(inner_hdr)); + hdr->hop_limit = inner_hdr->hop_limit; + hdr->nexthdr = NEXTHDR_ROUTING; + + isrh = (void *)hdr + sizeof(*hdr); + memcpy(isrh, osrh, hdrlen); + + isrh->nexthdr = NEXTHDR_IPV6; + + hdr->daddr = isrh->segments[isrh->first_segment]; + set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr); + + skb_postpush_rcsum(skb, hdr, tot_len); + + return 0; +} + +/* insert an SRH within an IPv6 packet, just after the IPv6 header */ +#ifdef CONFIG_IPV6_SEG6_INLINE +static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +{ + struct ipv6hdr *hdr, *oldhdr; + struct ipv6_sr_hdr *isrh; + int hdrlen, err; + + hdrlen = (osrh->hdrlen + 1) << 3; + + err = pskb_expand_head(skb, hdrlen, 0, GFP_ATOMIC); + if (unlikely(err)) + return err; + + oldhdr = ipv6_hdr(skb); + + skb_pull(skb, sizeof(struct ipv6hdr)); + skb_postpull_rcsum(skb, skb_network_header(skb), + sizeof(struct ipv6hdr)); + + skb_push(skb, sizeof(struct ipv6hdr) + hdrlen); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + + hdr = ipv6_hdr(skb); + + memmove(hdr, oldhdr, sizeof(*hdr)); + + isrh = (void *)hdr + sizeof(*hdr); + memcpy(isrh, osrh, hdrlen); + + isrh->nexthdr = hdr->nexthdr; + hdr->nexthdr = NEXTHDR_ROUTING; + + isrh->segments[0] = hdr->daddr; + hdr->daddr = isrh->segments[isrh->first_segment]; + + skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); + + return 0; +} +#endif + +static int seg6_do_srh(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct seg6_iptunnel_encap *tinfo; + int err = 0; + + tinfo = seg6_encap_lwtunnel(dst->lwtstate); + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + switch (tinfo->mode) { +#ifdef CONFIG_IPV6_SEG6_INLINE + case SEG6_IPTUN_MODE_INLINE: + err = seg6_do_srh_inline(skb, tinfo->srh); + skb_reset_inner_headers(skb); + break; +#endif + case SEG6_IPTUN_MODE_ENCAP: + err = seg6_do_srh_encap(skb, tinfo->srh); + break; + } + + if (err) + return err; + + ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + + skb_set_inner_protocol(skb, skb->protocol); + + return 0; +} + +int seg6_input(struct sk_buff *skb) +{ + int err; + + err = seg6_do_srh(skb); + if (unlikely(err)) { + kfree_skb(skb); + return err; + } + + skb_dst_drop(skb); + ip6_route_input(skb); + + return dst_input(skb); +} + +int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct dst_entry *orig_dst = skb_dst(skb); + struct dst_entry *dst = NULL; + struct seg6_lwt *slwt; + int err = -EINVAL; + + err = seg6_do_srh(skb); + if (unlikely(err)) + goto drop; + + slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); + +#ifdef CONFIG_DST_CACHE + dst = dst_cache_get(&slwt->cache); +#endif + + if (unlikely(!dst)) { + struct ipv6hdr *hdr = ipv6_hdr(skb); + struct flowi6 fl6; + + fl6.daddr = hdr->daddr; + fl6.saddr = hdr->saddr; + fl6.flowlabel = ip6_flowinfo(hdr); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = hdr->nexthdr; + + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + err = dst->error; + dst_release(dst); + goto drop; + } + +#ifdef CONFIG_DST_CACHE + dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); +#endif + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + return dst_output(net, sk, skb); +drop: + kfree_skb(skb); + return err; +} + +static int seg6_build_state(struct net_device *dev, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts) +{ + struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1]; + struct seg6_iptunnel_encap *tuninfo; + struct lwtunnel_state *newts; + int tuninfo_len, min_size; + struct seg6_lwt *slwt; + int err; + + err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla, + seg6_iptunnel_policy); + + if (err < 0) + return err; + + if (!tb[SEG6_IPTUNNEL_SRH]) + return -EINVAL; + + tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]); + tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]); + + /* tuninfo must contain at least the iptunnel encap structure, + * the SRH and one segment + */ + min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) + + sizeof(struct in6_addr); + if (tuninfo_len < min_size) + return -EINVAL; + + switch (tuninfo->mode) { +#ifdef CONFIG_IPV6_SEG6_INLINE + case SEG6_IPTUN_MODE_INLINE: + break; +#endif + case SEG6_IPTUN_MODE_ENCAP: + break; + default: + return -EINVAL; + } + + /* verify that SRH is consistent */ + if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo))) + return -EINVAL; + + newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt)); + if (!newts) + return -ENOMEM; + + slwt = seg6_lwt_lwtunnel(newts); + +#ifdef CONFIG_DST_CACHE + err = dst_cache_init(&slwt->cache, GFP_KERNEL); + if (err) { + kfree(newts); + return err; + } +#endif + + memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); + + newts->type = LWTUNNEL_ENCAP_SEG6; + newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | + LWTUNNEL_STATE_INPUT_REDIRECT; + newts->headroom = seg6_lwt_headroom(tuninfo); + + *ts = newts; + + return 0; +} + +#ifdef CONFIG_DST_CACHE +static void seg6_destroy_state(struct lwtunnel_state *lwt) +{ + dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache); +} +#endif + +static int seg6_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + + if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo)) + return -EMSGSIZE; + + return 0; +} + +static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + + return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo)); +} + +static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a); + struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b); + int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr); + + if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr)) + return 1; + + return memcmp(a_hdr, b_hdr, len); +} + +static const struct lwtunnel_encap_ops seg6_iptun_ops = { + .build_state = seg6_build_state, +#ifdef CONFIG_DST_CACHE + .destroy_state = seg6_destroy_state, +#endif + .output = seg6_output, + .input = seg6_input, + .fill_encap = seg6_fill_encap_info, + .get_encap_size = seg6_encap_nlsize, + .cmp_encap = seg6_encap_cmp, +}; + +int __init seg6_iptunnel_init(void) +{ + return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); +} + +void seg6_iptunnel_exit(void) +{ + lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); +} -- cgit v1.2.3 From bf355b8d2c30a289232042cacc1cfaea4923936c Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 8 Nov 2016 14:57:42 +0100 Subject: ipv6: sr: add core files for SR HMAC support This patch adds the necessary functions to compute and check the HMAC signature of an SR-enabled packet. Two HMAC algorithms are supported: hmac(sha1) and hmac(sha256). In order to avoid dynamic memory allocation for each HMAC computation, a per-cpu ring buffer is allocated for this purpose. A new per-interface sysctl called seg6_require_hmac is added, allowing a user-defined policy for processing HMAC-signed SR-enabled packets. A value of -1 means that the HMAC field will always be ignored. A value of 0 means that if an HMAC field is present, its validity will be enforced (the packet is dropped is the signature is incorrect). Finally, a value of 1 means that any SR-enabled packet that does not contain an HMAC signature or whose signature is incorrect will be dropped. Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 + include/linux/seg6_hmac.h | 6 + include/net/seg6.h | 4 + include/net/seg6_hmac.h | 62 ++++++ include/uapi/linux/ipv6.h | 1 + include/uapi/linux/seg6_hmac.h | 21 ++ net/ipv6/Kconfig | 12 + net/ipv6/Makefile | 1 + net/ipv6/addrconf.c | 18 ++ net/ipv6/seg6_hmac.c | 484 +++++++++++++++++++++++++++++++++++++++++ 10 files changed, 612 insertions(+) create mode 100644 include/linux/seg6_hmac.h create mode 100644 include/net/seg6_hmac.h create mode 100644 include/uapi/linux/seg6_hmac.h create mode 100644 net/ipv6/seg6_hmac.c (limited to 'include/net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 68d3f71f0abf..93756585521f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -65,6 +65,9 @@ struct ipv6_devconf { __s32 use_oif_addrs_only; __s32 keep_addr_on_down; __s32 seg6_enabled; +#ifdef CONFIG_IPV6_SEG6_HMAC + __s32 seg6_require_hmac; +#endif struct ctl_table_header *sysctl_header; }; diff --git a/include/linux/seg6_hmac.h b/include/linux/seg6_hmac.h new file mode 100644 index 000000000000..da437ebdc6cd --- /dev/null +++ b/include/linux/seg6_hmac.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_HMAC_H +#define _LINUX_SEG6_HMAC_H + +#include + +#endif diff --git a/include/net/seg6.h b/include/net/seg6.h index ff5da0ce83e9..4e0357517d79 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -18,6 +18,7 @@ #include #include #include +#include static inline void update_csum_diff4(struct sk_buff *skb, __be32 from, __be32 to) @@ -41,6 +42,9 @@ static inline void update_csum_diff16(struct sk_buff *skb, __be32 *from, struct seg6_pernet_data { struct mutex lock; struct in6_addr __rcu *tun_src; +#ifdef CONFIG_IPV6_SEG6_HMAC + struct rhashtable hmac_infos; +#endif }; static inline struct seg6_pernet_data *seg6_pernet(struct net *net) diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h new file mode 100644 index 000000000000..69c3a106056b --- /dev/null +++ b/include/net/seg6_hmac.h @@ -0,0 +1,62 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _NET_SEG6_HMAC_H +#define _NET_SEG6_HMAC_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SEG6_HMAC_MAX_DIGESTSIZE 160 +#define SEG6_HMAC_RING_SIZE 256 + +struct seg6_hmac_info { + struct rhash_head node; + struct rcu_head rcu; + + u32 hmackeyid; + char secret[SEG6_HMAC_SECRET_LEN]; + u8 slen; + u8 alg_id; +}; + +struct seg6_hmac_algo { + u8 alg_id; + char name[64]; + struct crypto_shash * __percpu *tfms; + struct shash_desc * __percpu *shashs; +}; + +extern int seg6_hmac_compute(struct seg6_hmac_info *hinfo, + struct ipv6_sr_hdr *hdr, struct in6_addr *saddr, + u8 *output); +extern struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key); +extern int seg6_hmac_info_add(struct net *net, u32 key, + struct seg6_hmac_info *hinfo); +extern int seg6_hmac_info_del(struct net *net, u32 key); +extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr, + struct ipv6_sr_hdr *srh); +extern bool seg6_hmac_validate_skb(struct sk_buff *skb); +extern int seg6_hmac_init(void); +extern void seg6_hmac_exit(void); +extern int seg6_hmac_net_init(struct net *net); +extern void seg6_hmac_net_exit(struct net *net); + +#endif diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 7ff1d654e333..53561be1ac21 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -180,6 +180,7 @@ enum { DEVCONF_KEEP_ADDR_ON_DOWN, DEVCONF_RTR_SOLICIT_MAX_INTERVAL, DEVCONF_SEG6_ENABLED, + DEVCONF_SEG6_REQUIRE_HMAC, DEVCONF_MAX }; diff --git a/include/uapi/linux/seg6_hmac.h b/include/uapi/linux/seg6_hmac.h new file mode 100644 index 000000000000..b652dfd51bc5 --- /dev/null +++ b/include/uapi/linux/seg6_hmac.h @@ -0,0 +1,21 @@ +#ifndef _UAPI_LINUX_SEG6_HMAC_H +#define _UAPI_LINUX_SEG6_HMAC_H + +#include + +#define SEG6_HMAC_SECRET_LEN 64 +#define SEG6_HMAC_FIELD_LEN 32 + +struct sr6_tlv_hmac { + struct sr6_tlv tlvhdr; + __u16 reserved; + __be32 hmackeyid; + __u8 hmac[SEG6_HMAC_FIELD_LEN]; +}; + +enum { + SEG6_HMAC_ALGO_SHA1 = 1, + SEG6_HMAC_ALGO_SHA256 = 2, +}; + +#endif diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 1123a001d729..0f00811a785f 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -301,4 +301,16 @@ config IPV6_SEG6_INLINE If unsure, say N. +config IPV6_SEG6_HMAC + bool "IPv6: Segment Routing HMAC support" + depends on IPV6 + select CRYPTO_HMAC + select CRYPTO_SHA1 + select CRYPTO_SHA256 + ---help--- + Support for HMAC signature generation and verification + of SR-enabled packets. + + If unsure, say N. + endif # IPV6 diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 59ee92fb3689..129cad2ba960 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o obj-$(CONFIG_IPV6_FOU) += fou6.o +obj-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2ac6cb460af0..86219c0a0104 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -239,6 +239,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, .seg6_enabled = 0, +#ifdef CONFIG_IPV6_SEG6_HMAC + .seg6_require_hmac = 0, +#endif }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -286,6 +289,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, .seg6_enabled = 0, +#ifdef CONFIG_IPV6_SEG6_HMAC + .seg6_require_hmac = 0, +#endif }; /* Check if a valid qdisc is available */ @@ -4947,6 +4953,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down; array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled; +#ifdef CONFIG_IPV6_SEG6_HMAC + array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac; +#endif } static inline size_t inet6_ifla6_size(void) @@ -6045,6 +6054,15 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_IPV6_SEG6_HMAC + { + .procname = "seg6_require_hmac", + .data = &ipv6_devconf.seg6_require_hmac, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif { /* sentinel */ } diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c new file mode 100644 index 000000000000..ef1c8a46e7ac --- /dev/null +++ b/net/ipv6/seg6_hmac.c @@ -0,0 +1,484 @@ +/* + * SR-IPv6 implementation -- HMAC functions + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +static char * __percpu *hmac_ring; + +static int seg6_hmac_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct seg6_hmac_info *hinfo = obj; + + return (hinfo->hmackeyid != *(__u32 *)arg->key); +} + +static inline void seg6_hinfo_release(struct seg6_hmac_info *hinfo) +{ + kfree_rcu(hinfo, rcu); +} + +static void seg6_free_hi(void *ptr, void *arg) +{ + struct seg6_hmac_info *hinfo = (struct seg6_hmac_info *)ptr; + + if (hinfo) + seg6_hinfo_release(hinfo); +} + +static const struct rhashtable_params rht_params = { + .head_offset = offsetof(struct seg6_hmac_info, node), + .key_offset = offsetof(struct seg6_hmac_info, hmackeyid), + .key_len = sizeof(u32), + .automatic_shrinking = true, + .obj_cmpfn = seg6_hmac_cmpfn, +}; + +static struct seg6_hmac_algo hmac_algos[] = { + { + .alg_id = SEG6_HMAC_ALGO_SHA1, + .name = "hmac(sha1)", + }, + { + .alg_id = SEG6_HMAC_ALGO_SHA256, + .name = "hmac(sha256)", + }, +}; + +static struct sr6_tlv_hmac *seg6_get_tlv_hmac(struct ipv6_sr_hdr *srh) +{ + struct sr6_tlv_hmac *tlv; + + if (srh->hdrlen < (srh->first_segment + 1) * 2 + 5) + return NULL; + + if (!sr_has_hmac(srh)) + return NULL; + + tlv = (struct sr6_tlv_hmac *) + ((char *)srh + ((srh->hdrlen + 1) << 3) - 40); + + if (tlv->tlvhdr.type != SR6_TLV_HMAC || tlv->tlvhdr.len != 38) + return NULL; + + return tlv; +} + +static struct seg6_hmac_algo *__hmac_get_algo(u8 alg_id) +{ + struct seg6_hmac_algo *algo; + int i, alg_count; + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + for (i = 0; i < alg_count; i++) { + algo = &hmac_algos[i]; + if (algo->alg_id == alg_id) + return algo; + } + + return NULL; +} + +static int __do_hmac(struct seg6_hmac_info *hinfo, const char *text, u8 psize, + u8 *output, int outlen) +{ + struct seg6_hmac_algo *algo; + struct crypto_shash *tfm; + struct shash_desc *shash; + int ret, dgsize; + + algo = __hmac_get_algo(hinfo->alg_id); + if (!algo) + return -ENOENT; + + tfm = *this_cpu_ptr(algo->tfms); + + dgsize = crypto_shash_digestsize(tfm); + if (dgsize > outlen) { + pr_debug("sr-ipv6: __do_hmac: digest size too big (%d / %d)\n", + dgsize, outlen); + return -ENOMEM; + } + + ret = crypto_shash_setkey(tfm, hinfo->secret, hinfo->slen); + if (ret < 0) { + pr_debug("sr-ipv6: crypto_shash_setkey failed: err %d\n", ret); + goto failed; + } + + shash = *this_cpu_ptr(algo->shashs); + shash->tfm = tfm; + + ret = crypto_shash_digest(shash, text, psize, output); + if (ret < 0) { + pr_debug("sr-ipv6: crypto_shash_digest failed: err %d\n", ret); + goto failed; + } + + return dgsize; + +failed: + return ret; +} + +int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, + struct in6_addr *saddr, u8 *output) +{ + __be32 hmackeyid = cpu_to_be32(hinfo->hmackeyid); + u8 tmp_out[SEG6_HMAC_MAX_DIGESTSIZE]; + int plen, i, dgsize, wrsize; + char *ring, *off; + + /* a 160-byte buffer for digest output allows to store highest known + * hash function (RadioGatun) with up to 1216 bits + */ + + /* saddr(16) + first_seg(1) + cleanup(1) + keyid(4) + seglist(16n) */ + plen = 16 + 1 + 1 + 4 + (hdr->first_segment + 1) * 16; + + /* this limit allows for 14 segments */ + if (plen >= SEG6_HMAC_RING_SIZE) + return -EMSGSIZE; + + /* Let's build the HMAC text on the ring buffer. The text is composed + * as follows, in order: + * + * 1. Source IPv6 address (128 bits) + * 2. first_segment value (8 bits) + * 3. cleanup flag (8 bits: highest bit is cleanup value, others are 0) + * 4. HMAC Key ID (32 bits) + * 5. All segments in the segments list (n * 128 bits) + */ + + local_bh_disable(); + ring = *this_cpu_ptr(hmac_ring); + off = ring; + + /* source address */ + memcpy(off, saddr, 16); + off += 16; + + /* first_segment value */ + *off++ = hdr->first_segment; + + /* cleanup flag */ + *off++ = !!(sr_has_cleanup(hdr)) << 7; + + /* HMAC Key ID */ + memcpy(off, &hmackeyid, 4); + off += 4; + + /* all segments in the list */ + for (i = 0; i < hdr->first_segment + 1; i++) { + memcpy(off, hdr->segments + i, 16); + off += 16; + } + + dgsize = __do_hmac(hinfo, ring, plen, tmp_out, + SEG6_HMAC_MAX_DIGESTSIZE); + local_bh_enable(); + + if (dgsize < 0) + return dgsize; + + wrsize = SEG6_HMAC_FIELD_LEN; + if (wrsize > dgsize) + wrsize = dgsize; + + memset(output, 0, SEG6_HMAC_FIELD_LEN); + memcpy(output, tmp_out, wrsize); + + return 0; +} +EXPORT_SYMBOL(seg6_hmac_compute); + +/* checks if an incoming SR-enabled packet's HMAC status matches + * the incoming policy. + * + * called with rcu_read_lock() + */ +bool seg6_hmac_validate_skb(struct sk_buff *skb) +{ + u8 hmac_output[SEG6_HMAC_FIELD_LEN]; + struct net *net = dev_net(skb->dev); + struct seg6_hmac_info *hinfo; + struct sr6_tlv_hmac *tlv; + struct ipv6_sr_hdr *srh; + struct inet6_dev *idev; + + idev = __in6_dev_get(skb->dev); + + srh = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + tlv = seg6_get_tlv_hmac(srh); + + /* mandatory check but no tlv */ + if (idev->cnf.seg6_require_hmac > 0 && !tlv) + return false; + + /* no check */ + if (idev->cnf.seg6_require_hmac < 0) + return true; + + /* check only if present */ + if (idev->cnf.seg6_require_hmac == 0 && !tlv) + return true; + + /* now, seg6_require_hmac >= 0 && tlv */ + + hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid)); + if (!hinfo) + return false; + + if (seg6_hmac_compute(hinfo, srh, &ipv6_hdr(skb)->saddr, hmac_output)) + return false; + + if (memcmp(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN) != 0) + return false; + + return true; +} +EXPORT_SYMBOL(seg6_hmac_validate_skb); + +/* called with rcu_read_lock() */ +struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct seg6_hmac_info *hinfo; + + hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params); + + return hinfo; +} +EXPORT_SYMBOL(seg6_hmac_info_lookup); + +int seg6_hmac_info_add(struct net *net, u32 key, struct seg6_hmac_info *hinfo) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + int err; + + err = rhashtable_lookup_insert_fast(&sdata->hmac_infos, &hinfo->node, + rht_params); + + return err; +} +EXPORT_SYMBOL(seg6_hmac_info_add); + +int seg6_hmac_info_del(struct net *net, u32 key) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct seg6_hmac_info *hinfo; + int err = -ENOENT; + + hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params); + if (!hinfo) + goto out; + + err = rhashtable_remove_fast(&sdata->hmac_infos, &hinfo->node, + rht_params); + if (err) + goto out; + + seg6_hinfo_release(hinfo); + +out: + return err; +} +EXPORT_SYMBOL(seg6_hmac_info_del); + +int seg6_push_hmac(struct net *net, struct in6_addr *saddr, + struct ipv6_sr_hdr *srh) +{ + struct seg6_hmac_info *hinfo; + struct sr6_tlv_hmac *tlv; + int err = -ENOENT; + + tlv = seg6_get_tlv_hmac(srh); + if (!tlv) + return -EINVAL; + + rcu_read_lock(); + + hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid)); + if (!hinfo) + goto out; + + memset(tlv->hmac, 0, SEG6_HMAC_FIELD_LEN); + err = seg6_hmac_compute(hinfo, srh, saddr, tlv->hmac); + +out: + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(seg6_push_hmac); + +static int seg6_hmac_init_ring(void) +{ + int i; + + hmac_ring = alloc_percpu(char *); + + if (!hmac_ring) + return -ENOMEM; + + for_each_possible_cpu(i) { + char *ring = kzalloc(SEG6_HMAC_RING_SIZE, GFP_KERNEL); + + if (!ring) + return -ENOMEM; + + *per_cpu_ptr(hmac_ring, i) = ring; + } + + return 0; +} + +static int seg6_hmac_init_algo(void) +{ + struct seg6_hmac_algo *algo; + struct crypto_shash *tfm; + struct shash_desc *shash; + int i, alg_count, cpu; + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + + for (i = 0; i < alg_count; i++) { + struct crypto_shash **p_tfm; + int shsize; + + algo = &hmac_algos[i]; + algo->tfms = alloc_percpu(struct crypto_shash *); + if (!algo->tfms) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + p_tfm = per_cpu_ptr(algo->tfms, cpu); + *p_tfm = tfm; + } + + p_tfm = this_cpu_ptr(algo->tfms); + tfm = *p_tfm; + + shsize = sizeof(*shash) + crypto_shash_descsize(tfm); + + algo->shashs = alloc_percpu(struct shash_desc *); + if (!algo->shashs) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + shash = kzalloc(shsize, GFP_KERNEL); + if (!shash) + return -ENOMEM; + *per_cpu_ptr(algo->shashs, cpu) = shash; + } + } + + return 0; +} + +int __init seg6_hmac_init(void) +{ + int ret; + + ret = seg6_hmac_init_ring(); + if (ret < 0) + goto out; + + ret = seg6_hmac_init_algo(); + +out: + return ret; +} +EXPORT_SYMBOL(seg6_hmac_init); + +int __net_init seg6_hmac_net_init(struct net *net) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + + rhashtable_init(&sdata->hmac_infos, &rht_params); + + return 0; +} +EXPORT_SYMBOL(seg6_hmac_net_init); + +void seg6_hmac_exit(void) +{ + struct seg6_hmac_algo *algo = NULL; + int i, alg_count, cpu; + + for_each_possible_cpu(i) { + char *ring = *per_cpu_ptr(hmac_ring, i); + + kfree(ring); + } + free_percpu(hmac_ring); + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + for (i = 0; i < alg_count; i++) { + algo = &hmac_algos[i]; + for_each_possible_cpu(cpu) { + struct crypto_shash *tfm; + struct shash_desc *shash; + + shash = *per_cpu_ptr(algo->shashs, cpu); + kfree(shash); + tfm = *per_cpu_ptr(algo->tfms, cpu); + crypto_free_shash(tfm); + } + free_percpu(algo->tfms); + free_percpu(algo->shashs); + } +} +EXPORT_SYMBOL(seg6_hmac_exit); + +void __net_exit seg6_hmac_net_exit(struct net *net) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + + rhashtable_free_and_destroy(&sdata->hmac_infos, seg6_free_hi, NULL); +} +EXPORT_SYMBOL(seg6_hmac_net_exit); -- cgit v1.2.3 From 613fa3ca9e9e6af57927dab238121010c510fe4c Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 8 Nov 2016 14:59:20 +0100 Subject: ipv6: add source address argument for ipv6_push_nfrag_opts This patch prepares for insertion of SRH through setsockopt(). The new source address argument is used when an HMAC field is present in the SRH, which must be filled. The HMAC signature process requires the source address as input text. Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/net/ipv6.h | 3 ++- net/ipv6/exthdrs.c | 6 +++--- net/ipv6/ip6_output.c | 5 +++-- net/ipv6/ip6_tunnel.c | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8fed1cd78658..0a3622bf086f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -932,7 +932,8 @@ int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); */ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, - u8 *proto, struct in6_addr **daddr_p); + u8 *proto, struct in6_addr **daddr_p, + struct in6_addr *saddr); void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 541cfa6345e5..72aadce18b5f 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -866,7 +866,7 @@ int ipv6_parse_hopopts(struct sk_buff *skb) static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, struct ipv6_rt_hdr *opt, - struct in6_addr **addr_p) + struct in6_addr **addr_p, struct in6_addr *saddr) { struct rt0_hdr *phdr, *ihdr; int hops; @@ -900,10 +900,10 @@ static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto, - struct in6_addr **daddr) + struct in6_addr **daddr, struct in6_addr *saddr) { if (opt->srcrt) { - ipv6_push_rthdr(skb, proto, opt->srcrt, daddr); + ipv6_push_rthdr(skb, proto, opt->srcrt, daddr, saddr); /* * IPV6_RTHDRDSTOPTS is ignored * unless IPV6_RTHDR is set (RFC3542). diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6001e781164e..ddc878d2cc6d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -203,7 +203,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt->opt_nflen) - ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); + ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop, + &fl6->saddr); } skb_push(skb, sizeof(struct ipv6hdr)); @@ -1672,7 +1673,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, if (opt && opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt && opt->opt_nflen) - ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); + ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr); skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index bfa889c2a87b..259e8507d2cd 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1157,7 +1157,7 @@ route_lookup: if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); - ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); + ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL, NULL); } /* Calculate max headroom for all the headers and adjust -- cgit v1.2.3 From f41cd11d64b2b21012eb4abffbe579bc0b90467f Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 8 Nov 2016 17:24:03 +0200 Subject: tc_act: Remove tcf_act macro tc_act macro addressed a non existing field, and was not used in the kernel source. Signed-off-by: Yotam Gigi Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/act_api.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 82f3c912a5b1..d8eae87ea778 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -42,7 +42,6 @@ struct tc_action { struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; }; -#define tcf_act common.tcfa_act #define tcf_head common.tcfa_head #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt -- cgit v1.2.3 From 98e4321b97cc790c6aac3fb7c92bbf13212452ee Mon Sep 17 00:00:00 2001 From: David S. Miller Date: Sun, 13 Nov 2016 12:14:59 -0500 Subject: genetlink: Make family a signed integer. The idr_alloc(), idr_remove(), et al. routines all expect IDs to be signed integers. Therefore make the genl_family member 'id' signed too. Signed-off-by: David S. Miller --- include/net/genetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 3ec87bacc0f5..a34275be3600 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -48,7 +48,7 @@ struct genl_info; * @n_ops: number of operations supported by this family */ struct genl_family { - unsigned int id; /* private */ + int id; /* private */ unsigned int hdrsize; char name[GENL_NAMSIZ]; unsigned int version; -- cgit v1.2.3 From ac6e780070e30e4c35bd395acfe9191e6268bdd3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Nov 2016 13:12:35 -0800 Subject: tcp: take care of truncations done by sk_filter() With syzkaller help, Marco Grassi found a bug in TCP stack, crashing in tcp_collapse() Root cause is that sk_filter() can truncate the incoming skb, but TCP stack was not really expecting this to happen. It probably was expecting a simple DROP or ACCEPT behavior. We first need to make sure no part of TCP header could be removed. Then we need to adjust TCP_SKB_CB(skb)->end_seq Many thanks to syzkaller team and Marco for giving us a reproducer. Signed-off-by: Eric Dumazet Reported-by: Marco Grassi Reported-by: Vladis Dronov Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 19 ++++++++++++++++++- net/ipv6/tcp_ipv6.c | 6 ++++-- 3 files changed, 23 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 304a8e17bc87..123979fe12bf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1220,6 +1220,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp) bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); +int tcp_filter(struct sock *sk, struct sk_buff *skb); #undef STATE_TRACE diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 61b7be303eec..2259114c7242 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1564,6 +1564,21 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_add_backlog); +int tcp_filter(struct sock *sk, struct sk_buff *skb) +{ + struct tcphdr *th = (struct tcphdr *)skb->data; + unsigned int eaten = skb->len; + int err; + + err = sk_filter_trim_cap(sk, skb, th->doff * 4); + if (!err) { + eaten -= skb->len; + TCP_SKB_CB(skb)->end_seq -= eaten; + } + return err; +} +EXPORT_SYMBOL(tcp_filter); + /* * From tcp_input.c */ @@ -1676,8 +1691,10 @@ process: nf_reset(skb); - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard_and_relse; + th = (const struct tcphdr *)skb->data; + iph = ip_hdr(skb); skb->dev = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6ca23c2e76f7..b9f1fee9a886 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1229,7 +1229,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard; /* @@ -1457,8 +1457,10 @@ process: if (tcp_v6_inbound_md5_hash(sk, skb)) goto discard_and_relse; - if (sk_filter(sk, skb)) + if (tcp_filter(sk, skb)) goto discard_and_relse; + th = (const struct tcphdr *)skb->data; + hdr = ipv6_hdr(skb); skb->dev = NULL; -- cgit v1.2.3 From 7e416ad7416307e22871a0ef0f0f14e2bb66a0d1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 10 Nov 2016 14:17:01 +0100 Subject: netfilter: conntrack: remove unused netns_ct member since 23014011ba420 ('netfilter: conntrack: support a fixed size of 128 distinct labels') this isn't needed anymore. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index e469e85de3f9..3d06d94d2e52 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -91,7 +91,6 @@ struct netns_ct { struct nf_ip_net nf_ct_proto; #if defined(CONFIG_NF_CONNTRACK_LABELS) unsigned int labels_used; - u8 label_words; #endif }; #endif -- cgit v1.2.3 From d9dc8b0f8b4ec8cdc48ad5a20a3105387138be82 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 11 Nov 2016 10:20:50 -0800 Subject: net: fix sleeping for sk_wait_event() Similar to commit 14135f30e33c ("inet: fix sleeping inside inet_wait_for_connect()"), sk_wait_event() needs to fix too, because release_sock() is blocking, it changes the process state back to running after sleep, which breaks the previous prepare_to_wait(). Switch to the new wait API. Cc: Eric Dumazet Cc: Peter Zijlstra Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- crypto/algif_aead.c | 9 ++++----- crypto/algif_skcipher.c | 18 +++++++++--------- include/net/sock.h | 8 +++++--- net/core/sock.c | 8 ++++---- net/core/stream.c | 28 ++++++++++++++-------------- net/decnet/af_decnet.c | 16 ++++++++-------- net/llc/af_llc.c | 24 ++++++++++++------------ net/phonet/pep.c | 9 ++++----- net/tipc/socket.c | 24 ++++++++++++------------ net/vmw_vsock/virtio_transport_common.c | 10 +++++----- 10 files changed, 77 insertions(+), 77 deletions(-) (limited to 'include/net') diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 80a0f1a78551..8948392c0525 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -132,28 +132,27 @@ static void aead_wmem_wakeup(struct sock *sk) static int aead_wait_for_data(struct sock *sk, unsigned flags) { + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct alg_sock *ask = alg_sk(sk); struct aead_ctx *ctx = ask->private; long timeout; - DEFINE_WAIT(wait); int err = -ERESTARTSYS; if (flags & MSG_DONTWAIT) return -EAGAIN; sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - + add_wait_queue(sk_sleep(sk), &wait); for (;;) { if (signal_pending(current)) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); timeout = MAX_SCHEDULE_TIMEOUT; - if (sk_wait_event(sk, &timeout, !ctx->more)) { + if (sk_wait_event(sk, &timeout, !ctx->more, &wait)) { err = 0; break; } } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 28556fce4267..1e38aaa8303e 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -199,26 +199,26 @@ static void skcipher_free_sgl(struct sock *sk) static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags) { - long timeout; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); int err = -ERESTARTSYS; + long timeout; if (flags & MSG_DONTWAIT) return -EAGAIN; sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); + add_wait_queue(sk_sleep(sk), &wait); for (;;) { if (signal_pending(current)) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); timeout = MAX_SCHEDULE_TIMEOUT; - if (sk_wait_event(sk, &timeout, skcipher_writable(sk))) { + if (sk_wait_event(sk, &timeout, skcipher_writable(sk), &wait)) { err = 0; break; } } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); return err; } @@ -242,10 +242,10 @@ static void skcipher_wmem_wakeup(struct sock *sk) static int skcipher_wait_for_data(struct sock *sk, unsigned flags) { + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct alg_sock *ask = alg_sk(sk); struct skcipher_ctx *ctx = ask->private; long timeout; - DEFINE_WAIT(wait); int err = -ERESTARTSYS; if (flags & MSG_DONTWAIT) { @@ -254,17 +254,17 @@ static int skcipher_wait_for_data(struct sock *sk, unsigned flags) sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + add_wait_queue(sk_sleep(sk), &wait); for (;;) { if (signal_pending(current)) break; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); timeout = MAX_SCHEDULE_TIMEOUT; - if (sk_wait_event(sk, &timeout, ctx->used)) { + if (sk_wait_event(sk, &timeout, ctx->used, &wait)) { err = 0; break; } } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); diff --git a/include/net/sock.h b/include/net/sock.h index cf617ee16723..9d905ed0cd25 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -915,14 +915,16 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) #endif } -#define sk_wait_event(__sk, __timeo, __condition) \ +#define sk_wait_event(__sk, __timeo, __condition, __wait) \ ({ int __rc; \ release_sock(__sk); \ __rc = __condition; \ if (!__rc) { \ - *(__timeo) = schedule_timeout(*(__timeo)); \ + *(__timeo) = wait_woken(__wait, \ + TASK_INTERRUPTIBLE, \ + *(__timeo)); \ } \ - sched_annotate_sleep(); \ + sched_annotate_sleep(); \ lock_sock(__sk); \ __rc = __condition; \ __rc; \ diff --git a/net/core/sock.c b/net/core/sock.c index 40dbc13453f9..0397928dfdc2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2078,14 +2078,14 @@ void __sk_flush_backlog(struct sock *sk) */ int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb) { + DEFINE_WAIT_FUNC(wait, woken_wake_function); int rc; - DEFINE_WAIT(wait); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb); + rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); return rc; } EXPORT_SYMBOL(sk_wait_data); diff --git a/net/core/stream.c b/net/core/stream.c index 1086c8b280a8..f575bcf64af2 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -53,8 +53,8 @@ void sk_stream_write_space(struct sock *sk) */ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) { + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct task_struct *tsk = current; - DEFINE_WAIT(wait); int done; do { @@ -68,13 +68,13 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) if (signal_pending(tsk)) return sock_intr_errno(*timeo_p); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending++; done = sk_wait_event(sk, timeo_p, !sk->sk_err && !((1 << sk->sk_state) & - ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); - finish_wait(sk_sleep(sk), &wait); + ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)), &wait); + remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending--; } while (!done); return 0; @@ -94,16 +94,16 @@ static inline int sk_stream_closing(struct sock *sk) void sk_stream_wait_close(struct sock *sk, long timeout) { if (timeout) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); + + add_wait_queue(sk_sleep(sk), &wait); do { - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); - if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk))) + if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk), &wait)) break; } while (!signal_pending(current) && timeout); - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); } } EXPORT_SYMBOL(sk_stream_wait_close); @@ -119,16 +119,16 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) long vm_wait = 0; long current_timeo = *timeo_p; bool noblock = (*timeo_p ? false : true); - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); if (sk_stream_memory_free(sk)) current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2; + add_wait_queue(sk_sleep(sk), &wait); + while (1) { sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; if (!*timeo_p) { @@ -147,7 +147,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) sk_wait_event(sk, ¤t_timeo, sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN) || (sk_stream_memory_free(sk) && - !vm_wait)); + !vm_wait), &wait); sk->sk_write_pending--; if (vm_wait) { @@ -161,7 +161,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) *timeo_p = current_timeo; } out: - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); return err; do_error: diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 13d6b1a6e0fc..a90ed67027b0 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1718,7 +1718,7 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, * See if there is data ready to read, sleep if there isn't */ for(;;) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); if (sk->sk_err) goto out; @@ -1749,11 +1749,11 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, goto out; } - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target)); + sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target), &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); } skb_queue_walk_safe(queue, skb, n) { @@ -1999,19 +1999,19 @@ static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) * size. */ if (dn_queue_too_long(scp, queue, flags)) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); if (flags & MSG_DONTWAIT) { err = -EWOULDBLOCK; goto out; } - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); sk_wait_event(sk, &timeo, - !dn_queue_too_long(scp, queue, flags)); + !dn_queue_too_long(scp, queue, flags), &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); continue; } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index db916cf51ffe..5e9296382420 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -532,12 +532,12 @@ out: static int llc_ui_wait_for_disc(struct sock *sk, long timeout) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); int rc = 0; + add_wait_queue(sk_sleep(sk), &wait); while (1) { - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE)) + if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE, &wait)) break; rc = -ERESTARTSYS; if (signal_pending(current)) @@ -547,39 +547,39 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout) break; rc = 0; } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); return rc; } static bool llc_ui_wait_for_conn(struct sock *sk, long timeout) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); + add_wait_queue(sk_sleep(sk), &wait); while (1) { - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT)) + if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT, &wait)) break; if (signal_pending(current) || !timeout) break; } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); return timeout; } static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct llc_sock *llc = llc_sk(sk); int rc; + add_wait_queue(sk_sleep(sk), &wait); while (1) { - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); rc = 0; if (sk_wait_event(sk, &timeout, (sk->sk_shutdown & RCV_SHUTDOWN) || (!llc_data_accept_state(llc->state) && !llc->remote_busy_flag && - !llc->p_flag))) + !llc->p_flag), &wait)) break; rc = -ERESTARTSYS; if (signal_pending(current)) @@ -588,7 +588,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) if (!timeout) break; } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); return rc; } diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 850a86cde0b3..8bad5624a27a 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -1167,7 +1167,7 @@ disabled: /* Wait until flow control allows TX */ done = atomic_read(&pn->tx_credits); while (!done) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); if (!timeo) { err = -EAGAIN; @@ -1178,10 +1178,9 @@ disabled: goto out; } - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); - done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits)); - finish_wait(sk_sleep(sk), &wait); + add_wait_queue(sk_sleep(sk), &wait); + done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits), &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (sk->sk_state != TCP_ESTABLISHED) goto disabled; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 149396366e80..22d92f0ec5ac 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -876,9 +876,9 @@ exit: static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) { + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - DEFINE_WAIT(wait); int done; do { @@ -892,9 +892,9 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) if (signal_pending(current)) return sock_intr_errno(*timeo_p); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - done = sk_wait_event(sk, timeo_p, !tsk->link_cong); - finish_wait(sk_sleep(sk), &wait); + add_wait_queue(sk_sleep(sk), &wait); + done = sk_wait_event(sk, timeo_p, !tsk->link_cong, &wait); + remove_wait_queue(sk_sleep(sk), &wait); } while (!done); return 0; } @@ -1031,9 +1031,9 @@ new_mtu: static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) { + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - DEFINE_WAIT(wait); int done; do { @@ -1049,12 +1049,12 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) if (signal_pending(current)) return sock_intr_errno(*timeo_p); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); done = sk_wait_event(sk, timeo_p, (!tsk->link_cong && !tsk_conn_cong(tsk)) || - !tipc_sk_connected(sk)); - finish_wait(sk_sleep(sk), &wait); + !tipc_sk_connected(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); } while (!done); return 0; } @@ -1929,8 +1929,8 @@ xmit: static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) { + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk; - DEFINE_WAIT(wait); int done; do { @@ -1942,10 +1942,10 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) if (signal_pending(current)) return sock_intr_errno(*timeo_p); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); done = sk_wait_event(sk, timeo_p, - sk->sk_state != TIPC_CONNECTING); - finish_wait(sk_sleep(sk), &wait); + sk->sk_state != TIPC_CONNECTING, &wait); + remove_wait_queue(sk_sleep(sk), &wait); } while (!done); return 0; } diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index a53b3a16b4f1..687e9fdb3d67 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -619,17 +619,17 @@ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt) static void virtio_transport_wait_close(struct sock *sk, long timeout) { if (timeout) { - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); + + add_wait_queue(sk_sleep(sk), &wait); do { - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); if (sk_wait_event(sk, &timeout, - sock_flag(sk, SOCK_DONE))) + sock_flag(sk, SOCK_DONE), &wait)) break; } while (!signal_pending(current) && timeout); - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); } } -- cgit v1.2.3 From c915fe13cbaae5c7aa7b44f367d05addd60c9008 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 15 Nov 2016 16:37:53 +0100 Subject: udplite: fix NULL pointer dereference The commit 850cbaddb52d ("udp: use it's own memory accounting schema") assumes that the socket proto has memory accounting enabled, but this is not the case for UDPLITE. Fix it enabling memory accounting for UDPLITE and performing fwd allocated memory reclaiming on socket shutdown. UDP and UDPLITE share now the same memory accounting limits. Also drop the backlog receive operation, since is no more needed. Fixes: 850cbaddb52d ("udp: use it's own memory accounting schema") Reported-by: Andrei Vagin Suggested-by: Eric Dumazet Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/udp.h | 1 + include/net/udplite.h | 1 + net/ipv4/udp.c | 3 ++- net/ipv4/udplite.c | 3 ++- net/ipv6/udplite.c | 3 ++- 5 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/udp.h b/include/net/udp.h index e6e4e19be387..1661791e8ca1 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -246,6 +246,7 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb, } /* net/ipv4/udp.c */ +void udp_destruct_sock(struct sock *sk); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); diff --git a/include/net/udplite.h b/include/net/udplite.h index 80761938b9a7..36097d388219 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -27,6 +27,7 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, static inline int udplite_sk_init(struct sock *sk) { udp_sk(sk)->pcflag = UDPLITE_BIT; + sk->sk_destruct = udp_destruct_sock; return 0; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c827e4ea509e..9ae7c63a8b13 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1253,7 +1253,7 @@ drop: } EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); -static void udp_destruct_sock(struct sock *sk) +void udp_destruct_sock(struct sock *sk) { /* reclaim completely the forward allocated memory */ unsigned int total = 0; @@ -1267,6 +1267,7 @@ static void udp_destruct_sock(struct sock *sk) inet_sock_destruct(sk); } +EXPORT_SYMBOL_GPL(udp_destruct_sock); int udp_init_sock(struct sock *sk) { diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index af817158d830..59f10fe9782e 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -50,10 +50,11 @@ struct proto udplite_prot = { .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, - .backlog_rcv = udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, .obj_size = sizeof(struct udp_sock), .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 47d0d2b87106..2784cc363f2b 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -45,10 +45,11 @@ struct proto udplitev6_prot = { .getsockopt = udpv6_getsockopt, .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, - .backlog_rcv = udpv6_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, .obj_size = sizeof(struct udp6_sock), .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 9efdb92d68c726e70066e5b4189c1186c9b6f90c Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Sun, 13 Nov 2016 20:43:58 -0800 Subject: vxlan: remove unsed vxlan_dev_dst_port() Signed-off-by: Pravin B Shelar Acked-by: Jiri Benc Signed-off-by: David S. Miller --- include/net/vxlan.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/net') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 308adc4154f4..49a59202f85e 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -281,16 +281,6 @@ struct vxlan_dev { struct net_device *vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); -static inline __be16 vxlan_dev_dst_port(struct vxlan_dev *vxlan, - unsigned short family) -{ -#if IS_ENABLED(CONFIG_IPV6) - if (family == AF_INET6) - return inet_sk(vxlan->vn6_sock->sock->sk)->inet_sport; -#endif - return inet_sk(vxlan->vn4_sock->sock->sk)->inet_sport; -} - static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, netdev_features_t features) { -- cgit v1.2.3 From e88a2766143a27bfe6704b4493b214de4094cf29 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Nov 2016 16:28:42 -0800 Subject: gro_cells: mark napi struct as not busy poll candidates Rolf Neugebauer reported very long delays at netns dismantle. Eric W. Biederman was kind enough to look at this problem and noticed synchronize_net() occurring from netif_napi_del() that was added in linux-4.5 Busy polling makes no sense for tunnels NAPI. If busy poll is used for sessions over tunnels, the poller will need to poll the physical device queue anyway. netif_tx_napi_add() could be used here, but function name is misleading, and renaming it is not stable material, so set NAPI_STATE_NO_BUSY_POLL bit directly. This will avoid inserting gro_cells napi structures in napi_hash[] and avoid the problematic synchronize_net() (per possible cpu) that Rolf reported. Fixes: 93d05d4a320c ("net: provide generic busy polling to all NAPI drivers") Signed-off-by: Eric Dumazet Reported-by: Rolf Neugebauer Reported-by: Eric W. Biederman Acked-by: Cong Wang Tested-by: Rolf Neugebauer Signed-off-by: David S. Miller --- include/net/gro_cells.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index d15214d673b2..2a1abbf8da74 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -68,6 +68,9 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); __skb_queue_head_init(&cell->napi_skbs); + + set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); + netif_napi_add(dev, &cell->napi, gro_cell_poll, 64); napi_enable(&cell->napi); } -- cgit v1.2.3 From 3b7093346b326e5d3590c7d49f6aefe6fa5b2c9a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 15 Nov 2016 05:46:06 -0500 Subject: ipv4: Restore fib_trie_flush_external function and fix call ordering The patch that removed the FIB offload infrastructure was a bit too aggressive and also removed code needed to clean up us splitting the table if additional rules were added. Specifically the function fib_trie_flush_external was called at the end of a new rule being added to flush the foreign trie entries from the main trie. I updated the code so that we only call fib_trie_flush_external on the main table so that we flush the entries for local from main. This way we don't call it for every rule change which is what was happening previously. Fixes: 347e3b28c1ba2 ("switchdev: remove FIB offload infrastructure") Reported-by: Eric Dumazet Cc: Jiri Pirko Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + net/ipv4/fib_frontend.c | 20 +++++++++++---- net/ipv4/fib_trie.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index b9314b48e39f..f390c3bb05c5 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -243,6 +243,7 @@ int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); int fib_table_flush(struct net *net, struct fib_table *table); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); +void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); #ifndef CONFIG_IP_MULTIPLE_TABLES diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c3b80478226e..161fc0f0d752 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -151,7 +151,7 @@ static void fib_replace_table(struct net *net, struct fib_table *old, int fib_unmerge(struct net *net) { - struct fib_table *old, *new; + struct fib_table *old, *new, *main_table; /* attempt to fetch local table if it has been allocated */ old = fib_get_table(net, RT_TABLE_LOCAL); @@ -162,11 +162,21 @@ int fib_unmerge(struct net *net) if (!new) return -ENOMEM; + /* table is already unmerged */ + if (new == old) + return 0; + /* replace merged table with clean table */ - if (new != old) { - fib_replace_table(net, old, new); - fib_free_table(old); - } + fib_replace_table(net, old, new); + fib_free_table(old); + + /* attempt to fetch main table if it has been allocated */ + main_table = fib_get_table(net, RT_TABLE_MAIN); + if (!main_table) + return 0; + + /* flush local entries from main table */ + fib_table_flush_external(main_table); return 0; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 4cff74d4133f..735edc9d41a2 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1760,6 +1760,71 @@ out: return NULL; } +/* Caller must hold RTNL */ +void fib_table_flush_external(struct fib_table *tb) +{ + struct trie *t = (struct trie *)tb->tb_data; + struct key_vector *pn = t->kv; + unsigned long cindex = 1; + struct hlist_node *tmp; + struct fib_alias *fa; + + /* walk trie in reverse order */ + for (;;) { + unsigned char slen = 0; + struct key_vector *n; + + if (!(cindex--)) { + t_key pkey = pn->key; + + /* cannot resize the trie vector */ + if (IS_TRIE(pn)) + break; + + /* resize completed node */ + pn = resize(t, pn); + cindex = get_index(pkey, pn); + + continue; + } + + /* grab the next available node */ + n = get_child(pn, cindex); + if (!n) + continue; + + if (IS_TNODE(n)) { + /* record pn and cindex for leaf walking */ + pn = n; + cindex = 1ul << n->bits; + + continue; + } + + hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { + /* if alias was cloned to local then we just + * need to remove the local copy from main + */ + if (tb->tb_id != fa->tb_id) { + hlist_del_rcu(&fa->fa_list); + alias_free_mem_rcu(fa); + continue; + } + + /* record local slen */ + slen = fa->fa_slen; + } + + /* update leaf slen */ + n->slen = slen; + + if (hlist_empty(&n->leaf)) { + put_child_root(pn, n->key, NULL); + node_free(n); + } + } +} + /* Caller must hold RTNL. */ int fib_table_flush(struct net *net, struct fib_table *tb) { -- cgit v1.2.3 From 21cb84c48ca0619181106f0f44f3802a989de024 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Nov 2016 10:15:12 -0800 Subject: net: busy-poll: remove need_resched() from sk_can_busy_loop() Now sk_busy_loop() can schedule by itself, we can remove need_resched() check from sk_can_busy_loop() Also add a const to its struct sock parameter. Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Adam Belay Cc: Tariq Toukan Cc: Yuval Mintz Cc: Ariel Elior Signed-off-by: David S. Miller --- include/net/busy_poll.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 2fbeb1313c0f..965e52b9b5a3 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -58,10 +58,9 @@ static inline unsigned long busy_loop_end_time(void) return busy_loop_us_clock() + ACCESS_ONCE(sysctl_net_busy_poll); } -static inline bool sk_can_busy_loop(struct sock *sk) +static inline bool sk_can_busy_loop(const struct sock *sk) { - return sk->sk_ll_usec && sk->sk_napi_id && - !need_resched() && !signal_pending(current); + return sk->sk_ll_usec && sk->sk_napi_id && !signal_pending(current); } -- cgit v1.2.3 From a23a8f5bc17e6209eefadcd1cb3d3e28a2cdf530 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Wed, 16 Nov 2016 10:05:46 +0100 Subject: lwtunnel: subtract tunnel headroom from mtu on output redirect This patch changes the lwtunnel_headroom() function which is called in ipv4_mtu() and ip6_mtu(), to also return the correct headroom value when the lwtunnel state is OUTPUT_REDIRECT. This patch enables e.g. SR-IPv6 encapsulations to work without manually setting the route mtu. Acked-by: Roopa Prabhu Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/net/lwtunnel.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 82e76fe1c1f7..d4c1c75b8862 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -94,7 +94,8 @@ static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate) static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, unsigned int mtu) { - if (lwtunnel_xmit_redirect(lwtstate) && lwtstate->headroom < mtu) + if ((lwtunnel_xmit_redirect(lwtstate) || + lwtunnel_output_redirect(lwtstate)) && lwtstate->headroom < mtu) return lwtstate->headroom; return 0; -- cgit v1.2.3 From 7fda702f9315e6f4a74fee155c540750788a2d66 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 15 Nov 2016 23:23:11 +0800 Subject: sctp: use new rhlist interface on sctp transport rhashtable Now sctp transport rhashtable uses hash(lport, dport, daddr) as the key to hash a node to one chain. If in one host thousands of assocs connect to one server with the same lport and different laddrs (although it's not a normal case), all the transports would be hashed into the same chain. It may cause to keep returning -EBUSY when inserting a new node, as the chain is too long and sctp inserts a transport node in a loop, which could even lead to system hangs there. The new rhlist interface works for this case that there are many nodes with the same key in one chain. It puts them into a list then makes this list be as a node of the chain. This patch is to replace rhashtable_ interface with rhltable_ interface. Since a chain would not be too long and it would not return -EBUSY with this fix when inserting a node, the reinsert loop is also removed here. Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 2 +- include/net/sctp/structs.h | 4 +- net/sctp/associola.c | 8 +++- net/sctp/input.c | 93 ++++++++++++++++++++++++++-------------------- net/sctp/socket.c | 7 +--- 5 files changed, 64 insertions(+), 50 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 31acc3f4f132..f0dcaebebddb 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -164,7 +164,7 @@ void sctp_backlog_migrate(struct sctp_association *assoc, struct sock *oldsk, struct sock *newsk); int sctp_transport_hashtable_init(void); void sctp_transport_hashtable_destroy(void); -void sctp_hash_transport(struct sctp_transport *t); +int sctp_hash_transport(struct sctp_transport *t); void sctp_unhash_transport(struct sctp_transport *t); struct sctp_transport *sctp_addrs_lookup_transport( struct net *net, diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index bd4a3ded7c87..92daabdc007d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -124,7 +124,7 @@ extern struct sctp_globals { /* This is the sctp port control hash. */ struct sctp_bind_hashbucket *port_hashtable; /* This is the hash of all transports. */ - struct rhashtable transport_hashtable; + struct rhltable transport_hashtable; /* Sizes of above hashtables. */ int ep_hashsize; @@ -761,7 +761,7 @@ static inline int sctp_packet_empty(struct sctp_packet *packet) struct sctp_transport { /* A list of transports. */ struct list_head transports; - struct rhash_head node; + struct rhlist_head node; /* Reference counting. */ atomic_t refcnt; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index f10d3397f917..68428e1f7181 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -700,11 +700,15 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* Set the peer's active state. */ peer->state = peer_state; + /* Add this peer into the transport hashtable */ + if (sctp_hash_transport(peer)) { + sctp_transport_free(peer); + return NULL; + } + /* Attach the remote transport to our asoc. */ list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list); asoc->peer.transport_count++; - /* Add this peer into the transport hashtable */ - sctp_hash_transport(peer); /* If we do not yet have a primary path, set one. */ if (!asoc->peer.primary_path) { diff --git a/net/sctp/input.c b/net/sctp/input.c index a01a56ec8b8c..458e506ef84b 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -790,10 +790,9 @@ hit: /* rhashtable for transport */ struct sctp_hash_cmp_arg { - const struct sctp_endpoint *ep; - const union sctp_addr *laddr; - const union sctp_addr *paddr; - const struct net *net; + const union sctp_addr *paddr; + const struct net *net; + u16 lport; }; static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, @@ -801,7 +800,6 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, { struct sctp_transport *t = (struct sctp_transport *)ptr; const struct sctp_hash_cmp_arg *x = arg->key; - struct sctp_association *asoc; int err = 1; if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr)) @@ -809,19 +807,10 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, if (!sctp_transport_hold(t)) return err; - asoc = t->asoc; - if (!net_eq(sock_net(asoc->base.sk), x->net)) + if (!net_eq(sock_net(t->asoc->base.sk), x->net)) + goto out; + if (x->lport != htons(t->asoc->base.bind_addr.port)) goto out; - if (x->ep) { - if (x->ep != asoc->ep) - goto out; - } else { - if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port)) - goto out; - if (!sctp_bind_addr_match(&asoc->base.bind_addr, - x->laddr, sctp_sk(asoc->base.sk))) - goto out; - } err = 0; out: @@ -851,11 +840,9 @@ static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed) const struct sctp_hash_cmp_arg *x = data; const union sctp_addr *paddr = x->paddr; const struct net *net = x->net; - u16 lport; + u16 lport = x->lport; u32 addr; - lport = x->ep ? htons(x->ep->base.bind_addr.port) : - x->laddr->v4.sin_port; if (paddr->sa.sa_family == AF_INET6) addr = jhash(&paddr->v6.sin6_addr, 16, seed); else @@ -875,29 +862,32 @@ static const struct rhashtable_params sctp_hash_params = { int sctp_transport_hashtable_init(void) { - return rhashtable_init(&sctp_transport_hashtable, &sctp_hash_params); + return rhltable_init(&sctp_transport_hashtable, &sctp_hash_params); } void sctp_transport_hashtable_destroy(void) { - rhashtable_destroy(&sctp_transport_hashtable); + rhltable_destroy(&sctp_transport_hashtable); } -void sctp_hash_transport(struct sctp_transport *t) +int sctp_hash_transport(struct sctp_transport *t) { struct sctp_hash_cmp_arg arg; + int err; if (t->asoc->temp) - return; + return 0; - arg.ep = t->asoc->ep; - arg.paddr = &t->ipaddr; arg.net = sock_net(t->asoc->base.sk); + arg.paddr = &t->ipaddr; + arg.lport = htons(t->asoc->base.bind_addr.port); -reinsert: - if (rhashtable_lookup_insert_key(&sctp_transport_hashtable, &arg, - &t->node, sctp_hash_params) == -EBUSY) - goto reinsert; + err = rhltable_insert_key(&sctp_transport_hashtable, &arg, + &t->node, sctp_hash_params); + if (err) + pr_err_once("insert transport fail, errno %d\n", err); + + return err; } void sctp_unhash_transport(struct sctp_transport *t) @@ -905,39 +895,62 @@ void sctp_unhash_transport(struct sctp_transport *t) if (t->asoc->temp) return; - rhashtable_remove_fast(&sctp_transport_hashtable, &t->node, - sctp_hash_params); + rhltable_remove(&sctp_transport_hashtable, &t->node, + sctp_hash_params); } +/* return a transport with holding it */ struct sctp_transport *sctp_addrs_lookup_transport( struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr) { + struct rhlist_head *tmp, *list; + struct sctp_transport *t; struct sctp_hash_cmp_arg arg = { - .ep = NULL, - .laddr = laddr, .paddr = paddr, .net = net, + .lport = laddr->v4.sin_port, }; - return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg, - sctp_hash_params); + list = rhltable_lookup(&sctp_transport_hashtable, &arg, + sctp_hash_params); + + rhl_for_each_entry_rcu(t, tmp, list, node) { + if (!sctp_transport_hold(t)) + continue; + + if (sctp_bind_addr_match(&t->asoc->base.bind_addr, + laddr, sctp_sk(t->asoc->base.sk))) + return t; + sctp_transport_put(t); + } + + return NULL; } +/* return a transport without holding it, as it's only used under sock lock */ struct sctp_transport *sctp_epaddr_lookup_transport( const struct sctp_endpoint *ep, const union sctp_addr *paddr) { struct net *net = sock_net(ep->base.sk); + struct rhlist_head *tmp, *list; + struct sctp_transport *t; struct sctp_hash_cmp_arg arg = { - .ep = ep, .paddr = paddr, .net = net, + .lport = htons(ep->base.bind_addr.port), }; - return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg, - sctp_hash_params); + list = rhltable_lookup(&sctp_transport_hashtable, &arg, + sctp_hash_params); + + rhl_for_each_entry_rcu(t, tmp, list, node) + if (ep == t->asoc->ep) + return t; + + return NULL; } /* Look up an association. */ @@ -951,7 +964,7 @@ static struct sctp_association *__sctp_lookup_association( struct sctp_association *asoc = NULL; t = sctp_addrs_lookup_transport(net, local, peer); - if (!t || !sctp_transport_hold(t)) + if (!t) goto out; asoc = t->asoc; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f23ad913dc7a..d5f4b4a8369b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4392,10 +4392,7 @@ int sctp_transport_walk_start(struct rhashtable_iter *iter) { int err; - err = rhashtable_walk_init(&sctp_transport_hashtable, iter, - GFP_KERNEL); - if (err) - return err; + rhltable_walk_enter(&sctp_transport_hashtable, iter); err = rhashtable_walk_start(iter); if (err && err != -EAGAIN) { @@ -4479,7 +4476,7 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), rcu_read_lock(); transport = sctp_addrs_lookup_transport(net, laddr, paddr); - if (!transport || !sctp_transport_hold(transport)) + if (!transport) goto out; rcu_read_unlock(); -- cgit v1.2.3 From e68b6e50fa359cc5aad4d2f8ac2bdbc1a8f4fd59 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Nov 2016 09:10:42 -0800 Subject: udp: enable busy polling for all sockets UDP busy polling is restricted to connected UDP sockets. This is because sk_busy_loop() only takes care of one NAPI context. There are cases where it could be extended. 1) Some hosts receive traffic on a single NIC, with one RX queue. 2) Some applications use SO_REUSEPORT and associated BPF filter to split the incoming traffic on one UDP socket per RX queue/thread/cpu 3) Some UDP sockets are used to send/receive traffic for one flow, but they do not bother with connect() This patch records the napi_id of first received skb, giving more reach to busy polling. Tested: lpaa23:~# echo 70 >/proc/sys/net/core/busy_read lpaa24:~# echo 70 >/proc/sys/net/core/busy_read lpaa23:~# for f in `seq 1 10`; do ./super_netperf 1 -H lpaa24 -t UDP_RR -l 5; done Before patch : 27867 28870 37324 41060 41215 36764 36838 44455 41282 43843 After patch : 73920 73213 70147 74845 71697 68315 68028 75219 70082 73707 Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/busy_poll.h | 28 +++++++++++++++++++--------- net/ipv4/udp.c | 2 ++ net/ipv6/udp.c | 2 ++ 3 files changed, 23 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 965e52b9b5a3..d73b849e29a6 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -80,11 +80,6 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, skb->napi_id = napi->napi_id; } -/* used in the protocol hanlder to propagate the napi_id to the socket */ -static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb) -{ - sk->sk_napi_id = skb->napi_id; -} #else /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long net_busy_loop_on(void) @@ -107,10 +102,6 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, { } -static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb) -{ -} - static inline bool busy_loop_timeout(unsigned long end_time) { return true; @@ -122,4 +113,23 @@ static inline bool sk_busy_loop(struct sock *sk, int nonblock) } #endif /* CONFIG_NET_RX_BUSY_POLL */ + +/* used in the protocol hanlder to propagate the napi_id to the socket */ +static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + sk->sk_napi_id = skb->napi_id; +#endif +} + +/* variant used for unconnected sockets */ +static inline void sk_mark_napi_id_once(struct sock *sk, + const struct sk_buff *skb) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + if (!sk->sk_napi_id) + sk->sk_napi_id = skb->napi_id; +#endif +} + #endif /* _LINUX_NET_BUSY_POLL_H */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9ae7c63a8b13..e1fc0116e8d5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1569,6 +1569,8 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); sk_incoming_cpu_update(sk); + } else { + sk_mark_napi_id_once(sk, skb); } rc = __udp_enqueue_schedule_skb(sk, skb); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 86a8cacd333b..4f99417d9b40 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -519,6 +519,8 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); sk_incoming_cpu_update(sk); + } else { + sk_mark_napi_id_once(sk, skb); } rc = __udp_enqueue_schedule_skb(sk, skb); -- cgit v1.2.3 From c7d03a00b56fc23c3a01a8353789ad257363e281 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 17 Nov 2016 04:58:21 +0300 Subject: netns: make struct pernet_operations::id unsigned int Make struct pernet_operations::id unsigned. There are 2 reasons to do so: 1) This field is really an index into an zero based array and thus is unsigned entity. Using negative value is out-of-bound access by definition. 2) On x86_64 unsigned 32-bit data which are mixed with pointers via array indexing or offsets added or subtracted to pointers are preffered to signed 32-bit data. "int" being used as an array index needs to be sign-extended to 64-bit before being used. void f(long *p, int i) { g(p[i]); } roughly translates to movsx rsi, esi mov rdi, [rsi+...] call g MOVSX is 3 byte instruction which isn't necessary if the variable is unsigned because x86_64 is zero extending by default. Now, there is net_generic() function which, you guessed it right, uses "int" as an array index: static inline void *net_generic(const struct net *net, int id) { ... ptr = ng->ptr[id - 1]; ... } And this function is used a lot, so those sign extensions add up. Patch snipes ~1730 bytes on allyesconfig kernel (without all junk messing with code generation): add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730) Unfortunately some functions actually grow bigger. This is a semmingly random artefact of code generation with register allocator being used differently. gcc decides that some variable needs to live in new r8+ registers and every access now requires REX prefix. Or it is shifted into r12, so [r12+0] addressing mode has to be used which is longer than [r8] However, overall balance is in negative direction: add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730) function old new delta nfsd4_lock 3886 3959 +73 tipc_link_build_proto_msg 1096 1140 +44 mac80211_hwsim_new_radio 2776 2808 +32 tipc_mon_rcv 1032 1058 +26 svcauth_gss_legacy_init 1413 1429 +16 tipc_bcbase_select_primary 379 392 +13 nfsd4_exchange_id 1247 1260 +13 nfsd4_setclientid_confirm 782 793 +11 ... put_client_renew_locked 494 480 -14 ip_set_sockfn_get 730 716 -14 geneve_sock_add 829 813 -16 nfsd4_sequence_done 721 703 -18 nlmclnt_lookup_host 708 686 -22 nfsd4_lockt 1085 1063 -22 nfs_get_client 1077 1050 -27 tcf_bpf_init 1106 1076 -30 nfsd4_encode_fattr 5997 5930 -67 Total: Before=154856051, After=154854321, chg -0.00% Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- drivers/infiniband/core/cma.c | 2 +- drivers/net/bonding/bond_main.c | 2 +- drivers/net/geneve.c | 2 +- drivers/net/gtp.c | 2 +- drivers/net/ppp/ppp_generic.c | 2 +- drivers/net/ppp/pppoe.c | 2 +- drivers/net/vxlan.c | 2 +- drivers/net/wireless/mac80211_hwsim.c | 2 +- fs/lockd/netns.h | 2 +- fs/lockd/svc.c | 2 +- fs/nfs/inode.c | 2 +- fs/nfs/netns.h | 2 +- fs/nfs_common/grace.c | 2 +- fs/nfsd/netns.h | 2 +- fs/nfsd/nfsctl.c | 2 +- include/net/bonding.h | 2 +- include/net/ip_tunnels.h | 6 +++--- include/net/net_namespace.h | 2 +- include/net/netfilter/nf_conntrack_l4proto.h | 2 +- include/net/netfilter/nf_conntrack_synproxy.h | 2 +- include/net/netns/generic.h | 2 +- kernel/audit.c | 2 +- net/8021q/vlan.c | 2 +- net/8021q/vlan.h | 2 +- net/bridge/br_netfilter_hooks.c | 2 +- net/caif/caif_dev.c | 2 +- net/core/net_namespace.c | 7 +++---- net/core/pktgen.c | 2 +- net/ipv4/ip_gre.c | 4 ++-- net/ipv4/ip_tunnel.c | 4 ++-- net/ipv4/ip_vti.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- net/ipv6/ip6_gre.c | 2 +- net/ipv6/ip6_tunnel.c | 2 +- net/ipv6/ip6_vti.c | 2 +- net/ipv6/sit.c | 2 +- net/ipv6/xfrm6_tunnel.c | 2 +- net/key/af_key.c | 2 +- net/netfilter/ipset/ip_set_core.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 2 +- net/netfilter/nf_conntrack_proto_dccp.c | 2 +- net/netfilter/nf_conntrack_proto_gre.c | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 2 +- net/netfilter/nf_conntrack_proto_udplite.c | 2 +- net/netfilter/nf_synproxy_core.c | 2 +- net/netfilter/nfnetlink_log.c | 2 +- net/netfilter/nfnetlink_queue.c | 2 +- net/netfilter/xt_hashlimit.c | 2 +- net/netfilter/xt_recent.c | 2 +- net/openvswitch/datapath.c | 2 +- net/openvswitch/datapath.h | 2 +- net/phonet/pn_dev.c | 2 +- net/rds/tcp.c | 2 +- net/sched/act_bpf.c | 2 +- net/sched/act_connmark.c | 2 +- net/sched/act_csum.c | 2 +- net/sched/act_gact.c | 2 +- net/sched/act_ife.c | 2 +- net/sched/act_ipt.c | 4 ++-- net/sched/act_mirred.c | 2 +- net/sched/act_nat.c | 2 +- net/sched/act_pedit.c | 2 +- net/sched/act_police.c | 2 +- net/sched/act_simple.c | 2 +- net/sched/act_skbedit.c | 2 +- net/sched/act_skbmod.c | 2 +- net/sched/act_tunnel_key.c | 2 +- net/sched/act_vlan.c | 2 +- net/sunrpc/netns.h | 2 +- net/sunrpc/sunrpc_syms.c | 2 +- net/tipc/core.c | 2 +- net/tipc/core.h | 2 +- 73 files changed, 80 insertions(+), 81 deletions(-) (limited to 'include/net') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 89a6b0546804..c68f4fe001d7 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -116,7 +116,7 @@ static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); static struct workqueue_struct *cma_wq; -static int cma_pernet_id; +static unsigned int cma_pernet_id; struct cma_pernet { struct idr tcp_ps; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5708f17e4cdf..8029dd4912b6 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -199,7 +199,7 @@ MODULE_PARM_DESC(lp_interval, "The number of seconds between instances where " atomic_t netpoll_block_tx = ATOMIC_INIT(0); #endif -int bond_net_id __read_mostly; +unsigned int bond_net_id __read_mostly; static __be32 arp_target[BOND_MAX_ARP_TARGETS]; static int arp_ip_count; diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 85a423a66478..90dc6b188607 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -43,7 +43,7 @@ struct geneve_net { struct list_head sock_list; }; -static int geneve_net_id; +static unsigned int geneve_net_id; union geneve_addr { struct sockaddr_in sin; diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 719d19f35673..98f10c216521 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -77,7 +77,7 @@ struct gtp_dev { struct hlist_head *addr_hash; }; -static int gtp_net_id __read_mostly; +static unsigned int gtp_net_id __read_mostly; struct gtp_net { struct list_head gtp_dev_list; diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 5489c0ec1d9a..3d3b1f4339ef 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -204,7 +204,7 @@ static atomic_t ppp_unit_count = ATOMIC_INIT(0); static atomic_t channel_count = ATOMIC_INIT(0); /* per-net private data for this module */ -static int ppp_net_id __read_mostly; +static unsigned int ppp_net_id __read_mostly; struct ppp_net { /* units to ppp mapping */ struct idr units_idr; diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 4ddae8118c85..f017c72bb7fd 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -95,7 +95,7 @@ static const struct proto_ops pppoe_ops; static const struct ppp_channel_ops pppoe_chan_ops; /* per-net private data for this module */ -static int pppoe_net_id __read_mostly; +static unsigned int pppoe_net_id __read_mostly; struct pppoe_net { /* * we could use _single_ hash table for all diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 0a3fd675408f..21e92be6e56c 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -52,7 +52,7 @@ static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); -static int vxlan_net_id; +static unsigned int vxlan_net_id; static struct rtnl_link_ops vxlan_link_ops; static const u8 all_zeros_mac[ETH_ALEN + 2]; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 8f366cc097e6..1293f8494985 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -250,7 +250,7 @@ static inline void hwsim_clear_chanctx_magic(struct ieee80211_chanctx_conf *c) cp->magic = 0; } -static int hwsim_net_id; +static unsigned int hwsim_net_id; static int hwsim_netgroup; diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h index 5426189406c1..fb8cac88251a 100644 --- a/fs/lockd/netns.h +++ b/fs/lockd/netns.h @@ -15,6 +15,6 @@ struct lockd_net { struct list_head nsm_handles; }; -extern int lockd_net_id; +extern unsigned int lockd_net_id; #endif diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index fc4084ef4736..1c13dd80744f 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -57,7 +57,7 @@ static struct task_struct *nlmsvc_task; static struct svc_rqst *nlmsvc_rqst; unsigned long nlmsvc_timeout; -int lockd_net_id; +unsigned int lockd_net_id; /* * These can be set at insmod time (useful for NFS as root filesystem), diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bf4ec5ecc97e..ce42dd00e4ee 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2015,7 +2015,7 @@ static void nfsiod_stop(void) destroy_workqueue(wq); } -int nfs_net_id; +unsigned int nfs_net_id; EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index fbce0d885d4c..5fbd2bde91ba 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -35,6 +35,6 @@ struct nfs_net { #endif }; -extern int nfs_net_id; +extern unsigned int nfs_net_id; #endif diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index fd8c9a5bcac4..420d3a0ab258 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -9,7 +9,7 @@ #include #include -static int grace_net_id; +static unsigned int grace_net_id; static DEFINE_SPINLOCK(grace_lock); /** diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index ee36efd5aece..3714231a9d0f 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -124,5 +124,5 @@ struct nfsd_net { /* Simple check to find out if a given net was properly initialized */ #define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl) -extern int nfsd_net_id; +extern unsigned int nfsd_net_id; #endif /* __NFSD_NETNS_H__ */ diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 36b2af931e06..2857e46d5cc5 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1201,7 +1201,7 @@ static int create_proc_exports_entry(void) } #endif -int nfsd_net_id; +unsigned int nfsd_net_id; static __net_init int nfsd_init_net(struct net *net) { diff --git a/include/net/bonding.h b/include/net/bonding.h index f32f7ef8a23a..3c857778a6ca 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -681,7 +681,7 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip) } /* exported from bond_main.c */ -extern int bond_net_id; +extern unsigned int bond_net_id; extern const struct bond_parm_tbl bond_lacp_tbl[]; extern const struct bond_parm_tbl xmit_hashtype_tbl[]; extern const struct bond_parm_tbl arp_validate_tbl[]; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 59557c07904b..e893fe43dd13 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -129,7 +129,7 @@ struct ip_tunnel { #endif struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */ unsigned int prl_count; /* # of entries in PRL */ - int ip_tnl_net_id; + unsigned int ip_tnl_net_id; struct gro_cells gro_cells; bool collect_md; bool ignore_df; @@ -248,7 +248,7 @@ void ip_tunnel_uninit(struct net_device *dev); void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); struct net *ip_tunnel_get_link_net(const struct net_device *dev); int ip_tunnel_get_iflink(const struct net_device *dev); -int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, +int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops); @@ -275,7 +275,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p); -void ip_tunnel_setup(struct net_device *dev, int net_id); +void ip_tunnel_setup(struct net_device *dev, unsigned int net_id); struct ip_tunnel_encap_ops { size_t (*encap_hlen)(struct ip_tunnel_encap *e); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index fc4f757107df..d7149e93a60a 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -291,7 +291,7 @@ struct pernet_operations { int (*init)(struct net *net); void (*exit)(struct net *net); void (*exit_batch)(struct list_head *net_exit_list); - int *id; + unsigned int *id; size_t size; }; diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 2152b70626d5..e7b836590f0b 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -98,7 +98,7 @@ struct nf_conntrack_l4proto { const struct nla_policy *nla_policy; } ctnl_timeout; #endif - int *net_id; + unsigned int *net_id; /* Init l4proto pernet data */ int (*init_net)(struct net *net, u_int16_t proto); diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h index e6937318546c..b0ca402c1f72 100644 --- a/include/net/netfilter/nf_conntrack_synproxy.h +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -54,7 +54,7 @@ struct synproxy_net { struct synproxy_stats __percpu *stats; }; -extern int synproxy_net_id; +extern unsigned int synproxy_net_id; static inline struct synproxy_net *synproxy_pernet(struct net *net) { return net_generic(net, synproxy_net_id); diff --git a/include/net/netns/generic.h b/include/net/netns/generic.h index 70e158551704..d315786bcfd7 100644 --- a/include/net/netns/generic.h +++ b/include/net/netns/generic.h @@ -31,7 +31,7 @@ struct net_generic { void *ptr[0]; }; -static inline void *net_generic(const struct net *net, int id) +static inline void *net_generic(const struct net *net, unsigned int id) { struct net_generic *ng; void *ptr; diff --git a/kernel/audit.c b/kernel/audit.c index f1ca11613379..92c463d2d1c7 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -126,7 +126,7 @@ static atomic_t audit_lost = ATOMIC_INIT(0); /* The netlink socket. */ static struct sock *audit_sock; -static int audit_net_id; +static unsigned int audit_net_id; /* Hash for inode-based rules */ struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS]; diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index a79365574531..691f0ad7067d 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -44,7 +44,7 @@ /* Global VLAN variables */ -int vlan_net_id __read_mostly; +unsigned int vlan_net_id __read_mostly; const char vlan_fullname[] = "802.1Q VLAN Support"; const char vlan_version[] = DRV_VERSION; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index cc1557978066..df8bd65dd370 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -159,7 +159,7 @@ void vlan_netlink_fini(void); extern struct rtnl_link_ops vlan_link_ops; -extern int vlan_net_id; +extern unsigned int vlan_net_id; struct proc_dir_entry; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 8155bd2a5138..83d937f4415e 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -46,7 +46,7 @@ #include #endif -static int brnf_net_id __read_mostly; +static unsigned int brnf_net_id __read_mostly; struct brnf_net { bool enabled; diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index d730a0f68f46..2d38b6e34203 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -52,7 +52,7 @@ struct caif_net { struct caif_device_entry_list caifdevs; }; -static int caif_net_id; +static unsigned int caif_net_id; static int q_high = 50; /* Percent */ struct cfcnfg *get_cfcnfg(struct net *net) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 1309d78e2a64..35d37b196e67 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -55,7 +55,7 @@ static struct net_generic *net_alloc_generic(void) return ng; } -static int net_assign_generic(struct net *net, int id, void *data) +static int net_assign_generic(struct net *net, unsigned int id, void *data) { struct net_generic *ng, *old_ng; @@ -122,8 +122,7 @@ out: static void ops_free(const struct pernet_operations *ops, struct net *net) { if (ops->id && ops->size) { - int id = *ops->id; - kfree(net_generic(net, id)); + kfree(net_generic(net, *ops->id)); } } @@ -881,7 +880,7 @@ again: } return error; } - max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id); + max_gen_ptrs = max(max_gen_ptrs, *ops->id); } error = __register_pernet_operations(list, ops); if (error) { diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 306b8f0e03c1..8e69ce472236 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -413,7 +413,7 @@ struct pktgen_hdr { }; -static int pg_net_id __read_mostly; +static unsigned int pg_net_id __read_mostly; struct pktgen_net { struct net *net; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 576f705d8180..78fd62048335 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -113,8 +113,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static struct rtnl_link_ops ipgre_link_ops __read_mostly; static int ipgre_tunnel_init(struct net_device *dev); -static int ipgre_net_id __read_mostly; -static int gre_tap_net_id __read_mostly; +static unsigned int ipgre_net_id __read_mostly; +static unsigned int gre_tap_net_id __read_mostly; static void ipgre_err(struct sk_buff *skb, u32 info, const struct tnl_ptk_info *tpi) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 12a92e3349ed..823abaef006b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -994,7 +994,7 @@ int ip_tunnel_get_iflink(const struct net_device *dev) } EXPORT_SYMBOL(ip_tunnel_get_iflink); -int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, +int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname) { struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); @@ -1196,7 +1196,7 @@ void ip_tunnel_uninit(struct net_device *dev) EXPORT_SYMBOL_GPL(ip_tunnel_uninit); /* Do least required initialization, rest of init is done in tunnel_init call */ -void ip_tunnel_setup(struct net_device *dev, int net_id) +void ip_tunnel_setup(struct net_device *dev, unsigned int net_id) { struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->ip_tnl_net_id = net_id; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 5d7944f394d9..8b14f1404c8f 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -46,7 +46,7 @@ static struct rtnl_link_ops vti_link_ops __read_mostly; -static int vti_net_id __read_mostly; +static unsigned int vti_net_id __read_mostly; static int vti_tunnel_init(struct net_device *dev); static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index c9392589c415..79489f017854 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -121,7 +121,7 @@ static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); -static int ipip_net_id __read_mostly; +static unsigned int ipip_net_id __read_mostly; static int ipip_tunnel_init(struct net_device *dev); static struct rtnl_link_ops ipip_link_ops __read_mostly; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 4a9e6db9df8d..e6e206fa86c8 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -62,7 +62,7 @@ struct clusterip_config { static const struct file_operations clusterip_proc_fops; #endif -static int clusterip_net_id __read_mostly; +static unsigned int clusterip_net_id __read_mostly; struct clusterip_net { struct list_head configs; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 710bc79f9113..75b6108234dd 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -64,7 +64,7 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); #define IP6_GRE_HASH_SIZE_SHIFT 5 #define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT) -static int ip6gre_net_id __read_mostly; +static unsigned int ip6gre_net_id __read_mostly; struct ip6gre_net { struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE]; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 259e8507d2cd..d3c619eda051 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -83,7 +83,7 @@ static int ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); static struct rtnl_link_ops ip6_link_ops __read_mostly; -static int ip6_tnl_net_id __read_mostly; +static unsigned int ip6_tnl_net_id __read_mostly; struct ip6_tnl_net { /* the IPv6 tunnel fallback device */ struct net_device *fb_tnl_dev; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index af3f0e011265..c476bb8e9cdb 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -64,7 +64,7 @@ static int vti6_dev_init(struct net_device *dev); static void vti6_dev_setup(struct net_device *dev); static struct rtnl_link_ops vti6_link_ops __read_mostly; -static int vti6_net_id __read_mostly; +static unsigned int vti6_net_id __read_mostly; struct vti6_net { /* the vti6 tunnel fallback device */ struct net_device *fb_tnl_dev; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index dc7a3449ffc1..0355231162b8 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -76,7 +76,7 @@ static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, __be32 *v4dst); static struct rtnl_link_ops sit_link_ops __read_mostly; -static int sit_net_id __read_mostly; +static unsigned int sit_net_id __read_mostly; struct sit_net { struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE]; struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE]; diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index e1c0bbe7996c..d7b731a78d09 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -44,7 +44,7 @@ struct xfrm6_tunnel_net { u32 spi; }; -static int xfrm6_tunnel_net_id __read_mostly; +static unsigned int xfrm6_tunnel_net_id __read_mostly; static inline struct xfrm6_tunnel_net *xfrm6_tunnel_pernet(struct net *net) { return net_generic(net, xfrm6_tunnel_net_id); diff --git a/net/key/af_key.c b/net/key/af_key.c index f9c9ecb0cdd3..c6252ed42c1d 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -36,7 +36,7 @@ #define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x)) #define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x)) -static int pfkey_net_id __read_mostly; +static unsigned int pfkey_net_id __read_mostly; struct netns_pfkey { /* List of all pfkey sockets. */ struct hlist_head table; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 23345d2d136a..c296f9b606d4 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -36,7 +36,7 @@ struct ip_set_net { bool is_destroyed; /* all sets are destroyed */ }; -static int ip_set_net_id __read_mostly; +static unsigned int ip_set_net_id __read_mostly; static inline struct ip_set_net *ip_set_pernet(struct net *net) { diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 2c1b498a7a27..db40050f8785 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -70,7 +70,7 @@ EXPORT_SYMBOL(ip_vs_get_debug_level); #endif EXPORT_SYMBOL(ip_vs_new_conn_out); -static int ip_vs_net_id __read_mostly; +static unsigned int ip_vs_net_id __read_mostly; /* netns cnt used for uniqueness */ static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index ac8976964975..073b047314dc 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -385,7 +385,7 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = }; /* this module per-net specifics */ -static int dccp_net_id __read_mostly; +static unsigned int dccp_net_id __read_mostly; struct dccp_net { struct nf_proto_net pn; int dccp_loose; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index ff405c9183f1..87bb40a3feb5 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -53,7 +53,7 @@ static unsigned int gre_timeouts[GRE_CT_MAX] = { [GRE_CT_REPLIED] = 180*HZ, }; -static int proto_gre_net_id __read_mostly; +static unsigned int proto_gre_net_id __read_mostly; struct netns_proto_gre { struct nf_proto_net nf; rwlock_t keymap_lock; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 17c0ade23fd8..d096c2d6b87b 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -144,7 +144,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { } }; -static int sctp_net_id __read_mostly; +static unsigned int sctp_net_id __read_mostly; struct sctp_net { struct nf_proto_net pn; unsigned int timeouts[SCTP_CONNTRACK_MAX]; diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 8cdb4b1bf933..7808604c70a2 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -35,7 +35,7 @@ static unsigned int udplite_timeouts[UDPLITE_CT_MAX] = { [UDPLITE_CT_REPLIED] = 180*HZ, }; -static int udplite_net_id __read_mostly; +static unsigned int udplite_net_id __read_mostly; struct udplite_net { struct nf_proto_net pn; unsigned int timeouts[UDPLITE_CT_MAX]; diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index c8a4a48bced9..7c6d1fbe38b9 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -24,7 +24,7 @@ #include #include -int synproxy_net_id; +unsigned int synproxy_net_id; EXPORT_SYMBOL_GPL(synproxy_net_id); bool diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 7435505037b7..763cb4d54e8d 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -80,7 +80,7 @@ struct nfulnl_instance { #define INSTANCE_BUCKETS 16 -static int nfnl_log_net_id __read_mostly; +static unsigned int nfnl_log_net_id __read_mostly; struct nfnl_log_net { spinlock_t instances_lock; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 1e33115b399f..be7627b80400 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -77,7 +77,7 @@ struct nfqnl_instance { typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); -static int nfnl_queue_net_id __read_mostly; +static unsigned int nfnl_queue_net_id __read_mostly; #define INSTANCE_BUCKETS 16 struct nfnl_queue_net { diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index b89b688e9d01..10063408141d 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -49,7 +49,7 @@ struct hashlimit_net { struct proc_dir_entry *ip6t_hashlimit; }; -static int hashlimit_net_id; +static unsigned int hashlimit_net_id; static inline struct hashlimit_net *hashlimit_pernet(struct net *net) { return net_generic(net, hashlimit_net_id); diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index bf250000e084..1d89a4eaf841 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -95,7 +95,7 @@ struct recent_net { #endif }; -static int recent_net_id __read_mostly; +static unsigned int recent_net_id __read_mostly; static inline struct recent_net *recent_pernet(struct net *net) { diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 1402f1be642d..2d4c4d3911c0 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -58,7 +58,7 @@ #include "vport-internal_dev.h" #include "vport-netdev.h" -int ovs_net_id __read_mostly; +unsigned int ovs_net_id __read_mostly; static struct genl_family dp_packet_genl_family; static struct genl_family dp_flow_genl_family; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index ab85c1cae255..1c6e9377436d 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -144,7 +144,7 @@ struct ovs_net { bool xt_label; }; -extern int ovs_net_id; +extern unsigned int ovs_net_id; void ovs_lock(void); void ovs_unlock(void); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index a58680016472..2cb4c5dfad6f 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -44,7 +44,7 @@ struct phonet_net { struct phonet_routes routes; }; -static int phonet_net_id __read_mostly; +static unsigned int phonet_net_id __read_mostly; static struct phonet_net *phonet_pernet(struct net *net) { diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 3296a6ac583a..1a0399dea764 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -366,7 +366,7 @@ struct rds_transport rds_tcp_transport = { .t_mp_capable = 1, }; -static int rds_tcp_netid; +static unsigned int rds_tcp_netid; /* per-network namespace private data for this module */ struct rds_tcp_net { diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 9ff06cfbcdec..1aa4ecf41baf 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -33,7 +33,7 @@ struct tcf_bpf_cfg { bool is_ebpf; }; -static int bpf_net_id; +static unsigned int bpf_net_id; static struct tc_action_ops act_bpf_ops; static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index eae07a2e774d..ab8062909962 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -30,7 +30,7 @@ #define CONNMARK_TAB_MASK 3 -static int connmark_net_id; +static unsigned int connmark_net_id; static struct tc_action_ops act_connmark_ops; static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index e0defcef376d..a0edd80a44db 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -42,7 +42,7 @@ static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, }; -static int csum_net_id; +static unsigned int csum_net_id; static struct tc_action_ops act_csum_ops; static int tcf_csum_init(struct net *net, struct nlattr *nla, diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e0aa30f83c6c..e6c874a2b283 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -25,7 +25,7 @@ #define GACT_TAB_MASK 15 -static int gact_net_id; +static unsigned int gact_net_id; static struct tc_action_ops act_gact_ops; #ifdef CONFIG_GACT_PROB diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 95c463cbb9a6..80b848d3f096 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -35,7 +35,7 @@ #define IFE_TAB_MASK 15 -static int ife_net_id; +static unsigned int ife_net_id; static int max_metacnt = IFE_META_MAX + 1; static struct tc_action_ops act_ife_ops; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index ce7ea6c1c50d..992ef8d624f1 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -30,10 +30,10 @@ #define IPT_TAB_MASK 15 -static int ipt_net_id; +static unsigned int ipt_net_id; static struct tc_action_ops act_ipt_ops; -static int xt_net_id; +static unsigned int xt_net_id; static struct tc_action_ops act_xt_ops; static int ipt_init_target(struct xt_entry_target *t, char *table, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6073a1132725..b2d417b8f46c 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -70,7 +70,7 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, }; -static int mirred_net_id; +static unsigned int mirred_net_id; static struct tc_action_ops act_mirred_ops; static bool dev_is_mac_header_xmit(const struct net_device *dev) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 8e8b0cc30704..9b6aec665495 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -31,7 +31,7 @@ #define NAT_TAB_MASK 15 -static int nat_net_id; +static unsigned int nat_net_id; static struct tc_action_ops act_nat_ops; static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b54d56d4959b..eda322045e75 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -25,7 +25,7 @@ #define PEDIT_TAB_MASK 15 -static int pedit_net_id; +static unsigned int pedit_net_id; static struct tc_action_ops act_pedit_ops; static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { diff --git a/net/sched/act_police.c b/net/sched/act_police.c index d1bd248fe146..c990b73a6c85 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -55,7 +55,7 @@ struct tc_police_compat { /* Each policer is serialized by its individual spinlock */ -static int police_net_id; +static unsigned int police_net_id; static struct tc_action_ops act_police_ops; static int tcf_act_police_walker(struct net *net, struct sk_buff *skb, diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 289af6f9bb3b..823a73ad0c60 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -26,7 +26,7 @@ #define SIMP_TAB_MASK 7 -static int simp_net_id; +static unsigned int simp_net_id; static struct tc_action_ops act_simp_ops; #define SIMP_MAX_DATA 32 diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 024f3a3afeff..06ccae3c12ee 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -29,7 +29,7 @@ #define SKBEDIT_TAB_MASK 15 -static int skbedit_net_id; +static unsigned int skbedit_net_id; static struct tc_action_ops act_skbedit_ops; static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index e7d96381c908..3b7074e23024 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -22,7 +22,7 @@ #define SKBMOD_TAB_MASK 15 -static int skbmod_net_id; +static unsigned int skbmod_net_id; static struct tc_action_ops act_skbmod_ops; #define MAX_EDIT_LEN ETH_HLEN diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index edc720f11687..7af712526f01 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -22,7 +22,7 @@ #define TUNNEL_KEY_TAB_MASK 15 -static int tunnel_key_net_id; +static unsigned int tunnel_key_net_id; static struct tc_action_ops act_tunnel_key_ops; static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index b57fcbcefea1..19e0dba305ce 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -21,7 +21,7 @@ #define VLAN_TAB_MASK 15 -static int vlan_net_id; +static unsigned int vlan_net_id; static struct tc_action_ops act_vlan_ops; static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index df5826876535..394ce523174c 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -34,7 +34,7 @@ struct sunrpc_net { struct proc_dir_entry *use_gssp_proc; }; -extern int sunrpc_net_id; +extern unsigned int sunrpc_net_id; int ip_map_cache_create(struct net *); void ip_map_cache_destroy(struct net *); diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index ee5d3d253102..d1c330a7953a 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -24,7 +24,7 @@ #include "netns.h" -int sunrpc_net_id; +unsigned int sunrpc_net_id; EXPORT_SYMBOL_GPL(sunrpc_net_id); static __net_init int sunrpc_init_net(struct net *net) diff --git a/net/tipc/core.c b/net/tipc/core.c index 236b043a4156..0b982d048fb9 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -47,7 +47,7 @@ #include /* configurable TIPC parameters */ -int tipc_net_id __read_mostly; +unsigned int tipc_net_id __read_mostly; int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ static int __net_init tipc_init_net(struct net *net) diff --git a/net/tipc/core.h b/net/tipc/core.h index a1845fb27d80..5cc5398be722 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -74,7 +74,7 @@ struct tipc_monitor; #define MAX_BEARERS 3 #define TIPC_DEF_MON_THRESHOLD 32 -extern int tipc_net_id __read_mostly; +extern unsigned int tipc_net_id __read_mostly; extern int sysctl_tipc_rmem[3] __read_mostly; extern int sysctl_tipc_named_timeout __read_mostly; -- cgit v1.2.3 From 0f5258cd91e9d78a1ee30696314bec3c33321a93 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 18 Nov 2016 09:41:46 +0000 Subject: netns: fix get_net_ns_by_fd(int pid) typo The argument to get_net_ns_by_fd() is a /proc/$PID/ns/net file descriptor not a pid. Fix the typo. Signed-off-by: Stefan Hajnoczi Acked-by: Rami Rosen Signed-off-by: David S. Miller --- include/net/net_namespace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index fc4f757107df..0940598c002f 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -170,7 +170,7 @@ static inline struct net *copy_net_ns(unsigned long flags, extern struct list_head net_namespace_list; struct net *get_net_ns_by_pid(pid_t pid); -struct net *get_net_ns_by_fd(int pid); +struct net *get_net_ns_by_fd(int fd); #ifdef CONFIG_SYSCTL void ipx_register_sysctl(void); -- cgit v1.2.3 From 3b2c75d371740fb0dcd0c9eac545ab1dd28b4706 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 19 Nov 2016 03:54:35 +0300 Subject: netlink: use "unsigned int" in nla_next() ->nla_len is unsigned entity (it's length after all) and u16, thus it can't overflow when being aligned into int/unsigned int. (nlmsg_next has the same code, but I didn't yet convince myself it is correct to do so). There is pointer arithmetic in this function and offset being unsigned is better: add/remove: 0/0 grow/shrink: 1/64 up/down: 5/-309 (-304) function old new delta nl80211_set_wiphy 1444 1449 +5 team_nl_cmd_options_set 997 995 -2 tcf_em_tree_validate 872 870 -2 switchdev_port_bridge_setlink 352 350 -2 switchdev_port_br_afspec 312 310 -2 rtm_to_fib_config 428 426 -2 qla4xxx_sysfs_ddb_set_param 2193 2191 -2 qla4xxx_iface_set_param 4470 4468 -2 ovs_nla_free_flow_actions 152 150 -2 output_userspace 518 516 -2 ... nl80211_set_reg 654 649 -5 validate_scan_freqs 148 142 -6 validate_linkmsg 288 282 -6 nl80211_parse_connkeys 489 483 -6 nlattr_set 231 224 -7 nf_tables_delsetelem 267 260 -7 do_setlink 3416 3408 -8 netlbl_cipsov4_add_std 1672 1659 -13 nl80211_parse_sched_scan 2902 2888 -14 nl80211_trigger_scan 1738 1720 -18 do_execute_actions 2821 2738 -83 Total: Before=154865355, After=154865051, chg -0.00% Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index a34f53acb6d6..d3938f11ae52 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -713,7 +713,7 @@ static inline int nla_ok(const struct nlattr *nla, int remaining) */ static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining) { - int totlen = NLA_ALIGN(nla->nla_len); + unsigned int totlen = NLA_ALIGN(nla->nla_len); *remaining -= totlen; return (struct nlattr *) ((char *) nla + totlen); -- cgit v1.2.3 From e97991832a4ea4a5f47d65f068a4c966a2eb5730 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 21 Nov 2016 14:18:38 +0100 Subject: tcp: make undo_cwnd mandatory for congestion modules The undo_cwnd fallback in the stack doubles cwnd based on ssthresh, which un-does reno halving behaviour. It seems more appropriate to let congctl algorithms pair .ssthresh and .undo_cwnd properly. Add a 'tcp_reno_undo_cwnd' function and wire it up for all congestion algorithms that used to rely on the fallback. Cc: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_cong.c | 14 ++++++++++++-- net/ipv4/tcp_dctcp.c | 1 + net/ipv4/tcp_hybla.c | 1 + net/ipv4/tcp_input.c | 5 +---- net/ipv4/tcp_lp.c | 1 + net/ipv4/tcp_vegas.c | 1 + net/ipv4/tcp_westwood.c | 1 + 8 files changed, 19 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 123979fe12bf..7de80739adab 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -958,6 +958,7 @@ u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); u32 tcp_reno_ssthresh(struct sock *sk); +u32 tcp_reno_undo_cwnd(struct sock *sk); void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); extern struct tcp_congestion_ops tcp_reno; diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 1294af4e0127..38905ec5f508 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -68,8 +68,9 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) { int ret = 0; - /* all algorithms must implement ssthresh and cong_avoid ops */ - if (!ca->ssthresh || !(ca->cong_avoid || ca->cong_control)) { + /* all algorithms must implement these */ + if (!ca->ssthresh || !ca->undo_cwnd || + !(ca->cong_avoid || ca->cong_control)) { pr_err("%s does not implement required ops\n", ca->name); return -EINVAL; } @@ -441,10 +442,19 @@ u32 tcp_reno_ssthresh(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); +u32 tcp_reno_undo_cwnd(struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + + return max(tp->snd_cwnd, tp->snd_ssthresh << 1); +} +EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd); + struct tcp_congestion_ops tcp_reno = { .flags = TCP_CONG_NON_RESTRICTED, .name = "reno", .owner = THIS_MODULE, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, + .undo_cwnd = tcp_reno_undo_cwnd, }; diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 51139175bf61..bde22ebb92a8 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -342,6 +342,7 @@ static struct tcp_congestion_ops dctcp __read_mostly = { static struct tcp_congestion_ops dctcp_reno __read_mostly = { .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, + .undo_cwnd = tcp_reno_undo_cwnd, .get_info = dctcp_get_info, .owner = THIS_MODULE, .name = "dctcp-reno", diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 083831e359df..0f7175c3338e 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -166,6 +166,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked) static struct tcp_congestion_ops tcp_hybla __read_mostly = { .init = hybla_init, .ssthresh = tcp_reno_ssthresh, + .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = hybla_cong_avoid, .set_state = hybla_state, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a70046fea0e8..22e6a2097ff6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2394,10 +2394,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) if (tp->prior_ssthresh) { const struct inet_connection_sock *icsk = inet_csk(sk); - if (icsk->icsk_ca_ops->undo_cwnd) - tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk); - else - tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); + tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk); if (tp->prior_ssthresh > tp->snd_ssthresh) { tp->snd_ssthresh = tp->prior_ssthresh; diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index c67ece1390c2..046fd3910873 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -316,6 +316,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) static struct tcp_congestion_ops tcp_lp __read_mostly = { .init = tcp_lp_init, .ssthresh = tcp_reno_ssthresh, + .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = tcp_lp_cong_avoid, .pkts_acked = tcp_lp_pkts_acked, diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 4c4bac1b5eab..218cfcc77650 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -307,6 +307,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_get_info); static struct tcp_congestion_ops tcp_vegas __read_mostly = { .init = tcp_vegas_init, .ssthresh = tcp_reno_ssthresh, + .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = tcp_vegas_cong_avoid, .pkts_acked = tcp_vegas_pkts_acked, .set_state = tcp_vegas_state, diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 4b03a2e2a050..fed66dc0e0f5 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -278,6 +278,7 @@ static struct tcp_congestion_ops tcp_westwood __read_mostly = { .init = tcp_westwood_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, + .undo_cwnd = tcp_reno_undo_cwnd, .cwnd_event = tcp_westwood_event, .in_ack_event = tcp_westwood_ack, .get_info = tcp_westwood_info, -- cgit v1.2.3 From 39385cb5f3274735b03ed1f8e7ff517b02a0beed Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sat, 12 Nov 2016 17:03:07 +0200 Subject: Bluetooth: Fix using the correct source address type The hci_get_route() API is used to look up local HCI devices, however so far it has been incapable of dealing with anything else than the public address of HCI devices. This completely breaks with LE-only HCI devices that do not come with a public address, but use a static random address instead. This patch exteds the hci_get_route() API with a src_type parameter that's used for comparing with the right address of each HCI device. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/6lowpan.c | 4 ++-- net/bluetooth/hci_conn.c | 26 ++++++++++++++++++++++++-- net/bluetooth/l2cap_core.c | 2 +- net/bluetooth/rfcomm/tty.c | 2 +- net/bluetooth/sco.c | 2 +- 6 files changed, 30 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f00bf667ec33..554671c81f4a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1018,7 +1018,7 @@ static inline void hci_set_drvdata(struct hci_dev *hdev, void *data) } struct hci_dev *hci_dev_get(int index); -struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src); +struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, u8 src_type); struct hci_dev *hci_alloc_dev(void); void hci_free_dev(struct hci_dev *hdev); diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index d020299baba4..1904a93f47d5 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -1090,7 +1090,6 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, { struct hci_conn *hcon; struct hci_dev *hdev; - bdaddr_t *src = BDADDR_ANY; int n; n = sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu", @@ -1101,7 +1100,8 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, if (n < 7) return -EINVAL; - hdev = hci_get_route(addr, src); + /* The LE_PUBLIC address type is ignored because of BDADDR_ANY */ + hdev = hci_get_route(addr, BDADDR_ANY, BDADDR_LE_PUBLIC); if (!hdev) return -ENOENT; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 3809617aa98d..dc59eae54717 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -613,7 +613,7 @@ int hci_conn_del(struct hci_conn *conn) return 0; } -struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src) +struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type) { int use_src = bacmp(src, BDADDR_ANY); struct hci_dev *hdev = NULL, *d; @@ -634,7 +634,29 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src) */ if (use_src) { - if (!bacmp(&d->bdaddr, src)) { + bdaddr_t id_addr; + u8 id_addr_type; + + if (src_type == BDADDR_BREDR) { + if (!lmp_bredr_capable(d)) + continue; + bacpy(&id_addr, &d->bdaddr); + id_addr_type = BDADDR_BREDR; + } else { + if (!lmp_le_capable(d)) + continue; + + hci_copy_identity_address(d, &id_addr, + &id_addr_type); + + /* Convert from HCI to three-value type */ + if (id_addr_type == ADDR_LE_DEV_PUBLIC) + id_addr_type = BDADDR_LE_PUBLIC; + else + id_addr_type = BDADDR_LE_RANDOM; + } + + if (!bacmp(&id_addr, src) && id_addr_type == src_type) { hdev = d; break; } } else { diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index d4cad29b033f..577f1c01454a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7060,7 +7060,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, BT_DBG("%pMR -> %pMR (type %u) psm 0x%2.2x", &chan->src, dst, dst_type, __le16_to_cpu(psm)); - hdev = hci_get_route(dst, &chan->src); + hdev = hci_get_route(dst, &chan->src, chan->src_type); if (!hdev) return -EHOSTUNREACH; diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 8e385a0ae60e..2f2cb5e27cdd 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -178,7 +178,7 @@ static void rfcomm_reparent_device(struct rfcomm_dev *dev) struct hci_dev *hdev; struct hci_conn *conn; - hdev = hci_get_route(&dev->dst, &dev->src); + hdev = hci_get_route(&dev->dst, &dev->src, BDADDR_BREDR); if (!hdev) return; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index f52bcbf2e58c..3125ce670c2f 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -219,7 +219,7 @@ static int sco_connect(struct sock *sk) BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst); - hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src); + hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR); if (!hdev) return -EHOSTUNREACH; -- cgit v1.2.3 From 7223ecd4669921cb2a709193521967aaa2b06862 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 16 Nov 2016 15:13:36 +0100 Subject: netfilter: nat: switch to new rhlist interface I got offlist bug report about failing connections and high cpu usage. This happens because we hit 'elasticity' checks in rhashtable that refuses bucket list exceeding 16 entries. The nat bysrc hash unfortunately needs to insert distinct objects that share same key and are identical (have same source tuple), this cannot be avoided. Switch to the rhlist interface which is designed for this. The nulls_base is removed here, I don't think its needed: A (unlikely) false positive results in unneeded port clash resolution, a false negative results in packet drop during conntrack confirmation, when we try to insert the duplicate into main conntrack hash table. Tested by adding multiple ip addresses to host, then adding iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE ... and then creating multiple connections, from same source port but different addresses: for i in $(seq 2000 2032);do nc -p 1234 192.168.7.1 $i > /dev/null & done (all of these then get hashed to same bysource slot) Then, to test that nat conflict resultion is working: nc -s 10.0.0.1 -p 1234 192.168.7.1 2000 nc -s 10.0.0.2 -p 1234 192.168.7.1 2000 tcp .. src=10.0.0.1 dst=192.168.7.1 sport=1234 dport=2000 src=192.168.7.1 dst=192.168.7.10 sport=2000 dport=1024 [ASSURED] tcp .. src=10.0.0.2 dst=192.168.7.1 sport=1234 dport=2000 src=192.168.7.1 dst=192.168.7.10 sport=2000 dport=1025 [ASSURED] tcp .. src=192.168.7.10 dst=192.168.7.1 sport=1234 dport=2000 src=192.168.7.1 dst=192.168.7.10 sport=2000 dport=1234 [ASSURED] tcp .. src=192.168.7.10 dst=192.168.7.1 sport=1234 dport=2001 src=192.168.7.1 dst=192.168.7.10 sport=2001 dport=1234 [ASSURED] [..] -> nat altered source ports to 1024 and 1025, respectively. This can also be confirmed on destination host which shows ESTAB 0 0 192.168.7.1:2000 192.168.7.10:1024 ESTAB 0 0 192.168.7.1:2000 192.168.7.10:1025 ESTAB 0 0 192.168.7.1:2000 192.168.7.10:1234 Cc: Herbert Xu Fixes: 870190a9ec907 ("netfilter: nat: convert nat bysrc hash to rhashtable") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 +- net/netfilter/nf_nat_core.c | 40 +++++++++++++++++++++--------------- 2 files changed, 25 insertions(+), 17 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 50418052a520..dc143ada9762 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -118,7 +118,7 @@ struct nf_conn { struct nf_ct_ext *ext; #if IS_ENABLED(CONFIG_NF_NAT) - struct rhash_head nat_bysource; + struct rhlist_head nat_bysource; #endif /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index c632429706eb..5b9c884a452e 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -42,7 +42,7 @@ struct nf_nat_conn_key { const struct nf_conntrack_zone *zone; }; -static struct rhashtable nf_nat_bysource_table; +static struct rhltable nf_nat_bysource_table; inline const struct nf_nat_l3proto * __nf_nat_l3proto_find(u8 family) @@ -207,7 +207,6 @@ static struct rhashtable_params nf_nat_bysource_params = { .obj_cmpfn = nf_nat_bysource_cmp, .nelem_hint = 256, .min_size = 1024, - .nulls_base = (1U << RHT_BASE_SHIFT), }; /* Only called for SRC manip */ @@ -226,12 +225,15 @@ find_appropriate_src(struct net *net, .tuple = tuple, .zone = zone }; + struct rhlist_head *hl; - ct = rhashtable_lookup_fast(&nf_nat_bysource_table, &key, - nf_nat_bysource_params); - if (!ct) + hl = rhltable_lookup(&nf_nat_bysource_table, &key, + nf_nat_bysource_params); + if (!hl) return 0; + ct = container_of(hl, typeof(*ct), nat_bysource); + nf_ct_invert_tuplepr(result, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); result->dst = tuple->dst; @@ -449,11 +451,17 @@ nf_nat_setup_info(struct nf_conn *ct, } if (maniptype == NF_NAT_MANIP_SRC) { + struct nf_nat_conn_key key = { + .net = nf_ct_net(ct), + .tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + .zone = nf_ct_zone(ct), + }; int err; - err = rhashtable_insert_fast(&nf_nat_bysource_table, - &ct->nat_bysource, - nf_nat_bysource_params); + err = rhltable_insert_key(&nf_nat_bysource_table, + &key, + &ct->nat_bysource, + nf_nat_bysource_params); if (err) return NF_DROP; } @@ -570,8 +578,8 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) * will delete entry from already-freed table. */ ct->status &= ~IPS_NAT_DONE_MASK; - rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, - nf_nat_bysource_params); + rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource, + nf_nat_bysource_params); /* don't delete conntrack. Although that would make things a lot * simpler, we'd end up flushing all conntracks on nat rmmod. @@ -701,8 +709,8 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) if (!nat) return; - rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, - nf_nat_bysource_params); + rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource, + nf_nat_bysource_params); } static struct nf_ct_ext_type nat_extend __read_mostly = { @@ -837,13 +845,13 @@ static int __init nf_nat_init(void) { int ret; - ret = rhashtable_init(&nf_nat_bysource_table, &nf_nat_bysource_params); + ret = rhltable_init(&nf_nat_bysource_table, &nf_nat_bysource_params); if (ret) return ret; ret = nf_ct_extend_register(&nat_extend); if (ret < 0) { - rhashtable_destroy(&nf_nat_bysource_table); + rhltable_destroy(&nf_nat_bysource_table); printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); return ret; } @@ -867,7 +875,7 @@ static int __init nf_nat_init(void) return 0; cleanup_extend: - rhashtable_destroy(&nf_nat_bysource_table); + rhltable_destroy(&nf_nat_bysource_table); nf_ct_extend_unregister(&nat_extend); return ret; } @@ -886,7 +894,7 @@ static void __exit nf_nat_cleanup(void) for (i = 0; i < NFPROTO_NUMPROTO; i++) kfree(nf_nat_l4protos[i]); - rhashtable_destroy(&nf_nat_bysource_table); + rhltable_destroy(&nf_nat_bysource_table); } MODULE_LICENSE("GPL"); -- cgit v1.2.3 From d3e2a1110cae6ee5eeb1f9a97addf03e974f12e6 Mon Sep 17 00:00:00 2001 From: Anders K. Pedersen Date: Sun, 20 Nov 2016 16:38:47 +0000 Subject: netfilter: nf_tables: fix inconsistent element expiration calculation As Liping Zhang reports, after commit a8b1e36d0d1d ("netfilter: nft_dynset: fix element timeout for HZ != 1000"), priv->timeout was stored in jiffies, while set->timeout was stored in milliseconds. This is inconsistent and incorrect. Firstly, we already call msecs_to_jiffies in nft_set_elem_init, so priv->timeout will be converted to jiffies twice. Secondly, if the user did not specify the NFTA_DYNSET_TIMEOUT attr, set->timeout will be used, but we forget to call msecs_to_jiffies when do update elements. Fix this by using jiffies internally for traditional sets and doing the conversions to/from msec when interacting with userspace - as dynset already does. This is preferable to doing the conversions, when elements are inserted or updated, because this can happen very frequently on busy dynsets. Fixes: a8b1e36d0d1d ("netfilter: nft_dynset: fix element timeout for HZ != 1000") Reported-by: Liping Zhang Signed-off-by: Anders K. Pedersen Acked-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d79d1e9b9546..b02af0bf5777 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -313,7 +313,7 @@ void nft_unregister_set(struct nft_set_ops *ops); * @size: maximum set size * @nelems: number of elements * @ndeact: number of deactivated elements queued for removal - * @timeout: default timeout value in msecs + * @timeout: default timeout value in jiffies * @gc_int: garbage collection interval in msecs * @policy: set parameterization (see enum nft_set_policies) * @udlen: user data length diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 026581b04ea8..e5194f6f906c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2570,7 +2570,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, } if (set->timeout && - nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout), + nla_put_be64(skb, NFTA_SET_TIMEOUT, + cpu_to_be64(jiffies_to_msecs(set->timeout)), NFTA_SET_PAD)) goto nla_put_failure; if (set->gc_int && @@ -2859,7 +2860,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_TIMEOUT] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; - timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_TIMEOUT])); + timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( + nla[NFTA_SET_TIMEOUT]))); } gc_int = 0; if (nla[NFTA_SET_GC_INTERVAL] != NULL) { @@ -3178,7 +3180,8 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, - cpu_to_be64(*nft_set_ext_timeout(ext)), + cpu_to_be64(jiffies_to_msecs( + *nft_set_ext_timeout(ext))), NFTA_SET_ELEM_PAD)) goto nla_put_failure; @@ -3447,7 +3450,7 @@ void *nft_set_elem_init(const struct nft_set *set, memcpy(nft_set_ext_data(ext), data, set->dlen); if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) *nft_set_ext_expiration(ext) = - jiffies + msecs_to_jiffies(timeout); + jiffies + timeout; if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) *nft_set_ext_timeout(ext) = timeout; @@ -3535,7 +3538,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; - timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_ELEM_TIMEOUT])); + timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( + nla[NFTA_SET_ELEM_TIMEOUT]))); } else if (set->flags & NFT_SET_TIMEOUT) { timeout = set->timeout; } -- cgit v1.2.3 From 5173bc679dec881120df109a6a2b39143235382c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 23 Nov 2016 01:11:03 +0100 Subject: netfilter: nat: fix crash when conntrack entry is re-used Stas Nichiporovich reports oops in nf_nat_bysource_cmp(), trying to access nf_conn struct at address 0xffffffffffffff50. This is the result of fetching a null rhash list (struct embedded at offset 176; 0 - 176 gets us ...fff50). The problem is that conntrack entries are allocated from a SLAB_DESTROY_BY_RCU cache, i.e. entries can be free'd and reused on another cpu while nf nat bysource hash access the same conntrack entry. Freeing is fine (we hold rcu read lock); zeroing rhlist_head isn't. -> Move the rhlist struct outside of the memset()-inited area. Fixes: 7c9664351980aaa6a ("netfilter: move nat hlist_head to nf_conn") Reported-by: Stas Nichiporovich Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index dc143ada9762..d9d52c020a70 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -100,6 +100,9 @@ struct nf_conn { possible_net_t ct_net; +#if IS_ENABLED(CONFIG_NF_NAT) + struct rhlist_head nat_bysource; +#endif /* all members below initialized via memset */ u8 __nfct_init_offset[0]; @@ -117,9 +120,6 @@ struct nf_conn { /* Extensions */ struct nf_ct_ext *ext; -#if IS_ENABLED(CONFIG_NF_NAT) - struct rhlist_head nat_bysource; -#endif /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; }; -- cgit v1.2.3 From 59bfde01fab0c4550778cd53e8d266f1dfddf7b7 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 22 Nov 2016 23:09:57 +0200 Subject: devlink: Add E-Switch inline mode control Some HWs need the VF driver to put part of the packet headers on the TX descriptor so the e-switch can do proper matching and steering. The supported modes: none, link, network, transport. Signed-off-by: Roi Dayan Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/net/devlink.h | 2 ++ include/uapi/linux/devlink.h | 8 +++++ net/core/devlink.c | 70 ++++++++++++++++++++++++++++++++------------ 3 files changed, 61 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/devlink.h b/include/net/devlink.h index 211bd3c37028..d29e5fc82582 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -92,6 +92,8 @@ struct devlink_ops { int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode); int (*eswitch_mode_set)(struct devlink *devlink, u16 mode); + int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode); + int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 915bfa74458c..9014c33d4e77 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -102,6 +102,13 @@ enum devlink_eswitch_mode { DEVLINK_ESWITCH_MODE_SWITCHDEV, }; +enum devlink_eswitch_inline_mode { + DEVLINK_ESWITCH_INLINE_MODE_NONE, + DEVLINK_ESWITCH_INLINE_MODE_LINK, + DEVLINK_ESWITCH_INLINE_MODE_NETWORK, + DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT, +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, @@ -133,6 +140,7 @@ enum devlink_attr { DEVLINK_ATTR_SB_OCC_CUR, /* u32 */ DEVLINK_ATTR_SB_OCC_MAX, /* u32 */ DEVLINK_ATTR_ESWITCH_MODE, /* u16 */ + DEVLINK_ATTR_ESWITCH_INLINE_MODE, /* u8 */ /* add new attributes above here, update the policy in devlink.c */ diff --git a/net/core/devlink.c b/net/core/devlink.c index c14f8b661db9..2b5bf9efa720 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1394,26 +1394,45 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb, static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, - u32 seq, int flags, u16 mode) + u32 seq, int flags) { + const struct devlink_ops *ops = devlink->ops; void *hdr; + int err = 0; + u16 mode; + u8 inline_mode; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; - if (devlink_nl_put_handle(msg, devlink)) - goto nla_put_failure; + err = devlink_nl_put_handle(msg, devlink); + if (err) + goto out; - if (nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode)) - goto nla_put_failure; + err = ops->eswitch_mode_get(devlink, &mode); + if (err) + goto out; + err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode); + if (err) + goto out; + + if (ops->eswitch_inline_mode_get) { + err = ops->eswitch_inline_mode_get(devlink, &inline_mode); + if (err) + goto out; + err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE, + inline_mode); + if (err) + goto out; + } genlmsg_end(msg, hdr); return 0; -nla_put_failure: +out: genlmsg_cancel(msg, hdr); - return -EMSGSIZE; + return err; } static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb, @@ -1422,22 +1441,17 @@ static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb, struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; struct sk_buff *msg; - u16 mode; int err; if (!ops || !ops->eswitch_mode_get) return -EOPNOTSUPP; - err = ops->eswitch_mode_get(devlink, &mode); - if (err) - return err; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_MODE_GET, - info->snd_portid, info->snd_seq, 0, mode); + info->snd_portid, info->snd_seq, 0); if (err) { nlmsg_free(msg); @@ -1453,15 +1467,32 @@ static int devlink_nl_cmd_eswitch_mode_set_doit(struct sk_buff *skb, struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; u16 mode; + u8 inline_mode; + int err = 0; - if (!info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) - return -EINVAL; + if (!ops) + return -EOPNOTSUPP; - mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]); + if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) { + if (!ops->eswitch_mode_set) + return -EOPNOTSUPP; + mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]); + err = ops->eswitch_mode_set(devlink, mode); + if (err) + return err; + } - if (ops && ops->eswitch_mode_set) - return ops->eswitch_mode_set(devlink, mode); - return -EOPNOTSUPP; + if (info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]) { + if (!ops->eswitch_inline_mode_set) + return -EOPNOTSUPP; + inline_mode = nla_get_u8( + info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]); + err = ops->eswitch_inline_mode_set(devlink, inline_mode); + if (err) + return err; + } + + return 0; } static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { @@ -1478,6 +1509,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 }, [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, [DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 }, + [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 }, }; static const struct genl_ops devlink_nl_ops[] = { -- cgit v1.2.3 From 21fcf572716f61926f5d23a358e79d06a9d4d54f Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 5 Oct 2016 22:51:42 +0200 Subject: Bluetooth: __ variants of u8 and friends are not neccessary inside kernel bluetooth.h is not part of user API, so __ variants are not neccessary here. Signed-off-by: Pavel Machek Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 0a1e21d7bce1..01487192f628 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -197,7 +197,7 @@ typedef struct { #define BDADDR_LE_PUBLIC 0x01 #define BDADDR_LE_RANDOM 0x02 -static inline bool bdaddr_type_is_valid(__u8 type) +static inline bool bdaddr_type_is_valid(u8 type) { switch (type) { case BDADDR_BREDR: @@ -209,7 +209,7 @@ static inline bool bdaddr_type_is_valid(__u8 type) return false; } -static inline bool bdaddr_type_is_le(__u8 type) +static inline bool bdaddr_type_is_le(u8 type) { switch (type) { case BDADDR_LE_PUBLIC: @@ -279,15 +279,16 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock); /* Skb helpers */ struct l2cap_ctrl { - __u8 sframe:1, + u8 sframe:1, poll:1, final:1, fcs:1, sar:2, super:2; - __u16 reqseq; - __u16 txseq; - __u8 retries; + + u16 reqseq; + u16 txseq; + u8 retries; __le16 psm; bdaddr_t bdaddr; struct l2cap_chan *chan; @@ -303,7 +304,7 @@ typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status, #define HCI_REQ_SKB BIT(1) struct hci_ctrl { - __u16 opcode; + u16 opcode; u8 req_flags; u8 req_event; union { @@ -313,10 +314,10 @@ struct hci_ctrl { }; struct bt_skb_cb { - __u8 pkt_type; - __u8 force_active; - __u16 expect; - __u8 incoming:1; + u8 pkt_type; + u8 force_active; + u16 expect; + u8 incoming:1; union { struct l2cap_ctrl l2cap; struct hci_ctrl hci; @@ -366,7 +367,7 @@ out: return NULL; } -int bt_to_errno(__u16 code); +int bt_to_errno(u16 code); void hci_sock_set_flag(struct sock *sk, int nr); void hci_sock_clear_flag(struct sock *sk, int nr); -- cgit v1.2.3 From 05b055e89121394058c75dc354e9a46e1e765579 Mon Sep 17 00:00:00 2001 From: Francis Yan Date: Sun, 27 Nov 2016 23:07:13 -0800 Subject: tcp: instrument tcp sender limits chronographs This patch implements the skeleton of the TCP chronograph instrumentation on sender side limits: 1) idle (unspec) 2) busy sending data other than 3-4 below 3) rwnd-limited 4) sndbuf-limited The limits are enumerated 'tcp_chrono'. Since a connection in theory can idle forever, we do not track the actual length of this uninteresting idle period. For the rest we track how long the sender spends in each limit. At any point during the life time of a connection, the sender must be in one of the four states. If there are multiple conditions worthy of tracking in a chronograph then the highest priority enum takes precedence over the other conditions. So that if something "more interesting" starts happening, stop the previous chrono and start a new one. The time unit is jiffy(u32) in order to save space in tcp_sock. This implies application must sample the stats no longer than every 49 days of 1ms jiffy. Signed-off-by: Francis Yan Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 7 +++++-- include/net/tcp.h | 14 ++++++++++++++ net/ipv4/tcp_output.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 32a7c7e35b71..d5d3bd814338 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -211,8 +211,11 @@ struct tcp_sock { u8 reord; /* reordering detected */ } rack; u16 advmss; /* Advertised MSS */ - u8 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ - unused:7; + u32 chrono_start; /* Start time in jiffies of a TCP chrono */ + u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ + u8 chrono_type:2, /* current chronograph type */ + rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ + unused:5; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 7de80739adab..e5ff4083870d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1516,6 +1516,20 @@ struct tcp_fastopen_context { struct rcu_head rcu; }; +/* Latencies incurred by various limits for a sender. They are + * chronograph-like stats that are mutually exclusive. + */ +enum tcp_chrono { + TCP_CHRONO_UNSPEC, + TCP_CHRONO_BUSY, /* Actively sending data (non-empty write queue) */ + TCP_CHRONO_RWND_LIMITED, /* Stalled by insufficient receive window */ + TCP_CHRONO_SNDBUF_LIMITED, /* Stalled by insufficient send buffer */ + __TCP_CHRONO_MAX, +}; + +void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type); +void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type); + /* write queue abstraction */ static inline void tcp_write_queue_purge(struct sock *sk) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 19105b46a304..34f751776a01 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2081,6 +2081,36 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, return false; } +static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new) +{ + const u32 now = tcp_time_stamp; + + if (tp->chrono_type > TCP_CHRONO_UNSPEC) + tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start; + tp->chrono_start = now; + tp->chrono_type = new; +} + +void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type) +{ + struct tcp_sock *tp = tcp_sk(sk); + + /* If there are multiple conditions worthy of tracking in a + * chronograph then the highest priority enum takes precedence over + * the other conditions. So that if something "more interesting" + * starts happening, stop the previous chrono and start a new one. + */ + if (type > tp->chrono_type) + tcp_chrono_set(tp, type); +} + +void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tcp_chrono_set(tp, TCP_CHRONO_UNSPEC); +} + /* This routine writes packets to the network. It advances the * send_head. This happens as incoming acks open up the remote * window for us. -- cgit v1.2.3 From 0f87230d1a6c253681550c6064715d06a32be73d Mon Sep 17 00:00:00 2001 From: Francis Yan Date: Sun, 27 Nov 2016 23:07:14 -0800 Subject: tcp: instrument how long TCP is busy sending This patch measures TCP busy time, which is defined as the period of time when sender has data (or FIN) to send. The time starts when data is buffered and stops when the write queue is flushed by ACKs or error events. Note the busy time does not include SYN time, unless data is included in SYN (i.e. Fast Open). It does include FIN time even if the FIN carries no payload. Excluding pure FIN is possible but would incur one additional test in the fast path, which may not be worth it. Signed-off-by: Francis Yan Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 6 +++++- net/ipv4/tcp_input.c | 3 +++ net/ipv4/tcp_output.c | 19 ++++++++++++++++--- 3 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index e5ff4083870d..3e097e39d4d2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1535,6 +1535,7 @@ static inline void tcp_write_queue_purge(struct sock *sk) { struct sk_buff *skb; + tcp_chrono_stop(sk, TCP_CHRONO_BUSY); while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) sk_wmem_free_skb(sk, skb); sk_mem_reclaim(sk); @@ -1593,8 +1594,10 @@ static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff * static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked) { - if (sk->sk_send_head == skb_unlinked) + if (sk->sk_send_head == skb_unlinked) { sk->sk_send_head = NULL; + tcp_chrono_stop(sk, TCP_CHRONO_BUSY); + } if (tcp_sk(sk)->highest_sack == skb_unlinked) tcp_sk(sk)->highest_sack = NULL; } @@ -1616,6 +1619,7 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb /* Queue it, remembering where we must start sending. */ if (sk->sk_send_head == NULL) { sk->sk_send_head = skb; + tcp_chrono_start(sk, TCP_CHRONO_BUSY); if (tcp_sk(sk)->highest_sack == NULL) tcp_sk(sk)->highest_sack = skb; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 22e6a2097ff6..a5d172761610 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3178,6 +3178,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->lost_skb_hint = NULL; } + if (!skb) + tcp_chrono_stop(sk, TCP_CHRONO_BUSY); + if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una))) tp->snd_up = tp->snd_una; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 34f751776a01..e8ea584106e0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2096,8 +2096,8 @@ void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type) struct tcp_sock *tp = tcp_sk(sk); /* If there are multiple conditions worthy of tracking in a - * chronograph then the highest priority enum takes precedence over - * the other conditions. So that if something "more interesting" + * chronograph then the highest priority enum takes precedence + * over the other conditions. So that if something "more interesting" * starts happening, stop the previous chrono and start a new one. */ if (type > tp->chrono_type) @@ -2108,7 +2108,18 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type) { struct tcp_sock *tp = tcp_sk(sk); - tcp_chrono_set(tp, TCP_CHRONO_UNSPEC); + + /* There are multiple conditions worthy of tracking in a + * chronograph, so that the highest priority enum takes + * precedence over the other conditions (see tcp_chrono_start). + * If a condition stops, we only stop chrono tracking if + * it's the "most interesting" or current chrono we are + * tracking and starts busy chrono if we have pending data. + */ + if (tcp_write_queue_empty(sk)) + tcp_chrono_set(tp, TCP_CHRONO_UNSPEC); + else if (type == tp->chrono_type) + tcp_chrono_set(tp, TCP_CHRONO_BUSY); } /* This routine writes packets to the network. It advances the @@ -3328,6 +3339,8 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) fo->copied = space; tcp_connect_queue_skb(sk, syn_data); + if (syn_data->len) + tcp_chrono_start(sk, TCP_CHRONO_BUSY); err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation); -- cgit v1.2.3 From 0382a25af3c771a8e4d5e417d1834cbe28c2aaac Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 29 Nov 2016 13:09:44 +0100 Subject: l2tp: lock socket before checking flags in connect() Socket flags aren't updated atomically, so the socket must be locked while reading the SOCK_ZAPPED flag. This issue exists for both l2tp_ip and l2tp_ip6. For IPv6, this patch also brings error handling for __ip6_datagram_connect() failures. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 ++ net/ipv6/datagram.c | 4 +++- net/l2tp/l2tp_ip.c | 19 ++++++++++++------- net/l2tp/l2tp_ip6.c | 16 +++++++++++----- 4 files changed, 28 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8fed1cd78658..f11ca837361b 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -970,6 +970,8 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); +int __ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, + int addr_len); int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr, int addr_len); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 37874e2f30ed..ccf40550c475 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -139,7 +139,8 @@ void ip6_datagram_release_cb(struct sock *sk) } EXPORT_SYMBOL_GPL(ip6_datagram_release_cb); -static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); @@ -252,6 +253,7 @@ ipv4_connected: out: return err; } +EXPORT_SYMBOL_GPL(__ip6_datagram_connect); int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 982f6c44ea01..1f57094d3111 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -308,21 +308,24 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr; int rc; - if (sock_flag(sk, SOCK_ZAPPED)) /* Must bind first - autobinding does not work */ - return -EINVAL; - if (addr_len < sizeof(*lsa)) return -EINVAL; if (ipv4_is_multicast(lsa->l2tp_addr.s_addr)) return -EINVAL; - rc = ip4_datagram_connect(sk, uaddr, addr_len); - if (rc < 0) - return rc; - lock_sock(sk); + /* Must bind first - autobinding does not work */ + if (sock_flag(sk, SOCK_ZAPPED)) { + rc = -EINVAL; + goto out_sk; + } + + rc = __ip4_datagram_connect(sk, uaddr, addr_len); + if (rc < 0) + goto out_sk; + l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id; write_lock_bh(&l2tp_ip_lock); @@ -330,7 +333,9 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len sk_add_bind_node(sk, &l2tp_ip_bind_table); write_unlock_bh(&l2tp_ip_lock); +out_sk: release_sock(sk); + return rc; } diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 9978d01ba0ba..af9abfff637c 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -371,9 +371,6 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_type; int rc; - if (sock_flag(sk, SOCK_ZAPPED)) /* Must bind first - autobinding does not work */ - return -EINVAL; - if (addr_len < sizeof(*lsa)) return -EINVAL; @@ -390,10 +387,18 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr, return -EINVAL; } - rc = ip6_datagram_connect(sk, uaddr, addr_len); - lock_sock(sk); + /* Must bind first - autobinding does not work */ + if (sock_flag(sk, SOCK_ZAPPED)) { + rc = -EINVAL; + goto out_sk; + } + + rc = __ip6_datagram_connect(sk, uaddr, addr_len); + if (rc < 0) + goto out_sk; + l2tp_ip6_sk(sk)->peer_conn_id = lsa->l2tp_conn_id; write_lock_bh(&l2tp_ip6_lock); @@ -401,6 +406,7 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr, sk_add_bind_node(sk, &l2tp_ip6_bind_table); write_unlock_bh(&l2tp_ip6_lock); +out_sk: release_sock(sk); return rc; -- cgit v1.2.3 From 95a22caee396cef0bb2ca8fafdd82966a49367bb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 1 Dec 2016 11:32:06 +0100 Subject: tcp: randomize tcp timestamp offsets for each connection jiffies based timestamps allow for easy inference of number of devices behind NAT translators and also makes tracking of hosts simpler. commit ceaa1fef65a7c2e ("tcp: adding a per-socket timestamp offset") added the main infrastructure that is needed for per-connection ts randomization, in particular writing/reading the on-wire tcp header format takes the offset into account so rest of stack can use normal tcp_time_stamp (jiffies). So only two items are left: - add a tsoffset for request sockets - extend the tcp isn generator to also return another 32bit number in addition to the ISN. Re-use of ISN generator also means timestamps are still monotonically increasing for same connection quadruple, i.e. PAWS will still work. Includes fixes from Eric Dumazet. Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + include/net/secure_seq.h | 8 ++++---- include/net/tcp.h | 2 +- net/core/secure_seq.c | 10 ++++++---- net/ipv4/syncookies.c | 1 + net/ipv4/tcp_input.c | 7 ++++++- net/ipv4/tcp_ipv4.c | 9 +++++---- net/ipv4/tcp_minisocks.c | 4 +++- net/ipv4/tcp_output.c | 2 +- net/ipv6/syncookies.c | 1 + net/ipv6/tcp_ipv6.c | 10 ++++++---- 11 files changed, 35 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 00e0ee8f001f..734bab4c3bef 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -123,6 +123,7 @@ struct tcp_request_sock { u32 txhash; u32 rcv_isn; u32 snt_isn; + u32 ts_off; u32 last_oow_ack_time; /* last SYNACK */ u32 rcv_nxt; /* the ack # by SYNACK. For * FastOpen it's the seq# diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index 3f36d45b714a..0caee631a836 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -6,10 +6,10 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); -__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport); -__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport); +u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport, u32 *tsoff); +u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport, u32 *tsoff); u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, diff --git a/include/net/tcp.h b/include/net/tcp.h index 3e097e39d4d2..207147b4c6b2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1827,7 +1827,7 @@ struct tcp_request_sock_ops { struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl, const struct request_sock *req, bool *strict); - __u32 (*init_seq)(const struct sk_buff *skb); + __u32 (*init_seq)(const struct sk_buff *skb, u32 *tsoff); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index fd3ce461fbe6..a8d6062cbb4a 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -40,8 +40,8 @@ static u32 seq_scale(u32 seq) #endif #if IS_ENABLED(CONFIG_IPV6) -__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport) +u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport, u32 *tsoff) { u32 secret[MD5_MESSAGE_BYTES / 4]; u32 hash[MD5_DIGEST_WORDS]; @@ -58,6 +58,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, md5_transform(hash, secret); + *tsoff = hash[1]; return seq_scale(hash[0]); } EXPORT_SYMBOL(secure_tcpv6_sequence_number); @@ -86,8 +87,8 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #ifdef CONFIG_INET -__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport) +u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport, u32 *tsoff) { u32 hash[MD5_DIGEST_WORDS]; @@ -99,6 +100,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, md5_transform(hash, net_secret); + *tsoff = hash[1]; return seq_scale(hash[0]); } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 0dc6286272aa..3e88467d70ee 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -334,6 +334,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq = tcp_rsk(req); treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; + treq->ts_off = 0; req->mss = mss; ireq->ir_num = ntohs(th->dest); ireq->ir_rmt_port = th->source; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 56fe736fd64d..2257de244622 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6307,6 +6307,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop; tcp_rsk(req)->af_specific = af_ops; + tcp_rsk(req)->ts_off = 0; tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = af_ops->mss_clamp; @@ -6328,6 +6329,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; + if (isn && tmp_opt.tstamp_ok) + af_ops->init_seq(skb, &tcp_rsk(req)->ts_off); + if (!want_cookie && !isn) { /* VJ's idea. We save last timestamp seen * from the destination in peer table, when entering @@ -6368,7 +6372,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop_and_release; } - isn = af_ops->init_seq(skb); + isn = af_ops->init_seq(skb, &tcp_rsk(req)->ts_off); } if (!dst) { dst = af_ops->route_req(sk, &fl, req, NULL); @@ -6380,6 +6384,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (want_cookie) { isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); + tcp_rsk(req)->ts_off = 0; req->cookie_ts = tmp_opt.tstamp_ok; if (!tmp_opt.tstamp_ok) inet_rsk(req)->ecn_ok = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5555eb86e549..b50f05905ced 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -95,12 +95,12 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static __u32 tcp_v4_init_sequence(const struct sk_buff *skb) +static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff) { return secure_tcp_sequence_number(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, tcp_hdr(skb)->dest, - tcp_hdr(skb)->source); + tcp_hdr(skb)->source, tsoff); } int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -237,7 +237,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, inet->inet_daddr, inet->inet_sport, - usin->sin_port); + usin->sin_port, + &tp->tsoffset); inet->inet_id = tp->write_seq ^ jiffies; @@ -824,7 +825,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, - tcp_time_stamp, + tcp_time_stamp + tcp_rsk(req)->ts_off, req->ts_recent, 0, tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 6234ebaa7db1..28ce5ee831f5 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -532,7 +532,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rx_opt.ts_recent_stamp = 0; newtp->tcp_header_len = sizeof(struct tcphdr); } - newtp->tsoffset = 0; + newtp->tsoffset = treq->ts_off; #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ if (newtp->af_specific->md5_lookup(sk, newsk)) @@ -581,6 +581,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; + if (tmp_opt.rcv_tsecr) + tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off; /* We do not store true stamp, but it is not required, * it can be estimated (approximately) * from another data. diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d3545d0cff75..c7adcb57654e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -640,7 +640,7 @@ static unsigned int tcp_synack_options(struct request_sock *req, } if (likely(ireq->tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = tcp_skb_timestamp(skb); + opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off; opts->tsecr = req->ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 97830a6a9cbb..a4d49760bf43 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -209,6 +209,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->snt_synack.v64 = 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; + treq->ts_off = 0; /* * We need to lookup the dst_entry to get the correct window size. diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 28ec0a2e7b72..a2185a214abc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -101,12 +101,12 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) } } -static __u32 tcp_v6_init_sequence(const struct sk_buff *skb) +static u32 tcp_v6_init_sequence(const struct sk_buff *skb, u32 *tsoff) { return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32, tcp_hdr(skb)->dest, - tcp_hdr(skb)->source); + tcp_hdr(skb)->source, tsoff); } static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, @@ -283,7 +283,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32, inet->inet_sport, - inet->inet_dport); + inet->inet_dport, + &tp->tsoffset); err = tcp_connect(sk); if (err) @@ -956,7 +957,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, - tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, + tcp_time_stamp + tcp_rsk(req)->ts_off, + req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0, 0); } -- cgit v1.2.3 From 55330f05969437c5d22fcc2ae2e54810b5236b7b Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 1 Dec 2016 14:06:33 +0200 Subject: net/sched: Add separate check for skip_hw flag Creating a difference between two possible cases: 1. Not offloading tc rule since the user sets 'skip_hw' flag. 2. Not offloading tc rule since the device doesn't support offloading. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 767b03a3fe67..45ad9aab9bba 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -425,16 +425,14 @@ struct tc_cls_u32_offload { }; }; -static inline bool tc_should_offload(const struct net_device *dev, - const struct tcf_proto *tp, u32 flags) +static inline bool tc_can_offload(const struct net_device *dev, + const struct tcf_proto *tp) { const struct Qdisc *sch = tp->q; const struct Qdisc_class_ops *cops = sch->ops->cl_ops; if (!(dev->features & NETIF_F_HW_TC)) return false; - if (flags & TCA_CLS_FLAGS_SKIP_HW) - return false; if (!dev->netdev_ops->ndo_setup_tc) return false; if (cops && cops->tcf_cl_offload) @@ -443,6 +441,19 @@ static inline bool tc_should_offload(const struct net_device *dev, return true; } +static inline bool tc_skip_hw(u32 flags) +{ + return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false; +} + +static inline bool tc_should_offload(const struct net_device *dev, + const struct tcf_proto *tp, u32 flags) +{ + if (tc_skip_hw(flags)) + return false; + return tc_can_offload(dev, tp); +} + static inline bool tc_skip_sw(u32 flags) { return (flags & TCA_CLS_FLAGS_SKIP_SW) ? true : false; -- cgit v1.2.3 From 255cb30425c0ced57d6d85f3e7cddb99b9576046 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 1 Dec 2016 14:06:36 +0200 Subject: net/sched: act_mirred: Add new tc_action_ops get_dev() Adding support to a new tc_action_ops. get_dev is a general option which allows to get the underline device when trying to offload a tc rule. In case of mirred action the returned device is the mirred (egress) device. Signed-off-by: Hadar Hen Zion Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/act_api.h | 2 ++ net/sched/act_mirred.c | 12 ++++++++++++ 2 files changed, 14 insertions(+) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index d8eae87ea778..9dddf77a69cc 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -119,6 +119,8 @@ struct tc_action_ops { int (*walk)(struct net *, struct sk_buff *, struct netlink_callback *, int, const struct tc_action_ops *); void (*stats_update)(struct tc_action *, u64, u32, u64); + int (*get_dev)(const struct tc_action *a, struct net *net, + struct net_device **mirred_dev); }; struct tc_action_net { diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 1af7baa732a3..bb09ba3ca5c2 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -315,6 +315,17 @@ static struct notifier_block mirred_device_notifier = { .notifier_call = mirred_device_event, }; +static int tcf_mirred_device(const struct tc_action *a, struct net *net, + struct net_device **mirred_dev) +{ + int ifindex = tcf_mirred_ifindex(a); + + *mirred_dev = __dev_get_by_index(net, ifindex); + if (!mirred_dev) + return -EINVAL; + return 0; +} + static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .type = TCA_ACT_MIRRED, @@ -327,6 +338,7 @@ static struct tc_action_ops act_mirred_ops = { .walk = tcf_mirred_walker, .lookup = tcf_mirred_search, .size = sizeof(struct tcf_mirred), + .get_dev = tcf_mirred_device, }; static __net_init int mirred_init_net(struct net *net) -- cgit v1.2.3 From 7091d8c7055d7310339435ae3af2fb490a92524d Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 1 Dec 2016 14:06:37 +0200 Subject: net/sched: cls_flower: Add offload support using egress Hardware device In order to support hardware offloading when the device given by the tc rule is different from the Hardware underline device, extract the mirred (egress) device from the tc action when a filter is added, using the new tc_action_ops, get_dev(). Flower caches the information about the mirred device and use it for calling ndo_setup_tc in filter change, update stats and delete. Calling ndo_setup_tc of the mirred (egress) device instead of the ingress device will allow a resolution between the software ingress device and the underline hardware device. The resolution will take place inside the offloading driver using 'egress_device' flag added to tc_to_netdev struct which is provided to the offloading driver. Signed-off-by: Hadar Hen Zion Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + include/net/pkt_cls.h | 2 ++ net/sched/cls_api.c | 24 ++++++++++++++++++++++++ net/sched/cls_flower.c | 41 ++++++++++++++++++++++++----------------- 4 files changed, 51 insertions(+), 17 deletions(-) (limited to 'include/net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3755317cc6a9..1ff5ea6e1221 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -802,6 +802,7 @@ struct tc_to_netdev { struct tc_cls_matchall_offload *cls_mall; struct tc_cls_bpf_offload *cls_bpf; }; + bool egress_dev; }; /* These structures hold the attributes of xdp state that are being passed diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 45ad9aab9bba..f0a051480c6c 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -171,6 +171,8 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, struct tcf_exts *src); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); +int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, + struct net_device **hw_dev); /** * struct tcf_pkt_info - packet information diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index b05d4a2155b0..3fbba79a4ef0 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -682,6 +682,30 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_dump_stats); +int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, + struct net_device **hw_dev) +{ +#ifdef CONFIG_NET_CLS_ACT + const struct tc_action *a; + LIST_HEAD(actions); + + if (tc_no_actions(exts)) + return -EINVAL; + + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { + if (a->ops->get_dev) { + a->ops->get_dev(a, dev_net(dev), hw_dev); + break; + } + } + if (*hw_dev) + return 0; +#endif + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(tcf_exts_get_dev); + static int __init tc_filter_init(void) { rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 13b349f426a7..1cacfa5c95f3 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -78,6 +78,8 @@ struct cls_fl_filter { u32 handle; u32 flags; struct rcu_head rcu; + struct tc_to_netdev tc; + struct net_device *hw_dev; }; static unsigned short int fl_mask_range(const struct fl_flow_mask *mask) @@ -203,9 +205,9 @@ static void fl_destroy_filter(struct rcu_head *head) static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) { - struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_flower_offload offload = {0}; - struct tc_to_netdev tc; + struct net_device *dev = f->hw_dev; + struct tc_to_netdev *tc = &f->tc; if (!tc_can_offload(dev, tp)) return; @@ -213,10 +215,10 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) offload.command = TC_CLSFLOWER_DESTROY; offload.cookie = (unsigned long)f; - tc.type = TC_SETUP_CLSFLOWER; - tc.cls_flower = &offload; + tc->type = TC_SETUP_CLSFLOWER; + tc->cls_flower = &offload; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc); } static int fl_hw_replace_filter(struct tcf_proto *tp, @@ -226,11 +228,17 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_flower_offload offload = {0}; - struct tc_to_netdev tc; + struct tc_to_netdev *tc = &f->tc; int err; - if (!tc_can_offload(dev, tp)) - return tc_skip_sw(f->flags) ? -EINVAL : 0; + if (!tc_can_offload(dev, tp)) { + if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev)) + return tc_skip_sw(f->flags) ? -EINVAL : 0; + dev = f->hw_dev; + tc->egress_dev = true; + } else { + f->hw_dev = dev; + } offload.command = TC_CLSFLOWER_REPLACE; offload.cookie = (unsigned long)f; @@ -239,23 +247,22 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, offload.key = &f->key; offload.exts = &f->exts; - tc.type = TC_SETUP_CLSFLOWER; - tc.cls_flower = &offload; + tc->type = TC_SETUP_CLSFLOWER; + tc->cls_flower = &offload; err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, - &tc); + tc); if (tc_skip_sw(f->flags)) return err; - return 0; } static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) { - struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_flower_offload offload = {0}; - struct tc_to_netdev tc; + struct net_device *dev = f->hw_dev; + struct tc_to_netdev *tc = &f->tc; if (!tc_can_offload(dev, tp)) return; @@ -264,10 +271,10 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) offload.cookie = (unsigned long)f; offload.exts = &f->exts; - tc.type = TC_SETUP_CLSFLOWER; - tc.cls_flower = &offload; + tc->type = TC_SETUP_CLSFLOWER; + tc->cls_flower = &offload; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); + dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc); } static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) -- cgit v1.2.3 From aa4c1037a30f4e88f444e83d42c2befbe0d5caf5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 1 Dec 2016 08:48:06 -0800 Subject: bpf: Add support for reading socket family, type, protocol Add socket family, type and protocol to bpf_sock allowing bpf programs read-only access. Add __sk_flags_offset[0] to struct sock before the bitfield to programmtically determine the offset of the unsigned int containing protocol and type. Signed-off-by: David Ahern Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/net/sock.h | 15 +++++++++++++++ include/uapi/linux/bpf.h | 3 +++ net/core/filter.c | 21 +++++++++++++++++++++ 3 files changed, 39 insertions(+) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 442cbb118a07..69afda6bea15 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -389,6 +389,21 @@ struct sock { * Because of non atomicity rules, all * changes are protected by socket lock. */ + unsigned int __sk_flags_offset[0]; +#ifdef __BIG_ENDIAN_BITFIELD +#define SK_FL_PROTO_SHIFT 16 +#define SK_FL_PROTO_MASK 0x00ff0000 + +#define SK_FL_TYPE_SHIFT 0 +#define SK_FL_TYPE_MASK 0x0000ffff +#else +#define SK_FL_PROTO_SHIFT 8 +#define SK_FL_PROTO_MASK 0x0000ff00 + +#define SK_FL_TYPE_SHIFT 16 +#define SK_FL_TYPE_MASK 0xffff0000 +#endif + kmemcheck_bitfield_begin(flags); unsigned int sk_padding : 2, sk_no_check_tx : 1, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bfe5e31a1288..6123d9b8e828 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -571,6 +571,9 @@ enum bpf_ret_code { struct bpf_sock { __u32 bound_dev_if; + __u32 family; + __u32 type; + __u32 protocol; }; /* User return codes for XDP prog type. diff --git a/net/core/filter.c b/net/core/filter.c index 0ab252e462aa..56b43587d200 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3121,6 +3121,27 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, offsetof(struct sock, sk_bound_dev_if)); break; + + case offsetof(struct bpf_sock, family): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2); + + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sock, sk_family)); + break; + + case offsetof(struct bpf_sock, type): + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sock, __sk_flags_offset)); + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_TYPE_MASK); + *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_TYPE_SHIFT); + break; + + case offsetof(struct bpf_sock, protocol): + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sock, __sk_flags_offset)); + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_PROTO_MASK); + *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_PROTO_SHIFT); + break; } return insn - insn_buf; -- cgit v1.2.3 From 4f7df337fe79bba1e4c2d525525d63b5ba186bbd Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 2 Dec 2016 03:59:06 +0300 Subject: netlink: 2-clause nla_ok() nla_ok() consists of 3 clauses: 1) int rem >= (int)sizeof(struct nlattr) 2) u16 nla_len >= sizeof(struct nlattr) 3) u16 nla_len <= int rem The statement is that clause (1) is redundant. What it does is ensuring that "rem" is a positive number, so that in clause (3) positive number will be compared to positive number with no problems. However, "u16" fully fits into "int" and integers do not change value when upcasting even to signed type. Negative integers will be rejected by clause (3) just fine. Small positive integers will be rejected by transitivity of comparison operator. NOTE: all of the above DOES NOT apply to nlmsg_ok() where ->nlmsg_len is u32(!), so 3 clauses AND A CAST TO INT are necessary. Obligatory space savings report: -1.6 KB $ ./scripts/bloat-o-meter ../vmlinux-000* ../vmlinux-001* add/remove: 0/0 grow/shrink: 3/63 up/down: 35/-1692 (-1657) function old new delta validate_scan_freqs 142 155 +13 tcf_em_tree_validate 867 879 +12 dcbnl_ieee_del 328 338 +10 netlbl_cipsov4_add_common.isra 218 215 -3 ... ovs_nla_put_actions 888 806 -82 netlbl_cipsov4_add_std 1648 1566 -82 nl80211_parse_sched_scan 2889 2780 -109 ip_tun_from_nlattr 3086 2945 -141 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netlink.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index d3938f11ae52..dd657a33f8c3 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -698,8 +698,7 @@ static inline int nla_len(const struct nlattr *nla) */ static inline int nla_ok(const struct nlattr *nla, int remaining) { - return remaining >= (int) sizeof(*nla) && - nla->nla_len >= sizeof(*nla) && + return nla->nla_len >= sizeof(*nla) && nla->nla_len <= remaining; } -- cgit v1.2.3 From 9bfc7b9969dbb800460e2577f1dea59336269ce4 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 2 Dec 2016 04:12:58 +0300 Subject: netns: add dummy struct inside "struct net_generic" This is precursor to fixing "[id - 1]" bloat inside net_generic(). Name "s" is chosen to complement name "u" often used for dummy unions. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/generic.h | 6 ++++-- net/core/net_namespace.c | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/generic.h b/include/net/netns/generic.h index d315786bcfd7..65ccce69b040 100644 --- a/include/net/netns/generic.h +++ b/include/net/netns/generic.h @@ -25,8 +25,10 @@ */ struct net_generic { - unsigned int len; - struct rcu_head rcu; + struct { + unsigned int len; + struct rcu_head rcu; + } s; void *ptr[0]; }; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 707b3c25cb80..55e28a8d0604 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -50,7 +50,7 @@ static struct net_generic *net_alloc_generic(void) ng = kzalloc(generic_size, GFP_KERNEL); if (ng) - ng->len = max_gen_ptrs; + ng->s.len = max_gen_ptrs; return ng; } @@ -64,7 +64,7 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data) old_ng = rcu_dereference_protected(net->gen, lockdep_is_held(&net_mutex)); - if (old_ng->len >= id) { + if (old_ng->s.len >= id) { old_ng->ptr[id - 1] = data; return 0; } @@ -84,11 +84,11 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data) * the old copy for kfree after a grace period. */ - memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); + memcpy(&ng->ptr, &old_ng->ptr, old_ng->s.len * sizeof(void*)); ng->ptr[id - 1] = data; rcu_assign_pointer(net->gen, ng); - kfree_rcu(old_ng, rcu); + kfree_rcu(old_ng, s.rcu); return 0; } -- cgit v1.2.3 From 6af2d5fff2fdcd481cb9a4f354a0880142b17c60 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 2 Dec 2016 04:21:32 +0300 Subject: netns: fix net_generic() "id - 1" bloat net_generic() function is both a) inline and b) used ~600 times. It has the following code inside ... ptr = ng->ptr[id - 1]; ... "id" is never compile time constant so compiler is forced to subtract 1. And those decrements or LEA [r32 - 1] instructions add up. We also start id'ing from 1 to catch bugs where pernet sybsystem id is not initialized and 0. This is quite pointless idea (nothing will work or immediate interference with first registered subsystem) in general but it hints what needs to be done for code size reduction. Namely, overlaying allocation of pointer array and fixed part of structure in the beginning and using usual base-0 addressing. Ids are just cookies, their exact values do not matter, so lets start with 3 on x86_64. Code size savings (oh boy): -4.2 KB As usual, ignore the initial compiler stupidity part of the table. add/remove: 0/0 grow/shrink: 12/670 up/down: 89/-4297 (-4208) function old new delta tipc_nametbl_insert_publ 1250 1270 +20 nlmclnt_lookup_host 686 703 +17 nfsd4_encode_fattr 5930 5941 +11 nfs_get_client 1050 1061 +11 register_pernet_operations 333 342 +9 tcf_mirred_init 843 849 +6 tcf_bpf_init 1143 1149 +6 gss_setup_upcall 990 994 +4 idmap_name_to_id 432 434 +2 ops_init 274 275 +1 nfsd_inject_forget_client 259 260 +1 nfs4_alloc_client 612 613 +1 tunnel_key_walker 164 163 -1 ... tipc_bcbase_select_primary 392 360 -32 mac80211_hwsim_new_radio 2808 2767 -41 ipip6_tunnel_ioctl 2228 2186 -42 tipc_bcast_rcv 715 672 -43 tipc_link_build_proto_msg 1140 1089 -51 nfsd4_lock 3851 3796 -55 tipc_mon_rcv 1012 956 -56 Total: Before=156643951, After=156639743, chg -0.00% Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/generic.h | 16 +++++++++------- net/core/net_namespace.c | 20 ++++++++++++-------- 2 files changed, 21 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/generic.h b/include/net/netns/generic.h index 65ccce69b040..f15daaa89385 100644 --- a/include/net/netns/generic.h +++ b/include/net/netns/generic.h @@ -25,12 +25,14 @@ */ struct net_generic { - struct { - unsigned int len; - struct rcu_head rcu; - } s; - - void *ptr[0]; + union { + struct { + unsigned int len; + struct rcu_head rcu; + } s; + + void *ptr[0]; + }; }; static inline void *net_generic(const struct net *net, unsigned int id) @@ -40,7 +42,7 @@ static inline void *net_generic(const struct net *net, unsigned int id) rcu_read_lock(); ng = rcu_dereference(net->gen); - ptr = ng->ptr[id - 1]; + ptr = ng->ptr[id]; rcu_read_unlock(); return ptr; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 55e28a8d0604..50fdc1b59777 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -39,6 +39,9 @@ EXPORT_SYMBOL(init_net); static bool init_net_initialized; +#define MIN_PERNET_OPS_ID \ + ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *)) + #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; @@ -46,7 +49,7 @@ static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; static struct net_generic *net_alloc_generic(void) { struct net_generic *ng; - size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); + unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); ng = kzalloc(generic_size, GFP_KERNEL); if (ng) @@ -60,12 +63,12 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data) struct net_generic *ng, *old_ng; BUG_ON(!mutex_is_locked(&net_mutex)); - BUG_ON(id == 0); + BUG_ON(id < MIN_PERNET_OPS_ID); old_ng = rcu_dereference_protected(net->gen, lockdep_is_held(&net_mutex)); - if (old_ng->s.len >= id) { - old_ng->ptr[id - 1] = data; + if (old_ng->s.len > id) { + old_ng->ptr[id] = data; return 0; } @@ -84,8 +87,9 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data) * the old copy for kfree after a grace period. */ - memcpy(&ng->ptr, &old_ng->ptr, old_ng->s.len * sizeof(void*)); - ng->ptr[id - 1] = data; + memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID], + (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *)); + ng->ptr[id] = data; rcu_assign_pointer(net->gen, ng); kfree_rcu(old_ng, s.rcu); @@ -874,7 +878,7 @@ static int register_pernet_operations(struct list_head *list, if (ops->id) { again: - error = ida_get_new_above(&net_generic_ids, 1, ops->id); + error = ida_get_new_above(&net_generic_ids, MIN_PERNET_OPS_ID, ops->id); if (error < 0) { if (error == -EAGAIN) { ida_pre_get(&net_generic_ids, GFP_KERNEL); @@ -882,7 +886,7 @@ again: } return error; } - max_gen_ptrs = max(max_gen_ptrs, *ops->id); + max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1); } error = __register_pernet_operations(list, ops); if (error) { -- cgit v1.2.3 From 1c677b3d2828a84e0360e1a07d1204531540dc03 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sat, 3 Dec 2016 16:44:59 +0100 Subject: ipv4: fib: Add fib_info_hold() helper As explained in the previous commit, modules are going to need to take a reference on fib info and then drop it using fib_info_put(). Add the fib_info_hold() helper to make the code more readable and also symmetric with fib_info_put(). Signed-off-by: Ido Schimmel Suggested-by: Jiri Pirko Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip_fib.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f390c3bb05c5..6c67b9391fc0 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -397,6 +397,11 @@ static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) void free_fib_info(struct fib_info *fi); +static inline void fib_info_hold(struct fib_info *fi) +{ + atomic_inc(&fi->fib_clntref); +} + static inline void fib_info_put(struct fib_info *fi) { if (atomic_dec_and_test(&fi->fib_clntref)) -- cgit v1.2.3 From cacaad11f43aefbbe5fca00af3b9c16e6aee1ba4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sat, 3 Dec 2016 16:45:06 +0100 Subject: ipv4: fib: Allow for consistent FIB dumping The next patch will enable listeners of the FIB notification chain to request a dump of the FIB tables. However, since RTNL isn't taken during the dump, it's possible for the FIB tables to change mid-dump, which will result in inconsistency between the listener's table and the kernel's. Allow listeners to know about changes that occurred mid-dump, by adding a change sequence counter to each net namespace. The counter is incremented just before a notification is sent in the FIB chain. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 3 +++ net/ipv4/fib_frontend.c | 2 ++ net/ipv4/fib_trie.c | 1 + 3 files changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 7adf4386ac8f..f0cf5a1b777e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -135,6 +135,9 @@ struct netns_ipv4 { #ifdef CONFIG_IP_ROUTE_MULTIPATH int sysctl_fib_multipath_use_neigh; #endif + + unsigned int fib_seq; /* protected by rtnl_mutex */ + atomic_t rt_genid; }; #endif diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 121384bbb40b..dbad5a1c161a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1219,6 +1219,8 @@ static int __net_init ip_fib_net_init(struct net *net) int err; size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ; + net->ipv4.fib_seq = 0; + /* Avoid false sharing : Use at least a full cache line */ size = max_t(size_t, size, L1_CACHE_BYTES); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 9bfce0df20dc..28913563e7cd 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -101,6 +101,7 @@ EXPORT_SYMBOL(unregister_fib_notifier); int call_fib_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info) { + net->ipv4.fib_seq++; info->net = net; return atomic_notifier_call_chain(&fib_chain, event_type, info); } -- cgit v1.2.3 From c3852ef7f2f8f75a9f85a864bec1f6f5a3068eea Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sat, 3 Dec 2016 16:45:07 +0100 Subject: ipv4: fib: Replay events when registering FIB notifier Commit b90eb7549499 ("fib: introduce FIB notification infrastructure") introduced a new notification chain to notify listeners (f.e., switchdev drivers) about addition and deletion of routes. However, upon registration to the chain the FIB tables can already be populated, which means potential listeners will have an incomplete view of the tables. Solve that by dumping the FIB tables and replaying the events to the passed notification block. The dump itself is done using RCU in order not to starve consumers that need RTNL to make progress. The integrity of the dump is ensured by reading the FIB change sequence counter before and after the dump under RTNL. This allows us to avoid the problematic situation in which the dumping process sends a ENTRY_ADD notification following ENTRY_DEL generated by another process holding RTNL. Callers of the registration function may pass a callback that is executed in case the dump was inconsistent with current FIB tables. The number of retries until a consistent dump is achieved is set to a fixed number to prevent callers from looping for long periods of time. In case current limit proves to be problematic in the future, it can be easily converted to be configurable using a sysctl. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 20 ++- drivers/net/ethernet/rocker/rocker_main.c | 8 +- include/net/ip_fib.h | 3 +- net/ipv4/fib_trie.c | 148 ++++++++++++++++++++- 4 files changed, 174 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 14bed1d10b72..53126bf68ea9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2027,6 +2027,18 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, return NOTIFY_DONE; } +static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb) +{ + struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb); + + /* Flush pending FIB notifications and then flush the device's + * table before requesting another dump. The FIB notification + * block is unregistered, so no need to take RTNL. + */ + mlxsw_core_flush_owq(); + mlxsw_sp_router_fib_flush(mlxsw_sp); +} + int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { int err; @@ -2047,9 +2059,15 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) goto err_neigh_init; mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event; - register_fib_notifier(&mlxsw_sp->fib_nb); + err = register_fib_notifier(&mlxsw_sp->fib_nb, + mlxsw_sp_router_fib_dump_flush); + if (err) + goto err_register_fib_notifier; + return 0; +err_register_fib_notifier: + mlxsw_sp_neigh_fini(mlxsw_sp); err_neigh_init: mlxsw_sp_vrs_fini(mlxsw_sp); err_vrs_init: diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 8c9c90ae8962..7c450b5a1138 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2804,8 +2804,13 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_alloc_ordered_workqueue; } + /* Only FIBs pointing to our own netdevs are programmed into + * the device, so no need to pass a callback. + */ rocker->fib_nb.notifier_call = rocker_router_fib_event; - register_fib_notifier(&rocker->fib_nb); + err = register_fib_notifier(&rocker->fib_nb, NULL); + if (err) + goto err_register_fib_notifier; rocker->hw.id = rocker_read64(rocker, SWITCH_ID); @@ -2822,6 +2827,7 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) err_probe_ports: unregister_fib_notifier(&rocker->fib_nb); +err_register_fib_notifier: destroy_workqueue(rocker->rocker_owq); err_alloc_ordered_workqueue: free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 6c67b9391fc0..5f376af377c7 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -221,7 +221,8 @@ enum fib_event_type { FIB_EVENT_RULE_DEL, }; -int register_fib_notifier(struct notifier_block *nb); +int register_fib_notifier(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb)); int unregister_fib_notifier(struct notifier_block *nb); int call_fib_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 28913563e7cd..73a62700b00a 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -84,11 +84,99 @@ #include #include "fib_lookup.h" +static unsigned int fib_seq_sum(void) +{ + unsigned int fib_seq = 0; + struct net *net; + + rtnl_lock(); + for_each_net(net) + fib_seq += net->ipv4.fib_seq; + rtnl_unlock(); + + return fib_seq; +} + static ATOMIC_NOTIFIER_HEAD(fib_chain); -int register_fib_notifier(struct notifier_block *nb) +static int call_fib_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info) { - return atomic_notifier_chain_register(&fib_chain, nb); + info->net = net; + return nb->notifier_call(nb, event_type, info); +} + +static void fib_rules_notify(struct net *net, struct notifier_block *nb, + enum fib_event_type event_type) +{ +#ifdef CONFIG_IP_MULTIPLE_TABLES + struct fib_notifier_info info; + + if (net->ipv4.fib_has_custom_rules) + call_fib_notifier(nb, net, event_type, &info); +#endif +} + +static void fib_notify(struct net *net, struct notifier_block *nb, + enum fib_event_type event_type); + +static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, u32 dst, + int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id, u32 nlflags) +{ + struct fib_entry_notifier_info info = { + .dst = dst, + .dst_len = dst_len, + .fi = fi, + .tos = tos, + .type = type, + .tb_id = tb_id, + .nlflags = nlflags, + }; + return call_fib_notifier(nb, net, event_type, &info.info); +} + +static bool fib_dump_is_consistent(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb), + unsigned int fib_seq) +{ + atomic_notifier_chain_register(&fib_chain, nb); + if (fib_seq == fib_seq_sum()) + return true; + atomic_notifier_chain_unregister(&fib_chain, nb); + if (cb) + cb(nb); + return false; +} + +#define FIB_DUMP_MAX_RETRIES 5 +int register_fib_notifier(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb)) +{ + int retries = 0; + + do { + unsigned int fib_seq = fib_seq_sum(); + struct net *net; + + /* Mutex semantics guarantee that every change done to + * FIB tries before we read the change sequence counter + * is now visible to us. + */ + rcu_read_lock(); + for_each_net_rcu(net) { + fib_rules_notify(net, nb, FIB_EVENT_RULE_ADD); + fib_notify(net, nb, FIB_EVENT_ENTRY_ADD); + } + rcu_read_unlock(); + + if (fib_dump_is_consistent(nb, cb, fib_seq)) + return 0; + } while (++retries < FIB_DUMP_MAX_RETRIES); + + return -EBUSY; } EXPORT_SYMBOL(register_fib_notifier); @@ -1902,6 +1990,62 @@ int fib_table_flush(struct net *net, struct fib_table *tb) return found; } +static void fib_leaf_notify(struct net *net, struct key_vector *l, + struct fib_table *tb, struct notifier_block *nb, + enum fib_event_type event_type) +{ + struct fib_alias *fa; + + hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { + struct fib_info *fi = fa->fa_info; + + if (!fi) + continue; + + /* local and main table can share the same trie, + * so don't notify twice for the same entry. + */ + if (tb->tb_id != fa->tb_id) + continue; + + call_fib_entry_notifier(nb, net, event_type, l->key, + KEYLENGTH - fa->fa_slen, fi, fa->fa_tos, + fa->fa_type, fa->tb_id, 0); + } +} + +static void fib_table_notify(struct net *net, struct fib_table *tb, + struct notifier_block *nb, + enum fib_event_type event_type) +{ + struct trie *t = (struct trie *)tb->tb_data; + struct key_vector *l, *tp = t->kv; + t_key key = 0; + + while ((l = leaf_walk_rcu(&tp, key)) != NULL) { + fib_leaf_notify(net, l, tb, nb, event_type); + + key = l->key + 1; + /* stop in case of wrap around */ + if (key < l->key) + break; + } +} + +static void fib_notify(struct net *net, struct notifier_block *nb, + enum fib_event_type event_type) +{ + unsigned int h; + + for (h = 0; h < FIB_TABLE_HASHSZ; h++) { + struct hlist_head *head = &net->ipv4.fib_table_hash[h]; + struct fib_table *tb; + + hlist_for_each_entry_rcu(tb, head, tb_hlist) + fib_table_notify(net, tb, nb, event_type); + } +} + static void __trie_free_rcu(struct rcu_head *head) { struct fib_table *tb = container_of(head, struct fib_table, rcu); -- cgit v1.2.3 From adc176c5472214971d77c1a61c83db9b01e9cdc7 Mon Sep 17 00:00:00 2001 From: Erik Nordmark Date: Fri, 2 Dec 2016 14:00:08 -0800 Subject: ipv6 addrconf: Implemented enhanced DAD (RFC7527) Implemented RFC7527 Enhanced DAD. IPv6 duplicate address detection can fail if there is some temporary loopback of Ethernet frames. RFC7527 solves this by including a random nonce in the NS messages used for DAD, and if an NS is received with the same nonce it is assumed to be a looped back DAD probe and is ignored. RFC7527 is enabled by default. Can be disabled by setting both of conf/{all,interface}/enhanced_dad to zero. Signed-off-by: Erik Nordmark Signed-off-by: Bob Gilligan Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 9 +++++++++ include/linux/ipv6.h | 1 + include/net/if_inet6.h | 1 + include/net/ndisc.h | 5 ++++- include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 22 +++++++++++++++++++++- net/ipv6/ndisc.c | 29 ++++++++++++++++++++++++++--- net/ipv6/route.c | 2 +- 8 files changed, 64 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 5ca567fa6b8c..7dd65c9cf707 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1734,6 +1734,15 @@ drop_unsolicited_na - BOOLEAN By default this is turned off. +enhanced_dad - BOOLEAN + Include a nonce option in the IPv6 neighbor solicitation messages used for + duplicate address detection per RFC7527. A received DAD NS will only signal + a duplicate address if the nonce is different. This avoids any false + detection of duplicates due to loopback of the NS messages that we send. + The nonce option will be sent on an interface unless both of + conf/{all,interface}/enhanced_dad are set to FALSE. + Default: TRUE + icmp/*: ratelimit - INTEGER Limit the maximal rates for sending ICMPv6 packets. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 3f95233b2733..671d014e6429 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -68,6 +68,7 @@ struct ipv6_devconf { #ifdef CONFIG_IPV6_SEG6_HMAC __s32 seg6_require_hmac; #endif + __u32 enhanced_dad; struct ctl_table_header *sysctl_header; }; diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index b0576cb2ab25..0fa4c324b713 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -55,6 +55,7 @@ struct inet6_ifaddr { __u8 stable_privacy_retry; __u16 scope; + __u64 dad_nonce; unsigned long cstamp; /* created timestamp */ unsigned long tstamp; /* updated timestamp */ diff --git a/include/net/ndisc.h b/include/net/ndisc.h index be1fe2283254..d562a2fe4860 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -31,6 +31,7 @@ enum { ND_OPT_PREFIX_INFO = 3, /* RFC2461 */ ND_OPT_REDIRECT_HDR = 4, /* RFC2461 */ ND_OPT_MTU = 5, /* RFC2461 */ + ND_OPT_NONCE = 14, /* RFC7527 */ __ND_OPT_ARRAY_MAX, ND_OPT_ROUTE_INFO = 24, /* RFC4191 */ ND_OPT_RDNSS = 25, /* RFC5006 */ @@ -121,6 +122,7 @@ struct ndisc_options { #define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END] #define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR] #define nd_opts_mtu nd_opt_array[ND_OPT_MTU] +#define nd_opts_nonce nd_opt_array[ND_OPT_NONCE] #define nd_802154_opts_src_lladdr nd_802154_opt_array[ND_OPT_SOURCE_LL_ADDR] #define nd_802154_opts_tgt_lladdr nd_802154_opt_array[ND_OPT_TARGET_LL_ADDR] @@ -398,7 +400,8 @@ void ndisc_cleanup(void); int ndisc_rcv(struct sk_buff *skb); void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, - const struct in6_addr *daddr, const struct in6_addr *saddr); + const struct in6_addr *daddr, const struct in6_addr *saddr, + u64 nonce); void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr); diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 53561be1ac21..eaf65dc82e22 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -181,6 +181,7 @@ enum { DEVCONF_RTR_SOLICIT_MAX_INTERVAL, DEVCONF_SEG6_ENABLED, DEVCONF_SEG6_REQUIRE_HMAC, + DEVCONF_ENHANCED_DAD, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4c387dc338e3..c1e124bc8e1e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -242,6 +242,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { #ifdef CONFIG_IPV6_SEG6_HMAC .seg6_require_hmac = 0, #endif + .enhanced_dad = 1, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -292,6 +293,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { #ifdef CONFIG_IPV6_SEG6_HMAC .seg6_require_hmac = 0, #endif + .enhanced_dad = 1, }; /* Check if a valid qdisc is available */ @@ -3735,12 +3737,21 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) { unsigned long rand_num; struct inet6_dev *idev = ifp->idev; + u64 nonce; if (ifp->flags & IFA_F_OPTIMISTIC) rand_num = 0; else rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1); + nonce = 0; + if (idev->cnf.enhanced_dad || + dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad) { + do + get_random_bytes(&nonce, 6); + while (nonce == 0); + } + ifp->dad_nonce = nonce; ifp->dad_probes = idev->cnf.dad_transmits; addrconf_mod_dad_work(ifp, rand_num); } @@ -3918,7 +3929,8 @@ static void addrconf_dad_work(struct work_struct *w) /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); - ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any); + ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any, + ifp->dad_nonce); out: in6_ifa_put(ifp); rtnl_unlock(); @@ -4962,6 +4974,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, #ifdef CONFIG_IPV6_SEG6_HMAC array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac; #endif + array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad; } static inline size_t inet6_ifla6_size(void) @@ -6069,6 +6082,13 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = proc_dointvec, }, #endif + { + .procname = "enhanced_dad", + .data = &ipv6_devconf.enhanced_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { /* sentinel */ } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index d8e671457d10..7ebac630d3c6 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -233,6 +233,7 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, case ND_OPT_SOURCE_LL_ADDR: case ND_OPT_TARGET_LL_ADDR: case ND_OPT_MTU: + case ND_OPT_NONCE: case ND_OPT_REDIRECT_HDR: if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { ND_PRINTK(2, warn, @@ -568,7 +569,8 @@ static void ndisc_send_unsol_na(struct net_device *dev) } void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, - const struct in6_addr *daddr, const struct in6_addr *saddr) + const struct in6_addr *daddr, const struct in6_addr *saddr, + u64 nonce) { struct sk_buff *skb; struct in6_addr addr_buf; @@ -588,6 +590,8 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, if (inc_opt) optlen += ndisc_opt_addr_space(dev, NDISC_NEIGHBOUR_SOLICITATION); + if (nonce != 0) + optlen += 8; skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) @@ -605,6 +609,13 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, NDISC_NEIGHBOUR_SOLICITATION); + if (nonce != 0) { + u8 *opt = skb_put(skb, 8); + + opt[0] = ND_OPT_NONCE; + opt[1] = 8 >> 3; + memcpy(opt + 2, &nonce, 6); + } ndisc_send_skb(skb, daddr, saddr); } @@ -693,12 +704,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) "%s: trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); } - ndisc_send_ns(dev, target, target, saddr); + ndisc_send_ns(dev, target, target, saddr, 0); } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { neigh_app_ns(neigh); } else { addrconf_addr_solict_mult(target, &mcaddr); - ndisc_send_ns(dev, target, &mcaddr, saddr); + ndisc_send_ns(dev, target, &mcaddr, saddr, 0); } } @@ -742,6 +753,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) int dad = ipv6_addr_any(saddr); bool inc; int is_router = -1; + u64 nonce = 0; if (skb->len < sizeof(struct nd_msg)) { ND_PRINTK(2, warn, "NS: packet too short\n"); @@ -786,6 +798,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) return; } } + if (ndopts.nd_opts_nonce) + memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6); inc = ipv6_addr_is_multicast(daddr); @@ -794,6 +808,15 @@ static void ndisc_recv_ns(struct sk_buff *skb) have_ifp: if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { if (dad) { + if (nonce != 0 && ifp->dad_nonce == nonce) { + u8 *np = (u8 *)&nonce; + /* Matching nonce if looped back */ + ND_PRINTK(2, notice, + "%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n", + ifp->idev->dev->name, + &ifp->addr, np); + goto out; + } /* * We are colliding with another node * who is doing DAD diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b317bb135ed4..aac7818e2e0f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -527,7 +527,7 @@ static void rt6_probe_deferred(struct work_struct *w) container_of(w, struct __rt6_probe_work, work); addrconf_addr_solict_mult(&work->target, &mcaddr); - ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL); + ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0); dev_put(work->dev); kfree(work); } -- cgit v1.2.3 From 0c4e966eafff8253bec545d8c27b9efa231c1f62 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 20 Oct 2016 18:33:01 +0200 Subject: netfilter: built-in NAT support for DCCP CONFIG_NF_NAT_PROTO_DCCP is no more a tristate. When set to y, NAT support for DCCP protocol is built-in into nf_nat.ko. footprint test: (nf_nat_proto_) | dccp || nf_nat --------------------------+--------++-------- no builtin | 409800 || 2241312 DCCP builtin | - || 2578968 Signed-off-by: Davide Caratti Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_nat_l4proto.h | 3 +++ net/netfilter/Kconfig | 2 +- net/netfilter/Makefile | 3 ++- net/netfilter/nf_nat_core.c | 4 ++++ net/netfilter/nf_nat_proto_dccp.c | 36 +--------------------------------- 5 files changed, 11 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h index 12f4cc841b6e..92b147be00ef 100644 --- a/include/net/netfilter/nf_nat_l4proto.h +++ b/include/net/netfilter/nf_nat_l4proto.h @@ -54,6 +54,9 @@ extern const struct nf_nat_l4proto nf_nat_l4proto_udp; extern const struct nf_nat_l4proto nf_nat_l4proto_icmp; extern const struct nf_nat_l4proto nf_nat_l4proto_icmpv6; extern const struct nf_nat_l4proto nf_nat_l4proto_unknown; +#ifdef CONFIG_NF_NAT_PROTO_DCCP +extern const struct nf_nat_l4proto nf_nat_l4proto_dccp; +#endif bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype, diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 44410d30d461..13092e5cd245 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -384,7 +384,7 @@ config NF_NAT_NEEDED default y config NF_NAT_PROTO_DCCP - tristate + bool depends on NF_NAT && NF_CT_PROTO_DCCP default NF_NAT && NF_CT_PROTO_DCCP diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 5bbf767672ec..9ea0c98e51e6 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -45,6 +45,8 @@ obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o +nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o + # generic transport layer logging obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o @@ -55,7 +57,6 @@ obj-$(CONFIG_NF_NAT) += nf_nat.o obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o # NAT protocols (nf_nat) -obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 5b9c884a452e..69b121d11275 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -682,6 +682,10 @@ int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto) &nf_nat_l4proto_tcp); RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP], &nf_nat_l4proto_udp); +#ifdef CONFIG_NF_NAT_PROTO_DCCP + RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_DCCP], + &nf_nat_l4proto_dccp); +#endif mutex_unlock(&nf_nat_proto_mutex); RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto); diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c index 15c47b246d0d..269fcd5dc34c 100644 --- a/net/netfilter/nf_nat_proto_dccp.c +++ b/net/netfilter/nf_nat_proto_dccp.c @@ -10,8 +10,6 @@ */ #include -#include -#include #include #include @@ -73,7 +71,7 @@ dccp_manip_pkt(struct sk_buff *skb, return true; } -static const struct nf_nat_l4proto nf_nat_l4proto_dccp = { +const struct nf_nat_l4proto nf_nat_l4proto_dccp = { .l4proto = IPPROTO_DCCP, .manip_pkt = dccp_manip_pkt, .in_range = nf_nat_l4proto_in_range, @@ -82,35 +80,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_dccp = { .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; - -static int __init nf_nat_proto_dccp_init(void) -{ - int err; - - err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_dccp); - if (err < 0) - goto err1; - err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_dccp); - if (err < 0) - goto err2; - return 0; - -err2: - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp); -err1: - return err; -} - -static void __exit nf_nat_proto_dccp_fini(void) -{ - nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_dccp); - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp); - -} - -module_init(nf_nat_proto_dccp_init); -module_exit(nf_nat_proto_dccp_fini); - -MODULE_AUTHOR("Patrick McHardy "); -MODULE_DESCRIPTION("DCCP NAT protocol helper"); -MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 7a2dd28c703408ef27d6fe6a4fcd7c58968ce3bf Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 20 Oct 2016 18:33:02 +0200 Subject: netfilter: built-in NAT support for SCTP CONFIG_NF_NAT_PROTO_SCTP is no more a tristate. When set to y, NAT support for SCTP protocol is built-in into nf_nat.ko. footprint test: (nf_nat_proto_) | sctp || nf_nat --------------------------+--------++-------- no builtin | 428344 || 2241312 SCTP builtin | - || 2597032 Signed-off-by: Davide Caratti Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_nat_l4proto.h | 3 +++ net/netfilter/Kconfig | 2 +- net/netfilter/Makefile | 2 +- net/netfilter/nf_nat_core.c | 4 ++++ net/netfilter/nf_nat_proto_sctp.c | 35 +--------------------------------- 5 files changed, 10 insertions(+), 36 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h index 92b147be00ef..2cbaf3856e21 100644 --- a/include/net/netfilter/nf_nat_l4proto.h +++ b/include/net/netfilter/nf_nat_l4proto.h @@ -57,6 +57,9 @@ extern const struct nf_nat_l4proto nf_nat_l4proto_unknown; #ifdef CONFIG_NF_NAT_PROTO_DCCP extern const struct nf_nat_l4proto nf_nat_l4proto_dccp; #endif +#ifdef CONFIG_NF_NAT_PROTO_SCTP +extern const struct nf_nat_l4proto nf_nat_l4proto_sctp; +#endif bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype, diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 13092e5cd245..ad72edf1f6ec 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -394,7 +394,7 @@ config NF_NAT_PROTO_UDPLITE default NF_NAT && NF_CT_PROTO_UDPLITE config NF_NAT_PROTO_SCTP - tristate + bool default NF_NAT && NF_CT_PROTO_SCTP depends on NF_NAT && NF_CT_PROTO_SCTP select LIBCRC32C diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 9ea0c98e51e6..02ef6decf94d 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -46,6 +46,7 @@ nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o +nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o # generic transport layer logging obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o @@ -58,7 +59,6 @@ obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o # NAT protocols (nf_nat) obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o -obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o # NAT helpers obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 69b121d11275..80858bd110cc 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -685,6 +685,10 @@ int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto) #ifdef CONFIG_NF_NAT_PROTO_DCCP RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_DCCP], &nf_nat_l4proto_dccp); +#endif +#ifdef CONFIG_NF_NAT_PROTO_SCTP + RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_SCTP], + &nf_nat_l4proto_sctp); #endif mutex_unlock(&nf_nat_proto_mutex); diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c index cbc7ade1487b..2e14108ff697 100644 --- a/net/netfilter/nf_nat_proto_sctp.c +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -7,9 +7,7 @@ */ #include -#include #include -#include #include #include @@ -54,7 +52,7 @@ sctp_manip_pkt(struct sk_buff *skb, return true; } -static const struct nf_nat_l4proto nf_nat_l4proto_sctp = { +const struct nf_nat_l4proto nf_nat_l4proto_sctp = { .l4proto = IPPROTO_SCTP, .manip_pkt = sctp_manip_pkt, .in_range = nf_nat_l4proto_in_range, @@ -63,34 +61,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_sctp = { .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; - -static int __init nf_nat_proto_sctp_init(void) -{ - int err; - - err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_sctp); - if (err < 0) - goto err1; - err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_sctp); - if (err < 0) - goto err2; - return 0; - -err2: - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp); -err1: - return err; -} - -static void __exit nf_nat_proto_sctp_exit(void) -{ - nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_sctp); - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp); -} - -module_init(nf_nat_proto_sctp_init); -module_exit(nf_nat_proto_sctp_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SCTP NAT protocol helper"); -MODULE_AUTHOR("Patrick McHardy "); -- cgit v1.2.3 From b8ad652f9779976d0300ae199961e413859d5378 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 20 Oct 2016 18:33:03 +0200 Subject: netfilter: built-in NAT support for UDPlite CONFIG_NF_NAT_PROTO_UDPLITE is no more a tristate. When set to y, NAT support for UDPlite protocol is built-in into nf_nat.ko. footprint test: (nf_nat_proto_) |udplite || nf_nat --------------------------+--------++-------- no builtin | 408048 || 2241312 UDPLITE builtin | - || 2577256 Signed-off-by: Davide Caratti Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_nat_l4proto.h | 3 +++ net/netfilter/Kconfig | 2 +- net/netfilter/Makefile | 5 ++--- net/netfilter/nf_nat_core.c | 4 ++++ net/netfilter/nf_nat_proto_udplite.c | 35 +--------------------------------- 5 files changed, 11 insertions(+), 38 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h index 2cbaf3856e21..3923150f2a1e 100644 --- a/include/net/netfilter/nf_nat_l4proto.h +++ b/include/net/netfilter/nf_nat_l4proto.h @@ -60,6 +60,9 @@ extern const struct nf_nat_l4proto nf_nat_l4proto_dccp; #ifdef CONFIG_NF_NAT_PROTO_SCTP extern const struct nf_nat_l4proto nf_nat_l4proto_sctp; #endif +#ifdef CONFIG_NF_NAT_PROTO_UDPLITE +extern const struct nf_nat_l4proto nf_nat_l4proto_udplite; +#endif bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype, diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index ad72edf1f6ec..496e1dcbd003 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -389,7 +389,7 @@ config NF_NAT_PROTO_DCCP default NF_NAT && NF_CT_PROTO_DCCP config NF_NAT_PROTO_UDPLITE - tristate + bool depends on NF_NAT && NF_CT_PROTO_UDPLITE default NF_NAT && NF_CT_PROTO_UDPLITE diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 02ef6decf94d..3b97d89df2cd 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -45,8 +45,10 @@ obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o +# NAT protocols (nf_nat) nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o +nf_nat-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o # generic transport layer logging obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o @@ -57,9 +59,6 @@ obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o obj-$(CONFIG_NF_NAT) += nf_nat.o obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o -# NAT protocols (nf_nat) -obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o - # NAT helpers obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 80858bd110cc..94b14c5a8b17 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -689,6 +689,10 @@ int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto) #ifdef CONFIG_NF_NAT_PROTO_SCTP RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_SCTP], &nf_nat_l4proto_sctp); +#endif +#ifdef CONFIG_NF_NAT_PROTO_UDPLITE + RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDPLITE], + &nf_nat_l4proto_udplite); #endif mutex_unlock(&nf_nat_proto_mutex); diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c index 58340c97bd83..366bfbfd82a1 100644 --- a/net/netfilter/nf_nat_proto_udplite.c +++ b/net/netfilter/nf_nat_proto_udplite.c @@ -8,11 +8,9 @@ */ #include -#include #include #include -#include #include #include #include @@ -64,7 +62,7 @@ udplite_manip_pkt(struct sk_buff *skb, return true; } -static const struct nf_nat_l4proto nf_nat_l4proto_udplite = { +const struct nf_nat_l4proto nf_nat_l4proto_udplite = { .l4proto = IPPROTO_UDPLITE, .manip_pkt = udplite_manip_pkt, .in_range = nf_nat_l4proto_in_range, @@ -73,34 +71,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_udplite = { .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; - -static int __init nf_nat_proto_udplite_init(void) -{ - int err; - - err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_udplite); - if (err < 0) - goto err1; - err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_udplite); - if (err < 0) - goto err2; - return 0; - -err2: - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite); -err1: - return err; -} - -static void __exit nf_nat_proto_udplite_fini(void) -{ - nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_udplite); - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite); -} - -module_init(nf_nat_proto_udplite_init); -module_exit(nf_nat_proto_udplite_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("UDP-Lite NAT protocol helper"); -MODULE_AUTHOR("Patrick McHardy "); -- cgit v1.2.3 From 673ab46f345557e9d741e97ca0301280360d1af1 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 14 Nov 2016 22:39:25 +0800 Subject: netfilter: nf_log: do not assume ethernet header in netdev family In netdev family, we will handle non ethernet packets, so using eth_hdr(skb)->h_proto is incorrect. Meanwhile, we can use socket(AF_PACKET...) to sending packets, so skb->protocol is not always set in bridge family. Add an extra parameter into nf_log_l2packet to solve this issue. Fixes: 1fddf4bad0ac ("netfilter: nf_log: add packet logging for netdev family") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_log.h | 4 +++- net/bridge/netfilter/nf_log_bridge.c | 3 ++- net/netfilter/nf_log_common.c | 3 ++- net/netfilter/nf_log_netdev.c | 3 ++- 4 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index a559aa41253c..450f87f95415 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -109,7 +109,9 @@ void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix); -void nf_log_l2packet(struct net *net, u_int8_t pf, unsigned int hooknum, +void nf_log_l2packet(struct net *net, u_int8_t pf, + __be16 protocol, + unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c index c197b1f844ee..bd2b3c78f59b 100644 --- a/net/bridge/netfilter/nf_log_bridge.c +++ b/net/bridge/netfilter/nf_log_bridge.c @@ -24,7 +24,8 @@ static void nf_log_bridge_packet(struct net *net, u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { - nf_log_l2packet(net, pf, hooknum, skb, in, out, loginfo, prefix); + nf_log_l2packet(net, pf, eth_hdr(skb)->h_proto, hooknum, skb, + in, out, loginfo, prefix); } static struct nf_logger nf_bridge_logger __read_mostly = { diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index ed9b80815fa0..dc61399e30be 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -177,6 +177,7 @@ EXPORT_SYMBOL_GPL(nf_log_dump_packet_common); /* bridge and netdev logging families share this code. */ void nf_log_l2packet(struct net *net, u_int8_t pf, + __be16 protocol, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -184,7 +185,7 @@ void nf_log_l2packet(struct net *net, u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { - switch (eth_hdr(skb)->h_proto) { + switch (protocol) { case htons(ETH_P_IP): nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out, loginfo, "%s", prefix); diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c index 1f645949f3d8..350eb147754d 100644 --- a/net/netfilter/nf_log_netdev.c +++ b/net/netfilter/nf_log_netdev.c @@ -23,7 +23,8 @@ static void nf_log_netdev_packet(struct net *net, u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { - nf_log_l2packet(net, pf, hooknum, skb, in, out, loginfo, prefix); + nf_log_l2packet(net, pf, skb->protocol, hooknum, skb, in, out, + loginfo, prefix); } static struct nf_logger nf_netdev_logger __read_mostly = { -- cgit v1.2.3 From c51d39010a1bccc9c1294e2d7c00005aefeb2b5c Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 15 Nov 2016 15:08:25 +0100 Subject: netfilter: conntrack: built-in support for DCCP CONFIG_NF_CT_PROTO_DCCP is no more a tristate. When set to y, connection tracking support for DCCP protocol is built-in into nf_conntrack.ko. footprint test: $ ls -l net/netfilter/nf_conntrack{_proto_dccp,}.ko \ net/ipv4/netfilter/nf_conntrack_ipv4.ko \ net/ipv6/netfilter/nf_conntrack_ipv6.ko (builtin)|| dccp | ipv4 | ipv6 | nf_conntrack ---------++--------+--------+--------+-------------- none || 469140 | 828755 | 828676 | 6141434 DCCP || - | 830566 | 829935 | 6533526 Signed-off-by: Davide Caratti Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_dccp.h | 2 +- include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 3 + include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 + include/net/netns/conntrack.h | 14 +++++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 3 + net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 3 + net/netfilter/Kconfig | 6 +- net/netfilter/Makefile | 3 +- net/netfilter/nf_conntrack_proto_dccp.c | 79 ++++---------------------- 9 files changed, 41 insertions(+), 75 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter/nf_conntrack_dccp.h b/include/linux/netfilter/nf_conntrack_dccp.h index 40dcc82058d1..ff721d7325cf 100644 --- a/include/linux/netfilter/nf_conntrack_dccp.h +++ b/include/linux/netfilter/nf_conntrack_dccp.h @@ -25,7 +25,7 @@ enum ct_dccp_roles { #define CT_DCCP_ROLE_MAX (__CT_DCCP_ROLE_MAX - 1) #ifdef __KERNEL__ -#include +#include struct nf_ct_dccp { u_int8_t role[IP_CT_DIR_MAX]; diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 981c327374da..c2f155fd9299 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -15,6 +15,9 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; +#ifdef CONFIG_NF_CT_PROTO_DCCP +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; +#endif int nf_conntrack_ipv4_compat_init(void); void nf_conntrack_ipv4_compat_fini(void); diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index a4c993685795..5ec66c0d21c4 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -6,6 +6,9 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; +#ifdef CONFIG_NF_CT_PROTO_DCCP +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; +#endif #include extern struct ctl_table nf_ct_ipv6_sysctl_table[]; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 3d06d94d2e52..440b781baf0b 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -6,6 +6,9 @@ #include #include #include +#ifdef CONFIG_NF_CT_PROTO_DCCP +#include +#endif #include struct ctl_table_header; @@ -48,12 +51,23 @@ struct nf_icmp_net { unsigned int timeout; }; +#ifdef CONFIG_NF_CT_PROTO_DCCP +struct nf_dccp_net { + struct nf_proto_net pn; + int dccp_loose; + unsigned int dccp_timeout[CT_DCCP_MAX + 1]; +}; +#endif + struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; struct nf_udp_net udp; struct nf_icmp_net icmp; struct nf_icmp_net icmpv6; +#ifdef CONFIG_NF_CT_PROTO_DCCP + struct nf_dccp_net dccp; +#endif }; struct ct_pcpu { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 7130ed5dc1fa..cb3cf770b00c 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -340,6 +340,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = { &nf_conntrack_l4proto_tcp4, &nf_conntrack_l4proto_udp4, &nf_conntrack_l4proto_icmp, +#ifdef CONFIG_NF_CT_PROTO_DCCP + &nf_conntrack_l4proto_dccp4, +#endif }; static int ipv4_net_init(struct net *net) diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 500be28ff563..f52338d02951 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -340,6 +340,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = { &nf_conntrack_l4proto_tcp6, &nf_conntrack_l4proto_udp6, &nf_conntrack_l4proto_icmpv6, +#ifdef CONFIG_NF_CT_PROTO_DCCP + &nf_conntrack_l4proto_dccp6, +#endif }; static int ipv6_net_init(struct net *net) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 496e1dcbd003..27a3d8c8f8ce 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -146,14 +146,14 @@ config NF_CONNTRACK_LABELS to connection tracking entries. It selected by the connlabel match. config NF_CT_PROTO_DCCP - tristate 'DCCP protocol connection tracking support' + bool 'DCCP protocol connection tracking support' depends on NETFILTER_ADVANCED - default IP_DCCP + default y help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on DCCP connections. - If unsure, say 'N'. + If unsure, say Y. config NF_CT_PROTO_GRE tristate diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 3b97d89df2cd..bbd0cc08eff0 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -5,6 +5,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o +nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -16,8 +17,6 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o # connection tracking obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o -# SCTP protocol connection tracking -obj-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 073b047314dc..b68ce6ac13b3 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -9,7 +9,6 @@ * */ #include -#include #include #include #include @@ -384,17 +383,9 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = }, }; -/* this module per-net specifics */ -static unsigned int dccp_net_id __read_mostly; -struct dccp_net { - struct nf_proto_net pn; - int dccp_loose; - unsigned int dccp_timeout[CT_DCCP_MAX + 1]; -}; - -static inline struct dccp_net *dccp_pernet(struct net *net) +static inline struct nf_dccp_net *dccp_pernet(struct net *net) { - return net_generic(net, dccp_net_id); + return &net->ct.nf_ct_proto.dccp; } static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, @@ -424,7 +415,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, unsigned int *timeouts) { struct net *net = nf_ct_net(ct); - struct dccp_net *dn; + struct nf_dccp_net *dn; struct dccp_hdr _dh, *dh; const char *msg; u_int8_t state; @@ -719,7 +710,7 @@ static int dccp_nlattr_size(void) static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { - struct dccp_net *dn = dccp_pernet(net); + struct nf_dccp_net *dn = dccp_pernet(net); unsigned int *timeouts = data; int i; @@ -820,7 +811,7 @@ static struct ctl_table dccp_sysctl_table[] = { #endif /* CONFIG_SYSCTL */ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn, - struct dccp_net *dn) + struct nf_dccp_net *dn) { #ifdef CONFIG_SYSCTL if (pn->ctl_table) @@ -850,7 +841,7 @@ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn, static int dccp_init_net(struct net *net, u_int16_t proto) { - struct dccp_net *dn = dccp_pernet(net); + struct nf_dccp_net *dn = dccp_pernet(net); struct nf_proto_net *pn = &dn->pn; if (!pn->users) { @@ -868,7 +859,7 @@ static int dccp_init_net(struct net *net, u_int16_t proto) return dccp_kmemdup_sysctl_table(net, pn, dn); } -static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = { +struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = { .l3proto = AF_INET, .l4proto = IPPROTO_DCCP, .name = "dccp", @@ -898,11 +889,11 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = { .nla_policy = dccp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .net_id = &dccp_net_id, .init_net = dccp_init_net, }; +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4); -static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { +struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = { .l3proto = AF_INET6, .l4proto = IPPROTO_DCCP, .name = "dccp", @@ -932,56 +923,6 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { .nla_policy = dccp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .net_id = &dccp_net_id, .init_net = dccp_init_net, }; - -static struct nf_conntrack_l4proto *dccp_proto[] = { - &dccp_proto4, - &dccp_proto6, -}; - -static __net_init int dccp_net_init(struct net *net) -{ - return nf_ct_l4proto_pernet_register(net, dccp_proto, - ARRAY_SIZE(dccp_proto)); -} - -static __net_exit void dccp_net_exit(struct net *net) -{ - nf_ct_l4proto_pernet_unregister(net, dccp_proto, - ARRAY_SIZE(dccp_proto)); -} - -static struct pernet_operations dccp_net_ops = { - .init = dccp_net_init, - .exit = dccp_net_exit, - .id = &dccp_net_id, - .size = sizeof(struct dccp_net), -}; - -static int __init nf_conntrack_proto_dccp_init(void) -{ - int ret; - - ret = register_pernet_subsys(&dccp_net_ops); - if (ret < 0) - return ret; - ret = nf_ct_l4proto_register(dccp_proto, ARRAY_SIZE(dccp_proto)); - if (ret < 0) - unregister_pernet_subsys(&dccp_net_ops); - return ret; -} - -static void __exit nf_conntrack_proto_dccp_fini(void) -{ - nf_ct_l4proto_unregister(dccp_proto, ARRAY_SIZE(dccp_proto)); - unregister_pernet_subsys(&dccp_net_ops); -} - -module_init(nf_conntrack_proto_dccp_init); -module_exit(nf_conntrack_proto_dccp_fini); - -MODULE_AUTHOR("Patrick McHardy "); -MODULE_DESCRIPTION("DCCP connection tracking protocol helper"); -MODULE_LICENSE("GPL"); +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp6); -- cgit v1.2.3 From a85406afeb3e045b001b2aac5b4f89f4266fede3 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 15 Nov 2016 15:08:26 +0100 Subject: netfilter: conntrack: built-in support for SCTP CONFIG_NF_CT_PROTO_SCTP is no more a tristate. When set to y, connection tracking support for SCTP protocol is built-in into nf_conntrack.ko. footprint test: $ ls -l net/netfilter/nf_conntrack{_proto_sctp,}.ko \ net/ipv4/netfilter/nf_conntrack_ipv4.ko \ net/ipv6/netfilter/nf_conntrack_ipv6.ko (builtin)|| sctp | ipv4 | ipv6 | nf_conntrack ---------++--------+--------+--------+-------------- none || 498243 | 828755 | 828676 | 6141434 SCTP || - | 829254 | 829175 | 6547872 Signed-off-by: Davide Caratti Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 3 + include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 + include/net/netns/conntrack.h | 13 +++++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 3 + net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 3 + net/netfilter/Kconfig | 7 +-- net/netfilter/Makefile | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 76 +++----------------------- 8 files changed, 38 insertions(+), 72 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index c2f155fd9299..5f1fc15a51fb 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -18,6 +18,9 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; #ifdef CONFIG_NF_CT_PROTO_DCCP extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; #endif +#ifdef CONFIG_NF_CT_PROTO_SCTP +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4; +#endif int nf_conntrack_ipv4_compat_init(void); void nf_conntrack_ipv4_compat_fini(void); diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index 5ec66c0d21c4..f70d191a8820 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -9,6 +9,9 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; #ifdef CONFIG_NF_CT_PROTO_DCCP extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; #endif +#ifdef CONFIG_NF_CT_PROTO_SCTP +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6; +#endif #include extern struct ctl_table nf_ct_ipv6_sysctl_table[]; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 440b781baf0b..17724c62de97 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -9,6 +9,9 @@ #ifdef CONFIG_NF_CT_PROTO_DCCP #include #endif +#ifdef CONFIG_NF_CT_PROTO_SCTP +#include +#endif #include struct ctl_table_header; @@ -59,6 +62,13 @@ struct nf_dccp_net { }; #endif +#ifdef CONFIG_NF_CT_PROTO_SCTP +struct nf_sctp_net { + struct nf_proto_net pn; + unsigned int timeouts[SCTP_CONNTRACK_MAX]; +}; +#endif + struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; @@ -68,6 +78,9 @@ struct nf_ip_net { #ifdef CONFIG_NF_CT_PROTO_DCCP struct nf_dccp_net dccp; #endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + struct nf_sctp_net sctp; +#endif }; struct ct_pcpu { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index cb3cf770b00c..0a9d354ef314 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -343,6 +343,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = { #ifdef CONFIG_NF_CT_PROTO_DCCP &nf_conntrack_l4proto_dccp4, #endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + &nf_conntrack_l4proto_sctp4, +#endif }; static int ipv4_net_init(struct net *net) diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index f52338d02951..1d8daafb1685 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -343,6 +343,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = { #ifdef CONFIG_NF_CT_PROTO_DCCP &nf_conntrack_l4proto_dccp6, #endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + &nf_conntrack_l4proto_sctp6, +#endif }; static int ipv6_net_init(struct net *net) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 27a3d8c8f8ce..29c0bf0a315d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -159,15 +159,14 @@ config NF_CT_PROTO_GRE tristate config NF_CT_PROTO_SCTP - tristate 'SCTP protocol connection tracking support' + bool 'SCTP protocol connection tracking support' depends on NETFILTER_ADVANCED - default IP_SCTP + default y help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on SCTP connections. - If you want to compile it as a module, say M here and read - . If unsure, say `N'. + If unsure, say Y. config NF_CT_PROTO_UDPLITE tristate 'UDP-Lite protocol connection tracking support' diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index bbd0cc08eff0..6545c28ab746 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -6,6 +6,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o +nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -18,7 +19,6 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o -obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o # netlink interface for nf_conntrack diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index d096c2d6b87b..a0efde38da44 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -144,15 +143,9 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { } }; -static unsigned int sctp_net_id __read_mostly; -struct sctp_net { - struct nf_proto_net pn; - unsigned int timeouts[SCTP_CONNTRACK_MAX]; -}; - -static inline struct sctp_net *sctp_pernet(struct net *net) +static inline struct nf_sctp_net *sctp_pernet(struct net *net) { - return net_generic(net, sctp_net_id); + return &net->ct.nf_ct_proto.sctp; } static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, @@ -600,7 +593,7 @@ static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeouts = data; - struct sctp_net *sn = sctp_pernet(net); + struct nf_sctp_net *sn = sctp_pernet(net); int i; /* set default SCTP timeouts. */ @@ -708,7 +701,7 @@ static struct ctl_table sctp_sysctl_table[] = { #endif static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct sctp_net *sn) + struct nf_sctp_net *sn) { #ifdef CONFIG_SYSCTL if (pn->ctl_table) @@ -735,7 +728,7 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, static int sctp_init_net(struct net *net, u_int16_t proto) { - struct sctp_net *sn = sctp_pernet(net); + struct nf_sctp_net *sn = sctp_pernet(net); struct nf_proto_net *pn = &sn->pn; if (!pn->users) { @@ -748,7 +741,7 @@ static int sctp_init_net(struct net *net, u_int16_t proto) return sctp_kmemdup_sysctl_table(pn, sn); } -static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { +struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_SCTP, .name = "sctp", @@ -778,11 +771,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .nla_policy = sctp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .net_id = &sctp_net_id, .init_net = sctp_init_net, }; +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4); -static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { +struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_SCTP, .name = "sctp", @@ -812,57 +805,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #endif - .net_id = &sctp_net_id, .init_net = sctp_init_net, }; - -static struct nf_conntrack_l4proto *sctp_proto[] = { - &nf_conntrack_l4proto_sctp4, - &nf_conntrack_l4proto_sctp6, -}; - -static int sctp_net_init(struct net *net) -{ - return nf_ct_l4proto_pernet_register(net, sctp_proto, - ARRAY_SIZE(sctp_proto)); -} - -static void sctp_net_exit(struct net *net) -{ - nf_ct_l4proto_pernet_unregister(net, sctp_proto, - ARRAY_SIZE(sctp_proto)); -} - -static struct pernet_operations sctp_net_ops = { - .init = sctp_net_init, - .exit = sctp_net_exit, - .id = &sctp_net_id, - .size = sizeof(struct sctp_net), -}; - -static int __init nf_conntrack_proto_sctp_init(void) -{ - int ret; - - ret = register_pernet_subsys(&sctp_net_ops); - if (ret < 0) - return ret; - ret = nf_ct_l4proto_register(sctp_proto, ARRAY_SIZE(sctp_proto)); - if (ret < 0) - unregister_pernet_subsys(&sctp_net_ops); - return ret; -} - -static void __exit nf_conntrack_proto_sctp_fini(void) -{ - nf_ct_l4proto_unregister(sctp_proto, ARRAY_SIZE(sctp_proto)); - unregister_pernet_subsys(&sctp_net_ops); -} - -module_init(nf_conntrack_proto_sctp_init); -module_exit(nf_conntrack_proto_sctp_fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Kiran Kumar Immidi"); -MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP"); -MODULE_ALIAS("ip_conntrack_proto_sctp"); +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp6); -- cgit v1.2.3 From 9b91c96c5d1f9da79438292f8c82f65cbf078645 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 15 Nov 2016 15:08:27 +0100 Subject: netfilter: conntrack: built-in support for UDPlite CONFIG_NF_CT_PROTO_UDPLITE is no more a tristate. When set to y, connection tracking support for UDPlite protocol is built-in into nf_conntrack.ko. footprint test: $ ls -l net/netfilter/nf_conntrack{_proto_udplite,}.ko \ net/ipv4/netfilter/nf_conntrack_ipv4.ko \ net/ipv6/netfilter/nf_conntrack_ipv6.ko (builtin)|| udplite| ipv4 | ipv6 |nf_conntrack ---------++--------+--------+--------+-------------- none || 432538 | 828755 | 828676 | 6141434 UDPlite || - | 829649 | 829362 | 6498204 Signed-off-by: Davide Caratti Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 3 + include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 + include/net/netns/conntrack.h | 16 ++++++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 3 + net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 3 + net/netfilter/Kconfig | 5 +- net/netfilter/Makefile | 2 +- net/netfilter/nf_conntrack_proto_udplite.c | 79 +++----------------------- 8 files changed, 41 insertions(+), 73 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 5f1fc15a51fb..919e4e8af327 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -21,6 +21,9 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; #ifdef CONFIG_NF_CT_PROTO_SCTP extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4; #endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4; +#endif int nf_conntrack_ipv4_compat_init(void); void nf_conntrack_ipv4_compat_fini(void); diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index f70d191a8820..eaea968f8657 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -12,6 +12,9 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; #ifdef CONFIG_NF_CT_PROTO_SCTP extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6; #endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6; +#endif #include extern struct ctl_table nf_ct_ipv6_sysctl_table[]; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 17724c62de97..cf799fc3fdec 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -69,6 +69,19 @@ struct nf_sctp_net { }; #endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +enum udplite_conntrack { + UDPLITE_CT_UNREPLIED, + UDPLITE_CT_REPLIED, + UDPLITE_CT_MAX +}; + +struct nf_udplite_net { + struct nf_proto_net pn; + unsigned int timeouts[UDPLITE_CT_MAX]; +}; +#endif + struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; @@ -81,6 +94,9 @@ struct nf_ip_net { #ifdef CONFIG_NF_CT_PROTO_SCTP struct nf_sctp_net sctp; #endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + struct nf_udplite_net udplite; +#endif }; struct ct_pcpu { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 0a9d354ef314..22fce4fcece4 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -346,6 +346,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = { #ifdef CONFIG_NF_CT_PROTO_SCTP &nf_conntrack_l4proto_sctp4, #endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + &nf_conntrack_l4proto_udplite4, +#endif }; static int ipv4_net_init(struct net *net) diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 1d8daafb1685..389f712854f2 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -346,6 +346,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = { #ifdef CONFIG_NF_CT_PROTO_SCTP &nf_conntrack_l4proto_sctp6, #endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + &nf_conntrack_l4proto_udplite6, +#endif }; static int ipv6_net_init(struct net *net) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 29c0bf0a315d..def4be06cda6 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -169,14 +169,15 @@ config NF_CT_PROTO_SCTP If unsure, say Y. config NF_CT_PROTO_UDPLITE - tristate 'UDP-Lite protocol connection tracking support' + bool 'UDP-Lite protocol connection tracking support' depends on NETFILTER_ADVANCED + default y help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on UDP-Lite connections. - To compile it as a module, choose M here. If unsure, say N. + If unsure, say Y. config NF_CONNTRACK_AMANDA tristate "Amanda backup protocol support" diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 6545c28ab746..e4c8c1d7aaed 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -7,6 +7,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o +nf_conntrack-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -19,7 +20,6 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o -obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o # netlink interface for nf_conntrack obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 7808604c70a2..c35f7bf05d8c 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -9,7 +9,6 @@ #include #include -#include #include #include #include @@ -24,26 +23,14 @@ #include #include -enum udplite_conntrack { - UDPLITE_CT_UNREPLIED, - UDPLITE_CT_REPLIED, - UDPLITE_CT_MAX -}; - static unsigned int udplite_timeouts[UDPLITE_CT_MAX] = { [UDPLITE_CT_UNREPLIED] = 30*HZ, [UDPLITE_CT_REPLIED] = 180*HZ, }; -static unsigned int udplite_net_id __read_mostly; -struct udplite_net { - struct nf_proto_net pn; - unsigned int timeouts[UDPLITE_CT_MAX]; -}; - -static inline struct udplite_net *udplite_pernet(struct net *net) +static inline struct nf_udplite_net *udplite_pernet(struct net *net) { - return net_generic(net, udplite_net_id); + return &net->ct.nf_ct_proto.udplite; } static bool udplite_pkt_to_tuple(const struct sk_buff *skb, @@ -178,7 +165,7 @@ static int udplite_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeouts = data; - struct udplite_net *un = udplite_pernet(net); + struct nf_udplite_net *un = udplite_pernet(net); /* set default timeouts for UDPlite. */ timeouts[UDPLITE_CT_UNREPLIED] = un->timeouts[UDPLITE_CT_UNREPLIED]; @@ -237,7 +224,7 @@ static struct ctl_table udplite_sysctl_table[] = { #endif /* CONFIG_SYSCTL */ static int udplite_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct udplite_net *un) + struct nf_udplite_net *un) { #ifdef CONFIG_SYSCTL if (pn->ctl_table) @@ -257,7 +244,7 @@ static int udplite_kmemdup_sysctl_table(struct nf_proto_net *pn, static int udplite_init_net(struct net *net, u_int16_t proto) { - struct udplite_net *un = udplite_pernet(net); + struct nf_udplite_net *un = udplite_pernet(net); struct nf_proto_net *pn = &un->pn; if (!pn->users) { @@ -270,7 +257,7 @@ static int udplite_init_net(struct net *net, u_int16_t proto) return udplite_kmemdup_sysctl_table(pn, un); } -static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = +struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_UDPLITE, @@ -299,11 +286,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = .nla_policy = udplite_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .net_id = &udplite_net_id, .init_net = udplite_init_net, }; +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4); -static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = +struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_UDPLITE, @@ -332,54 +319,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .nla_policy = udplite_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .net_id = &udplite_net_id, .init_net = udplite_init_net, }; - -static struct nf_conntrack_l4proto *udplite_proto[] = { - &nf_conntrack_l4proto_udplite4, - &nf_conntrack_l4proto_udplite6, -}; - -static int udplite_net_init(struct net *net) -{ - return nf_ct_l4proto_pernet_register(net, udplite_proto, - ARRAY_SIZE(udplite_proto)); -} - -static void udplite_net_exit(struct net *net) -{ - nf_ct_l4proto_pernet_unregister(net, udplite_proto, - ARRAY_SIZE(udplite_proto)); -} - -static struct pernet_operations udplite_net_ops = { - .init = udplite_net_init, - .exit = udplite_net_exit, - .id = &udplite_net_id, - .size = sizeof(struct udplite_net), -}; - -static int __init nf_conntrack_proto_udplite_init(void) -{ - int ret; - - ret = register_pernet_subsys(&udplite_net_ops); - if (ret < 0) - return ret; - ret = nf_ct_l4proto_register(udplite_proto, ARRAY_SIZE(udplite_proto)); - if (ret < 0) - unregister_pernet_subsys(&udplite_net_ops); - return ret; -} - -static void __exit nf_conntrack_proto_udplite_exit(void) -{ - nf_ct_l4proto_unregister(udplite_proto, ARRAY_SIZE(udplite_proto)); - unregister_pernet_subsys(&udplite_net_ops); -} - -module_init(nf_conntrack_proto_udplite_init); -module_exit(nf_conntrack_proto_udplite_exit); - -MODULE_LICENSE("GPL"); +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6); -- cgit v1.2.3 From a379854d91b2cb0af07b0f62845449f4dacbd673 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Nov 2016 21:36:39 +0100 Subject: netfilter: conntrack: remove unused init_net hook since adf0516845bcd0 ("netfilter: remove ip_conntrack* sysctl compat code") the only user (ipv4 tracker) sets this to an empty stub function. After this change nf_ct_l3proto_pernet_register() is also empty, but this will change in a followup patch to add conditional register of the hooks. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 3 --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 ------ net/netfilter/nf_conntrack_proto.c | 8 -------- 3 files changed, 17 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 8992e4229da9..cf8f3dfd810d 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -63,9 +63,6 @@ struct nf_conntrack_l3proto { size_t nla_size; - /* Init l3proto pernet data */ - int (*init_net)(struct net *net); - /* Module (if any) which this is connected to. */ struct module *me; }; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 22fce4fcece4..a006b6534323 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -307,11 +307,6 @@ static struct nf_sockopt_ops so_getorigdst = { .owner = THIS_MODULE, }; -static int ipv4_init_net(struct net *net) -{ - return 0; -} - struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .l3proto = PF_INET, .name = "ipv4", @@ -325,7 +320,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .nlattr_to_tuple = ipv4_nlattr_to_tuple, .nla_policy = ipv4_nla_policy, #endif - .init_net = ipv4_init_net, .me = THIS_MODULE, }; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 9bd34647225a..b218e70b2f74 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -193,14 +193,6 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); int nf_ct_l3proto_pernet_register(struct net *net, struct nf_conntrack_l3proto *proto) { - int ret; - - if (proto->init_net) { - ret = proto->init_net(net); - if (ret < 0) - return ret; - } - return 0; } EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); -- cgit v1.2.3 From ecb2421b5ddf48e6e116fced7f74c985bb546138 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Nov 2016 21:36:40 +0100 Subject: netfilter: add and use nf_ct_netns_get/put currently aliased to try_module_get/_put. Will be changed in next patch when we add functions to make use of ->net argument to store usercount per l3proto tracker. This is needed to avoid registering the conntrack hooks in all netns and later only enable connection tracking in those that need conntrack. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 4 ++++ net/ipv4/netfilter/ipt_CLUSTERIP.c | 4 ++-- net/ipv4/netfilter/ipt_SYNPROXY.c | 4 ++-- net/ipv6/netfilter/ip6t_SYNPROXY.c | 4 ++-- net/netfilter/nf_conntrack_proto.c | 12 ++++++++++++ net/netfilter/nft_ct.c | 26 +++++++++++++------------- net/netfilter/xt_CONNSECMARK.c | 4 ++-- net/netfilter/xt_CT.c | 6 +++--- net/netfilter/xt_connbytes.c | 4 ++-- net/netfilter/xt_connlabel.c | 6 +++--- net/netfilter/xt_connlimit.c | 6 +++--- net/netfilter/xt_connmark.c | 8 ++++---- net/netfilter/xt_conntrack.c | 4 ++-- net/netfilter/xt_helper.c | 4 ++-- net/netfilter/xt_state.c | 4 ++-- 15 files changed, 58 insertions(+), 42 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index d9d52c020a70..5916aa9ab3f0 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -181,6 +181,10 @@ static inline void nf_ct_put(struct nf_conn *ct) int nf_ct_l3proto_try_module_get(unsigned short l3proto); void nf_ct_l3proto_module_put(unsigned short l3proto); +/* load module; enable/disable conntrack in this namespace */ +int nf_ct_netns_get(struct net *net, u8 nfproto); +void nf_ct_netns_put(struct net *net, u8 nfproto); + /* * Allocate a hashtable of hlist_head (if nulls == 0), * or hlist_nulls_head (if nulls == 1) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index e6e206fa86c8..21db00d0362b 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -419,7 +419,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) } cipinfo->config = config; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -444,7 +444,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) clusterip_config_put(cipinfo->config); - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_get(par->net, par->family); } #ifdef CONFIG_COMPAT diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 361411688221..30c0de53e254 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -418,12 +418,12 @@ static int synproxy_tg4_check(const struct xt_tgchk_param *par) e->ip.invflags & XT_INV_PROTO) return -EINVAL; - return nf_ct_l3proto_try_module_get(par->family); + return nf_ct_netns_get(par->net, par->family); } static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_target synproxy_tg4_reg __read_mostly = { diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 99a1216287c8..98c8dd38575a 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -440,12 +440,12 @@ static int synproxy_tg6_check(const struct xt_tgchk_param *par) e->ipv6.invflags & XT_INV_PROTO) return -EINVAL; - return nf_ct_l3proto_try_module_get(par->family); + return nf_ct_netns_get(par->net, par->family); } static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_target synproxy_tg6_reg __read_mostly = { diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index b218e70b2f74..948f1e2fc80b 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -125,6 +125,18 @@ void nf_ct_l3proto_module_put(unsigned short l3proto) } EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); +int nf_ct_netns_get(struct net *net, u8 nfproto) +{ + return nf_ct_l3proto_try_module_get(nfproto); +} +EXPORT_SYMBOL_GPL(nf_ct_netns_get); + +void nf_ct_netns_put(struct net *net, u8 nfproto) +{ + nf_ct_l3proto_module_put(nfproto); +} +EXPORT_SYMBOL_GPL(nf_ct_netns_put); + struct nf_conntrack_l4proto * nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num) { diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 6837348c8993..e6baeaebe653 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -208,37 +208,37 @@ static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { [NFTA_CT_SREG] = { .type = NLA_U32 }, }; -static int nft_ct_l3proto_try_module_get(uint8_t family) +static int nft_ct_netns_get(struct net *net, uint8_t family) { int err; if (family == NFPROTO_INET) { - err = nf_ct_l3proto_try_module_get(NFPROTO_IPV4); + err = nf_ct_netns_get(net, NFPROTO_IPV4); if (err < 0) goto err1; - err = nf_ct_l3proto_try_module_get(NFPROTO_IPV6); + err = nf_ct_netns_get(net, NFPROTO_IPV6); if (err < 0) goto err2; } else { - err = nf_ct_l3proto_try_module_get(family); + err = nf_ct_netns_get(net, family); if (err < 0) goto err1; } return 0; err2: - nf_ct_l3proto_module_put(NFPROTO_IPV4); + nf_ct_netns_put(net, NFPROTO_IPV4); err1: return err; } -static void nft_ct_l3proto_module_put(uint8_t family) +static void nft_ct_netns_put(struct net *net, uint8_t family) { if (family == NFPROTO_INET) { - nf_ct_l3proto_module_put(NFPROTO_IPV4); - nf_ct_l3proto_module_put(NFPROTO_IPV6); + nf_ct_netns_put(net, NFPROTO_IPV4); + nf_ct_netns_put(net, NFPROTO_IPV6); } else - nf_ct_l3proto_module_put(family); + nf_ct_netns_put(net, family); } static int nft_ct_get_init(const struct nft_ctx *ctx, @@ -342,7 +342,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (err < 0) return err; - err = nft_ct_l3proto_try_module_get(ctx->afi->family); + err = nft_ct_netns_get(ctx->net, ctx->afi->family); if (err < 0) return err; @@ -390,7 +390,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, if (err < 0) goto err1; - err = nft_ct_l3proto_try_module_get(ctx->afi->family); + err = nft_ct_netns_get(ctx->net, ctx->afi->family); if (err < 0) goto err1; @@ -405,7 +405,7 @@ err1: static void nft_ct_get_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { - nft_ct_l3proto_module_put(ctx->afi->family); + nf_ct_netns_put(ctx->net, ctx->afi->family); } static void nft_ct_set_destroy(const struct nft_ctx *ctx, @@ -423,7 +423,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx, break; } - nft_ct_l3proto_module_put(ctx->afi->family); + nft_ct_netns_put(ctx->net, ctx->afi->family); } static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index e04dc282e3bb..da56c06a443c 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -106,7 +106,7 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par) return -EINVAL; } - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -115,7 +115,7 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par) static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_target connsecmark_tg_reg __read_mostly = { diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 6669e68d589e..95c750358747 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -216,7 +216,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, goto err1; #endif - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) goto err1; @@ -260,7 +260,7 @@ out: err3: nf_ct_tmpl_free(ct); err2: - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); err1: return ret; } @@ -341,7 +341,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, if (help) module_put(help->helper->me); - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); xt_ct_destroy_timeout(ct); nf_ct_put(info->ct); diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index d4bec261e74e..cad0b7b5eb35 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -110,7 +110,7 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par) sinfo->direction != XT_CONNBYTES_DIR_BOTH) return -EINVAL; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -129,7 +129,7 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par) static void connbytes_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_match connbytes_mt_reg __read_mostly = { diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index 03d66f1c5e69..7827128d5a95 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -61,7 +61,7 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par) return -EINVAL; } - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -70,14 +70,14 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par) ret = nf_connlabels_get(par->net, info->bit); if (ret < 0) - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); return ret; } static void connlabel_mt_destroy(const struct xt_mtdtor_param *par) { nf_connlabels_put(par->net); - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_match connlabels_mt_reg __read_mostly = { diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index bb3845339efd..2aff2b7c4689 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -368,7 +368,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd)); - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { pr_info("cannot load conntrack support for " "address family %u\n", par->family); @@ -378,7 +378,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) /* init private data */ info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL); if (info->data == NULL) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); return -ENOMEM; } @@ -414,7 +414,7 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) const struct xt_connlimit_info *info = par->matchinfo; unsigned int i; - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) destroy_tree(&info->data->climit_root4[i]); diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index b83e158e116a..9935d5029b0e 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -77,7 +77,7 @@ static int connmark_tg_check(const struct xt_tgchk_param *par) { int ret; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -86,7 +86,7 @@ static int connmark_tg_check(const struct xt_tgchk_param *par) static void connmark_tg_destroy(const struct xt_tgdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static bool @@ -107,7 +107,7 @@ static int connmark_mt_check(const struct xt_mtchk_param *par) { int ret; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -116,7 +116,7 @@ static int connmark_mt_check(const struct xt_mtchk_param *par) static void connmark_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_target connmark_tg_reg __read_mostly = { diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 2dea15ebc55b..c0fb217bc649 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -271,7 +271,7 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par) { int ret; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -280,7 +280,7 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par) static void conntrack_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_match conntrack_mt_reg[] __read_mostly = { diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index f679dd4c272a..38a78151c0e9 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -59,7 +59,7 @@ static int helper_mt_check(const struct xt_mtchk_param *par) struct xt_helper_info *info = par->matchinfo; int ret; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -71,7 +71,7 @@ static int helper_mt_check(const struct xt_mtchk_param *par) static void helper_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_match helper_mt_reg __read_mostly = { diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index a507922d80cd..5746a33789a5 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -43,7 +43,7 @@ static int state_mt_check(const struct xt_mtchk_param *par) { int ret; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -52,7 +52,7 @@ static int state_mt_check(const struct xt_mtchk_param *par) static void state_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_match state_mt_reg __read_mostly = { -- cgit v1.2.3 From 0c66dc1ea3f0366221f8a5a16c73f01ea9259678 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Nov 2016 21:36:43 +0100 Subject: netfilter: conntrack: register hooks in netns when needed by ruleset This makes use of nf_ct_netns_get/put added in previous patch. We add get/put functions to nf_conntrack_l3proto structure, ipv4 and ipv6 then implement use-count to track how many users (nft or xtables modules) have a dependency on ipv4 and/or ipv6 connection tracking functionality. When count reaches zero, the hooks are unregistered. This delays activation of connection tracking inside a namespace until stateful firewall rule or nat rule gets added. This patch breaks backwards compatibility in the sense that connection tracking won't be active anymore when the protocol tracker module is loaded. This breaks e.g. setups that ctnetlink for flow accounting and the like, without any '-m conntrack' packet filter rules. Followup patch restores old behavour and makes new delayed scheme optional via sysctl. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 4 ++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 55 ++++++++++++++++++++------ net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 54 +++++++++++++++++++------ net/netfilter/nf_conntrack_proto.c | 38 +++++++++++++++++- 4 files changed, 127 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index cf8f3dfd810d..e7dcd72be21c 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -52,6 +52,10 @@ struct nf_conntrack_l3proto { int (*tuple_to_nlattr)(struct sk_buff *skb, const struct nf_conntrack_tuple *t); + /* Called when netns wants to use connection tracking */ + int (*net_ns_get)(struct net *); + void (*net_ns_put)(struct net *); + /* * Calculate size of tuple nlattr */ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index a006b6534323..6f375443a74b 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -31,6 +31,13 @@ #include #include +static int conntrack4_net_id __read_mostly; +static DEFINE_MUTEX(register_ipv4_hooks); + +struct conntrack4_net { + unsigned int users; +}; + static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { @@ -307,6 +314,38 @@ static struct nf_sockopt_ops so_getorigdst = { .owner = THIS_MODULE, }; +static int ipv4_hooks_register(struct net *net) +{ + struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id); + int err = 0; + + mutex_lock(®ister_ipv4_hooks); + + cnet->users++; + if (cnet->users > 1) + goto out_unlock; + + err = nf_register_net_hooks(net, ipv4_conntrack_ops, + ARRAY_SIZE(ipv4_conntrack_ops)); + + if (err) + cnet->users = 0; + out_unlock: + mutex_unlock(®ister_ipv4_hooks); + return err; +} + +static void ipv4_hooks_unregister(struct net *net) +{ + struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id); + + mutex_lock(®ister_ipv4_hooks); + if (cnet->users && (--cnet->users == 0)) + nf_unregister_net_hooks(net, ipv4_conntrack_ops, + ARRAY_SIZE(ipv4_conntrack_ops)); + mutex_unlock(®ister_ipv4_hooks); +} + struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .l3proto = PF_INET, .name = "ipv4", @@ -320,6 +359,8 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .nlattr_to_tuple = ipv4_nlattr_to_tuple, .nla_policy = ipv4_nla_policy, #endif + .net_ns_get = ipv4_hooks_register, + .net_ns_put = ipv4_hooks_unregister, .me = THIS_MODULE, }; @@ -372,6 +413,8 @@ static void ipv4_net_exit(struct net *net) static struct pernet_operations ipv4_net_ops = { .init = ipv4_net_init, .exit = ipv4_net_exit, + .id = &conntrack4_net_id, + .size = sizeof(struct conntrack4_net), }; static int __init nf_conntrack_l3proto_ipv4_init(void) @@ -393,17 +436,10 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) goto cleanup_sockopt; } - ret = nf_register_hooks(ipv4_conntrack_ops, - ARRAY_SIZE(ipv4_conntrack_ops)); - if (ret < 0) { - pr_err("nf_conntrack_ipv4: can't register hooks.\n"); - goto cleanup_pernet; - } - ret = nf_ct_l4proto_register(builtin_l4proto4, ARRAY_SIZE(builtin_l4proto4)); if (ret < 0) - goto cleanup_hooks; + goto cleanup_pernet; ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4); if (ret < 0) { @@ -415,8 +451,6 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) cleanup_l4proto: nf_ct_l4proto_unregister(builtin_l4proto4, ARRAY_SIZE(builtin_l4proto4)); - cleanup_hooks: - nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); cleanup_pernet: unregister_pernet_subsys(&ipv4_net_ops); cleanup_sockopt: @@ -430,7 +464,6 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); nf_ct_l4proto_unregister(builtin_l4proto4, ARRAY_SIZE(builtin_l4proto4)); - nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); unregister_pernet_subsys(&ipv4_net_ops); nf_unregister_sockopt(&so_getorigdst); } diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 389f712854f2..72fe48075b7f 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -34,6 +34,13 @@ #include #include +static int conntrack6_net_id; +static DEFINE_MUTEX(register_ipv6_hooks); + +struct conntrack6_net { + unsigned int users; +}; + static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { @@ -308,6 +315,36 @@ static int ipv6_nlattr_tuple_size(void) } #endif +static int ipv6_hooks_register(struct net *net) +{ + struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id); + int err = 0; + + mutex_lock(®ister_ipv6_hooks); + cnet->users++; + if (cnet->users > 1) + goto out_unlock; + + err = nf_register_net_hooks(net, ipv6_conntrack_ops, + ARRAY_SIZE(ipv6_conntrack_ops)); + if (err) + cnet->users = 0; + out_unlock: + mutex_unlock(®ister_ipv6_hooks); + return err; +} + +static void ipv6_hooks_unregister(struct net *net) +{ + struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id); + + mutex_lock(®ister_ipv6_hooks); + if (cnet->users && (--cnet->users == 0)) + nf_unregister_net_hooks(net, ipv6_conntrack_ops, + ARRAY_SIZE(ipv6_conntrack_ops)); + mutex_unlock(®ister_ipv6_hooks); +} + struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .l3proto = PF_INET6, .name = "ipv6", @@ -321,6 +358,8 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .nlattr_to_tuple = ipv6_nlattr_to_tuple, .nla_policy = ipv6_nla_policy, #endif + .net_ns_get = ipv6_hooks_register, + .net_ns_put = ipv6_hooks_unregister, .me = THIS_MODULE, }; @@ -379,6 +418,8 @@ static void ipv6_net_exit(struct net *net) static struct pernet_operations ipv6_net_ops = { .init = ipv6_net_init, .exit = ipv6_net_exit, + .id = &conntrack6_net_id, + .size = sizeof(struct conntrack6_net), }; static int __init nf_conntrack_l3proto_ipv6_init(void) @@ -398,18 +439,10 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) if (ret < 0) goto cleanup_sockopt; - ret = nf_register_hooks(ipv6_conntrack_ops, - ARRAY_SIZE(ipv6_conntrack_ops)); - if (ret < 0) { - pr_err("nf_conntrack_ipv6: can't register pre-routing defrag " - "hook.\n"); - goto cleanup_pernet; - } - ret = nf_ct_l4proto_register(builtin_l4proto6, ARRAY_SIZE(builtin_l4proto6)); if (ret < 0) - goto cleanup_hooks; + goto cleanup_pernet; ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6); if (ret < 0) { @@ -420,8 +453,6 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) cleanup_l4proto: nf_ct_l4proto_unregister(builtin_l4proto6, ARRAY_SIZE(builtin_l4proto6)); - cleanup_hooks: - nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); cleanup_pernet: unregister_pernet_subsys(&ipv6_net_ops); cleanup_sockopt: @@ -435,7 +466,6 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void) nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6); nf_ct_l4proto_unregister(builtin_l4proto6, ARRAY_SIZE(builtin_l4proto6)); - nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); unregister_pernet_subsys(&ipv6_net_ops); nf_unregister_sockopt(&so_getorigdst6); } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 948f1e2fc80b..758688b25fd8 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -127,12 +127,48 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); int nf_ct_netns_get(struct net *net, u8 nfproto) { - return nf_ct_l3proto_try_module_get(nfproto); + const struct nf_conntrack_l3proto *l3proto; + int ret; + + might_sleep(); + + ret = nf_ct_l3proto_try_module_get(nfproto); + if (ret < 0) + return ret; + + /* we already have a reference, can't fail */ + rcu_read_lock(); + l3proto = __nf_ct_l3proto_find(nfproto); + rcu_read_unlock(); + + if (!l3proto->net_ns_get) + return 0; + + ret = l3proto->net_ns_get(net); + if (ret < 0) + nf_ct_l3proto_module_put(nfproto); + + return ret; } EXPORT_SYMBOL_GPL(nf_ct_netns_get); void nf_ct_netns_put(struct net *net, u8 nfproto) { + const struct nf_conntrack_l3proto *l3proto; + + might_sleep(); + + /* same as nf_conntrack_netns_get(), reference assumed */ + rcu_read_lock(); + l3proto = __nf_ct_l3proto_find(nfproto); + rcu_read_unlock(); + + if (WARN_ON(!l3proto)) + return; + + if (l3proto->net_ns_put) + l3proto->net_ns_put(net); + nf_ct_l3proto_module_put(nfproto); } EXPORT_SYMBOL_GPL(nf_ct_netns_put); -- cgit v1.2.3 From 481fa3734769b67f00ed09a42f2a6a8cbd00b869 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Nov 2016 21:36:44 +0100 Subject: netfilter: conntrack: add nf_conntrack_default_on sysctl This switch (default on) can be used to disable automatic registration of connection tracking functionality in newly created network namespaces. This means that when net namespace goes down (or the tracker protocol module is unloaded) we *might* have to unregister the hooks. We can either add another per-netns variable that tells if the hooks got registered by default, or, alternatively, just call the protocol _put() function and have the callee deal with a possible 'extra' put() operation that doesn't pair with a get() one. This uses the latter approach, i.e. a put() without a get has no effect. Conntrack is still enabled automatically regardless of the new sysctl setting if the new net namespace requires connection tracking, e.g. when NAT rules are created. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_conntrack-sysctl.txt | 11 +++++++++++ include/net/netfilter/nf_conntrack_l3proto.h | 9 +++++++++ net/netfilter/nf_conntrack_proto.c | 19 ++++++++++++++++++- net/netfilter/nf_conntrack_standalone.c | 10 ++++++++++ 4 files changed, 48 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt index 433b6724797a..497d668288f9 100644 --- a/Documentation/networking/nf_conntrack-sysctl.txt +++ b/Documentation/networking/nf_conntrack-sysctl.txt @@ -96,6 +96,17 @@ nf_conntrack_max - INTEGER Size of connection tracking table. Default value is nf_conntrack_buckets value * 4. +nf_conntrack_default_on - BOOLEAN + 0 - don't register conntrack in new net namespaces + 1 - register conntrack in new net namespaces (default) + + This controls wheter newly created network namespaces have connection + tracking enabled by default. It will be enabled automatically + regardless of this setting if the new net namespace requires + connection tracking, e.g. when NAT rules are created. + This setting is only visible in initial user namespace, it has no + effect on existing namespaces. + nf_conntrack_tcp_be_liberal - BOOLEAN 0 - disabled (default) not 0 - enabled diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index e7dcd72be21c..e01559b4d781 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -73,9 +73,18 @@ struct nf_conntrack_l3proto { extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX]; +#ifdef CONFIG_SYSCTL /* Protocol pernet registration. */ int nf_ct_l3proto_pernet_register(struct net *net, struct nf_conntrack_l3proto *proto); +#else +static inline int nf_ct_l3proto_pernet_register(struct net *n, + struct nf_conntrack_l3proto *p) +{ + return 0; +} +#endif + void nf_ct_l3proto_pernet_unregister(struct net *net, struct nf_conntrack_l3proto *proto); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 758688b25fd8..2d6ee1803415 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -238,12 +238,19 @@ out_unlock: } EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); +#ifdef CONFIG_SYSCTL +extern unsigned int nf_conntrack_default_on; + int nf_ct_l3proto_pernet_register(struct net *net, struct nf_conntrack_l3proto *proto) { - return 0; + if (nf_conntrack_default_on == 0) + return 0; + + return proto->net_ns_get ? proto->net_ns_get(net) : 0; } EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); +#endif void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) { @@ -264,6 +271,16 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); void nf_ct_l3proto_pernet_unregister(struct net *net, struct nf_conntrack_l3proto *proto) { + /* + * nf_conntrack_default_on *might* have registered hooks. + * ->net_ns_put must cope with more puts() than get(), i.e. + * if nf_conntrack_default_on was 0 at time of + * nf_ct_l3proto_pernet_register invocation this net_ns_put() + * should be a noop. + */ + if (proto->net_ns_put) + proto->net_ns_put(net); + /* Remove all contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0); } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 5f446cd9f3fd..d009ae663453 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -452,6 +452,9 @@ static int log_invalid_proto_max __read_mostly = 255; /* size the user *wants to set */ static unsigned int nf_conntrack_htable_size_user __read_mostly; +extern unsigned int nf_conntrack_default_on; +unsigned int nf_conntrack_default_on __read_mostly = 1; + static int nf_conntrack_hash_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -517,6 +520,13 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "nf_conntrack_default_on", + .data = &nf_conntrack_default_on, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; -- cgit v1.2.3 From 9115e8cd2a0c6eaaa900c462721f12e1d45f326c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 3 Dec 2016 11:14:56 -0800 Subject: net: reorganize struct sock for better data locality Group fields used in TX path, and keep some cache lines mostly read to permit sharing among cpus. Gained two 4 bytes holes on 64bit arches. Added a place holder for tcp tsq_flags, next to sk_wmem_alloc to speed up tcp_wfree() in the following patch. I have not added ____cacheline_aligned_in_smp, this might be done later. I prefer doing this once inet and tcp/udp sockets reorg is also done. Tested with both TCP and UDP. UDP receiver performance under flood increased by ~20 % : Accessing sk_filter/sk_wq/sk_napi_id no longer stalls because sk_drops was moved away from a critical cache line, now mostly read and shared. /* --- cacheline 4 boundary (256 bytes) --- */ unsigned int sk_napi_id; /* 0x100 0x4 */ int sk_rcvbuf; /* 0x104 0x4 */ struct sk_filter * sk_filter; /* 0x108 0x8 */ union { struct socket_wq * sk_wq; /* 0x8 */ struct socket_wq * sk_wq_raw; /* 0x8 */ }; /* 0x110 0x8 */ struct xfrm_policy * sk_policy[2]; /* 0x118 0x10 */ struct dst_entry * sk_rx_dst; /* 0x128 0x8 */ struct dst_entry * sk_dst_cache; /* 0x130 0x8 */ atomic_t sk_omem_alloc; /* 0x138 0x4 */ int sk_sndbuf; /* 0x13c 0x4 */ /* --- cacheline 5 boundary (320 bytes) --- */ int sk_wmem_queued; /* 0x140 0x4 */ atomic_t sk_wmem_alloc; /* 0x144 0x4 */ long unsigned int sk_tsq_flags; /* 0x148 0x8 */ struct sk_buff * sk_send_head; /* 0x150 0x8 */ struct sk_buff_head sk_write_queue; /* 0x158 0x18 */ __s32 sk_peek_off; /* 0x170 0x4 */ int sk_write_pending; /* 0x174 0x4 */ long int sk_sndtimeo; /* 0x178 0x8 */ Signed-off-by: Eric Dumazet Tested-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/sock.h | 51 +++++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 69afda6bea15..6dfe3aa22b97 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -343,6 +343,9 @@ struct sock { #define sk_rxhash __sk_common.skc_rxhash socket_lock_t sk_lock; + atomic_t sk_drops; + int sk_rcvlowat; + struct sk_buff_head sk_error_queue; struct sk_buff_head sk_receive_queue; /* * The backlog queue is special, it is always used with @@ -359,14 +362,13 @@ struct sock { struct sk_buff *tail; } sk_backlog; #define sk_rmem_alloc sk_backlog.rmem_alloc - int sk_forward_alloc; - __u32 sk_txhash; + int sk_forward_alloc; #ifdef CONFIG_NET_RX_BUSY_POLL - unsigned int sk_napi_id; unsigned int sk_ll_usec; + /* ===== mostly read cache line ===== */ + unsigned int sk_napi_id; #endif - atomic_t sk_drops; int sk_rcvbuf; struct sk_filter __rcu *sk_filter; @@ -379,11 +381,30 @@ struct sock { #endif struct dst_entry *sk_rx_dst; struct dst_entry __rcu *sk_dst_cache; - /* Note: 32bit hole on 64bit arches */ - atomic_t sk_wmem_alloc; atomic_t sk_omem_alloc; int sk_sndbuf; + + /* ===== cache line for TX ===== */ + int sk_wmem_queued; + atomic_t sk_wmem_alloc; + unsigned long sk_tsq_flags; + struct sk_buff *sk_send_head; struct sk_buff_head sk_write_queue; + __s32 sk_peek_off; + int sk_write_pending; + long sk_sndtimeo; + struct timer_list sk_timer; + __u32 sk_priority; + __u32 sk_mark; + u32 sk_pacing_rate; /* bytes per second */ + u32 sk_max_pacing_rate; + struct page_frag sk_frag; + netdev_features_t sk_route_caps; + netdev_features_t sk_route_nocaps; + int sk_gso_type; + unsigned int sk_gso_max_size; + gfp_t sk_allocation; + __u32 sk_txhash; /* * Because of non atomicity rules, all @@ -414,42 +435,24 @@ struct sock { #define SK_PROTOCOL_MAX U8_MAX kmemcheck_bitfield_end(flags); - int sk_wmem_queued; - gfp_t sk_allocation; - u32 sk_pacing_rate; /* bytes per second */ - u32 sk_max_pacing_rate; - netdev_features_t sk_route_caps; - netdev_features_t sk_route_nocaps; - int sk_gso_type; - unsigned int sk_gso_max_size; u16 sk_gso_max_segs; - int sk_rcvlowat; unsigned long sk_lingertime; - struct sk_buff_head sk_error_queue; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; int sk_err, sk_err_soft; u32 sk_ack_backlog; u32 sk_max_ack_backlog; - __u32 sk_priority; - __u32 sk_mark; kuid_t sk_uid; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; long sk_rcvtimeo; - long sk_sndtimeo; - struct timer_list sk_timer; ktime_t sk_stamp; u16 sk_tsflags; u8 sk_shutdown; u32 sk_tskey; struct socket *sk_socket; void *sk_user_data; - struct page_frag sk_frag; - struct sk_buff *sk_send_head; - __s32 sk_peek_off; - int sk_write_pending; #ifdef CONFIG_SECURITY void *sk_security; #endif -- cgit v1.2.3 From 15e6cb46c9b09711d1224ae5418b53140e1ba444 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 1 Nov 2016 22:42:45 -0400 Subject: make skb_add_data,{_nocache}() and skb_copy_to_page_nocache() advance only on success Signed-off-by: Al Viro --- include/linux/skbuff.h | 6 +++--- include/net/sock.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 32810f279f8e..9cfae2e73b3c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2809,12 +2809,12 @@ static inline int skb_add_data(struct sk_buff *skb, if (skb->ip_summed == CHECKSUM_NONE) { __wsum csum = 0; - if (csum_and_copy_from_iter(skb_put(skb, copy), copy, - &csum, from) == copy) { + if (csum_and_copy_from_iter_full(skb_put(skb, copy), copy, + &csum, from)) { skb->csum = csum_block_add(skb->csum, csum, off); return 0; } - } else if (copy_from_iter(skb_put(skb, copy), copy, from) == copy) + } else if (copy_from_iter_full(skb_put(skb, copy), copy, from)) return 0; __skb_trim(skb, off); diff --git a/include/net/sock.h b/include/net/sock.h index 92b269709b9a..5dd0fed82a06 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1783,13 +1783,13 @@ static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, { if (skb->ip_summed == CHECKSUM_NONE) { __wsum csum = 0; - if (csum_and_copy_from_iter(to, copy, &csum, from) != copy) + if (!csum_and_copy_from_iter_full(to, copy, &csum, from)) return -EFAULT; skb->csum = csum_block_add(skb->csum, csum, offset); } else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) { - if (copy_from_iter_nocache(to, copy, from) != copy) + if (!copy_from_iter_full_nocache(to, copy, from)) return -EFAULT; - } else if (copy_from_iter(to, copy, from) != copy) + } else if (!copy_from_iter_full(to, copy, from)) return -EFAULT; return 0; -- cgit v1.2.3 From 0b62fca2623e4633c8819e89946d0da446a5846b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 3 Nov 2016 18:17:31 -0400 Subject: switch getfrag callbacks to ..._full() primitives Signed-off-by: Al Viro --- include/net/udplite.h | 2 +- net/ipv4/ip_output.c | 4 ++-- net/ipv4/ping.c | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/udplite.h b/include/net/udplite.h index 80761938b9a7..59477d805145 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -20,7 +20,7 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) { struct msghdr *msg = from; - return copy_from_iter(to, len, &msg->msg_iter) != len ? -EFAULT : 0; + return copy_from_iter_full(to, len, &msg->msg_iter) ? 0 : -EFAULT; } /* Designate sk as UDP-Lite socket */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 105908d841a3..be4d149f0321 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -802,11 +802,11 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk struct msghdr *msg = from; if (skb->ip_summed == CHECKSUM_PARTIAL) { - if (copy_from_iter(to, len, &msg->msg_iter) != len) + if (!copy_from_iter_full(to, len, &msg->msg_iter)) return -EFAULT; } else { __wsum csum = 0; - if (csum_and_copy_from_iter(to, len, &csum, &msg->msg_iter) != len) + if (!csum_and_copy_from_iter_full(to, len, &csum, &msg->msg_iter)) return -EFAULT; skb->csum = csum_block_add(skb->csum, csum, odd); } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 205e2000d395..7dd7baf2a5ad 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -609,15 +609,15 @@ int ping_getfrag(void *from, char *to, fraglen -= sizeof(struct icmphdr); if (fraglen < 0) BUG(); - if (csum_and_copy_from_iter(to + sizeof(struct icmphdr), + if (!csum_and_copy_from_iter_full(to + sizeof(struct icmphdr), fraglen, &pfh->wcheck, - &pfh->msg->msg_iter) != fraglen) + &pfh->msg->msg_iter)) return -EFAULT; } else if (offset < sizeof(struct icmphdr)) { BUG(); } else { - if (csum_and_copy_from_iter(to, fraglen, &pfh->wcheck, - &pfh->msg->msg_iter) != fraglen) + if (!csum_and_copy_from_iter_full(to, fraglen, &pfh->wcheck, + &pfh->msg->msg_iter)) return -EFAULT; } -- cgit v1.2.3 From 1c0d32fde5bdf1184bc274f864c09799278a1114 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 4 Dec 2016 09:48:16 -0800 Subject: net_sched: gen_estimator: complete rewrite of rate estimators 1) Old code was hard to maintain, due to complex lock chains. (We probably will be able to remove some kfree_rcu() in callers) 2) Using a single timer to update all estimators does not scale. 3) Code was buggy on 32bit kernel (WRITE_ONCE() on 64bit quantity is not supposed to work well) In this rewrite : - I removed the RB tree that had to be scanned in gen_estimator_active(). qdisc dumps should be much faster. - Each estimator has its own timer. - Estimations are maintained in net_rate_estimator structure, instead of dirtying the qdisc. Minor, but part of the simplification. - Reading the estimator uses RCU and a seqcount to provide proper support for 32bit kernels. - We reduce memory need when estimators are not used, since we store a pointer, instead of the bytes/packets counters. - xt_rateest_mt() no longer has to grab a spinlock. (In the future, xt_rateest_tg() could be switched to per cpu counters) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/act_api.h | 2 +- include/net/gen_stats.h | 17 ++- include/net/netfilter/xt_rateest.h | 10 +- include/net/sch_generic.h | 2 +- net/core/gen_estimator.c | 299 +++++++++++++------------------------ net/core/gen_stats.c | 20 ++- net/netfilter/xt_RATEEST.c | 4 +- net/netfilter/xt_rateest.c | 28 ++-- net/sched/act_api.c | 9 +- net/sched/act_police.c | 21 +-- net/sched/sch_api.c | 2 +- net/sched/sch_cbq.c | 6 +- net/sched/sch_drr.c | 6 +- net/sched/sch_generic.c | 2 +- net/sched/sch_hfsc.c | 6 +- net/sched/sch_htb.c | 6 +- net/sched/sch_qfq.c | 8 +- 17 files changed, 182 insertions(+), 266 deletions(-) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 9dddf77a69cc..1d716449209e 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -36,7 +36,7 @@ struct tc_action { struct tcf_t tcfa_tm; struct gnet_stats_basic_packed tcfa_bstats; struct gnet_stats_queue tcfa_qstats; - struct gnet_stats_rate_est64 tcfa_rate_est; + struct net_rate_estimator __rcu *tcfa_rate_est; spinlock_t tcfa_lock; struct rcu_head tcfa_rcu; struct gnet_stats_basic_cpu __percpu *cpu_bstats; diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 231e121cc7d9..8b7aa370e7a4 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -11,6 +11,8 @@ struct gnet_stats_basic_cpu { struct u64_stats_sync syncp; }; +struct net_rate_estimator; + struct gnet_dump { spinlock_t * lock; struct sk_buff * skb; @@ -42,8 +44,7 @@ void __gnet_stats_copy_basic(const seqcount_t *running, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); int gnet_stats_copy_rate_est(struct gnet_dump *d, - const struct gnet_stats_basic_packed *b, - struct gnet_stats_rate_est64 *r); + struct net_rate_estimator __rcu **ptr); int gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue __percpu *cpu_q, struct gnet_stats_queue *q, __u32 qlen); @@ -53,16 +54,16 @@ int gnet_stats_finish_copy(struct gnet_dump *d); int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, - struct gnet_stats_rate_est64 *rate_est, + struct net_rate_estimator __rcu **rate_est, spinlock_t *stats_lock, seqcount_t *running, struct nlattr *opt); -void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est64 *rate_est); +void gen_kill_estimator(struct net_rate_estimator __rcu **ptr); int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, - struct gnet_stats_rate_est64 *rate_est, + struct net_rate_estimator __rcu **ptr, spinlock_t *stats_lock, seqcount_t *running, struct nlattr *opt); -bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, - const struct gnet_stats_rate_est64 *rate_est); +bool gen_estimator_active(struct net_rate_estimator __rcu **ptr); +bool gen_estimator_read(struct net_rate_estimator __rcu **ptr, + struct gnet_stats_rate_est64 *sample); #endif diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h index 79f45e19f31e..130e58361f99 100644 --- a/include/net/netfilter/xt_rateest.h +++ b/include/net/netfilter/xt_rateest.h @@ -1,19 +1,23 @@ #ifndef _XT_RATEEST_H #define _XT_RATEEST_H +#include + struct xt_rateest { /* keep lock and bstats on same cache line to speedup xt_rateest_tg() */ struct gnet_stats_basic_packed bstats; spinlock_t lock; - /* keep rstats and lock on same cache line to speedup xt_rateest_mt() */ - struct gnet_stats_rate_est64 rstats; + /* following fields not accessed in hot path */ + unsigned int refcnt; struct hlist_node list; char name[IFNAMSIZ]; - unsigned int refcnt; struct gnet_estimator params; struct rcu_head rcu; + + /* keep this field far away to speedup xt_rateest_mt() */ + struct net_rate_estimator __rcu *rate_est; }; struct xt_rateest *xt_rateest_lookup(const char *name); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e6aa0a249672..498f81b229a4 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -76,7 +76,7 @@ struct Qdisc { struct netdev_queue *dev_queue; - struct gnet_stats_rate_est64 rate_est; + struct net_rate_estimator __rcu *rate_est; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 0993844faeea..101b5d0e2142 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -7,6 +7,7 @@ * 2 of the License, or (at your option) any later version. * * Authors: Alexey Kuznetsov, + * Eric Dumazet * * Changes: * Jamal Hadi Salim - moved it to net/core and reshulfed @@ -30,171 +31,79 @@ #include #include #include -#include #include +#include #include #include -/* - This code is NOT intended to be used for statistics collection, - its purpose is to provide a base for statistical multiplexing - for controlled load service. - If you need only statistics, run a user level daemon which - periodically reads byte counters. - - Unfortunately, rate estimation is not a very easy task. - F.e. I did not find a simple way to estimate the current peak rate - and even failed to formulate the problem 8)8) - - So I preferred not to built an estimator into the scheduler, - but run this task separately. - Ideally, it should be kernel thread(s), but for now it runs - from timers, which puts apparent top bounds on the number of rated - flows, has minimal overhead on small, but is enough - to handle controlled load service, sets of aggregates. - - We measure rate over A=(1<stats_lock) + spin_lock(e->stats_lock); - rcu_read_lock(); - list_for_each_entry_rcu(e, &elist[idx].list, list) { - struct gnet_stats_basic_packed b = {0}; - unsigned long rate; - u64 brate; - - if (e->stats_lock) - spin_lock(e->stats_lock); - read_lock(&est_lock); - if (e->bstats == NULL) - goto skip; - - __gnet_stats_copy_basic(e->running, &b, e->cpu_bstats, e->bstats); - - brate = (b.bytes - e->last_bytes)<<(7 - idx); - e->last_bytes = b.bytes; - e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log); - WRITE_ONCE(e->rate_est->bps, (e->avbps + 0xF) >> 5); - - rate = b.packets - e->last_packets; - rate <<= (7 - idx); - e->last_packets = b.packets; - e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log); - WRITE_ONCE(e->rate_est->pps, (e->avpps + 0xF) >> 5); -skip: - read_unlock(&est_lock); - if (e->stats_lock) - spin_unlock(e->stats_lock); - } + __gnet_stats_copy_basic(e->running, b, e->cpu_bstats, e->bstats); - if (!list_empty(&elist[idx].list)) { - elist[idx].next_jiffies += ((HZ/4) << idx); + if (e->stats_lock) + spin_unlock(e->stats_lock); - if (unlikely(time_after_eq(jiffies, elist[idx].next_jiffies))) { - /* Ouch... timer was delayed. */ - elist[idx].next_jiffies = jiffies + 1; - } - mod_timer(&elist[idx].timer, elist[idx].next_jiffies); - } - rcu_read_unlock(); } -static void gen_add_node(struct gen_estimator *est) +static void est_timer(unsigned long arg) { - struct rb_node **p = &est_root.rb_node, *parent = NULL; + struct net_rate_estimator *est = (struct net_rate_estimator *)arg; + struct gnet_stats_basic_packed b; + u64 rate, brate; - while (*p) { - struct gen_estimator *e; + est_fetch_counters(est, &b); + brate = (b.bytes - est->last_bytes) << (8 - est->ewma_log); + brate -= (est->avbps >> est->ewma_log); - parent = *p; - e = rb_entry(parent, struct gen_estimator, node); + rate = (u64)(b.packets - est->last_packets) << (8 - est->ewma_log); + rate -= (est->avpps >> est->ewma_log); - if (est->bstats > e->bstats) - p = &parent->rb_right; - else - p = &parent->rb_left; - } - rb_link_node(&est->node, parent, p); - rb_insert_color(&est->node, &est_root); -} - -static -struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats, - const struct gnet_stats_rate_est64 *rate_est) -{ - struct rb_node *p = est_root.rb_node; + write_seqcount_begin(&est->seq); + est->avbps += brate; + est->avpps += rate; + write_seqcount_end(&est->seq); - while (p) { - struct gen_estimator *e; + est->last_bytes = b.bytes; + est->last_packets = b.packets; - e = rb_entry(p, struct gen_estimator, node); + est->next_jiffies += ((HZ/4) << est->intvl_log); - if (bstats > e->bstats) - p = p->rb_right; - else if (bstats < e->bstats || rate_est != e->rate_est) - p = p->rb_left; - else - return e; + if (unlikely(time_after_eq(jiffies, est->next_jiffies))) { + /* Ouch... timer was delayed. */ + est->next_jiffies = jiffies + 1; } - return NULL; + mod_timer(&est->timer, est->next_jiffies); } /** @@ -217,84 +126,76 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats */ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, - struct gnet_stats_rate_est64 *rate_est, + struct net_rate_estimator __rcu **rate_est, spinlock_t *stats_lock, seqcount_t *running, struct nlattr *opt) { - struct gen_estimator *est; struct gnet_estimator *parm = nla_data(opt); - struct gnet_stats_basic_packed b = {0}; - int idx; + struct net_rate_estimator *old, *est; + struct gnet_stats_basic_packed b; + int intvl_log; if (nla_len(opt) < sizeof(*parm)) return -EINVAL; + /* allowed timer periods are : + * -2 : 250ms, -1 : 500ms, 0 : 1 sec + * 1 : 2 sec, 2 : 4 sec, 3 : 8 sec + */ if (parm->interval < -2 || parm->interval > 3) return -EINVAL; est = kzalloc(sizeof(*est), GFP_KERNEL); - if (est == NULL) + if (!est) return -ENOBUFS; - __gnet_stats_copy_basic(running, &b, cpu_bstats, bstats); - - idx = parm->interval + 2; + seqcount_init(&est->seq); + intvl_log = parm->interval + 2; est->bstats = bstats; - est->rate_est = rate_est; est->stats_lock = stats_lock; est->running = running; est->ewma_log = parm->ewma_log; - est->last_bytes = b.bytes; - est->avbps = rate_est->bps<<5; - est->last_packets = b.packets; - est->avpps = rate_est->pps<<10; + est->intvl_log = intvl_log; est->cpu_bstats = cpu_bstats; - spin_lock_bh(&est_tree_lock); - if (!elist[idx].timer.function) { - INIT_LIST_HEAD(&elist[idx].list); - setup_timer(&elist[idx].timer, est_timer, idx); + est_fetch_counters(est, &b); + est->last_bytes = b.bytes; + est->last_packets = b.packets; + old = rcu_dereference_protected(*rate_est, 1); + if (old) { + del_timer_sync(&old->timer); + est->avbps = old->avbps; + est->avpps = old->avpps; } - if (list_empty(&elist[idx].list)) { - elist[idx].next_jiffies = jiffies + ((HZ/4) << idx); - mod_timer(&elist[idx].timer, elist[idx].next_jiffies); - } - list_add_rcu(&est->list, &elist[idx].list); - gen_add_node(est); - spin_unlock_bh(&est_tree_lock); + est->next_jiffies = jiffies + ((HZ/4) << intvl_log); + setup_timer(&est->timer, est_timer, (unsigned long)est); + mod_timer(&est->timer, est->next_jiffies); + rcu_assign_pointer(*rate_est, est); + if (old) + kfree_rcu(old, rcu); return 0; } EXPORT_SYMBOL(gen_new_estimator); /** * gen_kill_estimator - remove a rate estimator - * @bstats: basic statistics - * @rate_est: rate estimator statistics + * @rate_est: rate estimator * - * Removes the rate estimator specified by &bstats and &rate_est. + * Removes the rate estimator. * - * Note : Caller should respect an RCU grace period before freeing stats_lock */ -void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est64 *rate_est) +void gen_kill_estimator(struct net_rate_estimator __rcu **rate_est) { - struct gen_estimator *e; - - spin_lock_bh(&est_tree_lock); - while ((e = gen_find_node(bstats, rate_est))) { - rb_erase(&e->node, &est_root); + struct net_rate_estimator *est; - write_lock(&est_lock); - e->bstats = NULL; - write_unlock(&est_lock); - - list_del_rcu(&e->list); - kfree_rcu(e, e_rcu); + est = xchg((__force struct net_rate_estimator **)rate_est, NULL); + if (est) { + del_timer_sync(&est->timer); + kfree_rcu(est, rcu); } - spin_unlock_bh(&est_tree_lock); } EXPORT_SYMBOL(gen_kill_estimator); @@ -314,33 +215,47 @@ EXPORT_SYMBOL(gen_kill_estimator); */ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu_bstats, - struct gnet_stats_rate_est64 *rate_est, + struct net_rate_estimator __rcu **rate_est, spinlock_t *stats_lock, seqcount_t *running, struct nlattr *opt) { - gen_kill_estimator(bstats, rate_est); - return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, running, opt); + return gen_new_estimator(bstats, cpu_bstats, rate_est, + stats_lock, running, opt); } EXPORT_SYMBOL(gen_replace_estimator); /** * gen_estimator_active - test if estimator is currently in use - * @bstats: basic statistics - * @rate_est: rate estimator statistics + * @rate_est: rate estimator * * Returns true if estimator is active, and false if not. */ -bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, - const struct gnet_stats_rate_est64 *rate_est) +bool gen_estimator_active(struct net_rate_estimator __rcu **rate_est) { - bool res; + return !!rcu_access_pointer(*rate_est); +} +EXPORT_SYMBOL(gen_estimator_active); - ASSERT_RTNL(); +bool gen_estimator_read(struct net_rate_estimator __rcu **rate_est, + struct gnet_stats_rate_est64 *sample) +{ + struct net_rate_estimator *est; + unsigned seq; + + rcu_read_lock(); + est = rcu_dereference(*rate_est); + if (!est) { + rcu_read_unlock(); + return false; + } - spin_lock_bh(&est_tree_lock); - res = gen_find_node(bstats, rate_est) != NULL; - spin_unlock_bh(&est_tree_lock); + do { + seq = read_seqcount_begin(&est->seq); + sample->bps = est->avbps >> 8; + sample->pps = est->avpps >> 8; + } while (read_seqcount_retry(&est->seq, seq)); - return res; + rcu_read_unlock(); + return true; } -EXPORT_SYMBOL(gen_estimator_active); +EXPORT_SYMBOL(gen_estimator_read); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 508e051304fb..87f28557b329 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -194,8 +194,7 @@ EXPORT_SYMBOL(gnet_stats_copy_basic); /** * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV * @d: dumping handle - * @b: basic statistics - * @r: rate estimator statistics + * @rate_est: rate estimator * * Appends the rate estimator statistics to the top level TLV created by * gnet_stats_start_copy(). @@ -205,18 +204,17 @@ EXPORT_SYMBOL(gnet_stats_copy_basic); */ int gnet_stats_copy_rate_est(struct gnet_dump *d, - const struct gnet_stats_basic_packed *b, - struct gnet_stats_rate_est64 *r) + struct net_rate_estimator __rcu **rate_est) { + struct gnet_stats_rate_est64 sample; struct gnet_stats_rate_est est; int res; - if (b && !gen_estimator_active(b, r)) + if (!gen_estimator_read(rate_est, &sample)) return 0; - - est.bps = min_t(u64, UINT_MAX, r->bps); + est.bps = min_t(u64, UINT_MAX, sample.bps); /* we have some time before reaching 2^32 packets per second */ - est.pps = r->pps; + est.pps = sample.pps; if (d->compat_tc_stats) { d->tc_stats.bps = est.bps; @@ -226,11 +224,11 @@ gnet_stats_copy_rate_est(struct gnet_dump *d, if (d->tail) { res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est), TCA_STATS_PAD); - if (res < 0 || est.bps == r->bps) + if (res < 0 || est.bps == sample.bps) return res; /* emit 64bit stats only if needed */ - return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r), - TCA_STATS_PAD); + return gnet_stats_copy(d, TCA_STATS_RATE_EST64, &sample, + sizeof(sample), TCA_STATS_PAD); } return 0; diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index dbd6c4a12b97..91a373a3f534 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -63,7 +63,7 @@ void xt_rateest_put(struct xt_rateest *est) mutex_lock(&xt_rateest_mutex); if (--est->refcnt == 0) { hlist_del(&est->list); - gen_kill_estimator(&est->bstats, &est->rstats); + gen_kill_estimator(&est->rate_est); /* * gen_estimator est_timer() might access est->lock or bstats, * wait a RCU grace period before freeing 'est' @@ -132,7 +132,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) cfg.est.interval = info->interval; cfg.est.ewma_log = info->ewma_log; - ret = gen_new_estimator(&est->bstats, NULL, &est->rstats, + ret = gen_new_estimator(&est->bstats, NULL, &est->rate_est, &est->lock, NULL, &cfg.opt); if (ret < 0) goto err2; diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 7720b036d76a..1db02f6fca54 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -18,35 +18,33 @@ static bool xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rateest_match_info *info = par->matchinfo; - struct gnet_stats_rate_est64 *r; + struct gnet_stats_rate_est64 sample = {0}; u_int32_t bps1, bps2, pps1, pps2; bool ret = true; - spin_lock_bh(&info->est1->lock); - r = &info->est1->rstats; + gen_estimator_read(&info->est1->rate_est, &sample); + if (info->flags & XT_RATEEST_MATCH_DELTA) { - bps1 = info->bps1 >= r->bps ? info->bps1 - r->bps : 0; - pps1 = info->pps1 >= r->pps ? info->pps1 - r->pps : 0; + bps1 = info->bps1 >= sample.bps ? info->bps1 - sample.bps : 0; + pps1 = info->pps1 >= sample.pps ? info->pps1 - sample.pps : 0; } else { - bps1 = r->bps; - pps1 = r->pps; + bps1 = sample.bps; + pps1 = sample.pps; } - spin_unlock_bh(&info->est1->lock); if (info->flags & XT_RATEEST_MATCH_ABS) { bps2 = info->bps2; pps2 = info->pps2; } else { - spin_lock_bh(&info->est2->lock); - r = &info->est2->rstats; + gen_estimator_read(&info->est2->rate_est, &sample); + if (info->flags & XT_RATEEST_MATCH_DELTA) { - bps2 = info->bps2 >= r->bps ? info->bps2 - r->bps : 0; - pps2 = info->pps2 >= r->pps ? info->pps2 - r->pps : 0; + bps2 = info->bps2 >= sample.bps ? info->bps2 - sample.bps : 0; + pps2 = info->pps2 >= sample.pps ? info->pps2 - sample.pps : 0; } else { - bps2 = r->bps; - pps2 = r->pps; + bps2 = sample.bps; + pps2 = sample.pps; } - spin_unlock_bh(&info->est2->lock); } switch (info->mode) { diff --git a/net/sched/act_api.c b/net/sched/act_api.c index f893d180da1c..2095c83ce773 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -41,8 +41,7 @@ static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *p) spin_lock_bh(&hinfo->lock); hlist_del(&p->tcfa_head); spin_unlock_bh(&hinfo->lock); - gen_kill_estimator(&p->tcfa_bstats, - &p->tcfa_rate_est); + gen_kill_estimator(&p->tcfa_rate_est); /* * gen_estimator est_timer() might access p->tcfa_lock * or bstats, wait a RCU grace period before freeing p @@ -237,8 +236,7 @@ EXPORT_SYMBOL(tcf_hash_check); void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est) { if (est) - gen_kill_estimator(&a->tcfa_bstats, - &a->tcfa_rate_est); + gen_kill_estimator(&a->tcfa_rate_est); call_rcu(&a->tcfa_rcu, free_tcf); } EXPORT_SYMBOL(tcf_hash_cleanup); @@ -670,8 +668,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p, goto errout; if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 || - gnet_stats_copy_rate_est(&d, &p->tcfa_bstats, - &p->tcfa_rate_est) < 0 || + gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 || gnet_stats_copy_queue(&d, p->cpu_qstats, &p->tcfa_qstats, p->tcfa_qstats.qlen) < 0) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index c990b73a6c85..0ba91d1ce994 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -142,8 +142,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, goto failure_unlock; } else if (tb[TCA_POLICE_AVRATE] && (ret == ACT_P_CREATED || - !gen_estimator_active(&police->tcf_bstats, - &police->tcf_rate_est))) { + !gen_estimator_active(&police->tcf_rate_est))) { err = -EINVAL; goto failure_unlock; } @@ -216,13 +215,17 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, bstats_update(&police->tcf_bstats, skb); tcf_lastuse_update(&police->tcf_tm); - if (police->tcfp_ewma_rate && - police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { - police->tcf_qstats.overlimits++; - if (police->tcf_action == TC_ACT_SHOT) - police->tcf_qstats.drops++; - spin_unlock(&police->tcf_lock); - return police->tcf_action; + if (police->tcfp_ewma_rate) { + struct gnet_stats_rate_est64 sample; + + if (!gen_estimator_read(&police->tcf_rate_est, &sample) || + sample.bps >= police->tcfp_ewma_rate) { + police->tcf_qstats.overlimits++; + if (police->tcf_action == TC_ACT_SHOT) + police->tcf_qstats.drops++; + spin_unlock(&police->tcf_lock); + return police->tcf_action; + } } if (qdisc_pkt_len(skb) <= police->tcfp_mtu) { diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f337f1bdd1d4..d7b93429f0cc 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1395,7 +1395,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q), &d, cpu_bstats, &q->bstats) < 0 || - gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 || + gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) goto nla_put_failure; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index beb554aa8cfb..9ffe1c220b02 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -122,7 +122,7 @@ struct cbq_class { psched_time_t penalized; struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est64 rate_est; + struct net_rate_estimator __rcu *rate_est; struct tc_cbq_xstats xstats; struct tcf_proto __rcu *filter_list; @@ -1346,7 +1346,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || - gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || + gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0) return -1; @@ -1405,7 +1405,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) tcf_destroy_chain(&cl->filter_list); qdisc_destroy(cl->q); qdisc_put_rtab(cl->R_tab); - gen_kill_estimator(&cl->bstats, &cl->rate_est); + gen_kill_estimator(&cl->rate_est); if (cl != &q->link) kfree(cl); } diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 8af5c59eef84..bb4cbdf75004 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -25,7 +25,7 @@ struct drr_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est64 rate_est; + struct net_rate_estimator __rcu *rate_est; struct list_head alist; struct Qdisc *qdisc; @@ -142,7 +142,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl) { - gen_kill_estimator(&cl->bstats, &cl->rate_est); + gen_kill_estimator(&cl->rate_est); qdisc_destroy(cl->qdisc); kfree(cl); } @@ -283,7 +283,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || - gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || + gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0) return -1; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6cfb6e9038c2..6eb9c8e88519 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -709,7 +709,7 @@ void qdisc_destroy(struct Qdisc *qdisc) qdisc_put_stab(rtnl_dereference(qdisc->stab)); #endif - gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); + gen_kill_estimator(&qdisc->rate_est); if (ops->reset) ops->reset(qdisc); if (ops->destroy) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 000f1d36128e..3ffaa6fb0990 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -114,7 +114,7 @@ struct hfsc_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est64 rate_est; + struct net_rate_estimator __rcu *rate_est; struct tcf_proto __rcu *filter_list; /* filter list */ unsigned int filter_cnt; /* filter count */ unsigned int level; /* class level in hierarchy */ @@ -1091,7 +1091,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) tcf_destroy_chain(&cl->filter_list); qdisc_destroy(cl->qdisc); - gen_kill_estimator(&cl->bstats, &cl->rate_est); + gen_kill_estimator(&cl->rate_est); if (cl != &q->root) kfree(cl); } @@ -1348,7 +1348,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, xstats.rtwork = cl->cl_cumul; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || - gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || + gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0) return -1; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 9926fe4f3b6f..760f39e7caee 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -111,7 +111,7 @@ struct htb_class { unsigned int children; struct htb_class *parent; /* parent class */ - struct gnet_stats_rate_est64 rate_est; + struct net_rate_estimator __rcu *rate_est; /* * Written often fields @@ -1145,7 +1145,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || - gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || + gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0) return -1; @@ -1228,7 +1228,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) WARN_ON(!cl->un.leaf.q); qdisc_destroy(cl->un.leaf.q); } - gen_kill_estimator(&cl->bstats, &cl->rate_est); + gen_kill_estimator(&cl->rate_est); tcf_destroy_chain(&cl->filter_list); kfree(cl); } diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index ca0516e6f743..f9e712ce2d15 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -137,7 +137,7 @@ struct qfq_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est64 rate_est; + struct net_rate_estimator __rcu *rate_est; struct Qdisc *qdisc; struct list_head alist; /* Link for active-classes list. */ struct qfq_aggregate *agg; /* Parent aggregate. */ @@ -508,7 +508,7 @@ set_change_agg: new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL); if (new_agg == NULL) { err = -ENOBUFS; - gen_kill_estimator(&cl->bstats, &cl->rate_est); + gen_kill_estimator(&cl->rate_est); goto destroy_class; } sch_tree_lock(sch); @@ -533,7 +533,7 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) struct qfq_sched *q = qdisc_priv(sch); qfq_rm_from_agg(q, cl); - gen_kill_estimator(&cl->bstats, &cl->rate_est); + gen_kill_estimator(&cl->rate_est); qdisc_destroy(cl->qdisc); kfree(cl); } @@ -667,7 +667,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || - gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || + gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0) return -1; -- cgit v1.2.3 From 834184b1f3a4635efbdfdae5fb437f109f6605fa Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Nov 2016 21:36:45 +0100 Subject: netfilter: defrag: only register defrag functionality if needed nf_defrag modules for ipv4 and ipv6 export an empty stub function. Any module that needs the defragmentation hooks registered simply 'calls' this empty function to create a phony module dependency -- modprobe will then load the defrag module too. This extends netfilter ipv4/ipv6 defragmentation modules to delay the hook registration until the functionality is requested within a network namespace instead of module load time for all namespaces. Hooks are only un-registered on module unload or when a namespace that used such defrag functionality exits. We have to use struct net for this as the register hooks can be called before netns initialization here from the ipv4/ipv6 conntrack module init path. There is no unregister functionality support, defrag will always be active once it was requested inside a net namespace. The reason is that defrag has impact on nft and iptables rulesets (without defrag we might see framents). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_defrag_ipv4.h | 3 +- include/net/netfilter/ipv6/nf_defrag_ipv6.h | 3 +- include/net/netns/netfilter.h | 6 ++++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 7 ++++- net/ipv4/netfilter/nf_defrag_ipv4.c | 41 +++++++++++++++++++++++-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 7 ++++- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 42 +++++++++++++++++++++++--- net/netfilter/xt_TPROXY.c | 15 ++++++--- net/netfilter/xt_socket.c | 33 +++++++++++++++++--- 9 files changed, 136 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/ipv4/nf_defrag_ipv4.h b/include/net/netfilter/ipv4/nf_defrag_ipv4.h index f01ef208dff6..db405f70e538 100644 --- a/include/net/netfilter/ipv4/nf_defrag_ipv4.h +++ b/include/net/netfilter/ipv4/nf_defrag_ipv4.h @@ -1,6 +1,7 @@ #ifndef _NF_DEFRAG_IPV4_H #define _NF_DEFRAG_IPV4_H -void nf_defrag_ipv4_enable(void); +struct net; +int nf_defrag_ipv4_enable(struct net *); #endif /* _NF_DEFRAG_IPV4_H */ diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index ddf162f7966f..7664efe37974 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -1,7 +1,8 @@ #ifndef _NF_DEFRAG_IPV6_H #define _NF_DEFRAG_IPV6_H -void nf_defrag_ipv6_enable(void); +struct net; +int nf_defrag_ipv6_enable(struct net *); int nf_ct_frag6_init(void); void nf_ct_frag6_cleanup(void); diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 58487b1cc99a..cea396b53a60 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -17,5 +17,11 @@ struct netns_nf { struct ctl_table_header *nf_log_dir_header; #endif struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) + bool defrag_ipv4; +#endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + bool defrag_ipv6; +#endif }; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 6f375443a74b..fcfd071f4705 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -325,6 +325,12 @@ static int ipv4_hooks_register(struct net *net) if (cnet->users > 1) goto out_unlock; + err = nf_defrag_ipv4_enable(net); + if (err) { + cnet->users = 0; + goto out_unlock; + } + err = nf_register_net_hooks(net, ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); @@ -422,7 +428,6 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) int ret = 0; need_conntrack(); - nf_defrag_ipv4_enable(); ret = nf_register_sockopt(&so_getorigdst); if (ret < 0) { diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index d88da36b383c..49bd6a54404f 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -22,6 +23,8 @@ #endif #include +static DEFINE_MUTEX(defrag4_mutex); + static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb, u_int32_t user) { @@ -102,18 +105,50 @@ static struct nf_hook_ops ipv4_defrag_ops[] = { }, }; +static void __net_exit defrag4_net_exit(struct net *net) +{ + if (net->nf.defrag_ipv4) { + nf_unregister_net_hooks(net, ipv4_defrag_ops, + ARRAY_SIZE(ipv4_defrag_ops)); + net->nf.defrag_ipv4 = false; + } +} + +static struct pernet_operations defrag4_net_ops = { + .exit = defrag4_net_exit, +}; + static int __init nf_defrag_init(void) { - return nf_register_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); + return register_pernet_subsys(&defrag4_net_ops); } static void __exit nf_defrag_fini(void) { - nf_unregister_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); + unregister_pernet_subsys(&defrag4_net_ops); } -void nf_defrag_ipv4_enable(void) +int nf_defrag_ipv4_enable(struct net *net) { + int err = 0; + + might_sleep(); + + if (net->nf.defrag_ipv4) + return 0; + + mutex_lock(&defrag4_mutex); + if (net->nf.defrag_ipv4) + goto out_unlock; + + err = nf_register_net_hooks(net, ipv4_defrag_ops, + ARRAY_SIZE(ipv4_defrag_ops)); + if (err == 0) + net->nf.defrag_ipv4 = true; + + out_unlock: + mutex_unlock(&defrag4_mutex); + return err; } EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 72fe48075b7f..4e3402486833 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -325,6 +325,12 @@ static int ipv6_hooks_register(struct net *net) if (cnet->users > 1) goto out_unlock; + err = nf_defrag_ipv6_enable(net); + if (err < 0) { + cnet->users = 0; + goto out_unlock; + } + err = nf_register_net_hooks(net, ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); if (err) @@ -427,7 +433,6 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) int ret = 0; need_conntrack(); - nf_defrag_ipv6_enable(); ret = nf_register_sockopt(&so_getorigdst6); if (ret < 0) { diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index f06b0471f39f..8e0bdd058787 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -30,6 +30,8 @@ #include #include +static DEFINE_MUTEX(defrag6_mutex); + static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, struct sk_buff *skb) { @@ -87,6 +89,19 @@ static struct nf_hook_ops ipv6_defrag_ops[] = { }, }; +static void __net_exit defrag6_net_exit(struct net *net) +{ + if (net->nf.defrag_ipv6) { + nf_unregister_net_hooks(net, ipv6_defrag_ops, + ARRAY_SIZE(ipv6_defrag_ops)); + net->nf.defrag_ipv6 = false; + } +} + +static struct pernet_operations defrag6_net_ops = { + .exit = defrag6_net_exit, +}; + static int __init nf_defrag_init(void) { int ret = 0; @@ -96,9 +111,9 @@ static int __init nf_defrag_init(void) pr_err("nf_defrag_ipv6: can't initialize frag6.\n"); return ret; } - ret = nf_register_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); + ret = register_pernet_subsys(&defrag6_net_ops); if (ret < 0) { - pr_err("nf_defrag_ipv6: can't register hooks\n"); + pr_err("nf_defrag_ipv6: can't register pernet ops\n"); goto cleanup_frag6; } return ret; @@ -111,12 +126,31 @@ cleanup_frag6: static void __exit nf_defrag_fini(void) { - nf_unregister_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); + unregister_pernet_subsys(&defrag6_net_ops); nf_ct_frag6_cleanup(); } -void nf_defrag_ipv6_enable(void) +int nf_defrag_ipv6_enable(struct net *net) { + int err = 0; + + might_sleep(); + + if (net->nf.defrag_ipv6) + return 0; + + mutex_lock(&defrag6_mutex); + if (net->nf.defrag_ipv6) + goto out_unlock; + + err = nf_register_net_hooks(net, ipv6_defrag_ops, + ARRAY_SIZE(ipv6_defrag_ops)); + if (err == 0) + net->nf.defrag_ipv6 = true; + + out_unlock: + mutex_unlock(&defrag6_mutex); + return err; } EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable); diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index dbd72cc40e42..80cb7babeb64 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -531,6 +531,11 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) static int tproxy_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_ip6 *i = par->entryinfo; + int err; + + err = nf_defrag_ipv6_enable(par->net); + if (err) + return err; if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && !(i->invflags & IP6T_INV_PROTO)) @@ -545,6 +550,11 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par) static int tproxy_tg4_check(const struct xt_tgchk_param *par) { const struct ipt_ip *i = par->entryinfo; + int err; + + err = nf_defrag_ipv4_enable(par->net); + if (err) + return err; if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && !(i->invflags & IPT_INV_PROTO)) @@ -596,11 +606,6 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = { static int __init tproxy_tg_init(void) { - nf_defrag_ipv4_enable(); -#ifdef XT_TPROXY_HAVE_IPV6 - nf_defrag_ipv6_enable(); -#endif - return xt_register_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg)); } diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 2198914707f5..770bbec878f1 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -147,9 +147,28 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) } #endif +static int socket_mt_enable_defrag(struct net *net, int family) +{ + switch (family) { + case NFPROTO_IPV4: + return nf_defrag_ipv4_enable(net); +#ifdef XT_SOCKET_HAVE_IPV6 + case NFPROTO_IPV6: + return nf_defrag_ipv6_enable(net); +#endif + } + WARN_ONCE(1, "Unknown family %d\n", family); + return 0; +} + static int socket_mt_v1_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; + int err; + + err = socket_mt_enable_defrag(par->net, par->family); + if (err) + return err; if (info->flags & ~XT_SOCKET_FLAGS_V1) { pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1); @@ -161,6 +180,11 @@ static int socket_mt_v1_check(const struct xt_mtchk_param *par) static int socket_mt_v2_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo; + int err; + + err = socket_mt_enable_defrag(par->net, par->family); + if (err) + return err; if (info->flags & ~XT_SOCKET_FLAGS_V2) { pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2); @@ -173,7 +197,11 @@ static int socket_mt_v3_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo3 *info = (struct xt_socket_mtinfo3 *)par->matchinfo; + int err; + err = socket_mt_enable_defrag(par->net, par->family); + if (err) + return err; if (info->flags & ~XT_SOCKET_FLAGS_V3) { pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V3); @@ -268,11 +296,6 @@ static struct xt_match socket_mt_reg[] __read_mostly = { static int __init socket_mt_init(void) { - nf_defrag_ipv4_enable(); -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - nf_defrag_ipv6_enable(); -#endif - return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); } -- cgit v1.2.3 From 1814096980bbe546c4384b7b064126cbe7d40d30 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 24 Nov 2016 12:04:55 +0100 Subject: netfilter: nft_payload: layer 4 checksum adjustment for pseudoheader fields This patch adds a new flag that signals the kernel to update layer 4 checksum if the packet field belongs to the layer 4 pseudoheader. This implicitly provides stateless NAT 1:1 that is useful under very specific usecases. Since rules mangling layer 3 fields that are part of the pseudoheader may potentially convey any layer 4 packet, we have to deal with the layer 4 checksum adjustment using protocol specific code. This patch adds support for TCP, UDP and ICMPv6, since they include the pseudoheader in the layer 4 checksum calculation. ICMP doesn't, so we can skip it. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 1 + include/uapi/linux/netfilter/nf_tables.h | 6 ++ net/netfilter/nft_payload.c | 107 +++++++++++++++++++++++++++++-- 3 files changed, 109 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 862373d4ea9d..8f690effec37 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -45,6 +45,7 @@ struct nft_payload_set { enum nft_registers sreg:8; u8 csum_type; u8 csum_offset; + u8 csum_flags; }; extern const struct nft_expr_ops nft_payload_fast_ops; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 14e5f619167e..f030e59aa2ec 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -659,6 +659,10 @@ enum nft_payload_csum_types { NFT_PAYLOAD_CSUM_INET, }; +enum nft_payload_csum_flags { + NFT_PAYLOAD_L4CSUM_PSEUDOHDR = (1 << 0), +}; + /** * enum nft_payload_attributes - nf_tables payload expression netlink attributes * @@ -669,6 +673,7 @@ enum nft_payload_csum_types { * @NFTA_PAYLOAD_SREG: source register to load data from (NLA_U32: nft_registers) * @NFTA_PAYLOAD_CSUM_TYPE: checksum type (NLA_U32) * @NFTA_PAYLOAD_CSUM_OFFSET: checksum offset relative to base (NLA_U32) + * @NFTA_PAYLOAD_CSUM_FLAGS: checksum flags (NLA_U32) */ enum nft_payload_attributes { NFTA_PAYLOAD_UNSPEC, @@ -679,6 +684,7 @@ enum nft_payload_attributes { NFTA_PAYLOAD_SREG, NFTA_PAYLOAD_CSUM_TYPE, NFTA_PAYLOAD_CSUM_OFFSET, + NFTA_PAYLOAD_CSUM_FLAGS, __NFTA_PAYLOAD_MAX }; #define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 98fb5d7b8087..36d2b1096546 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008-2009 Patrick McHardy + * Copyright (c) 2016 Pablo Neira Ayuso * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -17,6 +18,10 @@ #include #include #include +/* For layer 4 checksum field offset. */ +#include +#include +#include /* add vlan header into the user buffer for if tag was removed by offloads */ static bool @@ -164,6 +169,87 @@ const struct nft_expr_ops nft_payload_fast_ops = { .dump = nft_payload_dump, }; +static inline void nft_csum_replace(__sum16 *sum, __wsum fsum, __wsum tsum) +{ + *sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), fsum), tsum)); + if (*sum == 0) + *sum = CSUM_MANGLED_0; +} + +static bool nft_payload_udp_checksum(struct sk_buff *skb, unsigned int thoff) +{ + struct udphdr *uh, _uh; + + uh = skb_header_pointer(skb, thoff, sizeof(_uh), &_uh); + if (!uh) + return false; + + return uh->check; +} + +static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt, + struct sk_buff *skb, + unsigned int *l4csum_offset) +{ + switch (pkt->tprot) { + case IPPROTO_TCP: + *l4csum_offset = offsetof(struct tcphdr, check); + break; + case IPPROTO_UDP: + if (!nft_payload_udp_checksum(skb, pkt->xt.thoff)) + return -1; + /* Fall through. */ + case IPPROTO_UDPLITE: + *l4csum_offset = offsetof(struct udphdr, check); + break; + case IPPROTO_ICMPV6: + *l4csum_offset = offsetof(struct icmp6hdr, icmp6_cksum); + break; + default: + return -1; + } + + *l4csum_offset += pkt->xt.thoff; + return 0; +} + +static int nft_payload_l4csum_update(const struct nft_pktinfo *pkt, + struct sk_buff *skb, + __wsum fsum, __wsum tsum) +{ + int l4csum_offset; + __sum16 sum; + + /* If we cannot determine layer 4 checksum offset or this packet doesn't + * require layer 4 checksum recalculation, skip this packet. + */ + if (nft_payload_l4csum_offset(pkt, skb, &l4csum_offset) < 0) + return 0; + + if (skb_copy_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0) + return -1; + + /* Checksum mangling for an arbitrary amount of bytes, based on + * inet_proto_csum_replace*() functions. + */ + if (skb->ip_summed != CHECKSUM_PARTIAL) { + nft_csum_replace(&sum, fsum, tsum); + if (skb->ip_summed == CHECKSUM_COMPLETE) { + skb->csum = ~csum_add(csum_sub(~(skb->csum), fsum), + tsum); + } + } else { + sum = ~csum_fold(csum_add(csum_sub(csum_unfold(sum), fsum), + tsum)); + } + + if (!skb_make_writable(skb, l4csum_offset + sizeof(sum)) || + skb_store_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0) + return -1; + + return 0; +} + static void nft_payload_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -204,14 +290,15 @@ static void nft_payload_set_eval(const struct nft_expr *expr, fsum = skb_checksum(skb, offset, priv->len, 0); tsum = csum_partial(src, priv->len, 0); - sum = csum_fold(csum_add(csum_sub(~csum_unfold(sum), fsum), - tsum)); - if (sum == 0) - sum = CSUM_MANGLED_0; + nft_csum_replace(&sum, fsum, tsum); if (!skb_make_writable(skb, csum_offset + sizeof(sum)) || skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0) goto err; + + if (priv->csum_flags && + nft_payload_l4csum_update(pkt, skb, fsum, tsum) < 0) + goto err; } if (!skb_make_writable(skb, max(offset + priv->len, 0)) || @@ -240,6 +327,15 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) priv->csum_offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET])); + if (tb[NFTA_PAYLOAD_CSUM_FLAGS]) { + u32 flags; + + flags = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_FLAGS])); + if (flags & ~NFT_PAYLOAD_L4CSUM_PSEUDOHDR) + return -EINVAL; + + priv->csum_flags = flags; + } switch (priv->csum_type) { case NFT_PAYLOAD_CSUM_NONE: @@ -262,7 +358,8 @@ static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)) || nla_put_be32(skb, NFTA_PAYLOAD_CSUM_TYPE, htonl(priv->csum_type)) || nla_put_be32(skb, NFTA_PAYLOAD_CSUM_OFFSET, - htonl(priv->csum_offset))) + htonl(priv->csum_offset)) || + nla_put_be32(skb, NFTA_PAYLOAD_CSUM_FLAGS, htonl(priv->csum_flags))) goto nla_put_failure; return 0; -- cgit v1.2.3 From 3bf3276119455bd0fc7a7e31be2823118e613842 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 28 Nov 2016 11:40:06 +0100 Subject: netfilter: add and use nf_fwd_netdev_egress ... so we can use current skb instead of working with a clone. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_dup_netdev.h | 1 + net/netfilter/nf_dup_netdev.c | 33 +++++++++++++++++++++++++-------- net/netfilter/nft_fwd_netdev.c | 4 ++-- 3 files changed, 28 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h index 397dcae349f9..3e919356bedf 100644 --- a/include/net/netfilter/nf_dup_netdev.h +++ b/include/net/netfilter/nf_dup_netdev.h @@ -2,5 +2,6 @@ #define _NF_DUP_NETDEV_H_ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif); +void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif); #endif diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index 44ae986c383f..c9d7f95768ab 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -14,6 +14,29 @@ #include #include +static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev) +{ + if (skb_mac_header_was_set(skb)) + skb_push(skb, skb->mac_len); + + skb->dev = dev; + dev_queue_xmit(skb); +} + +void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif) +{ + struct net_device *dev; + + dev = dev_get_by_index_rcu(nft_net(pkt), oif); + if (!dev) { + kfree_skb(pkt->skb); + return; + } + + nf_do_netdev_egress(pkt->skb, dev); +} +EXPORT_SYMBOL_GPL(nf_fwd_netdev_egress); + void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif) { struct net_device *dev; @@ -24,14 +47,8 @@ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif) return; skb = skb_clone(pkt->skb, GFP_ATOMIC); - if (skb == NULL) - return; - - if (skb_mac_header_was_set(skb)) - skb_push(skb, skb->mac_len); - - skb->dev = dev; - dev_queue_xmit(skb); + if (skb) + nf_do_netdev_egress(skb, dev); } EXPORT_SYMBOL_GPL(nf_dup_netdev_egress); diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 763ebc3e0b2b..ce13a50b9189 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -26,8 +26,8 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr, struct nft_fwd_netdev *priv = nft_expr_priv(expr); int oif = regs->data[priv->sreg_dev]; - nf_dup_netdev_egress(pkt, oif); - regs->verdict.code = NF_DROP; + nf_fwd_netdev_egress(pkt, oif); + regs->verdict.code = NF_STOLEN; } static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = { -- cgit v1.2.3 From e50092404c1bc7aaeb0a0f4077fa6f07b073a20f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Nov 2016 00:04:32 +0100 Subject: netfilter: nf_tables: add stateful objects This patch augments nf_tables to support stateful objects. This new infrastructure allows you to create, dump and delete stateful objects, that are identified by a user-defined name. This patch adds the generic infrastructure, follow up patches add support for two stateful objects: counters and quotas. This patch provides a native infrastructure for nf_tables to replace nfacct, the extended accounting infrastructure for iptables. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 79 +++++ include/uapi/linux/netfilter/nf_tables.h | 29 ++ net/netfilter/nf_tables_api.c | 516 +++++++++++++++++++++++++++++++ 3 files changed, 624 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 32970cba184a..903cd618f50e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -875,6 +875,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); * @list: used internally * @chains: chains in the table * @sets: sets in the table + * @objects: stateful objects in the table * @hgenerator: handle generator state * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) @@ -885,6 +886,7 @@ struct nft_table { struct list_head list; struct list_head chains; struct list_head sets; + struct list_head objects; u64 hgenerator; u32 use; u16 flags:14, @@ -934,6 +936,73 @@ void nft_unregister_expr(struct nft_expr_type *); int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v); +/** + * struct nft_object - nf_tables stateful object + * + * @list: table stateful object list node + * @type: pointer to object type + * @data: pointer to object data + * @name: name of this stateful object + * @genmask: generation mask + * @use: number of references to this stateful object + * @data: object data, layout depends on type + */ +struct nft_object { + struct list_head list; + char name[NFT_OBJ_MAXNAMELEN]; + u32 genmask:2, + use:30; + /* runtime data below here */ + const struct nft_object_type *type ____cacheline_aligned; + unsigned char data[] + __attribute__((aligned(__alignof__(u64)))); +}; + +static inline void *nft_obj_data(const struct nft_object *obj) +{ + return (void *)obj->data; +} + +#define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr)) + +struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, + const struct nlattr *nla, u32 objtype, + u8 genmask); + +/** + * struct nft_object_type - stateful object type + * + * @eval: stateful object evaluation function + * @list: list node in list of object types + * @type: stateful object numeric type + * @size: stateful object size + * @owner: module owner + * @maxattr: maximum netlink attribute + * @policy: netlink attribute policy + * @init: initialize object from netlink attributes + * @destroy: release existing stateful object + * @dump: netlink dump stateful object + */ +struct nft_object_type { + void (*eval)(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt); + struct list_head list; + u32 type; + unsigned int size; + unsigned int maxattr; + struct module *owner; + const struct nla_policy *policy; + int (*init)(const struct nlattr * const tb[], + struct nft_object *obj); + void (*destroy)(struct nft_object *obj); + int (*dump)(struct sk_buff *skb, + const struct nft_object *obj); +}; + +int nft_register_obj(struct nft_object_type *obj_type); +void nft_unregister_obj(struct nft_object_type *obj_type); + /** * struct nft_traceinfo - nft tracing information and state * @@ -981,6 +1050,9 @@ void nft_trace_notify(struct nft_traceinfo *info); #define MODULE_ALIAS_NFT_SET() \ MODULE_ALIAS("nft-set") +#define MODULE_ALIAS_NFT_OBJ(type) \ + MODULE_ALIAS("nft-obj-" __stringify(type)) + /* * The gencursor defines two generations, the currently active and the * next one. Objects contain a bitmask of 2 bits specifying the generations @@ -1157,4 +1229,11 @@ struct nft_trans_elem { #define nft_trans_elem(trans) \ (((struct nft_trans_elem *)trans->data)->elem) +struct nft_trans_obj { + struct nft_object *obj; +}; + +#define nft_trans_obj(trans) \ + (((struct nft_trans_obj *)trans->data)->obj) + #endif /* _NET_NF_TABLES_H */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index f030e59aa2ec..18e30dbc8c3f 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -4,6 +4,7 @@ #define NFT_TABLE_MAXNAMELEN 32 #define NFT_CHAIN_MAXNAMELEN 32 #define NFT_SET_MAXNAMELEN 32 +#define NFT_OBJ_MAXNAMELEN 32 #define NFT_USERDATA_MAXLEN 256 /** @@ -85,6 +86,9 @@ enum nft_verdicts { * @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes) * @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes) * @NFT_MSG_TRACE: trace event (enum nft_trace_attributes) + * @NFT_MSG_NEWOBJ: create a stateful object (enum nft_obj_attributes) + * @NFT_MSG_GETOBJ: get a stateful object (enum nft_obj_attributes) + * @NFT_MSG_DELOBJ: delete a stateful object (enum nft_obj_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -105,6 +109,9 @@ enum nf_tables_msg_types { NFT_MSG_NEWGEN, NFT_MSG_GETGEN, NFT_MSG_TRACE, + NFT_MSG_NEWOBJ, + NFT_MSG_GETOBJ, + NFT_MSG_DELOBJ, NFT_MSG_MAX, }; @@ -1178,6 +1185,28 @@ enum nft_fib_flags { NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */ }; +#define NFT_OBJECT_UNSPEC 0 + +/** + * enum nft_object_attributes - nf_tables stateful object netlink attributes + * + * @NFTA_OBJ_TABLE: name of the table containing the expression (NLA_STRING) + * @NFTA_OBJ_NAME: name of this expression type (NLA_STRING) + * @NFTA_OBJ_TYPE: stateful object type (NLA_U32) + * @NFTA_OBJ_DATA: stateful object data (NLA_NESTED) + * @NFTA_OBJ_USE: number of references to this expression (NLA_U32) + */ +enum nft_object_attributes { + NFTA_OBJ_UNSPEC, + NFTA_OBJ_TABLE, + NFTA_OBJ_NAME, + NFTA_OBJ_TYPE, + NFTA_OBJ_DATA, + NFTA_OBJ_USE, + __NFTA_OBJ_MAX +}; +#define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1) + /** * enum nft_trace_attributes - nf_tables trace netlink attributes * diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e5194f6f906c..2ae717c5dcb8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -22,6 +22,7 @@ #include static LIST_HEAD(nf_tables_expressions); +static LIST_HEAD(nf_tables_objects); /** * nft_register_afinfo - register nf_tables address family info @@ -304,6 +305,38 @@ static int nft_delset(struct nft_ctx *ctx, struct nft_set *set) return err; } +static int nft_trans_obj_add(struct nft_ctx *ctx, int msg_type, + struct nft_object *obj) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_obj)); + if (trans == NULL) + return -ENOMEM; + + if (msg_type == NFT_MSG_NEWOBJ) + nft_activate_next(ctx->net, obj); + + nft_trans_obj(trans) = obj; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + + return 0; +} + +static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) +{ + int err; + + err = nft_trans_obj_add(ctx, NFT_MSG_DELOBJ, obj); + if (err < 0) + return err; + + nft_deactivate_next(ctx->net, obj); + ctx->table->use--; + + return err; +} + /* * Tables */ @@ -688,6 +721,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN); INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); + INIT_LIST_HEAD(&table->objects); table->flags = flags; nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); @@ -709,6 +743,7 @@ static int nft_flush_table(struct nft_ctx *ctx) { int err; struct nft_chain *chain, *nc; + struct nft_object *obj, *ne; struct nft_set *set, *ns; list_for_each_entry(chain, &ctx->table->chains, list) { @@ -735,6 +770,12 @@ static int nft_flush_table(struct nft_ctx *ctx) goto out; } + list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) { + err = nft_delobj(ctx, obj); + if (err < 0) + goto out; + } + list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) { if (!nft_is_active_next(ctx->net, chain)) continue; @@ -3838,6 +3879,434 @@ struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, } EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc); +/* + * Stateful objects + */ + +/** + * nft_register_obj- register nf_tables stateful object type + * @obj: object type + * + * Registers the object type for use with nf_tables. Returns zero on + * success or a negative errno code otherwise. + */ +int nft_register_obj(struct nft_object_type *obj_type) +{ + if (obj_type->type == NFT_OBJECT_UNSPEC) + return -EINVAL; + + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_rcu(&obj_type->list, &nf_tables_objects); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return 0; +} +EXPORT_SYMBOL_GPL(nft_register_obj); + +/** + * nft_unregister_obj - unregister nf_tables object type + * @obj: object type + * + * Unregisters the object type for use with nf_tables. + */ +void nft_unregister_obj(struct nft_object_type *obj_type) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del_rcu(&obj_type->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_obj); + +struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, + const struct nlattr *nla, + u32 objtype, u8 genmask) +{ + struct nft_object *obj; + + list_for_each_entry(obj, &table->objects, list) { + if (!nla_strcmp(nla, obj->name) && + objtype == obj->type->type && + nft_active_genmask(obj, genmask)) + return obj; + } + return ERR_PTR(-ENOENT); +} +EXPORT_SYMBOL_GPL(nf_tables_obj_lookup); + +static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = { + [NFTA_OBJ_TABLE] = { .type = NLA_STRING }, + [NFTA_OBJ_NAME] = { .type = NLA_STRING }, + [NFTA_OBJ_TYPE] = { .type = NLA_U32 }, + [NFTA_OBJ_DATA] = { .type = NLA_NESTED }, +}; + +static struct nft_object *nft_obj_init(const struct nft_object_type *type, + const struct nlattr *attr) +{ + struct nlattr *tb[type->maxattr + 1]; + struct nft_object *obj; + int err; + + if (attr) { + err = nla_parse_nested(tb, type->maxattr, attr, type->policy); + if (err < 0) + goto err1; + } else { + memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1)); + } + + err = -ENOMEM; + obj = kzalloc(sizeof(struct nft_object) + type->size, GFP_KERNEL); + if (obj == NULL) + goto err1; + + err = type->init((const struct nlattr * const *)tb, obj); + if (err < 0) + goto err2; + + obj->type = type; + return obj; +err2: + kfree(obj); +err1: + return ERR_PTR(err); +} + +static int nft_object_dump(struct sk_buff *skb, unsigned int attr, + const struct nft_object *obj) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, attr); + if (!nest) + goto nla_put_failure; + if (obj->type->dump(skb, obj) < 0) + goto nla_put_failure; + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + +static const struct nft_object_type *__nft_obj_type_get(u32 objtype) +{ + const struct nft_object_type *type; + + list_for_each_entry(type, &nf_tables_objects, list) { + if (objtype == type->type) + return type; + } + return NULL; +} + +static const struct nft_object_type *nft_obj_type_get(u32 objtype) +{ + const struct nft_object_type *type; + + type = __nft_obj_type_get(objtype); + if (type != NULL && try_module_get(type->owner)) + return type; + +#ifdef CONFIG_MODULES + if (type == NULL) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-obj-%u", objtype); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (__nft_obj_type_get(objtype)) + return ERR_PTR(-EAGAIN); + } +#endif + return ERR_PTR(-ENOENT); +} + +static int nf_tables_newobj(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_object_type *type; + u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; + struct nft_af_info *afi; + struct nft_table *table; + struct nft_object *obj; + struct nft_ctx ctx; + u32 objtype; + int err; + + if (!nla[NFTA_OBJ_TYPE] || + !nla[NFTA_OBJ_NAME] || + !nla[NFTA_OBJ_DATA]) + return -EINVAL; + + afi = nf_tables_afinfo_lookup(net, family, true); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + + objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + if (err != -ENOENT) + return err; + + obj = NULL; + } + + if (obj != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + + return 0; + } + + nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + + type = nft_obj_type_get(objtype); + if (IS_ERR(type)) + return PTR_ERR(type); + + obj = nft_obj_init(type, nla[NFTA_OBJ_DATA]); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err1; + } + nla_strlcpy(obj->name, nla[NFTA_OBJ_NAME], NFT_OBJ_MAXNAMELEN); + + err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj); + if (err < 0) + goto err2; + + list_add_tail_rcu(&obj->list, &table->objects); + table->use++; + return 0; +err2: + if (obj->type->destroy) + obj->type->destroy(obj); + kfree(obj); +err1: + module_put(type->owner); + return err; +} + +static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq, int event, u32 flags, + int family, const struct nft_table *table, + const struct nft_object *obj) +{ + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); + + if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) || + nla_put_string(skb, NFTA_OBJ_NAME, obj->name) || + nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->type->type)) || + nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || + nft_object_dump(skb, NFTA_OBJ_DATA, obj)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + const struct nft_object *obj; + unsigned int idx = 0, s_idx = cb->args[0]; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + + rcu_read_lock(); + cb->seq = net->nft.base_seq; + + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { + if (family != NFPROTO_UNSPEC && family != afi->family) + continue; + + list_for_each_entry_rcu(table, &afi->tables, list) { + list_for_each_entry_rcu(obj, &table->objects, list) { + if (!nft_is_active(net, obj)) + goto cont; + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWOBJ, + NLM_F_MULTI | NLM_F_APPEND, + afi->family, table, obj) < 0) + goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + } + } +done: + rcu_read_unlock(); + + cb->args[0] = idx; + return skb->len; +} + +static int nf_tables_getobj(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); + int family = nfmsg->nfgen_family; + const struct nft_af_info *afi; + const struct nft_table *table; + struct nft_object *obj; + struct sk_buff *skb2; + u32 objtype; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_obj, + }; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + if (!nla[NFTA_OBJ_NAME] || + !nla[NFTA_OBJ_TYPE]) + return -EINVAL; + + afi = nf_tables_afinfo_lookup(net, family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + + objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, + family, table, obj); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); +err: + kfree_skb(skb2); + return err; + + return 0; +} + +static void nft_obj_destroy(struct nft_object *obj) +{ + if (obj->type->destroy) + obj->type->destroy(obj); + + module_put(obj->type->owner); + kfree(obj); +} + +static int nf_tables_delobj(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; + struct nft_af_info *afi; + struct nft_table *table; + struct nft_object *obj; + struct nft_ctx ctx; + u32 objtype; + + if (!nla[NFTA_OBJ_TYPE] || + !nla[NFTA_OBJ_NAME]) + return -EINVAL; + + afi = nf_tables_afinfo_lookup(net, family, true); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + + objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + if (IS_ERR(obj)) + return PTR_ERR(obj); + if (obj->use > 0) + return -EBUSY; + + nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + + return nft_delobj(&ctx, obj); +} + +static int nf_tables_obj_notify(const struct nft_ctx *ctx, + struct nft_object *obj, int event) +{ + struct sk_buff *skb; + int err; + + if (!ctx->report && + !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err; + + err = nf_tables_fill_obj_info(skb, ctx->net, ctx->portid, ctx->seq, + event, 0, ctx->afi->family, ctx->table, + obj); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); +err: + if (err < 0) { + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, + err); + } + return err; +} + static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq) { @@ -3998,6 +4467,21 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { [NFT_MSG_GETGEN] = { .call = nf_tables_getgen, }, + [NFT_MSG_NEWOBJ] = { + .call_batch = nf_tables_newobj, + .attr_count = NFTA_OBJ_MAX, + .policy = nft_obj_policy, + }, + [NFT_MSG_GETOBJ] = { + .call = nf_tables_getobj, + .attr_count = NFTA_OBJ_MAX, + .policy = nft_obj_policy, + }, + [NFT_MSG_DELOBJ] = { + .call_batch = nf_tables_delobj, + .attr_count = NFTA_OBJ_MAX, + .policy = nft_obj_policy, + }, }; static void nft_chain_commit_update(struct nft_trans *trans) @@ -4040,6 +4524,9 @@ static void nf_tables_commit_release(struct nft_trans *trans) nft_set_elem_destroy(nft_trans_elem_set(trans), nft_trans_elem(trans).priv, true); break; + case NFT_MSG_DELOBJ: + nft_obj_destroy(nft_trans_obj(trans)); + break; } kfree(trans); } @@ -4147,6 +4634,17 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) atomic_dec(&te->set->nelems); te->set->ndeact--; break; + case NFT_MSG_NEWOBJ: + nft_clear(net, nft_trans_obj(trans)); + nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), + NFT_MSG_NEWOBJ); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELOBJ: + list_del_rcu(&nft_trans_obj(trans)->list); + nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), + NFT_MSG_DELOBJ); + break; } } @@ -4181,6 +4679,9 @@ static void nf_tables_abort_release(struct nft_trans *trans) nft_set_elem_destroy(nft_trans_elem_set(trans), nft_trans_elem(trans).priv, true); break; + case NFT_MSG_NEWOBJ: + nft_obj_destroy(nft_trans_obj(trans)); + break; } kfree(trans); } @@ -4259,6 +4760,15 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) te->set->ops->activate(net, te->set, &te->elem); te->set->ndeact--; + nft_trans_destroy(trans); + break; + case NFT_MSG_NEWOBJ: + trans->ctx.table->use--; + list_del_rcu(&nft_trans_obj(trans)->list); + break; + case NFT_MSG_DELOBJ: + trans->ctx.table->use++; + nft_clear(trans->ctx.net, nft_trans_obj(trans)); nft_trans_destroy(trans); break; } @@ -4807,6 +5317,7 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) { struct nft_table *table, *nt; struct nft_chain *chain, *nc; + struct nft_object *obj, *ne; struct nft_rule *rule, *nr; struct nft_set *set, *ns; struct nft_ctx ctx = { @@ -4833,6 +5344,11 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) table->use--; nft_set_destroy(set); } + list_for_each_entry_safe(obj, ne, &table->objects, list) { + list_del(&obj->list); + table->use--; + nft_obj_destroy(obj); + } list_for_each_entry_safe(chain, nc, &table->chains, list) { list_del(&chain->list); table->use--; -- cgit v1.2.3 From 43da04a593d8b2626f1cf4b56efe9402f6b53652 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Nov 2016 00:05:44 +0100 Subject: netfilter: nf_tables: atomic dump and reset for stateful objects This patch adds a new NFT_MSG_GETOBJ_RESET command perform an atomic dump-and-reset of the stateful object. This also comes with add support for atomic dump and reset for counter and quota objects. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 +- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 29 ++++++++++++----- net/netfilter/nft_counter.c | 56 +++++++++++++++++++++++++++----- net/netfilter/nft_quota.c | 18 ++++++---- 5 files changed, 85 insertions(+), 23 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 903cd618f50e..6f7d6a1dc09c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -997,7 +997,8 @@ struct nft_object_type { struct nft_object *obj); void (*destroy)(struct nft_object *obj); int (*dump)(struct sk_buff *skb, - const struct nft_object *obj); + struct nft_object *obj, + bool reset); }; int nft_register_obj(struct nft_object_type *obj_type); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 3d47582caa80..399eac1eee91 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -89,6 +89,7 @@ enum nft_verdicts { * @NFT_MSG_NEWOBJ: create a stateful object (enum nft_obj_attributes) * @NFT_MSG_GETOBJ: get a stateful object (enum nft_obj_attributes) * @NFT_MSG_DELOBJ: delete a stateful object (enum nft_obj_attributes) + * @NFT_MSG_GETOBJ_RESET: get and reset a stateful object (enum nft_obj_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -112,6 +113,7 @@ enum nf_tables_msg_types { NFT_MSG_NEWOBJ, NFT_MSG_GETOBJ, NFT_MSG_DELOBJ, + NFT_MSG_GETOBJ_RESET, NFT_MSG_MAX, }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2ae717c5dcb8..bfc015af366a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3972,14 +3972,14 @@ err1: } static int nft_object_dump(struct sk_buff *skb, unsigned int attr, - const struct nft_object *obj) + struct nft_object *obj, bool reset) { struct nlattr *nest; nest = nla_nest_start(skb, attr); if (!nest) goto nla_put_failure; - if (obj->type->dump(skb, obj) < 0) + if (obj->type->dump(skb, obj, reset) < 0) goto nla_put_failure; nla_nest_end(skb, nest); return 0; @@ -4096,7 +4096,7 @@ err1: static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq, int event, u32 flags, int family, const struct nft_table *table, - const struct nft_object *obj) + struct nft_object *obj, bool reset) { struct nfgenmsg *nfmsg; struct nlmsghdr *nlh; @@ -4115,7 +4115,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, nla_put_string(skb, NFTA_OBJ_NAME, obj->name) || nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->type->type)) || nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || - nft_object_dump(skb, NFTA_OBJ_DATA, obj)) + nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset)) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -4131,10 +4131,14 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); const struct nft_af_info *afi; const struct nft_table *table; - const struct nft_object *obj; unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nft_object *obj; + bool reset = false; + + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) + reset = true; rcu_read_lock(); cb->seq = net->nft.base_seq; @@ -4156,7 +4160,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, NLM_F_MULTI | NLM_F_APPEND, - afi->family, table, obj) < 0) + afi->family, table, obj, reset) < 0) goto done; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -4183,6 +4187,7 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, const struct nft_table *table; struct nft_object *obj; struct sk_buff *skb2; + bool reset = false; u32 objtype; int err; @@ -4214,9 +4219,12 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, if (!skb2) return -ENOMEM; + if (NFNL_MSG_TYPE(nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) + reset = true; + err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, - family, table, obj); + family, table, obj, reset); if (err < 0) goto err; @@ -4291,7 +4299,7 @@ static int nf_tables_obj_notify(const struct nft_ctx *ctx, err = nf_tables_fill_obj_info(skb, ctx->net, ctx->portid, ctx->seq, event, 0, ctx->afi->family, ctx->table, - obj); + obj, false); if (err < 0) { kfree_skb(skb); goto err; @@ -4482,6 +4490,11 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .attr_count = NFTA_OBJ_MAX, .policy = nft_obj_policy, }, + [NFT_MSG_GETOBJ_RESET] = { + .call = nf_tables_getobj, + .attr_count = NFTA_OBJ_MAX, + .policy = nft_obj_policy, + }, }; static void nft_chain_commit_update(struct nft_trans *trans) diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 6f3dd429f865..f6a02c5071c2 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -100,10 +100,10 @@ static void nft_counter_obj_destroy(struct nft_object *obj) nft_counter_do_destroy(priv); } -static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter, +static void nft_counter_fetch(struct nft_counter_percpu __percpu *counter, struct nft_counter *total) { - const struct nft_counter_percpu *cpu_stats; + struct nft_counter_percpu *cpu_stats; u64 bytes, packets; unsigned int seq; int cpu; @@ -122,12 +122,52 @@ static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter, } } +static u64 __nft_counter_reset(u64 *counter) +{ + u64 ret, old; + + do { + old = *counter; + ret = cmpxchg64(counter, old, 0); + } while (ret != old); + + return ret; +} + +static void nft_counter_reset(struct nft_counter_percpu __percpu *counter, + struct nft_counter *total) +{ + struct nft_counter_percpu *cpu_stats; + u64 bytes, packets; + unsigned int seq; + int cpu; + + memset(total, 0, sizeof(*total)); + for_each_possible_cpu(cpu) { + bytes = packets = 0; + + cpu_stats = per_cpu_ptr(counter, cpu); + do { + seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + packets += __nft_counter_reset(&cpu_stats->counter.packets); + bytes += __nft_counter_reset(&cpu_stats->counter.bytes); + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); + + total->packets += packets; + total->bytes += bytes; + } +} + static int nft_counter_do_dump(struct sk_buff *skb, - const struct nft_counter_percpu_priv *priv) + const struct nft_counter_percpu_priv *priv, + bool reset) { struct nft_counter total; - nft_counter_fetch(priv->counter, &total); + if (reset) + nft_counter_reset(priv->counter, &total); + else + nft_counter_fetch(priv->counter, &total); if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes), NFTA_COUNTER_PAD) || @@ -141,11 +181,11 @@ nla_put_failure: } static int nft_counter_obj_dump(struct sk_buff *skb, - const struct nft_object *obj) + struct nft_object *obj, bool reset) { - const struct nft_counter_percpu_priv *priv = nft_obj_data(obj); + struct nft_counter_percpu_priv *priv = nft_obj_data(obj); - return nft_counter_do_dump(skb, priv); + return nft_counter_do_dump(skb, priv, reset); } static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { @@ -178,7 +218,7 @@ static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); - return nft_counter_do_dump(skb, priv); + return nft_counter_do_dump(skb, priv, false); } static int nft_counter_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 0d344209803a..5d25f57497cb 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -83,12 +83,17 @@ static int nft_quota_obj_init(const struct nlattr * const tb[], return nft_quota_do_init(tb, priv); } -static int nft_quota_do_dump(struct sk_buff *skb, const struct nft_quota *priv) +static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv, + bool reset) { u32 flags = priv->invert ? NFT_QUOTA_F_INV : 0; u64 consumed; - consumed = atomic64_read(&priv->consumed); + if (reset) + consumed = atomic64_xchg(&priv->consumed, 0); + else + consumed = atomic64_read(&priv->consumed); + /* Since we inconditionally increment consumed quota for each packet * that we see, don't go over the quota boundary in what we send to * userspace. @@ -108,11 +113,12 @@ nla_put_failure: return -1; } -static int nft_quota_obj_dump(struct sk_buff *skb, const struct nft_object *obj) +static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj, + bool reset) { struct nft_quota *priv = nft_obj_data(obj); - return nft_quota_do_dump(skb, priv); + return nft_quota_do_dump(skb, priv, reset); } static struct nft_object_type nft_quota_obj __read_mostly = { @@ -146,9 +152,9 @@ static int nft_quota_init(const struct nft_ctx *ctx, static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr) { - const struct nft_quota *priv = nft_expr_priv(expr); + struct nft_quota *priv = nft_expr_priv(expr); - return nft_quota_do_dump(skb, priv); + return nft_quota_do_dump(skb, priv, false); } static struct nft_expr_type nft_quota_type; -- cgit v1.2.3 From 2599e98934c5ad166ad184b3682e38aadcb63fb3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Nov 2016 00:05:48 +0100 Subject: netfilter: nf_tables: notify internal updates of stateful objects Introduce nf_tables_obj_notify() to notify internal state changes in stateful objects. This is used by the quota object to report depletion in a follow up patch. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++++ net/netfilter/nf_tables_api.c | 31 +++++++++++++++++++------------ 2 files changed, 23 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 6f7d6a1dc09c..339e374c28b5 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -969,6 +969,10 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask); +int nft_obj_notify(struct net *net, struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, + int event, int family, int report, gfp_t gfp); + /** * struct nft_object_type - stateful object type * diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index bfc015af366a..9d2ed3f520ef 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4282,38 +4282,45 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk, return nft_delobj(&ctx, obj); } -static int nf_tables_obj_notify(const struct nft_ctx *ctx, - struct nft_object *obj, int event) +int nft_obj_notify(struct net *net, struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, int event, + int family, int report, gfp_t gfp) { struct sk_buff *skb; int err; - if (!ctx->report && - !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) + if (!report && + !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) return 0; err = -ENOBUFS; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(NLMSG_GOODSIZE, gfp); if (skb == NULL) goto err; - err = nf_tables_fill_obj_info(skb, ctx->net, ctx->portid, ctx->seq, - event, 0, ctx->afi->family, ctx->table, - obj, false); + err = nf_tables_fill_obj_info(skb, net, portid, seq, event, 0, family, + table, obj, false); if (err < 0) { kfree_skb(skb); goto err; } - err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp); err: if (err < 0) { - nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, - err); + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); } return err; } +EXPORT_SYMBOL_GPL(nft_obj_notify); + +static int nf_tables_obj_notify(const struct nft_ctx *ctx, + struct nft_object *obj, int event) +{ + return nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, + ctx->seq, event, ctx->afi->family, ctx->report, + GFP_KERNEL); +} static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq) -- cgit v1.2.3 From 1896531710abcd9a961a17d0c5c6a9f537d479b6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Nov 2016 00:05:56 +0100 Subject: netfilter: nft_quota: add depleted flag for objects Notify on depleted quota objects. The NFT_QUOTA_F_DEPLETED flag indicates we have reached overquota. Add pointer to table from nft_object, so we can use it when sending the depletion notification to userspace. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ include/uapi/linux/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 1 + net/netfilter/nft_quota.c | 36 +++++++++++++++++++++++++------- 4 files changed, 32 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 339e374c28b5..ce6fb6e83b32 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -940,6 +940,7 @@ int nft_verdict_dump(struct sk_buff *skb, int type, * struct nft_object - nf_tables stateful object * * @list: table stateful object list node + * @table: table this object belongs to * @type: pointer to object type * @data: pointer to object data * @name: name of this stateful object @@ -950,6 +951,7 @@ int nft_verdict_dump(struct sk_buff *skb, int type, struct nft_object { struct list_head list; char name[NFT_OBJ_MAXNAMELEN]; + struct nft_table *table; u32 genmask:2, use:30; /* runtime data below here */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 399eac1eee91..4864caca1e8e 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -983,6 +983,7 @@ enum nft_queue_attributes { enum nft_quota_flags { NFT_QUOTA_F_INV = (1 << 0), + NFT_QUOTA_F_DEPLETED = (1 << 1), }; /** diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9d2ed3f520ef..c5419701ca79 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4075,6 +4075,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, err = PTR_ERR(obj); goto err1; } + obj->table = table; nla_strlcpy(obj->name, nla[NFTA_OBJ_NAME], NFT_OBJ_MAXNAMELEN); err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj); diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 5d25f57497cb..7f27ebdce7ab 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -17,7 +17,7 @@ struct nft_quota { u64 quota; - bool invert; + unsigned long flags; atomic64_t consumed; }; @@ -27,11 +27,16 @@ static inline bool nft_overquota(struct nft_quota *priv, return atomic64_add_return(skb->len, &priv->consumed) >= priv->quota; } +static inline bool nft_quota_invert(struct nft_quota *priv) +{ + return priv->flags & NFT_QUOTA_F_INV; +} + static inline void nft_quota_do_eval(struct nft_quota *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt) { - if (nft_overquota(priv, pkt->skb) ^ priv->invert) + if (nft_overquota(priv, pkt->skb) ^ nft_quota_invert(priv)) regs->verdict.code = NFT_BREAK; } @@ -40,19 +45,29 @@ static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = { [NFTA_QUOTA_FLAGS] = { .type = NLA_U32 }, }; +#define NFT_QUOTA_DEPLETED_BIT 1 /* From NFT_QUOTA_F_DEPLETED. */ + static void nft_quota_obj_eval(struct nft_object *obj, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_quota *priv = nft_obj_data(obj); + bool overquota; - nft_quota_do_eval(priv, regs, pkt); + overquota = nft_overquota(priv, pkt->skb); + if (overquota ^ nft_quota_invert(priv)) + regs->verdict.code = NFT_BREAK; + + if (overquota && + !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags)) + nft_obj_notify(nft_net(pkt), obj->table, obj, 0, 0, + NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC); } static int nft_quota_do_init(const struct nlattr * const tb[], struct nft_quota *priv) { - u32 flags = 0; + unsigned long flags = 0; u64 quota; if (!tb[NFTA_QUOTA_BYTES]) @@ -66,10 +81,12 @@ static int nft_quota_do_init(const struct nlattr * const tb[], flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS])); if (flags & ~NFT_QUOTA_F_INV) return -EINVAL; + if (flags & NFT_QUOTA_F_DEPLETED) + return -EOPNOTSUPP; } priv->quota = quota; - priv->invert = (flags & NFT_QUOTA_F_INV) ? true : false; + priv->flags = flags; atomic64_set(&priv->consumed, 0); return 0; @@ -86,13 +103,16 @@ static int nft_quota_obj_init(const struct nlattr * const tb[], static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv, bool reset) { - u32 flags = priv->invert ? NFT_QUOTA_F_INV : 0; + u32 flags = priv->flags; u64 consumed; - if (reset) + if (reset) { consumed = atomic64_xchg(&priv->consumed, 0); - else + if (test_and_clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags)) + flags |= NFT_QUOTA_F_DEPLETED; + } else { consumed = atomic64_read(&priv->consumed); + } /* Since we inconditionally increment consumed quota for each packet * that we see, don't go over the quota boundary in what we send to -- cgit v1.2.3 From 8aeff920dcc9b3f8cf43042a76428582634d9208 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Nov 2016 00:06:00 +0100 Subject: netfilter: nf_tables: add stateful object reference to set elements This patch allows you to refer to stateful objects from set elements. This provides the infrastructure to create maps where the right hand side of the mapping is a stateful object. This allows us to build dictionaries of stateful objects, that you can use to perform fast lookups using any arbitrary key combination. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 9 ++++ include/uapi/linux/netfilter/nf_tables.h | 8 ++++ net/netfilter/nf_tables_api.c | 72 +++++++++++++++++++++++++++----- 3 files changed, 79 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index ce6fb6e83b32..85f0f03f1e87 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -326,6 +326,7 @@ void nft_unregister_set(struct nft_set_ops *ops); * @name: name of the set * @ktype: key type (numeric type defined by userspace, not used in the kernel) * @dtype: data type (verdict or numeric type defined by userspace) + * @objtype: object type (see NFT_OBJECT_* definitions) * @size: maximum set size * @nelems: number of elements * @ndeact: number of deactivated elements queued for removal @@ -347,6 +348,7 @@ struct nft_set { char name[NFT_SET_MAXNAMELEN]; u32 ktype; u32 dtype; + u32 objtype; u32 size; atomic_t nelems; u32 ndeact; @@ -416,6 +418,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, * @NFT_SET_EXT_EXPIRATION: element expiration time * @NFT_SET_EXT_USERDATA: user data associated with the element * @NFT_SET_EXT_EXPR: expression assiociated with the element + * @NFT_SET_EXT_OBJREF: stateful object reference associated with element * @NFT_SET_EXT_NUM: number of extension types */ enum nft_set_extensions { @@ -426,6 +429,7 @@ enum nft_set_extensions { NFT_SET_EXT_EXPIRATION, NFT_SET_EXT_USERDATA, NFT_SET_EXT_EXPR, + NFT_SET_EXT_OBJREF, NFT_SET_EXT_NUM }; @@ -554,6 +558,11 @@ static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, return elem + set->ops->elemsize; } +static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_OBJREF); +} + void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *data, diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 4864caca1e8e..a6b52dbff08c 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -255,6 +255,7 @@ enum nft_rule_compat_attributes { * @NFT_SET_MAP: set is used as a dictionary * @NFT_SET_TIMEOUT: set uses timeouts * @NFT_SET_EVAL: set contains expressions for evaluation + * @NFT_SET_OBJECT: set contains stateful objects */ enum nft_set_flags { NFT_SET_ANONYMOUS = 0x1, @@ -263,6 +264,7 @@ enum nft_set_flags { NFT_SET_MAP = 0x8, NFT_SET_TIMEOUT = 0x10, NFT_SET_EVAL = 0x20, + NFT_SET_OBJECT = 0x40, }; /** @@ -304,6 +306,7 @@ enum nft_set_desc_attributes { * @NFTA_SET_TIMEOUT: default timeout value (NLA_U64) * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32) * @NFTA_SET_USERDATA: user data (NLA_BINARY) + * @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*) */ enum nft_set_attributes { NFTA_SET_UNSPEC, @@ -321,6 +324,7 @@ enum nft_set_attributes { NFTA_SET_GC_INTERVAL, NFTA_SET_USERDATA, NFTA_SET_PAD, + NFTA_SET_OBJ_TYPE, __NFTA_SET_MAX }; #define NFTA_SET_MAX (__NFTA_SET_MAX - 1) @@ -344,6 +348,7 @@ enum nft_set_elem_flags { * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64) * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY) * @NFTA_SET_ELEM_EXPR: expression (NLA_NESTED: nft_expr_attributes) + * @NFTA_SET_ELEM_OBJREF: stateful object reference (NLA_STRING) */ enum nft_set_elem_attributes { NFTA_SET_ELEM_UNSPEC, @@ -355,6 +360,7 @@ enum nft_set_elem_attributes { NFTA_SET_ELEM_USERDATA, NFTA_SET_ELEM_EXPR, NFTA_SET_ELEM_PAD, + NFTA_SET_ELEM_OBJREF, __NFTA_SET_ELEM_MAX }; #define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1) @@ -1207,6 +1213,8 @@ enum nft_fib_flags { #define NFT_OBJECT_UNSPEC 0 #define NFT_OBJECT_COUNTER 1 #define NFT_OBJECT_QUOTA 2 +#define __NFT_OBJECT_MAX 3 +#define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1) /** * enum nft_object_attributes - nf_tables stateful object netlink attributes diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c5419701ca79..8228714c42d5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2452,6 +2452,7 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 }, [NFTA_SET_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, + [NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 }, }; static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { @@ -2609,6 +2610,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, if (nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen))) goto nla_put_failure; } + if (set->flags & NFT_SET_OBJECT && + nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype))) + goto nla_put_failure; if (set->timeout && nla_put_be64(skb, NFTA_SET_TIMEOUT, @@ -2838,7 +2842,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, unsigned int size; bool create; u64 timeout; - u32 ktype, dtype, flags, policy, gc_int; + u32 ktype, dtype, flags, policy, gc_int, objtype; struct nft_set_desc desc; unsigned char *udata; u16 udlen; @@ -2868,11 +2872,12 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | NFT_SET_INTERVAL | NFT_SET_TIMEOUT | - NFT_SET_MAP | NFT_SET_EVAL)) + NFT_SET_MAP | NFT_SET_EVAL | + NFT_SET_OBJECT)) return -EINVAL; - /* Only one of both operations is supported */ - if ((flags & (NFT_SET_MAP | NFT_SET_EVAL)) == - (NFT_SET_MAP | NFT_SET_EVAL)) + /* Only one of these operations is supported */ + if ((flags & (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) == + (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) return -EOPNOTSUPP; } @@ -2897,6 +2902,19 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, } else if (flags & NFT_SET_MAP) return -EINVAL; + if (nla[NFTA_SET_OBJ_TYPE] != NULL) { + if (!(flags & NFT_SET_OBJECT)) + return -EINVAL; + + objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE])); + if (objtype == NFT_OBJECT_UNSPEC || + objtype > NFT_OBJECT_MAX) + return -EINVAL; + } else if (flags & NFT_SET_OBJECT) + return -EINVAL; + else + objtype = NFT_OBJECT_UNSPEC; + timeout = 0; if (nla[NFTA_SET_TIMEOUT] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) @@ -2984,6 +3002,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, set->ktype = ktype; set->klen = desc.klen; set->dtype = dtype; + set->objtype = objtype; set->dlen = desc.dlen; set->flags = flags; set->size = desc.size; @@ -3126,6 +3145,10 @@ const struct nft_set_ext_type nft_set_ext_types[] = { [NFT_SET_EXT_EXPR] = { .align = __alignof__(struct nft_expr), }, + [NFT_SET_EXT_OBJREF] = { + .len = sizeof(struct nft_object *), + .align = __alignof__(struct nft_object *), + }, [NFT_SET_EXT_FLAGS] = { .len = sizeof(u8), .align = __alignof__(u8), @@ -3214,6 +3237,11 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, nft_set_ext_expr(ext)) < 0) goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) && + nla_put_string(skb, NFTA_SET_ELEM_OBJREF, + (*nft_set_ext_obj(ext))->name) < 0) + goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(*nft_set_ext_flags(ext)))) @@ -3508,7 +3536,8 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, nft_data_uninit(nft_set_ext_data(ext), set->dtype); if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); - + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) + (*nft_set_ext_obj(ext))->use--; kfree(elem); } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); @@ -3533,11 +3562,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr, u32 nlmsg_flags) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; + u8 genmask = nft_genmask_next(ctx->net); struct nft_data_desc d1, d2; struct nft_set_ext_tmpl tmpl; struct nft_set_ext *ext, *ext2; struct nft_set_elem elem; struct nft_set_binding *binding; + struct nft_object *obj = NULL; struct nft_userdata *udata; struct nft_data data; enum nft_registers dreg; @@ -3600,6 +3631,20 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); } + if (nla[NFTA_SET_ELEM_OBJREF] != NULL) { + if (!(set->flags & NFT_SET_OBJECT)) { + err = -EINVAL; + goto err2; + } + obj = nf_tables_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF], + set->objtype, genmask); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err2; + } + nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); + } + if (nla[NFTA_SET_ELEM_DATA] != NULL) { err = nft_data_init(ctx, &data, sizeof(data), &d2, nla[NFTA_SET_ELEM_DATA]); @@ -3658,6 +3703,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, udata->len = ulen - 1; nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); } + if (obj) { + *nft_set_ext_obj(ext) = obj; + obj->use++; + } trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) @@ -3667,10 +3716,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = set->ops->insert(ctx->net, set, &elem, &ext2); if (err) { if (err == -EEXIST) { - if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && - nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && - memcmp(nft_set_ext_data(ext), - nft_set_ext_data(ext2), set->dlen) != 0) + if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && + nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && + memcmp(nft_set_ext_data(ext), + nft_set_ext_data(ext2), set->dlen) != 0) || + (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) && + nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF) && + *nft_set_ext_obj(ext) != *nft_set_ext_obj(ext2))) err = -EBUSY; else if (!(nlmsg_flags & NLM_F_EXCL)) err = 0; -- cgit v1.2.3 From 8411b6442e59810fe0750a2f321b9dcb7d0a3d17 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 5 Dec 2016 23:35:50 +0100 Subject: netfilter: nf_tables: support for set flushing This patch adds support for set flushing, that consists of walking over the set elements if the NFTA_SET_ELEM_LIST_ELEMENTS attribute is set. This patch requires the following changes: 1) Add set->ops->deactivate_one() operation: This allows us to deactivate an element from the set element walk path, given we can skip the lookup that happens in ->deactivate(). 2) Add a new nft_trans_alloc_gfp() function since we need to allocate transactions using GFP_ATOMIC given the set walk path happens with held rcu_read_lock. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 ++++- net/netfilter/nf_tables_api.c | 55 ++++++++++++++++++++++++++++++++++----- net/netfilter/nft_set_hash.c | 1 + net/netfilter/nft_set_rbtree.c | 1 + 4 files changed, 56 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 85f0f03f1e87..924325c46aab 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -259,7 +259,8 @@ struct nft_expr; * @lookup: look up an element within the set * @insert: insert new element into set * @activate: activate new element in the next generation - * @deactivate: deactivate element in the next generation + * @deactivate: lookup for element and deactivate it in the next generation + * @deactivate_one: deactivate element in the next generation * @remove: remove element from set * @walk: iterate over all set elemeennts * @privsize: function to return size of set private data @@ -294,6 +295,9 @@ struct nft_set_ops { void * (*deactivate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); + bool (*deactivate_one)(const struct net *net, + const struct nft_set *set, + void *priv); void (*remove)(const struct nft_set *set, const struct nft_set_elem *elem); void (*walk)(const struct nft_ctx *ctx, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b42059795819..a019a87e58ee 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -111,12 +111,12 @@ static void nft_ctx_init(struct nft_ctx *ctx, ctx->seq = nlh->nlmsg_seq; } -static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, - int msg_type, u32 size) +static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, + int msg_type, u32 size, gfp_t gfp) { struct nft_trans *trans; - trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL); + trans = kzalloc(sizeof(struct nft_trans) + size, gfp); if (trans == NULL) return NULL; @@ -126,6 +126,12 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, return trans; } +static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, + int msg_type, u32 size) +{ + return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); +} + static void nft_trans_destroy(struct nft_trans *trans) { list_del(&trans->list); @@ -3876,6 +3882,34 @@ err1: return err; } +static int nft_flush_set(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem) +{ + struct nft_trans *trans; + int err; + + trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM, + sizeof(struct nft_trans_elem), GFP_ATOMIC); + if (!trans) + return -ENOMEM; + + if (!set->ops->deactivate_one(ctx->net, set, elem->priv)) { + err = -ENOENT; + goto err1; + } + + nft_trans_elem_set(trans) = (struct nft_set *)set; + nft_trans_elem(trans) = *((struct nft_set_elem *)elem); + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + + return 0; +err1: + kfree(trans); + return err; +} + static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -3886,9 +3920,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct nft_ctx ctx; int rem, err = 0; - if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) - return -EINVAL; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; @@ -3900,6 +3931,18 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) return -EBUSY; + if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) { + struct nft_set_dump_args args = { + .iter = { + .genmask = genmask, + .fn = nft_flush_set, + }, + }; + set->ops->walk(&ctx, set, &args.iter); + + return args.iter.err; + } + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_del_setelem(&ctx, set, attr); if (err < 0) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 73f7687c5656..1e20e2bbb6d9 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -397,6 +397,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = { .insert = nft_hash_insert, .activate = nft_hash_activate, .deactivate = nft_hash_deactivate, + .deactivate_one = nft_hash_deactivate_one, .remove = nft_hash_remove, .lookup = nft_hash_lookup, .update = nft_hash_update, diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 5580bb64dc0f..08376e50f6cd 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -304,6 +304,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = { .insert = nft_rbtree_insert, .remove = nft_rbtree_remove, .deactivate = nft_rbtree_deactivate, + .deactivate_one = nft_rbtree_deactivate_one, .activate = nft_rbtree_activate, .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, -- cgit v1.2.3 From 5b8e2f61b9df529ca4af057daf7bfb1de348bdf1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Dec 2016 19:32:50 -0800 Subject: net: sock_rps_record_flow() is for connected sockets Paolo noticed a cache line miss in UDP recvmsg() to access sk_rxhash, sharing a cache line with sk_drops. sk_drops might be heavily incremented by cpus handling a flood targeting this socket. We might place sk_drops on a separate cache line, but lets try to avoid wasting 64 bytes per socket just for this, since we have other bottlenecks to take care of. sock_rps_record_flow() should only access sk_rxhash for connected flows. Testing sk_state for TCP_ESTABLISHED covers most of the cases for connected sockets, for a zero cost, since system calls using sock_rps_record_flow() also access sk->sk_prot which is on the same cache line. A follow up patch will provide a static_key (Jump Label) since most hosts do not even use RFS. Signed-off-by: Eric Dumazet Reported-by: Paolo Abeni Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/sock.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 6dfe3aa22b97..1749e38d0301 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -913,7 +913,17 @@ static inline void sock_rps_record_flow_hash(__u32 hash) static inline void sock_rps_record_flow(const struct sock *sk) { #ifdef CONFIG_RPS - sock_rps_record_flow_hash(sk->sk_rxhash); + /* Reading sk->sk_rxhash might incur an expensive cache line miss. + * + * TCP_ESTABLISHED does cover almost all states where RFS + * might be useful, and is cheaper [1] than testing : + * IPv4: inet_sk(sk)->inet_daddr + * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) + * OR an additional socket flag + * [1] : sk_state and sk_prot are in the same cache line. + */ + if (sk->sk_state == TCP_ESTABLISHED) + sock_rps_record_flow_hash(sk->sk_rxhash); #endif } -- cgit v1.2.3 From 972d3876faa8a9195122b2d2bcd3155f904fff37 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 7 Dec 2016 13:48:27 +0100 Subject: flow dissector: ICMP support Allow dissection of ICMP(V6) type and code. This should only occur if a packet is ICMP(V6) and the dissector has FLOW_DISSECTOR_KEY_ICMP set. There are currently no users of FLOW_DISSECTOR_KEY_ICMP. A follow-up patch will allow FLOW_DISSECTOR_KEY_ICMP to be used by the flower classifier. Signed-off-by: Simon Horman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 17 +++++++++++++++++ net/core/flow_dissector.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) (limited to 'include/net') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index c4f31666afd2..d896a33e00d4 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -104,6 +104,22 @@ struct flow_dissector_key_ports { }; }; +/** + * flow_dissector_key_icmp: + * @ports: type and code of ICMP header + * icmp: ICMP type (high) and code (low) + * type: ICMP type + * code: ICMP code + */ +struct flow_dissector_key_icmp { + union { + __be16 icmp; + struct { + u8 type; + u8 code; + }; + }; +}; /** * struct flow_dissector_key_eth_addrs: @@ -122,6 +138,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */ FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */ FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ + FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */ diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 1eb6f949e5b2..d6447dc10371 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -57,6 +57,28 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, } EXPORT_SYMBOL(skb_flow_dissector_init); +/** + * skb_flow_get_be16 - extract be16 entity + * @skb: sk_buff to extract from + * @poff: offset to extract at + * @data: raw buffer pointer to the packet + * @hlen: packet header length + * + * The function will try to retrieve a be32 entity at + * offset poff + */ +__be16 skb_flow_get_be16(const struct sk_buff *skb, int poff, void *data, + int hlen) +{ + __be16 *u, _u; + + u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u); + if (u) + return *u; + + return 0; +} + /** * __skb_flow_get_ports - extract the upper layer ports and return them * @skb: sk_buff to extract the ports from @@ -117,6 +139,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_ports *key_ports; + struct flow_dissector_key_icmp *key_icmp; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_vlan *key_vlan; struct flow_dissector_key_keyid *key_keyid; @@ -546,6 +569,14 @@ ip_proto_again: data, hlen); } + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ICMP)) { + key_icmp = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ICMP, + target_container); + key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen); + } + out_good: ret = true; -- cgit v1.2.3 From 13bfff25c081f4e060af761c4082b5a96f756810 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Dec 2016 08:29:10 -0800 Subject: net: rfs: add a jump label RFS is not commonly used, so add a jump label to avoid some conditionals in fast path. Signed-off-by: Eric Dumazet Cc: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + include/net/sock.h | 25 ++++++++++++++----------- net/core/dev.c | 2 ++ net/core/sysctl_net_core.c | 5 ++++- 4 files changed, 21 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1ff5ea6e1221..994f7423a74b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -192,6 +192,7 @@ struct net_device_stats { #ifdef CONFIG_RPS #include extern struct static_key rps_needed; +extern struct static_key rfs_needed; #endif struct neighbour; diff --git a/include/net/sock.h b/include/net/sock.h index 1749e38d0301..2729e77950b7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -913,17 +913,20 @@ static inline void sock_rps_record_flow_hash(__u32 hash) static inline void sock_rps_record_flow(const struct sock *sk) { #ifdef CONFIG_RPS - /* Reading sk->sk_rxhash might incur an expensive cache line miss. - * - * TCP_ESTABLISHED does cover almost all states where RFS - * might be useful, and is cheaper [1] than testing : - * IPv4: inet_sk(sk)->inet_daddr - * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) - * OR an additional socket flag - * [1] : sk_state and sk_prot are in the same cache line. - */ - if (sk->sk_state == TCP_ESTABLISHED) - sock_rps_record_flow_hash(sk->sk_rxhash); + if (static_key_false(&rfs_needed)) { + /* Reading sk->sk_rxhash might incur an expensive cache line + * miss. + * + * TCP_ESTABLISHED does cover almost all states where RFS + * might be useful, and is cheaper [1] than testing : + * IPv4: inet_sk(sk)->inet_daddr + * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) + * OR an additional socket flag + * [1] : sk_state and sk_prot are in the same cache line. + */ + if (sk->sk_state == TCP_ESTABLISHED) + sock_rps_record_flow_hash(sk->sk_rxhash); + } #endif } diff --git a/net/core/dev.c b/net/core/dev.c index bffb5253e778..1d33ce03365f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3447,6 +3447,8 @@ EXPORT_SYMBOL(rps_cpu_mask); struct static_key rps_needed __read_mostly; EXPORT_SYMBOL(rps_needed); +struct static_key rfs_needed __read_mostly; +EXPORT_SYMBOL(rfs_needed); static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 0df2aa652530..2a46e4009f62 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -79,10 +79,13 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, if (sock_table != orig_sock_table) { rcu_assign_pointer(rps_sock_flow_table, sock_table); - if (sock_table) + if (sock_table) { static_key_slow_inc(&rps_needed); + static_key_slow_inc(&rfs_needed); + } if (orig_sock_table) { static_key_slow_dec(&rps_needed); + static_key_slow_dec(&rfs_needed); synchronize_rcu(); vfree(orig_sock_table); } -- cgit v1.2.3 From 3665f3817cd354ab7a811b3a4f282c4f5cb1a0d0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Dec 2016 10:05:36 -0800 Subject: net: do not read sk_drops if application does not care sk_drops can be an often written field, do not read it unless application showed interest. Note that sk_drops can be read via inet_diag, so applications can avoid getting this info from every received packet. In the future, 'reading' sk_drops might require folding per node or per cpu fields, and thus become even more expensive than today. Signed-off-by: Eric Dumazet Cc: Paolo Abeni Signed-off-by: David S. Miller --- include/net/sock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 2729e77950b7..e17aa3de2b4d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2163,7 +2163,8 @@ struct sock_skb_cb { static inline void sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb) { - SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops); + SOCK_SKB_CB(skb)->dropcount = sock_flag(sk, SOCK_RXQ_OVFL) ? + atomic_read(&sk->sk_drops) : 0; } static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb) -- cgit v1.2.3 From e6f462df9acd2a3295e5d34eb29e2823220cf129 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 8 Dec 2016 17:22:09 +0100 Subject: cfg80211/mac80211: fix BSS leaks when abandoning assoc attempts When mac80211 abandons an association attempt, it may free all the data structures, but inform cfg80211 and userspace about it only by sending the deauth frame it received, in which case cfg80211 has no link to the BSS struct that was used and will not cfg80211_unhold_bss() it. Fix this by providing a way to inform cfg80211 of this with the BSS entry passed, so that it can clean up properly, and use this ability in the appropriate places in mac80211. This isn't ideal: some code is more or less duplicated and tracing is missing. However, it's a fairly small change and it's thus easier to backport - cleanups can come later. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 +++++++++++ net/mac80211/mlme.c | 21 ++++++++++++--------- net/wireless/core.h | 1 + net/wireless/mlme.c | 12 ++++++++++++ net/wireless/sme.c | 14 ++++++++++++++ 5 files changed, 50 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2019310cf135..814be4b4200c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4685,6 +4685,17 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, */ void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss); +/** + * cfg80211_abandon_assoc - notify cfg80211 of abandoned association attempt + * @dev: network device + * @bss: The BSS entry with which association was abandoned. + * + * Call this whenever - for reasons reported through other API, like deauth RX, + * an association attempt was abandoned. + * This function may sleep. The caller must hold the corresponding wdev's mutex. + */ +void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss); + /** * cfg80211_tx_mlme_mgmt - notification of transmitted deauth/disassoc frame * @dev: network device diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d157b250ff77..098ce9b179ee 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2518,7 +2518,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, } static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, - bool assoc) + bool assoc, bool abandon) { struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data; @@ -2541,6 +2541,9 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, mutex_lock(&sdata->local->mtx); ieee80211_vif_release_channel(sdata); mutex_unlock(&sdata->local->mtx); + + if (abandon) + cfg80211_abandon_assoc(sdata->dev, assoc_data->bss); } kfree(assoc_data); @@ -2773,7 +2776,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, bssid, reason_code, ieee80211_get_reason_code_string(reason_code)); - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, true); cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); return; @@ -3182,14 +3185,14 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (status_code != WLAN_STATUS_SUCCESS) { sdata_info(sdata, "%pM denied association (code=%d)\n", mgmt->sa, status_code); - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); event.u.mlme.status = MLME_DENIED; event.u.mlme.reason = status_code; drv_event_callback(sdata->local, sdata, &event); } else { if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, bss); return; } @@ -3202,7 +3205,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, * recalc after assoc_data is NULL but before associated * is set can cause the interface to go idle */ - ieee80211_destroy_assoc_data(sdata, true); + ieee80211_destroy_assoc_data(sdata, true, false); /* get uapsd queues configuration */ uapsd_queues = 0; @@ -3901,7 +3904,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) .u.mlme.status = MLME_TIMEOUT, }; - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, bss); drv_event_callback(sdata->local, sdata, &event); } @@ -4040,7 +4043,7 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) WLAN_REASON_DEAUTH_LEAVING, false, frame_buf); if (ifmgd->assoc_data) - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, true); if (ifmgd->auth_data) ieee80211_destroy_auth_data(sdata, false); cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, @@ -4949,7 +4952,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, IEEE80211_STYPE_DEAUTH, req->reason_code, tx, frame_buf); - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, true); ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, req->reason_code); @@ -5024,7 +5027,7 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) sdata_lock(sdata); if (ifmgd->assoc_data) { struct cfg80211_bss *bss = ifmgd->assoc_data->bss; - ieee80211_destroy_assoc_data(sdata, false); + ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, bss); } if (ifmgd->auth_data) diff --git a/net/wireless/core.h b/net/wireless/core.h index fb2fcd5581fe..9820fa251daa 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -409,6 +409,7 @@ void cfg80211_sme_disassoc(struct wireless_dev *wdev); void cfg80211_sme_deauth(struct wireless_dev *wdev); void cfg80211_sme_auth_timeout(struct wireless_dev *wdev); void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev); +void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev); /* internal helpers */ bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index bd1f7a159d6a..4646cf5695b9 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -149,6 +149,18 @@ void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss) } EXPORT_SYMBOL(cfg80211_assoc_timeout); +void cfg80211_abandon_assoc(struct net_device *dev, struct cfg80211_bss *bss) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + + cfg80211_sme_abandon_assoc(wdev); + + cfg80211_unhold_bss(bss_from_pub(bss)); + cfg80211_put_bss(wiphy, bss); +} +EXPORT_SYMBOL(cfg80211_abandon_assoc); + void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 2b5bb380414b..5e0d19380302 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -39,6 +39,7 @@ struct cfg80211_conn { CFG80211_CONN_ASSOCIATING, CFG80211_CONN_ASSOC_FAILED, CFG80211_CONN_DEAUTH, + CFG80211_CONN_ABANDON, CFG80211_CONN_CONNECTED, } state; u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; @@ -206,6 +207,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, WLAN_REASON_DEAUTH_LEAVING, false); + /* fall through */ + case CFG80211_CONN_ABANDON: /* free directly, disconnected event already sent */ cfg80211_sme_free(wdev); return 0; @@ -423,6 +426,17 @@ void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev) schedule_work(&rdev->conn_work); } +void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + + if (!wdev->conn) + return; + + wdev->conn->state = CFG80211_CONN_ABANDON; + schedule_work(&rdev->conn_work); +} + static int cfg80211_sme_get_conn_ies(struct wireless_dev *wdev, const u8 *ies, size_t ies_len, const u8 **out_ies, size_t *out_ies_len) -- cgit v1.2.3 From 3e1ed981b7a903ba81199d4d25b80c6bba705160 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 13 Dec 2016 22:30:15 +0300 Subject: netlink: revert broken, broken "2-clause nla_ok()" Commit 4f7df337fe79bba1e4c2d525525d63b5ba186bbd "netlink: 2-clause nla_ok()" is BROKEN. First clause tests if "->nla_len" could even be accessed at all, it can not possibly be omitted. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netlink.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index dd657a33f8c3..d3938f11ae52 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -698,7 +698,8 @@ static inline int nla_len(const struct nlattr *nla) */ static inline int nla_ok(const struct nlattr *nla, int remaining) { - return nla->nla_len >= sizeof(*nla) && + return remaining >= (int) sizeof(*nla) && + nla->nla_len >= sizeof(*nla) && nla->nla_len <= remaining; } -- cgit v1.2.3 From 0643ee4fd1b79c1af3bd7bc8968dbf5fd047f490 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 14 Dec 2016 16:54:16 -0800 Subject: inet: Fix get port to handle zero port number with soreuseport set A user may call listen with binding an explicit port with the intent that the kernel will assign an available port to the socket. In this case inet_csk_get_port does a port scan. For such sockets, the user may also set soreuseport with the intent a creating more sockets for the port that is selected. The problem is that the initial socket being opened could inadvertently choose an existing and unreleated port number that was already created with soreuseport. This patch adds a boolean parameter to inet_bind_conflict that indicates rather soreuseport is allowed for the check (in addition to sk->sk_reuseport). In calls to inet_bind_conflict from inet_csk_get_port the argument is set to true if an explicit port is being looked up (snum argument is nonzero), and is false if port scan is done. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/inet6_connection_sock.h | 3 ++- include/net/inet_connection_sock.h | 6 ++++-- net/ipv4/inet_connection_sock.c | 14 +++++++++----- net/ipv6/inet6_connection_sock.c | 7 ++++--- 4 files changed, 19 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 954ad6bfb56a..3212b39b5bfc 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -22,7 +22,8 @@ struct sock; struct sockaddr; int inet6_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax); + const struct inet_bind_bucket *tb, bool relax, + bool soreuseport_ok); struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, const struct request_sock *req, u8 proto); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 146054ceea8e..85ee3879499e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -63,7 +63,8 @@ struct inet_connection_sock_af_ops { #endif void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); int (*bind_conflict)(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax); + const struct inet_bind_bucket *tb, + bool relax, bool soreuseport_ok); void (*mtu_reduced)(struct sock *sk); }; @@ -261,7 +262,8 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk, struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); int inet_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax); + const struct inet_bind_bucket *tb, bool relax, + bool soreuseport_ok); int inet_csk_get_port(struct sock *sk, unsigned short snum); struct dst_entry *inet_csk_route_req(const struct sock *sk, struct flowi4 *fl4, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f59838a60ea5..19ea045c50ed 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -45,11 +45,12 @@ void inet_get_local_port_range(struct net *net, int *low, int *high) EXPORT_SYMBOL(inet_get_local_port_range); int inet_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax) + const struct inet_bind_bucket *tb, bool relax, + bool reuseport_ok) { struct sock *sk2; - int reuse = sk->sk_reuse; - int reuseport = sk->sk_reuseport; + bool reuse = sk->sk_reuse; + bool reuseport = !!sk->sk_reuseport && reuseport_ok; kuid_t uid = sock_i_uid((struct sock *)sk); /* @@ -105,6 +106,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) struct inet_bind_bucket *tb; kuid_t uid = sock_i_uid(sk); u32 remaining, offset; + bool reuseport_ok = !!snum; if (port) { have_port: @@ -165,7 +167,8 @@ other_parity_scan: smallest_size = tb->num_owners; smallest_port = port; } - if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) + if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false, + reuseport_ok)) goto tb_found; goto next_port; } @@ -206,7 +209,8 @@ tb_found: sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && smallest_size == -1) goto success; - if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { + if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true, + reuseport_ok)) { if ((reuse || (tb->fastreuseport > 0 && sk->sk_reuseport && diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 1c86c478f578..7396e75e161b 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -29,11 +29,12 @@ #include int inet6_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb, bool relax) + const struct inet_bind_bucket *tb, bool relax, + bool reuseport_ok) { const struct sock *sk2; - int reuse = sk->sk_reuse; - int reuseport = sk->sk_reuseport; + bool reuse = !!sk->sk_reuse; + bool reuseport = !!sk->sk_reuseport && reuseport_ok; kuid_t uid = sock_i_uid((struct sock *)sk); /* We must walk the whole port owner list in this case. -DaveM */ -- cgit v1.2.3 From 7c0f6ba682b9c7632072ffbedf8d328c8f3c42ba Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 24 Dec 2016 11:46:01 -0800 Subject: Replace with globally This was entirely automated, using the script by Al: PATT='^[[:blank:]]*#[[:blank:]]*include[[:blank:]]*' sed -i -e "s!$PATT!#include !" \ $(git grep -l "$PATT"|grep -v ^include/linux/uaccess.h) to do the replacement at the end of the merge window. Requested-by: Al Viro Signed-off-by: Linus Torvalds --- arch/alpha/boot/misc.c | 2 +- arch/alpha/kernel/irq.c | 2 +- arch/alpha/kernel/osf_sys.c | 2 +- arch/alpha/kernel/process.c | 2 +- arch/alpha/kernel/ptrace.c | 2 +- arch/alpha/kernel/setup.c | 2 +- arch/alpha/kernel/signal.c | 2 +- arch/alpha/kernel/srm_env.c | 2 +- arch/alpha/kernel/srmcons.c | 2 +- arch/alpha/kernel/time.c | 2 +- arch/alpha/kernel/traps.c | 2 +- arch/alpha/lib/csum_partial_copy.c | 2 +- arch/alpha/math-emu/math.c | 2 +- arch/alpha/mm/init.c | 2 +- arch/arm/common/bL_switcher_dummy_if.c | 2 +- arch/arm/kernel/swp_emulate.c | 2 +- arch/arm/kvm/arm.c | 2 +- arch/arm/kvm/guest.c | 2 +- arch/arm/mach-iop13xx/irq.c | 2 +- arch/arm/mach-ixp4xx/common.c | 2 +- arch/arm/mach-rpc/dma.c | 2 +- arch/arm/plat-iop/time.c | 2 +- arch/arm64/include/asm/word-at-a-time.h | 2 +- arch/arm64/kernel/armv8_deprecated.c | 2 +- arch/arm64/kernel/entry.S | 2 +- arch/arm64/kernel/probes/kprobes.c | 2 +- arch/arm64/kernel/signal32.c | 2 +- arch/arm64/kvm/guest.c | 2 +- arch/arm64/lib/clear_user.S | 2 +- arch/arm64/lib/copy_from_user.S | 2 +- arch/arm64/lib/copy_in_user.S | 2 +- arch/arm64/lib/copy_to_user.S | 2 +- arch/arm64/mm/cache.S | 2 +- arch/arm64/xen/hypercall.S | 2 +- arch/avr32/kernel/avr32_ksyms.c | 2 +- arch/avr32/kernel/ptrace.c | 2 +- arch/avr32/kernel/signal.c | 2 +- arch/avr32/mm/cache.c | 2 +- arch/blackfin/kernel/bfin_dma.c | 2 +- arch/blackfin/kernel/kgdb_test.c | 2 +- arch/blackfin/kernel/module.c | 2 +- arch/c6x/mm/init.c | 2 +- arch/cris/arch-v10/drivers/eeprom.c | 2 +- arch/cris/arch-v10/drivers/sync_serial.c | 2 +- arch/cris/arch-v10/kernel/ptrace.c | 2 +- arch/cris/arch-v10/kernel/signal.c | 2 +- arch/cris/arch-v10/kernel/traps.c | 2 +- arch/cris/arch-v10/lib/usercopy.c | 2 +- arch/cris/arch-v10/mm/fault.c | 2 +- arch/cris/arch-v32/drivers/cryptocop.c | 2 +- arch/cris/arch-v32/kernel/ptrace.c | 2 +- arch/cris/arch-v32/kernel/signal.c | 2 +- arch/cris/arch-v32/kernel/traps.c | 2 +- arch/cris/arch-v32/lib/usercopy.c | 2 +- arch/cris/kernel/crisksyms.c | 2 +- arch/cris/kernel/process.c | 2 +- arch/cris/kernel/profile.c | 2 +- arch/cris/kernel/ptrace.c | 2 +- arch/cris/kernel/sys_cris.c | 2 +- arch/cris/kernel/traps.c | 2 +- arch/frv/include/asm/futex.h | 2 +- arch/frv/kernel/irq.c | 2 +- arch/frv/kernel/pm-mb93093.c | 2 +- arch/frv/kernel/pm.c | 2 +- arch/frv/kernel/process.c | 2 +- arch/frv/kernel/ptrace.c | 2 +- arch/frv/kernel/signal.c | 2 +- arch/frv/kernel/sys_frv.c | 2 +- arch/frv/kernel/sysctl.c | 2 +- arch/frv/kernel/traps.c | 2 +- arch/frv/kernel/uaccess.c | 2 +- arch/frv/mm/dma-alloc.c | 2 +- arch/frv/mm/extable.c | 2 +- arch/h8300/boot/compressed/misc.c | 2 +- arch/h8300/kernel/process.c | 2 +- arch/h8300/kernel/signal.c | 2 +- arch/hexagon/kernel/hexagon_ksyms.c | 2 +- arch/hexagon/kernel/signal.c | 2 +- arch/hexagon/mm/uaccess.c | 2 +- arch/hexagon/mm/vm_fault.c | 2 +- arch/ia64/kernel/brl_emu.c | 2 +- arch/ia64/kernel/crash_dump.c | 2 +- arch/ia64/kernel/init_task.c | 2 +- arch/ia64/kernel/irq.c | 2 +- arch/ia64/kernel/kprobes.c | 2 +- arch/ia64/kernel/perfmon.c | 2 +- arch/ia64/kernel/process.c | 2 +- arch/ia64/kernel/ptrace.c | 2 +- arch/ia64/kernel/salinfo.c | 2 +- arch/ia64/kernel/signal.c | 2 +- arch/ia64/kernel/sys_ia64.c | 2 +- arch/ia64/kernel/traps.c | 2 +- arch/ia64/kernel/unaligned.c | 2 +- arch/ia64/kernel/unwind.c | 2 +- arch/ia64/lib/csum_partial_copy.c | 2 +- arch/ia64/mm/extable.c | 2 +- arch/ia64/mm/init.c | 2 +- arch/ia64/sn/kernel/sn2/sn_hwperf.c | 2 +- arch/ia64/sn/kernel/sn2/sn_proc_fs.c | 2 +- arch/ia64/sn/kernel/tiocx.c | 2 +- arch/m32r/kernel/align.c | 2 +- arch/m32r/kernel/irq.c | 2 +- arch/m32r/kernel/m32r_ksyms.c | 2 +- arch/m32r/kernel/process.c | 2 +- arch/m32r/kernel/ptrace.c | 2 +- arch/m32r/kernel/signal.c | 2 +- arch/m32r/kernel/sys_m32r.c | 2 +- arch/m32r/kernel/traps.c | 2 +- arch/m32r/lib/csum_partial_copy.c | 2 +- arch/m32r/lib/usercopy.c | 2 +- arch/m32r/mm/extable.c | 2 +- arch/m32r/mm/fault-nommu.c | 2 +- arch/m68k/bvme6000/rtc.c | 2 +- arch/m68k/kernel/process.c | 2 +- arch/m68k/kernel/ptrace.c | 2 +- arch/m68k/kernel/signal.c | 2 +- arch/m68k/kernel/sys_m68k.c | 2 +- arch/m68k/kernel/traps.c | 2 +- arch/m68k/lib/uaccess.c | 2 +- arch/m68k/mac/misc.c | 2 +- arch/m68k/mm/init.c | 2 +- arch/m68k/mm/motorola.c | 2 +- arch/m68k/mm/sun3mmu.c | 2 +- arch/m68k/mvme16x/rtc.c | 2 +- arch/m68k/sun3/mmu_emu.c | 2 +- arch/metag/kernel/irq.c | 2 +- arch/mips/alchemy/common/power.c | 2 +- arch/mips/dec/kn01-berr.c | 2 +- arch/mips/include/asm/checksum.h | 2 +- arch/mips/include/asm/compat-signal.h | 2 +- arch/mips/include/asm/r4kcache.h | 2 +- arch/mips/include/asm/termios.h | 2 +- arch/mips/jazz/jazzdma.c | 2 +- arch/mips/kernel/branch.c | 2 +- arch/mips/kernel/cpu-probe.c | 2 +- arch/mips/kernel/crash_dump.c | 2 +- arch/mips/kernel/irq.c | 2 +- arch/mips/kernel/kgdb.c | 2 +- arch/mips/kernel/linux32.c | 2 +- arch/mips/kernel/mips-mt-fpaff.c | 2 +- arch/mips/kernel/mips-r2-to-r6-emul.c | 2 +- arch/mips/kernel/mips_ksyms.c | 2 +- arch/mips/kernel/process.c | 2 +- arch/mips/kernel/ptrace.c | 2 +- arch/mips/kernel/ptrace32.c | 2 +- arch/mips/kernel/signal32.c | 2 +- arch/mips/kernel/signal_n32.c | 2 +- arch/mips/kernel/syscall.c | 2 +- arch/mips/kernel/traps.c | 2 +- arch/mips/kernel/unaligned.c | 2 +- arch/mips/math-emu/cp1emu.c | 2 +- arch/mips/math-emu/dsemul.c | 2 +- arch/mips/mm/extable.c | 2 +- arch/mips/mm/sc-debugfs.c | 2 +- arch/mips/oprofile/op_model_loongson3.c | 2 +- arch/mips/sgi-ip22/ip28-berr.c | 2 +- arch/mips/sgi-ip27/ip27-berr.c | 2 +- arch/mips/sgi-ip32/ip32-berr.c | 2 +- arch/mips/sibyte/common/sb_tbprof.c | 2 +- arch/mn10300/kernel/fpu.c | 2 +- arch/mn10300/kernel/mn10300_ksyms.c | 2 +- arch/mn10300/kernel/process.c | 2 +- arch/mn10300/kernel/ptrace.c | 2 +- arch/mn10300/kernel/setup.c | 2 +- arch/mn10300/kernel/signal.c | 2 +- arch/mn10300/kernel/sys_mn10300.c | 2 +- arch/mn10300/lib/checksum.c | 2 +- arch/mn10300/mm/cache-smp.c | 2 +- arch/mn10300/mm/cache.c | 2 +- arch/mn10300/mm/extable.c | 2 +- arch/mn10300/mm/init.c | 2 +- arch/mn10300/mm/misalignment.c | 2 +- arch/mn10300/proc-mn2ws0050/proc-init.c | 2 +- arch/nios2/kernel/traps.c | 2 +- arch/openrisc/kernel/or32_ksyms.c | 2 +- arch/openrisc/kernel/process.c | 2 +- arch/openrisc/kernel/signal.c | 2 +- arch/openrisc/kernel/traps.c | 2 +- arch/openrisc/mm/fault.c | 2 +- arch/parisc/kernel/asm-offsets.c | 2 +- arch/parisc/kernel/parisc_ksyms.c | 2 +- arch/parisc/kernel/pci-dma.c | 2 +- arch/parisc/kernel/perf.c | 2 +- arch/parisc/kernel/ptrace.c | 2 +- arch/parisc/kernel/signal.c | 2 +- arch/parisc/kernel/signal32.c | 2 +- arch/parisc/kernel/sys_parisc.c | 2 +- arch/parisc/kernel/time.c | 2 +- arch/parisc/kernel/unaligned.c | 2 +- arch/parisc/kernel/unwind.c | 2 +- arch/parisc/lib/checksum.c | 2 +- arch/powerpc/include/asm/asm-prototypes.h | 2 +- arch/powerpc/kernel/align.c | 2 +- arch/powerpc/kernel/crash_dump.c | 2 +- arch/powerpc/kernel/hw_breakpoint.c | 2 +- arch/powerpc/kernel/irq.c | 2 +- arch/powerpc/kernel/kprobes.c | 2 +- arch/powerpc/kernel/module.c | 2 +- arch/powerpc/kernel/nvram_64.c | 2 +- arch/powerpc/kernel/pci_32.c | 2 +- arch/powerpc/kernel/proc_powerpc.c | 2 +- arch/powerpc/kernel/ptrace.c | 2 +- arch/powerpc/kernel/ptrace32.c | 2 +- arch/powerpc/kernel/rtas-proc.c | 2 +- arch/powerpc/kernel/rtas.c | 2 +- arch/powerpc/kernel/rtas_flash.c | 2 +- arch/powerpc/kernel/rtasd.c | 2 +- arch/powerpc/kernel/setup_32.c | 2 +- arch/powerpc/kernel/signal.c | 2 +- arch/powerpc/kernel/signal_32.c | 2 +- arch/powerpc/kernel/signal_64.c | 2 +- arch/powerpc/kernel/sys_ppc32.c | 2 +- arch/powerpc/kernel/syscalls.c | 2 +- arch/powerpc/kernel/time.c | 2 +- arch/powerpc/kernel/traps.c | 2 +- arch/powerpc/kernel/vecemu.c | 2 +- arch/powerpc/kvm/book3s.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 2 +- arch/powerpc/kvm/book3s_pr.c | 2 +- arch/powerpc/kvm/book3s_pr_papr.c | 2 +- arch/powerpc/kvm/book3s_rtas.c | 2 +- arch/powerpc/kvm/book3s_xics.c | 2 +- arch/powerpc/kvm/booke.c | 2 +- arch/powerpc/kvm/mpic.c | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- arch/powerpc/lib/checksum_wrappers.c | 2 +- arch/powerpc/lib/code-patching.c | 2 +- arch/powerpc/lib/sstep.c | 2 +- arch/powerpc/lib/usercopy_64.c | 2 +- arch/powerpc/math-emu/fabs.c | 2 +- arch/powerpc/math-emu/fadd.c | 2 +- arch/powerpc/math-emu/fadds.c | 2 +- arch/powerpc/math-emu/fcmpo.c | 2 +- arch/powerpc/math-emu/fcmpu.c | 2 +- arch/powerpc/math-emu/fctiw.c | 2 +- arch/powerpc/math-emu/fctiwz.c | 2 +- arch/powerpc/math-emu/fdiv.c | 2 +- arch/powerpc/math-emu/fdivs.c | 2 +- arch/powerpc/math-emu/fmadd.c | 2 +- arch/powerpc/math-emu/fmadds.c | 2 +- arch/powerpc/math-emu/fmr.c | 2 +- arch/powerpc/math-emu/fmsub.c | 2 +- arch/powerpc/math-emu/fmsubs.c | 2 +- arch/powerpc/math-emu/fmul.c | 2 +- arch/powerpc/math-emu/fmuls.c | 2 +- arch/powerpc/math-emu/fnabs.c | 2 +- arch/powerpc/math-emu/fneg.c | 2 +- arch/powerpc/math-emu/fnmadd.c | 2 +- arch/powerpc/math-emu/fnmadds.c | 2 +- arch/powerpc/math-emu/fnmsub.c | 2 +- arch/powerpc/math-emu/fnmsubs.c | 2 +- arch/powerpc/math-emu/fre.c | 2 +- arch/powerpc/math-emu/fres.c | 2 +- arch/powerpc/math-emu/frsp.c | 2 +- arch/powerpc/math-emu/frsqrte.c | 2 +- arch/powerpc/math-emu/frsqrtes.c | 2 +- arch/powerpc/math-emu/fsel.c | 2 +- arch/powerpc/math-emu/fsqrt.c | 2 +- arch/powerpc/math-emu/fsqrts.c | 2 +- arch/powerpc/math-emu/fsub.c | 2 +- arch/powerpc/math-emu/fsubs.c | 2 +- arch/powerpc/math-emu/lfd.c | 2 +- arch/powerpc/math-emu/lfs.c | 2 +- arch/powerpc/math-emu/math.c | 2 +- arch/powerpc/math-emu/math_efp.c | 2 +- arch/powerpc/math-emu/mcrfs.c | 2 +- arch/powerpc/math-emu/mffs.c | 2 +- arch/powerpc/math-emu/mtfsb0.c | 2 +- arch/powerpc/math-emu/mtfsb1.c | 2 +- arch/powerpc/math-emu/mtfsf.c | 2 +- arch/powerpc/math-emu/mtfsfi.c | 2 +- arch/powerpc/math-emu/stfd.c | 2 +- arch/powerpc/math-emu/stfiwx.c | 2 +- arch/powerpc/math-emu/stfs.c | 2 +- arch/powerpc/mm/40x_mmu.c | 2 +- arch/powerpc/mm/fsl_booke_mmu.c | 2 +- arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/init_64.c | 2 +- arch/powerpc/mm/subpage-prot.c | 2 +- arch/powerpc/platforms/cell/spufs/coredump.c | 2 +- arch/powerpc/platforms/cell/spufs/file.c | 2 +- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- arch/powerpc/platforms/cell/spufs/syscalls.c | 2 +- arch/powerpc/platforms/chrp/nvram.c | 2 +- arch/powerpc/platforms/powernv/opal-elog.c | 2 +- arch/powerpc/platforms/powernv/opal-lpc.c | 2 +- arch/powerpc/platforms/powernv/opal-prd.c | 2 +- arch/powerpc/platforms/pseries/cmm.c | 2 +- arch/powerpc/platforms/pseries/dlpar.c | 2 +- arch/powerpc/platforms/pseries/dtl.c | 2 +- arch/powerpc/platforms/pseries/lparcfg.c | 2 +- arch/powerpc/platforms/pseries/nvram.c | 2 +- arch/powerpc/platforms/pseries/reconfig.c | 2 +- arch/powerpc/platforms/pseries/scanlog.c | 2 +- arch/powerpc/sysdev/scom.c | 2 +- arch/powerpc/sysdev/tsi108_pci.c | 2 +- arch/s390/appldata/appldata_base.c | 2 +- arch/s390/boot/compressed/misc.c | 2 +- arch/s390/crypto/prng.c | 2 +- arch/s390/include/asm/checksum.h | 2 +- arch/s390/include/asm/idals.h | 2 +- arch/s390/include/asm/mmu_context.h | 2 +- arch/s390/kernel/compat_linux.c | 2 +- arch/s390/kernel/compat_signal.c | 2 +- arch/s390/kernel/debug.c | 2 +- arch/s390/kernel/dis.c | 2 +- arch/s390/kernel/kprobes.c | 2 +- arch/s390/kernel/ptrace.c | 2 +- arch/s390/kernel/signal.c | 2 +- arch/s390/kernel/sys_s390.c | 2 +- arch/s390/kernel/time.c | 2 +- arch/s390/kernel/traps.c | 2 +- arch/s390/kvm/interrupt.c | 2 +- arch/s390/mm/init.c | 2 +- arch/score/include/asm/checksum.h | 2 +- arch/score/kernel/ptrace.c | 2 +- arch/score/kernel/traps.c | 2 +- arch/score/lib/checksum_copy.c | 2 +- arch/sh/boards/mach-landisk/gio.c | 2 +- arch/sh/boot/compressed/misc.c | 2 +- arch/sh/include/asm/mmu_context.h | 2 +- arch/sh/kernel/cpu/init.c | 2 +- arch/sh/kernel/cpu/shmobile/cpuidle.c | 2 +- arch/sh/kernel/cpu/shmobile/pm.c | 2 +- arch/sh/kernel/crash_dump.c | 2 +- arch/sh/kernel/io_trapped.c | 2 +- arch/sh/kernel/irq.c | 2 +- arch/sh/kernel/kprobes.c | 2 +- arch/sh/kernel/process_32.c | 2 +- arch/sh/kernel/process_64.c | 2 +- arch/sh/kernel/ptrace_32.c | 2 +- arch/sh/kernel/ptrace_64.c | 2 +- arch/sh/kernel/setup.c | 2 +- arch/sh/kernel/sh_ksyms_64.c | 2 +- arch/sh/kernel/signal_32.c | 2 +- arch/sh/kernel/signal_64.c | 2 +- arch/sh/kernel/sys_sh.c | 2 +- arch/sh/kernel/sys_sh32.c | 2 +- arch/sh/kernel/traps_64.c | 2 +- arch/sh/math-emu/math.c | 2 +- arch/sh/mm/cache-debugfs.c | 2 +- arch/sh/mm/cache-sh3.c | 2 +- arch/sh/mm/cache-sh5.c | 2 +- arch/sh/mm/cache-sh7705.c | 2 +- arch/sh/mm/extable_32.c | 2 +- arch/sh/mm/extable_64.c | 2 +- arch/sh/mm/nommu.c | 2 +- arch/sh/mm/pmb.c | 2 +- arch/sh/mm/tlb-sh3.c | 2 +- arch/sh/mm/tlbex_64.c | 2 +- arch/sh/mm/tlbflush_64.c | 2 +- arch/sh/oprofile/backtrace.c | 2 +- arch/sparc/include/asm/checksum_32.h | 2 +- arch/sparc/include/asm/checksum_64.h | 2 +- arch/sparc/kernel/apc.c | 2 +- arch/sparc/kernel/irq_64.c | 2 +- arch/sparc/kernel/kprobes.c | 2 +- arch/sparc/kernel/mdesc.c | 2 +- arch/sparc/kernel/pci.c | 2 +- arch/sparc/kernel/pcic.c | 2 +- arch/sparc/kernel/pmc.c | 2 +- arch/sparc/kernel/process_32.c | 2 +- arch/sparc/kernel/process_64.c | 2 +- arch/sparc/kernel/ptrace_32.c | 2 +- arch/sparc/kernel/ptrace_64.c | 2 +- arch/sparc/kernel/signal32.c | 2 +- arch/sparc/kernel/signal_32.c | 2 +- arch/sparc/kernel/signal_64.c | 2 +- arch/sparc/kernel/smp_64.c | 2 +- arch/sparc/kernel/sys_sparc32.c | 2 +- arch/sparc/kernel/sys_sparc_32.c | 2 +- arch/sparc/kernel/sys_sparc_64.c | 2 +- arch/sparc/kernel/time_64.c | 2 +- arch/sparc/kernel/traps_64.c | 2 +- arch/sparc/kernel/unaligned_32.c | 2 +- arch/sparc/kernel/unaligned_64.c | 2 +- arch/sparc/kernel/uprobes.c | 2 +- arch/sparc/kernel/visemul.c | 2 +- arch/sparc/kernel/windows.c | 2 +- arch/sparc/math-emu/math_32.c | 2 +- arch/sparc/math-emu/math_64.c | 2 +- arch/sparc/mm/extable.c | 2 +- arch/sparc/mm/init_64.c | 2 +- arch/tile/kernel/process.c | 2 +- arch/tile/kernel/single_step.c | 2 +- arch/tile/kernel/unaligned.c | 2 +- arch/um/drivers/harddog_kern.c | 2 +- arch/um/drivers/hostaudio_kern.c | 2 +- arch/um/drivers/mconsole_kern.c | 2 +- arch/um/drivers/mmapper_kern.c | 2 +- arch/um/drivers/random.c | 2 +- arch/um/kernel/exec.c | 2 +- arch/um/kernel/exitcode.c | 2 +- arch/um/kernel/process.c | 2 +- arch/um/kernel/ptrace.c | 2 +- arch/um/kernel/syscall.c | 2 +- arch/x86/entry/common.c | 2 +- arch/x86/ia32/ia32_aout.c | 2 +- arch/x86/ia32/ia32_signal.c | 2 +- arch/x86/ia32/sys_ia32.c | 2 +- arch/x86/include/asm/asm-prototypes.h | 2 +- arch/x86/include/asm/checksum_64.h | 2 +- arch/x86/include/asm/xen/page.h | 2 +- arch/x86/kernel/apm_32.c | 2 +- arch/x86/kernel/cpu/mcheck/mce-severity.c | 2 +- arch/x86/kernel/crash_dump_32.c | 2 +- arch/x86/kernel/doublefault.c | 2 +- arch/x86/kernel/kprobes/core.c | 2 +- arch/x86/kernel/kprobes/opt.c | 2 +- arch/x86/kernel/process.c | 2 +- arch/x86/kernel/ptrace.c | 2 +- arch/x86/kernel/test_nx.c | 2 +- arch/x86/kernel/tls.c | 2 +- arch/x86/kernel/vm86_32.c | 2 +- arch/x86/lib/usercopy_32.c | 2 +- arch/x86/math-emu/errors.c | 2 +- arch/x86/math-emu/fpu_entry.c | 2 +- arch/x86/math-emu/get_address.c | 2 +- arch/x86/math-emu/load_store.c | 2 +- arch/x86/math-emu/reg_ld_str.c | 2 +- arch/x86/mm/extable.c | 2 +- arch/x86/mm/init_32.c | 2 +- arch/x86/mm/init_64.c | 2 +- arch/x86/mm/pageattr.c | 2 +- arch/x86/um/ptrace_32.c | 2 +- arch/x86/um/ptrace_64.c | 2 +- arch/x86/um/signal.c | 2 +- arch/x86/um/tls_32.c | 2 +- arch/x86/xen/p2m.c | 2 +- arch/xtensa/include/asm/checksum.h | 2 +- arch/xtensa/include/asm/segment.h | 2 +- arch/xtensa/kernel/asm-offsets.c | 2 +- arch/xtensa/kernel/irq.c | 2 +- arch/xtensa/kernel/process.c | 2 +- arch/xtensa/kernel/ptrace.c | 2 +- arch/xtensa/kernel/signal.c | 2 +- arch/xtensa/kernel/stacktrace.c | 2 +- arch/xtensa/kernel/syscall.c | 2 +- arch/xtensa/kernel/traps.c | 2 +- arch/xtensa/kernel/xtensa_ksyms.c | 2 +- arch/xtensa/platforms/iss/console.c | 2 +- arch/xtensa/platforms/iss/simdisk.c | 2 +- block/ioctl.c | 2 +- block/partitions/ibm.c | 2 +- block/scsi_ioctl.c | 2 +- drivers/acpi/acpi_video.c | 2 +- drivers/acpi/battery.c | 2 +- drivers/acpi/osl.c | 2 +- drivers/acpi/proc.c | 2 +- drivers/acpi/processor_thermal.c | 2 +- drivers/acpi/processor_throttling.c | 2 +- drivers/acpi/thermal.c | 2 +- drivers/atm/adummy.c | 2 +- drivers/atm/atmtcp.c | 2 +- drivers/atm/eni.c | 2 +- drivers/atm/firestream.c | 2 +- drivers/atm/fore200e.c | 2 +- drivers/atm/he.c | 2 +- drivers/atm/horizon.c | 2 +- drivers/atm/idt77105.c | 2 +- drivers/atm/idt77252.c | 2 +- drivers/atm/iphase.c | 2 +- drivers/atm/nicstar.c | 2 +- drivers/atm/suni.c | 2 +- drivers/atm/uPD98402.c | 2 +- drivers/atm/zatm.c | 2 +- drivers/base/memory.c | 2 +- drivers/block/DAC960.c | 2 +- drivers/block/amiflop.c | 2 +- drivers/block/brd.c | 2 +- drivers/block/cciss.c | 2 +- drivers/block/cryptoloop.c | 2 +- drivers/block/hd.c | 2 +- drivers/block/loop.c | 2 +- drivers/block/nbd.c | 2 +- drivers/block/paride/pcd.c | 2 +- drivers/block/paride/pd.c | 2 +- drivers/block/paride/pf.c | 2 +- drivers/block/paride/pg.c | 2 +- drivers/block/paride/pt.c | 2 +- drivers/block/pktcdvd.c | 2 +- drivers/block/swim3.c | 2 +- drivers/block/sx8.c | 2 +- drivers/block/umem.c | 2 +- drivers/cdrom/cdrom.c | 2 +- drivers/char/agp/compat_ioctl.c | 2 +- drivers/char/agp/frontend.c | 2 +- drivers/char/applicom.c | 2 +- drivers/char/bfin-otp.c | 2 +- drivers/char/ds1620.c | 2 +- drivers/char/dtlk.c | 2 +- drivers/char/generic_nvram.c | 2 +- drivers/char/hangcheck-timer.c | 2 +- drivers/char/hw_random/core.c | 2 +- drivers/char/ipmi/ipmi_watchdog.c | 2 +- drivers/char/lp.c | 2 +- drivers/char/mbcs.c | 2 +- drivers/char/mmtimer.c | 2 +- drivers/char/mwave/3780i.c | 2 +- drivers/char/mwave/mwavedd.h | 2 +- drivers/char/nsc_gpio.c | 2 +- drivers/char/nwbutton.c | 2 +- drivers/char/nwflash.c | 2 +- drivers/char/pc8736x_gpio.c | 2 +- drivers/char/pcmcia/cm4040_cs.c | 2 +- drivers/char/pcmcia/synclink_cs.c | 2 +- drivers/char/random.c | 2 +- drivers/char/raw.c | 2 +- drivers/char/scx200_gpio.c | 2 +- drivers/char/sonypi.c | 2 +- drivers/char/tlclk.c | 2 +- drivers/char/toshiba.c | 2 +- drivers/char/xilinx_hwicap/xilinx_hwicap.c | 2 +- drivers/cpufreq/ia64-acpi-cpufreq.c | 2 +- drivers/cpuidle/governors/ladder.c | 2 +- drivers/dio/dio.c | 2 +- drivers/edac/edac_device.c | 2 +- drivers/edac/edac_mc.c | 2 +- drivers/edac/edac_pci.c | 2 +- drivers/i2c/i2c-core.c | 2 +- drivers/ide/hpt366.c | 2 +- drivers/ide/ide-disk.c | 2 +- drivers/ide/ide-io.c | 2 +- drivers/ide/ide-iops.c | 2 +- drivers/ide/ide-probe.c | 2 +- drivers/ide/ide-proc.c | 2 +- drivers/infiniband/core/ucm.c | 2 +- drivers/infiniband/core/user_mad.c | 2 +- drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/core/uverbs_main.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 2 +- drivers/infiniband/ulp/iser/iscsi_iser.c | 2 +- drivers/input/input-compat.c | 2 +- drivers/input/misc/atlas_btns.c | 2 +- drivers/input/mouse/amimouse.c | 2 +- drivers/input/mouse/atarimouse.c | 2 +- drivers/input/mouse/trackpoint.c | 2 +- drivers/input/serio/hp_sdc.c | 2 +- drivers/input/serio/q40kbd.c | 2 +- drivers/input/serio/serport.c | 2 +- drivers/input/tablet/aiptek.c | 2 +- drivers/input/tablet/gtco.c | 2 +- drivers/isdn/capi/kcapi.c | 2 +- drivers/isdn/hardware/avm/b1.c | 2 +- drivers/isdn/hardware/avm/b1dma.c | 2 +- drivers/isdn/hardware/avm/c4.c | 2 +- drivers/isdn/hardware/eicon/capimain.c | 2 +- drivers/isdn/hardware/eicon/divamnt.c | 2 +- drivers/isdn/hardware/eicon/divasi.c | 2 +- drivers/isdn/hardware/eicon/divasmain.c | 2 +- drivers/isdn/hardware/eicon/divasproc.c | 2 +- drivers/isdn/hysdn/hysdn_boot.c | 2 +- drivers/lguest/core.c | 2 +- drivers/lguest/page_tables.c | 2 +- drivers/lguest/x86/core.c | 2 +- drivers/macintosh/ans-lcd.c | 2 +- drivers/macintosh/smu.c | 2 +- drivers/macintosh/via-pmu.c | 2 +- drivers/macintosh/via-pmu68k.c | 2 +- drivers/md/dm-ioctl.c | 2 +- drivers/media/dvb-core/dmxdev.c | 2 +- drivers/media/dvb-core/dvb_demux.c | 2 +- drivers/media/dvb-core/dvb_net.c | 2 +- drivers/media/dvb-core/dvb_ringbuffer.c | 2 +- drivers/media/i2c/adv7170.c | 2 +- drivers/media/i2c/adv7175.c | 2 +- drivers/media/i2c/bt856.c | 2 +- drivers/media/i2c/bt866.c | 2 +- drivers/media/i2c/cs53l32a.c | 2 +- drivers/media/i2c/m52790.c | 2 +- drivers/media/i2c/saa6588.c | 2 +- drivers/media/i2c/saa7110.c | 2 +- drivers/media/i2c/saa7185.c | 2 +- drivers/media/i2c/tlv320aic23b.c | 2 +- drivers/media/i2c/vp27smpx.c | 2 +- drivers/media/i2c/vpx3220.c | 2 +- drivers/media/i2c/wm8739.c | 2 +- drivers/media/i2c/wm8775.c | 2 +- drivers/media/pci/ivtv/ivtv-driver.h | 2 +- drivers/media/pci/meye/meye.c | 2 +- drivers/media/pci/zoran/videocodec.c | 2 +- drivers/media/pci/zoran/zoran_driver.c | 2 +- drivers/media/platform/arv.c | 2 +- drivers/media/usb/pvrusb2/pvrusb2-ioread.c | 2 +- drivers/media/usb/pwc/pwc-ctrl.c | 2 +- drivers/media/v4l2-core/v4l2-common.c | 2 +- drivers/media/v4l2-core/v4l2-dev.c | 2 +- drivers/message/fusion/mptctl.c | 2 +- drivers/message/fusion/mptlan.h | 2 +- drivers/misc/ibmasm/ibmasmfs.c | 2 +- drivers/mmc/core/block.c | 2 +- drivers/mmc/host/android-goldfish.c | 2 +- drivers/mtd/devices/pmc551.c | 2 +- drivers/mtd/devices/slram.c | 2 +- drivers/mtd/ftl.c | 2 +- drivers/mtd/inftlcore.c | 2 +- drivers/mtd/inftlmount.c | 2 +- drivers/mtd/maps/sun_uflash.c | 2 +- drivers/mtd/mtd_blkdevs.c | 2 +- drivers/mtd/mtdchar.c | 2 +- drivers/mtd/nftlcore.c | 2 +- drivers/net/appletalk/ipddp.c | 2 +- drivers/net/eql.c | 2 +- drivers/net/ethernet/3com/3c509.c | 2 +- drivers/net/ethernet/3com/3c515.c | 2 +- drivers/net/ethernet/3com/3c574_cs.c | 2 +- drivers/net/ethernet/3com/3c59x.c | 2 +- drivers/net/ethernet/3com/typhoon.c | 2 +- drivers/net/ethernet/8390/axnet_cs.c | 2 +- drivers/net/ethernet/8390/ne2k-pci.c | 2 +- drivers/net/ethernet/8390/pcnet_cs.c | 2 +- drivers/net/ethernet/adaptec/starfire.c | 2 +- drivers/net/ethernet/alteon/acenic.c | 2 +- drivers/net/ethernet/amd/amd8111e.c | 2 +- drivers/net/ethernet/amd/nmclan_cs.c | 2 +- drivers/net/ethernet/broadcom/b44.c | 2 +- drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 2 +- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- drivers/net/ethernet/dec/tulip/de2104x.c | 2 +- drivers/net/ethernet/dec/tulip/de4x5.c | 2 +- drivers/net/ethernet/dec/tulip/dmfe.c | 2 +- drivers/net/ethernet/dec/tulip/tulip_core.c | 2 +- drivers/net/ethernet/dec/tulip/uli526x.c | 2 +- drivers/net/ethernet/dec/tulip/winbond-840.c | 2 +- drivers/net/ethernet/dec/tulip/xircom_cb.c | 2 +- drivers/net/ethernet/dlink/dl2k.h | 2 +- drivers/net/ethernet/dlink/sundance.c | 2 +- drivers/net/ethernet/fealnx.c | 2 +- drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c | 2 +- drivers/net/ethernet/freescale/fs_enet/mac-fcc.c | 2 +- drivers/net/ethernet/freescale/fs_enet/mac-fec.c | 2 +- drivers/net/ethernet/freescale/fs_enet/mac-scc.c | 2 +- drivers/net/ethernet/freescale/fs_enet/mii-fec.c | 2 +- drivers/net/ethernet/freescale/gianfar.c | 2 +- drivers/net/ethernet/freescale/gianfar.h | 2 +- drivers/net/ethernet/freescale/gianfar_ethtool.c | 2 +- drivers/net/ethernet/freescale/ucc_geth.c | 2 +- drivers/net/ethernet/freescale/ucc_geth_ethtool.c | 2 +- drivers/net/ethernet/fujitsu/fmvj18x_cs.c | 2 +- drivers/net/ethernet/ibm/emac/core.c | 2 +- drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c | 2 +- drivers/net/ethernet/natsemi/natsemi.c | 2 +- drivers/net/ethernet/natsemi/ns83820.c | 2 +- drivers/net/ethernet/packetengines/hamachi.c | 2 +- drivers/net/ethernet/packetengines/yellowfin.c | 2 +- drivers/net/ethernet/realtek/8139cp.c | 2 +- drivers/net/ethernet/sgi/ioc3-eth.c | 2 +- drivers/net/ethernet/sis/sis900.c | 2 +- drivers/net/ethernet/smsc/epic100.c | 2 +- drivers/net/ethernet/smsc/smc91c92_cs.c | 2 +- drivers/net/ethernet/sun/cassini.c | 2 +- drivers/net/ethernet/sun/sungem.c | 2 +- drivers/net/ethernet/sun/sunhme.c | 2 +- drivers/net/ethernet/via/via-rhine.c | 2 +- drivers/net/ethernet/xircom/xirc2ps_cs.c | 2 +- drivers/net/fddi/skfp/skfddi.c | 2 +- drivers/net/hamradio/6pack.c | 2 +- drivers/net/hamradio/baycom_epp.c | 2 +- drivers/net/hamradio/baycom_par.c | 2 +- drivers/net/hamradio/baycom_ser_fdx.c | 2 +- drivers/net/hamradio/baycom_ser_hdx.c | 2 +- drivers/net/hamradio/bpqether.c | 2 +- drivers/net/hamradio/dmascc.c | 2 +- drivers/net/hamradio/hdlcdrv.c | 2 +- drivers/net/hamradio/mkiss.c | 2 +- drivers/net/hamradio/scc.c | 2 +- drivers/net/hamradio/yam.c | 2 +- drivers/net/hippi/rrunner.c | 2 +- drivers/net/irda/irtty-sir.c | 2 +- drivers/net/irda/kingsun-sir.c | 2 +- drivers/net/irda/ks959-sir.c | 2 +- drivers/net/irda/ksdazzle-sir.c | 2 +- drivers/net/irda/mcs7780.c | 2 +- drivers/net/irda/vlsi_ir.c | 2 +- drivers/net/loopback.c | 2 +- drivers/net/phy/davicom.c | 2 +- drivers/net/phy/icplus.c | 2 +- drivers/net/phy/lxt.c | 2 +- drivers/net/phy/qsemi.c | 2 +- drivers/net/ppp/ppp_async.c | 2 +- drivers/net/ppp/ppp_synctty.c | 2 +- drivers/net/ppp/pppoe.c | 2 +- drivers/net/ppp/pppox.c | 2 +- drivers/net/sb1000.c | 2 +- drivers/net/slip/slhc.c | 2 +- drivers/net/slip/slip.c | 2 +- drivers/net/tun.c | 2 +- drivers/net/usb/catc.c | 2 +- drivers/net/usb/kaweth.c | 2 +- drivers/net/usb/pegasus.c | 2 +- drivers/net/usb/rtl8150.c | 2 +- drivers/net/wan/dlci.c | 2 +- drivers/net/wan/dscc4.c | 2 +- drivers/net/wan/farsync.c | 2 +- drivers/net/wan/hd64570.c | 2 +- drivers/net/wan/hd64572.c | 2 +- drivers/net/wan/lapbether.c | 2 +- drivers/net/wan/lmc/lmc_main.c | 2 +- drivers/net/wan/lmc/lmc_media.c | 2 +- drivers/net/wan/sbni.c | 2 +- drivers/net/wan/sdla.c | 2 +- drivers/net/wireless/atmel/atmel.c | 2 +- drivers/net/wireless/intel/ipw2x00/ipw2100.c | 2 +- drivers/net/wireless/intel/ipw2x00/libipw_geo.c | 2 +- drivers/net/wireless/intel/ipw2x00/libipw_module.c | 2 +- drivers/net/wireless/intel/ipw2x00/libipw_rx.c | 2 +- drivers/net/wireless/intel/ipw2x00/libipw_tx.c | 2 +- drivers/net/wireless/intersil/hostap/hostap_hw.c | 2 +- drivers/net/wireless/intersil/hostap/hostap_main.c | 2 +- drivers/net/wireless/intersil/prism54/isl_38xx.c | 2 +- drivers/net/wireless/intersil/prism54/isl_ioctl.c | 2 +- drivers/net/wireless/ray_cs.c | 2 +- drivers/net/wireless/wl3501_cs.c | 2 +- drivers/nubus/proc.c | 2 +- drivers/oprofile/event_buffer.c | 2 +- drivers/oprofile/oprofilefs.c | 2 +- drivers/parisc/ccio-dma.c | 2 +- drivers/parisc/ccio-rm-dma.c | 2 +- drivers/parisc/eisa_eeprom.c | 2 +- drivers/parisc/eisa_enumerator.c | 2 +- drivers/parisc/led.c | 2 +- drivers/parisc/pdc_stable.c | 2 +- drivers/parport/daisy.c | 2 +- drivers/parport/ieee1284_ops.c | 2 +- drivers/parport/parport_gsc.c | 2 +- drivers/parport/probe.c | 2 +- drivers/parport/procfs.c | 2 +- drivers/pci/hotplug/acpiphp_ibm.c | 2 +- drivers/pci/hotplug/cpqphp_core.c | 2 +- drivers/pci/hotplug/cpqphp_nvram.c | 2 +- drivers/pci/hotplug/pci_hotplug_core.c | 2 +- drivers/pci/proc.c | 2 +- drivers/pci/syscall.c | 2 +- drivers/platform/x86/sony-laptop.c | 2 +- drivers/platform/x86/thinkpad_acpi.c | 2 +- drivers/pnp/interface.c | 2 +- drivers/pnp/pnpbios/proc.c | 2 +- drivers/s390/block/dasd_devmap.c | 2 +- drivers/s390/block/dasd_eckd.c | 2 +- drivers/s390/block/dasd_eer.c | 2 +- drivers/s390/block/dasd_erp.c | 2 +- drivers/s390/block/dasd_genhd.c | 2 +- drivers/s390/block/dasd_ioctl.c | 2 +- drivers/s390/block/dasd_proc.c | 2 +- drivers/s390/block/xpram.c | 2 +- drivers/s390/char/con3215.c | 2 +- drivers/s390/char/keyboard.c | 2 +- drivers/s390/char/monreader.c | 2 +- drivers/s390/char/monwriter.c | 2 +- drivers/s390/char/sclp_rw.c | 2 +- drivers/s390/char/sclp_tty.c | 2 +- drivers/s390/char/sclp_vt220.c | 2 +- drivers/s390/char/tape_char.c | 2 +- drivers/s390/char/tty3270.c | 2 +- drivers/s390/char/vmcp.c | 2 +- drivers/s390/char/vmlogrdr.c | 2 +- drivers/s390/char/vmur.c | 2 +- drivers/s390/char/zcore.c | 2 +- drivers/s390/cio/blacklist.c | 2 +- drivers/s390/crypto/zcrypt_api.c | 2 +- drivers/s390/crypto/zcrypt_cex2a.c | 2 +- drivers/s390/crypto/zcrypt_pcixcc.c | 2 +- drivers/s390/net/netiucv.c | 2 +- drivers/sbus/char/display7seg.c | 2 +- drivers/sbus/char/envctrl.c | 2 +- drivers/sbus/char/flash.c | 2 +- drivers/sbus/char/jsflash.c | 2 +- drivers/sbus/char/openprom.c | 2 +- drivers/scsi/3w-9xxx.c | 2 +- drivers/scsi/3w-sas.c | 2 +- drivers/scsi/3w-xxxx.c | 2 +- drivers/scsi/aacraid/aachba.c | 2 +- drivers/scsi/aacraid/commctrl.c | 2 +- drivers/scsi/arcmsr/arcmsr_hba.c | 2 +- drivers/scsi/bfa/bfad.c | 2 +- drivers/scsi/dpt_i2o.c | 2 +- drivers/scsi/gdth.c | 2 +- drivers/scsi/hptiop.c | 2 +- drivers/scsi/ips.h | 2 +- drivers/scsi/megaraid.c | 2 +- drivers/scsi/megaraid/megaraid_mm.h | 2 +- drivers/scsi/megaraid/megaraid_sas_base.c | 2 +- drivers/scsi/osst.c | 2 +- drivers/scsi/qla2xxx/qla_sup.c | 2 +- drivers/scsi/scsi_ioctl.c | 2 +- drivers/scsi/scsi_proc.c | 2 +- drivers/scsi/sd.c | 2 +- drivers/scsi/sr.c | 2 +- drivers/scsi/sr_ioctl.c | 2 +- drivers/scsi/st.c | 2 +- drivers/tty/amiserial.c | 2 +- drivers/tty/hvc/hvc_console.c | 2 +- drivers/tty/hvc/hvcs.c | 2 +- drivers/tty/hvc/hvsi.c | 2 +- drivers/tty/moxa.c | 2 +- drivers/tty/mxser.c | 2 +- drivers/tty/n_hdlc.c | 2 +- drivers/tty/n_r3964.c | 2 +- drivers/tty/serial/icom.c | 2 +- drivers/tty/serial/serial_core.c | 2 +- drivers/tty/synclink.c | 2 +- drivers/tty/synclink_gt.c | 2 +- drivers/tty/synclinkmp.c | 2 +- drivers/tty/tty_ioctl.c | 2 +- drivers/tty/vt/consolemap.c | 2 +- drivers/tty/vt/selection.c | 2 +- drivers/tty/vt/vc_screen.c | 2 +- drivers/tty/vt/vt_ioctl.c | 2 +- drivers/usb/atm/usbatm.c | 2 +- drivers/usb/core/hub.c | 2 +- drivers/usb/gadget/legacy/inode.c | 2 +- drivers/usb/host/uhci-hcd.c | 2 +- drivers/usb/misc/ftdi-elan.c | 2 +- drivers/usb/misc/idmouse.c | 2 +- drivers/usb/misc/ldusb.c | 2 +- drivers/usb/misc/legousbtower.c | 2 +- drivers/usb/mon/mon_bin.c | 2 +- drivers/usb/mon/mon_stat.c | 2 +- drivers/usb/mon/mon_text.c | 2 +- drivers/usb/musb/musb_debugfs.c | 2 +- drivers/video/console/newport_con.c | 2 +- drivers/video/fbdev/68328fb.c | 2 +- drivers/video/fbdev/hitfb.c | 2 +- drivers/video/fbdev/hpfb.c | 2 +- drivers/video/fbdev/mx3fb.c | 2 +- drivers/video/fbdev/q40fb.c | 2 +- drivers/video/fbdev/sm501fb.c | 2 +- drivers/video/fbdev/stifb.c | 2 +- drivers/video/fbdev/w100fb.c | 2 +- drivers/zorro/proc.c | 2 +- fs/9p/vfs_file.c | 2 +- fs/afs/proc.c | 2 +- fs/aio.c | 2 +- fs/anon_inodes.c | 2 +- fs/bfs/inode.c | 2 +- fs/binfmt_aout.c | 2 +- fs/binfmt_elf.c | 2 +- fs/binfmt_elf_fdpic.c | 2 +- fs/block_dev.c | 2 +- fs/cifs/cifs_debug.c | 2 +- fs/cifs/cifssmb.c | 2 +- fs/cifs/connect.c | 2 +- fs/cifs/transport.c | 2 +- fs/compat.c | 2 +- fs/compat_ioctl.c | 2 +- fs/configfs/file.c | 2 +- fs/coredump.c | 2 +- fs/dcache.c | 2 +- fs/dcookies.c | 2 +- fs/dlm/dlm_internal.h | 2 +- fs/efs/efs.h | 2 +- fs/eventpoll.c | 2 +- fs/exec.c | 2 +- fs/ext2/ioctl.c | 2 +- fs/ext2/super.c | 2 +- fs/ext4/extents.c | 2 +- fs/ext4/ioctl.c | 2 +- fs/ext4/super.c | 2 +- fs/fcntl.c | 2 +- fs/fhandle.c | 2 +- fs/filesystems.c | 2 +- fs/gfs2/file.c | 2 +- fs/gfs2/glock.c | 2 +- fs/gfs2/inode.c | 2 +- fs/gfs2/sys.c | 2 +- fs/gfs2/util.c | 2 +- fs/gfs2/xattr.c | 2 +- fs/hfs/hfs_fs.h | 2 +- fs/hfsplus/ioctl.c | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/jbd2/journal.c | 2 +- fs/jfs/ioctl.c | 2 +- fs/jfs/jfs_debug.c | 2 +- fs/jfs/super.c | 2 +- fs/libfs.c | 2 +- fs/locks.c | 2 +- fs/namei.c | 2 +- fs/ncpfs/dir.c | 2 +- fs/ncpfs/file.c | 2 +- fs/ncpfs/inode.c | 2 +- fs/ncpfs/ioctl.c | 2 +- fs/ncpfs/mmap.c | 2 +- fs/ncpfs/ncplib_kernel.h | 2 +- fs/ncpfs/sock.c | 2 +- fs/ncpfs/symlink.c | 2 +- fs/nfs/direct.c | 2 +- fs/nfs/file.c | 2 +- fs/nfs/getroot.c | 2 +- fs/nfs/inode.c | 2 +- fs/nfs/super.c | 2 +- fs/nfs/write.c | 2 +- fs/nfsd/fault_inject.c | 2 +- fs/nfsd/vfs.c | 2 +- fs/ntfs/file.c | 2 +- fs/ocfs2/cluster/masklog.c | 2 +- fs/ocfs2/cluster/tcp.c | 2 +- fs/ocfs2/dlmfs/dlmfs.c | 2 +- fs/ocfs2/stack_user.c | 2 +- fs/open.c | 2 +- fs/openpromfs/inode.c | 2 +- fs/pipe.c | 2 +- fs/proc/base.c | 2 +- fs/proc/generic.c | 2 +- fs/proc/inode.c | 2 +- fs/proc/kcore.c | 2 +- fs/proc/kmsg.c | 2 +- fs/proc/nommu.c | 2 +- fs/proc/page.c | 2 +- fs/proc/proc_net.c | 2 +- fs/proc/proc_tty.c | 2 +- fs/proc/root.c | 2 +- fs/proc/task_mmu.c | 2 +- fs/proc/vmcore.c | 2 +- fs/ramfs/file-nommu.c | 2 +- fs/ramfs/inode.c | 2 +- fs/read_write.c | 2 +- fs/readdir.c | 2 +- fs/select.c | 2 +- fs/seq_file.c | 2 +- fs/stat.c | 2 +- fs/ufs/inode.c | 2 +- fs/ufs/super.c | 2 +- fs/utimes.c | 2 +- fs/xattr.c | 2 +- fs/xfs/xfs_ioctl32.c | 2 +- fs/xfs/xfs_linux.h | 2 +- include/asm-generic/termios-base.h | 2 +- include/asm-generic/termios.h | 2 +- include/drm/drmP.h | 2 +- include/linux/isdnif.h | 2 +- include/linux/pagemap.h | 2 +- include/linux/poll.h | 2 +- include/net/checksum.h | 2 +- include/net/sctp/sctp.h | 2 +- include/rdma/ib_verbs.h | 2 +- init/init_task.c | 2 +- kernel/capability.c | 2 +- kernel/compat.c | 2 +- kernel/configs.c | 2 +- kernel/cpuset.c | 2 +- kernel/exit.c | 2 +- kernel/extable.c | 2 +- kernel/fork.c | 2 +- kernel/futex_compat.c | 2 +- kernel/groups.c | 2 +- kernel/kmod.c | 2 +- kernel/kprobes.c | 2 +- kernel/locking/lockdep_proc.c | 2 +- kernel/module.c | 2 +- kernel/power/snapshot.c | 2 +- kernel/power/user.c | 2 +- kernel/printk/printk.c | 2 +- kernel/profile.c | 2 +- kernel/signal.c | 2 +- kernel/sys.c | 2 +- kernel/sysctl.c | 2 +- kernel/time/hrtimer.c | 2 +- kernel/time/itimer.c | 2 +- kernel/time/posix-cpu-timers.c | 2 +- kernel/time/posix-timers.c | 2 +- kernel/time/time.c | 2 +- kernel/time/timer.c | 2 +- kernel/time/timer_list.c | 2 +- kernel/time/timer_stats.c | 2 +- kernel/uid16.c | 2 +- lib/extable.c | 2 +- lib/kstrtox.c | 2 +- mm/memcontrol.c | 2 +- mm/memory.c | 2 +- mm/mempolicy.c | 2 +- mm/mincore.c | 2 +- mm/mmap.c | 2 +- mm/mprotect.c | 2 +- mm/nommu.c | 2 +- mm/shmem.c | 2 +- mm/util.c | 2 +- mm/vmalloc.c | 2 +- net/802/fc.c | 2 +- net/802/hippi.c | 2 +- net/8021q/vlan.c | 2 +- net/ax25/af_ax25.c | 2 +- net/ax25/ax25_addr.c | 2 +- net/ax25/ax25_dev.c | 2 +- net/ax25/ax25_ds_in.c | 2 +- net/ax25/ax25_ds_subr.c | 2 +- net/ax25/ax25_ds_timer.c | 2 +- net/ax25/ax25_iface.c | 2 +- net/ax25/ax25_in.c | 2 +- net/ax25/ax25_ip.c | 2 +- net/ax25/ax25_out.c | 2 +- net/ax25/ax25_route.c | 2 +- net/ax25/ax25_std_in.c | 2 +- net/ax25/ax25_std_subr.c | 2 +- net/ax25/ax25_std_timer.c | 2 +- net/ax25/ax25_subr.c | 2 +- net/ax25/ax25_timer.c | 2 +- net/ax25/ax25_uid.c | 2 +- net/bridge/br_device.c | 2 +- net/bridge/br_ioctl.c | 2 +- net/bridge/br_netfilter_hooks.c | 2 +- net/bridge/br_netfilter_ipv6.c | 2 +- net/bridge/netfilter/ebtables.c | 2 +- net/compat.c | 2 +- net/core/datagram.c | 2 +- net/core/dev.c | 2 +- net/core/filter.c | 2 +- net/core/gen_estimator.c | 2 +- net/core/rtnetlink.c | 2 +- net/core/scm.c | 2 +- net/core/skbuff.c | 2 +- net/core/sock.c | 2 +- net/core/utils.c | 2 +- net/decnet/dn_dev.c | 2 +- net/decnet/dn_fib.c | 2 +- net/decnet/dn_table.c | 2 +- net/decnet/sysctl_net_decnet.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/devinet.c | 2 +- net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_semantics.c | 2 +- net/ipv4/fib_trie.c | 2 +- net/ipv4/icmp.c | 2 +- net/ipv4/igmp.c | 2 +- net/ipv4/ip_gre.c | 2 +- net/ipv4/ip_options.c | 2 +- net/ipv4/ip_output.c | 2 +- net/ipv4/ip_sockglue.c | 2 +- net/ipv4/ipconfig.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv4/netfilter/arp_tables.c | 2 +- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv4/route.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/ipv6/datagram.c | 2 +- net/ipv6/icmp.c | 2 +- net/ipv6/ip6_flowlabel.c | 2 +- net/ipv6/ip6_tunnel.c | 2 +- net/ipv6/ip6mr.c | 2 +- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- net/ipv6/route.c | 2 +- net/ipv6/sit.c | 2 +- net/ipv6/udp.c | 2 +- net/ipx/af_ipx.c | 2 +- net/irda/af_irda.c | 2 +- net/irda/ircomm/ircomm_tty.c | 2 +- net/irda/ircomm/ircomm_tty_ioctl.c | 2 +- net/irda/irda_device.c | 2 +- net/irda/irnet/irnet.h | 2 +- net/lapb/lapb_iface.c | 2 +- net/lapb/lapb_in.c | 2 +- net/lapb/lapb_out.c | 2 +- net/lapb/lapb_subr.c | 2 +- net/lapb/lapb_timer.c | 2 +- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- net/netfilter/nfnetlink.c | 2 +- net/netlink/af_netlink.c | 2 +- net/packet/af_packet.c | 2 +- net/rose/af_rose.c | 2 +- net/rose/rose_route.c | 2 +- net/sctp/ipv6.c | 2 +- net/socket.c | 2 +- net/sunrpc/auth_gss/auth_gss.c | 2 +- net/sunrpc/cache.c | 2 +- net/sunrpc/svcsock.c | 2 +- net/sunrpc/sysctl.c | 2 +- net/unix/af_unix.c | 2 +- net/x25/af_x25.c | 2 +- net/x25/x25_link.c | 2 +- net/xfrm/xfrm_state.c | 2 +- net/xfrm/xfrm_user.c | 2 +- security/keys/keyctl.c | 2 +- security/keys/process_keys.c | 2 +- security/keys/request_key_auth.c | 2 +- security/keys/user_defined.c | 2 +- sound/oss/dmasound/dmasound_atari.c | 2 +- sound/oss/dmasound/dmasound_core.c | 2 +- sound/oss/dmasound/dmasound_paula.c | 2 +- sound/oss/dmasound/dmasound_q40.c | 2 +- sound/oss/msnd.c | 2 +- sound/oss/os.h | 2 +- sound/oss/swarm_cs4297a.c | 2 +- virt/kvm/kvm_main.c | 2 +- 1088 files changed, 1088 insertions(+), 1088 deletions(-) (limited to 'include/net') diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c index 3ff9a957a25c..1b568ed74f95 100644 --- a/arch/alpha/boot/misc.c +++ b/arch/alpha/boot/misc.c @@ -21,7 +21,7 @@ #include #include -#include +#include #define memzero(s,n) memset ((s),0,(n)) #define puts srm_printk diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 2d6efcff3bf3..2f26ae74b61a 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -26,7 +26,7 @@ #include #include -#include +#include volatile unsigned long irq_err_count; DEFINE_PER_CPU(unsigned long, irq_pmi_count); diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 56e427c7aa3c..54d8616644e2 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -39,7 +39,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index b483156698d5..bca963a4aa48 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index 04abdec7f496..bc4d2cdcf21d 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 4811e54069fc..491e6a604e82 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -53,7 +53,7 @@ static struct notifier_block alpha_panic_block = { INT_MAX /* try to do it first */ }; -#include +#include #include #include #include diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index 8dbfb15f1745..17308f925306 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include diff --git a/arch/alpha/kernel/srm_env.c b/arch/alpha/kernel/srm_env.c index ffe996a54fad..705ae12acd15 100644 --- a/arch/alpha/kernel/srm_env.c +++ b/arch/alpha/kernel/srm_env.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #define BASE_DIR "srm_environment" /* Subdir in /proc/ */ diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c index 72b59511e59a..e9c45b65a905 100644 --- a/arch/alpha/kernel/srmcons.c +++ b/arch/alpha/kernel/srmcons.c @@ -18,7 +18,7 @@ #include #include -#include +#include static DEFINE_SPINLOCK(srmcons_callback_lock); diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index 5b6202a825ff..992000e3d9e4 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 74aceead06e9..3328af7c2776 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/alpha/lib/csum_partial_copy.c b/arch/alpha/lib/csum_partial_copy.c index b4ff3b683bcd..5dfb7975895f 100644 --- a/arch/alpha/lib/csum_partial_copy.c +++ b/arch/alpha/lib/csum_partial_copy.c @@ -11,7 +11,7 @@ #include #include -#include +#include #define ldq_u(x,y) \ diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c index 58c2669a1dd4..fa5ae0ad8983 100644 --- a/arch/alpha/math-emu/math.c +++ b/arch/alpha/math-emu/math.c @@ -3,7 +3,7 @@ #include #include -#include +#include #include "sfp-util.h" #include diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index a1bea91df56a..0542e973c73d 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c index 6053f64c3752..4c10c6452678 100644 --- a/arch/arm/common/bL_switcher_dummy_if.c +++ b/arch/arm/common/bL_switcher_dummy_if.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include static ssize_t bL_switcher_write(struct file *file, const char __user *buf, diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index c3fe769d7558..853221f81104 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include /* * Error-checking SWP macros implemented using ldrex{b}/strex{b} diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 8f92efa8460e..11676787ad49 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -33,7 +33,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" -#include +#include #include #include #include diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 9aca92074f85..fa6182a40941 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-iop13xx/irq.c b/arch/arm/mach-iop13xx/irq.c index c702cc4092de..bd9b43c8004e 100644 --- a/arch/arm/mach-iop13xx/irq.c +++ b/arch/arm/mach-iop13xx/irq.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 26874f608ca9..0f08f611c1a6 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-rpc/dma.c b/arch/arm/mach-rpc/dma.c index 6d3517dc4772..fb48f3141fb4 100644 --- a/arch/arm/mach-rpc/dma.c +++ b/arch/arm/mach-rpc/dma.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c index 101e8f2c7abe..ed8d129d4bea 100644 --- a/arch/arm/plat-iop/time.c +++ b/arch/arm/plat-iop/time.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index 2b79b8a89457..b0d708ff7f4e 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -16,7 +16,7 @@ #ifndef __ASM_WORD_AT_A_TIME_H #define __ASM_WORD_AT_A_TIME_H -#include +#include #ifndef __AARCH64EB__ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 04de188a36c9..fde04f029ec3 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #define CREATE_TRACE_POINTS diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 4f0d76339414..a7504f40d7ee 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include /* diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 1decd2b2c730..f0593c92279b 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index b7063de792f7..c747a0fc5d7d 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include struct compat_sigcontext { diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 3f9e15722473..b37446a8ffdb 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index d7150e30438a..add4a1334085 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -17,7 +17,7 @@ */ #include -#include +#include .text diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index cfe13396085b..fd6cd05593f9 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -17,7 +17,7 @@ #include #include -#include +#include /* * Copy from user space to a kernel buffer (alignment handled by the hardware) diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 718b1c4e2f85..d828540ded6f 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -19,7 +19,7 @@ #include #include -#include +#include /* * Copy from user space to user space (alignment handled by the hardware) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index e99e31c9acac..3e6ae2663b82 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -17,7 +17,7 @@ #include #include -#include +#include /* * Copy to user space from a kernel buffer (alignment handled by the hardware) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index da9576932322..17f422a4dc55 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -23,7 +23,7 @@ #include #include #include -#include +#include /* * flush_icache_range(start,end) diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index b41aff25426d..47cf3f9d89ff 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -49,7 +49,7 @@ #include #include -#include +#include #include diff --git a/arch/avr32/kernel/avr32_ksyms.c b/arch/avr32/kernel/avr32_ksyms.c index 7c6cf14f0985..0d05fd095468 100644 --- a/arch/avr32/kernel/avr32_ksyms.c +++ b/arch/avr32/kernel/avr32_ksyms.c @@ -12,7 +12,7 @@ #include #include -#include +#include /* * GCC functions diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c index 4aedcab7cd4b..a89b893279bb 100644 --- a/arch/avr32/kernel/ptrace.c +++ b/arch/avr32/kernel/ptrace.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c index 8f1c63b9b983..b5fcc4914fe4 100644 --- a/arch/avr32/kernel/signal.c +++ b/arch/avr32/kernel/signal.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include diff --git a/arch/avr32/mm/cache.c b/arch/avr32/mm/cache.c index 85d635cd7b28..d9476825fc43 100644 --- a/arch/avr32/mm/cache.c +++ b/arch/avr32/mm/cache.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include /* diff --git a/arch/blackfin/kernel/bfin_dma.c b/arch/blackfin/kernel/bfin_dma.c index 4a32f2dd5ddc..9d3eb0cf8ccc 100644 --- a/arch/blackfin/kernel/bfin_dma.c +++ b/arch/blackfin/kernel/bfin_dma.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include /* diff --git a/arch/blackfin/kernel/kgdb_test.c b/arch/blackfin/kernel/kgdb_test.c index 18ab004aea1c..b8b785dc4e3b 100644 --- a/arch/blackfin/kernel/kgdb_test.c +++ b/arch/blackfin/kernel/kgdb_test.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include diff --git a/arch/blackfin/kernel/module.c b/arch/blackfin/kernel/module.c index 4489efc52883..0188c933b155 100644 --- a/arch/blackfin/kernel/module.c +++ b/arch/blackfin/kernel/module.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include /* Transfer the section to the L1 memory */ int diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c index 63f5560d6eb2..4cc72b0d1c1d 100644 --- a/arch/c6x/mm/init.c +++ b/arch/c6x/mm/init.c @@ -18,7 +18,7 @@ #include #include -#include +#include /* * ZERO_PAGE is a special page that is used for zero-initialized diff --git a/arch/cris/arch-v10/drivers/eeprom.c b/arch/cris/arch-v10/drivers/eeprom.c index c903a9e53a47..33558d270a53 100644 --- a/arch/cris/arch-v10/drivers/eeprom.c +++ b/arch/cris/arch-v10/drivers/eeprom.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include "i2c.h" #define D(x) diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c index 0f3983241e60..9ac75d68f184 100644 --- a/arch/cris/arch-v10/drivers/sync_serial.c +++ b/arch/cris/arch-v10/drivers/sync_serial.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/cris/arch-v10/kernel/ptrace.c b/arch/cris/arch-v10/kernel/ptrace.c index bfddfb99401f..eca94c7d56e7 100644 --- a/arch/cris/arch-v10/kernel/ptrace.c +++ b/arch/cris/arch-v10/kernel/ptrace.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/cris/arch-v10/kernel/signal.c b/arch/cris/arch-v10/kernel/signal.c index 7122d9773b13..db30c98e4926 100644 --- a/arch/cris/arch-v10/kernel/signal.c +++ b/arch/cris/arch-v10/kernel/signal.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #define DEBUG_SIG 0 diff --git a/arch/cris/arch-v10/kernel/traps.c b/arch/cris/arch-v10/kernel/traps.c index 7001beda716c..96d004fe9740 100644 --- a/arch/cris/arch-v10/kernel/traps.c +++ b/arch/cris/arch-v10/kernel/traps.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include diff --git a/arch/cris/arch-v10/lib/usercopy.c b/arch/cris/arch-v10/lib/usercopy.c index b964c667aced..1ba7cc000dfc 100644 --- a/arch/cris/arch-v10/lib/usercopy.c +++ b/arch/cris/arch-v10/lib/usercopy.c @@ -8,7 +8,7 @@ * Pieces used from memcpy, originally by Kenny Ranerup long time ago. */ -#include +#include /* Asm:s have been tweaked (within the domain of correctness) to give satisfactory results for "gcc version 2.96 20000427 (experimental)". diff --git a/arch/cris/arch-v10/mm/fault.c b/arch/cris/arch-v10/mm/fault.c index ed60588f8467..75210cbe61ce 100644 --- a/arch/cris/arch-v10/mm/fault.c +++ b/arch/cris/arch-v10/mm/fault.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index 0068fd411a84..ae6903d7fdbe 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c index fe1f9cf7b391..c366bc05466a 100644 --- a/arch/cris/arch-v32/kernel/ptrace.c +++ b/arch/cris/arch-v32/kernel/ptrace.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c index 150d1d76c29d..816bf2ca93ef 100644 --- a/arch/cris/arch-v32/kernel/signal.c +++ b/arch/cris/arch-v32/kernel/signal.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include extern unsigned long cris_signal_return_page; diff --git a/arch/cris/arch-v32/kernel/traps.c b/arch/cris/arch-v32/kernel/traps.c index 8bbe09c93132..d79666aefd71 100644 --- a/arch/cris/arch-v32/kernel/traps.c +++ b/arch/cris/arch-v32/kernel/traps.c @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/cris/arch-v32/lib/usercopy.c b/arch/cris/arch-v32/lib/usercopy.c index f0f335d8aa79..05e58dab800d 100644 --- a/arch/cris/arch-v32/lib/usercopy.c +++ b/arch/cris/arch-v32/lib/usercopy.c @@ -8,7 +8,7 @@ * Pieces used from memcpy, originally by Kenny Ranerup long time ago. */ -#include +#include /* Asm:s have been tweaked (within the domain of correctness) to give satisfactory results for "gcc version 3.2.1 Axis release R53/1.53-v32". diff --git a/arch/cris/kernel/crisksyms.c b/arch/cris/kernel/crisksyms.c index 31b4bd288cad..3166d1cf2f84 100644 --- a/arch/cris/kernel/crisksyms.c +++ b/arch/cris/kernel/crisksyms.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index b78498eb079b..50a7dd451456 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/cris/kernel/profile.c b/arch/cris/kernel/profile.c index cd9f15b92f8f..ad56b37f8e11 100644 --- a/arch/cris/kernel/profile.c +++ b/arch/cris/kernel/profile.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #define SAMPLE_BUFFER_SIZE 8192 diff --git a/arch/cris/kernel/ptrace.c b/arch/cris/kernel/ptrace.c index fd3427e563c5..806b764059d5 100644 --- a/arch/cris/kernel/ptrace.c +++ b/arch/cris/kernel/ptrace.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/cris/kernel/sys_cris.c b/arch/cris/kernel/sys_cris.c index 7aa036ec78ff..8febb032fdd7 100644 --- a/arch/cris/kernel/sys_cris.c +++ b/arch/cris/kernel/sys_cris.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include asmlinkage long diff --git a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c index da4c72401e27..b2a312a7afc6 100644 --- a/arch/cris/kernel/traps.c +++ b/arch/cris/kernel/traps.c @@ -20,7 +20,7 @@ #endif #include -#include +#include #include extern void arch_enable_nmi(void); diff --git a/arch/frv/include/asm/futex.h b/arch/frv/include/asm/futex.h index 4bea27f50a7a..2e1da71e27a4 100644 --- a/arch/frv/include/asm/futex.h +++ b/arch/frv/include/asm/futex.h @@ -5,7 +5,7 @@ #include #include -#include +#include extern int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr); diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c index 2239346fa3db..93513e4ccd2b 100644 --- a/arch/frv/kernel/irq.c +++ b/arch/frv/kernel/irq.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/frv/kernel/pm-mb93093.c b/arch/frv/kernel/pm-mb93093.c index eaa7b582ef52..8358e34a3fad 100644 --- a/arch/frv/kernel/pm-mb93093.c +++ b/arch/frv/kernel/pm-mb93093.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c index ac767d94a880..051ccecbf7f1 100644 --- a/arch/frv/kernel/pm.c +++ b/arch/frv/kernel/pm.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index 5d40aeb7712e..b306241c4ef2 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c index 3987ff88dab0..49768401ce0f 100644 --- a/arch/frv/kernel/ptrace.c +++ b/arch/frv/kernel/ptrace.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c index 82d5e914dc15..bf6e07a7a1b1 100644 --- a/arch/frv/kernel/signal.c +++ b/arch/frv/kernel/signal.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #define DEBUG_SIG 0 diff --git a/arch/frv/kernel/sys_frv.c b/arch/frv/kernel/sys_frv.c index 9c4980825bbb..f80cc8b9bd45 100644 --- a/arch/frv/kernel/sys_frv.c +++ b/arch/frv/kernel/sys_frv.c @@ -25,7 +25,7 @@ #include #include -#include +#include asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, diff --git a/arch/frv/kernel/sysctl.c b/arch/frv/kernel/sysctl.c index f4dfae2c75ad..b54a64971cf1 100644 --- a/arch/frv/kernel/sysctl.c +++ b/arch/frv/kernel/sysctl.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include static const char frv_cache_wback[] = "wback"; static const char frv_cache_wthru[] = "wthru"; diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index a6d105d61b26..31221fb4348e 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/frv/kernel/uaccess.c b/arch/frv/kernel/uaccess.c index 374f88d6cc00..8b360b4222a5 100644 --- a/arch/frv/kernel/uaccess.c +++ b/arch/frv/kernel/uaccess.c @@ -11,7 +11,7 @@ #include #include -#include +#include /*****************************************************************************/ /* diff --git a/arch/frv/mm/dma-alloc.c b/arch/frv/mm/dma-alloc.c index 7a73aaeae3ac..e701aa9e6a14 100644 --- a/arch/frv/mm/dma-alloc.c +++ b/arch/frv/mm/dma-alloc.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include static int map_page(unsigned long va, unsigned long pa, pgprot_t prot) diff --git a/arch/frv/mm/extable.c b/arch/frv/mm/extable.c index 8863d6c1df6e..9a641c1b085a 100644 --- a/arch/frv/mm/extable.c +++ b/arch/frv/mm/extable.c @@ -4,7 +4,7 @@ #include #include -#include +#include extern const void __memset_end, __memset_user_error_lr, __memset_user_error_handler; extern const void __memcpy_end, __memcpy_user_error_lr, __memcpy_user_error_handler; diff --git a/arch/h8300/boot/compressed/misc.c b/arch/h8300/boot/compressed/misc.c index 9f64fe8f29ff..a947dbb4fd91 100644 --- a/arch/h8300/boot/compressed/misc.c +++ b/arch/h8300/boot/compressed/misc.c @@ -9,7 +9,7 @@ * Adapted for h8300 by Yoshinori Sato 2006 */ -#include +#include /* * gzip declarations diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index dee41256922c..891974a11704 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -38,7 +38,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index 7138303cbbf2..d784f7117f9a 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -41,7 +41,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/hexagon/kernel/hexagon_ksyms.c b/arch/hexagon/kernel/hexagon_ksyms.c index c041d8ecb1e2..af9dec4c28eb 100644 --- a/arch/hexagon/kernel/hexagon_ksyms.c +++ b/arch/hexagon/kernel/hexagon_ksyms.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include /* Additional functions */ EXPORT_SYMBOL(__clear_user_hexagon); diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c index b039a624c170..c6b22b9945a7 100644 --- a/arch/hexagon/kernel/signal.c +++ b/arch/hexagon/kernel/signal.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/hexagon/mm/uaccess.c b/arch/hexagon/mm/uaccess.c index 34127261c2b7..ec90afdb3ad0 100644 --- a/arch/hexagon/mm/uaccess.c +++ b/arch/hexagon/mm/uaccess.c @@ -23,7 +23,7 @@ * we implement here as subroutines. */ #include -#include +#include #include /* diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index bd7c251e2bce..de863d6d802b 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c index 0b286ca164f9..8682df6263d6 100644 --- a/arch/ia64/kernel/brl_emu.c +++ b/arch/ia64/kernel/brl_emu.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include extern char ia64_set_b1, ia64_set_b2, ia64_set_b3, ia64_set_b4, ia64_set_b5; diff --git a/arch/ia64/kernel/crash_dump.c b/arch/ia64/kernel/crash_dump.c index c8c9298666fb..9c12b794e774 100644 --- a/arch/ia64/kernel/crash_dump.c +++ b/arch/ia64/kernel/crash_dump.c @@ -11,7 +11,7 @@ #include #include -#include +#include /** * copy_oldmem_page - copy one page from "oldmem" diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index 0eaa89f3defd..fa8ee64adac2 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include static struct signal_struct init_signals = INIT_SIGNALS(init_signals); diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index de4fc00dea98..2ff1df7b14ea 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -17,7 +17,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index c7c51445c3be..45ff27e9edbb 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -33,7 +33,7 @@ #include #include -#include +#include extern void jprobe_inst_return(void); diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 2436ad5f92c1..677a86826771 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -50,7 +50,7 @@ #include #include #include -#include +#include #include #ifdef CONFIG_PERFMON diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index aae6c4dc7ae7..52deab683ba1 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 36f660da8124..0b1153e610ea 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #ifdef CONFIG_PERFMON #include diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index aaf74f36cfa1..d194d5c83d32 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -48,7 +48,7 @@ #include #include -#include +#include MODULE_AUTHOR("Jesse Barnes "); MODULE_DESCRIPTION("/proc interface to IA-64 SAL features"); diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index b3a124da71e5..5db52c6813c4 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 41e33f84c185..a09c12230bc5 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -18,7 +18,7 @@ #include #include -#include +#include unsigned long arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len, diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 77edd68c5161..095bfaff82d0 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include fpswa_interface_t *fpswa_interface; diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 7f0d31656b4d..9cd01c2200ee 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include extern int die_if_kernel(char *str, struct pt_regs *regs, long err); diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c index 8f66195999e7..9704e2cd9878 100644 --- a/arch/ia64/kernel/unwind.c +++ b/arch/ia64/kernel/unwind.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include "entry.h" #include "unwind_i.h" diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c index 118daf5a0632..42f7678ef6ad 100644 --- a/arch/ia64/lib/csum_partial_copy.c +++ b/arch/ia64/lib/csum_partial_copy.c @@ -11,7 +11,7 @@ #include #include -#include +#include /* * XXX Fixme: those 2 inlines are meant for debugging and will go away diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c index 8f70bb2d0c37..4edb816aba9a 100644 --- a/arch/ia64/mm/extable.c +++ b/arch/ia64/mm/extable.c @@ -5,7 +5,7 @@ * David Mosberger-Tang */ -#include +#include void ia64_handle_exception (struct pt_regs *regs, const struct exception_table_entry *e) diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 1841ef69183d..bb4610faca84 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index b9992571c036..4c3b84d8406a 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -37,7 +37,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c index 7aab87f48060..29cf8f8c08e9 100644 --- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c +++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c @@ -9,7 +9,7 @@ #ifdef CONFIG_PROC_FS #include #include -#include +#include #include static int partition_id_show(struct seq_file *s, void *p) diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c index e35f6485c1fd..32d0380eb72e 100644 --- a/arch/ia64/sn/kernel/tiocx.c +++ b/arch/ia64/sn/kernel/tiocx.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m32r/kernel/align.c b/arch/m32r/kernel/align.c index ab871ccd33f8..ec51e5b34860 100644 --- a/arch/m32r/kernel/align.c +++ b/arch/m32r/kernel/align.c @@ -5,7 +5,7 @@ */ #include -#include +#include static int get_reg(struct pt_regs *regs, int nr) { diff --git a/arch/m32r/kernel/irq.c b/arch/m32r/kernel/irq.c index c7272b894283..5537f7397297 100644 --- a/arch/m32r/kernel/irq.c +++ b/arch/m32r/kernel/irq.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include /* * do_IRQ handles all normal device IRQs (the special diff --git a/arch/m32r/kernel/m32r_ksyms.c b/arch/m32r/kernel/m32r_ksyms.c index 23f26f4adfff..d763f0bd2106 100644 --- a/arch/m32r/kernel/m32r_ksyms.c +++ b/arch/m32r/kernel/m32r_ksyms.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index a88b1f01e91f..e0568bee60c0 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index c145605a981f..a68acb9fa515 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c index 1c81e24fd006..1ed597041fba 100644 --- a/arch/m32r/kernel/signal.c +++ b/arch/m32r/kernel/signal.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #define DEBUG_SIG 0 diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c index c3fdd632fba7..f34957032504 100644 --- a/arch/m32r/kernel/sys_m32r.c +++ b/arch/m32r/kernel/sys_m32r.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index a7a424f852e4..c3c5fdfae920 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include diff --git a/arch/m32r/lib/csum_partial_copy.c b/arch/m32r/lib/csum_partial_copy.c index 5596f3df833f..b3cd59c12b8e 100644 --- a/arch/m32r/lib/csum_partial_copy.c +++ b/arch/m32r/lib/csum_partial_copy.c @@ -22,7 +22,7 @@ #include #include -#include +#include /* * Copy while checksumming, otherwise like csum_partial diff --git a/arch/m32r/lib/usercopy.c b/arch/m32r/lib/usercopy.c index 82abd159dbef..fd03f2731f20 100644 --- a/arch/m32r/lib/usercopy.c +++ b/arch/m32r/lib/usercopy.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include unsigned long __generic_copy_to_user(void __user *to, const void *from, unsigned long n) diff --git a/arch/m32r/mm/extable.c b/arch/m32r/mm/extable.c index 1743f23d49a3..40ccf80d29cf 100644 --- a/arch/m32r/mm/extable.c +++ b/arch/m32r/mm/extable.c @@ -3,7 +3,7 @@ */ #include -#include +#include int fixup_exception(struct pt_regs *regs) { diff --git a/arch/m32r/mm/fault-nommu.c b/arch/m32r/mm/fault-nommu.c index 80f18cc6f547..e22d5ddae5cb 100644 --- a/arch/m32r/mm/fault-nommu.c +++ b/arch/m32r/mm/fault-nommu.c @@ -22,7 +22,7 @@ #include /* For unblank_screen() */ #include -#include +#include #include #include #include diff --git a/arch/m68k/bvme6000/rtc.c b/arch/m68k/bvme6000/rtc.c index f7984f44ff0f..d53c9b301f84 100644 --- a/arch/m68k/bvme6000/rtc.c +++ b/arch/m68k/bvme6000/rtc.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include /* diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 4ba1ae7345c3..aaf28f8e342d 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c index 1bc10e62b9af..9cd86d7343a6 100644 --- a/arch/m68k/kernel/ptrace.c +++ b/arch/m68k/kernel/ptrace.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index 58507edbdf1d..8ead291a902a 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -46,7 +46,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index 9aa01adb407f..98a2daaae30c 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index 6c9ca24830e9..558f38402737 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -32,7 +32,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/m68k/lib/uaccess.c b/arch/m68k/lib/uaccess.c index 35d1442dee89..a76b73abaf64 100644 --- a/arch/m68k/lib/uaccess.c +++ b/arch/m68k/lib/uaccess.c @@ -5,7 +5,7 @@ */ #include -#include +#include unsigned long __generic_copy_from_user(void *to, const void __user *from, unsigned long n) diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c index 0fb54a90eac2..c6d351f5bd79 100644 --- a/arch/m68k/mac/misc.c +++ b/arch/m68k/mac/misc.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index b09a3cb29b68..9c1e656b1f8f 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index 8f37fdd80be9..7cb72dbc2eaa 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/m68k/mm/sun3mmu.c b/arch/m68k/mm/sun3mmu.c index 269f81158a33..b5b7d53f7283 100644 --- a/arch/m68k/mm/sun3mmu.c +++ b/arch/m68k/mm/sun3mmu.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/m68k/mvme16x/rtc.c b/arch/m68k/mvme16x/rtc.c index 1cdc73268188..8f00847a0e4b 100644 --- a/arch/m68k/mvme16x/rtc.c +++ b/arch/m68k/mvme16x/rtc.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include /* diff --git a/arch/m68k/sun3/mmu_emu.c b/arch/m68k/sun3/mmu_emu.c index 3f258e230ba5..0f95134e9b85 100644 --- a/arch/m68k/sun3/mmu_emu.c +++ b/arch/m68k/sun3/mmu_emu.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/metag/kernel/irq.c b/arch/metag/kernel/irq.c index 3074b64793e6..c9939604a38f 100644 --- a/arch/metag/kernel/irq.c +++ b/arch/metag/kernel/irq.c @@ -13,7 +13,7 @@ #include #include -#include +#include #ifdef CONFIG_4KSTACKS union irq_ctx { diff --git a/arch/mips/alchemy/common/power.c b/arch/mips/alchemy/common/power.c index 921ed30b440c..303257b697c2 100644 --- a/arch/mips/alchemy/common/power.c +++ b/arch/mips/alchemy/common/power.c @@ -33,7 +33,7 @@ #include #include -#include +#include #include /* diff --git a/arch/mips/dec/kn01-berr.c b/arch/mips/dec/kn01-berr.c index 44d8a87a8a68..e9d2db480aeb 100644 --- a/arch/mips/dec/kn01-berr.c +++ b/arch/mips/dec/kn01-berr.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h index bce1ce53149a..7749daf2a465 100644 --- a/arch/mips/include/asm/checksum.h +++ b/arch/mips/include/asm/checksum.h @@ -18,7 +18,7 @@ #include -#include +#include /* * computes the checksum of a memory block at buff, length len, diff --git a/arch/mips/include/asm/compat-signal.h b/arch/mips/include/asm/compat-signal.h index 64e0b9343b8c..4c6176467146 100644 --- a/arch/mips/include/asm/compat-signal.h +++ b/arch/mips/include/asm/compat-signal.h @@ -8,7 +8,7 @@ #include #include -#include +#include static inline int __copy_conv_sigset_to_user(compat_sigset_t __user *d, const sigset_t *s) diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h index 667ca3c467b7..b42b513007a2 100644 --- a/arch/mips/include/asm/r4kcache.h +++ b/arch/mips/include/asm/r4kcache.h @@ -20,7 +20,7 @@ #include #include #include -#include /* for segment_eq() */ +#include /* for segment_eq() */ extern void (*r4k_blast_dcache)(void); extern void (*r4k_blast_icache)(void); diff --git a/arch/mips/include/asm/termios.h b/arch/mips/include/asm/termios.h index 6245b68a69a8..ce2d72e34274 100644 --- a/arch/mips/include/asm/termios.h +++ b/arch/mips/include/asm/termios.h @@ -9,7 +9,7 @@ #ifndef _ASM_TERMIOS_H #define _ASM_TERMIOS_H -#include +#include #include /* diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index db6f5afff4ff..1900f39588ae 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index 12c718181e5e..ae037a304ee4 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include /* * Calculate and return exception PC in case of branch delay slot diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index dd3175442c9e..07718bb5fc9d 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include /* Hardware capabilities */ unsigned int elf_hwcap __read_mostly; diff --git a/arch/mips/kernel/crash_dump.c b/arch/mips/kernel/crash_dump.c index 6fe7790e5868..77ee99a2d0aa 100644 --- a/arch/mips/kernel/crash_dump.c +++ b/arch/mips/kernel/crash_dump.c @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include static void *kdump_buf_page; diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index f25f7eab7307..f8f5836eb3c1 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -23,7 +23,7 @@ #include #include -#include +#include /* * 'what should we do if we get a hw irq event on an illegal vector'. diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c index de63d36af895..1f4bd222ba76 100644 --- a/arch/mips/kernel/kgdb.c +++ b/arch/mips/kernel/kgdb.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include static struct hard_trap_info { unsigned char tt; /* Trap type code for MIPS R3xxx and R4xxx */ diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 50fb62544df7..0352f742d077 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -38,7 +38,7 @@ #include #include -#include +#include #include #include diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index 789d7bf4fef3..a12904ea9f65 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include /* * CPU mask used to set process affinity for MT VPEs/TCs with FPUs diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c index bd09853aecdf..ef2ca28a028b 100644 --- a/arch/mips/kernel/mips-r2-to-r6-emul.c +++ b/arch/mips/kernel/mips-r2-to-r6-emul.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #ifdef CONFIG_64BIT #define ADDIU "daddiu " diff --git a/arch/mips/kernel/mips_ksyms.c b/arch/mips/kernel/mips_ksyms.c index e2b6ab74643d..93aeec705a6e 100644 --- a/arch/mips/kernel/mips_ksyms.c +++ b/arch/mips/kernel/mips_ksyms.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 9514e5f2209f..5142b1dfe8a7 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index a92994d60e91..c8ba26072132 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 5fcbdcd7abd0..4f0998525626 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include /* diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index 97b7c51b8251..84165f2b31ff 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include "signal-common.h" diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c index a7bc38430500..b672cebb4a1a 100644 --- a/arch/mips/kernel/signal_n32.c +++ b/arch/mips/kernel/signal_n32.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 53a7ef9a8f32..833f82210528 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include /* diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 3905003dfe2b..6c7f9d7e92b3 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -61,7 +61,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index f1c308dbbc4a..7ed98354fe9d 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -89,7 +89,7 @@ #include #include #include -#include +#include #define STR(x) __STR(x) #define __STR(x) #x diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index f8b7bf836437..a298ac93edcc 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c index 4a094f7acb3d..c4469ff4a996 100644 --- a/arch/mips/math-emu/dsemul.c +++ b/arch/mips/math-emu/dsemul.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include /** * struct emuframe - The 'emulation' frame structure diff --git a/arch/mips/mm/extable.c b/arch/mips/mm/extable.c index e474fa2efed4..81bc8a34a83f 100644 --- a/arch/mips/mm/extable.c +++ b/arch/mips/mm/extable.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include int fixup_exception(struct pt_regs *regs) { diff --git a/arch/mips/mm/sc-debugfs.c b/arch/mips/mm/sc-debugfs.c index 01f1154cdb0c..7e945e310b44 100644 --- a/arch/mips/mm/sc-debugfs.c +++ b/arch/mips/mm/sc-debugfs.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include diff --git a/arch/mips/oprofile/op_model_loongson3.c b/arch/mips/oprofile/op_model_loongson3.c index 85f3ee4ab456..40660392006f 100644 --- a/arch/mips/oprofile/op_model_loongson3.c +++ b/arch/mips/oprofile/op_model_loongson3.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include "op_impl.h" diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c index 712cc0f6a58d..9960a8302eac 100644 --- a/arch/mips/sgi-ip22/ip28-berr.c +++ b/arch/mips/sgi-ip22/ip28-berr.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include static unsigned int count_be_is_fixup; diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index 692778da9e76..2e0edb385656 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include static void dump_hub_information(unsigned long errst0, unsigned long errst1) { diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c index afc1cadbba37..ba8f46d80ab8 100644 --- a/arch/mips/sgi-ip32/ip32-berr.c +++ b/arch/mips/sgi-ip32/ip32-berr.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/sibyte/common/sb_tbprof.c b/arch/mips/sibyte/common/sb_tbprof.c index 059e28c8fd97..99c720be72d2 100644 --- a/arch/mips/sibyte/common/sb_tbprof.c +++ b/arch/mips/sibyte/common/sb_tbprof.c @@ -54,7 +54,7 @@ #define K_INT_PERF_CNT K_BCM1480_INT_PERF_CNT #endif -#include +#include #define SBPROF_TB_MAJOR 240 diff --git a/arch/mn10300/kernel/fpu.c b/arch/mn10300/kernel/fpu.c index 064fa194de2b..2578b7ae7dd5 100644 --- a/arch/mn10300/kernel/fpu.c +++ b/arch/mn10300/kernel/fpu.c @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. */ -#include +#include #include #include #include diff --git a/arch/mn10300/kernel/mn10300_ksyms.c b/arch/mn10300/kernel/mn10300_ksyms.c index f9eb9753a404..ec6c4f8f93a6 100644 --- a/arch/mn10300/kernel/mn10300_ksyms.c +++ b/arch/mn10300/kernel/mn10300_ksyms.c @@ -9,7 +9,7 @@ * 2 of the Licence, or (at your option) any later version. */ #include -#include +#include #include diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index cbede4e88dee..e5def2217f72 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c index 5bd58514e739..976020f469c1 100644 --- a/arch/mn10300/kernel/ptrace.c +++ b/arch/mn10300/kernel/ptrace.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mn10300/kernel/setup.c b/arch/mn10300/kernel/setup.c index 2ad7f32fa122..1b3d80d8a171 100644 --- a/arch/mn10300/kernel/setup.c +++ b/arch/mn10300/kernel/setup.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c index cd8cb1d1176b..2f3cb5734235 100644 --- a/arch/mn10300/kernel/signal.c +++ b/arch/mn10300/kernel/signal.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include "sigframe.h" diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c index 815f1355fad4..f999981e55c0 100644 --- a/arch/mn10300/kernel/sys_mn10300.c +++ b/arch/mn10300/kernel/sys_mn10300.c @@ -21,7 +21,7 @@ #include #include -#include +#include asmlinkage long old_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, diff --git a/arch/mn10300/lib/checksum.c b/arch/mn10300/lib/checksum.c index b6580f5d89ee..0f569151ef11 100644 --- a/arch/mn10300/lib/checksum.c +++ b/arch/mn10300/lib/checksum.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include "internal.h" diff --git a/arch/mn10300/mm/cache-smp.c b/arch/mn10300/mm/cache-smp.c index 2d23b9eeee62..e80996064d3d 100644 --- a/arch/mn10300/mm/cache-smp.c +++ b/arch/mn10300/mm/cache-smp.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include "cache-smp.h" diff --git a/arch/mn10300/mm/cache.c b/arch/mn10300/mm/cache.c index 0a1f0aa92ebc..0b925cce2b83 100644 --- a/arch/mn10300/mm/cache.c +++ b/arch/mn10300/mm/cache.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include "cache-smp.h" diff --git a/arch/mn10300/mm/extable.c b/arch/mn10300/mm/extable.c index 25e5485ab87d..305de461cb8f 100644 --- a/arch/mn10300/mm/extable.c +++ b/arch/mn10300/mm/extable.c @@ -10,7 +10,7 @@ */ #include #include -#include +#include int fixup_exception(struct pt_regs *regs) { diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c index 97a1ec0beeec..8ce677d5575e 100644 --- a/arch/mn10300/mm/init.c +++ b/arch/mn10300/mm/init.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/mn10300/mm/misalignment.c b/arch/mn10300/mm/misalignment.c index b9920b1edd5a..31d04da85743 100644 --- a/arch/mn10300/mm/misalignment.c +++ b/arch/mn10300/mm/misalignment.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mn10300/proc-mn2ws0050/proc-init.c b/arch/mn10300/proc-mn2ws0050/proc-init.c index 950cc8dbb284..25b1b453c515 100644 --- a/arch/mn10300/proc-mn2ws0050/proc-init.c +++ b/arch/mn10300/proc-mn2ws0050/proc-init.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c index 81f7da7b1d55..72ed30a93c85 100644 --- a/arch/nios2/kernel/traps.c +++ b/arch/nios2/kernel/traps.c @@ -19,7 +19,7 @@ #include #include -#include +#include static DEFINE_SPINLOCK(die_lock); diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c index 83ccf7c0c58d..86e31cf1de1d 100644 --- a/arch/openrisc/kernel/or32_ksyms.c +++ b/arch/openrisc/kernel/or32_ksyms.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 277123bb4bf8..d7990df9025a 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -36,7 +36,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c index c82be69b43c6..265f10fb3930 100644 --- a/arch/openrisc/kernel/signal.c +++ b/arch/openrisc/kernel/signal.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #define DEBUG_SIG 0 diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 3d3f6062f49c..a4574cb4b0fb 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index e94cd225e816..b1a7435e786a 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index 78d30d2ea2d8..1c4fe61a592b 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #ifdef CONFIG_64BIT #define FRAME_SIZE 128 diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 3cad8aadc69e..7484b3d11e0d 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -43,7 +43,7 @@ EXPORT_SYMBOL(__xchg64); EXPORT_SYMBOL(__cmpxchg_u64); #endif -#include +#include EXPORT_SYMBOL(lclear_user); EXPORT_SYMBOL(lstrnlen_user); diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index b6298a85e8ae..697c53543a4d 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -33,7 +33,7 @@ #include #include /* get_order */ #include -#include +#include #include /* for purge_tlb_*() macros */ static struct proc_dir_entry * proc_gsc_root __read_mostly = NULL; diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c index 6eabce62463b..e282a5131d77 100644 --- a/arch/parisc/kernel/perf.c +++ b/arch/parisc/kernel/perf.c @@ -48,7 +48,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index e02d7b4d2b69..f8b6959d2d97 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 2264f68f3c2f..e58925ac64d1 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/parisc/kernel/signal32.c b/arch/parisc/kernel/signal32.c index c342b2e17492..70aaabb8b3cb 100644 --- a/arch/parisc/kernel/signal32.c +++ b/arch/parisc/kernel/signal32.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include "signal32.h" diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index a81e177cac7b..bf3294171230 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -23,7 +23,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include +#include #include #include #include diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 4215f5596c8b..037d81f00520 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 2b65c0177778..0a21067ac0a3 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index e278a87f43cc..1b73690477c5 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/parisc/lib/checksum.c b/arch/parisc/lib/checksum.c index ae66d31f9ecf..ba6384da6ade 100644 --- a/arch/parisc/lib/checksum.c +++ b/arch/parisc/lib/checksum.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #define addc(_t,_r) \ __asm__ __volatile__ ( \ diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 81592562e0f8..ba47c70712f9 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 033f3385fa49..8d58c61908f7 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index cfa0f81a5bb0..d10ad258d41a 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #ifdef DEBUG diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 03d089b3ed72..4d3aa05e28be 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include /* * Stores the breakpoints currently in use on each breakpoint address diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 3c05c311e35e..a018f5cae899 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -55,7 +55,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index ad108b842669..735ff3d3f77d 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 30b89d5cbb03..3f7ba0f5bf29 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 34d2c595de23..d5e2b8309939 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 678f87a63645..41c86c6b6e4d 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #undef DEBUG diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index c30612aad68e..56548bf6231f 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index b1ec62f2cc31..e4744ff38a17 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 1e887f3a61a6..f37eb53de1a1 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index c82eed97bd22..df56dfc4b681 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 6a3e5de544ce..112cc3b2ee1a 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index db2b482af658..f6f6a8a5103a 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #define MODULE_VERS "1.0" diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index a26a02006576..2bf1f9b5b34b 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 5fe79182f0fa..7fcf1f7f01c1 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index bbe77aed198d..3a3671172436 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 27aa913ac91d..97bb1385e771 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -37,7 +37,7 @@ #include #endif -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 96698fdf93b4..c83c115858c1 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 8a285876aef8..15f216d022e2 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -44,7 +44,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 644cce3d8dce..de04c9fbb5cd 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -36,7 +36,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index be9751f1cb2a..19397e2a8bf5 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -64,7 +64,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 4239aaf74886..e6cc56b61d01 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -40,7 +40,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c index c4bfadb2606b..2d8f6d8ccafc 100644 --- a/arch/powerpc/kernel/vecemu.c +++ b/arch/powerpc/kernel/vecemu.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include /* Functions in vector.S */ extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index b6952dd23152..019f008775b9 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 8dcbe37a4dac..66b2a35be424 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 826c541a12af..1482961ceb4d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 02176fd52f84..f102616febc7 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -17,7 +17,7 @@ #include -#include +#include #include #include diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index ef27fbd5d9c5..20528701835b 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 3bdc639157c1..20dff102a06f 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index df3f2706d3e5..0514cbd4e533 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -30,7 +30,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index ed38f8114118..fe312c160d97 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index efd1183a6b16..cd892dec7cb6 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c index 08e3a3356c40..a0cb63fb76a1 100644 --- a/arch/powerpc/lib/checksum_wrappers.c +++ b/arch/powerpc/lib/checksum_wrappers.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index d5edbeb8eb82..c1746df0f88e 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include int patch_instruction(unsigned int *addr, unsigned int instr) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 9c78a9c102c3..06c7e9b88408 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/lib/usercopy_64.c b/arch/powerpc/lib/usercopy_64.c index 5eea6f3c1e03..9bd3a3dad78d 100644 --- a/arch/powerpc/lib/usercopy_64.c +++ b/arch/powerpc/lib/usercopy_64.c @@ -7,7 +7,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { diff --git a/arch/powerpc/math-emu/fabs.c b/arch/powerpc/math-emu/fabs.c index 549baba5948f..a5e7ad1384ee 100644 --- a/arch/powerpc/math-emu/fabs.c +++ b/arch/powerpc/math-emu/fabs.c @@ -1,6 +1,6 @@ #include #include -#include +#include int fabs(u32 *frD, u32 *frB) diff --git a/arch/powerpc/math-emu/fadd.c b/arch/powerpc/math-emu/fadd.c index 0158a16e2b82..29de37e0e0da 100644 --- a/arch/powerpc/math-emu/fadd.c +++ b/arch/powerpc/math-emu/fadd.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fadds.c b/arch/powerpc/math-emu/fadds.c index 5930f40a8687..7093c5b58002 100644 --- a/arch/powerpc/math-emu/fadds.c +++ b/arch/powerpc/math-emu/fadds.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fcmpo.c b/arch/powerpc/math-emu/fcmpo.c index 5bce011c2aec..5d644467221c 100644 --- a/arch/powerpc/math-emu/fcmpo.c +++ b/arch/powerpc/math-emu/fcmpo.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fcmpu.c b/arch/powerpc/math-emu/fcmpu.c index d4fb1babc6ad..0f9bf4864832 100644 --- a/arch/powerpc/math-emu/fcmpu.c +++ b/arch/powerpc/math-emu/fcmpu.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fctiw.c b/arch/powerpc/math-emu/fctiw.c index f694440ddc00..716d6da7f204 100644 --- a/arch/powerpc/math-emu/fctiw.c +++ b/arch/powerpc/math-emu/fctiw.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fctiwz.c b/arch/powerpc/math-emu/fctiwz.c index 71e782fd4fe3..7212fa7cfd36 100644 --- a/arch/powerpc/math-emu/fctiwz.c +++ b/arch/powerpc/math-emu/fctiwz.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fdiv.c b/arch/powerpc/math-emu/fdiv.c index a29239c05e3e..e1e452069e49 100644 --- a/arch/powerpc/math-emu/fdiv.c +++ b/arch/powerpc/math-emu/fdiv.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fdivs.c b/arch/powerpc/math-emu/fdivs.c index 526bc261275f..5511e2d1c3ad 100644 --- a/arch/powerpc/math-emu/fdivs.c +++ b/arch/powerpc/math-emu/fdivs.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fmadd.c b/arch/powerpc/math-emu/fmadd.c index 8c3f20aa5a95..2b6fae0bc8c2 100644 --- a/arch/powerpc/math-emu/fmadd.c +++ b/arch/powerpc/math-emu/fmadd.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fmadds.c b/arch/powerpc/math-emu/fmadds.c index 794fb31e59d1..aff35f24a236 100644 --- a/arch/powerpc/math-emu/fmadds.c +++ b/arch/powerpc/math-emu/fmadds.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fmr.c b/arch/powerpc/math-emu/fmr.c index bd55384b8196..f6347911f6a3 100644 --- a/arch/powerpc/math-emu/fmr.c +++ b/arch/powerpc/math-emu/fmr.c @@ -1,6 +1,6 @@ #include #include -#include +#include int fmr(u32 *frD, u32 *frB) diff --git a/arch/powerpc/math-emu/fmsub.c b/arch/powerpc/math-emu/fmsub.c index 626f6fed84ac..1fb26cebe04e 100644 --- a/arch/powerpc/math-emu/fmsub.c +++ b/arch/powerpc/math-emu/fmsub.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fmsubs.c b/arch/powerpc/math-emu/fmsubs.c index 3425bc899760..f73965453e05 100644 --- a/arch/powerpc/math-emu/fmsubs.c +++ b/arch/powerpc/math-emu/fmsubs.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fmul.c b/arch/powerpc/math-emu/fmul.c index 2c1929779892..ffd31b549290 100644 --- a/arch/powerpc/math-emu/fmul.c +++ b/arch/powerpc/math-emu/fmul.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fmuls.c b/arch/powerpc/math-emu/fmuls.c index f5ad5c9c77d0..21aee431ca9d 100644 --- a/arch/powerpc/math-emu/fmuls.c +++ b/arch/powerpc/math-emu/fmuls.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fnabs.c b/arch/powerpc/math-emu/fnabs.c index a7d34f3d9499..af877a53d264 100644 --- a/arch/powerpc/math-emu/fnabs.c +++ b/arch/powerpc/math-emu/fnabs.c @@ -1,6 +1,6 @@ #include #include -#include +#include int fnabs(u32 *frD, u32 *frB) diff --git a/arch/powerpc/math-emu/fneg.c b/arch/powerpc/math-emu/fneg.c index 1e988cd9c6cc..8417d174758c 100644 --- a/arch/powerpc/math-emu/fneg.c +++ b/arch/powerpc/math-emu/fneg.c @@ -1,6 +1,6 @@ #include #include -#include +#include int fneg(u32 *frD, u32 *frB) diff --git a/arch/powerpc/math-emu/fnmadd.c b/arch/powerpc/math-emu/fnmadd.c index e817bc5453ef..6316ef0e0874 100644 --- a/arch/powerpc/math-emu/fnmadd.c +++ b/arch/powerpc/math-emu/fnmadd.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fnmadds.c b/arch/powerpc/math-emu/fnmadds.c index 4db4b7d9ba8d..9ffe037df2b9 100644 --- a/arch/powerpc/math-emu/fnmadds.c +++ b/arch/powerpc/math-emu/fnmadds.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fnmsub.c b/arch/powerpc/math-emu/fnmsub.c index f65979fa770e..f97a9cfb54ea 100644 --- a/arch/powerpc/math-emu/fnmsub.c +++ b/arch/powerpc/math-emu/fnmsub.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fnmsubs.c b/arch/powerpc/math-emu/fnmsubs.c index 9021dacc03b8..7fa1217bd930 100644 --- a/arch/powerpc/math-emu/fnmsubs.c +++ b/arch/powerpc/math-emu/fnmsubs.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fre.c b/arch/powerpc/math-emu/fre.c index 49ccf2cc6a5a..b621a790aa67 100644 --- a/arch/powerpc/math-emu/fre.c +++ b/arch/powerpc/math-emu/fre.c @@ -1,6 +1,6 @@ #include #include -#include +#include int fre(void *frD, void *frB) { diff --git a/arch/powerpc/math-emu/fres.c b/arch/powerpc/math-emu/fres.c index 10ecbd08b79e..211c30d0145f 100644 --- a/arch/powerpc/math-emu/fres.c +++ b/arch/powerpc/math-emu/fres.c @@ -1,6 +1,6 @@ #include #include -#include +#include int fres(void *frD, void *frB) diff --git a/arch/powerpc/math-emu/frsp.c b/arch/powerpc/math-emu/frsp.c index ddcc14664b1a..3e3bc73e27ae 100644 --- a/arch/powerpc/math-emu/frsp.c +++ b/arch/powerpc/math-emu/frsp.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/frsqrte.c b/arch/powerpc/math-emu/frsqrte.c index 1d0a3a0fd0e6..7c2ce43750dc 100644 --- a/arch/powerpc/math-emu/frsqrte.c +++ b/arch/powerpc/math-emu/frsqrte.c @@ -1,6 +1,6 @@ #include #include -#include +#include int frsqrte(void *frD, void *frB) diff --git a/arch/powerpc/math-emu/frsqrtes.c b/arch/powerpc/math-emu/frsqrtes.c index 7e838e380314..269951a8c650 100644 --- a/arch/powerpc/math-emu/frsqrtes.c +++ b/arch/powerpc/math-emu/frsqrtes.c @@ -1,6 +1,6 @@ #include #include -#include +#include int frsqrtes(void *frD, void *frB) { diff --git a/arch/powerpc/math-emu/fsel.c b/arch/powerpc/math-emu/fsel.c index 1b0c14498032..32b62c6c7f48 100644 --- a/arch/powerpc/math-emu/fsel.c +++ b/arch/powerpc/math-emu/fsel.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fsqrt.c b/arch/powerpc/math-emu/fsqrt.c index a55fc7d49983..0e2a34b616dc 100644 --- a/arch/powerpc/math-emu/fsqrt.c +++ b/arch/powerpc/math-emu/fsqrt.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fsqrts.c b/arch/powerpc/math-emu/fsqrts.c index 31dccbfc39ff..420cf19b5fd4 100644 --- a/arch/powerpc/math-emu/fsqrts.c +++ b/arch/powerpc/math-emu/fsqrts.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fsub.c b/arch/powerpc/math-emu/fsub.c index 02c5dff458ba..feedd705cf62 100644 --- a/arch/powerpc/math-emu/fsub.c +++ b/arch/powerpc/math-emu/fsub.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/fsubs.c b/arch/powerpc/math-emu/fsubs.c index 5d9b18c35e07..74190514063e 100644 --- a/arch/powerpc/math-emu/fsubs.c +++ b/arch/powerpc/math-emu/fsubs.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/lfd.c b/arch/powerpc/math-emu/lfd.c index 79ac76d596c3..d998a50740a0 100644 --- a/arch/powerpc/math-emu/lfd.c +++ b/arch/powerpc/math-emu/lfd.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/lfs.c b/arch/powerpc/math-emu/lfs.c index 434ed27be8db..1ee10b83d7e3 100644 --- a/arch/powerpc/math-emu/lfs.c +++ b/arch/powerpc/math-emu/lfs.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c index ab151f040502..76ee2e5dba65 100644 --- a/arch/powerpc/math-emu/math.c +++ b/arch/powerpc/math-emu/math.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c index 28337c9709ae..581f404caa1d 100644 --- a/arch/powerpc/math-emu/math_efp.c +++ b/arch/powerpc/math-emu/math_efp.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #define FP_EX_BOOKE_E500_SPE diff --git a/arch/powerpc/math-emu/mcrfs.c b/arch/powerpc/math-emu/mcrfs.c index e948d5708e2b..8e8e72397ebc 100644 --- a/arch/powerpc/math-emu/mcrfs.c +++ b/arch/powerpc/math-emu/mcrfs.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/mffs.c b/arch/powerpc/math-emu/mffs.c index 5526cf96ede5..e00fdc22a0bc 100644 --- a/arch/powerpc/math-emu/mffs.c +++ b/arch/powerpc/math-emu/mffs.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/mtfsb0.c b/arch/powerpc/math-emu/mtfsb0.c index bc985585bca8..5ed3e7d5063e 100644 --- a/arch/powerpc/math-emu/mtfsb0.c +++ b/arch/powerpc/math-emu/mtfsb0.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/mtfsb1.c b/arch/powerpc/math-emu/mtfsb1.c index fe6ed5ac85b3..602aa16eda81 100644 --- a/arch/powerpc/math-emu/mtfsb1.c +++ b/arch/powerpc/math-emu/mtfsb1.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/mtfsf.c b/arch/powerpc/math-emu/mtfsf.c index 44b0fc8214f4..b0d5593ad357 100644 --- a/arch/powerpc/math-emu/mtfsf.c +++ b/arch/powerpc/math-emu/mtfsf.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/mtfsfi.c b/arch/powerpc/math-emu/mtfsfi.c index fd2acc26813b..5df30541a784 100644 --- a/arch/powerpc/math-emu/mtfsfi.c +++ b/arch/powerpc/math-emu/mtfsfi.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/math-emu/stfd.c b/arch/powerpc/math-emu/stfd.c index 33a165c8df0f..6baeaec134a2 100644 --- a/arch/powerpc/math-emu/stfd.c +++ b/arch/powerpc/math-emu/stfd.c @@ -1,6 +1,6 @@ #include #include -#include +#include int stfd(void *frS, void *ea) diff --git a/arch/powerpc/math-emu/stfiwx.c b/arch/powerpc/math-emu/stfiwx.c index f15a35f67e2c..9da7c5d1a872 100644 --- a/arch/powerpc/math-emu/stfiwx.c +++ b/arch/powerpc/math-emu/stfiwx.c @@ -1,6 +1,6 @@ #include #include -#include +#include int stfiwx(u32 *frS, void *ea) diff --git a/arch/powerpc/math-emu/stfs.c b/arch/powerpc/math-emu/stfs.c index 6122147356d1..62bd25264fb5 100644 --- a/arch/powerpc/math-emu/stfs.c +++ b/arch/powerpc/math-emu/stfs.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c index 31a5d42df8c9..61ac468c87c6 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/40x_mmu.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index 139dec421e57..080d49b26c3a 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -48,7 +48,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 8410b4bb36ed..80334937e14f 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index a000c3585390..93abf8a9813d 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -51,7 +51,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index d5543514c1df..5c096c01e8bd 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include /* diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 85c85eb3e245..e5a891ae80ee 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -30,7 +30,7 @@ #include #include -#include +#include #include "spufs.h" diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 3a147122bc98..a35e2c29d7ee 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include "spufs.h" #include "sputrace.h" diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 5364d4a54249..d8af9bc0489f 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include "spufs.h" diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c index a87200a535fa..0d290ea83dc1 100644 --- a/arch/powerpc/platforms/cell/spufs/syscalls.c +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include "spufs.h" diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c index 9ef8cc3378d0..c3ede2c365c3 100644 --- a/arch/powerpc/platforms/chrp/nvram.c +++ b/arch/powerpc/platforms/chrp/nvram.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index f2344cbd2f46..ecd6d9177d13 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include struct elog_obj { diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index e4169d68cb32..4886eb8b6381 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include static int opal_lpc_chip_id = -1; diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index e315e704cca7..2d6ee1c5ad85 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include /** diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 972328829387..4839db385bb0 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 76caa4a45ccd..5cb2e4beffc5 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include static struct workqueue_struct *pseries_hp_wq; diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 39049e4884fb..6b04e3f0f982 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index e6397976060e..779fc2a1c8f7 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 79aef8c1c5b3..69cedc1b3b8a 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index cc66c49f07aa..e5bf1e84047f 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include "of_helpers.h" diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c index 7d28cabf1206..c47585a78b69 100644 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ b/arch/powerpc/platforms/pseries/scanlog.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c index 6f5a8d177c42..d0e9f178a324 100644 --- a/arch/powerpc/sysdev/scom.c +++ b/arch/powerpc/sysdev/scom.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include const struct scom_controller *scom_controller; EXPORT_SYMBOL_GPL(scom_controller); diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c index 53a16aa4d384..5692dd569b9b 100644 --- a/arch/powerpc/sysdev/tsi108_pci.c +++ b/arch/powerpc/sysdev/tsi108_pci.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index f587c4811faf..5a8dfa22da7c 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 4da604ebf6fd..8515dd5a5663 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -6,7 +6,7 @@ * Author(s): Martin Schwidefsky */ -#include +#include #include #include #include diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 1113389d0a39..daf9bb063aaa 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index d7f100c53f07..12bf4fef2a68 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -11,7 +11,7 @@ #ifndef _S390_CHECKSUM_H #define _S390_CHECKSUM_H -#include +#include /* * computes the checksum of a memory block at buff, length len, diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h index a7b2d7504049..280b60a0bcd4 100644 --- a/arch/s390/include/asm/idals.h +++ b/arch/s390/include/asm/idals.h @@ -17,7 +17,7 @@ #include #include #include -#include +#include #define IDA_SIZE_LOG 12 /* 11 for 2k , 12 for 4k */ #define IDA_BLOCK_SIZE (1L< -#include +#include #include #include diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 0f9cd90c11af..96df4547377a 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -51,7 +51,7 @@ #include #include -#include +#include #include #include diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 6f2a6ab13cb5..362350cc485c 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include "compat_linux.h" diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index aa12de72fd47..79f8ae933520 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index c74c59236f44..9f017cf417f6 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index fdb40424acfe..84e0557b16fe 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include DEFINE_PER_CPU(struct kprobe *, current_kprobe); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index b81ab8882e2e..7447ba509c30 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include "entry.h" diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 9f241d1efeda..62a4c263e887 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include "entry.h" diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index f145490cce54..b7af452978ca 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include "entry.h" /* diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 867d0a057046..ec76315c9ee5 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index d0539f76fd24..283ad7840335 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include "entry.h" diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index af13f1a135b6..6843dd5a1cba 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index b3e9d18f2ec6..b67454ad8408 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/score/include/asm/checksum.h b/arch/score/include/asm/checksum.h index 539d9fd45d21..0338927f4826 100644 --- a/arch/score/include/asm/checksum.h +++ b/arch/score/include/asm/checksum.h @@ -2,7 +2,7 @@ #define _ASM_SCORE_CHECKSUM_H #include -#include +#include /* * computes the checksum of a memory block at buff, length len, diff --git a/arch/score/kernel/ptrace.c b/arch/score/kernel/ptrace.c index 4f7314d5f334..8b75e54816c1 100644 --- a/arch/score/kernel/ptrace.c +++ b/arch/score/kernel/ptrace.c @@ -29,7 +29,7 @@ #include #include -#include +#include /* * retrieve the contents of SCORE userspace general registers diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index 5cea1e750cec..d948a6818961 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include unsigned long exception_handlers[32]; diff --git a/arch/score/lib/checksum_copy.c b/arch/score/lib/checksum_copy.c index 9b770b30e8a5..39b99ef61804 100644 --- a/arch/score/lib/checksum_copy.c +++ b/arch/score/lib/checksum_copy.c @@ -25,7 +25,7 @@ #include -#include +#include unsigned int csum_partial_copy(const char *src, char *dst, int len, unsigned int sum) diff --git a/arch/sh/boards/mach-landisk/gio.c b/arch/sh/boards/mach-landisk/gio.c index 8132dff078fb..32c317f5d991 100644 --- a/arch/sh/boards/mach-landisk/gio.c +++ b/arch/sh/boards/mach-landisk/gio.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c index 208a9753ab38..ae1dfdb0013b 100644 --- a/arch/sh/boot/compressed/misc.c +++ b/arch/sh/boot/compressed/misc.c @@ -11,7 +11,7 @@ * Modified to use standard LinuxSH BIOS by Greg Banks 7Jul2000 */ -#include +#include #include #include diff --git a/arch/sh/include/asm/mmu_context.h b/arch/sh/include/asm/mmu_context.h index 9f417feaf6e8..35ffdd081d26 100644 --- a/arch/sh/include/asm/mmu_context.h +++ b/arch/sh/include/asm/mmu_context.h @@ -10,7 +10,7 @@ #ifdef __KERNEL__ #include #include -#include +#include #include #include diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c index c8b3be1b54e6..c4f01c5c8736 100644 --- a/arch/sh/kernel/cpu/init.c +++ b/arch/sh/kernel/cpu/init.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/cpu/shmobile/cpuidle.c b/arch/sh/kernel/cpu/shmobile/cpuidle.c index 53b8eeb1db20..c32e66079f7c 100644 --- a/arch/sh/kernel/cpu/shmobile/cpuidle.c +++ b/arch/sh/kernel/cpu/shmobile/cpuidle.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include static unsigned long cpuidle_mode[] = { SUSP_SH_SLEEP, /* regular sleep mode */ diff --git a/arch/sh/kernel/cpu/shmobile/pm.c b/arch/sh/kernel/cpu/shmobile/pm.c index ac37b7234f85..fba2be5d72e9 100644 --- a/arch/sh/kernel/cpu/shmobile/pm.c +++ b/arch/sh/kernel/cpu/shmobile/pm.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/kernel/crash_dump.c b/arch/sh/kernel/crash_dump.c index 569e7b171c01..b33be505361e 100644 --- a/arch/sh/kernel/crash_dump.c +++ b/arch/sh/kernel/crash_dump.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include /** * copy_oldmem_page - copy one page from "oldmem" diff --git a/arch/sh/kernel/io_trapped.c b/arch/sh/kernel/io_trapped.c index f8ce36286cea..4d4e7a2a774b 100644 --- a/arch/sh/kernel/io_trapped.c +++ b/arch/sh/kernel/io_trapped.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 6c0378c0b8b5..bc3591125df7 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c index 83acbf3f6de8..1653ff64b103 100644 --- a/arch/sh/kernel/kprobes.c +++ b/arch/sh/kernel/kprobes.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index ee12e9451874..51741850a715 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 9d3e9916555d..e0b271bffd6a 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index c1a6b89bfe70..1aabfd356b35 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c index 5cea973a65b2..c49d0d05a215 100644 --- a/arch/sh/kernel/ptrace_64.c +++ b/arch/sh/kernel/ptrace_64.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index e7b49d81053e..3a44c753b642 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/sh_ksyms_64.c b/arch/sh/kernel/sh_ksyms_64.c index 26a0774f5272..6ee3740e009e 100644 --- a/arch/sh/kernel/sh_ksyms_64.c +++ b/arch/sh/kernel/sh_ksyms_64.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index f7c3d5c25caf..5128d3001ee5 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c index d8a3f0d22809..7b77f1812434 100644 --- a/arch/sh/kernel/signal_64.c +++ b/arch/sh/kernel/signal_64.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c index 8c6a350df751..6576e5ee1fc3 100644 --- a/arch/sh/kernel/sys_sh.c +++ b/arch/sh/kernel/sys_sh.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index b66d1c62eb19..d5287d76809c 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c index d208c27ccc67..00835edb6e20 100644 --- a/arch/sh/kernel/traps_64.c +++ b/arch/sh/kernel/traps_64.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index 04aa55fa8c75..5078cb809750 100644 --- a/arch/sh/math-emu/math.c +++ b/arch/sh/math-emu/math.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/arch/sh/mm/cache-debugfs.c b/arch/sh/mm/cache-debugfs.c index 777e50f33c00..4eb9d43578b4 100644 --- a/arch/sh/mm/cache-debugfs.c +++ b/arch/sh/mm/cache-debugfs.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/mm/cache-sh3.c b/arch/sh/mm/cache-sh3.c index e37523f65195..031634f273fa 100644 --- a/arch/sh/mm/cache-sh3.c +++ b/arch/sh/mm/cache-sh3.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c index d1bffbcd9d52..d94dadedf74f 100644 --- a/arch/sh/mm/cache-sh5.c +++ b/arch/sh/mm/cache-sh5.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include extern void __weak sh4__flush_region_init(void); diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c index 7729cca727eb..6cd2aa395817 100644 --- a/arch/sh/mm/cache-sh7705.c +++ b/arch/sh/mm/cache-sh7705.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/mm/extable_32.c b/arch/sh/mm/extable_32.c index c1cf4463d09d..9cfcbb5848e4 100644 --- a/arch/sh/mm/extable_32.c +++ b/arch/sh/mm/extable_32.c @@ -5,7 +5,7 @@ */ #include -#include +#include int fixup_exception(struct pt_regs *regs) { diff --git a/arch/sh/mm/extable_64.c b/arch/sh/mm/extable_64.c index f05499688d88..96edaff8c983 100644 --- a/arch/sh/mm/extable_64.c +++ b/arch/sh/mm/extable_64.c @@ -12,7 +12,7 @@ */ #include #include -#include +#include extern unsigned long copy_user_memcpy, copy_user_memcpy_end; extern void __copy_user_fixup(void); diff --git a/arch/sh/mm/nommu.c b/arch/sh/mm/nommu.c index 36312d254faf..82f8197b93f6 100644 --- a/arch/sh/mm/nommu.c +++ b/arch/sh/mm/nommu.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include /* * Nothing too terribly exciting here .. diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c index 7160c9fd6fe3..7b2cc490ebb7 100644 --- a/arch/sh/mm/pmb.c +++ b/arch/sh/mm/pmb.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/mm/tlb-sh3.c b/arch/sh/mm/tlb-sh3.c index 6554fb439f0e..5c66665bff8b 100644 --- a/arch/sh/mm/tlb-sh3.c +++ b/arch/sh/mm/tlb-sh3.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sh/mm/tlbex_64.c b/arch/sh/mm/tlbex_64.c index 8557548fc53e..8ff966dd0c74 100644 --- a/arch/sh/mm/tlbex_64.c +++ b/arch/sh/mm/tlbex_64.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c index f33fdd2558e8..bd0715d5dca4 100644 --- a/arch/sh/mm/tlbflush_64.c +++ b/arch/sh/mm/tlbflush_64.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/oprofile/backtrace.c b/arch/sh/oprofile/backtrace.c index 9c88dcd56e86..c7695f99c8c3 100644 --- a/arch/sh/oprofile/backtrace.c +++ b/arch/sh/oprofile/backtrace.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/include/asm/checksum_32.h b/arch/sparc/include/asm/checksum_32.h index eff748c871ec..e25af5fc99fd 100644 --- a/arch/sparc/include/asm/checksum_32.h +++ b/arch/sparc/include/asm/checksum_32.h @@ -16,7 +16,7 @@ */ #include -#include +#include /* computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit) diff --git a/arch/sparc/include/asm/checksum_64.h b/arch/sparc/include/asm/checksum_64.h index 0395d75322e9..96a5ed58cea6 100644 --- a/arch/sparc/include/asm/checksum_64.h +++ b/arch/sparc/include/asm/checksum_64.h @@ -16,7 +16,7 @@ */ #include -#include +#include /* computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit) diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c index 742f6c4436bf..9ebc37e7d64c 100644 --- a/arch/sparc/kernel/apc.c +++ b/arch/sparc/kernel/apc.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 34a7930b76ef..3bebf395252c 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c index b0377db12d83..2d13a4fc0384 100644 --- a/arch/sparc/kernel/kprobes.c +++ b/arch/sparc/kernel/kprobes.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include /* We do not have hardware single-stepping on sparc64. * So we implement software single-stepping with breakpoint diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 8a6982dfd733..c0765bbf60ea 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 9c1878f4fa9f..015e55a7495d 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index 24384e1dc33d..a38787b84322 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include "kernel.h" diff --git a/arch/sparc/kernel/pmc.c b/arch/sparc/kernel/pmc.c index 97d123107ecb..f12b23f7b515 100644 --- a/arch/sparc/kernel/pmc.c +++ b/arch/sparc/kernel/pmc.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index b7780a5bef11..48ffc3e7d1dd 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 47ff5588e521..d249ca10b203 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/ptrace_32.c b/arch/sparc/kernel/ptrace_32.c index a331fdc11a2c..eca3dc76793c 100644 --- a/arch/sparc/kernel/ptrace_32.c +++ b/arch/sparc/kernel/ptrace_32.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include "kernel.h" diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index 96494b2ef41f..901063c1cf7e 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 91cc2f4ae4d9..b4096bb665b2 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index 9c0c8fd0b292..62c3e255ae7c 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index c782c9b716db..965d50e833e7 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 8182f7caf5b1..0ce347f8e4cc 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index 022c30c72ebd..bca44f3e6b86 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -44,7 +44,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index 646988d4c1a3..fb7b185ee941 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include "systbls.h" diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index fe8b8ee8e660..884c70331345 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index c69b21e51efc..807f7e2ce014 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include #include "entry.h" diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 496fa926e1e0..4bc10e44d1ca 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c index 32b61d1b6379..d20d4e3fd129 100644 --- a/arch/sparc/kernel/unaligned_32.c +++ b/arch/sparc/kernel/unaligned_32.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c index 52c00d90d4b4..cda7fd367c4f 100644 --- a/arch/sparc/kernel/unaligned_64.c +++ b/arch/sparc/kernel/unaligned_64.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/uprobes.c b/arch/sparc/kernel/uprobes.c index b68314050602..d852ae56ddc1 100644 --- a/arch/sparc/kernel/uprobes.c +++ b/arch/sparc/kernel/uprobes.c @@ -29,7 +29,7 @@ #include #include -#include +#include /* Compute the address of the breakpoint instruction and return it. * diff --git a/arch/sparc/kernel/visemul.c b/arch/sparc/kernel/visemul.c index c096c624ac4d..c4ac58e483a4 100644 --- a/arch/sparc/kernel/visemul.c +++ b/arch/sparc/kernel/visemul.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include /* OPF field of various VIS instructions. */ diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c index 87bab0a3857a..435a467b0595 100644 --- a/arch/sparc/kernel/windows.c +++ b/arch/sparc/kernel/windows.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include "kernel.h" diff --git a/arch/sparc/math-emu/math_32.c b/arch/sparc/math-emu/math_32.c index 5ce8f2f64604..4d7e0fff054f 100644 --- a/arch/sparc/math-emu/math_32.c +++ b/arch/sparc/math-emu/math_32.c @@ -68,7 +68,7 @@ #include #include #include -#include +#include #include "sfp-util_32.h" #include diff --git a/arch/sparc/math-emu/math_64.c b/arch/sparc/math-emu/math_64.c index 034aadbff036..9647051853d3 100644 --- a/arch/sparc/math-emu/math_64.c +++ b/arch/sparc/math-emu/math_64.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include "sfp-util_64.h" diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c index a61c349448e1..768a11e6bd4f 100644 --- a/arch/sparc/mm/extable.c +++ b/arch/sparc/mm/extable.c @@ -3,7 +3,7 @@ */ #include -#include +#include void sort_extable(struct exception_table_entry *start, struct exception_table_entry *finish) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 37aa537b3ad8..5d2f91511c60 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 9f37106ef93a..c84c54a1ac55 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #ifdef CONFIG_HARDWALL #include #endif diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index 862973074bf9..de3eae813e52 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c index 4fe78c5b8394..f229e979584e 100644 --- a/arch/tile/kernel/unaligned.c +++ b/arch/tile/kernel/unaligned.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c index 3282787bbcfb..6d381279b362 100644 --- a/arch/um/drivers/harddog_kern.c +++ b/arch/um/drivers/harddog_kern.c @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include "mconsole.h" MODULE_LICENSE("GPL"); diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c index 3a4b58730f5f..12bdb5996bf5 100644 --- a/arch/um/drivers/hostaudio_kern.c +++ b/arch/um/drivers/hostaudio_kern.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 8a6b57108ac2..8a4c72af3bc0 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c index 62145c276167..3645fcb2a787 100644 --- a/arch/um/drivers/mmapper_kern.c +++ b/arch/um/drivers/mmapper_kern.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include /* These are set in mmapper_init, which is called at boot time */ diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index dd16c902ff70..05523f14d7b2 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 0d7103c9eff3..770ec07b6a6a 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c index 41ebbfebb333..546302e3b7fb 100644 --- a/arch/um/kernel/exitcode.c +++ b/arch/um/kernel/exitcode.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include /* * If read and write race, the read will still atomically read a valid diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 034b42c7ab40..078630d6448c 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index 6a826cbb15c4..bc2a516c190f 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include void user_enable_single_step(struct task_struct *child) diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index c1d0ae069b53..6258676bed85 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include long old_mmap(unsigned long addr, unsigned long len, diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index bdd9cc59d20f..b83c61cfd154 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #define CREATE_TRACE_POINTS diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index cb26f18d43af..7c0a711989d2 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index cb13c0564ea7..95c0b4ae09b0 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 719cd702b0a4..47956c6a4fd8 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 44b8762fa0c7..830b19dbfa31 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h index c020ee75dce7..08e7efb2c140 100644 --- a/arch/x86/include/asm/checksum_64.h +++ b/arch/x86/include/asm/checksum_64.h @@ -8,7 +8,7 @@ */ #include -#include +#include #include /** diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index f5fb840b43e8..33cbd3db97b9 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 643818a7688b..45d44c173cf9 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -234,7 +234,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index c7efbcfbeda6..87cc9ab7a13c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "mce-internal.h" diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c index 11891ca7b716..538fedea9b3f 100644 --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c @@ -10,7 +10,7 @@ #include #include -#include +#include static void *kdump_buf_page; diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c index f6dfd9334b67..b2f7207ba86c 100644 --- a/arch/x86/kernel/doublefault.c +++ b/arch/x86/kernel/doublefault.c @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index d9d8d16b69db..eb3509338ae0 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -56,7 +56,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 3bb4c5f021f6..3d1bee9d6a72 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 37363e46b1f0..b615a1113f58 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0e63c0267f99..9cc7d5a330ef 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c index 27538f183c3b..a3b875c9e6af 100644 --- a/arch/x86/kernel/test_nx.c +++ b/arch/x86/kernel/test_nx.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include extern int rodata_test_data; diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 9692a5e9fdab..6c8934406dc9 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 01f30e56f99e..ec5d7545e6dc 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -47,7 +47,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 0b281217c890..1f65ff6540f0 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c index 9e6545f269e5..2ccc424a57d9 100644 --- a/arch/x86/math-emu/errors.c +++ b/arch/x86/math-emu/errors.c @@ -19,7 +19,7 @@ #include -#include +#include #include "fpu_emu.h" #include "fpu_system.h" diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index e945fedf1de2..0203baefb5c0 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index 8db26591d91a..b8ef9f9d2ffc 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -19,7 +19,7 @@ #include -#include +#include #include #include "fpu_system.h" diff --git a/arch/x86/math-emu/load_store.c b/arch/x86/math-emu/load_store.c index 95228ff042c0..1643054766eb 100644 --- a/arch/x86/math-emu/load_store.c +++ b/arch/x86/math-emu/load_store.c @@ -18,7 +18,7 @@ | other processes using the emulator while swapping is in progress. | +---------------------------------------------------------------------------*/ -#include +#include #include "fpu_system.h" #include "exception.h" diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c index d597fe7423c9..2c98965a60ba 100644 --- a/arch/x86/math-emu/reg_ld_str.c +++ b/arch/x86/math-emu/reg_ld_str.c @@ -19,7 +19,7 @@ #include "fpu_emu.h" -#include +#include #include "fpu_system.h" #include "exception.h" diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index fcd06f7526de..61a7e9ea9aa1 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index cf8059016ec8..928d657de829 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 963895f9af7f..af85b686a7b0 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -36,7 +36,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e3353c97d086..5a287e523eab 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index 60a5a5a85505..2497bac56066 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c index e30202b1716e..a5c9910d234f 100644 --- a/arch/x86/um/ptrace_64.c +++ b/arch/x86/um/ptrace_64.c @@ -10,7 +10,7 @@ #include #define __FRAME_OFFSETS #include -#include +#include #include /* diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 49e503697022..727ed442e0a5 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c index 48e38584d5c1..5bd949da7a4a 100644 --- a/arch/x86/um/tls_32.c +++ b/arch/x86/um/tls_32.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 37129db76d33..276da636dd39 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -71,7 +71,7 @@ #include #include -#include +#include #include #include diff --git a/arch/xtensa/include/asm/checksum.h b/arch/xtensa/include/asm/checksum.h index ec35074fcb03..3ae74d7e074b 100644 --- a/arch/xtensa/include/asm/checksum.h +++ b/arch/xtensa/include/asm/checksum.h @@ -12,7 +12,7 @@ #define _XTENSA_CHECKSUM_H #include -#include +#include #include /* diff --git a/arch/xtensa/include/asm/segment.h b/arch/xtensa/include/asm/segment.h index a2eb547a1a75..98964ad15ca2 100644 --- a/arch/xtensa/include/asm/segment.h +++ b/arch/xtensa/include/asm/segment.h @@ -11,6 +11,6 @@ #ifndef _XTENSA_SEGMENT_H #define _XTENSA_SEGMENT_H -#include +#include #endif /* _XTENSA_SEGEMENT_H */ diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c index 8e10e357ee32..bcb5beb81177 100644 --- a/arch/xtensa/kernel/asm-offsets.c +++ b/arch/xtensa/kernel/asm-offsets.c @@ -24,7 +24,7 @@ #include #include -#include +#include int main(void) { diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index 4ac3d23161cf..a265edd6ac37 100644 --- a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include DECLARE_PER_CPU(unsigned long, nmi_count); diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index e0ded48561db..826d25104846 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index a651f3a628ee..32519b71d914 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include void user_enable_single_step(struct task_struct *child) diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index e87adaa07ff3..c41294745731 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c index 7538d802b65a..e7d30ee0fd48 100644 --- a/arch/xtensa/kernel/stacktrace.c +++ b/arch/xtensa/kernel/stacktrace.c @@ -14,7 +14,7 @@ #include #include -#include +#include #if IS_ENABLED(CONFIG_OPROFILE) || IS_ENABLED(CONFIG_PERF_EVENTS) diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c index 83cf49685373..d3fd100dffc9 100644 --- a/arch/xtensa/kernel/syscall.c +++ b/arch/xtensa/kernel/syscall.c @@ -15,7 +15,7 @@ * Kevin Chea * */ -#include +#include #include #include #include diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index ce37d5b899fe..282bf721a4d6 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index 4d2872fd9bb5..d159e9b9c018 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c index c68f1e6158aa..0140a22551c8 100644 --- a/arch/xtensa/platforms/iss/console.c +++ b/arch/xtensa/platforms/iss/console.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index ede04cca30dd..02e94bb3ad3e 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #define SIMDISK_MAJOR 240 diff --git a/block/ioctl.c b/block/ioctl.c index 656c8c6ed206..be7f4de3eb3c 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg) { diff --git a/block/partitions/ibm.c b/block/partitions/ibm.c index 47a61474e795..14b081af8d61 100644 --- a/block/partitions/ibm.c +++ b/block/partitions/ibm.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include "check.h" diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index c6fee7437be4..c2b64923ab66 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 201292e67ee8..d00bc0ef87a6 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #define PREFIX "ACPI: " diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 05fe9ebfb9b5..4ef1e4624b2b 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -36,7 +36,7 @@ #ifdef CONFIG_ACPI_PROCFS_POWER #include #include -#include +#include #endif #include diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index a404ff4d7151..57fb5f468ac2 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -42,7 +42,7 @@ #include #include -#include +#include #include #include "internal.h" diff --git a/drivers/acpi/proc.c b/drivers/acpi/proc.c index 2a358154b770..a34669cc823b 100644 --- a/drivers/acpi/proc.c +++ b/drivers/acpi/proc.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include "sleep.h" #include "internal.h" diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index 1fed84a092c2..59c3a5d1e600 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #define PREFIX "ACPI: " diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index d51ca1c05619..a12f96cc93ff 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #define PREFIX "ACPI: " diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 35e8fbca10ad..1d0417b87cb7 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #define PREFIX "ACPI: " diff --git a/drivers/atm/adummy.c b/drivers/atm/adummy.c index f9b983ae6877..1fd25e872ece 100644 --- a/drivers/atm/adummy.c +++ b/drivers/atm/adummy.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index 480fa6ffbc09..3ef6253e1cce 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 40c2d561417b..c53a9dd1353f 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index 85aaf2222587..80c2ddcfa92c 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -52,7 +52,7 @@ #include #include #include -#include +#include #include #include "firestream.h" diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index 81aaa505862c..637c3e6b0f9e 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #ifdef CONFIG_SBUS diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 31b513a23ae0..3617659b9184 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -71,7 +71,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 5fc81e240c24..584aa881882b 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -45,7 +45,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/atm/idt77105.c b/drivers/atm/idt77105.c index feb023d7eebd..082aa02abc57 100644 --- a/drivers/atm/idt77105.c +++ b/drivers/atm/idt77105.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include "idt77105.h" diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 074616b39f4d..471ddfd93ea8 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -45,7 +45,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index b2756765950e..8640bafeb471 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -58,7 +58,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index c7296b583787..cb28579e8a94 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -50,7 +50,7 @@ #include #include #include -#include +#include #include #include #include "nicstar.h" diff --git a/drivers/atm/suni.c b/drivers/atm/suni.c index 02159345566c..b0363149b2fd 100644 --- a/drivers/atm/suni.c +++ b/drivers/atm/suni.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include "suni.h" diff --git a/drivers/atm/uPD98402.c b/drivers/atm/uPD98402.c index 5120a96b3a89..4fa13a807873 100644 --- a/drivers/atm/uPD98402.c +++ b/drivers/atm/uPD98402.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include "uPD98402.h" diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index d3dc95484161..292dec18ffb8 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include "uPD98401.h" #include "uPD98402.h" diff --git a/drivers/base/memory.c b/drivers/base/memory.c index bb69e58c29f3..8ab8ea1253e6 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -23,7 +23,7 @@ #include #include -#include +#include static DEFINE_MUTEX(mem_sysfs_mutex); diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 0809cda93cc0..26a51be77227 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -48,7 +48,7 @@ #include #include #include -#include +#include #include "DAC960.h" #define DAC960_GAM_MINOR 252 diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 5fd50a284168..a328f673adfe 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -70,7 +70,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/block/brd.c b/drivers/block/brd.c index ad793f35632c..3adc32a3153b 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -23,7 +23,7 @@ #include #endif -#include +#include #define SECTOR_SHIFT 9 #define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index db9d6bb6352d..e5c5b8eb14a9 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c index 3d31761c0ed0..74e03aa537ad 100644 --- a/drivers/block/cryptoloop.c +++ b/drivers/block/cryptoloop.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include "loop.h" MODULE_LICENSE("GPL"); diff --git a/drivers/block/hd.c b/drivers/block/hd.c index 3abb121825bc..a9b48ed7a3cd 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -45,7 +45,7 @@ #define REALLY_SLOW_IO #include -#include +#include #ifdef __arm__ #undef HD_IRQ diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 4af818766797..f347285c67ec 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -78,7 +78,7 @@ #include #include "loop.h" -#include +#include static DEFINE_IDR(loop_index_idr); static DEFINE_MUTEX(loop_index_mutex); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 99c84468f154..38c576f76d36 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -36,7 +36,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 93362362aa55..5fd2d0e25567 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -139,7 +139,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_DLY}; #include #include #include -#include +#include static DEFINE_MUTEX(pcd_mutex); static DEFINE_SPINLOCK(pcd_lock); diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 78a39f736c64..c3ed2fc72daa 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -155,7 +155,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_GEO, D_SBY, D_DLY, D_SLV}; #include #include #include -#include +#include #include static DEFINE_MUTEX(pd_mutex); diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index 7a7d977a76c5..ed93e8badf56 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -155,7 +155,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_LUN, D_DLY}; #include #include #include -#include +#include static DEFINE_MUTEX(pf_mutex); static DEFINE_SPINLOCK(pf_spin_lock); diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c index bfbd4c852dd9..5db955fe3a94 100644 --- a/drivers/block/paride/pg.c +++ b/drivers/block/paride/pg.c @@ -166,7 +166,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_DLY}; #include #include -#include +#include module_param(verbose, int, 0644); module_param(major, int, 0); diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index 216a94fed5b4..61fc6824299a 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c @@ -150,7 +150,7 @@ static int (*drives[4])[6] = {&drive0, &drive1, &drive2, &drive3}; #include /* current, TASK_*, schedule_timeout() */ #include -#include +#include module_param(verbose, int, 0); module_param(major, int, 0); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 95c98de92971..1b94c1ca5c5f 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -68,7 +68,7 @@ #include #include -#include +#include #define DRIVER_NAME "pktcdvd" diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index c264f2d284a7..aabd8e9d3035 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index ba4bfe933276..0e93ad7b8511 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #if 0 #define CARM_DEBUG diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 46f4c719fed9..c141cc3be22b 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -54,7 +54,7 @@ #include "umem.h" -#include +#include #include #define MM_MAXCARDS 4 diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 5d475b3a0b2e..59cca72647a6 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -282,7 +282,7 @@ #include #include -#include +#include /* used to tell the module to turn on full debugging messages */ static bool debug; diff --git a/drivers/char/agp/compat_ioctl.c b/drivers/char/agp/compat_ioctl.c index a48e05b31593..2053f70ef66b 100644 --- a/drivers/char/agp/compat_ioctl.c +++ b/drivers/char/agp/compat_ioctl.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include "agp.h" #include "compat_ioctl.h" diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c index 0f64d149c98d..f6955888e676 100644 --- a/drivers/char/agp/frontend.c +++ b/drivers/char/agp/frontend.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include "agp.h" diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c index 14790304b84b..e5c62dcf2c11 100644 --- a/drivers/char/applicom.c +++ b/drivers/char/applicom.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include "applicom.h" diff --git a/drivers/char/bfin-otp.c b/drivers/char/bfin-otp.c index 35d46da754d7..0584025bb0c2 100644 --- a/drivers/char/bfin-otp.c +++ b/drivers/char/bfin-otp.c @@ -20,7 +20,7 @@ #include #include -#include +#include #define stamp(fmt, args...) pr_debug("%s:%i: " fmt "\n", __func__, __LINE__, ## args) #define stampit() stamp("here i am") diff --git a/drivers/char/ds1620.c b/drivers/char/ds1620.c index 0fae5296e311..eb53cbadb68f 100644 --- a/drivers/char/ds1620.c +++ b/drivers/char/ds1620.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #ifdef CONFIG_PROC_FS diff --git a/drivers/char/dtlk.c b/drivers/char/dtlk.c index 65a8d96c0e93..58471394beb9 100644 --- a/drivers/char/dtlk.c +++ b/drivers/char/dtlk.c @@ -59,7 +59,7 @@ #include #include #include /* for inb_p, outb_p, inb, outb, etc. */ -#include /* for get_user, etc. */ +#include /* for get_user, etc. */ #include /* for wait_queue */ #include /* for __init, module_{init,exit} */ #include /* for POLLIN, etc. */ diff --git a/drivers/char/generic_nvram.c b/drivers/char/generic_nvram.c index 073db9558379..14e728fbb8a0 100644 --- a/drivers/char/generic_nvram.c +++ b/drivers/char/generic_nvram.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #ifdef CONFIG_PPC_PMAC #include diff --git a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c index a7c5c59675f0..4f337375252e 100644 --- a/drivers/char/hangcheck-timer.c +++ b/drivers/char/hangcheck-timer.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index f9766415ff10..6ce5ce8be2f2 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #define RNG_MODULE_NAME "hw_random" diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 4facc7517a6a..4035495f3a86 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/lp.c b/drivers/char/lp.c index c4094c4e22c1..5b6742770656 100644 --- a/drivers/char/lp.c +++ b/drivers/char/lp.c @@ -134,7 +134,7 @@ #include #include -#include +#include /* if you have more than 8 printers, remember to increase LP_NO */ #define LP_NO 8 diff --git a/drivers/char/mbcs.c b/drivers/char/mbcs.c index 67d426470e53..8c9216a0f62e 100644 --- a/drivers/char/mbcs.c +++ b/drivers/char/mbcs.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c index 3d6c0671e996..f786b18ac500 100644 --- a/drivers/char/mmtimer.c +++ b/drivers/char/mmtimer.c @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/char/mwave/3780i.c b/drivers/char/mwave/3780i.c index 972c40a19629..4a8937f80570 100644 --- a/drivers/char/mwave/3780i.c +++ b/drivers/char/mwave/3780i.c @@ -54,7 +54,7 @@ #include /* cond_resched() */ #include -#include +#include #include #include "smapi.h" #include "mwavedd.h" diff --git a/drivers/char/mwave/mwavedd.h b/drivers/char/mwave/mwavedd.h index 37e0a4926080..21cb09c7bed7 100644 --- a/drivers/char/mwave/mwavedd.h +++ b/drivers/char/mwave/mwavedd.h @@ -53,7 +53,7 @@ #include "smapi.h" #include "mwavepub.h" #include -#include +#include #include extern int mwave_debug; diff --git a/drivers/char/nsc_gpio.c b/drivers/char/nsc_gpio.c index b07b119ae57f..2a91bf048804 100644 --- a/drivers/char/nsc_gpio.c +++ b/drivers/char/nsc_gpio.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #define NAME "nsc_gpio" diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c index 0e184426db98..a5b1eb276c0b 100644 --- a/drivers/char/nwbutton.c +++ b/drivers/char/nwbutton.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c index dbe598de9b74..a284ae25e69a 100644 --- a/drivers/char/nwflash.c +++ b/drivers/char/nwflash.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include /*****************************************************************************/ #include diff --git a/drivers/char/pc8736x_gpio.c b/drivers/char/pc8736x_gpio.c index 3f79a9fb6b1b..5f4be88c0dfc 100644 --- a/drivers/char/pc8736x_gpio.c +++ b/drivers/char/pc8736x_gpio.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #define DEVNAME "pc8736x_gpio" diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c index fc061f7c2bd1..d7123259143e 100644 --- a/drivers/char/pcmcia/cm4040_cs.c +++ b/drivers/char/pcmcia/cm4040_cs.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index a7dd5f4f2c5a..d136db1a10f0 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -84,7 +84,7 @@ #define PUT_USER(error,value,addr) error = put_user(value,addr) #define COPY_TO_USER(error,dest,src,size) error = copy_to_user(dest,src,size) ? -EFAULT : 0 -#include +#include static MGSL_PARAMS default_params = { MGSL_MODE_HDLC, /* unsigned long mode */ diff --git a/drivers/char/random.c b/drivers/char/random.c index d6876d506220..1ef26403bcc8 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -265,7 +265,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/char/raw.c b/drivers/char/raw.c index e83b2adc014a..293167c6e254 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -24,7 +24,7 @@ #include #include -#include +#include struct raw_device_data { struct block_device *binding; diff --git a/drivers/char/scx200_gpio.c b/drivers/char/scx200_gpio.c index 0bc135b9b16f..903761bc41c9 100644 --- a/drivers/char/scx200_gpio.c +++ b/drivers/char/scx200_gpio.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index 719c5b4eed39..4fa7fcd8af36 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -52,7 +52,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/char/tlclk.c b/drivers/char/tlclk.c index 100cd1de9939..572a51704e67 100644 --- a/drivers/char/tlclk.c +++ b/drivers/char/tlclk.c @@ -44,7 +44,7 @@ #include #include #include /* inb/outb */ -#include +#include MODULE_AUTHOR("Sebastien Bouchard "); MODULE_LICENSE("GPL"); diff --git a/drivers/char/toshiba.c b/drivers/char/toshiba.c index f5a45d887a37..5488516da8ea 100644 --- a/drivers/char/toshiba.c +++ b/drivers/char/toshiba.c @@ -63,7 +63,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.c b/drivers/char/xilinx_hwicap/xilinx_hwicap.c index c07dfe5c4da3..3e6b23c3453c 100644 --- a/drivers/char/xilinx_hwicap/xilinx_hwicap.c +++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.c @@ -88,7 +88,7 @@ #include #include -#include +#include #ifdef CONFIG_OF /* For open firmware. */ diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c index 759612da4fdc..e28a31a40829 100644 --- a/drivers/cpufreq/ia64-acpi-cpufreq.c +++ b/drivers/cpufreq/ia64-acpi-cpufreq.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index fe8f08948fcb..ac321f09e717 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -19,7 +19,7 @@ #include #include -#include +#include #define PROMOTION_COUNT 4 #define DEMOTION_COUNT 1 diff --git a/drivers/dio/dio.c b/drivers/dio/dio.c index 55dd88d82d6d..830184529109 100644 --- a/drivers/dio/dio.c +++ b/drivers/dio/dio.c @@ -31,7 +31,7 @@ #include #include #include /* kmalloc() */ -#include +#include #include /* readb() */ struct dio_bus dio_bus = { diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index de4d5d08af9e..65cf2b9355c4 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -13,7 +13,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 5f2c717f8053..750891ea07de 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include "edac_mc.h" #include "edac_module.h" diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c index 4e9d5632041a..48c844a72a27 100644 --- a/drivers/edac/edac_pci.c +++ b/drivers/edac/edac_pci.c @@ -10,7 +10,7 @@ * */ #include -#include +#include #include #include #include diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 3de95a29024c..cf9e396d7702 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -30,7 +30,7 @@ #define pr_fmt(fmt) "i2c-core: " fmt #include -#include +#include #include #include #include diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c index 0ceae5cbd89a..4b5dc0162e67 100644 --- a/drivers/ide/hpt366.c +++ b/drivers/ide/hpt366.c @@ -130,7 +130,7 @@ #include #include -#include +#include #include #define DRV_NAME "hpt366" diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 83679da0c3f0..5ceace542b77 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 6360bbd37efe..201e43fcbc94 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -51,7 +51,7 @@ #include #include -#include +#include #include int ide_end_rq(ide_drive_t *drive, struct request *rq, int error, diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 376f2dc410c5..210a0887dd29 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include void SELECT_MASK(ide_drive_t *drive, int mask) diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 0b63facd1d87..330e319419e6 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -36,7 +36,7 @@ #include #include -#include +#include #include /** diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index 97c070077774..863db44c7916 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c @@ -16,7 +16,7 @@ #include -#include +#include #include #include #include diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 579f9a7f6283..e0a995b85a2d 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -46,7 +46,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 415a3185cde7..249b403b43a4 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -50,7 +50,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 09b649159e6c..700782203483 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -38,7 +38,7 @@ #include #include -#include +#include #include "uverbs.h" #include "core_priv.h" diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 813593550c4b..b3f95d453fba 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -46,7 +46,7 @@ #include #include -#include +#include #include diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index a2f9f29c6ab5..fd811115af49 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -35,7 +35,7 @@ #include #include -#include +#include #include "ipoib.h" diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 64b3d11dcf1e..9104e6b8cac9 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -62,7 +62,7 @@ #include -#include +#include #include #include diff --git a/drivers/input/input-compat.c b/drivers/input/input-compat.c index d84d20b9cec0..2186f71c9fe5 100644 --- a/drivers/input/input-compat.c +++ b/drivers/input/input-compat.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include "input-compat.h" #ifdef CONFIG_COMPAT diff --git a/drivers/input/misc/atlas_btns.c b/drivers/input/misc/atlas_btns.c index 638165c78e75..6423aaccc763 100644 --- a/drivers/input/misc/atlas_btns.c +++ b/drivers/input/misc/atlas_btns.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #define ACPI_ATLAS_NAME "Atlas ACPI" #define ACPI_ATLAS_CLASS "Atlas" diff --git a/drivers/input/mouse/amimouse.c b/drivers/input/mouse/amimouse.c index a7fd8f22ba56..a33437c480e3 100644 --- a/drivers/input/mouse/amimouse.c +++ b/drivers/input/mouse/amimouse.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/input/mouse/atarimouse.c b/drivers/input/mouse/atarimouse.c index d1c43236b125..96f2f51604bd 100644 --- a/drivers/input/mouse/atarimouse.c +++ b/drivers/input/mouse/atarimouse.c @@ -47,7 +47,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c index 354d47ecd66a..7331084973e1 100644 --- a/drivers/input/mouse/trackpoint.c +++ b/drivers/input/mouse/trackpoint.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "psmouse.h" #include "trackpoint.h" diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index 852858e5d8d0..559c99ca6592 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c @@ -79,7 +79,7 @@ # define sdc_readb(p) gsc_readb(p) # define sdc_writeb(v,p) gsc_writeb((v),(p)) #elif defined(__mc68000__) -# include +#include # define sdc_readb(p) in_8(p) # define sdc_writeb(v,p) out_8((p),(v)) #else diff --git a/drivers/input/serio/q40kbd.c b/drivers/input/serio/q40kbd.c index 5a9d521510bf..d0fccc8ec259 100644 --- a/drivers/input/serio/q40kbd.c +++ b/drivers/input/serio/q40kbd.c @@ -38,7 +38,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c index d189843f3727..f8ead9f9c77e 100644 --- a/drivers/input/serio/serport.c +++ b/drivers/input/serio/serport.c @@ -13,7 +13,7 @@ * the Free Software Foundation. */ -#include +#include #include #include #include diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c index 4613f0aefd08..d67547bded3e 100644 --- a/drivers/input/tablet/aiptek.c +++ b/drivers/input/tablet/aiptek.c @@ -75,7 +75,7 @@ #include #include #include -#include +#include #include /* diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c index abf09ac42ce4..b796e891e2ee 100644 --- a/drivers/input/tablet/gtco.c +++ b/drivers/input/tablet/gtco.c @@ -56,7 +56,7 @@ Scott Hill shill@gtcocalcomp.com #include #include #include -#include +#include #include #include #include diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index 823f6985b260..49d0f70c2bae 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #ifdef AVMB1_COMPAT diff --git a/drivers/isdn/hardware/avm/b1.c b/drivers/isdn/hardware/avm/b1.c index 4d9b195547c5..9fdbd99c7547 100644 --- a/drivers/isdn/hardware/avm/b1.c +++ b/drivers/isdn/hardware/avm/b1.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include "avmcard.h" diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c index 19b113faeb7b..818bd8f231db 100644 --- a/drivers/isdn/hardware/avm/b1dma.c +++ b/drivers/isdn/hardware/avm/b1dma.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include "avmcard.h" diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c index 5d00d72fe482..17beb2869dc1 100644 --- a/drivers/isdn/hardware/avm/c4.c +++ b/drivers/isdn/hardware/avm/c4.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/isdn/hardware/eicon/capimain.c b/drivers/isdn/hardware/eicon/capimain.c index 997d46abf5b2..be36d82004d6 100644 --- a/drivers/isdn/hardware/eicon/capimain.c +++ b/drivers/isdn/hardware/eicon/capimain.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/isdn/hardware/eicon/divamnt.c b/drivers/isdn/hardware/eicon/divamnt.c index 0de29b7b712f..72e58bf07577 100644 --- a/drivers/isdn/hardware/eicon/divamnt.c +++ b/drivers/isdn/hardware/eicon/divamnt.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "platform.h" #include "di_defs.h" diff --git a/drivers/isdn/hardware/eicon/divasi.c b/drivers/isdn/hardware/eicon/divasi.c index 4103a8c178d7..cb88090f9cea 100644 --- a/drivers/isdn/hardware/eicon/divasi.c +++ b/drivers/isdn/hardware/eicon/divasi.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include "platform.h" #include "di_defs.h" diff --git a/drivers/isdn/hardware/eicon/divasmain.c b/drivers/isdn/hardware/eicon/divasmain.c index 32f34511c416..8b7ad4f1ab01 100644 --- a/drivers/isdn/hardware/eicon/divasmain.c +++ b/drivers/isdn/hardware/eicon/divasmain.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/isdn/hardware/eicon/divasproc.c b/drivers/isdn/hardware/eicon/divasproc.c index 56ce98a4e248..b57efd6ad916 100644 --- a/drivers/isdn/hardware/eicon/divasproc.c +++ b/drivers/isdn/hardware/eicon/divasproc.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include "platform.h" #include "debuglib.h" diff --git a/drivers/isdn/hysdn/hysdn_boot.c b/drivers/isdn/hysdn/hysdn_boot.c index eda4741e3f2f..4a0425378f37 100644 --- a/drivers/isdn/hysdn/hysdn_boot.c +++ b/drivers/isdn/hysdn/hysdn_boot.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include "hysdn_defs.h" #include "hysdn_pof.h" diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 9e385b38debf..ac219045daf7 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include "lg.h" diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index e3abebc912c0..0bc127e9f16a 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include "lg.h" /*M:008 diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 743253fc638f..d71f6323ac00 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include #include #include "../lg.h" diff --git a/drivers/macintosh/ans-lcd.c b/drivers/macintosh/ans-lcd.c index cd35079c8c98..281fa9e6fc1f 100644 --- a/drivers/macintosh/ans-lcd.c +++ b/drivers/macintosh/ans-lcd.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 08edb2c25b60..227869159ac0 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -47,7 +47,7 @@ #include #include #include -#include +#include #define VERSION "0.7" #define AUTHOR "(c) 2005 Benjamin Herrenschmidt, IBM Corp." diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 91081dcdc272..43b8db2b5445 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -57,7 +57,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/macintosh/via-pmu68k.c b/drivers/macintosh/via-pmu68k.c index a00ee41f0573..a411c5cb77a1 100644 --- a/drivers/macintosh/via-pmu68k.c +++ b/drivers/macintosh/via-pmu68k.c @@ -38,7 +38,7 @@ #include #include -#include +#include /* Misc minor number allocated for /dev/pmu */ #define PMU_MINOR 154 diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index c72a77048b73..a5a9b17f0f7f 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -17,7 +17,7 @@ #include #include -#include +#include #define DM_MSG_PREFIX "ioctl" #define DM_DRIVER_EMAIL "dm-devel@redhat.com" diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index efe55a3e80d0..0c44479b556e 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include "dmxdev.h" static int debug; diff --git a/drivers/media/dvb-core/dvb_demux.c b/drivers/media/dvb-core/dvb_demux.c index 3ad0b2cd26b1..bbbff72bbb2a 100644 --- a/drivers/media/dvb-core/dvb_demux.c +++ b/drivers/media/dvb-core/dvb_demux.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include "dvb_demux.h" diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c index dfc03a95df71..bc5e8cfe7ca2 100644 --- a/drivers/media/dvb-core/dvb_net.c +++ b/drivers/media/dvb-core/dvb_net.c @@ -62,7 +62,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/dvb-core/dvb_ringbuffer.c b/drivers/media/dvb-core/dvb_ringbuffer.c index 7df7fb3738a0..5c4b5a1f604f 100644 --- a/drivers/media/dvb-core/dvb_ringbuffer.c +++ b/drivers/media/dvb-core/dvb_ringbuffer.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include "dvb_ringbuffer.h" diff --git a/drivers/media/i2c/adv7170.c b/drivers/media/i2c/adv7170.c index 05f1dc6c72af..fc9ec0f3679c 100644 --- a/drivers/media/i2c/adv7170.c +++ b/drivers/media/i2c/adv7170.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/i2c/adv7175.c b/drivers/media/i2c/adv7175.c index f554809a51e7..72139bdae1ca 100644 --- a/drivers/media/i2c/adv7175.c +++ b/drivers/media/i2c/adv7175.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/i2c/bt856.c b/drivers/media/i2c/bt856.c index 48176591a80d..54c627859c8e 100644 --- a/drivers/media/i2c/bt856.c +++ b/drivers/media/i2c/bt856.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/i2c/bt866.c b/drivers/media/i2c/bt866.c index bbec70c882a3..0d3f46af2545 100644 --- a/drivers/media/i2c/bt866.c +++ b/drivers/media/i2c/bt866.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/i2c/cs53l32a.c b/drivers/media/i2c/cs53l32a.c index e4b3cf49dd38..59c1a98c5a90 100644 --- a/drivers/media/i2c/cs53l32a.c +++ b/drivers/media/i2c/cs53l32a.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/i2c/m52790.c b/drivers/media/i2c/m52790.c index 81171d8e1c2c..89c28c36c5bf 100644 --- a/drivers/media/i2c/m52790.c +++ b/drivers/media/i2c/m52790.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/i2c/saa6588.c b/drivers/media/i2c/saa6588.c index 89e458c23983..00640233a5e3 100644 --- a/drivers/media/i2c/saa6588.c +++ b/drivers/media/i2c/saa6588.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/media/i2c/saa7110.c b/drivers/media/i2c/saa7110.c index 6f49886806ee..ad456ce051f9 100644 --- a/drivers/media/i2c/saa7110.c +++ b/drivers/media/i2c/saa7110.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/i2c/saa7185.c b/drivers/media/i2c/saa7185.c index eecad2d1edce..119050e1197a 100644 --- a/drivers/media/i2c/saa7185.c +++ b/drivers/media/i2c/saa7185.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/i2c/tlv320aic23b.c b/drivers/media/i2c/tlv320aic23b.c index 2e06c06cac9b..cc6104da34ef 100644 --- a/drivers/media/i2c/tlv320aic23b.c +++ b/drivers/media/i2c/tlv320aic23b.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/i2c/vp27smpx.c b/drivers/media/i2c/vp27smpx.c index d6c23bdbcd4a..ef0d8b8e3df7 100644 --- a/drivers/media/i2c/vp27smpx.c +++ b/drivers/media/i2c/vp27smpx.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/i2c/vpx3220.c b/drivers/media/i2c/vpx3220.c index 90b693f4e2ab..ce9f09370e22 100644 --- a/drivers/media/i2c/vpx3220.c +++ b/drivers/media/i2c/vpx3220.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/i2c/wm8739.c b/drivers/media/i2c/wm8739.c index f086e5e6e844..c885def54b15 100644 --- a/drivers/media/i2c/wm8739.c +++ b/drivers/media/i2c/wm8739.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/i2c/wm8775.c b/drivers/media/i2c/wm8775.c index 5581f4db02af..45039d756753 100644 --- a/drivers/media/i2c/wm8775.c +++ b/drivers/media/i2c/wm8775.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/pci/ivtv/ivtv-driver.h b/drivers/media/pci/ivtv/ivtv-driver.h index 10cba305dbd2..6b09a9514d64 100644 --- a/drivers/media/pci/ivtv/ivtv-driver.h +++ b/drivers/media/pci/ivtv/ivtv-driver.h @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/media/pci/meye/meye.c b/drivers/media/pci/meye/meye.c index e825bc93ea7a..24fba633c217 100644 --- a/drivers/media/pci/meye/meye.c +++ b/drivers/media/pci/meye/meye.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/pci/zoran/videocodec.c b/drivers/media/pci/zoran/videocodec.c index 13a3c07cd259..3c3cbce0f9cc 100644 --- a/drivers/media/pci/zoran/videocodec.c +++ b/drivers/media/pci/zoran/videocodec.c @@ -40,7 +40,7 @@ #ifdef CONFIG_PROC_FS #include #include -#include +#include #endif #include "videocodec.h" diff --git a/drivers/media/pci/zoran/zoran_driver.c b/drivers/media/pci/zoran/zoran_driver.c index 2170e174c335..94b9b616df98 100644 --- a/drivers/media/pci/zoran/zoran_driver.c +++ b/drivers/media/pci/zoran/zoran_driver.c @@ -66,7 +66,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/media/platform/arv.c b/drivers/media/platform/arv.c index 03c5098499c4..8fe59bf6cd3f 100644 --- a/drivers/media/platform/arv.c +++ b/drivers/media/platform/arv.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/media/usb/pvrusb2/pvrusb2-ioread.c b/drivers/media/usb/pvrusb2/pvrusb2-ioread.c index 70b8a052eb5b..3c7ca2c2c108 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-ioread.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-ioread.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #define BUFFER_COUNT 32 #define BUFFER_SIZE PAGE_ALIGN(0x4000) diff --git a/drivers/media/usb/pwc/pwc-ctrl.c b/drivers/media/usb/pwc/pwc-ctrl.c index 3a1618580ed6..655cef39eb3d 100644 --- a/drivers/media/usb/pwc/pwc-ctrl.c +++ b/drivers/media/usb/pwc/pwc-ctrl.c @@ -39,7 +39,7 @@ /* Control functions for the cam; brightness, contrast, video mode, etc. */ #ifdef __KERNEL__ -#include +#include #endif #include diff --git a/drivers/media/v4l2-core/v4l2-common.c b/drivers/media/v4l2-core/v4l2-common.c index 57cfe26a393f..a5ea1f517291 100644 --- a/drivers/media/v4l2-core/v4l2-common.c +++ b/drivers/media/v4l2-core/v4l2-common.c @@ -54,7 +54,7 @@ #if defined(CONFIG_SPI) #include #endif -#include +#include #include #include #include diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c index 8be561ab2615..fa2124cb31bd 100644 --- a/drivers/media/v4l2-core/v4l2-dev.c +++ b/drivers/media/v4l2-core/v4l2-dev.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c index 02b5f69e1a42..7b3b41368931 100644 --- a/drivers/message/fusion/mptctl.c +++ b/drivers/message/fusion/mptctl.c @@ -58,7 +58,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/message/fusion/mptlan.h b/drivers/message/fusion/mptlan.h index 69e9d5463564..8946e19dbfc8 100644 --- a/drivers/message/fusion/mptlan.h +++ b/drivers/message/fusion/mptlan.h @@ -70,7 +70,7 @@ #include #include -#include +#include #include /* Override mptbase.h by pre-defining these! */ diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index 520f58439080..e05c3245930a 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -76,7 +76,7 @@ #include #include #include -#include +#include #include #include "ibmasm.h" #include "remote.h" diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index bab3f07b1117..cb1698f268f1 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -43,7 +43,7 @@ #include #include -#include +#include #include "queue.h" #include "block.h" diff --git a/drivers/mmc/host/android-goldfish.c b/drivers/mmc/host/android-goldfish.c index dca5518b0139..590a8a4522be 100644 --- a/drivers/mmc/host/android-goldfish.c +++ b/drivers/mmc/host/android-goldfish.c @@ -49,7 +49,7 @@ #include #include -#include +#include #define DRIVER_NAME "goldfish_mmc" diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c index 220f9200fa52..cadea0620cd0 100644 --- a/drivers/mtd/devices/pmc551.c +++ b/drivers/mtd/devices/pmc551.c @@ -82,7 +82,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/mtd/devices/slram.c b/drivers/mtd/devices/slram.c index a70eb83e68f1..8087c36dc693 100644 --- a/drivers/mtd/devices/slram.c +++ b/drivers/mtd/devices/slram.c @@ -30,7 +30,7 @@ #include -#include +#include #include #include #include diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c index 9fb3b0dcdac2..664d206a4cbe 100644 --- a/drivers/mtd/ftl.c +++ b/drivers/mtd/ftl.c @@ -70,7 +70,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c index b66b541877f0..8db740d6eb08 100644 --- a/drivers/mtd/inftlcore.c +++ b/drivers/mtd/inftlcore.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c index 1388c8d7f309..8d6bb189ea8e 100644 --- a/drivers/mtd/inftlmount.c +++ b/drivers/mtd/inftlmount.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/mtd/maps/sun_uflash.c b/drivers/mtd/maps/sun_uflash.c index d459aca07881..414956eca0c9 100644 --- a/drivers/mtd/maps/sun_uflash.c +++ b/drivers/mtd/maps/sun_uflash.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 8d58acf33021..df8a5ef334c0 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include "mtdcore.h" diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 2a47a3f0e730..ce5ccc573a9c 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -37,7 +37,7 @@ #include #include -#include +#include #include "mtdcore.h" diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c index 46f27de018c3..e21161353e76 100644 --- a/drivers/mtd/nftlcore.c +++ b/drivers/mtd/nftlcore.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c index 31f89f1c6123..b8c293373ecc 100644 --- a/drivers/net/appletalk/ipddp.c +++ b/drivers/net/appletalk/ipddp.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include "ipddp.h" /* Our stuff */ diff --git a/drivers/net/eql.c b/drivers/net/eql.c index a10ad74cc8d2..fe13bfea30ac 100644 --- a/drivers/net/eql.c +++ b/drivers/net/eql.c @@ -127,7 +127,7 @@ #include #include -#include +#include static int eql_open(struct net_device *dev); static int eql_close(struct net_device *dev); diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index a7533780dddc..c7f9f2c77da7 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -88,7 +88,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index be5b80103bec..e7b1fa56b290 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -72,7 +72,7 @@ static int max_interrupt_work = 20; #include #include -#include +#include #include #include diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c index 9359a37fedc0..47c844cc9d27 100644 --- a/drivers/net/ethernet/3com/3c574_cs.c +++ b/drivers/net/ethernet/3com/3c574_cs.c @@ -92,7 +92,7 @@ earlier 3Com products. #include #include -#include +#include #include /*====================================================================*/ diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index b3560a364e53..40196f41768a 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -92,7 +92,7 @@ static int vortex_debug = 1; #include #include /* For nr_irqs only. */ #include -#include +#include /* Kernel compatibility defines, some common to David Hinds' PCMCIA package. This is only in the support-all-kernels source code. */ diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index a0cacbe846ba..9fe3990319ec 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -119,7 +119,7 @@ static const int multicast_filter_limit = 32; #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 1d84a0544ace..3da1fc539ef9 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -46,7 +46,7 @@ #include #include -#include +#include #define AXNET_CMD 0x00 #define AXNET_DATAPORT 0x10 /* NatSemi-defined port window offset. */ diff --git a/drivers/net/ethernet/8390/ne2k-pci.c b/drivers/net/ethernet/8390/ne2k-pci.c index 07355302443d..1bdea746926c 100644 --- a/drivers/net/ethernet/8390/ne2k-pci.c +++ b/drivers/net/ethernet/8390/ne2k-pci.c @@ -54,7 +54,7 @@ static int options[MAX_UNITS]; #include #include -#include +#include #include "8390.h" diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index 63079a6e20d9..bd0a2a14b649 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -49,7 +49,7 @@ #include #include -#include +#include #define PCNET_CMD 0x00 #define PCNET_DATAPORT 0x10 /* NatSemi-defined port window offset. */ diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 3aaad33cdbc6..c12d2618eebf 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -45,7 +45,7 @@ #include #include #include /* Processor type for cache alignment. */ -#include +#include #include /* diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index 16f0c70266bc..a1a52eb53b14 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -80,7 +80,7 @@ #include #include #include -#include +#include #define DRV_NAME "acenic" diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 11cf1e3e0295..9595f1bc535b 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -87,7 +87,7 @@ Revision History: #include #include -#include +#include #if IS_ENABLED(CONFIG_VLAN_8021Q) #define AMD8111E_VLAN_TAG_USED 1 diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c index 113a3b3cc50c..b556c926557a 100644 --- a/drivers/net/ethernet/amd/nmclan_cs.c +++ b/drivers/net/ethernet/amd/nmclan_cs.c @@ -151,7 +151,7 @@ Include Files #include #include -#include +#include #include /* ---------------------------------------------------------------------------- diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 1df3048a3cdb..48707ed76ffc 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -32,7 +32,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c index 3a05f9098e75..d8aff7a4b3c7 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include "cpl5_cmd.h" #include "regs.h" diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 7b2224ae72f2..d76491676b51 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -50,7 +50,7 @@ #include #include #include -#include +#include #include "common.h" #include "cxgb3_ioctl.h" diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 66c37fac59b2..6f951877430b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -63,7 +63,7 @@ #include #include #include -#include +#include #include #include "cxgb4.h" diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index 90c573b8ccaf..57c17e797ae3 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -49,7 +49,7 @@ #include #include -#include +#include #include /* These identify the driver base version and may not be removed. */ diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index 51fda3a6b13f..df4a871df633 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -472,7 +472,7 @@ #include #include #include -#include +#include #ifdef CONFIG_PPC_PMAC #include #endif /* CONFIG_PPC_PMAC */ diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index df4994919456..07e10a45beaa 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -90,7 +90,7 @@ #include #include #include -#include +#include #include #ifdef CONFIG_TULIP_DM910X diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 5f1377449b8f..17e566a8b345 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #ifdef CONFIG_SPARC #include diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index e1c4133b8787..f82ebe5d89ee 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #define uw32(reg, val) iowrite32(val, ioaddr + (reg)) #define ur32(reg) ioread32(ioaddr + (reg)) diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index feda96d585e7..bc9bf88e5831 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -129,7 +129,7 @@ static int full_duplex[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; #include #include #include -#include +#include #include /* Processor type for cache alignment. */ #include #include diff --git a/drivers/net/ethernet/dec/tulip/xircom_cb.c b/drivers/net/ethernet/dec/tulip/xircom_cb.c index 19e4ea15b504..a8de79355578 100644 --- a/drivers/net/ethernet/dec/tulip/xircom_cb.c +++ b/drivers/net/ethernet/dec/tulip/xircom_cb.c @@ -30,7 +30,7 @@ #include #include -#include +#include #include #ifdef CONFIG_NET_POLL_CONTROLLER #include diff --git a/drivers/net/ethernet/dlink/dl2k.h b/drivers/net/ethernet/dlink/dl2k.h index 8f4f61262d5c..5d8ae5320242 100644 --- a/drivers/net/ethernet/dlink/dl2k.h +++ b/drivers/net/ethernet/dlink/dl2k.h @@ -31,7 +31,7 @@ #include #include /* Processor type for cache alignment. */ #include -#include +#include #include #include #include diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index eab36acfc0d1..2e5b66762e15 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -91,7 +91,7 @@ static char *media[MAX_UNITS]; #include #include #include -#include +#include #include /* Processor type for cache alignment. */ #include #include diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index 6967b287b6e7..9cb436cb3745 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -88,7 +88,7 @@ static int full_duplex[MAX_UNITS] = { -1, -1, -1, -1, -1, -1, -1, -1 }; #include /* Processor type for cache alignment. */ #include -#include +#include #include /* These identify the driver base version and may not be removed. */ diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index d9f3a480ca1b..1f98838f32b7 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include "fs_enet.h" diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c index 120c758f5d01..6e64989f8478 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c @@ -42,7 +42,7 @@ #include #include -#include +#include #include "fs_enet.h" diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c index 777beffa1e1e..db9c0bcf54cd 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c @@ -36,7 +36,7 @@ #include #include -#include +#include #ifdef CONFIG_8xx #include diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c index 15abd37d70e3..96d44cf44fe0 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c @@ -35,7 +35,7 @@ #include #include -#include +#include #ifdef CONFIG_8xx #include diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c index a89267b94352..1582d82483ec 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include "fs_enet.h" diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 756f7e763d5f..a6e7afa878be 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -93,7 +93,7 @@ #include #endif #include -#include +#include #include #include #include diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h index 6e8a9c8467b9..5aa814799d70 100644 --- a/drivers/net/ethernet/freescale/gianfar.h +++ b/drivers/net/ethernet/freescale/gianfar.h @@ -40,7 +40,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index 56588f2e1d91..a93e0199c369 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -32,7 +32,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 53c5fcf1436c..9d660888510f 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -37,7 +37,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c index 8ba636f61b50..b642990b549c 100644 --- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c +++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c @@ -32,7 +32,7 @@ #include #include -#include +#include #include #include "ucc_geth.h" diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c index 51c4abc51bf4..a69cd19a55ae 100644 --- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c +++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c @@ -54,7 +54,7 @@ #include #include -#include +#include #include /*====================================================================*/ diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 52a69c925965..5909615c27f7 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -47,7 +47,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c index d2b29b490ae0..e5d72559cca9 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c @@ -30,7 +30,7 @@ #include "ixgb.h" -#include +#include #define IXGB_ALL_RAR_ENTRIES 16 diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 22b0821c1da0..90eac63f9606 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -51,7 +51,7 @@ #include /* Processor type for cache alignment. */ #include #include -#include +#include #define DRV_NAME "natsemi" #define DRV_VERSION "2.1" diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 93c4bdc0cdca..f9d2eb9a920a 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -119,7 +119,7 @@ #include #include -#include +#include #define DRV_NAME "ns83820" diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index 2d04679a923a..baff744b560e 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -160,7 +160,7 @@ static int tx_params[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; #include #include -#include +#include #include /* Processor type for cache alignment. */ #include #include diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index 2a2ca5fa0c69..fa7770da6ef8 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -100,7 +100,7 @@ static int gx_fix; #include #include #include -#include +#include #include /* Processor type for cache alignment. */ #include #include diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index b7c89ebcf4a2..0b3cd58093d5 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -76,7 +76,7 @@ #include #include #include -#include +#include /* These identify the driver base version and may not be removed. */ static char version[] = diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index 42051ab98cf0..d390b9663dc3 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -60,7 +60,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 39fca6c0b68d..19a458716f1a 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -74,7 +74,7 @@ #include /* Processor type for cache alignment. */ #include #include -#include /* User space memory access functions */ +#include /* User space memory access functions */ #include "sis900.h" diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index fe9760ffab51..55a95e1d69d6 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -86,7 +86,7 @@ static int rx_copybreak; #include #include #include -#include +#include #include /* These identify the driver base version and may not be removed. */ diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index f1c75e291e55..67154621abcf 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -52,7 +52,7 @@ #include #include -#include +#include /*====================================================================*/ diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index e9e5ef241c6f..0e8e89f17dbb 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -99,7 +99,7 @@ #include #include #include -#include +#include #define cas_page_map(x) kmap_atomic((x)) #define cas_page_unmap(x) kunmap_atomic((x)) diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index 66ecf0fcc330..d277e4107976 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -42,7 +42,7 @@ #include #include -#include +#include #include #ifdef CONFIG_SPARC diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index ca96408058b0..72ff05cd3ed8 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -49,7 +49,7 @@ #include #include #endif -#include +#include #include #include diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index ba5c54249055..0a6c4e804eed 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -114,7 +114,7 @@ static const int multicast_filter_limit = 32; #include /* Processor type for cache alignment. */ #include #include -#include +#include #include /* These identify the driver base version and may not be removed. */ diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index 3b08ec766076..f71883264cc0 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -88,7 +88,7 @@ #include #include -#include +#include #ifndef MANFID_COMPAQ #define MANFID_COMPAQ 0x0138 diff --git a/drivers/net/fddi/skfp/skfddi.c b/drivers/net/fddi/skfp/skfddi.c index 3a639180e4a0..2414f1dc8ddd 100644 --- a/drivers/net/fddi/skfp/skfddi.c +++ b/drivers/net/fddi/skfp/skfddi.c @@ -88,7 +88,7 @@ static const char * const boot_msg = #include #include -#include +#include #include "h/types.h" #undef ADDR // undo Linux definition diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 470b3dcd54e5..922bf440e9f1 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -13,7 +13,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 78dbc44540f6..7d054697b199 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -55,7 +55,7 @@ #include #include #include -#include +#include /* --------------------------------------------------------------------- */ diff --git a/drivers/net/hamradio/baycom_par.c b/drivers/net/hamradio/baycom_par.c index 072cddce9264..809dc25909d1 100644 --- a/drivers/net/hamradio/baycom_par.c +++ b/drivers/net/hamradio/baycom_par.c @@ -86,7 +86,7 @@ #include #include -#include +#include /* --------------------------------------------------------------------- */ diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c index 7b916d5b14b9..ebc06822fd4d 100644 --- a/drivers/net/hamradio/baycom_ser_fdx.c +++ b/drivers/net/hamradio/baycom_ser_fdx.c @@ -82,7 +82,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/net/hamradio/baycom_ser_hdx.c b/drivers/net/hamradio/baycom_ser_hdx.c index f9a8976195ba..60fcf512c208 100644 --- a/drivers/net/hamradio/baycom_ser_hdx.c +++ b/drivers/net/hamradio/baycom_ser_hdx.c @@ -67,7 +67,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 622ab3ab9e93..f62e7f325cf9 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -69,7 +69,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c index e4137c1b3df9..2479072981a1 100644 --- a/drivers/net/hamradio/dmascc.c +++ b/drivers/net/hamradio/dmascc.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include "z8530.h" diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index 4bad0b894e9c..8c3633c1d078 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -58,7 +58,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 1dfe2304daa7..ece59c54a653 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -17,7 +17,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c index b8083161ef46..6754cd01c605 100644 --- a/drivers/net/hamradio/scc.c +++ b/drivers/net/hamradio/scc.c @@ -178,7 +178,7 @@ #include #include -#include +#include #include "z8530.h" diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index aaff07c10058..b6891ada1d7b 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -68,7 +68,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c index f5a9728b89f3..dd7fc6659ad4 100644 --- a/drivers/net/hippi/rrunner.c +++ b/drivers/net/hippi/rrunner.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #define rr_if_busy(dev) netif_queue_stopped(dev) #define rr_if_running(dev) netif_running(dev) diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index 7a3f990c1935..7a20a9a4663a 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/irda/kingsun-sir.c b/drivers/net/irda/kingsun-sir.c index fb5d162ec7d2..24c0f169a7b1 100644 --- a/drivers/net/irda/kingsun-sir.c +++ b/drivers/net/irda/kingsun-sir.c @@ -71,7 +71,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/net/irda/ks959-sir.c b/drivers/net/irda/ks959-sir.c index 8e6e0edf2440..3affded3e30d 100644 --- a/drivers/net/irda/ks959-sir.c +++ b/drivers/net/irda/ks959-sir.c @@ -123,7 +123,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/net/irda/ksdazzle-sir.c b/drivers/net/irda/ksdazzle-sir.c index 37f23a189b35..741452c7ce35 100644 --- a/drivers/net/irda/ksdazzle-sir.c +++ b/drivers/net/irda/ksdazzle-sir.c @@ -87,7 +87,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c index bca6a1e72d1d..6f6ed75b63c9 100644 --- a/drivers/net/irda/mcs7780.c +++ b/drivers/net/irda/mcs7780.c @@ -55,7 +55,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c index a0849f49bbec..ffedad2a360a 100644 --- a/drivers/net/irda/vlsi_ir.c +++ b/drivers/net/irda/vlsi_ir.c @@ -45,7 +45,7 @@ MODULE_LICENSE("GPL"); #include #include #include -#include +#include #include #include diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 6255973e3dda..1e05b7c2d157 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -40,7 +40,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/net/phy/davicom.c b/drivers/net/phy/davicom.c index 36e3e2033eca..e28913d9ea7e 100644 --- a/drivers/net/phy/davicom.c +++ b/drivers/net/phy/davicom.c @@ -32,7 +32,7 @@ #include #include -#include +#include #define MII_DM9161_SCR 0x10 #define MII_DM9161_SCR_INIT 0x0610 diff --git a/drivers/net/phy/icplus.c b/drivers/net/phy/icplus.c index 22b51f01a94a..567280a72241 100644 --- a/drivers/net/phy/icplus.c +++ b/drivers/net/phy/icplus.c @@ -28,7 +28,7 @@ #include #include -#include +#include MODULE_DESCRIPTION("ICPlus IP175C/IP101A/IP101G/IC1001 PHY drivers"); MODULE_AUTHOR("Michael Barkowski"); diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c index b9fde1bcf0f0..8d198a1f0031 100644 --- a/drivers/net/phy/lxt.c +++ b/drivers/net/phy/lxt.c @@ -32,7 +32,7 @@ #include #include -#include +#include /* The Level one LXT970 is used by many boards */ diff --git a/drivers/net/phy/qsemi.c b/drivers/net/phy/qsemi.c index d470db89e8dd..dbef8002bc28 100644 --- a/drivers/net/phy/qsemi.c +++ b/drivers/net/phy/qsemi.c @@ -32,7 +32,7 @@ #include #include -#include +#include /* ------------------------------------------------------------------------- */ /* The Quality Semiconductor QS6612 is used on the RPX CLLF */ diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index 9c889e0303dd..feb9569e3345 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #define PPP_VERSION "2.4.2" diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c index 925d3e295bac..9ae53986cb4a 100644 --- a/drivers/net/ppp/ppp_synctty.c +++ b/drivers/net/ppp/ppp_synctty.c @@ -47,7 +47,7 @@ #include #include #include -#include +#include #define PPP_VERSION "2.4.2" diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index f017c72bb7fd..d7e405268983 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -83,7 +83,7 @@ #include #include -#include +#include #define PPPOE_HASH_BITS 4 #define PPPOE_HASH_SIZE (1 << PPPOE_HASH_BITS) diff --git a/drivers/net/ppp/pppox.c b/drivers/net/ppp/pppox.c index b9c8be6283d3..c0599b3b23c0 100644 --- a/drivers/net/ppp/pppox.c +++ b/drivers/net/ppp/pppox.c @@ -34,7 +34,7 @@ #include -#include +#include static const struct pppox_proto *pppox_protos[PX_MAX_PROTO + 1]; diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c index 8b8b53259783..7820fced33f6 100644 --- a/drivers/net/sb1000.c +++ b/drivers/net/sb1000.c @@ -55,7 +55,7 @@ static char version[] = "sb1000.c:v1.1.2 6/01/98 (fventuri@mediaone.net)\n"; #include #include -#include +#include #ifdef SB1000_DEBUG static int sb1000_debug = SB1000_DEBUG; diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index 27ed25252aac..5782733959f0 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -75,7 +75,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 7e933d8ff811..9841f3dc0682 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -64,7 +64,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 57e88b814700..cd8e02c94be0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -73,7 +73,7 @@ #include #include -#include +#include /* Uncomment to enable debugging */ /* #define TUN_DEBUG 1 */ diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index a1f2f6f1e614..3daa41bdd4ea 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #undef DEBUG diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index 338aed5da14d..876f02f4945e 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -54,7 +54,7 @@ #include #include #include -#include +#include #include #undef DEBUG diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 399f7ee57aea..24e803fe9a53 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include "pegasus.h" /* diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 93a1bda1c1e5..95b7bd0d7abc 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include /* Version Information */ #define DRIVER_VERSION "v0.6.2 (2004/08/27)" diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c index ae6ecf401189..65ee2a6f248c 100644 --- a/drivers/net/wan/dlci.c +++ b/drivers/net/wan/dlci.c @@ -52,7 +52,7 @@ #include #include -#include +#include static const char version[] = "DLCI driver v0.35, 4 Jan 1997, mike.mclagan@linux.org"; diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c index 7351e5440ed7..799830ffcae2 100644 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@ -95,7 +95,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 03696d35ee9c..33265eb50420 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include "farsync.h" diff --git a/drivers/net/wan/hd64570.c b/drivers/net/wan/hd64570.c index dc334c85d966..166696d2c496 100644 --- a/drivers/net/wan/hd64570.c +++ b/drivers/net/wan/hd64570.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include "hd64570.h" #define get_msci(port) (phy_node(port) ? MSCI1_OFFSET : MSCI0_OFFSET) diff --git a/drivers/net/wan/hd64572.c b/drivers/net/wan/hd64572.c index e92ecf1d3314..7ef49dab6855 100644 --- a/drivers/net/wan/hd64572.c +++ b/drivers/net/wan/hd64572.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include "hd64572.h" #define NAPI_WEIGHT 16 diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 6676607164d6..9df9ed62beff 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 001b7796740d..4698450c77d1 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -59,7 +59,7 @@ #include /* Processor type for cache alignment. */ #include #include -#include +#include //#include #define DRIVER_MAJOR_VERSION 1 diff --git a/drivers/net/wan/lmc/lmc_media.c b/drivers/net/wan/lmc/lmc_media.c index ff2e4a5654c7..cffe23bd16e1 100644 --- a/drivers/net/wan/lmc/lmc_media.c +++ b/drivers/net/wan/lmc/lmc_media.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include "lmc.h" #include "lmc_var.h" diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c index 3f83be98d469..3ca3419c54a0 100644 --- a/drivers/net/wan/sbni.c +++ b/drivers/net/wan/sbni.c @@ -63,7 +63,7 @@ #include #include #include -#include +#include #include "sbni.h" diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c index 421ac5f85699..236c62538036 100644 --- a/drivers/net/wan/sdla.c +++ b/drivers/net/wan/sdla.c @@ -56,7 +56,7 @@ #include #include -#include +#include static const char* version = "SDLA driver v0.30, 12 Sep 1996, mike.mclagan@linux.org"; diff --git a/drivers/net/wireless/atmel/atmel.c b/drivers/net/wireless/atmel/atmel.c index eb92d5ab7a27..e12f62356fd1 100644 --- a/drivers/net/wireless/atmel/atmel.c +++ b/drivers/net/wireless/atmel/atmel.c @@ -48,7 +48,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c index 64176090b196..356aba9d3d53 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c @@ -148,7 +148,7 @@ that only one external action is invoked at a time. #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_geo.c b/drivers/net/wireless/intel/ipw2x00/libipw_geo.c index 218f2a32de21..ce7eda20a68f 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_geo.c +++ b/drivers/net/wireless/intel/ipw2x00/libipw_geo.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include "libipw.h" diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_module.c b/drivers/net/wireless/intel/ipw2x00/libipw_module.c index 2332075565f2..c58c5b2dcce5 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_module.c +++ b/drivers/net/wireless/intel/ipw2x00/libipw_module.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_rx.c b/drivers/net/wireless/intel/ipw2x00/libipw_rx.c index 1c1ec7bb9302..6df19f03355a 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_rx.c +++ b/drivers/net/wireless/intel/ipw2x00/libipw_rx.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c index e8c039879b05..048f1e3ada11 100644 --- a/drivers/net/wireless/intel/ipw2x00/libipw_tx.c +++ b/drivers/net/wireless/intel/ipw2x00/libipw_tx.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include "libipw.h" diff --git a/drivers/net/wireless/intersil/hostap/hostap_hw.c b/drivers/net/wireless/intersil/hostap/hostap_hw.c index a8a9bd8e176a..544ef7adde7d 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_hw.c +++ b/drivers/net/wireless/intersil/hostap/hostap_hw.c @@ -32,7 +32,7 @@ #include -#include +#include #include #include diff --git a/drivers/net/wireless/intersil/hostap/hostap_main.c b/drivers/net/wireless/intersil/hostap/hostap_main.c index 1a16b8cb366e..544fc09dcb62 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_main.c +++ b/drivers/net/wireless/intersil/hostap/hostap_main.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include "hostap_wlan.h" #include "hostap_80211.h" diff --git a/drivers/net/wireless/intersil/prism54/isl_38xx.c b/drivers/net/wireless/intersil/prism54/isl_38xx.c index 6700387ef9ab..ce9d4db0d9ca 100644 --- a/drivers/net/wireless/intersil/prism54/isl_38xx.c +++ b/drivers/net/wireless/intersil/prism54/isl_38xx.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include "prismcompat.h" diff --git a/drivers/net/wireless/intersil/prism54/isl_ioctl.c b/drivers/net/wireless/intersil/prism54/isl_ioctl.c index 48e8a978a832..334717b0a2be 100644 --- a/drivers/net/wireless/intersil/prism54/isl_ioctl.c +++ b/drivers/net/wireless/intersil/prism54/isl_ioctl.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include "prismcompat.h" #include "isl_ioctl.h" diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 4fdc7223c894..b94479441b0c 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -53,7 +53,7 @@ #include #include -#include +#include /* Warning : these stuff will slow down the driver... */ #define WIRELESS_SPY /* Enable spying addresses */ diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c index d9d29ab88184..acec0d9ec422 100644 --- a/drivers/net/wireless/wl3501_cs.c +++ b/drivers/net/wireless/wl3501_cs.c @@ -51,7 +51,7 @@ #include #include -#include +#include #include "wl3501.h" diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c index 5371b374f1fe..e8f68f5732f1 100644 --- a/drivers/nubus/proc.c +++ b/drivers/nubus/proc.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include static int diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c index c0cc4e7ff023..67935fbbbcab 100644 --- a/drivers/oprofile/event_buffer.c +++ b/drivers/oprofile/event_buffer.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include "oprof.h" #include "event_buffer.h" diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index 134398e0231b..d77ebbfc67c9 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "oprof.h" diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 3ed6238f8f6e..553ef8a5d588 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -48,7 +48,7 @@ #include #include /* for L1_CACHE_BYTES */ -#include +#include #include #include #include diff --git a/drivers/parisc/ccio-rm-dma.c b/drivers/parisc/ccio-rm-dma.c index f78f6f1aef47..1bf988010855 100644 --- a/drivers/parisc/ccio-rm-dma.c +++ b/drivers/parisc/ccio-rm-dma.c @@ -40,7 +40,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/parisc/eisa_eeprom.c b/drivers/parisc/eisa_eeprom.c index 783906fe659a..4dd9b1308128 100644 --- a/drivers/parisc/eisa_eeprom.c +++ b/drivers/parisc/eisa_eeprom.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #define EISA_EEPROM_MINOR 241 diff --git a/drivers/parisc/eisa_enumerator.c b/drivers/parisc/eisa_enumerator.c index 21905fef2cbf..d9bffe8d29b9 100644 --- a/drivers/parisc/eisa_enumerator.c +++ b/drivers/parisc/eisa_enumerator.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index b48243131993..ff1a332d76e4 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -49,7 +49,7 @@ #include /* HZ */ #include #include -#include +#include /* The control of the LEDs and LCDs on PARISC-machines have to be done completely in software. The necessary calculations are done in a work queue diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c index 3651c3871d5b..055f83fddc18 100644 --- a/drivers/parisc/pdc_stable.c +++ b/drivers/parisc/pdc_stable.c @@ -68,7 +68,7 @@ #include #include -#include +#include #include #define PDCS_VERSION "0.30" diff --git a/drivers/parport/daisy.c b/drivers/parport/daisy.c index 5bed17f68ef4..d998d0ed2bec 100644 --- a/drivers/parport/daisy.c +++ b/drivers/parport/daisy.c @@ -26,7 +26,7 @@ #include #include -#include +#include #undef DEBUG diff --git a/drivers/parport/ieee1284_ops.c b/drivers/parport/ieee1284_ops.c index 2e21af43d91e..c0e7d21c88c2 100644 --- a/drivers/parport/ieee1284_ops.c +++ b/drivers/parport/ieee1284_ops.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #undef DEBUG /* undef me for production */ diff --git a/drivers/parport/parport_gsc.c b/drivers/parport/parport_gsc.c index 6e3a60c78873..dd6d4ccb41e4 100644 --- a/drivers/parport/parport_gsc.c +++ b/drivers/parport/parport_gsc.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/parport/probe.c b/drivers/parport/probe.c index d763bc9e44c1..4d1d6eaf333d 100644 --- a/drivers/parport/probe.c +++ b/drivers/parport/probe.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include static const struct { const char *token; diff --git a/drivers/parport/procfs.c b/drivers/parport/procfs.c index 74ed3e459a3e..8ee44a104ac4 100644 --- a/drivers/parport/procfs.c +++ b/drivers/parport/procfs.c @@ -23,7 +23,7 @@ #include #include -#include +#include #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index f6221d739f59..68d105aaf4e2 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include "acpiphp.h" #include "../pci.h" diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c index ec009a7dba20..33d300d12411 100644 --- a/drivers/pci/hotplug/cpqphp_core.c +++ b/drivers/pci/hotplug/cpqphp_core.c @@ -40,7 +40,7 @@ #include #include -#include +#include #include "cpqphp.h" #include "cpqphp_nvram.h" diff --git a/drivers/pci/hotplug/cpqphp_nvram.c b/drivers/pci/hotplug/cpqphp_nvram.c index c25fc9061059..daae8071a156 100644 --- a/drivers/pci/hotplug/cpqphp_nvram.c +++ b/drivers/pci/hotplug/cpqphp_nvram.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include "cpqphp.h" #include "cpqphp_nvram.h" diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index 56013d0daf7f..7b0e97be9063 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include "../pci.h" #include "cpci_hotplug.h" diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 2408abe4ee8c..f82710a8694d 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include "pci.h" diff --git a/drivers/pci/syscall.c b/drivers/pci/syscall.c index b91c4da68365..9bf993e1f71e 100644 --- a/drivers/pci/syscall.c +++ b/drivers/pci/syscall.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include "pci.h" SYSCALL_DEFINE5(pciconfig_read, unsigned long, bus, unsigned long, dfn, diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index c890a49587e4..aa2ee51d3547 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -68,7 +68,7 @@ #include #include #endif -#include +#include #include #define dprintk(fmt, ...) \ diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index aa65a857a6b1..cacb43fb1df7 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -82,7 +82,7 @@ #include #include #include -#include +#include #include /* ThinkPad CMOS commands */ diff --git a/drivers/pnp/interface.c b/drivers/pnp/interface.c index 4b6808ff0e5d..5c5b3d47b5f6 100644 --- a/drivers/pnp/interface.c +++ b/drivers/pnp/interface.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include "base.h" diff --git a/drivers/pnp/pnpbios/proc.c b/drivers/pnp/pnpbios/proc.c index c212db0fc65d..5ee6b2a5f8d5 100644 --- a/drivers/pnp/pnpbios/proc.c +++ b/drivers/pnp/pnpbios/proc.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include "pnpbios.h" diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 84ca314c87e3..dd46e96a3034 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include /* This is ugly... */ diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 67bf50c9946f..ade04216c970 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c index 6c5d671304b4..8713fefd794b 100644 --- a/drivers/s390/block/dasd_eer.c +++ b/drivers/s390/block/dasd_eer.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/s390/block/dasd_erp.c b/drivers/s390/block/dasd_erp.c index 113c1c1fa1af..9e3419124264 100644 --- a/drivers/s390/block/dasd_erp.c +++ b/drivers/s390/block/dasd_erp.c @@ -15,7 +15,7 @@ #include #include -#include +#include /* This is ugly... */ #define PRINTK_HEADER "dasd_erp:" diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index e2fa759bf2ad..8b1341fb2e0d 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -16,7 +16,7 @@ #include #include -#include +#include /* This is ugly... */ #define PRINTK_HEADER "dasd_gendisk:" diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 9dfbd972f844..ec65c1e51c2a 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include /* This is ugly... */ #define PRINTK_HEADER "dasd_ioctl:" diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index bad7a196bf84..70dc2c4cd3f7 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -20,7 +20,7 @@ #include #include -#include +#include /* This is ugly... */ #define PRINTK_HEADER "dasd_proc:" diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index 288f59a4147b..b9d7e755c8a3 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #define XPRAM_NAME "xpram" #define XPRAM_DEVS 1 /* one partition */ diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index 1b8d825623bd..9ec4ae056158 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c index 7b9c50aa4cc9..82c913318b73 100644 --- a/drivers/s390/char/keyboard.c +++ b/drivers/s390/char/keyboard.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "keyboard.h" diff --git a/drivers/s390/char/monreader.c b/drivers/s390/char/monreader.c index ebdeaa53182d..027ac6ae5eea 100644 --- a/drivers/s390/char/monreader.c +++ b/drivers/s390/char/monreader.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c index 9b5d1138b2e2..571a7e352755 100644 --- a/drivers/s390/char/monwriter.c +++ b/drivers/s390/char/monwriter.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/s390/char/sclp_rw.c b/drivers/s390/char/sclp_rw.c index 6010cd347a08..91b26df5227d 100644 --- a/drivers/s390/char/sclp_rw.c +++ b/drivers/s390/char/sclp_rw.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include "sclp.h" #include "sclp_rw.h" diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c index 9259017a1295..236b736ae136 100644 --- a/drivers/s390/char/sclp_tty.c +++ b/drivers/s390/char/sclp_tty.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "ctrlchar.h" #include "sclp.h" diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c index 68d6ee7ae504..095481d32236 100644 --- a/drivers/s390/char/sclp_vt220.c +++ b/drivers/s390/char/sclp_vt220.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include "sclp.h" #include "ctrlchar.h" diff --git a/drivers/s390/char/tape_char.c b/drivers/s390/char/tape_char.c index 77f9b9c2f701..46ac1164f242 100644 --- a/drivers/s390/char/tape_char.c +++ b/drivers/s390/char/tape_char.c @@ -18,7 +18,7 @@ #include #include -#include +#include #define TAPE_DBF_AREA tape_core_dbf diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index 272cb6cd1b2a..e5ebe2fbee23 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include "raw3270.h" #include "tty3270.h" diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c index 2a67b496a9e2..65f5a794f26d 100644 --- a/drivers/s390/char/vmcp.c +++ b/drivers/s390/char/vmcp.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include "vmcp.h" static debug_info_t *vmcp_debug; diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index 3167e8581994..57974a1e0e03 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c index ff18f373af9a..04aceb694d51 100644 --- a/drivers/s390/char/vmur.c +++ b/drivers/s390/char/vmur.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index f771e5e9e26b..d3b51edb056e 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index 9082476b51db..bf7f5d4c50e1 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 854a6e58dfea..51eece9af577 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c index c7d48a18199e..b97c5d5ee5a4 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.c +++ b/drivers/s390/crypto/zcrypt_cex2a.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include "ap_bus.h" diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c index 26ceaa696765..600604782b65 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.c +++ b/drivers/s390/crypto/zcrypt_pcixcc.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include "ap_bus.h" diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index 2981024a2438..3f85b97ab8d2 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -62,7 +62,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/sbus/char/display7seg.c b/drivers/sbus/char/display7seg.c index 33fbe8249fd5..04efed171c88 100644 --- a/drivers/sbus/char/display7seg.c +++ b/drivers/sbus/char/display7seg.c @@ -17,7 +17,7 @@ #include #include #include -#include /* put_/get_user */ +#include /* put_/get_user */ #include #include diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c index 5609b602c54d..56e962a01493 100644 --- a/drivers/sbus/char/envctrl.c +++ b/drivers/sbus/char/envctrl.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/sbus/char/flash.c b/drivers/sbus/char/flash.c index 206ef4232adf..216f923161d1 100644 --- a/drivers/sbus/char/flash.c +++ b/drivers/sbus/char/flash.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c index a40ee1e37486..6ff61dad5e21 100644 --- a/drivers/sbus/char/jsflash.c +++ b/drivers/sbus/char/jsflash.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/sbus/char/openprom.c b/drivers/sbus/char/openprom.c index 4612691c6619..2c2e6a3b4c7e 100644 --- a/drivers/sbus/char/openprom.c +++ b/drivers/sbus/char/openprom.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #ifdef CONFIG_PCI #include diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index 316f87fe3299..00e7968a1d70 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -92,7 +92,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c index 970d8fa6bd53..b150e131b2e7 100644 --- a/drivers/scsi/3w-sas.c +++ b/drivers/scsi/3w-sas.c @@ -64,7 +64,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c index aa412ab02765..33261b690774 100644 --- a/drivers/scsi/3w-xxxx.c +++ b/drivers/scsi/3w-xxxx.c @@ -210,7 +210,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 6678d1fd897b..1ee7c654f7b8 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include /* For flush_kernel_dcache_page */ #include diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c index 5648b715fed9..e1daff230c7d 100644 --- a/drivers/scsi/aacraid/commctrl.c +++ b/drivers/scsi/aacraid/commctrl.c @@ -41,7 +41,7 @@ #include /* ssleep prototype */ #include #include -#include +#include #include #include "aacraid.h" diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c index 9e45749d55ed..af032c46ec0e 100644 --- a/drivers/scsi/arcmsr/arcmsr_hba.c +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -61,7 +61,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index 9d253cb83ee7..d9e15210b110 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include "bfad_drv.h" diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c index 27c0dce22e72..5f75e638ec95 100644 --- a/drivers/scsi/dpt_i2o.c +++ b/drivers/scsi/dpt_i2o.c @@ -37,7 +37,7 @@ MODULE_DESCRIPTION("Adaptec I2O RAID Driver"); //////////////////////////////////////////////////////////////// #include /* For SCSI-Passthrough */ -#include +#include #include #include /* for kmalloc() */ diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c index 0a767740bf02..d020a13646ae 100644 --- a/drivers/scsi/gdth.c +++ b/drivers/scsi/gdth.c @@ -130,7 +130,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c index a83f705ed8a5..db17ad15b0c1 100644 --- a/drivers/scsi/hptiop.c +++ b/drivers/scsi/hptiop.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/ips.h b/drivers/scsi/ips.h index 45b9566b928e..b782bb60baf0 100644 --- a/drivers/scsi/ips.h +++ b/drivers/scsi/ips.h @@ -51,7 +51,7 @@ #define _IPS_H_ #include - #include +#include #include /* diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 9d05302a3bcd..3c63c292cb92 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/megaraid/megaraid_mm.h b/drivers/scsi/megaraid/megaraid_mm.h index 55b425c0a654..a30e725f2d5c 100644 --- a/drivers/scsi/megaraid/megaraid_mm.h +++ b/drivers/scsi/megaraid/megaraid_mm.h @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 6484c382f670..d5cf15eb8c5e 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c index a2960f5d98ec..e8196c55b633 100644 --- a/drivers/scsi/osst.c +++ b/drivers/scsi/osst.c @@ -52,7 +52,7 @@ static const char * osst_version = "0.99.4"; #include #include #include -#include +#include #include /* The driver prints some debugging information on the console if DEBUG diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c index 9f6012b78e56..b4336e0cd85f 100644 --- a/drivers/scsi/qla2xxx/qla_sup.c +++ b/drivers/scsi/qla2xxx/qla_sup.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include /* * NVRAM support routines diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c index c4f7b56fa6f6..8b8c814df5c7 100644 --- a/drivers/scsi/scsi_ioctl.c +++ b/drivers/scsi/scsi_ioctl.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c index 7a74b82e8973..480a597b3877 100644 --- a/drivers/scsi/scsi_proc.c +++ b/drivers/scsi/scsi_proc.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 1622e23138e0..b1933041da39 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index bed2bbd6b923..94352e4df831 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index 03054c0e7689..dfffdf63e44c 100644 --- a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 605887d5ee57..5f35b863e1a7 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -41,7 +41,7 @@ static const char *verstr = "20160209"; #include #include -#include +#include #include #include diff --git a/drivers/tty/amiserial.c b/drivers/tty/amiserial.c index dfbb974927f2..dea16bb8c46a 100644 --- a/drivers/tty/amiserial.c +++ b/drivers/tty/amiserial.c @@ -127,7 +127,7 @@ static struct serial_state rs_table[1]; #define NR_PORTS ARRAY_SIZE(rs_table) -#include +#include #define serial_isroot() (capable(CAP_SYS_ADMIN)) diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c index ce864875330e..9b5c0fb216b5 100644 --- a/drivers/tty/hvc/hvc_console.c +++ b/drivers/tty/hvc/hvc_console.c @@ -42,7 +42,7 @@ #include #include -#include +#include #include "hvc_console.h" diff --git a/drivers/tty/hvc/hvcs.c b/drivers/tty/hvc/hvcs.c index 3c4d7c2b4ade..7823d6d998cf 100644 --- a/drivers/tty/hvc/hvcs.c +++ b/drivers/tty/hvc/hvcs.c @@ -81,7 +81,7 @@ #include #include #include -#include +#include #include /* diff --git a/drivers/tty/hvc/hvsi.c b/drivers/tty/hvc/hvsi.c index 96ce6bd1cc6f..2e578d6433af 100644 --- a/drivers/tty/hvc/hvsi.c +++ b/drivers/tty/hvc/hvsi.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c index 60d37b225589..4caf0c3b1f99 100644 --- a/drivers/tty/moxa.c +++ b/drivers/tty/moxa.c @@ -47,7 +47,7 @@ #include #include -#include +#include #include "moxa.h" diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c index 69294ae154be..7b8f383fb090 100644 --- a/drivers/tty/mxser.c +++ b/drivers/tty/mxser.c @@ -43,7 +43,7 @@ #include #include -#include +#include #include "mxser.h" diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index a7fa016f31eb..eb278832f5ce 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -103,7 +103,7 @@ #include #include -#include +#include /* * Buffers for individual HDLC frames diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c index 345111467b85..305b6490d405 100644 --- a/drivers/tty/n_r3964.c +++ b/drivers/tty/n_r3964.c @@ -65,7 +65,7 @@ #include #include #include -#include +#include /*#define DEBUG_QUEUE*/ diff --git a/drivers/tty/serial/icom.c b/drivers/tty/serial/icom.c index c60a8d5e4020..d83783cfbade 100644 --- a/drivers/tty/serial/icom.c +++ b/drivers/tty/serial/icom.c @@ -53,7 +53,7 @@ #include #include -#include +#include #include "icom.h" diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index d0847375ea64..9939c3d9912b 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -36,7 +36,7 @@ #include #include -#include +#include /* * This is used to lock changes in serial line configuration. diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c index 415885c56435..657eed82eeb3 100644 --- a/drivers/tty/synclink.c +++ b/drivers/tty/synclink.c @@ -107,7 +107,7 @@ #define PUT_USER(error,value,addr) error = put_user(value,addr) #define COPY_TO_USER(error,dest,src,size) error = copy_to_user(dest,src,size) ? -EFAULT : 0 -#include +#include #define RCLRVALUE 0xffff diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index 8267bcf2405e..31885f20fc15 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -77,7 +77,7 @@ #include #include #include -#include +#include #if defined(CONFIG_HDLC) || (defined(CONFIG_HDLC_MODULE) && defined(CONFIG_SYNCLINK_GT_MODULE)) #define SYNCLINK_GENERIC_HDLC 1 diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c index d66620f7eaa3..51e8846cd68f 100644 --- a/drivers/tty/synclinkmp.c +++ b/drivers/tty/synclinkmp.c @@ -79,7 +79,7 @@ #define PUT_USER(error,value,addr) error = put_user(value,addr) #define COPY_TO_USER(error,dest,src,size) error = copy_to_user(dest,src,size) ? -EFAULT : 0 -#include +#include static MGSL_PARAMS default_params = { MGSL_MODE_HDLC, /* unsigned long mode */ diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index bf36ac9aee41..f27fc0f14c11 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -22,7 +22,7 @@ #include #include -#include +#include #undef TTY_DEBUG_WAIT_UNTIL_SENT diff --git a/drivers/tty/vt/consolemap.c b/drivers/tty/vt/consolemap.c index 71e81406ef71..1f6e17fc3fb0 100644 --- a/drivers/tty/vt/consolemap.c +++ b/drivers/tty/vt/consolemap.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 368ce1803e8f..36e1b8c7680f 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c index 14a2b5f11bca..56dcff6059d3 100644 --- a/drivers/tty/vt/vc_screen.c +++ b/drivers/tty/vt/vc_screen.c @@ -39,7 +39,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index f62c598810ff..a56edf2d58eb 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c index 4dec9df8764b..5a59da0dc98a 100644 --- a/drivers/usb/atm/usbatm.c +++ b/drivers/usb/atm/usbatm.c @@ -64,7 +64,7 @@ #include "usbatm.h" -#include +#include #include #include #include diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 143454ea385b..1fa5c0f29c64 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include "hub.h" diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 10b2576f8b6a..e8f4102d19df 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c index 5d3d914ab4fb..683098afa93e 100644 --- a/drivers/usb/host/uhci-hcd.c +++ b/drivers/usb/host/uhci-hcd.c @@ -42,7 +42,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c index 9a82f8308ad7..01a9373b7e18 100644 --- a/drivers/usb/misc/ftdi-elan.c +++ b/drivers/usb/misc/ftdi-elan.c @@ -48,7 +48,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c index 2975e80b7a56..debc1fd74b0d 100644 --- a/drivers/usb/misc/idmouse.c +++ b/drivers/usb/misc/idmouse.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include /* image constants */ diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index 9ca595632f17..3bc5356832db 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c index c8fbe7b739a0..b10e26c74a90 100644 --- a/drivers/usb/misc/legousbtower.c +++ b/drivers/usb/misc/legousbtower.c @@ -83,7 +83,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c index 1a874a1f3890..91c22276c03b 100644 --- a/drivers/usb/mon/mon_bin.c +++ b/drivers/usb/mon/mon_bin.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include "usb_mon.h" diff --git a/drivers/usb/mon/mon_stat.c b/drivers/usb/mon/mon_stat.c index 5388a339cfb8..5bdf73a57498 100644 --- a/drivers/usb/mon/mon_stat.c +++ b/drivers/usb/mon/mon_stat.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include "usb_mon.h" diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c index e59334b09c41..db1a4abf2806 100644 --- a/drivers/usb/mon/mon_text.c +++ b/drivers/usb/mon/mon_text.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "usb_mon.h" diff --git a/drivers/usb/musb/musb_debugfs.c b/drivers/usb/musb/musb_debugfs.c index 9b22d946c089..4fef50e5c8c1 100644 --- a/drivers/usb/musb/musb_debugfs.c +++ b/drivers/usb/musb/musb_debugfs.c @@ -37,7 +37,7 @@ #include #include -#include +#include #include "musb_core.h" #include "musb_debug.h" diff --git a/drivers/video/console/newport_con.c b/drivers/video/console/newport_con.c index 1e11614322fe..42d02a206059 100644 --- a/drivers/video/console/newport_con.c +++ b/drivers/video/console/newport_con.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/video/fbdev/68328fb.c b/drivers/video/fbdev/68328fb.c index 17f21cedff9b..c0c6b88d3839 100644 --- a/drivers/video/fbdev/68328fb.c +++ b/drivers/video/fbdev/68328fb.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/video/fbdev/hitfb.c b/drivers/video/fbdev/hitfb.c index 9d68dc9ee7bf..abe3e54d4506 100644 --- a/drivers/video/fbdev/hitfb.c +++ b/drivers/video/fbdev/hitfb.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/video/fbdev/hpfb.c b/drivers/video/fbdev/hpfb.c index 9476d196f510..16f16f5e1a4b 100644 --- a/drivers/video/fbdev/hpfb.c +++ b/drivers/video/fbdev/hpfb.c @@ -16,7 +16,7 @@ #include #include -#include +#include static struct fb_info fb_info = { .fix = { diff --git a/drivers/video/fbdev/mx3fb.c b/drivers/video/fbdev/mx3fb.c index 8778e01cebac..1c3c7ab26a95 100644 --- a/drivers/video/fbdev/mx3fb.c +++ b/drivers/video/fbdev/mx3fb.c @@ -33,7 +33,7 @@ #include #include -#include +#include #define MX3FB_NAME "mx3_sdc_fb" diff --git a/drivers/video/fbdev/q40fb.c b/drivers/video/fbdev/q40fb.c index 7487f76f6275..04ea330ccf5d 100644 --- a/drivers/video/fbdev/q40fb.c +++ b/drivers/video/fbdev/q40fb.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/video/fbdev/sm501fb.c b/drivers/video/fbdev/sm501fb.c index d0a4e2f79a57..d80bc8a3200f 100644 --- a/drivers/video/fbdev/sm501fb.c +++ b/drivers/video/fbdev/sm501fb.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include #ifdef CONFIG_PM diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c index 7df4228e25f0..accfef71e984 100644 --- a/drivers/video/fbdev/stifb.c +++ b/drivers/video/fbdev/stifb.c @@ -67,7 +67,7 @@ #include #include /* for HP-UX compatibility */ -#include +#include #include "sticore.h" diff --git a/drivers/video/fbdev/w100fb.c b/drivers/video/fbdev/w100fb.c index 10951c82f6ed..d570e19a2864 100644 --- a/drivers/video/fbdev/w100fb.c +++ b/drivers/video/fbdev/w100fb.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include