aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/netfilter/ipset/ip_set.h29
-rw-r--r--include/linux/netfilter/ipset/ip_set_comment.h38
-rw-r--r--include/linux/netfilter/ipset/ip_set_timeout.h27
-rw-r--r--include/linux/netfilter/x_tables.h56
-rw-r--r--include/linux/netfilter_bridge.h7
-rw-r--r--include/linux/netfilter_ipv6.h3
-rw-r--r--include/linux/skbuff.h7
-rw-r--r--include/net/netfilter/nf_tables.h11
-rw-r--r--include/uapi/linux/netfilter/ipset/ip_set.h6
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h4
-rw-r--r--net/bridge/br_netfilter.c398
-rw-r--r--net/bridge/br_private.h7
-rw-r--r--net/ipv4/ip_output.c4
-rw-r--r--net/ipv4/netfilter/Kconfig3
-rw-r--r--net/ipv4/netfilter/arp_tables.c86
-rw-r--r--net/ipv4/netfilter/ip_tables.c95
-rw-r--r--net/ipv6/netfilter.c2
-rw-r--r--net/ipv6/netfilter/Kconfig3
-rw-r--r--net/ipv6/netfilter/ip6_tables.c96
-rw-r--r--net/netfilter/Kconfig18
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h44
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c27
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c46
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c24
-rw-r--r--net/netfilter/ipset/ip_set_core.c344
-rw-r--r--net/netfilter/ipset/ip_set_getport.c13
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h714
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c39
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c46
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c51
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c53
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c58
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c19
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c49
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c225
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c120
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c52
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c128
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c419
-rw-r--r--net/netfilter/ipset/pfxlen.c16
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c8
-rw-r--r--net/netfilter/nf_tables_api.c139
-rw-r--r--net/netfilter/nf_tables_netdev.c75
-rw-r--r--net/netfilter/x_tables.c37
-rw-r--r--net/netfilter/xt_set.c44
45 files changed, 1972 insertions, 1718 deletions
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index ffdfdc24952a..48bb01edcf30 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -108,8 +108,13 @@ struct ip_set_counter {
atomic64_t packets;
};
+struct ip_set_comment_rcu {
+ struct rcu_head rcu;
+ char str[0];
+};
+
struct ip_set_comment {
- char *str;
+ struct ip_set_comment_rcu __rcu *c;
};
struct ip_set_skbinfo {
@@ -176,6 +181,9 @@ struct ip_set_type_variant {
/* List elements */
int (*list)(const struct ip_set *set, struct sk_buff *skb,
struct netlink_callback *cb);
+ /* Keep listing private when resizing runs parallel */
+ void (*uref)(struct ip_set *set, struct netlink_callback *cb,
+ bool start);
/* Return true if "b" set is the same as "a"
* according to the create set parameters */
@@ -223,7 +231,7 @@ struct ip_set {
/* The name of the set */
char name[IPSET_MAXNAMELEN];
/* Lock protecting the set data */
- rwlock_t lock;
+ spinlock_t lock;
/* References to the set */
u32 ref;
/* The core set type */
@@ -341,12 +349,11 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
cpu_to_be64((u64)skbinfo->skbmark << 32 |
skbinfo->skbmarkmask))) ||
(skbinfo->skbprio &&
- nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+ nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
cpu_to_be32(skbinfo->skbprio))) ||
(skbinfo->skbqueue &&
- nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+ nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
cpu_to_be16(skbinfo->skbqueue)));
-
}
static inline void
@@ -380,12 +387,12 @@ ip_set_init_counter(struct ip_set_counter *counter,
/* Netlink CB args */
enum {
- IPSET_CB_NET = 0,
- IPSET_CB_DUMP,
- IPSET_CB_INDEX,
- IPSET_CB_ARG0,
+ IPSET_CB_NET = 0, /* net namespace */
+ IPSET_CB_DUMP, /* dump single set/all sets */
+ IPSET_CB_INDEX, /* set index */
+ IPSET_CB_PRIVATE, /* set private data */
+ IPSET_CB_ARG0, /* type specific */
IPSET_CB_ARG1,
- IPSET_CB_ARG2,
};
/* register and unregister set references */
@@ -545,8 +552,6 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
{ .bytes = ULLONG_MAX, .packets = ULLONG_MAX, \
.timeout = (set)->timeout }
-#define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b))
-
#define IPSET_CONCAT(a, b) a##b
#define IPSET_TOKEN(a, b) IPSET_CONCAT(a, b)
diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h
index 21217ea008d7..8d0248525957 100644
--- a/include/linux/netfilter/ipset/ip_set_comment.h
+++ b/include/linux/netfilter/ipset/ip_set_comment.h
@@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb)
return nla_data(tb);
}
+/* Called from uadd only, protected by the set spinlock.
+ * The kadt functions don't use the comment extensions in any way.
+ */
static inline void
ip_set_init_comment(struct ip_set_comment *comment,
const struct ip_set_ext *ext)
{
+ struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
size_t len = ext->comment ? strlen(ext->comment) : 0;
- if (unlikely(comment->str)) {
- kfree(comment->str);
- comment->str = NULL;
+ if (unlikely(c)) {
+ kfree_rcu(c, rcu);
+ rcu_assign_pointer(comment->c, NULL);
}
if (!len)
return;
if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
len = IPSET_MAX_COMMENT_SIZE;
- comment->str = kzalloc(len + 1, GFP_ATOMIC);
- if (unlikely(!comment->str))
+ c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
+ if (unlikely(!c))
return;
- strlcpy(comment->str, ext->comment, len + 1);
+ strlcpy(c->str, ext->comment, len + 1);
+ rcu_assign_pointer(comment->c, c);
}
+/* Used only when dumping a set, protected by rcu_read_lock_bh() */
static inline int
ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
{
- if (!comment->str)
+ struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);
+
+ if (!c)
return 0;
- return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str);
+ return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
}
+/* Called from uadd/udel, flush or the garbage collectors protected
+ * by the set spinlock.
+ * Called when the set is destroyed and when there can't be any user
+ * of the set data anymore.
+ */
static inline void
ip_set_comment_free(struct ip_set_comment *comment)
{
- if (unlikely(!comment->str))
+ struct ip_set_comment_rcu *c;
+
+ c = rcu_dereference_protected(comment->c, 1);
+ if (unlikely(!c))
return;
- kfree(comment->str);
- comment->str = NULL;
+ kfree_rcu(c, rcu);
+ rcu_assign_pointer(comment->c, NULL);
}
#endif
diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h
index 83c2f9e0886c..1d6a935c1ac5 100644
--- a/include/linux/netfilter/ipset/ip_set_timeout.h
+++ b/include/linux/netfilter/ipset/ip_set_timeout.h
@@ -40,38 +40,33 @@ ip_set_timeout_uget(struct nlattr *tb)
}
static inline bool
-ip_set_timeout_test(unsigned long timeout)
+ip_set_timeout_expired(unsigned long *t)
{
- return timeout == IPSET_ELEM_PERMANENT ||
- time_is_after_jiffies(timeout);
-}
-
-static inline bool
-ip_set_timeout_expired(unsigned long *timeout)
-{
- return *timeout != IPSET_ELEM_PERMANENT &&
- time_is_before_jiffies(*timeout);
+ return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
}
static inline void
-ip_set_timeout_set(unsigned long *timeout, u32 t)
+ip_set_timeout_set(unsigned long *timeout, u32 value)
{
- if (!t) {
+ unsigned long t;
+
+ if (!value) {
*timeout = IPSET_ELEM_PERMANENT;
return;
}
- *timeout = msecs_to_jiffies(t * 1000) + jiffies;
- if (*timeout == IPSET_ELEM_PERMANENT)
+ t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies;
+ if (t == IPSET_ELEM_PERMANENT)
/* Bingo! :-) */
- (*timeout)--;
+ t--;
+ *timeout = t;
}
static inline u32
ip_set_timeout_get(unsigned long *timeout)
{
return *timeout == IPSET_ELEM_PERMANENT ? 0 :
- jiffies_to_msecs(*timeout - jiffies)/1000;
+ jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
}
#endif /* __KERNEL__ */
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 09f38206c18f..95693c4cebdd 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -224,13 +224,10 @@ struct xt_table_info {
unsigned int stacksize;
unsigned int __percpu *stackptr;
void ***jumpstack;
- /* ipt_entry tables: one per CPU */
- /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
- void *entries[1];
+
+ unsigned char entries[0] __aligned(8);
};
-#define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \
- + nr_cpu_ids * sizeof(char *))
int xt_register_target(struct xt_target *target);
void xt_unregister_target(struct xt_target *target);
int xt_register_targets(struct xt_target *target, unsigned int n);
@@ -353,6 +350,55 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
return ret;
}
+
+/* On SMP, ip(6)t_entry->counters.pcnt holds address of the
+ * real (percpu) counter. On !SMP, its just the packet count,
+ * so nothing needs to be done there.
+ *
+ * xt_percpu_counter_alloc returns the address of the percpu
+ * counter, or 0 on !SMP.
+ *
+ * Hence caller must use IS_ERR_VALUE to check for error, this
+ * allows us to return 0 for single core systems without forcing
+ * callers to deal with SMP vs. NONSMP issues.
+ */
+static inline u64 xt_percpu_counter_alloc(void)
+{
+ if (nr_cpu_ids > 1) {
+ void __percpu *res = alloc_percpu(struct xt_counters);
+
+ if (res == NULL)
+ return (u64) -ENOMEM;
+
+ return (__force u64) res;
+ }
+
+ return 0;
+}
+static inline void xt_percpu_counter_free(u64 pcnt)
+{
+ if (nr_cpu_ids > 1)
+ free_percpu((void __percpu *) pcnt);
+}
+
+static inline struct xt_counters *
+xt_get_this_cpu_counter(struct xt_counters *cnt)
+{
+ if (nr_cpu_ids > 1)
+ return this_cpu_ptr((void __percpu *) cnt->pcnt);
+
+ return cnt;
+}
+
+static inline struct xt_counters *
+xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
+{
+ if (nr_cpu_ids > 1)
+ return per_cpu_ptr((void __percpu *) cnt->pcnt, cpu);
+
+ return cnt;
+}
+
struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index f2fdb5a52070..6d80fc686323 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -20,13 +20,6 @@ enum nf_br_hook_priorities {
#define BRNF_BRIDGED_DNAT 0x02
#define BRNF_NF_BRIDGE_PREROUTING 0x08
-static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
-{
- if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
- return PPPOE_SES_HLEN;
- return 0;
-}
-
int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb);
static inline void br_drop_fake_rtable(struct sk_buff *skb)
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 64dad1cc1a4b..8b7d28f3aada 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -25,6 +25,9 @@ void ipv6_netfilter_fini(void);
struct nf_ipv6_ops {
int (*chk_addr)(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict);
+ void (*route_input)(struct sk_buff *skb);
+ int (*fragment)(struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct sock *, struct sk_buff *));
};
extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a7acc92aa668..d6cdd6e87d53 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -36,6 +36,7 @@
#include <linux/sched.h>
#include <net/flow_dissector.h>
#include <linux/splice.h>
+#include <linux/in6.h>
/* A. Checksumming of received packets by device.
*
@@ -173,13 +174,17 @@ struct nf_bridge_info {
BRNF_PROTO_PPPOE
} orig_proto:8;
bool pkt_otherhost;
+ __u16 frag_max_size;
unsigned int mask;
struct net_device *physindev;
union {
struct net_device *physoutdev;
char neigh_header[8];
};
- __be32 ipv4_daddr;
+ union {
+ __be32 ipv4_daddr;
+ struct in6_addr ipv6_daddr;
+ };
};
#endif
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 3d6f48ca40a7..2a246680a6c3 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -781,6 +781,7 @@ struct nft_stats {
};
#define NFT_HOOK_OPS_MAX 2
+#define NFT_BASECHAIN_DISABLED (1 << 0)
/**
* struct nft_base_chain - nf_tables base chain
@@ -791,14 +792,17 @@ struct nft_stats {
* @policy: default policy
* @stats: per-cpu chain stats
* @chain: the chain
+ * @dev_name: device name that this base chain is attached to (if any)
*/
struct nft_base_chain {
struct nf_hook_ops ops[NFT_HOOK_OPS_MAX];
possible_net_t pnet;
const struct nf_chain_type *type;
u8 policy;
+ u8 flags;
struct nft_stats __percpu *stats;
struct nft_chain chain;
+ char dev_name[IFNAMSIZ];
};
static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain)
@@ -806,6 +810,11 @@ static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chai
return container_of(chain, struct nft_base_chain, chain);
}
+int nft_register_basechain(struct nft_base_chain *basechain,
+ unsigned int hook_nops);
+void nft_unregister_basechain(struct nft_base_chain *basechain,
+ unsigned int hook_nops);
+
unsigned int nft_do_chain(struct nft_pktinfo *pkt,
const struct nf_hook_ops *ops);
@@ -819,7 +828,6 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt,
* @use: number of chain references to this table
* @flags: table flag (see enum nft_table_flags)
* @name: name of the table
- * @dev: this table is bound to this device (if any)
*/
struct nft_table {
struct list_head list;
@@ -829,7 +837,6 @@ struct nft_table {
u32 use;
u16 flags;
char name[NFT_TABLE_MAXNAMELEN];
- struct net_device *dev;
};
enum nft_af_flags {
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 5ab4e60894cf..63b2e34f1b60 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -15,12 +15,12 @@
/* The protocol version */
#define IPSET_PROTOCOL 6
-/* The maximum permissible comment length we will accept over netlink */
-#define IPSET_MAX_COMMENT_SIZE 255
-
/* The max length of strings including NUL: set and type identifiers */
#define IPSET_MAXNAMELEN 32
+/* The maximum permissible comment length we will accept over netlink */
+#define IPSET_MAX_COMMENT_SIZE 255
+
/* Message types and commands */
enum ipset_cmd {
IPSET_CMD_NONE,
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 89a671e0f5e7..a99e6a997140 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -122,11 +122,13 @@ enum nft_list_attributes {
*
* @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32)
* @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32)
+ * @NFTA_HOOK_DEV: netdevice name (NLA_STRING)
*/
enum nft_hook_attributes {
NFTA_HOOK_UNSPEC,
NFTA_HOOK_HOOKNUM,
NFTA_HOOK_PRIORITY,
+ NFTA_HOOK_DEV,
__NFTA_HOOK_MAX
};
#define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1)
@@ -146,14 +148,12 @@ enum nft_table_flags {
* @NFTA_TABLE_NAME: name of the table (NLA_STRING)
* @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32)
* @NFTA_TABLE_USE: number of chains in this table (NLA_U32)
- * @NFTA_TABLE_DEV: net device name (NLA_STRING)
*/
enum nft_table_attributes {
NFTA_TABLE_UNSPEC,
NFTA_TABLE_NAME,
NFTA_TABLE_FLAGS,
NFTA_TABLE_USE,
- NFTA_TABLE_DEV,
__NFTA_TABLE_MAX
};
#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1)
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 46660a28feef..e4e5f2f29173 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -34,6 +34,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/addrconf.h>
#include <net/route.h>
#include <net/netfilter/br_netfilter.h>
@@ -115,6 +116,8 @@ struct brnf_frag_data {
char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
u8 encap_size;
u8 size;
+ u16 vlan_tci;
+ __be16 vlan_proto;
};
static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
@@ -216,7 +219,7 @@ static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
* expected format
*/
-static int br_parse_ip_options(struct sk_buff *skb)
+static int br_validate_ipv4(struct sk_buff *skb)
{
const struct iphdr *iph;
struct net_device *dev = skb->dev;
@@ -264,6 +267,111 @@ drop:
return -1;
}
+/* We only check the length. A bridge shouldn't do any hop-by-hop stuff
+ * anyway
+ */
+static int check_hbh_len(struct sk_buff *skb)
+{
+ unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1);
+ u32 pkt_len;
+ const unsigned char *nh = skb_network_header(skb);
+ int off = raw - nh;
+ int len = (raw[1] + 1) << 3;
+
+ if ((raw + len) - skb->data > skb_headlen(skb))
+ goto bad;
+
+ off += 2;
+ len -= 2;
+
+ while (len > 0) {
+ int optlen = nh[off + 1] + 2;
+
+ switch (nh[off]) {
+ case IPV6_TLV_PAD1:
+ optlen = 1;
+ break;
+
+ case IPV6_TLV_PADN:
+ break;
+
+ case IPV6_TLV_JUMBO:
+ if (nh[off + 1] != 4 || (off & 3) != 2)
+ goto bad;
+ pkt_len = ntohl(*(__be32 *)(nh + off + 2));
+ if (pkt_len <= IPV6_MAXPLEN ||
+ ipv6_hdr(skb)->payload_len)
+ goto bad;
+ if (pkt_len > skb->len - sizeof(struct ipv6hdr))
+ goto bad;
+ if (pskb_trim_rcsum(skb,
+ pkt_len + sizeof(struct ipv6hdr)))
+ goto bad;
+ nh = skb_network_header(skb);
+ break;
+ default:
+ if (optlen > len)
+ goto bad;
+ break;
+ }
+ off += optlen;
+ len -= optlen;
+ }
+ if (len == 0)
+ return 0;
+bad:
+ return -1;
+}
+
+/* Equivalent to br_validate_ipv4 for IPv6 */
+static int br_validate_ipv6(struct sk_buff *skb)
+{
+ const struct ipv6hdr *hdr;
+ struct net_device *dev = skb->dev;
+ struct inet6_dev *idev = in6_dev_get(skb->dev);
+ u32 pkt_len;
+ u8 ip6h_len = sizeof(struct ipv6hdr);
+
+ if (!pskb_may_pull(skb, ip6h_len))
+ goto inhdr_error;
+
+ if (skb->len < ip6h_len)
+ goto drop;
+
+ hdr = ipv6_hdr(skb);
+
+ if (hdr->version != 6)
+ goto inhdr_error;
+
+ pkt_len = ntohs(hdr->payload_len);
+
+ if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
+ if (pkt_len + ip6h_len > skb->len) {
+ IP6_INC_STATS_BH(dev_net(dev), idev,
+ IPSTATS_MIB_INTRUNCATEDPKTS);
+ goto drop;
+ }
+ if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) {
+ IP6_INC_STATS_BH(dev_net(dev), idev,
+ IPSTATS_MIB_INDISCARDS);
+ goto drop;
+ }
+ }
+ if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
+ goto drop;
+
+ memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
+ /* No IP options in IPv6 header; however it should be
+ * checked if some next headers need special treatment
+ */
+ return 0;
+
+inhdr_error:
+ IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS);
+drop:
+ return -1;
+}
+
static void nf_bridge_update_protocol(struct sk_buff *skb)
{
switch (skb->nf_bridge->orig_proto) {
@@ -278,37 +386,6 @@ static void nf_bridge_update_protocol(struct sk_buff *skb)
}
}
-/* PF_BRIDGE/PRE_ROUTING *********************************************/
-/* Undo the changes made for ip6tables PREROUTING and continue the
- * bridge PRE_ROUTING hook. */
-static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
-{
- struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
- struct rtable *rt;
-
- if (nf_bridge->pkt_otherhost) {
- skb->pkt_type = PACKET_OTHERHOST;
- nf_bridge->pkt_otherhost = false;
- }
- nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
-
- rt = bridge_parent_rtable(nf_bridge->physindev);
- if (!rt) {
- kfree_skb(skb);
- return 0;
- }
- skb_dst_set_noref(skb, &rt->dst);
-
- skb->dev = nf_bridge->physindev;
- nf_bridge_update_protocol(skb);
- nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb,
- skb->dev, NULL,
- br_handle_frame_finish, 1);
-
- return 0;
-}
-
/* Obtain the correct destination MAC address, while preserving the original
* source MAC address. If we already know this address, we just copy it. If we
* don't, we use the neighbour framework to find out. In both cases, we make
@@ -357,7 +434,74 @@ free_skb:
static bool daddr_was_changed(const struct sk_buff *skb,
const struct nf_bridge_info *nf_bridge)
{
- return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr;
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr;
+ case htons(ETH_P_IPV6):
+ return memcmp(&nf_bridge->ipv6_daddr, &ipv6_hdr(skb)->daddr,
+ sizeof(ipv6_hdr(skb)->daddr)) != 0;
+ default:
+ return false;
+ }
+}
+
+/* PF_BRIDGE/PRE_ROUTING: Undo the changes made for ip6tables
+ * PREROUTING and continue the bridge PRE_ROUTING hook. See comment
+ * for br_nf_pre_routing_finish(), same logic is used here but
+ * equivalent IPv6 function ip6_route_input() called indirectly.
+ */
+static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
+{
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+ struct rtable *rt;
+ struct net_device *dev = skb->dev;
+ const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
+
+ nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
+
+ if (nf_bridge->pkt_otherhost) {
+ skb->pkt_type = PACKET_OTHERHOST;
+ nf_bridge->pkt_otherhost = false;
+ }
+ nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
+ if (daddr_was_changed(skb, nf_bridge)) {
+ skb_dst_drop(skb);
+ v6ops->route_input(skb);
+
+ if (skb_dst(skb)->error) {
+ kfree_skb(skb);
+ return 0;
+ }
+
+ if (skb_dst(skb)->dev == dev) {
+ skb->dev = nf_bridge->physindev;
+ nf_bridge_update_protocol(skb);
+ nf_bridge_push_encap_header(skb);
+ NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
+ sk, skb, skb->dev, NULL,
+ br_nf_pre_routing_finish_bridge,
+ 1);
+ return 0;
+ }
+ ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
+ skb->pkt_type = PACKET_HOST;
+ } else {
+ rt = bridge_parent_rtable(nf_bridge->physindev);
+ if (!rt) {
+ kfree_skb(skb);
+ return 0;
+ }
+ skb_dst_set_noref(skb, &rt->dst);
+ }
+
+ skb->dev = nf_bridge->physindev;
+ nf_bridge_update_protocol(skb);
+ nf_bridge_push_encap_header(skb);
+ NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb,
+ skb->dev, NULL,
+ br_handle_frame_finish, 1);
+
+ return 0;
}
/* This requires some explaining. If DNAT has taken place,
@@ -406,16 +550,14 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct rtable *rt;
int err;
- int frag_max_size;
- frag_max_size = IPCB(skb)->frag_max_size;
- BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size;
+ nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;
if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->pkt_otherhost = false;
}
- nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
+ nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
if (daddr_was_changed(skb, nf_bridge)) {
if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -517,90 +659,16 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
return skb->dev;
}
-/* We only check the length. A bridge shouldn't do any hop-by-hop stuff anyway */
-static int check_hbh_len(struct sk_buff *skb)
-{
- unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1);
- u32 pkt_len;
- const unsigned char *nh = skb_network_header(skb);
- int off = raw - nh;
- int len = (raw[1] + 1) << 3;
-
- if ((raw + len) - skb->data > skb_headlen(skb))
- goto bad;
-
- off += 2;
- len -= 2;
-
- while (len > 0) {
- int optlen = nh[off + 1] + 2;
-
- switch (nh[off]) {
- case IPV6_TLV_PAD1:
- optlen = 1;
- break;
-
- case IPV6_TLV_PADN:
- break;
-
- case IPV6_TLV_JUMBO:
- if (nh[off + 1] != 4 || (off & 3) != 2)
- goto bad;
- pkt_len = ntohl(*(__be32 *) (nh + off + 2));
- if (pkt_len <= IPV6_MAXPLEN ||
- ipv6_hdr(skb)->payload_len)
- goto bad;
- if (pkt_len > skb->len - sizeof(struct ipv6hdr))
- goto bad;
- if (pskb_trim_rcsum(skb,
- pkt_len + sizeof(struct ipv6hdr)))
- goto bad;
- nh = skb_network_header(skb);
- break;
- default:
- if (optlen > len)
- goto bad;
- break;
- }
- off += optlen;
- len -= optlen;
- }
- if (len == 0)
- return 0;
-bad:
- return -1;
-
-}
-
/* Replicate the checks that IPv6 does on packet reception and pass the packet
- * to ip6tables, which doesn't support NAT, so things are fairly simple. */
+ * to ip6tables.
+ */
static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- const struct ipv6hdr *hdr;
- u32 pkt_len;
-
- if (skb->len < sizeof(struct ipv6hdr))
- return NF_DROP;
-
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
- return NF_DROP;
-
- hdr = ipv6_hdr(skb);
+ struct nf_bridge_info *nf_bridge;
- if (hdr->version != 6)
- return NF_DROP;
-
- pkt_len = ntohs(hdr->payload_len);
-
- if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
- if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
- return NF_DROP;
- if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
- return NF_DROP;
- }
- if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
+ if (br_validate_ipv6(skb))
return NF_DROP;
nf_bridge_put(skb->nf_bridge);
@@ -609,6 +677,9 @@ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
if (!setup_pre_routing(skb))
return NF_DROP;
+ nf_bridge = nf_bridge_info_get(skb);
+ nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr;
+
skb->protocol = htons(ETH_P_IPV6);
NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb,
skb->dev, NULL,
@@ -656,7 +727,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
nf_bridge_pull_encap_header_rcsum(skb);
- if (br_parse_ip_options(skb))
+ if (br_validate_ipv4(skb))
return NF_DROP;
nf_bridge_put(skb->nf_bridge);
@@ -700,12 +771,12 @@ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb)
struct net_device *in;
if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) {
- int frag_max_size;
- if (skb->protocol == htons(ETH_P_IP)) {
- frag_max_size = IPCB(skb)->frag_max_size;
- BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size;
- }
+ if (skb->protocol == htons(ETH_P_IP))
+ nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;
+
+ if (skb->protocol == htons(ETH_P_IPV6))
+ nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
in = nf_bridge->physindev;
if (nf_bridge->pkt_otherhost) {
@@ -768,12 +839,15 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
}
if (pf == NFPROTO_IPV4) {
- int frag_max = BR_INPUT_SKB_CB(skb)->frag_max_size;
-
- if (br_parse_ip_options(skb))
+ if (br_validate_ipv4(skb))
return NF_DROP;
+ IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
+ }
- IPCB(skb)->frag_max_size = frag_max;
+ if (pf == NFPROTO_IPV6) {
+ if (br_validate_ipv6(skb))
+ return NF_DROP;
+ IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
}
nf_bridge->physoutdev = skb->dev;
@@ -823,7 +897,7 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
return NF_STOLEN;
}
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
{
struct brnf_frag_data *data;
@@ -837,12 +911,18 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
return 0;
}
+ if (data->vlan_tci) {
+ skb->vlan_tci = data->vlan_tci;
+ skb->vlan_proto = data->vlan_proto;
+ }
+
skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
__skb_push(skb, data->encap_size);
nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(sk, skb);
}
+#endif
static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *))
@@ -863,54 +943,82 @@ static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb,
return ip_do_fragment(sk, skb, output);
}
+static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
+{
+ if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
+ return PPPOE_SES_HLEN;
+ return 0;
+}
+
static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
{
- int ret;
- int frag_max_size;
+ struct nf_bridge_info *nf_bridge;
unsigned int mtu_reserved;
- if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP)) {
+ mtu_reserved = nf_bridge_mtu_reduction(skb);
+
+ if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) {
nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(sk, skb);
}
- mtu_reserved = nf_bridge_mtu_reduction(skb);
+ nf_bridge = nf_bridge_info_get(skb);
+
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
/* This is wrong! We should preserve the original fragment
* boundaries by preserving frag_list rather than refragmenting.
*/
- if (skb->len + mtu_reserved > skb->dev->mtu) {
+ if (skb->protocol == htons(ETH_P_IP)) {
struct brnf_frag_data *data;
- frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
- if (br_parse_ip_options(skb))
- /* Drop invalid packet */
+ if (br_validate_ipv4(skb))
return NF_DROP;
- IPCB(skb)->frag_max_size = frag_max_size;
+
+ IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
nf_bridge_update_protocol(skb);
data = this_cpu_ptr(&brnf_frag_data_storage);
+
+ data->vlan_tci = skb->vlan_tci;
+ data->vlan_proto = skb->vlan_proto;
data->encap_size = nf_bridge_encap_header_len(skb);
data->size = ETH_HLEN + data->encap_size;
skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
data->size);
- ret = br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit);
- } else {
- nf_bridge_info_free(skb);
- ret = br_dev_queue_push_xmit(sk, skb);
+ return br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit);
}
+#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
+ struct brnf_frag_data *data;
- return ret;
-}
-#else
-static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
-{
+ if (br_validate_ipv6(skb))
+ return NF_DROP;
+
+ IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
+
+ nf_bridge_update_protocol(skb);
+
+ data = this_cpu_ptr(&brnf_frag_data_storage);
+ data->encap_size = nf_bridge_encap_header_len(skb);
+ data->size = ETH_HLEN + data->encap_size;
+
+ skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
+ data->size);
+
+ if (v6ops)
+ return v6ops->fragment(sk, skb, br_nf_push_frag_xmit);
+ else
+ return -EMSGSIZE;
+ }
+#endif
nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(sk, skb);
}
-#endif
/* PF_BRIDGE/POST_ROUTING ********************************************/
static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 1f36fa70639b..5dccced71269 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -18,6 +18,7 @@
#include <linux/netpoll.h>
#include <linux/u64_stats_sync.h>
#include <net/route.h>
+#include <net/ip6_fib.h>
#include <linux/if_vlan.h>
#define BR_HASH_BITS 8
@@ -214,7 +215,10 @@ struct net_bridge
spinlock_t hash_lock;
struct hlist_head hash[BR_HASH_SIZE];
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- struct rtable fake_rtable;
+ union {
+ struct rtable fake_rtable;
+ struct rt6_info fake_rt6_info;
+ };
bool nf_call_iptables;
bool nf_call_ip6tables;
bool nf_call_arptables;
@@ -304,7 +308,6 @@ struct br_input_skb_cb {
int mrouters_only;
#endif
- u16 frag_max_size;
bool proxyarp_replied;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 55f3c2e94357..6bf89a6312bc 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -549,10 +549,6 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
hlen = iph->ihl * 4;
mtu = mtu - hlen; /* Size of data space */
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (skb->nf_bridge)
- mtu -= nf_bridge_mtu_reduction(skb);
-#endif
IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
/* When frag_list is given, use it. First, check its validity:
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index fb20f363151f..2199a5db25e6 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -195,7 +195,8 @@ config IP_NF_MATCH_ECN
config IP_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
- depends on NETFILTER_ADVANCED && (IP_NF_MANGLE || IP_NF_RAW)
+ depends on NETFILTER_ADVANCED
+ depends on IP_NF_MANGLE || IP_NF_RAW
---help---
This option allows you to match packets whose replies would
go out via the interface the packet came in.
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index a61200754f4b..95c9b6eece25 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -256,7 +256,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
const struct arphdr *arp;
struct arpt_entry *e, *back;
const char *indev, *outdev;
- void *table_base;
+ const void *table_base;
const struct xt_table_info *private;
struct xt_action_param acpar;
unsigned int addend;
@@ -275,7 +275,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
* pointer.
*/
smp_read_barrier_depends();
- table_base = private->entries[smp_processor_id()];
+ table_base = private->entries;
e = get_entry(table_base, private->hook_entry[hook]);
back = get_entry(table_base, private->underflow[hook]);
@@ -289,13 +289,15 @@ unsigned int arpt_do_table(struct sk_buff *skb,
arp = arp_hdr(skb);
do {
const struct xt_entry_target *t;
+ struct xt_counters *counter;
if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
e = arpt_next_entry(e);
continue;
}
- ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1);
+ counter = xt_get_this_cpu_counter(&e->counters);
+ ADD_COUNTER(*counter, arp_hdr_len(skb->dev), 1);
t = arpt_get_target_c(e);
@@ -521,6 +523,10 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
if (ret)
return ret;
+ e->counters.pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(e->counters.pcnt))
+ return -ENOMEM;
+
t = arpt_get_target(e);
target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
t->u.user.revision);
@@ -538,6 +544,8 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
err:
module_put(t->u.kernel.target->me);
out:
+ xt_percpu_counter_free(e->counters.pcnt);
+
return ret;
}
@@ -614,6 +622,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
if (par.target->destroy != NULL)
par.target->destroy(&par);
module_put(par.target->me);
+ xt_percpu_counter_free(e->counters.pcnt);
}
/* Checks and translates the user-supplied table segment (held in
@@ -702,12 +711,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
return ret;
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i) {
- if (newinfo->entries[i] && newinfo->entries[i] != entry0)
- memcpy(newinfo->entries[i], entry0, newinfo->size);
- }
-
return ret;
}
@@ -722,14 +725,16 @@ static void get_counters(const struct xt_table_info *t,
seqcount_t *s = &per_cpu(xt_recseq, cpu);
i = 0;
- xt_entry_foreach(iter, t->entries[cpu], t->size) {
+ xt_entry_foreach(iter, t->entries, t->size) {
+ struct xt_counters *tmp;
u64 bcnt, pcnt;
unsigned int start;
+ tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
do {
start = read_seqcount_begin(s);
- bcnt = iter->counters.bcnt;
- pcnt = iter->counters.pcnt;
+ bcnt = tmp->bcnt;
+ pcnt = tmp->pcnt;
} while (read_seqcount_retry(s, start));
ADD_COUNTER(counters[i], bcnt, pcnt);
@@ -774,7 +779,7 @@ static int copy_entries_to_user(unsigned int total_size,
if (IS_ERR(counters))
return PTR_ERR(counters);
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ loc_cpu_entry = private->entries;
/* ... then copy entire thing ... */
if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
ret = -EFAULT;
@@ -863,16 +868,16 @@ static int compat_table_info(const struct xt_table_info *info,
struct xt_table_info *newinfo)
{
struct arpt_entry *iter;
- void *loc_cpu_entry;
+ const void *loc_cpu_entry;
int ret;
if (!newinfo || !info)
return -EINVAL;
- /* we dont care about newinfo->entries[] */
+ /* we dont care about newinfo->entries */
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
- loc_cpu_entry = info->entries[raw_smp_processor_id()];
+ loc_cpu_entry = info->entries;
xt_compat_init_offsets(NFPROTO_ARP, info->number);
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
@@ -1037,7 +1042,7 @@ static int __do_replace(struct net *net, const char *name,
get_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
- loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+ loc_cpu_old_entry = oldinfo->entries;
xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
cleanup_entry(iter);
@@ -1084,8 +1089,7 @@ static int do_replace(struct net *net, const void __user *user,
if (!newinfo)
return -ENOMEM;
- /* choose the copy that is on our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
tmp.size) != 0) {
ret = -EFAULT;
@@ -1115,7 +1119,7 @@ static int do_replace(struct net *net, const void __user *user,
static int do_add_counters(struct net *net, const void __user *user,
unsigned int len, int compat)
{
- unsigned int i, curcpu;
+ unsigned int i;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1125,7 +1129,6 @@ static int do_add_counters(struct net *net, const void __user *user,
struct xt_table *t;
const struct xt_table_info *private;
int ret = 0;
- void *loc_cpu_entry;
struct arpt_entry *iter;
unsigned int addend;
#ifdef CONFIG_COMPAT
@@ -1181,12 +1184,13 @@ static int do_add_counters(struct net *net, const void __user *user,
}
i = 0;
- /* Choose the copy that is on our node */
- curcpu = smp_processor_id();
- loc_cpu_entry = private->entries[curcpu];
+
addend = xt_write_recseq_begin();
- xt_entry_foreach(iter, loc_cpu_entry, private->size) {
- ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
+ xt_entry_foreach(iter, private->entries, private->size) {
+ struct xt_counters *tmp;
+
+ tmp = xt_get_this_cpu_counter(&iter->counters);
+ ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
++i;
}
xt_write_recseq_end(addend);
@@ -1396,7 +1400,7 @@ static int translate_compat_table(const char *name,
newinfo->hook_entry[i] = info->hook_entry[i];
newinfo->underflow[i] = info->underflow[i];
}
- entry1 = newinfo->entries[raw_smp_processor_id()];
+ entry1 = newinfo->entries;
pos = entry1;
size = total_size;
xt_entry_foreach(iter0, entry0, total_size) {
@@ -1416,9 +1420,17 @@ static int translate_compat_table(const char *name,
i = 0;
xt_entry_foreach(iter1, entry1, newinfo->size) {
+ iter1->counters.pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(iter1->counters.pcnt)) {
+ ret = -ENOMEM;
+ break;
+ }
+
ret = check_target(iter1, name);
- if (ret != 0)
+ if (ret != 0) {
+ xt_percpu_counter_free(iter1->counters.pcnt);
break;
+ }
++i;
if (strcmp(arpt_get_target(iter1)->u.user.name,
XT_ERROR_TARGET) == 0)
@@ -1448,11 +1460,6 @@ static int translate_compat_table(const char *name,
return ret;
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i)
- if (newinfo->entries[i] && newinfo->entries[i] != entry1)
- memcpy(newinfo->entries[i], entry1, newinfo->size);
-
*pinfo = newinfo;
*pentry0 = entry1;
xt_free_table_info(info);
@@ -1511,8 +1518,7 @@ static int compat_do_replace(struct net *net, void __user *user,
if (!newinfo)
return -ENOMEM;
- /* choose the copy that is on our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) {
ret = -EFAULT;
goto free_newinfo;
@@ -1609,7 +1615,6 @@ static int compat_copy_entries_to_user(unsigned int total_size,
void __user *pos;
unsigned int size;
int ret = 0;
- void *loc_cpu_entry;
unsigned int i = 0;
struct arpt_entry *iter;
@@ -1617,11 +1622,9 @@ static int compat_copy_entries_to_user(unsigned int total_size,
if (IS_ERR(counters))
return PTR_ERR(counters);
- /* choose the copy on our node/cpu */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
pos = userptr;
size = total_size;
- xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+ xt_entry_foreach(iter, private->entries, total_size) {
ret = compat_copy_entry_to_user(iter, &pos,
&size, counters, i++);
if (ret != 0)
@@ -1790,8 +1793,7 @@ struct xt_table *arpt_register_table(struct net *net,
goto out;
}
- /* choose the copy on our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
memcpy(loc_cpu_entry, repl->entries, repl->size);
ret = translate_table(newinfo, loc_cpu_entry, repl);
@@ -1822,7 +1824,7 @@ void arpt_unregister_table(struct xt_table *table)
private = xt_unregister_table(table);
/* Decrease module usage counts and free resources */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ loc_cpu_entry = private->entries;
xt_entry_foreach(iter, loc_cpu_entry, private->size)
cleanup_entry(iter);
if (private->number > private->initial_entries)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e7abf5145edc..6c72fbb7b49e 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -254,15 +254,13 @@ static void trace_packet(const struct sk_buff *skb,
const struct xt_table_info *private,
const struct ipt_entry *e)
{
- const void *table_base;
const struct ipt_entry *root;
const char *hookname, *chainname, *comment;
const struct ipt_entry *iter;
unsigned int rulenum = 0;
struct net *net = dev_net(in ? in : out);
- table_base = private->entries[smp_processor_id()];
- root = get_entry(table_base, private->hook_entry[hook]);
+ root = get_entry(private->entries, private->hook_entry[hook]);
hookname = chainname = hooknames[hook];
comment = comments[NF_IP_TRACE_COMMENT_RULE];
@@ -331,7 +329,7 @@ ipt_do_table(struct sk_buff *skb,
* pointer.
*/
smp_read_barrier_depends();
- table_base = private->entries[cpu];
+ table_base = private->entries;
jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
stackptr = per_cpu_ptr(private->stackptr, cpu);
origptr = *stackptr;
@@ -345,6 +343,7 @@ ipt_do_table(struct sk_buff *skb,
do {
const struct xt_entry_target *t;
const struct xt_entry_match *ematch;
+ struct xt_counters *counter;
IP_NF_ASSERT(e);
if (!ip_packet_match(ip, indev, outdev,
@@ -361,7 +360,8 @@ ipt_do_table(struct sk_buff *skb,
goto no_match;
}
- ADD_COUNTER(e->counters, skb->len, 1);
+ counter = xt_get_this_cpu_counter(&e->counters);
+ ADD_COUNTER(*counter, skb->len, 1);
t = ipt_get_target(e);
IP_NF_ASSERT(t->u.kernel.target);
@@ -665,6 +665,10 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
if (ret)
return ret;
+ e->counters.pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(e->counters.pcnt))
+ return -ENOMEM;
+
j = 0;
mtpar.net = net;
mtpar.table = name;
@@ -691,6 +695,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
ret = check_target(e, net, name);
if (ret)
goto err;
+
return 0;
err:
module_put(t->u.kernel.target->me);
@@ -700,6 +705,9 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
break;
cleanup_match(ematch, net);
}
+
+ xt_percpu_counter_free(e->counters.pcnt);
+
return ret;
}
@@ -784,6 +792,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net)
if (par.target->destroy != NULL)
par.target->destroy(&par);
module_put(par.target->me);
+ xt_percpu_counter_free(e->counters.pcnt);
}
/* Checks and translates the user-supplied table segment (held in
@@ -866,12 +875,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
return ret;
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i) {
- if (newinfo->entries[i] && newinfo->entries[i] != entry0)
- memcpy(newinfo->entries[i], entry0, newinfo->size);
- }
-
return ret;
}
@@ -887,14 +890,16 @@ get_counters(const struct xt_table_info *t,
seqcount_t *s = &per_cpu(xt_recseq, cpu);
i = 0;
- xt_entry_foreach(iter, t->entries[cpu], t->size) {
+ xt_entry_foreach(iter, t->entries, t->size) {
+ struct xt_counters *tmp;
u64 bcnt, pcnt;
unsigned int start;
+ tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
do {
start = read_seqcount_begin(s);
- bcnt = iter->counters.bcnt;
- pcnt = iter->counters.pcnt;
+ bcnt = tmp->bcnt;
+ pcnt = tmp->pcnt;
} while (read_seqcount_retry(s, start));
ADD_COUNTER(counters[i], bcnt, pcnt);
@@ -939,11 +944,7 @@ copy_entries_to_user(unsigned int total_size,
if (IS_ERR(counters))
return PTR_ERR(counters);
- /* choose the copy that is on our node/cpu, ...
- * This choice is lazy (because current thread is
- * allowed to migrate to another cpu)
- */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ loc_cpu_entry = private->entries;
if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
ret = -EFAULT;
goto free_counters;
@@ -1051,16 +1052,16 @@ static int compat_table_info(const struct xt_table_info *info,
struct xt_table_info *newinfo)
{
struct ipt_entry *iter;
- void *loc_cpu_entry;
+ const void *loc_cpu_entry;
int ret;
if (!newinfo || !info)
return -EINVAL;
- /* we dont care about newinfo->entries[] */
+ /* we dont care about newinfo->entries */
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
- loc_cpu_entry = info->entries[raw_smp_processor_id()];
+ loc_cpu_entry = info->entries;
xt_compat_init_offsets(AF_INET, info->number);
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
@@ -1181,7 +1182,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct xt_table *t;
struct xt_table_info *oldinfo;
struct xt_counters *counters;
- void *loc_cpu_old_entry;
struct ipt_entry *iter;
ret = 0;
@@ -1224,8 +1224,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
get_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
- loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
- xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+ xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
cleanup_entry(iter, net);
xt_free_table_info(oldinfo);
@@ -1271,8 +1270,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
if (!newinfo)
return -ENOMEM;
- /* choose the copy that is on our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
tmp.size) != 0) {
ret = -EFAULT;
@@ -1303,7 +1301,7 @@ static int
do_add_counters(struct net *net, const void __user *user,
unsigned int len, int compat)
{
- unsigned int i, curcpu;
+ unsigned int i;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1313,7 +1311,6 @@ do_add_counters(struct net *net, const void __user *user,
struct xt_table *t;
const struct xt_table_info *private;
int ret = 0;
- void *loc_cpu_entry;
struct ipt_entry *iter;
unsigned int addend;
#ifdef CONFIG_COMPAT
@@ -1369,12 +1366,12 @@ do_add_counters(struct net *net, const void __user *user,
}
i = 0;
- /* Choose the copy that is on our node */
- curcpu = smp_processor_id();
- loc_cpu_entry = private->entries[curcpu];
addend = xt_write_recseq_begin();
- xt_entry_foreach(iter, loc_cpu_entry, private->size) {
- ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
+ xt_entry_foreach(iter, private->entries, private->size) {
+ struct xt_counters *tmp;
+
+ tmp = xt_get_this_cpu_counter(&iter->counters);
+ ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
++i;
}
xt_write_recseq_end(addend);
@@ -1608,6 +1605,10 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
unsigned int j;
int ret = 0;
+ e->counters.pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(e->counters.pcnt))
+ return -ENOMEM;
+
j = 0;
mtpar.net = net;
mtpar.table = name;
@@ -1632,6 +1633,9 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
break;
cleanup_match(ematch, net);
}
+
+ xt_percpu_counter_free(e->counters.pcnt);
+
return ret;
}
@@ -1716,7 +1720,7 @@ translate_compat_table(struct net *net,
newinfo->hook_entry[i] = info->hook_entry[i];
newinfo->underflow[i] = info->underflow[i];
}
- entry1 = newinfo->entries[raw_smp_processor_id()];
+ entry1 = newinfo->entries;
pos = entry1;
size = total_size;
xt_entry_foreach(iter0, entry0, total_size) {
@@ -1768,11 +1772,6 @@ translate_compat_table(struct net *net,
return ret;
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i)
- if (newinfo->entries[i] && newinfo->entries[i] != entry1)
- memcpy(newinfo->entries[i], entry1, newinfo->size);
-
*pinfo = newinfo;
*pentry0 = entry1;
xt_free_table_info(info);
@@ -1819,8 +1818,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
if (!newinfo)
return -ENOMEM;
- /* choose the copy that is on our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
tmp.size) != 0) {
ret = -EFAULT;
@@ -1891,7 +1889,6 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
void __user *pos;
unsigned int size;
int ret = 0;
- const void *loc_cpu_entry;
unsigned int i = 0;
struct ipt_entry *iter;
@@ -1899,14 +1896,9 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
if (IS_ERR(counters))
return PTR_ERR(counters);
- /* choose the copy that is on our node/cpu, ...
- * This choice is lazy (because current thread is
- * allowed to migrate to another cpu)
- */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
pos = userptr;
size = total_size;
- xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+ xt_entry_foreach(iter, private->entries, total_size) {
ret = compat_copy_entry_to_user(iter, &pos,
&size, counters, i++);
if (ret != 0)
@@ -2081,8 +2073,7 @@ struct xt_table *ipt_register_table(struct net *net,
goto out;
}
- /* choose the copy on our node/cpu, but dont care about preemption */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
memcpy(loc_cpu_entry, repl->entries, repl->size);
ret = translate_table(net, newinfo, loc_cpu_entry, repl);
@@ -2113,7 +2104,7 @@ void ipt_unregister_table(struct net *net, struct xt_table *table)
private = xt_unregister_table(table);
/* Decrease module usage counts and free resources */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ loc_cpu_entry = private->entries;
xt_entry_foreach(iter, loc_cpu_entry, private->size)
cleanup_entry(iter, net);
if (private->number > private->initial_entries)
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index d958718b5031..b4de08a83e0b 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -191,6 +191,8 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
static const struct nf_ipv6_ops ipv6ops = {
.chk_addr = ipv6_chk_addr,
+ .route_input = ip6_route_input,
+ .fragment = ip6_fragment
};
static const struct nf_afinfo nf_ip6_afinfo = {
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index ca6998345b42..b552cf0d6198 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -186,7 +186,8 @@ config IP6_NF_MATCH_MH
config IP6_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
- depends on NETFILTER_ADVANCED && (IP6_NF_MANGLE || IP6_NF_RAW)
+ depends on NETFILTER_ADVANCED
+ depends on IP6_NF_MANGLE || IP6_NF_RAW
---help---
This option allows you to match packets whose replies would
go out via the interface the packet came in.
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index cdd085f8b770..3c35ced39b42 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -283,15 +283,13 @@ static void trace_packet(const struct sk_buff *skb,
const struct xt_table_info *private,
const struct ip6t_entry *e)
{
- const void *table_base;
const struct ip6t_entry *root;
const char *hookname, *chainname, *comment;
const struct ip6t_entry *iter;
unsigned int rulenum = 0;
struct net *net = dev_net(in ? in : out);
- table_base = private->entries[smp_processor_id()];
- root = get_entry(table_base, private->hook_entry[hook]);
+ root = get_entry(private->entries, private->hook_entry[hook]);
hookname = chainname = hooknames[hook];
comment = comments[NF_IP6_TRACE_COMMENT_RULE];
@@ -357,7 +355,7 @@ ip6t_do_table(struct sk_buff *skb,
*/
smp_read_barrier_depends();
cpu = smp_processor_id();
- table_base = private->entries[cpu];
+ table_base = private->entries;
jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
stackptr = per_cpu_ptr(private->stackptr, cpu);
origptr = *stackptr;
@@ -367,6 +365,7 @@ ip6t_do_table(struct sk_buff *skb,
do {
const struct xt_entry_target *t;
const struct xt_entry_match *ematch;
+ struct xt_counters *counter;
IP_NF_ASSERT(e);
acpar.thoff = 0;
@@ -384,7 +383,8 @@ ip6t_do_table(struct sk_buff *skb,
goto no_match;
}
- ADD_COUNTER(e->counters, skb->len, 1);
+ counter = xt_get_this_cpu_counter(&e->counters);
+ ADD_COUNTER(*counter, skb->len, 1);
t = ip6t_get_target_c(e);
IP_NF_ASSERT(t->u.kernel.target);
@@ -679,6 +679,10 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
if (ret)
return ret;
+ e->counters.pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(e->counters.pcnt))
+ return -ENOMEM;
+
j = 0;
mtpar.net = net;
mtpar.table = name;
@@ -714,6 +718,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
break;
cleanup_match(ematch, net);
}
+
+ xt_percpu_counter_free(e->counters.pcnt);
+
return ret;
}
@@ -797,6 +804,8 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net)
if (par.target->destroy != NULL)
par.target->destroy(&par);
module_put(par.target->me);
+
+ xt_percpu_counter_free(e->counters.pcnt);
}
/* Checks and translates the user-supplied table segment (held in
@@ -879,12 +888,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
return ret;
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i) {
- if (newinfo->entries[i] && newinfo->entries[i] != entry0)
- memcpy(newinfo->entries[i], entry0, newinfo->size);
- }
-
return ret;
}
@@ -900,14 +903,16 @@ get_counters(const struct xt_table_info *t,
seqcount_t *s = &per_cpu(xt_recseq, cpu);
i = 0;
- xt_entry_foreach(iter, t->entries[cpu], t->size) {
+ xt_entry_foreach(iter, t->entries, t->size) {
+ struct xt_counters *tmp;
u64 bcnt, pcnt;
unsigned int start;
+ tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
do {
start = read_seqcount_begin(s);
- bcnt = iter->counters.bcnt;
- pcnt = iter->counters.pcnt;
+ bcnt = tmp->bcnt;
+ pcnt = tmp->pcnt;
} while (read_seqcount_retry(s, start));
ADD_COUNTER(counters[i], bcnt, pcnt);
@@ -952,11 +957,7 @@ copy_entries_to_user(unsigned int total_size,
if (IS_ERR(counters))
return PTR_ERR(counters);
- /* choose the copy that is on our node/cpu, ...
- * This choice is lazy (because current thread is
- * allowed to migrate to another cpu)
- */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ loc_cpu_entry = private->entries;
if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
ret = -EFAULT;
goto free_counters;
@@ -1064,16 +1065,16 @@ static int compat_table_info(const struct xt_table_info *info,
struct xt_table_info *newinfo)
{
struct ip6t_entry *iter;
- void *loc_cpu_entry;
+ const void *loc_cpu_entry;
int ret;
if (!newinfo || !info)
return -EINVAL;
- /* we dont care about newinfo->entries[] */
+ /* we dont care about newinfo->entries */
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
- loc_cpu_entry = info->entries[raw_smp_processor_id()];
+ loc_cpu_entry = info->entries;
xt_compat_init_offsets(AF_INET6, info->number);
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
@@ -1194,7 +1195,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct xt_table *t;
struct xt_table_info *oldinfo;
struct xt_counters *counters;
- const void *loc_cpu_old_entry;
struct ip6t_entry *iter;
ret = 0;
@@ -1237,8 +1237,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
get_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
- loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
- xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+ xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
cleanup_entry(iter, net);
xt_free_table_info(oldinfo);
@@ -1284,8 +1283,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
if (!newinfo)
return -ENOMEM;
- /* choose the copy that is on our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
tmp.size) != 0) {
ret = -EFAULT;
@@ -1316,7 +1314,7 @@ static int
do_add_counters(struct net *net, const void __user *user, unsigned int len,
int compat)
{
- unsigned int i, curcpu;
+ unsigned int i;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1326,7 +1324,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
struct xt_table *t;
const struct xt_table_info *private;
int ret = 0;
- const void *loc_cpu_entry;
struct ip6t_entry *iter;
unsigned int addend;
#ifdef CONFIG_COMPAT
@@ -1374,7 +1371,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
goto free;
}
-
local_bh_disable();
private = t->private;
if (private->number != num_counters) {
@@ -1383,16 +1379,15 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
}
i = 0;
- /* Choose the copy that is on our node */
- curcpu = smp_processor_id();
addend = xt_write_recseq_begin();
- loc_cpu_entry = private->entries[curcpu];
- xt_entry_foreach(iter, loc_cpu_entry, private->size) {
- ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
+ xt_entry_foreach(iter, private->entries, private->size) {
+ struct xt_counters *tmp;
+
+ tmp = xt_get_this_cpu_counter(&iter->counters);
+ ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
++i;
}
xt_write_recseq_end(addend);
-
unlock_up_free:
local_bh_enable();
xt_table_unlock(t);
@@ -1621,6 +1616,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net,
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
+ e->counters.pcnt = xt_percpu_counter_alloc();
+ if (IS_ERR_VALUE(e->counters.pcnt))
+ return -ENOMEM;
j = 0;
mtpar.net = net;
mtpar.table = name;
@@ -1645,6 +1643,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net,
break;
cleanup_match(ematch, net);
}
+
+ xt_percpu_counter_free(e->counters.pcnt);
+
return ret;
}
@@ -1729,7 +1730,7 @@ translate_compat_table(struct net *net,
newinfo->hook_entry[i] = info->hook_entry[i];
newinfo->underflow[i] = info->underflow[i];
}
- entry1 = newinfo->entries[raw_smp_processor_id()];
+ entry1 = newinfo->entries;
pos = entry1;
size = total_size;
xt_entry_foreach(iter0, entry0, total_size) {
@@ -1781,11 +1782,6 @@ translate_compat_table(struct net *net,
return ret;
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i)
- if (newinfo->entries[i] && newinfo->entries[i] != entry1)
- memcpy(newinfo->entries[i], entry1, newinfo->size);
-
*pinfo = newinfo;
*pentry0 = entry1;
xt_free_table_info(info);
@@ -1832,8 +1828,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
if (!newinfo)
return -ENOMEM;
- /* choose the copy that is on our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
tmp.size) != 0) {
ret = -EFAULT;
@@ -1904,7 +1899,6 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
void __user *pos;
unsigned int size;
int ret = 0;
- const void *loc_cpu_entry;
unsigned int i = 0;
struct ip6t_entry *iter;
@@ -1912,14 +1906,9 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
if (IS_ERR(counters))
return PTR_ERR(counters);
- /* choose the copy that is on our node/cpu, ...
- * This choice is lazy (because current thread is
- * allowed to migrate to another cpu)
- */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
pos = userptr;
size = total_size;
- xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+ xt_entry_foreach(iter, private->entries, total_size) {
ret = compat_copy_entry_to_user(iter, &pos,
&size, counters, i++);
if (ret != 0)
@@ -2094,8 +2083,7 @@ struct xt_table *ip6t_register_table(struct net *net,
goto out;
}
- /* choose the copy on our node/cpu, but dont care about preemption */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ loc_cpu_entry = newinfo->entries;
memcpy(loc_cpu_entry, repl->entries, repl->size);
ret = translate_table(net, newinfo, loc_cpu_entry, repl);
@@ -2125,7 +2113,7 @@ void ip6t_unregister_table(struct net *net, struct xt_table *table)
private = xt_unregister_table(table);
/* Decrease module usage counts and free resources */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ loc_cpu_entry = private->entries;
xt_entry_foreach(iter, loc_cpu_entry, private->size)
cleanup_entry(iter, net);
if (private->number > private->initial_entries)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index fbc8d15c7fda..6eae69a698ed 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -206,7 +206,7 @@ config NF_CONNTRACK_FTP
config NF_CONNTRACK_H323
tristate "H.323 protocol support"
- depends on (IPV6 || IPV6=n)
+ depends on IPV6 || IPV6=n
depends on NETFILTER_ADVANCED
help
H.323 is a VoIP signalling protocol from ITU-T. As one of the most
@@ -723,7 +723,7 @@ config NETFILTER_XT_TARGET_HL
config NETFILTER_XT_TARGET_HMARK
tristate '"HMARK" target support'
- depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
+ depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
depends on NETFILTER_ADVANCED
---help---
This option adds the "HMARK" target.
@@ -865,7 +865,7 @@ config NETFILTER_XT_TARGET_REDIRECT
config NETFILTER_XT_TARGET_TEE
tristate '"TEE" - packet cloning to alternate destination'
depends on NETFILTER_ADVANCED
- depends on (IPV6 || IPV6=n)
+ depends on IPV6 || IPV6=n
depends on !NF_CONNTRACK || NF_CONNTRACK
---help---
This option adds a "TEE" target with which a packet can be cloned and
@@ -875,8 +875,8 @@ config NETFILTER_XT_TARGET_TPROXY
tristate '"TPROXY" target transparent proxying support'
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
- depends on (IPV6 || IPV6=n)
- depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
+ depends on IPV6 || IPV6=n
+ depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
@@ -915,7 +915,7 @@ config NETFILTER_XT_TARGET_SECMARK
config NETFILTER_XT_TARGET_TCPMSS
tristate '"TCPMSS" target support'
- depends on (IPV6 || IPV6=n)
+ depends on IPV6 || IPV6=n
default m if NETFILTER_ADVANCED=n
---help---
This option adds a `TCPMSS' target, which allows you to alter the
@@ -1127,7 +1127,7 @@ config NETFILTER_XT_MATCH_ESP
config NETFILTER_XT_MATCH_HASHLIMIT
tristate '"hashlimit" match support'
- depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
+ depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
depends on NETFILTER_ADVANCED
help
This option adds a `hashlimit' match.
@@ -1369,8 +1369,8 @@ config NETFILTER_XT_MATCH_SOCKET
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
depends on !NF_CONNTRACK || NF_CONNTRACK
- depends on (IPV6 || IPV6=n)
- depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
+ depends on IPV6 || IPV6=n
+ depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
help
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 6f024a8a1534..d05e759ed0fa 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -41,7 +41,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
struct mtype *map = set->data;
init_timer(&map->gc);
- map->gc.data = (unsigned long) set;
+ map->gc.data = (unsigned long)set;
map->gc.function = gc;
map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&map->gc);
@@ -144,10 +144,12 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (ret == IPSET_ADD_FAILED) {
if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(x, set)))
+ ip_set_timeout_expired(ext_timeout(x, set))) {
ret = 0;
- else if (!(flags & IPSET_FLAG_EXIST))
+ } else if (!(flags & IPSET_FLAG_EXIST)) {
+ set_bit(e->id, map->members);
return -IPSET_ERR_EXIST;
+ }
/* Element is re-added, cleanup extensions */
ip_set_ext_destroy(set, x);
}
@@ -165,6 +167,10 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ip_set_init_comment(ext_comment(x, set), ext);
if (SET_WITH_SKBINFO(set))
ip_set_init_skbinfo(ext_skbinfo(x, set), ext);
+
+ /* Activate element */
+ set_bit(e->id, map->members);
+
return 0;
}
@@ -203,10 +209,13 @@ mtype_list(const struct ip_set *set,
struct nlattr *adt, *nested;
void *x;
u32 id, first = cb->args[IPSET_CB_ARG0];
+ int ret = 0;
adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
if (!adt)
return -EMSGSIZE;
+ /* Extensions may be replaced */
+ rcu_read_lock();
for (; cb->args[IPSET_CB_ARG0] < map->elements;
cb->args[IPSET_CB_ARG0]++) {
id = cb->args[IPSET_CB_ARG0];
@@ -214,7 +223,7 @@ mtype_list(const struct ip_set *set,
if (!test_bit(id, map->members) ||
(SET_WITH_TIMEOUT(set) &&
#ifdef IP_SET_BITMAP_STORED_TIMEOUT
- mtype_is_filled((const struct mtype_elem *) x) &&
+ mtype_is_filled((const struct mtype_elem *)x) &&
#endif
ip_set_timeout_expired(ext_timeout(x, set))))
continue;
@@ -222,14 +231,16 @@ mtype_list(const struct ip_set *set,
if (!nested) {
if (id == first) {
nla_nest_cancel(skb, adt);
- return -EMSGSIZE;
- } else
- goto nla_put_failure;
+ ret = -EMSGSIZE;
+ goto out;
+ }
+
+ goto nla_put_failure;
}
if (mtype_do_list(skb, map, id, set->dsize))
goto nla_put_failure;
if (ip_set_put_extensions(skb, set, x,
- mtype_is_filled((const struct mtype_elem *) x)))
+ mtype_is_filled((const struct mtype_elem *)x)))
goto nla_put_failure;
ipset_nest_end(skb, nested);
}
@@ -238,29 +249,32 @@ mtype_list(const struct ip_set *set,
/* Set listing finished */
cb->args[IPSET_CB_ARG0] = 0;
- return 0;
+ goto out;
nla_put_failure:
nla_nest_cancel(skb, nested);
if (unlikely(id == first)) {
cb->args[IPSET_CB_ARG0] = 0;
- return -EMSGSIZE;
+ ret = -EMSGSIZE;
}
ipset_nest_end(skb, adt);
- return 0;
+out:
+ rcu_read_unlock();
+ return ret;
}
static void
mtype_gc(unsigned long ul_set)
{
- struct ip_set *set = (struct ip_set *) ul_set;
+ struct ip_set *set = (struct ip_set *)ul_set;
struct mtype *map = set->data;
void *x;
u32 id;
/* We run parallel with other readers (test element)
- * but adding/deleting new entries is locked out */
- read_lock_bh(&set->lock);
+ * but adding/deleting new entries is locked out
+ */
+ spin_lock_bh(&set->lock);
for (id = 0; id < map->elements; id++)
if (mtype_gc_test(id, map, set->dsize)) {
x = get_ext(set, map, id);
@@ -269,7 +283,7 @@ mtype_gc(unsigned long ul_set)
ip_set_ext_destroy(set, x);
}
}
- read_unlock_bh(&set->lock);
+ spin_unlock_bh(&set->lock);
map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&map->gc);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 2fe6de46f6d0..64a564334418 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -59,7 +59,7 @@ struct bitmap_ip_adt_elem {
static inline u32
ip_to_id(const struct bitmap_ip *m, u32 ip)
{
- return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts;
+ return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip) / m->hosts;
}
/* Common functions */
@@ -81,7 +81,7 @@ static inline int
bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map,
u32 flags, size_t dsize)
{
- return !!test_and_set_bit(e->id, map->members);
+ return !!test_bit(e->id, map->members);
}
static inline int
@@ -138,18 +138,12 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret = 0;
- if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
- return -IPSET_ERR_PROTOCOL;
-
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (unlikely(!tb[IPSET_ATTR_IP]))
+ return -IPSET_ERR_PROTOCOL;
+
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
if (ret)
return ret;
@@ -181,8 +175,9 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
if (!cidr || cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
- } else
+ } else {
ip_to = ip;
+ }
if (ip_to > map->last_ip)
return -IPSET_ERR_BITMAP_RANGE;
@@ -193,8 +188,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -284,8 +279,9 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (cidr >= HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(first_ip, last_ip, cidr);
- } else
+ } else {
return -IPSET_ERR_PROTOCOL;
+ }
if (tb[IPSET_ATTR_NETMASK]) {
netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
@@ -382,6 +378,7 @@ bitmap_ip_init(void)
static void __exit
bitmap_ip_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&bitmap_ip_type);
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index eb188561d65f..1430535118fb 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -90,7 +90,7 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
return 0;
elem = get_elem(map->extensions, e->id, dsize);
if (elem->filled == MAC_FILLED)
- return e->ether == NULL ||
+ return !e->ether ||
ether_addr_equal(e->ether, elem->ether);
/* Trigger kernel to fill out the ethernet address */
return -EAGAIN;
@@ -131,7 +131,8 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
/* If MAC is unset yet, we store plain timeout value
* because the timer is not activated yet
* and we can reuse it later when MAC is filled out,
- * possibly by the kernel */
+ * possibly by the kernel
+ */
if (e->ether)
ip_set_timeout_set(timeout, t);
else
@@ -147,28 +148,35 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
struct bitmap_ipmac_elem *elem;
elem = get_elem(map->extensions, e->id, dsize);
- if (test_and_set_bit(e->id, map->members)) {
+ if (test_bit(e->id, map->members)) {
if (elem->filled == MAC_FILLED) {
- if (e->ether && (flags & IPSET_FLAG_EXIST))
- memcpy(elem->ether, e->ether, ETH_ALEN);
+ if (e->ether &&
+ (flags & IPSET_FLAG_EXIST) &&
+ !ether_addr_equal(e->ether, elem->ether)) {
+ /* memcpy isn't atomic */
+ clear_bit(e->id, map->members);
+ smp_mb__after_atomic();
+ ether_addr_copy(elem->ether, e->ether);
+ }
return IPSET_ADD_FAILED;
} else if (!e->ether)
/* Already added without ethernet address */
return IPSET_ADD_FAILED;
/* Fill the MAC address and trigger the timer activation */
- memcpy(elem->ether, e->ether, ETH_ALEN);
+ clear_bit(e->id, map->members);
+ smp_mb__after_atomic();
+ ether_addr_copy(elem->ether, e->ether);
elem->filled = MAC_FILLED;
return IPSET_ADD_START_STORED_TIMEOUT;
} else if (e->ether) {
/* We can store MAC too */
- memcpy(elem->ether, e->ether, ETH_ALEN);
+ ether_addr_copy(elem->ether, e->ether);
elem->filled = MAC_FILLED;
return 0;
- } else {
- elem->filled = MAC_UNSET;
- /* MAC is not stored yet, don't start timer */
- return IPSET_ADD_STORE_PLAIN_TIMEOUT;
}
+ elem->filled = MAC_UNSET;
+ /* MAC is not stored yet, don't start timer */
+ return IPSET_ADD_STORE_PLAIN_TIMEOUT;
}
static inline int
@@ -239,18 +247,12 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
u32 ip = 0;
int ret = 0;
- if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
- return -IPSET_ERR_PROTOCOL;
-
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (unlikely(!tb[IPSET_ATTR_IP]))
+ return -IPSET_ERR_PROTOCOL;
+
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
if (ret)
return ret;
@@ -350,8 +352,9 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (cidr >= HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(first_ip, last_ip, cidr);
- } else
+ } else {
return -IPSET_ERR_PROTOCOL;
+ }
elements = (u64)last_ip - first_ip + 1;
@@ -419,6 +422,7 @@ bitmap_ipmac_init(void)
static void __exit
bitmap_ipmac_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&bitmap_ipmac_type);
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 898edb693b3f..5338ccd5da46 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -73,7 +73,7 @@ static inline int
bitmap_port_do_add(const struct bitmap_port_adt_elem *e,
struct bitmap_port *map, u32 flags, size_t dsize)
{
- return !!test_and_set_bit(e->id, map->members);
+ return !!test_bit(e->id, map->members);
}
static inline int
@@ -136,19 +136,13 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
u16 port_to;
int ret = 0;
- if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
- return -IPSET_ERR_PROTOCOL;
-
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
+ return -IPSET_ERR_PROTOCOL;
+
port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
if (port < map->first_port || port > map->last_port)
return -IPSET_ERR_BITMAP_RANGE;
@@ -168,8 +162,9 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
if (port < map->first_port)
return -IPSET_ERR_BITMAP_RANGE;
}
- } else
+ } else {
port_to = port;
+ }
if (port_to > map->last_port)
return -IPSET_ERR_BITMAP_RANGE;
@@ -180,8 +175,8 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -312,6 +307,7 @@ bitmap_port_init(void)
static void __exit
bitmap_port_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&bitmap_port_type);
}
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 475e4960a164..338b4047776f 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -32,8 +32,10 @@ static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */
struct ip_set_net {
struct ip_set * __rcu *ip_set_list; /* all individual sets */
ip_set_id_t ip_set_max; /* max number of sets */
- int is_deleted; /* deleted by ip_set_net_exit */
+ bool is_deleted; /* deleted by ip_set_net_exit */
+ bool is_destroyed; /* all sets are destroyed */
};
+
static int ip_set_net_id __read_mostly;
static inline struct ip_set_net *ip_set_pernet(struct net *net)
@@ -59,8 +61,7 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
#define ip_set(inst, id) \
ip_set_dereference((inst)->ip_set_list)[id]
-/*
- * The set types are implemented in modules and registered set types
+/* The set types are implemented in modules and registered set types
* can be found in ip_set_type_list. Adding/deleting types is
* serialized by ip_set_type_mutex.
*/
@@ -130,7 +131,8 @@ __find_set_type_get(const char *name, u8 family, u8 revision,
goto unlock;
}
/* Make sure the type is already loaded
- * but we don't support the revision */
+ * but we don't support the revision
+ */
list_for_each_entry_rcu(type, &ip_set_type_list, list)
if (STRNCMP(type->name, name)) {
err = -IPSET_ERR_FIND_TYPE;
@@ -208,15 +210,15 @@ ip_set_type_register(struct ip_set_type *type)
pr_warn("ip_set type %s, family %s with revision min %u already registered!\n",
type->name, family_name(type->family),
type->revision_min);
- ret = -EINVAL;
- goto unlock;
+ ip_set_type_unlock();
+ return -EINVAL;
}
list_add_rcu(&type->list, &ip_set_type_list);
pr_debug("type %s, family %s, revision %u:%u registered.\n",
type->name, family_name(type->family),
type->revision_min, type->revision_max);
-unlock:
ip_set_type_unlock();
+
return ret;
}
EXPORT_SYMBOL_GPL(ip_set_type_register);
@@ -230,12 +232,12 @@ ip_set_type_unregister(struct ip_set_type *type)
pr_warn("ip_set type %s, family %s with revision min %u not registered\n",
type->name, family_name(type->family),
type->revision_min);
- goto unlock;
+ ip_set_type_unlock();
+ return;
}
list_del_rcu(&type->list);
pr_debug("type %s, family %s with revision min %u unregistered.\n",
type->name, family_name(type->family), type->revision_min);
-unlock:
ip_set_type_unlock();
synchronize_rcu();
@@ -289,7 +291,7 @@ static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
int
ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr)
{
- struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
+ struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1];
if (unlikely(!flag_nested(nla)))
return -IPSET_ERR_PROTOCOL;
@@ -306,7 +308,7 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
int
ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
{
- struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
+ struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1];
if (unlikely(!flag_nested(nla)))
return -IPSET_ERR_PROTOCOL;
@@ -317,7 +319,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
return -IPSET_ERR_PROTOCOL;
memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
- sizeof(struct in6_addr));
+ sizeof(struct in6_addr));
return 0;
}
EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
@@ -389,13 +391,22 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext *ext)
{
u64 fullmark;
+
+ if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ return -IPSET_ERR_PROTOCOL;
+
if (tb[IPSET_ATTR_TIMEOUT]) {
- if (!(set->extensions & IPSET_EXT_TIMEOUT))
+ if (!SET_WITH_TIMEOUT(set))
return -IPSET_ERR_TIMEOUT;
ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
}
if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) {
- if (!(set->extensions & IPSET_EXT_COUNTER))
+ if (!SET_WITH_COUNTER(set))
return -IPSET_ERR_COUNTER;
if (tb[IPSET_ATTR_BYTES])
ext->bytes = be64_to_cpu(nla_get_be64(
@@ -405,25 +416,25 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
tb[IPSET_ATTR_PACKETS]));
}
if (tb[IPSET_ATTR_COMMENT]) {
- if (!(set->extensions & IPSET_EXT_COMMENT))
+ if (!SET_WITH_COMMENT(set))
return -IPSET_ERR_COMMENT;
ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
}
if (tb[IPSET_ATTR_SKBMARK]) {
- if (!(set->extensions & IPSET_EXT_SKBINFO))
+ if (!SET_WITH_SKBINFO(set))
return -IPSET_ERR_SKBINFO;
fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
ext->skbmark = fullmark >> 32;
ext->skbmarkmask = fullmark & 0xffffffff;
}
if (tb[IPSET_ATTR_SKBPRIO]) {
- if (!(set->extensions & IPSET_EXT_SKBINFO))
+ if (!SET_WITH_SKBINFO(set))
return -IPSET_ERR_SKBINFO;
ext->skbprio = be32_to_cpu(nla_get_be32(
tb[IPSET_ATTR_SKBPRIO]));
}
if (tb[IPSET_ATTR_SKBQUEUE]) {
- if (!(set->extensions & IPSET_EXT_SKBINFO))
+ if (!SET_WITH_SKBINFO(set))
return -IPSET_ERR_SKBINFO;
ext->skbqueue = be16_to_cpu(nla_get_be16(
tb[IPSET_ATTR_SKBQUEUE]));
@@ -457,8 +468,7 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
}
EXPORT_SYMBOL_GPL(ip_set_put_extensions);
-/*
- * Creating/destroying/renaming/swapping affect the existence and
+/* Creating/destroying/renaming/swapping affect the existence and
* the properties of a set. All of these can be executed from userspace
* only and serialized by the nfnl mutex indirectly from nfnetlink.
*
@@ -485,8 +495,7 @@ __ip_set_put(struct ip_set *set)
write_unlock_bh(&ip_set_ref_lock);
}
-/*
- * Add, del and test set entries from kernel.
+/* Add, del and test set entries from kernel.
*
* The set behind the index must exist and must be referenced
* so it can't be destroyed (or changed) under our foot.
@@ -514,23 +523,23 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
dev_net(par->in ? par->in : par->out), index);
int ret = 0;
- BUG_ON(set == NULL);
+ BUG_ON(!set);
pr_debug("set %s, index %u\n", set->name, index);
if (opt->dim < set->type->dimension ||
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return 0;
- read_lock_bh(&set->lock);
+ rcu_read_lock_bh();
ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
- read_unlock_bh(&set->lock);
+ rcu_read_unlock_bh();
if (ret == -EAGAIN) {
/* Type requests element to be completed */
pr_debug("element must be completed, ADD is triggered\n");
- write_lock_bh(&set->lock);
+ spin_lock_bh(&set->lock);
set->variant->kadt(set, skb, par, IPSET_ADD, opt);
- write_unlock_bh(&set->lock);
+ spin_unlock_bh(&set->lock);
ret = 1;
} else {
/* --return-nomatch: invert matched element */
@@ -553,16 +562,16 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
dev_net(par->in ? par->in : par->out), index);
int ret;
- BUG_ON(set == NULL);
+ BUG_ON(!set);
pr_debug("set %s, index %u\n", set->name, index);
if (opt->dim < set->type->dimension ||
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return -IPSET_ERR_TYPE_MISMATCH;
- write_lock_bh(&set->lock);
+ spin_lock_bh(&set->lock);
ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
- write_unlock_bh(&set->lock);
+ spin_unlock_bh(&set->lock);
return ret;
}
@@ -576,23 +585,22 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
dev_net(par->in ? par->in : par->out), index);
int ret = 0;
- BUG_ON(set == NULL);
+ BUG_ON(!set);
pr_debug("set %s, index %u\n", set->name, index);
if (opt->dim < set->type->dimension ||
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return -IPSET_ERR_TYPE_MISMATCH;
- write_lock_bh(&set->lock);
+ spin_lock_bh(&set->lock);
ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
- write_unlock_bh(&set->lock);
+ spin_unlock_bh(&set->lock);
return ret;
}
EXPORT_SYMBOL_GPL(ip_set_del);
-/*
- * Find set by name, reference it once. The reference makes sure the
+/* Find set by name, reference it once. The reference makes sure the
* thing pointed to, does not go away under our feet.
*
*/
@@ -606,7 +614,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
rcu_read_lock();
for (i = 0; i < inst->ip_set_max; i++) {
s = rcu_dereference(inst->ip_set_list)[i];
- if (s != NULL && STRNCMP(s->name, name)) {
+ if (s && STRNCMP(s->name, name)) {
__ip_set_get(s);
index = i;
*set = s;
@@ -619,8 +627,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
}
EXPORT_SYMBOL_GPL(ip_set_get_byname);
-/*
- * If the given set pointer points to a valid set, decrement
+/* If the given set pointer points to a valid set, decrement
* reference count by 1. The caller shall not assume the index
* to be valid, after calling this function.
*
@@ -633,7 +640,7 @@ __ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
rcu_read_lock();
set = rcu_dereference(inst->ip_set_list)[index];
- if (set != NULL)
+ if (set)
__ip_set_put(set);
rcu_read_unlock();
}
@@ -647,8 +654,7 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index)
}
EXPORT_SYMBOL_GPL(ip_set_put_byindex);
-/*
- * Get the name of a set behind a set index.
+/* Get the name of a set behind a set index.
* We assume the set is referenced, so it does exist and
* can't be destroyed. The set cannot be renamed due to
* the referencing either.
@@ -659,7 +665,7 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index)
{
const struct ip_set *set = ip_set_rcu_get(net, index);
- BUG_ON(set == NULL);
+ BUG_ON(!set);
BUG_ON(set->ref == 0);
/* Referenced, so it's safe */
@@ -667,13 +673,11 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index)
}
EXPORT_SYMBOL_GPL(ip_set_name_byindex);
-/*
- * Routines to call by external subsystems, which do not
+/* Routines to call by external subsystems, which do not
* call nfnl_lock for us.
*/
-/*
- * Find set by index, reference it once. The reference makes sure the
+/* Find set by index, reference it once. The reference makes sure the
* thing pointed to, does not go away under our feet.
*
* The nfnl mutex is used in the function.
@@ -699,8 +703,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
}
EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
-/*
- * If the given set pointer points to a valid set, decrement
+/* If the given set pointer points to a valid set, decrement
* reference count by 1. The caller shall not assume the index
* to be valid, after calling this function.
*
@@ -715,15 +718,14 @@ ip_set_nfnl_put(struct net *net, ip_set_id_t index)
nfnl_lock(NFNL_SUBSYS_IPSET);
if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
set = ip_set(inst, index);
- if (set != NULL)
+ if (set)
__ip_set_put(set);
}
nfnl_unlock(NFNL_SUBSYS_IPSET);
}
EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
-/*
- * Communication protocol with userspace over netlink.
+/* Communication protocol with userspace over netlink.
*
* The commands are serialized by the nfnl mutex.
*/
@@ -750,7 +752,7 @@ start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ if (!nlh)
return NULL;
nfmsg = nlmsg_data(nlh);
@@ -783,7 +785,7 @@ find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id)
*id = IPSET_INVALID_ID;
for (i = 0; i < inst->ip_set_max; i++) {
set = ip_set(inst, i);
- if (set != NULL && STRNCMP(set->name, name)) {
+ if (set && STRNCMP(set->name, name)) {
*id = i;
break;
}
@@ -809,7 +811,7 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
*index = IPSET_INVALID_ID;
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
- if (s == NULL) {
+ if (!s) {
if (*index == IPSET_INVALID_ID)
*index = i;
} else if (STRNCMP(name, s->name)) {
@@ -841,18 +843,18 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set, *clash = NULL;
ip_set_id_t index = IPSET_INVALID_ID;
- struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
+ struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {};
const char *name, *typename;
u8 family, revision;
u32 flags = flag_exist(nlh);
int ret = 0;
if (unlikely(protocol_failed(attr) ||
- attr[IPSET_ATTR_SETNAME] == NULL ||
- attr[IPSET_ATTR_TYPENAME] == NULL ||
- attr[IPSET_ATTR_REVISION] == NULL ||
- attr[IPSET_ATTR_FAMILY] == NULL ||
- (attr[IPSET_ATTR_DATA] != NULL &&
+ !attr[IPSET_ATTR_SETNAME] ||
+ !attr[IPSET_ATTR_TYPENAME] ||
+ !attr[IPSET_ATTR_REVISION] ||
+ !attr[IPSET_ATTR_FAMILY] ||
+ (attr[IPSET_ATTR_DATA] &&
!flag_nested(attr[IPSET_ATTR_DATA]))))
return -IPSET_ERR_PROTOCOL;
@@ -863,33 +865,29 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
name, typename, family_name(family), revision);
- /*
- * First, and without any locks, allocate and initialize
+ /* First, and without any locks, allocate and initialize
* a normal base set structure.
*/
- set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
+ set = kzalloc(sizeof(*set), GFP_KERNEL);
if (!set)
return -ENOMEM;
- rwlock_init(&set->lock);
+ spin_lock_init(&set->lock);
strlcpy(set->name, name, IPSET_MAXNAMELEN);
set->family = family;
set->revision = revision;
- /*
- * Next, check that we know the type, and take
+ /* Next, check that we know the type, and take
* a reference on the type, to make sure it stays available
* while constructing our new set.
*
* After referencing the type, we try to create the type
* specific part of the set without holding any locks.
*/
- ret = find_set_type_get(typename, family, revision, &(set->type));
+ ret = find_set_type_get(typename, family, revision, &set->type);
if (ret)
goto out;
- /*
- * Without holding any locks, create private part.
- */
+ /* Without holding any locks, create private part. */
if (attr[IPSET_ATTR_DATA] &&
nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
set->type->create_policy)) {
@@ -903,8 +901,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
/* BTW, ret==0 here. */
- /*
- * Here, we have a valid, constructed set and we are protected
+ /* Here, we have a valid, constructed set and we are protected
* by the nfnl mutex. Find the first free index in ip_set_list
* and check clashing.
*/
@@ -927,7 +924,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
/* Wraparound */
goto cleanup;
- list = kzalloc(sizeof(struct ip_set *) * i, GFP_KERNEL);
+ list = kcalloc(i, sizeof(struct ip_set *), GFP_KERNEL);
if (!list)
goto cleanup;
/* nfnl mutex is held, both lists are valid */
@@ -941,12 +938,11 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
inst->ip_set_max = i;
kfree(tmp);
ret = 0;
- } else if (ret)
+ } else if (ret) {
goto cleanup;
+ }
- /*
- * Finally! Add our shiny new set to the list, and be done.
- */
+ /* Finally! Add our shiny new set to the list, and be done. */
pr_debug("create: '%s' created with index %u!\n", set->name, index);
ip_set(inst, index) = set;
@@ -971,12 +967,9 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
};
static void
-ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index)
+ip_set_destroy_set(struct ip_set *set)
{
- struct ip_set *set = ip_set(inst, index);
-
pr_debug("set: %s\n", set->name);
- ip_set(inst, index) = NULL;
/* Must call it without holding any lock */
set->variant->destroy(set);
@@ -1011,30 +1004,36 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
if (!attr[IPSET_ATTR_SETNAME]) {
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
- if (s != NULL && s->ref) {
+ if (s && s->ref) {
ret = -IPSET_ERR_BUSY;
goto out;
}
}
+ inst->is_destroyed = true;
read_unlock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
- if (s != NULL)
- ip_set_destroy_set(inst, i);
+ if (s) {
+ ip_set(inst, i) = NULL;
+ ip_set_destroy_set(s);
+ }
}
+ /* Modified by ip_set_destroy() only, which is serialized */
+ inst->is_destroyed = false;
} else {
s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
&i);
- if (s == NULL) {
+ if (!s) {
ret = -ENOENT;
goto out;
} else if (s->ref) {
ret = -IPSET_ERR_BUSY;
goto out;
}
+ ip_set(inst, i) = NULL;
read_unlock_bh(&ip_set_ref_lock);
- ip_set_destroy_set(inst, i);
+ ip_set_destroy_set(s);
}
return 0;
out:
@@ -1049,9 +1048,9 @@ ip_set_flush_set(struct ip_set *set)
{
pr_debug("set: %s\n", set->name);
- write_lock_bh(&set->lock);
+ spin_lock_bh(&set->lock);
set->variant->flush(set);
- write_unlock_bh(&set->lock);
+ spin_unlock_bh(&set->lock);
}
static int
@@ -1069,12 +1068,12 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
if (!attr[IPSET_ATTR_SETNAME]) {
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
- if (s != NULL)
+ if (s)
ip_set_flush_set(s);
}
} else {
s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
- if (s == NULL)
+ if (!s)
return -ENOENT;
ip_set_flush_set(s);
@@ -1106,12 +1105,12 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
int ret = 0;
if (unlikely(protocol_failed(attr) ||
- attr[IPSET_ATTR_SETNAME] == NULL ||
- attr[IPSET_ATTR_SETNAME2] == NULL))
+ !attr[IPSET_ATTR_SETNAME] ||
+ !attr[IPSET_ATTR_SETNAME2]))
return -IPSET_ERR_PROTOCOL;
set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
- if (set == NULL)
+ if (!set)
return -ENOENT;
read_lock_bh(&ip_set_ref_lock);
@@ -1123,7 +1122,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
- if (s != NULL && STRNCMP(s->name, name2)) {
+ if (s && STRNCMP(s->name, name2)) {
ret = -IPSET_ERR_EXIST_SETNAME2;
goto out;
}
@@ -1155,23 +1154,24 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
char from_name[IPSET_MAXNAMELEN];
if (unlikely(protocol_failed(attr) ||
- attr[IPSET_ATTR_SETNAME] == NULL ||
- attr[IPSET_ATTR_SETNAME2] == NULL))
+ !attr[IPSET_ATTR_SETNAME] ||
+ !attr[IPSET_ATTR_SETNAME2]))
return -IPSET_ERR_PROTOCOL;
from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
&from_id);
- if (from == NULL)
+ if (!from)
return -ENOENT;
to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]),
&to_id);
- if (to == NULL)
+ if (!to)
return -IPSET_ERR_EXIST_SETNAME2;
/* Features must not change.
- * Not an artificial restriction anymore, as we must prevent
- * possible loops created by swapping in setlist type of sets. */
+ * Not an artifical restriction anymore, as we must prevent
+ * possible loops created by swapping in setlist type of sets.
+ */
if (!(from->type->features == to->type->features &&
from->family == to->family))
return -IPSET_ERR_TYPE_MISMATCH;
@@ -1202,12 +1202,16 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
static int
ip_set_dump_done(struct netlink_callback *cb)
{
- struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET];
if (cb->args[IPSET_CB_ARG0]) {
- pr_debug("release set %s\n",
- ip_set(inst, cb->args[IPSET_CB_INDEX])->name);
- __ip_set_put_byindex(inst,
- (ip_set_id_t) cb->args[IPSET_CB_INDEX]);
+ struct ip_set_net *inst =
+ (struct ip_set_net *)cb->args[IPSET_CB_NET];
+ ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
+ struct ip_set *set = ip_set(inst, index);
+
+ if (set->variant->uref)
+ set->variant->uref(set, cb, false);
+ pr_debug("release set %s\n", set->name);
+ __ip_set_put_byindex(inst, index);
}
return 0;
}
@@ -1229,7 +1233,7 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
{
struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
- struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
+ struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1];
struct nlattr *attr = (void *)nlh + min_len;
u32 dump_type;
ip_set_id_t index;
@@ -1238,27 +1242,23 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
nla_parse(cda, IPSET_ATTR_CMD_MAX,
attr, nlh->nlmsg_len - min_len, ip_set_setname_policy);
- /* cb->args[IPSET_CB_NET]: net namespace
- * [IPSET_CB_DUMP]: dump single set/all sets
- * [IPSET_CB_INDEX]: set index
- * [IPSET_CB_ARG0]: type specific
- */
-
if (cda[IPSET_ATTR_SETNAME]) {
struct ip_set *set;
set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]),
&index);
- if (set == NULL)
+ if (!set)
return -ENOENT;
dump_type = DUMP_ONE;
cb->args[IPSET_CB_INDEX] = index;
- } else
+ } else {
dump_type = DUMP_ALL;
+ }
if (cda[IPSET_ATTR_FLAGS]) {
u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
+
dump_type |= (f << 16);
}
cb->args[IPSET_CB_NET] = (unsigned long)inst;
@@ -1276,6 +1276,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk));
u32 dump_type, dump_flags;
+ bool is_destroyed;
int ret = 0;
if (!cb->args[IPSET_CB_DUMP]) {
@@ -1283,7 +1284,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
if (ret < 0) {
nlh = nlmsg_hdr(cb->skb);
/* We have to create and send the error message
- * manually :-( */
+ * manually :-(
+ */
if (nlh->nlmsg_flags & NLM_F_ACK)
netlink_ack(cb->skb, nlh, ret);
return ret;
@@ -1301,13 +1303,21 @@ dump_last:
pr_debug("dump type, flag: %u %u index: %ld\n",
dump_type, dump_flags, cb->args[IPSET_CB_INDEX]);
for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) {
- index = (ip_set_id_t) cb->args[IPSET_CB_INDEX];
+ index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
+ write_lock_bh(&ip_set_ref_lock);
set = ip_set(inst, index);
- if (set == NULL) {
+ is_destroyed = inst->is_destroyed;
+ if (!set || is_destroyed) {
+ write_unlock_bh(&ip_set_ref_lock);
if (dump_type == DUMP_ONE) {
ret = -ENOENT;
goto out;
}
+ if (is_destroyed) {
+ /* All sets are just being destroyed */
+ ret = 0;
+ goto out;
+ }
continue;
}
/* When dumping all sets, we must dump "sorted"
@@ -1315,14 +1325,17 @@ dump_last:
*/
if (dump_type != DUMP_ONE &&
((dump_type == DUMP_ALL) ==
- !!(set->type->features & IPSET_DUMP_LAST)))
+ !!(set->type->features & IPSET_DUMP_LAST))) {
+ write_unlock_bh(&ip_set_ref_lock);
continue;
+ }
pr_debug("List set: %s\n", set->name);
if (!cb->args[IPSET_CB_ARG0]) {
/* Start listing: make sure set won't be destroyed */
pr_debug("reference set\n");
- __ip_set_get(set);
+ set->ref++;
}
+ write_unlock_bh(&ip_set_ref_lock);
nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, flags,
IPSET_CMD_LIST);
@@ -1350,11 +1363,13 @@ dump_last:
goto release_refcount;
if (dump_flags & IPSET_FLAG_LIST_HEADER)
goto next_set;
+ if (set->variant->uref)
+ set->variant->uref(set, cb, true);
/* Fall through and add elements */
default:
- read_lock_bh(&set->lock);
+ rcu_read_lock_bh();
ret = set->variant->list(set, skb, cb);
- read_unlock_bh(&set->lock);
+ rcu_read_unlock_bh();
if (!cb->args[IPSET_CB_ARG0])
/* Set is done, proceed with next one */
goto next_set;
@@ -1366,6 +1381,8 @@ dump_last:
dump_type = DUMP_LAST;
cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16);
cb->args[IPSET_CB_INDEX] = 0;
+ if (set && set->variant->uref)
+ set->variant->uref(set, cb, false);
goto dump_last;
}
goto out;
@@ -1380,7 +1397,10 @@ next_set:
release_refcount:
/* If there was an error or set is done, release set */
if (ret || !cb->args[IPSET_CB_ARG0]) {
- pr_debug("release set %s\n", ip_set(inst, index)->name);
+ set = ip_set(inst, index);
+ if (set->variant->uref)
+ set->variant->uref(set, cb, false);
+ pr_debug("release set %s\n", set->name);
__ip_set_put_byindex(inst, index);
cb->args[IPSET_CB_ARG0] = 0;
}
@@ -1432,9 +1452,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
do {
- write_lock_bh(&set->lock);
+ spin_lock_bh(&set->lock);
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
- write_unlock_bh(&set->lock);
+ spin_unlock_bh(&set->lock);
retried = true;
} while (ret == -EAGAIN &&
set->variant->resize &&
@@ -1450,12 +1470,12 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
size_t payload = min(SIZE_MAX,
sizeof(*errmsg) + nlmsg_len(nlh));
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
- struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
+ struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1];
struct nlattr *cmdattr;
u32 *errline;
skb2 = nlmsg_new(payload, GFP_KERNEL);
- if (skb2 == NULL)
+ if (!skb2)
return -ENOMEM;
rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
@@ -1472,7 +1492,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
*errline = lineno;
- netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+ netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
/* Signal netlink not to send its ACK/errmsg. */
return -EINTR;
}
@@ -1487,25 +1508,25 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
{
struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
struct ip_set *set;
- struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
+ struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
const struct nlattr *nla;
u32 flags = flag_exist(nlh);
bool use_lineno;
int ret = 0;
if (unlikely(protocol_failed(attr) ||
- attr[IPSET_ATTR_SETNAME] == NULL ||
+ !attr[IPSET_ATTR_SETNAME] ||
!((attr[IPSET_ATTR_DATA] != NULL) ^
(attr[IPSET_ATTR_ADT] != NULL)) ||
- (attr[IPSET_ATTR_DATA] != NULL &&
+ (attr[IPSET_ATTR_DATA] &&
!flag_nested(attr[IPSET_ATTR_DATA])) ||
- (attr[IPSET_ATTR_ADT] != NULL &&
+ (attr[IPSET_ATTR_ADT] &&
(!flag_nested(attr[IPSET_ATTR_ADT]) ||
- attr[IPSET_ATTR_LINENO] == NULL))))
+ !attr[IPSET_ATTR_LINENO]))))
return -IPSET_ERR_PROTOCOL;
set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
- if (set == NULL)
+ if (!set)
return -ENOENT;
use_lineno = !!attr[IPSET_ATTR_LINENO];
@@ -1542,25 +1563,25 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
{
struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
struct ip_set *set;
- struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
+ struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
const struct nlattr *nla;
u32 flags = flag_exist(nlh);
bool use_lineno;
int ret = 0;
if (unlikely(protocol_failed(attr) ||
- attr[IPSET_ATTR_SETNAME] == NULL ||
+ !attr[IPSET_ATTR_SETNAME] ||
!((attr[IPSET_ATTR_DATA] != NULL) ^
(attr[IPSET_ATTR_ADT] != NULL)) ||
- (attr[IPSET_ATTR_DATA] != NULL &&
+ (attr[IPSET_ATTR_DATA] &&
!flag_nested(attr[IPSET_ATTR_DATA])) ||
- (attr[IPSET_ATTR_ADT] != NULL &&
+ (attr[IPSET_ATTR_ADT] &&
(!flag_nested(attr[IPSET_ATTR_ADT]) ||
- attr[IPSET_ATTR_LINENO] == NULL))))
+ !attr[IPSET_ATTR_LINENO]))))
return -IPSET_ERR_PROTOCOL;
set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
- if (set == NULL)
+ if (!set)
return -ENOENT;
use_lineno = !!attr[IPSET_ATTR_LINENO];
@@ -1597,26 +1618,26 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
{
struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
struct ip_set *set;
- struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
+ struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
int ret = 0;
if (unlikely(protocol_failed(attr) ||
- attr[IPSET_ATTR_SETNAME] == NULL ||
- attr[IPSET_ATTR_DATA] == NULL ||
+ !attr[IPSET_ATTR_SETNAME] ||
+ !attr[IPSET_ATTR_DATA] ||
!flag_nested(attr[IPSET_ATTR_DATA])))
return -IPSET_ERR_PROTOCOL;
set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
- if (set == NULL)
+ if (!set)
return -ENOENT;
if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
set->type->adt_policy))
return -IPSET_ERR_PROTOCOL;
- read_lock_bh(&set->lock);
+ rcu_read_lock_bh();
ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
- read_unlock_bh(&set->lock);
+ rcu_read_unlock_bh();
/* Userspace can't trigger element to be re-added */
if (ret == -EAGAIN)
ret = 1;
@@ -1638,15 +1659,15 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
int ret = 0;
if (unlikely(protocol_failed(attr) ||
- attr[IPSET_ATTR_SETNAME] == NULL))
+ !attr[IPSET_ATTR_SETNAME]))
return -IPSET_ERR_PROTOCOL;
set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
- if (set == NULL)
+ if (!set)
return -ENOENT;
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (skb2 == NULL)
+ if (!skb2)
return -ENOMEM;
nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
@@ -1695,8 +1716,8 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb,
int ret = 0;
if (unlikely(protocol_failed(attr) ||
- attr[IPSET_ATTR_TYPENAME] == NULL ||
- attr[IPSET_ATTR_FAMILY] == NULL))
+ !attr[IPSET_ATTR_TYPENAME] ||
+ !attr[IPSET_ATTR_FAMILY]))
return -IPSET_ERR_PROTOCOL;
family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
@@ -1706,7 +1727,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb,
return ret;
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (skb2 == NULL)
+ if (!skb2)
return -ENOMEM;
nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
@@ -1751,11 +1772,11 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
struct nlmsghdr *nlh2;
int ret = 0;
- if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL))
+ if (unlikely(!attr[IPSET_ATTR_PROTOCOL]))
return -IPSET_ERR_PROTOCOL;
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (skb2 == NULL)
+ if (!skb2)
return -ENOMEM;
nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
@@ -1883,7 +1904,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
ret = -EFAULT;
goto done;
}
- op = (unsigned int *) data;
+ op = (unsigned int *)data;
if (*op < IP_SET_OP_VERSION) {
/* Check the version at the beginning of operations */
@@ -1995,10 +2016,11 @@ ip_set_net_init(struct net *net)
if (inst->ip_set_max >= IPSET_INVALID_ID)
inst->ip_set_max = IPSET_INVALID_ID - 1;
- list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL);
+ list = kcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL);
if (!list)
return -ENOMEM;
- inst->is_deleted = 0;
+ inst->is_deleted = false;
+ inst->is_destroyed = false;
rcu_assign_pointer(inst->ip_set_list, list);
return 0;
}
@@ -2011,12 +2033,14 @@ ip_set_net_exit(struct net *net)
struct ip_set *set = NULL;
ip_set_id_t i;
- inst->is_deleted = 1; /* flag for ip_set_nfnl_put */
+ inst->is_deleted = true; /* flag for ip_set_nfnl_put */
for (i = 0; i < inst->ip_set_max; i++) {
set = ip_set(inst, i);
- if (set != NULL)
- ip_set_destroy_set(inst, i);
+ if (set) {
+ ip_set(inst, i) = NULL;
+ ip_set_destroy_set(set);
+ }
}
kfree(rcu_dereference_protected(inst->ip_set_list, 1));
}
@@ -2028,11 +2052,11 @@ static struct pernet_operations ip_set_net_ops = {
.size = sizeof(struct ip_set_net)
};
-
static int __init
ip_set_init(void)
{
int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
+
if (ret != 0) {
pr_err("ip_set: cannot register with nfnetlink.\n");
return ret;
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 1981f021cc60..42c3e3ba1b94 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -30,7 +30,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
const struct tcphdr *th;
th = skb_header_pointer(skb, protooff, sizeof(_tcph), &_tcph);
- if (th == NULL)
+ if (!th)
/* No choice either */
return false;
@@ -42,7 +42,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
const sctp_sctphdr_t *sh;
sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh);
- if (sh == NULL)
+ if (!sh)
/* No choice either */
return false;
@@ -55,7 +55,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
const struct udphdr *uh;
uh = skb_header_pointer(skb, protooff, sizeof(_udph), &_udph);
- if (uh == NULL)
+ if (!uh)
/* No choice either */
return false;
@@ -67,7 +67,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
const struct icmphdr *ic;
ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
- if (ic == NULL)
+ if (!ic)
return false;
*port = (__force __be16)htons((ic->type << 8) | ic->code);
@@ -78,7 +78,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
const struct icmp6hdr *ic;
ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
- if (ic == NULL)
+ if (!ic)
return false;
*port = (__force __be16)
@@ -116,7 +116,8 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
return false;
default:
/* Other protocols doesn't have ports,
- so we can match fragments */
+ * so we can match fragments.
+ */
*proto = protocol;
return true;
}
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 7952869c8023..afe905c208af 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -10,19 +10,19 @@
#include <linux/rcupdate.h>
#include <linux/jhash.h>
+#include <linux/types.h>
#include <linux/netfilter/ipset/ip_set_timeout.h>
-#ifndef rcu_dereference_bh
-#define rcu_dereference_bh(p) rcu_dereference(p)
-#endif
+
+#define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c)
+#define ipset_dereference_protected(p, set) \
+ __ipset_dereference_protected(p, spin_is_locked(&(set)->lock))
#define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1)
/* Hashing which uses arrays to resolve clashing. The hash table is resized
* (doubled) when searching becomes too long.
* Internally jhash is used with the assumption that the size of the
- * stored data is a multiple of sizeof(u32). If storage supports timeout,
- * the timeout field must be the last one in the data structure - that field
- * is ignored when computing the hash key.
+ * stored data is a multiple of sizeof(u32).
*
* Readers and resizing
*
@@ -35,7 +35,9 @@
/* Number of elements to store in an initial array block */
#define AHASH_INIT_SIZE 4
/* Max number of elements to store in an array block */
-#define AHASH_MAX_SIZE (3*AHASH_INIT_SIZE)
+#define AHASH_MAX_SIZE (3 * AHASH_INIT_SIZE)
+/* Max muber of elements in the array block when tuned */
+#define AHASH_MAX_TUNED 64
/* Max number of elements can be tuned */
#ifdef IP_SET_HASH_WITH_MULTI
@@ -53,8 +55,9 @@ tune_ahash_max(u8 curr, u32 multi)
/* Currently, at listing one hash bucket must fit into a message.
* Therefore we have a hard limit here.
*/
- return n > curr && n <= 64 ? n : curr;
+ return n > curr && n <= AHASH_MAX_TUNED ? n : curr;
}
+
#define TUNE_AHASH_MAX(h, multi) \
((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
#else
@@ -64,18 +67,23 @@ tune_ahash_max(u8 curr, u32 multi)
/* A hash bucket */
struct hbucket {
- void *value; /* the array of the values */
+ struct rcu_head rcu; /* for call_rcu_bh */
+ /* Which positions are used in the array */
+ DECLARE_BITMAP(used, AHASH_MAX_TUNED);
u8 size; /* size of the array */
u8 pos; /* position of the first free entry */
-};
+ unsigned char value[0]; /* the array of the values */
+} __attribute__ ((aligned));
/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
+ atomic_t ref; /* References for resizing */
+ atomic_t uref; /* References for dumping */
u8 htable_bits; /* size of hash table == 2^htable_bits */
- struct hbucket bucket[0]; /* hashtable buckets */
+ struct hbucket __rcu *bucket[0]; /* hashtable buckets */
};
-#define hbucket(h, i) (&((h)->bucket[i]))
+#define hbucket(h, i) ((h)->bucket[i])
#ifndef IPSET_NET_COUNT
#define IPSET_NET_COUNT 1
@@ -83,8 +91,8 @@ struct htable {
/* Book-keeping of the prefixes added to the set */
struct net_prefixes {
- u32 nets[IPSET_NET_COUNT]; /* number of elements per cidr */
- u8 cidr[IPSET_NET_COUNT]; /* the different cidr values in the set */
+ u32 nets[IPSET_NET_COUNT]; /* number of elements for this cidr */
+ u8 cidr[IPSET_NET_COUNT]; /* the cidr value */
};
/* Compute the hash table size */
@@ -97,11 +105,11 @@ htable_size(u8 hbits)
if (hbits > 31)
return 0;
hsize = jhash_size(hbits);
- if ((((size_t)-1) - sizeof(struct htable))/sizeof(struct hbucket)
+ if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
< hsize)
return 0;
- return hsize * sizeof(struct hbucket) + sizeof(struct htable);
+ return hsize * sizeof(struct hbucket *) + sizeof(struct htable);
}
/* Compute htable_bits from the user input parameter hashsize */
@@ -110,6 +118,7 @@ htable_bits(u32 hashsize)
{
/* Assume that hashsize == 2^htable_bits */
u8 bits = fls(hashsize - 1);
+
if (jhash_size(bits) != hashsize)
/* Round up to the first 2^n value */
bits = fls(hashsize);
@@ -117,30 +126,6 @@ htable_bits(u32 hashsize)
return bits;
}
-static int
-hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
-{
- if (n->pos >= n->size) {
- void *tmp;
-
- if (n->size >= ahash_max)
- /* Trigger rehashing */
- return -EAGAIN;
-
- tmp = kzalloc((n->size + AHASH_INIT_SIZE) * dsize,
- GFP_ATOMIC);
- if (!tmp)
- return -ENOMEM;
- if (n->size) {
- memcpy(tmp, n->value, n->size * dsize);
- kfree(n->value);
- }
- n->value = tmp;
- n->size += AHASH_INIT_SIZE;
- }
- return 0;
-}
-
#ifdef IP_SET_HASH_WITH_NETS
#if IPSET_NET_COUNT > 1
#define __CIDR(cidr, i) (cidr[i])
@@ -149,17 +134,21 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#endif
/* cidr + 1 is stored in net_prefixes to support /0 */
-#define SCIDR(cidr, i) (__CIDR(cidr, i) + 1)
+#define NCIDR_PUT(cidr) ((cidr) + 1)
+#define NCIDR_GET(cidr) ((cidr) - 1)
#ifdef IP_SET_HASH_WITH_NETS_PACKED
/* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */
-#define GCIDR(cidr, i) (__CIDR(cidr, i) + 1)
-#define NCIDR(cidr) (cidr)
+#define DCIDR_PUT(cidr) ((cidr) - 1)
+#define DCIDR_GET(cidr, i) (__CIDR(cidr, i) + 1)
#else
-#define GCIDR(cidr, i) (__CIDR(cidr, i))
-#define NCIDR(cidr) (cidr - 1)
+#define DCIDR_PUT(cidr) (cidr)
+#define DCIDR_GET(cidr, i) __CIDR(cidr, i)
#endif
+#define INIT_CIDR(cidr, host_mask) \
+ DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask))
+
#define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128)
#ifdef IP_SET_HASH_WITH_NET0
@@ -203,6 +192,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#undef mtype_del
#undef mtype_test_cidrs
#undef mtype_test
+#undef mtype_uref
#undef mtype_expire
#undef mtype_resize
#undef mtype_head
@@ -244,6 +234,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#define mtype_del IPSET_TOKEN(MTYPE, _del)
#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs)
#define mtype_test IPSET_TOKEN(MTYPE, _test)
+#define mtype_uref IPSET_TOKEN(MTYPE, _uref)
#define mtype_expire IPSET_TOKEN(MTYPE, _expire)
#define mtype_resize IPSET_TOKEN(MTYPE, _resize)
#define mtype_head IPSET_TOKEN(MTYPE, _head)
@@ -266,7 +257,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#endif
#define HKEY(data, initval, htable_bits) \
-(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval) \
+(jhash2((u32 *)(data), HKEY_DATALEN / sizeof(u32), initval) \
& jhash_mask(htable_bits))
#ifndef htype
@@ -292,9 +283,6 @@ struct htype {
#ifdef IP_SET_HASH_WITH_NETMASK
u8 netmask; /* netmask value for subnets to store */
#endif
-#ifdef IP_SET_HASH_WITH_RBTREE
- struct rb_root rbtree;
-#endif
#ifdef IP_SET_HASH_WITH_NETS
struct net_prefixes nets[0]; /* book-keeping of prefixes */
#endif
@@ -303,7 +291,8 @@ struct htype {
#ifdef IP_SET_HASH_WITH_NETS
/* Network cidr size book keeping when the hash stores different
- * sized networks */
+ * sized networks. cidr == real cidr + 1 to support /0.
+ */
static void
mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
{
@@ -311,11 +300,11 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
/* Add in increasing prefix order, so larger cidr first */
for (i = 0, j = -1; i < nets_length && h->nets[i].cidr[n]; i++) {
- if (j != -1)
+ if (j != -1) {
continue;
- else if (h->nets[i].cidr[n] < cidr)
+ } else if (h->nets[i].cidr[n] < cidr) {
j = i;
- else if (h->nets[i].cidr[n] == cidr) {
+ } else if (h->nets[i].cidr[n] == cidr) {
h->nets[cidr - 1].nets[n]++;
return;
}
@@ -334,15 +323,15 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
u8 i, j, net_end = nets_length - 1;
for (i = 0; i < nets_length; i++) {
- if (h->nets[i].cidr[n] != cidr)
- continue;
- h->nets[cidr -1].nets[n]--;
- if (h->nets[cidr -1].nets[n] > 0)
- return;
+ if (h->nets[i].cidr[n] != cidr)
+ continue;
+ h->nets[cidr - 1].nets[n]--;
+ if (h->nets[cidr - 1].nets[n] > 0)
+ return;
for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
- h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
+ h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
h->nets[j].cidr[n] = 0;
- return;
+ return;
}
}
#endif
@@ -353,15 +342,18 @@ mtype_ahash_memsize(const struct htype *h, const struct htable *t,
u8 nets_length, size_t dsize)
{
u32 i;
- size_t memsize = sizeof(*h)
- + sizeof(*t)
+ struct hbucket *n;
+ size_t memsize = sizeof(*h) + sizeof(*t);
+
#ifdef IP_SET_HASH_WITH_NETS
- + sizeof(struct net_prefixes) * nets_length
+ memsize += sizeof(struct net_prefixes) * nets_length;
#endif
- + jhash_size(t->htable_bits) * sizeof(struct hbucket);
-
- for (i = 0; i < jhash_size(t->htable_bits); i++)
- memsize += t->bucket[i].size * dsize;
+ for (i = 0; i < jhash_size(t->htable_bits); i++) {
+ n = rcu_dereference_bh(hbucket(t, i));
+ if (!n)
+ continue;
+ memsize += sizeof(struct hbucket) + n->size * dsize;
+ }
return memsize;
}
@@ -376,7 +368,8 @@ mtype_ext_cleanup(struct ip_set *set, struct hbucket *n)
int i;
for (i = 0; i < n->pos; i++)
- ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
+ if (test_bit(i, n->used))
+ ip_set_ext_destroy(set, ahash_data(n, i, set->dsize));
}
/* Flush a hash type of set: destroy all elements */
@@ -388,16 +381,16 @@ mtype_flush(struct ip_set *set)
struct hbucket *n;
u32 i;
- t = rcu_dereference_bh_nfnl(h->table);
+ t = ipset_dereference_protected(h->table, set);
for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = hbucket(t, i);
- if (n->size) {
- if (set->extensions & IPSET_EXT_DESTROY)
- mtype_ext_cleanup(set, n);
- n->size = n->pos = 0;
- /* FIXME: use slab cache */
- kfree(n->value);
- }
+ n = __ipset_dereference_protected(hbucket(t, i), 1);
+ if (!n)
+ continue;
+ if (set->extensions & IPSET_EXT_DESTROY)
+ mtype_ext_cleanup(set, n);
+ /* FIXME: use slab cache */
+ rcu_assign_pointer(hbucket(t, i), NULL);
+ kfree_rcu(n, rcu);
}
#ifdef IP_SET_HASH_WITH_NETS
memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family));
@@ -413,13 +406,13 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
u32 i;
for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = hbucket(t, i);
- if (n->size) {
- if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
- mtype_ext_cleanup(set, n);
- /* FIXME: use slab cache */
- kfree(n->value);
- }
+ n = __ipset_dereference_protected(hbucket(t, i), 1);
+ if (!n)
+ continue;
+ if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
+ mtype_ext_cleanup(set, n);
+ /* FIXME: use slab cache */
+ kfree(n);
}
ip_set_free(t);
@@ -431,13 +424,11 @@ mtype_destroy(struct ip_set *set)
{
struct htype *h = set->data;
- if (set->extensions & IPSET_EXT_TIMEOUT)
+ if (SET_WITH_TIMEOUT(set))
del_timer_sync(&h->gc);
- mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table), true);
-#ifdef IP_SET_HASH_WITH_RBTREE
- rbtree_destroy(&h->rbtree);
-#endif
+ mtype_ahash_destroy(set,
+ __ipset_dereference_protected(h->table, 1), true);
kfree(h);
set->data = NULL;
@@ -449,7 +440,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
struct htype *h = set->data;
init_timer(&h->gc);
- h->gc.data = (unsigned long) set;
+ h->gc.data = (unsigned long)set;
h->gc.function = gc;
h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&h->gc);
@@ -482,61 +473,71 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
struct htable *t;
struct hbucket *n;
struct mtype_elem *data;
- u32 i;
- int j;
+ u32 i, j, d;
#ifdef IP_SET_HASH_WITH_NETS
u8 k;
#endif
- rcu_read_lock_bh();
- t = rcu_dereference_bh(h->table);
+ t = ipset_dereference_protected(h->table, set);
for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = hbucket(t, i);
- for (j = 0; j < n->pos; j++) {
+ n = __ipset_dereference_protected(hbucket(t, i), 1);
+ if (!n)
+ continue;
+ for (j = 0, d = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used)) {
+ d++;
+ continue;
+ }
data = ahash_data(n, j, dsize);
if (ip_set_timeout_expired(ext_timeout(data, set))) {
pr_debug("expired %u/%u\n", i, j);
+ clear_bit(j, n->used);
+ smp_mb__after_atomic();
#ifdef IP_SET_HASH_WITH_NETS
for (k = 0; k < IPSET_NET_COUNT; k++)
- mtype_del_cidr(h, SCIDR(data->cidr, k),
- nets_length, k);
+ mtype_del_cidr(h,
+ NCIDR_PUT(DCIDR_GET(data->cidr,
+ k)),
+ nets_length, k);
#endif
ip_set_ext_destroy(set, data);
- if (j != n->pos - 1)
- /* Not last one */
- memcpy(data,
- ahash_data(n, n->pos - 1, dsize),
- dsize);
- n->pos--;
h->elements--;
+ d++;
}
}
- if (n->pos + AHASH_INIT_SIZE < n->size) {
- void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
- * dsize,
- GFP_ATOMIC);
+ if (d >= AHASH_INIT_SIZE) {
+ struct hbucket *tmp = kzalloc(sizeof(*tmp) +
+ (n->size - AHASH_INIT_SIZE) * dsize,
+ GFP_ATOMIC);
if (!tmp)
/* Still try to delete expired elements */
continue;
- n->size -= AHASH_INIT_SIZE;
- memcpy(tmp, n->value, n->size * dsize);
- kfree(n->value);
- n->value = tmp;
+ tmp->size = n->size - AHASH_INIT_SIZE;
+ for (j = 0, d = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, dsize);
+ memcpy(tmp->value + d * dsize, data, dsize);
+ set_bit(j, tmp->used);
+ d++;
+ }
+ tmp->pos = d;
+ rcu_assign_pointer(hbucket(t, i), tmp);
+ kfree_rcu(n, rcu);
}
}
- rcu_read_unlock_bh();
}
static void
mtype_gc(unsigned long ul_set)
{
- struct ip_set *set = (struct ip_set *) ul_set;
+ struct ip_set *set = (struct ip_set *)ul_set;
struct htype *h = set->data;
pr_debug("called\n");
- write_lock_bh(&set->lock);
+ spin_lock_bh(&set->lock);
mtype_expire(set, h, NLEN(set->family), set->dsize);
- write_unlock_bh(&set->lock);
+ spin_unlock_bh(&set->lock);
h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&h->gc);
@@ -544,93 +545,152 @@ mtype_gc(unsigned long ul_set)
/* Resize a hash: create a new hash table with doubling the hashsize
* and inserting the elements to it. Repeat until we succeed or
- * fail due to memory pressures. */
+ * fail due to memory pressures.
+ */
static int
mtype_resize(struct ip_set *set, bool retried)
{
struct htype *h = set->data;
- struct htable *t, *orig = rcu_dereference_bh_nfnl(h->table);
- u8 htable_bits = orig->htable_bits;
+ struct htable *t, *orig;
+ u8 htable_bits;
+ size_t dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
u8 flags;
+ struct mtype_elem *tmp;
#endif
struct mtype_elem *data;
struct mtype_elem *d;
struct hbucket *n, *m;
- u32 i, j;
+ u32 i, j, key;
int ret;
- /* Try to cleanup once */
- if (SET_WITH_TIMEOUT(set) && !retried) {
- i = h->elements;
- write_lock_bh(&set->lock);
- mtype_expire(set, set->data, NLEN(set->family), set->dsize);
- write_unlock_bh(&set->lock);
- if (h->elements < i)
- return 0;
- }
+#ifdef IP_SET_HASH_WITH_NETS
+ tmp = kmalloc(dsize, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+#endif
+ rcu_read_lock_bh();
+ orig = rcu_dereference_bh_nfnl(h->table);
+ htable_bits = orig->htable_bits;
+ rcu_read_unlock_bh();
retry:
ret = 0;
htable_bits++;
- pr_debug("attempt to resize set %s from %u to %u, t %p\n",
- set->name, orig->htable_bits, htable_bits, orig);
if (!htable_bits) {
/* In case we have plenty of memory :-) */
pr_warn("Cannot increase the hashsize of set %s further\n",
set->name);
- return -IPSET_ERR_HASH_FULL;
+ ret = -IPSET_ERR_HASH_FULL;
+ goto out;
+ }
+ t = ip_set_alloc(htable_size(htable_bits));
+ if (!t) {
+ ret = -ENOMEM;
+ goto out;
}
- t = ip_set_alloc(sizeof(*t)
- + jhash_size(htable_bits) * sizeof(struct hbucket));
- if (!t)
- return -ENOMEM;
t->htable_bits = htable_bits;
- read_lock_bh(&set->lock);
+ spin_lock_bh(&set->lock);
+ orig = __ipset_dereference_protected(h->table, 1);
+ /* There can't be another parallel resizing, but dumping is possible */
+ atomic_set(&orig->ref, 1);
+ atomic_inc(&orig->uref);
+ pr_debug("attempt to resize set %s from %u to %u, t %p\n",
+ set->name, orig->htable_bits, htable_bits, orig);
for (i = 0; i < jhash_size(orig->htable_bits); i++) {
- n = hbucket(orig, i);
+ n = __ipset_dereference_protected(hbucket(orig, i), 1);
+ if (!n)
+ continue;
for (j = 0; j < n->pos; j++) {
- data = ahash_data(n, j, set->dsize);
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, dsize);
#ifdef IP_SET_HASH_WITH_NETS
+ /* We have readers running parallel with us,
+ * so the live data cannot be modified.
+ */
flags = 0;
+ memcpy(tmp, data, dsize);
+ data = tmp;
mtype_data_reset_flags(data, &flags);
#endif
- m = hbucket(t, HKEY(data, h->initval, htable_bits));
- ret = hbucket_elem_add(m, AHASH_MAX(h), set->dsize);
- if (ret < 0) {
-#ifdef IP_SET_HASH_WITH_NETS
- mtype_data_reset_flags(data, &flags);
-#endif
- read_unlock_bh(&set->lock);
- mtype_ahash_destroy(set, t, false);
- if (ret == -EAGAIN)
- goto retry;
- return ret;
+ key = HKEY(data, h->initval, htable_bits);
+ m = __ipset_dereference_protected(hbucket(t, key), 1);
+ if (!m) {
+ m = kzalloc(sizeof(*m) +
+ AHASH_INIT_SIZE * dsize,
+ GFP_ATOMIC);
+ if (!m) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+ m->size = AHASH_INIT_SIZE;
+ RCU_INIT_POINTER(hbucket(t, key), m);
+ } else if (m->pos >= m->size) {
+ struct hbucket *ht;
+
+ if (m->size >= AHASH_MAX(h)) {
+ ret = -EAGAIN;
+ } else {
+ ht = kzalloc(sizeof(*ht) +
+ (m->size + AHASH_INIT_SIZE)
+ * dsize,
+ GFP_ATOMIC);
+ if (!ht)
+ ret = -ENOMEM;
+ }
+ if (ret < 0)
+ goto cleanup;
+ memcpy(ht, m, sizeof(struct hbucket) +
+ m->size * dsize);
+ ht->size = m->size + AHASH_INIT_SIZE;
+ kfree(m);
+ m = ht;
+ RCU_INIT_POINTER(hbucket(t, key), ht);
}
- d = ahash_data(m, m->pos++, set->dsize);
- memcpy(d, data, set->dsize);
+ d = ahash_data(m, m->pos, dsize);
+ memcpy(d, data, dsize);
+ set_bit(m->pos++, m->used);
#ifdef IP_SET_HASH_WITH_NETS
mtype_data_reset_flags(d, &flags);
#endif
}
}
-
rcu_assign_pointer(h->table, t);
- read_unlock_bh(&set->lock);
+
+ spin_unlock_bh(&set->lock);
/* Give time to other readers of the set */
synchronize_rcu_bh();
pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
orig->htable_bits, orig, t->htable_bits, t);
- mtype_ahash_destroy(set, orig, false);
+ /* If there's nobody else dumping the table, destroy it */
+ if (atomic_dec_and_test(&orig->uref)) {
+ pr_debug("Table destroy by resize %p\n", orig);
+ mtype_ahash_destroy(set, orig, false);
+ }
- return 0;
+out:
+#ifdef IP_SET_HASH_WITH_NETS
+ kfree(tmp);
+#endif
+ return ret;
+
+cleanup:
+ atomic_set(&orig->ref, 0);
+ atomic_dec(&orig->uref);
+ spin_unlock_bh(&set->lock);
+ mtype_ahash_destroy(set, t, false);
+ if (ret == -EAGAIN)
+ goto retry;
+ goto out;
}
/* Add an element to a hash and update the internal counters when succeeded,
- * otherwise report the proper error code. */
+ * otherwise report the proper error code.
+ */
static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags)
@@ -639,17 +699,49 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct htable *t;
const struct mtype_elem *d = value;
struct mtype_elem *data;
- struct hbucket *n;
- int i, ret = 0;
- int j = AHASH_MAX(h) + 1;
+ struct hbucket *n, *old = ERR_PTR(-ENOENT);
+ int i, j = -1;
bool flag_exist = flags & IPSET_FLAG_EXIST;
+ bool deleted = false, forceadd = false, reuse = false;
u32 key, multi = 0;
- rcu_read_lock_bh();
- t = rcu_dereference_bh(h->table);
+ if (h->elements >= h->maxelem) {
+ if (SET_WITH_TIMEOUT(set))
+ /* FIXME: when set is full, we slow down here */
+ mtype_expire(set, h, NLEN(set->family), set->dsize);
+ if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set))
+ forceadd = true;
+ }
+
+ t = ipset_dereference_protected(h->table, set);
key = HKEY(value, h->initval, t->htable_bits);
- n = hbucket(t, key);
+ n = __ipset_dereference_protected(hbucket(t, key), 1);
+ if (!n) {
+ if (forceadd) {
+ if (net_ratelimit())
+ pr_warn("Set %s is full, maxelem %u reached\n",
+ set->name, h->maxelem);
+ return -IPSET_ERR_HASH_FULL;
+ } else if (h->elements >= h->maxelem) {
+ goto set_full;
+ }
+ old = NULL;
+ n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
+ GFP_ATOMIC);
+ if (!n)
+ return -ENOMEM;
+ n->size = AHASH_INIT_SIZE;
+ goto copy_elem;
+ }
for (i = 0; i < n->pos; i++) {
+ if (!test_bit(i, n->used)) {
+ /* Reuse first deleted entry */
+ if (j == -1) {
+ deleted = reuse = true;
+ j = i;
+ }
+ continue;
+ }
data = ahash_data(n, i, set->dsize);
if (mtype_data_equal(data, d, &multi)) {
if (flag_exist ||
@@ -657,85 +749,94 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ip_set_timeout_expired(ext_timeout(data, set)))) {
/* Just the extensions could be overwritten */
j = i;
- goto reuse_slot;
- } else {
- ret = -IPSET_ERR_EXIST;
- goto out;
+ goto overwrite_extensions;
}
+ return -IPSET_ERR_EXIST;
}
/* Reuse first timed out entry */
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(data, set)) &&
- j != AHASH_MAX(h) + 1)
+ j == -1) {
j = i;
+ reuse = true;
+ }
}
- if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set) && n->pos) {
- /* Choosing the first entry in the array to replace */
- j = 0;
- goto reuse_slot;
- }
- if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem)
- /* FIXME: when set is full, we slow down here */
- mtype_expire(set, h, NLEN(set->family), set->dsize);
-
- if (h->elements >= h->maxelem) {
- if (net_ratelimit())
- pr_warn("Set %s is full, maxelem %u reached\n",
- set->name, h->maxelem);
- ret = -IPSET_ERR_HASH_FULL;
- goto out;
- }
-
-reuse_slot:
- if (j != AHASH_MAX(h) + 1) {
- /* Fill out reused slot */
+ if (reuse || forceadd) {
data = ahash_data(n, j, set->dsize);
+ if (!deleted) {
#ifdef IP_SET_HASH_WITH_NETS
- for (i = 0; i < IPSET_NET_COUNT; i++) {
- mtype_del_cidr(h, SCIDR(data->cidr, i),
- NLEN(set->family), i);
- mtype_add_cidr(h, SCIDR(d->cidr, i),
- NLEN(set->family), i);
- }
+ for (i = 0; i < IPSET_NET_COUNT; i++)
+ mtype_del_cidr(h,
+ NCIDR_PUT(DCIDR_GET(data->cidr, i)),
+ NLEN(set->family), i);
#endif
- ip_set_ext_destroy(set, data);
- } else {
- /* Use/create a new slot */
+ ip_set_ext_destroy(set, data);
+ h->elements--;
+ }
+ goto copy_data;
+ }
+ if (h->elements >= h->maxelem)
+ goto set_full;
+ /* Create a new slot */
+ if (n->pos >= n->size) {
TUNE_AHASH_MAX(h, multi);
- ret = hbucket_elem_add(n, AHASH_MAX(h), set->dsize);
- if (ret != 0) {
- if (ret == -EAGAIN)
- mtype_data_next(&h->next, d);
- goto out;
+ if (n->size >= AHASH_MAX(h)) {
+ /* Trigger rehashing */
+ mtype_data_next(&h->next, d);
+ return -EAGAIN;
}
- data = ahash_data(n, n->pos++, set->dsize);
+ old = n;
+ n = kzalloc(sizeof(*n) +
+ (old->size + AHASH_INIT_SIZE) * set->dsize,
+ GFP_ATOMIC);
+ if (!n)
+ return -ENOMEM;
+ memcpy(n, old, sizeof(struct hbucket) +
+ old->size * set->dsize);
+ n->size = old->size + AHASH_INIT_SIZE;
+ }
+
+copy_elem:
+ j = n->pos++;
+ data = ahash_data(n, j, set->dsize);
+copy_data:
+ h->elements++;
#ifdef IP_SET_HASH_WITH_NETS
- for (i = 0; i < IPSET_NET_COUNT; i++)
- mtype_add_cidr(h, SCIDR(d->cidr, i), NLEN(set->family),
- i);
+ for (i = 0; i < IPSET_NET_COUNT; i++)
+ mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)),
+ NLEN(set->family), i);
#endif
- h->elements++;
- }
memcpy(data, d, sizeof(struct mtype_elem));
+overwrite_extensions:
#ifdef IP_SET_HASH_WITH_NETS
mtype_data_set_flags(data, flags);
#endif
- if (SET_WITH_TIMEOUT(set))
- ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
if (SET_WITH_COUNTER(set))
ip_set_init_counter(ext_counter(data, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(data, set), ext);
if (SET_WITH_SKBINFO(set))
ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
+ /* Must come last for the case when timed out entry is reused */
+ if (SET_WITH_TIMEOUT(set))
+ ip_set_timeout_set(ext_timeout(data, set), ext->timeout);
+ smp_mb__before_atomic();
+ set_bit(j, n->used);
+ if (old != ERR_PTR(-ENOENT)) {
+ rcu_assign_pointer(hbucket(t, key), n);
+ if (old)
+ kfree_rcu(old, rcu);
+ }
-out:
- rcu_read_unlock_bh();
- return ret;
+ return 0;
+set_full:
+ if (net_ratelimit())
+ pr_warn("Set %s is full, maxelem %u reached\n",
+ set->name, h->maxelem);
+ return -IPSET_ERR_HASH_FULL;
}
-/* Delete an element from the hash: swap it with the last element
- * and free up space if possible.
+/* Delete an element from the hash and free up space if possible.
*/
static int
mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
@@ -746,55 +847,70 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
const struct mtype_elem *d = value;
struct mtype_elem *data;
struct hbucket *n;
- int i, ret = -IPSET_ERR_EXIST;
-#ifdef IP_SET_HASH_WITH_NETS
- u8 j;
-#endif
+ int i, j, k, ret = -IPSET_ERR_EXIST;
u32 key, multi = 0;
+ size_t dsize = set->dsize;
- rcu_read_lock_bh();
- t = rcu_dereference_bh(h->table);
+ t = ipset_dereference_protected(h->table, set);
key = HKEY(value, h->initval, t->htable_bits);
- n = hbucket(t, key);
- for (i = 0; i < n->pos; i++) {
- data = ahash_data(n, i, set->dsize);
+ n = __ipset_dereference_protected(hbucket(t, key), 1);
+ if (!n)
+ goto out;
+ for (i = 0, k = 0; i < n->pos; i++) {
+ if (!test_bit(i, n->used)) {
+ k++;
+ continue;
+ }
+ data = ahash_data(n, i, dsize);
if (!mtype_data_equal(data, d, &multi))
continue;
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(data, set)))
goto out;
- if (i != n->pos - 1)
- /* Not last one */
- memcpy(data, ahash_data(n, n->pos - 1, set->dsize),
- set->dsize);
- n->pos--;
+ ret = 0;
+ clear_bit(i, n->used);
+ smp_mb__after_atomic();
+ if (i + 1 == n->pos)
+ n->pos--;
h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
for (j = 0; j < IPSET_NET_COUNT; j++)
- mtype_del_cidr(h, SCIDR(d->cidr, j), NLEN(set->family),
- j);
+ mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)),
+ NLEN(set->family), j);
#endif
ip_set_ext_destroy(set, data);
- if (n->pos + AHASH_INIT_SIZE < n->size) {
- void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
- * set->dsize,
- GFP_ATOMIC);
- if (!tmp) {
- ret = 0;
+
+ for (; i < n->pos; i++) {
+ if (!test_bit(i, n->used))
+ k++;
+ }
+ if (n->pos == 0 && k == 0) {
+ rcu_assign_pointer(hbucket(t, key), NULL);
+ kfree_rcu(n, rcu);
+ } else if (k >= AHASH_INIT_SIZE) {
+ struct hbucket *tmp = kzalloc(sizeof(*tmp) +
+ (n->size - AHASH_INIT_SIZE) * dsize,
+ GFP_ATOMIC);
+ if (!tmp)
goto out;
+ tmp->size = n->size - AHASH_INIT_SIZE;
+ for (j = 0, k = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, dsize);
+ memcpy(tmp->value + k * dsize, data, dsize);
+ set_bit(j, tmp->used);
+ k++;
}
- n->size -= AHASH_INIT_SIZE;
- memcpy(tmp, n->value, n->size * set->dsize);
- kfree(n->value);
- n->value = tmp;
+ tmp->pos = k;
+ rcu_assign_pointer(hbucket(t, key), tmp);
+ kfree_rcu(n, rcu);
}
- ret = 0;
goto out;
}
out:
- rcu_read_unlock_bh();
return ret;
}
@@ -813,7 +929,8 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
#ifdef IP_SET_HASH_WITH_NETS
/* Special test function which takes into account the different network
- * sizes added to the set */
+ * sizes added to the set
+ */
static int
mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
const struct ip_set_ext *ext,
@@ -836,16 +953,21 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
for (; j < nets_length && h->nets[j].cidr[0] && !multi; j++) {
#if IPSET_NET_COUNT == 2
mtype_data_reset_elem(d, &orig);
- mtype_data_netmask(d, NCIDR(h->nets[j].cidr[0]), false);
+ mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false);
for (k = 0; k < nets_length && h->nets[k].cidr[1] && !multi;
k++) {
- mtype_data_netmask(d, NCIDR(h->nets[k].cidr[1]), true);
+ mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]),
+ true);
#else
- mtype_data_netmask(d, NCIDR(h->nets[j].cidr[0]));
+ mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]));
#endif
key = HKEY(d, h->initval, t->htable_bits);
- n = hbucket(t, key);
+ n = rcu_dereference_bh(hbucket(t, key));
+ if (!n)
+ continue;
for (i = 0; i < n->pos; i++) {
+ if (!test_bit(i, n->used))
+ continue;
data = ahash_data(n, i, set->dsize);
if (!mtype_data_equal(data, d, &multi))
continue;
@@ -883,13 +1005,13 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
int i, ret = 0;
u32 key, multi = 0;
- rcu_read_lock_bh();
t = rcu_dereference_bh(h->table);
#ifdef IP_SET_HASH_WITH_NETS
/* If we test an IP address and not a network address,
- * try all possible network sizes */
+ * try all possible network sizes
+ */
for (i = 0; i < IPSET_NET_COUNT; i++)
- if (GCIDR(d->cidr, i) != SET_HOST_MASK(set->family))
+ if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family))
break;
if (i == IPSET_NET_COUNT) {
ret = mtype_test_cidrs(set, d, ext, mext, flags);
@@ -898,8 +1020,14 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
#endif
key = HKEY(d, h->initval, t->htable_bits);
- n = hbucket(t, key);
+ n = rcu_dereference_bh(hbucket(t, key));
+ if (!n) {
+ ret = 0;
+ goto out;
+ }
for (i = 0; i < n->pos; i++) {
+ if (!test_bit(i, n->used))
+ continue;
data = ahash_data(n, i, set->dsize);
if (mtype_data_equal(data, d, &multi) &&
!(SET_WITH_TIMEOUT(set) &&
@@ -909,7 +1037,6 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
}
out:
- rcu_read_unlock_bh();
return ret;
}
@@ -921,15 +1048,19 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
const struct htable *t;
struct nlattr *nested;
size_t memsize;
+ u8 htable_bits;
+ rcu_read_lock_bh();
t = rcu_dereference_bh_nfnl(h->table);
memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize);
+ htable_bits = t->htable_bits;
+ rcu_read_unlock_bh();
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested)
goto nla_put_failure;
if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
- htonl(jhash_size(t->htable_bits))) ||
+ htonl(jhash_size(htable_bits))) ||
nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
goto nla_put_failure;
#ifdef IP_SET_HASH_WITH_NETMASK
@@ -953,32 +1084,63 @@ nla_put_failure:
return -EMSGSIZE;
}
+/* Make possible to run dumping parallel with resizing */
+static void
+mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
+{
+ struct htype *h = set->data;
+ struct htable *t;
+
+ if (start) {
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh_nfnl(h->table);
+ atomic_inc(&t->uref);
+ cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
+ rcu_read_unlock_bh();
+ } else if (cb->args[IPSET_CB_PRIVATE]) {
+ t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ /* Resizing didn't destroy the hash table */
+ pr_debug("Table destroy by dump: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
+ cb->args[IPSET_CB_PRIVATE] = 0;
+ }
+}
+
/* Reply a LIST/SAVE request: dump the elements of the specified set */
static int
mtype_list(const struct ip_set *set,
struct sk_buff *skb, struct netlink_callback *cb)
{
- const struct htype *h = set->data;
- const struct htable *t = rcu_dereference_bh_nfnl(h->table);
+ const struct htable *t;
struct nlattr *atd, *nested;
const struct hbucket *n;
const struct mtype_elem *e;
u32 first = cb->args[IPSET_CB_ARG0];
/* We assume that one hash bucket fills into one page */
void *incomplete;
- int i;
+ int i, ret = 0;
atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
if (!atd)
return -EMSGSIZE;
+
pr_debug("list hash set %s\n", set->name);
+ t = (const struct htable *)cb->args[IPSET_CB_PRIVATE];
+ /* Expire may replace a hbucket with another one */
+ rcu_read_lock();
for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits);
cb->args[IPSET_CB_ARG0]++) {
incomplete = skb_tail_pointer(skb);
- n = hbucket(t, cb->args[IPSET_CB_ARG0]);
+ n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0]));
pr_debug("cb->arg bucket: %lu, t %p n %p\n",
cb->args[IPSET_CB_ARG0], t, n);
+ if (!n)
+ continue;
for (i = 0; i < n->pos; i++) {
+ if (!test_bit(i, n->used))
+ continue;
e = ahash_data(n, i, set->dsize);
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
@@ -989,9 +1151,10 @@ mtype_list(const struct ip_set *set,
if (!nested) {
if (cb->args[IPSET_CB_ARG0] == first) {
nla_nest_cancel(skb, atd);
- return -EMSGSIZE;
- } else
- goto nla_put_failure;
+ ret = -EMSGSIZE;
+ goto out;
+ }
+ goto nla_put_failure;
}
if (mtype_data_list(skb, e))
goto nla_put_failure;
@@ -1004,7 +1167,7 @@ mtype_list(const struct ip_set *set,
/* Set listing finished */
cb->args[IPSET_CB_ARG0] = 0;
- return 0;
+ goto out;
nla_put_failure:
nlmsg_trim(skb, incomplete);
@@ -1012,20 +1175,24 @@ nla_put_failure:
pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n",
set->name);
cb->args[IPSET_CB_ARG0] = 0;
- return -EMSGSIZE;
+ ret = -EMSGSIZE;
+ } else {
+ ipset_nest_end(skb, atd);
}
- ipset_nest_end(skb, atd);
- return 0;
+out:
+ rcu_read_unlock();
+ return ret;
}
static int
IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
- const struct xt_action_param *par,
- enum ipset_adt adt, struct ip_set_adt_opt *opt);
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt);
static int
IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
- enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);
+ enum ipset_adt adt, u32 *lineno, u32 flags,
+ bool retried);
static const struct ip_set_type_variant mtype_variant = {
.kadt = mtype_kadt,
@@ -1039,6 +1206,7 @@ static const struct ip_set_type_variant mtype_variant = {
.flush = mtype_flush,
.head = mtype_head,
.list = mtype_list,
+ .uref = mtype_uref,
.resize = mtype_resize,
.same_set = mtype_same_set,
};
@@ -1076,12 +1244,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
-#ifdef IP_SET_HASH_WITH_MARKMASK
- !ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK) ||
-#endif
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
+#ifdef IP_SET_HASH_WITH_MARKMASK
+ /* Separated condition in order to avoid directive in argument list */
+ if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK)))
+ return -IPSET_ERR_PROTOCOL;
+#endif
if (tb[IPSET_ATTR_HASHSIZE]) {
hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
@@ -1104,7 +1274,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
if (tb[IPSET_ATTR_MARKMASK]) {
- markmask = ntohl(nla_get_u32(tb[IPSET_ATTR_MARKMASK]));
+ markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
if (markmask == 0)
return -IPSET_ERR_INVALID_MARKMASK;
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 54df48b5c455..9d6bf19f7b78 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -108,18 +108,12 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
u32 ip = 0, ip_to = 0, hosts;
int ret = 0;
- if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
- return -IPSET_ERR_PROTOCOL;
-
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (unlikely(!tb[IPSET_ATTR_IP]))
+ return -IPSET_ERR_PROTOCOL;
+
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
if (ret)
return ret;
@@ -164,8 +158,8 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -246,20 +240,20 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret;
- if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
- tb[IPSET_ATTR_IP_TO] ||
- tb[IPSET_ATTR_CIDR]))
- return -IPSET_ERR_PROTOCOL;
-
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (unlikely(!tb[IPSET_ATTR_IP]))
+ return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+ if (unlikely(tb[IPSET_ATTR_CIDR])) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+ if (cidr != HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
if (ret)
return ret;
@@ -321,6 +315,7 @@ hash_ip_init(void)
static void __exit
hash_ip_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_ip_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index d231248eb3e2..a0695a2ab585 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -108,19 +108,13 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
u32 ip, ip_to = 0;
int ret;
- if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
- return -IPSET_ERR_PROTOCOL;
-
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !ip_set_attr_netorder(tb, IPSET_ATTR_MARK)))
+ return -IPSET_ERR_PROTOCOL;
+
ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip);
if (ret)
return ret;
@@ -161,8 +155,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -212,7 +206,6 @@ hash_ipmark6_data_next(struct hash_ipmark4_elem *next,
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
-
static int
hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
@@ -240,20 +233,20 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
- tb[IPSET_ATTR_IP_TO] ||
- tb[IPSET_ATTR_CIDR]))
+ !ip_set_attr_netorder(tb, IPSET_ATTR_MARK)))
return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+ if (unlikely(tb[IPSET_ATTR_CIDR])) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (cidr != HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
if (ret)
@@ -274,10 +267,8 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
- return ret;
+ return 0;
}
static struct ip_set_type hash_ipmark_type __read_mostly = {
@@ -325,6 +316,7 @@ hash_ipmark_init(void)
static void __exit
hash_ipmark_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_ipmark_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index a47c29f12090..9d84b3dff603 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -116,20 +116,14 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
bool with_ports = false;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
return -IPSET_ERR_PROTOCOL;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
-
ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip);
if (ret)
return ret;
@@ -146,8 +140,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMP))
e.port = 0;
@@ -193,8 +188,8 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
}
return ret;
@@ -279,21 +274,21 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
bool with_ports = false;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
- tb[IPSET_ATTR_IP_TO] ||
- tb[IPSET_ATTR_CIDR]))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+ if (unlikely(tb[IPSET_ATTR_CIDR])) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (cidr != HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
if (ret)
@@ -311,8 +306,9 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMPV6))
e.port = 0;
@@ -335,8 +331,8 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -388,6 +384,7 @@ hash_ipport_init(void)
static void __exit
hash_ipport_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_ipport_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 89615f134845..215b7b942038 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -63,7 +63,7 @@ hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
static bool
hash_ipportip4_data_list(struct sk_buff *skb,
- const struct hash_ipportip4_elem *data)
+ const struct hash_ipportip4_elem *data)
{
if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) ||
@@ -119,20 +119,14 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
bool with_ports = false;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
return -IPSET_ERR_PROTOCOL;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
-
ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip);
if (ret)
return ret;
@@ -153,8 +147,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMP))
e.port = 0;
@@ -200,8 +195,8 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
}
return ret;
@@ -290,21 +285,21 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
bool with_ports = false;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
- tb[IPSET_ATTR_IP_TO] ||
- tb[IPSET_ATTR_CIDR]))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO)))
return -IPSET_ERR_PROTOCOL;
+ if (unlikely(tb[IPSET_ATTR_IP_TO]))
+ return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+ if (unlikely(tb[IPSET_ATTR_CIDR])) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (cidr != HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
if (ret)
@@ -326,8 +321,9 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMPV6))
e.port = 0;
@@ -350,8 +346,8 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -403,6 +399,7 @@ hash_ipportip_init(void)
static void __exit
hash_ipportip_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_ipportip_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 6ba7a7e083f9..9ca719625ea3 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -141,7 +141,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_ipportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet4_elem e = {
- .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -173,21 +173,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
u8 cidr;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
-
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
if (ret)
return ret;
@@ -215,14 +209,16 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMP))
e.port = 0;
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -269,8 +265,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip2_from, ip2_to);
if (ip2_from + UINT_MAX == ip2_to)
return -IPSET_ERR_HASH_RANGE;
- } else
+ } else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1);
+ }
if (retried)
ip = ntohl(h->next.ip);
@@ -293,8 +290,8 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
ip2 = ip2_last + 1;
}
}
@@ -395,7 +392,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_ipportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet6_elem e = {
- .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -426,24 +423,22 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
u8 cidr;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
- tb[IPSET_ATTR_IP_TO] ||
- tb[IPSET_ATTR_CIDR]))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
+ if (unlikely(tb[IPSET_ATTR_CIDR])) {
+ u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (cidr != HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
if (ret)
@@ -474,14 +469,16 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMPV6))
e.port = 0;
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -505,8 +502,8 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -562,6 +559,7 @@ hash_ipportnet_init(void)
static void __exit
hash_ipportnet_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_ipportnet_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index 1f8668d7a538..f1e7d2c0f685 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -89,10 +89,10 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
return 0;
if (skb_mac_header(skb) < skb->head ||
- (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+ (skb_mac_header(skb) + ETH_HLEN) > skb->data)
return -EINVAL;
- memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
return -EINVAL;
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
@@ -107,22 +107,16 @@ hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret;
- if (unlikely(!tb[IPSET_ATTR_ETHER] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
- return -IPSET_ERR_PROTOCOL;
-
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (unlikely(!tb[IPSET_ATTR_ETHER]))
+ return -IPSET_ERR_PROTOCOL;
+
ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
- memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+ ether_addr_copy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]));
if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
return -IPSET_ERR_HASH_ELEM;
@@ -171,6 +165,7 @@ hash_mac_init(void)
static void __exit
hash_mac_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_mac_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 2e63dad8644d..3e4bffdc1cc0 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -120,7 +120,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_net *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = {
- .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -146,19 +146,13 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
u32 ip = 0, ip_to = 0, last;
int ret;
- if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
- return -IPSET_ERR_PROTOCOL;
-
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (unlikely(!tb[IPSET_ATTR_IP] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
if (ret)
return ret;
@@ -175,6 +169,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -182,7 +177,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
e.ip = htonl(ip & ip_set_hostmask(e.cidr));
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt, set) ? -ret:
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
ip_set_eexist(ret, flags) ? 0 : ret;
}
@@ -204,8 +199,8 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
ip = last + 1;
}
return ret;
@@ -294,7 +289,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_net *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net6_elem e = {
- .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -318,21 +313,15 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
-
ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
if (ret)
return ret;
@@ -341,16 +330,17 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret)
return ret;
- if (tb[IPSET_ATTR_CIDR])
+ if (tb[IPSET_ATTR_CIDR]) {
e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
-
- if (!e.cidr || e.cidr > HOST_MASK)
- return -IPSET_ERR_INVALID_CIDR;
+ if (!e.cidr || e.cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
ip6_netmask(&e.ip, e.cidr);
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -404,6 +394,7 @@ hash_net_init(void)
static void __exit
hash_net_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_net_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index fe481f677f56..43d8c9896fa3 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -13,7 +13,6 @@
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/random.h>
-#include <linux/rbtree.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/netlink.h>
@@ -37,88 +36,13 @@ MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_hash:net,iface");
-/* Interface name rbtree */
-
-struct iface_node {
- struct rb_node node;
- char iface[IFNAMSIZ];
-};
-
-#define iface_data(n) (rb_entry(n, struct iface_node, node)->iface)
-
-static void
-rbtree_destroy(struct rb_root *root)
-{
- struct iface_node *node, *next;
-
- rbtree_postorder_for_each_entry_safe(node, next, root, node)
- kfree(node);
-
- *root = RB_ROOT;
-}
-
-static int
-iface_test(struct rb_root *root, const char **iface)
-{
- struct rb_node *n = root->rb_node;
-
- while (n) {
- const char *d = iface_data(n);
- int res = strcmp(*iface, d);
-
- if (res < 0)
- n = n->rb_left;
- else if (res > 0)
- n = n->rb_right;
- else {
- *iface = d;
- return 1;
- }
- }
- return 0;
-}
-
-static int
-iface_add(struct rb_root *root, const char **iface)
-{
- struct rb_node **n = &(root->rb_node), *p = NULL;
- struct iface_node *d;
-
- while (*n) {
- char *ifname = iface_data(*n);
- int res = strcmp(*iface, ifname);
-
- p = *n;
- if (res < 0)
- n = &((*n)->rb_left);
- else if (res > 0)
- n = &((*n)->rb_right);
- else {
- *iface = ifname;
- return 0;
- }
- }
-
- d = kzalloc(sizeof(*d), GFP_ATOMIC);
- if (!d)
- return -ENOMEM;
- strcpy(d->iface, *iface);
-
- rb_link_node(&d->node, p, n);
- rb_insert_color(&d->node, root);
-
- *iface = d->iface;
- return 0;
-}
-
/* Type specific function prefix */
#define HTYPE hash_netiface
#define IP_SET_HASH_WITH_NETS
-#define IP_SET_HASH_WITH_RBTREE
#define IP_SET_HASH_WITH_MULTI
#define IP_SET_HASH_WITH_NET0
-#define STREQ(a, b) (strcmp(a, b) == 0)
+#define STRLCPY(a, b) strlcpy(a, b, IFNAMSIZ)
/* IPv4 variant */
@@ -137,7 +61,7 @@ struct hash_netiface4_elem {
u8 cidr;
u8 nomatch;
u8 elem;
- const char *iface;
+ char iface[IFNAMSIZ];
};
/* Common functions */
@@ -151,7 +75,7 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
ip1->cidr == ip2->cidr &&
(++*multi) &&
ip1->physdev == ip2->physdev &&
- ip1->iface == ip2->iface;
+ strcmp(ip1->iface, ip2->iface) == 0;
}
static inline int
@@ -219,7 +143,7 @@ static const char *get_physindev_name(const struct sk_buff *skb)
return dev ? dev->name : NULL;
}
-static const char *get_phyoutdev_name(const struct sk_buff *skb)
+static const char *get_physoutdev_name(const struct sk_buff *skb)
{
struct net_device *dev = nf_bridge_get_physoutdev(skb);
@@ -235,11 +159,10 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
struct hash_netiface *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = {
- .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
.elem = 1,
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- int ret;
if (e.cidr == 0)
return -EINVAL;
@@ -249,35 +172,25 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
e.ip &= ip_set_netmask(e.cidr);
-#define IFACE(dir) (par->dir ? par->dir->name : NULL)
+#define IFACE(dir) (par->dir ? par->dir->name : "")
#define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC)
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- e.iface = SRCDIR ? get_physindev_name(skb) :
- get_phyoutdev_name(skb);
+ const char *eiface = SRCDIR ? get_physindev_name(skb) :
+ get_physoutdev_name(skb);
- if (!e.iface)
+ if (!eiface)
return -EINVAL;
+ STRLCPY(e.iface, eiface);
e.physdev = 1;
-#else
- e.iface = NULL;
#endif
- } else
- e.iface = SRCDIR ? IFACE(in) : IFACE(out);
+ } else {
+ STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
+ }
- if (!e.iface)
+ if (strlen(e.iface) == 0)
return -EINVAL;
- ret = iface_test(&h->rbtree, &e.iface);
- if (adt == IPSET_ADD) {
- if (!ret) {
- ret = iface_add(&h->rbtree, &e.iface);
- if (ret)
- return ret;
- }
- } else if (!ret)
- return ret;
-
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
@@ -290,23 +203,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, last;
- char iface[IFNAMSIZ];
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
!tb[IPSET_ATTR_IFACE] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
-
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
if (ret)
return ret;
@@ -320,21 +226,11 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
-
- strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE]));
- e.iface = iface;
- ret = iface_test(&h->rbtree, &e.iface);
- if (adt == IPSET_ADD) {
- if (!ret) {
- ret = iface_add(&h->rbtree, &e.iface);
- if (ret)
- return ret;
- }
- } else if (!ret)
- return ret;
+ nla_strlcpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ);
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_PHYSDEV)
e.physdev = 1;
if (cadt_flags & IPSET_FLAG_NOMATCH)
@@ -355,8 +251,9 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip, ip_to);
if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE;
- } else
+ } else {
ip_set_mask_from_to(ip, ip_to, e.cidr);
+ }
if (retried)
ip = ntohl(h->next.ip);
@@ -367,8 +264,8 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
ip = last + 1;
}
return ret;
@@ -390,7 +287,7 @@ struct hash_netiface6_elem {
u8 cidr;
u8 nomatch;
u8 elem;
- const char *iface;
+ char iface[IFNAMSIZ];
};
/* Common functions */
@@ -404,7 +301,7 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
ip1->cidr == ip2->cidr &&
(++*multi) &&
ip1->physdev == ip2->physdev &&
- ip1->iface == ip2->iface;
+ strcmp(ip1->iface, ip2->iface) == 0;
}
static inline int
@@ -475,11 +372,10 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
struct hash_netiface *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface6_elem e = {
- .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
.elem = 1,
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- int ret;
if (e.cidr == 0)
return -EINVAL;
@@ -491,60 +387,43 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- e.iface = SRCDIR ? get_physindev_name(skb) :
- get_phyoutdev_name(skb);
- if (!e.iface)
- return -EINVAL;
+ const char *eiface = SRCDIR ? get_physindev_name(skb) :
+ get_physoutdev_name(skb);
+ if (!eiface)
+ return -EINVAL;
+ STRLCPY(e.iface, eiface);
e.physdev = 1;
-#else
- e.iface = NULL;
#endif
- } else
- e.iface = SRCDIR ? IFACE(in) : IFACE(out);
+ } else {
+ STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
+ }
- if (!e.iface)
+ if (strlen(e.iface) == 0)
return -EINVAL;
- ret = iface_test(&h->rbtree, &e.iface);
- if (adt == IPSET_ADD) {
- if (!ret) {
- ret = iface_add(&h->rbtree, &e.iface);
- if (ret)
- return ret;
- }
- } else if (!ret)
- return ret;
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
static int
hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
- enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- struct hash_netiface *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- char iface[IFNAMSIZ];
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
!tb[IPSET_ATTR_IFACE] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
-
ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
if (ret)
return ret;
@@ -553,26 +432,19 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret)
return ret;
- if (tb[IPSET_ATTR_CIDR])
+ if (tb[IPSET_ATTR_CIDR]) {
e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (e.cidr > HOST_MASK)
- return -IPSET_ERR_INVALID_CIDR;
+ if (e.cidr > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
+
ip6_netmask(&e.ip, e.cidr);
- strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE]));
- e.iface = iface;
- ret = iface_test(&h->rbtree, &e.iface);
- if (adt == IPSET_ADD) {
- if (!ret) {
- ret = iface_add(&h->rbtree, &e.iface);
- if (ret)
- return ret;
- }
- } else if (!ret)
- return ret;
+ nla_strlcpy(e.iface, tb[IPSET_ATTR_IFACE], IFNAMSIZ);
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_PHYSDEV)
e.physdev = 1;
if (cadt_flags & IPSET_FLAG_NOMATCH)
@@ -633,6 +505,7 @@ hash_netiface_init(void)
static void __exit
hash_netiface_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_netiface_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 847047483560..3c862c0a76d1 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -57,8 +57,8 @@ struct hash_netnet4_elem {
static inline bool
hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1,
- const struct hash_netnet4_elem *ip2,
- u32 *multi)
+ const struct hash_netnet4_elem *ip2,
+ u32 *multi)
{
return ip1->ipcmp == ip2->ipcmp &&
ip1->ccmp == ip2->ccmp;
@@ -84,7 +84,7 @@ hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags)
static inline void
hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem,
- struct hash_netnet4_elem *orig)
+ struct hash_netnet4_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
@@ -103,7 +103,7 @@ hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner)
static bool
hash_netnet4_data_list(struct sk_buff *skb,
- const struct hash_netnet4_elem *data)
+ const struct hash_netnet4_elem *data)
{
u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
@@ -122,7 +122,7 @@ nla_put_failure:
static inline void
hash_netnet4_data_next(struct hash_netnet4_elem *next,
- const struct hash_netnet4_elem *d)
+ const struct hash_netnet4_elem *d)
{
next->ipcmp = d->ipcmp;
}
@@ -133,16 +133,16 @@ hash_netnet4_data_next(struct hash_netnet4_elem *next,
static int
hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
- const struct xt_action_param *par,
- enum ipset_adt adt, struct ip_set_adt_opt *opt)
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
const struct hash_netnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
- e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+ e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+ e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
if (adt == IPSET_TEST)
e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK;
@@ -156,31 +156,23 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
static int
hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
- enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
const struct hash_netnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netnet4_elem e = { };
+ struct hash_netnet4_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, last;
u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2;
- u8 cidr, cidr2;
int ret;
- e.cidr[0] = e.cidr[1] = HOST_MASK;
- if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
- return -IPSET_ERR_PROTOCOL;
-
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
if (ret)
return ret;
@@ -194,21 +186,20 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
if (tb[IPSET_ATTR_CIDR]) {
- cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!cidr || cidr > HOST_MASK)
+ e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
- e.cidr[0] = cidr;
}
if (tb[IPSET_ATTR_CIDR2]) {
- cidr2 = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
- if (!cidr2 || cidr2 > HOST_MASK)
+ e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+ if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
- e.cidr[1] = cidr2;
}
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -231,8 +222,9 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip, ip_to);
if (unlikely(ip + UINT_MAX == ip_to))
return -IPSET_ERR_HASH_RANGE;
- } else
+ } else {
ip_set_mask_from_to(ip, ip_to, e.cidr[0]);
+ }
ip2_to = ip2_from;
if (tb[IPSET_ATTR_IP2_TO]) {
@@ -243,28 +235,27 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip2_from, ip2_to);
if (unlikely(ip2_from + UINT_MAX == ip2_to))
return -IPSET_ERR_HASH_RANGE;
- } else
+ } else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+ }
if (retried)
ip = ntohl(h->next.ip[0]);
while (!after(ip, ip_to)) {
e.ip[0] = htonl(ip);
- last = ip_set_range_to_cidr(ip, ip_to, &cidr);
- e.cidr[0] = cidr;
+ last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
ip2 = (retried &&
ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1])
: ip2_from;
while (!after(ip2, ip2_to)) {
e.ip[1] = htonl(ip2);
- last2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr2);
- e.cidr[1] = cidr2;
+ last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
ip2 = last2 + 1;
}
ip = last + 1;
@@ -288,8 +279,8 @@ struct hash_netnet6_elem {
static inline bool
hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1,
- const struct hash_netnet6_elem *ip2,
- u32 *multi)
+ const struct hash_netnet6_elem *ip2,
+ u32 *multi)
{
return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) &&
ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) &&
@@ -316,7 +307,7 @@ hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags)
static inline void
hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem,
- struct hash_netnet6_elem *orig)
+ struct hash_netnet6_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
@@ -335,7 +326,7 @@ hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner)
static bool
hash_netnet6_data_list(struct sk_buff *skb,
- const struct hash_netnet6_elem *data)
+ const struct hash_netnet6_elem *data)
{
u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
@@ -354,7 +345,7 @@ nla_put_failure:
static inline void
hash_netnet6_data_next(struct hash_netnet4_elem *next,
- const struct hash_netnet6_elem *d)
+ const struct hash_netnet6_elem *d)
{
}
@@ -368,18 +359,18 @@ hash_netnet6_data_next(struct hash_netnet4_elem *next,
static int
hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
- const struct xt_action_param *par,
- enum ipset_adt adt, struct ip_set_adt_opt *opt)
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
const struct hash_netnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netnet6_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
- e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+ e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+ e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
if (adt == IPSET_TEST)
- e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK;
+ e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK;
ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6);
ip6addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1].in6);
@@ -391,29 +382,22 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
static int
hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
- enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netnet6_elem e = { };
+ struct hash_netnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret;
- e.cidr[0] = e.cidr[1] = HOST_MASK;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
-
ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]);
if (ret)
return ret;
@@ -426,21 +410,24 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret)
return ret;
- if (tb[IPSET_ATTR_CIDR])
+ if (tb[IPSET_ATTR_CIDR]) {
e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
- if (tb[IPSET_ATTR_CIDR2])
+ if (tb[IPSET_ATTR_CIDR2]) {
e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
-
- if (!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] ||
- e.cidr[1] > HOST_MASK)
- return -IPSET_ERR_INVALID_CIDR;
+ if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
ip6_netmask(&e.ip[0], e.cidr[0]);
ip6_netmask(&e.ip[1], e.cidr[1]);
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -497,6 +484,7 @@ hash_netnet_init(void)
static void __exit
hash_netnet_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_netnet_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 8273819c1a2f..731813e0f08c 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -136,7 +136,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_netport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = {
- .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -166,21 +166,15 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
u8 cidr;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
-
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
if (ret)
return ret;
@@ -204,8 +198,9 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMP))
e.port = 0;
@@ -214,6 +209,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -239,8 +235,9 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip, ip_to);
if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE;
- } else
+ } else {
ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
+ }
if (retried)
ip = ntohl(h->next.ip);
@@ -256,8 +253,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
ip = last + 1;
}
@@ -354,7 +351,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct hash_netport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport6_elem e = {
- .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1,
+ .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -384,23 +381,17 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
u8 cidr;
int ret;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
-
ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
if (ret)
return ret;
@@ -425,14 +416,16 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMPV6))
e.port = 0;
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -456,8 +449,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -510,6 +503,7 @@ hash_netport_init(void)
static void __exit
hash_netport_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_netport_type);
}
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 1451a8ac938f..0c68734f5cc4 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -62,8 +62,8 @@ struct hash_netportnet4_elem {
static inline bool
hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1,
- const struct hash_netportnet4_elem *ip2,
- u32 *multi)
+ const struct hash_netportnet4_elem *ip2,
+ u32 *multi)
{
return ip1->ipcmp == ip2->ipcmp &&
ip1->ccmp == ip2->ccmp &&
@@ -91,7 +91,7 @@ hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags)
static inline void
hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem,
- struct hash_netportnet4_elem *orig)
+ struct hash_netportnet4_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
@@ -111,7 +111,7 @@ hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem,
static bool
hash_netportnet4_data_list(struct sk_buff *skb,
- const struct hash_netportnet4_elem *data)
+ const struct hash_netportnet4_elem *data)
{
u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
@@ -132,7 +132,7 @@ nla_put_failure:
static inline void
hash_netportnet4_data_next(struct hash_netportnet4_elem *next,
- const struct hash_netportnet4_elem *d)
+ const struct hash_netportnet4_elem *d)
{
next->ipcmp = d->ipcmp;
next->port = d->port;
@@ -144,16 +144,16 @@ hash_netportnet4_data_next(struct hash_netportnet4_elem *next,
static int
hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
- const struct xt_action_param *par,
- enum ipset_adt adt, struct ip_set_adt_opt *opt)
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
const struct hash_netportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
- e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+ e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+ e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
if (adt == IPSET_TEST)
e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK;
@@ -171,34 +171,26 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
static int
hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
- enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
const struct hash_netportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netportnet4_elem e = { };
+ struct hash_netportnet4_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to;
u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2;
bool with_ports = false;
- u8 cidr, cidr2;
int ret;
- e.cidr[0] = e.cidr[1] = HOST_MASK;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
-
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
if (ret)
return ret;
@@ -212,17 +204,15 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
if (tb[IPSET_ATTR_CIDR]) {
- cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!cidr || cidr > HOST_MASK)
+ e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
- e.cidr[0] = cidr;
}
if (tb[IPSET_ATTR_CIDR2]) {
- cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
- if (!cidr || cidr > HOST_MASK)
+ e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
+ if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
- e.cidr[1] = cidr;
}
e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
@@ -233,14 +223,16 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMP))
e.port = 0;
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -264,8 +256,9 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip, ip_to);
if (unlikely(ip + UINT_MAX == ip_to))
return -IPSET_ERR_HASH_RANGE;
- } else
+ } else {
ip_set_mask_from_to(ip, ip_to, e.cidr[0]);
+ }
port_to = port = ntohs(e.port);
if (tb[IPSET_ATTR_PORT_TO]) {
@@ -283,16 +276,16 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip2_from, ip2_to);
if (unlikely(ip2_from + UINT_MAX == ip2_to))
return -IPSET_ERR_HASH_RANGE;
- } else
+ } else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+ }
if (retried)
ip = ntohl(h->next.ip[0]);
while (!after(ip, ip_to)) {
e.ip[0] = htonl(ip);
- ip_last = ip_set_range_to_cidr(ip, ip_to, &cidr);
- e.cidr[0] = cidr;
+ ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port)
: port;
for (; p <= port_to; p++) {
@@ -303,13 +296,12 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
while (!after(ip2, ip2_to)) {
e.ip[1] = htonl(ip2);
ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
- &cidr2);
- e.cidr[1] = cidr2;
+ &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
ip2 = ip2_last + 1;
}
}
@@ -336,8 +328,8 @@ struct hash_netportnet6_elem {
static inline bool
hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1,
- const struct hash_netportnet6_elem *ip2,
- u32 *multi)
+ const struct hash_netportnet6_elem *ip2,
+ u32 *multi)
{
return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) &&
ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) &&
@@ -366,7 +358,7 @@ hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags)
static inline void
hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem,
- struct hash_netportnet6_elem *orig)
+ struct hash_netportnet6_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
@@ -386,7 +378,7 @@ hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem,
static bool
hash_netportnet6_data_list(struct sk_buff *skb,
- const struct hash_netportnet6_elem *data)
+ const struct hash_netportnet6_elem *data)
{
u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
@@ -407,7 +399,7 @@ nla_put_failure:
static inline void
hash_netportnet6_data_next(struct hash_netportnet4_elem *next,
- const struct hash_netportnet6_elem *d)
+ const struct hash_netportnet6_elem *d)
{
next->port = d->port;
}
@@ -422,16 +414,16 @@ hash_netportnet6_data_next(struct hash_netportnet4_elem *next,
static int
hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
- const struct xt_action_param *par,
- enum ipset_adt adt, struct ip_set_adt_opt *opt)
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
const struct hash_netportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netportnet6_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- e.cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
- e.cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
+ e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
+ e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
if (adt == IPSET_TEST)
e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK;
@@ -449,34 +441,27 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
static int
hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
- enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
const struct hash_netportnet *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netportnet6_elem e = { };
+ struct hash_netportnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 port, port_to;
bool with_ports = false;
int ret;
- e.cidr[0] = e.cidr[1] = HOST_MASK;
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
- if (tb[IPSET_ATTR_LINENO])
- *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
-
ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]);
if (ret)
return ret;
@@ -489,15 +474,17 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret)
return ret;
- if (tb[IPSET_ATTR_CIDR])
+ if (tb[IPSET_ATTR_CIDR]) {
e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+ if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
- if (tb[IPSET_ATTR_CIDR2])
+ if (tb[IPSET_ATTR_CIDR2]) {
e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
-
- if (unlikely(!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] ||
- e.cidr[1] > HOST_MASK))
- return -IPSET_ERR_INVALID_CIDR;
+ if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+ return -IPSET_ERR_INVALID_CIDR;
+ }
ip6_netmask(&e.ip[0], e.cidr[0]);
ip6_netmask(&e.ip[1], e.cidr[1]);
@@ -510,14 +497,16 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (e.proto == 0)
return -IPSET_ERR_INVALID_PROTO;
- } else
+ } else {
return -IPSET_ERR_MISSING_PROTO;
+ }
if (!(with_ports || e.proto == IPPROTO_ICMPV6))
e.port = 0;
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
}
@@ -541,8 +530,8 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret && !ip_set_eexist(ret, flags))
return ret;
- else
- ret = 0;
+
+ ret = 0;
}
return ret;
}
@@ -598,6 +587,7 @@ hash_netportnet_init(void)
static void __exit
hash_netportnet_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&hash_netportnet_type);
}
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 5bd3b1eae3fa..a1fe5377a2b3 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/ip.h>
+#include <linux/rculist.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
@@ -27,6 +28,8 @@ MODULE_ALIAS("ip_set_list:set");
/* Member elements */
struct set_elem {
+ struct rcu_head rcu;
+ struct list_head list;
ip_set_id_t id;
};
@@ -41,12 +44,9 @@ struct list_set {
u32 size; /* size of set list array */
struct timer_list gc; /* garbage collection */
struct net *net; /* namespace */
- struct set_elem members[0]; /* the set members */
+ struct list_head members; /* the set members */
};
-#define list_set_elem(set, map, id) \
- (struct set_elem *)((void *)(map)->members + (id) * (set)->dsize)
-
static int
list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
@@ -54,17 +54,14 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
{
struct list_set *map = set->data;
struct set_elem *e;
- u32 i, cmdflags = opt->cmdflags;
+ u32 cmdflags = opt->cmdflags;
int ret;
/* Don't lookup sub-counters at all */
opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS;
if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE)
opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE;
- for (i = 0; i < map->size; i++) {
- e = list_set_elem(set, map, i);
- if (e->id == IPSET_INVALID_ID)
- return 0;
+ list_for_each_entry_rcu(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -91,13 +88,9 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb,
{
struct list_set *map = set->data;
struct set_elem *e;
- u32 i;
int ret;
- for (i = 0; i < map->size; i++) {
- e = list_set_elem(set, map, i);
- if (e->id == IPSET_INVALID_ID)
- return 0;
+ list_for_each_entry(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -115,13 +108,9 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb,
{
struct list_set *map = set->data;
struct set_elem *e;
- u32 i;
int ret;
- for (i = 0; i < map->size; i++) {
- e = list_set_elem(set, map, i);
- if (e->id == IPSET_INVALID_ID)
- return 0;
+ list_for_each_entry(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -138,110 +127,65 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+ int ret = -EINVAL;
+ rcu_read_lock();
switch (adt) {
case IPSET_TEST:
- return list_set_ktest(set, skb, par, opt, &ext);
+ ret = list_set_ktest(set, skb, par, opt, &ext);
+ break;
case IPSET_ADD:
- return list_set_kadd(set, skb, par, opt, &ext);
+ ret = list_set_kadd(set, skb, par, opt, &ext);
+ break;
case IPSET_DEL:
- return list_set_kdel(set, skb, par, opt, &ext);
+ ret = list_set_kdel(set, skb, par, opt, &ext);
+ break;
default:
break;
}
- return -EINVAL;
-}
-
-static bool
-id_eq(const struct ip_set *set, u32 i, ip_set_id_t id)
-{
- const struct list_set *map = set->data;
- const struct set_elem *e;
-
- if (i >= map->size)
- return 0;
+ rcu_read_unlock();
- e = list_set_elem(set, map, i);
- return !!(e->id == id &&
- !(SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, set))));
+ return ret;
}
-static int
-list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
- const struct ip_set_ext *ext)
-{
- struct list_set *map = set->data;
- struct set_elem *e = list_set_elem(set, map, i);
+/* Userspace interfaces: we are protected by the nfnl mutex */
- if (e->id != IPSET_INVALID_ID) {
- if (i == map->size - 1) {
- /* Last element replaced: e.g. add new,before,last */
- ip_set_put_byindex(map->net, e->id);
- ip_set_ext_destroy(set, e);
- } else {
- struct set_elem *x = list_set_elem(set, map,
- map->size - 1);
-
- /* Last element pushed off */
- if (x->id != IPSET_INVALID_ID) {
- ip_set_put_byindex(map->net, x->id);
- ip_set_ext_destroy(set, x);
- }
- memmove(list_set_elem(set, map, i + 1), e,
- set->dsize * (map->size - (i + 1)));
- /* Extensions must be initialized to zero */
- memset(e, 0, set->dsize);
- }
- }
-
- e->id = d->id;
- if (SET_WITH_TIMEOUT(set))
- ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
- if (SET_WITH_COUNTER(set))
- ip_set_init_counter(ext_counter(e, set), ext);
- if (SET_WITH_COMMENT(set))
- ip_set_init_comment(ext_comment(e, set), ext);
- if (SET_WITH_SKBINFO(set))
- ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
- return 0;
-}
-
-static int
-list_set_del(struct ip_set *set, u32 i)
+static void
+__list_set_del(struct ip_set *set, struct set_elem *e)
{
struct list_set *map = set->data;
- struct set_elem *e = list_set_elem(set, map, i);
ip_set_put_byindex(map->net, e->id);
+ /* We may call it, because we don't have a to be destroyed
+ * extension which is used by the kernel.
+ */
ip_set_ext_destroy(set, e);
+ kfree_rcu(e, rcu);
+}
- if (i < map->size - 1)
- memmove(e, list_set_elem(set, map, i + 1),
- set->dsize * (map->size - (i + 1)));
+static inline void
+list_set_del(struct ip_set *set, struct set_elem *e)
+{
+ list_del_rcu(&e->list);
+ __list_set_del(set, e);
+}
- /* Last element */
- e = list_set_elem(set, map, map->size - 1);
- e->id = IPSET_INVALID_ID;
- return 0;
+static inline void
+list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
+{
+ list_replace_rcu(&old->list, &e->list);
+ __list_set_del(set, old);
}
static void
set_cleanup_entries(struct ip_set *set)
{
struct list_set *map = set->data;
- struct set_elem *e;
- u32 i = 0;
+ struct set_elem *e, *n;
- while (i < map->size) {
- e = list_set_elem(set, map, i);
- if (e->id != IPSET_INVALID_ID &&
- ip_set_timeout_expired(ext_timeout(e, set)))
- list_set_del(set, i);
- /* Check element moved to position i in next loop */
- else
- i++;
- }
+ list_for_each_entry_safe(e, n, &map->members, list)
+ if (ip_set_timeout_expired(ext_timeout(e, set)))
+ list_set_del(set, e);
}
static int
@@ -250,31 +194,46 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
{
struct list_set *map = set->data;
struct set_adt_elem *d = value;
- struct set_elem *e;
- u32 i;
+ struct set_elem *e, *next, *prev = NULL;
int ret;
- for (i = 0; i < map->size; i++) {
- e = list_set_elem(set, map, i);
- if (e->id == IPSET_INVALID_ID)
- return 0;
- else if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, set)))
+ list_for_each_entry(e, &map->members, list) {
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, set)))
continue;
- else if (e->id != d->id)
+ else if (e->id != d->id) {
+ prev = e;
continue;
+ }
- if (d->before == 0)
- return 1;
- else if (d->before > 0)
- ret = id_eq(set, i + 1, d->refid);
- else
- ret = i > 0 && id_eq(set, i - 1, d->refid);
+ if (d->before == 0) {
+ ret = 1;
+ } else if (d->before > 0) {
+ next = list_next_entry(e, list);
+ ret = !list_is_last(&e->list, &map->members) &&
+ next->id == d->refid;
+ } else {
+ ret = prev && prev->id == d->refid;
+ }
return ret;
}
return 0;
}
+static void
+list_set_init_extensions(struct ip_set *set, const struct ip_set_ext *ext,
+ struct set_elem *e)
+{
+ if (SET_WITH_COUNTER(set))
+ ip_set_init_counter(ext_counter(e, set), ext);
+ if (SET_WITH_COMMENT(set))
+ ip_set_init_comment(ext_comment(e, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
+ /* Update timeout last */
+ if (SET_WITH_TIMEOUT(set))
+ ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
+}
static int
list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
@@ -282,60 +241,78 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
{
struct list_set *map = set->data;
struct set_adt_elem *d = value;
- struct set_elem *e;
+ struct set_elem *e, *n, *prev, *next;
bool flag_exist = flags & IPSET_FLAG_EXIST;
- u32 i, ret = 0;
if (SET_WITH_TIMEOUT(set))
set_cleanup_entries(set);
- /* Check already added element */
- for (i = 0; i < map->size; i++) {
- e = list_set_elem(set, map, i);
- if (e->id == IPSET_INVALID_ID)
- goto insert;
- else if (e->id != d->id)
+ /* Find where to add the new entry */
+ n = prev = next = NULL;
+ list_for_each_entry(e, &map->members, list) {
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, set)))
continue;
-
- if ((d->before > 1 && !id_eq(set, i + 1, d->refid)) ||
- (d->before < 0 &&
- (i == 0 || !id_eq(set, i - 1, d->refid))))
- /* Before/after doesn't match */
+ else if (d->id == e->id)
+ n = e;
+ else if (d->before == 0 || e->id != d->refid)
+ continue;
+ else if (d->before > 0)
+ next = e;
+ else
+ prev = e;
+ }
+ /* Re-add already existing element */
+ if (n) {
+ if ((d->before > 0 && !next) ||
+ (d->before < 0 && !prev))
return -IPSET_ERR_REF_EXIST;
if (!flag_exist)
- /* Can't re-add */
return -IPSET_ERR_EXIST;
/* Update extensions */
- ip_set_ext_destroy(set, e);
+ ip_set_ext_destroy(set, n);
+ list_set_init_extensions(set, ext, n);
- if (SET_WITH_TIMEOUT(set))
- ip_set_timeout_set(ext_timeout(e, set), ext->timeout);
- if (SET_WITH_COUNTER(set))
- ip_set_init_counter(ext_counter(e, set), ext);
- if (SET_WITH_COMMENT(set))
- ip_set_init_comment(ext_comment(e, set), ext);
- if (SET_WITH_SKBINFO(set))
- ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
/* Set is already added to the list */
ip_set_put_byindex(map->net, d->id);
return 0;
}
-insert:
- ret = -IPSET_ERR_LIST_FULL;
- for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) {
- e = list_set_elem(set, map, i);
- if (e->id == IPSET_INVALID_ID)
- ret = d->before != 0 ? -IPSET_ERR_REF_EXIST
- : list_set_add(set, i, d, ext);
- else if (e->id != d->refid)
- continue;
- else if (d->before > 0)
- ret = list_set_add(set, i, d, ext);
- else if (i + 1 < map->size)
- ret = list_set_add(set, i + 1, d, ext);
+ /* Add new entry */
+ if (d->before == 0) {
+ /* Append */
+ n = list_empty(&map->members) ? NULL :
+ list_last_entry(&map->members, struct set_elem, list);
+ } else if (d->before > 0) {
+ /* Insert after next element */
+ if (!list_is_last(&next->list, &map->members))
+ n = list_next_entry(next, list);
+ } else {
+ /* Insert before prev element */
+ if (prev->list.prev != &map->members)
+ n = list_prev_entry(prev, list);
}
+ /* Can we replace a timed out entry? */
+ if (n &&
+ !(SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(n, set))))
+ n = NULL;
+
+ e = kzalloc(set->dsize, GFP_KERNEL);
+ if (!e)
+ return -ENOMEM;
+ e->id = d->id;
+ INIT_LIST_HEAD(&e->list);
+ list_set_init_extensions(set, ext, e);
+ if (n)
+ list_set_replace(set, e, n);
+ else if (next)
+ list_add_tail_rcu(&e->list, &next->list);
+ else if (prev)
+ list_add_rcu(&e->list, &prev->list);
+ else
+ list_add_tail_rcu(&e->list, &map->members);
- return ret;
+ return 0;
}
static int
@@ -344,32 +321,30 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext,
{
struct list_set *map = set->data;
struct set_adt_elem *d = value;
- struct set_elem *e;
- u32 i;
-
- for (i = 0; i < map->size; i++) {
- e = list_set_elem(set, map, i);
- if (e->id == IPSET_INVALID_ID)
- return d->before != 0 ? -IPSET_ERR_REF_EXIST
- : -IPSET_ERR_EXIST;
- else if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, set)))
+ struct set_elem *e, *next, *prev = NULL;
+
+ list_for_each_entry(e, &map->members, list) {
+ if (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, set)))
continue;
- else if (e->id != d->id)
+ else if (e->id != d->id) {
+ prev = e;
continue;
+ }
- if (d->before == 0)
- return list_set_del(set, i);
- else if (d->before > 0) {
- if (!id_eq(set, i + 1, d->refid))
+ if (d->before > 0) {
+ next = list_next_entry(e, list);
+ if (list_is_last(&e->list, &map->members) ||
+ next->id != d->refid)
return -IPSET_ERR_REF_EXIST;
- return list_set_del(set, i);
- } else if (i == 0 || !id_eq(set, i - 1, d->refid))
- return -IPSET_ERR_REF_EXIST;
- else
- return list_set_del(set, i);
+ } else if (d->before < 0) {
+ if (!prev || prev->id != d->refid)
+ return -IPSET_ERR_REF_EXIST;
+ }
+ list_set_del(set, e);
+ return 0;
}
- return -IPSET_ERR_EXIST;
+ return d->before != 0 ? -IPSET_ERR_REF_EXIST : -IPSET_ERR_EXIST;
}
static int
@@ -383,19 +358,13 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
struct ip_set *s;
int ret = 0;
- if (unlikely(!tb[IPSET_ATTR_NAME] ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
- return -IPSET_ERR_PROTOCOL;
-
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+ if (unlikely(!tb[IPSET_ATTR_NAME] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+ return -IPSET_ERR_PROTOCOL;
+
ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -410,6 +379,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_CADT_FLAGS]) {
u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
e.before = f & IPSET_FLAG_BEFORE;
}
@@ -447,27 +417,26 @@ static void
list_set_flush(struct ip_set *set)
{
struct list_set *map = set->data;
- struct set_elem *e;
- u32 i;
-
- for (i = 0; i < map->size; i++) {
- e = list_set_elem(set, map, i);
- if (e->id != IPSET_INVALID_ID) {
- ip_set_put_byindex(map->net, e->id);
- ip_set_ext_destroy(set, e);
- e->id = IPSET_INVALID_ID;
- }
- }
+ struct set_elem *e, *n;
+
+ list_for_each_entry_safe(e, n, &map->members, list)
+ list_set_del(set, e);
}
static void
list_set_destroy(struct ip_set *set)
{
struct list_set *map = set->data;
+ struct set_elem *e, *n;
if (SET_WITH_TIMEOUT(set))
del_timer_sync(&map->gc);
- list_set_flush(set);
+ list_for_each_entry_safe(e, n, &map->members, list) {
+ list_del(&e->list);
+ ip_set_put_byindex(map->net, e->id);
+ ip_set_ext_destroy(set, e);
+ kfree(e);
+ }
kfree(map);
set->data = NULL;
@@ -478,6 +447,11 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
{
const struct list_set *map = set->data;
struct nlattr *nested;
+ struct set_elem *e;
+ u32 n = 0;
+
+ list_for_each_entry(e, &map->members, list)
+ n++;
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested)
@@ -485,7 +459,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
- htonl(sizeof(*map) + map->size * set->dsize)))
+ htonl(sizeof(*map) + n * set->dsize)))
goto nla_put_failure;
if (unlikely(ip_set_put_flags(skb, set)))
goto nla_put_failure;
@@ -502,18 +476,22 @@ list_set_list(const struct ip_set *set,
{
const struct list_set *map = set->data;
struct nlattr *atd, *nested;
- u32 i, first = cb->args[IPSET_CB_ARG0];
- const struct set_elem *e;
+ u32 i = 0, first = cb->args[IPSET_CB_ARG0];
+ struct set_elem *e;
+ int ret = 0;
atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
if (!atd)
return -EMSGSIZE;
- for (; cb->args[IPSET_CB_ARG0] < map->size;
- cb->args[IPSET_CB_ARG0]++) {
- i = cb->args[IPSET_CB_ARG0];
- e = list_set_elem(set, map, i);
- if (e->id == IPSET_INVALID_ID)
- goto finish;
+ list_for_each_entry(e, &map->members, list) {
+ if (i == first)
+ break;
+ i++;
+ }
+
+ rcu_read_lock();
+ list_for_each_entry_from(e, &map->members, list) {
+ i++;
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
@@ -521,9 +499,10 @@ list_set_list(const struct ip_set *set,
if (!nested) {
if (i == first) {
nla_nest_cancel(skb, atd);
- return -EMSGSIZE;
- } else
- goto nla_put_failure;
+ ret = -EMSGSIZE;
+ goto out;
+ }
+ goto nla_put_failure;
}
if (nla_put_string(skb, IPSET_ATTR_NAME,
ip_set_name_byindex(map->net, e->id)))
@@ -532,20 +511,23 @@ list_set_list(const struct ip_set *set,
goto nla_put_failure;
ipset_nest_end(skb, nested);
}
-finish:
+
ipset_nest_end(skb, atd);
/* Set listing finished */
cb->args[IPSET_CB_ARG0] = 0;
- return 0;
+ goto out;
nla_put_failure:
nla_nest_cancel(skb, nested);
if (unlikely(i == first)) {
cb->args[IPSET_CB_ARG0] = 0;
- return -EMSGSIZE;
+ ret = -EMSGSIZE;
}
+ cb->args[IPSET_CB_ARG0] = i - 1;
ipset_nest_end(skb, atd);
- return 0;
+out:
+ rcu_read_unlock();
+ return ret;
}
static bool
@@ -577,12 +559,12 @@ static const struct ip_set_type_variant set_variant = {
static void
list_set_gc(unsigned long ul_set)
{
- struct ip_set *set = (struct ip_set *) ul_set;
+ struct ip_set *set = (struct ip_set *)ul_set;
struct list_set *map = set->data;
- write_lock_bh(&set->lock);
+ spin_lock_bh(&set->lock);
set_cleanup_entries(set);
- write_unlock_bh(&set->lock);
+ spin_unlock_bh(&set->lock);
map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&map->gc);
@@ -594,7 +576,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
struct list_set *map = set->data;
init_timer(&map->gc);
- map->gc.data = (unsigned long) set;
+ map->gc.data = (unsigned long)set;
map->gc.function = gc;
map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&map->gc);
@@ -606,24 +588,16 @@ static bool
init_list_set(struct net *net, struct ip_set *set, u32 size)
{
struct list_set *map;
- struct set_elem *e;
- u32 i;
- map = kzalloc(sizeof(*map) +
- min_t(u32, size, IP_SET_LIST_MAX_SIZE) * set->dsize,
- GFP_KERNEL);
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
if (!map)
return false;
map->size = size;
map->net = net;
+ INIT_LIST_HEAD(&map->members);
set->data = map;
- for (i = 0; i < size; i++) {
- e = list_set_elem(set, map, i);
- e->id = IPSET_INVALID_ID;
- }
-
return true;
}
@@ -696,6 +670,7 @@ list_set_init(void)
static void __exit
list_set_fini(void)
{
+ rcu_barrier();
ip_set_type_unregister(&list_set_type);
}
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c
index 04d15fdc99ee..1c8a42c1056c 100644
--- a/net/netfilter/ipset/pfxlen.c
+++ b/net/netfilter/ipset/pfxlen.c
@@ -1,9 +1,7 @@
#include <linux/export.h>
#include <linux/netfilter/ipset/pfxlen.h>
-/*
- * Prefixlen maps for fast conversions, by Jan Engelhardt.
- */
+/* Prefixlen maps for fast conversions, by Jan Engelhardt. */
#define E(a, b, c, d) \
{.ip6 = { \
@@ -11,8 +9,7 @@
htonl(c), htonl(d), \
} }
-/*
- * This table works for both IPv4 and IPv6;
+/* This table works for both IPv4 and IPv6;
* just use prefixlen_netmask_map[prefixlength].ip.
*/
const union nf_inet_addr ip_set_netmask_map[] = {
@@ -149,13 +146,12 @@ const union nf_inet_addr ip_set_netmask_map[] = {
EXPORT_SYMBOL_GPL(ip_set_netmask_map);
#undef E
-#define E(a, b, c, d) \
- {.ip6 = { (__force __be32) a, (__force __be32) b, \
- (__force __be32) c, (__force __be32) d, \
+#define E(a, b, c, d) \
+ {.ip6 = { (__force __be32)a, (__force __be32)b, \
+ (__force __be32)c, (__force __be32)d, \
} }
-/*
- * This table works for both IPv4 and IPv6;
+/* This table works for both IPv4 and IPv6;
* just use prefixlen_hostmask_map[prefixlength].ip.
*/
const union nf_inet_addr ip_set_hostmask_map[] = {
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 60865f110309..2281be419a74 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -90,7 +90,13 @@ static int generic_packet(struct nf_conn *ct,
static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
{
- return nf_generic_should_process(nf_ct_protonum(ct));
+ bool ret;
+
+ ret = nf_generic_should_process(nf_ct_protonum(ct));
+ if (!ret)
+ pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n",
+ nf_ct_protonum(ct));
+ return ret;
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4528f122bcd2..cfe636808541 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -127,13 +127,46 @@ static void nft_trans_destroy(struct nft_trans *trans)
kfree(trans);
}
+int nft_register_basechain(struct nft_base_chain *basechain,
+ unsigned int hook_nops)
+{
+ if (basechain->flags & NFT_BASECHAIN_DISABLED)
+ return 0;
+
+ return nf_register_hooks(basechain->ops, hook_nops);
+}
+EXPORT_SYMBOL_GPL(nft_register_basechain);
+
+void nft_unregister_basechain(struct nft_base_chain *basechain,
+ unsigned int hook_nops)
+{
+ if (basechain->flags & NFT_BASECHAIN_DISABLED)
+ return;
+
+ nf_unregister_hooks(basechain->ops, hook_nops);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_basechain);
+
+static int nf_tables_register_hooks(const struct nft_table *table,
+ struct nft_chain *chain,
+ unsigned int hook_nops)
+{
+ if (table->flags & NFT_TABLE_F_DORMANT ||
+ !(chain->flags & NFT_BASE_CHAIN))
+ return 0;
+
+ return nft_register_basechain(nft_base_chain(chain), hook_nops);
+}
+
static void nf_tables_unregister_hooks(const struct nft_table *table,
- const struct nft_chain *chain,
+ struct nft_chain *chain,
unsigned int hook_nops)
{
- if (!(table->flags & NFT_TABLE_F_DORMANT) &&
- chain->flags & NFT_BASE_CHAIN)
- nf_unregister_hooks(nft_base_chain(chain)->ops, hook_nops);
+ if (table->flags & NFT_TABLE_F_DORMANT ||
+ !(chain->flags & NFT_BASE_CHAIN))
+ return;
+
+ nft_unregister_basechain(nft_base_chain(chain), hook_nops);
}
/* Internal table flags */
@@ -399,8 +432,6 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
[NFTA_TABLE_NAME] = { .type = NLA_STRING,
.len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
- [NFTA_TABLE_DEV] = { .type = NLA_STRING,
- .len = IFNAMSIZ - 1 },
};
static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
@@ -425,10 +456,6 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)))
goto nla_put_failure;
- if (table->dev &&
- nla_put_string(skb, NFTA_TABLE_DEV, table->dev->name))
- goto nla_put_failure;
-
nlmsg_end(skb, nlh);
return 0;
@@ -566,7 +593,7 @@ static int nf_tables_table_enable(const struct nft_af_info *afi,
if (!(chain->flags & NFT_BASE_CHAIN))
continue;
- err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops);
+ err = nft_register_basechain(nft_base_chain(chain), afi->nops);
if (err < 0)
goto err;
@@ -581,20 +608,20 @@ err:
if (i-- <= 0)
break;
- nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops);
+ nft_unregister_basechain(nft_base_chain(chain), afi->nops);
}
return err;
}
static void nf_tables_table_disable(const struct nft_af_info *afi,
- struct nft_table *table)
+ struct nft_table *table)
{
struct nft_chain *chain;
list_for_each_entry(chain, &table->chains, list) {
if (chain->flags & NFT_BASE_CHAIN)
- nf_unregister_hooks(nft_base_chain(chain)->ops,
- afi->nops);
+ nft_unregister_basechain(nft_base_chain(chain),
+ afi->nops);
}
}
@@ -614,11 +641,6 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
if (flags == ctx->table->flags)
return 0;
- if ((ctx->afi->flags & NFT_AF_NEEDS_DEV) &&
- ctx->nla[NFTA_TABLE_DEV] &&
- nla_strcmp(ctx->nla[NFTA_TABLE_DEV], ctx->table->dev->name))
- return -EOPNOTSUPP;
-
trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
sizeof(struct nft_trans_table));
if (trans == NULL)
@@ -656,7 +678,6 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
struct nft_table *table;
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
- struct net_device *dev = NULL;
u32 flags = 0;
struct nft_ctx ctx;
int err;
@@ -691,20 +712,6 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
return -EINVAL;
}
- if (afi->flags & NFT_AF_NEEDS_DEV) {
- char ifname[IFNAMSIZ];
-
- if (!nla[NFTA_TABLE_DEV])
- return -EOPNOTSUPP;
-
- nla_strlcpy(ifname, nla[NFTA_TABLE_DEV], IFNAMSIZ);
- dev = dev_get_by_name(net, ifname);
- if (!dev)
- return -ENOENT;
- } else if (nla[NFTA_TABLE_DEV]) {
- return -EOPNOTSUPP;
- }
-
err = -EAFNOSUPPORT;
if (!try_module_get(afi->owner))
goto err1;
@@ -718,7 +725,6 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
table->flags = flags;
- table->dev = dev;
nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
@@ -732,9 +738,6 @@ err3:
err2:
module_put(afi->owner);
err1:
- if (dev != NULL)
- dev_put(dev);
-
return err;
}
@@ -838,9 +841,6 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
{
BUG_ON(ctx->table->use > 0);
- if (ctx->table->dev)
- dev_put(ctx->table->dev);
-
kfree(ctx->table);
module_put(ctx->afi->owner);
}
@@ -916,6 +916,8 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
[NFTA_HOOK_HOOKNUM] = { .type = NLA_U32 },
[NFTA_HOOK_PRIORITY] = { .type = NLA_U32 },
+ [NFTA_HOOK_DEV] = { .type = NLA_STRING,
+ .len = IFNAMSIZ - 1 },
};
static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats)
@@ -989,6 +991,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
goto nla_put_failure;
+ if (basechain->dev_name[0] &&
+ nla_put_string(skb, NFTA_HOOK_DEV, basechain->dev_name))
+ goto nla_put_failure;
nla_nest_end(skb, nest);
if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
@@ -1200,9 +1205,13 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
BUG_ON(chain->use > 0);
if (chain->flags & NFT_BASE_CHAIN) {
- module_put(nft_base_chain(chain)->type->owner);
- free_percpu(nft_base_chain(chain)->stats);
- kfree(nft_base_chain(chain));
+ struct nft_base_chain *basechain = nft_base_chain(chain);
+
+ module_put(basechain->type->owner);
+ free_percpu(basechain->stats);
+ if (basechain->ops[0].dev != NULL)
+ dev_put(basechain->ops[0].dev);
+ kfree(basechain);
} else {
kfree(chain);
}
@@ -1221,6 +1230,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
struct nlattr *ha[NFTA_HOOK_MAX + 1];
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
+ struct net_device *dev = NULL;
u8 policy = NF_ACCEPT;
u64 handle = 0;
unsigned int i;
@@ -1360,17 +1370,43 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
return -ENOENT;
hookfn = type->hooks[hooknum];
+ if (afi->flags & NFT_AF_NEEDS_DEV) {
+ char ifname[IFNAMSIZ];
+
+ if (!ha[NFTA_HOOK_DEV]) {
+ module_put(type->owner);
+ return -EOPNOTSUPP;
+ }
+
+ nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
+ dev = dev_get_by_name(net, ifname);
+ if (!dev) {
+ module_put(type->owner);
+ return -ENOENT;
+ }
+ } else if (ha[NFTA_HOOK_DEV]) {
+ module_put(type->owner);
+ return -EOPNOTSUPP;
+ }
+
basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
if (basechain == NULL) {
module_put(type->owner);
+ if (dev != NULL)
+ dev_put(dev);
return -ENOMEM;
}
+ if (dev != NULL)
+ strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
+
if (nla[NFTA_CHAIN_COUNTERS]) {
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
if (IS_ERR(stats)) {
module_put(type->owner);
kfree(basechain);
+ if (dev != NULL)
+ dev_put(dev);
return PTR_ERR(stats);
}
basechain->stats = stats;
@@ -1379,6 +1415,8 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
if (stats == NULL) {
module_put(type->owner);
kfree(basechain);
+ if (dev != NULL)
+ dev_put(dev);
return -ENOMEM;
}
rcu_assign_pointer(basechain->stats, stats);
@@ -1396,7 +1434,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
ops->priority = priority;
ops->priv = chain;
ops->hook = afi->hooks[ops->hooknum];
- ops->dev = table->dev;
+ ops->dev = dev;
if (hookfn)
ops->hook = hookfn;
if (afi->hook_ops_init)
@@ -1416,12 +1454,9 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
chain->table = table;
nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
- if (!(table->flags & NFT_TABLE_F_DORMANT) &&
- chain->flags & NFT_BASE_CHAIN) {
- err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops);
- if (err < 0)
- goto err1;
- }
+ err = nf_tables_register_hooks(table, chain, afi->nops);
+ if (err < 0)
+ goto err1;
nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 04cb17057f46..2cae4d4a03b7 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -8,6 +8,7 @@
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/netdevice.h>
#include <net/netfilter/nf_tables.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
@@ -157,6 +158,77 @@ static const struct nf_chain_type nft_filter_chain_netdev = {
.hook_mask = (1 << NF_NETDEV_INGRESS),
};
+static void nft_netdev_event(unsigned long event, struct nft_af_info *afi,
+ struct net_device *dev, struct nft_table *table,
+ struct nft_base_chain *basechain)
+{
+ switch (event) {
+ case NETDEV_REGISTER:
+ if (strcmp(basechain->dev_name, dev->name) != 0)
+ return;
+
+ BUG_ON(!(basechain->flags & NFT_BASECHAIN_DISABLED));
+
+ dev_hold(dev);
+ basechain->ops[0].dev = dev;
+ basechain->flags &= ~NFT_BASECHAIN_DISABLED;
+ if (!(table->flags & NFT_TABLE_F_DORMANT))
+ nft_register_basechain(basechain, afi->nops);
+ break;
+ case NETDEV_UNREGISTER:
+ if (strcmp(basechain->dev_name, dev->name) != 0)
+ return;
+
+ BUG_ON(basechain->flags & NFT_BASECHAIN_DISABLED);
+
+ if (!(table->flags & NFT_TABLE_F_DORMANT))
+ nft_unregister_basechain(basechain, afi->nops);
+
+ dev_put(basechain->ops[0].dev);
+ basechain->ops[0].dev = NULL;
+ basechain->flags |= NFT_BASECHAIN_DISABLED;
+ break;
+ case NETDEV_CHANGENAME:
+ if (dev->ifindex != basechain->ops[0].dev->ifindex)
+ return;
+
+ strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
+ break;
+ }
+}
+
+static int nf_tables_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct nft_af_info *afi;
+ struct nft_table *table;
+ struct nft_chain *chain;
+
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) {
+ if (afi->family != NFPROTO_NETDEV)
+ continue;
+
+ list_for_each_entry(table, &afi->tables, list) {
+ list_for_each_entry(chain, &table->chains, list) {
+ if (!(chain->flags & NFT_BASE_CHAIN))
+ continue;
+
+ nft_netdev_event(event, afi, dev, table,
+ nft_base_chain(chain));
+ }
+ }
+ }
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nf_tables_netdev_notifier = {
+ .notifier_call = nf_tables_netdev_event,
+};
+
static int __init nf_tables_netdev_init(void)
{
int ret;
@@ -166,11 +238,14 @@ static int __init nf_tables_netdev_init(void)
if (ret < 0)
nft_unregister_chain_type(&nft_filter_chain_netdev);
+ register_netdevice_notifier(&nf_tables_netdev_notifier);
+
return ret;
}
static void __exit nf_tables_netdev_exit(void)
{
+ unregister_netdevice_notifier(&nf_tables_netdev_notifier);
unregister_pernet_subsys(&nf_tables_netdev_net_ops);
nft_unregister_chain_type(&nft_filter_chain_netdev);
}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 83032464a4bd..d324fe71260c 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -658,35 +658,23 @@ EXPORT_SYMBOL_GPL(xt_compat_target_to_user);
struct xt_table_info *xt_alloc_table_info(unsigned int size)
{
- struct xt_table_info *newinfo;
- int cpu;
+ struct xt_table_info *info = NULL;
+ size_t sz = sizeof(*info) + size;
/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
return NULL;
- newinfo = kzalloc(XT_TABLE_INFO_SZ, GFP_KERNEL);
- if (!newinfo)
- return NULL;
-
- newinfo->size = size;
-
- for_each_possible_cpu(cpu) {
- if (size <= PAGE_SIZE)
- newinfo->entries[cpu] = kmalloc_node(size,
- GFP_KERNEL,
- cpu_to_node(cpu));
- else
- newinfo->entries[cpu] = vmalloc_node(size,
- cpu_to_node(cpu));
-
- if (newinfo->entries[cpu] == NULL) {
- xt_free_table_info(newinfo);
+ if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
+ info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+ if (!info) {
+ info = vmalloc(sz);
+ if (!info)
return NULL;
- }
}
-
- return newinfo;
+ memset(info, 0, sizeof(*info));
+ info->size = size;
+ return info;
}
EXPORT_SYMBOL(xt_alloc_table_info);
@@ -694,9 +682,6 @@ void xt_free_table_info(struct xt_table_info *info)
{
int cpu;
- for_each_possible_cpu(cpu)
- kvfree(info->entries[cpu]);
-
if (info->jumpstack != NULL) {
for_each_possible_cpu(cpu)
kvfree(info->jumpstack[cpu]);
@@ -705,7 +690,7 @@ void xt_free_table_info(struct xt_table_info *info)
free_percpu(info->stackptr);
- kfree(info);
+ kvfree(info);
}
EXPORT_SYMBOL(xt_free_table_info);
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index b103e9627716..5669e5b453f4 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -9,7 +9,8 @@
*/
/* Kernel module which implements the set match and SET target
- * for netfilter/iptables. */
+ * for netfilter/iptables.
+ */
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -53,6 +54,7 @@ static bool
set_match_v0(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_set_info_match_v0 *info = par->matchinfo;
+
ADT_OPT(opt, par->family, info->match_set.u.compat.dim,
info->match_set.u.compat.flags, 0, UINT_MAX);
@@ -69,10 +71,10 @@ compat_flags(struct xt_set_info_v0 *info)
info->u.compat.dim = IPSET_DIM_ZERO;
if (info->u.flags[0] & IPSET_MATCH_INV)
info->u.compat.flags |= IPSET_INV_MATCH;
- for (i = 0; i < IPSET_DIM_MAX-1 && info->u.flags[i]; i++) {
+ for (i = 0; i < IPSET_DIM_MAX - 1 && info->u.flags[i]; i++) {
info->u.compat.dim++;
if (info->u.flags[i] & IPSET_SRC)
- info->u.compat.flags |= (1<<info->u.compat.dim);
+ info->u.compat.flags |= (1 << info->u.compat.dim);
}
}
@@ -89,7 +91,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
info->match_set.index);
return -ENOENT;
}
- if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) {
+ if (info->match_set.u.flags[IPSET_DIM_MAX - 1] != 0) {
pr_warn("Protocol error: set match dimension is over the limit!\n");
ip_set_nfnl_put(par->net, info->match_set.index);
return -ERANGE;
@@ -115,6 +117,7 @@ static bool
set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_set_info_match_v1 *info = par->matchinfo;
+
ADT_OPT(opt, par->family, info->match_set.dim,
info->match_set.flags, 0, UINT_MAX);
@@ -179,9 +182,10 @@ static bool
set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_set_info_match_v3 *info = par->matchinfo;
+ int ret;
+
ADT_OPT(opt, par->family, info->match_set.dim,
info->match_set.flags, info->flags, UINT_MAX);
- int ret;
if (info->packets.op != IPSET_COUNTER_NONE ||
info->bytes.op != IPSET_COUNTER_NONE)
@@ -225,9 +229,10 @@ static bool
set_match_v4(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_set_info_match_v4 *info = par->matchinfo;
+ int ret;
+
ADT_OPT(opt, par->family, info->match_set.dim,
info->match_set.flags, info->flags, UINT_MAX);
- int ret;
if (info->packets.op != IPSET_COUNTER_NONE ||
info->bytes.op != IPSET_COUNTER_NONE)
@@ -253,6 +258,7 @@ static unsigned int
set_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_set_info_target_v0 *info = par->targinfo;
+
ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim,
info->add_set.u.compat.flags, 0, UINT_MAX);
ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim,
@@ -291,8 +297,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
return -ENOENT;
}
}
- if (info->add_set.u.flags[IPSET_DIM_MAX-1] != 0 ||
- info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) {
+ if (info->add_set.u.flags[IPSET_DIM_MAX - 1] != 0 ||
+ info->del_set.u.flags[IPSET_DIM_MAX - 1] != 0) {
pr_warn("Protocol error: SET target dimension is over the limit!\n");
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
@@ -325,6 +331,7 @@ static unsigned int
set_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_set_info_target_v1 *info = par->targinfo;
+
ADT_OPT(add_opt, par->family, info->add_set.dim,
info->add_set.flags, 0, UINT_MAX);
ADT_OPT(del_opt, par->family, info->del_set.dim,
@@ -393,6 +400,7 @@ static unsigned int
set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_set_info_target_v2 *info = par->targinfo;
+
ADT_OPT(add_opt, par->family, info->add_set.dim,
info->add_set.flags, info->flags, info->timeout);
ADT_OPT(del_opt, par->family, info->del_set.dim,
@@ -400,8 +408,8 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
/* Normalize to fit into jiffies */
if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
- add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC)
- add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC;
+ add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC)
+ add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC;
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_add(info->add_set.index, skb, par, &add_opt);
if (info->del_set.index != IPSET_INVALID_ID)
@@ -419,6 +427,8 @@ static unsigned int
set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_set_info_target_v3 *info = par->targinfo;
+ int ret;
+
ADT_OPT(add_opt, par->family, info->add_set.dim,
info->add_set.flags, info->flags, info->timeout);
ADT_OPT(del_opt, par->family, info->del_set.dim,
@@ -426,12 +436,10 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
ADT_OPT(map_opt, par->family, info->map_set.dim,
info->map_set.flags, 0, UINT_MAX);
- int ret;
-
/* Normalize to fit into jiffies */
if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
- add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC)
- add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC;
+ add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC)
+ add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC;
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_add(info->add_set.index, skb, par, &add_opt);
if (info->del_set.index != IPSET_INVALID_ID)
@@ -457,7 +465,6 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-
static int
set_target_v3_checkentry(const struct xt_tgchk_param *par)
{
@@ -497,8 +504,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
!(par->hook_mask & (1 << NF_INET_FORWARD |
1 << NF_INET_LOCAL_OUT |
1 << NF_INET_POST_ROUTING))) {
- pr_warn("mapping of prio or/and queue is allowed only"
- "from OUTPUT/FORWARD/POSTROUTING chains\n");
+ pr_warn("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n");
return -EINVAL;
}
index = ip_set_nfnl_get_byindex(par->net,
@@ -519,8 +525,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
if (info->add_set.dim > IPSET_DIM_MAX ||
info->del_set.dim > IPSET_DIM_MAX ||
info->map_set.dim > IPSET_DIM_MAX) {
- pr_warn("Protocol error: SET target dimension "
- "is over the limit!\n");
+ pr_warn("Protocol error: SET target dimension is over the limit!\n");
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
if (info->del_set.index != IPSET_INVALID_ID)
@@ -546,7 +551,6 @@ set_target_v3_destroy(const struct xt_tgdtor_param *par)
ip_set_nfnl_put(par->net, info->map_set.index);
}
-
static struct xt_match set_matches[] __read_mostly = {
{
.name = "set",