aboutsummaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c54
-rw-r--r--net/core/dev.c61
-rw-r--r--net/core/dev_mcast.c28
-rw-r--r--net/core/dst.c11
-rw-r--r--net/core/fib_rules.c104
-rw-r--r--net/core/flow.c3
-rw-r--r--net/core/gen_estimator.c23
-rw-r--r--net/core/gen_stats.c10
-rw-r--r--net/core/neighbour.c262
-rw-r--r--net/core/net-sysfs.c20
-rw-r--r--net/core/net_namespace.c9
-rw-r--r--net/core/netpoll.c62
-rw-r--r--net/core/pktgen.c104
-rw-r--r--net/core/request_sock.c5
-rw-r--r--net/core/rtnetlink.c56
-rw-r--r--net/core/skbuff.c246
-rw-r--r--net/core/sock.c200
-rw-r--r--net/core/stream.c85
-rw-r--r--net/core/sysctl_net_core.c70
-rw-r--r--net/core/utils.c27
20 files changed, 839 insertions, 601 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 029b93e246b4..8a28fc93b724 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -115,10 +115,10 @@ out_noerr:
}
/**
- * skb_recv_datagram - Receive a datagram skbuff
+ * __skb_recv_datagram - Receive a datagram skbuff
* @sk: socket
* @flags: MSG_ flags
- * @noblock: blocking operation?
+ * @peeked: returns non-zero if this packet has been seen before
* @err: error code returned
*
* Get a datagram skbuff, understands the peeking, nonblocking wakeups
@@ -143,8 +143,8 @@ out_noerr:
* quite explicitly by POSIX 1003.1g, don't change them without having
* the standard around please.
*/
-struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
- int noblock, int *err)
+struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
+ int *peeked, int *err)
{
struct sk_buff *skb;
long timeo;
@@ -156,7 +156,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
if (error)
goto no_packet;
- timeo = sock_rcvtimeo(sk, noblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do {
/* Again only user level code calls this function, so nothing
@@ -165,18 +165,19 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
* Look at current nfs client by the way...
* However, this function was corrent in any case. 8)
*/
- if (flags & MSG_PEEK) {
- unsigned long cpu_flags;
-
- spin_lock_irqsave(&sk->sk_receive_queue.lock,
- cpu_flags);
- skb = skb_peek(&sk->sk_receive_queue);
- if (skb)
+ unsigned long cpu_flags;
+
+ spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
+ skb = skb_peek(&sk->sk_receive_queue);
+ if (skb) {
+ *peeked = skb->peeked;
+ if (flags & MSG_PEEK) {
+ skb->peeked = 1;
atomic_inc(&skb->users);
- spin_unlock_irqrestore(&sk->sk_receive_queue.lock,
- cpu_flags);
- } else
- skb = skb_dequeue(&sk->sk_receive_queue);
+ } else
+ __skb_unlink(skb, &sk->sk_receive_queue);
+ }
+ spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);
if (skb)
return skb;
@@ -194,10 +195,21 @@ no_packet:
*err = error;
return NULL;
}
+EXPORT_SYMBOL(__skb_recv_datagram);
+
+struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
+ int noblock, int *err)
+{
+ int peeked;
+
+ return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
+ &peeked, err);
+}
void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
kfree_skb(skb);
+ sk_mem_reclaim(sk);
}
/**
@@ -217,20 +229,28 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
* This function currently only disables BH when acquiring the
* sk_receive_queue lock. Therefore it must not be used in a
* context where that lock is acquired in an IRQ context.
+ *
+ * It returns 0 if the packet was removed by us.
*/
-void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
+int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
+ int err = 0;
+
if (flags & MSG_PEEK) {
+ err = -ENOENT;
spin_lock_bh(&sk->sk_receive_queue.lock);
if (skb == skb_peek(&sk->sk_receive_queue)) {
__skb_unlink(skb, &sk->sk_receive_queue);
atomic_dec(&skb->users);
+ err = 0;
}
spin_unlock_bh(&sk->sk_receive_queue.lock);
}
kfree_skb(skb);
+ sk_mem_reclaim(sk);
+ return err;
}
EXPORT_SYMBOL(skb_kill_datagram);
diff --git a/net/core/dev.c b/net/core/dev.c
index 0879f52115eb..c9c593e1ba6f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -150,8 +150,11 @@
* 86DD IPv6
*/
+#define PTYPE_HASH_SIZE (16)
+#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
+
static DEFINE_SPINLOCK(ptype_lock);
-static struct list_head ptype_base[16] __read_mostly; /* 16 way hashed list */
+static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly; /* Taps */
#ifdef CONFIG_NET_DMA
@@ -362,7 +365,7 @@ void dev_add_pack(struct packet_type *pt)
if (pt->type == htons(ETH_P_ALL))
list_add_rcu(&pt->list, &ptype_all);
else {
- hash = ntohs(pt->type) & 15;
+ hash = ntohs(pt->type) & PTYPE_HASH_MASK;
list_add_rcu(&pt->list, &ptype_base[hash]);
}
spin_unlock_bh(&ptype_lock);
@@ -391,7 +394,7 @@ void __dev_remove_pack(struct packet_type *pt)
if (pt->type == htons(ETH_P_ALL))
head = &ptype_all;
else
- head = &ptype_base[ntohs(pt->type) & 15];
+ head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
list_for_each_entry(pt1, head, list) {
if (pt == pt1) {
@@ -672,7 +675,7 @@ struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *h
ASSERT_RTNL();
- for_each_netdev(&init_net, dev)
+ for_each_netdev(net, dev)
if (dev->type == type &&
!memcmp(dev->dev_addr, ha, dev->addr_len))
return dev;
@@ -1420,7 +1423,8 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
}
rcu_read_lock();
- list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
+ list_for_each_entry_rcu(ptype,
+ &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
err = ptype->gso_send_check(skb);
@@ -2077,7 +2081,8 @@ ncls:
goto out;
type = skb->protocol;
- list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
+ list_for_each_entry_rcu(ptype,
+ &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev)
@@ -2363,8 +2368,9 @@ static int dev_ifconf(struct net *net, char __user *arg)
* in detail.
*/
void *dev_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(dev_base_lock)
{
- struct net *net = seq->private;
+ struct net *net = seq_file_net(seq);
loff_t off;
struct net_device *dev;
@@ -2382,13 +2388,14 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct net *net = seq->private;
+ struct net *net = seq_file_net(seq);
++*pos;
return v == SEQ_START_TOKEN ?
first_net_device(net) : next_net_device((struct net_device *)v);
}
void dev_seq_stop(struct seq_file *seq, void *v)
+ __releases(dev_base_lock)
{
read_unlock(&dev_base_lock);
}
@@ -2481,26 +2488,8 @@ static const struct seq_operations dev_seq_ops = {
static int dev_seq_open(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
- int res;
- res = seq_open(file, &dev_seq_ops);
- if (!res) {
- seq = file->private_data;
- seq->private = get_proc_net(inode);
- if (!seq->private) {
- seq_release(inode, file);
- res = -ENXIO;
- }
- }
- return res;
-}
-
-static int dev_seq_release(struct inode *inode, struct file *file)
-{
- struct seq_file *seq = file->private_data;
- struct net *net = seq->private;
- put_net(net);
- return seq_release(inode, file);
+ return seq_open_net(inode, file, &dev_seq_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations dev_seq_fops = {
@@ -2508,7 +2497,7 @@ static const struct file_operations dev_seq_fops = {
.open = dev_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = dev_seq_release,
+ .release = seq_release_net,
};
static const struct seq_operations softnet_seq_ops = {
@@ -2543,7 +2532,7 @@ static void *ptype_get_idx(loff_t pos)
++i;
}
- for (t = 0; t < 16; t++) {
+ for (t = 0; t < PTYPE_HASH_SIZE; t++) {
list_for_each_entry_rcu(pt, &ptype_base[t], list) {
if (i == pos)
return pt;
@@ -2554,6 +2543,7 @@ static void *ptype_get_idx(loff_t pos)
}
static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
{
rcu_read_lock();
return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
@@ -2577,10 +2567,10 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
hash = 0;
nxt = ptype_base[0].next;
} else
- hash = ntohs(pt->type) & 15;
+ hash = ntohs(pt->type) & PTYPE_HASH_MASK;
while (nxt == &ptype_base[hash]) {
- if (++hash >= 16)
+ if (++hash >= PTYPE_HASH_SIZE)
return NULL;
nxt = ptype_base[hash].next;
}
@@ -2589,6 +2579,7 @@ found:
}
static void ptype_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
{
rcu_read_unlock();
}
@@ -3505,7 +3496,7 @@ static int dev_new_index(struct net *net)
/* Delayed registration/unregisteration */
static DEFINE_SPINLOCK(net_todo_list_lock);
-static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
+static LIST_HEAD(net_todo_list);
static void net_set_todo(struct net_device *dev)
{
@@ -3984,6 +3975,8 @@ void synchronize_net(void)
void unregister_netdevice(struct net_device *dev)
{
+ ASSERT_RTNL();
+
rollback_registered(dev);
/* Finish processing unregister after unlock */
net_set_todo(dev);
@@ -4416,7 +4409,7 @@ static int __init net_dev_init(void)
goto out;
INIT_LIST_HEAD(&ptype_all);
- for (i = 0; i < 16; i++)
+ for (i = 0; i < PTYPE_HASH_SIZE; i++)
INIT_LIST_HEAD(&ptype_base[i]);
if (register_pernet_subsys(&netdev_net_ops))
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 69fff16ece10..cadbfbf7e7f5 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -186,8 +186,9 @@ EXPORT_SYMBOL(dev_mc_unsync);
#ifdef CONFIG_PROC_FS
static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(dev_base_lock)
{
- struct net *net = seq->private;
+ struct net *net = seq_file_net(seq);
struct net_device *dev;
loff_t off = 0;
@@ -206,6 +207,7 @@ static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void dev_mc_seq_stop(struct seq_file *seq, void *v)
+ __releases(dev_base_lock)
{
read_unlock(&dev_base_lock);
}
@@ -241,26 +243,8 @@ static const struct seq_operations dev_mc_seq_ops = {
static int dev_mc_seq_open(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
- int res;
- res = seq_open(file, &dev_mc_seq_ops);
- if (!res) {
- seq = file->private_data;
- seq->private = get_proc_net(inode);
- if (!seq->private) {
- seq_release(inode, file);
- res = -ENXIO;
- }
- }
- return res;
-}
-
-static int dev_mc_seq_release(struct inode *inode, struct file *file)
-{
- struct seq_file *seq = file->private_data;
- struct net *net = seq->private;
- put_net(net);
- return seq_release(inode, file);
+ return seq_open_net(inode, file, &dev_mc_seq_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations dev_mc_seq_fops = {
@@ -268,7 +252,7 @@ static const struct file_operations dev_mc_seq_fops = {
.open = dev_mc_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = dev_mc_seq_release,
+ .release = seq_release_net,
};
#endif
diff --git a/net/core/dst.c b/net/core/dst.c
index 03daead3592a..7deef483c79f 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -153,18 +153,19 @@ loop:
#endif
}
-static int dst_discard(struct sk_buff *skb)
+int dst_discard(struct sk_buff *skb)
{
kfree_skb(skb);
return 0;
}
+EXPORT_SYMBOL(dst_discard);
void * dst_alloc(struct dst_ops * ops)
{
struct dst_entry * dst;
if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
- if (ops->gc())
+ if (ops->gc(ops))
return NULL;
}
dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC);
@@ -278,13 +279,13 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
if (!unregister) {
dst->input = dst->output = dst_discard;
} else {
- dst->dev = init_net.loopback_dev;
+ dst->dev = dst->dev->nd_net->loopback_dev;
dev_hold(dst->dev);
dev_put(dev);
if (dst->neighbour && dst->neighbour->dev == dev) {
- dst->neighbour->dev = init_net.loopback_dev;
+ dst->neighbour->dev = dst->dev;
+ dev_hold(dst->dev);
dev_put(dev);
- dev_hold(dst->neighbour->dev);
}
}
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 848132b6cb73..42ccaf5b8509 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -15,9 +15,6 @@
#include <net/sock.h>
#include <net/fib_rules.h>
-static LIST_HEAD(rules_ops);
-static DEFINE_SPINLOCK(rules_mod_lock);
-
int fib_default_rule_add(struct fib_rules_ops *ops,
u32 pref, u32 table, u32 flags)
{
@@ -32,6 +29,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->pref = pref;
r->table = table;
r->flags = flags;
+ r->fr_net = ops->fro_net;
/* The lock is not required here, the list in unreacheable
* at the moment this function is called */
@@ -44,12 +42,12 @@ static void notify_rule_change(int event, struct fib_rule *rule,
struct fib_rules_ops *ops, struct nlmsghdr *nlh,
u32 pid);
-static struct fib_rules_ops *lookup_rules_ops(int family)
+static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
{
struct fib_rules_ops *ops;
rcu_read_lock();
- list_for_each_entry_rcu(ops, &rules_ops, list) {
+ list_for_each_entry_rcu(ops, &net->rules_ops, list) {
if (ops->family == family) {
if (!try_module_get(ops->owner))
ops = NULL;
@@ -78,6 +76,9 @@ int fib_rules_register(struct fib_rules_ops *ops)
{
int err = -EEXIST;
struct fib_rules_ops *o;
+ struct net *net;
+
+ net = ops->fro_net;
if (ops->rule_size < sizeof(struct fib_rule))
return -EINVAL;
@@ -87,22 +88,23 @@ int fib_rules_register(struct fib_rules_ops *ops)
ops->action == NULL)
return -EINVAL;
- spin_lock(&rules_mod_lock);
- list_for_each_entry(o, &rules_ops, list)
+ spin_lock(&net->rules_mod_lock);
+ list_for_each_entry(o, &net->rules_ops, list)
if (ops->family == o->family)
goto errout;
- list_add_tail_rcu(&ops->list, &rules_ops);
+ hold_net(net);
+ list_add_tail_rcu(&ops->list, &net->rules_ops);
err = 0;
errout:
- spin_unlock(&rules_mod_lock);
+ spin_unlock(&net->rules_mod_lock);
return err;
}
EXPORT_SYMBOL_GPL(fib_rules_register);
-static void cleanup_ops(struct fib_rules_ops *ops)
+void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
{
struct fib_rule *rule, *tmp;
@@ -111,28 +113,19 @@ static void cleanup_ops(struct fib_rules_ops *ops)
fib_rule_put(rule);
}
}
+EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops);
-int fib_rules_unregister(struct fib_rules_ops *ops)
+void fib_rules_unregister(struct fib_rules_ops *ops)
{
- int err = 0;
- struct fib_rules_ops *o;
-
- spin_lock(&rules_mod_lock);
- list_for_each_entry(o, &rules_ops, list) {
- if (o == ops) {
- list_del_rcu(&o->list);
- cleanup_ops(ops);
- goto out;
- }
- }
+ struct net *net = ops->fro_net;
- err = -ENOENT;
-out:
- spin_unlock(&rules_mod_lock);
+ spin_lock(&net->rules_mod_lock);
+ list_del_rcu(&ops->list);
+ fib_rules_cleanup_ops(ops);
+ spin_unlock(&net->rules_mod_lock);
synchronize_rcu();
-
- return err;
+ release_net(net);
}
EXPORT_SYMBOL_GPL(fib_rules_unregister);
@@ -231,7 +224,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
goto errout;
- ops = lookup_rules_ops(frh->family);
+ ops = lookup_rules_ops(net, frh->family);
if (ops == NULL) {
err = EAFNOSUPPORT;
goto errout;
@@ -250,6 +243,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
err = -ENOMEM;
goto errout;
}
+ rule->fr_net = net;
if (tb[FRA_PRIORITY])
rule->pref = nla_get_u32(tb[FRA_PRIORITY]);
@@ -281,7 +275,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
rule->table = frh_get_table(frh, tb);
if (!rule->pref && ops->default_pref)
- rule->pref = ops->default_pref();
+ rule->pref = ops->default_pref(ops);
err = -EINVAL;
if (tb[FRA_GOTO]) {
@@ -358,6 +352,7 @@ errout:
static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
+ struct net *net = skb->sk->sk_net;
struct fib_rule_hdr *frh = nlmsg_data(nlh);
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule, *tmp;
@@ -367,7 +362,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
goto errout;
- ops = lookup_rules_ops(frh->family);
+ ops = lookup_rules_ops(net, frh->family);
if (ops == NULL) {
err = EAFNOSUPPORT;
goto errout;
@@ -539,13 +534,14 @@ skip:
static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = skb->sk->sk_net;
struct fib_rules_ops *ops;
int idx = 0, family;
family = rtnl_msg_family(cb->nlh);
if (family != AF_UNSPEC) {
/* Protocol specific dump request */
- ops = lookup_rules_ops(family);
+ ops = lookup_rules_ops(net, family);
if (ops == NULL)
return -EAFNOSUPPORT;
@@ -553,7 +549,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
}
rcu_read_lock();
- list_for_each_entry_rcu(ops, &rules_ops, list) {
+ list_for_each_entry_rcu(ops, &net->rules_ops, list) {
if (idx < cb->args[0] || !try_module_get(ops->owner))
goto skip;
@@ -574,9 +570,11 @@ static void notify_rule_change(int event, struct fib_rule *rule,
struct fib_rules_ops *ops, struct nlmsghdr *nlh,
u32 pid)
{
+ struct net *net;
struct sk_buff *skb;
int err = -ENOBUFS;
+ net = ops->fro_net;
skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
if (skb == NULL)
goto errout;
@@ -588,10 +586,11 @@ static void notify_rule_change(int event, struct fib_rule *rule,
kfree_skb(skb);
goto errout;
}
- err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL);
+
+ err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
errout:
if (err < 0)
- rtnl_set_sk_err(ops->nlgroup, err);
+ rtnl_set_sk_err(net, ops->nlgroup, err);
}
static void attach_rules(struct list_head *rules, struct net_device *dev)
@@ -619,22 +618,20 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = ptr;
+ struct net *net = dev->nd_net;
struct fib_rules_ops *ops;
- if (dev->nd_net != &init_net)
- return NOTIFY_DONE;
-
ASSERT_RTNL();
rcu_read_lock();
switch (event) {
case NETDEV_REGISTER:
- list_for_each_entry(ops, &rules_ops, list)
+ list_for_each_entry(ops, &net->rules_ops, list)
attach_rules(&ops->rules_list, dev);
break;
case NETDEV_UNREGISTER:
- list_for_each_entry(ops, &rules_ops, list)
+ list_for_each_entry(ops, &net->rules_ops, list)
detach_rules(&ops->rules_list, dev);
break;
}
@@ -648,13 +645,40 @@ static struct notifier_block fib_rules_notifier = {
.notifier_call = fib_rules_event,
};
+static int fib_rules_net_init(struct net *net)
+{
+ INIT_LIST_HEAD(&net->rules_ops);
+ spin_lock_init(&net->rules_mod_lock);
+ return 0;
+}
+
+static struct pernet_operations fib_rules_net_ops = {
+ .init = fib_rules_net_init,
+};
+
static int __init fib_rules_init(void)
{
+ int err;
rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
- return register_netdevice_notifier(&fib_rules_notifier);
+ err = register_netdevice_notifier(&fib_rules_notifier);
+ if (err < 0)
+ goto fail;
+
+ err = register_pernet_subsys(&fib_rules_net_ops);
+ if (err < 0)
+ goto fail_unregister;
+ return 0;
+
+fail_unregister:
+ unregister_netdevice_notifier(&fib_rules_notifier);
+fail:
+ rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
+ rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
+ rtnl_unregister(PF_UNSPEC, RTM_GETRULE);
+ return err;
}
subsys_initcall(fib_rules_init);
diff --git a/net/core/flow.c b/net/core/flow.c
index 6489f4e24ecf..46b38e06e0d7 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -352,8 +352,7 @@ static int __init flow_cache_init(void)
flow_lwm = 2 * flow_hash_size;
flow_hwm = 4 * flow_hash_size;
- init_timer(&flow_hash_rnd_timer);
- flow_hash_rnd_timer.function = flow_cache_new_hashrnd;
+ setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0);
flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
add_timer(&flow_hash_rnd_timer);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index daadbcc4e8dd..57abe8266be1 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -135,7 +135,7 @@ skip:
}
if (!list_empty(&elist[idx].list))
- mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
+ mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
rcu_read_unlock();
}
@@ -159,13 +159,13 @@ skip:
int gen_new_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est,
spinlock_t *stats_lock,
- struct rtattr *opt)
+ struct nlattr *opt)
{
struct gen_estimator *est;
- struct gnet_estimator *parm = RTA_DATA(opt);
+ struct gnet_estimator *parm = nla_data(opt);
int idx;
- if (RTA_PAYLOAD(opt) < sizeof(*parm))
+ if (nla_len(opt) < sizeof(*parm))
return -EINVAL;
if (parm->interval < -2 || parm->interval > 3)
@@ -191,7 +191,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
}
if (list_empty(&elist[idx].list))
- mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
+ mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
list_add_rcu(&est->list, &elist[idx].list);
return 0;
@@ -241,7 +241,7 @@ void gen_kill_estimator(struct gnet_stats_basic *bstats,
}
/**
- * gen_replace_estimator - replace rate estimator configruation
+ * gen_replace_estimator - replace rate estimator configuration
* @bstats: basic statistics
* @rate_est: rate estimator statistics
* @stats_lock: statistics lock
@@ -252,13 +252,12 @@ void gen_kill_estimator(struct gnet_stats_basic *bstats,
*
* Returns 0 on success or a negative error code.
*/
-int
-gen_replace_estimator(struct gnet_stats_basic *bstats,
- struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock,
- struct rtattr *opt)
+int gen_replace_estimator(struct gnet_stats_basic *bstats,
+ struct gnet_stats_rate_est *rate_est,
+ spinlock_t *stats_lock, struct nlattr *opt)
{
- gen_kill_estimator(bstats, rate_est);
- return gen_new_estimator(bstats, rate_est, stats_lock, opt);
+ gen_kill_estimator(bstats, rate_est);
+ return gen_new_estimator(bstats, rate_est, stats_lock, opt);
}
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index bcc25591d8ac..c3d0ffeac243 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -20,16 +20,17 @@
#include <linux/socket.h>
#include <linux/rtnetlink.h>
#include <linux/gen_stats.h>
+#include <net/netlink.h>
#include <net/gen_stats.h>
static inline int
gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size)
{
- RTA_PUT(d->skb, type, size, buf);
+ NLA_PUT(d->skb, type, size, buf);
return 0;
-rtattr_failure:
+nla_put_failure:
spin_unlock_bh(d->lock);
return -1;
}
@@ -55,13 +56,14 @@ rtattr_failure:
int
gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
int xstats_type, spinlock_t *lock, struct gnet_dump *d)
+ __acquires(lock)
{
memset(d, 0, sizeof(*d));
spin_lock_bh(lock);
d->lock = lock;
if (type)
- d->tail = (struct rtattr *)skb_tail_pointer(skb);
+ d->tail = (struct nlattr *)skb_tail_pointer(skb);
d->skb = skb;
d->compat_tc_stats = tc_stats_type;
d->compat_xstats = xstats_type;
@@ -212,7 +214,7 @@ int
gnet_stats_finish_copy(struct gnet_dump *d)
{
if (d->tail)
- d->tail->rta_len = skb_tail_pointer(d->skb) - (u8 *)d->tail;
+ d->tail->nla_len = skb_tail_pointer(d->skb) - (u8 *)d->tail;
if (d->compat_tc_stats)
if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 29b8ee4e35d6..a16cf1ec5e5e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -59,7 +59,6 @@ static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
-void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
@@ -165,6 +164,16 @@ static int neigh_forced_gc(struct neigh_table *tbl)
return shrunk;
}
+static void neigh_add_timer(struct neighbour *n, unsigned long when)
+{
+ neigh_hold(n);
+ if (unlikely(mod_timer(&n->timer, when))) {
+ printk("NEIGH: BUG, double timer add, state is %x\n",
+ n->nud_state);
+ dump_stack();
+ }
+}
+
static int neigh_del_timer(struct neighbour *n)
{
if ((n->nud_state & NUD_IN_TIMER) &&
@@ -270,9 +279,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
n->nud_state = NUD_NONE;
n->output = neigh_blackhole;
n->parms = neigh_parms_clone(&tbl->parms);
- init_timer(&n->timer);
- n->timer.function = neigh_timer_handler;
- n->timer.data = (unsigned long)n;
+ setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
NEIGH_CACHE_STAT_INC(tbl, allocs);
n->tbl = tbl;
@@ -367,7 +374,8 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
return n;
}
-struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
+struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
+ const void *pkey)
{
struct neighbour *n;
int key_len = tbl->key_len;
@@ -377,7 +385,8 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
read_lock_bh(&tbl->lock);
for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
- if (!memcmp(n->primary_key, pkey, key_len)) {
+ if (!memcmp(n->primary_key, pkey, key_len) &&
+ (net == n->dev->nd_net)) {
neigh_hold(n);
NEIGH_CACHE_STAT_INC(tbl, hits);
break;
@@ -455,7 +464,8 @@ out_neigh_release:
goto out;
}
-struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
+struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
+ struct net *net, const void *pkey,
struct net_device *dev, int creat)
{
struct pneigh_entry *n;
@@ -471,6 +481,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
if (!memcmp(n->key, pkey, key_len) &&
+ (n->net == net) &&
(n->dev == dev || !n->dev)) {
read_unlock_bh(&tbl->lock);
goto out;
@@ -487,6 +498,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
if (!n)
goto out;
+ n->net = hold_net(net);
memcpy(n->key, pkey, key_len);
n->dev = dev;
if (dev)
@@ -509,7 +521,7 @@ out:
}
-int pneigh_delete(struct neigh_table *tbl, const void *pkey,
+int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
struct net_device *dev)
{
struct pneigh_entry *n, **np;
@@ -524,13 +536,15 @@ int pneigh_delete(struct neigh_table *tbl, const void *pkey,
write_lock_bh(&tbl->lock);
for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
np = &n->next) {
- if (!memcmp(n->key, pkey, key_len) && n->dev == dev) {
+ if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
+ (n->net == net)) {
*np = n->next;
write_unlock_bh(&tbl->lock);
if (tbl->pdestructor)
tbl->pdestructor(n);
if (n->dev)
dev_put(n->dev);
+ release_net(n->net);
kfree(n);
return 0;
}
@@ -553,6 +567,7 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
tbl->pdestructor(n);
if (n->dev)
dev_put(n->dev);
+ release_net(n->net);
kfree(n);
continue;
}
@@ -562,6 +577,13 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
return -ENOENT;
}
+static void neigh_parms_destroy(struct neigh_parms *parms);
+
+static inline void neigh_parms_put(struct neigh_parms *parms)
+{
+ if (atomic_dec_and_test(&parms->refcnt))
+ neigh_parms_destroy(parms);
+}
/*
* neighbour must already be out of the table;
@@ -718,15 +740,6 @@ static __inline__ int neigh_max_probes(struct neighbour *n)
p->ucast_probes + p->app_probes + p->mcast_probes);
}
-static inline void neigh_add_timer(struct neighbour *n, unsigned long when)
-{
- if (unlikely(mod_timer(&n->timer, when))) {
- printk("NEIGH: BUG, double timer add, state is %x\n",
- n->nud_state);
- dump_stack();
- }
-}
-
/* Called when a timer expires for a neighbour entry. */
static void neigh_timer_handler(unsigned long arg)
@@ -858,7 +871,6 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
atomic_set(&neigh->probes, neigh->parms->ucast_probes);
neigh->nud_state = NUD_INCOMPLETE;
neigh->updated = jiffies;
- neigh_hold(neigh);
neigh_add_timer(neigh, now + 1);
} else {
neigh->nud_state = NUD_FAILED;
@@ -871,7 +883,6 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
}
} else if (neigh->nud_state & NUD_STALE) {
NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
- neigh_hold(neigh);
neigh->nud_state = NUD_DELAY;
neigh->updated = jiffies;
neigh_add_timer(neigh,
@@ -1015,13 +1026,11 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
if (new != old) {
neigh_del_timer(neigh);
- if (new & NUD_IN_TIMER) {
- neigh_hold(neigh);
+ if (new & NUD_IN_TIMER)
neigh_add_timer(neigh, (jiffies +
((new & NUD_REACHABLE) ?
neigh->parms->reachable_time :
0)));
- }
neigh->nud_state = new;
}
@@ -1266,27 +1275,49 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
spin_unlock(&tbl->proxy_queue.lock);
}
+static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
+ struct net *net, int ifindex)
+{
+ struct neigh_parms *p;
+
+ for (p = &tbl->parms; p; p = p->next) {
+ if (p->net != net)
+ continue;
+ if ((p->dev && p->dev->ifindex == ifindex) ||
+ (!p->dev && !ifindex))
+ return p;
+ }
+
+ return NULL;
+}
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
struct neigh_table *tbl)
{
- struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
+ struct neigh_parms *p, *ref;
+ struct net *net;
+
+ net = dev->nd_net;
+ ref = lookup_neigh_params(tbl, net, 0);
+ if (!ref)
+ return NULL;
+ p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
if (p) {
p->tbl = tbl;
atomic_set(&p->refcnt, 1);
INIT_RCU_HEAD(&p->rcu_head);
p->reachable_time =
neigh_rand_reach_time(p->base_reachable_time);
- if (dev) {
- if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
- kfree(p);
- return NULL;
- }
- dev_hold(dev);
- p->dev = dev;
+ if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
+ kfree(p);
+ return NULL;
}
+
+ dev_hold(dev);
+ p->dev = dev;
+ p->net = hold_net(net);
p->sysctl_table = NULL;
write_lock_bh(&tbl->lock);
p->next = tbl->parms.next;
@@ -1326,8 +1357,9 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
-void neigh_parms_destroy(struct neigh_parms *parms)
+static void neigh_parms_destroy(struct neigh_parms *parms)
{
+ release_net(parms->net);
kfree(parms);
}
@@ -1338,6 +1370,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
unsigned long now = jiffies;
unsigned long phsize;
+ tbl->parms.net = &init_net;
atomic_set(&tbl->parms.refcnt, 1);
INIT_RCU_HEAD(&tbl->parms.rcu_head);
tbl->parms.reachable_time =
@@ -1372,15 +1405,11 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
rwlock_init(&tbl->lock);
- init_timer(&tbl->gc_timer);
- tbl->gc_timer.data = (unsigned long)tbl;
- tbl->gc_timer.function = neigh_periodic_timer;
+ setup_timer(&tbl->gc_timer, neigh_periodic_timer, (unsigned long)tbl);
tbl->gc_timer.expires = now + 1;
add_timer(&tbl->gc_timer);
- init_timer(&tbl->proxy_timer);
- tbl->proxy_timer.data = (unsigned long)tbl;
- tbl->proxy_timer.function = neigh_proxy_process;
+ setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
skb_queue_head_init_class(&tbl->proxy_queue,
&neigh_table_proxy_queue_class);
@@ -1483,7 +1512,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
goto out_dev_put;
if (ndm->ndm_flags & NTF_PROXY) {
- err = pneigh_delete(tbl, nla_data(dst_attr), dev);
+ err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
goto out_dev_put;
}
@@ -1560,7 +1589,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
struct pneigh_entry *pn;
err = -ENOBUFS;
- pn = pneigh_lookup(tbl, dst, dev, 1);
+ pn = pneigh_lookup(tbl, net, dst, dev, 1);
if (pn) {
pn->flags = ndm->ndm_flags;
err = 0;
@@ -1755,19 +1784,6 @@ errout:
return -EMSGSIZE;
}
-static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
- int ifindex)
-{
- struct neigh_parms *p;
-
- for (p = &tbl->parms; p; p = p->next)
- if ((p->dev && p->dev->ifindex == ifindex) ||
- (!p->dev && !ifindex))
- return p;
-
- return NULL;
-}
-
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
[NDTA_NAME] = { .type = NLA_STRING },
[NDTA_THRESH1] = { .type = NLA_U32 },
@@ -1795,6 +1811,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
+ struct net *net = skb->sk->sk_net;
struct neigh_table *tbl;
struct ndtmsg *ndtmsg;
struct nlattr *tb[NDTA_MAX+1];
@@ -1844,7 +1861,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
if (tbp[NDTPA_IFINDEX])
ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
- p = lookup_neigh_params(tbl, ifindex);
+ p = lookup_neigh_params(tbl, net, ifindex);
if (p == NULL) {
err = -ENOENT;
goto errout_tbl_lock;
@@ -1919,6 +1936,7 @@ errout:
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = skb->sk->sk_net;
int family, tidx, nidx = 0;
int tbl_skip = cb->args[0];
int neigh_skip = cb->args[1];
@@ -1938,8 +1956,11 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
NLM_F_MULTI) <= 0)
break;
- for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) {
- if (nidx < neigh_skip)
+ for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
+ if (net != p->net)
+ continue;
+
+ if (nidx++ < neigh_skip)
continue;
if (neightbl_fill_param_info(skb, tbl, p,
@@ -2015,6 +2036,7 @@ static void neigh_update_notify(struct neighbour *neigh)
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
struct netlink_callback *cb)
{
+ struct net * net = skb->sk->sk_net;
struct neighbour *n;
int rc, h, s_h = cb->args[1];
int idx, s_idx = idx = cb->args[2];
@@ -2025,8 +2047,12 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
continue;
if (h > s_h)
s_idx = 0;
- for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
- if (idx < s_idx)
+ for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) {
+ int lidx;
+ if (n->dev->nd_net != net)
+ continue;
+ lidx = idx++;
+ if (lidx < s_idx)
continue;
if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
@@ -2118,6 +2144,7 @@ EXPORT_SYMBOL(__neigh_for_each_release);
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
struct neigh_seq_state *state = seq->private;
+ struct net *net = state->p.net;
struct neigh_table *tbl = state->tbl;
struct neighbour *n = NULL;
int bucket = state->bucket;
@@ -2127,6 +2154,8 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
n = tbl->hash_buckets[bucket];
while (n) {
+ if (n->dev->nd_net != net)
+ goto next;
if (state->neigh_sub_iter) {
loff_t fakep = 0;
void *v;
@@ -2156,6 +2185,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
loff_t *pos)
{
struct neigh_seq_state *state = seq->private;
+ struct net *net = state->p.net;
struct neigh_table *tbl = state->tbl;
if (state->neigh_sub_iter) {
@@ -2167,6 +2197,8 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
while (1) {
while (n) {
+ if (n->dev->nd_net != net)
+ goto next;
if (state->neigh_sub_iter) {
void *v = state->neigh_sub_iter(state, n, pos);
if (v)
@@ -2213,6 +2245,7 @@ static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
{
struct neigh_seq_state *state = seq->private;
+ struct net * net = state->p.net;
struct neigh_table *tbl = state->tbl;
struct pneigh_entry *pn = NULL;
int bucket = state->bucket;
@@ -2220,6 +2253,8 @@ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
state->flags |= NEIGH_SEQ_IS_PNEIGH;
for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
pn = tbl->phash_buckets[bucket];
+ while (pn && (pn->net != net))
+ pn = pn->next;
if (pn)
break;
}
@@ -2233,6 +2268,7 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
loff_t *pos)
{
struct neigh_seq_state *state = seq->private;
+ struct net * net = state->p.net;
struct neigh_table *tbl = state->tbl;
pn = pn->next;
@@ -2240,6 +2276,8 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
if (++state->bucket > PNEIGH_HASHMASK)
break;
pn = tbl->phash_buckets[state->bucket];
+ while (pn && (pn->net != net))
+ pn = pn->next;
if (pn)
break;
}
@@ -2277,6 +2315,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
}
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
+ __acquires(tbl->lock)
{
struct neigh_seq_state *state = seq->private;
loff_t pos_minus_one;
@@ -2320,6 +2359,7 @@ out:
EXPORT_SYMBOL(neigh_seq_next);
void neigh_seq_stop(struct seq_file *seq, void *v)
+ __releases(tbl->lock)
{
struct neigh_seq_state *state = seq->private;
struct neigh_table *tbl = state->tbl;
@@ -2441,6 +2481,7 @@ static inline size_t neigh_nlmsg_size(void)
static void __neigh_notify(struct neighbour *n, int type, int flags)
{
+ struct net *net = n->dev->nd_net;
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -2455,10 +2496,10 @@ static void __neigh_notify(struct neighbour *n, int type, int flags)
kfree_skb(skb);
goto errout;
}
- err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+ err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
errout:
if (err < 0)
- rtnl_set_sk_err(RTNLGRP_NEIGH, err);
+ rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
#ifdef CONFIG_ARPD
@@ -2472,11 +2513,8 @@ void neigh_app_ns(struct neighbour *n)
static struct neigh_sysctl_table {
struct ctl_table_header *sysctl_header;
- ctl_table neigh_vars[__NET_NEIGH_MAX];
- ctl_table neigh_dev[2];
- ctl_table neigh_neigh_dir[2];
- ctl_table neigh_proto_dir[2];
- ctl_table neigh_root_dir[2];
+ struct ctl_table neigh_vars[__NET_NEIGH_MAX];
+ char *dev_name;
} neigh_sysctl_template __read_mostly = {
.neigh_vars = {
{
@@ -2607,32 +2645,7 @@ static struct neigh_sysctl_table {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
- {}
- },
- .neigh_dev = {
- {
- .ctl_name = NET_PROTO_CONF_DEFAULT,
- .procname = "default",
- .mode = 0555,
- },
- },
- .neigh_neigh_dir = {
- {
- .procname = "neigh",
- .mode = 0555,
- },
- },
- .neigh_proto_dir = {
- {
- .mode = 0555,
- },
- },
- .neigh_root_dir = {
- {
- .ctl_name = CTL_NET,
- .procname = "net",
- .mode = 0555,
- },
+ {},
},
};
@@ -2640,14 +2653,26 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
int p_id, int pdev_id, char *p_name,
proc_handler *handler, ctl_handler *strategy)
{
- struct neigh_sysctl_table *t = kmemdup(&neigh_sysctl_template,
- sizeof(*t), GFP_KERNEL);
+ struct neigh_sysctl_table *t;
const char *dev_name_source = NULL;
- char *dev_name = NULL;
- int err = 0;
+#define NEIGH_CTL_PATH_ROOT 0
+#define NEIGH_CTL_PATH_PROTO 1
+#define NEIGH_CTL_PATH_NEIGH 2
+#define NEIGH_CTL_PATH_DEV 3
+
+ struct ctl_path neigh_path[] = {
+ { .procname = "net", .ctl_name = CTL_NET, },
+ { .procname = "proto", .ctl_name = 0, },
+ { .procname = "neigh", .ctl_name = 0, },
+ { .procname = "default", .ctl_name = NET_PROTO_CONF_DEFAULT, },
+ { },
+ };
+
+ t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
if (!t)
- return -ENOBUFS;
+ goto err;
+
t->neigh_vars[0].data = &p->mcast_probes;
t->neigh_vars[1].data = &p->ucast_probes;
t->neigh_vars[2].data = &p->app_probes;
@@ -2665,11 +2690,11 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
if (dev) {
dev_name_source = dev->name;
- t->neigh_dev[0].ctl_name = dev->ifindex;
+ neigh_path[NEIGH_CTL_PATH_DEV].ctl_name = dev->ifindex;
/* Terminate the table early */
memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
} else {
- dev_name_source = t->neigh_dev[0].procname;
+ dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
t->neigh_vars[14].data = (int *)(p + 1);
t->neigh_vars[15].data = (int *)(p + 1) + 1;
t->neigh_vars[16].data = (int *)(p + 1) + 2;
@@ -2704,39 +2729,28 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
t->neigh_vars[13].ctl_name = CTL_UNNUMBERED;
}
- dev_name = kstrdup(dev_name_source, GFP_KERNEL);
- if (!dev_name) {
- err = -ENOBUFS;
+ t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
+ if (!t->dev_name)
goto free;
- }
-
- t->neigh_dev[0].procname = dev_name;
-
- t->neigh_neigh_dir[0].ctl_name = pdev_id;
- t->neigh_proto_dir[0].procname = p_name;
- t->neigh_proto_dir[0].ctl_name = p_id;
+ neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
+ neigh_path[NEIGH_CTL_PATH_NEIGH].ctl_name = pdev_id;
+ neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
+ neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id;
- t->neigh_dev[0].child = t->neigh_vars;
- t->neigh_neigh_dir[0].child = t->neigh_dev;
- t->neigh_proto_dir[0].child = t->neigh_neigh_dir;
- t->neigh_root_dir[0].child = t->neigh_proto_dir;
-
- t->sysctl_header = register_sysctl_table(t->neigh_root_dir);
- if (!t->sysctl_header) {
- err = -ENOBUFS;
+ t->sysctl_header = register_sysctl_paths(neigh_path, t->neigh_vars);
+ if (!t->sysctl_header)
goto free_procname;
- }
+
p->sysctl_table = t;
return 0;
- /* error path */
- free_procname:
- kfree(dev_name);
- free:
+free_procname:
+ kfree(t->dev_name);
+free:
kfree(t);
-
- return err;
+err:
+ return -ENOBUFS;
}
void neigh_sysctl_unregister(struct neigh_parms *p)
@@ -2745,7 +2759,7 @@ void neigh_sysctl_unregister(struct neigh_parms *p)
struct neigh_sysctl_table *t = p->sysctl_table;
p->sysctl_table = NULL;
unregister_sysctl_table(t->sysctl_header);
- kfree(t->neigh_dev[0].procname);
+ kfree(t->dev_name);
kfree(t);
}
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 61ead1d11132..7635d3f72723 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -95,17 +95,6 @@ NETDEVICE_SHOW(type, fmt_dec);
NETDEVICE_SHOW(link_mode, fmt_dec);
/* use same locking rules as GIFHWADDR ioctl's */
-static ssize_t format_addr(char *buf, const unsigned char *addr, int len)
-{
- int i;
- char *cp = buf;
-
- for (i = 0; i < len; i++)
- cp += sprintf(cp, "%02x%c", addr[i],
- i == (len - 1) ? '\n' : ':');
- return cp - buf;
-}
-
static ssize_t show_address(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -114,7 +103,7 @@ static ssize_t show_address(struct device *dev, struct device_attribute *attr,
read_lock(&dev_base_lock);
if (dev_isalive(net))
- ret = format_addr(buf, net->dev_addr, net->addr_len);
+ ret = sysfs_format_mac(buf, net->dev_addr, net->addr_len);
read_unlock(&dev_base_lock);
return ret;
}
@@ -124,7 +113,7 @@ static ssize_t show_broadcast(struct device *dev,
{
struct net_device *net = to_net_dev(dev);
if (dev_isalive(net))
- return format_addr(buf, net->broadcast, net->addr_len);
+ return sysfs_format_mac(buf, net->broadcast, net->addr_len);
return -EINVAL;
}
@@ -247,9 +236,8 @@ static ssize_t netstat_show(const struct device *d,
struct net_device_stats *stats;
ssize_t ret = -EINVAL;
- if (offset > sizeof(struct net_device_stats) ||
- offset % sizeof(unsigned long) != 0)
- WARN_ON(1);
+ WARN_ON(offset > sizeof(struct net_device_stats) ||
+ offset % sizeof(unsigned long) != 0);
read_lock(&dev_base_lock);
if (dev_isalive(dev) && dev->get_stats &&
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ec936ae92458..26e941d912e8 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -58,6 +58,7 @@ out_undo:
#ifdef CONFIG_NET_NS
static struct kmem_cache *net_cachep;
+static struct workqueue_struct *netns_wq;
static struct net *net_alloc(void)
{
@@ -149,7 +150,7 @@ void __put_net(struct net *net)
{
/* Cleanup the network namespace in process context */
INIT_WORK(&net->work, cleanup_net);
- schedule_work(&net->work);
+ queue_work(netns_wq, &net->work);
}
EXPORT_SYMBOL_GPL(__put_net);
@@ -171,7 +172,13 @@ static int __init net_ns_init(void)
net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
SMP_CACHE_BYTES,
SLAB_PANIC, NULL);
+
+ /* Create workqueue for cleanup */
+ netns_wq = create_singlethread_workqueue("netns");
+ if (!netns_wq)
+ panic("Could not create netns workq");
#endif
+
mutex_lock(&net_mutex);
err = setup_net(&init_net);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c499b5c69bed..6faa128a4c8e 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -39,8 +39,6 @@ static struct sk_buff_head skb_pool;
static atomic_t trapped;
#define USEC_PER_POLL 50
-#define NETPOLL_RX_ENABLED 1
-#define NETPOLL_RX_DROP 2
#define MAX_SKB_SIZE \
(MAX_UDP_CHUNK + sizeof(struct udphdr) + \
@@ -128,27 +126,24 @@ static int poll_one_napi(struct netpoll_info *npinfo,
if (!test_bit(NAPI_STATE_SCHED, &napi->state))
return budget;
- npinfo->rx_flags |= NETPOLL_RX_DROP;
atomic_inc(&trapped);
work = napi->poll(napi, budget);
atomic_dec(&trapped);
- npinfo->rx_flags &= ~NETPOLL_RX_DROP;
return budget - work;
}
-static void poll_napi(struct netpoll *np)
+static void poll_napi(struct net_device *dev)
{
- struct netpoll_info *npinfo = np->dev->npinfo;
struct napi_struct *napi;
int budget = 16;
- list_for_each_entry(napi, &np->dev->napi_list, dev_list) {
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
if (napi->poll_owner != smp_processor_id() &&
spin_trylock(&napi->poll_lock)) {
- budget = poll_one_napi(npinfo, napi, budget);
+ budget = poll_one_napi(dev->npinfo, napi, budget);
spin_unlock(&napi->poll_lock);
if (!budget)
@@ -159,30 +154,27 @@ static void poll_napi(struct netpoll *np)
static void service_arp_queue(struct netpoll_info *npi)
{
- struct sk_buff *skb;
-
- if (unlikely(!npi))
- return;
-
- skb = skb_dequeue(&npi->arp_tx);
+ if (npi) {
+ struct sk_buff *skb;
- while (skb != NULL) {
- arp_reply(skb);
- skb = skb_dequeue(&npi->arp_tx);
+ while ((skb = skb_dequeue(&npi->arp_tx)))
+ arp_reply(skb);
}
}
void netpoll_poll(struct netpoll *np)
{
- if (!np->dev || !netif_running(np->dev) || !np->dev->poll_controller)
+ struct net_device *dev = np->dev;
+
+ if (!dev || !netif_running(dev) || !dev->poll_controller)
return;
/* Process pending work on NIC */
- np->dev->poll_controller(np->dev);
- if (!list_empty(&np->dev->napi_list))
- poll_napi(np);
+ dev->poll_controller(dev);
+
+ poll_napi(dev);
- service_arp_queue(np->dev->npinfo);
+ service_arp_queue(dev->npinfo);
zap_completion_queue();
}
@@ -364,8 +356,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
skb_reset_mac_header(skb);
skb->protocol = eth->h_proto = htons(ETH_P_IP);
- memcpy(eth->h_source, np->local_mac, 6);
- memcpy(eth->h_dest, np->remote_mac, 6);
+ memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
+ memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
skb->dev = np->dev;
@@ -418,7 +410,8 @@ static void arp_reply(struct sk_buff *skb)
memcpy(&tip, arp_ptr, 4);
/* Should we ignore arp? */
- if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip))
+ if (tip != htonl(np->local_ip) ||
+ ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
return;
size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4);
@@ -435,7 +428,7 @@ static void arp_reply(struct sk_buff *skb)
/* Fill the device header for the ARP frame */
if (dev_hard_header(send_skb, skb->dev, ptype,
- sha, np->local_mac,
+ sha, np->dev->dev_addr,
send_skb->len) < 0) {
kfree_skb(send_skb);
return;
@@ -479,7 +472,7 @@ int __netpoll_rx(struct sk_buff *skb)
if (skb->dev->type != ARPHRD_ETHER)
goto out;
- /* check if netpoll clients need ARP */
+ /* if receive ARP during middle of NAPI poll, then queue */
if (skb->protocol == htons(ETH_P_ARP) &&
atomic_read(&trapped)) {
skb_queue_tail(&npi->arp_tx, skb);
@@ -541,6 +534,9 @@ int __netpoll_rx(struct sk_buff *skb)
return 1;
out:
+ /* If packet received while already in poll then just
+ * silently drop.
+ */
if (atomic_read(&trapped)) {
kfree_skb(skb);
return 1;
@@ -679,7 +675,6 @@ int netpoll_setup(struct netpoll *np)
goto release;
}
- npinfo->rx_flags = 0;
npinfo->rx_np = NULL;
spin_lock_init(&npinfo->rx_lock);
@@ -741,9 +736,6 @@ int netpoll_setup(struct netpoll *np)
}
}
- if (is_zero_ether_addr(np->local_mac) && ndev->dev_addr)
- memcpy(np->local_mac, ndev->dev_addr, 6);
-
if (!np->local_ip) {
rcu_read_lock();
in_dev = __in_dev_get_rcu(ndev);
@@ -764,7 +756,6 @@ int netpoll_setup(struct netpoll *np)
if (np->rx_hook) {
spin_lock_irqsave(&npinfo->rx_lock, flags);
- npinfo->rx_flags |= NETPOLL_RX_ENABLED;
npinfo->rx_np = np;
spin_unlock_irqrestore(&npinfo->rx_lock, flags);
}
@@ -806,7 +797,6 @@ void netpoll_cleanup(struct netpoll *np)
if (npinfo->rx_np == np) {
spin_lock_irqsave(&npinfo->rx_lock, flags);
npinfo->rx_np = NULL;
- npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
spin_unlock_irqrestore(&npinfo->rx_lock, flags);
}
@@ -816,11 +806,7 @@ void netpoll_cleanup(struct netpoll *np)
cancel_rearming_delayed_work(&npinfo->tx_work);
/* clean after last, unfinished work */
- if (!skb_queue_empty(&npinfo->txq)) {
- struct sk_buff *skb;
- skb = __skb_dequeue(&npinfo->txq);
- kfree_skb(skb);
- }
+ __skb_queue_purge(&npinfo->txq);
kfree(npinfo);
np->dev->npinfo = NULL;
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 285ec3ed9b37..eebccdbdbaca 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -397,62 +397,6 @@ struct pktgen_thread {
#define REMOVE 1
#define FIND 0
-/* This code works around the fact that do_div cannot handle two 64-bit
- numbers, and regular 64-bit division doesn't work on x86 kernels.
- --Ben
-*/
-
-#define PG_DIV 0
-
-/* This was emailed to LMKL by: Chris Caputo <ccaputo@alt.net>
- * Function copied/adapted/optimized from:
- *
- * nemesis.sourceforge.net/browse/lib/static/intmath/ix86/intmath.c.html
- *
- * Copyright 1994, University of Cambridge Computer Laboratory
- * All Rights Reserved.
- *
- */
-static inline s64 divremdi3(s64 x, s64 y, int type)
-{
- u64 a = (x < 0) ? -x : x;
- u64 b = (y < 0) ? -y : y;
- u64 res = 0, d = 1;
-
- if (b > 0) {
- while (b < a) {
- b <<= 1;
- d <<= 1;
- }
- }
-
- do {
- if (a >= b) {
- a -= b;
- res += d;
- }
- b >>= 1;
- d >>= 1;
- }
- while (d);
-
- if (PG_DIV == type) {
- return (((x ^ y) & (1ll << 63)) == 0) ? res : -(s64) res;
- } else {
- return ((x & (1ll << 63)) == 0) ? a : -(s64) a;
- }
-}
-
-/* End of hacks to deal with 64-bit math on x86 */
-
-/** Convert to milliseconds */
-static inline __u64 tv_to_ms(const struct timeval *tv)
-{
- __u64 ms = tv->tv_usec / 1000;
- ms += (__u64) tv->tv_sec * (__u64) 1000;
- return ms;
-}
-
/** Convert to micro-seconds */
static inline __u64 tv_to_us(const struct timeval *tv)
{
@@ -461,51 +405,13 @@ static inline __u64 tv_to_us(const struct timeval *tv)
return us;
}
-static inline __u64 pg_div(__u64 n, __u32 base)
-{
- __u64 tmp = n;
- do_div(tmp, base);
- /* printk("pktgen: pg_div, n: %llu base: %d rv: %llu\n",
- n, base, tmp); */
- return tmp;
-}
-
-static inline __u64 pg_div64(__u64 n, __u64 base)
-{
- __u64 tmp = n;
-/*
- * How do we know if the architecture we are running on
- * supports division with 64 bit base?
- *
- */
-#if defined(__sparc_v9__) || defined(__powerpc64__) || defined(__alpha__) || defined(__x86_64__) || defined(__ia64__)
-
- do_div(tmp, base);
-#else
- tmp = divremdi3(n, base, PG_DIV);
-#endif
- return tmp;
-}
-
-static inline __u64 getCurMs(void)
-{
- struct timeval tv;
- do_gettimeofday(&tv);
- return tv_to_ms(&tv);
-}
-
-static inline __u64 getCurUs(void)
+static __u64 getCurUs(void)
{
struct timeval tv;
do_gettimeofday(&tv);
return tv_to_us(&tv);
}
-static inline __u64 tv_diff(const struct timeval *a, const struct timeval *b)
-{
- return tv_to_us(a) - tv_to_us(b);
-}
-
/* old include end */
static char version[] __initdata = VERSION;
@@ -2358,9 +2264,11 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
t = random32() % (imx - imn) + imn;
s = htonl(t);
- while (LOOPBACK(s) || MULTICAST(s)
- || BADCLASS(s) || ZERONET(s)
- || LOCAL_MCAST(s)) {
+ while (ipv4_is_loopback(s) ||
+ ipv4_is_multicast(s) ||
+ ipv4_is_lbcast(s) ||
+ ipv4_is_zeronet(s) ||
+ ipv4_is_local_multicast(s)) {
t = random32() % (imx - imn) + imn;
s = htonl(t);
}
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 45aed75cb571..2d3035d3abd7 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -69,8 +69,6 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
return 0;
}
-EXPORT_SYMBOL(reqsk_queue_alloc);
-
void __reqsk_queue_destroy(struct request_sock_queue *queue)
{
struct listen_sock *lopt;
@@ -91,8 +89,6 @@ void __reqsk_queue_destroy(struct request_sock_queue *queue)
kfree(lopt);
}
-EXPORT_SYMBOL(__reqsk_queue_destroy);
-
static inline struct listen_sock *reqsk_queue_yank_listen_sk(
struct request_sock_queue *queue)
{
@@ -134,4 +130,3 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
kfree(lopt);
}
-EXPORT_SYMBOL(reqsk_queue_destroy);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fed95a323b28..ddbdde82a700 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -60,7 +60,6 @@ struct rtnl_link
};
static DEFINE_MUTEX(rtnl_mutex);
-static struct sock *rtnl;
void rtnl_lock(void)
{
@@ -458,8 +457,9 @@ size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size)
return ret;
}
-int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
+int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo)
{
+ struct sock *rtnl = net->rtnl;
int err = 0;
NETLINK_CB(skb).dst_group = group;
@@ -471,14 +471,17 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
return err;
}
-int rtnl_unicast(struct sk_buff *skb, u32 pid)
+int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
{
+ struct sock *rtnl = net->rtnl;
+
return nlmsg_unicast(rtnl, skb, pid);
}
-int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
+int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
struct nlmsghdr *nlh, gfp_t flags)
{
+ struct sock *rtnl = net->rtnl;
int report = 0;
if (nlh)
@@ -487,8 +490,10 @@ int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
return nlmsg_notify(rtnl, skb, pid, group, report, flags);
}
-void rtnl_set_sk_err(u32 group, int error)
+void rtnl_set_sk_err(struct net *net, u32 group, int error)
{
+ struct sock *rtnl = net->rtnl;
+
netlink_set_err(rtnl, 0, group, error);
}
@@ -1186,7 +1191,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
kfree_skb(nskb);
goto errout;
}
- err = rtnl_unicast(nskb, NETLINK_CB(skb).pid);
+ err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
errout:
dev_put(dev);
@@ -1219,6 +1224,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
{
+ struct net *net = dev->nd_net;
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -1233,10 +1239,10 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
kfree_skb(skb);
goto errout;
}
- err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+ err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
errout:
if (err < 0)
- rtnl_set_sk_err(RTNLGRP_LINK, err);
+ rtnl_set_sk_err(net, RTNLGRP_LINK, err);
}
/* Protected by RTNL sempahore. */
@@ -1247,6 +1253,7 @@ static int rtattr_max;
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
+ struct net *net = skb->sk->sk_net;
rtnl_doit_func doit;
int sz_idx, kind;
int min_len;
@@ -1275,6 +1282,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EPERM;
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
+ struct sock *rtnl;
rtnl_dumpit_func dumpit;
dumpit = rtnl_get_dumpit(family, type);
@@ -1282,6 +1290,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EOPNOTSUPP;
__rtnl_unlock();
+ rtnl = net->rtnl;
err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
rtnl_lock();
return err;
@@ -1326,9 +1335,6 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
{
struct net_device *dev = ptr;
- if (dev->nd_net != &init_net)
- return NOTIFY_DONE;
-
switch (event) {
case NETDEV_UNREGISTER:
rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
@@ -1354,6 +1360,29 @@ static struct notifier_block rtnetlink_dev_notifier = {
.notifier_call = rtnetlink_event,
};
+
+static int rtnetlink_net_init(struct net *net)
+{
+ struct sock *sk;
+ sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
+ rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
+ if (!sk)
+ return -ENOMEM;
+ net->rtnl = sk;
+ return 0;
+}
+
+static void rtnetlink_net_exit(struct net *net)
+{
+ netlink_kernel_release(net->rtnl);
+ net->rtnl = NULL;
+}
+
+static struct pernet_operations rtnetlink_net_ops = {
+ .init = rtnetlink_net_init,
+ .exit = rtnetlink_net_exit,
+};
+
void __init rtnetlink_init(void)
{
int i;
@@ -1366,10 +1395,9 @@ void __init rtnetlink_init(void)
if (!rta_buf)
panic("rtnetlink_init: cannot allocate rta_buf\n");
- rtnl = netlink_kernel_create(&init_net, NETLINK_ROUTE, RTNLGRP_MAX,
- rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
- if (rtnl == NULL)
+ if (register_pernet_subsys(&rtnetlink_net_ops))
panic("rtnetlink_init: cannot initialize rtnetlink\n");
+
netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
register_netdevice_notifier(&rtnetlink_dev_notifier);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b6283779e93d..98420f9c4b6d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -52,6 +52,7 @@
#endif
#include <linux/string.h>
#include <linux/skbuff.h>
+#include <linux/splice.h>
#include <linux/cache.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
@@ -71,6 +72,40 @@
static struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ struct sk_buff *skb = (struct sk_buff *) buf->private;
+
+ kfree_skb(skb);
+}
+
+static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ struct sk_buff *skb = (struct sk_buff *) buf->private;
+
+ skb_get(skb);
+}
+
+static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ return 1;
+}
+
+
+/* Pipe buffer operations for a socket. */
+static struct pipe_buf_operations sock_pipe_buf_ops = {
+ .can_merge = 0,
+ .map = generic_pipe_buf_map,
+ .unmap = generic_pipe_buf_unmap,
+ .confirm = generic_pipe_buf_confirm,
+ .release = sock_pipe_buf_release,
+ .steal = sock_pipe_buf_steal,
+ .get = sock_pipe_buf_get,
+};
+
/*
* Keep out-of-line to prevent kernel bloat.
* __builtin_return_address is not used because it is not always
@@ -1122,6 +1157,217 @@ fault:
return -EFAULT;
}
+/*
+ * Callback from splice_to_pipe(), if we need to release some pages
+ * at the end of the spd in case we error'ed out in filling the pipe.
+ */
+static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
+{
+ struct sk_buff *skb = (struct sk_buff *) spd->partial[i].private;
+
+ kfree_skb(skb);
+}
+
+/*
+ * Fill page/offset/length into spd, if it can hold more pages.
+ */
+static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
+ unsigned int len, unsigned int offset,
+ struct sk_buff *skb)
+{
+ if (unlikely(spd->nr_pages == PIPE_BUFFERS))
+ return 1;
+
+ spd->pages[spd->nr_pages] = page;
+ spd->partial[spd->nr_pages].len = len;
+ spd->partial[spd->nr_pages].offset = offset;
+ spd->partial[spd->nr_pages].private = (unsigned long) skb_get(skb);
+ spd->nr_pages++;
+ return 0;
+}
+
+/*
+ * Map linear and fragment data from the skb to spd. Returns number of
+ * pages mapped.
+ */
+static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
+ unsigned int *total_len,
+ struct splice_pipe_desc *spd)
+{
+ unsigned int nr_pages = spd->nr_pages;
+ unsigned int poff, plen, len, toff, tlen;
+ int headlen, seg;
+
+ toff = *offset;
+ tlen = *total_len;
+ if (!tlen)
+ goto err;
+
+ /*
+ * if the offset is greater than the linear part, go directly to
+ * the fragments.
+ */
+ headlen = skb_headlen(skb);
+ if (toff >= headlen) {
+ toff -= headlen;
+ goto map_frag;
+ }
+
+ /*
+ * first map the linear region into the pages/partial map, skipping
+ * any potential initial offset.
+ */
+ len = 0;
+ while (len < headlen) {
+ void *p = skb->data + len;
+
+ poff = (unsigned long) p & (PAGE_SIZE - 1);
+ plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff);
+ len += plen;
+
+ if (toff) {
+ if (plen <= toff) {
+ toff -= plen;
+ continue;
+ }
+ plen -= toff;
+ poff += toff;
+ toff = 0;
+ }
+
+ plen = min(plen, tlen);
+ if (!plen)
+ break;
+
+ /*
+ * just jump directly to update and return, no point
+ * in going over fragments when the output is full.
+ */
+ if (spd_fill_page(spd, virt_to_page(p), plen, poff, skb))
+ goto done;
+
+ tlen -= plen;
+ }
+
+ /*
+ * then map the fragments
+ */
+map_frag:
+ for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
+ const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
+
+ plen = f->size;
+ poff = f->page_offset;
+
+ if (toff) {
+ if (plen <= toff) {
+ toff -= plen;
+ continue;
+ }
+ plen -= toff;
+ poff += toff;
+ toff = 0;
+ }
+
+ plen = min(plen, tlen);
+ if (!plen)
+ break;
+
+ if (spd_fill_page(spd, f->page, plen, poff, skb))
+ break;
+
+ tlen -= plen;
+ }
+
+done:
+ if (spd->nr_pages - nr_pages) {
+ *offset = 0;
+ *total_len = tlen;
+ return 0;
+ }
+err:
+ return 1;
+}
+
+/*
+ * Map data from the skb to a pipe. Should handle both the linear part,
+ * the fragments, and the frag list. It does NOT handle frag lists within
+ * the frag list, if such a thing exists. We'd probably need to recurse to
+ * handle that cleanly.
+ */
+int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
+ struct pipe_inode_info *pipe, unsigned int tlen,
+ unsigned int flags)
+{
+ struct partial_page partial[PIPE_BUFFERS];
+ struct page *pages[PIPE_BUFFERS];
+ struct splice_pipe_desc spd = {
+ .pages = pages,
+ .partial = partial,
+ .flags = flags,
+ .ops = &sock_pipe_buf_ops,
+ .spd_release = sock_spd_release,
+ };
+ struct sk_buff *skb;
+
+ /*
+ * I'd love to avoid the clone here, but tcp_read_sock()
+ * ignores reference counts and unconditonally kills the sk_buff
+ * on return from the actor.
+ */
+ skb = skb_clone(__skb, GFP_KERNEL);
+ if (unlikely(!skb))
+ return -ENOMEM;
+
+ /*
+ * __skb_splice_bits() only fails if the output has no room left,
+ * so no point in going over the frag_list for the error case.
+ */
+ if (__skb_splice_bits(skb, &offset, &tlen, &spd))
+ goto done;
+ else if (!tlen)
+ goto done;
+
+ /*
+ * now see if we have a frag_list to map
+ */
+ if (skb_shinfo(skb)->frag_list) {
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+ for (; list && tlen; list = list->next) {
+ if (__skb_splice_bits(list, &offset, &tlen, &spd))
+ break;
+ }
+ }
+
+done:
+ /*
+ * drop our reference to the clone, the pipe consumption will
+ * drop the rest.
+ */
+ kfree_skb(skb);
+
+ if (spd.nr_pages) {
+ int ret;
+
+ /*
+ * Drop the socket lock, otherwise we have reverse
+ * locking dependencies between sk_lock and i_mutex
+ * here as compared to sendfile(). We enter here
+ * with the socket lock held, and splice_to_pipe() will
+ * grab the pipe inode lock. For sendfile() emulation,
+ * we call into ->sendpage() with the i_mutex lock held
+ * and networking will grab the socket lock.
+ */
+ release_sock(__skb->sk);
+ ret = splice_to_pipe(pipe, &spd);
+ lock_sock(__skb->sk);
+ return ret;
+ }
+
+ return 0;
+}
+
/**
* skb_store_bits - store bits from kernel buffer to skb
* @skb: destination buffer
diff --git a/net/core/sock.c b/net/core/sock.c
index c519b439b8b1..1c4b1cd16d65 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -154,7 +154,7 @@ static const char *af_family_key_strings[AF_MAX+1] = {
"sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
"sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
"sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
- "sk_lock-27" , "sk_lock-28" , "sk_lock-29" ,
+ "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
"sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
"sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
};
@@ -168,7 +168,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
"slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" ,
"slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
- "slock-27" , "slock-28" , "slock-29" ,
+ "slock-27" , "slock-28" , "slock-AF_CAN" ,
"slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
"slock-AF_RXRPC" , "slock-AF_MAX"
};
@@ -282,6 +282,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (err)
goto out;
+ if (!sk_rmem_schedule(sk, skb->truesize)) {
+ err = -ENOBUFS;
+ goto out;
+ }
+
skb->dev = NULL;
skb_set_owner_r(skb, sk);
@@ -419,6 +424,14 @@ out:
return ret;
}
+static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
+{
+ if (valbool)
+ sock_set_flag(sk, bit);
+ else
+ sock_reset_flag(sk, bit);
+}
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
@@ -463,11 +476,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
case SO_DEBUG:
if (val && !capable(CAP_NET_ADMIN)) {
ret = -EACCES;
- }
- else if (valbool)
- sock_set_flag(sk, SOCK_DBG);
- else
- sock_reset_flag(sk, SOCK_DBG);
+ } else
+ sock_valbool_flag(sk, SOCK_DBG, valbool);
break;
case SO_REUSEADDR:
sk->sk_reuse = valbool;
@@ -477,10 +487,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
ret = -ENOPROTOOPT;
break;
case SO_DONTROUTE:
- if (valbool)
- sock_set_flag(sk, SOCK_LOCALROUTE);
- else
- sock_reset_flag(sk, SOCK_LOCALROUTE);
+ sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
break;
case SO_BROADCAST:
sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
@@ -1105,7 +1112,9 @@ void sock_rfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
+ skb_truesize_check(skb);
atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+ sk_mem_uncharge(skb->sk, skb->truesize);
}
@@ -1382,6 +1391,103 @@ int sk_wait_data(struct sock *sk, long *timeo)
EXPORT_SYMBOL(sk_wait_data);
+/**
+ * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
+ * @sk: socket
+ * @size: memory size to allocate
+ * @kind: allocation type
+ *
+ * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
+ * rmem allocation. This function assumes that protocols which have
+ * memory_pressure use sk_wmem_queued as write buffer accounting.
+ */
+int __sk_mem_schedule(struct sock *sk, int size, int kind)
+{
+ struct proto *prot = sk->sk_prot;
+ int amt = sk_mem_pages(size);
+ int allocated;
+
+ sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
+ allocated = atomic_add_return(amt, prot->memory_allocated);
+
+ /* Under limit. */
+ if (allocated <= prot->sysctl_mem[0]) {
+ if (prot->memory_pressure && *prot->memory_pressure)
+ *prot->memory_pressure = 0;
+ return 1;
+ }
+
+ /* Under pressure. */
+ if (allocated > prot->sysctl_mem[1])
+ if (prot->enter_memory_pressure)
+ prot->enter_memory_pressure();
+
+ /* Over hard limit. */
+ if (allocated > prot->sysctl_mem[2])
+ goto suppress_allocation;
+
+ /* guarantee minimum buffer size under pressure */
+ if (kind == SK_MEM_RECV) {
+ if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
+ return 1;
+ } else { /* SK_MEM_SEND */
+ if (sk->sk_type == SOCK_STREAM) {
+ if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
+ return 1;
+ } else if (atomic_read(&sk->sk_wmem_alloc) <
+ prot->sysctl_wmem[0])
+ return 1;
+ }
+
+ if (prot->memory_pressure) {
+ if (!*prot->memory_pressure ||
+ prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) *
+ sk_mem_pages(sk->sk_wmem_queued +
+ atomic_read(&sk->sk_rmem_alloc) +
+ sk->sk_forward_alloc))
+ return 1;
+ }
+
+suppress_allocation:
+
+ if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
+ sk_stream_moderate_sndbuf(sk);
+
+ /* Fail only if socket is _under_ its sndbuf.
+ * In this case we cannot block, so that we have to fail.
+ */
+ if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
+ return 1;
+ }
+
+ /* Alas. Undo changes. */
+ sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
+ atomic_sub(amt, prot->memory_allocated);
+ return 0;
+}
+
+EXPORT_SYMBOL(__sk_mem_schedule);
+
+/**
+ * __sk_reclaim - reclaim memory_allocated
+ * @sk: socket
+ */
+void __sk_mem_reclaim(struct sock *sk)
+{
+ struct proto *prot = sk->sk_prot;
+
+ atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
+ prot->memory_allocated);
+ sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
+
+ if (prot->memory_pressure && *prot->memory_pressure &&
+ (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
+ *prot->memory_pressure = 0;
+}
+
+EXPORT_SYMBOL(__sk_mem_reclaim);
+
+
/*
* Set of default routines for initialising struct proto_ops when
* the protocol does not support a particular function. In certain
@@ -1496,7 +1602,7 @@ static void sock_def_error_report(struct sock *sk)
read_lock(&sk->sk_callback_lock);
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
wake_up_interruptible(sk->sk_sleep);
- sk_wake_async(sk,0,POLL_ERR);
+ sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
read_unlock(&sk->sk_callback_lock);
}
@@ -1505,7 +1611,7 @@ static void sock_def_readable(struct sock *sk, int len)
read_lock(&sk->sk_callback_lock);
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
wake_up_interruptible(sk->sk_sleep);
- sk_wake_async(sk,1,POLL_IN);
+ sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
read_unlock(&sk->sk_callback_lock);
}
@@ -1522,7 +1628,7 @@ static void sock_def_write_space(struct sock *sk)
/* Should agree with poll, otherwise some programs break */
if (sock_writeable(sk))
- sk_wake_async(sk, 2, POLL_OUT);
+ sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
read_unlock(&sk->sk_callback_lock);
@@ -1537,7 +1643,7 @@ void sk_send_sigurg(struct sock *sk)
{
if (sk->sk_socket && sk->sk_socket->file)
if (send_sigurg(&sk->sk_socket->file->f_owner))
- sk_wake_async(sk, 3, POLL_PRI);
+ sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
void sk_reset_timer(struct sock *sk, struct timer_list* timer,
@@ -1611,6 +1717,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_stamp = ktime_set(-1L, -1L);
atomic_set(&sk->sk_refcnt, 1);
+ atomic_set(&sk->sk_drops, 0);
}
void fastcall lock_sock_nested(struct sock *sk, int subclass)
@@ -1801,65 +1908,15 @@ EXPORT_SYMBOL(sk_common_release);
static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);
-#ifdef CONFIG_SMP
-/*
- * Define default functions to keep track of inuse sockets per protocol
- * Note that often used protocols use dedicated functions to get a speed increase.
- * (see DEFINE_PROTO_INUSE/REF_PROTO_INUSE)
- */
-static void inuse_add(struct proto *prot, int inc)
-{
- per_cpu_ptr(prot->inuse_ptr, smp_processor_id())[0] += inc;
-}
-
-static int inuse_get(const struct proto *prot)
-{
- int res = 0, cpu;
- for_each_possible_cpu(cpu)
- res += per_cpu_ptr(prot->inuse_ptr, cpu)[0];
- return res;
-}
-
-static int inuse_init(struct proto *prot)
-{
- if (!prot->inuse_getval || !prot->inuse_add) {
- prot->inuse_ptr = alloc_percpu(int);
- if (prot->inuse_ptr == NULL)
- return -ENOBUFS;
-
- prot->inuse_getval = inuse_get;
- prot->inuse_add = inuse_add;
- }
- return 0;
-}
-
-static void inuse_fini(struct proto *prot)
-{
- if (prot->inuse_ptr != NULL) {
- free_percpu(prot->inuse_ptr);
- prot->inuse_ptr = NULL;
- prot->inuse_getval = NULL;
- prot->inuse_add = NULL;
- }
-}
-#else
-static inline int inuse_init(struct proto *prot)
-{
- return 0;
-}
-
-static inline void inuse_fini(struct proto *prot)
-{
-}
-#endif
-
int proto_register(struct proto *prot, int alloc_slab)
{
char *request_sock_slab_name = NULL;
char *timewait_sock_slab_name;
- if (inuse_init(prot))
+ if (sock_prot_inuse_init(prot) != 0) {
+ printk(KERN_CRIT "%s: Can't alloc inuse counters!\n", prot->name);
goto out;
+ }
if (alloc_slab) {
prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
@@ -1927,7 +1984,7 @@ out_free_sock_slab:
kmem_cache_destroy(prot->slab);
prot->slab = NULL;
out_free_inuse:
- inuse_fini(prot);
+ sock_prot_inuse_free(prot);
out:
return -ENOBUFS;
}
@@ -1940,7 +1997,8 @@ void proto_unregister(struct proto *prot)
list_del(&prot->node);
write_unlock(&proto_list_lock);
- inuse_fini(prot);
+ sock_prot_inuse_free(prot);
+
if (prot->slab != NULL) {
kmem_cache_destroy(prot->slab);
prot->slab = NULL;
@@ -1967,6 +2025,7 @@ EXPORT_SYMBOL(proto_unregister);
#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(proto_list_lock)
{
read_lock(&proto_list_lock);
return seq_list_start_head(&proto_list, *pos);
@@ -1978,6 +2037,7 @@ static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void proto_seq_stop(struct seq_file *seq, void *v)
+ __releases(proto_list_lock)
{
read_unlock(&proto_list_lock);
}
diff --git a/net/core/stream.c b/net/core/stream.c
index 755bacbcb321..4a0ad152c9c4 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -35,7 +35,7 @@ void sk_stream_write_space(struct sock *sk)
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
wake_up_interruptible(sk->sk_sleep);
if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
- sock_wake_async(sock, 2, POLL_OUT);
+ sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
}
}
@@ -172,17 +172,6 @@ do_interrupted:
EXPORT_SYMBOL(sk_stream_wait_memory);
-void sk_stream_rfree(struct sk_buff *skb)
-{
- struct sock *sk = skb->sk;
-
- skb_truesize_check(skb);
- atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
- sk->sk_forward_alloc += skb->truesize;
-}
-
-EXPORT_SYMBOL(sk_stream_rfree);
-
int sk_stream_error(struct sock *sk, int flags, int err)
{
if (err == -EPIPE)
@@ -194,76 +183,6 @@ int sk_stream_error(struct sock *sk, int flags, int err)
EXPORT_SYMBOL(sk_stream_error);
-void __sk_stream_mem_reclaim(struct sock *sk)
-{
- atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM,
- sk->sk_prot->memory_allocated);
- sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
- if (*sk->sk_prot->memory_pressure &&
- (atomic_read(sk->sk_prot->memory_allocated) <
- sk->sk_prot->sysctl_mem[0]))
- *sk->sk_prot->memory_pressure = 0;
-}
-
-EXPORT_SYMBOL(__sk_stream_mem_reclaim);
-
-int sk_stream_mem_schedule(struct sock *sk, int size, int kind)
-{
- int amt = sk_stream_pages(size);
-
- sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM;
- atomic_add(amt, sk->sk_prot->memory_allocated);
-
- /* Under limit. */
- if (atomic_read(sk->sk_prot->memory_allocated) < sk->sk_prot->sysctl_mem[0]) {
- if (*sk->sk_prot->memory_pressure)
- *sk->sk_prot->memory_pressure = 0;
- return 1;
- }
-
- /* Over hard limit. */
- if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[2]) {
- sk->sk_prot->enter_memory_pressure();
- goto suppress_allocation;
- }
-
- /* Under pressure. */
- if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[1])
- sk->sk_prot->enter_memory_pressure();
-
- if (kind) {
- if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_prot->sysctl_rmem[0])
- return 1;
- } else if (sk->sk_wmem_queued < sk->sk_prot->sysctl_wmem[0])
- return 1;
-
- if (!*sk->sk_prot->memory_pressure ||
- sk->sk_prot->sysctl_mem[2] > atomic_read(sk->sk_prot->sockets_allocated) *
- sk_stream_pages(sk->sk_wmem_queued +
- atomic_read(&sk->sk_rmem_alloc) +
- sk->sk_forward_alloc))
- return 1;
-
-suppress_allocation:
-
- if (!kind) {
- sk_stream_moderate_sndbuf(sk);
-
- /* Fail only if socket is _under_ its sndbuf.
- * In this case we cannot block, so that we have to fail.
- */
- if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
- return 1;
- }
-
- /* Alas. Undo changes. */
- sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM;
- atomic_sub(amt, sk->sk_prot->memory_allocated);
- return 0;
-}
-
-EXPORT_SYMBOL(sk_stream_mem_schedule);
-
void sk_stream_kill_queues(struct sock *sk)
{
/* First the read buffer. */
@@ -276,7 +195,7 @@ void sk_stream_kill_queues(struct sock *sk)
BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
/* Account for returned memory. */
- sk_stream_mem_reclaim(sk);
+ sk_mem_reclaim(sk);
BUG_TRAP(!sk->sk_wmem_queued);
BUG_TRAP(!sk->sk_forward_alloc);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 113cc728dc31..130338f83ae5 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -10,12 +10,11 @@
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
+#include <linux/init.h>
#include <net/sock.h>
#include <net/xfrm.h>
-#ifdef CONFIG_SYSCTL
-
-ctl_table core_table[] = {
+static struct ctl_table net_core_table[] = {
#ifdef CONFIG_NET
{
.ctl_name = NET_CORE_WMEM_MAX,
@@ -128,7 +127,7 @@ ctl_table core_table[] = {
{
.ctl_name = NET_CORE_SOMAXCONN,
.procname = "somaxconn",
- .data = &sysctl_somaxconn,
+ .data = &init_net.sysctl_somaxconn,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
@@ -152,4 +151,65 @@ ctl_table core_table[] = {
{ .ctl_name = 0 }
};
-#endif
+static __net_initdata struct ctl_path net_core_path[] = {
+ { .procname = "net", .ctl_name = CTL_NET, },
+ { .procname = "core", .ctl_name = NET_CORE, },
+ { },
+};
+
+static __net_init int sysctl_core_net_init(struct net *net)
+{
+ struct ctl_table *tbl, *tmp;
+
+ net->sysctl_somaxconn = SOMAXCONN;
+
+ tbl = net_core_table;
+ if (net != &init_net) {
+ tbl = kmemdup(tbl, sizeof(net_core_table), GFP_KERNEL);
+ if (tbl == NULL)
+ goto err_dup;
+
+ for (tmp = tbl; tmp->procname; tmp++) {
+ if (tmp->data >= (void *)&init_net &&
+ tmp->data < (void *)(&init_net + 1))
+ tmp->data += (char *)net - (char *)&init_net;
+ else
+ tmp->mode &= ~0222;
+ }
+ }
+
+ net->sysctl_core_hdr = register_net_sysctl_table(net,
+ net_core_path, tbl);
+ if (net->sysctl_core_hdr == NULL)
+ goto err_reg;
+
+ return 0;
+
+err_reg:
+ if (tbl != net_core_table)
+ kfree(tbl);
+err_dup:
+ return -ENOMEM;
+}
+
+static __net_exit void sysctl_core_net_exit(struct net *net)
+{
+ struct ctl_table *tbl;
+
+ tbl = net->sysctl_core_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->sysctl_core_hdr);
+ BUG_ON(tbl == net_core_table);
+ kfree(tbl);
+}
+
+static __net_initdata struct pernet_operations sysctl_core_ops = {
+ .init = sysctl_core_net_init,
+ .exit = sysctl_core_net_exit,
+};
+
+static __init int sysctl_core_init(void)
+{
+ return register_pernet_subsys(&sysctl_core_ops);
+}
+
+__initcall(sysctl_core_init);
diff --git a/net/core/utils.c b/net/core/utils.c
index 0bf17da40d52..8031eb59054e 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -91,17 +91,6 @@ EXPORT_SYMBOL(in_aton);
#define IN6PTON_NULL 0x20000000 /* first/tail */
#define IN6PTON_UNKNOWN 0x40000000
-static inline int digit2bin(char c, int delim)
-{
- if (c == delim || c == '\0')
- return IN6PTON_DELIM;
- if (c == '.')
- return IN6PTON_DOT;
- if (c >= '0' && c <= '9')
- return (IN6PTON_DIGIT | (c - '0'));
- return IN6PTON_UNKNOWN;
-}
-
static inline int xdigit2bin(char c, int delim)
{
if (c == delim || c == '\0')
@@ -293,3 +282,19 @@ out:
}
EXPORT_SYMBOL(in6_pton);
+
+void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
+ __be32 from, __be32 to, int pseudohdr)
+{
+ __be32 diff[] = { ~from, to };
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ *sum = csum_fold(csum_partial(diff, sizeof(diff),
+ ~csum_unfold(*sum)));
+ if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+ skb->csum = ~csum_partial(diff, sizeof(diff),
+ ~skb->csum);
+ } else if (pseudohdr)
+ *sum = ~csum_fold(csum_partial(diff, sizeof(diff),
+ csum_unfold(*sum)));
+}
+EXPORT_SYMBOL(inet_proto_csum_replace4);