From 303d9bf6bb64ead8e3f1d7e29904a4025502e591 Mon Sep 17 00:00:00 2001 From: Iñaky Pérez-González Date: Wed, 23 Jan 2008 13:40:27 -0800 Subject: rfkill: add the WiMAX radio type Teach rfkill about wimax radios. Had to define a KEY_WIMAX as a 'key for disabling only wimax radios', as other radio technologies have. This makes sense as hardware has specific keys for disabling specific radios. The RFKILL enabling part is, otherwise, a copy and paste of any other radio technology. Signed-off-by: Inaky Perez-Gonzalez Signed-off-by: Ivo van Doorn Signed-off-by: John W. Linville Signed-off-by: David S. Miller --- include/linux/input.h | 2 ++ include/linux/rfkill.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/input.h b/include/linux/input.h index 2075d6da2a31..056a17a4f34f 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -371,6 +371,8 @@ struct input_absinfo { #define KEY_BRIGHTNESS_ZERO 244 /* brightness off, use ambient */ #define KEY_DISPLAY_OFF 245 /* display device to off state */ +#define KEY_WIMAX 246 + #define BTN_MISC 0x100 #define BTN_0 0x100 #define BTN_1 0x101 diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 0ce5e0b52dbd..e3ab21d7fc7f 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -33,11 +33,13 @@ * RFKILL_TYPE_WLAN: switch is on a 802.11 wireless network device. * RFKILL_TYPE_BLUETOOTH: switch is on a bluetooth device. * RFKILL_TYPE_UWB: switch is on a ultra wideband device. + * RFKILL_TYPE_WIMAX: switch is on a WiMAX device. */ enum rfkill_type { RFKILL_TYPE_WLAN , RFKILL_TYPE_BLUETOOTH, RFKILL_TYPE_UWB, + RFKILL_TYPE_WIMAX, RFKILL_TYPE_MAX, }; -- cgit v1.2.3 From 3692e94f1559523b84a5a0e65929ee84b276e83f Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 26 Jan 2008 00:51:45 +0200 Subject: Move usbnet.h and rndis_host.h to include/linux/usb Move headers usbnet.h and rndis_host.h to include/linux/usb and fix includes for drivers/net/usb modules. Headers are moved because rndis_wlan will be outside drivers/net/usb in drivers/net/wireless and yet need these headers. Signed-off-by: Jussi Kivilinna Acked-by: David Brownell Signed-off-by: John W. Linville Signed-off-by: David S. Miller --- drivers/net/usb/asix.c | 3 +- drivers/net/usb/cdc_ether.c | 3 +- drivers/net/usb/cdc_subset.c | 3 +- drivers/net/usb/dm9601.c | 3 +- drivers/net/usb/gl620a.c | 3 +- drivers/net/usb/mcs7830.c | 3 +- drivers/net/usb/net1080.c | 3 +- drivers/net/usb/plusb.c | 3 +- drivers/net/usb/rndis_host.c | 5 +- drivers/net/usb/rndis_host.h | 274 ----------------------------------------- drivers/net/usb/usbnet.c | 3 +- drivers/net/usb/usbnet.h | 214 -------------------------------- drivers/net/usb/zaurus.c | 3 +- include/linux/usb/rndis_host.h | 274 +++++++++++++++++++++++++++++++++++++++++ include/linux/usb/usbnet.h | 214 ++++++++++++++++++++++++++++++++ 15 files changed, 500 insertions(+), 511 deletions(-) delete mode 100644 drivers/net/usb/rndis_host.h delete mode 100644 drivers/net/usb/usbnet.h create mode 100644 include/linux/usb/rndis_host.h create mode 100644 include/linux/usb/usbnet.h (limited to 'include') diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c index 569028b2baf2..6f245cfb6624 100644 --- a/drivers/net/usb/asix.c +++ b/drivers/net/usb/asix.c @@ -33,8 +33,7 @@ #include #include #include - -#include "usbnet.h" +#include #define DRIVER_VERSION "14-Jun-2006" static const char driver_name [] = "asix"; diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 97c17bb560ac..a934428a5890 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -31,8 +31,7 @@ #include #include #include - -#include "usbnet.h" +#include #if defined(CONFIG_USB_NET_RNDIS_HOST) || defined(CONFIG_USB_NET_RNDIS_HOST_MODULE) diff --git a/drivers/net/usb/cdc_subset.c b/drivers/net/usb/cdc_subset.c index 943988ed01d8..0ec7936cbe21 100644 --- a/drivers/net/usb/cdc_subset.c +++ b/drivers/net/usb/cdc_subset.c @@ -26,8 +26,7 @@ #include #include #include - -#include "usbnet.h" +#include /* diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 633a511d6cb6..4b131a6c6b70 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -20,8 +20,7 @@ #include #include #include - -#include "usbnet.h" +#include /* datasheet: http://www.davicom.com.tw/big5/download/Data%20Sheet/DM9601-DS-P01-930914.pdf diff --git a/drivers/net/usb/gl620a.c b/drivers/net/usb/gl620a.c index 031cf5ca4dbb..f7ccfad9384e 100644 --- a/drivers/net/usb/gl620a.c +++ b/drivers/net/usb/gl620a.c @@ -29,8 +29,7 @@ #include #include #include - -#include "usbnet.h" +#include /* diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c index 5ea7411e1337..c3d119f997f5 100644 --- a/drivers/net/usb/mcs7830.c +++ b/drivers/net/usb/mcs7830.c @@ -31,8 +31,7 @@ #include #include #include - -#include "usbnet.h" +#include /* requests */ #define MCS7830_RD_BMREQ (USB_DIR_IN | USB_TYPE_VENDOR | \ diff --git a/drivers/net/usb/net1080.c b/drivers/net/usb/net1080.c index 19bf8dae70c9..034e8a73ca6b 100644 --- a/drivers/net/usb/net1080.c +++ b/drivers/net/usb/net1080.c @@ -28,11 +28,10 @@ #include #include #include +#include #include -#include "usbnet.h" - /* * Netchip 1080 driver ... http://www.netchip.com diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c index 45300939d185..08555f8b15f4 100644 --- a/drivers/net/usb/plusb.c +++ b/drivers/net/usb/plusb.c @@ -28,8 +28,7 @@ #include #include #include - -#include "usbnet.h" +#include /* diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 0606e11510ae..a61324757b17 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -29,9 +29,8 @@ #include #include #include - -#include "usbnet.h" -#include "rndis_host.h" +#include +#include /* diff --git a/drivers/net/usb/rndis_host.h b/drivers/net/usb/rndis_host.h deleted file mode 100644 index edc1d4a0e272..000000000000 --- a/drivers/net/usb/rndis_host.h +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Host Side support for RNDIS Networking Links - * Copyright (C) 2005 by David Brownell - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - - -#ifndef __RNDIS_HOST_H -#define __RNDIS_HOST_H - - -/* - * CONTROL uses CDC "encapsulated commands" with funky notifications. - * - control-out: SEND_ENCAPSULATED - * - interrupt-in: RESPONSE_AVAILABLE - * - control-in: GET_ENCAPSULATED - * - * We'll try to ignore the RESPONSE_AVAILABLE notifications. - * - * REVISIT some RNDIS implementations seem to have curious issues still - * to be resolved. - */ -struct rndis_msg_hdr { - __le32 msg_type; /* RNDIS_MSG_* */ - __le32 msg_len; - // followed by data that varies between messages - __le32 request_id; - __le32 status; - // ... and more -} __attribute__ ((packed)); - -/* MS-Windows uses this strange size, but RNDIS spec says 1024 minimum */ -#define CONTROL_BUFFER_SIZE 1025 - -/* RNDIS defines an (absurdly huge) 10 second control timeout, - * but ActiveSync seems to use a more usual 5 second timeout - * (which matches the USB 2.0 spec). - */ -#define RNDIS_CONTROL_TIMEOUT_MS (5 * 1000) - - -#define ccpu2 __constant_cpu_to_le32 - -#define RNDIS_MSG_COMPLETION ccpu2(0x80000000) - -/* codes for "msg_type" field of rndis messages; - * only the data channel uses packet messages (maybe batched); - * everything else goes on the control channel. - */ -#define RNDIS_MSG_PACKET ccpu2(0x00000001) /* 1-N packets */ -#define RNDIS_MSG_INIT ccpu2(0x00000002) -#define RNDIS_MSG_INIT_C (RNDIS_MSG_INIT|RNDIS_MSG_COMPLETION) -#define RNDIS_MSG_HALT ccpu2(0x00000003) -#define RNDIS_MSG_QUERY ccpu2(0x00000004) -#define RNDIS_MSG_QUERY_C (RNDIS_MSG_QUERY|RNDIS_MSG_COMPLETION) -#define RNDIS_MSG_SET ccpu2(0x00000005) -#define RNDIS_MSG_SET_C (RNDIS_MSG_SET|RNDIS_MSG_COMPLETION) -#define RNDIS_MSG_RESET ccpu2(0x00000006) -#define RNDIS_MSG_RESET_C (RNDIS_MSG_RESET|RNDIS_MSG_COMPLETION) -#define RNDIS_MSG_INDICATE ccpu2(0x00000007) -#define RNDIS_MSG_KEEPALIVE ccpu2(0x00000008) -#define RNDIS_MSG_KEEPALIVE_C (RNDIS_MSG_KEEPALIVE|RNDIS_MSG_COMPLETION) - -/* codes for "status" field of completion messages */ -#define RNDIS_STATUS_SUCCESS ccpu2(0x00000000) -#define RNDIS_STATUS_FAILURE ccpu2(0xc0000001) -#define RNDIS_STATUS_INVALID_DATA ccpu2(0xc0010015) -#define RNDIS_STATUS_NOT_SUPPORTED ccpu2(0xc00000bb) -#define RNDIS_STATUS_MEDIA_CONNECT ccpu2(0x4001000b) -#define RNDIS_STATUS_MEDIA_DISCONNECT ccpu2(0x4001000c) - -/* codes for OID_GEN_PHYSICAL_MEDIUM */ -#define RNDIS_PHYSICAL_MEDIUM_UNSPECIFIED ccpu2(0x00000000) -#define RNDIS_PHYSICAL_MEDIUM_WIRELESS_LAN ccpu2(0x00000001) -#define RNDIS_PHYSICAL_MEDIUM_CABLE_MODEM ccpu2(0x00000002) -#define RNDIS_PHYSICAL_MEDIUM_PHONE_LINE ccpu2(0x00000003) -#define RNDIS_PHYSICAL_MEDIUM_POWER_LINE ccpu2(0x00000004) -#define RNDIS_PHYSICAL_MEDIUM_DSL ccpu2(0x00000005) -#define RNDIS_PHYSICAL_MEDIUM_FIBRE_CHANNEL ccpu2(0x00000006) -#define RNDIS_PHYSICAL_MEDIUM_1394 ccpu2(0x00000007) -#define RNDIS_PHYSICAL_MEDIUM_WIRELESS_WAN ccpu2(0x00000008) -#define RNDIS_PHYSICAL_MEDIUM_MAX ccpu2(0x00000009) - -struct rndis_data_hdr { - __le32 msg_type; /* RNDIS_MSG_PACKET */ - __le32 msg_len; // rndis_data_hdr + data_len + pad - __le32 data_offset; // 36 -- right after header - __le32 data_len; // ... real packet size - - __le32 oob_data_offset; // zero - __le32 oob_data_len; // zero - __le32 num_oob; // zero - __le32 packet_data_offset; // zero - - __le32 packet_data_len; // zero - __le32 vc_handle; // zero - __le32 reserved; // zero -} __attribute__ ((packed)); - -struct rndis_init { /* OUT */ - // header and: - __le32 msg_type; /* RNDIS_MSG_INIT */ - __le32 msg_len; // 24 - __le32 request_id; - __le32 major_version; // of rndis (1.0) - __le32 minor_version; - __le32 max_transfer_size; -} __attribute__ ((packed)); - -struct rndis_init_c { /* IN */ - // header and: - __le32 msg_type; /* RNDIS_MSG_INIT_C */ - __le32 msg_len; - __le32 request_id; - __le32 status; - __le32 major_version; // of rndis (1.0) - __le32 minor_version; - __le32 device_flags; - __le32 medium; // zero == 802.3 - __le32 max_packets_per_message; - __le32 max_transfer_size; - __le32 packet_alignment; // max 7; (1< #include #include - -#include "usbnet.h" +#include #define DRIVER_VERSION "22-Aug-2005" diff --git a/drivers/net/usb/usbnet.h b/drivers/net/usb/usbnet.h deleted file mode 100644 index e0501da3dd11..000000000000 --- a/drivers/net/usb/usbnet.h +++ /dev/null @@ -1,214 +0,0 @@ -/* - * USB Networking Link Interface - * - * Copyright (C) 2000-2005 by David Brownell - * Copyright (C) 2003-2005 David Hollis - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - - -#ifndef __USBNET_H -#define __USBNET_H - - -/* interface from usbnet core to each USB networking link we handle */ -struct usbnet { - /* housekeeping */ - struct usb_device *udev; - struct usb_interface *intf; - struct driver_info *driver_info; - const char *driver_name; - void *driver_priv; - wait_queue_head_t *wait; - struct mutex phy_mutex; - unsigned char suspend_count; - - /* i/o info: pipes etc */ - unsigned in, out; - struct usb_host_endpoint *status; - unsigned maxpacket; - struct timer_list delay; - - /* protocol/interface state */ - struct net_device *net; - struct net_device_stats stats; - int msg_enable; - unsigned long data [5]; - u32 xid; - u32 hard_mtu; /* count any extra framing */ - size_t rx_urb_size; /* size for rx urbs */ - struct mii_if_info mii; - - /* various kinds of pending driver work */ - struct sk_buff_head rxq; - struct sk_buff_head txq; - struct sk_buff_head done; - struct urb *interrupt; - struct tasklet_struct bh; - - struct work_struct kevent; - unsigned long flags; -# define EVENT_TX_HALT 0 -# define EVENT_RX_HALT 1 -# define EVENT_RX_MEMORY 2 -# define EVENT_STS_SPLIT 3 -# define EVENT_LINK_RESET 4 -}; - -static inline struct usb_driver *driver_of(struct usb_interface *intf) -{ - return to_usb_driver(intf->dev.driver); -} - -/* interface from the device/framing level "minidriver" to core */ -struct driver_info { - char *description; - - int flags; -/* framing is CDC Ethernet, not writing ZLPs (hw issues), or optionally: */ -#define FLAG_FRAMING_NC 0x0001 /* guard against device dropouts */ -#define FLAG_FRAMING_GL 0x0002 /* genelink batches packets */ -#define FLAG_FRAMING_Z 0x0004 /* zaurus adds a trailer */ -#define FLAG_FRAMING_RN 0x0008 /* RNDIS batches, plus huge header */ - -#define FLAG_NO_SETINT 0x0010 /* device can't set_interface() */ -#define FLAG_ETHER 0x0020 /* maybe use "eth%d" names */ - -#define FLAG_FRAMING_AX 0x0040 /* AX88772/178 packets */ -#define FLAG_WLAN 0x0080 /* use "wlan%d" names */ - - - /* init device ... can sleep, or cause probe() failure */ - int (*bind)(struct usbnet *, struct usb_interface *); - - /* cleanup device ... can sleep, but can't fail */ - void (*unbind)(struct usbnet *, struct usb_interface *); - - /* reset device ... can sleep */ - int (*reset)(struct usbnet *); - - /* see if peer is connected ... can sleep */ - int (*check_connect)(struct usbnet *); - - /* for status polling */ - void (*status)(struct usbnet *, struct urb *); - - /* link reset handling, called from defer_kevent */ - int (*link_reset)(struct usbnet *); - - /* fixup rx packet (strip framing) */ - int (*rx_fixup)(struct usbnet *dev, struct sk_buff *skb); - - /* fixup tx packet (add framing) */ - struct sk_buff *(*tx_fixup)(struct usbnet *dev, - struct sk_buff *skb, gfp_t flags); - - /* early initialization code, can sleep. This is for minidrivers - * having 'subminidrivers' that need to do extra initialization - * right after minidriver have initialized hardware. */ - int (*early_init)(struct usbnet *dev); - - /* called by minidriver when link state changes, state: 0=disconnect, - * 1=connect */ - void (*link_change)(struct usbnet *dev, int state); - - /* for new devices, use the descriptor-reading code instead */ - int in; /* rx endpoint */ - int out; /* tx endpoint */ - - unsigned long data; /* Misc driver specific data */ -}; - -/* Minidrivers are just drivers using the "usbnet" core as a powerful - * network-specific subroutine library ... that happens to do pretty - * much everything except custom framing and chip-specific stuff. - */ -extern int usbnet_probe(struct usb_interface *, const struct usb_device_id *); -extern int usbnet_suspend (struct usb_interface *, pm_message_t ); -extern int usbnet_resume (struct usb_interface *); -extern void usbnet_disconnect(struct usb_interface *); - - -/* Drivers that reuse some of the standard USB CDC infrastructure - * (notably, using multiple interfaces according to the CDC - * union descriptor) get some helper code. - */ -struct cdc_state { - struct usb_cdc_header_desc *header; - struct usb_cdc_union_desc *u; - struct usb_cdc_ether_desc *ether; - struct usb_interface *control; - struct usb_interface *data; -}; - -extern int usbnet_generic_cdc_bind (struct usbnet *, struct usb_interface *); -extern void usbnet_cdc_unbind (struct usbnet *, struct usb_interface *); - -/* CDC and RNDIS support the same host-chosen packet filters for IN transfers */ -#define DEFAULT_FILTER (USB_CDC_PACKET_TYPE_BROADCAST \ - |USB_CDC_PACKET_TYPE_ALL_MULTICAST \ - |USB_CDC_PACKET_TYPE_PROMISCUOUS \ - |USB_CDC_PACKET_TYPE_DIRECTED) - - -/* we record the state for each of our queued skbs */ -enum skb_state { - illegal = 0, - tx_start, tx_done, - rx_start, rx_done, rx_cleanup -}; - -struct skb_data { /* skb->cb is one of these */ - struct urb *urb; - struct usbnet *dev; - enum skb_state state; - size_t length; -}; - - -extern int usbnet_get_endpoints(struct usbnet *, struct usb_interface *); -extern void usbnet_defer_kevent (struct usbnet *, int); -extern void usbnet_skb_return (struct usbnet *, struct sk_buff *); -extern void usbnet_unlink_rx_urbs(struct usbnet *); - -extern int usbnet_get_settings (struct net_device *net, struct ethtool_cmd *cmd); -extern int usbnet_set_settings (struct net_device *net, struct ethtool_cmd *cmd); -extern u32 usbnet_get_link (struct net_device *net); -extern u32 usbnet_get_msglevel (struct net_device *); -extern void usbnet_set_msglevel (struct net_device *, u32); -extern void usbnet_get_drvinfo (struct net_device *, struct ethtool_drvinfo *); -extern int usbnet_nway_reset(struct net_device *net); - -/* messaging support includes the interface name, so it must not be - * used before it has one ... notably, in minidriver bind() calls. - */ -#ifdef DEBUG -#define devdbg(usbnet, fmt, arg...) \ - printk(KERN_DEBUG "%s: " fmt "\n" , (usbnet)->net->name , ## arg) -#else -#define devdbg(usbnet, fmt, arg...) do {} while(0) -#endif - -#define deverr(usbnet, fmt, arg...) \ - printk(KERN_ERR "%s: " fmt "\n" , (usbnet)->net->name , ## arg) -#define devwarn(usbnet, fmt, arg...) \ - printk(KERN_WARNING "%s: " fmt "\n" , (usbnet)->net->name , ## arg) - -#define devinfo(usbnet, fmt, arg...) \ - printk(KERN_INFO "%s: " fmt "\n" , (usbnet)->net->name , ## arg); \ - - -#endif /* __USBNET_H */ diff --git a/drivers/net/usb/zaurus.c b/drivers/net/usb/zaurus.c index 9f98e8ce487a..e24f7b3ace4b 100644 --- a/drivers/net/usb/zaurus.c +++ b/drivers/net/usb/zaurus.c @@ -29,8 +29,7 @@ #include #include #include - -#include "usbnet.h" +#include /* diff --git a/include/linux/usb/rndis_host.h b/include/linux/usb/rndis_host.h new file mode 100644 index 000000000000..edc1d4a0e272 --- /dev/null +++ b/include/linux/usb/rndis_host.h @@ -0,0 +1,274 @@ +/* + * Host Side support for RNDIS Networking Links + * Copyright (C) 2005 by David Brownell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#ifndef __RNDIS_HOST_H +#define __RNDIS_HOST_H + + +/* + * CONTROL uses CDC "encapsulated commands" with funky notifications. + * - control-out: SEND_ENCAPSULATED + * - interrupt-in: RESPONSE_AVAILABLE + * - control-in: GET_ENCAPSULATED + * + * We'll try to ignore the RESPONSE_AVAILABLE notifications. + * + * REVISIT some RNDIS implementations seem to have curious issues still + * to be resolved. + */ +struct rndis_msg_hdr { + __le32 msg_type; /* RNDIS_MSG_* */ + __le32 msg_len; + // followed by data that varies between messages + __le32 request_id; + __le32 status; + // ... and more +} __attribute__ ((packed)); + +/* MS-Windows uses this strange size, but RNDIS spec says 1024 minimum */ +#define CONTROL_BUFFER_SIZE 1025 + +/* RNDIS defines an (absurdly huge) 10 second control timeout, + * but ActiveSync seems to use a more usual 5 second timeout + * (which matches the USB 2.0 spec). + */ +#define RNDIS_CONTROL_TIMEOUT_MS (5 * 1000) + + +#define ccpu2 __constant_cpu_to_le32 + +#define RNDIS_MSG_COMPLETION ccpu2(0x80000000) + +/* codes for "msg_type" field of rndis messages; + * only the data channel uses packet messages (maybe batched); + * everything else goes on the control channel. + */ +#define RNDIS_MSG_PACKET ccpu2(0x00000001) /* 1-N packets */ +#define RNDIS_MSG_INIT ccpu2(0x00000002) +#define RNDIS_MSG_INIT_C (RNDIS_MSG_INIT|RNDIS_MSG_COMPLETION) +#define RNDIS_MSG_HALT ccpu2(0x00000003) +#define RNDIS_MSG_QUERY ccpu2(0x00000004) +#define RNDIS_MSG_QUERY_C (RNDIS_MSG_QUERY|RNDIS_MSG_COMPLETION) +#define RNDIS_MSG_SET ccpu2(0x00000005) +#define RNDIS_MSG_SET_C (RNDIS_MSG_SET|RNDIS_MSG_COMPLETION) +#define RNDIS_MSG_RESET ccpu2(0x00000006) +#define RNDIS_MSG_RESET_C (RNDIS_MSG_RESET|RNDIS_MSG_COMPLETION) +#define RNDIS_MSG_INDICATE ccpu2(0x00000007) +#define RNDIS_MSG_KEEPALIVE ccpu2(0x00000008) +#define RNDIS_MSG_KEEPALIVE_C (RNDIS_MSG_KEEPALIVE|RNDIS_MSG_COMPLETION) + +/* codes for "status" field of completion messages */ +#define RNDIS_STATUS_SUCCESS ccpu2(0x00000000) +#define RNDIS_STATUS_FAILURE ccpu2(0xc0000001) +#define RNDIS_STATUS_INVALID_DATA ccpu2(0xc0010015) +#define RNDIS_STATUS_NOT_SUPPORTED ccpu2(0xc00000bb) +#define RNDIS_STATUS_MEDIA_CONNECT ccpu2(0x4001000b) +#define RNDIS_STATUS_MEDIA_DISCONNECT ccpu2(0x4001000c) + +/* codes for OID_GEN_PHYSICAL_MEDIUM */ +#define RNDIS_PHYSICAL_MEDIUM_UNSPECIFIED ccpu2(0x00000000) +#define RNDIS_PHYSICAL_MEDIUM_WIRELESS_LAN ccpu2(0x00000001) +#define RNDIS_PHYSICAL_MEDIUM_CABLE_MODEM ccpu2(0x00000002) +#define RNDIS_PHYSICAL_MEDIUM_PHONE_LINE ccpu2(0x00000003) +#define RNDIS_PHYSICAL_MEDIUM_POWER_LINE ccpu2(0x00000004) +#define RNDIS_PHYSICAL_MEDIUM_DSL ccpu2(0x00000005) +#define RNDIS_PHYSICAL_MEDIUM_FIBRE_CHANNEL ccpu2(0x00000006) +#define RNDIS_PHYSICAL_MEDIUM_1394 ccpu2(0x00000007) +#define RNDIS_PHYSICAL_MEDIUM_WIRELESS_WAN ccpu2(0x00000008) +#define RNDIS_PHYSICAL_MEDIUM_MAX ccpu2(0x00000009) + +struct rndis_data_hdr { + __le32 msg_type; /* RNDIS_MSG_PACKET */ + __le32 msg_len; // rndis_data_hdr + data_len + pad + __le32 data_offset; // 36 -- right after header + __le32 data_len; // ... real packet size + + __le32 oob_data_offset; // zero + __le32 oob_data_len; // zero + __le32 num_oob; // zero + __le32 packet_data_offset; // zero + + __le32 packet_data_len; // zero + __le32 vc_handle; // zero + __le32 reserved; // zero +} __attribute__ ((packed)); + +struct rndis_init { /* OUT */ + // header and: + __le32 msg_type; /* RNDIS_MSG_INIT */ + __le32 msg_len; // 24 + __le32 request_id; + __le32 major_version; // of rndis (1.0) + __le32 minor_version; + __le32 max_transfer_size; +} __attribute__ ((packed)); + +struct rndis_init_c { /* IN */ + // header and: + __le32 msg_type; /* RNDIS_MSG_INIT_C */ + __le32 msg_len; + __le32 request_id; + __le32 status; + __le32 major_version; // of rndis (1.0) + __le32 minor_version; + __le32 device_flags; + __le32 medium; // zero == 802.3 + __le32 max_packets_per_message; + __le32 max_transfer_size; + __le32 packet_alignment; // max 7; (1< + * Copyright (C) 2003-2005 David Hollis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#ifndef __USBNET_H +#define __USBNET_H + + +/* interface from usbnet core to each USB networking link we handle */ +struct usbnet { + /* housekeeping */ + struct usb_device *udev; + struct usb_interface *intf; + struct driver_info *driver_info; + const char *driver_name; + void *driver_priv; + wait_queue_head_t *wait; + struct mutex phy_mutex; + unsigned char suspend_count; + + /* i/o info: pipes etc */ + unsigned in, out; + struct usb_host_endpoint *status; + unsigned maxpacket; + struct timer_list delay; + + /* protocol/interface state */ + struct net_device *net; + struct net_device_stats stats; + int msg_enable; + unsigned long data [5]; + u32 xid; + u32 hard_mtu; /* count any extra framing */ + size_t rx_urb_size; /* size for rx urbs */ + struct mii_if_info mii; + + /* various kinds of pending driver work */ + struct sk_buff_head rxq; + struct sk_buff_head txq; + struct sk_buff_head done; + struct urb *interrupt; + struct tasklet_struct bh; + + struct work_struct kevent; + unsigned long flags; +# define EVENT_TX_HALT 0 +# define EVENT_RX_HALT 1 +# define EVENT_RX_MEMORY 2 +# define EVENT_STS_SPLIT 3 +# define EVENT_LINK_RESET 4 +}; + +static inline struct usb_driver *driver_of(struct usb_interface *intf) +{ + return to_usb_driver(intf->dev.driver); +} + +/* interface from the device/framing level "minidriver" to core */ +struct driver_info { + char *description; + + int flags; +/* framing is CDC Ethernet, not writing ZLPs (hw issues), or optionally: */ +#define FLAG_FRAMING_NC 0x0001 /* guard against device dropouts */ +#define FLAG_FRAMING_GL 0x0002 /* genelink batches packets */ +#define FLAG_FRAMING_Z 0x0004 /* zaurus adds a trailer */ +#define FLAG_FRAMING_RN 0x0008 /* RNDIS batches, plus huge header */ + +#define FLAG_NO_SETINT 0x0010 /* device can't set_interface() */ +#define FLAG_ETHER 0x0020 /* maybe use "eth%d" names */ + +#define FLAG_FRAMING_AX 0x0040 /* AX88772/178 packets */ +#define FLAG_WLAN 0x0080 /* use "wlan%d" names */ + + + /* init device ... can sleep, or cause probe() failure */ + int (*bind)(struct usbnet *, struct usb_interface *); + + /* cleanup device ... can sleep, but can't fail */ + void (*unbind)(struct usbnet *, struct usb_interface *); + + /* reset device ... can sleep */ + int (*reset)(struct usbnet *); + + /* see if peer is connected ... can sleep */ + int (*check_connect)(struct usbnet *); + + /* for status polling */ + void (*status)(struct usbnet *, struct urb *); + + /* link reset handling, called from defer_kevent */ + int (*link_reset)(struct usbnet *); + + /* fixup rx packet (strip framing) */ + int (*rx_fixup)(struct usbnet *dev, struct sk_buff *skb); + + /* fixup tx packet (add framing) */ + struct sk_buff *(*tx_fixup)(struct usbnet *dev, + struct sk_buff *skb, gfp_t flags); + + /* early initialization code, can sleep. This is for minidrivers + * having 'subminidrivers' that need to do extra initialization + * right after minidriver have initialized hardware. */ + int (*early_init)(struct usbnet *dev); + + /* called by minidriver when link state changes, state: 0=disconnect, + * 1=connect */ + void (*link_change)(struct usbnet *dev, int state); + + /* for new devices, use the descriptor-reading code instead */ + int in; /* rx endpoint */ + int out; /* tx endpoint */ + + unsigned long data; /* Misc driver specific data */ +}; + +/* Minidrivers are just drivers using the "usbnet" core as a powerful + * network-specific subroutine library ... that happens to do pretty + * much everything except custom framing and chip-specific stuff. + */ +extern int usbnet_probe(struct usb_interface *, const struct usb_device_id *); +extern int usbnet_suspend (struct usb_interface *, pm_message_t ); +extern int usbnet_resume (struct usb_interface *); +extern void usbnet_disconnect(struct usb_interface *); + + +/* Drivers that reuse some of the standard USB CDC infrastructure + * (notably, using multiple interfaces according to the CDC + * union descriptor) get some helper code. + */ +struct cdc_state { + struct usb_cdc_header_desc *header; + struct usb_cdc_union_desc *u; + struct usb_cdc_ether_desc *ether; + struct usb_interface *control; + struct usb_interface *data; +}; + +extern int usbnet_generic_cdc_bind (struct usbnet *, struct usb_interface *); +extern void usbnet_cdc_unbind (struct usbnet *, struct usb_interface *); + +/* CDC and RNDIS support the same host-chosen packet filters for IN transfers */ +#define DEFAULT_FILTER (USB_CDC_PACKET_TYPE_BROADCAST \ + |USB_CDC_PACKET_TYPE_ALL_MULTICAST \ + |USB_CDC_PACKET_TYPE_PROMISCUOUS \ + |USB_CDC_PACKET_TYPE_DIRECTED) + + +/* we record the state for each of our queued skbs */ +enum skb_state { + illegal = 0, + tx_start, tx_done, + rx_start, rx_done, rx_cleanup +}; + +struct skb_data { /* skb->cb is one of these */ + struct urb *urb; + struct usbnet *dev; + enum skb_state state; + size_t length; +}; + + +extern int usbnet_get_endpoints(struct usbnet *, struct usb_interface *); +extern void usbnet_defer_kevent (struct usbnet *, int); +extern void usbnet_skb_return (struct usbnet *, struct sk_buff *); +extern void usbnet_unlink_rx_urbs(struct usbnet *); + +extern int usbnet_get_settings (struct net_device *net, struct ethtool_cmd *cmd); +extern int usbnet_set_settings (struct net_device *net, struct ethtool_cmd *cmd); +extern u32 usbnet_get_link (struct net_device *net); +extern u32 usbnet_get_msglevel (struct net_device *); +extern void usbnet_set_msglevel (struct net_device *, u32); +extern void usbnet_get_drvinfo (struct net_device *, struct ethtool_drvinfo *); +extern int usbnet_nway_reset(struct net_device *net); + +/* messaging support includes the interface name, so it must not be + * used before it has one ... notably, in minidriver bind() calls. + */ +#ifdef DEBUG +#define devdbg(usbnet, fmt, arg...) \ + printk(KERN_DEBUG "%s: " fmt "\n" , (usbnet)->net->name , ## arg) +#else +#define devdbg(usbnet, fmt, arg...) do {} while(0) +#endif + +#define deverr(usbnet, fmt, arg...) \ + printk(KERN_ERR "%s: " fmt "\n" , (usbnet)->net->name , ## arg) +#define devwarn(usbnet, fmt, arg...) \ + printk(KERN_WARNING "%s: " fmt "\n" , (usbnet)->net->name , ## arg) + +#define devinfo(usbnet, fmt, arg...) \ + printk(KERN_INFO "%s: " fmt "\n" , (usbnet)->net->name , ## arg); \ + + +#endif /* __USBNET_H */ -- cgit v1.2.3 From 38320c70d282be1997a5204c7c7fe14c3aa6bfaa Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Jan 2008 19:35:05 -0800 Subject: [IPSEC]: Use crypto_aead and authenc in ESP This patch converts ESP to use the crypto_aead interface and in particular the authenc algorithm. This lays the foundations for future support of combined mode algorithms. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/esp.h | 54 +----- net/ipv4/Kconfig | 1 + net/ipv4/esp4.c | 497 +++++++++++++++++++++++++++++++++--------------------- net/ipv6/Kconfig | 1 + net/ipv6/esp6.c | 460 ++++++++++++++++++++++++++++++-------------------- 5 files changed, 591 insertions(+), 422 deletions(-) (limited to 'include') diff --git a/include/net/esp.h b/include/net/esp.h index c05f529bff28..d58451331dbd 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -1,58 +1,20 @@ #ifndef _NET_ESP_H #define _NET_ESP_H -#include -#include -#include +#include -#define ESP_NUM_FAST_SG 4 +struct crypto_aead; -struct esp_data -{ - struct scatterlist sgbuf[ESP_NUM_FAST_SG]; - - /* Confidentiality */ - struct { - int padlen; /* 0..255 */ - /* ivlen is offset from enc_data, where encrypted data start. - * It is logically different of crypto_tfm_alg_ivsize(tfm). - * We assume that it is either zero (no ivec), or - * >= crypto_tfm_alg_ivsize(tfm). */ - int ivlen; - int ivinitted; - u8 *ivec; /* ivec buffer */ - struct crypto_blkcipher *tfm; /* crypto handle */ - } conf; - - /* Integrity. It is active when icv_full_len != 0 */ - struct { - u8 *work_icv; - int icv_full_len; - int icv_trunc_len; - struct crypto_hash *tfm; - } auth; +struct esp_data { + /* 0..255 */ + int padlen; + + /* Confidentiality & Integrity */ + struct crypto_aead *aead; }; extern void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len); -static inline int esp_mac_digest(struct esp_data *esp, struct sk_buff *skb, - int offset, int len) -{ - struct hash_desc desc; - int err; - - desc.tfm = esp->auth.tfm; - desc.flags = 0; - - err = crypto_hash_init(&desc); - if (unlikely(err)) - return err; - err = skb_icv_walk(skb, &desc, offset, len, crypto_hash_update); - if (unlikely(err)) - return err; - return crypto_hash_final(&desc, esp->auth.work_icv); -} - struct ip_esp_hdr; static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 24e2b7294bf8..19880b086e71 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -343,6 +343,7 @@ config INET_ESP tristate "IP: ESP transformation" select XFRM select CRYPTO + select CRYPTO_AEAD select CRYPTO_HMAC select CRYPTO_MD5 select CRYPTO_CBC diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 28ea5c77ca23..c4047223bfbe 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -1,27 +1,118 @@ +#include +#include #include #include #include #include #include #include -#include #include #include -#include +#include +#include #include #include #include #include #include +struct esp_skb_cb { + struct xfrm_skb_cb xfrm; + void *tmp; +}; + +#define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) + +/* + * Allocate an AEAD request structure with extra space for SG and IV. + * + * For alignment considerations the IV is placed at the front, followed + * by the request and finally the SG list. + * + * TODO: Use spare space in skb for this where possible. + */ +static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) +{ + unsigned int len; + + len = crypto_aead_ivsize(aead); + if (len) { + len += crypto_aead_alignmask(aead) & + ~(crypto_tfm_ctx_alignment() - 1); + len = ALIGN(len, crypto_tfm_ctx_alignment()); + } + + len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead); + len = ALIGN(len, __alignof__(struct scatterlist)); + + len += sizeof(struct scatterlist) * nfrags; + + return kmalloc(len, GFP_ATOMIC); +} + +static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp) +{ + return crypto_aead_ivsize(aead) ? + PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp; +} + +static inline struct aead_givcrypt_request *esp_tmp_givreq( + struct crypto_aead *aead, u8 *iv) +{ + struct aead_givcrypt_request *req; + + req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead), + crypto_tfm_ctx_alignment()); + aead_givcrypt_set_tfm(req, aead); + return req; +} + +static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv) +{ + struct aead_request *req; + + req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead), + crypto_tfm_ctx_alignment()); + aead_request_set_tfm(req, aead); + return req; +} + +static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, + struct aead_request *req) +{ + return (void *)ALIGN((unsigned long)(req + 1) + + crypto_aead_reqsize(aead), + __alignof__(struct scatterlist)); +} + +static inline struct scatterlist *esp_givreq_sg( + struct crypto_aead *aead, struct aead_givcrypt_request *req) +{ + return (void *)ALIGN((unsigned long)(req + 1) + + crypto_aead_reqsize(aead), + __alignof__(struct scatterlist)); +} + +static void esp_output_done(struct crypto_async_request *base, int err) +{ + struct sk_buff *skb = base->data; + + kfree(ESP_SKB_CB(skb)->tmp); + xfrm_output_resume(skb, err); +} + static int esp_output(struct xfrm_state *x, struct sk_buff *skb) { int err; struct ip_esp_hdr *esph; - struct crypto_blkcipher *tfm; - struct blkcipher_desc desc; + struct crypto_aead *aead; + struct aead_givcrypt_request *req; + struct scatterlist *sg; + struct scatterlist *asg; struct esp_data *esp; struct sk_buff *trailer; + void *tmp; + u8 *iv; u8 *tail; int blksize; int clen; @@ -36,18 +127,27 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) clen = skb->len; esp = x->data; - alen = esp->auth.icv_trunc_len; - tfm = esp->conf.tfm; - desc.tfm = tfm; - desc.flags = 0; - blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); + aead = esp->aead; + alen = crypto_aead_authsize(aead); + + blksize = ALIGN(crypto_aead_blocksize(aead), 4); clen = ALIGN(clen + 2, blksize); - if (esp->conf.padlen) - clen = ALIGN(clen, esp->conf.padlen); + if (esp->padlen) + clen = ALIGN(clen, esp->padlen); + + if ((err = skb_cow_data(skb, clen - skb->len + alen, &trailer)) < 0) + goto error; + nfrags = err; - if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) + tmp = esp_alloc_tmp(aead, nfrags + 1); + if (!tmp) goto error; + iv = esp_tmp_iv(aead, tmp); + req = esp_tmp_givreq(aead, iv); + asg = esp_givreq_sg(aead, req); + sg = asg + 1; + /* Fill padding... */ tail = skb_tail_pointer(trailer); do { @@ -56,28 +156,34 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) tail[i] = i + 1; } while (0); tail[clen - skb->len - 2] = (clen - skb->len) - 2; - pskb_put(skb, trailer, clen - skb->len); + tail[clen - skb->len - 1] = *skb_mac_header(skb); + pskb_put(skb, trailer, clen - skb->len + alen); skb_push(skb, -skb_network_offset(skb)); esph = ip_esp_hdr(skb); - *(skb_tail_pointer(trailer) - 1) = *skb_mac_header(skb); *skb_mac_header(skb) = IPPROTO_ESP; - spin_lock_bh(&x->lock); - /* this is non-NULL only with UDP Encapsulation */ if (x->encap) { struct xfrm_encap_tmpl *encap = x->encap; struct udphdr *uh; __be32 *udpdata32; + unsigned int sport, dport; + int encap_type; + + spin_lock_bh(&x->lock); + sport = encap->encap_sport; + dport = encap->encap_dport; + encap_type = encap->encap_type; + spin_unlock_bh(&x->lock); uh = (struct udphdr *)esph; - uh->source = encap->encap_sport; - uh->dest = encap->encap_dport; - uh->len = htons(skb->len + alen - skb_transport_offset(skb)); + uh->source = sport; + uh->dest = dport; + uh->len = htons(skb->len - skb_transport_offset(skb)); uh->check = 0; - switch (encap->encap_type) { + switch (encap_type) { default: case UDP_ENCAP_ESPINUDP: esph = (struct ip_esp_hdr *)(uh + 1); @@ -95,131 +201,45 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esph->spi = x->id.spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq); - if (esp->conf.ivlen) { - if (unlikely(!esp->conf.ivinitted)) { - get_random_bytes(esp->conf.ivec, esp->conf.ivlen); - esp->conf.ivinitted = 1; - } - crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); - } - - do { - struct scatterlist *sg = &esp->sgbuf[0]; - - if (unlikely(nfrags > ESP_NUM_FAST_SG)) { - sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC); - if (!sg) - goto unlock; - } - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, - esph->enc_data + - esp->conf.ivlen - - skb->data, clen); - err = crypto_blkcipher_encrypt(&desc, sg, sg, clen); - if (unlikely(sg != &esp->sgbuf[0])) - kfree(sg); - } while (0); - - if (unlikely(err)) - goto unlock; - - if (esp->conf.ivlen) { - memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen); - crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen); - } + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, + esph->enc_data + crypto_aead_ivsize(aead) - skb->data, + clen + alen); + sg_init_one(asg, esph, sizeof(*esph)); + + aead_givcrypt_set_callback(req, 0, esp_output_done, skb); + aead_givcrypt_set_crypt(req, sg, sg, clen, iv); + aead_givcrypt_set_assoc(req, asg, sizeof(*esph)); + aead_givcrypt_set_giv(req, esph->enc_data, XFRM_SKB_CB(skb)->seq); + + ESP_SKB_CB(skb)->tmp = tmp; + err = crypto_aead_givencrypt(req); + if (err == -EINPROGRESS) + goto error; - if (esp->auth.icv_full_len) { - err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data, - sizeof(*esph) + esp->conf.ivlen + clen); - memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen); - } + if (err == -EBUSY) + err = NET_XMIT_DROP; -unlock: - spin_unlock_bh(&x->lock); + kfree(tmp); error: return err; } -/* - * Note: detecting truncated vs. non-truncated authentication data is very - * expensive, so we only support truncated data, which is the recommended - * and common case. - */ -static int esp_input(struct xfrm_state *x, struct sk_buff *skb) +static int esp_input_done2(struct sk_buff *skb, int err) { struct iphdr *iph; - struct ip_esp_hdr *esph; + struct xfrm_state *x = xfrm_input_state(skb); struct esp_data *esp = x->data; - struct crypto_blkcipher *tfm = esp->conf.tfm; - struct blkcipher_desc desc = { .tfm = tfm }; - struct sk_buff *trailer; - int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); - int alen = esp->auth.icv_trunc_len; - int elen = skb->len - sizeof(*esph) - esp->conf.ivlen - alen; - int nfrags; + struct crypto_aead *aead = esp->aead; + int alen = crypto_aead_authsize(aead); + int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); + int elen = skb->len - hlen; int ihl; u8 nexthdr[2]; - struct scatterlist *sg; int padlen; - int err = -EINVAL; - if (!pskb_may_pull(skb, sizeof(*esph))) - goto out; - - if (elen <= 0 || (elen & (blksize-1))) - goto out; - - if ((err = skb_cow_data(skb, 0, &trailer)) < 0) - goto out; - nfrags = err; - - skb->ip_summed = CHECKSUM_NONE; - - spin_lock(&x->lock); - - /* If integrity check is required, do this. */ - if (esp->auth.icv_full_len) { - u8 sum[alen]; - - err = esp_mac_digest(esp, skb, 0, skb->len - alen); - if (err) - goto unlock; - - if (skb_copy_bits(skb, skb->len - alen, sum, alen)) - BUG(); - - if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) { - err = -EBADMSG; - goto unlock; - } - } - - esph = (struct ip_esp_hdr *)skb->data; - - /* Get ivec. This can be wrong, check against another impls. */ - if (esp->conf.ivlen) - crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen); - - sg = &esp->sgbuf[0]; - - if (unlikely(nfrags > ESP_NUM_FAST_SG)) { - err = -ENOMEM; - sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC); - if (!sg) - goto unlock; - } - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, - sizeof(*esph) + esp->conf.ivlen, - elen); - err = crypto_blkcipher_decrypt(&desc, sg, sg, elen); - if (unlikely(sg != &esp->sgbuf[0])) - kfree(sg); - -unlock: - spin_unlock(&x->lock); + kfree(ESP_SKB_CB(skb)->tmp); if (unlikely(err)) goto out; @@ -229,15 +249,11 @@ unlock: err = -EINVAL; padlen = nexthdr[0]; - if (padlen+2 >= elen) + if (padlen + 2 + alen >= elen) goto out; /* ... check padding bits here. Silly. :-) */ - /* RFC4303: Drop dummy packets without any error */ - if (nexthdr[1] == IPPROTO_NONE) - goto out; - iph = ip_hdr(skb); ihl = iph->ihl * 4; @@ -279,10 +295,87 @@ unlock: } pskb_trim(skb, skb->len - alen - padlen - 2); - __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen); + __skb_pull(skb, hlen); skb_set_transport_header(skb, -ihl); - return nexthdr[1]; + err = nexthdr[1]; + + /* RFC4303: Drop dummy packets without any error */ + if (err == IPPROTO_NONE) + err = -EINVAL; + +out: + return err; +} + +static void esp_input_done(struct crypto_async_request *base, int err) +{ + struct sk_buff *skb = base->data; + + xfrm_input_resume(skb, esp_input_done2(skb, err)); +} + +/* + * Note: detecting truncated vs. non-truncated authentication data is very + * expensive, so we only support truncated data, which is the recommended + * and common case. + */ +static int esp_input(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ip_esp_hdr *esph; + struct esp_data *esp = x->data; + struct crypto_aead *aead = esp->aead; + struct aead_request *req; + struct sk_buff *trailer; + int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); + int nfrags; + void *tmp; + u8 *iv; + struct scatterlist *sg; + struct scatterlist *asg; + int err = -EINVAL; + + if (!pskb_may_pull(skb, sizeof(*esph))) + goto out; + + if (elen <= 0) + goto out; + + if ((err = skb_cow_data(skb, 0, &trailer)) < 0) + goto out; + nfrags = err; + + err = -ENOMEM; + tmp = esp_alloc_tmp(aead, nfrags + 1); + if (!tmp) + goto out; + + ESP_SKB_CB(skb)->tmp = tmp; + iv = esp_tmp_iv(aead, tmp); + req = esp_tmp_req(aead, iv); + asg = esp_req_sg(aead, req); + sg = asg + 1; + + skb->ip_summed = CHECKSUM_NONE; + + esph = (struct ip_esp_hdr *)skb->data; + + /* Get ivec. This can be wrong, check against another impls. */ + iv = esph->enc_data; + + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen); + sg_init_one(asg, esph, sizeof(*esph)); + + aead_request_set_callback(req, 0, esp_input_done, skb); + aead_request_set_crypt(req, sg, sg, elen, iv); + aead_request_set_assoc(req, asg, sizeof(*esph)); + + err = crypto_aead_decrypt(req); + if (err == -EINPROGRESS) + goto out; + + err = esp_input_done2(skb, err); out: return err; @@ -291,11 +384,11 @@ out: static u32 esp4_get_mtu(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; - u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - u32 align = max_t(u32, blksize, esp->conf.padlen); + u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4); + u32 align = max_t(u32, blksize, esp->padlen); u32 rem; - mtu -= x->props.header_len + esp->auth.icv_trunc_len; + mtu -= x->props.header_len + crypto_aead_authsize(esp->aead); rem = mtu & (align - 1); mtu &= ~(align - 1); @@ -342,80 +435,98 @@ static void esp_destroy(struct xfrm_state *x) if (!esp) return; - crypto_free_blkcipher(esp->conf.tfm); - esp->conf.tfm = NULL; - kfree(esp->conf.ivec); - esp->conf.ivec = NULL; - crypto_free_hash(esp->auth.tfm); - esp->auth.tfm = NULL; - kfree(esp->auth.work_icv); - esp->auth.work_icv = NULL; + crypto_free_aead(esp->aead); kfree(esp); } static int esp_init_state(struct xfrm_state *x) { struct esp_data *esp = NULL; - struct crypto_blkcipher *tfm; + struct crypto_aead *aead; + struct crypto_authenc_key_param *param; + struct rtattr *rta; + char *key; + char *p; + char authenc_name[CRYPTO_MAX_ALG_NAME]; u32 align; + unsigned int keylen; + int err; if (x->ealg == NULL) - goto error; + return -EINVAL; + + if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", + x->aalg ? x->aalg->alg_name : "digest_null", + x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; esp = kzalloc(sizeof(*esp), GFP_KERNEL); if (esp == NULL) return -ENOMEM; + x->data = esp; + + aead = crypto_alloc_aead(authenc_name, 0, 0); + err = PTR_ERR(aead); + if (IS_ERR(aead)) + goto error; + + esp->aead = aead; + + keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) + + (x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param)); + err = -ENOMEM; + key = kmalloc(keylen, GFP_KERNEL); + if (!key) + goto error; + + p = key; + rta = (void *)p; + rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM; + rta->rta_len = RTA_LENGTH(sizeof(*param)); + param = RTA_DATA(rta); + p += RTA_SPACE(sizeof(*param)); + if (x->aalg) { struct xfrm_algo_desc *aalg_desc; - struct crypto_hash *hash; - - hash = crypto_alloc_hash(x->aalg->alg_name, 0, - CRYPTO_ALG_ASYNC); - if (IS_ERR(hash)) - goto error; - esp->auth.tfm = hash; - if (crypto_hash_setkey(hash, x->aalg->alg_key, - (x->aalg->alg_key_len + 7) / 8)) - goto error; + memcpy(p, x->aalg->alg_key, (x->aalg->alg_key_len + 7) / 8); + p += (x->aalg->alg_key_len + 7) / 8; aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); + err = -EINVAL; if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_hash_digestsize(hash)) { + crypto_aead_authsize(aead)) { NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", x->aalg->alg_name, - crypto_hash_digestsize(hash), + crypto_aead_authsize(aead), aalg_desc->uinfo.auth.icv_fullbits/8); - goto error; + goto free_key; } - esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8; - esp->auth.icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8; - - esp->auth.work_icv = kmalloc(esp->auth.icv_full_len, GFP_KERNEL); - if (!esp->auth.work_icv) - goto error; + err = crypto_aead_setauthsize( + aead, aalg_desc->uinfo.auth.icv_truncbits / 8); + if (err) + goto free_key; } - tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - goto error; - esp->conf.tfm = tfm; - esp->conf.ivlen = crypto_blkcipher_ivsize(tfm); - esp->conf.padlen = 0; - if (esp->conf.ivlen) { - esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); - if (unlikely(esp->conf.ivec == NULL)) - goto error; - esp->conf.ivinitted = 0; - } - if (crypto_blkcipher_setkey(tfm, x->ealg->alg_key, - (x->ealg->alg_key_len + 7) / 8)) + esp->padlen = 0; + + param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8); + memcpy(p, x->ealg->alg_key, (x->ealg->alg_key_len + 7) / 8); + + err = crypto_aead_setkey(aead, key, keylen); + +free_key: + kfree(key); + + if (err) goto error; - x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; + + x->props.header_len = sizeof(struct ip_esp_hdr) + + crypto_aead_ivsize(aead); if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); else if (x->props.mode == XFRM_MODE_BEET) @@ -434,18 +545,14 @@ static int esp_init_state(struct xfrm_state *x) break; } } - x->data = esp; - align = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - if (esp->conf.padlen) - align = max_t(u32, align, esp->conf.padlen); - x->props.trailer_len = align + 1 + esp->auth.icv_trunc_len; - return 0; + + align = ALIGN(crypto_aead_blocksize(aead), 4); + if (esp->padlen) + align = max_t(u32, align, esp->padlen); + x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead); error: - x->data = esp; - esp_destroy(x); - x->data = NULL; - return -EINVAL; + return err; } static struct xfrm_type esp_type = diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index eb0b8085949b..3ffb0323668c 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -85,6 +85,7 @@ config INET6_ESP depends on IPV6 select XFRM select CRYPTO + select CRYPTO_AEAD select CRYPTO_HMAC select CRYPTO_MD5 select CRYPTO_CBC diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 5bd5292ad9fa..dc821acf3d33 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -24,33 +24,124 @@ * This file is derived from net/ipv4/esp.c */ +#include +#include #include #include #include #include #include #include -#include #include #include #include +#include #include #include #include #include #include +struct esp_skb_cb { + struct xfrm_skb_cb xfrm; + void *tmp; +}; + +#define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) + +/* + * Allocate an AEAD request structure with extra space for SG and IV. + * + * For alignment considerations the IV is placed at the front, followed + * by the request and finally the SG list. + * + * TODO: Use spare space in skb for this where possible. + */ +static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) +{ + unsigned int len; + + len = crypto_aead_ivsize(aead); + if (len) { + len += crypto_aead_alignmask(aead) & + ~(crypto_tfm_ctx_alignment() - 1); + len = ALIGN(len, crypto_tfm_ctx_alignment()); + } + + len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead); + len = ALIGN(len, __alignof__(struct scatterlist)); + + len += sizeof(struct scatterlist) * nfrags; + + return kmalloc(len, GFP_ATOMIC); +} + +static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp) +{ + return crypto_aead_ivsize(aead) ? + PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp; +} + +static inline struct aead_givcrypt_request *esp_tmp_givreq( + struct crypto_aead *aead, u8 *iv) +{ + struct aead_givcrypt_request *req; + + req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead), + crypto_tfm_ctx_alignment()); + aead_givcrypt_set_tfm(req, aead); + return req; +} + +static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv) +{ + struct aead_request *req; + + req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead), + crypto_tfm_ctx_alignment()); + aead_request_set_tfm(req, aead); + return req; +} + +static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, + struct aead_request *req) +{ + return (void *)ALIGN((unsigned long)(req + 1) + + crypto_aead_reqsize(aead), + __alignof__(struct scatterlist)); +} + +static inline struct scatterlist *esp_givreq_sg( + struct crypto_aead *aead, struct aead_givcrypt_request *req) +{ + return (void *)ALIGN((unsigned long)(req + 1) + + crypto_aead_reqsize(aead), + __alignof__(struct scatterlist)); +} + +static void esp_output_done(struct crypto_async_request *base, int err) +{ + struct sk_buff *skb = base->data; + + kfree(ESP_SKB_CB(skb)->tmp); + xfrm_output_resume(skb, err); +} + static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) { int err; struct ip_esp_hdr *esph; - struct crypto_blkcipher *tfm; - struct blkcipher_desc desc; + struct crypto_aead *aead; + struct aead_givcrypt_request *req; + struct scatterlist *sg; + struct scatterlist *asg; struct sk_buff *trailer; + void *tmp; int blksize; int clen; int alen; int nfrags; + u8 *iv; u8 *tail; struct esp_data *esp = x->data; @@ -60,18 +151,26 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) /* Round to block size */ clen = skb->len; - alen = esp->auth.icv_trunc_len; - tfm = esp->conf.tfm; - desc.tfm = tfm; - desc.flags = 0; - blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); + aead = esp->aead; + alen = crypto_aead_authsize(aead); + + blksize = ALIGN(crypto_aead_blocksize(aead), 4); clen = ALIGN(clen + 2, blksize); - if (esp->conf.padlen) - clen = ALIGN(clen, esp->conf.padlen); + if (esp->padlen) + clen = ALIGN(clen, esp->padlen); - if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) { + if ((err = skb_cow_data(skb, clen - skb->len + alen, &trailer)) < 0) goto error; - } + nfrags = err; + + tmp = esp_alloc_tmp(aead, nfrags + 1); + if (!tmp) + goto error; + + iv = esp_tmp_iv(aead, tmp); + req = esp_tmp_givreq(aead, iv); + asg = esp_givreq_sg(aead, req); + sg = asg + 1; /* Fill padding... */ tail = skb_tail_pointer(trailer); @@ -81,86 +180,113 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) tail[i] = i + 1; } while (0); tail[clen-skb->len - 2] = (clen - skb->len) - 2; - pskb_put(skb, trailer, clen - skb->len); + tail[clen - skb->len - 1] = *skb_mac_header(skb); + pskb_put(skb, trailer, clen - skb->len + alen); skb_push(skb, -skb_network_offset(skb)); esph = ip_esp_hdr(skb); - *(skb_tail_pointer(trailer) - 1) = *skb_mac_header(skb); *skb_mac_header(skb) = IPPROTO_ESP; esph->spi = x->id.spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq); - spin_lock_bh(&x->lock); + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, + esph->enc_data + crypto_aead_ivsize(aead) - skb->data, + clen + alen); + sg_init_one(asg, esph, sizeof(*esph)); - if (esp->conf.ivlen) { - if (unlikely(!esp->conf.ivinitted)) { - get_random_bytes(esp->conf.ivec, esp->conf.ivlen); - esp->conf.ivinitted = 1; - } - crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); - } + aead_givcrypt_set_callback(req, 0, esp_output_done, skb); + aead_givcrypt_set_crypt(req, sg, sg, clen, iv); + aead_givcrypt_set_assoc(req, asg, sizeof(*esph)); + aead_givcrypt_set_giv(req, esph->enc_data, XFRM_SKB_CB(skb)->seq); - do { - struct scatterlist *sg = &esp->sgbuf[0]; + ESP_SKB_CB(skb)->tmp = tmp; + err = crypto_aead_givencrypt(req); + if (err == -EINPROGRESS) + goto error; - if (unlikely(nfrags > ESP_NUM_FAST_SG)) { - sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC); - if (!sg) - goto unlock; - } - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, - esph->enc_data + - esp->conf.ivlen - - skb->data, clen); - err = crypto_blkcipher_encrypt(&desc, sg, sg, clen); - if (unlikely(sg != &esp->sgbuf[0])) - kfree(sg); - } while (0); + if (err == -EBUSY) + err = NET_XMIT_DROP; + + kfree(tmp); + +error: + return err; +} + +static int esp_input_done2(struct sk_buff *skb, int err) +{ + struct xfrm_state *x = xfrm_input_state(skb); + struct esp_data *esp = x->data; + struct crypto_aead *aead = esp->aead; + int alen = crypto_aead_authsize(aead); + int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); + int elen = skb->len - hlen; + int hdr_len = skb_network_header_len(skb); + int padlen; + u8 nexthdr[2]; + + kfree(ESP_SKB_CB(skb)->tmp); if (unlikely(err)) - goto unlock; + goto out; - if (esp->conf.ivlen) { - memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen); - crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen); - } + if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2)) + BUG(); - if (esp->auth.icv_full_len) { - err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data, - sizeof(*esph) + esp->conf.ivlen + clen); - memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen); + err = -EINVAL; + padlen = nexthdr[0]; + if (padlen + 2 + alen >= elen) { + LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage " + "padlen=%d, elen=%d\n", padlen + 2, elen - alen); + goto out; } -unlock: - spin_unlock_bh(&x->lock); + /* ... check padding bits here. Silly. :-) */ -error: + pskb_trim(skb, skb->len - alen - padlen - 2); + __skb_pull(skb, hlen); + skb_set_transport_header(skb, -hdr_len); + + err = nexthdr[1]; + + /* RFC4303: Drop dummy packets without any error */ + if (err == IPPROTO_NONE) + err = -EINVAL; + +out: return err; } +static void esp_input_done(struct crypto_async_request *base, int err) +{ + struct sk_buff *skb = base->data; + + xfrm_input_resume(skb, esp_input_done2(skb, err)); +} + static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) { - struct ipv6hdr *iph; struct ip_esp_hdr *esph; struct esp_data *esp = x->data; - struct crypto_blkcipher *tfm = esp->conf.tfm; - struct blkcipher_desc desc = { .tfm = tfm }; + struct crypto_aead *aead = esp->aead; + struct aead_request *req; struct sk_buff *trailer; - int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); - int alen = esp->auth.icv_trunc_len; - int elen = skb->len - sizeof(*esph) - esp->conf.ivlen - alen; - int hdr_len = skb_network_header_len(skb); + int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); int nfrags; int ret = 0; + void *tmp; + u8 *iv; + struct scatterlist *sg; + struct scatterlist *asg; if (!pskb_may_pull(skb, sizeof(*esph))) { ret = -EINVAL; goto out; } - if (elen <= 0 || (elen & (blksize-1))) { + if (elen <= 0) { ret = -EINVAL; goto out; } @@ -170,86 +296,38 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) goto out; } - skb->ip_summed = CHECKSUM_NONE; - - spin_lock(&x->lock); - - /* If integrity check is required, do this. */ - if (esp->auth.icv_full_len) { - u8 sum[alen]; - - ret = esp_mac_digest(esp, skb, 0, skb->len - alen); - if (ret) - goto unlock; + ret = -ENOMEM; + tmp = esp_alloc_tmp(aead, nfrags + 1); + if (!tmp) + goto out; - if (skb_copy_bits(skb, skb->len - alen, sum, alen)) - BUG(); + ESP_SKB_CB(skb)->tmp = tmp; + iv = esp_tmp_iv(aead, tmp); + req = esp_tmp_req(aead, iv); + asg = esp_req_sg(aead, req); + sg = asg + 1; - if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) { - ret = -EBADMSG; - goto unlock; - } - } + skb->ip_summed = CHECKSUM_NONE; esph = (struct ip_esp_hdr *)skb->data; - iph = ipv6_hdr(skb); /* Get ivec. This can be wrong, check against another impls. */ - if (esp->conf.ivlen) - crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen); - - { - struct scatterlist *sg = &esp->sgbuf[0]; - - if (unlikely(nfrags > ESP_NUM_FAST_SG)) { - sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC); - if (!sg) { - ret = -ENOMEM; - goto unlock; - } - } - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, - sizeof(*esph) + esp->conf.ivlen, - elen); - ret = crypto_blkcipher_decrypt(&desc, sg, sg, elen); - if (unlikely(sg != &esp->sgbuf[0])) - kfree(sg); - } + iv = esph->enc_data; -unlock: - spin_unlock(&x->lock); + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen); + sg_init_one(asg, esph, sizeof(*esph)); - if (unlikely(ret)) - goto out; + aead_request_set_callback(req, 0, esp_input_done, skb); + aead_request_set_crypt(req, sg, sg, elen, iv); + aead_request_set_assoc(req, asg, sizeof(*esph)); - { - u8 nexthdr[2]; - u8 padlen; - - if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2)) - BUG(); - - padlen = nexthdr[0]; - if (padlen+2 >= elen) { - LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen); - ret = -EINVAL; - goto out; - } - /* ... check padding bits here. Silly. :-) */ - - /* RFC4303: Drop dummy packets without any error */ - if (nexthdr[1] == IPPROTO_NONE) { - ret = -EINVAL; - goto out; - } + ret = crypto_aead_decrypt(req); + if (ret == -EINPROGRESS) + goto out; - pskb_trim(skb, skb->len - alen - padlen - 2); - ret = nexthdr[1]; - } + ret = esp_input_done2(skb, ret); - __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen); - skb_set_transport_header(skb, -hdr_len); out: return ret; } @@ -257,11 +335,11 @@ out: static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; - u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - u32 align = max_t(u32, blksize, esp->conf.padlen); + u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4); + u32 align = max_t(u32, blksize, esp->padlen); u32 rem; - mtu -= x->props.header_len + esp->auth.icv_trunc_len; + mtu -= x->props.header_len + crypto_aead_authsize(esp->aead); rem = mtu & (align - 1); mtu &= ~(align - 1); @@ -300,81 +378,101 @@ static void esp6_destroy(struct xfrm_state *x) if (!esp) return; - crypto_free_blkcipher(esp->conf.tfm); - esp->conf.tfm = NULL; - kfree(esp->conf.ivec); - esp->conf.ivec = NULL; - crypto_free_hash(esp->auth.tfm); - esp->auth.tfm = NULL; - kfree(esp->auth.work_icv); - esp->auth.work_icv = NULL; + crypto_free_aead(esp->aead); kfree(esp); } static int esp6_init_state(struct xfrm_state *x) { struct esp_data *esp = NULL; - struct crypto_blkcipher *tfm; + struct crypto_aead *aead; + struct crypto_authenc_key_param *param; + struct rtattr *rta; + char *key; + char *p; + char authenc_name[CRYPTO_MAX_ALG_NAME]; + u32 align; + unsigned int keylen; + int err; if (x->ealg == NULL) - goto error; + return -EINVAL; if (x->encap) - goto error; + return -EINVAL; + + if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", + x->aalg ? x->aalg->alg_name : "digest_null", + x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; esp = kzalloc(sizeof(*esp), GFP_KERNEL); if (esp == NULL) return -ENOMEM; + x->data = esp; + + aead = crypto_alloc_aead(authenc_name, 0, 0); + err = PTR_ERR(aead); + if (IS_ERR(aead)) + goto error; + + esp->aead = aead; + + keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) + + (x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param)); + err = -ENOMEM; + key = kmalloc(keylen, GFP_KERNEL); + if (!key) + goto error; + + p = key; + rta = (void *)p; + rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM; + rta->rta_len = RTA_LENGTH(sizeof(*param)); + param = RTA_DATA(rta); + p += RTA_SPACE(sizeof(*param)); + if (x->aalg) { struct xfrm_algo_desc *aalg_desc; - struct crypto_hash *hash; - - hash = crypto_alloc_hash(x->aalg->alg_name, 0, - CRYPTO_ALG_ASYNC); - if (IS_ERR(hash)) - goto error; - esp->auth.tfm = hash; - if (crypto_hash_setkey(hash, x->aalg->alg_key, - (x->aalg->alg_key_len + 7) / 8)) - goto error; + memcpy(p, x->aalg->alg_key, (x->aalg->alg_key_len + 7) / 8); + p += (x->aalg->alg_key_len + 7) / 8; aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); + err = -EINVAL; if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_hash_digestsize(hash)) { + crypto_aead_authsize(aead)) { NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", x->aalg->alg_name, - crypto_hash_digestsize(hash), + crypto_aead_authsize(aead), aalg_desc->uinfo.auth.icv_fullbits/8); - goto error; + goto free_key; } - esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8; - esp->auth.icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8; - - esp->auth.work_icv = kmalloc(esp->auth.icv_full_len, GFP_KERNEL); - if (!esp->auth.work_icv) - goto error; - } - tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - goto error; - esp->conf.tfm = tfm; - esp->conf.ivlen = crypto_blkcipher_ivsize(tfm); - esp->conf.padlen = 0; - if (esp->conf.ivlen) { - esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); - if (unlikely(esp->conf.ivec == NULL)) - goto error; - esp->conf.ivinitted = 0; + err = crypto_aead_setauthsize( + aead, aalg_desc->uinfo.auth.icv_truncbits / 8); + if (err) + goto free_key; } - if (crypto_blkcipher_setkey(tfm, x->ealg->alg_key, - (x->ealg->alg_key_len + 7) / 8)) + + esp->padlen = 0; + + param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8); + memcpy(p, x->ealg->alg_key, (x->ealg->alg_key_len + 7) / 8); + + err = crypto_aead_setkey(aead, key, keylen); + +free_key: + kfree(key); + + if (err) goto error; - x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; + + x->props.header_len = sizeof(struct ip_esp_hdr) + + crypto_aead_ivsize(aead); switch (x->props.mode) { case XFRM_MODE_BEET: case XFRM_MODE_TRANSPORT: @@ -385,14 +483,14 @@ static int esp6_init_state(struct xfrm_state *x) default: goto error; } - x->data = esp; - return 0; + + align = ALIGN(crypto_aead_blocksize(aead), 4); + if (esp->padlen) + align = max_t(u32, align, esp->padlen); + x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead); error: - x->data = esp; - esp6_destroy(x); - x->data = NULL; - return -EINVAL; + return err; } static struct xfrm_type esp6_type = -- cgit v1.2.3 From 1a6509d991225ad210de54c63314fd9542922095 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 28 Jan 2008 19:37:29 -0800 Subject: [IPSEC]: Add support for combined mode algorithms This patch adds support for combined mode algorithms with GCM being the first algorithm supported. Combined mode algorithms can be added through the xfrm_user interface using the new algorithm payload type XFRMA_ALG_AEAD. Each algorithms is identified by its name and the ICV length. For the purposes of matching algorithms in xfrm_tmpl structures, combined mode algorithms occupy the same name space as encryption algorithms. This is in line with how they are negotiated using IKE. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/pfkeyv2.h | 6 +++ include/linux/xfrm.h | 8 +++ include/net/xfrm.h | 8 +++ net/ipv4/esp4.c | 71 ++++++++++++++++++++----- net/ipv6/esp6.c | 77 +++++++++++++++++++++------ net/xfrm/xfrm_algo.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_user.c | 71 +++++++++++++++++++++++-- 7 files changed, 347 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h index d9db5f62ee48..6db69ff5d83e 100644 --- a/include/linux/pfkeyv2.h +++ b/include/linux/pfkeyv2.h @@ -298,6 +298,12 @@ struct sadb_x_sec_ctx { #define SADB_X_EALG_BLOWFISHCBC 7 #define SADB_EALG_NULL 11 #define SADB_X_EALG_AESCBC 12 +#define SADB_X_EALG_AES_CCM_ICV8 14 +#define SADB_X_EALG_AES_CCM_ICV12 15 +#define SADB_X_EALG_AES_CCM_ICV16 16 +#define SADB_X_EALG_AES_GCM_ICV8 18 +#define SADB_X_EALG_AES_GCM_ICV12 19 +#define SADB_X_EALG_AES_GCM_ICV16 20 #define SADB_X_EALG_CAMELLIACBC 22 #define SADB_EALG_MAX 253 /* last EALG */ /* private allocations should use 249-255 (RFC2407) */ diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 9b5b00c4ef9d..e31b8c84f2c9 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -96,6 +96,13 @@ struct xfrm_algo { char alg_key[0]; }; +struct xfrm_algo_aead { + char alg_name[64]; + int alg_key_len; /* in bits */ + int alg_icv_len; /* in bits */ + char alg_key[0]; +}; + struct xfrm_stats { __u32 replay_window; __u32 replay; @@ -270,6 +277,7 @@ enum xfrm_attr_type_t { XFRMA_LASTUSED, XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */ XFRMA_MIGRATE, + XFRMA_ALG_AEAD, /* struct xfrm_algo_aead */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 5ebb9ba479b1..34d373775a0e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -159,6 +159,7 @@ struct xfrm_state struct xfrm_algo *aalg; struct xfrm_algo *ealg; struct xfrm_algo *calg; + struct xfrm_algo_aead *aead; /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; @@ -1108,6 +1109,10 @@ static inline int xfrm_id_proto_match(u8 proto, u8 userproto) /* * xfrm algorithm information */ +struct xfrm_algo_aead_info { + u16 icv_truncbits; +}; + struct xfrm_algo_auth_info { u16 icv_truncbits; u16 icv_fullbits; @@ -1127,6 +1132,7 @@ struct xfrm_algo_desc { char *compat; u8 available:1; union { + struct xfrm_algo_aead_info aead; struct xfrm_algo_auth_info auth; struct xfrm_algo_encr_info encr; struct xfrm_algo_comp_info comp; @@ -1343,6 +1349,8 @@ extern struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id); extern struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe); extern struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe); extern struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe); +extern struct xfrm_algo_desc *xfrm_aead_get_byname(char *name, int icv_len, + int probe); struct hash_desc; struct scatterlist; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index c4047223bfbe..fac4f102c9f9 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -439,32 +439,53 @@ static void esp_destroy(struct xfrm_state *x) kfree(esp); } -static int esp_init_state(struct xfrm_state *x) +static int esp_init_aead(struct xfrm_state *x) { - struct esp_data *esp = NULL; + struct esp_data *esp = x->data; + struct crypto_aead *aead; + int err; + + aead = crypto_alloc_aead(x->aead->alg_name, 0, 0); + err = PTR_ERR(aead); + if (IS_ERR(aead)) + goto error; + + esp->aead = aead; + + err = crypto_aead_setkey(aead, x->aead->alg_key, + (x->aead->alg_key_len + 7) / 8); + if (err) + goto error; + + err = crypto_aead_setauthsize(aead, x->aead->alg_icv_len / 8); + if (err) + goto error; + +error: + return err; +} + +static int esp_init_authenc(struct xfrm_state *x) +{ + struct esp_data *esp = x->data; struct crypto_aead *aead; struct crypto_authenc_key_param *param; struct rtattr *rta; char *key; char *p; char authenc_name[CRYPTO_MAX_ALG_NAME]; - u32 align; unsigned int keylen; int err; + err = -EINVAL; if (x->ealg == NULL) - return -EINVAL; + goto error; + err = -ENAMETOOLONG; if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", x->aalg ? x->aalg->alg_name : "digest_null", x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; - - esp = kzalloc(sizeof(*esp), GFP_KERNEL); - if (esp == NULL) - return -ENOMEM; - - x->data = esp; + goto error; aead = crypto_alloc_aead(authenc_name, 0, 0); err = PTR_ERR(aead); @@ -512,8 +533,6 @@ static int esp_init_state(struct xfrm_state *x) goto free_key; } - esp->padlen = 0; - param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8); memcpy(p, x->ealg->alg_key, (x->ealg->alg_key_len + 7) / 8); @@ -522,9 +541,35 @@ static int esp_init_state(struct xfrm_state *x) free_key: kfree(key); +error: + return err; +} + +static int esp_init_state(struct xfrm_state *x) +{ + struct esp_data *esp; + struct crypto_aead *aead; + u32 align; + int err; + + esp = kzalloc(sizeof(*esp), GFP_KERNEL); + if (esp == NULL) + return -ENOMEM; + + x->data = esp; + + if (x->aead) + err = esp_init_aead(x); + else + err = esp_init_authenc(x); + if (err) goto error; + aead = esp->aead; + + esp->padlen = 0; + x->props.header_len = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); if (x->props.mode == XFRM_MODE_TUNNEL) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index dc821acf3d33..ca48c56c4b17 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -382,35 +382,53 @@ static void esp6_destroy(struct xfrm_state *x) kfree(esp); } -static int esp6_init_state(struct xfrm_state *x) +static int esp_init_aead(struct xfrm_state *x) +{ + struct esp_data *esp = x->data; + struct crypto_aead *aead; + int err; + + aead = crypto_alloc_aead(x->aead->alg_name, 0, 0); + err = PTR_ERR(aead); + if (IS_ERR(aead)) + goto error; + + esp->aead = aead; + + err = crypto_aead_setkey(aead, x->aead->alg_key, + (x->aead->alg_key_len + 7) / 8); + if (err) + goto error; + + err = crypto_aead_setauthsize(aead, x->aead->alg_icv_len / 8); + if (err) + goto error; + +error: + return err; +} + +static int esp_init_authenc(struct xfrm_state *x) { - struct esp_data *esp = NULL; + struct esp_data *esp = x->data; struct crypto_aead *aead; struct crypto_authenc_key_param *param; struct rtattr *rta; char *key; char *p; char authenc_name[CRYPTO_MAX_ALG_NAME]; - u32 align; unsigned int keylen; int err; + err = -EINVAL; if (x->ealg == NULL) - return -EINVAL; - - if (x->encap) - return -EINVAL; + goto error; + err = -ENAMETOOLONG; if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", x->aalg ? x->aalg->alg_name : "digest_null", x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; - - esp = kzalloc(sizeof(*esp), GFP_KERNEL); - if (esp == NULL) - return -ENOMEM; - - x->data = esp; + goto error; aead = crypto_alloc_aead(authenc_name, 0, 0); err = PTR_ERR(aead); @@ -458,8 +476,6 @@ static int esp6_init_state(struct xfrm_state *x) goto free_key; } - esp->padlen = 0; - param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8); memcpy(p, x->ealg->alg_key, (x->ealg->alg_key_len + 7) / 8); @@ -468,9 +484,38 @@ static int esp6_init_state(struct xfrm_state *x) free_key: kfree(key); +error: + return err; +} + +static int esp6_init_state(struct xfrm_state *x) +{ + struct esp_data *esp; + struct crypto_aead *aead; + u32 align; + int err; + + if (x->encap) + return -EINVAL; + + esp = kzalloc(sizeof(*esp), GFP_KERNEL); + if (esp == NULL) + return -ENOMEM; + + x->data = esp; + + if (x->aead) + err = esp_init_aead(x); + else + err = esp_init_authenc(x); + if (err) goto error; + aead = esp->aead; + + esp->padlen = 0; + x->props.header_len = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); switch (x->props.mode) { diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 02e3ecf9585d..6cc15250de69 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -28,6 +28,105 @@ * that instantiated crypto transforms have correct parameters for IPsec * purposes. */ +static struct xfrm_algo_desc aead_list[] = { +{ + .name = "rfc4106(gcm(aes))", + + .uinfo = { + .aead = { + .icv_truncbits = 64, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV8, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +{ + .name = "rfc4106(gcm(aes))", + + .uinfo = { + .aead = { + .icv_truncbits = 96, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV12, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +{ + .name = "rfc4106(gcm(aes))", + + .uinfo = { + .aead = { + .icv_truncbits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_AES_GCM_ICV16, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +{ + .name = "rfc4309(ccm(aes))", + + .uinfo = { + .aead = { + .icv_truncbits = 64, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV8, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +{ + .name = "rfc4309(ccm(aes))", + + .uinfo = { + .aead = { + .icv_truncbits = 96, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV12, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +{ + .name = "rfc4309(ccm(aes))", + + .uinfo = { + .aead = { + .icv_truncbits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_EALG_AES_CCM_ICV16, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } +}, +}; + static struct xfrm_algo_desc aalg_list[] = { { .name = "hmac(digest_null)", @@ -332,6 +431,11 @@ static struct xfrm_algo_desc calg_list[] = { }, }; +static inline int aead_entries(void) +{ + return ARRAY_SIZE(aead_list); +} + static inline int aalg_entries(void) { return ARRAY_SIZE(aalg_list); @@ -354,6 +458,13 @@ struct xfrm_algo_list { u32 mask; }; +static const struct xfrm_algo_list xfrm_aead_list = { + .algs = aead_list, + .entries = ARRAY_SIZE(aead_list), + .type = CRYPTO_ALG_TYPE_AEAD, + .mask = CRYPTO_ALG_TYPE_MASK, +}; + static const struct xfrm_algo_list xfrm_aalg_list = { .algs = aalg_list, .entries = ARRAY_SIZE(aalg_list), @@ -461,6 +572,33 @@ struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe) } EXPORT_SYMBOL_GPL(xfrm_calg_get_byname); +struct xfrm_aead_name { + const char *name; + int icvbits; +}; + +static int xfrm_aead_name_match(const struct xfrm_algo_desc *entry, + const void *data) +{ + const struct xfrm_aead_name *aead = data; + const char *name = aead->name; + + return aead->icvbits == entry->uinfo.aead.icv_truncbits && name && + !strcmp(name, entry->name); +} + +struct xfrm_algo_desc *xfrm_aead_get_byname(char *name, int icv_len, int probe) +{ + struct xfrm_aead_name data = { + .name = name, + .icvbits = icv_len, + }; + + return xfrm_find_algo(&xfrm_aead_list, xfrm_aead_name_match, &data, + probe); +} +EXPORT_SYMBOL_GPL(xfrm_aead_get_byname); + struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx) { if (idx >= aalg_entries()) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e0ccdf267813..78338079b7f5 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -31,6 +31,11 @@ #include #endif +static inline int aead_len(struct xfrm_algo_aead *alg) +{ + return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); +} + static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) { struct nlattr *rt = attrs[type]; @@ -68,6 +73,22 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) return 0; } +static int verify_aead(struct nlattr **attrs) +{ + struct nlattr *rt = attrs[XFRMA_ALG_AEAD]; + struct xfrm_algo_aead *algp; + + if (!rt) + return 0; + + algp = nla_data(rt); + if (nla_len(rt) < aead_len(algp)) + return -EINVAL; + + algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0'; + return 0; +} + static void verify_one_addr(struct nlattr **attrs, enum xfrm_attr_type_t type, xfrm_address_t **addrp) { @@ -119,20 +140,28 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, switch (p->id.proto) { case IPPROTO_AH: if (!attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_ALG_COMP]) goto out; break; case IPPROTO_ESP: - if ((!attrs[XFRMA_ALG_AUTH] && - !attrs[XFRMA_ALG_CRYPT]) || - attrs[XFRMA_ALG_COMP]) + if (attrs[XFRMA_ALG_COMP]) + goto out; + if (!attrs[XFRMA_ALG_AUTH] && + !attrs[XFRMA_ALG_CRYPT] && + !attrs[XFRMA_ALG_AEAD]) + goto out; + if ((attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_CRYPT]) && + attrs[XFRMA_ALG_AEAD]) goto out; break; case IPPROTO_COMP: if (!attrs[XFRMA_ALG_COMP] || + attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_AUTH] || attrs[XFRMA_ALG_CRYPT]) goto out; @@ -143,6 +172,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, case IPPROTO_ROUTING: if (attrs[XFRMA_ALG_COMP] || attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_ENCAP] || attrs[XFRMA_SEC_CTX] || @@ -155,6 +185,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; } + if ((err = verify_aead(attrs))) + goto out; if ((err = verify_one_alg(attrs, XFRMA_ALG_AUTH))) goto out; if ((err = verify_one_alg(attrs, XFRMA_ALG_CRYPT))) @@ -208,6 +240,31 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, return 0; } +static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props, + struct nlattr *rta) +{ + struct xfrm_algo_aead *p, *ualg; + struct xfrm_algo_desc *algo; + + if (!rta) + return 0; + + ualg = nla_data(rta); + + algo = xfrm_aead_get_byname(ualg->alg_name, ualg->alg_icv_len, 1); + if (!algo) + return -ENOSYS; + *props = algo->desc.sadb_alg_id; + + p = kmemdup(ualg, aead_len(ualg), GFP_KERNEL); + if (!p) + return -ENOMEM; + + strcpy(p->alg_name, algo->name); + *algpp = p; + return 0; +} + static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) { int len = 0; @@ -286,6 +343,9 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, copy_from_user_state(x, p); + if ((err = attach_aead(&x->aead, &x->props.ealgo, + attrs[XFRMA_ALG_AEAD]))) + goto error; if ((err = attach_one_algo(&x->aalg, &x->props.aalgo, xfrm_aalg_get_byname, attrs[XFRMA_ALG_AUTH]))) @@ -510,6 +570,8 @@ static int copy_to_user_state_extra(struct xfrm_state *x, if (x->lastused) NLA_PUT_U64(skb, XFRMA_LASTUSED, x->lastused); + if (x->aead) + NLA_PUT(skb, XFRMA_ALG_AEAD, aead_len(x->aead), x->aead); if (x->aalg) NLA_PUT(skb, XFRMA_ALG_AUTH, xfrm_alg_len(x->aalg), x->aalg); if (x->ealg) @@ -1808,6 +1870,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { #undef XMSGSIZE static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { + [XFRMA_ALG_AEAD] = { .len = sizeof(struct xfrm_algo_aead) }, [XFRMA_ALG_AUTH] = { .len = sizeof(struct xfrm_algo) }, [XFRMA_ALG_CRYPT] = { .len = sizeof(struct xfrm_algo) }, [XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) }, @@ -1972,6 +2035,8 @@ static int xfrm_notify_sa_flush(struct km_event *c) static inline size_t xfrm_sa_len(struct xfrm_state *x) { size_t l = 0; + if (x->aead) + l += nla_total_size(aead_len(x->aead)); if (x->aalg) l += nla_total_size(xfrm_alg_len(x->aalg)); if (x->ealg) -- cgit v1.2.3 From 4a19ec5800fc3bb64e2d87c4d9fdd9e636086fe0 Mon Sep 17 00:00:00 2001 From: Laszlo Attila Toth Date: Wed, 30 Jan 2008 19:08:16 -0800 Subject: [NET]: Introducing socket mark socket option. A userspace program may wish to set the mark for each packets its send without using the netfilter MARK target. Changing the mark can be used for mark based routing without netfilter or for packet filtering. It requires CAP_NET_ADMIN capability. Signed-off-by: Laszlo Attila Toth Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- include/asm-alpha/socket.h | 2 ++ include/asm-arm/socket.h | 2 ++ include/asm-avr32/socket.h | 2 ++ include/asm-blackfin/socket.h | 3 +++ include/asm-cris/socket.h | 2 ++ include/asm-frv/socket.h | 2 ++ include/asm-h8300/socket.h | 2 ++ include/asm-ia64/socket.h | 2 ++ include/asm-m32r/socket.h | 2 ++ include/asm-m68k/socket.h | 2 ++ include/asm-mips/socket.h | 2 ++ include/asm-parisc/socket.h | 2 ++ include/asm-powerpc/socket.h | 2 ++ include/asm-s390/socket.h | 2 ++ include/asm-sh/socket.h | 2 ++ include/asm-sparc/socket.h | 2 ++ include/asm-sparc64/socket.h | 1 + include/asm-v850/socket.h | 2 ++ include/asm-x86/socket.h | 2 ++ include/asm-xtensa/socket.h | 2 ++ include/net/route.h | 2 ++ include/net/sock.h | 2 ++ net/core/sock.c | 11 +++++++++++ net/ipv4/ip_output.c | 3 +++ net/ipv4/raw.c | 2 ++ net/ipv6/ip6_output.c | 2 ++ net/ipv6/raw.c | 3 +++ 27 files changed, 65 insertions(+) (limited to 'include') diff --git a/include/asm-alpha/socket.h b/include/asm-alpha/socket.h index 1fede7f92860..08c979319929 100644 --- a/include/asm-alpha/socket.h +++ b/include/asm-alpha/socket.h @@ -60,4 +60,6 @@ #define SO_SECURITY_ENCRYPTION_TRANSPORT 20 #define SO_SECURITY_ENCRYPTION_NETWORK 21 +#define SO_MARK 36 + #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-arm/socket.h b/include/asm-arm/socket.h index 65a1a64bf934..6817be9573a6 100644 --- a/include/asm-arm/socket.h +++ b/include/asm-arm/socket.h @@ -52,4 +52,6 @@ #define SO_TIMESTAMPNS 35 #define SCM_TIMESTAMPNS SO_TIMESTAMPNS +#define SO_MARK 36 + #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-avr32/socket.h b/include/asm-avr32/socket.h index a0d0507a5034..35863f260929 100644 --- a/include/asm-avr32/socket.h +++ b/include/asm-avr32/socket.h @@ -52,4 +52,6 @@ #define SO_TIMESTAMPNS 35 #define SCM_TIMESTAMPNS SO_TIMESTAMPNS +#define SO_MARK 36 + #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/include/asm-blackfin/socket.h b/include/asm-blackfin/socket.h index 5213c9652186..2ca702e44d47 100644 --- a/include/asm-blackfin/socket.h +++ b/include/asm-blackfin/socket.h @@ -50,4 +50,7 @@ #define SO_PASSSEC 34 #define SO_TIMESTAMPNS 35 #define SCM_TIMESTAMPNS SO_TIMESTAMPNS + +#define SO_MARK 36 + #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-cris/socket.h b/include/asm-cris/socket.h index 5b18dfdf1748..9df0ca82f5de 100644 --- a/include/asm-cris/socket.h +++ b/include/asm-cris/socket.h @@ -54,6 +54,8 @@ #define SO_TIMESTAMPNS 35 #define SCM_TIMESTAMPNS SO_TIMESTAMPNS +#define SO_MARK 36 + #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-frv/socket.h b/include/asm-frv/socket.h index a823befd11dd..e51ca67b9356 100644 --- a/include/asm-frv/socket.h +++ b/include/asm-frv/socket.h @@ -52,5 +52,7 @@ #define SO_TIMESTAMPNS 35 #define SCM_TIMESTAMPNS SO_TIMESTAMPNS +#define SO_MARK 36 + #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-h8300/socket.h b/include/asm-h8300/socket.h index 39911d8c9684..da2520dbf254 100644 --- a/include/asm-h8300/socket.h +++ b/include/asm-h8300/socket.h @@ -52,4 +52,6 @@ #define SO_TIMESTAMPNS 35 #define SCM_TIMESTAMPNS SO_TIMESTAMPNS +#define SO_MARK 36 + #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-ia64/socket.h b/include/asm-ia64/socket.h index 9e42ce43cfbe..d5ef0aa3e312 100644 --- a/include/asm-ia64/socket.h +++ b/include/asm-ia64/socket.h @@ -61,4 +61,6 @@ #define SO_TIMESTAMPNS 35 #define SCM_TIMESTAMPNS SO_TIMESTAMPNS +#define SO_MARK 36 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/include/asm-m32r/socket.h b/include/asm-m32r/socket.h index 793d5d30c850..9a0e20012224 100644 --- a/include/asm-m32r/socket.h +++ b/include/asm-m32r/socket.h @@ -52,4 +52,6 @@ #define SO_TIMESTAMPNS 35 #define SCM_TIMESTAMPNS SO_TIMESTAMPNS +#define SO_MARK 36 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/include/asm-m68k/socket.h b/include/asm-m68k/socket.h index 6d21b90863ad..dbc64e92c41a 100644 --- a/include/asm-m68k/socket.h +++ b/include/asm-m68k/socket.h @@ -52,4 +52,6 @@ #define SO_TIMESTAMPNS 35 #define SCM_TIMESTAMPNS SO_TIMESTAMPNS +#define SO_MARK 36 + #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-mips/socket.h b/include/asm-mips/socket.h index 95945689b1c6..63f60254d308 100644 --- a/include/asm-mips/socket.h +++ b/include/asm-mips/socket.h @@ -73,6 +73,8 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ #define SO_TIMESTAMPNS 35 #define SCM_TIMESTAMPNS SO_TIMESTAMPNS +#define SO_MARK 36 + #ifdef __KERNEL__ /** sock_type - Socket types diff --git a/include/asm-parisc/socket.h b/include/asm-parisc/socket.h index 99e868f6a8f5..69a7a0d30b02 100644 --- a/include/asm-parisc/socket.h +++ b/include/asm-parisc/socket.h @@ -52,4 +52,6 @@ #define SO_PEERSEC 0x401d #define SO_PASSSEC 0x401e +#define SO_MARK 0x401f + #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-powerpc/socket.h b/include/asm-powerpc/socket.h index 403e9fde2eb5..f5a4e168e498 100644 --- a/include/asm-powerpc/socket.h +++ b/include/asm-powerpc/socket.h @@ -59,4 +59,6 @@ #define SO_TIMESTAMPNS 35 #define SCM_TIMESTAMPNS SO_TIMESTAMPNS +#define SO_MARK 36 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/include/asm-s390/socket.h b/include/asm-s390/socket.h index 1161ebe3dec9..c786ab623b2d 100644 --- a/include/asm-s390/socket.h +++ b/include/asm-s390/socket.h @@ -60,4 +60,6 @@ #define SO_TIMESTAMPNS 35 #define SCM_TIMESTAMPNS SO_TIMESTAMPNS +#define SO_MARK 36 + #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-sh/socket.h b/include/asm-sh/socket.h index c48d6fc9da38..6d4bf6512959 100644 --- a/include/asm-sh/socket.h +++ b/include/asm-sh/socket.h @@ -52,4 +52,6 @@ #define SO_TIMESTAMPNS 35 #define SCM_TIMESTAMPNS SO_TIMESTAMPNS +#define SO_MARK 36 + #endif /* __ASM_SH_SOCKET_H */ diff --git a/include/asm-sparc/socket.h b/include/asm-sparc/socket.h index 7c1423997cf0..2e2bd0b7c8e3 100644 --- a/include/asm-sparc/socket.h +++ b/include/asm-sparc/socket.h @@ -52,6 +52,8 @@ #define SO_TIMESTAMPNS 0x0021 #define SCM_TIMESTAMPNS SO_TIMESTAMPNS +#define SO_MARK 0x0022 + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/include/asm-sparc64/socket.h b/include/asm-sparc64/socket.h index 986441dcb8f0..44a625af6e31 100644 --- a/include/asm-sparc64/socket.h +++ b/include/asm-sparc64/socket.h @@ -57,4 +57,5 @@ #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 #define SO_SECURITY_ENCRYPTION_NETWORK 0x5004 +#define SO_MARK 0x0022 #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-v850/socket.h b/include/asm-v850/socket.h index a4c2493b025f..e199a2bf12aa 100644 --- a/include/asm-v850/socket.h +++ b/include/asm-v850/socket.h @@ -52,4 +52,6 @@ #define SO_TIMESTAMPNS 35 #define SCM_TIMESTAMPNS SO_TIMESTAMPNS +#define SO_MARK 36 + #endif /* __V850_SOCKET_H__ */ diff --git a/include/asm-x86/socket.h b/include/asm-x86/socket.h index 99ca648b94c5..80af9c4ccad7 100644 --- a/include/asm-x86/socket.h +++ b/include/asm-x86/socket.h @@ -52,4 +52,6 @@ #define SO_TIMESTAMPNS 35 #define SCM_TIMESTAMPNS SO_TIMESTAMPNS +#define SO_MARK 36 + #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-xtensa/socket.h b/include/asm-xtensa/socket.h index 1f5aeacb9da2..6100682b1da2 100644 --- a/include/asm-xtensa/socket.h +++ b/include/asm-xtensa/socket.h @@ -63,4 +63,6 @@ #define SO_TIMESTAMPNS 35 #define SCM_TIMESTAMPNS SO_TIMESTAMPNS +#define SO_MARK 36 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/net/route.h b/include/net/route.h index 4eabf008413b..fcc6d5b35863 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -149,6 +150,7 @@ static inline int ip_route_connect(struct rtable **rp, __be32 dst, int flags) { struct flowi fl = { .oif = oif, + .mark = sk->sk_mark, .nl_u = { .ip4_u = { .daddr = dst, .saddr = src, .tos = tos } }, diff --git a/include/net/sock.h b/include/net/sock.h index 902324488d0f..e3fb4c047f4c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -262,6 +262,8 @@ struct sock { __u32 sk_sndmsg_off; int sk_write_pending; void *sk_security; + __u32 sk_mark; + /* XXX 4 bytes hole on 64 bit */ void (*sk_state_change)(struct sock *sk); void (*sk_data_ready)(struct sock *sk, int bytes); void (*sk_write_space)(struct sock *sk); diff --git a/net/core/sock.c b/net/core/sock.c index 1c4b1cd16d65..433715fb141a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -667,6 +667,13 @@ set_rcvbuf: else clear_bit(SOCK_PASSSEC, &sock->flags); break; + case SO_MARK: + if (!capable(CAP_NET_ADMIN)) + ret = -EPERM; + else { + sk->sk_mark = val; + } + break; /* We implement the SO_SNDLOWAT etc to not be settable (1003.1g 5.3) */ @@ -836,6 +843,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_PEERSEC: return security_socket_getpeersec_stream(sock, optval, optlen, len); + case SO_MARK: + v.val = sk->sk_mark; + break; + default: return -ENOPROTOOPT; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6e4d5f493bf9..341779e685d9 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -168,6 +168,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, } skb->priority = sk->sk_priority; + skb->mark = sk->sk_mark; /* Send it out. */ return ip_local_out(skb); @@ -385,6 +386,7 @@ packet_routed: (skb_shinfo(skb)->gso_segs ?: 1) - 1); skb->priority = sk->sk_priority; + skb->mark = sk->sk_mark; return ip_local_out(skb); @@ -1286,6 +1288,7 @@ int ip_push_pending_frames(struct sock *sk) iph->daddr = rt->rt_dst; skb->priority = sk->sk_priority; + skb->mark = sk->sk_mark; skb->dst = dst_clone(&rt->u.dst); if (iph->protocol == IPPROTO_ICMP) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 85c08696abbe..f863c3dc5421 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -352,6 +352,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, skb_reserve(skb, hh_len); skb->priority = sk->sk_priority; + skb->mark = sk->sk_mark; skb->dst = dst_clone(&rt->u.dst); skb_reset_network_header(skb); @@ -544,6 +545,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { struct flowi fl = { .oif = ipc.oif, + .mark = sk->sk_mark, .nl_u = { .ip4_u = { .daddr = daddr, .saddr = saddr, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cfe9e707883c..9ac6ca2521c3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -257,6 +257,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, ipv6_addr_copy(&hdr->daddr, first_hop); skb->priority = sk->sk_priority; + skb->mark = sk->sk_mark; mtu = dst_mtu(dst); if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) { @@ -1439,6 +1440,7 @@ int ip6_push_pending_frames(struct sock *sk) ipv6_addr_copy(&hdr->daddr, final_dst); skb->priority = sk->sk_priority; + skb->mark = sk->sk_mark; skb->dst = dst_clone(&rt->u.dst); IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 4d880551fe6a..d61c63dedbf6 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -641,6 +641,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, skb_reserve(skb, hh_len); skb->priority = sk->sk_priority; + skb->mark = sk->sk_mark; skb->dst = dst_clone(&rt->u.dst); skb_put(skb, length); @@ -767,6 +768,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, */ memset(&fl, 0, sizeof(fl)); + fl.mark = sk->sk_mark; + if (sin6) { if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; -- cgit v1.2.3 From 533cb5b0a63f28ecab5503cfceb77e641fa7f7c4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Jan 2008 19:11:50 -0800 Subject: [XFRM]: constify 'struct xfrm_type' Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/xfrm.h | 8 ++++---- net/ipv4/ah4.c | 2 +- net/ipv4/esp4.c | 2 +- net/ipv4/ipcomp.c | 2 +- net/ipv4/xfrm4_tunnel.c | 2 +- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 2 +- net/ipv6/ipcomp6.c | 2 +- net/ipv6/mip6.c | 4 ++-- net/ipv6/xfrm6_tunnel.c | 2 +- net/xfrm/xfrm_state.c | 16 ++++++++-------- 11 files changed, 22 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 34d373775a0e..ac72116636ca 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -202,7 +202,7 @@ struct xfrm_state /* Reference to data common to all the instances of this * transformer. */ - struct xfrm_type *type; + const struct xfrm_type *type; struct xfrm_mode *inner_mode; struct xfrm_mode *outer_mode; @@ -279,7 +279,7 @@ struct xfrm_state_afinfo { unsigned int proto; unsigned int eth_proto; struct module *owner; - struct xfrm_type *type_map[IPPROTO_MAX]; + const struct xfrm_type *type_map[IPPROTO_MAX]; struct xfrm_mode *mode_map[XFRM_MODE_MAX]; int (*init_flags)(struct xfrm_state *x); void (*init_tempsel)(struct xfrm_state *x, struct flowi *fl, @@ -322,8 +322,8 @@ struct xfrm_type u32 (*get_mtu)(struct xfrm_state *, int size); }; -extern int xfrm_register_type(struct xfrm_type *type, unsigned short family); -extern int xfrm_unregister_type(struct xfrm_type *type, unsigned short family); +extern int xfrm_register_type(const struct xfrm_type *type, unsigned short family); +extern int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family); struct xfrm_mode { /* diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index d76803a3dcae..9d4555ec0b59 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -300,7 +300,7 @@ static void ah_destroy(struct xfrm_state *x) } -static struct xfrm_type ah_type = +static const struct xfrm_type ah_type = { .description = "AH4", .owner = THIS_MODULE, diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index fac4f102c9f9..258d17631b4b 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -600,7 +600,7 @@ error: return err; } -static struct xfrm_type esp_type = +static const struct xfrm_type esp_type = { .description = "ESP4", .owner = THIS_MODULE, diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index b79cdbee68a9..ae1f45fc23b9 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -437,7 +437,7 @@ error: goto out; } -static struct xfrm_type ipcomp_type = { +static const struct xfrm_type ipcomp_type = { .description = "IPCOMP4", .owner = THIS_MODULE, .proto = IPPROTO_COMP, diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 326845195620..87f77e412ba2 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -38,7 +38,7 @@ static void ipip_destroy(struct xfrm_state *x) { } -static struct xfrm_type ipip_type = { +static const struct xfrm_type ipip_type = { .description = "IPIP", .owner = THIS_MODULE, .proto = IPPROTO_IPIP, diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index fb0d07a15e93..379c8e04c36c 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -515,7 +515,7 @@ static void ah6_destroy(struct xfrm_state *x) kfree(ahp); } -static struct xfrm_type ah6_type = +static const struct xfrm_type ah6_type = { .description = "AH6", .owner = THIS_MODULE, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index ca48c56c4b17..8e0f1428c716 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -538,7 +538,7 @@ error: return err; } -static struct xfrm_type esp6_type = +static const struct xfrm_type esp6_type = { .description = "ESP6", .owner = THIS_MODULE, diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 710325e7a842..b90039593a7f 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -453,7 +453,7 @@ error: goto out; } -static struct xfrm_type ipcomp6_type = +static const struct xfrm_type ipcomp6_type = { .description = "IPCOMP6", .owner = THIS_MODULE, diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 49d396620eac..cd8a5bda13cd 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -330,7 +330,7 @@ static void mip6_destopt_destroy(struct xfrm_state *x) { } -static struct xfrm_type mip6_destopt_type = +static const struct xfrm_type mip6_destopt_type = { .description = "MIP6DESTOPT", .owner = THIS_MODULE, @@ -462,7 +462,7 @@ static void mip6_rthdr_destroy(struct xfrm_state *x) { } -static struct xfrm_type mip6_rthdr_type = +static const struct xfrm_type mip6_rthdr_type = { .description = "MIP6RT", .owner = THIS_MODULE, diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index fae90ff31087..639fe8a6ff1e 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -319,7 +319,7 @@ static void xfrm6_tunnel_destroy(struct xfrm_state *x) xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr); } -static struct xfrm_type xfrm6_tunnel_type = { +static const struct xfrm_type xfrm6_tunnel_type = { .description = "IP6IP6", .owner = THIS_MODULE, .proto = IPPROTO_IPV6, diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 3003503d0c94..4463eeb8d1de 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -216,10 +216,10 @@ static void xfrm_state_unlock_afinfo(struct xfrm_state_afinfo *afinfo) write_unlock_bh(&xfrm_state_afinfo_lock); } -int xfrm_register_type(struct xfrm_type *type, unsigned short family) +int xfrm_register_type(const struct xfrm_type *type, unsigned short family) { struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); - struct xfrm_type **typemap; + const struct xfrm_type **typemap; int err = 0; if (unlikely(afinfo == NULL)) @@ -235,10 +235,10 @@ int xfrm_register_type(struct xfrm_type *type, unsigned short family) } EXPORT_SYMBOL(xfrm_register_type); -int xfrm_unregister_type(struct xfrm_type *type, unsigned short family) +int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family) { struct xfrm_state_afinfo *afinfo = xfrm_state_lock_afinfo(family); - struct xfrm_type **typemap; + const struct xfrm_type **typemap; int err = 0; if (unlikely(afinfo == NULL)) @@ -254,11 +254,11 @@ int xfrm_unregister_type(struct xfrm_type *type, unsigned short family) } EXPORT_SYMBOL(xfrm_unregister_type); -static struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) +static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family) { struct xfrm_state_afinfo *afinfo; - struct xfrm_type **typemap; - struct xfrm_type *type; + const struct xfrm_type **typemap; + const struct xfrm_type *type; int modload_attempted = 0; retry: @@ -281,7 +281,7 @@ retry: return type; } -static void xfrm_put_type(struct xfrm_type *type) +static void xfrm_put_type(const struct xfrm_type *type) { module_put(type->owner); } -- cgit v1.2.3 From 377cf82d66ea43f3d3cb82c2563e65b5c12c9bfd Mon Sep 17 00:00:00 2001 From: Denis V. Lunev Date: Thu, 31 Jan 2008 03:46:12 -0800 Subject: [RAW]: Family check in the /proc/net/raw[6] is extra. Different hashtables are used for IPv6 and IPv4 raw sockets, so no need to check the socket family in the iterator over hashtables. Clean this out. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/raw.h | 4 +--- net/ipv4/raw.c | 12 ++++-------- net/ipv6/raw.c | 2 +- 3 files changed, 6 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/raw.h b/include/net/raw.h index cca81d8b2d8b..c7ea7a2aca86 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -41,7 +41,6 @@ extern void raw_proc_exit(void); struct raw_iter_state { struct seq_net_private p; int bucket; - unsigned short family; struct raw_hashinfo *h; }; @@ -49,8 +48,7 @@ struct raw_iter_state { void *raw_seq_start(struct seq_file *seq, loff_t *pos); void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos); void raw_seq_stop(struct seq_file *seq, void *v); -int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h, - unsigned short family); +int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h); #endif diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index f863c3dc5421..507cbfe72eb5 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -862,8 +862,7 @@ static struct sock *raw_get_first(struct seq_file *seq) struct hlist_node *node; sk_for_each(sk, node, &state->h->ht[state->bucket]) - if (sk->sk_net == state->p.net && - sk->sk_family == state->family) + if (sk->sk_net == state->p.net) goto found; } sk = NULL; @@ -879,8 +878,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) sk = sk_next(sk); try_again: ; - } while (sk && sk->sk_net != state->p.net && - sk->sk_family != state->family); + } while (sk && sk->sk_net != state->p.net); if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { sk = sk_head(&state->h->ht[state->bucket]); @@ -974,8 +972,7 @@ static const struct seq_operations raw_seq_ops = { .show = raw_seq_show, }; -int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h, - unsigned short family) +int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h) { int err; struct raw_iter_state *i; @@ -987,14 +984,13 @@ int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h, i = raw_seq_private((struct seq_file *)file->private_data); i->h = h; - i->family = family; return 0; } EXPORT_SYMBOL_GPL(raw_seq_open); static int raw_v4_seq_open(struct inode *inode, struct file *file) { - return raw_seq_open(inode, file, &raw_v4_hashinfo, PF_INET); + return raw_seq_open(inode, file, &raw_v4_hashinfo); } static const struct file_operations raw_seq_fops = { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d61c63dedbf6..a2cf49911ffd 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1262,7 +1262,7 @@ static const struct seq_operations raw6_seq_ops = { static int raw6_seq_open(struct inode *inode, struct file *file) { - return raw_seq_open(inode, file, &raw_v6_hashinfo, PF_INET6); + return raw_seq_open(inode, file, &raw_v6_hashinfo); } static const struct file_operations raw6_seq_fops = { -- cgit v1.2.3 From 3046d76746311ac7ff0cdc3ec42db15730528dbf Mon Sep 17 00:00:00 2001 From: Denis V. Lunev Date: Thu, 31 Jan 2008 03:48:55 -0800 Subject: [RAW]: Wrong content of the /proc/net/raw6. The address of IPv6 raw sockets was shown in the wrong format, from IPv4 ones. The problem has been introduced by the commit 42a73808ed4f30b739eb52bcbb33a02fe62ceef5 ("[RAW]: Consolidate proc interface.") Thanks to Adrian Bunk who originally noticed the problem. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/raw.h | 3 ++- net/ipv4/raw.c | 8 ++++---- net/ipv6/raw.c | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/raw.h b/include/net/raw.h index c7ea7a2aca86..1828f81fe374 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -48,7 +48,8 @@ struct raw_iter_state { void *raw_seq_start(struct seq_file *seq, loff_t *pos); void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos); void raw_seq_stop(struct seq_file *seq, void *v); -int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h); +int raw_seq_open(struct inode *ino, struct file *file, + struct raw_hashinfo *h, const struct seq_operations *ops); #endif diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 830f19e2fce9..a3002fe65b7f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -962,13 +962,13 @@ static const struct seq_operations raw_seq_ops = { .show = raw_seq_show, }; -int raw_seq_open(struct inode *ino, struct file *file, struct raw_hashinfo *h) +int raw_seq_open(struct inode *ino, struct file *file, + struct raw_hashinfo *h, const struct seq_operations *ops) { int err; struct raw_iter_state *i; - err = seq_open_net(ino, file, &raw_seq_ops, - sizeof(struct raw_iter_state)); + err = seq_open_net(ino, file, ops, sizeof(struct raw_iter_state)); if (err < 0) return err; @@ -980,7 +980,7 @@ EXPORT_SYMBOL_GPL(raw_seq_open); static int raw_v4_seq_open(struct inode *inode, struct file *file) { - return raw_seq_open(inode, file, &raw_v4_hashinfo); + return raw_seq_open(inode, file, &raw_v4_hashinfo, &raw_seq_ops); } static const struct file_operations raw_seq_fops = { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a2cf49911ffd..8897ccf8086a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1262,7 +1262,7 @@ static const struct seq_operations raw6_seq_ops = { static int raw6_seq_open(struct inode *inode, struct file *file) { - return raw_seq_open(inode, file, &raw_v6_hashinfo); + return raw_seq_open(inode, file, &raw_v6_hashinfo, &raw6_seq_ops); } static const struct file_operations raw6_seq_fops = { -- cgit v1.2.3 From ecb6f85e11627a0fb26a7e2db0d3603c0d602937 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 31 Jan 2008 03:54:47 -0800 Subject: [NETFILTER]: Use const in struct xt_match, xt_target, xt_table Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 6 +++--- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index b99ede51318a..90dc6ea2a683 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -214,7 +214,7 @@ struct xt_match /* Free to use by each match */ unsigned long data; - char *table; + const char *table; unsigned int matchsize; unsigned int compatsize; unsigned int hooks; @@ -261,7 +261,7 @@ struct xt_target /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; - char *table; + const char *table; unsigned int targetsize; unsigned int compatsize; unsigned int hooks; @@ -277,7 +277,7 @@ struct xt_table struct list_head list; /* A unique name... */ - char name[XT_TABLE_MAXNAMELEN]; + const char name[XT_TABLE_MAXNAMELEN]; /* What hooks you will enter on */ unsigned int valid_hooks; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 982b7f986291..628a6b58ddb8 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -291,7 +291,7 @@ static void trace_packet(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, - char *tablename, + const char *tablename, struct xt_table_info *private, struct ipt_entry *e) { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index dd7860fea61f..92b4898344ff 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -320,7 +320,7 @@ static void trace_packet(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, - char *tablename, + const char *tablename, struct xt_table_info *private, struct ip6t_entry *e) { -- cgit v1.2.3 From 2fd8e526f44beaf439f351b310648b559e62a7cb Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 31 Jan 2008 03:56:35 -0800 Subject: [NETFILTER]: bridge netfilter: remove nf_bridge_info read-only netoutdev member Before the removal of the deferred output hooks, netoutdev was used in case of VLANs on top of a bridge to store the VLAN device, so the deferred hooks would see the correct output device. This isn't necessary anymore since we're calling the output hooks for the correct device directly in the IP stack. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 --- net/bridge/br_netfilter.c | 4 ---- 2 files changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c618fbf7d173..dfe975a9967e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -108,9 +108,6 @@ struct nf_bridge_info { atomic_t use; struct net_device *physindev; struct net_device *physoutdev; -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - struct net_device *netoutdev; -#endif unsigned int mask; unsigned long data[32 / sizeof(unsigned long)]; }; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 80014bab81b0..1c0efd8ad9f3 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -828,10 +828,6 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, nf_bridge_pull_encap_header(skb); nf_bridge_save_header(skb); -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - if (nf_bridge->netoutdev) - realoutdev = nf_bridge->netoutdev; -#endif NF_HOOK(pf, NF_INET_POST_ROUTING, skb, NULL, realoutdev, br_nf_dev_queue_xmit); -- cgit v1.2.3 From c82a5cb8b2b2ce15f1fb8add6772921b72da5943 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 31 Jan 2008 03:57:36 -0800 Subject: linux/types.h: Use __u64 for aligned_u64 Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/types.h b/include/linux/types.h index f4f8d19158e4..b94c0e4efe24 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -126,7 +126,7 @@ typedef __s64 int64_t; #endif /* this is a special 64bit data type that is 8-byte aligned */ -#define aligned_u64 unsigned long long __attribute__((aligned(8))) +#define aligned_u64 __u64 __attribute__((aligned(8))) #define aligned_be64 __be64 __attribute__((aligned(8))) #define aligned_le64 __le64 __attribute__((aligned(8))) -- cgit v1.2.3 From b41649989c9640e54e47001994b7ecb927ea1822 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 31 Jan 2008 03:58:24 -0800 Subject: [NETFILTER]: xt_conntrack: add port and direction matching Extend the xt_conntrack match revision 1 by port matching (all four {orig,repl}{src,dst}) and by packet direction matching. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/xt_conntrack.h | 30 ++++++++++++-------- net/netfilter/xt_conntrack.c | 50 ++++++++++++++++++++++++++++++---- 2 files changed, 63 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/xt_conntrack.h b/include/linux/netfilter/xt_conntrack.h index d2492a3329be..f3fd83e46bab 100644 --- a/include/linux/netfilter/xt_conntrack.h +++ b/include/linux/netfilter/xt_conntrack.h @@ -6,9 +6,6 @@ #define _XT_CONNTRACK_H #include -#ifdef __KERNEL__ -# include -#endif #define XT_CONNTRACK_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP_CT_IS_REPLY+1)) #define XT_CONNTRACK_STATE_INVALID (1 << 0) @@ -18,14 +15,21 @@ #define XT_CONNTRACK_STATE_UNTRACKED (1 << (IP_CT_NUMBER + 3)) /* flags, invflags: */ -#define XT_CONNTRACK_STATE 0x01 -#define XT_CONNTRACK_PROTO 0x02 -#define XT_CONNTRACK_ORIGSRC 0x04 -#define XT_CONNTRACK_ORIGDST 0x08 -#define XT_CONNTRACK_REPLSRC 0x10 -#define XT_CONNTRACK_REPLDST 0x20 -#define XT_CONNTRACK_STATUS 0x40 -#define XT_CONNTRACK_EXPIRES 0x80 +enum { + XT_CONNTRACK_STATE = 1 << 0, + XT_CONNTRACK_PROTO = 1 << 1, + XT_CONNTRACK_ORIGSRC = 1 << 2, + XT_CONNTRACK_ORIGDST = 1 << 3, + XT_CONNTRACK_REPLSRC = 1 << 4, + XT_CONNTRACK_REPLDST = 1 << 5, + XT_CONNTRACK_STATUS = 1 << 6, + XT_CONNTRACK_EXPIRES = 1 << 7, + XT_CONNTRACK_ORIGSRC_PORT = 1 << 8, + XT_CONNTRACK_ORIGDST_PORT = 1 << 9, + XT_CONNTRACK_REPLSRC_PORT = 1 << 10, + XT_CONNTRACK_REPLDST_PORT = 1 << 11, + XT_CONNTRACK_DIRECTION = 1 << 12, +}; /* This is exposed to userspace, so remains frozen in time. */ struct ip_conntrack_old_tuple @@ -70,8 +74,10 @@ struct xt_conntrack_mtinfo1 { union nf_inet_addr repldst_addr, repldst_mask; u_int32_t expires_min, expires_max; u_int16_t l4proto; + __be16 origsrc_port, origdst_port; + __be16 replsrc_port, repldst_port; + u_int16_t match_flags, invert_flags; u_int8_t state_mask, status_mask; - u_int8_t match_flags, invert_flags; }; #endif /*_XT_CONNTRACK_H*/ diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index e92190eafcc5..85330856a29c 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -4,7 +4,6 @@ * * (C) 2001 Marc Boucher (marc@mbsi.ca). * Copyright © CC Computer Consultants GmbH, 2007 - 2008 - * Jan Engelhardt * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -20,6 +19,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher "); +MODULE_AUTHOR("Jan Engelhardt "); MODULE_DESCRIPTION("Xtables: connection tracking state match"); MODULE_ALIAS("ipt_conntrack"); MODULE_ALIAS("ip6t_conntrack"); @@ -166,6 +166,44 @@ conntrack_mt_repldst(const struct nf_conn *ct, &info->repldst_addr, &info->repldst_mask, family); } +static inline bool +ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info, + const struct nf_conn *ct) +{ + const struct nf_conntrack_tuple *tuple; + + tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + if ((info->match_flags & XT_CONNTRACK_PROTO) && + (tuple->dst.protonum == info->l4proto) ^ + !(info->invert_flags & XT_CONNTRACK_PROTO)) + return false; + + /* Shortcut to match all recognized protocols by using ->src.all. */ + if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) && + (tuple->src.u.all == info->origsrc_port) ^ + !(info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT)) + return false; + + if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) && + (tuple->dst.u.all == info->origdst_port) ^ + !(info->invert_flags & XT_CONNTRACK_ORIGDST_PORT)) + return false; + + tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) && + (tuple->src.u.all == info->replsrc_port) ^ + !(info->invert_flags & XT_CONNTRACK_REPLSRC_PORT)) + return false; + + if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) && + (tuple->dst.u.all == info->repldst_port) ^ + !(info->invert_flags & XT_CONNTRACK_REPLDST_PORT)) + return false; + + return true; +} + static bool conntrack_mt(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct xt_match *match, @@ -200,10 +238,9 @@ conntrack_mt(const struct sk_buff *skb, const struct net_device *in, if (ct == NULL) return info->match_flags & XT_CONNTRACK_STATE; - - if ((info->match_flags & XT_CONNTRACK_PROTO) && - ((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == - info->l4proto) ^ !(info->invert_flags & XT_CONNTRACK_PROTO))) + if ((info->match_flags & XT_CONNTRACK_DIRECTION) && + (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ^ + !!(info->invert_flags & XT_CONNTRACK_DIRECTION)) return false; if (info->match_flags & XT_CONNTRACK_ORIGSRC) @@ -226,6 +263,9 @@ conntrack_mt(const struct sk_buff *skb, const struct net_device *in, !(info->invert_flags & XT_CONNTRACK_REPLDST)) return false; + if (!ct_proto_port_check(info, ct)) + return false; + if ((info->match_flags & XT_CONNTRACK_STATUS) && (!!(info->status_mask & ct->status) ^ !(info->invert_flags & XT_CONNTRACK_STATUS))) -- cgit v1.2.3 From 000e8a53540b75a885efeb00ec1f1cb3c8d0bead Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 31 Jan 2008 03:58:56 -0800 Subject: [NETFILTER]: nf_log: add netfilter gcc printf format checking Signed-off-by: Helge Deller Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_log.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 037e82403f91..8c6b5ae45534 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -54,6 +54,6 @@ void nf_log_packet(int pf, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *li, - const char *fmt, ...); + const char *fmt, ...) __attribute__ ((format(printf,7,8))); #endif /* _NF_LOG_H */ -- cgit v1.2.3 From abfdf1c48907f78ad7d943b77ea180bf5504564f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 31 Jan 2008 03:59:24 -0800 Subject: [NETFILTER]: ebtables: remove casts, use consts Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/arp.h | 8 +++++--- net/bridge/netfilter/ebt_802_3.c | 6 +++--- net/bridge/netfilter/ebt_among.c | 24 +++++++++++++++--------- net/bridge/netfilter/ebt_arp.c | 13 ++++++++----- net/bridge/netfilter/ebt_arpreply.c | 13 ++++++++----- net/bridge/netfilter/ebt_dnat.c | 4 ++-- net/bridge/netfilter/ebt_ip.c | 10 ++++++---- net/bridge/netfilter/ebt_limit.c | 2 +- net/bridge/netfilter/ebt_log.c | 18 +++++++++++------- net/bridge/netfilter/ebt_mark.c | 4 ++-- net/bridge/netfilter/ebt_mark_m.c | 4 ++-- net/bridge/netfilter/ebt_pkttype.c | 4 ++-- net/bridge/netfilter/ebt_redirect.c | 4 ++-- net/bridge/netfilter/ebt_snat.c | 7 ++++--- net/bridge/netfilter/ebt_stp.c | 24 +++++++++++++----------- net/bridge/netfilter/ebt_ulog.c | 4 ++-- net/bridge/netfilter/ebt_vlan.c | 7 ++++--- net/ipv4/arp.c | 9 +++++---- 18 files changed, 95 insertions(+), 70 deletions(-) (limited to 'include') diff --git a/include/net/arp.h b/include/net/arp.h index 752eb47b2678..c236270ec95e 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -13,15 +13,17 @@ extern int arp_find(unsigned char *haddr, struct sk_buff *skb); extern int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg); extern void arp_send(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, - unsigned char *dest_hw, unsigned char *src_hw, unsigned char *th); + const unsigned char *dest_hw, + const unsigned char *src_hw, const unsigned char *th); extern int arp_bind_neighbour(struct dst_entry *dst); extern int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir); extern void arp_ifdown(struct net_device *dev); extern struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, - unsigned char *dest_hw, unsigned char *src_hw, - unsigned char *target_hw); + const unsigned char *dest_hw, + const unsigned char *src_hw, + const unsigned char *target_hw); extern void arp_xmit(struct sk_buff *skb); extern struct neigh_ops arp_broken_ops; diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c index 41a78072cd0e..ac1730b32aa2 100644 --- a/net/bridge/netfilter/ebt_802_3.c +++ b/net/bridge/netfilter/ebt_802_3.c @@ -15,8 +15,8 @@ static int ebt_filter_802_3(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { - struct ebt_802_3_info *info = (struct ebt_802_3_info *)data; - struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb); + const struct ebt_802_3_info *info = data; + const struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb); __be16 type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type; if (info->bitmask & EBT_802_3_SAP) { @@ -40,7 +40,7 @@ static struct ebt_match filter_802_3; static int ebt_802_3_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { - struct ebt_802_3_info *info = (struct ebt_802_3_info *)data; + const struct ebt_802_3_info *info = data; if (datalen < sizeof(struct ebt_802_3_info)) return -EINVAL; diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index 6436d30a550e..318157e1565d 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -25,7 +25,7 @@ static int ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, const struct ebt_mac_wormhash_tuple *p; int start, limit, i; uint32_t cmp[2] = { 0, 0 }; - int key = (const unsigned char) mac[5]; + int key = ((const unsigned char *)mac)[5]; memcpy(((char *) cmp) + 2, mac, 6); start = wh->table[key]; @@ -73,15 +73,18 @@ static int ebt_mac_wormhash_check_integrity(const struct ebt_mac_wormhash static int get_ip_dst(const struct sk_buff *skb, __be32 *addr) { if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) { - struct iphdr _iph, *ih; + const struct iphdr *ih; + struct iphdr _iph; ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) return -1; *addr = ih->daddr; } else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { - struct arphdr _arph, *ah; - __be32 buf, *bp; + const struct arphdr *ah; + struct arphdr _arph; + const __be32 *bp; + __be32 buf; ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL || @@ -101,15 +104,18 @@ static int get_ip_dst(const struct sk_buff *skb, __be32 *addr) static int get_ip_src(const struct sk_buff *skb, __be32 *addr) { if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) { - struct iphdr _iph, *ih; + const struct iphdr *ih; + struct iphdr _iph; ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) return -1; *addr = ih->saddr; } else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { - struct arphdr _arph, *ah; - __be32 buf, *bp; + const struct arphdr *ah; + struct arphdr _arph; + const __be32 *bp; + __be32 buf; ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL || @@ -130,7 +136,7 @@ static int ebt_filter_among(const struct sk_buff *skb, const struct net_device *out, const void *data, unsigned int datalen) { - struct ebt_among_info *info = (struct ebt_among_info *) data; + const struct ebt_among_info *info = data; const char *dmac, *smac; const struct ebt_mac_wormhash *wh_dst, *wh_src; __be32 dip = 0, sip = 0; @@ -175,7 +181,7 @@ static int ebt_among_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { - struct ebt_among_info *info = (struct ebt_among_info *) data; + const struct ebt_among_info *info = data; int expected_length = sizeof(struct ebt_among_info); const struct ebt_mac_wormhash *wh_dst, *wh_src; int err; diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index 18141392a9b4..933433ede38f 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -18,8 +18,9 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { - struct ebt_arp_info *info = (struct ebt_arp_info *)data; - struct arphdr _arph, *ah; + const struct ebt_arp_info *info = data; + const struct arphdr *ah; + struct arphdr _arph; ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL) @@ -35,7 +36,8 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in return EBT_NOMATCH; if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_GRAT)) { - __be32 saddr, daddr, *sap, *dap; + const __be32 *sap, *dap; + __be32 saddr, daddr; if (ah->ar_pln != sizeof(__be32) || ah->ar_pro != htons(ETH_P_IP)) return EBT_NOMATCH; @@ -61,7 +63,8 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in } if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) { - unsigned char _mac[ETH_ALEN], *mp; + const unsigned char *mp; + unsigned char _mac[ETH_ALEN]; uint8_t verdict, i; if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER)) @@ -100,7 +103,7 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in static int ebt_arp_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { - struct ebt_arp_info *info = (struct ebt_arp_info *)data; + const struct ebt_arp_info *info = data; if (datalen != EBT_ALIGN(sizeof(struct ebt_arp_info))) return -EINVAL; diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 48a80e423287..7b6a8c13ccd8 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -19,10 +19,13 @@ static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { - struct ebt_arpreply_info *info = (struct ebt_arpreply_info *)data; - __be32 _sip, *siptr, _dip, *diptr; - struct arphdr _ah, *ap; - unsigned char _sha[ETH_ALEN], *shp; + struct ebt_arpreply_info *info = (void *)data; + const __be32 *siptr, *diptr; + __be32 _sip, _dip; + const struct arphdr *ap; + struct arphdr _ah; + const unsigned char *shp; + unsigned char _sha[ETH_ALEN]; ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); if (ap == NULL) @@ -58,7 +61,7 @@ static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr, static int ebt_target_reply_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { - struct ebt_arpreply_info *info = (struct ebt_arpreply_info *)data; + const struct ebt_arpreply_info *info = data; if (datalen != EBT_ALIGN(sizeof(struct ebt_arpreply_info))) return -EINVAL; diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index 74262e9a566a..6ad91609b6ad 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -18,7 +18,7 @@ static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { - struct ebt_nat_info *info = (struct ebt_nat_info *)data; + const struct ebt_nat_info *info = data; if (skb_make_writable(skb, 0)) return NF_DROP; @@ -30,7 +30,7 @@ static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr, static int ebt_target_dnat_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { - struct ebt_nat_info *info = (struct ebt_nat_info *)data; + const struct ebt_nat_info *info = data; if (BASE_CHAIN && info->target == EBT_RETURN) return -EINVAL; diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index 69f7f0ab9c76..82934f9b1e02 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -28,9 +28,11 @@ static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { - struct ebt_ip_info *info = (struct ebt_ip_info *)data; - struct iphdr _iph, *ih; - struct tcpudphdr _ports, *pptr; + const struct ebt_ip_info *info = data; + const struct iphdr *ih; + struct iphdr _iph; + const struct tcpudphdr *pptr; + struct tcpudphdr _ports; ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) @@ -79,7 +81,7 @@ static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in, static int ebt_ip_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { - struct ebt_ip_info *info = (struct ebt_ip_info *)data; + const struct ebt_ip_info *info = data; if (datalen != EBT_ALIGN(sizeof(struct ebt_ip_info))) return -EINVAL; diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index d48fa5cb26cf..2eb5cb79662d 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -69,7 +69,7 @@ user2credits(u_int32_t user) static int ebt_limit_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { - struct ebt_limit_info *info = (struct ebt_limit_info *)data; + struct ebt_limit_info *info = data; if (datalen != EBT_ALIGN(sizeof(struct ebt_limit_info))) return -EINVAL; diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 3be9e9898553..40560d64d8c0 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -24,7 +24,7 @@ static DEFINE_SPINLOCK(ebt_log_lock); static int ebt_log_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { - struct ebt_log_info *info = (struct ebt_log_info *)data; + struct ebt_log_info *info = data; if (datalen != EBT_ALIGN(sizeof(struct ebt_log_info))) return -EINVAL; @@ -50,7 +50,7 @@ struct arppayload unsigned char ip_dst[4]; }; -static void print_MAC(unsigned char *p) +static void print_MAC(const unsigned char *p) { int i; @@ -84,7 +84,8 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum, if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto == htons(ETH_P_IP)){ - struct iphdr _iph, *ih; + const struct iphdr *ih; + struct iphdr _iph; ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) { @@ -99,7 +100,8 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum, ih->protocol == IPPROTO_UDPLITE || ih->protocol == IPPROTO_SCTP || ih->protocol == IPPROTO_DCCP) { - struct tcpudphdr _ports, *pptr; + const struct tcpudphdr *pptr; + struct tcpudphdr _ports; pptr = skb_header_pointer(skb, ih->ihl*4, sizeof(_ports), &_ports); @@ -116,7 +118,8 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum, if ((bitmask & EBT_LOG_ARP) && ((eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) || (eth_hdr(skb)->h_proto == htons(ETH_P_RARP)))) { - struct arphdr _arph, *ah; + const struct arphdr *ah; + struct arphdr _arph; ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL) { @@ -132,7 +135,8 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum, if (ah->ar_hrd == htons(1) && ah->ar_hln == ETH_ALEN && ah->ar_pln == sizeof(__be32)) { - struct arppayload _arpp, *ap; + const struct arppayload *ap; + struct arppayload _arpp; ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp); @@ -160,7 +164,7 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { - struct ebt_log_info *info = (struct ebt_log_info *)data; + const struct ebt_log_info *info = data; struct nf_loginfo li; li.type = NF_LOG_TYPE_LOG; diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 6cba54309c09..6fe93dfee9b0 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -21,7 +21,7 @@ static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { - struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; + const struct ebt_mark_t_info *info = data; int action = info->target & -16; if (action == MARK_SET_VALUE) @@ -39,7 +39,7 @@ static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr, static int ebt_target_mark_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { - struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; + const struct ebt_mark_t_info *info = data; int tmp; if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info))) diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c index 6b0d2169af74..0acab0917a63 100644 --- a/net/bridge/netfilter/ebt_mark_m.c +++ b/net/bridge/netfilter/ebt_mark_m.c @@ -16,7 +16,7 @@ static int ebt_filter_mark(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { - struct ebt_mark_m_info *info = (struct ebt_mark_m_info *) data; + const struct ebt_mark_m_info *info = data; if (info->bitmask & EBT_MARK_OR) return !(!!(skb->mark & info->mask) ^ info->invert); @@ -26,7 +26,7 @@ static int ebt_filter_mark(const struct sk_buff *skb, static int ebt_mark_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { - struct ebt_mark_m_info *info = (struct ebt_mark_m_info *) data; + const struct ebt_mark_m_info *info = data; if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_m_info))) return -EINVAL; diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c index 4fffd70e4da7..a15cf061bafb 100644 --- a/net/bridge/netfilter/ebt_pkttype.c +++ b/net/bridge/netfilter/ebt_pkttype.c @@ -18,7 +18,7 @@ static int ebt_filter_pkttype(const struct sk_buff *skb, const void *data, unsigned int datalen) { - struct ebt_pkttype_info *info = (struct ebt_pkttype_info *)data; + const struct ebt_pkttype_info *info = data; return (skb->pkt_type != info->pkt_type) ^ info->invert; } @@ -26,7 +26,7 @@ static int ebt_filter_pkttype(const struct sk_buff *skb, static int ebt_pkttype_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { - struct ebt_pkttype_info *info = (struct ebt_pkttype_info *)data; + const struct ebt_pkttype_info *info = data; if (datalen != EBT_ALIGN(sizeof(struct ebt_pkttype_info))) return -EINVAL; diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 422cb834cff9..c1f9ca293e9c 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -19,7 +19,7 @@ static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { - struct ebt_redirect_info *info = (struct ebt_redirect_info *)data; + const struct ebt_redirect_info *info = data; if (skb_make_writable(skb, 0)) return NF_DROP; @@ -36,7 +36,7 @@ static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr, static int ebt_target_redirect_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { - struct ebt_redirect_info *info = (struct ebt_redirect_info *)data; + const struct ebt_redirect_info *info = data; if (datalen != EBT_ALIGN(sizeof(struct ebt_redirect_info))) return -EINVAL; diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index 425ac920904d..6bc263c58981 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -20,7 +20,7 @@ static int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { - struct ebt_nat_info *info = (struct ebt_nat_info *) data; + const struct ebt_nat_info *info = data; if (skb_make_writable(skb, 0)) return NF_DROP; @@ -28,7 +28,8 @@ static int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr, memcpy(eth_hdr(skb)->h_source, info->mac, ETH_ALEN); if (!(info->target & NAT_ARP_BIT) && eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { - struct arphdr _ah, *ap; + const struct arphdr *ap; + struct arphdr _ah; ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); if (ap == NULL) @@ -45,7 +46,7 @@ out: static int ebt_target_snat_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { - struct ebt_nat_info *info = (struct ebt_nat_info *) data; + const struct ebt_nat_info *info = data; int tmp; if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info))) diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 31b77367319c..fe323c4db58e 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -40,10 +40,10 @@ struct stp_config_pdu { #define NR16(p) (p[0] << 8 | p[1]) #define NR32(p) ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]) -static int ebt_filter_config(struct ebt_stp_info *info, - struct stp_config_pdu *stpc) +static int ebt_filter_config(const struct ebt_stp_info *info, + const struct stp_config_pdu *stpc) { - struct ebt_stp_config_info *c; + const struct ebt_stp_config_info *c; uint16_t v16; uint32_t v32; int verdict, i; @@ -122,9 +122,10 @@ static int ebt_filter_config(struct ebt_stp_info *info, static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { - struct ebt_stp_info *info = (struct ebt_stp_info *)data; - struct stp_header _stph, *sp; - uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; + const struct ebt_stp_info *info = data; + const struct stp_header *sp; + struct stp_header _stph; + const uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph); if (sp == NULL) @@ -140,7 +141,8 @@ static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in if (sp->type == BPDU_TYPE_CONFIG && info->bitmask & EBT_STP_CONFIG_MASK) { - struct stp_config_pdu _stpc, *st; + const struct stp_config_pdu *st; + struct stp_config_pdu _stpc; st = skb_header_pointer(skb, sizeof(_stph), sizeof(_stpc), &_stpc); @@ -154,10 +156,10 @@ static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in static int ebt_stp_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { - struct ebt_stp_info *info = (struct ebt_stp_info *)data; - int len = EBT_ALIGN(sizeof(struct ebt_stp_info)); - uint8_t bridge_ula[6] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; - uint8_t msk[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + const struct ebt_stp_info *info = data; + const unsigned int len = EBT_ALIGN(sizeof(struct ebt_stp_info)); + const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00}; + const uint8_t msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK || !(info->bitmask & EBT_STP_MASK)) diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 8e7b00b68d38..2015711d94b9 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -249,7 +249,7 @@ static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { - struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data; + const struct ebt_ulog_info *uloginfo = data; ebt_ulog_packet(hooknr, skb, in, out, uloginfo, NULL); } @@ -258,7 +258,7 @@ static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr, static int ebt_ulog_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { - struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data; + struct ebt_ulog_info *uloginfo = data; if (datalen != EBT_ALIGN(sizeof(struct ebt_ulog_info)) || uloginfo->nlgroup > 31) diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index 0ddf7499d496..097d06701e49 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -46,8 +46,9 @@ ebt_filter_vlan(const struct sk_buff *skb, const struct net_device *out, const void *data, unsigned int datalen) { - struct ebt_vlan_info *info = (struct ebt_vlan_info *) data; - struct vlan_hdr _frame, *fp; + const struct ebt_vlan_info *info = data; + const struct vlan_hdr *fp; + struct vlan_hdr _frame; unsigned short TCI; /* Whole TCI, given from parsed frame */ unsigned short id; /* VLAN ID, given from frame TCI */ @@ -91,7 +92,7 @@ ebt_check_vlan(const char *tablename, unsigned int hooknr, const struct ebt_entry *e, void *data, unsigned int datalen) { - struct ebt_vlan_info *info = (struct ebt_vlan_info *) data; + struct ebt_vlan_info *info = data; /* Parameters buffer overflow check */ if (datalen != EBT_ALIGN(sizeof(struct ebt_vlan_info))) { diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 5976c598cc4b..8e17f65f4002 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -558,8 +558,9 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) */ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, - unsigned char *dest_hw, unsigned char *src_hw, - unsigned char *target_hw) + const unsigned char *dest_hw, + const unsigned char *src_hw, + const unsigned char *target_hw) { struct sk_buff *skb; struct arphdr *arp; @@ -672,8 +673,8 @@ void arp_xmit(struct sk_buff *skb) */ void arp_send(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, - unsigned char *dest_hw, unsigned char *src_hw, - unsigned char *target_hw) + const unsigned char *dest_hw, const unsigned char *src_hw, + const unsigned char *target_hw) { struct sk_buff *skb; -- cgit v1.2.3 From a98da11d88dbec1d5cebe2c6dbe9939ed8d13f69 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 31 Jan 2008 04:01:49 -0800 Subject: [NETFILTER]: x_tables: change xt_table_register() return value convention Switch from 0/-E to ptr/PTR_ERR convention. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 6 +++--- net/ipv4/netfilter/arp_tables.c | 7 ++++--- net/ipv4/netfilter/ip_tables.c | 7 ++++--- net/ipv6/netfilter/ip6_tables.c | 7 ++++--- net/netfilter/x_tables.c | 14 ++++++++------ 5 files changed, 23 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 90dc6ea2a683..937cebb3ab54 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -335,9 +335,9 @@ extern int xt_check_target(const struct xt_target *target, unsigned short family unsigned int size, const char *table, unsigned int hook, unsigned short proto, int inv_proto); -extern int xt_register_table(struct xt_table *table, - struct xt_table_info *bootstrap, - struct xt_table_info *newinfo); +extern struct xt_table *xt_register_table(struct xt_table *table, + struct xt_table_info *bootstrap, + struct xt_table_info *newinfo); extern void *xt_unregister_table(struct xt_table *table); extern struct xt_table_info *xt_replace_table(struct xt_table *table, diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index b4a810c28ac8..060de950e6ac 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1727,6 +1727,7 @@ int arpt_register_table(struct arpt_table *table, struct xt_table_info bootstrap = { 0, 0, 0, { 0 }, { 0 }, { } }; void *loc_cpu_entry; + struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) { @@ -1750,10 +1751,10 @@ int arpt_register_table(struct arpt_table *table, return ret; } - ret = xt_register_table(table, &bootstrap, newinfo); - if (ret != 0) { + new_table = xt_register_table(table, &bootstrap, newinfo); + if (IS_ERR(new_table)) { xt_free_table_info(newinfo); - return ret; + return PTR_ERR(new_table); } return 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 628a6b58ddb8..1b7c09e4a007 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2055,6 +2055,7 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) struct xt_table_info bootstrap = { 0, 0, 0, { 0 }, { 0 }, { } }; void *loc_cpu_entry; + struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) @@ -2074,10 +2075,10 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) return ret; } - ret = xt_register_table(table, &bootstrap, newinfo); - if (ret != 0) { + new_table = xt_register_table(table, &bootstrap, newinfo); + if (IS_ERR(new_table)) { xt_free_table_info(newinfo); - return ret; + return PTR_ERR(new_table); } return 0; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 92b4898344ff..f7a62398d7b7 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2081,6 +2081,7 @@ int ip6t_register_table(struct xt_table *table, const struct ip6t_replace *repl) struct xt_table_info bootstrap = { 0, 0, 0, { 0 }, { 0 }, { } }; void *loc_cpu_entry; + struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) @@ -2100,10 +2101,10 @@ int ip6t_register_table(struct xt_table *table, const struct ip6t_replace *repl) return ret; } - ret = xt_register_table(table, &bootstrap, newinfo); - if (ret != 0) { + new_table = xt_register_table(table, &bootstrap, newinfo); + if (IS_ERR(new_table)) { xt_free_table_info(newinfo); - return ret; + return PTR_ERR(new_table); } return 0; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8d4fca96a4a7..d8d8637739ba 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -660,9 +660,9 @@ xt_replace_table(struct xt_table *table, } EXPORT_SYMBOL_GPL(xt_replace_table); -int xt_register_table(struct xt_table *table, - struct xt_table_info *bootstrap, - struct xt_table_info *newinfo) +struct xt_table *xt_register_table(struct xt_table *table, + struct xt_table_info *bootstrap, + struct xt_table_info *newinfo) { int ret; struct xt_table_info *private; @@ -670,7 +670,7 @@ int xt_register_table(struct xt_table *table, ret = mutex_lock_interruptible(&xt[table->af].mutex); if (ret != 0) - return ret; + goto out; /* Don't autoload: we'd eat our tail... */ list_for_each_entry(t, &xt[table->af].tables, list) { @@ -693,11 +693,13 @@ int xt_register_table(struct xt_table *table, private->initial_entries = private->number; list_add(&table->list, &xt[table->af].tables); + mutex_unlock(&xt[table->af].mutex); + return table; - ret = 0; unlock: mutex_unlock(&xt[table->af].mutex); - return ret; +out: + return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(xt_register_table); -- cgit v1.2.3 From 8d870052079d255917ec4f8431f5ec102707b7af Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 31 Jan 2008 04:02:13 -0800 Subject: [NETFILTER]: x_tables: per-netns xt_tables In fact all we want is per-netns set of rules, however doing that will unnecessary complicate routines such as ipt_hook()/ipt_do_table, so make full xt_table array per-netns. Every user stubbed with init_net for a while. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 6 ++++-- include/net/net_namespace.h | 4 ++++ include/net/netns/x_tables.h | 10 ++++++++++ net/ipv4/netfilter/arp_tables.c | 12 ++++++------ net/ipv4/netfilter/ip_tables.c | 12 ++++++------ net/ipv6/netfilter/ip6_tables.c | 12 ++++++------ net/netfilter/x_tables.c | 34 ++++++++++++++++++++++++---------- 7 files changed, 60 insertions(+), 30 deletions(-) create mode 100644 include/net/netns/x_tables.h (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 937cebb3ab54..91a1dd5b9c66 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -335,7 +335,8 @@ extern int xt_check_target(const struct xt_target *target, unsigned short family unsigned int size, const char *table, unsigned int hook, unsigned short proto, int inv_proto); -extern struct xt_table *xt_register_table(struct xt_table *table, +extern struct xt_table *xt_register_table(struct net *net, + struct xt_table *table, struct xt_table_info *bootstrap, struct xt_table_info *newinfo); extern void *xt_unregister_table(struct xt_table *table); @@ -352,7 +353,8 @@ extern struct xt_target *xt_request_find_target(int af, const char *name, extern int xt_find_revision(int af, const char *name, u8 revision, int target, int *err); -extern struct xt_table *xt_find_table_lock(int af, const char *name); +extern struct xt_table *xt_find_table_lock(struct net *net, int af, + const char *name); extern void xt_table_unlock(struct xt_table *t); extern int xt_proto_init(int af); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index b8c1d60ba9e4..28738b7d53eb 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -12,6 +12,7 @@ #include #include #include +#include struct proc_dir_entry; struct net_device; @@ -56,6 +57,9 @@ struct net { #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct netns_ipv6 ipv6; #endif +#ifdef CONFIG_NETFILTER + struct netns_xt xt; +#endif }; #ifdef CONFIG_NET diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h new file mode 100644 index 000000000000..0cb63ed2c1fc --- /dev/null +++ b/include/net/netns/x_tables.h @@ -0,0 +1,10 @@ +#ifndef __NETNS_X_TABLES_H +#define __NETNS_X_TABLES_H + +#include +#include + +struct netns_xt { + struct list_head tables[NPROTO]; +}; +#endif diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 060de950e6ac..0da50a4a6578 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -870,7 +870,7 @@ static int get_info(void __user *user, int *len, int compat) if (compat) xt_compat_lock(NF_ARP); #endif - t = try_then_request_module(xt_find_table_lock(NF_ARP, name), + t = try_then_request_module(xt_find_table_lock(&init_net, NF_ARP, name), "arptable_%s", name); if (t && !IS_ERR(t)) { struct arpt_getinfo info; @@ -926,7 +926,7 @@ static int get_entries(struct arpt_get_entries __user *uptr, int *len) return -EINVAL; } - t = xt_find_table_lock(NF_ARP, get.name); + t = xt_find_table_lock(&init_net, NF_ARP, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", @@ -966,7 +966,7 @@ static int __do_replace(const char *name, unsigned int valid_hooks, goto out; } - t = try_then_request_module(xt_find_table_lock(NF_ARP, name), + t = try_then_request_module(xt_find_table_lock(&init_net, NF_ARP, name), "arptable_%s", name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -1132,7 +1132,7 @@ static int do_add_counters(void __user *user, unsigned int len, int compat) goto free; } - t = xt_find_table_lock(NF_ARP, name); + t = xt_find_table_lock(&init_net, NF_ARP, name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1604,7 +1604,7 @@ static int compat_get_entries(struct compat_arpt_get_entries __user *uptr, } xt_compat_lock(NF_ARP); - t = xt_find_table_lock(NF_ARP, get.name); + t = xt_find_table_lock(&init_net, NF_ARP, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; struct xt_table_info info; @@ -1751,7 +1751,7 @@ int arpt_register_table(struct arpt_table *table, return ret; } - new_table = xt_register_table(table, &bootstrap, newinfo); + new_table = xt_register_table(&init_net, table, &bootstrap, newinfo); if (IS_ERR(new_table)) { xt_free_table_info(newinfo); return PTR_ERR(new_table); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 1b7c09e4a007..bc22ea421a94 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1112,7 +1112,7 @@ static int get_info(void __user *user, int *len, int compat) if (compat) xt_compat_lock(AF_INET); #endif - t = try_then_request_module(xt_find_table_lock(AF_INET, name), + t = try_then_request_module(xt_find_table_lock(&init_net, AF_INET, name), "iptable_%s", name); if (t && !IS_ERR(t)) { struct ipt_getinfo info; @@ -1170,7 +1170,7 @@ get_entries(struct ipt_get_entries __user *uptr, int *len) return -EINVAL; } - t = xt_find_table_lock(AF_INET, get.name); + t = xt_find_table_lock(&init_net, AF_INET, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", private->number); @@ -1208,7 +1208,7 @@ __do_replace(const char *name, unsigned int valid_hooks, goto out; } - t = try_then_request_module(xt_find_table_lock(AF_INET, name), + t = try_then_request_module(xt_find_table_lock(&init_net, AF_INET, name), "iptable_%s", name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -1383,7 +1383,7 @@ do_add_counters(void __user *user, unsigned int len, int compat) goto free; } - t = xt_find_table_lock(AF_INET, name); + t = xt_find_table_lock(&init_net, AF_INET, name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1924,7 +1924,7 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) } xt_compat_lock(AF_INET); - t = xt_find_table_lock(AF_INET, get.name); + t = xt_find_table_lock(&init_net, AF_INET, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; struct xt_table_info info; @@ -2075,7 +2075,7 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) return ret; } - new_table = xt_register_table(table, &bootstrap, newinfo); + new_table = xt_register_table(&init_net, table, &bootstrap, newinfo); if (IS_ERR(new_table)) { xt_free_table_info(newinfo); return PTR_ERR(new_table); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index f7a62398d7b7..1aac3ef39414 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1138,7 +1138,7 @@ static int get_info(void __user *user, int *len, int compat) if (compat) xt_compat_lock(AF_INET6); #endif - t = try_then_request_module(xt_find_table_lock(AF_INET6, name), + t = try_then_request_module(xt_find_table_lock(&init_net, AF_INET6, name), "ip6table_%s", name); if (t && !IS_ERR(t)) { struct ip6t_getinfo info; @@ -1196,7 +1196,7 @@ get_entries(struct ip6t_get_entries __user *uptr, int *len) return -EINVAL; } - t = xt_find_table_lock(AF_INET6, get.name); + t = xt_find_table_lock(&init_net, AF_INET6, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", private->number); @@ -1235,7 +1235,7 @@ __do_replace(const char *name, unsigned int valid_hooks, goto out; } - t = try_then_request_module(xt_find_table_lock(AF_INET6, name), + t = try_then_request_module(xt_find_table_lock(&init_net, AF_INET6, name), "ip6table_%s", name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -1410,7 +1410,7 @@ do_add_counters(void __user *user, unsigned int len, int compat) goto free; } - t = xt_find_table_lock(AF_INET6, name); + t = xt_find_table_lock(&init_net, AF_INET6, name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1950,7 +1950,7 @@ compat_get_entries(struct compat_ip6t_get_entries __user *uptr, int *len) } xt_compat_lock(AF_INET6); - t = xt_find_table_lock(AF_INET6, get.name); + t = xt_find_table_lock(&init_net, AF_INET6, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; struct xt_table_info info; @@ -2101,7 +2101,7 @@ int ip6t_register_table(struct xt_table *table, const struct ip6t_replace *repl) return ret; } - new_table = xt_register_table(table, &bootstrap, newinfo); + new_table = xt_register_table(&init_net, table, &bootstrap, newinfo); if (IS_ERR(new_table)) { xt_free_table_info(newinfo); return PTR_ERR(new_table); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index d8d8637739ba..d62f722ccccb 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -44,7 +44,6 @@ struct xt_af { struct mutex mutex; struct list_head match; struct list_head target; - struct list_head tables; #ifdef CONFIG_COMPAT struct mutex compat_mutex; struct compat_delta *compat_offsets; @@ -597,14 +596,14 @@ void xt_free_table_info(struct xt_table_info *info) EXPORT_SYMBOL(xt_free_table_info); /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ -struct xt_table *xt_find_table_lock(int af, const char *name) +struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name) { struct xt_table *t; if (mutex_lock_interruptible(&xt[af].mutex) != 0) return ERR_PTR(-EINTR); - list_for_each_entry(t, &xt[af].tables, list) + list_for_each_entry(t, &net->xt.tables[af], list) if (strcmp(t->name, name) == 0 && try_module_get(t->me)) return t; mutex_unlock(&xt[af].mutex); @@ -660,7 +659,7 @@ xt_replace_table(struct xt_table *table, } EXPORT_SYMBOL_GPL(xt_replace_table); -struct xt_table *xt_register_table(struct xt_table *table, +struct xt_table *xt_register_table(struct net *net, struct xt_table *table, struct xt_table_info *bootstrap, struct xt_table_info *newinfo) { @@ -673,7 +672,7 @@ struct xt_table *xt_register_table(struct xt_table *table, goto out; /* Don't autoload: we'd eat our tail... */ - list_for_each_entry(t, &xt[table->af].tables, list) { + list_for_each_entry(t, &net->xt.tables[table->af], list) { if (strcmp(t->name, table->name) == 0) { ret = -EEXIST; goto unlock; @@ -692,7 +691,7 @@ struct xt_table *xt_register_table(struct xt_table *table, /* save number of initial entries */ private->initial_entries = private->number; - list_add(&table->list, &xt[table->af].tables); + list_add(&table->list, &net->xt.tables[table->af]); mutex_unlock(&xt[table->af].mutex); return table; @@ -744,7 +743,7 @@ static struct list_head *type2list(u_int16_t af, u_int16_t type) list = &xt[af].match; break; case TABLE: - list = &xt[af].tables; + list = &init_net.xt.tables[af]; break; default: list = NULL; @@ -919,10 +918,22 @@ void xt_proto_fini(int af) } EXPORT_SYMBOL_GPL(xt_proto_fini); +static int __net_init xt_net_init(struct net *net) +{ + int i; + + for (i = 0; i < NPROTO; i++) + INIT_LIST_HEAD(&net->xt.tables[i]); + return 0; +} + +static struct pernet_operations xt_net_ops = { + .init = xt_net_init, +}; static int __init xt_init(void) { - int i; + int i, rv; xt = kmalloc(sizeof(struct xt_af) * NPROTO, GFP_KERNEL); if (!xt) @@ -936,13 +947,16 @@ static int __init xt_init(void) #endif INIT_LIST_HEAD(&xt[i].target); INIT_LIST_HEAD(&xt[i].match); - INIT_LIST_HEAD(&xt[i].tables); } - return 0; + rv = register_pernet_subsys(&xt_net_ops); + if (rv < 0) + kfree(xt); + return rv; } static void __exit xt_fini(void) { + unregister_pernet_subsys(&xt_net_ops); kfree(xt); } -- cgit v1.2.3 From 44d34e721e2c81ccdfb13cf34996309247ae2981 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 31 Jan 2008 04:02:44 -0800 Subject: [NETFILTER]: x_tables: return new table from {arp,ip,ip6}t_register_table() Typical table module registers xt_table structure (i.e. packet_filter) and link it to list during it. We can't use one template for it because corresponding list_head will become corrupted. We also can't unregister with template because it wasn't changed at all and thus doesn't know in which list it is. So, we duplicate template at the very first step of table registration. Table modules will save it for use during unregistration time and actual filtering. Do it at once to not screw bisection. P.S.: renaming i.e. packet_filter => __packet_filter is temporary until full netnsization of table modules is done. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_arp/arp_tables.h | 4 ++-- include/linux/netfilter_ipv4/ip_tables.h | 5 +++-- include/linux/netfilter_ipv6/ip6_tables.h | 4 ++-- net/ipv4/netfilter/arp_tables.c | 22 ++++++++++++---------- net/ipv4/netfilter/arptable_filter.c | 15 ++++++++------- net/ipv4/netfilter/ip_tables.c | 28 +++++++++++++++++----------- net/ipv4/netfilter/iptable_filter.c | 18 ++++++++++-------- net/ipv4/netfilter/iptable_mangle.c | 18 ++++++++++-------- net/ipv4/netfilter/iptable_raw.c | 18 ++++++++++-------- net/ipv4/netfilter/nf_nat_rule.c | 16 +++++++++------- net/ipv6/netfilter/ip6_tables.c | 24 ++++++++++++++---------- net/ipv6/netfilter/ip6table_filter.c | 17 +++++++++-------- net/ipv6/netfilter/ip6table_mangle.c | 17 +++++++++-------- net/ipv6/netfilter/ip6table_raw.c | 15 ++++++++------- net/netfilter/x_tables.c | 12 +++++++++++- 15 files changed, 134 insertions(+), 99 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 53dd4df27aa1..f35486b3a7ca 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -271,8 +271,8 @@ struct arpt_error xt_register_target(tgt); }) #define arpt_unregister_target(tgt) xt_unregister_target(tgt) -extern int arpt_register_table(struct arpt_table *table, - const struct arpt_replace *repl); +extern struct arpt_table *arpt_register_table(struct arpt_table *table, + const struct arpt_replace *repl); extern void arpt_unregister_table(struct arpt_table *table); extern unsigned int arpt_do_table(struct sk_buff *skb, unsigned int hook, diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 45fcad91e67b..bfc889f90276 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -244,8 +244,9 @@ ipt_get_target(struct ipt_entry *e) #include extern void ipt_init(void) __init; -extern int ipt_register_table(struct xt_table *table, - const struct ipt_replace *repl); +extern struct xt_table *ipt_register_table(struct net *net, + struct xt_table *table, + const struct ipt_replace *repl); extern void ipt_unregister_table(struct xt_table *table); /* Standard entry. */ diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 110801d699ee..f716c7506245 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -305,8 +305,8 @@ ip6t_get_target(struct ip6t_entry *e) #include extern void ip6t_init(void) __init; -extern int ip6t_register_table(struct xt_table *table, - const struct ip6t_replace *repl); +extern struct xt_table *ip6t_register_table(struct xt_table *table, + const struct ip6t_replace *repl); extern void ip6t_unregister_table(struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, unsigned int hook, diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 0da50a4a6578..3a5afb84e69f 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1719,8 +1719,8 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len return ret; } -int arpt_register_table(struct arpt_table *table, - const struct arpt_replace *repl) +struct arpt_table *arpt_register_table(struct arpt_table *table, + const struct arpt_replace *repl) { int ret; struct xt_table_info *newinfo; @@ -1732,7 +1732,7 @@ int arpt_register_table(struct arpt_table *table, newinfo = xt_alloc_table_info(repl->size); if (!newinfo) { ret = -ENOMEM; - return ret; + goto out; } /* choose the copy on our node/cpu */ @@ -1746,18 +1746,20 @@ int arpt_register_table(struct arpt_table *table, repl->underflow); duprintf("arpt_register_table: translate table gives %d\n", ret); - if (ret != 0) { - xt_free_table_info(newinfo); - return ret; - } + if (ret != 0) + goto out_free; new_table = xt_register_table(&init_net, table, &bootstrap, newinfo); if (IS_ERR(new_table)) { - xt_free_table_info(newinfo); - return PTR_ERR(new_table); + ret = PTR_ERR(new_table); + goto out_free; } + return new_table; - return 0; +out_free: + xt_free_table_info(newinfo); +out: + return ERR_PTR(ret); } void arpt_unregister_table(struct arpt_table *table) diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 7201511d54d2..b00321506a92 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -45,7 +45,7 @@ static struct .term = ARPT_ERROR_INIT, }; -static struct arpt_table packet_filter = { +static struct arpt_table __packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, @@ -53,6 +53,7 @@ static struct arpt_table packet_filter = { .me = THIS_MODULE, .af = NF_ARP, }; +static struct arpt_table *packet_filter; /* The work comes in here from netfilter.c */ static unsigned int arpt_hook(unsigned int hook, @@ -61,7 +62,7 @@ static unsigned int arpt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return arpt_do_table(skb, hook, in, out, &packet_filter); + return arpt_do_table(skb, hook, in, out, packet_filter); } static struct nf_hook_ops arpt_ops[] __read_mostly = { @@ -90,9 +91,9 @@ static int __init arptable_filter_init(void) int ret; /* Register table */ - ret = arpt_register_table(&packet_filter, &initial_table.repl); - if (ret < 0) - return ret; + packet_filter = arpt_register_table(&__packet_filter, &initial_table.repl); + if (IS_ERR(packet_filter)) + return PTR_ERR(packet_filter); ret = nf_register_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); if (ret < 0) @@ -100,14 +101,14 @@ static int __init arptable_filter_init(void) return ret; cleanup_table: - arpt_unregister_table(&packet_filter); + arpt_unregister_table(packet_filter); return ret; } static void __exit arptable_filter_fini(void) { nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); - arpt_unregister_table(&packet_filter); + arpt_unregister_table(packet_filter); } module_init(arptable_filter_init); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index bc22ea421a94..99dd62d93f4b 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2048,7 +2048,8 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) +struct xt_table *ipt_register_table(struct net *net, struct xt_table *table, + const struct ipt_replace *repl) { int ret; struct xt_table_info *newinfo; @@ -2058,8 +2059,10 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); - if (!newinfo) - return -ENOMEM; + if (!newinfo) { + ret = -ENOMEM; + goto out; + } /* choose the copy on our node/cpu, but dont care about preemption */ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; @@ -2070,18 +2073,21 @@ int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) repl->num_entries, repl->hook_entry, repl->underflow); - if (ret != 0) { - xt_free_table_info(newinfo); - return ret; - } + if (ret != 0) + goto out_free; - new_table = xt_register_table(&init_net, table, &bootstrap, newinfo); + new_table = xt_register_table(net, table, &bootstrap, newinfo); if (IS_ERR(new_table)) { - xt_free_table_info(newinfo); - return PTR_ERR(new_table); + ret = PTR_ERR(new_table); + goto out_free; } - return 0; + return new_table; + +out_free: + xt_free_table_info(newinfo); +out: + return ERR_PTR(ret); } void ipt_unregister_table(struct xt_table *table) diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 29bb4f9fbda0..3b43ca07a26c 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -53,13 +53,14 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_filter = { +static struct xt_table __packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, .af = AF_INET, }; +static struct xt_table *packet_filter; /* The work comes in here from netfilter.c. */ static unsigned int @@ -69,7 +70,7 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, &packet_filter); + return ipt_do_table(skb, hook, in, out, packet_filter); } static unsigned int @@ -88,7 +89,7 @@ ipt_local_out_hook(unsigned int hook, return NF_ACCEPT; } - return ipt_do_table(skb, hook, in, out, &packet_filter); + return ipt_do_table(skb, hook, in, out, packet_filter); } static struct nf_hook_ops ipt_ops[] __read_mostly = { @@ -132,9 +133,10 @@ static int __init iptable_filter_init(void) initial_table.entries[1].target.verdict = -forward - 1; /* Register table */ - ret = ipt_register_table(&packet_filter, &initial_table.repl); - if (ret < 0) - return ret; + packet_filter = ipt_register_table(&init_net, &__packet_filter, + &initial_table.repl); + if (IS_ERR(packet_filter)) + return PTR_ERR(packet_filter); /* Register hooks */ ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); @@ -144,14 +146,14 @@ static int __init iptable_filter_init(void) return ret; cleanup_table: - ipt_unregister_table(&packet_filter); + ipt_unregister_table(packet_filter); return ret; } static void __exit iptable_filter_fini(void) { nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - ipt_unregister_table(&packet_filter); + ipt_unregister_table(packet_filter); } module_init(iptable_filter_init); diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 5c4be202430c..292f2ed4416f 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -64,13 +64,14 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_mangler = { +static struct xt_table __packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, .af = AF_INET, }; +static struct xt_table *packet_mangler; /* The work comes in here from netfilter.c. */ static unsigned int @@ -80,7 +81,7 @@ ipt_route_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, &packet_mangler); + return ipt_do_table(skb, hook, in, out, packet_mangler); } static unsigned int @@ -112,7 +113,7 @@ ipt_local_hook(unsigned int hook, daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(skb, hook, in, out, &packet_mangler); + ret = ipt_do_table(skb, hook, in, out, packet_mangler); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { iph = ip_hdr(skb); @@ -171,9 +172,10 @@ static int __init iptable_mangle_init(void) int ret; /* Register table */ - ret = ipt_register_table(&packet_mangler, &initial_table.repl); - if (ret < 0) - return ret; + packet_mangler = ipt_register_table(&init_net, &__packet_mangler, + &initial_table.repl); + if (IS_ERR(packet_mangler)) + return PTR_ERR(packet_mangler); /* Register hooks */ ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); @@ -183,14 +185,14 @@ static int __init iptable_mangle_init(void) return ret; cleanup_table: - ipt_unregister_table(&packet_mangler); + ipt_unregister_table(packet_mangler); return ret; } static void __exit iptable_mangle_fini(void) { nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - ipt_unregister_table(&packet_mangler); + ipt_unregister_table(packet_mangler); } module_init(iptable_mangle_init); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index dc34aa274533..dab863dd0558 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -36,13 +36,14 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_raw = { +static struct xt_table __packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, .af = AF_INET, }; +static struct xt_table *packet_raw; /* The work comes in here from netfilter.c. */ static unsigned int @@ -52,7 +53,7 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, &packet_raw); + return ipt_do_table(skb, hook, in, out, packet_raw); } static unsigned int @@ -70,7 +71,7 @@ ipt_local_hook(unsigned int hook, "packet.\n"); return NF_ACCEPT; } - return ipt_do_table(skb, hook, in, out, &packet_raw); + return ipt_do_table(skb, hook, in, out, packet_raw); } /* 'raw' is the very first table. */ @@ -96,9 +97,10 @@ static int __init iptable_raw_init(void) int ret; /* Register table */ - ret = ipt_register_table(&packet_raw, &initial_table.repl); - if (ret < 0) - return ret; + packet_raw = ipt_register_table(&init_net, &__packet_raw, + &initial_table.repl); + if (IS_ERR(packet_raw)) + return PTR_ERR(packet_raw); /* Register hooks */ ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); @@ -108,14 +110,14 @@ static int __init iptable_raw_init(void) return ret; cleanup_table: - ipt_unregister_table(&packet_raw); + ipt_unregister_table(packet_raw); return ret; } static void __exit iptable_raw_fini(void) { nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - ipt_unregister_table(&packet_raw); + ipt_unregister_table(packet_raw); } module_init(iptable_raw_init); diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 519182269e76..f8fda57ba20b 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -58,13 +58,14 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table nat_table = { +static struct xt_table __nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, .af = AF_INET, }; +static struct xt_table *nat_table; /* Source NAT */ static unsigned int ipt_snat_target(struct sk_buff *skb, @@ -214,7 +215,7 @@ int nf_nat_rule_find(struct sk_buff *skb, { int ret; - ret = ipt_do_table(skb, hooknum, in, out, &nat_table); + ret = ipt_do_table(skb, hooknum, in, out, nat_table); if (ret == NF_ACCEPT) { if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) @@ -248,9 +249,10 @@ int __init nf_nat_rule_init(void) { int ret; - ret = ipt_register_table(&nat_table, &nat_initial_table.repl); - if (ret != 0) - return ret; + nat_table = ipt_register_table(&init_net, &__nat_table, + &nat_initial_table.repl); + if (IS_ERR(nat_table)) + return PTR_ERR(nat_table); ret = xt_register_target(&ipt_snat_reg); if (ret != 0) goto unregister_table; @@ -264,7 +266,7 @@ int __init nf_nat_rule_init(void) unregister_snat: xt_unregister_target(&ipt_snat_reg); unregister_table: - ipt_unregister_table(&nat_table); + ipt_unregister_table(nat_table); return ret; } @@ -273,5 +275,5 @@ void nf_nat_rule_cleanup(void) { xt_unregister_target(&ipt_dnat_reg); xt_unregister_target(&ipt_snat_reg); - ipt_unregister_table(&nat_table); + ipt_unregister_table(nat_table); } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1aac3ef39414..b89f133f41d0 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2074,7 +2074,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -int ip6t_register_table(struct xt_table *table, const struct ip6t_replace *repl) +struct xt_table *ip6t_register_table(struct xt_table *table, const struct ip6t_replace *repl) { int ret; struct xt_table_info *newinfo; @@ -2084,8 +2084,10 @@ int ip6t_register_table(struct xt_table *table, const struct ip6t_replace *repl) struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); - if (!newinfo) - return -ENOMEM; + if (!newinfo) { + ret = -ENOMEM; + goto out; + } /* choose the copy on our node/cpu, but dont care about preemption */ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; @@ -2096,18 +2098,20 @@ int ip6t_register_table(struct xt_table *table, const struct ip6t_replace *repl) repl->num_entries, repl->hook_entry, repl->underflow); - if (ret != 0) { - xt_free_table_info(newinfo); - return ret; - } + if (ret != 0) + goto out_free; new_table = xt_register_table(&init_net, table, &bootstrap, newinfo); if (IS_ERR(new_table)) { - xt_free_table_info(newinfo); - return PTR_ERR(new_table); + ret = PTR_ERR(new_table); + goto out_free; } + return new_table; - return 0; +out_free: + xt_free_table_info(newinfo); +out: + return ERR_PTR(ret); } void ip6t_unregister_table(struct xt_table *table) diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 87d38d08aad0..bffd67f32359 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -51,13 +51,14 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_filter = { +static struct xt_table __packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, .af = AF_INET6, }; +static struct xt_table *packet_filter; /* The work comes in here from netfilter.c. */ static unsigned int @@ -67,7 +68,7 @@ ip6t_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, &packet_filter); + return ip6t_do_table(skb, hook, in, out, packet_filter); } static unsigned int @@ -87,7 +88,7 @@ ip6t_local_out_hook(unsigned int hook, } #endif - return ip6t_do_table(skb, hook, in, out, &packet_filter); + return ip6t_do_table(skb, hook, in, out, packet_filter); } static struct nf_hook_ops ip6t_ops[] __read_mostly = { @@ -131,9 +132,9 @@ static int __init ip6table_filter_init(void) initial_table.entries[1].target.verdict = -forward - 1; /* Register table */ - ret = ip6t_register_table(&packet_filter, &initial_table.repl); - if (ret < 0) - return ret; + packet_filter = ip6t_register_table(&__packet_filter, &initial_table.repl); + if (IS_ERR(packet_filter)) + return PTR_ERR(packet_filter); /* Register hooks */ ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); @@ -143,14 +144,14 @@ static int __init ip6table_filter_init(void) return ret; cleanup_table: - ip6t_unregister_table(&packet_filter); + ip6t_unregister_table(packet_filter); return ret; } static void __exit ip6table_filter_fini(void) { nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); - ip6t_unregister_table(&packet_filter); + ip6t_unregister_table(packet_filter); } module_init(ip6table_filter_init); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index d6082600bc5d..63d334df3b40 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -57,13 +57,14 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_mangler = { +static struct xt_table __packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, .af = AF_INET6, }; +static struct xt_table *packet_mangler; /* The work comes in here from netfilter.c. */ static unsigned int @@ -73,7 +74,7 @@ ip6t_route_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, &packet_mangler); + return ip6t_do_table(skb, hook, in, out, packet_mangler); } static unsigned int @@ -108,7 +109,7 @@ ip6t_local_hook(unsigned int hook, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(skb, hook, in, out, &packet_mangler); + ret = ip6t_do_table(skb, hook, in, out, packet_mangler); if (ret != NF_DROP && ret != NF_STOLEN && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) @@ -163,9 +164,9 @@ static int __init ip6table_mangle_init(void) int ret; /* Register table */ - ret = ip6t_register_table(&packet_mangler, &initial_table.repl); - if (ret < 0) - return ret; + packet_mangler = ip6t_register_table(&__packet_mangler, &initial_table.repl); + if (IS_ERR(packet_mangler)) + return PTR_ERR(packet_mangler); /* Register hooks */ ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); @@ -175,14 +176,14 @@ static int __init ip6table_mangle_init(void) return ret; cleanup_table: - ip6t_unregister_table(&packet_mangler); + ip6t_unregister_table(packet_mangler); return ret; } static void __exit ip6table_mangle_fini(void) { nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); - ip6t_unregister_table(&packet_mangler); + ip6t_unregister_table(packet_mangler); } module_init(ip6table_mangle_init); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index eccbaaa104af..7f55b236440e 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -35,13 +35,14 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_raw = { +static struct xt_table __packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, .af = AF_INET6, }; +static struct xt_table *packet_raw; /* The work comes in here from netfilter.c. */ static unsigned int @@ -51,7 +52,7 @@ ip6t_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, &packet_raw); + return ip6t_do_table(skb, hook, in, out, packet_raw); } static struct nf_hook_ops ip6t_ops[] __read_mostly = { @@ -76,9 +77,9 @@ static int __init ip6table_raw_init(void) int ret; /* Register table */ - ret = ip6t_register_table(&packet_raw, &initial_table.repl); - if (ret < 0) - return ret; + packet_raw = ip6t_register_table(&__packet_raw, &initial_table.repl); + if (IS_ERR(packet_raw)) + return PTR_ERR(packet_raw); /* Register hooks */ ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); @@ -88,14 +89,14 @@ static int __init ip6table_raw_init(void) return ret; cleanup_table: - ip6t_unregister_table(&packet_raw); + ip6t_unregister_table(packet_raw); return ret; } static void __exit ip6table_raw_fini(void) { nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); - ip6t_unregister_table(&packet_raw); + ip6t_unregister_table(packet_raw); } module_init(ip6table_raw_init); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index d62f722ccccb..d7fbb1bb92ea 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -667,9 +667,16 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table, struct xt_table_info *private; struct xt_table *t; + /* Don't add one object to multiple lists. */ + table = kmemdup(table, sizeof(struct xt_table), GFP_KERNEL); + if (!table) { + ret = -ENOMEM; + goto out; + } + ret = mutex_lock_interruptible(&xt[table->af].mutex); if (ret != 0) - goto out; + goto out_free; /* Don't autoload: we'd eat our tail... */ list_for_each_entry(t, &net->xt.tables[table->af], list) { @@ -697,6 +704,8 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table, unlock: mutex_unlock(&xt[table->af].mutex); +out_free: + kfree(table); out: return ERR_PTR(ret); } @@ -710,6 +719,7 @@ void *xt_unregister_table(struct xt_table *table) private = table->private; list_del(&table->list); mutex_unlock(&xt[table->af].mutex); + kfree(table); return private; } -- cgit v1.2.3 From 9335f047fe61587ec82ff12fbb1220bcfdd32006 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 31 Jan 2008 04:03:23 -0800 Subject: [NETFILTER]: ip_tables: per-netns FILTER, MANGLE, RAW Now, iptables show and configure different set of rules in different netnss'. Filtering decisions are still made by consulting only init_net's set. Changes are identical except naming so no splitting. P.S.: one need to remove init_net checks in nf_sockopt.c and inet_create() to see the effect. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 5 +++++ net/ipv4/netfilter/iptable_filter.c | 41 ++++++++++++++++++++++++++----------- net/ipv4/netfilter/iptable_mangle.c | 41 ++++++++++++++++++++++++++----------- net/ipv4/netfilter/iptable_raw.c | 41 ++++++++++++++++++++++++++----------- 4 files changed, 92 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 15a0b052df22..aeb0c3b8df11 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -27,5 +27,10 @@ struct netns_ipv4 { struct sock *fibnl; struct netns_frags frags; +#ifdef CONFIG_NETFILTER + struct xt_table *iptable_filter; + struct xt_table *iptable_mangle; + struct xt_table *iptable_raw; +#endif }; #endif diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 3b43ca07a26c..69f3d7e6e96f 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -28,7 +28,7 @@ static struct struct ipt_replace repl; struct ipt_standard entries[3]; struct ipt_error term; -} initial_table __initdata = { +} initial_table __net_initdata = { .repl = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, @@ -53,14 +53,13 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table __packet_filter = { +static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, .af = AF_INET, }; -static struct xt_table *packet_filter; /* The work comes in here from netfilter.c. */ static unsigned int @@ -70,7 +69,7 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, packet_filter); + return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter); } static unsigned int @@ -89,7 +88,7 @@ ipt_local_out_hook(unsigned int hook, return NF_ACCEPT; } - return ipt_do_table(skb, hook, in, out, packet_filter); + return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_filter); } static struct nf_hook_ops ipt_ops[] __read_mostly = { @@ -120,6 +119,26 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { static int forward = NF_ACCEPT; module_param(forward, bool, 0000); +static int __net_init iptable_filter_net_init(struct net *net) +{ + /* Register table */ + net->ipv4.iptable_filter = + ipt_register_table(net, &packet_filter, &initial_table.repl); + if (IS_ERR(net->ipv4.iptable_filter)) + return PTR_ERR(net->ipv4.iptable_filter); + return 0; +} + +static void __net_exit iptable_filter_net_exit(struct net *net) +{ + ipt_unregister_table(net->ipv4.iptable_filter); +} + +static struct pernet_operations iptable_filter_net_ops = { + .init = iptable_filter_net_init, + .exit = iptable_filter_net_exit, +}; + static int __init iptable_filter_init(void) { int ret; @@ -132,11 +151,9 @@ static int __init iptable_filter_init(void) /* Entry 1 is the FORWARD hook */ initial_table.entries[1].target.verdict = -forward - 1; - /* Register table */ - packet_filter = ipt_register_table(&init_net, &__packet_filter, - &initial_table.repl); - if (IS_ERR(packet_filter)) - return PTR_ERR(packet_filter); + ret = register_pernet_subsys(&iptable_filter_net_ops); + if (ret < 0) + return ret; /* Register hooks */ ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); @@ -146,14 +163,14 @@ static int __init iptable_filter_init(void) return ret; cleanup_table: - ipt_unregister_table(packet_filter); + unregister_pernet_subsys(&iptable_filter_net_ops); return ret; } static void __exit iptable_filter_fini(void) { nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - ipt_unregister_table(packet_filter); + unregister_pernet_subsys(&iptable_filter_net_ops); } module_init(iptable_filter_init); diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 292f2ed4416f..c55a210853a7 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -33,7 +33,7 @@ static struct struct ipt_replace repl; struct ipt_standard entries[5]; struct ipt_error term; -} initial_table __initdata = { +} initial_table __net_initdata = { .repl = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, @@ -64,14 +64,13 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table __packet_mangler = { +static struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, .af = AF_INET, }; -static struct xt_table *packet_mangler; /* The work comes in here from netfilter.c. */ static unsigned int @@ -81,7 +80,7 @@ ipt_route_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, packet_mangler); + return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle); } static unsigned int @@ -113,7 +112,7 @@ ipt_local_hook(unsigned int hook, daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(skb, hook, in, out, packet_mangler); + ret = ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_mangle); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { iph = ip_hdr(skb); @@ -167,15 +166,33 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { }, }; +static int __net_init iptable_mangle_net_init(struct net *net) +{ + /* Register table */ + net->ipv4.iptable_mangle = + ipt_register_table(net, &packet_mangler, &initial_table.repl); + if (IS_ERR(net->ipv4.iptable_mangle)) + return PTR_ERR(net->ipv4.iptable_mangle); + return 0; +} + +static void __net_exit iptable_mangle_net_exit(struct net *net) +{ + ipt_unregister_table(net->ipv4.iptable_mangle); +} + +static struct pernet_operations iptable_mangle_net_ops = { + .init = iptable_mangle_net_init, + .exit = iptable_mangle_net_exit, +}; + static int __init iptable_mangle_init(void) { int ret; - /* Register table */ - packet_mangler = ipt_register_table(&init_net, &__packet_mangler, - &initial_table.repl); - if (IS_ERR(packet_mangler)) - return PTR_ERR(packet_mangler); + ret = register_pernet_subsys(&iptable_mangle_net_ops); + if (ret < 0) + return ret; /* Register hooks */ ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); @@ -185,14 +202,14 @@ static int __init iptable_mangle_init(void) return ret; cleanup_table: - ipt_unregister_table(packet_mangler); + unregister_pernet_subsys(&iptable_mangle_net_ops); return ret; } static void __exit iptable_mangle_fini(void) { nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - ipt_unregister_table(packet_mangler); + unregister_pernet_subsys(&iptable_mangle_net_ops); } module_init(iptable_mangle_init); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index dab863dd0558..e41fe8ca4e1c 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -14,7 +14,7 @@ static struct struct ipt_replace repl; struct ipt_standard entries[2]; struct ipt_error term; -} initial_table __initdata = { +} initial_table __net_initdata = { .repl = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, @@ -36,14 +36,13 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table __packet_raw = { +static struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, .af = AF_INET, }; -static struct xt_table *packet_raw; /* The work comes in here from netfilter.c. */ static unsigned int @@ -53,7 +52,7 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, packet_raw); + return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw); } static unsigned int @@ -71,7 +70,7 @@ ipt_local_hook(unsigned int hook, "packet.\n"); return NF_ACCEPT; } - return ipt_do_table(skb, hook, in, out, packet_raw); + return ipt_do_table(skb, hook, in, out, init_net.ipv4.iptable_raw); } /* 'raw' is the very first table. */ @@ -92,15 +91,33 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { }, }; +static int __net_init iptable_raw_net_init(struct net *net) +{ + /* Register table */ + net->ipv4.iptable_raw = + ipt_register_table(net, &packet_raw, &initial_table.repl); + if (IS_ERR(net->ipv4.iptable_raw)) + return PTR_ERR(net->ipv4.iptable_raw); + return 0; +} + +static void __net_exit iptable_raw_net_exit(struct net *net) +{ + ipt_unregister_table(net->ipv4.iptable_raw); +} + +static struct pernet_operations iptable_raw_net_ops = { + .init = iptable_raw_net_init, + .exit = iptable_raw_net_exit, +}; + static int __init iptable_raw_init(void) { int ret; - /* Register table */ - packet_raw = ipt_register_table(&init_net, &__packet_raw, - &initial_table.repl); - if (IS_ERR(packet_raw)) - return PTR_ERR(packet_raw); + ret = register_pernet_subsys(&iptable_raw_net_ops); + if (ret < 0) + return ret; /* Register hooks */ ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); @@ -110,14 +127,14 @@ static int __init iptable_raw_init(void) return ret; cleanup_table: - ipt_unregister_table(packet_raw); + unregister_pernet_subsys(&iptable_raw_net_ops); return ret; } static void __exit iptable_raw_fini(void) { nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - ipt_unregister_table(packet_raw); + unregister_pernet_subsys(&iptable_raw_net_ops); } module_init(iptable_raw_init); -- cgit v1.2.3 From 336b517fdc0f92f54a3f77a2d0933f9556aa79ad Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 31 Jan 2008 04:03:45 -0800 Subject: [NETFILTER]: ip6_tables: netns preparation * Propagate netns from userspace down to xt_find_table_lock() * Register ip6 tables in netns (modules still use init_net) Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6/ip6_tables.h | 3 +- net/ipv6/netfilter/ip6_tables.c | 51 ++++++++++++++++--------------- net/ipv6/netfilter/ip6table_filter.c | 2 +- net/ipv6/netfilter/ip6table_mangle.c | 2 +- net/ipv6/netfilter/ip6table_raw.c | 2 +- 5 files changed, 32 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index f716c7506245..f2507dcc5750 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -305,7 +305,8 @@ ip6t_get_target(struct ip6t_entry *e) #include extern void ip6t_init(void) __init; -extern struct xt_table *ip6t_register_table(struct xt_table *table, +extern struct xt_table *ip6t_register_table(struct net *net, + struct xt_table *table, const struct ip6t_replace *repl); extern void ip6t_unregister_table(struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index b89f133f41d0..2453dfdc91aa 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1118,7 +1118,7 @@ static int compat_table_info(const struct xt_table_info *info, } #endif -static int get_info(void __user *user, int *len, int compat) +static int get_info(struct net *net, void __user *user, int *len, int compat) { char name[IP6T_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -1138,7 +1138,7 @@ static int get_info(void __user *user, int *len, int compat) if (compat) xt_compat_lock(AF_INET6); #endif - t = try_then_request_module(xt_find_table_lock(&init_net, AF_INET6, name), + t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), "ip6table_%s", name); if (t && !IS_ERR(t)) { struct ip6t_getinfo info; @@ -1178,7 +1178,7 @@ static int get_info(void __user *user, int *len, int compat) } static int -get_entries(struct ip6t_get_entries __user *uptr, int *len) +get_entries(struct net *net, struct ip6t_get_entries __user *uptr, int *len) { int ret; struct ip6t_get_entries get; @@ -1196,7 +1196,7 @@ get_entries(struct ip6t_get_entries __user *uptr, int *len) return -EINVAL; } - t = xt_find_table_lock(&init_net, AF_INET6, get.name); + t = xt_find_table_lock(net, AF_INET6, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", private->number); @@ -1217,7 +1217,7 @@ get_entries(struct ip6t_get_entries __user *uptr, int *len) } static int -__do_replace(const char *name, unsigned int valid_hooks, +__do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table_info *newinfo, unsigned int num_counters, void __user *counters_ptr) { @@ -1235,7 +1235,7 @@ __do_replace(const char *name, unsigned int valid_hooks, goto out; } - t = try_then_request_module(xt_find_table_lock(&init_net, AF_INET6, name), + t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), "ip6table_%s", name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -1288,7 +1288,7 @@ __do_replace(const char *name, unsigned int valid_hooks, } static int -do_replace(void __user *user, unsigned int len) +do_replace(struct net *net, void __user *user, unsigned int len) { int ret; struct ip6t_replace tmp; @@ -1322,7 +1322,7 @@ do_replace(void __user *user, unsigned int len) duprintf("ip_tables: Translated table\n"); - ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) goto free_newinfo_untrans; @@ -1358,7 +1358,8 @@ add_counter_to_entry(struct ip6t_entry *e, } static int -do_add_counters(void __user *user, unsigned int len, int compat) +do_add_counters(struct net *net, void __user *user, unsigned int len, + int compat) { unsigned int i; struct xt_counters_info tmp; @@ -1410,7 +1411,7 @@ do_add_counters(void __user *user, unsigned int len, int compat) goto free; } - t = xt_find_table_lock(&init_net, AF_INET6, name); + t = xt_find_table_lock(net, AF_INET6, name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1815,7 +1816,7 @@ out_unlock: } static int -compat_do_replace(void __user *user, unsigned int len) +compat_do_replace(struct net *net, void __user *user, unsigned int len) { int ret; struct compat_ip6t_replace tmp; @@ -1852,7 +1853,7 @@ compat_do_replace(void __user *user, unsigned int len) duprintf("compat_do_replace: Translated table\n"); - ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) goto free_newinfo_untrans; @@ -1876,11 +1877,11 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, switch (cmd) { case IP6T_SO_SET_REPLACE: - ret = compat_do_replace(user, len); + ret = compat_do_replace(sk->sk_net, user, len); break; case IP6T_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len, 1); + ret = do_add_counters(sk->sk_net, user, len, 1); break; default: @@ -1929,7 +1930,8 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, } static int -compat_get_entries(struct compat_ip6t_get_entries __user *uptr, int *len) +compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, + int *len) { int ret; struct compat_ip6t_get_entries get; @@ -1950,7 +1952,7 @@ compat_get_entries(struct compat_ip6t_get_entries __user *uptr, int *len) } xt_compat_lock(AF_INET6); - t = xt_find_table_lock(&init_net, AF_INET6, get.name); + t = xt_find_table_lock(net, AF_INET6, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; struct xt_table_info info; @@ -1986,10 +1988,10 @@ compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) switch (cmd) { case IP6T_SO_GET_INFO: - ret = get_info(user, len, 1); + ret = get_info(sk->sk_net, user, len, 1); break; case IP6T_SO_GET_ENTRIES: - ret = compat_get_entries(user, len); + ret = compat_get_entries(sk->sk_net, user, len); break; default: ret = do_ip6t_get_ctl(sk, cmd, user, len); @@ -2008,11 +2010,11 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) switch (cmd) { case IP6T_SO_SET_REPLACE: - ret = do_replace(user, len); + ret = do_replace(sk->sk_net, user, len); break; case IP6T_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len, 0); + ret = do_add_counters(sk->sk_net, user, len, 0); break; default: @@ -2033,11 +2035,11 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) switch (cmd) { case IP6T_SO_GET_INFO: - ret = get_info(user, len, 0); + ret = get_info(sk->sk_net, user, len, 0); break; case IP6T_SO_GET_ENTRIES: - ret = get_entries(user, len); + ret = get_entries(sk->sk_net, user, len); break; case IP6T_SO_GET_REVISION_MATCH: @@ -2074,7 +2076,8 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -struct xt_table *ip6t_register_table(struct xt_table *table, const struct ip6t_replace *repl) +struct xt_table *ip6t_register_table(struct net *net, struct xt_table *table, + const struct ip6t_replace *repl) { int ret; struct xt_table_info *newinfo; @@ -2101,7 +2104,7 @@ struct xt_table *ip6t_register_table(struct xt_table *table, const struct ip6t_r if (ret != 0) goto out_free; - new_table = xt_register_table(&init_net, table, &bootstrap, newinfo); + new_table = xt_register_table(net, table, &bootstrap, newinfo); if (IS_ERR(new_table)) { ret = PTR_ERR(new_table); goto out_free; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index bffd67f32359..d0bf71d40cc5 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -132,7 +132,7 @@ static int __init ip6table_filter_init(void) initial_table.entries[1].target.verdict = -forward - 1; /* Register table */ - packet_filter = ip6t_register_table(&__packet_filter, &initial_table.repl); + packet_filter = ip6t_register_table(&init_net, &__packet_filter, &initial_table.repl); if (IS_ERR(packet_filter)) return PTR_ERR(packet_filter); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 63d334df3b40..abdfece4ab82 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -164,7 +164,7 @@ static int __init ip6table_mangle_init(void) int ret; /* Register table */ - packet_mangler = ip6t_register_table(&__packet_mangler, &initial_table.repl); + packet_mangler = ip6t_register_table(&init_net, &__packet_mangler, &initial_table.repl); if (IS_ERR(packet_mangler)) return PTR_ERR(packet_mangler); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 7f55b236440e..12acd6300903 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -77,7 +77,7 @@ static int __init ip6table_raw_init(void) int ret; /* Register table */ - packet_raw = ip6t_register_table(&__packet_raw, &initial_table.repl); + packet_raw = ip6t_register_table(&init_net, &__packet_raw, &initial_table.repl); if (IS_ERR(packet_raw)) return PTR_ERR(packet_raw); -- cgit v1.2.3 From 8280aa6182f03c4e27dc235ce0440bc94927dc28 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 31 Jan 2008 04:04:13 -0800 Subject: [NETFILTER]: ip6_tables: per-netns IPv6 FILTER, MANGLE, RAW Now it's possible to list and manipulate per-netns ip6tables rules. Filtering decisions are based on init_net's table so far. P.S.: remove init_net check in inet6_create() to see the effect Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 5 +++++ net/ipv6/netfilter/ip6table_filter.c | 40 ++++++++++++++++++++++++++---------- net/ipv6/netfilter/ip6table_mangle.c | 40 ++++++++++++++++++++++++++---------- net/ipv6/netfilter/ip6table_raw.c | 38 +++++++++++++++++++++++++--------- 4 files changed, 91 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 187c4248df22..1dd7de4e4195 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -31,5 +31,10 @@ struct netns_ipv6 { struct ipv6_devconf *devconf_all; struct ipv6_devconf *devconf_dflt; struct netns_frags frags; +#ifdef CONFIG_NETFILTER + struct xt_table *ip6table_filter; + struct xt_table *ip6table_mangle; + struct xt_table *ip6table_raw; +#endif }; #endif diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index d0bf71d40cc5..2d9cd095a72c 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -26,7 +26,7 @@ static struct struct ip6t_replace repl; struct ip6t_standard entries[3]; struct ip6t_error term; -} initial_table __initdata = { +} initial_table __net_initdata = { .repl = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, @@ -51,14 +51,13 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table __packet_filter = { +static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, .af = AF_INET6, }; -static struct xt_table *packet_filter; /* The work comes in here from netfilter.c. */ static unsigned int @@ -68,7 +67,7 @@ ip6t_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, packet_filter); + return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_filter); } static unsigned int @@ -88,7 +87,7 @@ ip6t_local_out_hook(unsigned int hook, } #endif - return ip6t_do_table(skb, hook, in, out, packet_filter); + return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_filter); } static struct nf_hook_ops ip6t_ops[] __read_mostly = { @@ -119,6 +118,26 @@ static struct nf_hook_ops ip6t_ops[] __read_mostly = { static int forward = NF_ACCEPT; module_param(forward, bool, 0000); +static int __net_init ip6table_filter_net_init(struct net *net) +{ + /* Register table */ + net->ipv6.ip6table_filter = + ip6t_register_table(net, &packet_filter, &initial_table.repl); + if (IS_ERR(net->ipv6.ip6table_filter)) + return PTR_ERR(net->ipv6.ip6table_filter); + return 0; +} + +static void __net_exit ip6table_filter_net_exit(struct net *net) +{ + ip6t_unregister_table(net->ipv6.ip6table_filter); +} + +static struct pernet_operations ip6table_filter_net_ops = { + .init = ip6table_filter_net_init, + .exit = ip6table_filter_net_exit, +}; + static int __init ip6table_filter_init(void) { int ret; @@ -131,10 +150,9 @@ static int __init ip6table_filter_init(void) /* Entry 1 is the FORWARD hook */ initial_table.entries[1].target.verdict = -forward - 1; - /* Register table */ - packet_filter = ip6t_register_table(&init_net, &__packet_filter, &initial_table.repl); - if (IS_ERR(packet_filter)) - return PTR_ERR(packet_filter); + ret = register_pernet_subsys(&ip6table_filter_net_ops); + if (ret < 0) + return ret; /* Register hooks */ ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); @@ -144,14 +162,14 @@ static int __init ip6table_filter_init(void) return ret; cleanup_table: - ip6t_unregister_table(packet_filter); + unregister_pernet_subsys(&ip6table_filter_net_ops); return ret; } static void __exit ip6table_filter_fini(void) { nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); - ip6t_unregister_table(packet_filter); + unregister_pernet_subsys(&ip6table_filter_net_ops); } module_init(ip6table_filter_init); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index abdfece4ab82..035343a90ffe 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -26,7 +26,7 @@ static struct struct ip6t_replace repl; struct ip6t_standard entries[5]; struct ip6t_error term; -} initial_table __initdata = { +} initial_table __net_initdata = { .repl = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, @@ -57,14 +57,13 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table __packet_mangler = { +static struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, .af = AF_INET6, }; -static struct xt_table *packet_mangler; /* The work comes in here from netfilter.c. */ static unsigned int @@ -74,7 +73,7 @@ ip6t_route_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, packet_mangler); + return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_mangle); } static unsigned int @@ -109,7 +108,7 @@ ip6t_local_hook(unsigned int hook, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(skb, hook, in, out, packet_mangler); + ret = ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_mangle); if (ret != NF_DROP && ret != NF_STOLEN && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) @@ -159,14 +158,33 @@ static struct nf_hook_ops ip6t_ops[] __read_mostly = { }, }; +static int __net_init ip6table_mangle_net_init(struct net *net) +{ + /* Register table */ + net->ipv6.ip6table_mangle = + ip6t_register_table(net, &packet_mangler, &initial_table.repl); + if (IS_ERR(net->ipv6.ip6table_mangle)) + return PTR_ERR(net->ipv6.ip6table_mangle); + return 0; +} + +static void __net_exit ip6table_mangle_net_exit(struct net *net) +{ + ip6t_unregister_table(net->ipv6.ip6table_mangle); +} + +static struct pernet_operations ip6table_mangle_net_ops = { + .init = ip6table_mangle_net_init, + .exit = ip6table_mangle_net_exit, +}; + static int __init ip6table_mangle_init(void) { int ret; - /* Register table */ - packet_mangler = ip6t_register_table(&init_net, &__packet_mangler, &initial_table.repl); - if (IS_ERR(packet_mangler)) - return PTR_ERR(packet_mangler); + ret = register_pernet_subsys(&ip6table_mangle_net_ops); + if (ret < 0) + return ret; /* Register hooks */ ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); @@ -176,14 +194,14 @@ static int __init ip6table_mangle_init(void) return ret; cleanup_table: - ip6t_unregister_table(packet_mangler); + unregister_pernet_subsys(&ip6table_mangle_net_ops); return ret; } static void __exit ip6table_mangle_fini(void) { nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); - ip6t_unregister_table(packet_mangler); + unregister_pernet_subsys(&ip6table_mangle_net_ops); } module_init(ip6table_mangle_init); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 12acd6300903..5cd84203abfe 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -13,7 +13,7 @@ static struct struct ip6t_replace repl; struct ip6t_standard entries[2]; struct ip6t_error term; -} initial_table __initdata = { +} initial_table __net_initdata = { .repl = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, @@ -35,14 +35,13 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table __packet_raw = { +static struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, .af = AF_INET6, }; -static struct xt_table *packet_raw; /* The work comes in here from netfilter.c. */ static unsigned int @@ -52,7 +51,7 @@ ip6t_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, packet_raw); + return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_raw); } static struct nf_hook_ops ip6t_ops[] __read_mostly = { @@ -72,14 +71,33 @@ static struct nf_hook_ops ip6t_ops[] __read_mostly = { }, }; +static int __net_init ip6table_raw_net_init(struct net *net) +{ + /* Register table */ + net->ipv6.ip6table_raw = + ip6t_register_table(net, &packet_raw, &initial_table.repl); + if (IS_ERR(net->ipv6.ip6table_raw)) + return PTR_ERR(net->ipv6.ip6table_raw); + return 0; +} + +static void __net_exit ip6table_raw_net_exit(struct net *net) +{ + ip6t_unregister_table(net->ipv6.ip6table_raw); +} + +static struct pernet_operations ip6table_raw_net_ops = { + .init = ip6table_raw_net_init, + .exit = ip6table_raw_net_exit, +}; + static int __init ip6table_raw_init(void) { int ret; - /* Register table */ - packet_raw = ip6t_register_table(&init_net, &__packet_raw, &initial_table.repl); - if (IS_ERR(packet_raw)) - return PTR_ERR(packet_raw); + ret = register_pernet_subsys(&ip6table_raw_net_ops); + if (ret < 0) + return ret; /* Register hooks */ ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); @@ -89,14 +107,14 @@ static int __init ip6table_raw_init(void) return ret; cleanup_table: - ip6t_unregister_table(packet_raw); + unregister_pernet_subsys(&ip6table_raw_net_ops); return ret; } static void __exit ip6table_raw_fini(void) { nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); - ip6t_unregister_table(packet_raw); + unregister_pernet_subsys(&ip6table_raw_net_ops); } module_init(ip6table_raw_init); -- cgit v1.2.3 From 79df341ab6c0b1eab77921265ddd1b17ec4db13a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 31 Jan 2008 04:04:32 -0800 Subject: [NETFILTER]: arp_tables: netns preparation * Propagate netns from userspace. * arpt_register_table() registers table in supplied netns. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_arp/arp_tables.h | 3 +- net/ipv4/netfilter/arp_tables.c | 55 ++++++++++++++++++-------------- net/ipv4/netfilter/arptable_filter.c | 2 +- 3 files changed, 34 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index f35486b3a7ca..db223ca92c8b 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -271,7 +271,8 @@ struct arpt_error xt_register_target(tgt); }) #define arpt_unregister_target(tgt) xt_unregister_target(tgt) -extern struct arpt_table *arpt_register_table(struct arpt_table *table, +extern struct arpt_table *arpt_register_table(struct net *net, + struct arpt_table *table, const struct arpt_replace *repl); extern void arpt_unregister_table(struct arpt_table *table); extern unsigned int arpt_do_table(struct sk_buff *skb, diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 3a5afb84e69f..ec64b679641d 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -850,7 +851,7 @@ static int compat_table_info(const struct xt_table_info *info, } #endif -static int get_info(void __user *user, int *len, int compat) +static int get_info(struct net *net, void __user *user, int *len, int compat) { char name[ARPT_TABLE_MAXNAMELEN]; struct arpt_table *t; @@ -870,7 +871,7 @@ static int get_info(void __user *user, int *len, int compat) if (compat) xt_compat_lock(NF_ARP); #endif - t = try_then_request_module(xt_find_table_lock(&init_net, NF_ARP, name), + t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name), "arptable_%s", name); if (t && !IS_ERR(t)) { struct arpt_getinfo info; @@ -908,7 +909,8 @@ static int get_info(void __user *user, int *len, int compat) return ret; } -static int get_entries(struct arpt_get_entries __user *uptr, int *len) +static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, + int *len) { int ret; struct arpt_get_entries get; @@ -926,7 +928,7 @@ static int get_entries(struct arpt_get_entries __user *uptr, int *len) return -EINVAL; } - t = xt_find_table_lock(&init_net, NF_ARP, get.name); + t = xt_find_table_lock(net, NF_ARP, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", @@ -947,7 +949,8 @@ static int get_entries(struct arpt_get_entries __user *uptr, int *len) return ret; } -static int __do_replace(const char *name, unsigned int valid_hooks, +static int __do_replace(struct net *net, const char *name, + unsigned int valid_hooks, struct xt_table_info *newinfo, unsigned int num_counters, void __user *counters_ptr) @@ -966,7 +969,7 @@ static int __do_replace(const char *name, unsigned int valid_hooks, goto out; } - t = try_then_request_module(xt_find_table_lock(&init_net, NF_ARP, name), + t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name), "arptable_%s", name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -1019,7 +1022,7 @@ static int __do_replace(const char *name, unsigned int valid_hooks, return ret; } -static int do_replace(void __user *user, unsigned int len) +static int do_replace(struct net *net, void __user *user, unsigned int len) { int ret; struct arpt_replace tmp; @@ -1053,7 +1056,7 @@ static int do_replace(void __user *user, unsigned int len) duprintf("arp_tables: Translated table\n"); - ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) goto free_newinfo_untrans; @@ -1080,7 +1083,8 @@ static inline int add_counter_to_entry(struct arpt_entry *e, return 0; } -static int do_add_counters(void __user *user, unsigned int len, int compat) +static int do_add_counters(struct net *net, void __user *user, unsigned int len, + int compat) { unsigned int i; struct xt_counters_info tmp; @@ -1132,7 +1136,7 @@ static int do_add_counters(void __user *user, unsigned int len, int compat) goto free; } - t = xt_find_table_lock(&init_net, NF_ARP, name); + t = xt_find_table_lock(net, NF_ARP, name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1435,7 +1439,8 @@ struct compat_arpt_replace { struct compat_arpt_entry entries[0]; }; -static int compat_do_replace(void __user *user, unsigned int len) +static int compat_do_replace(struct net *net, void __user *user, + unsigned int len) { int ret; struct compat_arpt_replace tmp; @@ -1471,7 +1476,7 @@ static int compat_do_replace(void __user *user, unsigned int len) duprintf("compat_do_replace: Translated table\n"); - ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) goto free_newinfo_untrans; @@ -1494,11 +1499,11 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, switch (cmd) { case ARPT_SO_SET_REPLACE: - ret = compat_do_replace(user, len); + ret = compat_do_replace(sk->sk_net, user, len); break; case ARPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len, 1); + ret = do_add_counters(sk->sk_net, user, len, 1); break; default: @@ -1584,7 +1589,8 @@ struct compat_arpt_get_entries { struct compat_arpt_entry entrytable[0]; }; -static int compat_get_entries(struct compat_arpt_get_entries __user *uptr, +static int compat_get_entries(struct net *net, + struct compat_arpt_get_entries __user *uptr, int *len) { int ret; @@ -1604,7 +1610,7 @@ static int compat_get_entries(struct compat_arpt_get_entries __user *uptr, } xt_compat_lock(NF_ARP); - t = xt_find_table_lock(&init_net, NF_ARP, get.name); + t = xt_find_table_lock(net, NF_ARP, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; struct xt_table_info info; @@ -1641,10 +1647,10 @@ static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, switch (cmd) { case ARPT_SO_GET_INFO: - ret = get_info(user, len, 1); + ret = get_info(sk->sk_net, user, len, 1); break; case ARPT_SO_GET_ENTRIES: - ret = compat_get_entries(user, len); + ret = compat_get_entries(sk->sk_net, user, len); break; default: ret = do_arpt_get_ctl(sk, cmd, user, len); @@ -1662,11 +1668,11 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned switch (cmd) { case ARPT_SO_SET_REPLACE: - ret = do_replace(user, len); + ret = do_replace(sk->sk_net, user, len); break; case ARPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len, 0); + ret = do_add_counters(sk->sk_net, user, len, 0); break; default: @@ -1686,11 +1692,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len switch (cmd) { case ARPT_SO_GET_INFO: - ret = get_info(user, len, 0); + ret = get_info(sk->sk_net, user, len, 0); break; case ARPT_SO_GET_ENTRIES: - ret = get_entries(user, len); + ret = get_entries(sk->sk_net, user, len); break; case ARPT_SO_GET_REVISION_TARGET: { @@ -1719,7 +1725,8 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len return ret; } -struct arpt_table *arpt_register_table(struct arpt_table *table, +struct arpt_table *arpt_register_table(struct net *net, + struct arpt_table *table, const struct arpt_replace *repl) { int ret; @@ -1749,7 +1756,7 @@ struct arpt_table *arpt_register_table(struct arpt_table *table, if (ret != 0) goto out_free; - new_table = xt_register_table(&init_net, table, &bootstrap, newinfo); + new_table = xt_register_table(net, table, &bootstrap, newinfo); if (IS_ERR(new_table)) { ret = PTR_ERR(new_table); goto out_free; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index b00321506a92..1a688607fe83 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -91,7 +91,7 @@ static int __init arptable_filter_init(void) int ret; /* Register table */ - packet_filter = arpt_register_table(&__packet_filter, &initial_table.repl); + packet_filter = arpt_register_table(&init_net, &__packet_filter, &initial_table.repl); if (IS_ERR(packet_filter)) return PTR_ERR(packet_filter); -- cgit v1.2.3 From 9ea0cb2601c4747dff758a9a7a5a4a433ad527f3 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 31 Jan 2008 04:05:09 -0800 Subject: [NETFILTER]: arp_tables: per-netns arp_tables FILTER Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + net/ipv4/netfilter/arptable_filter.c | 38 ++++++++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index aeb0c3b8df11..a9b4f6086294 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -31,6 +31,7 @@ struct netns_ipv4 { struct xt_table *iptable_filter; struct xt_table *iptable_mangle; struct xt_table *iptable_raw; + struct xt_table *arptable_filter; #endif }; #endif diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 1a688607fe83..4e9c496a30c2 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -20,7 +20,7 @@ static struct struct arpt_replace repl; struct arpt_standard entries[3]; struct arpt_error term; -} initial_table __initdata = { +} initial_table __net_initdata = { .repl = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, @@ -45,7 +45,7 @@ static struct .term = ARPT_ERROR_INIT, }; -static struct arpt_table __packet_filter = { +static struct arpt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, @@ -53,7 +53,6 @@ static struct arpt_table __packet_filter = { .me = THIS_MODULE, .af = NF_ARP, }; -static struct arpt_table *packet_filter; /* The work comes in here from netfilter.c */ static unsigned int arpt_hook(unsigned int hook, @@ -62,7 +61,7 @@ static unsigned int arpt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return arpt_do_table(skb, hook, in, out, packet_filter); + return arpt_do_table(skb, hook, in, out, init_net.ipv4.arptable_filter); } static struct nf_hook_ops arpt_ops[] __read_mostly = { @@ -86,14 +85,33 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = { }, }; +static int __net_init arptable_filter_net_init(struct net *net) +{ + /* Register table */ + net->ipv4.arptable_filter = + arpt_register_table(net, &packet_filter, &initial_table.repl); + if (IS_ERR(net->ipv4.arptable_filter)) + return PTR_ERR(net->ipv4.arptable_filter); + return 0; +} + +static void __net_exit arptable_filter_net_exit(struct net *net) +{ + arpt_unregister_table(net->ipv4.arptable_filter); +} + +static struct pernet_operations arptable_filter_net_ops = { + .init = arptable_filter_net_init, + .exit = arptable_filter_net_exit, +}; + static int __init arptable_filter_init(void) { int ret; - /* Register table */ - packet_filter = arpt_register_table(&init_net, &__packet_filter, &initial_table.repl); - if (IS_ERR(packet_filter)) - return PTR_ERR(packet_filter); + ret = register_pernet_subsys(&arptable_filter_net_ops); + if (ret < 0) + return ret; ret = nf_register_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); if (ret < 0) @@ -101,14 +119,14 @@ static int __init arptable_filter_init(void) return ret; cleanup_table: - arpt_unregister_table(packet_filter); + unregister_pernet_subsys(&arptable_filter_net_ops); return ret; } static void __exit arptable_filter_fini(void) { nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); - arpt_unregister_table(packet_filter); + unregister_pernet_subsys(&arptable_filter_net_ops); } module_init(arptable_filter_init); -- cgit v1.2.3 From edc26f7aaa23591c779d6d6fc833c0c96fbeb3c0 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 31 Jan 2008 04:06:38 -0800 Subject: [NETFILTER]: xt_owner: allow matching UID/GID ranges Add support for ranges to the new revision. This doesn't affect compatibility since the new revision was not released yet. Signed-off-by: Jan Engelhardt Signed-off-by: David S. Miller --- include/linux/netfilter/xt_owner.h | 4 ++-- net/netfilter/xt_owner.c | 14 ++++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/xt_owner.h b/include/linux/netfilter/xt_owner.h index eacd34efebd5..c84e52cfe415 100644 --- a/include/linux/netfilter/xt_owner.h +++ b/include/linux/netfilter/xt_owner.h @@ -8,8 +8,8 @@ enum { }; struct xt_owner_match_info { - u_int32_t uid; - u_int32_t gid; + u_int32_t uid_min, uid_max; + u_int32_t gid_min, gid_max; u_int8_t match, invert; }; diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index d382f9cc38b0..9059c16144c3 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -4,8 +4,8 @@ * * (C) 2000 Marc Boucher * - * Copyright © CC Computer Consultants GmbH, 2007 - * Contact: + * Copyright © CC Computer Consultants GmbH, 2007 - 2008 + * * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -102,13 +102,15 @@ owner_mt(const struct sk_buff *skb, const struct net_device *in, (XT_OWNER_UID | XT_OWNER_GID)) == 0; if (info->match & XT_OWNER_UID) - if ((filp->f_uid != info->uid) ^ - !!(info->invert & XT_OWNER_UID)) + if ((filp->f_uid >= info->uid_min && + filp->f_uid <= info->uid_max) ^ + !(info->invert & XT_OWNER_UID)) return false; if (info->match & XT_OWNER_GID) - if ((filp->f_gid != info->gid) ^ - !!(info->invert & XT_OWNER_GID)) + if ((filp->f_gid >= info->gid_min && + filp->f_gid <= info->gid_max) ^ + !(info->invert & XT_OWNER_GID)) return false; return true; -- cgit v1.2.3 From 96eb24d770381b8a257b26183f6b6c131ad51ab9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 31 Jan 2008 04:07:29 -0800 Subject: [NETFILTER]: nf_conntrack: sparse warnings The hashtable size is really unsigned so sparse complains when you pass a signed integer. Change all uses to make it consistent. Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 4 ++-- net/netfilter/nf_conntrack_core.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 857d89951790..dada0411abd1 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -171,9 +171,9 @@ static inline void nf_ct_put(struct nf_conn *ct) extern int nf_ct_l3proto_try_module_get(unsigned short l3proto); extern void nf_ct_l3proto_module_put(unsigned short l3proto); -extern struct hlist_head *nf_ct_alloc_hashtable(int *sizep, int *vmalloced); +extern struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced); extern void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, - int size); + unsigned int size); extern struct nf_conntrack_tuple_hash * __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 078fff0335ad..7b1f7e80f2f8 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -939,7 +939,7 @@ static int kill_all(struct nf_conn *i, void *data) return 1; } -void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, int size) +void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int size) { if (vmalloced) vfree(hash); @@ -988,7 +988,7 @@ void nf_conntrack_cleanup(void) nf_conntrack_expect_fini(); } -struct hlist_head *nf_ct_alloc_hashtable(int *sizep, int *vmalloced) +struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced) { struct hlist_head *hash; unsigned int size, i; @@ -1015,8 +1015,8 @@ EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable); int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) { - int i, bucket, hashsize, vmalloced; - int old_vmalloced, old_size; + int i, bucket, vmalloced, old_vmalloced; + unsigned int hashsize, old_size; int rnd; struct hlist_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; @@ -1025,7 +1025,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) if (!nf_conntrack_htable_size) return param_set_uint(val, kp); - hashsize = simple_strtol(val, NULL, 0); + hashsize = simple_strtoul(val, NULL, 0); if (!hashsize) return -EINVAL; -- cgit v1.2.3 From b0a6363c2418c93f25dd30b8ffcd3fdd4ce23ad6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 31 Jan 2008 04:10:18 -0800 Subject: [NETFILTER]: {ip,arp,ip6}_tables: fix sparse warnings in compat code CHECK net/ipv4/netfilter/ip_tables.c net/ipv4/netfilter/ip_tables.c:1453:8: warning: incorrect type in argument 3 (different signedness) net/ipv4/netfilter/ip_tables.c:1453:8: expected int *size net/ipv4/netfilter/ip_tables.c:1453:8: got unsigned int [usertype] *size net/ipv4/netfilter/ip_tables.c:1458:44: warning: incorrect type in argument 3 (different signedness) net/ipv4/netfilter/ip_tables.c:1458:44: expected int *size net/ipv4/netfilter/ip_tables.c:1458:44: got unsigned int [usertype] *size net/ipv4/netfilter/ip_tables.c:1603:2: warning: incorrect type in argument 2 (different signedness) net/ipv4/netfilter/ip_tables.c:1603:2: expected unsigned int *i net/ipv4/netfilter/ip_tables.c:1603:2: got int * net/ipv4/netfilter/ip_tables.c:1627:8: warning: incorrect type in argument 3 (different signedness) net/ipv4/netfilter/ip_tables.c:1627:8: expected int *size net/ipv4/netfilter/ip_tables.c:1627:8: got unsigned int *size net/ipv4/netfilter/ip_tables.c:1634:40: warning: incorrect type in argument 3 (different signedness) net/ipv4/netfilter/ip_tables.c:1634:40: expected int *size net/ipv4/netfilter/ip_tables.c:1634:40: got unsigned int *size net/ipv4/netfilter/ip_tables.c:1653:8: warning: incorrect type in argument 5 (different signedness) net/ipv4/netfilter/ip_tables.c:1653:8: expected unsigned int *i net/ipv4/netfilter/ip_tables.c:1653:8: got int * net/ipv4/netfilter/ip_tables.c:1666:2: warning: incorrect type in argument 2 (different signedness) net/ipv4/netfilter/ip_tables.c:1666:2: expected unsigned int *i net/ipv4/netfilter/ip_tables.c:1666:2: got int * CHECK net/ipv4/netfilter/arp_tables.c net/ipv4/netfilter/arp_tables.c:1285:40: warning: incorrect type in argument 3 (different signedness) net/ipv4/netfilter/arp_tables.c:1285:40: expected int *size net/ipv4/netfilter/arp_tables.c:1285:40: got unsigned int *size net/ipv4/netfilter/arp_tables.c:1543:44: warning: incorrect type in argument 3 (different signedness) net/ipv4/netfilter/arp_tables.c:1543:44: expected int *size net/ipv4/netfilter/arp_tables.c:1543:44: got unsigned int [usertype] *size CHECK net/ipv6/netfilter/ip6_tables.c net/ipv6/netfilter/ip6_tables.c:1481:8: warning: incorrect type in argument 3 (different signedness) net/ipv6/netfilter/ip6_tables.c:1481:8: expected int *size net/ipv6/netfilter/ip6_tables.c:1481:8: got unsigned int [usertype] *size net/ipv6/netfilter/ip6_tables.c:1486:44: warning: incorrect type in argument 3 (different signedness) net/ipv6/netfilter/ip6_tables.c:1486:44: expected int *size net/ipv6/netfilter/ip6_tables.c:1486:44: got unsigned int [usertype] *size net/ipv6/netfilter/ip6_tables.c:1631:2: warning: incorrect type in argument 2 (different signedness) net/ipv6/netfilter/ip6_tables.c:1631:2: expected unsigned int *i net/ipv6/netfilter/ip6_tables.c:1631:2: got int * net/ipv6/netfilter/ip6_tables.c:1655:8: warning: incorrect type in argument 3 (different signedness) net/ipv6/netfilter/ip6_tables.c:1655:8: expected int *size net/ipv6/netfilter/ip6_tables.c:1655:8: got unsigned int *size net/ipv6/netfilter/ip6_tables.c:1662:40: warning: incorrect type in argument 3 (different signedness) net/ipv6/netfilter/ip6_tables.c:1662:40: expected int *size net/ipv6/netfilter/ip6_tables.c:1662:40: got unsigned int *size net/ipv6/netfilter/ip6_tables.c:1680:8: warning: incorrect type in argument 5 (different signedness) net/ipv6/netfilter/ip6_tables.c:1680:8: expected unsigned int *i net/ipv6/netfilter/ip6_tables.c:1680:8: got int * net/ipv6/netfilter/ip6_tables.c:1693:2: warning: incorrect type in argument 2 (different signedness) net/ipv6/netfilter/ip6_tables.c:1693:2: expected unsigned int *i net/ipv6/netfilter/ip6_tables.c:1693:2: got int * Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 8 ++++---- net/ipv4/netfilter/ip_tables.c | 10 ++++++---- net/ipv6/netfilter/ip6_tables.c | 10 ++++++---- net/netfilter/x_tables.c | 8 ++++---- 4 files changed, 20 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 91a1dd5b9c66..11eea39bbf7d 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -432,15 +432,15 @@ extern short xt_compat_calc_jump(int af, unsigned int offset); extern int xt_compat_match_offset(struct xt_match *match); extern int xt_compat_match_from_user(struct xt_entry_match *m, - void **dstptr, int *size); + void **dstptr, unsigned int *size); extern int xt_compat_match_to_user(struct xt_entry_match *m, - void __user **dstptr, int *size); + void __user **dstptr, unsigned int *size); extern int xt_compat_target_offset(struct xt_target *target); extern void xt_compat_target_from_user(struct xt_entry_target *t, - void **dstptr, int *size); + void **dstptr, unsigned int *size); extern int xt_compat_target_to_user(struct xt_entry_target *t, - void __user **dstptr, int *size); + void __user **dstptr, unsigned int *size); #endif /* CONFIG_COMPAT */ #endif /* __KERNEL__ */ diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 427bc9b3d342..a73afa1ba8b8 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1429,7 +1429,7 @@ struct compat_ipt_replace { static int compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, - compat_uint_t *size, struct xt_counters *counters, + unsigned int *size, struct xt_counters *counters, unsigned int *i) { struct ipt_entry_target *t; @@ -1476,7 +1476,7 @@ compat_find_calc_match(struct ipt_entry_match *m, const char *name, const struct ipt_ip *ip, unsigned int hookmask, - int *size, int *i) + int *size, unsigned int *i) { struct xt_match *match; @@ -1534,7 +1534,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, struct ipt_entry_target *t; struct xt_target *target; unsigned int entry_offset; - int ret, off, h, j; + unsigned int j; + int ret, off, h; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 @@ -1647,7 +1648,8 @@ static int compat_check_entry(struct ipt_entry *e, const char *name, unsigned int *i) { - int j, ret; + unsigned int j; + int ret; j = 0; ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 6fabb73ff445..b91738ab9f4e 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1457,7 +1457,7 @@ struct compat_ip6t_replace { static int compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, - compat_uint_t *size, struct xt_counters *counters, + unsigned int *size, struct xt_counters *counters, unsigned int *i) { struct ip6t_entry_target *t; @@ -1504,7 +1504,7 @@ compat_find_calc_match(struct ip6t_entry_match *m, const char *name, const struct ip6t_ip6 *ipv6, unsigned int hookmask, - int *size, int *i) + int *size, unsigned int *i) { struct xt_match *match; @@ -1562,7 +1562,8 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, struct ip6t_entry_target *t; struct xt_target *target; unsigned int entry_offset; - int ret, off, h, j; + unsigned int j; + int ret, off, h; duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 @@ -1674,7 +1675,8 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, static int compat_check_entry(struct ip6t_entry *e, const char *name, unsigned int *i) { - int j, ret; + unsigned int j; + int ret; j = 0; ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index d7fbb1bb92ea..cd78fc853a5d 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -399,7 +399,7 @@ int xt_compat_match_offset(struct xt_match *match) EXPORT_SYMBOL_GPL(xt_compat_match_offset); int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, - int *size) + unsigned int *size) { struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; @@ -426,7 +426,7 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, EXPORT_SYMBOL_GPL(xt_compat_match_from_user); int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr, - int *size) + unsigned int *size) { struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match __user *cm = *dstptr; @@ -493,7 +493,7 @@ int xt_compat_target_offset(struct xt_target *target) EXPORT_SYMBOL_GPL(xt_compat_target_offset); void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, - int *size) + unsigned int *size) { struct xt_target *target = t->u.kernel.target; struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; @@ -519,7 +519,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, EXPORT_SYMBOL_GPL(xt_compat_target_from_user); int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr, - int *size) + unsigned int *size) { struct xt_target *target = t->u.kernel.target; struct compat_xt_entry_target __user *ct = *dstptr; -- cgit v1.2.3 From 58a3c9bb0c69f8517c2243cd0912b3f87b4f868c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 31 Jan 2008 04:36:54 -0800 Subject: [NETFILTER]: nf_conntrack: use RCU for conntrack helpers Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_helper.h | 4 -- net/netfilter/nf_conntrack_helper.c | 58 +++++++++-------------------- net/netfilter/nf_conntrack_netlink.c | 9 ++--- 3 files changed, 21 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 2f3af00643cf..4ca125e9b3ce 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -42,13 +42,9 @@ struct nf_conntrack_helper extern struct nf_conntrack_helper * __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple); -extern struct nf_conntrack_helper * -nf_ct_helper_find_get( const struct nf_conntrack_tuple *tuple); - extern struct nf_conntrack_helper * __nf_conntrack_helper_find_byname(const char *name); -extern void nf_ct_helper_put(struct nf_conntrack_helper *helper); extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 96aa637c0932..42f781fcba64 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -28,6 +28,7 @@ #include #include +static DEFINE_MUTEX(nf_ct_helper_mutex); static struct hlist_head *nf_ct_helper_hash __read_mostly; static unsigned int nf_ct_helper_hsize __read_mostly; static unsigned int nf_ct_helper_count __read_mostly; @@ -54,42 +55,13 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) return NULL; h = helper_hash(tuple); - hlist_for_each_entry(helper, n, &nf_ct_helper_hash[h], hnode) { + hlist_for_each_entry_rcu(helper, n, &nf_ct_helper_hash[h], hnode) { if (nf_ct_tuple_src_mask_cmp(tuple, &helper->tuple, &mask)) return helper; } return NULL; } - -struct nf_conntrack_helper * -nf_ct_helper_find_get(const struct nf_conntrack_tuple *tuple) -{ - struct nf_conntrack_helper *helper; - - /* need nf_conntrack_lock to assure that helper exists until - * try_module_get() is called */ - read_lock_bh(&nf_conntrack_lock); - - helper = __nf_ct_helper_find(tuple); - if (helper) { - /* need to increase module usage count to assure helper will - * not go away while the caller is e.g. busy putting a - * conntrack in the hash that uses the helper */ - if (!try_module_get(helper->me)) - helper = NULL; - } - - read_unlock_bh(&nf_conntrack_lock); - - return helper; -} -EXPORT_SYMBOL_GPL(nf_ct_helper_find_get); - -void nf_ct_helper_put(struct nf_conntrack_helper *helper) -{ - module_put(helper->me); -} -EXPORT_SYMBOL_GPL(nf_ct_helper_put); +EXPORT_SYMBOL_GPL(__nf_ct_helper_find); struct nf_conntrack_helper * __nf_conntrack_helper_find_byname(const char *name) @@ -99,7 +71,7 @@ __nf_conntrack_helper_find_byname(const char *name) unsigned int i; for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry(h, n, &nf_ct_helper_hash[i], hnode) { + hlist_for_each_entry_rcu(h, n, &nf_ct_helper_hash[i], hnode) { if (!strcmp(h->name, name)) return h; } @@ -140,10 +112,10 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) BUG_ON(me->timeout == 0); - write_lock_bh(&nf_conntrack_lock); - hlist_add_head(&me->hnode, &nf_ct_helper_hash[h]); + mutex_lock(&nf_ct_helper_mutex); + hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]); nf_ct_helper_count++; - write_unlock_bh(&nf_conntrack_lock); + mutex_unlock(&nf_ct_helper_mutex); return 0; } @@ -156,10 +128,17 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) struct hlist_node *n, *next; unsigned int i; - /* Need write lock here, to delete helper. */ - write_lock_bh(&nf_conntrack_lock); - hlist_del(&me->hnode); + mutex_lock(&nf_ct_helper_mutex); + hlist_del_rcu(&me->hnode); nf_ct_helper_count--; + mutex_unlock(&nf_ct_helper_mutex); + + /* Make sure every nothing is still using the helper unless its a + * connection in the hash. + */ + synchronize_rcu(); + + write_lock_bh(&nf_conntrack_lock); /* Get rid of expectations */ for (i = 0; i < nf_ct_expect_hsize; i++) { @@ -182,9 +161,6 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) unhelp(h, me); } write_unlock_bh(&nf_conntrack_lock); - - /* Someone could be still looking at the helper in a bh. */ - synchronize_net(); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index bdae2924d425..b6c0935e09df 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1167,11 +1167,12 @@ ctnetlink_create_conntrack(struct nlattr *cda[], ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); #endif - helper = nf_ct_helper_find_get(rtuple); + rcu_read_lock(); + helper = __nf_ct_helper_find(rtuple); if (helper) { help = nf_ct_helper_ext_add(ct, GFP_KERNEL); if (help == NULL) { - nf_ct_helper_put(helper); + rcu_read_unlock(); err = -ENOMEM; goto err; } @@ -1187,9 +1188,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], add_timer(&ct->timeout); nf_conntrack_hash_insert(ct); - - if (helper) - nf_ct_helper_put(helper); + rcu_read_unlock(); return 0; -- cgit v1.2.3 From 7d0742da1c8f5df3a34030f0170b30d1a052be80 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 31 Jan 2008 04:38:19 -0800 Subject: [NETFILTER]: nf_conntrack_expect: use RCU for expectation hash Use RCU for expectation hash. This doesn't buy much for conntrack runtime performance, but allows to reduce the use of nf_conntrack_lock for /proc and nf_netlink_conntrack. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_expect.h | 2 + .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 14 ++++--- net/netfilter/nf_conntrack_expect.c | 43 ++++++++++++++-------- net/netfilter/nf_conntrack_netlink.c | 7 ++-- 4 files changed, 41 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 6c3fd254c28e..cb608a1b44e5 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -49,6 +49,8 @@ struct nf_conntrack_expect /* Direction relative to the master connection. */ enum ip_conntrack_dir dir; #endif + + struct rcu_head rcu; }; #define NF_CT_EXPECT_PERMANENT 0x1 diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 543c02b74c96..2fdcd9233a03 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -191,10 +191,12 @@ struct ct_expect_iter_state { static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { struct ct_expect_iter_state *st = seq->private; + struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - if (!hlist_empty(&nf_ct_expect_hash[st->bucket])) - return nf_ct_expect_hash[st->bucket].first; + n = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + if (n) + return n; } return NULL; } @@ -204,11 +206,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq, { struct ct_expect_iter_state *st = seq->private; - head = head->next; + head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_ct_expect_hsize) return NULL; - head = nf_ct_expect_hash[st->bucket].first; + head = rcu_dereference(nf_ct_expect_hash[st->bucket].first); } return head; } @@ -225,7 +227,7 @@ static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) static void *exp_seq_start(struct seq_file *seq, loff_t *pos) { - read_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); return ct_expect_get_idx(seq, *pos); } @@ -237,7 +239,7 @@ static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void exp_seq_stop(struct seq_file *seq, void *v) { - read_unlock_bh(&nf_conntrack_lock); + rcu_read_unlock(); } static int exp_seq_show(struct seq_file *s, void *v) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index e405079e5a49..a5c8ef01f925 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -50,7 +50,7 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) NF_CT_ASSERT(master_help); NF_CT_ASSERT(!timer_pending(&exp->timeout)); - hlist_del(&exp->hnode); + hlist_del_rcu(&exp->hnode); nf_ct_expect_count--; hlist_del(&exp->lnode); @@ -97,7 +97,7 @@ __nf_ct_expect_find(const struct nf_conntrack_tuple *tuple) return NULL; h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) { + hlist_for_each_entry_rcu(i, n, &nf_ct_expect_hash[h], hnode) { if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) return i; } @@ -111,11 +111,11 @@ nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; - read_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); i = __nf_ct_expect_find(tuple); - if (i) - atomic_inc(&i->use); - read_unlock_bh(&nf_conntrack_lock); + if (i && !atomic_inc_not_zero(&i->use)) + i = NULL; + rcu_read_unlock(); return i; } @@ -223,6 +223,7 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me) new->master = me; atomic_set(&new->use, 1); + INIT_RCU_HEAD(&new->rcu); return new; } EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); @@ -278,10 +279,18 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family, } EXPORT_SYMBOL_GPL(nf_ct_expect_init); +static void nf_ct_expect_free_rcu(struct rcu_head *head) +{ + struct nf_conntrack_expect *exp; + + exp = container_of(head, struct nf_conntrack_expect, rcu); + kmem_cache_free(nf_ct_expect_cachep, exp); +} + void nf_ct_expect_put(struct nf_conntrack_expect *exp) { if (atomic_dec_and_test(&exp->use)) - kmem_cache_free(nf_ct_expect_cachep, exp); + call_rcu(&exp->rcu, nf_ct_expect_free_rcu); } EXPORT_SYMBOL_GPL(nf_ct_expect_put); @@ -295,7 +304,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) hlist_add_head(&exp->lnode, &master_help->expectations); master_help->expecting++; - hlist_add_head(&exp->hnode, &nf_ct_expect_hash[h]); + hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]); nf_ct_expect_count++; setup_timer(&exp->timeout, nf_ct_expectation_timed_out, @@ -394,10 +403,12 @@ struct ct_expect_iter_state { static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { struct ct_expect_iter_state *st = seq->private; + struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - if (!hlist_empty(&nf_ct_expect_hash[st->bucket])) - return nf_ct_expect_hash[st->bucket].first; + n = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + if (n) + return n; } return NULL; } @@ -407,11 +418,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq, { struct ct_expect_iter_state *st = seq->private; - head = head->next; + head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_ct_expect_hsize) return NULL; - head = nf_ct_expect_hash[st->bucket].first; + head = rcu_dereference(nf_ct_expect_hash[st->bucket].first); } return head; } @@ -427,9 +438,9 @@ static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) } static void *exp_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(nf_conntrack_lock) + __acquires(RCU) { - read_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); return ct_expect_get_idx(seq, *pos); } @@ -440,9 +451,9 @@ static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void exp_seq_stop(struct seq_file *seq, void *v) - __releases(nf_conntrack_lock) + __releases(RCU) { - read_unlock_bh(&nf_conntrack_lock); + rcu_read_unlock(); } static int exp_seq_show(struct seq_file *s, void *v) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index b6c0935e09df..557f47137da0 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1471,7 +1471,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_node *n; u_int8_t l3proto = nfmsg->nfgen_family; - read_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); last = (struct nf_conntrack_expect *)cb->args[1]; for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { restart: @@ -1488,7 +1488,8 @@ restart: cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, 1, exp) < 0) { - atomic_inc(&exp->use); + if (!atomic_inc_not_zero(&exp->use)) + continue; cb->args[1] = (unsigned long)exp; goto out; } @@ -1499,7 +1500,7 @@ restart: } } out: - read_unlock_bh(&nf_conntrack_lock); + rcu_read_unlock(); if (last) nf_ct_expect_put(last); -- cgit v1.2.3 From 76507f69c44ed199a1a68086145398459e55835d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 31 Jan 2008 04:38:38 -0800 Subject: [NETFILTER]: nf_conntrack: use RCU for conntrack hash Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 2 + .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 18 ++++--- net/netfilter/nf_conntrack_core.c | 63 ++++++++++++++-------- net/netfilter/nf_conntrack_netlink.c | 11 ++-- net/netfilter/nf_conntrack_standalone.c | 18 ++++--- net/netfilter/xt_connlimit.c | 4 +- 6 files changed, 73 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index dada0411abd1..561ae7658f55 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -129,6 +129,8 @@ struct nf_conn /* Extensions */ struct nf_ct_ext *ext; + + struct rcu_head rcu; }; static inline struct nf_conn * diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 2fdcd9233a03..0ee87edbd286 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -39,12 +39,14 @@ struct ct_iter_state { static struct hlist_node *ct_get_first(struct seq_file *seq) { struct ct_iter_state *st = seq->private; + struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { - if (!hlist_empty(&nf_conntrack_hash[st->bucket])) - return nf_conntrack_hash[st->bucket].first; + n = rcu_dereference(nf_conntrack_hash[st->bucket].first); + if (n) + return n; } return NULL; } @@ -54,11 +56,11 @@ static struct hlist_node *ct_get_next(struct seq_file *seq, { struct ct_iter_state *st = seq->private; - head = head->next; + head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_conntrack_htable_size) return NULL; - head = nf_conntrack_hash[st->bucket].first; + head = rcu_dereference(nf_conntrack_hash[st->bucket].first); } return head; } @@ -74,8 +76,9 @@ static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos) } static void *ct_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) { - read_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); return ct_get_idx(seq, *pos); } @@ -86,8 +89,9 @@ static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) } static void ct_seq_stop(struct seq_file *s, void *v) + __releases(RCU) { - read_unlock_bh(&nf_conntrack_lock); + rcu_read_unlock(); } static int ct_seq_show(struct seq_file *s, void *v) @@ -226,6 +230,7 @@ static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) } static void *exp_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) { rcu_read_lock(); return ct_expect_get_idx(seq, *pos); @@ -238,6 +243,7 @@ static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void exp_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) { rcu_read_unlock(); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 10256079e634..a54bfec61e79 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -166,8 +166,8 @@ static void clean_from_lists(struct nf_conn *ct) { pr_debug("clean_from_lists(%p)\n", ct); - hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); - hlist_del(&ct->tuplehash[IP_CT_DIR_REPLY].hnode); + hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); + hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode); /* Destroy all pending expectations */ nf_ct_remove_expectations(ct); @@ -253,7 +253,7 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, struct hlist_node *n; unsigned int hash = hash_conntrack(tuple); - hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) { + hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) { if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && nf_ct_tuple_equal(tuple, &h->tuple)) { NF_CT_STAT_INC(found); @@ -271,12 +271,16 @@ struct nf_conntrack_tuple_hash * nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; - read_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); h = __nf_conntrack_find(tuple, NULL); - if (h) - atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); - read_unlock_bh(&nf_conntrack_lock); + if (h) { + ct = nf_ct_tuplehash_to_ctrack(h); + if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + h = NULL; + } + rcu_read_unlock(); return h; } @@ -286,10 +290,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int hash, unsigned int repl_hash) { - hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, - &nf_conntrack_hash[hash]); - hlist_add_head(&ct->tuplehash[IP_CT_DIR_REPLY].hnode, - &nf_conntrack_hash[repl_hash]); + hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, + &nf_conntrack_hash[hash]); + hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode, + &nf_conntrack_hash[repl_hash]); } void nf_conntrack_hash_insert(struct nf_conn *ct) @@ -392,9 +396,9 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, { struct nf_conntrack_tuple_hash *h; - read_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); h = __nf_conntrack_find(tuple, ignored_conntrack); - read_unlock_bh(&nf_conntrack_lock); + rcu_read_unlock(); return h != NULL; } @@ -413,21 +417,23 @@ static int early_drop(unsigned int hash) unsigned int i, cnt = 0; int dropped = 0; - read_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); for (i = 0; i < nf_conntrack_htable_size; i++) { - hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) { + hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], + hnode) { tmp = nf_ct_tuplehash_to_ctrack(h); if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) ct = tmp; cnt++; } + + if (ct && unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + ct = NULL; if (ct || cnt >= NF_CT_EVICTION_RANGE) break; hash = (hash + 1) % nf_conntrack_htable_size; } - if (ct) - atomic_inc(&ct->ct_general.use); - read_unlock_bh(&nf_conntrack_lock); + rcu_read_unlock(); if (!ct) return dropped; @@ -480,17 +486,25 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, /* Don't set timer yet: wait for confirmation */ setup_timer(&conntrack->timeout, death_by_timeout, (unsigned long)conntrack); + INIT_RCU_HEAD(&conntrack->rcu); return conntrack; } EXPORT_SYMBOL_GPL(nf_conntrack_alloc); -void nf_conntrack_free(struct nf_conn *conntrack) +static void nf_conntrack_free_rcu(struct rcu_head *head) { - nf_ct_ext_free(conntrack); - kmem_cache_free(nf_conntrack_cachep, conntrack); + struct nf_conn *ct = container_of(head, struct nf_conn, rcu); + + nf_ct_ext_free(ct); + kmem_cache_free(nf_conntrack_cachep, ct); atomic_dec(&nf_conntrack_count); } + +void nf_conntrack_free(struct nf_conn *conntrack) +{ + call_rcu(&conntrack->rcu, nf_conntrack_free_rcu); +} EXPORT_SYMBOL_GPL(nf_conntrack_free); /* Allocate a new conntrack: we return -ENOMEM if classification @@ -1036,12 +1050,17 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) * use a newrandom seed */ get_random_bytes(&rnd, 4); + /* Lookups in the old hash might happen in parallel, which means we + * might get false negatives during connection lookup. New connections + * created because of a false negative won't make it into the hash + * though since that required taking the lock. + */ write_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_conntrack_htable_size; i++) { while (!hlist_empty(&nf_conntrack_hash[i])) { h = hlist_entry(nf_conntrack_hash[i].first, struct nf_conntrack_tuple_hash, hnode); - hlist_del(&h->hnode); + hlist_del_rcu(&h->hnode); bucket = __hash_conntrack(&h->tuple, hashsize, rnd); hlist_add_head(&h->hnode, &hash[bucket]); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 557f47137da0..b701dcce0e69 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -545,12 +545,12 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); u_int8_t l3proto = nfmsg->nfgen_family; - read_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); last = (struct nf_conn *)cb->args[1]; for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: - hlist_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]], - hnode) { + hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[cb->args[0]], + hnode) { if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = nf_ct_tuplehash_to_ctrack(h); @@ -568,7 +568,8 @@ restart: cb->nlh->nlmsg_seq, IPCTNL_MSG_CT_NEW, 1, ct) < 0) { - nf_conntrack_get(&ct->ct_general); + if (!atomic_inc_not_zero(&ct->ct_general.use)) + continue; cb->args[1] = (unsigned long)ct; goto out; } @@ -584,7 +585,7 @@ restart: } } out: - read_unlock_bh(&nf_conntrack_lock); + rcu_read_unlock(); if (last) nf_ct_put(last); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 28c5ae8f5625..98f0cd31150d 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -58,12 +58,14 @@ struct ct_iter_state { static struct hlist_node *ct_get_first(struct seq_file *seq) { struct ct_iter_state *st = seq->private; + struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { - if (!hlist_empty(&nf_conntrack_hash[st->bucket])) - return nf_conntrack_hash[st->bucket].first; + n = rcu_dereference(nf_conntrack_hash[st->bucket].first); + if (n) + return n; } return NULL; } @@ -73,11 +75,11 @@ static struct hlist_node *ct_get_next(struct seq_file *seq, { struct ct_iter_state *st = seq->private; - head = head->next; + head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_conntrack_htable_size) return NULL; - head = nf_conntrack_hash[st->bucket].first; + head = rcu_dereference(nf_conntrack_hash[st->bucket].first); } return head; } @@ -93,9 +95,9 @@ static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos) } static void *ct_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(nf_conntrack_lock) + __acquires(RCU) { - read_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); return ct_get_idx(seq, *pos); } @@ -106,9 +108,9 @@ static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) } static void ct_seq_stop(struct seq_file *s, void *v) - __releases(nf_conntrack_lock) + __releases(RCU) { - read_unlock_bh(&nf_conntrack_lock); + rcu_read_unlock(); } /* return 0 on success, 1 in case of error */ diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index e00ecd974fa3..f9b59a6753ee 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -120,7 +120,7 @@ static int count_them(struct xt_connlimit_data *data, else hash = &data->iphash[connlimit_iphash(addr->ip & mask->ip)]; - read_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); /* check the saved connections */ list_for_each_entry_safe(conn, tmp, hash, list) { @@ -163,7 +163,7 @@ static int count_them(struct xt_connlimit_data *data, ++matches; } - read_unlock_bh(&nf_conntrack_lock); + rcu_read_unlock(); if (addit) { /* save the new connection in our list */ -- cgit v1.2.3 From f8ba1affa18398610e765736153fff614309ccc8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 31 Jan 2008 04:38:58 -0800 Subject: [NETFILTER]: nf_conntrack: switch rwlock to spinlock With the RCU conversion only write_lock usages of nf_conntrack_lock are left (except one read_lock that should actually use write_lock in the H.323 helper). Switch to a spinlock. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_core.h | 2 +- net/netfilter/nf_conntrack_core.c | 38 +++++++++++++++---------------- net/netfilter/nf_conntrack_expect.c | 12 +++++----- net/netfilter/nf_conntrack_h323_main.c | 4 ++-- net/netfilter/nf_conntrack_helper.c | 4 ++-- net/netfilter/nf_conntrack_netlink.c | 22 +++++++++--------- 6 files changed, 41 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 7ad0828f05cf..2b9e5713585a 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -72,7 +72,7 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l4proto *proto); extern struct hlist_head *nf_conntrack_hash; -extern rwlock_t nf_conntrack_lock ; +extern spinlock_t nf_conntrack_lock ; extern struct hlist_head unconfirmed; #endif /* _NF_CONNTRACK_CORE_H */ diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index a54bfec61e79..f284dddfc899 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -40,7 +40,7 @@ #define NF_CONNTRACK_VERSION "0.5.0" -DEFINE_RWLOCK(nf_conntrack_lock); +DEFINE_SPINLOCK(nf_conntrack_lock); EXPORT_SYMBOL_GPL(nf_conntrack_lock); /* nf_conntrack_standalone needs this */ @@ -199,7 +199,7 @@ destroy_conntrack(struct nf_conntrack *nfct) rcu_read_unlock(); - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); /* Expectations will have been removed in clean_from_lists, * except TFTP can create an expectation on the first packet, * before connection is in the list, so we need to clean here, @@ -213,7 +213,7 @@ destroy_conntrack(struct nf_conntrack *nfct) } NF_CT_STAT_INC(delete); - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); if (ct->master) nf_ct_put(ct->master); @@ -236,12 +236,12 @@ static void death_by_timeout(unsigned long ul_conntrack) rcu_read_unlock(); } - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); /* Inside lock so preempt is disabled on module removal path. * Otherwise we can get spurious warnings. */ NF_CT_STAT_INC(delete_list); clean_from_lists(ct); - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); nf_ct_put(ct); } @@ -303,9 +303,9 @@ void nf_conntrack_hash_insert(struct nf_conn *ct) hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); __nf_conntrack_hash_insert(ct, hash, repl_hash); - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); } EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert); @@ -342,7 +342,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); pr_debug("Confirming conntrack %p\n", ct); - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're @@ -368,7 +368,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) atomic_inc(&ct->ct_general.use); set_bit(IPS_CONFIRMED_BIT, &ct->status); NF_CT_STAT_INC(insert); - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); help = nfct_help(ct); if (help && help->helper) nf_conntrack_event_cache(IPCT_HELPER, skb); @@ -383,7 +383,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) out: NF_CT_STAT_INC(insert_failed); - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); return NF_DROP; } EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); @@ -538,7 +538,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, return NULL; } - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); exp = nf_ct_find_expectation(tuple); if (exp) { pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", @@ -576,7 +576,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, hlist_add_head(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].hnode, &unconfirmed); - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); if (exp) { if (exp->expectfn) @@ -787,7 +787,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); NF_CT_ASSERT(skb); - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); /* Only update if this is not a fixed timeout */ if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) @@ -824,7 +824,7 @@ acct: } #endif - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); /* must be unlocked when calling event cache */ if (event) @@ -909,7 +909,7 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), struct nf_conn *ct; struct hlist_node *n; - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { hlist_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnode) { ct = nf_ct_tuplehash_to_ctrack(h); @@ -922,11 +922,11 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), if (iter(ct, data)) set_bit(IPS_DYING_BIT, &ct->status); } - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); return NULL; found: atomic_inc(&ct->ct_general.use); - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); return ct; } @@ -1055,7 +1055,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) * created because of a false negative won't make it into the hash * though since that required taking the lock. */ - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_conntrack_htable_size; i++) { while (!hlist_empty(&nf_conntrack_hash[i])) { h = hlist_entry(nf_conntrack_hash[i].first, @@ -1073,7 +1073,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) nf_conntrack_vmalloc = vmalloced; nf_conntrack_hash = hash; nf_conntrack_hash_rnd = rnd; - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); return 0; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index a5c8ef01f925..e06bf0028bb1 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -65,9 +65,9 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect) { struct nf_conntrack_expect *exp = (void *)ul_expect; - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); nf_ct_unlink_expect(exp); - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); nf_ct_expect_put(exp); } @@ -201,12 +201,12 @@ static inline int expect_matches(const struct nf_conntrack_expect *a, /* Generally a bad idea to call this: could have matched already. */ void nf_ct_unexpect_related(struct nf_conntrack_expect *exp) { - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); } - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); } EXPORT_SYMBOL_GPL(nf_ct_unexpect_related); @@ -355,7 +355,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) NF_CT_ASSERT(master_help); - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); if (!master_help->helper) { ret = -ESHUTDOWN; goto out; @@ -390,7 +390,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) nf_ct_expect_event(IPEXP_NEW, expect); ret = 0; out: - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); return ret; } EXPORT_SYMBOL_GPL(nf_ct_expect_related); diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 872c1aa3124c..02563050cc3a 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -1415,7 +1415,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, nf_ct_refresh(ct, skb, info->timeout * HZ); /* Set expect timeout */ - read_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); exp = find_expect(ct, &ct->tuplehash[dir].tuple.dst.u3, info->sig_port[!dir]); if (exp) { @@ -1425,7 +1425,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, NF_CT_DUMP_TUPLE(&exp->tuple); set_expect_timeout(exp, info->timeout); } - read_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); } return 0; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 42f781fcba64..b1fd21cc1dbc 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -138,7 +138,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) */ synchronize_rcu(); - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); /* Get rid of expectations */ for (i = 0; i < nf_ct_expect_hsize; i++) { @@ -160,7 +160,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) hlist_for_each_entry(h, n, &nf_conntrack_hash[i], hnode) unhelp(h, me); } - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index b701dcce0e69..b6a8c089a075 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1220,7 +1220,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, return err; } - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) h = __nf_conntrack_find(&otuple, NULL); else if (cda[CTA_TUPLE_REPLY]) @@ -1248,7 +1248,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, atomic_inc(&master_ct->ct_general.use); } - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) err = ctnetlink_create_conntrack(cda, @@ -1281,7 +1281,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, } out_unlock: - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); return err; } @@ -1614,10 +1614,10 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct nf_conn_help *m_help; /* delete all expectations for this helper */ - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); h = __nf_conntrack_helper_find_byname(name); if (!h) { - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); return -EINVAL; } for (i = 0; i < nf_ct_expect_hsize; i++) { @@ -1632,10 +1632,10 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, } } } - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); } else { /* This basically means we have to flush everything*/ - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, &nf_ct_expect_hash[i], @@ -1646,7 +1646,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, } } } - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); } return 0; @@ -1732,11 +1732,11 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - write_lock_bh(&nf_conntrack_lock); + spin_lock_bh(&nf_conntrack_lock); exp = __nf_ct_expect_find(&tuple); if (!exp) { - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) err = ctnetlink_create_expect(cda, u3); @@ -1746,7 +1746,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, err = -EEXIST; if (!(nlh->nlmsg_flags & NLM_F_EXCL)) err = ctnetlink_change_expect(exp, cda); - write_unlock_bh(&nf_conntrack_lock); + spin_unlock_bh(&nf_conntrack_lock); return err; } -- cgit v1.2.3 From ba419aff2cda91680e5d4d3eeff95df49bd2edec Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 31 Jan 2008 04:39:23 -0800 Subject: [NETFILTER]: nf_conntrack: optimize __nf_conntrack_find() Ignoring specific entries in __nf_conntrack_find() is only needed by NAT for nf_conntrack_tuple_taken(). Remove it from __nf_conntrack_find() and make nf_conntrack_tuple_taken() search the hash itself. Saves 54 bytes of text in the hotpath on x86_64: __nf_conntrack_find | -54 # 321 -> 267, # inlines: 3 -> 2, size inlines: 181 -> 127 nf_conntrack_tuple_taken | +305 # 15 -> 320, lexblocks: 0 -> 3, # inlines: 0 -> 3, size inlines: 0 -> 181 nf_conntrack_find_get | -2 # 90 -> 88 3 functions changed, 305 bytes added, 56 bytes removed, diff: +249 Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 3 +-- net/netfilter/nf_conntrack_core.c | 22 +++++++++++++++------- net/netfilter/nf_conntrack_netlink.c | 6 +++--- net/netfilter/xt_connlimit.c | 2 +- 4 files changed, 20 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 561ae7658f55..14e0cc8364f2 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -178,8 +178,7 @@ extern void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int size); extern struct nf_conntrack_tuple_hash * -__nf_conntrack_find(const struct nf_conntrack_tuple *tuple, - const struct nf_conn *ignored_conntrack); +__nf_conntrack_find(const struct nf_conntrack_tuple *tuple); extern void nf_conntrack_hash_insert(struct nf_conn *ct); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f284dddfc899..ce4c4ba31cb1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -246,16 +246,14 @@ static void death_by_timeout(unsigned long ul_conntrack) } struct nf_conntrack_tuple_hash * -__nf_conntrack_find(const struct nf_conntrack_tuple *tuple, - const struct nf_conn *ignored_conntrack) +__nf_conntrack_find(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_node *n; unsigned int hash = hash_conntrack(tuple); hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) { - if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && - nf_ct_tuple_equal(tuple, &h->tuple)) { + if (nf_ct_tuple_equal(tuple, &h->tuple)) { NF_CT_STAT_INC(found); return h; } @@ -274,7 +272,7 @@ nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple) struct nf_conn *ct; rcu_read_lock(); - h = __nf_conntrack_find(tuple, NULL); + h = __nf_conntrack_find(tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) @@ -395,12 +393,22 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) { struct nf_conntrack_tuple_hash *h; + struct hlist_node *n; + unsigned int hash = hash_conntrack(tuple); rcu_read_lock(); - h = __nf_conntrack_find(tuple, ignored_conntrack); + hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) { + if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && + nf_ct_tuple_equal(tuple, &h->tuple)) { + NF_CT_STAT_INC(found); + rcu_read_unlock(); + return 1; + } + NF_CT_STAT_INC(searched); + } rcu_read_unlock(); - return h != NULL; + return 0; } EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index b6a8c089a075..bf86fdd89fd0 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1222,9 +1222,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) - h = __nf_conntrack_find(&otuple, NULL); + h = __nf_conntrack_find(&otuple); else if (cda[CTA_TUPLE_REPLY]) - h = __nf_conntrack_find(&rtuple, NULL); + h = __nf_conntrack_find(&rtuple); if (h == NULL) { struct nf_conntrack_tuple master; @@ -1239,7 +1239,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) goto out_unlock; - master_h = __nf_conntrack_find(&master, NULL); + master_h = __nf_conntrack_find(&master); if (master_h == NULL) { err = -ENOENT; goto out_unlock; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index f9b59a6753ee..3b0111933f60 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -124,7 +124,7 @@ static int count_them(struct xt_connlimit_data *data, /* check the saved connections */ list_for_each_entry_safe(conn, tmp, hash, list) { - found = __nf_conntrack_find(&conn->tuple, NULL); + found = __nf_conntrack_find(&conn->tuple); found_ct = NULL; if (found != NULL) -- cgit v1.2.3 From 380517dead6ab86d7249a1723f07f2f1b10af5f6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 31 Jan 2008 04:40:04 -0800 Subject: [NETFILTER]: nf_conntrack: avoid duplicate protocol comparison in nf_ct_tuple_equal() nf_ct_tuple_src_equal() and nf_ct_tuple_dst_equal() both compare the protocol numbers. Unfortunately gcc doesn't optimize out the second comparison, so remove it and prefix both functions with __ to indicate that they should not be used directly. Saves another 16 byte of text in __nf_conntrack_find() on x86_64: nf_conntrack_tuple_taken | -20 # 320 -> 300, size inlines: 181 -> 161 __nf_conntrack_find | -16 # 267 -> 251, size inlines: 127 -> 115 __nf_conntrack_confirm | -40 # 875 -> 835, size inlines: 570 -> 537 3 functions changed, 76 bytes removed Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_tuple.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index 45cb17cdcfd0..e69ab2e87597 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -132,34 +132,33 @@ struct nf_conntrack_tuple_hash #endif /* __KERNEL__ */ -static inline int nf_ct_tuple_src_equal(const struct nf_conntrack_tuple *t1, - const struct nf_conntrack_tuple *t2) +static inline int __nf_ct_tuple_src_equal(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2) { return (t1->src.u3.all[0] == t2->src.u3.all[0] && t1->src.u3.all[1] == t2->src.u3.all[1] && t1->src.u3.all[2] == t2->src.u3.all[2] && t1->src.u3.all[3] == t2->src.u3.all[3] && t1->src.u.all == t2->src.u.all && - t1->src.l3num == t2->src.l3num && - t1->dst.protonum == t2->dst.protonum); + t1->src.l3num == t2->src.l3num); } -static inline int nf_ct_tuple_dst_equal(const struct nf_conntrack_tuple *t1, - const struct nf_conntrack_tuple *t2) +static inline int __nf_ct_tuple_dst_equal(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2) { return (t1->dst.u3.all[0] == t2->dst.u3.all[0] && t1->dst.u3.all[1] == t2->dst.u3.all[1] && t1->dst.u3.all[2] == t2->dst.u3.all[2] && t1->dst.u3.all[3] == t2->dst.u3.all[3] && t1->dst.u.all == t2->dst.u.all && - t1->src.l3num == t2->src.l3num && t1->dst.protonum == t2->dst.protonum); } static inline int nf_ct_tuple_equal(const struct nf_conntrack_tuple *t1, const struct nf_conntrack_tuple *t2) { - return nf_ct_tuple_src_equal(t1, t2) && nf_ct_tuple_dst_equal(t1, t2); + return __nf_ct_tuple_src_equal(t1, t2) && + __nf_ct_tuple_dst_equal(t1, t2); } static inline int nf_ct_tuple_mask_equal(const struct nf_conntrack_tuple_mask *m1, @@ -199,7 +198,7 @@ static inline int nf_ct_tuple_mask_cmp(const struct nf_conntrack_tuple *t, const struct nf_conntrack_tuple_mask *mask) { return nf_ct_tuple_src_mask_cmp(t, tuple, mask) && - nf_ct_tuple_dst_equal(t, tuple); + __nf_ct_tuple_dst_equal(t, tuple); } #endif /* _NF_CONNTRACK_TUPLE_H */ -- cgit v1.2.3 From ffaa9c100bd75c81744a2b7800a45daba53db0eb Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 31 Jan 2008 04:41:14 -0800 Subject: [NETFILTER]: nf_conntrack: reorder struct nf_conntrack_l4proto Reorder struct nf_conntrack_l4proto so all members used during packet processing are in the same cacheline. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_l4proto.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index fb50c217ba0a..84892cc1d603 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -23,9 +23,6 @@ struct nf_conntrack_l4proto /* L4 Protocol number. */ u_int8_t l4proto; - /* Protocol name */ - const char *name; - /* Try to fill in the third arg: dataoff is offset past network protocol hdr. Return true if possible. */ int (*pkt_to_tuple)(const struct sk_buff *skb, @@ -38,13 +35,6 @@ struct nf_conntrack_l4proto int (*invert_tuple)(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_tuple *orig); - /* Print out the per-protocol part of the tuple. Return like seq_* */ - int (*print_tuple)(struct seq_file *s, - const struct nf_conntrack_tuple *); - - /* Print out the private part of the conntrack. */ - int (*print_conntrack)(struct seq_file *s, const struct nf_conn *); - /* Returns verdict for packet, or -1 for invalid. */ int (*packet)(struct nf_conn *conntrack, const struct sk_buff *skb, @@ -65,6 +55,13 @@ struct nf_conntrack_l4proto enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum); + /* Print out the per-protocol part of the tuple. Return like seq_* */ + int (*print_tuple)(struct seq_file *s, + const struct nf_conntrack_tuple *); + + /* Print out the private part of the conntrack. */ + int (*print_conntrack)(struct seq_file *s, const struct nf_conn *); + /* convert protoinfo to nfnetink attributes */ int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla, const struct nf_conn *ct); @@ -87,6 +84,8 @@ struct nf_conntrack_l4proto struct ctl_table *ctl_compat_table; #endif #endif + /* Protocol name */ + const char *name; /* Module (if any) which this is connected to. */ struct module *me; -- cgit v1.2.3 From c88130bcd546e73e66165f9c29113dae9facf1ec Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 31 Jan 2008 04:42:11 -0800 Subject: [NETFILTER]: nf_conntrack: naming unification Rename all "conntrack" variables to "ct" for more consistency and avoiding some overly long lines. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 4 +- include/net/netfilter/nf_conntrack_l3proto.h | 4 +- include/net/netfilter/nf_conntrack_l4proto.h | 6 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 12 +- net/ipv4/netfilter/nf_nat_proto_gre.c | 10 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 8 +- net/netfilter/nf_conntrack_core.c | 58 +++++----- net/netfilter/nf_conntrack_proto_generic.c | 6 +- net/netfilter/nf_conntrack_proto_sctp.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 150 ++++++++++++------------- net/netfilter/nf_conntrack_proto_udp.c | 13 +-- net/netfilter/nf_conntrack_proto_udplite.c | 13 +-- net/netfilter/nf_conntrack_standalone.c | 38 +++---- 13 files changed, 157 insertions(+), 167 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 14e0cc8364f2..bda78a286e2b 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -145,7 +145,7 @@ nf_ct_tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash) /* Alter reply tuple (maybe alter helper). */ extern void -nf_conntrack_alter_reply(struct nf_conn *conntrack, +nf_conntrack_alter_reply(struct nf_conn *ct, const struct nf_conntrack_tuple *newreply); /* Is this tuple taken? (ignoring any belonging to the given @@ -218,7 +218,7 @@ static inline void nf_ct_refresh(struct nf_conn *ct, /* Update TCP window tracking data when NAT mangles the packet */ extern void nf_conntrack_tcp_update(struct sk_buff *skb, unsigned int dataoff, - struct nf_conn *conntrack, + struct nf_conn *ct, int dir); /* Fake conntrack entry for untracked connections */ diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index d5526bcce147..b886e3ae6cad 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -43,7 +43,7 @@ struct nf_conntrack_l3proto const struct nf_conntrack_tuple *); /* Returns verdict for packet, or -1 for invalid. */ - int (*packet)(struct nf_conn *conntrack, + int (*packet)(struct nf_conn *ct, const struct sk_buff *skb, enum ip_conntrack_info ctinfo); @@ -51,7 +51,7 @@ struct nf_conntrack_l3proto * Called when a new connection for this protocol found; * returns TRUE if it's OK. If so, packet() called next. */ - int (*new)(struct nf_conn *conntrack, const struct sk_buff *skb); + int (*new)(struct nf_conn *ct, const struct sk_buff *skb); /* * Called before tracking. diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 84892cc1d603..efc16eccddb1 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -36,7 +36,7 @@ struct nf_conntrack_l4proto const struct nf_conntrack_tuple *orig); /* Returns verdict for packet, or -1 for invalid. */ - int (*packet)(struct nf_conn *conntrack, + int (*packet)(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, @@ -45,11 +45,11 @@ struct nf_conntrack_l4proto /* Called when a new connection for this protocol found; * returns TRUE if it's OK. If so, packet() called next. */ - int (*new)(struct nf_conn *conntrack, const struct sk_buff *skb, + int (*new)(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff); /* Called when a conntrack entry is destroyed */ - void (*destroy)(struct nf_conn *conntrack); + void (*destroy)(struct nf_conn *ct); int (*error)(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 4004a04c5510..17217f4f9918 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -100,7 +100,7 @@ static int icmp_packet(struct nf_conn *ct, } /* Called when a new connection for this protocol found. */ -static int icmp_new(struct nf_conn *conntrack, +static int icmp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { static const u_int8_t valid_new[] = { @@ -110,15 +110,15 @@ static int icmp_new(struct nf_conn *conntrack, [ICMP_ADDRESS] = 1 }; - if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) - || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { + if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) + || !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) { /* Can't create a new ICMP `conn' with this. */ pr_debug("icmp: can't create new conn with type %u\n", - conntrack->tuplehash[0].tuple.dst.u.icmp.type); - NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); + ct->tuplehash[0].tuple.dst.u.icmp.type); + NF_CT_DUMP_TUPLE(&ct->tuplehash[0].tuple); return 0; } - atomic_set(&conntrack->proto.icmp.count, 0); + atomic_set(&ct->proto.icmp.count, 0); return 1; } diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index c9dbd550df39..b887ebb42133 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -59,7 +59,7 @@ static int gre_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, - const struct nf_conn *conntrack) + const struct nf_conn *ct) { static u_int16_t key; __be16 *keyptr; @@ -67,7 +67,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, /* If there is no master conntrack we are not PPTP, do not change tuples */ - if (!conntrack->master) + if (!ct->master) return 0; if (maniptype == IP_NAT_MANIP_SRC) @@ -76,7 +76,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, keyptr = &tuple->dst.u.gre.key; if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { - pr_debug("%p: NATing GRE PPTP\n", conntrack); + pr_debug("%p: NATing GRE PPTP\n", ct); min = 1; range_size = 0xffff; } else { @@ -88,11 +88,11 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, for (i = 0; i < range_size; i++, key++) { *keyptr = htons(min + key % range_size); - if (!nf_nat_used_tuple(tuple, conntrack)) + if (!nf_nat_used_tuple(tuple, ct)) return 1; } - pr_debug("%p: no NAT mapping\n", conntrack); + pr_debug("%p: no NAT mapping\n", ct); return 0; } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index da924c6b5f06..430db1de31e3 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -101,7 +101,7 @@ static int icmpv6_packet(struct nf_conn *ct, } /* Called when a new connection for this protocol found. */ -static int icmpv6_new(struct nf_conn *conntrack, +static int icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { @@ -109,16 +109,16 @@ static int icmpv6_new(struct nf_conn *conntrack, [ICMPV6_ECHO_REQUEST - 128] = 1, [ICMPV6_NI_QUERY - 128] = 1 }; - int type = conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128; + int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128; if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) { /* Can't create a new ICMPv6 `conn' with this. */ pr_debug("icmpv6: can't create new conn with type %u\n", type + 128); - NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); + NF_CT_DUMP_TUPLE(&ct->tuplehash[0].tuple); return 0; } - atomic_set(&conntrack->proto.icmp.count, 0); + atomic_set(&ct->proto.icmp.count, 0); return 1; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 120588333c73..1a3673d55868 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -462,7 +462,7 @@ static noinline int early_drop(unsigned int hash) struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl) { - struct nf_conn *conntrack = NULL; + struct nf_conn *ct = NULL; if (unlikely(!nf_conntrack_hash_rnd_initted)) { get_random_bytes(&nf_conntrack_hash_rnd, 4); @@ -485,22 +485,21 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, } } - conntrack = kmem_cache_zalloc(nf_conntrack_cachep, GFP_ATOMIC); - if (conntrack == NULL) { + ct = kmem_cache_zalloc(nf_conntrack_cachep, GFP_ATOMIC); + if (ct == NULL) { pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); atomic_dec(&nf_conntrack_count); return ERR_PTR(-ENOMEM); } - atomic_set(&conntrack->ct_general.use, 1); - conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; - conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; + atomic_set(&ct->ct_general.use, 1); + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; + ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; /* Don't set timer yet: wait for confirmation */ - setup_timer(&conntrack->timeout, death_by_timeout, - (unsigned long)conntrack); - INIT_RCU_HEAD(&conntrack->rcu); + setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); + INIT_RCU_HEAD(&ct->rcu); - return conntrack; + return ct; } EXPORT_SYMBOL_GPL(nf_conntrack_alloc); @@ -513,9 +512,9 @@ static void nf_conntrack_free_rcu(struct rcu_head *head) atomic_dec(&nf_conntrack_count); } -void nf_conntrack_free(struct nf_conn *conntrack) +void nf_conntrack_free(struct nf_conn *ct) { - call_rcu(&conntrack->rcu, nf_conntrack_free_rcu); + call_rcu(&ct->rcu, nf_conntrack_free_rcu); } EXPORT_SYMBOL_GPL(nf_conntrack_free); @@ -528,7 +527,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, struct sk_buff *skb, unsigned int dataoff) { - struct nf_conn *conntrack; + struct nf_conn *ct; struct nf_conn_help *help; struct nf_conntrack_tuple repl_tuple; struct nf_conntrack_expect *exp; @@ -538,14 +537,14 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, return NULL; } - conntrack = nf_conntrack_alloc(tuple, &repl_tuple); - if (conntrack == NULL || IS_ERR(conntrack)) { + ct = nf_conntrack_alloc(tuple, &repl_tuple); + if (ct == NULL || IS_ERR(ct)) { pr_debug("Can't allocate conntrack.\n"); - return (struct nf_conntrack_tuple_hash *)conntrack; + return (struct nf_conntrack_tuple_hash *)ct; } - if (!l4proto->new(conntrack, skb, dataoff)) { - nf_conntrack_free(conntrack); + if (!l4proto->new(ct, skb, dataoff)) { + nf_conntrack_free(ct); pr_debug("init conntrack: can't track with proto module\n"); return NULL; } @@ -554,30 +553,30 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, exp = nf_ct_find_expectation(tuple); if (exp) { pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", - conntrack, exp); + ct, exp); /* Welcome, Mr. Bond. We've been expecting you... */ - __set_bit(IPS_EXPECTED_BIT, &conntrack->status); - conntrack->master = exp->master; + __set_bit(IPS_EXPECTED_BIT, &ct->status); + ct->master = exp->master; if (exp->helper) { - help = nf_ct_helper_ext_add(conntrack, GFP_ATOMIC); + help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); if (help) rcu_assign_pointer(help->helper, exp->helper); } #ifdef CONFIG_NF_CONNTRACK_MARK - conntrack->mark = exp->master->mark; + ct->mark = exp->master->mark; #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK - conntrack->secmark = exp->master->secmark; + ct->secmark = exp->master->secmark; #endif - nf_conntrack_get(&conntrack->master->ct_general); + nf_conntrack_get(&ct->master->ct_general); NF_CT_STAT_INC(expect_new); } else { struct nf_conntrack_helper *helper; helper = __nf_ct_helper_find(&repl_tuple); if (helper) { - help = nf_ct_helper_ext_add(conntrack, GFP_ATOMIC); + help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); if (help) rcu_assign_pointer(help->helper, helper); } @@ -585,18 +584,17 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, } /* Overload tuple linked list to put us in unconfirmed list. */ - hlist_add_head(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].hnode, - &unconfirmed); + hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, &unconfirmed); spin_unlock_bh(&nf_conntrack_lock); if (exp) { if (exp->expectfn) - exp->expectfn(conntrack, exp); + exp->expectfn(ct, exp); nf_ct_expect_put(exp); } - return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; + return &ct->tuplehash[IP_CT_DIR_ORIGINAL]; } /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 22c5dcb6306a..55458915575f 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -41,19 +41,19 @@ static int generic_print_tuple(struct seq_file *s, } /* Returns verdict for packet, or -1 for invalid. */ -static int packet(struct nf_conn *conntrack, +static int packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, int pf, unsigned int hooknum) { - nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_generic_timeout); + nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_generic_timeout); return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ -static int new(struct nf_conn *conntrack, const struct sk_buff *skb, +static int new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { return 1; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 7017b161a14b..f9a08370dbb3 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -25,7 +25,7 @@ #include #include -/* Protects conntrack->proto.sctp */ +/* Protects ct->proto.sctp */ static DEFINE_RWLOCK(sctp_lock); /* FIXME: Examine ipfilter's timeouts and conntrack transitions more diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 8e2207535665..9807af677a56 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -26,7 +26,7 @@ #include #include -/* Protects conntrack->proto.tcp */ +/* Protects ct->proto.tcp */ static DEFINE_RWLOCK(tcp_lock); /* "Be conservative in what you do, @@ -292,13 +292,12 @@ static int tcp_print_tuple(struct seq_file *s, } /* Print out the private part of the conntrack. */ -static int tcp_print_conntrack(struct seq_file *s, - const struct nf_conn *conntrack) +static int tcp_print_conntrack(struct seq_file *s, const struct nf_conn *ct) { enum tcp_conntrack state; read_lock_bh(&tcp_lock); - state = conntrack->proto.tcp.state; + state = ct->proto.tcp.state; read_unlock_bh(&tcp_lock); return seq_printf(s, "%s ", tcp_conntrack_names[state]); @@ -689,12 +688,12 @@ static int tcp_in_window(struct nf_conn *ct, /* Caller must linearize skb at tcp header. */ void nf_conntrack_tcp_update(struct sk_buff *skb, unsigned int dataoff, - struct nf_conn *conntrack, + struct nf_conn *ct, int dir) { struct tcphdr *tcph = (void *)skb->data + dataoff; - struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir]; - struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir]; + struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir]; + struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[!dir]; __u32 end; end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph); @@ -703,9 +702,9 @@ void nf_conntrack_tcp_update(struct sk_buff *skb, /* * We have to worry for the ack in the reply packet only... */ - if (after(end, conntrack->proto.tcp.seen[dir].td_end)) - conntrack->proto.tcp.seen[dir].td_end = end; - conntrack->proto.tcp.last_end = end; + if (after(end, ct->proto.tcp.seen[dir].td_end)) + ct->proto.tcp.seen[dir].td_end = end; + ct->proto.tcp.last_end = end; write_unlock_bh(&tcp_lock); pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", @@ -794,7 +793,7 @@ static int tcp_error(struct sk_buff *skb, } /* Returns verdict for packet, or -1 for invalid. */ -static int tcp_packet(struct nf_conn *conntrack, +static int tcp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, @@ -812,26 +811,24 @@ static int tcp_packet(struct nf_conn *conntrack, BUG_ON(th == NULL); write_lock_bh(&tcp_lock); - old_state = conntrack->proto.tcp.state; + old_state = ct->proto.tcp.state; dir = CTINFO2DIR(ctinfo); index = get_conntrack_index(th); new_state = tcp_conntracks[dir][index][old_state]; - tuple = &conntrack->tuplehash[dir].tuple; + tuple = &ct->tuplehash[dir].tuple; switch (new_state) { case TCP_CONNTRACK_SYN_SENT: if (old_state < TCP_CONNTRACK_TIME_WAIT) break; - if ((conntrack->proto.tcp.seen[!dir].flags & - IP_CT_TCP_FLAG_CLOSE_INIT) - || (conntrack->proto.tcp.last_dir == dir - && conntrack->proto.tcp.last_index == TCP_RST_SET)) { + if ((ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_CLOSE_INIT) + || (ct->proto.tcp.last_dir == dir + && ct->proto.tcp.last_index == TCP_RST_SET)) { /* Attempt to reopen a closed/aborted connection. * Delete this connection and look up again. */ write_unlock_bh(&tcp_lock); - if (del_timer(&conntrack->timeout)) - conntrack->timeout.function((unsigned long) - conntrack); + if (del_timer(&ct->timeout)) + ct->timeout.function((unsigned long)ct); return -NF_REPEAT; } /* Fall through */ @@ -843,10 +840,9 @@ static int tcp_packet(struct nf_conn *conntrack, * c) ACK in reply direction after initial SYN in original. */ if (index == TCP_SYNACK_SET - && conntrack->proto.tcp.last_index == TCP_SYN_SET - && conntrack->proto.tcp.last_dir != dir - && ntohl(th->ack_seq) == - conntrack->proto.tcp.last_end) { + && ct->proto.tcp.last_index == TCP_SYN_SET + && ct->proto.tcp.last_dir != dir + && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { /* This SYN/ACK acknowledges a SYN that we earlier * ignored as invalid. This means that the client and * the server are both in sync, while the firewall is @@ -858,15 +854,14 @@ static int tcp_packet(struct nf_conn *conntrack, if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: killing out of sync session "); - if (del_timer(&conntrack->timeout)) - conntrack->timeout.function((unsigned long) - conntrack); + if (del_timer(&ct->timeout)) + ct->timeout.function((unsigned long)ct); return -NF_DROP; } - conntrack->proto.tcp.last_index = index; - conntrack->proto.tcp.last_dir = dir; - conntrack->proto.tcp.last_seq = ntohl(th->seq); - conntrack->proto.tcp.last_end = + ct->proto.tcp.last_index = index; + ct->proto.tcp.last_dir = dir; + ct->proto.tcp.last_seq = ntohl(th->seq); + ct->proto.tcp.last_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); write_unlock_bh(&tcp_lock); @@ -885,11 +880,11 @@ static int tcp_packet(struct nf_conn *conntrack, return -NF_ACCEPT; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET - && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) - && conntrack->proto.tcp.last_index == TCP_SYN_SET) - || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) - && conntrack->proto.tcp.last_index == TCP_ACK_SET)) - && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { + && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) + && ct->proto.tcp.last_index == TCP_SYN_SET) + || (!test_bit(IPS_ASSURED_BIT, &ct->status) + && ct->proto.tcp.last_index == TCP_ACK_SET)) + && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { /* RST sent to invalid SYN or ACK we had let through * at a) and c) above: * @@ -907,15 +902,15 @@ static int tcp_packet(struct nf_conn *conntrack, break; } - if (!tcp_in_window(conntrack, &conntrack->proto.tcp, dir, index, + if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, skb, dataoff, th, pf)) { write_unlock_bh(&tcp_lock); return -NF_ACCEPT; } in_window: /* From now on we have got in-window packets */ - conntrack->proto.tcp.last_index = index; - conntrack->proto.tcp.last_dir = dir; + ct->proto.tcp.last_index = index; + ct->proto.tcp.last_dir = dir; pr_debug("tcp_conntracks: "); NF_CT_DUMP_TUPLE(tuple); @@ -924,12 +919,12 @@ static int tcp_packet(struct nf_conn *conntrack, (th->fin ? 1 : 0), (th->rst ? 1 : 0), old_state, new_state); - conntrack->proto.tcp.state = new_state; + ct->proto.tcp.state = new_state; if (old_state != new_state && (new_state == TCP_CONNTRACK_FIN_WAIT || new_state == TCP_CONNTRACK_CLOSE)) - conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; - timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans + ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; + timeout = ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans && tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans ? nf_ct_tcp_timeout_max_retrans : tcp_timeouts[new_state]; write_unlock_bh(&tcp_lock); @@ -938,41 +933,40 @@ static int tcp_packet(struct nf_conn *conntrack, if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, skb); - if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { + if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { /* If only reply is a RST, we can consider ourselves not to have an established connection: this is a fairly common problem case, so we can delete the conntrack immediately. --RR */ if (th->rst) { - if (del_timer(&conntrack->timeout)) - conntrack->timeout.function((unsigned long) - conntrack); + if (del_timer(&ct->timeout)) + ct->timeout.function((unsigned long)ct); return NF_ACCEPT; } - } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status) + } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) && (old_state == TCP_CONNTRACK_SYN_RECV || old_state == TCP_CONNTRACK_ESTABLISHED) && new_state == TCP_CONNTRACK_ESTABLISHED) { /* Set ASSURED if we see see valid ack in ESTABLISHED after SYN_RECV or a valid answer for a picked up connection. */ - set_bit(IPS_ASSURED_BIT, &conntrack->status); + set_bit(IPS_ASSURED_BIT, &ct->status); nf_conntrack_event_cache(IPCT_STATUS, skb); } - nf_ct_refresh_acct(conntrack, ctinfo, skb, timeout); + nf_ct_refresh_acct(ct, ctinfo, skb, timeout); return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ -static int tcp_new(struct nf_conn *conntrack, +static int tcp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { enum tcp_conntrack new_state; struct tcphdr *th, _tcph; - struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0]; - struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1]; + struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0]; + struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1]; th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); BUG_ON(th == NULL); @@ -990,17 +984,17 @@ static int tcp_new(struct nf_conn *conntrack, if (new_state == TCP_CONNTRACK_SYN_SENT) { /* SYN packet */ - conntrack->proto.tcp.seen[0].td_end = + ct->proto.tcp.seen[0].td_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); - conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); - if (conntrack->proto.tcp.seen[0].td_maxwin == 0) - conntrack->proto.tcp.seen[0].td_maxwin = 1; - conntrack->proto.tcp.seen[0].td_maxend = - conntrack->proto.tcp.seen[0].td_end; - - tcp_options(skb, dataoff, th, &conntrack->proto.tcp.seen[0]); - conntrack->proto.tcp.seen[1].flags = 0; + ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window); + if (ct->proto.tcp.seen[0].td_maxwin == 0) + ct->proto.tcp.seen[0].td_maxwin = 1; + ct->proto.tcp.seen[0].td_maxend = + ct->proto.tcp.seen[0].td_end; + + tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); + ct->proto.tcp.seen[1].flags = 0; } else if (nf_ct_tcp_loose == 0) { /* Don't try to pick up connections. */ return 0; @@ -1010,32 +1004,32 @@ static int tcp_new(struct nf_conn *conntrack, * its history is lost for us. * Let's try to use the data from the packet. */ - conntrack->proto.tcp.seen[0].td_end = + ct->proto.tcp.seen[0].td_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); - conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); - if (conntrack->proto.tcp.seen[0].td_maxwin == 0) - conntrack->proto.tcp.seen[0].td_maxwin = 1; - conntrack->proto.tcp.seen[0].td_maxend = - conntrack->proto.tcp.seen[0].td_end + - conntrack->proto.tcp.seen[0].td_maxwin; - conntrack->proto.tcp.seen[0].td_scale = 0; + ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window); + if (ct->proto.tcp.seen[0].td_maxwin == 0) + ct->proto.tcp.seen[0].td_maxwin = 1; + ct->proto.tcp.seen[0].td_maxend = + ct->proto.tcp.seen[0].td_end + + ct->proto.tcp.seen[0].td_maxwin; + ct->proto.tcp.seen[0].td_scale = 0; /* We assume SACK and liberal window checking to handle * window scaling */ - conntrack->proto.tcp.seen[0].flags = - conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM | - IP_CT_TCP_FLAG_BE_LIBERAL; + ct->proto.tcp.seen[0].flags = + ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM | + IP_CT_TCP_FLAG_BE_LIBERAL; } - conntrack->proto.tcp.seen[1].td_end = 0; - conntrack->proto.tcp.seen[1].td_maxend = 0; - conntrack->proto.tcp.seen[1].td_maxwin = 1; - conntrack->proto.tcp.seen[1].td_scale = 0; + ct->proto.tcp.seen[1].td_end = 0; + ct->proto.tcp.seen[1].td_maxend = 0; + ct->proto.tcp.seen[1].td_maxwin = 1; + ct->proto.tcp.seen[1].td_scale = 0; /* tcp_packet will set them */ - conntrack->proto.tcp.state = TCP_CONNTRACK_NONE; - conntrack->proto.tcp.last_index = TCP_NONE_SET; + ct->proto.tcp.state = TCP_CONNTRACK_NONE; + ct->proto.tcp.last_index = TCP_NONE_SET; pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 384875411082..4c1e67ed63bd 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -61,7 +61,7 @@ static int udp_print_tuple(struct seq_file *s, } /* Returns verdict for packet, and may modify conntracktype */ -static int udp_packet(struct nf_conn *conntrack, +static int udp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, @@ -70,20 +70,19 @@ static int udp_packet(struct nf_conn *conntrack, { /* If we've seen traffic both ways, this is some kind of UDP stream. Extend timeout. */ - if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { - nf_ct_refresh_acct(conntrack, ctinfo, skb, - nf_ct_udp_timeout_stream); + if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { + nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout_stream); /* Also, more likely to be important, and not a probe */ - if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status)) + if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_STATUS, skb); } else - nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_udp_timeout); + nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout); return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ -static int udp_new(struct nf_conn *conntrack, const struct sk_buff *skb, +static int udp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { return 1; diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 070056d9bcd6..d9e1532b45d9 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -60,7 +60,7 @@ static int udplite_print_tuple(struct seq_file *s, } /* Returns verdict for packet, and may modify conntracktype */ -static int udplite_packet(struct nf_conn *conntrack, +static int udplite_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, @@ -69,21 +69,20 @@ static int udplite_packet(struct nf_conn *conntrack, { /* If we've seen traffic both ways, this is some kind of UDP stream. Extend timeout. */ - if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { - nf_ct_refresh_acct(conntrack, ctinfo, skb, + if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { + nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout_stream); /* Also, more likely to be important, and not a probe */ - if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status)) + if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_STATUS, skb); } else - nf_ct_refresh_acct(conntrack, ctinfo, skb, - nf_ct_udplite_timeout); + nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout); return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ -static int udplite_new(struct nf_conn *conntrack, const struct sk_buff *skb, +static int udplite_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { return 1; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 98f0cd31150d..278b35e64d74 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -117,71 +117,71 @@ static void ct_seq_stop(struct seq_file *s, void *v) static int ct_seq_show(struct seq_file *s, void *v) { const struct nf_conntrack_tuple_hash *hash = v; - const struct nf_conn *conntrack = nf_ct_tuplehash_to_ctrack(hash); + const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; - NF_CT_ASSERT(conntrack); + NF_CT_ASSERT(ct); /* we only want to print DIR_ORIGINAL */ if (NF_CT_DIRECTION(hash)) return 0; - l3proto = __nf_ct_l3proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_ORIGINAL] .tuple.src.l3num); NF_CT_ASSERT(l3proto); - l4proto = __nf_ct_l4proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + l4proto = __nf_ct_l4proto_find(ct->tuplehash[IP_CT_DIR_ORIGINAL] .tuple.src.l3num, - conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + ct->tuplehash[IP_CT_DIR_ORIGINAL] .tuple.dst.protonum); NF_CT_ASSERT(l4proto); if (seq_printf(s, "%-8s %u %-8s %u %ld ", l3proto->name, - conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, l4proto->name, - conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum, - timer_pending(&conntrack->timeout) - ? (long)(conntrack->timeout.expires - jiffies)/HZ : 0) != 0) + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum, + timer_pending(&ct->timeout) + ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) return -ENOSPC; - if (l4proto->print_conntrack && l4proto->print_conntrack(s, conntrack)) + if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct)) return -ENOSPC; - if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL])) + if (seq_print_counters(s, &ct->counters[IP_CT_DIR_ORIGINAL])) return -ENOSPC; - if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status))) + if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) if (seq_printf(s, "[UNREPLIED] ")) return -ENOSPC; - if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple, + if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l3proto, l4proto)) return -ENOSPC; - if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY])) + if (seq_print_counters(s, &ct->counters[IP_CT_DIR_REPLY])) return -ENOSPC; - if (test_bit(IPS_ASSURED_BIT, &conntrack->status)) + if (test_bit(IPS_ASSURED_BIT, &ct->status)) if (seq_printf(s, "[ASSURED] ")) return -ENOSPC; #if defined(CONFIG_NF_CONNTRACK_MARK) - if (seq_printf(s, "mark=%u ", conntrack->mark)) + if (seq_printf(s, "mark=%u ", ct->mark)) return -ENOSPC; #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK - if (seq_printf(s, "secmark=%u ", conntrack->secmark)) + if (seq_printf(s, "secmark=%u ", ct->secmark)) return -ENOSPC; #endif - if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use))) + if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) return -ENOSPC; return 0; -- cgit v1.2.3 From 09e410def6432458c7d7e771a1807b157f4c2577 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 31 Jan 2008 04:48:13 -0800 Subject: [NETFILTER]: xt_hashlimit match, revision 1 Introduces the xt_hashlimit match revision 1. It adds support for kernel-level inversion and grouping source and/or destination IP addresses, allowing to limit on a per-subnet basis. While this would technically obsolete xt_limit, xt_hashlimit is a more expensive due to the hashbucketing. Kernel-level inversion: Previously you had to do user-level inversion: iptables -N foo iptables -A foo -m hashlimit --hashlimit(-upto) 5/s -j RETURN iptables -A foo -j DROP iptables -A INPUT -j foo now it is simpler: iptables -A INPUT -m hashlimit --hashlimit-over 5/s -j DROP Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/xt_hashlimit.h | 37 +++- net/netfilter/xt_hashlimit.c | 322 +++++++++++++++++++++++++++++---- 2 files changed, 318 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/xt_hashlimit.h b/include/linux/netfilter/xt_hashlimit.h index c19972e4564d..58b818ee41ca 100644 --- a/include/linux/netfilter/xt_hashlimit.h +++ b/include/linux/netfilter/xt_hashlimit.h @@ -9,13 +9,16 @@ /* details of this structure hidden by the implementation */ struct xt_hashlimit_htable; -#define XT_HASHLIMIT_HASH_DIP 0x0001 -#define XT_HASHLIMIT_HASH_DPT 0x0002 -#define XT_HASHLIMIT_HASH_SIP 0x0004 -#define XT_HASHLIMIT_HASH_SPT 0x0008 +enum { + XT_HASHLIMIT_HASH_DIP = 1 << 0, + XT_HASHLIMIT_HASH_DPT = 1 << 1, + XT_HASHLIMIT_HASH_SIP = 1 << 2, + XT_HASHLIMIT_HASH_SPT = 1 << 3, + XT_HASHLIMIT_INVERT = 1 << 4, +}; struct hashlimit_cfg { - u_int32_t mode; /* bitmask of IPT_HASHLIMIT_HASH_* */ + u_int32_t mode; /* bitmask of XT_HASHLIMIT_HASH_* */ u_int32_t avg; /* Average secs between packets * scale */ u_int32_t burst; /* Period multiplier for upper limit. */ @@ -37,4 +40,28 @@ struct xt_hashlimit_info { struct xt_hashlimit_info *master; } u; }; + +struct hashlimit_cfg1 { + u_int32_t mode; /* bitmask of XT_HASHLIMIT_HASH_* */ + u_int32_t avg; /* Average secs between packets * scale */ + u_int32_t burst; /* Period multiplier for upper limit. */ + + /* user specified */ + u_int32_t size; /* how many buckets */ + u_int32_t max; /* max number of entries */ + u_int32_t gc_interval; /* gc interval */ + u_int32_t expire; /* when do entries expire? */ + + u_int8_t srcmask, dstmask; +}; + +struct xt_hashlimit_mtinfo1 { + char name[IFNAMSIZ]; + struct hashlimit_cfg1 cfg; + + /* Used internally by the kernel */ + struct xt_hashlimit_htable *hinfo __attribute__((aligned(8))); + struct xt_hashlimit_mtinfo1 *master __attribute__((aligned(8))); +}; + #endif /*_XT_HASHLIMIT_H*/ diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index b224b8f719a4..54aaf5bac2d7 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -1,9 +1,9 @@ -/* iptables match extension to limit the number of packets per second - * seperately for each hashbucket (sourceip/sourceport/dstip/dstport) +/* + * xt_hashlimit - Netfilter module to limit the number of packets per time + * seperately for each hashbucket (sourceip/sourceport/dstip/dstport) * - * (C) 2003-2004 by Harald Welte - * - * $Id: ipt_hashlimit.c 3244 2004-10-20 16:24:29Z laforge@netfilter.org $ + * (C) 2003-2004 by Harald Welte + * Copyright © CC Computer Consultants GmbH, 2007 - 2008 * * Development of this code was funded by Astaro AG, http://www.astaro.com/ */ @@ -35,6 +35,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); +MODULE_AUTHOR("Jan Engelhardt "); MODULE_DESCRIPTION("Xtables: per hash-bucket rate-limit match"); MODULE_ALIAS("ipt_hashlimit"); MODULE_ALIAS("ip6t_hashlimit"); @@ -57,7 +58,7 @@ struct dsthash_dst { __be32 dst[4]; } ip6; #endif - } addr; + }; __be16 src_port; __be16 dst_port; }; @@ -81,7 +82,7 @@ struct xt_hashlimit_htable { atomic_t use; int family; - struct hashlimit_cfg cfg; /* config */ + struct hashlimit_cfg1 cfg; /* config */ /* used internally */ spinlock_t lock; /* lock for list_head */ @@ -184,7 +185,7 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) } static void htable_gc(unsigned long htlong); -static int htable_create(struct xt_hashlimit_info *minfo, int family) +static int htable_create_v0(struct xt_hashlimit_info *minfo, int family) { struct xt_hashlimit_htable *hinfo; unsigned int size; @@ -210,7 +211,18 @@ static int htable_create(struct xt_hashlimit_info *minfo, int family) minfo->hinfo = hinfo; /* copy match config into hashtable config */ - memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg)); + hinfo->cfg.mode = minfo->cfg.mode; + hinfo->cfg.avg = minfo->cfg.avg; + hinfo->cfg.burst = minfo->cfg.burst; + hinfo->cfg.max = minfo->cfg.max; + hinfo->cfg.gc_interval = minfo->cfg.gc_interval; + hinfo->cfg.expire = minfo->cfg.expire; + + if (family == AF_INET) + hinfo->cfg.srcmask = hinfo->cfg.dstmask = 32; + else + hinfo->cfg.srcmask = hinfo->cfg.dstmask = 128; + hinfo->cfg.size = size; if (!hinfo->cfg.max) hinfo->cfg.max = 8 * hinfo->cfg.size; @@ -246,6 +258,70 @@ static int htable_create(struct xt_hashlimit_info *minfo, int family) return 0; } +static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, + unsigned int family) +{ + struct xt_hashlimit_htable *hinfo; + unsigned int size; + unsigned int i; + + if (minfo->cfg.size) { + size = minfo->cfg.size; + } else { + size = (num_physpages << PAGE_SHIFT) / 16384 / + sizeof(struct list_head); + if (num_physpages > 1024 * 1024 * 1024 / PAGE_SIZE) + size = 8192; + if (size < 16) + size = 16; + } + /* FIXME: don't use vmalloc() here or anywhere else -HW */ + hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) + + sizeof(struct list_head) * size); + if (hinfo == NULL) { + printk(KERN_ERR "xt_hashlimit: unable to create hashtable\n"); + return -1; + } + minfo->hinfo = hinfo; + + /* copy match config into hashtable config */ + memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg)); + hinfo->cfg.size = size; + if (hinfo->cfg.max == 0) + hinfo->cfg.max = 8 * hinfo->cfg.size; + else if (hinfo->cfg.max < hinfo->cfg.size) + hinfo->cfg.max = hinfo->cfg.size; + + for (i = 0; i < hinfo->cfg.size; i++) + INIT_HLIST_HEAD(&hinfo->hash[i]); + + atomic_set(&hinfo->use, 1); + hinfo->count = 0; + hinfo->family = family; + hinfo->rnd_initialized = 0; + spin_lock_init(&hinfo->lock); + + hinfo->pde = create_proc_entry(minfo->name, 0, + family == AF_INET ? hashlimit_procdir4 : + hashlimit_procdir6); + if (hinfo->pde == NULL) { + vfree(hinfo); + return -1; + } + hinfo->pde->proc_fops = &dl_file_ops; + hinfo->pde->data = hinfo; + + setup_timer(&hinfo->timer, htable_gc, (unsigned long)hinfo); + hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval); + add_timer(&hinfo->timer); + + spin_lock_bh(&hashlimit_lock); + hlist_add_head(&hinfo->node, &hashlimit_htables); + spin_unlock_bh(&hashlimit_lock); + + return 0; +} + static bool select_all(const struct xt_hashlimit_htable *ht, const struct dsthash_ent *he) { @@ -388,6 +464,46 @@ static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now) dh->rateinfo.prev = now; } +static inline __be32 maskl(__be32 a, unsigned int l) +{ + return htonl(ntohl(a) & ~(~(u_int32_t)0 >> l)); +} + +static void hashlimit_ipv6_mask(__be32 *i, unsigned int p) +{ + switch (p) { + case 0: + i[0] = i[1] = 0; + i[2] = i[3] = 0; + break; + case 1 ... 31: + i[0] = maskl(i[0], p); + i[1] = i[2] = i[3] = 0; + break; + case 32: + i[1] = i[2] = i[3] = 0; + break; + case 33 ... 63: + i[1] = maskl(i[1], p - 32); + i[2] = i[3] = 0; + break; + case 64: + i[2] = i[3] = 0; + break; + case 65 ... 95: + i[2] = maskl(i[2], p - 64); + i[3] = 0; + case 96: + i[3] = 0; + break; + case 97 ... 127: + i[3] = maskl(i[3], p - 96); + break; + case 128: + break; + } +} + static int hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, struct dsthash_dst *dst, @@ -401,9 +517,11 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, switch (hinfo->family) { case AF_INET: if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) - dst->addr.ip.dst = ip_hdr(skb)->daddr; + dst->ip.dst = maskl(ip_hdr(skb)->daddr, + hinfo->cfg.dstmask); if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP) - dst->addr.ip.src = ip_hdr(skb)->saddr; + dst->ip.src = maskl(ip_hdr(skb)->saddr, + hinfo->cfg.srcmask); if (!(hinfo->cfg.mode & (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT))) @@ -412,12 +530,16 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, break; #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) case AF_INET6: - if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) - memcpy(&dst->addr.ip6.dst, &ipv6_hdr(skb)->daddr, - sizeof(dst->addr.ip6.dst)); - if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP) - memcpy(&dst->addr.ip6.src, &ipv6_hdr(skb)->saddr, - sizeof(dst->addr.ip6.src)); + if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) { + memcpy(&dst->ip6.dst, &ipv6_hdr(skb)->daddr, + sizeof(dst->ip6.dst)); + hashlimit_ipv6_mask(dst->ip6.dst, hinfo->cfg.dstmask); + } + if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP) { + memcpy(&dst->ip6.src, &ipv6_hdr(skb)->saddr, + sizeof(dst->ip6.src)); + hashlimit_ipv6_mask(dst->ip6.src, hinfo->cfg.srcmask); + } if (!(hinfo->cfg.mode & (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT))) @@ -457,10 +579,10 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, } static bool -hashlimit_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +hashlimit_mt_v0(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, + bool *hotdrop) { const struct xt_hashlimit_info *r = ((const struct xt_hashlimit_info *)matchinfo)->u.master; @@ -512,9 +634,62 @@ hotdrop: } static bool -hashlimit_mt_check(const char *tablename, const void *inf, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +hashlimit_mt(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, + bool *hotdrop) +{ + const struct xt_hashlimit_mtinfo1 *info = matchinfo; + struct xt_hashlimit_htable *hinfo = info->hinfo; + unsigned long now = jiffies; + struct dsthash_ent *dh; + struct dsthash_dst dst; + + if (hashlimit_init_dst(hinfo, &dst, skb, protoff) < 0) + goto hotdrop; + + spin_lock_bh(&hinfo->lock); + dh = dsthash_find(hinfo, &dst); + if (dh == NULL) { + dh = dsthash_alloc_init(hinfo, &dst); + if (dh == NULL) { + spin_unlock_bh(&hinfo->lock); + goto hotdrop; + } + + dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); + dh->rateinfo.prev = jiffies; + dh->rateinfo.credit = user2credits(hinfo->cfg.avg * + hinfo->cfg.burst); + dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * + hinfo->cfg.burst); + dh->rateinfo.cost = user2credits(hinfo->cfg.avg); + } else { + /* update expiration timeout */ + dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); + rateinfo_recalc(dh, now); + } + + if (dh->rateinfo.credit >= dh->rateinfo.cost) { + /* below the limit */ + dh->rateinfo.credit -= dh->rateinfo.cost; + spin_unlock_bh(&hinfo->lock); + return !(info->cfg.mode & XT_HASHLIMIT_INVERT); + } + + spin_unlock_bh(&hinfo->lock); + /* default match is underlimit - so over the limit, we need to invert */ + return info->cfg.mode & XT_HASHLIMIT_INVERT; + + hotdrop: + *hotdrop = true; + return false; +} + +static bool +hashlimit_mt_check_v0(const char *tablename, const void *inf, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) { struct xt_hashlimit_info *r = matchinfo; @@ -546,7 +721,7 @@ hashlimit_mt_check(const char *tablename, const void *inf, * create duplicate proc files. -HW */ mutex_lock(&hlimit_mutex); r->hinfo = htable_find_get(r->name, match->family); - if (!r->hinfo && htable_create(r, match->family) != 0) { + if (!r->hinfo && htable_create_v0(r, match->family) != 0) { mutex_unlock(&hlimit_mutex); return false; } @@ -557,14 +732,68 @@ hashlimit_mt_check(const char *tablename, const void *inf, return true; } +static bool +hashlimit_mt_check(const char *tablename, const void *inf, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) +{ + struct xt_hashlimit_mtinfo1 *info = matchinfo; + + /* Check for overflow. */ + if (info->cfg.burst == 0 || + user2credits(info->cfg.avg * info->cfg.burst) < + user2credits(info->cfg.avg)) { + printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n", + info->cfg.avg, info->cfg.burst); + return false; + } + if (info->cfg.gc_interval == 0 || info->cfg.expire == 0) + return false; + if (info->name[sizeof(info->name)-1] != '\0') + return false; + if (match->family == AF_INET) { + if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32) + return false; + } else { + if (info->cfg.srcmask > 128 || info->cfg.dstmask > 128) + return false; + } + + /* This is the best we've got: We cannot release and re-grab lock, + * since checkentry() is called before x_tables.c grabs xt_mutex. + * We also cannot grab the hashtable spinlock, since htable_create will + * call vmalloc, and that can sleep. And we cannot just re-search + * the list of htable's in htable_create(), since then we would + * create duplicate proc files. -HW */ + mutex_lock(&hlimit_mutex); + info->hinfo = htable_find_get(info->name, match->family); + if (!info->hinfo && htable_create(info, match->family) != 0) { + mutex_unlock(&hlimit_mutex); + return false; + } + mutex_unlock(&hlimit_mutex); + + /* Ugly hack: For SMP, we only want to use one set */ + info->master = info; + return true; +} + static void -hashlimit_mt_destroy(const struct xt_match *match, void *matchinfo) +hashlimit_mt_destroy_v0(const struct xt_match *match, void *matchinfo) { const struct xt_hashlimit_info *r = matchinfo; htable_put(r->hinfo); } +static void +hashlimit_mt_destroy(const struct xt_match *match, void *matchinfo) +{ + const struct xt_hashlimit_mtinfo1 *info = matchinfo; + + htable_put(info->hinfo); +} + #ifdef CONFIG_COMPAT struct compat_xt_hashlimit_info { char name[IFNAMSIZ]; @@ -592,33 +821,54 @@ static int hashlimit_mt_compat_to_user(void __user *dst, void *src) static struct xt_match hashlimit_mt_reg[] __read_mostly = { { .name = "hashlimit", + .revision = 0, .family = AF_INET, - .match = hashlimit_mt, + .match = hashlimit_mt_v0, .matchsize = sizeof(struct xt_hashlimit_info), #ifdef CONFIG_COMPAT .compatsize = sizeof(struct compat_xt_hashlimit_info), .compat_from_user = hashlimit_mt_compat_from_user, .compat_to_user = hashlimit_mt_compat_to_user, #endif - .checkentry = hashlimit_mt_check, - .destroy = hashlimit_mt_destroy, + .checkentry = hashlimit_mt_check_v0, + .destroy = hashlimit_mt_destroy_v0, .me = THIS_MODULE }, + { + .name = "hashlimit", + .revision = 1, + .family = AF_INET, + .match = hashlimit_mt, + .matchsize = sizeof(struct xt_hashlimit_mtinfo1), + .checkentry = hashlimit_mt_check, + .destroy = hashlimit_mt_destroy, + .me = THIS_MODULE, + }, #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) { .name = "hashlimit", .family = AF_INET6, - .match = hashlimit_mt, + .match = hashlimit_mt_v0, .matchsize = sizeof(struct xt_hashlimit_info), #ifdef CONFIG_COMPAT .compatsize = sizeof(struct compat_xt_hashlimit_info), .compat_from_user = hashlimit_mt_compat_from_user, .compat_to_user = hashlimit_mt_compat_to_user, #endif - .checkentry = hashlimit_mt_check, - .destroy = hashlimit_mt_destroy, + .checkentry = hashlimit_mt_check_v0, + .destroy = hashlimit_mt_destroy_v0, .me = THIS_MODULE }, + { + .name = "hashlimit", + .revision = 1, + .family = AF_INET6, + .match = hashlimit_mt, + .matchsize = sizeof(struct xt_hashlimit_mtinfo1), + .checkentry = hashlimit_mt_check, + .destroy = hashlimit_mt_destroy, + .me = THIS_MODULE, + }, #endif }; @@ -678,9 +928,9 @@ static int dl_seq_real_show(struct dsthash_ent *ent, int family, return seq_printf(s, "%ld %u.%u.%u.%u:%u->" "%u.%u.%u.%u:%u %u %u %u\n", (long)(ent->expires - jiffies)/HZ, - NIPQUAD(ent->dst.addr.ip.src), + NIPQUAD(ent->dst.ip.src), ntohs(ent->dst.src_port), - NIPQUAD(ent->dst.addr.ip.dst), + NIPQUAD(ent->dst.ip.dst), ntohs(ent->dst.dst_port), ent->rateinfo.credit, ent->rateinfo.credit_cap, ent->rateinfo.cost); @@ -689,9 +939,9 @@ static int dl_seq_real_show(struct dsthash_ent *ent, int family, return seq_printf(s, "%ld " NIP6_FMT ":%u->" NIP6_FMT ":%u %u %u %u\n", (long)(ent->expires - jiffies)/HZ, - NIP6(*(struct in6_addr *)&ent->dst.addr.ip6.src), + NIP6(*(struct in6_addr *)&ent->dst.ip6.src), ntohs(ent->dst.src_port), - NIP6(*(struct in6_addr *)&ent->dst.addr.ip6.dst), + NIP6(*(struct in6_addr *)&ent->dst.ip6.dst), ntohs(ent->dst.dst_port), ent->rateinfo.credit, ent->rateinfo.credit_cap, ent->rateinfo.cost); -- cgit v1.2.3 From 3cb609d57c20027a8b39fc60b79b930a89da82d4 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 31 Jan 2008 04:49:35 -0800 Subject: [NETFILTER]: x_tables: create per-netns /proc/net/*_tables_* Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 4 ++-- net/ipv4/netfilter/arp_tables.c | 21 ++++++++++++++++++--- net/ipv4/netfilter/ip_tables.c | 21 ++++++++++++++++++--- net/ipv6/netfilter/ip6_tables.c | 22 +++++++++++++++++++--- net/netfilter/x_tables.c | 20 ++++++++++---------- 5 files changed, 67 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 11eea39bbf7d..b2c62cc618f5 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -357,8 +357,8 @@ extern struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name); extern void xt_table_unlock(struct xt_table *t); -extern int xt_proto_init(int af); -extern void xt_proto_fini(int af); +extern int xt_proto_init(struct net *net, int af); +extern void xt_proto_fini(struct net *net, int af); extern struct xt_table_info *xt_alloc_table_info(unsigned int size); extern void xt_free_table_info(struct xt_table_info *info); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 3608675ab08c..a7591ce344d2 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1822,11 +1822,26 @@ static struct nf_sockopt_ops arpt_sockopts = { .owner = THIS_MODULE, }; +static int __net_init arp_tables_net_init(struct net *net) +{ + return xt_proto_init(net, NF_ARP); +} + +static void __net_exit arp_tables_net_exit(struct net *net) +{ + xt_proto_fini(net, NF_ARP); +} + +static struct pernet_operations arp_tables_net_ops = { + .init = arp_tables_net_init, + .exit = arp_tables_net_exit, +}; + static int __init arp_tables_init(void) { int ret; - ret = xt_proto_init(NF_ARP); + ret = register_pernet_subsys(&arp_tables_net_ops); if (ret < 0) goto err1; @@ -1851,7 +1866,7 @@ err4: err3: xt_unregister_target(&arpt_standard_target); err2: - xt_proto_fini(NF_ARP); + unregister_pernet_subsys(&arp_tables_net_ops); err1: return ret; } @@ -1861,7 +1876,7 @@ static void __exit arp_tables_fini(void) nf_unregister_sockopt(&arpt_sockopts); xt_unregister_target(&arpt_error_target); xt_unregister_target(&arpt_standard_target); - xt_proto_fini(NF_ARP); + unregister_pernet_subsys(&arp_tables_net_ops); } EXPORT_SYMBOL(arpt_register_table); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a73afa1ba8b8..600737f122d2 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2213,11 +2213,26 @@ static struct xt_match icmp_matchstruct __read_mostly = { .family = AF_INET, }; +static int __net_init ip_tables_net_init(struct net *net) +{ + return xt_proto_init(net, AF_INET); +} + +static void __net_exit ip_tables_net_exit(struct net *net) +{ + xt_proto_fini(net, AF_INET); +} + +static struct pernet_operations ip_tables_net_ops = { + .init = ip_tables_net_init, + .exit = ip_tables_net_exit, +}; + static int __init ip_tables_init(void) { int ret; - ret = xt_proto_init(AF_INET); + ret = register_pernet_subsys(&ip_tables_net_ops); if (ret < 0) goto err1; @@ -2247,7 +2262,7 @@ err4: err3: xt_unregister_target(&ipt_standard_target); err2: - xt_proto_fini(AF_INET); + unregister_pernet_subsys(&ip_tables_net_ops); err1: return ret; } @@ -2260,7 +2275,7 @@ static void __exit ip_tables_fini(void) xt_unregister_target(&ipt_error_target); xt_unregister_target(&ipt_standard_target); - xt_proto_fini(AF_INET); + unregister_pernet_subsys(&ip_tables_net_ops); } EXPORT_SYMBOL(ipt_register_table); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index b91738ab9f4e..bf9bb6e55bb5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2238,11 +2238,26 @@ static struct xt_match icmp6_matchstruct __read_mostly = { .family = AF_INET6, }; +static int __net_init ip6_tables_net_init(struct net *net) +{ + return xt_proto_init(net, AF_INET6); +} + +static void __net_exit ip6_tables_net_exit(struct net *net) +{ + xt_proto_fini(net, AF_INET6); +} + +static struct pernet_operations ip6_tables_net_ops = { + .init = ip6_tables_net_init, + .exit = ip6_tables_net_exit, +}; + static int __init ip6_tables_init(void) { int ret; - ret = xt_proto_init(AF_INET6); + ret = register_pernet_subsys(&ip6_tables_net_ops); if (ret < 0) goto err1; @@ -2272,7 +2287,7 @@ err4: err3: xt_unregister_target(&ip6t_standard_target); err2: - xt_proto_fini(AF_INET6); + unregister_pernet_subsys(&ip6_tables_net_ops); err1: return ret; } @@ -2284,7 +2299,8 @@ static void __exit ip6_tables_fini(void) xt_unregister_match(&icmp6_matchstruct); xt_unregister_target(&ip6t_error_target); xt_unregister_target(&ip6t_standard_target); - xt_proto_fini(AF_INET6); + + unregister_pernet_subsys(&ip6_tables_net_ops); } /* diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 12ed64c0bc9a..a6792089fcf9 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -922,7 +922,7 @@ static const struct file_operations xt_target_ops = { #endif /* CONFIG_PROC_FS */ -int xt_proto_init(int af) +int xt_proto_init(struct net *net, int af) { #ifdef CONFIG_PROC_FS char buf[XT_FUNCTION_MAXNAMELEN]; @@ -936,7 +936,7 @@ int xt_proto_init(int af) #ifdef CONFIG_PROC_FS strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc = proc_net_fops_create(&init_net, buf, 0440, &xt_table_ops); + proc = proc_net_fops_create(net, buf, 0440, &xt_table_ops); if (!proc) goto out; proc->data = (void *)(unsigned long)af; @@ -944,14 +944,14 @@ int xt_proto_init(int af) strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc = proc_net_fops_create(&init_net, buf, 0440, &xt_match_ops); + proc = proc_net_fops_create(net, buf, 0440, &xt_match_ops); if (!proc) goto out_remove_tables; proc->data = (void *)(unsigned long)af; strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc = proc_net_fops_create(&init_net, buf, 0440, &xt_target_ops); + proc = proc_net_fops_create(net, buf, 0440, &xt_target_ops); if (!proc) goto out_remove_matches; proc->data = (void *)(unsigned long)af; @@ -963,34 +963,34 @@ int xt_proto_init(int af) out_remove_matches: strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc_net_remove(&init_net, buf); + proc_net_remove(net, buf); out_remove_tables: strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(&init_net, buf); + proc_net_remove(net, buf); out: return -1; #endif } EXPORT_SYMBOL_GPL(xt_proto_init); -void xt_proto_fini(int af) +void xt_proto_fini(struct net *net, int af) { #ifdef CONFIG_PROC_FS char buf[XT_FUNCTION_MAXNAMELEN]; strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(&init_net, buf); + proc_net_remove(net, buf); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc_net_remove(&init_net, buf); + proc_net_remove(net, buf); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc_net_remove(&init_net, buf); + proc_net_remove(net, buf); #endif /*CONFIG_PROC_FS*/ } EXPORT_SYMBOL_GPL(xt_proto_fini); -- cgit v1.2.3 From 13f7d63c2911c9d1a254d13899986fc801641127 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 31 Jan 2008 04:50:25 -0800 Subject: [NETFILTER]: nf_{conntrack,nat}_sip: annotate SIP helper with const Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_sip.h | 6 +++--- net/ipv4/netfilter/nf_nat_sip.c | 4 ++-- net/netfilter/nf_conntrack_sip.c | 25 +++++++++++++------------ 3 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 9fff19779bd5..8e5ce1ca7bfc 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -30,9 +30,9 @@ extern unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, struct nf_conntrack_expect *exp, const char *dptr); -extern int ct_sip_get_info(struct nf_conn *ct, const char *dptr, size_t dlen, - unsigned int *matchoff, unsigned int *matchlen, - enum sip_header_pos pos); +extern int ct_sip_get_info(const struct nf_conn *ct, const char *dptr, + size_t dlen, unsigned int *matchoff, + unsigned int *matchlen, enum sip_header_pos pos); extern int ct_sip_lnlen(const char *line, const char *limit); extern const char *ct_sip_search(const char *needle, const char *haystack, size_t needle_len, size_t haystack_len, diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 606a170bf4ca..b4c8d4968bb2 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -35,9 +35,9 @@ struct addr_map { } addr[IP_CT_DIR_MAX]; }; -static void addr_map_init(struct nf_conn *ct, struct addr_map *map) +static void addr_map_init(const struct nf_conn *ct, struct addr_map *map) { - struct nf_conntrack_tuple *t; + const struct nf_conntrack_tuple *t; enum ip_conntrack_dir dir; unsigned int n; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 775f7d468105..c521c891d351 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -48,10 +48,10 @@ unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, const char *dptr) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sdp_hook); -static int digits_len(struct nf_conn *, const char *, const char *, int *); -static int epaddr_len(struct nf_conn *, const char *, const char *, int *); -static int skp_digits_len(struct nf_conn *, const char *, const char *, int *); -static int skp_epaddr_len(struct nf_conn *, const char *, const char *, int *); +static int digits_len(const struct nf_conn *, const char *, const char *, int *); +static int epaddr_len(const struct nf_conn *, const char *, const char *, int *); +static int skp_digits_len(const struct nf_conn *, const char *, const char *, int *); +static int skp_epaddr_len(const struct nf_conn *, const char *, const char *, int *); struct sip_header_nfo { const char *lname; @@ -61,7 +61,7 @@ struct sip_header_nfo { size_t snlen; size_t ln_strlen; int case_sensitive; - int (*match_len)(struct nf_conn *, const char *, + int (*match_len)(const struct nf_conn *, const char *, const char *, int *); }; @@ -225,7 +225,7 @@ const char *ct_sip_search(const char *needle, const char *haystack, } EXPORT_SYMBOL_GPL(ct_sip_search); -static int digits_len(struct nf_conn *ct, const char *dptr, +static int digits_len(const struct nf_conn *ct, const char *dptr, const char *limit, int *shift) { int len = 0; @@ -237,7 +237,7 @@ static int digits_len(struct nf_conn *ct, const char *dptr, } /* get digits length, skipping blank spaces. */ -static int skp_digits_len(struct nf_conn *ct, const char *dptr, +static int skp_digits_len(const struct nf_conn *ct, const char *dptr, const char *limit, int *shift) { for (; dptr <= limit && *dptr == ' '; dptr++) @@ -246,8 +246,9 @@ static int skp_digits_len(struct nf_conn *ct, const char *dptr, return digits_len(ct, dptr, limit, shift); } -static int parse_addr(struct nf_conn *ct, const char *cp, const char **endp, - union nf_inet_addr *addr, const char *limit) +static int parse_addr(const struct nf_conn *ct, const char *cp, + const char **endp, union nf_inet_addr *addr, + const char *limit) { const char *end; int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; @@ -272,7 +273,7 @@ static int parse_addr(struct nf_conn *ct, const char *cp, const char **endp, } /* skip ip address. returns its length. */ -static int epaddr_len(struct nf_conn *ct, const char *dptr, +static int epaddr_len(const struct nf_conn *ct, const char *dptr, const char *limit, int *shift) { union nf_inet_addr addr; @@ -292,7 +293,7 @@ static int epaddr_len(struct nf_conn *ct, const char *dptr, } /* get address length, skiping user info. */ -static int skp_epaddr_len(struct nf_conn *ct, const char *dptr, +static int skp_epaddr_len(const struct nf_conn *ct, const char *dptr, const char *limit, int *shift) { const char *start = dptr; @@ -319,7 +320,7 @@ static int skp_epaddr_len(struct nf_conn *ct, const char *dptr, } /* Returns 0 if not found, -1 error parsing. */ -int ct_sip_get_info(struct nf_conn *ct, +int ct_sip_get_info(const struct nf_conn *ct, const char *dptr, size_t dlen, unsigned int *matchoff, unsigned int *matchlen, -- cgit v1.2.3 From 9ddd0ed050445176a97e11b2b24d6fbc01843da6 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 31 Jan 2008 04:51:23 -0800 Subject: [NETFILTER]: nf_{conntrack,nat}_pptp: annotate PPtP helper with const Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_pptp.h | 2 +- net/ipv4/netfilter/nf_nat_pptp.c | 10 +++++----- net/netfilter/nf_conntrack_pptp.c | 14 ++++++++------ 3 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h index 23435496d24a..3bbde0c3a8a6 100644 --- a/include/linux/netfilter/nf_conntrack_pptp.h +++ b/include/linux/netfilter/nf_conntrack_pptp.h @@ -4,7 +4,7 @@ #include -extern const char *pptp_msg_name[]; +extern const char *const pptp_msg_name[]; /* state of the control session */ enum pptp_ctrlsess_state { diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index e63b944a2ebb..3a1e6d6afc0a 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -40,11 +40,11 @@ MODULE_ALIAS("ip_nat_pptp"); static void pptp_nat_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) { - struct nf_conn *master = ct->master; + const struct nf_conn *master = ct->master; struct nf_conntrack_expect *other_exp; struct nf_conntrack_tuple t; - struct nf_ct_pptp_master *ct_pptp_info; - struct nf_nat_pptp *nat_pptp_info; + const struct nf_ct_pptp_master *ct_pptp_info; + const struct nf_nat_pptp *nat_pptp_info; struct nf_nat_range range; ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; @@ -186,7 +186,7 @@ static void pptp_exp_gre(struct nf_conntrack_expect *expect_orig, struct nf_conntrack_expect *expect_reply) { - struct nf_conn *ct = expect_orig->master; + const struct nf_conn *ct = expect_orig->master; struct nf_ct_pptp_master *ct_pptp_info; struct nf_nat_pptp *nat_pptp_info; @@ -217,7 +217,7 @@ pptp_inbound_pkt(struct sk_buff *skb, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) { - struct nf_nat_pptp *nat_pptp_info; + const struct nf_nat_pptp *nat_pptp_info; u_int16_t msg; __be16 new_pcid; unsigned int pcid_off; diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 099b6df3e2b5..b5cb8e831230 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -67,7 +67,7 @@ EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn); #ifdef DEBUG /* PptpControlMessageType names */ -const char *pptp_msg_name[] = { +const char *const pptp_msg_name[] = { "UNKNOWN_MESSAGE", "START_SESSION_REQUEST", "START_SESSION_REPLY", @@ -136,7 +136,7 @@ static void pptp_expectfn(struct nf_conn *ct, static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) { - struct nf_conntrack_tuple_hash *h; + const struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; struct nf_conn *sibling; @@ -168,7 +168,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) /* timeout GRE data connections */ static void pptp_destroy_siblings(struct nf_conn *ct) { - struct nf_conn_help *help = nfct_help(ct); + const struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_tuple t; nf_ct_gre_keymap_destroy(ct); @@ -497,9 +497,11 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, { int dir = CTINFO2DIR(ctinfo); - struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info; - struct tcphdr _tcph, *tcph; - struct pptp_pkt_hdr _pptph, *pptph; + const struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info; + const struct tcphdr *tcph; + struct tcphdr _tcph; + const struct pptp_pkt_hdr *pptph; + struct pptp_pkt_hdr _pptph; struct PptpControlHeader _ctlh, *ctlh; union pptp_ctrl_union _pptpReq, *pptpReq; unsigned int tcplen = skb->len - protoff; -- cgit v1.2.3 From 82f568fc2f6bcab18e4c80291d21f7f8463ee698 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 31 Jan 2008 04:52:07 -0800 Subject: [NETFILTER]: nf_{conntrack,nat}_proto_tcp: constify and annotate TCP modules Constify a few data tables use const qualifiers on variables where possible in the nf_*_proto_tcp sources. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack.h | 2 +- net/ipv4/netfilter/nf_nat_proto_tcp.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 44 ++++++++++++++++++---------------- 3 files changed, 26 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index bda78a286e2b..90b3e7f5df5f 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -216,7 +216,7 @@ static inline void nf_ct_refresh(struct nf_conn *ct, /* These are for NAT. Icky. */ /* Update TCP window tracking data when NAT mangles the packet */ -extern void nf_conntrack_tcp_update(struct sk_buff *skb, +extern void nf_conntrack_tcp_update(const struct sk_buff *skb, unsigned int dataoff, struct nf_conn *ct, int dir); diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index da23e9fbe679..ffd5d1589eca 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c @@ -93,7 +93,7 @@ tcp_manip_pkt(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); + const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct tcphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; __be32 oldip, newip; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 9807af677a56..3e0cccae5636 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -46,7 +46,7 @@ static int nf_ct_tcp_max_retrans __read_mostly = 3; /* FIXME: Examine ipfilter's timeouts and conntrack transitions more closely. They're more complex. --RR */ -static const char *tcp_conntrack_names[] = { +static const char *const tcp_conntrack_names[] = { "NONE", "SYN_SENT", "SYN_RECV", @@ -261,7 +261,8 @@ static int tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) { - struct tcphdr _hdr, *hp; + const struct tcphdr *hp; + struct tcphdr _hdr; /* Actually only need first 8 bytes. */ hp = skb_header_pointer(skb, dataoff, 8, &_hdr); @@ -343,7 +344,7 @@ static unsigned int get_conntrack_index(const struct tcphdr *tcph) static inline __u32 segment_seq_plus_len(__u32 seq, size_t len, unsigned int dataoff, - struct tcphdr *tcph) + const struct tcphdr *tcph) { /* XXX Should I use payload length field in IP/IPv6 header ? * - YK */ @@ -362,11 +363,11 @@ static inline __u32 segment_seq_plus_len(__u32 seq, */ static void tcp_options(const struct sk_buff *skb, unsigned int dataoff, - struct tcphdr *tcph, + const struct tcphdr *tcph, struct ip_ct_tcp_state *state) { unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; - unsigned char *ptr; + const unsigned char *ptr; int length = (tcph->doff*4) - sizeof(struct tcphdr); if (!length) @@ -417,10 +418,10 @@ static void tcp_options(const struct sk_buff *skb, } static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, - struct tcphdr *tcph, __u32 *sack) + const struct tcphdr *tcph, __u32 *sack) { unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; - unsigned char *ptr; + const unsigned char *ptr; int length = (tcph->doff*4) - sizeof(struct tcphdr); __u32 tmp; @@ -477,18 +478,18 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, } } -static int tcp_in_window(struct nf_conn *ct, +static int tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp *state, enum ip_conntrack_dir dir, unsigned int index, const struct sk_buff *skb, unsigned int dataoff, - struct tcphdr *tcph, + const struct tcphdr *tcph, int pf) { struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; - struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; + const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; int res; @@ -686,14 +687,14 @@ static int tcp_in_window(struct nf_conn *ct, #ifdef CONFIG_NF_NAT_NEEDED /* Update sender->td_end after NAT successfully mangled the packet */ /* Caller must linearize skb at tcp header. */ -void nf_conntrack_tcp_update(struct sk_buff *skb, +void nf_conntrack_tcp_update(const struct sk_buff *skb, unsigned int dataoff, struct nf_conn *ct, int dir) { - struct tcphdr *tcph = (void *)skb->data + dataoff; - struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir]; - struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[!dir]; + const struct tcphdr *tcph = (const void *)skb->data + dataoff; + const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir]; + const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[!dir]; __u32 end; end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph); @@ -726,7 +727,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update); #define TH_CWR 0x80 /* table of valid flag combinations - PUSH, ECE and CWR are always valid */ -static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = +static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = { [TH_SYN] = 1, [TH_SYN|TH_URG] = 1, @@ -746,7 +747,8 @@ static int tcp_error(struct sk_buff *skb, int pf, unsigned int hooknum) { - struct tcphdr _tcph, *th; + const struct tcphdr *th; + struct tcphdr _tcph; unsigned int tcplen = skb->len - dataoff; u_int8_t tcpflags; @@ -803,7 +805,8 @@ static int tcp_packet(struct nf_conn *ct, struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; - struct tcphdr *th, _tcph; + const struct tcphdr *th; + struct tcphdr _tcph; unsigned long timeout; unsigned int index; @@ -964,9 +967,10 @@ static int tcp_new(struct nf_conn *ct, unsigned int dataoff) { enum tcp_conntrack new_state; - struct tcphdr *th, _tcph; - struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0]; - struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1]; + const struct tcphdr *th; + struct tcphdr _tcph; + const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0]; + const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1]; th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); BUG_ON(th == NULL); -- cgit v1.2.3 From 32948588ac4ec54300bae1037e839277fd4536e2 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 31 Jan 2008 04:53:24 -0800 Subject: [NETFILTER]: nf_conntrack: annotate l3protos with const Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netfilter/nf_conntrack_core.h | 4 ++-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 14 ++++++++------ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 8 ++++---- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 7 ++++--- net/netfilter/nf_conntrack_standalone.c | 10 +++++----- 5 files changed, 23 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 2b9e5713585a..9ee26469c759 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -68,8 +68,8 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) int print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, - struct nf_conntrack_l3proto *l3proto, - struct nf_conntrack_l4proto *proto); + const struct nf_conntrack_l3proto *l3proto, + const struct nf_conntrack_l4proto *proto); extern struct hlist_head *nf_conntrack_hash; extern spinlock_t nf_conntrack_lock ; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index ac3d61d8026e..a65b845c5f15 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -27,7 +27,8 @@ static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { - __be32 _addrs[2], *ap; + const __be32 *ap; + __be32 _addrs[2]; ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), sizeof(u_int32_t) * 2, _addrs); if (ap == NULL) @@ -76,7 +77,8 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { - struct iphdr _iph, *iph; + const struct iphdr *iph; + struct iphdr _iph; iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); if (iph == NULL) @@ -111,8 +113,8 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum, { struct nf_conn *ct; enum ip_conntrack_info ctinfo; - struct nf_conn_help *help; - struct nf_conntrack_helper *helper; + const struct nf_conn_help *help; + const struct nf_conntrack_helper *helper; /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); @@ -299,8 +301,8 @@ static ctl_table ip_ct_sysctl_table[] = { static int getorigdst(struct sock *sk, int optval, void __user *user, int *len) { - struct inet_sock *inet = inet_sk(sk); - struct nf_conntrack_tuple_hash *h; + const struct inet_sock *inet = inet_sk(sk); + const struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; NF_CT_TUPLE_U_BLANK(&tuple); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 0ee87edbd286..089252e82c01 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -98,8 +98,8 @@ static int ct_seq_show(struct seq_file *s, void *v) { const struct nf_conntrack_tuple_hash *hash = v; const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); - struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; NF_CT_ASSERT(ct); @@ -251,7 +251,7 @@ static void exp_seq_stop(struct seq_file *seq, void *v) static int exp_seq_show(struct seq_file *s, void *v) { struct nf_conntrack_expect *exp; - struct hlist_node *n = v; + const struct hlist_node *n = v; exp = hlist_entry(n, struct nf_conntrack_expect, hnode); @@ -332,7 +332,7 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v) static int ct_cpu_seq_show(struct seq_file *seq, void *v) { unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); - struct ip_conntrack_stat *st = v; + const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n"); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 2d7b0246475d..3717bdf34f6e 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -30,7 +30,8 @@ static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { - u_int32_t _addrs[8], *ap; + const u_int32_t *ap; + u_int32_t _addrs[8]; ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr), sizeof(_addrs), _addrs); @@ -146,8 +147,8 @@ static unsigned int ipv6_confirm(unsigned int hooknum, int (*okfn)(struct sk_buff *)) { struct nf_conn *ct; - struct nf_conn_help *help; - struct nf_conntrack_helper *helper; + const struct nf_conn_help *help; + const struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; unsigned int ret, protoff; unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 278b35e64d74..ea34090f10cb 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -31,8 +31,8 @@ MODULE_LICENSE("GPL"); #ifdef CONFIG_PROC_FS int print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, - struct nf_conntrack_l3proto *l3proto, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l3proto *l3proto, + const struct nf_conntrack_l4proto *l4proto) { return l3proto->print_tuple(s, tuple) || l4proto->print_tuple(s, tuple); } @@ -118,8 +118,8 @@ static int ct_seq_show(struct seq_file *s, void *v) { const struct nf_conntrack_tuple_hash *hash = v; const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); - struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; NF_CT_ASSERT(ct); @@ -246,7 +246,7 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v) static int ct_cpu_seq_show(struct seq_file *seq, void *v) { unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); - struct ip_conntrack_stat *st = v; + const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n"); -- cgit v1.2.3 From 535174efbe0f0454f0595c31b823498c197eeb4f Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 31 Jan 2008 05:03:27 -0800 Subject: [IPV6]: Introduce the INET6_TW_MATCH macro. We have INET_MATCH, INET_TW_MATCH and INET6_MATCH to test sockets and twbuckets for matching, but ipv6 twbuckets are tested manually. Here's the INET6_TW_MATCH to help with it. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/ipv6.h | 8 ++++++++ net/ipv6/inet6_hashtables.c | 21 +++------------------ 2 files changed, 11 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5d35a4cc3bff..c34786044a1b 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -465,6 +465,14 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#define INET6_TW_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif) \ + (((__sk)->sk_hash == (__hash)) && \ + (*((__portpair *)&(inet_twsk(__sk)->tw_dport)) == (__ports)) && \ + ((__sk)->sk_family == PF_INET6) && \ + (ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr))) && \ + (ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_rcv_saddr, (__daddr))) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) + #endif /* __KERNEL__ */ #endif /* _IPV6_H */ diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index a66a7d8e2811..06b01befc90e 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -80,17 +80,8 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, } /* Must check for a TIME_WAIT'er before going to listener hash. */ sk_for_each(sk, node, &head->twchain) { - const struct inet_timewait_sock *tw = inet_twsk(sk); - - if(*((__portpair *)&(tw->tw_dport)) == ports && - sk->sk_family == PF_INET6) { - const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); - - if (ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && - (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) - goto hit; - } + if (INET6_TW_MATCH(sk, hash, saddr, daddr, ports, dif)) + goto hit; } read_unlock(lock); return NULL; @@ -185,15 +176,9 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &head->twchain) { - const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2); - tw = inet_twsk(sk2); - if(*((__portpair *)&(tw->tw_dport)) == ports && - sk2->sk_family == PF_INET6 && - ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && - (!sk2->sk_bound_dev_if || sk2->sk_bound_dev_if == dif)) { + if (INET6_TW_MATCH(sk2, hash, saddr, daddr, ports, dif)) { if (twsk_unique(sk, sk2, twp)) goto unique; else -- cgit v1.2.3 From 5ee31fc1ecdcbc234c8c56dcacef87c8e09909d8 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 31 Jan 2008 05:04:45 -0800 Subject: [INET]: Consolidate inet(6)_hash_connect. These two functions are the same except for what they call to "check_established" and "hash" for a socket. This saves half-a-kilo for ipv4 and ipv6. add/remove: 1/0 grow/shrink: 1/4 up/down: 582/-1128 (-546) function old new delta __inet_hash_connect - 577 +577 arp_ignore 108 113 +5 static.hint 8 4 -4 rt_worker_func 376 372 -4 inet6_hash_connect 584 25 -559 inet_hash_connect 586 25 -561 Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 5 +++ net/ipv4/inet_hashtables.c | 32 ++++++++++----- net/ipv6/inet6_hashtables.c | 93 +------------------------------------------ 3 files changed, 28 insertions(+), 102 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 761bdc01425d..a34a8f25fc57 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -413,6 +413,11 @@ static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, return sk; } +extern int __inet_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk, + int (*check_established)(struct inet_timewait_death_row *, + struct sock *, __u16, struct inet_timewait_sock **), + void (*hash)(struct inet_hashinfo *, struct sock *)); extern int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk); #endif /* _INET_HASHTABLES_H */ diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 619c63c6948a..b93d40ff6ef4 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -348,11 +348,11 @@ void __inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk) } EXPORT_SYMBOL_GPL(__inet_hash); -/* - * Bind a port for a connect operation and hash it. - */ -int inet_hash_connect(struct inet_timewait_death_row *death_row, - struct sock *sk) +int __inet_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk, + int (*check_established)(struct inet_timewait_death_row *, + struct sock *, __u16, struct inet_timewait_sock **), + void (*hash)(struct inet_hashinfo *, struct sock *)) { struct inet_hashinfo *hinfo = death_row->hashinfo; const unsigned short snum = inet_sk(sk)->num; @@ -385,9 +385,8 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, BUG_TRAP(!hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; - if (!__inet_check_established(death_row, - sk, port, - &tw)) + if (!check_established(death_row, sk, + port, &tw)) goto ok; goto next_port; } @@ -415,7 +414,7 @@ ok: inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->sport = htons(port); - __inet_hash_nolisten(hinfo, sk); + hash(hinfo, sk); } spin_unlock(&head->lock); @@ -432,17 +431,28 @@ ok: tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - __inet_hash_nolisten(hinfo, sk); + hash(hinfo, sk); spin_unlock_bh(&head->lock); return 0; } else { spin_unlock(&head->lock); /* No definite answer... Walk to established hash table */ - ret = __inet_check_established(death_row, sk, snum, NULL); + ret = check_established(death_row, sk, snum, NULL); out: local_bh_enable(); return ret; } } +EXPORT_SYMBOL_GPL(__inet_hash_connect); + +/* + * Bind a port for a connect operation and hash it. + */ +int inet_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk) +{ + return __inet_hash_connect(death_row, sk, + __inet_check_established, __inet_hash_nolisten); +} EXPORT_SYMBOL_GPL(inet_hash_connect); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 06b01befc90e..ece6d0ee2da5 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -233,97 +233,8 @@ static inline u32 inet6_sk_port_offset(const struct sock *sk) int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { - struct inet_hashinfo *hinfo = death_row->hashinfo; - const unsigned short snum = inet_sk(sk)->num; - struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - int ret; - - if (snum == 0) { - int i, port, low, high, remaining; - static u32 hint; - const u32 offset = hint + inet6_sk_port_offset(sk); - struct hlist_node *node; - struct inet_timewait_sock *tw = NULL; - - inet_get_local_port_range(&low, &high); - remaining = (high - low) + 1; - - local_bh_disable(); - for (i = 1; i <= remaining; i++) { - port = low + (i + offset) % remaining; - head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; - spin_lock(&head->lock); - - /* Does not bother with rcv_saddr checks, - * because the established check is already - * unique enough. - */ - inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == port) { - BUG_TRAP(!hlist_empty(&tb->owners)); - if (tb->fastreuse >= 0) - goto next_port; - if (!__inet6_check_established(death_row, - sk, port, - &tw)) - goto ok; - goto next_port; - } - } - - tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, - head, port); - if (!tb) { - spin_unlock(&head->lock); - break; - } - tb->fastreuse = -1; - goto ok; - - next_port: - spin_unlock(&head->lock); - } - local_bh_enable(); - - return -EADDRNOTAVAIL; - -ok: - hint += i; - - /* Head lock still held and bh's disabled */ - inet_bind_hash(sk, tb, port); - if (sk_unhashed(sk)) { - inet_sk(sk)->sport = htons(port); - __inet6_hash(hinfo, sk); - } - spin_unlock(&head->lock); - - if (tw) { - inet_twsk_deschedule(tw, death_row); - inet_twsk_put(tw); - } - - ret = 0; - goto out; - } - - head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; - tb = inet_csk(sk)->icsk_bind_hash; - spin_lock_bh(&head->lock); - - if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { - __inet6_hash(hinfo, sk); - spin_unlock_bh(&head->lock); - return 0; - } else { - spin_unlock(&head->lock); - /* No definite answer... Walk to established hash table */ - ret = __inet6_check_established(death_row, sk, snum, NULL); -out: - local_bh_enable(); - return ret; - } + return __inet_hash_connect(death_row, sk, + __inet6_check_established, __inet6_hash); } EXPORT_SYMBOL_GPL(inet6_hash_connect); -- cgit v1.2.3 From 941b1d22cc035ad58b3d9b44a1c74efac2d7e499 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 31 Jan 2008 05:05:50 -0800 Subject: [NETNS]: Make bind buckets live in net namespaces. This tags the inet_bind_bucket struct with net pointer, initializes it during creation and makes a filtering during lookup. A better hashfn, that takes the net into account is to be done in the future, but currently all bind buckets with similar port will be in one hash chain. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 2 ++ net/ipv4/inet_connection_sock.c | 8 +++++--- net/ipv4/inet_hashtables.c | 8 ++++++-- 3 files changed, 13 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index a34a8f25fc57..55532b9bb59a 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -74,6 +74,7 @@ struct inet_ehash_bucket { * ports are created in O(1) time? I thought so. ;-) -DaveM */ struct inet_bind_bucket { + struct net *ib_net; unsigned short port; signed short fastreuse; struct hlist_node node; @@ -194,6 +195,7 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) extern struct inet_bind_bucket * inet_bind_bucket_create(struct kmem_cache *cachep, + struct net *net, struct inet_bind_hashbucket *head, const unsigned short snum); extern void inet_bind_bucket_destroy(struct kmem_cache *cachep, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7801cceb2d1b..de5a41de191a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -87,6 +87,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, struct hlist_node *node; struct inet_bind_bucket *tb; int ret; + struct net *net = sk->sk_net; local_bh_disable(); if (!snum) { @@ -100,7 +101,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == rover) + if (tb->ib_net == net && tb->port == rover) goto next; break; next: @@ -127,7 +128,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo, head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == snum) + if (tb->ib_net == net && tb->port == snum) goto tb_found; } tb = NULL; @@ -147,7 +148,8 @@ tb_found: } tb_not_found: ret = 1; - if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) + if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, + net, head, snum)) == NULL) goto fail_unlock; if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index b93d40ff6ef4..db1e53a865c2 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -28,12 +28,14 @@ * The bindhash mutex for snum's hash chain must be held here. */ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, + struct net *net, struct inet_bind_hashbucket *head, const unsigned short snum) { struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); if (tb != NULL) { + tb->ib_net = net; tb->port = snum; tb->fastreuse = 0; INIT_HLIST_HEAD(&tb->owners); @@ -359,6 +361,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct inet_bind_hashbucket *head; struct inet_bind_bucket *tb; int ret; + struct net *net = sk->sk_net; if (!snum) { int i, remaining, low, high, port; @@ -381,7 +384,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, * unique enough. */ inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == port) { + if (tb->ib_net == net && tb->port == port) { BUG_TRAP(!hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; @@ -392,7 +395,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, } } - tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port); + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, + net, head, port); if (!tb) { spin_unlock(&head->lock); break; -- cgit v1.2.3 From c67499c0e772064b37ad75eb69b28fc218752636 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 31 Jan 2008 05:06:40 -0800 Subject: [NETNS]: Tcp-v4 sockets per-net lookup. Add a net argument to inet_lookup and propagate it further into lookup calls. Plus tune the __inet_check_established. The dccp and inet_diag, which use that lookup functions pass the init_net into them. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 48 ++++++++++++++++++++++++------------------- net/dccp/ipv4.c | 6 +++--- net/ipv4/inet_diag.c | 2 +- net/ipv4/inet_hashtables.c | 29 +++++++++++++++++--------- net/ipv4/tcp_ipv4.c | 15 +++++++------- 5 files changed, 58 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 55532b9bb59a..c23c4ed30724 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -302,15 +302,17 @@ out: wake_up(&hashinfo->lhash_wait); } -extern struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, +extern struct sock *__inet_lookup_listener(struct net *net, + struct inet_hashinfo *hashinfo, const __be32 daddr, const unsigned short hnum, const int dif); -static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, - __be32 daddr, __be16 dport, int dif) +static inline struct sock *inet_lookup_listener(struct net *net, + struct inet_hashinfo *hashinfo, + __be32 daddr, __be16 dport, int dif) { - return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif); + return __inet_lookup_listener(net, hashinfo, daddr, ntohs(dport), dif); } /* Socket demux engine toys. */ @@ -344,26 +346,26 @@ typedef __u64 __bitwise __addrpair; (((__force __u64)(__be32)(__daddr)) << 32) | \ ((__force __u64)(__be32)(__saddr))); #endif /* __BIG_ENDIAN */ -#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ - (((__sk)->sk_hash == (__hash)) && \ +#define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ + (((__sk)->sk_hash == (__hash)) && ((__sk)->sk_net == (__net)) && \ ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ - (((__sk)->sk_hash == (__hash)) && \ +#define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ + (((__sk)->sk_hash == (__hash)) && ((__sk)->sk_net == (__net)) && \ ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) -#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ - (((__sk)->sk_hash == (__hash)) && \ +#define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ + (((__sk)->sk_hash == (__hash)) && ((__sk)->sk_net == (__net)) && \ (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ - (((__sk)->sk_hash == (__hash)) && \ +#define INET_TW_MATCH(__sk, __net, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ + (((__sk)->sk_hash == (__hash)) && ((__sk)->sk_net == (__net)) && \ (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ @@ -376,32 +378,36 @@ typedef __u64 __bitwise __addrpair; * * Local BH must be disabled here. */ -extern struct sock * __inet_lookup_established(struct inet_hashinfo *hashinfo, +extern struct sock * __inet_lookup_established(struct net *net, + struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, const int dif); static inline struct sock * - inet_lookup_established(struct inet_hashinfo *hashinfo, + inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, const int dif) { - return __inet_lookup_established(hashinfo, saddr, sport, daddr, + return __inet_lookup_established(net, hashinfo, saddr, sport, daddr, ntohs(dport), dif); } -static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo, +static inline struct sock *__inet_lookup(struct net *net, + struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, const int dif) { u16 hnum = ntohs(dport); - struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr, - hnum, dif); - return sk ? : __inet_lookup_listener(hashinfo, daddr, hnum, dif); + struct sock *sk = __inet_lookup_established(net, hashinfo, + saddr, sport, daddr, hnum, dif); + + return sk ? : __inet_lookup_listener(net, hashinfo, daddr, hnum, dif); } -static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, +static inline struct sock *inet_lookup(struct net *net, + struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, const int dif) @@ -409,7 +415,7 @@ static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, struct sock *sk; local_bh_disable(); - sk = __inet_lookup(hashinfo, saddr, sport, daddr, dport, dif); + sk = __inet_lookup(net, hashinfo, saddr, sport, daddr, dport, dif); local_bh_enable(); return sk; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 9e38b0d6195c..c982ad88223d 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -218,7 +218,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) return; } - sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, + sk = inet_lookup(&init_net, &dccp_hashinfo, iph->daddr, dh->dccph_dport, iph->saddr, dh->dccph_sport, inet_iif(skb)); if (sk == NULL) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); @@ -436,7 +436,7 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req != NULL) return dccp_check_req(sk, skb, req, prev); - nsk = inet_lookup_established(&dccp_hashinfo, + nsk = inet_lookup_established(&init_net, &dccp_hashinfo, iph->saddr, dh->dccph_sport, iph->daddr, dh->dccph_dport, inet_iif(skb)); @@ -817,7 +817,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) /* Step 2: * Look up flow ID in table and get corresponding socket */ - sk = __inet_lookup(&dccp_hashinfo, + sk = __inet_lookup(&init_net, &dccp_hashinfo, iph->saddr, dh->dccph_sport, iph->daddr, dh->dccph_dport, inet_iif(skb)); /* diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 4cfb15c461f1..95c9f1429228 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -268,7 +268,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, err = -EINVAL; if (req->idiag_family == AF_INET) { - sk = inet_lookup(hashinfo, req->id.idiag_dst[0], + sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_if); } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index db1e53a865c2..48d45008f749 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -127,7 +127,8 @@ EXPORT_SYMBOL(inet_listen_wlock); * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. */ -static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, +static struct sock *inet_lookup_listener_slow(struct net *net, + const struct hlist_head *head, const __be32 daddr, const unsigned short hnum, const int dif) @@ -139,7 +140,8 @@ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, sk_for_each(sk, node, head) { const struct inet_sock *inet = inet_sk(sk); - if (inet->num == hnum && !ipv6_only_sock(sk)) { + if (sk->sk_net == net && inet->num == hnum && + !ipv6_only_sock(sk)) { const __be32 rcv_saddr = inet->rcv_saddr; int score = sk->sk_family == PF_INET ? 1 : 0; @@ -165,7 +167,8 @@ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, } /* Optimize the common listener case. */ -struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, +struct sock *__inet_lookup_listener(struct net *net, + struct inet_hashinfo *hashinfo, const __be32 daddr, const unsigned short hnum, const int dif) { @@ -180,9 +183,9 @@ struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, if (inet->num == hnum && !sk->sk_node.next && (!inet->rcv_saddr || inet->rcv_saddr == daddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && - !sk->sk_bound_dev_if) + !sk->sk_bound_dev_if && sk->sk_net == net) goto sherry_cache; - sk = inet_lookup_listener_slow(head, daddr, hnum, dif); + sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif); } if (sk) { sherry_cache: @@ -193,7 +196,8 @@ sherry_cache: } EXPORT_SYMBOL_GPL(__inet_lookup_listener); -struct sock * __inet_lookup_established(struct inet_hashinfo *hashinfo, +struct sock * __inet_lookup_established(struct net *net, + struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, const int dif) @@ -212,13 +216,15 @@ struct sock * __inet_lookup_established(struct inet_hashinfo *hashinfo, prefetch(head->chain.first); read_lock(lock); sk_for_each(sk, node, &head->chain) { - if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk, net, hash, acookie, + saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ sk_for_each(sk, node, &head->twchain) { - if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) + if (INET_TW_MATCH(sk, net, hash, acookie, + saddr, daddr, ports, dif)) goto hit; } sk = NULL; @@ -249,6 +255,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; + struct net *net = sk->sk_net; prefetch(head->chain.first); write_lock(lock); @@ -257,7 +264,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, sk_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); - if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { + if (INET_TW_MATCH(sk2, net, hash, acookie, + saddr, daddr, ports, dif)) { if (twsk_unique(sk, sk2, twp)) goto unique; else @@ -268,7 +276,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk2, net, hash, acookie, + saddr, daddr, ports, dif)) goto not_unique; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9aea88b8d4fc..77c1939a2b0d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -369,8 +369,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) return; } - sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, - th->source, inet_iif(skb)); + sk = inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->daddr, th->dest, + iph->saddr, th->source, inet_iif(skb)); if (!sk) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; @@ -1503,8 +1503,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req) return tcp_check_req(sk, skb, req, prev); - nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source, - iph->daddr, th->dest, inet_iif(skb)); + nsk = inet_lookup_established(sk->sk_net, &tcp_hashinfo, iph->saddr, + th->source, iph->daddr, th->dest, inet_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1661,8 +1661,8 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = iph->tos; TCP_SKB_CB(skb)->sacked = 0; - sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source, - iph->daddr, th->dest, inet_iif(skb)); + sk = __inet_lookup(skb->dev->nd_net, &tcp_hashinfo, iph->saddr, + th->source, iph->daddr, th->dest, inet_iif(skb)); if (!sk) goto no_tcp_socket; @@ -1735,7 +1735,8 @@ do_time_wait: } switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { case TCP_TW_SYN: { - struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, + struct sock *sk2 = inet_lookup_listener(skb->dev->nd_net, + &tcp_hashinfo, iph->daddr, th->dest, inet_iif(skb)); if (sk2) { -- cgit v1.2.3 From d86e0dac2ce412715181f792aa0749fe3effff11 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 31 Jan 2008 05:07:21 -0800 Subject: [NETNS]: Tcp-v6 sockets per-net lookup. Add a net argument to inet6_lookup and propagate it further. Actually, this is tcp-v6 implementation of what was done for tcp-v4 sockets in a previous patch. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/ipv6.h | 8 ++++---- include/net/inet6_hashtables.h | 17 ++++++++++------- net/dccp/ipv6.c | 8 ++++---- net/ipv4/inet_diag.c | 2 +- net/ipv6/inet6_hashtables.c | 25 ++++++++++++++----------- net/ipv6/tcp_ipv6.c | 19 ++++++++++--------- 6 files changed, 43 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c34786044a1b..4aaefc349a4b 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -457,16 +457,16 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #define inet_v6_ipv6only(__sk) 0 #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ -#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif)\ - (((__sk)->sk_hash == (__hash)) && \ +#define INET6_MATCH(__sk, __net, __hash, __saddr, __daddr, __ports, __dif)\ + (((__sk)->sk_hash == (__hash)) && ((__sk)->sk_net == (__net)) && \ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define INET6_TW_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif) \ - (((__sk)->sk_hash == (__hash)) && \ +#define INET6_TW_MATCH(__sk, __net, __hash, __saddr, __daddr, __ports, __dif) \ + (((__sk)->sk_hash == (__hash)) && ((__sk)->sk_net == (__net)) && \ (*((__portpair *)&(inet_twsk(__sk)->tw_dport)) == (__ports)) && \ ((__sk)->sk_family == PF_INET6) && \ (ipv6_addr_equal(&inet6_twsk(__sk)->tw_v6_daddr, (__saddr))) && \ diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 668056b4bb0b..fdff630708ce 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -57,34 +57,37 @@ extern void __inet6_hash(struct inet_hashinfo *hashinfo, struct sock *sk); * * The sockhash lock must be held as a reader here. */ -extern struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, +extern struct sock *__inet6_lookup_established(struct net *net, + struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, const int dif); -extern struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, +extern struct sock *inet6_lookup_listener(struct net *net, + struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, const unsigned short hnum, const int dif); -static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo, +static inline struct sock *__inet6_lookup(struct net *net, + struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, const int dif) { - struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport, - daddr, hnum, dif); + struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr, + sport, daddr, hnum, dif); if (sk) return sk; - return inet6_lookup_listener(hashinfo, daddr, hnum, dif); + return inet6_lookup_listener(net, hashinfo, daddr, hnum, dif); } -extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, +extern struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, const int dif); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index f42b75ce7f5c..ed0a0053a797 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -101,8 +101,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int err; __u64 seq; - sk = inet6_lookup(&dccp_hashinfo, &hdr->daddr, dh->dccph_dport, - &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); + sk = inet6_lookup(&init_net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, + &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); if (sk == NULL) { ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); @@ -366,7 +366,7 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) if (req != NULL) return dccp_check_req(sk, skb, req, prev); - nsk = __inet6_lookup_established(&dccp_hashinfo, + nsk = __inet6_lookup_established(&init_net, &dccp_hashinfo, &iph->saddr, dh->dccph_sport, &iph->daddr, ntohs(dh->dccph_dport), inet6_iif(skb)); @@ -797,7 +797,7 @@ static int dccp_v6_rcv(struct sk_buff *skb) /* Step 2: * Look up flow ID in table and get corresponding socket */ - sk = __inet6_lookup(&dccp_hashinfo, &ipv6_hdr(skb)->saddr, + sk = __inet6_lookup(&init_net, &dccp_hashinfo, &ipv6_hdr(skb)->saddr, dh->dccph_sport, &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport), inet6_iif(skb)); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 95c9f1429228..da97695e7096 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -274,7 +274,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, } #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) else if (req->idiag_family == AF_INET6) { - sk = inet6_lookup(hashinfo, + sk = inet6_lookup(&init_net, hashinfo, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, (struct in6_addr *)req->id.idiag_src, diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index ece6d0ee2da5..d325a9958909 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -54,7 +54,8 @@ EXPORT_SYMBOL(__inet6_hash); * * The sockhash lock must be held as a reader here. */ -struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, +struct sock *__inet6_lookup_established(struct net *net, + struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, @@ -75,12 +76,12 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, read_lock(lock); sk_for_each(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ - if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif)) + if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ sk_for_each(sk, node, &head->twchain) { - if (INET6_TW_MATCH(sk, hash, saddr, daddr, ports, dif)) + if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) goto hit; } read_unlock(lock); @@ -93,9 +94,9 @@ hit: } EXPORT_SYMBOL(__inet6_lookup_established); -struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, - const struct in6_addr *daddr, - const unsigned short hnum, const int dif) +struct sock *inet6_lookup_listener(struct net *net, + struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, + const unsigned short hnum, const int dif) { struct sock *sk; const struct hlist_node *node; @@ -104,7 +105,8 @@ struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, read_lock(&hashinfo->lhash_lock); sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { - if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { + if (sk->sk_net == net && inet_sk(sk)->num == hnum && + sk->sk_family == PF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); score = 1; @@ -136,7 +138,7 @@ struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, EXPORT_SYMBOL_GPL(inet6_lookup_listener); -struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, +struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, const int dif) @@ -144,7 +146,7 @@ struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, struct sock *sk; local_bh_disable(); - sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); + sk = __inet6_lookup(net, hashinfo, saddr, sport, daddr, ntohs(dport), dif); local_bh_enable(); return sk; @@ -170,6 +172,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; + struct net *net = sk->sk_net; prefetch(head->chain.first); write_lock(lock); @@ -178,7 +181,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, sk_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); - if (INET6_TW_MATCH(sk2, hash, saddr, daddr, ports, dif)) { + if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) { if (twsk_unique(sk, sk2, twp)) goto unique; else @@ -189,7 +192,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) + if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) goto not_unique; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 00c08399837d..59d0029e93a7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -330,8 +330,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct tcp_sock *tp; __u32 seq; - sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, - th->source, skb->dev->ifindex); + sk = inet6_lookup(skb->dev->nd_net, &tcp_hashinfo, &hdr->daddr, + th->dest, &hdr->saddr, th->source, skb->dev->ifindex); if (sk == NULL) { ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); @@ -1208,9 +1208,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) if (req) return tcp_check_req(sk, skb, req, prev); - nsk = __inet6_lookup_established(&tcp_hashinfo, &ipv6_hdr(skb)->saddr, - th->source, &ipv6_hdr(skb)->daddr, - ntohs(th->dest), inet6_iif(skb)); + nsk = __inet6_lookup_established(sk->sk_net, &tcp_hashinfo, + &ipv6_hdr(skb)->saddr, th->source, + &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1710,9 +1710,10 @@ static int tcp_v6_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb)); TCP_SKB_CB(skb)->sacked = 0; - sk = __inet6_lookup(&tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, - &ipv6_hdr(skb)->daddr, ntohs(th->dest), - inet6_iif(skb)); + sk = __inet6_lookup(skb->dev->nd_net, &tcp_hashinfo, + &ipv6_hdr(skb)->saddr, th->source, + &ipv6_hdr(skb)->daddr, ntohs(th->dest), + inet6_iif(skb)); if (!sk) goto no_tcp_socket; @@ -1792,7 +1793,7 @@ do_time_wait: { struct sock *sk2; - sk2 = inet6_lookup_listener(&tcp_hashinfo, + sk2 = inet6_lookup_listener(skb->dev->nd_net, &tcp_hashinfo, &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb)); if (sk2 != NULL) { -- cgit v1.2.3 From 71d67e666e73e3b7e9ef124745ee2e454ac04be8 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 31 Jan 2008 16:45:47 -0800 Subject: [IPV4] fib_trie: rescan if key is lost during dump Normally during a dump the key of the last dumped entry is used for continuation, but since lock is dropped it might be lost. In that case fallback to the old counter based N^2 behaviour. This means the dump will end up skipping some routes which matches what FIB_HASH does. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- net/ipv4/fib_trie.c | 49 ++++++++++++++++++++++++++++++++----------------- 2 files changed, 33 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index bd13b6f4a98e..fb0713b6ffaf 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -219,7 +219,7 @@ struct netlink_callback int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); int family; - long args[5]; + long args[6]; }; struct netlink_notify diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index cbccafde8238..35851c96bdfb 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1758,6 +1758,19 @@ static struct leaf *trie_nextleaf(struct leaf *l) return leaf_walk_rcu(p, c); } +static struct leaf *trie_leafindex(struct trie *t, int index) +{ + struct leaf *l = trie_firstleaf(t); + + while (index-- > 0) { + l = trie_nextleaf(l); + if (!l) + break; + } + return l; +} + + /* * Caller must hold RTNL. */ @@ -1863,7 +1876,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_alias *fa; __be32 xkey = htonl(key); - s_i = cb->args[4]; + s_i = cb->args[5]; i = 0; /* rcu_read_lock is hold by caller */ @@ -1884,12 +1897,12 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, plen, fa->fa_tos, fa->fa_info, NLM_F_MULTI) < 0) { - cb->args[4] = i; + cb->args[5] = i; return -1; } i++; } - cb->args[4] = i; + cb->args[5] = i; return skb->len; } @@ -1900,7 +1913,7 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, struct hlist_node *node; int i, s_i; - s_i = cb->args[3]; + s_i = cb->args[4]; i = 0; /* rcu_read_lock is hold by caller */ @@ -1911,19 +1924,19 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, } if (i > s_i) - cb->args[4] = 0; + cb->args[5] = 0; if (list_empty(&li->falh)) continue; if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) { - cb->args[3] = i; + cb->args[4] = i; return -1; } i++; } - cb->args[3] = i; + cb->args[4] = i; return skb->len; } @@ -1933,35 +1946,37 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct leaf *l; struct trie *t = (struct trie *) tb->tb_data; t_key key = cb->args[2]; + int count = cb->args[3]; rcu_read_lock(); /* Dump starting at last key. * Note: 0.0.0.0/0 (ie default) is first key. */ - if (!key) + if (count == 0) l = trie_firstleaf(t); else { + /* Normally, continue from last key, but if that is missing + * fallback to using slow rescan + */ l = fib_find_node(t, key); - if (!l) { - /* The table changed during the dump, rather than - * giving partial data, just make application retry. - */ - rcu_read_unlock(); - return -EBUSY; - } + if (!l) + l = trie_leafindex(t, count); } while (l) { cb->args[2] = l->key; if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) { + cb->args[3] = count; rcu_read_unlock(); return -1; } + ++count; l = trie_nextleaf(l); - memset(&cb->args[3], 0, - sizeof(cb->args) - 3*sizeof(cb->args[0])); + memset(&cb->args[4], 0, + sizeof(cb->args) - 4*sizeof(cb->args[0])); } + cb->args[3] = count; rcu_read_unlock(); return skb->len; -- cgit v1.2.3 From e83a2ea850bf0c0c81c675444080970fc07798c6 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Thu, 31 Jan 2008 16:53:23 -0800 Subject: [VLAN]: set_rx_mode support for unicast address list Reuse the existing logic for multicast list synchronization for the unicast address list. The core of dev_mc_sync/unsync are split out as __dev_addr_sync/unsync and moved from dev_mcast.c to dev.c. These are then used to implement dev_unicast_sync/unsync as well. I'm working on cleaning up Intel's FCoE stack, which generates new MAC addresses from the fibre channel device id assigned by the fabric as per the current draft specification in T11. When using such a protocol in a VLAN environment it would be nice to not always be forced into promiscuous mode, assuming the underlying Ethernet driver supports multiple unicast addresses as well. Signed-off-by: Chris Leech Signed-off-by: Patrick McHardy --- include/linux/netdevice.h | 4 ++ net/8021q/vlan_dev.c | 7 +++- net/core/dev.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++ net/core/dev_mcast.c | 39 +++---------------- 4 files changed, 110 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b0813c3286b1..047d432bde55 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1414,12 +1414,16 @@ extern void dev_set_rx_mode(struct net_device *dev); extern void __dev_set_rx_mode(struct net_device *dev); extern int dev_unicast_delete(struct net_device *dev, void *addr, int alen); extern int dev_unicast_add(struct net_device *dev, void *addr, int alen); +extern int dev_unicast_sync(struct net_device *to, struct net_device *from); +extern void dev_unicast_unsync(struct net_device *to, struct net_device *from); extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all); extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); extern int dev_mc_sync(struct net_device *to, struct net_device *from); extern void dev_mc_unsync(struct net_device *to, struct net_device *from); extern int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all); extern int __dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly); +extern int __dev_addr_sync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count); +extern void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count); extern void dev_set_promiscuity(struct net_device *dev, int inc); extern void dev_set_allmulti(struct net_device *dev, int inc); extern void netdev_state_change(struct net_device *dev); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 8059fa42b085..77f04e49a1a0 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -563,6 +563,7 @@ static int vlan_dev_stop(struct net_device *dev) struct net_device *real_dev = vlan_dev_info(dev)->real_dev; dev_mc_unsync(real_dev, dev); + dev_unicast_unsync(real_dev, dev); if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(real_dev, -1); if (dev->flags & IFF_PROMISC) @@ -634,9 +635,10 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change) dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1); } -static void vlan_dev_set_multicast_list(struct net_device *vlan_dev) +static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) { dev_mc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev); + dev_unicast_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev); } /* @@ -702,7 +704,8 @@ void vlan_setup(struct net_device *dev) dev->open = vlan_dev_open; dev->stop = vlan_dev_stop; dev->set_mac_address = vlan_dev_set_mac_address; - dev->set_multicast_list = vlan_dev_set_multicast_list; + dev->set_rx_mode = vlan_dev_set_rx_mode; + dev->set_multicast_list = vlan_dev_set_rx_mode; dev->change_rx_flags = vlan_dev_change_rx_flags; dev->do_ioctl = vlan_dev_ioctl; dev->destructor = free_netdev; diff --git a/net/core/dev.c b/net/core/dev.c index c9c593e1ba6f..edaff2720e10 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2962,6 +2962,102 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen) } EXPORT_SYMBOL(dev_unicast_add); +int __dev_addr_sync(struct dev_addr_list **to, int *to_count, + struct dev_addr_list **from, int *from_count) +{ + struct dev_addr_list *da, *next; + int err = 0; + + da = *from; + while (da != NULL) { + next = da->next; + if (!da->da_synced) { + err = __dev_addr_add(to, to_count, + da->da_addr, da->da_addrlen, 0); + if (err < 0) + break; + da->da_synced = 1; + da->da_users++; + } else if (da->da_users == 1) { + __dev_addr_delete(to, to_count, + da->da_addr, da->da_addrlen, 0); + __dev_addr_delete(from, from_count, + da->da_addr, da->da_addrlen, 0); + } + da = next; + } + return err; +} + +void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, + struct dev_addr_list **from, int *from_count) +{ + struct dev_addr_list *da, *next; + + da = *from; + while (da != NULL) { + next = da->next; + if (da->da_synced) { + __dev_addr_delete(to, to_count, + da->da_addr, da->da_addrlen, 0); + da->da_synced = 0; + __dev_addr_delete(from, from_count, + da->da_addr, da->da_addrlen, 0); + } + da = next; + } +} + +/** + * dev_unicast_sync - Synchronize device's unicast list to another device + * @to: destination device + * @from: source device + * + * Add newly added addresses to the destination device and release + * addresses that have no users left. The source device must be + * locked by netif_tx_lock_bh. + * + * This function is intended to be called from the dev->set_rx_mode + * function of layered software devices. + */ +int dev_unicast_sync(struct net_device *to, struct net_device *from) +{ + int err = 0; + + netif_tx_lock_bh(to); + err = __dev_addr_sync(&to->uc_list, &to->uc_count, + &from->uc_list, &from->uc_count); + if (!err) + __dev_set_rx_mode(to); + netif_tx_unlock_bh(to); + return err; +} +EXPORT_SYMBOL(dev_unicast_sync); + +/** + * dev_unicast_unsync - Remove synchronized addresses from the destination + * device + * @to: destination device + * @from: source device + * + * Remove all addresses that were added to the destination device by + * dev_unicast_sync(). This function is intended to be called from the + * dev->stop function of layered software devices. + */ +void dev_unicast_unsync(struct net_device *to, struct net_device *from) +{ + netif_tx_lock_bh(from); + netif_tx_lock_bh(to); + + __dev_addr_unsync(&to->uc_list, &to->uc_count, + &from->uc_list, &from->uc_count); + __dev_set_rx_mode(to); + + netif_tx_unlock_bh(to); + netif_tx_unlock_bh(from); +} +EXPORT_SYMBOL(dev_unicast_unsync); + static void __dev_addr_discard(struct dev_addr_list **list) { struct dev_addr_list *tmp; diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index cadbfbf7e7f5..cec582563e0d 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -113,32 +113,15 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) * locked by netif_tx_lock_bh. * * This function is intended to be called from the dev->set_multicast_list - * function of layered software devices. + * or dev->set_rx_mode function of layered software devices. */ int dev_mc_sync(struct net_device *to, struct net_device *from) { - struct dev_addr_list *da, *next; int err = 0; netif_tx_lock_bh(to); - da = from->mc_list; - while (da != NULL) { - next = da->next; - if (!da->da_synced) { - err = __dev_addr_add(&to->mc_list, &to->mc_count, - da->da_addr, da->da_addrlen, 0); - if (err < 0) - break; - da->da_synced = 1; - da->da_users++; - } else if (da->da_users == 1) { - __dev_addr_delete(&to->mc_list, &to->mc_count, - da->da_addr, da->da_addrlen, 0); - __dev_addr_delete(&from->mc_list, &from->mc_count, - da->da_addr, da->da_addrlen, 0); - } - da = next; - } + err = __dev_addr_sync(&to->mc_list, &to->mc_count, + &from->mc_list, &from->mc_count); if (!err) __dev_set_rx_mode(to); netif_tx_unlock_bh(to); @@ -160,23 +143,11 @@ EXPORT_SYMBOL(dev_mc_sync); */ void dev_mc_unsync(struct net_device *to, struct net_device *from) { - struct dev_addr_list *da, *next; - netif_tx_lock_bh(from); netif_tx_lock_bh(to); - da = from->mc_list; - while (da != NULL) { - next = da->next; - if (da->da_synced) { - __dev_addr_delete(&to->mc_list, &to->mc_count, - da->da_addr, da->da_addrlen, 0); - da->da_synced = 0; - __dev_addr_delete(&from->mc_list, &from->mc_count, - da->da_addr, da->da_addrlen, 0); - } - da = next; - } + __dev_addr_unsync(&to->mc_list, &to->mc_count, + &from->mc_list, &from->mc_count); __dev_set_rx_mode(to); netif_tx_unlock_bh(to); -- cgit v1.2.3 From 29e75252da20f3ab9e132c68c9aed156b87beae6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 31 Jan 2008 17:05:09 -0800 Subject: [IPV4] route cache: Introduce rt_genid for smooth cache invalidation Current ip route cache implementation is not suited to large caches. We can consume a lot of CPU when cache must be invalidated, since we currently need to evict all cache entries, and this eviction is sometimes asynchronous. min_delay & max_delay can somewhat control this asynchronism behavior, but whole thing is a kludge, regularly triggering infamous soft lockup messages. When entries are still in use, this also consumes a lot of ram, filling dst_garbage.list. A better scheme is to use a generation identifier on each entry, so that cache invalidation can be performed by changing the table identifier, without having to scan all entries. No more delayed flushing, no more stalling when secret_interval expires. Invalidated entries will then be freed at GC time (controled by ip_rt_gc_timeout or stress), or when an invalidated entry is found in a chain when an insert is done. Thus we keep a normal equilibrium. This patch : - renames rt_hash_rnd to rt_genid (and makes it an atomic_t) - Adds a new rt_genid field to 'struct rtable' (filling a hole on 64bit) - Checks entry->rt_genid at appropriate places : --- Documentation/filesystems/proc.txt | 5 - include/linux/sysctl.h | 4 +- include/net/route.h | 1 + net/ipv4/route.c | 209 ++++++++++++++++--------------------- 4 files changed, 92 insertions(+), 127 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 4413a2d4646f..11fe51c036bf 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1919,11 +1919,6 @@ max_size Maximum size of the routing cache. Old entries will be purged once the cache reached has this size. -max_delay, min_delay --------------------- - -Delays for flushing the routing cache. - redirect_load, redirect_number ------------------------------ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 89faebfe48b8..bf4ae4e138f7 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -440,8 +440,8 @@ enum enum { NET_IPV4_ROUTE_FLUSH=1, - NET_IPV4_ROUTE_MIN_DELAY=2, - NET_IPV4_ROUTE_MAX_DELAY=3, + NET_IPV4_ROUTE_MIN_DELAY=2, /* obsolete since 2.6.25 */ + NET_IPV4_ROUTE_MAX_DELAY=3, /* obsolete since 2.6.25 */ NET_IPV4_ROUTE_GC_THRESH=4, NET_IPV4_ROUTE_MAX_SIZE=5, NET_IPV4_ROUTE_GC_MIN_INTERVAL=6, diff --git a/include/net/route.h b/include/net/route.h index fcc6d5b35863..eadad5901429 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -62,6 +62,7 @@ struct rtable struct in_device *idev; + int rt_genid; unsigned rt_flags; __u16 rt_type; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 163086b2f058..8842ecb9be48 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -117,8 +117,6 @@ #define RT_GC_TIMEOUT (300*HZ) -static int ip_rt_min_delay = 2 * HZ; -static int ip_rt_max_delay = 10 * HZ; static int ip_rt_max_size; static int ip_rt_gc_timeout = RT_GC_TIMEOUT; static int ip_rt_gc_interval = 60 * HZ; @@ -133,12 +131,9 @@ static int ip_rt_mtu_expires = 10 * 60 * HZ; static int ip_rt_min_pmtu = 512 + 20 + 20; static int ip_rt_min_advmss = 256; static int ip_rt_secret_interval = 10 * 60 * HZ; -static int ip_rt_flush_expected; -static unsigned long rt_deadline; #define RTprint(a...) printk(KERN_DEBUG a) -static struct timer_list rt_flush_timer; static void rt_worker_func(struct work_struct *work); static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); static struct timer_list rt_secret_timer; @@ -260,19 +255,16 @@ static inline void rt_hash_lock_init(void) static struct rt_hash_bucket *rt_hash_table; static unsigned rt_hash_mask; static unsigned int rt_hash_log; -static unsigned int rt_hash_rnd; +static atomic_t rt_genid; static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) \ (__raw_get_cpu_var(rt_cache_stat).field++) -static int rt_intern_hash(unsigned hash, struct rtable *rth, - struct rtable **res); - static unsigned int rt_hash_code(u32 daddr, u32 saddr) { - return (jhash_2words(daddr, saddr, rt_hash_rnd) - & rt_hash_mask); + return jhash_2words(daddr, saddr, atomic_read(&rt_genid)) + & rt_hash_mask; } #define rt_hash(daddr, saddr, idx) \ @@ -282,27 +274,28 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr) #ifdef CONFIG_PROC_FS struct rt_cache_iter_state { int bucket; + int genid; }; -static struct rtable *rt_cache_get_first(struct seq_file *seq) +static struct rtable *rt_cache_get_first(struct rt_cache_iter_state *st) { struct rtable *r = NULL; - struct rt_cache_iter_state *st = seq->private; for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { rcu_read_lock_bh(); - r = rt_hash_table[st->bucket].chain; - if (r) - break; + r = rcu_dereference(rt_hash_table[st->bucket].chain); + while (r) { + if (r->rt_genid == st->genid) + return r; + r = rcu_dereference(r->u.dst.rt_next); + } rcu_read_unlock_bh(); } - return rcu_dereference(r); + return r; } -static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r) +static struct rtable *rt_cache_get_next(struct rt_cache_iter_state *st, struct rtable *r) { - struct rt_cache_iter_state *st = seq->private; - r = r->u.dst.rt_next; while (!r) { rcu_read_unlock_bh(); @@ -314,29 +307,38 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r) return rcu_dereference(r); } -static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos) +static struct rtable *rt_cache_get_idx(struct rt_cache_iter_state *st, loff_t pos) { - struct rtable *r = rt_cache_get_first(seq); + struct rtable *r = rt_cache_get_first(st); if (r) - while (pos && (r = rt_cache_get_next(seq, r))) + while (pos && (r = rt_cache_get_next(st, r))) { + if (r->rt_genid != st->genid) + continue; --pos; + } return pos ? NULL : r; } static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) { - return *pos ? rt_cache_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; + struct rt_cache_iter_state *st = seq->private; + + if (*pos) + return rt_cache_get_idx(st, *pos - 1); + st->genid = atomic_read(&rt_genid); + return SEQ_START_TOKEN; } static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct rtable *r = NULL; + struct rtable *r; + struct rt_cache_iter_state *st = seq->private; if (v == SEQ_START_TOKEN) - r = rt_cache_get_first(seq); + r = rt_cache_get_first(st); else - r = rt_cache_get_next(seq, v); + r = rt_cache_get_next(st, v); ++*pos; return r; } @@ -709,6 +711,11 @@ static void rt_check_expire(void) continue; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { + if (rth->rt_genid != atomic_read(&rt_genid)) { + *rthp = rth->u.dst.rt_next; + rt_free(rth); + continue; + } if (rth->u.dst.expires) { /* Entry is expired even if it is in use */ if (time_before_eq(jiffies, rth->u.dst.expires)) { @@ -733,83 +740,45 @@ static void rt_check_expire(void) /* * rt_worker_func() is run in process context. - * If a whole flush was scheduled, it is done. - * Else, we call rt_check_expire() to scan part of the hash table + * we call rt_check_expire() to scan part of the hash table */ static void rt_worker_func(struct work_struct *work) { - if (ip_rt_flush_expected) { - ip_rt_flush_expected = 0; - rt_do_flush(1); - } else - rt_check_expire(); + rt_check_expire(); schedule_delayed_work(&expires_work, ip_rt_gc_interval); } -/* This can run from both BH and non-BH contexts, the latter - * in the case of a forced flush event. +/* + * Pertubation of rt_genid by a small quantity [1..256] + * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() + * many times (2^24) without giving recent rt_genid. + * Jenkins hash is strong enough that litle changes of rt_genid are OK. */ -static void rt_run_flush(unsigned long process_context) +static void rt_cache_invalidate(void) { - rt_deadline = 0; - - get_random_bytes(&rt_hash_rnd, 4); + unsigned char shuffle; - rt_do_flush(process_context); + get_random_bytes(&shuffle, sizeof(shuffle)); + atomic_add(shuffle + 1U, &rt_genid); } -static DEFINE_SPINLOCK(rt_flush_lock); - +/* + * delay < 0 : invalidate cache (fast : entries will be deleted later) + * delay >= 0 : invalidate & flush cache (can be long) + */ void rt_cache_flush(int delay) { - unsigned long now = jiffies; - int user_mode = !in_softirq(); - - if (delay < 0) - delay = ip_rt_min_delay; - - spin_lock_bh(&rt_flush_lock); - - if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) { - long tmo = (long)(rt_deadline - now); - - /* If flush timer is already running - and flush request is not immediate (delay > 0): - - if deadline is not achieved, prolongate timer to "delay", - otherwise fire it at deadline time. - */ - - if (user_mode && tmo < ip_rt_max_delay-ip_rt_min_delay) - tmo = 0; - - if (delay > tmo) - delay = tmo; - } - - if (delay <= 0) { - spin_unlock_bh(&rt_flush_lock); - rt_run_flush(user_mode); - return; - } - - if (rt_deadline == 0) - rt_deadline = now + ip_rt_max_delay; - - mod_timer(&rt_flush_timer, now+delay); - spin_unlock_bh(&rt_flush_lock); + rt_cache_invalidate(); + if (delay >= 0) + rt_do_flush(!in_softirq()); } /* - * We change rt_hash_rnd and ask next rt_worker_func() invocation - * to perform a flush in process context + * We change rt_genid and let gc do the cleanup */ static void rt_secret_rebuild(unsigned long dummy) { - get_random_bytes(&rt_hash_rnd, 4); - ip_rt_flush_expected = 1; - cancel_delayed_work(&expires_work); - schedule_delayed_work(&expires_work, HZ/10); + rt_cache_invalidate(); mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval); } @@ -886,7 +855,8 @@ static int rt_garbage_collect(struct dst_ops *ops) rthp = &rt_hash_table[k].chain; spin_lock_bh(rt_hash_lock_addr(k)); while ((rth = *rthp) != NULL) { - if (!rt_may_expire(rth, tmo, expire)) { + if (rth->rt_genid == atomic_read(&rt_genid) && + !rt_may_expire(rth, tmo, expire)) { tmo >>= 1; rthp = &rth->u.dst.rt_next; continue; @@ -967,6 +937,11 @@ restart: spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { + if (rth->rt_genid != atomic_read(&rt_genid)) { + *rthp = rth->u.dst.rt_next; + rt_free(rth); + continue; + } if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { /* Put it first */ *rthp = rth->u.dst.rt_next; @@ -1132,17 +1107,19 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) static void rt_del(unsigned hash, struct rtable *rt) { - struct rtable **rthp; + struct rtable **rthp, *aux; + rthp = &rt_hash_table[hash].chain; spin_lock_bh(rt_hash_lock_addr(hash)); ip_rt_put(rt); - for (rthp = &rt_hash_table[hash].chain; *rthp; - rthp = &(*rthp)->u.dst.rt_next) - if (*rthp == rt) { - *rthp = rt->u.dst.rt_next; - rt_free(rt); - break; + while ((aux = *rthp) != NULL) { + if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) { + *rthp = aux->u.dst.rt_next; + rt_free(aux); + continue; } + rthp = &aux->u.dst.rt_next; + } spin_unlock_bh(rt_hash_lock_addr(hash)); } @@ -1187,7 +1164,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, if (rth->fl.fl4_dst != daddr || rth->fl.fl4_src != skeys[i] || rth->fl.oif != ikeys[k] || - rth->fl.iif != 0) { + rth->fl.iif != 0 || + rth->rt_genid != atomic_read(&rt_genid)) { rthp = &rth->u.dst.rt_next; continue; } @@ -1225,7 +1203,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->u.dst.neighbour = NULL; rt->u.dst.hh = NULL; rt->u.dst.xfrm = NULL; - + rt->rt_genid = atomic_read(&rt_genid); rt->rt_flags |= RTCF_REDIRECTED; /* Gateway is different ... */ @@ -1446,7 +1424,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rth->rt_src == iph->saddr && rth->fl.iif == 0 && !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) && - rth->u.dst.dev->nd_net == net) { + rth->u.dst.dev->nd_net == net && + rth->rt_genid == atomic_read(&rt_genid)) { unsigned short mtu = new_mtu; if (new_mtu < 68 || new_mtu >= old_mtu) { @@ -1681,8 +1660,9 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.oif = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; - rth->rt_type = RTN_MULTICAST; + rth->rt_genid = atomic_read(&rt_genid); rth->rt_flags = RTCF_MULTICAST; + rth->rt_type = RTN_MULTICAST; if (our) { rth->u.dst.input= ip_local_deliver; rth->rt_flags |= RTCF_LOCAL; @@ -1821,6 +1801,7 @@ static inline int __mkroute_input(struct sk_buff *skb, rth->u.dst.input = ip_forward; rth->u.dst.output = ip_output; + rth->rt_genid = atomic_read(&rt_genid); rt_set_nexthop(rth, res, itag); @@ -1981,6 +1962,7 @@ local_input: goto e_nobufs; rth->u.dst.output= ip_rt_bug; + rth->rt_genid = atomic_read(&rt_genid); atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; @@ -2072,7 +2054,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.oif == 0 && rth->fl.mark == skb->mark && rth->fl.fl4_tos == tos && - rth->u.dst.dev->nd_net == net) { + rth->u.dst.dev->nd_net == net && + rth->rt_genid == atomic_read(&rt_genid)) { dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); @@ -2200,6 +2183,7 @@ static inline int __mkroute_output(struct rtable **result, rth->rt_spec_dst= fl->fl4_src; rth->u.dst.output=ip_output; + rth->rt_genid = atomic_read(&rt_genid); RT_CACHE_STAT_INC(out_slow_tot); @@ -2472,7 +2456,8 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, rth->fl.mark == flp->mark && !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && - rth->u.dst.dev->nd_net == net) { + rth->u.dst.dev->nd_net == net && + rth->rt_genid == atomic_read(&rt_genid)) { dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); @@ -2527,6 +2512,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp, struct sock rt->idev = ort->idev; if (rt->idev) in_dev_hold(rt->idev); + rt->rt_genid = atomic_read(&rt_genid); rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_dst = ort->rt_dst; @@ -2781,6 +2767,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) rt = rcu_dereference(rt->u.dst.rt_next), idx++) { if (idx < s_idx) continue; + if (rt->rt_genid != atomic_read(&rt_genid)) + continue; skb->dst = dst_clone(&rt->u.dst); if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, @@ -2849,24 +2837,6 @@ ctl_table ipv4_route_table[] = { .proc_handler = &ipv4_sysctl_rtcache_flush, .strategy = &ipv4_sysctl_rtcache_flush_strategy, }, - { - .ctl_name = NET_IPV4_ROUTE_MIN_DELAY, - .procname = "min_delay", - .data = &ip_rt_min_delay, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, - }, - { - .ctl_name = NET_IPV4_ROUTE_MAX_DELAY, - .procname = "max_delay", - .data = &ip_rt_max_delay, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, - }, { .ctl_name = NET_IPV4_ROUTE_GC_THRESH, .procname = "gc_thresh", @@ -3025,8 +2995,8 @@ int __init ip_rt_init(void) { int rc = 0; - rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^ - (jiffies ^ (jiffies >> 7))); + atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^ + (jiffies ^ (jiffies >> 7)))); #ifdef CONFIG_NET_CLS_ROUTE ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct)); @@ -3059,7 +3029,6 @@ int __init ip_rt_init(void) devinet_init(); ip_fib_init(); - setup_timer(&rt_flush_timer, rt_run_flush, 0); setup_timer(&rt_secret_timer, rt_secret_rebuild, 0); /* All the timers, started at system startup tend -- cgit v1.2.3 From 9472c9ef645d03ea823801d7716e658aeaf894e4 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Thu, 31 Jan 2008 17:14:58 -0800 Subject: [XFRM]: Fix statistics. o Outbound sequence number overflow error status is counted as XfrmOutStateSeqError. o Additionaly, it changes inbound sequence number replay error name from XfrmInSeqOutOfWindow to XfrmInStateSeqError to apply name scheme above. o Inbound IPv4 UDP encapsuling type mismatch error is wrongly mapped to XfrmInStateInvalid then this patch fiex the error to XfrmInStateMismatch. Signed-off-by: Masahide NAKAMURA Signed-off-by: David S. Miller --- Documentation/networking/xfrm_proc.txt | 8 ++++++-- include/linux/snmp.h | 3 ++- net/xfrm/xfrm_input.c | 4 ++-- net/xfrm/xfrm_output.c | 1 + net/xfrm/xfrm_proc.c | 3 ++- 5 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/Documentation/networking/xfrm_proc.txt b/Documentation/networking/xfrm_proc.txt index 53c1a58b02f1..d0d8bafa9016 100644 --- a/Documentation/networking/xfrm_proc.txt +++ b/Documentation/networking/xfrm_proc.txt @@ -26,8 +26,9 @@ XfrmInStateProtoError: e.g. SA key is wrong XfrmInStateModeError: Transformation mode specific error -XfrmInSeqOutOfWindow: - Sequence out of window +XfrmInStateSeqError: + Sequence error + i.e. Sequence number is out of window XfrmInStateExpired: State is expired XfrmInStateMismatch: @@ -60,6 +61,9 @@ XfrmOutStateProtoError: Transformation protocol specific error XfrmOutStateModeError: Transformation mode specific error +XfrmOutStateSeqError: + Sequence error + i.e. Sequence number overflow XfrmOutStateExpired: State is expired XfrmOutPolBlock: diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 86d3effb2836..5df62ef1280c 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -227,7 +227,7 @@ enum LINUX_MIB_XFRMINNOSTATES, /* XfrmInNoStates */ LINUX_MIB_XFRMINSTATEPROTOERROR, /* XfrmInStateProtoError */ LINUX_MIB_XFRMINSTATEMODEERROR, /* XfrmInStateModeError */ - LINUX_MIB_XFRMINSEQOUTOFWINDOW, /* XfrmInSeqOutOfWindow */ + LINUX_MIB_XFRMINSTATESEQERROR, /* XfrmInStateSeqError */ LINUX_MIB_XFRMINSTATEEXPIRED, /* XfrmInStateExpired */ LINUX_MIB_XFRMINSTATEMISMATCH, /* XfrmInStateMismatch */ LINUX_MIB_XFRMINSTATEINVALID, /* XfrmInStateInvalid */ @@ -241,6 +241,7 @@ enum LINUX_MIB_XFRMOUTNOSTATES, /* XfrmOutNoStates */ LINUX_MIB_XFRMOUTSTATEPROTOERROR, /* XfrmOutStateProtoError */ LINUX_MIB_XFRMOUTSTATEMODEERROR, /* XfrmOutStateModeError */ + LINUX_MIB_XFRMOUTSTATESEQERROR, /* XfrmOutStateSeqError */ LINUX_MIB_XFRMOUTSTATEEXPIRED, /* XfrmOutStateExpired */ LINUX_MIB_XFRMOUTPOLBLOCK, /* XfrmOutPolBlock */ LINUX_MIB_XFRMOUTPOLDEAD, /* XfrmOutPolDead */ diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index d32b67a4e0fd..4d6ebc633a94 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -159,12 +159,12 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) } if ((x->encap ? x->encap->encap_type : 0) != encap_type) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEINVALID); + XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH); goto drop_unlock; } if (x->props.replay_window && xfrm_replay_check(x, skb, seq)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSEQOUTOFWINDOW); + XFRM_INC_STATS(LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index f4a1047a5573..fc690368325f 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -64,6 +64,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { XFRM_SKB_CB(skb)->seq = ++x->replay.oseq; if (unlikely(x->replay.oseq == 0)) { + XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATESEQERROR); x->replay.oseq--; xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 31d035415ecd..2b0db13f0cda 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -22,7 +22,7 @@ static struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmInNoStates", LINUX_MIB_XFRMINNOSTATES), SNMP_MIB_ITEM("XfrmInStateProtoError", LINUX_MIB_XFRMINSTATEPROTOERROR), SNMP_MIB_ITEM("XfrmInStateModeError", LINUX_MIB_XFRMINSTATEMODEERROR), - SNMP_MIB_ITEM("XfrmInSeqOutOfWindow", LINUX_MIB_XFRMINSEQOUTOFWINDOW), + SNMP_MIB_ITEM("XfrmInStateSeqError", LINUX_MIB_XFRMINSTATESEQERROR), SNMP_MIB_ITEM("XfrmInStateExpired", LINUX_MIB_XFRMINSTATEEXPIRED), SNMP_MIB_ITEM("XfrmInStateMismatch", LINUX_MIB_XFRMINSTATEMISMATCH), SNMP_MIB_ITEM("XfrmInStateInvalid", LINUX_MIB_XFRMINSTATEINVALID), @@ -36,6 +36,7 @@ static struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmOutNoStates", LINUX_MIB_XFRMOUTNOSTATES), SNMP_MIB_ITEM("XfrmOutStateProtoError", LINUX_MIB_XFRMOUTSTATEPROTOERROR), SNMP_MIB_ITEM("XfrmOutStateModeError", LINUX_MIB_XFRMOUTSTATEMODEERROR), + SNMP_MIB_ITEM("XfrmOutStateSeqError", LINUX_MIB_XFRMOUTSTATESEQERROR), SNMP_MIB_ITEM("XfrmOutStateExpired", LINUX_MIB_XFRMOUTSTATEEXPIRED), SNMP_MIB_ITEM("XfrmOutPolBlock", LINUX_MIB_XFRMOUTPOLBLOCK), SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD), -- cgit v1.2.3 From 0027ba843450a5e28dd7fed580ad1e1546b7696b Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 31 Jan 2008 17:17:31 -0800 Subject: [IPV4]: Make struct ipv4_devconf static. struct ipv4_devconf can now become static. Signed-off-by: Adrian Bunk Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 2 -- net/ipv4/devinet.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 8d9eaaebded7..fc4e3db649e8 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -17,8 +17,6 @@ struct ipv4_devconf DECLARE_BITMAP(state, __NET_IPV4_CONF_MAX - 1); }; -extern struct ipv4_devconf ipv4_devconf; - struct in_device { struct net_device *dev; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 21f71bf912d5..5ab5acc17e44 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -64,7 +64,7 @@ #include #include -struct ipv4_devconf ipv4_devconf = { +static struct ipv4_devconf ipv4_devconf = { .data = { [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, -- cgit v1.2.3 From 5239008b0de2507a531440b8c3019fb9c116fb1a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 31 Jan 2008 18:36:18 -0800 Subject: [NET_SCHED]: Constify struct tcf_ext_map Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 6 +++--- net/sched/cls_api.c | 6 +++--- net/sched/cls_basic.c | 2 +- net/sched/cls_fw.c | 2 +- net/sched/cls_route.c | 2 +- net/sched/cls_tcindex.c | 2 +- net/sched/cls_u32.c | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 8716eb757d51..d349c66ef828 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -131,14 +131,14 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, extern int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, - struct tcf_ext_map *map); + const struct tcf_ext_map *map); extern void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts); extern void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, struct tcf_exts *src); extern int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts, - struct tcf_ext_map *map); + const struct tcf_ext_map *map); extern int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts, - struct tcf_ext_map *map); + const struct tcf_ext_map *map); /** * struct tcf_pkt_info - packet information diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3377ca0d0a0c..0fbedcabf111 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -482,7 +482,7 @@ EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, - struct tcf_ext_map *map) + const struct tcf_ext_map *map) { memset(exts, 0, sizeof(*exts)); @@ -535,7 +535,7 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, EXPORT_SYMBOL(tcf_exts_change); int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts, - struct tcf_ext_map *map) + const struct tcf_ext_map *map) { #ifdef CONFIG_NET_CLS_ACT if (map->action && exts->action) { @@ -571,7 +571,7 @@ EXPORT_SYMBOL(tcf_exts_dump); int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts, - struct tcf_ext_map *map) + const struct tcf_ext_map *map) { #ifdef CONFIG_NET_CLS_ACT if (exts->action) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index bfb4342ea88c..956915c217d6 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -35,7 +35,7 @@ struct basic_filter struct list_head link; }; -static struct tcf_ext_map basic_ext_map = { +static const struct tcf_ext_map basic_ext_map = { .action = TCA_BASIC_ACT, .police = TCA_BASIC_POLICE }; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 436a6e7c438e..b0f90e593af0 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -47,7 +47,7 @@ struct fw_filter struct tcf_exts exts; }; -static struct tcf_ext_map fw_ext_map = { +static const struct tcf_ext_map fw_ext_map = { .action = TCA_FW_ACT, .police = TCA_FW_POLICE }; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index f7e7d3955d28..784dcb870b98 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -62,7 +62,7 @@ struct route4_filter #define ROUTE4_FAILURE ((struct route4_filter*)(-1L)) -static struct tcf_ext_map route_ext_map = { +static const struct tcf_ext_map route_ext_map = { .police = TCA_ROUTE4_POLICE, .action = TCA_ROUTE4_ACT }; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index ee60b2d1705d..7a7bff5ded24 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -55,7 +55,7 @@ struct tcindex_data { int fall_through; /* 0: only classify if explicit match */ }; -static struct tcf_ext_map tcindex_ext_map = { +static const struct tcf_ext_map tcindex_ext_map = { .police = TCA_TCINDEX_POLICE, .action = TCA_TCINDEX_ACT }; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index e8a775689123..b18fa95ef248 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -82,7 +82,7 @@ struct tc_u_common u32 hgenerator; }; -static struct tcf_ext_map u32_ext_map = { +static const struct tcf_ext_map u32_ext_map = { .action = TCA_U32_ACT, .police = TCA_U32_POLICE }; -- cgit v1.2.3 From 94de78d19580143c407ff2492edf2410d0e7d48c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 31 Jan 2008 18:37:16 -0800 Subject: [NET_SCHED]: sch_sfq: make internal queues visible as classes Add support for dumping statistics and make internal queues visible as classes. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 5 +++++ net/sched/sch_sfq.c | 41 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 32761352e858..dbb7ac37960d 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -150,6 +150,11 @@ struct tc_sfq_qopt unsigned flows; /* Maximal number of flows */ }; +struct tc_sfq_xstats +{ + __s32 allot; +}; + /* * NOTE: limit, divisor and flows are hardwired to code at the moment. * diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index d818d1985cca..a20e2ef7704b 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -566,15 +566,54 @@ static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl) return &q->filter_list; } +static int sfq_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + tcm->tcm_handle |= TC_H_MIN(cl); + return 0; +} + +static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, + struct gnet_dump *d) +{ + struct sfq_sched_data *q = qdisc_priv(sch); + sfq_index idx = q->ht[cl-1]; + struct gnet_stats_queue qs = { .qlen = q->qs[idx].qlen }; + struct tc_sfq_xstats xstats = { .allot = q->allot[idx] }; + + if (gnet_stats_copy_queue(d, &qs) < 0) + return -1; + return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); +} + static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) { - return; + struct sfq_sched_data *q = qdisc_priv(sch); + unsigned int i; + + if (arg->stop) + return; + + for (i = 0; i < SFQ_HASH_DIVISOR; i++) { + if (q->ht[i] == SFQ_DEPTH || + arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, i + 1, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } } static const struct Qdisc_class_ops sfq_class_ops = { .get = sfq_get, .change = sfq_change_class, .tcf_chain = sfq_find_tcf, + .dump = sfq_dump_class, + .dump_stats = sfq_dump_class_stats, .walk = sfq_walk, }; -- cgit v1.2.3 From e5dfb815181fcb186d6080ac3a091eadff2d98fe Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 31 Jan 2008 18:37:42 -0800 Subject: [NET_SCHED]: Add flow classifier Add new "flow" classifier, which is meant to extend the SFQ hashing capabilities without hard-coding new hash functions and also allows deterministic mappings of keys to classes, replacing some out of tree iptables patches like IPCLASSIFY (maps IPs to classes), IPMARK (maps IPs to marks, with fw filters to classes), ... Some examples: - Classic SFQ hash: tc filter add ... flow hash \ keys src,dst,proto,proto-src,proto-dst divisor 1024 - Classic SFQ hash, but using information from conntrack to work properly in combination with NAT: tc filter add ... flow hash \ keys nfct-src,nfct-dst,proto,nfct-proto-src,nfct-proto-dst divisor 1024 - Map destination IPs of 192.168.0.0/24 to classids 1-257: tc filter add ... flow map \ key dst addend -192.168.0.0 divisor 256 - alternatively: tc filter add ... flow map \ key dst and 0xff - similar, but reverse ordered: tc filter add ... flow map \ key dst and 0xff xor 0xff Perturbation is currently not supported because we can't reliable kill the timer on destruction. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/pkt_cls.h | 50 ++++ net/sched/Kconfig | 11 + net/sched/Makefile | 1 + net/sched/cls_flow.c | 660 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 722 insertions(+) create mode 100644 net/sched/cls_flow.c (limited to 'include') diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index 30b8571e6b34..1c1dba9ea5fb 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -328,6 +328,56 @@ enum #define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1) +/* Flow filter */ + +enum +{ + FLOW_KEY_SRC, + FLOW_KEY_DST, + FLOW_KEY_PROTO, + FLOW_KEY_PROTO_SRC, + FLOW_KEY_PROTO_DST, + FLOW_KEY_IIF, + FLOW_KEY_PRIORITY, + FLOW_KEY_MARK, + FLOW_KEY_NFCT, + FLOW_KEY_NFCT_SRC, + FLOW_KEY_NFCT_DST, + FLOW_KEY_NFCT_PROTO_SRC, + FLOW_KEY_NFCT_PROTO_DST, + FLOW_KEY_RTCLASSID, + FLOW_KEY_SKUID, + FLOW_KEY_SKGID, + __FLOW_KEY_MAX, +}; + +#define FLOW_KEY_MAX (__FLOW_KEY_MAX - 1) + +enum +{ + FLOW_MODE_MAP, + FLOW_MODE_HASH, +}; + +enum +{ + TCA_FLOW_UNSPEC, + TCA_FLOW_KEYS, + TCA_FLOW_MODE, + TCA_FLOW_BASECLASS, + TCA_FLOW_RSHIFT, + TCA_FLOW_ADDEND, + TCA_FLOW_MASK, + TCA_FLOW_XOR, + TCA_FLOW_DIVISOR, + TCA_FLOW_ACT, + TCA_FLOW_POLICE, + TCA_FLOW_EMATCHES, + __TCA_FLOW_MAX +}; + +#define TCA_FLOW_MAX (__TCA_FLOW_MAX - 1) + /* Basic filter */ enum diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 7d4085a4af66..82adfe6447d7 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -307,6 +307,17 @@ config NET_CLS_RSVP6 To compile this code as a module, choose M here: the module will be called cls_rsvp6. +config NET_CLS_FLOW + tristate "Flow classifier" + select NET_CLS + ---help--- + If you say Y here, you will be able to classify packets based on + a configurable combination of packet keys. This is mostly useful + in combination with SFQ. + + To compile this code as a module, choose M here: the + module will be called cls_flow. + config NET_EMATCH bool "Extended Matches" select NET_CLS diff --git a/net/sched/Makefile b/net/sched/Makefile index 81ecbe8e7dce..1d2b0f7df848 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_NET_CLS_RSVP) += cls_rsvp.o obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o +obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o obj-$(CONFIG_NET_EMATCH) += ematch.o obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c new file mode 100644 index 000000000000..5a7f6a3060fc --- /dev/null +++ b/net/sched/cls_flow.c @@ -0,0 +1,660 @@ +/* + * net/sched/cls_flow.c Generic flow classifier + * + * Copyright (c) 2007, 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#include +#endif + +struct flow_head { + struct list_head filters; +}; + +struct flow_filter { + struct list_head list; + struct tcf_exts exts; + struct tcf_ematch_tree ematches; + u32 handle; + + u32 nkeys; + u32 keymask; + u32 mode; + u32 mask; + u32 xor; + u32 rshift; + u32 addend; + u32 divisor; + u32 baseclass; +}; + +static u32 flow_hashrnd __read_mostly; +static int flow_hashrnd_initted __read_mostly; + +static const struct tcf_ext_map flow_ext_map = { + .action = TCA_FLOW_ACT, + .police = TCA_FLOW_POLICE, +}; + +static inline u32 addr_fold(void *addr) +{ + unsigned long a = (unsigned long)addr; + + return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0); +} + +static u32 flow_get_src(const struct sk_buff *skb) +{ + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + return ntohl(ip_hdr(skb)->saddr); + case __constant_htons(ETH_P_IPV6): + return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]); + default: + return addr_fold(skb->sk); + } +} + +static u32 flow_get_dst(const struct sk_buff *skb) +{ + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + return ntohl(ip_hdr(skb)->daddr); + case __constant_htons(ETH_P_IPV6): + return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]); + default: + return addr_fold(skb->dst) ^ (__force u16)skb->protocol; + } +} + +static u32 flow_get_proto(const struct sk_buff *skb) +{ + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + return ip_hdr(skb)->protocol; + case __constant_htons(ETH_P_IPV6): + return ipv6_hdr(skb)->nexthdr; + default: + return 0; + } +} + +static int has_ports(u8 protocol) +{ + switch (protocol) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_SCTP: + case IPPROTO_DCCP: + case IPPROTO_ESP: + return 1; + default: + return 0; + } +} + +static u32 flow_get_proto_src(const struct sk_buff *skb) +{ + u32 res = 0; + + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): { + struct iphdr *iph = ip_hdr(skb); + + if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && + has_ports(iph->protocol)) + res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4)); + break; + } + case __constant_htons(ETH_P_IPV6): { + struct ipv6hdr *iph = ipv6_hdr(skb); + + if (has_ports(iph->nexthdr)) + res = ntohs(*(__be16 *)&iph[1]); + break; + } + default: + res = addr_fold(skb->sk); + } + + return res; +} + +static u32 flow_get_proto_dst(const struct sk_buff *skb) +{ + u32 res = 0; + + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): { + struct iphdr *iph = ip_hdr(skb); + + if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && + has_ports(iph->protocol)) + res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2)); + break; + } + case __constant_htons(ETH_P_IPV6): { + struct ipv6hdr *iph = ipv6_hdr(skb); + + if (has_ports(iph->nexthdr)) + res = ntohs(*(__be16 *)((void *)&iph[1] + 2)); + break; + } + default: + res = addr_fold(skb->dst) ^ (__force u16)skb->protocol; + } + + return res; +} + +static u32 flow_get_iif(const struct sk_buff *skb) +{ + return skb->iif; +} + +static u32 flow_get_priority(const struct sk_buff *skb) +{ + return skb->priority; +} + +static u32 flow_get_mark(const struct sk_buff *skb) +{ + return skb->mark; +} + +static u32 flow_get_nfct(const struct sk_buff *skb) +{ +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + return addr_fold(skb->nfct); +#else + return 0; +#endif +} + +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#define CTTUPLE(skb, member) \ +({ \ + enum ip_conntrack_info ctinfo; \ + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); \ + if (ct == NULL) \ + goto fallback; \ + ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member; \ +}) +#else +#define CTTUPLE(skb, member) \ +({ \ + goto fallback; \ + 0; \ +}) +#endif + +static u32 flow_get_nfct_src(const struct sk_buff *skb) +{ + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + return ntohl(CTTUPLE(skb, src.u3.ip)); + case __constant_htons(ETH_P_IPV6): + return ntohl(CTTUPLE(skb, src.u3.ip6[3])); + } +fallback: + return flow_get_src(skb); +} + +static u32 flow_get_nfct_dst(const struct sk_buff *skb) +{ + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + return ntohl(CTTUPLE(skb, dst.u3.ip)); + case __constant_htons(ETH_P_IPV6): + return ntohl(CTTUPLE(skb, dst.u3.ip6[3])); + } +fallback: + return flow_get_dst(skb); +} + +static u32 flow_get_nfct_proto_src(const struct sk_buff *skb) +{ + return ntohs(CTTUPLE(skb, src.u.all)); +fallback: + return flow_get_proto_src(skb); +} + +static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb) +{ + return ntohs(CTTUPLE(skb, dst.u.all)); +fallback: + return flow_get_proto_dst(skb); +} + +static u32 flow_get_rtclassid(const struct sk_buff *skb) +{ +#ifdef CONFIG_NET_CLS_ROUTE + if (skb->dst) + return skb->dst->tclassid; +#endif + return 0; +} + +static u32 flow_get_skuid(const struct sk_buff *skb) +{ + if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) + return skb->sk->sk_socket->file->f_uid; + return 0; +} + +static u32 flow_get_skgid(const struct sk_buff *skb) +{ + if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) + return skb->sk->sk_socket->file->f_gid; + return 0; +} + +static u32 flow_key_get(const struct sk_buff *skb, int key) +{ + switch (key) { + case FLOW_KEY_SRC: + return flow_get_src(skb); + case FLOW_KEY_DST: + return flow_get_dst(skb); + case FLOW_KEY_PROTO: + return flow_get_proto(skb); + case FLOW_KEY_PROTO_SRC: + return flow_get_proto_src(skb); + case FLOW_KEY_PROTO_DST: + return flow_get_proto_dst(skb); + case FLOW_KEY_IIF: + return flow_get_iif(skb); + case FLOW_KEY_PRIORITY: + return flow_get_priority(skb); + case FLOW_KEY_MARK: + return flow_get_mark(skb); + case FLOW_KEY_NFCT: + return flow_get_nfct(skb); + case FLOW_KEY_NFCT_SRC: + return flow_get_nfct_src(skb); + case FLOW_KEY_NFCT_DST: + return flow_get_nfct_dst(skb); + case FLOW_KEY_NFCT_PROTO_SRC: + return flow_get_nfct_proto_src(skb); + case FLOW_KEY_NFCT_PROTO_DST: + return flow_get_nfct_proto_dst(skb); + case FLOW_KEY_RTCLASSID: + return flow_get_rtclassid(skb); + case FLOW_KEY_SKUID: + return flow_get_skuid(skb); + case FLOW_KEY_SKGID: + return flow_get_skgid(skb); + default: + WARN_ON(1); + return 0; + } +} + +static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp, + struct tcf_result *res) +{ + struct flow_head *head = tp->root; + struct flow_filter *f; + u32 keymask; + u32 classid; + unsigned int n, key; + int r; + + list_for_each_entry(f, &head->filters, list) { + u32 keys[f->nkeys]; + + if (!tcf_em_tree_match(skb, &f->ematches, NULL)) + continue; + + keymask = f->keymask; + + for (n = 0; n < f->nkeys; n++) { + key = ffs(keymask) - 1; + keymask &= ~(1 << key); + keys[n] = flow_key_get(skb, key); + } + + if (f->mode == FLOW_MODE_HASH) + classid = jhash2(keys, f->nkeys, flow_hashrnd); + else { + classid = keys[0]; + classid = (classid & f->mask) ^ f->xor; + classid = (classid >> f->rshift) + f->addend; + } + + if (f->divisor) + classid %= f->divisor; + + res->class = 0; + res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid); + + r = tcf_exts_exec(skb, &f->exts, res); + if (r < 0) + continue; + return r; + } + return -1; +} + +static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { + [TCA_FLOW_KEYS] = { .type = NLA_U32 }, + [TCA_FLOW_MODE] = { .type = NLA_U32 }, + [TCA_FLOW_BASECLASS] = { .type = NLA_U32 }, + [TCA_FLOW_RSHIFT] = { .type = NLA_U32 }, + [TCA_FLOW_ADDEND] = { .type = NLA_U32 }, + [TCA_FLOW_MASK] = { .type = NLA_U32 }, + [TCA_FLOW_XOR] = { .type = NLA_U32 }, + [TCA_FLOW_DIVISOR] = { .type = NLA_U32 }, + [TCA_FLOW_ACT] = { .type = NLA_NESTED }, + [TCA_FLOW_POLICE] = { .type = NLA_NESTED }, + [TCA_FLOW_EMATCHES] = { .type = NLA_NESTED }, +}; + +static int flow_change(struct tcf_proto *tp, unsigned long base, + u32 handle, struct nlattr **tca, + unsigned long *arg) +{ + struct flow_head *head = tp->root; + struct flow_filter *f; + struct nlattr *opt = tca[TCA_OPTIONS]; + struct nlattr *tb[TCA_FLOW_MAX + 1]; + struct tcf_exts e; + struct tcf_ematch_tree t; + unsigned int nkeys = 0; + u32 baseclass = 0; + u32 keymask = 0; + u32 mode; + int err; + + if (opt == NULL) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy); + if (err < 0) + return err; + + if (tb[TCA_FLOW_BASECLASS]) { + baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]); + if (TC_H_MIN(baseclass) == 0) + return -EINVAL; + } + + if (tb[TCA_FLOW_KEYS]) { + keymask = nla_get_u32(tb[TCA_FLOW_KEYS]); + if (fls(keymask) - 1 > FLOW_KEY_MAX) + return -EOPNOTSUPP; + + nkeys = hweight32(keymask); + if (nkeys == 0) + return -EINVAL; + } + + err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map); + if (err < 0) + return err; + + err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t); + if (err < 0) + goto err1; + + f = (struct flow_filter *)*arg; + if (f != NULL) { + err = -EINVAL; + if (f->handle != handle && handle) + goto err2; + + mode = f->mode; + if (tb[TCA_FLOW_MODE]) + mode = nla_get_u32(tb[TCA_FLOW_MODE]); + if (mode != FLOW_MODE_HASH && nkeys > 1) + goto err2; + } else { + err = -EINVAL; + if (!handle) + goto err2; + if (!tb[TCA_FLOW_KEYS]) + goto err2; + + mode = FLOW_MODE_MAP; + if (tb[TCA_FLOW_MODE]) + mode = nla_get_u32(tb[TCA_FLOW_MODE]); + if (mode != FLOW_MODE_HASH && nkeys > 1) + goto err2; + + if (TC_H_MAJ(baseclass) == 0) + baseclass = TC_H_MAKE(tp->q->handle, baseclass); + if (TC_H_MIN(baseclass) == 0) + baseclass = TC_H_MAKE(baseclass, 1); + + err = -ENOBUFS; + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (f == NULL) + goto err2; + + f->handle = handle; + f->mask = ~0U; + } + + tcf_exts_change(tp, &f->exts, &e); + tcf_em_tree_change(tp, &f->ematches, &t); + + tcf_tree_lock(tp); + + if (tb[TCA_FLOW_KEYS]) { + f->keymask = keymask; + f->nkeys = nkeys; + } + + f->mode = mode; + + if (tb[TCA_FLOW_MASK]) + f->mask = nla_get_u32(tb[TCA_FLOW_MASK]); + if (tb[TCA_FLOW_XOR]) + f->xor = nla_get_u32(tb[TCA_FLOW_XOR]); + if (tb[TCA_FLOW_RSHIFT]) + f->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]); + if (tb[TCA_FLOW_ADDEND]) + f->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]); + + if (tb[TCA_FLOW_DIVISOR]) + f->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]); + if (baseclass) + f->baseclass = baseclass; + + if (*arg == 0) + list_add_tail(&f->list, &head->filters); + + tcf_tree_unlock(tp); + + *arg = (unsigned long)f; + return 0; + +err2: + tcf_em_tree_destroy(tp, &t); +err1: + tcf_exts_destroy(tp, &e); + return err; +} + +static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f) +{ + tcf_exts_destroy(tp, &f->exts); + tcf_em_tree_destroy(tp, &f->ematches); + kfree(f); +} + +static int flow_delete(struct tcf_proto *tp, unsigned long arg) +{ + struct flow_filter *f = (struct flow_filter *)arg; + + tcf_tree_lock(tp); + list_del(&f->list); + tcf_tree_unlock(tp); + flow_destroy_filter(tp, f); + return 0; +} + +static int flow_init(struct tcf_proto *tp) +{ + struct flow_head *head; + + if (!flow_hashrnd_initted) { + get_random_bytes(&flow_hashrnd, 4); + flow_hashrnd_initted = 1; + } + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (head == NULL) + return -ENOBUFS; + INIT_LIST_HEAD(&head->filters); + tp->root = head; + return 0; +} + +static void flow_destroy(struct tcf_proto *tp) +{ + struct flow_head *head = tp->root; + struct flow_filter *f, *next; + + list_for_each_entry_safe(f, next, &head->filters, list) { + list_del(&f->list); + flow_destroy_filter(tp, f); + } + kfree(head); +} + +static unsigned long flow_get(struct tcf_proto *tp, u32 handle) +{ + struct flow_head *head = tp->root; + struct flow_filter *f; + + list_for_each_entry(f, &head->filters, list) + if (f->handle == handle) + return (unsigned long)f; + return 0; +} + +static void flow_put(struct tcf_proto *tp, unsigned long f) +{ + return; +} + +static int flow_dump(struct tcf_proto *tp, unsigned long fh, + struct sk_buff *skb, struct tcmsg *t) +{ + struct flow_filter *f = (struct flow_filter *)fh; + struct nlattr *nest; + + if (f == NULL) + return skb->len; + + t->tcm_handle = f->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (nest == NULL) + goto nla_put_failure; + + NLA_PUT_U32(skb, TCA_FLOW_KEYS, f->keymask); + NLA_PUT_U32(skb, TCA_FLOW_MODE, f->mode); + + if (f->mask != ~0 || f->xor != 0) { + NLA_PUT_U32(skb, TCA_FLOW_MASK, f->mask); + NLA_PUT_U32(skb, TCA_FLOW_XOR, f->xor); + } + if (f->rshift) + NLA_PUT_U32(skb, TCA_FLOW_RSHIFT, f->rshift); + if (f->addend) + NLA_PUT_U32(skb, TCA_FLOW_ADDEND, f->addend); + + if (f->divisor) + NLA_PUT_U32(skb, TCA_FLOW_DIVISOR, f->divisor); + if (f->baseclass) + NLA_PUT_U32(skb, TCA_FLOW_BASECLASS, f->baseclass); + + if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0) + goto nla_put_failure; + + if (f->ematches.hdr.nmatches && + tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest); + + if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0) + goto nla_put_failure; + + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, nest); + return -1; +} + +static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg) +{ + struct flow_head *head = tp->root; + struct flow_filter *f; + + list_for_each_entry(f, &head->filters, list) { + if (arg->count < arg->skip) + goto skip; + if (arg->fn(tp, (unsigned long)f, arg) < 0) { + arg->stop = 1; + break; + } +skip: + arg->count++; + } +} + +static struct tcf_proto_ops cls_flow_ops __read_mostly = { + .kind = "flow", + .classify = flow_classify, + .init = flow_init, + .destroy = flow_destroy, + .change = flow_change, + .delete = flow_delete, + .get = flow_get, + .put = flow_put, + .dump = flow_dump, + .walk = flow_walk, + .owner = THIS_MODULE, +}; + +static int __init cls_flow_init(void) +{ + return register_tcf_proto_ops(&cls_flow_ops); +} + +static void __exit cls_flow_exit(void) +{ + unregister_tcf_proto_ops(&cls_flow_ops); +} + +module_init(cls_flow_init); +module_exit(cls_flow_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("TC flow classifier"); -- cgit v1.2.3 From 85326fa54b5516d8859617cc5fdfce8ae19c1480 Mon Sep 17 00:00:00 2001 From: Denis V. Lunev Date: Thu, 31 Jan 2008 18:48:47 -0800 Subject: [IPV4]: fib_sync_down rework. fib_sync_down can be called with an address and with a device. In reality it is called either with address OR with a device. The codepath inside is completely different, so lets separate it into two calls for these two cases. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ip_fib.h | 3 +- net/ipv4/fib_frontend.c | 4 +- net/ipv4/fib_semantics.c | 104 ++++++++++++++++++++++++----------------------- 3 files changed, 57 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9daa60b544ba..1b2f008db978 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -218,7 +218,8 @@ extern void fib_select_default(struct net *net, const struct flowi *flp, /* Exported by fib_semantics.c */ extern int ip_fib_check_default(__be32 gw, struct net_device *dev); -extern int fib_sync_down(__be32 local, struct net_device *dev, int force); +extern int fib_sync_down_dev(struct net_device *dev, int force); +extern int fib_sync_down_addr(__be32 local); extern int fib_sync_up(struct net_device *dev); extern __be32 __fib_res_prefsrc(struct fib_result *res); extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d0507f4f848a..d69ffa2a2e98 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -808,7 +808,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) First of all, we scan fib_info list searching for stray nexthop entries, then ignite fib_flush. */ - if (fib_sync_down(ifa->ifa_local, NULL, 0)) + if (fib_sync_down_addr(ifa->ifa_local)) fib_flush(dev->nd_net); } } @@ -898,7 +898,7 @@ static void nl_fib_lookup_exit(struct net *net) static void fib_disable_ip(struct net_device *dev, int force) { - if (fib_sync_down(0, dev, force)) + if (fib_sync_down_dev(dev, force)) fib_flush(dev->nd_net); rt_cache_flush(0); arp_ifdown(dev); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c7912866d987..5beff2e02751 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1031,70 +1031,72 @@ nla_put_failure: referring to it. - device went down -> we must shutdown all nexthops going via it. */ - -int fib_sync_down(__be32 local, struct net_device *dev, int force) +int fib_sync_down_addr(__be32 local) { int ret = 0; - int scope = RT_SCOPE_NOWHERE; - - if (force) - scope = -1; + unsigned int hash = fib_laddr_hashfn(local); + struct hlist_head *head = &fib_info_laddrhash[hash]; + struct hlist_node *node; + struct fib_info *fi; - if (local && fib_info_laddrhash) { - unsigned int hash = fib_laddr_hashfn(local); - struct hlist_head *head = &fib_info_laddrhash[hash]; - struct hlist_node *node; - struct fib_info *fi; + if (fib_info_laddrhash == NULL || local == 0) + return 0; - hlist_for_each_entry(fi, node, head, fib_lhash) { - if (fi->fib_prefsrc == local) { - fi->fib_flags |= RTNH_F_DEAD; - ret++; - } + hlist_for_each_entry(fi, node, head, fib_lhash) { + if (fi->fib_prefsrc == local) { + fi->fib_flags |= RTNH_F_DEAD; + ret++; } } + return ret; +} - if (dev) { - struct fib_info *prev_fi = NULL; - unsigned int hash = fib_devindex_hashfn(dev->ifindex); - struct hlist_head *head = &fib_info_devhash[hash]; - struct hlist_node *node; - struct fib_nh *nh; +int fib_sync_down_dev(struct net_device *dev, int force) +{ + int ret = 0; + int scope = RT_SCOPE_NOWHERE; + struct fib_info *prev_fi = NULL; + unsigned int hash = fib_devindex_hashfn(dev->ifindex); + struct hlist_head *head = &fib_info_devhash[hash]; + struct hlist_node *node; + struct fib_nh *nh; - hlist_for_each_entry(nh, node, head, nh_hash) { - struct fib_info *fi = nh->nh_parent; - int dead; + if (force) + scope = -1; - BUG_ON(!fi->fib_nhs); - if (nh->nh_dev != dev || fi == prev_fi) - continue; - prev_fi = fi; - dead = 0; - change_nexthops(fi) { - if (nh->nh_flags&RTNH_F_DEAD) - dead++; - else if (nh->nh_dev == dev && - nh->nh_scope != scope) { - nh->nh_flags |= RTNH_F_DEAD; + hlist_for_each_entry(nh, node, head, nh_hash) { + struct fib_info *fi = nh->nh_parent; + int dead; + + BUG_ON(!fi->fib_nhs); + if (nh->nh_dev != dev || fi == prev_fi) + continue; + prev_fi = fi; + dead = 0; + change_nexthops(fi) { + if (nh->nh_flags&RTNH_F_DEAD) + dead++; + else if (nh->nh_dev == dev && + nh->nh_scope != scope) { + nh->nh_flags |= RTNH_F_DEAD; #ifdef CONFIG_IP_ROUTE_MULTIPATH - spin_lock_bh(&fib_multipath_lock); - fi->fib_power -= nh->nh_power; - nh->nh_power = 0; - spin_unlock_bh(&fib_multipath_lock); + spin_lock_bh(&fib_multipath_lock); + fi->fib_power -= nh->nh_power; + nh->nh_power = 0; + spin_unlock_bh(&fib_multipath_lock); #endif - dead++; - } + dead++; + } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (force > 1 && nh->nh_dev == dev) { - dead = fi->fib_nhs; - break; - } -#endif - } endfor_nexthops(fi) - if (dead == fi->fib_nhs) { - fi->fib_flags |= RTNH_F_DEAD; - ret++; + if (force > 1 && nh->nh_dev == dev) { + dead = fi->fib_nhs; + break; } +#endif + } endfor_nexthops(fi) + if (dead == fi->fib_nhs) { + fi->fib_flags |= RTNH_F_DEAD; + ret++; } } -- cgit v1.2.3 From 7462bd744e8882f9ebb9220d46fd4fec8b35b082 Mon Sep 17 00:00:00 2001 From: Denis V. Lunev Date: Thu, 31 Jan 2008 18:49:32 -0800 Subject: [NETNS]: Add a namespace mark to fib_info. This is required to make fib_info lookups namespace aware. In the other case initial namespace devices are marked as dead in the local routing table during other namespace stop. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + net/ipv4/fib_semantics.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 1b2f008db978..cb0df37950b7 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -69,6 +69,7 @@ struct fib_nh { struct fib_info { struct hlist_node fib_hash; struct hlist_node fib_lhash; + struct net *fib_net; int fib_treeref; atomic_t fib_clntref; int fib_dead; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 5beff2e02751..97cc49412684 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -687,6 +687,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) struct fib_info *fi = NULL; struct fib_info *ofi; int nhs = 1; + struct net *net = cfg->fc_nlinfo.nl_net; /* Fast check to catch the most weird cases */ if (fib_props[cfg->fc_type].scope > cfg->fc_scope) @@ -727,6 +728,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto failure; fib_info_cnt++; + fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; fi->fib_flags = cfg->fc_flags; fi->fib_priority = cfg->fc_priority; @@ -798,8 +800,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (nhs != 1 || nh->nh_gw) goto err_inval; nh->nh_scope = RT_SCOPE_NOWHERE; - nh->nh_dev = dev_get_by_index(cfg->fc_nlinfo.nl_net, - fi->fib_nh->nh_oif); + nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); err = -ENODEV; if (nh->nh_dev == NULL) goto failure; @@ -813,8 +814,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (fi->fib_prefsrc) { if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || fi->fib_prefsrc != cfg->fc_dst) - if (inet_addr_type(cfg->fc_nlinfo.nl_net, - fi->fib_prefsrc) != RTN_LOCAL) + if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL) goto err_inval; } -- cgit v1.2.3 From 4814bdbd590e835ecec2d5e505165ec1c19796b2 Mon Sep 17 00:00:00 2001 From: Denis V. Lunev Date: Thu, 31 Jan 2008 18:50:07 -0800 Subject: [NETNS]: Lookup in FIB semantic hashes taking into account the namespace. The namespace is not available in the fib_sync_down_addr, add it as a parameter. Looking up a device by the pointer to it is OK. Looking up using a result from fib_trie/fib_hash table lookup is also safe. No need to fix that at all. So, just fix lookup by address and insertion to the hash table path. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_semantics.c | 6 +++++- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index cb0df37950b7..90d1175f63de 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -220,7 +220,7 @@ extern void fib_select_default(struct net *net, const struct flowi *flp, /* Exported by fib_semantics.c */ extern int ip_fib_check_default(__be32 gw, struct net_device *dev); extern int fib_sync_down_dev(struct net_device *dev, int force); -extern int fib_sync_down_addr(__be32 local); +extern int fib_sync_down_addr(struct net *net, __be32 local); extern int fib_sync_up(struct net_device *dev); extern __be32 __fib_res_prefsrc(struct fib_result *res); extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d69ffa2a2e98..86ff2711fc95 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -808,7 +808,7 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) First of all, we scan fib_info list searching for stray nexthop entries, then ignite fib_flush. */ - if (fib_sync_down_addr(ifa->ifa_local)) + if (fib_sync_down_addr(dev->nd_net, ifa->ifa_local)) fib_flush(dev->nd_net); } } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 97cc49412684..a13c84763d4c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -229,6 +229,8 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) head = &fib_info_hash[hash]; hlist_for_each_entry(fi, node, head, fib_hash) { + if (fi->fib_net != nfi->fib_net) + continue; if (fi->fib_nhs != nfi->fib_nhs) continue; if (nfi->fib_protocol == fi->fib_protocol && @@ -1031,7 +1033,7 @@ nla_put_failure: referring to it. - device went down -> we must shutdown all nexthops going via it. */ -int fib_sync_down_addr(__be32 local) +int fib_sync_down_addr(struct net *net, __be32 local) { int ret = 0; unsigned int hash = fib_laddr_hashfn(local); @@ -1043,6 +1045,8 @@ int fib_sync_down_addr(__be32 local) return 0; hlist_for_each_entry(fi, node, head, fib_lhash) { + if (fi->fib_net != net) + continue; if (fi->fib_prefsrc == local) { fi->fib_flags |= RTNH_F_DEAD; ret++; -- cgit v1.2.3