From 66df0fdb5981052f3ad97c9879eda93712bdefc2 Mon Sep 17 00:00:00 2001 From: Haiyue Wang Date: Sun, 3 Apr 2022 19:53:26 +0800 Subject: bpf: Correct the comment for BTF kind bitfield The commit 8fd886911a6a ("bpf: Add BTF_KIND_FLOAT to uapi") has extended the BTF kind bitfield from 4 to 5 bits, correct the comment. Signed-off-by: Haiyue Wang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220403115327.205964-1-haiyue.wang@intel.com --- include/uapi/linux/btf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index b0d8fea1951d..a9162a6c0284 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -33,8 +33,8 @@ struct btf_type { /* "info" bits arrangement * bits 0-15: vlen (e.g. # of struct's members) * bits 16-23: unused - * bits 24-27: kind (e.g. int, ptr, array...etc) - * bits 28-30: unused + * bits 24-28: kind (e.g. int, ptr, array...etc) + * bits 29-30: unused * bit 31: kind_flag, currently used by * struct, union and fwd */ -- cgit v1.2.3 From 1ee375d77bb944321c969b456aa73994566cecf6 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 4 Apr 2022 10:54:47 -0700 Subject: net, uapi: remove inclusion of arpa/inet.h In include/uapi/linux/tipc_config.h, there's a comment that it includes arpa/inet.h for ntohs; but ntohs is not defined in any UAPI header. For now, reuse the definitions from include/linux/byteorder/generic.h, since the various conversion functions do exist in UAPI headers: include/uapi/linux/byteorder/big_endian.h include/uapi/linux/byteorder/little_endian.h We would like to get to the point where we can build UAPI header tests with -nostdinc, meaning that kernel UAPI headers should not have a circular dependency on libc headers. Link: https://android-review.googlesource.com/c/platform/bionic/+/2048127 Suggested-by: Jakub Kicinski Signed-off-by: Nick Desaulniers Signed-off-by: David S. Miller --- include/uapi/linux/tipc_config.h | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index 4dfc05651c98..c00adf2fe868 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -43,10 +43,6 @@ #include #include -#ifndef __KERNEL__ -#include /* for ntohs etc. */ -#endif - /* * Configuration * @@ -269,33 +265,33 @@ static inline int TLV_OK(const void *tlv, __u16 space) */ return (space >= TLV_SPACE(0)) && - (ntohs(((struct tlv_desc *)tlv)->tlv_len) <= space); + (__be16_to_cpu(((struct tlv_desc *)tlv)->tlv_len) <= space); } static inline int TLV_CHECK(const void *tlv, __u16 space, __u16 exp_type) { return TLV_OK(tlv, space) && - (ntohs(((struct tlv_desc *)tlv)->tlv_type) == exp_type); + (__be16_to_cpu(((struct tlv_desc *)tlv)->tlv_type) == exp_type); } static inline int TLV_GET_LEN(struct tlv_desc *tlv) { - return ntohs(tlv->tlv_len); + return __be16_to_cpu(tlv->tlv_len); } static inline void TLV_SET_LEN(struct tlv_desc *tlv, __u16 len) { - tlv->tlv_len = htons(len); + tlv->tlv_len = __cpu_to_be16(len); } static inline int TLV_CHECK_TYPE(struct tlv_desc *tlv, __u16 type) { - return (ntohs(tlv->tlv_type) == type); + return (__be16_to_cpu(tlv->tlv_type) == type); } static inline void TLV_SET_TYPE(struct tlv_desc *tlv, __u16 type) { - tlv->tlv_type = htons(type); + tlv->tlv_type = __cpu_to_be16(type); } static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) @@ -305,8 +301,8 @@ static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) tlv_len = TLV_LENGTH(len); tlv_ptr = (struct tlv_desc *)tlv; - tlv_ptr->tlv_type = htons(type); - tlv_ptr->tlv_len = htons(tlv_len); + tlv_ptr->tlv_type = __cpu_to_be16(type); + tlv_ptr->tlv_len = __cpu_to_be16(tlv_len); if (len && data) { memcpy(TLV_DATA(tlv_ptr), data, len); memset((char *)TLV_DATA(tlv_ptr) + len, 0, TLV_SPACE(len) - tlv_len); @@ -348,7 +344,7 @@ static inline void *TLV_LIST_DATA(struct tlv_list_desc *list) static inline void TLV_LIST_STEP(struct tlv_list_desc *list) { - __u16 tlv_space = TLV_ALIGN(ntohs(list->tlv_ptr->tlv_len)); + __u16 tlv_space = TLV_ALIGN(__be16_to_cpu(list->tlv_ptr->tlv_len)); list->tlv_ptr = (struct tlv_desc *)((char *)list->tlv_ptr + tlv_space); list->tlv_space -= tlv_space; @@ -404,9 +400,9 @@ static inline int TCM_SET(void *msg, __u16 cmd, __u16 flags, msg_len = TCM_LENGTH(data_len); tcm_hdr = (struct tipc_cfg_msg_hdr *)msg; - tcm_hdr->tcm_len = htonl(msg_len); - tcm_hdr->tcm_type = htons(cmd); - tcm_hdr->tcm_flags = htons(flags); + tcm_hdr->tcm_len = __cpu_to_be32(msg_len); + tcm_hdr->tcm_type = __cpu_to_be16(cmd); + tcm_hdr->tcm_flags = __cpu_to_be16(flags); if (data_len && data) { memcpy(TCM_DATA(msg), data, data_len); memset((char *)TCM_DATA(msg) + data_len, 0, TCM_SPACE(data_len) - msg_len); -- cgit v1.2.3 From 794c24e9921f32ded4422833a990ccf11dc3c00e Mon Sep 17 00:00:00 2001 From: Jeffrey Ji Date: Wed, 6 Apr 2022 17:26:00 +0000 Subject: net-core: rx_otherhost_dropped to core_stats Increment rx_otherhost_dropped counter when packet dropped due to mismatched dest MAC addr. An example when this drop can occur is when manually crafting raw packets that will be consumed by a user space application via a tap device. For testing purposes local traffic was generated using trafgen for the client and netcat to start a server Tested: Created 2 netns, sent 1 packet using trafgen from 1 to the other with "{eth(daddr=$INCORRECT_MAC...}", verified that iproute2 showed the counter was incremented. (Also had to modify iproute2 to show the stat, additional patch for that coming next.) Signed-off-by: Jeffrey Ji Reviewed-by: Brian Vazquez Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20220406172600.1141083-1-jeffreyjilinux@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 ++ include/uapi/linux/if_link.h | 5 +++++ net/core/dev.c | 1 + net/ipv4/ip_input.c | 1 + net/ipv6/ip6_input.c | 1 + 5 files changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7e7b2a72e473..28ea4f8269d4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -203,6 +203,7 @@ struct net_device_core_stats { local_t rx_dropped; local_t tx_dropped; local_t rx_nohandler; + local_t rx_otherhost_dropped; } __aligned(4 * sizeof(local_t)); #include @@ -3837,6 +3838,7 @@ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ DEV_CORE_STATS_INC(rx_dropped) DEV_CORE_STATS_INC(tx_dropped) DEV_CORE_STATS_INC(rx_nohandler) +DEV_CORE_STATS_INC(rx_otherhost_dropped) static __always_inline int ____dev_forward_skb(struct net_device *dev, struct sk_buff *skb, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index cc284c048e69..d1e600816b82 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -211,6 +211,9 @@ struct rtnl_link_stats { * @rx_nohandler: Number of packets received on the interface * but dropped by the networking stack because the device is * not designated to receive packets (e.g. backup link in a bond). + * + * @rx_otherhost_dropped: Number of packets dropped due to mismatch + * in destination MAC address. */ struct rtnl_link_stats64 { __u64 rx_packets; @@ -243,6 +246,8 @@ struct rtnl_link_stats64 { __u64 rx_compressed; __u64 tx_compressed; __u64 rx_nohandler; + + __u64 rx_otherhost_dropped; }; /* Subset of link stats useful for in-HW collection. Meaning of the fields is as diff --git a/net/core/dev.c b/net/core/dev.c index f00d29856b43..e027410e861b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10358,6 +10358,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, storage->rx_dropped += local_read(&core_stats->rx_dropped); storage->tx_dropped += local_read(&core_stats->tx_dropped); storage->rx_nohandler += local_read(&core_stats->rx_nohandler); + storage->rx_otherhost_dropped += local_read(&core_stats->rx_otherhost_dropped); } } return storage; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 95f7bb052784..b1165f717cd1 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -451,6 +451,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) * that it receives, do not try to analyse it. */ if (skb->pkt_type == PACKET_OTHERHOST) { + dev_core_stats_rx_otherhost_dropped_inc(skb->dev); drop_reason = SKB_DROP_REASON_OTHERHOST; goto drop; } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 5b5ea35635f9..b4880c7c84eb 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -150,6 +150,7 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, struct inet6_dev *idev; if (skb->pkt_type == PACKET_OTHERHOST) { + dev_core_stats_rx_otherhost_dropped_inc(skb->dev); kfree_skb(skb); return NULL; } -- cgit v1.2.3 From 545528d788556c724eeb5400757f828ef27782a8 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 13 Apr 2022 13:51:54 +0300 Subject: net: netlink: add NLM_F_BULK delete request modifier Add a new delete request modifier called NLM_F_BULK which, when supported, would cause the request to delete multiple objects. The flag is a convenient way to signal that a multiple delete operation is requested which can be gradually added to different delete requests. In order to make sure older kernels will error out if the operation is not supported instead of doing something unintended we have to break a required condition when implementing support for this flag, f.e. for neighbors we will omit the mandatory mac address attribute. Initially it will be used to add flush with filtering support for bridge fdbs, but it also opens the door to add similar support to others. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 4c0cde075c27..855dffb4c1c3 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -72,6 +72,7 @@ struct nlmsghdr { /* Modifiers to DELETE request */ #define NLM_F_NONREC 0x100 /* Do not delete recursively */ +#define NLM_F_BULK 0x200 /* Delete multiple objects */ /* Flags for ACK message */ #define NLM_F_CAPPED 0x100 /* request was capped */ -- cgit v1.2.3 From ea2c0f9e3fc2f94f090d693b7235c02af1289629 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 13 Apr 2022 13:52:00 +0300 Subject: net: rtnetlink: add ndm flags and state mask attributes Add ndm flags/state masks which will be used for bulk delete filtering. All of these are used by the bridge and vxlan drivers. Also minimal attr policy validation is added, it is up to ndo_fdb_del_bulk implementers to further validate them. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/neighbour.h | 2 ++ net/core/rtnetlink.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index db05fb55055e..39c565e460c7 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -32,6 +32,8 @@ enum { NDA_NH_ID, NDA_FDB_EXT_ATTRS, NDA_FLAGS_EXT, + NDA_NDM_STATE_MASK, + NDA_NDM_FLAGS_MASK, __NDA_MAX }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 520d50fcaaea..ab7fb9a16da9 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4172,6 +4172,8 @@ EXPORT_SYMBOL(ndo_dflt_fdb_del); static const struct nla_policy fdb_del_bulk_policy[NDA_MAX + 1] = { [NDA_VLAN] = { .type = NLA_U16 }, [NDA_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), + [NDA_NDM_STATE_MASK] = { .type = NLA_U16 }, + [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 }, }; static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, -- cgit v1.2.3 From 4dc84c06a343fcb95fd5a0acb537aefa4ebdd1b0 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Tue, 12 Apr 2022 10:01:19 +0800 Subject: net: ethtool: extend ringparam set/get APIs for tx_push Currently tx push is a standard driver feature which controls use of a fast path descriptor push. So this patch extends the ringparam APIs and data structures to support set/get tx push by ethtool -G/g. Signed-off-by: Jie Wang Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski --- Documentation/networking/ethtool-netlink.rst | 8 ++++++++ include/linux/ethtool.h | 4 ++++ include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/netlink.h | 2 +- net/ethtool/rings.c | 18 ++++++++++++++++-- 5 files changed, 30 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 24d9be69065d..dbca3e9ec782 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -862,6 +862,7 @@ Kernel response contents: ``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` u8 TCP header / data split ``ETHTOOL_A_RINGS_CQE_SIZE`` u32 Size of TX/RX CQE + ``ETHTOOL_A_RINGS_TX_PUSH`` u8 flag of TX Push mode ==================================== ====== =========================== ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` indicates whether the device is usable with @@ -871,6 +872,12 @@ separate buffers. The device configuration must make it possible to receive full memory pages of data, for example because MTU is high enough or through HW-GRO. +``ETHTOOL_A_RINGS_TX_PUSH`` flag is used to enable descriptor fast +path to send packets. In ordinary path, driver fills descriptors in DRAM and +notifies NIC hardware. In fast path, driver pushes descriptors to the device +through MMIO writes, thus reducing the latency. However, enabling this feature +may increase the CPU cost. Drivers may enforce additional per-packet +eligibility checks (e.g. on packet size). RINGS_SET ========= @@ -887,6 +894,7 @@ Request contents: ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring ``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring ``ETHTOOL_A_RINGS_CQE_SIZE`` u32 Size of TX/RX CQE + ``ETHTOOL_A_RINGS_TX_PUSH`` u8 flag of TX Push mode ==================================== ====== =========================== Kernel checks that requested ring sizes do not exceed limits reported by diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 4af58459a1e7..99dc7bfbcd3c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -71,11 +71,13 @@ enum { * struct kernel_ethtool_ringparam - RX/TX ring configuration * @rx_buf_len: Current length of buffers on the rx ring. * @tcp_data_split: Scatter packet headers and data to separate buffers + * @tx_push: The flag of tx push mode * @cqe_size: Size of TX/RX completion queue event */ struct kernel_ethtool_ringparam { u32 rx_buf_len; u8 tcp_data_split; + u8 tx_push; u32 cqe_size; }; @@ -83,10 +85,12 @@ struct kernel_ethtool_ringparam { * enum ethtool_supported_ring_param - indicator caps for setting ring params * @ETHTOOL_RING_USE_RX_BUF_LEN: capture for setting rx_buf_len * @ETHTOOL_RING_USE_CQE_SIZE: capture for setting cqe_size + * @ETHTOOL_RING_USE_TX_PUSH: capture for setting tx_push */ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), ETHTOOL_RING_USE_CQE_SIZE = BIT(1), + ETHTOOL_RING_USE_TX_PUSH = BIT(2), }; #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 979850221b8d..d2fb4f7be61b 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -338,6 +338,7 @@ enum { ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ + ETHTOOL_A_RINGS_TX_PUSH, /* u8 */ /* add new constants above here */ __ETHTOOL_A_RINGS_CNT, diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 29d01662a48b..7919ddb2371c 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -363,7 +363,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1]; extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1]; extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1]; -extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_CQE_SIZE + 1]; +extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX_PUSH + 1]; extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1]; extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1]; extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1]; diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index 9f33c9689b56..9ed60c507d97 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -55,7 +55,8 @@ static int rings_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u32)) + /* _RINGS_TX */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_BUF_LEN */ nla_total_size(sizeof(u8)) + /* _RINGS_TCP_DATA_SPLIT */ - nla_total_size(sizeof(u32)); /* _RINGS_CQE_SIZE */ + nla_total_size(sizeof(u32) + /* _RINGS_CQE_SIZE */ + nla_total_size(sizeof(u8))); /* _RINGS_TX_PUSH */ } static int rings_fill_reply(struct sk_buff *skb, @@ -94,7 +95,8 @@ static int rings_fill_reply(struct sk_buff *skb, (nla_put_u8(skb, ETHTOOL_A_RINGS_TCP_DATA_SPLIT, kr->tcp_data_split))) || (kr->cqe_size && - (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size)))) + (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size))) || + nla_put_u8(skb, ETHTOOL_A_RINGS_TX_PUSH, !!kr->tx_push)) return -EMSGSIZE; return 0; @@ -123,6 +125,7 @@ const struct nla_policy ethnl_rings_set_policy[] = { [ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1), [ETHTOOL_A_RINGS_CQE_SIZE] = NLA_POLICY_MIN(NLA_U32, 1), + [ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), }; int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) @@ -149,6 +152,15 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) if (!ops->get_ringparam || !ops->set_ringparam) goto out_dev; + if (tb[ETHTOOL_A_RINGS_TX_PUSH] && + !(ops->supported_ring_params & ETHTOOL_RING_USE_TX_PUSH)) { + ret = -EOPNOTSUPP; + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_RINGS_TX_PUSH], + "setting tx push not supported"); + goto out_dev; + } + rtnl_lock(); ret = ethnl_ops_begin(dev); if (ret < 0) @@ -165,6 +177,8 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) tb[ETHTOOL_A_RINGS_RX_BUF_LEN], &mod); ethnl_update_u32(&kernel_ringparam.cqe_size, tb[ETHTOOL_A_RINGS_CQE_SIZE], &mod); + ethnl_update_u8(&kernel_ringparam.tx_push, + tb[ETHTOOL_A_RINGS_TX_PUSH], &mod); ret = 0; if (!mod) goto out_ops; -- cgit v1.2.3 From f9a2fb73318eb4dbf8cd84866b8b0dd012d8b116 Mon Sep 17 00:00:00 2001 From: Arun Ajith S Date: Fri, 15 Apr 2022 08:34:02 +0000 Subject: net/ipv6: Introduce accept_unsolicited_na knob to implement router-side changes for RFC9131 Add a new neighbour cache entry in STALE state for routers on receiving an unsolicited (gratuitous) neighbour advertisement with target link-layer-address option specified. This is similar to the arp_accept configuration for IPv4. A new sysctl endpoint is created to turn on this behaviour: /proc/sys/net/ipv6/conf/interface/accept_unsolicited_na. Signed-off-by: Arun Ajith S Reviewed-by: David Ahern Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 27 +++ include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 10 + net/ipv6/ndisc.c | 20 +- tools/testing/selftests/net/Makefile | 1 + .../selftests/net/ndisc_unsolicited_na_test.sh | 255 +++++++++++++++++++++ 7 files changed, 314 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/net/ndisc_unsolicited_na_test.sh (limited to 'include/uapi') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index b0024aa7b051..433f2e4a5fed 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2467,6 +2467,33 @@ drop_unsolicited_na - BOOLEAN By default this is turned off. +accept_unsolicited_na - BOOLEAN + Add a new neighbour cache entry in STALE state for routers on receiving an + unsolicited neighbour advertisement with target link-layer address option + specified. This is as per router-side behavior documented in RFC9131. + This has lower precedence than drop_unsolicited_na. + + ==== ====== ====== ============================================== + drop accept fwding behaviour + ---- ------ ------ ---------------------------------------------- + 1 X X Drop NA packet and don't pass up the stack + 0 0 X Pass NA packet up the stack, don't update NC + 0 1 0 Pass NA packet up the stack, don't update NC + 0 1 1 Pass NA packet up the stack, and add a STALE + NC entry + ==== ====== ====== ============================================== + + This will optimize the return path for the initial off-link communication + that is initiated by a directly connected host, by ensuring that + the first-hop router which turns on this setting doesn't have to + buffer the initial return packets to do neighbour-solicitation. + The prerequisite is that the host is configured to send + unsolicited neighbour advertisements on interface bringup. + This setting should be used in conjunction with the ndisc_notify setting + on the host to satisfy this prerequisite. + + By default this is turned off. + enhanced_dad - BOOLEAN Include a nonce option in the IPv6 neighbor solicitation messages used for duplicate address detection per RFC7527. A received DAD NS will only signal diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 16870f86c74d..918bfea4ef5f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -61,6 +61,7 @@ struct ipv6_devconf { __s32 suppress_frag_ndisc; __s32 accept_ra_mtu; __s32 drop_unsolicited_na; + __s32 accept_unsolicited_na; struct ipv6_stable_secret { bool initialized; struct in6_addr secret; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index d4178dace0bf..549ddeaf788b 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -194,6 +194,7 @@ enum { DEVCONF_IOAM6_ID, DEVCONF_IOAM6_ID_WIDE, DEVCONF_NDISC_EVICT_NOCARRIER, + DEVCONF_ACCEPT_UNSOLICITED_NA, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1afc4c024981..6473dc84b71d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5587,6 +5587,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_IOAM6_ID] = cnf->ioam6_id; array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide; array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier; + array[DEVCONF_ACCEPT_UNSOLICITED_NA] = cnf->accept_unsolicited_na; } static inline size_t inet6_ifla6_size(void) @@ -7037,6 +7038,15 @@ static const struct ctl_table addrconf_sysctl[] = { .extra1 = (void *)SYSCTL_ZERO, .extra2 = (void *)SYSCTL_ONE, }, + { + .procname = "accept_unsolicited_na", + .data = &ipv6_devconf.accept_unsolicited_na, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = (void *)SYSCTL_ZERO, + .extra2 = (void *)SYSCTL_ONE, + }, { /* sentinel */ } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index fcb288b0ae13..254addad0dd3 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -979,6 +979,7 @@ static void ndisc_recv_na(struct sk_buff *skb) struct inet6_dev *idev = __in6_dev_get(dev); struct inet6_ifaddr *ifp; struct neighbour *neigh; + bool create_neigh; if (skb->len < sizeof(struct nd_msg)) { ND_PRINTK(2, warn, "NA: packet too short\n"); @@ -999,6 +1000,7 @@ static void ndisc_recv_na(struct sk_buff *skb) /* For some 802.11 wireless deployments (and possibly other networks), * there will be a NA proxy and unsolicitd packets are attacks * and thus should not be accepted. + * drop_unsolicited_na takes precedence over accept_unsolicited_na */ if (!msg->icmph.icmp6_solicited && idev && idev->cnf.drop_unsolicited_na) @@ -1039,7 +1041,23 @@ static void ndisc_recv_na(struct sk_buff *skb) in6_ifa_put(ifp); return; } - neigh = neigh_lookup(&nd_tbl, &msg->target, dev); + /* RFC 9131 updates original Neighbour Discovery RFC 4861. + * An unsolicited NA can now create a neighbour cache entry + * on routers if it has Target LL Address option. + * + * drop accept fwding behaviour + * ---- ------ ------ ---------------------------------------------- + * 1 X X Drop NA packet and don't pass up the stack + * 0 0 X Pass NA packet up the stack, don't update NC + * 0 1 0 Pass NA packet up the stack, don't update NC + * 0 1 1 Pass NA packet up the stack, and add a STALE + * NC entry + * Note that we don't do a (daddr == all-routers-mcast) check. + */ + create_neigh = !msg->icmph.icmp6_solicited && lladdr && + idev && idev->cnf.forwarding && + idev->cnf.accept_unsolicited_na; + neigh = __neigh_lookup(&nd_tbl, &msg->target, dev, create_neigh); if (neigh) { u8 old_flags = neigh->flags; diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 3fe2515aa616..af7f6e6ff182 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -36,6 +36,7 @@ TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh +TEST_PROGS += ndisc_unsolicited_na_test.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest diff --git a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh new file mode 100755 index 000000000000..f508657ee126 --- /dev/null +++ b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh @@ -0,0 +1,255 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for the accept_unsolicited_na feature to +# enable RFC9131 behaviour. The following is the test-matrix. +# drop accept fwding behaviour +# ---- ------ ------ ---------------------------------------------- +# 1 X X Drop NA packet and don't pass up the stack +# 0 0 X Pass NA packet up the stack, don't update NC +# 0 1 0 Pass NA packet up the stack, don't update NC +# 0 1 1 Pass NA packet up the stack, and add a STALE +# NC entry + +ret=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +PAUSE_ON_FAIL=no +PAUSE=no + +HOST_NS="ns-host" +ROUTER_NS="ns-router" + +HOST_INTF="veth-host" +ROUTER_INTF="veth-router" + +ROUTER_ADDR="2000:20::1" +HOST_ADDR="2000:20::2" +SUBNET_WIDTH=64 +ROUTER_ADDR_WITH_MASK="${ROUTER_ADDR}/${SUBNET_WIDTH}" +HOST_ADDR_WITH_MASK="${HOST_ADDR}/${SUBNET_WIDTH}" + +IP_HOST="ip -6 -netns ${HOST_NS}" +IP_HOST_EXEC="ip netns exec ${HOST_NS}" +IP_ROUTER="ip -6 -netns ${ROUTER_NS}" +IP_ROUTER_EXEC="ip netns exec ${ROUTER_NS}" + +tcpdump_stdout= +tcpdump_stderr= + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf " TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf " TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi +} + +setup() +{ + set -e + + local drop_unsolicited_na=$1 + local accept_unsolicited_na=$2 + local forwarding=$3 + + # Setup two namespaces and a veth tunnel across them. + # On end of the tunnel is a router and the other end is a host. + ip netns add ${HOST_NS} + ip netns add ${ROUTER_NS} + ${IP_ROUTER} link add ${ROUTER_INTF} type veth \ + peer name ${HOST_INTF} netns ${HOST_NS} + + # Enable IPv6 on both router and host, and configure static addresses. + # The router here is the DUT + # Setup router configuration as specified by the arguments. + # forwarding=0 case is to check that a non-router + # doesn't add neighbour entries. + ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF} + ${IP_ROUTER_EXEC} sysctl -qw \ + ${ROUTER_CONF}.forwarding=${forwarding} + ${IP_ROUTER_EXEC} sysctl -qw \ + ${ROUTER_CONF}.drop_unsolicited_na=${drop_unsolicited_na} + ${IP_ROUTER_EXEC} sysctl -qw \ + ${ROUTER_CONF}.accept_unsolicited_na=${accept_unsolicited_na} + ${IP_ROUTER_EXEC} sysctl -qw ${ROUTER_CONF}.disable_ipv6=0 + ${IP_ROUTER} addr add ${ROUTER_ADDR_WITH_MASK} dev ${ROUTER_INTF} + + # Turn on ndisc_notify on host interface so that + # the host sends unsolicited NAs. + HOST_CONF=net.ipv6.conf.${HOST_INTF} + ${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.ndisc_notify=1 + ${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.disable_ipv6=0 + ${IP_HOST} addr add ${HOST_ADDR_WITH_MASK} dev ${HOST_INTF} + + set +e +} + +start_tcpdump() { + set -e + tcpdump_stdout=`mktemp` + tcpdump_stderr=`mktemp` + ${IP_ROUTER_EXEC} timeout 15s \ + tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \ + "icmp6 && icmp6[0] == 136 && src ${HOST_ADDR}" \ + > ${tcpdump_stdout} 2> /dev/null + set +e +} + +cleanup_tcpdump() +{ + set -e + [[ ! -z ${tcpdump_stdout} ]] && rm -f ${tcpdump_stdout} + [[ ! -z ${tcpdump_stderr} ]] && rm -f ${tcpdump_stderr} + tcpdump_stdout= + tcpdump_stderr= + set +e +} + +cleanup() +{ + cleanup_tcpdump + ip netns del ${HOST_NS} + ip netns del ${ROUTER_NS} +} + +link_up() { + set -e + ${IP_ROUTER} link set dev ${ROUTER_INTF} up + ${IP_HOST} link set dev ${HOST_INTF} up + set +e +} + +verify_ndisc() { + local drop_unsolicited_na=$1 + local accept_unsolicited_na=$2 + local forwarding=$3 + + neigh_show_output=$(${IP_ROUTER} neigh show \ + to ${HOST_ADDR} dev ${ROUTER_INTF} nud stale) + if [ ${drop_unsolicited_na} -eq 0 ] && \ + [ ${accept_unsolicited_na} -eq 1 ] && \ + [ ${forwarding} -eq 1 ]; then + # Neighbour entry expected to be present for 011 case + [[ ${neigh_show_output} ]] + else + # Neighbour entry expected to be absent for all other cases + [[ -z ${neigh_show_output} ]] + fi +} + +test_unsolicited_na_common() +{ + # Setup the test bed, but keep links down + setup $1 $2 $3 + + # Bring the link up, wait for the NA, + # and add a delay to ensure neighbour processing is done. + link_up + start_tcpdump + + # Verify the neighbour table + verify_ndisc $1 $2 $3 + +} + +test_unsolicited_na_combination() { + test_unsolicited_na_common $1 $2 $3 + test_msg=("test_unsolicited_na: " + "drop_unsolicited_na=$1 " + "accept_unsolicited_na=$2 " + "forwarding=$3") + log_test $? 0 "${test_msg[*]}" + cleanup +} + +test_unsolicited_na_combinations() { + # Args: drop_unsolicited_na accept_unsolicited_na forwarding + + # Expect entry + test_unsolicited_na_combination 0 1 1 + + # Expect no entry + test_unsolicited_na_combination 0 0 0 + test_unsolicited_na_combination 0 0 1 + test_unsolicited_na_combination 0 1 0 + test_unsolicited_na_combination 1 0 0 + test_unsolicited_na_combination 1 0 1 + test_unsolicited_na_combination 1 1 0 + test_unsolicited_na_combination 1 1 1 +} + +############################################################################### +# usage + +usage() +{ + cat < /dev/null + +test_unsolicited_na_combinations + +printf "\nTests passed: %3d\n" ${nsuccess} +printf "Tests failed: %3d\n" ${nfail} + +exit $ret -- cgit v1.2.3 From c246f9b5fd617fe487f8b6f18851703f468501d6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 18 Apr 2022 09:42:25 +0300 Subject: devlink: add support to create line card and expose to user Extend the devlink API so the driver is going to be able to create and destroy linecard instances. There can be multiple line cards per devlink device. Expose this new type of object over devlink netlink API to the userspace, with notifications. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/devlink.h | 4 + include/uapi/linux/devlink.h | 7 ++ net/core/devlink.c | 270 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 280 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/net/devlink.h b/include/net/devlink.h index a30180c0988a..7aabdfb3bc6a 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -22,6 +22,7 @@ #include struct devlink; +struct devlink_linecard; struct devlink_port_phys_attrs { u32 port_number; /* Same value as "split group". @@ -1536,6 +1537,9 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, int devlink_rate_leaf_create(struct devlink_port *port, void *priv); void devlink_rate_leaf_destroy(struct devlink_port *devlink_port); void devlink_rate_nodes_destroy(struct devlink *devlink); +struct devlink_linecard *devlink_linecard_create(struct devlink *devlink, + unsigned int linecard_index); +void devlink_linecard_destroy(struct devlink_linecard *linecard); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index b897b80770f6..59c33ed2d3e7 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -131,6 +131,11 @@ enum devlink_command { DEVLINK_CMD_RATE_NEW, DEVLINK_CMD_RATE_DEL, + DEVLINK_CMD_LINECARD_GET, /* can dump */ + DEVLINK_CMD_LINECARD_SET, + DEVLINK_CMD_LINECARD_NEW, + DEVLINK_CMD_LINECARD_DEL, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -553,6 +558,8 @@ enum devlink_attr { DEVLINK_ATTR_REGION_MAX_SNAPSHOTS, /* u32 */ + DEVLINK_ATTR_LINECARD_INDEX, /* u32 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index aeca13b6e57b..4cdacd74b82a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -54,6 +54,8 @@ struct devlink { struct list_head trap_list; struct list_head trap_group_list; struct list_head trap_policer_list; + struct list_head linecard_list; + struct mutex linecards_lock; /* protects linecard_list */ const struct devlink_ops *ops; u64 features; struct xarray snapshot_ids; @@ -70,6 +72,13 @@ struct devlink { char priv[] __aligned(NETDEV_ALIGN); }; +struct devlink_linecard { + struct list_head list; + struct devlink *devlink; + unsigned int index; + refcount_t refcount; +}; + /** * struct devlink_resource - devlink resource * @name: name of the resource @@ -397,6 +406,56 @@ devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info) return ERR_PTR(-EINVAL); } +static struct devlink_linecard * +devlink_linecard_get_by_index(struct devlink *devlink, + unsigned int linecard_index) +{ + struct devlink_linecard *devlink_linecard; + + list_for_each_entry(devlink_linecard, &devlink->linecard_list, list) { + if (devlink_linecard->index == linecard_index) + return devlink_linecard; + } + return NULL; +} + +static bool devlink_linecard_index_exists(struct devlink *devlink, + unsigned int linecard_index) +{ + return devlink_linecard_get_by_index(devlink, linecard_index); +} + +static struct devlink_linecard * +devlink_linecard_get_from_attrs(struct devlink *devlink, struct nlattr **attrs) +{ + if (attrs[DEVLINK_ATTR_LINECARD_INDEX]) { + u32 linecard_index = nla_get_u32(attrs[DEVLINK_ATTR_LINECARD_INDEX]); + struct devlink_linecard *linecard; + + mutex_lock(&devlink->linecards_lock); + linecard = devlink_linecard_get_by_index(devlink, linecard_index); + if (linecard) + refcount_inc(&linecard->refcount); + mutex_unlock(&devlink->linecards_lock); + if (!linecard) + return ERR_PTR(-ENODEV); + return linecard; + } + return ERR_PTR(-EINVAL); +} + +static struct devlink_linecard * +devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info) +{ + return devlink_linecard_get_from_attrs(devlink, info->attrs); +} + +static void devlink_linecard_put(struct devlink_linecard *linecard) +{ + if (refcount_dec_and_test(&linecard->refcount)) + kfree(linecard); +} + struct devlink_sb { struct list_head list; unsigned int index; @@ -617,16 +676,18 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) #define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) #define DEVLINK_NL_FLAG_NEED_RATE BIT(2) #define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3) +#define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4) /* The per devlink instance lock is taken by default in the pre-doit * operation, yet several commands do not require this. The global * devlink lock is taken and protects from disruption by user-calls. */ -#define DEVLINK_NL_FLAG_NO_LOCK BIT(4) +#define DEVLINK_NL_FLAG_NO_LOCK BIT(5) static int devlink_nl_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { + struct devlink_linecard *linecard; struct devlink_port *devlink_port; struct devlink *devlink; int err; @@ -669,6 +730,13 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, goto unlock; } info->user_ptr[1] = rate_node; + } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) { + linecard = devlink_linecard_get_from_info(devlink, info); + if (IS_ERR(linecard)) { + err = PTR_ERR(linecard); + goto unlock; + } + info->user_ptr[1] = linecard; } return 0; @@ -683,9 +751,14 @@ unlock: static void devlink_nl_post_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { + struct devlink_linecard *linecard; struct devlink *devlink; devlink = info->user_ptr[0]; + if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) { + linecard = info->user_ptr[1]; + devlink_linecard_put(linecard); + } if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_unlock(&devlink->lock); devlink_put(devlink); @@ -1964,6 +2037,132 @@ static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, return err; } +static int devlink_nl_linecard_fill(struct sk_buff *msg, + struct devlink *devlink, + struct devlink_linecard *linecard, + enum devlink_command cmd, u32 portid, + u32 seq, int flags, + struct netlink_ext_ack *extack) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static void devlink_linecard_notify(struct devlink_linecard *linecard, + enum devlink_command cmd) +{ + struct devlink *devlink = linecard->devlink; + struct sk_buff *msg; + int err; + + WARN_ON(cmd != DEVLINK_CMD_LINECARD_NEW && + cmd != DEVLINK_CMD_LINECARD_DEL); + + if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + err = devlink_nl_linecard_fill(msg, devlink, linecard, cmd, 0, 0, 0, + NULL); + if (err) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), + msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); +} + +static int devlink_nl_cmd_linecard_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_linecard *linecard = info->user_ptr[1]; + struct devlink *devlink = linecard->devlink; + struct sk_buff *msg; + int err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_linecard_fill(msg, devlink, linecard, + DEVLINK_CMD_LINECARD_NEW, + info->snd_portid, info->snd_seq, 0, + info->extack); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink_linecard *linecard; + struct devlink *devlink; + int start = cb->args[0]; + unsigned long index; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) + continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + + mutex_lock(&devlink->linecards_lock); + list_for_each_entry(linecard, &devlink->linecard_list, list) { + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_linecard_fill(msg, devlink, linecard, + DEVLINK_CMD_LINECARD_NEW, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + cb->extack); + if (err) { + mutex_unlock(&devlink->linecards_lock); + devlink_put(devlink); + goto out; + } + idx++; + } + mutex_unlock(&devlink->linecards_lock); +retry: + devlink_put(devlink); + } +out: + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_sb *devlink_sb, enum devlink_command cmd, u32 portid, @@ -8589,6 +8788,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 }, [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -8664,6 +8864,13 @@ static const struct genl_small_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, + { + .cmd = DEVLINK_CMD_LINECARD_GET, + .doit = devlink_nl_cmd_linecard_get_doit, + .dumpit = devlink_nl_cmd_linecard_get_dumpit, + .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD, + /* can be retrieved by unprivileged users */ + }, { .cmd = DEVLINK_CMD_SB_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, @@ -9043,6 +9250,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, write_pnet(&devlink->_net, net); INIT_LIST_HEAD(&devlink->port_list); INIT_LIST_HEAD(&devlink->rate_list); + INIT_LIST_HEAD(&devlink->linecard_list); INIT_LIST_HEAD(&devlink->sb_list); INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list); INIT_LIST_HEAD(&devlink->resource_list); @@ -9054,6 +9262,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, INIT_LIST_HEAD(&devlink->trap_policer_list); mutex_init(&devlink->lock); mutex_init(&devlink->reporters_lock); + mutex_init(&devlink->linecards_lock); refcount_set(&devlink->refcount, 1); init_completion(&devlink->comp); @@ -9080,10 +9289,14 @@ static void devlink_notify_register(struct devlink *devlink) struct devlink_param_item *param_item; struct devlink_trap_item *trap_item; struct devlink_port *devlink_port; + struct devlink_linecard *linecard; struct devlink_rate *rate_node; struct devlink_region *region; devlink_notify(devlink, DEVLINK_CMD_NEW); + list_for_each_entry(linecard, &devlink->linecard_list, list) + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + list_for_each_entry(devlink_port, &devlink->port_list, list) devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); @@ -9191,6 +9404,7 @@ void devlink_free(struct devlink *devlink) { ASSERT_DEVLINK_NOT_REGISTERED(devlink); + mutex_destroy(&devlink->linecards_lock); mutex_destroy(&devlink->reporters_lock); mutex_destroy(&devlink->lock); WARN_ON(!list_empty(&devlink->trap_policer_list)); @@ -9203,6 +9417,7 @@ void devlink_free(struct devlink *devlink) WARN_ON(!list_empty(&devlink->dpipe_table_list)); WARN_ON(!list_empty(&devlink->sb_list)); WARN_ON(!list_empty(&devlink->rate_list)); + WARN_ON(!list_empty(&devlink->linecard_list)); WARN_ON(!list_empty(&devlink->port_list)); xa_destroy(&devlink->snapshot_ids); @@ -9747,6 +9962,59 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, return 0; } +/** + * devlink_linecard_create - Create devlink linecard + * + * @devlink: devlink + * @linecard_index: driver-specific numerical identifier of the linecard + * + * Create devlink linecard instance with provided linecard index. + * Caller can use any indexing, even hw-related one. + */ +struct devlink_linecard *devlink_linecard_create(struct devlink *devlink, + unsigned int linecard_index) +{ + struct devlink_linecard *linecard; + + mutex_lock(&devlink->linecards_lock); + if (devlink_linecard_index_exists(devlink, linecard_index)) { + mutex_unlock(&devlink->linecards_lock); + return ERR_PTR(-EEXIST); + } + + linecard = kzalloc(sizeof(*linecard), GFP_KERNEL); + if (!linecard) { + mutex_unlock(&devlink->linecards_lock); + return ERR_PTR(-ENOMEM); + } + + linecard->devlink = devlink; + linecard->index = linecard_index; + list_add_tail(&linecard->list, &devlink->linecard_list); + refcount_set(&linecard->refcount, 1); + mutex_unlock(&devlink->linecards_lock); + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + return linecard; +} +EXPORT_SYMBOL_GPL(devlink_linecard_create); + +/** + * devlink_linecard_destroy - Destroy devlink linecard + * + * @linecard: devlink linecard + */ +void devlink_linecard_destroy(struct devlink_linecard *linecard) +{ + struct devlink *devlink = linecard->devlink; + + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL); + mutex_lock(&devlink->linecards_lock); + list_del(&linecard->list); + mutex_unlock(&devlink->linecards_lock); + devlink_linecard_put(linecard); +} +EXPORT_SYMBOL_GPL(devlink_linecard_destroy); + int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, -- cgit v1.2.3 From fcdc8ce23a309c26a67fc613a741d9b21a248311 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 18 Apr 2022 09:42:26 +0300 Subject: devlink: implement line card provisioning In order to be able to configure all needed stuff on a port/netdevice of a line card without the line card being present, introduce line card provisioning. Basically by setting a type, provisioning process will start and driver is supposed to create a placeholder for instances (ports/netdevices) for a line card type. Allow the user to query the supported line card types over line card get command. Then implement two netlink command SET to allow user to set/unset the card type. On the driver API side, add provision/unprovision ops and supported types array to be advertised. Upon provision op call, the driver should take care of creating the instances for the particular line card type. Introduce provision_set/clear() functions to be called by the driver once the provisioning/unprovisioning is done on its side. These helpers are not to be called directly due to the async nature of provisioning. Example: $ devlink port # No ports are listed $ devlink lc pci/0000:01:00.0: lc 1 state unprovisioned supported_types: 16x100G lc 2 state unprovisioned supported_types: 16x100G lc 3 state unprovisioned supported_types: 16x100G lc 4 state unprovisioned supported_types: 16x100G lc 5 state unprovisioned supported_types: 16x100G lc 6 state unprovisioned supported_types: 16x100G lc 7 state unprovisioned supported_types: 16x100G lc 8 state unprovisioned supported_types: 16x100G $ devlink lc set pci/0000:01:00.0 lc 8 type 16x100G $ devlink lc show pci/0000:01:00.0 lc 8 pci/0000:01:00.0: lc 8 state active type 16x100G supported_types: 16x100G $ devlink port pci/0000:01:00.0/0: type notset flavour cpu port 0 splittable false pci/0000:01:00.0/53: type eth netdev enp1s0nl8p1 flavour physical lc 8 port 1 splittable true lanes 4 pci/0000:01:00.0/54: type eth netdev enp1s0nl8p2 flavour physical lc 8 port 2 splittable true lanes 4 pci/0000:01:00.0/55: type eth netdev enp1s0nl8p3 flavour physical lc 8 port 3 splittable true lanes 4 pci/0000:01:00.0/56: type eth netdev enp1s0nl8p4 flavour physical lc 8 port 4 splittable true lanes 4 pci/0000:01:00.0/57: type eth netdev enp1s0nl8p5 flavour physical lc 8 port 5 splittable true lanes 4 pci/0000:01:00.0/58: type eth netdev enp1s0nl8p6 flavour physical lc 8 port 6 splittable true lanes 4 pci/0000:01:00.0/59: type eth netdev enp1s0nl8p7 flavour physical lc 8 port 7 splittable true lanes 4 pci/0000:01:00.0/60: type eth netdev enp1s0nl8p8 flavour physical lc 8 port 8 splittable true lanes 4 pci/0000:01:00.0/61: type eth netdev enp1s0nl8p9 flavour physical lc 8 port 9 splittable true lanes 4 pci/0000:01:00.0/62: type eth netdev enp1s0nl8p10 flavour physical lc 8 port 10 splittable true lanes 4 pci/0000:01:00.0/63: type eth netdev enp1s0nl8p11 flavour physical lc 8 port 11 splittable true lanes 4 pci/0000:01:00.0/64: type eth netdev enp1s0nl8p12 flavour physical lc 8 port 12 splittable true lanes 4 pci/0000:01:00.0/125: type eth netdev enp1s0nl8p13 flavour physical lc 8 port 13 splittable true lanes 4 pci/0000:01:00.0/126: type eth netdev enp1s0nl8p14 flavour physical lc 8 port 14 splittable true lanes 4 pci/0000:01:00.0/127: type eth netdev enp1s0nl8p15 flavour physical lc 8 port 15 splittable true lanes 4 pci/0000:01:00.0/128: type eth netdev enp1s0nl8p16 flavour physical lc 8 port 16 splittable true lanes 4 $ devlink lc set pci/0000:01:00.0 lc 8 notype Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../networking/devlink/devlink-linecard.rst | 121 ++++++++ Documentation/networking/devlink/index.rst | 1 + include/net/devlink.h | 43 ++- include/uapi/linux/devlink.h | 15 + net/core/devlink.c | 322 ++++++++++++++++++++- 5 files changed, 497 insertions(+), 5 deletions(-) create mode 100644 Documentation/networking/devlink/devlink-linecard.rst (limited to 'include/uapi') diff --git a/Documentation/networking/devlink/devlink-linecard.rst b/Documentation/networking/devlink/devlink-linecard.rst new file mode 100644 index 000000000000..63ccd17f40ac --- /dev/null +++ b/Documentation/networking/devlink/devlink-linecard.rst @@ -0,0 +1,121 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================= +Devlink Line card +================= + +Background +========== + +The ``devlink-linecard`` mechanism is targeted for manipulation of +line cards that serve as a detachable PHY modules for modular switch +system. Following operations are provided: + + * Get a list of supported line card types. + * Provision of a slot with specific line card type. + * Get and monitor of line card state and its change. + +Line card according to the type may contain one or more gearboxes +to mux the lanes with certain speed to multiple ports with lanes +of different speed. Line card ensures N:M mapping between +the switch ASIC modules and physical front panel ports. + +Overview +======== + +Each line card devlink object is created by device driver, +according to the physical line card slots available on the device. + +Similar to splitter cable, where the device might have no way +of detection of the splitter cable geometry, the device +might not have a way to detect line card type. For that devices, +concept of provisioning is introduced. It allows the user to: + + * Provision a line card slot with certain line card type + + - Device driver would instruct the ASIC to prepare all + resources accordingly. The device driver would + create all instances, namely devlink port and netdevices + that reside on the line card, according to the line card type + * Manipulate of line card entities even without line card + being physically connected or powered-up + * Setup splitter cable on line card ports + + - As on the ordinary ports, user may provision a splitter + cable of a certain type, without the need to + be physically connected to the port + * Configure devlink ports and netdevices + +Netdevice carrier is decided as follows: + + * Line card is not inserted or powered-down + + - The carrier is always down + * Line card is inserted and powered up + + - The carrier is decided as for ordinary port netdevice + +Line card state +=============== + +The ``devlink-linecard`` mechanism supports the following line card states: + + * ``unprovisioned``: Line card is not provisioned on the slot. + * ``unprovisioning``: Line card slot is currently being unprovisioned. + * ``provisioning``: Line card slot is currently in a process of being provisioned + with a line card type. + * ``provisioning_failed``: Provisioning was not successful. + * ``provisioned``: Line card slot is provisioned with a type. + +The following diagram provides a general overview of ``devlink-linecard`` +state transitions:: + + +-------------------------+ + | | + +----------------------------------> unprovisioned | + | | | + | +--------|-------^--------+ + | | | + | | | + | +--------v-------|--------+ + | | | + | | provisioning | + | | | + | +------------|------------+ + | | + | +-----------------------------+ + | | | + | +------------v------------+ +------------v------------+ + | | | | | + +----- provisioning_failed | | provisioned | + | | | | | + | +------------^------------+ +------------|------------+ + | | | + | | | + | | +------------v------------+ + | | | | + | | | unprovisioning | + | | | | + | | +------------|------------+ + | | | + | +-----------------------------+ + | | + +-----------------------------------------------+ + + +Example usage +============= + +.. code:: shell + + $ devlink lc show [ DEV [ lc LC_INDEX ] ] + $ devlink lc set DEV lc LC_INDEX [ { type LC_TYPE | notype } ] + + # Show current line card configuration and status for all slots: + $ devlink lc + + # Set slot 8 to be provisioned with type "16x100G": + $ devlink lc set pci/0000:01:00.0 lc 8 type 16x100G + + # Set slot 8 to be unprovisioned: + $ devlink lc set pci/0000:01:00.0 lc 8 notype diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst index c17cdb079611..850715512293 100644 --- a/Documentation/networking/devlink/index.rst +++ b/Documentation/networking/devlink/index.rst @@ -39,6 +39,7 @@ general. devlink-resource devlink-reload devlink-trap + devlink-linecard Driver-specific documentation ----------------------------- diff --git a/include/net/devlink.h b/include/net/devlink.h index 7aabdfb3bc6a..3e49d4ff498c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -149,6 +149,40 @@ struct devlink_port_new_attrs { sfnum_valid:1; }; +/** + * struct devlink_linecard_ops - Linecard operations + * @provision: callback to provision the linecard slot with certain + * type of linecard. As a result of this operation, + * driver is expected to eventually (could be after + * the function call returns) call one of: + * devlink_linecard_provision_set() + * devlink_linecard_provision_fail() + * @unprovision: callback to unprovision the linecard slot. As a result + * of this operation, driver is expected to eventually + * (could be after the function call returns) call + * devlink_linecard_provision_clear() + * devlink_linecard_provision_fail() + * @same_provision: callback to ask the driver if linecard is already + * provisioned in the same way user asks this linecard to be + * provisioned. + * @types_count: callback to get number of supported types + * @types_get: callback to get next type in list + */ +struct devlink_linecard_ops { + int (*provision)(struct devlink_linecard *linecard, void *priv, + const char *type, const void *type_priv, + struct netlink_ext_ack *extack); + int (*unprovision)(struct devlink_linecard *linecard, void *priv, + struct netlink_ext_ack *extack); + bool (*same_provision)(struct devlink_linecard *linecard, void *priv, + const char *type, const void *type_priv); + unsigned int (*types_count)(struct devlink_linecard *linecard, + void *priv); + void (*types_get)(struct devlink_linecard *linecard, + void *priv, unsigned int index, const char **type, + const void **type_priv); +}; + struct devlink_sb_pool_info { enum devlink_sb_pool_type pool_type; u32 size; @@ -1537,9 +1571,14 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, int devlink_rate_leaf_create(struct devlink_port *port, void *priv); void devlink_rate_leaf_destroy(struct devlink_port *devlink_port); void devlink_rate_nodes_destroy(struct devlink *devlink); -struct devlink_linecard *devlink_linecard_create(struct devlink *devlink, - unsigned int linecard_index); +struct devlink_linecard * +devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index, + const struct devlink_linecard_ops *ops, void *priv); void devlink_linecard_destroy(struct devlink_linecard *linecard); +void devlink_linecard_provision_set(struct devlink_linecard *linecard, + const char *type); +void devlink_linecard_provision_clear(struct devlink_linecard *linecard); +void devlink_linecard_provision_fail(struct devlink_linecard *linecard); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 59c33ed2d3e7..de91e4a0d476 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -343,6 +343,18 @@ enum devlink_reload_limit { #define DEVLINK_RELOAD_LIMITS_VALID_MASK (_BITUL(__DEVLINK_RELOAD_LIMIT_MAX) - 1) +enum devlink_linecard_state { + DEVLINK_LINECARD_STATE_UNSPEC, + DEVLINK_LINECARD_STATE_UNPROVISIONED, + DEVLINK_LINECARD_STATE_UNPROVISIONING, + DEVLINK_LINECARD_STATE_PROVISIONING, + DEVLINK_LINECARD_STATE_PROVISIONING_FAILED, + DEVLINK_LINECARD_STATE_PROVISIONED, + + __DEVLINK_LINECARD_STATE_MAX, + DEVLINK_LINECARD_STATE_MAX = __DEVLINK_LINECARD_STATE_MAX - 1 +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, @@ -559,6 +571,9 @@ enum devlink_attr { DEVLINK_ATTR_REGION_MAX_SNAPSHOTS, /* u32 */ DEVLINK_ATTR_LINECARD_INDEX, /* u32 */ + DEVLINK_ATTR_LINECARD_STATE, /* u8 */ + DEVLINK_ATTR_LINECARD_TYPE, /* string */ + DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES, /* nested */ /* add new attributes above here, update the policy in devlink.c */ diff --git a/net/core/devlink.c b/net/core/devlink.c index 4cdacd74b82a..b7c3a82fbd4b 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -72,11 +72,21 @@ struct devlink { char priv[] __aligned(NETDEV_ALIGN); }; +struct devlink_linecard_ops; +struct devlink_linecard_type; + struct devlink_linecard { struct list_head list; struct devlink *devlink; unsigned int index; refcount_t refcount; + const struct devlink_linecard_ops *ops; + void *priv; + enum devlink_linecard_state state; + struct mutex state_lock; /* Protects state */ + const char *type; + struct devlink_linecard_type *types; + unsigned int types_count; }; /** @@ -452,8 +462,10 @@ devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info) static void devlink_linecard_put(struct devlink_linecard *linecard) { - if (refcount_dec_and_test(&linecard->refcount)) + if (refcount_dec_and_test(&linecard->refcount)) { + mutex_destroy(&linecard->state_lock); kfree(linecard); + } } struct devlink_sb { @@ -2037,6 +2049,11 @@ static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, return err; } +struct devlink_linecard_type { + const char *type; + const void *priv; +}; + static int devlink_nl_linecard_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_linecard *linecard, @@ -2044,7 +2061,10 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg, u32 seq, int flags, struct netlink_ext_ack *extack) { + struct devlink_linecard_type *linecard_type; + struct nlattr *attr; void *hdr; + int i; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); if (!hdr) @@ -2054,6 +2074,27 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg, goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index)) goto nla_put_failure; + if (nla_put_u8(msg, DEVLINK_ATTR_LINECARD_STATE, linecard->state)) + goto nla_put_failure; + if (linecard->type && + nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE, linecard->type)) + goto nla_put_failure; + + if (linecard->types_count) { + attr = nla_nest_start(msg, + DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES); + if (!attr) + goto nla_put_failure; + for (i = 0; i < linecard->types_count; i++) { + linecard_type = &linecard->types[i]; + if (nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE, + linecard_type->type)) { + nla_nest_cancel(msg, attr); + goto nla_put_failure; + } + } + nla_nest_end(msg, attr); + } genlmsg_end(msg, hdr); return 0; @@ -2103,10 +2144,12 @@ static int devlink_nl_cmd_linecard_get_doit(struct sk_buff *skb, if (!msg) return -ENOMEM; + mutex_lock(&linecard->state_lock); err = devlink_nl_linecard_fill(msg, devlink, linecard, DEVLINK_CMD_LINECARD_NEW, info->snd_portid, info->snd_seq, 0, info->extack); + mutex_unlock(&linecard->state_lock); if (err) { nlmsg_free(msg); return err; @@ -2139,12 +2182,14 @@ static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg, idx++; continue; } + mutex_lock(&linecard->state_lock); err = devlink_nl_linecard_fill(msg, devlink, linecard, DEVLINK_CMD_LINECARD_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->extack); + mutex_unlock(&linecard->state_lock); if (err) { mutex_unlock(&devlink->linecards_lock); devlink_put(devlink); @@ -2163,6 +2208,163 @@ out: return msg->len; } +static struct devlink_linecard_type * +devlink_linecard_type_lookup(struct devlink_linecard *linecard, + const char *type) +{ + struct devlink_linecard_type *linecard_type; + int i; + + for (i = 0; i < linecard->types_count; i++) { + linecard_type = &linecard->types[i]; + if (!strcmp(type, linecard_type->type)) + return linecard_type; + } + return NULL; +} + +static int devlink_linecard_type_set(struct devlink_linecard *linecard, + const char *type, + struct netlink_ext_ack *extack) +{ + const struct devlink_linecard_ops *ops = linecard->ops; + struct devlink_linecard_type *linecard_type; + int err; + + mutex_lock(&linecard->state_lock); + if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) { + NL_SET_ERR_MSG_MOD(extack, "Line card is currently being provisioned"); + err = -EBUSY; + goto out; + } + if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) { + NL_SET_ERR_MSG_MOD(extack, "Line card is currently being unprovisioned"); + err = -EBUSY; + goto out; + } + + linecard_type = devlink_linecard_type_lookup(linecard, type); + if (!linecard_type) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported line card type provided"); + err = -EINVAL; + goto out; + } + + if (linecard->state != DEVLINK_LINECARD_STATE_UNPROVISIONED && + linecard->state != DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) { + NL_SET_ERR_MSG_MOD(extack, "Line card already provisioned"); + err = -EBUSY; + /* Check if the line card is provisioned in the same + * way the user asks. In case it is, make the operation + * to return success. + */ + if (ops->same_provision && + ops->same_provision(linecard, linecard->priv, + linecard_type->type, + linecard_type->priv)) + err = 0; + goto out; + } + + linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING; + linecard->type = linecard_type->type; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); + err = ops->provision(linecard, linecard->priv, linecard_type->type, + linecard_type->priv, extack); + if (err) { + /* Provisioning failed. Assume the linecard is unprovisioned + * for future operations. + */ + mutex_lock(&linecard->state_lock); + linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; + linecard->type = NULL; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); + } + return err; + +out: + mutex_unlock(&linecard->state_lock); + return err; +} + +static int devlink_linecard_type_unset(struct devlink_linecard *linecard, + struct netlink_ext_ack *extack) +{ + int err; + + mutex_lock(&linecard->state_lock); + if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) { + NL_SET_ERR_MSG_MOD(extack, "Line card is currently being provisioned"); + err = -EBUSY; + goto out; + } + if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) { + NL_SET_ERR_MSG_MOD(extack, "Line card is currently being unprovisioned"); + err = -EBUSY; + goto out; + } + if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) { + linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; + linecard->type = NULL; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + err = 0; + goto out; + } + + if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONED) { + NL_SET_ERR_MSG_MOD(extack, "Line card is not provisioned"); + err = 0; + goto out; + } + linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONING; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); + err = linecard->ops->unprovision(linecard, linecard->priv, + extack); + if (err) { + /* Unprovisioning failed. Assume the linecard is unprovisioned + * for future operations. + */ + mutex_lock(&linecard->state_lock); + linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; + linecard->type = NULL; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); + } + return err; + +out: + mutex_unlock(&linecard->state_lock); + return err; +} + +static int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_linecard *linecard = info->user_ptr[1]; + struct netlink_ext_ack *extack = info->extack; + int err; + + if (info->attrs[DEVLINK_ATTR_LINECARD_TYPE]) { + const char *type; + + type = nla_data(info->attrs[DEVLINK_ATTR_LINECARD_TYPE]); + if (strcmp(type, "")) { + err = devlink_linecard_type_set(linecard, type, extack); + if (err) + return err; + } else { + err = devlink_linecard_type_unset(linecard, extack); + if (err) + return err; + } + } + + return 0; +} + static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_sb *devlink_sb, enum devlink_command cmd, u32 portid, @@ -8789,6 +8991,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 }, + [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -8871,6 +9074,12 @@ static const struct genl_small_ops devlink_nl_ops[] = { .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD, /* can be retrieved by unprivileged users */ }, + { + .cmd = DEVLINK_CMD_LINECARD_SET, + .doit = devlink_nl_cmd_linecard_set_doit, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD, + }, { .cmd = DEVLINK_CMD_SB_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, @@ -9962,19 +10171,56 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, return 0; } +static int devlink_linecard_types_init(struct devlink_linecard *linecard) +{ + struct devlink_linecard_type *linecard_type; + unsigned int count; + int i; + + count = linecard->ops->types_count(linecard, linecard->priv); + linecard->types = kmalloc_array(count, sizeof(*linecard_type), + GFP_KERNEL); + if (!linecard->types) + return -ENOMEM; + linecard->types_count = count; + + for (i = 0; i < count; i++) { + linecard_type = &linecard->types[i]; + linecard->ops->types_get(linecard, linecard->priv, i, + &linecard_type->type, + &linecard_type->priv); + } + return 0; +} + +static void devlink_linecard_types_fini(struct devlink_linecard *linecard) +{ + kfree(linecard->types); +} + /** * devlink_linecard_create - Create devlink linecard * * @devlink: devlink * @linecard_index: driver-specific numerical identifier of the linecard + * @ops: linecards ops + * @priv: user priv pointer * * Create devlink linecard instance with provided linecard index. * Caller can use any indexing, even hw-related one. + * + * Return: Line card structure or an ERR_PTR() encoded error code. */ -struct devlink_linecard *devlink_linecard_create(struct devlink *devlink, - unsigned int linecard_index) +struct devlink_linecard * +devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index, + const struct devlink_linecard_ops *ops, void *priv) { struct devlink_linecard *linecard; + int err; + + if (WARN_ON(!ops || !ops->provision || !ops->unprovision || + !ops->types_count || !ops->types_get)) + return ERR_PTR(-EINVAL); mutex_lock(&devlink->linecards_lock); if (devlink_linecard_index_exists(devlink, linecard_index)) { @@ -9990,6 +10236,19 @@ struct devlink_linecard *devlink_linecard_create(struct devlink *devlink, linecard->devlink = devlink; linecard->index = linecard_index; + linecard->ops = ops; + linecard->priv = priv; + linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; + mutex_init(&linecard->state_lock); + + err = devlink_linecard_types_init(linecard); + if (err) { + mutex_destroy(&linecard->state_lock); + kfree(linecard); + mutex_unlock(&devlink->linecards_lock); + return ERR_PTR(err); + } + list_add_tail(&linecard->list, &devlink->linecard_list); refcount_set(&linecard->refcount, 1); mutex_unlock(&devlink->linecards_lock); @@ -10010,11 +10269,68 @@ void devlink_linecard_destroy(struct devlink_linecard *linecard) devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL); mutex_lock(&devlink->linecards_lock); list_del(&linecard->list); + devlink_linecard_types_fini(linecard); mutex_unlock(&devlink->linecards_lock); devlink_linecard_put(linecard); } EXPORT_SYMBOL_GPL(devlink_linecard_destroy); +/** + * devlink_linecard_provision_set - Set provisioning on linecard + * + * @linecard: devlink linecard + * @type: linecard type + * + * This is either called directly from the provision() op call or + * as a result of the provision() op call asynchronously. + */ +void devlink_linecard_provision_set(struct devlink_linecard *linecard, + const char *type) +{ + mutex_lock(&linecard->state_lock); + WARN_ON(linecard->type && strcmp(linecard->type, type)); + linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED; + linecard->type = type; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); +} +EXPORT_SYMBOL_GPL(devlink_linecard_provision_set); + +/** + * devlink_linecard_provision_clear - Clear provisioning on linecard + * + * @linecard: devlink linecard + * + * This is either called directly from the unprovision() op call or + * as a result of the unprovision() op call asynchronously. + */ +void devlink_linecard_provision_clear(struct devlink_linecard *linecard) +{ + mutex_lock(&linecard->state_lock); + linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; + linecard->type = NULL; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); +} +EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear); + +/** + * devlink_linecard_provision_fail - Fail provisioning on linecard + * + * @linecard: devlink linecard + * + * This is either called directly from the provision() op call or + * as a result of the provision() op call asynchronously. + */ +void devlink_linecard_provision_fail(struct devlink_linecard *linecard) +{ + mutex_lock(&linecard->state_lock); + linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); +} +EXPORT_SYMBOL_GPL(devlink_linecard_provision_fail); + int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, -- cgit v1.2.3 From fc9f50d5b366cd9f35bdee22fe3f8d77833cb1d8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 18 Apr 2022 09:42:27 +0300 Subject: devlink: implement line card active state Allow driver to mark a line card as active. Expose this state to the userspace over devlink netlink interface with proper notifications. 'active' state means that line card was plugged in after being provisioned. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../networking/devlink/devlink-linecard.rst | 11 +++--- include/net/devlink.h | 2 ++ include/uapi/linux/devlink.h | 1 + net/core/devlink.c | 41 ++++++++++++++++++++++ 4 files changed, 50 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/networking/devlink/devlink-linecard.rst b/Documentation/networking/devlink/devlink-linecard.rst index 63ccd17f40ac..6c0b8928bc13 100644 --- a/Documentation/networking/devlink/devlink-linecard.rst +++ b/Documentation/networking/devlink/devlink-linecard.rst @@ -66,6 +66,7 @@ The ``devlink-linecard`` mechanism supports the following line card states: with a line card type. * ``provisioning_failed``: Provisioning was not successful. * ``provisioned``: Line card slot is provisioned with a type. + * ``active``: Line card is powered-up and active. The following diagram provides a general overview of ``devlink-linecard`` state transitions:: @@ -85,11 +86,11 @@ state transitions:: | | | +-----------------------------+ | | | - | +------------v------------+ +------------v------------+ - | | | | | - +----- provisioning_failed | | provisioned | - | | | | | - | +------------^------------+ +------------|------------+ + | +------------v------------+ +------------v------------+ +-------------------------+ + | | | | ----> | + +----- provisioning_failed | | provisioned | | active | + | | | | <---- | + | +------------^------------+ +------------|------------+ +-------------------------+ | | | | | | | | +------------v------------+ diff --git a/include/net/devlink.h b/include/net/devlink.h index 3e49d4ff498c..d8061a11fee6 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1579,6 +1579,8 @@ void devlink_linecard_provision_set(struct devlink_linecard *linecard, const char *type); void devlink_linecard_provision_clear(struct devlink_linecard *linecard); void devlink_linecard_provision_fail(struct devlink_linecard *linecard); +void devlink_linecard_activate(struct devlink_linecard *linecard); +void devlink_linecard_deactivate(struct devlink_linecard *linecard); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index de91e4a0d476..b3d40a5d72ff 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -350,6 +350,7 @@ enum devlink_linecard_state { DEVLINK_LINECARD_STATE_PROVISIONING, DEVLINK_LINECARD_STATE_PROVISIONING_FAILED, DEVLINK_LINECARD_STATE_PROVISIONED, + DEVLINK_LINECARD_STATE_ACTIVE, __DEVLINK_LINECARD_STATE_MAX, DEVLINK_LINECARD_STATE_MAX = __DEVLINK_LINECARD_STATE_MAX - 1 diff --git a/net/core/devlink.c b/net/core/devlink.c index b7c3a82fbd4b..aec0a517282c 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -10331,6 +10331,47 @@ void devlink_linecard_provision_fail(struct devlink_linecard *linecard) } EXPORT_SYMBOL_GPL(devlink_linecard_provision_fail); +/** + * devlink_linecard_activate - Set linecard active + * + * @linecard: devlink linecard + */ +void devlink_linecard_activate(struct devlink_linecard *linecard) +{ + mutex_lock(&linecard->state_lock); + WARN_ON(linecard->state != DEVLINK_LINECARD_STATE_PROVISIONED); + linecard->state = DEVLINK_LINECARD_STATE_ACTIVE; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); +} +EXPORT_SYMBOL_GPL(devlink_linecard_activate); + +/** + * devlink_linecard_deactivate - Set linecard inactive + * + * @linecard: devlink linecard + */ +void devlink_linecard_deactivate(struct devlink_linecard *linecard) +{ + mutex_lock(&linecard->state_lock); + switch (linecard->state) { + case DEVLINK_LINECARD_STATE_ACTIVE: + linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + break; + case DEVLINK_LINECARD_STATE_UNPROVISIONING: + /* Line card is being deactivated as part + * of unprovisioning flow. + */ + break; + default: + WARN_ON(1); + break; + } + mutex_unlock(&linecard->state_lock); +} +EXPORT_SYMBOL_GPL(devlink_linecard_deactivate); + int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, -- cgit v1.2.3 From 38a6f0865796e26fc38fff4858f681d9ae76fa0f Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Sat, 16 Apr 2022 00:40:46 +0800 Subject: net: sched: support hash selecting tx queue This patch allows users to pick queue_mapping, range from A to B. Then we can load balance packets from A to B tx queue. The range is an unsigned 16bit value in decimal format. $ tc filter ... action skbedit queue_mapping skbhash A B "skbedit queue_mapping QUEUE_MAPPING" (from "man 8 tc-skbedit") is enhanced with flags: SKBEDIT_F_TXQ_SKBHASH +----+ +----+ +----+ | P1 | | P2 | | Pn | +----+ +----+ +----+ | | | +-----------+-----------+ | | clsact/skbedit | MQ v +-----------+-----------+ | q0 | qn | qm v v v HTB/FQ FIFO ... FIFO For example: If P1 sends out packets to different Pods on other host, and we want distribute flows from qn - qm. Then we can use skb->hash as hash. setup commands: $ NETDEV=eth0 $ ip netns add n1 $ ip link add ipv1 link $NETDEV type ipvlan mode l2 $ ip link set ipv1 netns n1 $ ip netns exec n1 ifconfig ipv1 2.2.2.100/24 up $ tc qdisc add dev $NETDEV clsact $ tc filter add dev $NETDEV egress protocol ip prio 1 \ flower skip_hw src_ip 2.2.2.100 action skbedit queue_mapping skbhash 2 6 $ tc qdisc add dev $NETDEV handle 1: root mq $ tc qdisc add dev $NETDEV parent 1:1 handle 2: htb $ tc class add dev $NETDEV parent 2: classid 2:1 htb rate 100kbit $ tc class add dev $NETDEV parent 2: classid 2:2 htb rate 200kbit $ tc qdisc add dev $NETDEV parent 1:2 tbf rate 100mbit burst 100mb latency 1 $ tc qdisc add dev $NETDEV parent 1:3 pfifo $ tc qdisc add dev $NETDEV parent 1:4 pfifo $ tc qdisc add dev $NETDEV parent 1:5 pfifo $ tc qdisc add dev $NETDEV parent 1:6 pfifo $ tc qdisc add dev $NETDEV parent 1:7 pfifo $ ip netns exec n1 iperf3 -c 2.2.2.1 -i 1 -t 10 -P 10 pick txqueue from 2 - 6: $ ethtool -S $NETDEV | grep -i tx_queue_[0-9]_bytes tx_queue_0_bytes: 42 tx_queue_1_bytes: 0 tx_queue_2_bytes: 11442586444 tx_queue_3_bytes: 7383615334 tx_queue_4_bytes: 3981365579 tx_queue_5_bytes: 3983235051 tx_queue_6_bytes: 6706236461 tx_queue_7_bytes: 42 tx_queue_8_bytes: 0 tx_queue_9_bytes: 0 txqueues 2 - 6 are mapped to classid 1:3 - 1:7 $ tc -s class show dev $NETDEV ... class mq 1:3 root leaf 8002: Sent 11949133672 bytes 7929798 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 class mq 1:4 root leaf 8003: Sent 7710449050 bytes 5117279 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 class mq 1:5 root leaf 8004: Sent 4157648675 bytes 2758990 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 class mq 1:6 root leaf 8005: Sent 4159632195 bytes 2759990 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 class mq 1:7 root leaf 8006: Sent 7003169603 bytes 4646912 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 ... Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Jonathan Lemon Cc: Eric Dumazet Cc: Alexander Lobakin Cc: Paolo Abeni Cc: Talal Ahmad Cc: Kevin Hao Cc: Ilias Apalodimas Cc: Kees Cook Cc: Kumar Kartikeya Dwivedi Cc: Antoine Tenart Cc: Wei Wang Cc: Arnd Bergmann Signed-off-by: Tonghao Zhang Reviewed-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- include/net/tc_act/tc_skbedit.h | 1 + include/uapi/linux/tc_act/tc_skbedit.h | 2 ++ net/sched/act_skbedit.c | 49 ++++++++++++++++++++++++++++++++-- 3 files changed, 50 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index cab8229b9bed..dc1079f28e13 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -17,6 +17,7 @@ struct tcf_skbedit_params { u32 mark; u32 mask; u16 queue_mapping; + u16 mapping_mod; u16 ptype; struct rcu_head rcu; }; diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h index 800e93377218..6cb6101208d0 100644 --- a/include/uapi/linux/tc_act/tc_skbedit.h +++ b/include/uapi/linux/tc_act/tc_skbedit.h @@ -29,6 +29,7 @@ #define SKBEDIT_F_PTYPE 0x8 #define SKBEDIT_F_MASK 0x10 #define SKBEDIT_F_INHERITDSFIELD 0x20 +#define SKBEDIT_F_TXQ_SKBHASH 0x40 struct tc_skbedit { tc_gen; @@ -45,6 +46,7 @@ enum { TCA_SKBEDIT_PTYPE, TCA_SKBEDIT_MASK, TCA_SKBEDIT_FLAGS, + TCA_SKBEDIT_QUEUE_MAPPING_MAX, __TCA_SKBEDIT_MAX }; #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 1c5fdb6e7c2f..e3bd11dfe1ca 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -23,6 +23,20 @@ static unsigned int skbedit_net_id; static struct tc_action_ops act_skbedit_ops; +static u16 tcf_skbedit_hash(struct tcf_skbedit_params *params, + struct sk_buff *skb) +{ + u16 queue_mapping = params->queue_mapping; + + if (params->flags & SKBEDIT_F_TXQ_SKBHASH) { + u32 hash = skb_get_hash(skb); + + queue_mapping += hash % params->mapping_mod; + } + + return netdev_cap_txqueue(skb->dev, queue_mapping); +} + static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { @@ -62,7 +76,7 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, #ifdef CONFIG_NET_EGRESS netdev_xmit_skip_txqueue(true); #endif - skb_set_queue_mapping(skb, params->queue_mapping); + skb_set_queue_mapping(skb, tcf_skbedit_hash(params, skb)); } if (params->flags & SKBEDIT_F_MARK) { skb->mark &= ~params->mask; @@ -96,6 +110,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { [TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) }, [TCA_SKBEDIT_MASK] = { .len = sizeof(u32) }, [TCA_SKBEDIT_FLAGS] = { .len = sizeof(u64) }, + [TCA_SKBEDIT_QUEUE_MAPPING_MAX] = { .len = sizeof(u16) }, }; static int tcf_skbedit_init(struct net *net, struct nlattr *nla, @@ -112,6 +127,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct tcf_skbedit *d; u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL; u16 *queue_mapping = NULL, *ptype = NULL; + u16 mapping_mod = 1; bool exists = false; int ret = 0, err; u32 index; @@ -157,6 +173,25 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, if (tb[TCA_SKBEDIT_FLAGS] != NULL) { u64 *pure_flags = nla_data(tb[TCA_SKBEDIT_FLAGS]); + if (*pure_flags & SKBEDIT_F_TXQ_SKBHASH) { + u16 *queue_mapping_max; + + if (!tb[TCA_SKBEDIT_QUEUE_MAPPING] || + !tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]) { + NL_SET_ERR_MSG_MOD(extack, "Missing required range of queue_mapping."); + return -EINVAL; + } + + queue_mapping_max = + nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]); + if (*queue_mapping_max < *queue_mapping) { + NL_SET_ERR_MSG_MOD(extack, "The range of queue_mapping is invalid, max < min."); + return -EINVAL; + } + + mapping_mod = *queue_mapping_max - *queue_mapping + 1; + flags |= SKBEDIT_F_TXQ_SKBHASH; + } if (*pure_flags & SKBEDIT_F_INHERITDSFIELD) flags |= SKBEDIT_F_INHERITDSFIELD; } @@ -208,8 +243,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, params_new->flags = flags; if (flags & SKBEDIT_F_PRIORITY) params_new->priority = *priority; - if (flags & SKBEDIT_F_QUEUE_MAPPING) + if (flags & SKBEDIT_F_QUEUE_MAPPING) { params_new->queue_mapping = *queue_mapping; + params_new->mapping_mod = mapping_mod; + } if (flags & SKBEDIT_F_MARK) params_new->mark = *mark; if (flags & SKBEDIT_F_PTYPE) @@ -276,6 +313,13 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, goto nla_put_failure; if (params->flags & SKBEDIT_F_INHERITDSFIELD) pure_flags |= SKBEDIT_F_INHERITDSFIELD; + if (params->flags & SKBEDIT_F_TXQ_SKBHASH) { + if (nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING_MAX, + params->queue_mapping + params->mapping_mod - 1)) + goto nla_put_failure; + + pure_flags |= SKBEDIT_F_TXQ_SKBHASH; + } if (pure_flags != 0 && nla_put(skb, TCA_SKBEDIT_FLAGS, sizeof(pure_flags), &pure_flags)) goto nla_put_failure; @@ -325,6 +369,7 @@ static size_t tcf_skbedit_get_fill_size(const struct tc_action *act) return nla_total_size(sizeof(struct tc_skbedit)) + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_PRIORITY */ + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING */ + + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING_MAX */ + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MARK */ + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_PTYPE */ + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MASK */ -- cgit v1.2.3 From b4000312822615ba2222e368188029e9b725dbf4 Mon Sep 17 00:00:00 2001 From: Boris Sukholitko Date: Tue, 19 Apr 2022 11:14:33 +0300 Subject: net/sched: flower: Add number of vlan tags filter These are bookkeeping parts of the new num_of_vlans filter. Defines, dump, load and set are being done here. Signed-off-by: Boris Sukholitko Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 2 ++ net/sched/cls_flower.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 404f97fb239c..9a2ee1e39fad 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -587,6 +587,8 @@ enum { TCA_FLOWER_KEY_HASH, /* u32 */ TCA_FLOWER_KEY_HASH_MASK, /* u32 */ + TCA_FLOWER_KEY_NUM_OF_VLANS, /* u8 */ + __TCA_FLOWER_MAX, }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 86fd0420ac4f..4ec4d742e82f 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -72,6 +72,7 @@ struct fl_flow_key { } tp_range; struct flow_dissector_key_ct ct; struct flow_dissector_key_hash hash; + struct flow_dissector_key_num_of_vlans num_of_vlans; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -712,6 +713,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_FLAGS] = { .type = NLA_U32 }, [TCA_FLOWER_KEY_HASH] = { .type = NLA_U32 }, [TCA_FLOWER_KEY_HASH_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_NUM_OF_VLANS] = { .type = NLA_U8 }, }; @@ -1615,6 +1617,11 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC, mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK, sizeof(key->eth.src)); + fl_set_key_val(tb, &key->num_of_vlans, + TCA_FLOWER_KEY_NUM_OF_VLANS, + &mask->num_of_vlans, + TCA_FLOWER_UNSPEC, + sizeof(key->num_of_vlans)); if (is_vlan_key(tb[TCA_FLOWER_KEY_ETH_TYPE], ðertype, key, mask)) { fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID, @@ -1906,6 +1913,8 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_CT, ct); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_HASH, hash); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_NUM_OF_VLANS, num_of_vlans); skb_flow_dissector_init(dissector, keys, cnt); } @@ -2994,6 +3003,11 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, sizeof(key->basic.n_proto))) goto nla_put_failure; + if (mask->num_of_vlans.num_of_vlans) { + if (nla_put_u8(skb, TCA_FLOWER_KEY_NUM_OF_VLANS, key->num_of_vlans.num_of_vlans)) + goto nla_put_failure; + } + if (fl_dump_key_mpls(skb, &key->mpls, &mask->mpls)) goto nla_put_failure; -- cgit v1.2.3 From 8d92e4fbcf0fb7ecb24223b7b1ce95b9beb4dfa2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Apr 2022 06:44:21 +0300 Subject: devlink: introduce line card devices support Line card can contain one or more devices that makes sense to make visible to the user. For example, this can be a gearbox with flash memory, which could be updated. Provide the driver possibility to attach such devices to a line card and expose those to user. Example: $ devlink lc show pci/0000:01:00.0 lc 8 pci/0000:01:00.0: lc 8 state active type 16x100G supported_types: 16x100G devices: device 0 device 1 device 2 device 3 Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/devlink.h | 7 +++ include/uapi/linux/devlink.h | 3 ++ net/core/devlink.c | 104 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 113 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/net/devlink.h b/include/net/devlink.h index 2a2a2a0c93f7..c84b52fb9ff0 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1578,6 +1578,13 @@ struct devlink_linecard * devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index, const struct devlink_linecard_ops *ops, void *priv); void devlink_linecard_destroy(struct devlink_linecard *linecard); +struct devlink_linecard_device; +struct devlink_linecard_device * +devlink_linecard_device_create(struct devlink_linecard *linecard, + unsigned int device_index); +void +devlink_linecard_device_destroy(struct devlink_linecard *linecard, + struct devlink_linecard_device *linecard_device); void devlink_linecard_provision_set(struct devlink_linecard *linecard, const char *type); void devlink_linecard_provision_clear(struct devlink_linecard *linecard); diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index b3d40a5d72ff..cd578645f94f 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -575,6 +575,9 @@ enum devlink_attr { DEVLINK_ATTR_LINECARD_STATE, /* u8 */ DEVLINK_ATTR_LINECARD_TYPE, /* string */ DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES, /* nested */ + DEVLINK_ATTR_LINECARD_DEVICE_LIST, /* nested */ + DEVLINK_ATTR_LINECARD_DEVICE, /* nested */ + DEVLINK_ATTR_LINECARD_DEVICE_INDEX, /* u32 */ /* add new attributes above here, update the policy in devlink.c */ diff --git a/net/core/devlink.c b/net/core/devlink.c index 5cc88490f18f..41d9631ceada 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -83,10 +83,11 @@ struct devlink_linecard { const struct devlink_linecard_ops *ops; void *priv; enum devlink_linecard_state state; - struct mutex state_lock; /* Protects state */ + struct mutex state_lock; /* Protects state and device_list */ const char *type; struct devlink_linecard_type *types; unsigned int types_count; + struct list_head device_list; }; /** @@ -2058,6 +2059,55 @@ struct devlink_linecard_type { const void *priv; }; +struct devlink_linecard_device { + struct list_head list; + unsigned int index; +}; + +static int +devlink_nl_linecard_device_fill(struct sk_buff *msg, + struct devlink_linecard_device *linecard_device) +{ + struct nlattr *attr; + + attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE); + if (!attr) + return -EMSGSIZE; + if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_DEVICE_INDEX, + linecard_device->index)) { + nla_nest_cancel(msg, attr); + return -EMSGSIZE; + } + nla_nest_end(msg, attr); + + return 0; +} + +static int devlink_nl_linecard_devices_fill(struct sk_buff *msg, + struct devlink_linecard *linecard) +{ + struct devlink_linecard_device *linecard_device; + struct nlattr *attr; + int err; + + if (list_empty(&linecard->device_list)) + return 0; + + attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE_LIST); + if (!attr) + return -EMSGSIZE; + list_for_each_entry(linecard_device, &linecard->device_list, list) { + err = devlink_nl_linecard_device_fill(msg, linecard_device); + if (err) { + nla_nest_cancel(msg, attr); + return err; + } + } + nla_nest_end(msg, attr); + + return 0; +} + static int devlink_nl_linecard_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_linecard *linecard, @@ -2068,6 +2118,7 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg, struct devlink_linecard_type *linecard_type; struct nlattr *attr; void *hdr; + int err; int i; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); @@ -2100,6 +2151,10 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg, nla_nest_end(msg, attr); } + err = devlink_nl_linecard_devices_fill(msg, linecard); + if (err) + goto nla_put_failure; + genlmsg_end(msg, hdr); return 0; @@ -10264,6 +10319,7 @@ devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index, linecard->priv = priv; linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; mutex_init(&linecard->state_lock); + INIT_LIST_HEAD(&linecard->device_list); err = devlink_linecard_types_init(linecard); if (err) { @@ -10291,6 +10347,7 @@ void devlink_linecard_destroy(struct devlink_linecard *linecard) struct devlink *devlink = linecard->devlink; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL); + WARN_ON(!list_empty(&linecard->device_list)); mutex_lock(&devlink->linecards_lock); list_del(&linecard->list); devlink_linecard_types_fini(linecard); @@ -10299,6 +10356,50 @@ void devlink_linecard_destroy(struct devlink_linecard *linecard) } EXPORT_SYMBOL_GPL(devlink_linecard_destroy); +/** + * devlink_linecard_device_create - Create a device on linecard + * + * @linecard: devlink linecard + * @device_index: index of the linecard device + * + * Return: Line card device structure or an ERR_PTR() encoded error code. + */ +struct devlink_linecard_device * +devlink_linecard_device_create(struct devlink_linecard *linecard, + unsigned int device_index) +{ + struct devlink_linecard_device *linecard_device; + + linecard_device = kzalloc(sizeof(*linecard_device), GFP_KERNEL); + if (!linecard_device) + return ERR_PTR(-ENOMEM); + linecard_device->index = device_index; + mutex_lock(&linecard->state_lock); + list_add_tail(&linecard_device->list, &linecard->device_list); + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); + return linecard_device; +} +EXPORT_SYMBOL_GPL(devlink_linecard_device_create); + +/** + * devlink_linecard_device_destroy - Destroy device on linecard + * + * @linecard: devlink linecard + * @linecard_device: devlink linecard device + */ +void +devlink_linecard_device_destroy(struct devlink_linecard *linecard, + struct devlink_linecard_device *linecard_device) +{ + mutex_lock(&linecard->state_lock); + list_del(&linecard_device->list); + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); + mutex_unlock(&linecard->state_lock); + kfree(linecard_device); +} +EXPORT_SYMBOL_GPL(devlink_linecard_device_destroy); + /** * devlink_linecard_provision_set - Set provisioning on linecard * @@ -10331,6 +10432,7 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_set); void devlink_linecard_provision_clear(struct devlink_linecard *linecard) { mutex_lock(&linecard->state_lock); + WARN_ON(!list_empty(&linecard->device_list)); linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; linecard->type = NULL; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); -- cgit v1.2.3 From 276910aecc6a4076f5fbfd8160ff70695d6c1eb5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Apr 2022 06:44:22 +0300 Subject: devlink: introduce line card info get message Allow the driver to provide per line card info get op to fill-up info, similar to the "devlink dev info". Example: $ devlink lc info pci/0000:01:00.0 lc 8 pci/0000:01:00.0: lc 8 versions: fixed: hw.revision 0 running: ini.version 4 Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../networking/devlink/devlink-linecard.rst | 4 + include/net/devlink.h | 7 +- include/uapi/linux/devlink.h | 2 + net/core/devlink.c | 130 ++++++++++++++++++++- 4 files changed, 138 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/networking/devlink/devlink-linecard.rst b/Documentation/networking/devlink/devlink-linecard.rst index 6c0b8928bc13..5a8d5989702a 100644 --- a/Documentation/networking/devlink/devlink-linecard.rst +++ b/Documentation/networking/devlink/devlink-linecard.rst @@ -14,6 +14,7 @@ system. Following operations are provided: * Get a list of supported line card types. * Provision of a slot with specific line card type. * Get and monitor of line card state and its change. + * Get information about line card versions. Line card according to the type may contain one or more gearboxes to mux the lanes with certain speed to multiple ports with lanes @@ -120,3 +121,6 @@ Example usage # Set slot 8 to be unprovisioned: $ devlink lc set pci/0000:01:00.0 lc 8 notype + + # Set info for slot 8: + $ devlink lc info pci/0000:01:00.0 lc 8 diff --git a/include/net/devlink.h b/include/net/devlink.h index c84b52fb9ff0..f96dcb376630 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -150,6 +150,8 @@ struct devlink_port_new_attrs { sfnum_valid:1; }; +struct devlink_info_req; + /** * struct devlink_linecard_ops - Linecard operations * @provision: callback to provision the linecard slot with certain @@ -168,6 +170,7 @@ struct devlink_port_new_attrs { * provisioned. * @types_count: callback to get number of supported types * @types_get: callback to get next type in list + * @info_get: callback to get linecard info */ struct devlink_linecard_ops { int (*provision)(struct devlink_linecard *linecard, void *priv, @@ -182,6 +185,9 @@ struct devlink_linecard_ops { void (*types_get)(struct devlink_linecard *linecard, void *priv, unsigned int index, const char **type, const void **type_priv); + int (*info_get)(struct devlink_linecard *linecard, void *priv, + struct devlink_info_req *req, + struct netlink_ext_ack *extack); }; struct devlink_sb_pool_info { @@ -628,7 +634,6 @@ struct devlink_flash_update_params { #define DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK BIT(1) struct devlink_region; -struct devlink_info_req; /** * struct devlink_region_ops - Region operations diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index cd578645f94f..fb8c3864457f 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -136,6 +136,8 @@ enum devlink_command { DEVLINK_CMD_LINECARD_NEW, DEVLINK_CMD_LINECARD_DEL, + DEVLINK_CMD_LINECARD_INFO_GET, /* can dump */ + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 diff --git a/net/core/devlink.c b/net/core/devlink.c index 41d9631ceada..5facd10de64a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2424,6 +2424,125 @@ static int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb, return 0; } +struct devlink_info_req { + struct sk_buff *msg; +}; + +static int +devlink_nl_linecard_info_fill(struct sk_buff *msg, struct devlink *devlink, + struct devlink_linecard *linecard, + enum devlink_command cmd, u32 portid, + u32 seq, int flags, struct netlink_ext_ack *extack) +{ + struct devlink_info_req req; + void *hdr; + int err; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + err = -EMSGSIZE; + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index)) + goto nla_put_failure; + + req.msg = msg; + err = linecard->ops->info_get(linecard, linecard->priv, &req, extack); + if (err) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return err; +} + +static int devlink_nl_cmd_linecard_info_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_linecard *linecard = info->user_ptr[1]; + struct devlink *devlink = linecard->devlink; + struct sk_buff *msg; + int err; + + if (!linecard->ops->info_get) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + mutex_lock(&linecard->state_lock); + err = devlink_nl_linecard_info_fill(msg, devlink, linecard, + DEVLINK_CMD_LINECARD_INFO_GET, + info->snd_portid, info->snd_seq, 0, + info->extack); + mutex_unlock(&linecard->state_lock); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int devlink_nl_cmd_linecard_info_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink_linecard *linecard; + struct devlink *devlink; + int start = cb->args[0]; + unsigned long index; + int idx = 0; + int err = 0; + + mutex_lock(&devlink_mutex); + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) + continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + + mutex_lock(&devlink->linecards_lock); + list_for_each_entry(linecard, &devlink->linecard_list, list) { + if (idx < start || !linecard->ops->info_get) { + idx++; + continue; + } + mutex_lock(&linecard->state_lock); + err = devlink_nl_linecard_info_fill(msg, devlink, linecard, + DEVLINK_CMD_LINECARD_INFO_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + cb->extack); + mutex_unlock(&linecard->state_lock); + if (err) { + mutex_unlock(&devlink->linecards_lock); + devlink_put(devlink); + goto out; + } + idx++; + } + mutex_unlock(&devlink->linecards_lock); +retry: + devlink_put(devlink); + } +out: + mutex_unlock(&devlink_mutex); + + if (err != -EMSGSIZE) + return err; + + cb->args[0] = idx; + return msg->len; +} + static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_sb *devlink_sb, enum devlink_command cmd, u32 portid, @@ -6416,10 +6535,6 @@ out_dev: return err; } -struct devlink_info_req { - struct sk_buff *msg; -}; - int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name) { return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name); @@ -9139,6 +9254,13 @@ static const struct genl_small_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD, }, + { + .cmd = DEVLINK_CMD_LINECARD_INFO_GET, + .doit = devlink_nl_cmd_linecard_info_get_doit, + .dumpit = devlink_nl_cmd_linecard_info_get_dumpit, + .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD, + /* can be retrieved by unprivileged users */ + }, { .cmd = DEVLINK_CMD_SB_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, -- cgit v1.2.3 From c0a5a21c25f37c9fd7b36072f9968cdff1e4aa13 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 25 Apr 2022 03:18:51 +0530 Subject: bpf: Allow storing referenced kptr in map Extending the code in previous commits, introduce referenced kptr support, which needs to be tagged using 'kptr_ref' tag instead. Unlike unreferenced kptr, referenced kptr have a lot more restrictions. In addition to the type matching, only a newly introduced bpf_kptr_xchg helper is allowed to modify the map value at that offset. This transfers the referenced pointer being stored into the map, releasing the references state for the program, and returning the old value and creating new reference state for the returned pointer. Similar to unreferenced pointer case, return value for this case will also be PTR_TO_BTF_ID_OR_NULL. The reference for the returned pointer must either be eventually released by calling the corresponding release function, otherwise it must be transferred into another map. It is also allowed to call bpf_kptr_xchg with a NULL pointer, to clear the value, and obtain the old value if any. BPF_LDX, BPF_STX, and BPF_ST cannot access referenced kptr. A future commit will permit using BPF_LDX for such pointers, but attempt at making it safe, since the lifetime of object won't be guaranteed. There are valid reasons to enforce the restriction of permitting only bpf_kptr_xchg to operate on referenced kptr. The pointer value must be consistent in face of concurrent modification, and any prior values contained in the map must also be released before a new one is moved into the map. To ensure proper transfer of this ownership, bpf_kptr_xchg returns the old value, which the verifier would require the user to either free or move into another map, and releases the reference held for the pointer being moved in. In the future, direct BPF_XCHG instruction may also be permitted to work like bpf_kptr_xchg helper. Note that process_kptr_func doesn't have to call check_helper_mem_access, since we already disallow rdonly/wronly flags for map, which is what check_map_access_type checks, and we already ensure the PTR_TO_MAP_VALUE refers to kptr by obtaining its off_desc, so check_map_access is also not required. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220424214901.2743946-4-memxor@gmail.com --- include/linux/bpf.h | 8 ++++ include/uapi/linux/bpf.h | 12 ++++++ kernel/bpf/btf.c | 10 ++++- kernel/bpf/helpers.c | 24 +++++++++++ kernel/bpf/verifier.c | 98 ++++++++++++++++++++++++++++++++++++------ tools/include/uapi/linux/bpf.h | 12 ++++++ 6 files changed, 151 insertions(+), 13 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 492edd2c5713..24310837bafc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -160,8 +160,14 @@ enum { BPF_MAP_VALUE_OFF_MAX = 8, }; +enum bpf_kptr_type { + BPF_KPTR_UNREF, + BPF_KPTR_REF, +}; + struct bpf_map_value_off_desc { u32 offset; + enum bpf_kptr_type type; struct { struct btf *btf; u32 btf_id; @@ -418,6 +424,7 @@ enum bpf_arg_type { ARG_PTR_TO_STACK, /* pointer to stack */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ + ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ __BPF_ARG_TYPE_MAX, /* Extended arg_types. */ @@ -427,6 +434,7 @@ enum bpf_arg_type { ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM, ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, + ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d14b10b85e51..444fe6f1cf35 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5143,6 +5143,17 @@ union bpf_attr { * The **hash_algo** is returned on success, * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if * invalid arguments are passed. + * + * void *bpf_kptr_xchg(void *map_value, void *ptr) + * Description + * Exchange kptr at pointer *map_value* with *ptr*, and return the + * old value. *ptr* can be NULL, otherwise it must be a referenced + * pointer which will be released when this helper is called. + * Return + * The old value of kptr (which can be NULL). The returned pointer + * if not NULL, is a reference which must be released using its + * corresponding release function, or moved into a BPF map before + * program exit. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5339,6 +5350,7 @@ union bpf_attr { FN(copy_from_user_task), \ FN(skb_set_tstamp), \ FN(ima_file_hash), \ + FN(kptr_xchg), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index f0287342204f..4138c51728dd 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3177,6 +3177,7 @@ enum { struct btf_field_info { u32 type_id; u32 off; + enum bpf_kptr_type type; }; static int btf_find_struct(const struct btf *btf, const struct btf_type *t, @@ -3193,6 +3194,7 @@ static int btf_find_struct(const struct btf *btf, const struct btf_type *t, static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, u32 off, int sz, struct btf_field_info *info) { + enum bpf_kptr_type type; u32 res_id; /* For PTR, sz is always == 8 */ @@ -3205,7 +3207,11 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, /* Reject extra tags */ if (btf_type_is_type_tag(btf_type_by_id(btf, t->type))) return -EINVAL; - if (strcmp("kptr", __btf_name_by_offset(btf, t->name_off))) + if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off))) + type = BPF_KPTR_UNREF; + else if (!strcmp("kptr_ref", __btf_name_by_offset(btf, t->name_off))) + type = BPF_KPTR_REF; + else return -EINVAL; /* Get the base type */ @@ -3216,6 +3222,7 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, info->type_id = res_id; info->off = off; + info->type = type; return BTF_FIELD_FOUND; } @@ -3420,6 +3427,7 @@ struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf, } tab->off[i].offset = info_arr[i].off; + tab->off[i].type = info_arr[i].type; tab->off[i].kptr.btf_id = id; tab->off[i].kptr.btf = kernel_btf; } diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 315053ef6a75..3e709fed5306 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1374,6 +1374,28 @@ out: kfree(t); } +BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) +{ + unsigned long *kptr = map_value; + + return xchg(kptr, (unsigned long)ptr); +} + +/* Unlike other PTR_TO_BTF_ID helpers the btf_id in bpf_kptr_xchg() + * helper is determined dynamically by the verifier. + */ +#define BPF_PTR_POISON ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) + +const struct bpf_func_proto bpf_kptr_xchg_proto = { + .func = bpf_kptr_xchg, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .ret_btf_id = BPF_PTR_POISON, + .arg1_type = ARG_PTR_TO_KPTR, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE, + .arg2_btf_id = BPF_PTR_POISON, +}; + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_get_current_task_btf_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; @@ -1452,6 +1474,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_timer_start_proto; case BPF_FUNC_timer_cancel: return &bpf_timer_cancel_proto; + case BPF_FUNC_kptr_xchg: + return &bpf_kptr_xchg_proto; default: break; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5426bab7f02c..c9ee44efed89 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -258,6 +258,7 @@ struct bpf_call_arg_meta { struct btf *ret_btf; u32 ret_btf_id; u32 subprogno; + struct bpf_map_value_off_desc *kptr_off_desc; }; struct btf *btf_vmlinux; @@ -489,7 +490,8 @@ static bool is_acquire_function(enum bpf_func_id func_id, if (func_id == BPF_FUNC_sk_lookup_tcp || func_id == BPF_FUNC_sk_lookup_udp || func_id == BPF_FUNC_skc_lookup_tcp || - func_id == BPF_FUNC_ringbuf_reserve) + func_id == BPF_FUNC_ringbuf_reserve || + func_id == BPF_FUNC_kptr_xchg) return true; if (func_id == BPF_FUNC_map_lookup_elem && @@ -3514,6 +3516,12 @@ static int map_kptr_match_type(struct bpf_verifier_env *env, /* We need to verify reg->type and reg->btf, before accessing reg->btf */ reg_name = kernel_type_name(reg->btf, reg->btf_id); + /* For ref_ptr case, release function check should ensure we get one + * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the + * normal store of unreferenced kptr, we must ensure var_off is zero. + * Since ref_ptr cannot be accessed directly by BPF insns, checks for + * reg->off and reg->ref_obj_id are not needed here. + */ if (__check_ptr_off_reg(env, reg, regno, true)) return -EACCES; @@ -3569,6 +3577,12 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno, return -EACCES; } + /* We cannot directly access kptr_ref */ + if (off_desc->type == BPF_KPTR_REF) { + verbose(env, "accessing referenced kptr disallowed\n"); + return -EACCES; + } + if (class == BPF_LDX) { val_reg = reg_state(env, value_regno); /* We can simply mark the value_regno receiving the pointer @@ -5293,6 +5307,53 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno, return 0; } +static int process_kptr_func(struct bpf_verifier_env *env, int regno, + struct bpf_call_arg_meta *meta) +{ + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + struct bpf_map_value_off_desc *off_desc; + struct bpf_map *map_ptr = reg->map_ptr; + u32 kptr_off; + int ret; + + if (!tnum_is_const(reg->var_off)) { + verbose(env, + "R%d doesn't have constant offset. kptr has to be at the constant offset\n", + regno); + return -EINVAL; + } + if (!map_ptr->btf) { + verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n", + map_ptr->name); + return -EINVAL; + } + if (!map_value_has_kptrs(map_ptr)) { + ret = PTR_ERR(map_ptr->kptr_off_tab); + if (ret == -E2BIG) + verbose(env, "map '%s' has more than %d kptr\n", map_ptr->name, + BPF_MAP_VALUE_OFF_MAX); + else if (ret == -EEXIST) + verbose(env, "map '%s' has repeating kptr BTF tags\n", map_ptr->name); + else + verbose(env, "map '%s' has no valid kptr\n", map_ptr->name); + return -EINVAL; + } + + meta->map_ptr = map_ptr; + kptr_off = reg->off + reg->var_off.value; + off_desc = bpf_map_kptr_off_contains(map_ptr, kptr_off); + if (!off_desc) { + verbose(env, "off=%d doesn't point to kptr\n", kptr_off); + return -EACCES; + } + if (off_desc->type != BPF_KPTR_REF) { + verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off); + return -EACCES; + } + meta->kptr_off_desc = off_desc; + return 0; +} + static bool arg_type_is_mem_ptr(enum bpf_arg_type type) { return base_type(type) == ARG_PTR_TO_MEM || @@ -5433,6 +5494,7 @@ static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } }; +static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, @@ -5460,11 +5522,13 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_STACK] = &stack_ptr_types, [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, [ARG_PTR_TO_TIMER] = &timer_types, + [ARG_PTR_TO_KPTR] = &kptr_types, }; static int check_reg_type(struct bpf_verifier_env *env, u32 regno, enum bpf_arg_type arg_type, - const u32 *arg_btf_id) + const u32 *arg_btf_id, + struct bpf_call_arg_meta *meta) { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; enum bpf_reg_type expected, type = reg->type; @@ -5517,8 +5581,11 @@ found: arg_btf_id = compatible->btf_id; } - if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off, - btf_vmlinux, *arg_btf_id)) { + if (meta->func_id == BPF_FUNC_kptr_xchg) { + if (map_kptr_match_type(env, meta->kptr_off_desc, reg, regno)) + return -EACCES; + } else if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off, + btf_vmlinux, *arg_btf_id)) { verbose(env, "R%d is of type %s but %s is expected\n", regno, kernel_type_name(reg->btf, reg->btf_id), kernel_type_name(btf_vmlinux, *arg_btf_id)); @@ -5625,7 +5692,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, */ goto skip_type_check; - err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]); + err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg], meta); if (err) return err; @@ -5801,6 +5868,9 @@ skip_type_check: verbose(env, "string is not zero-terminated\n"); return -EINVAL; } + } else if (arg_type == ARG_PTR_TO_KPTR) { + if (process_kptr_func(env, regno, meta)) + return -EACCES; } return err; @@ -6143,10 +6213,10 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn) int i; for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) { - if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) + if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) return false; - if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i]) + if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i]) return false; } @@ -7012,21 +7082,25 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].btf_id = meta.ret_btf_id; } } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) { + struct btf *ret_btf; int ret_btf_id; mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; - ret_btf_id = *fn->ret_btf_id; + if (func_id == BPF_FUNC_kptr_xchg) { + ret_btf = meta.kptr_off_desc->kptr.btf; + ret_btf_id = meta.kptr_off_desc->kptr.btf_id; + } else { + ret_btf = btf_vmlinux; + ret_btf_id = *fn->ret_btf_id; + } if (ret_btf_id == 0) { verbose(env, "invalid return type %u of func %s#%d\n", base_type(ret_type), func_id_name(func_id), func_id); return -EINVAL; } - /* current BPF helper definitions are only coming from - * built-in code with type IDs from vmlinux BTF - */ - regs[BPF_REG_0].btf = btf_vmlinux; + regs[BPF_REG_0].btf = ret_btf; regs[BPF_REG_0].btf_id = ret_btf_id; } else { verbose(env, "unknown return type %u of func %s#%d\n", diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d14b10b85e51..444fe6f1cf35 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5143,6 +5143,17 @@ union bpf_attr { * The **hash_algo** is returned on success, * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if * invalid arguments are passed. + * + * void *bpf_kptr_xchg(void *map_value, void *ptr) + * Description + * Exchange kptr at pointer *map_value* with *ptr*, and return the + * old value. *ptr* can be NULL, otherwise it must be a referenced + * pointer which will be released when this helper is called. + * Return + * The old value of kptr (which can be NULL). The returned pointer + * if not NULL, is a reference which must be released using its + * corresponding release function, or moved into a BPF map before + * program exit. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5339,6 +5350,7 @@ union bpf_attr { FN(copy_from_user_task), \ FN(skb_set_tstamp), \ FN(ima_file_hash), \ + FN(kptr_xchg), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 052e1f01bfae8be6f31b61ed3a2356edfca855dc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 Apr 2022 10:54:33 -0700 Subject: net: atm: remove support for ZeitNet ZN122x ATM devices This driver received nothing but automated fixes in the last 15 years. Since it's using virt_to_bus it's unlikely to be used on any modern platform. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- arch/mips/configs/gpr_defconfig | 1 - arch/mips/configs/mtx1_defconfig | 1 - drivers/atm/Kconfig | 20 - drivers/atm/Makefile | 1 - drivers/atm/uPD98401.h | 293 ------- drivers/atm/uPD98402.c | 266 ------ drivers/atm/uPD98402.h | 107 --- drivers/atm/zatm.c | 1652 -------------------------------------- drivers/atm/zatm.h | 104 --- include/uapi/linux/atm_zatm.h | 47 -- 10 files changed, 2492 deletions(-) delete mode 100644 drivers/atm/uPD98401.h delete mode 100644 drivers/atm/uPD98402.c delete mode 100644 drivers/atm/uPD98402.h delete mode 100644 drivers/atm/zatm.c delete mode 100644 drivers/atm/zatm.h delete mode 100644 include/uapi/linux/atm_zatm.h (limited to 'include/uapi') diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig index 7ed202db9ef0..d82f4ebf687f 100644 --- a/arch/mips/configs/gpr_defconfig +++ b/arch/mips/configs/gpr_defconfig @@ -178,7 +178,6 @@ CONFIG_NETCONSOLE=m CONFIG_ATM_TCP=m CONFIG_ATM_LANAI=m CONFIG_ATM_ENI=m -CONFIG_ATM_ZATM=m CONFIG_ATM_NICSTAR=m CONFIG_ATM_IDT77252=m CONFIG_ATM_IA=m diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index f46ad2e294fa..0cb4d9aa14d1 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -255,7 +255,6 @@ CONFIG_ARCNET_COM20020_CS=m CONFIG_ATM_TCP=m CONFIG_ATM_LANAI=m CONFIG_ATM_ENI=m -CONFIG_ATM_ZATM=m CONFIG_ATM_NICSTAR=m CONFIG_ATM_IDT77252=m CONFIG_ATM_IA=m diff --git a/drivers/atm/Kconfig b/drivers/atm/Kconfig index 9c778308722a..63cdb46a3439 100644 --- a/drivers/atm/Kconfig +++ b/drivers/atm/Kconfig @@ -146,26 +146,6 @@ config ATM_ENI_BURST_RX_2W try this if you have disabled 4W and 8W bursts. Enabling 2W if 4W or 8W are also set may or may not improve throughput. -config ATM_ZATM - tristate "ZeitNet ZN1221/ZN1225" - depends on PCI && VIRT_TO_BUS - help - Driver for the ZeitNet ZN1221 (MMF) and ZN1225 (UTP-5) 155 Mbps ATM - adapters. - - To compile this driver as a module, choose M here: the module will - be called zatm. - -config ATM_ZATM_DEBUG - bool "Enable extended debugging" - depends on ATM_ZATM - help - Extended debugging records various events and displays that list - when an inconsistency is detected. This mechanism is faster than - generally using printks, but still has some impact on performance. - Note that extended debugging may create certain race conditions - itself. Enable this ONLY if you suspect problems with the driver. - config ATM_NICSTAR tristate "IDT 77201 (NICStAR) (ForeRunnerLE)" depends on PCI diff --git a/drivers/atm/Makefile b/drivers/atm/Makefile index 1b6a8ddaf007..c9eade92019b 100644 --- a/drivers/atm/Makefile +++ b/drivers/atm/Makefile @@ -5,7 +5,6 @@ fore_200e-y := fore200e.o -obj-$(CONFIG_ATM_ZATM) += zatm.o uPD98402.o obj-$(CONFIG_ATM_NICSTAR) += nicstar.o obj-$(CONFIG_ATM_IA) += iphase.o suni.o obj-$(CONFIG_ATM_FORE200E) += fore_200e.o diff --git a/drivers/atm/uPD98401.h b/drivers/atm/uPD98401.h deleted file mode 100644 index f766a5ef0c5d..000000000000 --- a/drivers/atm/uPD98401.h +++ /dev/null @@ -1,293 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* drivers/atm/uPD98401.h - NEC uPD98401 (SAR) declarations */ - -/* Written 1995 by Werner Almesberger, EPFL LRC */ - - -#ifndef DRIVERS_ATM_uPD98401_H -#define DRIVERS_ATM_uPD98401_H - - -#define MAX_CRAM_SIZE (1 << 18) /* 2^18 words */ -#define RAM_INCREMENT 1024 /* check in 4 kB increments */ - -#define uPD98401_PORTS 0x24 /* probably more ? */ - - -/* - * Commands - */ - -#define uPD98401_OPEN_CHAN 0x20000000 /* open channel */ -#define uPD98401_CHAN_ADDR 0x0003fff8 /* channel address */ -#define uPD98401_CHAN_ADDR_SHIFT 3 -#define uPD98401_CLOSE_CHAN 0x24000000 /* close channel */ -#define uPD98401_CHAN_RT 0x02000000 /* RX/TX (0 TX, 1 RX) */ -#define uPD98401_DEACT_CHAN 0x28000000 /* deactivate channel */ -#define uPD98401_TX_READY 0x30000000 /* TX ready */ -#define uPD98401_ADD_BAT 0x34000000 /* add batches */ -#define uPD98401_POOL 0x000f0000 /* pool number */ -#define uPD98401_POOL_SHIFT 16 -#define uPD98401_POOL_NUMBAT 0x0000ffff /* number of batches */ -#define uPD98401_NOP 0x3f000000 /* NOP */ -#define uPD98401_IND_ACC 0x00000000 /* Indirect Access */ -#define uPD98401_IA_RW 0x10000000 /* Read/Write (0 W, 1 R) */ -#define uPD98401_IA_B3 0x08000000 /* Byte select, 1 enable */ -#define uPD98401_IA_B2 0x04000000 -#define uPD98401_IA_B1 0x02000000 -#define uPD98401_IA_B0 0x01000000 -#define uPD98401_IA_BALL 0x0f000000 /* whole longword */ -#define uPD98401_IA_TGT 0x000c0000 /* Target */ -#define uPD98401_IA_TGT_SHIFT 18 -#define uPD98401_IA_TGT_CM 0 /* - Control Memory */ -#define uPD98401_IA_TGT_SAR 1 /* - uPD98401 registers */ -#define uPD98401_IA_TGT_PHY 3 /* - PHY device */ -#define uPD98401_IA_ADDR 0x0003ffff - -/* - * Command Register Status - */ - -#define uPD98401_BUSY 0x80000000 /* SAR is busy */ -#define uPD98401_LOCKED 0x40000000 /* SAR is locked by other CPU */ - -/* - * Indications - */ - -/* Normal (AAL5) Receive Indication */ -#define uPD98401_AAL5_UINFO 0xffff0000 /* user-supplied information */ -#define uPD98401_AAL5_UINFO_SHIFT 16 -#define uPD98401_AAL5_SIZE 0x0000ffff /* PDU size (in _CELLS_ !!) */ -#define uPD98401_AAL5_CHAN 0x7fff0000 /* Channel number */ -#define uPD98401_AAL5_CHAN_SHIFT 16 -#define uPD98401_AAL5_ERR 0x00008000 /* Error indication */ -#define uPD98401_AAL5_CI 0x00004000 /* Congestion Indication */ -#define uPD98401_AAL5_CLP 0x00002000 /* CLP (>= 1 cell had CLP=1) */ -#define uPD98401_AAL5_ES 0x00000f00 /* Error Status */ -#define uPD98401_AAL5_ES_SHIFT 8 -#define uPD98401_AAL5_ES_NONE 0 /* No error */ -#define uPD98401_AAL5_ES_FREE 1 /* Receiver free buf underflow */ -#define uPD98401_AAL5_ES_FIFO 2 /* Receiver FIFO overrun */ -#define uPD98401_AAL5_ES_TOOBIG 3 /* Maximum length violation */ -#define uPD98401_AAL5_ES_CRC 4 /* CRC error */ -#define uPD98401_AAL5_ES_ABORT 5 /* User abort */ -#define uPD98401_AAL5_ES_LENGTH 6 /* Length violation */ -#define uPD98401_AAL5_ES_T1 7 /* T1 error (timeout) */ -#define uPD98401_AAL5_ES_DEACT 8 /* Deactivated with DEACT_CHAN */ -#define uPD98401_AAL5_POOL 0x0000001f /* Free buffer pool number */ - -/* Raw Cell Indication */ -#define uPD98401_RAW_UINFO uPD98401_AAL5_UINFO -#define uPD98401_RAW_UINFO_SHIFT uPD98401_AAL5_UINFO_SHIFT -#define uPD98401_RAW_HEC 0x000000ff /* HEC */ -#define uPD98401_RAW_CHAN uPD98401_AAL5_CHAN -#define uPD98401_RAW_CHAN_SHIFT uPD98401_AAL5_CHAN_SHIFT - -/* Transmit Indication */ -#define uPD98401_TXI_CONN 0x7fff0000 /* Connection Number */ -#define uPD98401_TXI_CONN_SHIFT 16 -#define uPD98401_TXI_ACTIVE 0x00008000 /* Channel remains active */ -#define uPD98401_TXI_PQP 0x00007fff /* Packet Queue Pointer */ - -/* - * Directly Addressable Registers - */ - -#define uPD98401_GMR 0x00 /* General Mode Register */ -#define uPD98401_GSR 0x01 /* General Status Register */ -#define uPD98401_IMR 0x02 /* Interrupt Mask Register */ -#define uPD98401_RQU 0x03 /* Receive Queue Underrun */ -#define uPD98401_RQA 0x04 /* Receive Queue Alert */ -#define uPD98401_ADDR 0x05 /* Last Burst Address */ -#define uPD98401_VER 0x06 /* Version Number */ -#define uPD98401_SWR 0x07 /* Software Reset */ -#define uPD98401_CMR 0x08 /* Command Register */ -#define uPD98401_CMR_L 0x09 /* Command Register and Lock/Unlock */ -#define uPD98401_CER 0x0a /* Command Extension Register */ -#define uPD98401_CER_L 0x0b /* Command Ext Reg and Lock/Unlock */ - -#define uPD98401_MSH(n) (0x10+(n)) /* Mailbox n Start Address High */ -#define uPD98401_MSL(n) (0x14+(n)) /* Mailbox n Start Address High */ -#define uPD98401_MBA(n) (0x18+(n)) /* Mailbox n Bottom Address */ -#define uPD98401_MTA(n) (0x1c+(n)) /* Mailbox n Tail Address */ -#define uPD98401_MWA(n) (0x20+(n)) /* Mailbox n Write Address */ - -/* GMR is at 0x00 */ -#define uPD98401_GMR_ONE 0x80000000 /* Must be set to one */ -#define uPD98401_GMR_SLM 0x40000000 /* Address mode (0 word, 1 byte) */ -#define uPD98401_GMR_CPE 0x00008000 /* Control Memory Parity Enable */ -#define uPD98401_GMR_LP 0x00004000 /* Loopback */ -#define uPD98401_GMR_WA 0x00002000 /* Early Bus Write Abort/RDY */ -#define uPD98401_GMR_RA 0x00001000 /* Early Read Abort/RDY */ -#define uPD98401_GMR_SZ 0x00000f00 /* Burst Size Enable */ -#define uPD98401_BURST16 0x00000800 /* 16-word burst */ -#define uPD98401_BURST8 0x00000400 /* 8-word burst */ -#define uPD98401_BURST4 0x00000200 /* 4-word burst */ -#define uPD98401_BURST2 0x00000100 /* 2-word burst */ -#define uPD98401_GMR_AD 0x00000080 /* Address (burst resolution) Disable */ -#define uPD98401_GMR_BO 0x00000040 /* Byte Order (0 little, 1 big) */ -#define uPD98401_GMR_PM 0x00000020 /* Bus Parity Mode (0 byte, 1 word)*/ -#define uPD98401_GMR_PC 0x00000010 /* Bus Parity Control (0even,1odd) */ -#define uPD98401_GMR_BPE 0x00000008 /* Bus Parity Enable */ -#define uPD98401_GMR_DR 0x00000004 /* Receive Drop Mode (0drop,1don't)*/ -#define uPD98401_GMR_SE 0x00000002 /* Shapers Enable */ -#define uPD98401_GMR_RE 0x00000001 /* Receiver Enable */ - -/* GSR is at 0x01, IMR is at 0x02 */ -#define uPD98401_INT_PI 0x80000000 /* PHY interrupt */ -#define uPD98401_INT_RQA 0x40000000 /* Receive Queue Alert */ -#define uPD98401_INT_RQU 0x20000000 /* Receive Queue Underrun */ -#define uPD98401_INT_RD 0x10000000 /* Receiver Deactivated */ -#define uPD98401_INT_SPE 0x08000000 /* System Parity Error */ -#define uPD98401_INT_CPE 0x04000000 /* Control Memory Parity Error */ -#define uPD98401_INT_SBE 0x02000000 /* System Bus Error */ -#define uPD98401_INT_IND 0x01000000 /* Initialization Done */ -#define uPD98401_INT_RCR 0x0000ff00 /* Raw Cell Received */ -#define uPD98401_INT_RCR_SHIFT 8 -#define uPD98401_INT_MF 0x000000f0 /* Mailbox Full */ -#define uPD98401_INT_MF_SHIFT 4 -#define uPD98401_INT_MM 0x0000000f /* Mailbox Modified */ - -/* VER is at 0x06 */ -#define uPD98401_MAJOR 0x0000ff00 /* Major revision */ -#define uPD98401_MAJOR_SHIFT 8 -#define uPD98401_MINOR 0x000000ff /* Minor revision */ - -/* - * Indirectly Addressable Registers - */ - -#define uPD98401_IM(n) (0x40000+(n)) /* Scheduler n I and M */ -#define uPD98401_X(n) (0x40010+(n)) /* Scheduler n X */ -#define uPD98401_Y(n) (0x40020+(n)) /* Scheduler n Y */ -#define uPD98401_PC(n) (0x40030+(n)) /* Scheduler n P, C, p and c */ -#define uPD98401_PS(n) (0x40040+(n)) /* Scheduler n priority and status */ - -/* IM contents */ -#define uPD98401_IM_I 0xff000000 /* I */ -#define uPD98401_IM_I_SHIFT 24 -#define uPD98401_IM_M 0x00ffffff /* M */ - -/* PC contents */ -#define uPD98401_PC_P 0xff000000 /* P */ -#define uPD98401_PC_P_SHIFT 24 -#define uPD98401_PC_C 0x00ff0000 /* C */ -#define uPD98401_PC_C_SHIFT 16 -#define uPD98401_PC_p 0x0000ff00 /* p */ -#define uPD98401_PC_p_SHIFT 8 -#define uPD98401_PC_c 0x000000ff /* c */ - -/* PS contents */ -#define uPD98401_PS_PRIO 0xf0 /* Priority level (0 high, 15 low) */ -#define uPD98401_PS_PRIO_SHIFT 4 -#define uPD98401_PS_S 0x08 /* Scan - must be 0 (internal) */ -#define uPD98401_PS_R 0x04 /* Round Robin (internal) */ -#define uPD98401_PS_A 0x02 /* Active (internal) */ -#define uPD98401_PS_E 0x01 /* Enabled */ - -#define uPD98401_TOS 0x40100 /* Top of Stack Control Memory Address */ -#define uPD98401_SMA 0x40200 /* Shapers Control Memory Start Address */ -#define uPD98401_PMA 0x40201 /* Receive Pool Control Memory Start Address */ -#define uPD98401_T1R 0x40300 /* T1 Register */ -#define uPD98401_VRR 0x40301 /* VPI/VCI Reduction Register/Recv. Shutdown */ -#define uPD98401_TSR 0x40302 /* Time-Stamp Register */ - -/* VRR is at 0x40301 */ -#define uPD98401_VRR_SDM 0x80000000 /* Shutdown Mode */ -#define uPD98401_VRR_SHIFT 0x000f0000 /* VPI/VCI Shift */ -#define uPD98401_VRR_SHIFT_SHIFT 16 -#define uPD98401_VRR_MASK 0x0000ffff /* VPI/VCI mask */ - -/* - * TX packet descriptor - */ - -#define uPD98401_TXPD_SIZE 16 /* descriptor size (in bytes) */ - -#define uPD98401_TXPD_V 0x80000000 /* Valid bit */ -#define uPD98401_TXPD_DP 0x40000000 /* Descriptor (1) or Pointer (0) */ -#define uPD98401_TXPD_SM 0x20000000 /* Single (1) or Multiple (0) */ -#define uPD98401_TXPD_CLPM 0x18000000 /* CLP mode */ -#define uPD98401_CLPM_0 0 /* 00 CLP = 0 */ -#define uPD98401_CLPM_1 3 /* 11 CLP = 1 */ -#define uPD98401_CLPM_LAST 1 /* 01 CLP unless last cell */ -#define uPD98401_TXPD_CLPM_SHIFT 27 -#define uPD98401_TXPD_PTI 0x07000000 /* PTI pattern */ -#define uPD98401_TXPD_PTI_SHIFT 24 -#define uPD98401_TXPD_GFC 0x00f00000 /* GFC pattern */ -#define uPD98401_TXPD_GFC_SHIFT 20 -#define uPD98401_TXPD_C10 0x00040000 /* insert CRC-10 */ -#define uPD98401_TXPD_AAL5 0x00020000 /* AAL5 processing */ -#define uPD98401_TXPD_MB 0x00010000 /* TX mailbox number */ -#define uPD98401_TXPD_UU 0x0000ff00 /* CPCS-UU */ -#define uPD98401_TXPD_UU_SHIFT 8 -#define uPD98401_TXPD_CPI 0x000000ff /* CPI */ - -/* - * TX buffer descriptor - */ - -#define uPD98401_TXBD_SIZE 8 /* descriptor size (in bytes) */ - -#define uPD98401_TXBD_LAST 0x80000000 /* last buffer in packet */ - -/* - * TX VC table - */ - -/* 1st word has the same structure as in a TX packet descriptor */ -#define uPD98401_TXVC_L 0x80000000 /* last buffer */ -#define uPD98401_TXVC_SHP 0x0f000000 /* shaper number */ -#define uPD98401_TXVC_SHP_SHIFT 24 -#define uPD98401_TXVC_VPI 0x00ff0000 /* VPI */ -#define uPD98401_TXVC_VPI_SHIFT 16 -#define uPD98401_TXVC_VCI 0x0000ffff /* VCI */ -#define uPD98401_TXVC_QRP 6 /* Queue Read Pointer is in word 6 */ - -/* - * RX free buffer pools descriptor - */ - -#define uPD98401_RXFP_ALERT 0x70000000 /* low water mark */ -#define uPD98401_RXFP_ALERT_SHIFT 28 -#define uPD98401_RXFP_BFSZ 0x0f000000 /* buffer size, 64*2^n */ -#define uPD98401_RXFP_BFSZ_SHIFT 24 -#define uPD98401_RXFP_BTSZ 0x00ff0000 /* batch size, n+1 */ -#define uPD98401_RXFP_BTSZ_SHIFT 16 -#define uPD98401_RXFP_REMAIN 0x0000ffff /* remaining batches in pool */ - -/* - * RX VC table - */ - -#define uPD98401_RXVC_BTSZ 0xff000000 /* remaining free buffers in batch */ -#define uPD98401_RXVC_BTSZ_SHIFT 24 -#define uPD98401_RXVC_MB 0x00200000 /* RX mailbox number */ -#define uPD98401_RXVC_POOL 0x001f0000 /* free buffer pool number */ -#define uPD98401_RXVC_POOL_SHIFT 16 -#define uPD98401_RXVC_UINFO 0x0000ffff /* user-supplied information */ -#define uPD98401_RXVC_T1 0xffff0000 /* T1 timestamp */ -#define uPD98401_RXVC_T1_SHIFT 16 -#define uPD98401_RXVC_PR 0x00008000 /* Packet Reception, 1 if busy */ -#define uPD98401_RXVC_DR 0x00004000 /* FIFO Drop */ -#define uPD98401_RXVC_OD 0x00001000 /* Drop OAM cells */ -#define uPD98401_RXVC_AR 0x00000800 /* AAL5 or raw cell; 1 if AAL5 */ -#define uPD98401_RXVC_MAXSEG 0x000007ff /* max number of segments per PDU */ -#define uPD98401_RXVC_REM 0xfffe0000 /* remaining words in curr buffer */ -#define uPD98401_RXVC_REM_SHIFT 17 -#define uPD98401_RXVC_CLP 0x00010000 /* CLP received */ -#define uPD98401_RXVC_BFA 0x00008000 /* Buffer Assigned */ -#define uPD98401_RXVC_BTA 0x00004000 /* Batch Assigned */ -#define uPD98401_RXVC_CI 0x00002000 /* Congestion Indication */ -#define uPD98401_RXVC_DD 0x00001000 /* Dropping incoming cells */ -#define uPD98401_RXVC_DP 0x00000800 /* like PR ? */ -#define uPD98401_RXVC_CURSEG 0x000007ff /* Current Segment count */ - -/* - * RX lookup table - */ - -#define uPD98401_RXLT_ENBL 0x8000 /* Enable */ - -#endif diff --git a/drivers/atm/uPD98402.c b/drivers/atm/uPD98402.c deleted file mode 100644 index 239852d85558..000000000000 --- a/drivers/atm/uPD98402.c +++ /dev/null @@ -1,266 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* drivers/atm/uPD98402.c - NEC uPD98402 (PHY) declarations */ - -/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "uPD98402.h" - - -#if 0 -#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define DPRINTK(format,args...) -#endif - - -struct uPD98402_priv { - struct k_sonet_stats sonet_stats;/* link diagnostics */ - unsigned char framing; /* SONET/SDH framing */ - int loop_mode; /* loopback mode */ - spinlock_t lock; -}; - - -#define PRIV(dev) ((struct uPD98402_priv *) dev->phy_data) - -#define PUT(val,reg) dev->ops->phy_put(dev,val,uPD98402_##reg) -#define GET(reg) dev->ops->phy_get(dev,uPD98402_##reg) - - -static int fetch_stats(struct atm_dev *dev,struct sonet_stats __user *arg,int zero) -{ - struct sonet_stats tmp; - int error = 0; - - atomic_add(GET(HECCT),&PRIV(dev)->sonet_stats.uncorr_hcs); - sonet_copy_stats(&PRIV(dev)->sonet_stats,&tmp); - if (arg) error = copy_to_user(arg,&tmp,sizeof(tmp)); - if (zero && !error) { - /* unused fields are reported as -1, but we must not "adjust" - them */ - tmp.corr_hcs = tmp.tx_cells = tmp.rx_cells = 0; - sonet_subtract_stats(&PRIV(dev)->sonet_stats,&tmp); - } - return error ? -EFAULT : 0; -} - - -static int set_framing(struct atm_dev *dev,unsigned char framing) -{ - static const unsigned char sonet[] = { 1,2,3,0 }; - static const unsigned char sdh[] = { 1,0,0,2 }; - const char *set; - unsigned long flags; - - switch (framing) { - case SONET_FRAME_SONET: - set = sonet; - break; - case SONET_FRAME_SDH: - set = sdh; - break; - default: - return -EINVAL; - } - spin_lock_irqsave(&PRIV(dev)->lock, flags); - PUT(set[0],C11T); - PUT(set[1],C12T); - PUT(set[2],C13T); - PUT((GET(MDR) & ~uPD98402_MDR_SS_MASK) | (set[3] << - uPD98402_MDR_SS_SHIFT),MDR); - spin_unlock_irqrestore(&PRIV(dev)->lock, flags); - return 0; -} - - -static int get_sense(struct atm_dev *dev,u8 __user *arg) -{ - unsigned long flags; - unsigned char s[3]; - - spin_lock_irqsave(&PRIV(dev)->lock, flags); - s[0] = GET(C11R); - s[1] = GET(C12R); - s[2] = GET(C13R); - spin_unlock_irqrestore(&PRIV(dev)->lock, flags); - return (put_user(s[0], arg) || put_user(s[1], arg+1) || - put_user(s[2], arg+2) || put_user(0xff, arg+3) || - put_user(0xff, arg+4) || put_user(0xff, arg+5)) ? -EFAULT : 0; -} - - -static int set_loopback(struct atm_dev *dev,int mode) -{ - unsigned char mode_reg; - - mode_reg = GET(MDR) & ~(uPD98402_MDR_TPLP | uPD98402_MDR_ALP | - uPD98402_MDR_RPLP); - switch (__ATM_LM_XTLOC(mode)) { - case __ATM_LM_NONE: - break; - case __ATM_LM_PHY: - mode_reg |= uPD98402_MDR_TPLP; - break; - case __ATM_LM_ATM: - mode_reg |= uPD98402_MDR_ALP; - break; - default: - return -EINVAL; - } - switch (__ATM_LM_XTRMT(mode)) { - case __ATM_LM_NONE: - break; - case __ATM_LM_PHY: - mode_reg |= uPD98402_MDR_RPLP; - break; - default: - return -EINVAL; - } - PUT(mode_reg,MDR); - PRIV(dev)->loop_mode = mode; - return 0; -} - - -static int uPD98402_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg) -{ - switch (cmd) { - - case SONET_GETSTATZ: - case SONET_GETSTAT: - return fetch_stats(dev,arg, cmd == SONET_GETSTATZ); - case SONET_SETFRAMING: - return set_framing(dev, (int)(unsigned long)arg); - case SONET_GETFRAMING: - return put_user(PRIV(dev)->framing,(int __user *)arg) ? - -EFAULT : 0; - case SONET_GETFRSENSE: - return get_sense(dev,arg); - case ATM_SETLOOP: - return set_loopback(dev, (int)(unsigned long)arg); - case ATM_GETLOOP: - return put_user(PRIV(dev)->loop_mode,(int __user *)arg) ? - -EFAULT : 0; - case ATM_QUERYLOOP: - return put_user(ATM_LM_LOC_PHY | ATM_LM_LOC_ATM | - ATM_LM_RMT_PHY,(int __user *)arg) ? -EFAULT : 0; - default: - return -ENOIOCTLCMD; - } -} - - -#define ADD_LIMITED(s,v) \ - { atomic_add(GET(v),&PRIV(dev)->sonet_stats.s); \ - if (atomic_read(&PRIV(dev)->sonet_stats.s) < 0) \ - atomic_set(&PRIV(dev)->sonet_stats.s,INT_MAX); } - - -static void stat_event(struct atm_dev *dev) -{ - unsigned char events; - - events = GET(PCR); - if (events & uPD98402_PFM_PFEB) ADD_LIMITED(path_febe,PFECB); - if (events & uPD98402_PFM_LFEB) ADD_LIMITED(line_febe,LECCT); - if (events & uPD98402_PFM_B3E) ADD_LIMITED(path_bip,B3ECT); - if (events & uPD98402_PFM_B2E) ADD_LIMITED(line_bip,B2ECT); - if (events & uPD98402_PFM_B1E) ADD_LIMITED(section_bip,B1ECT); -} - - -#undef ADD_LIMITED - - -static void uPD98402_int(struct atm_dev *dev) -{ - static unsigned long silence = 0; - unsigned char reason; - - while ((reason = GET(PICR))) { - if (reason & uPD98402_INT_LOS) - printk(KERN_NOTICE "%s(itf %d): signal lost\n", - dev->type,dev->number); - if (reason & uPD98402_INT_PFM) stat_event(dev); - if (reason & uPD98402_INT_PCO) { - (void) GET(PCOCR); /* clear interrupt cause */ - atomic_add(GET(HECCT), - &PRIV(dev)->sonet_stats.uncorr_hcs); - } - if ((reason & uPD98402_INT_RFO) && - (time_after(jiffies, silence) || silence == 0)) { - printk(KERN_WARNING "%s(itf %d): uPD98402 receive " - "FIFO overflow\n",dev->type,dev->number); - silence = (jiffies+HZ/2)|1; - } - } -} - - -static int uPD98402_start(struct atm_dev *dev) -{ - DPRINTK("phy_start\n"); - if (!(dev->phy_data = kmalloc(sizeof(struct uPD98402_priv),GFP_KERNEL))) - return -ENOMEM; - spin_lock_init(&PRIV(dev)->lock); - memset(&PRIV(dev)->sonet_stats,0,sizeof(struct k_sonet_stats)); - (void) GET(PCR); /* clear performance events */ - PUT(uPD98402_PFM_FJ,PCMR); /* ignore frequency adj */ - (void) GET(PCOCR); /* clear overflows */ - PUT(~uPD98402_PCO_HECC,PCOMR); - (void) GET(PICR); /* clear interrupts */ - PUT(~(uPD98402_INT_PFM | uPD98402_INT_ALM | uPD98402_INT_RFO | - uPD98402_INT_LOS),PIMR); /* enable them */ - (void) fetch_stats(dev,NULL,1); /* clear kernel counters */ - atomic_set(&PRIV(dev)->sonet_stats.corr_hcs,-1); - atomic_set(&PRIV(dev)->sonet_stats.tx_cells,-1); - atomic_set(&PRIV(dev)->sonet_stats.rx_cells,-1); - return 0; -} - - -static int uPD98402_stop(struct atm_dev *dev) -{ - /* let SAR driver worry about stopping interrupts */ - kfree(PRIV(dev)); - return 0; -} - - -static const struct atmphy_ops uPD98402_ops = { - .start = uPD98402_start, - .ioctl = uPD98402_ioctl, - .interrupt = uPD98402_int, - .stop = uPD98402_stop, -}; - - -int uPD98402_init(struct atm_dev *dev) -{ -DPRINTK("phy_init\n"); - dev->phy = &uPD98402_ops; - return 0; -} - - -MODULE_LICENSE("GPL"); - -EXPORT_SYMBOL(uPD98402_init); - -static __init int uPD98402_module_init(void) -{ - return 0; -} -module_init(uPD98402_module_init); -/* module_exit not defined so not unloadable */ diff --git a/drivers/atm/uPD98402.h b/drivers/atm/uPD98402.h deleted file mode 100644 index 437cfaa20c96..000000000000 --- a/drivers/atm/uPD98402.h +++ /dev/null @@ -1,107 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* drivers/atm/uPD98402.h - NEC uPD98402 (PHY) declarations */ - -/* Written 1995 by Werner Almesberger, EPFL LRC */ - - -#ifndef DRIVERS_ATM_uPD98402_H -#define DRIVERS_ATM_uPD98402_H - -/* - * Registers - */ - -#define uPD98402_CMR 0x00 /* Command Register */ -#define uPD98402_MDR 0x01 /* Mode Register */ -#define uPD98402_PICR 0x02 /* PHY Interrupt Cause Register */ -#define uPD98402_PIMR 0x03 /* PHY Interrupt Mask Register */ -#define uPD98402_ACR 0x04 /* Alarm Cause Register */ -#define uPD98402_ACMR 0x05 /* Alarm Cause Mask Register */ -#define uPD98402_PCR 0x06 /* Performance Cause Register */ -#define uPD98402_PCMR 0x07 /* Performance Cause Mask Register */ -#define uPD98402_IACM 0x08 /* Internal Alarm Cause Mask Register */ -#define uPD98402_B1ECT 0x09 /* B1 Error Count Register */ -#define uPD98402_B2ECT 0x0a /* B2 Error Count Register */ -#define uPD98402_B3ECT 0x0b /* B3 Error Count Regster */ -#define uPD98402_PFECB 0x0c /* Path FEBE Count Register */ -#define uPD98402_LECCT 0x0d /* Line FEBE Count Register */ -#define uPD98402_HECCT 0x0e /* HEC Error Count Register */ -#define uPD98402_FJCT 0x0f /* Frequence Justification Count Reg */ -#define uPD98402_PCOCR 0x10 /* Perf. Counter Overflow Cause Reg */ -#define uPD98402_PCOMR 0x11 /* Perf. Counter Overflow Mask Reg */ -#define uPD98402_C11T 0x20 /* C11T Data Register */ -#define uPD98402_C12T 0x21 /* C12T Data Register */ -#define uPD98402_C13T 0x22 /* C13T Data Register */ -#define uPD98402_F1T 0x23 /* F1T Data Register */ -#define uPD98402_K2T 0x25 /* K2T Data Register */ -#define uPD98402_C2T 0x26 /* C2T Data Register */ -#define uPD98402_F2T 0x27 /* F2T Data Register */ -#define uPD98402_C11R 0x30 /* C11T Data Register */ -#define uPD98402_C12R 0x31 /* C12T Data Register */ -#define uPD98402_C13R 0x32 /* C13T Data Register */ -#define uPD98402_F1R 0x33 /* F1T Data Register */ -#define uPD98402_K2R 0x35 /* K2T Data Register */ -#define uPD98402_C2R 0x36 /* C2T Data Register */ -#define uPD98402_F2R 0x37 /* F2T Data Register */ - -/* CMR is at 0x00 */ -#define uPD98402_CMR_PFRF 0x01 /* Send path FERF */ -#define uPD98402_CMR_LFRF 0x02 /* Send line FERF */ -#define uPD98402_CMR_PAIS 0x04 /* Send path AIS */ -#define uPD98402_CMR_LAIS 0x08 /* Send line AIS */ - -/* MDR is at 0x01 */ -#define uPD98402_MDR_ALP 0x01 /* ATM layer loopback */ -#define uPD98402_MDR_TPLP 0x02 /* PMD loopback, to host */ -#define uPD98402_MDR_RPLP 0x04 /* PMD loopback, to network */ -#define uPD98402_MDR_SS0 0x08 /* SS0 */ -#define uPD98402_MDR_SS1 0x10 /* SS1 */ -#define uPD98402_MDR_SS_MASK 0x18 /* mask */ -#define uPD98402_MDR_SS_SHIFT 3 /* shift */ -#define uPD98402_MDR_HEC 0x20 /* disable HEC inbound processing */ -#define uPD98402_MDR_FSR 0x40 /* disable frame scrambler */ -#define uPD98402_MDR_CSR 0x80 /* disable cell scrambler */ - -/* PICR is at 0x02, PIMR is at 0x03 */ -#define uPD98402_INT_PFM 0x01 /* performance counter has changed */ -#define uPD98402_INT_ALM 0x02 /* line fault */ -#define uPD98402_INT_RFO 0x04 /* receive FIFO overflow */ -#define uPD98402_INT_PCO 0x08 /* performance counter overflow */ -#define uPD98402_INT_OTD 0x20 /* OTD has occurred */ -#define uPD98402_INT_LOS 0x40 /* Loss Of Signal */ -#define uPD98402_INT_LOF 0x80 /* Loss Of Frame */ - -/* ACR is as 0x04, ACMR is at 0x05 */ -#define uPD98402_ALM_PFRF 0x01 /* path FERF */ -#define uPD98402_ALM_LFRF 0x02 /* line FERF */ -#define uPD98402_ALM_PAIS 0x04 /* path AIS */ -#define uPD98402_ALM_LAIS 0x08 /* line AIS */ -#define uPD98402_ALM_LOD 0x10 /* loss of delineation */ -#define uPD98402_ALM_LOP 0x20 /* loss of pointer */ -#define uPD98402_ALM_OOF 0x40 /* out of frame */ - -/* PCR is at 0x06, PCMR is at 0x07 */ -#define uPD98402_PFM_PFEB 0x01 /* path FEBE */ -#define uPD98402_PFM_LFEB 0x02 /* line FEBE */ -#define uPD98402_PFM_B3E 0x04 /* B3 error */ -#define uPD98402_PFM_B2E 0x08 /* B2 error */ -#define uPD98402_PFM_B1E 0x10 /* B1 error */ -#define uPD98402_PFM_FJ 0x20 /* frequency justification */ - -/* IACM is at 0x08 */ -#define uPD98402_IACM_PFRF 0x01 /* don't generate path FERF */ -#define uPD98402_IACM_LFRF 0x02 /* don't generate line FERF */ - -/* PCOCR is at 0x010, PCOMR is at 0x11 */ -#define uPD98402_PCO_B1EC 0x01 /* B1ECT overflow */ -#define uPD98402_PCO_B2EC 0x02 /* B2ECT overflow */ -#define uPD98402_PCO_B3EC 0x04 /* B3ECT overflow */ -#define uPD98402_PCO_PFBC 0x08 /* PFEBC overflow */ -#define uPD98402_PCO_LFBC 0x10 /* LFEVC overflow */ -#define uPD98402_PCO_HECC 0x20 /* HECCT overflow */ -#define uPD98402_PCO_FJC 0x40 /* FJCT overflow */ - - -int uPD98402_init(struct atm_dev *dev); - -#endif diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c deleted file mode 100644 index cf5fffcf98a1..000000000000 --- a/drivers/atm/zatm.c +++ /dev/null @@ -1,1652 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* drivers/atm/zatm.c - ZeitNet ZN122x device driver */ - -/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "uPD98401.h" -#include "uPD98402.h" -#include "zeprom.h" -#include "zatm.h" - - -/* - * TODO: - * - * Minor features - * - support 64 kB SDUs (will have to use multibuffer batches then :-( ) - * - proper use of CDV, credit = max(1,CDVT*PCR) - * - AAL0 - * - better receive timestamps - * - OAM - */ - -#define ZATM_COPPER 1 - -#if 0 -#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define DPRINTK(format,args...) -#endif - -#ifndef CONFIG_ATM_ZATM_DEBUG - - -#define NULLCHECK(x) - -#define EVENT(s,a,b) - - -static void event_dump(void) -{ -} - - -#else - - -/* - * NULL pointer checking - */ - -#define NULLCHECK(x) \ - if ((unsigned long) (x) < 0x30) printk(KERN_CRIT #x "==0x%x\n", (int) (x)) - -/* - * Very extensive activity logging. Greatly improves bug detection speed but - * costs a few Mbps if enabled. - */ - -#define EV 64 - -static const char *ev[EV]; -static unsigned long ev_a[EV],ev_b[EV]; -static int ec = 0; - - -static void EVENT(const char *s,unsigned long a,unsigned long b) -{ - ev[ec] = s; - ev_a[ec] = a; - ev_b[ec] = b; - ec = (ec+1) % EV; -} - - -static void event_dump(void) -{ - int n,i; - - printk(KERN_NOTICE "----- event dump follows -----\n"); - for (n = 0; n < EV; n++) { - i = (ec+n) % EV; - printk(KERN_NOTICE); - printk(ev[i] ? ev[i] : "(null)",ev_a[i],ev_b[i]); - } - printk(KERN_NOTICE "----- event dump ends here -----\n"); -} - - -#endif /* CONFIG_ATM_ZATM_DEBUG */ - - -#define RING_BUSY 1 /* indication from do_tx that PDU has to be - backlogged */ - -static struct atm_dev *zatm_boards = NULL; -static unsigned long dummy[2] = {0,0}; - - -#define zin_n(r) inl(zatm_dev->base+r*4) -#define zin(r) inl(zatm_dev->base+uPD98401_##r*4) -#define zout(v,r) outl(v,zatm_dev->base+uPD98401_##r*4) -#define zwait() do {} while (zin(CMR) & uPD98401_BUSY) - -/* RX0, RX1, TX0, TX1 */ -static const int mbx_entries[NR_MBX] = { 1024,1024,1024,1024 }; -static const int mbx_esize[NR_MBX] = { 16,16,4,4 }; /* entry size in bytes */ - -#define MBX_SIZE(i) (mbx_entries[i]*mbx_esize[i]) - - -/*-------------------------------- utilities --------------------------------*/ - - -static void zpokel(struct zatm_dev *zatm_dev,u32 value,u32 addr) -{ - zwait(); - zout(value,CER); - zout(uPD98401_IND_ACC | uPD98401_IA_BALL | - (uPD98401_IA_TGT_CM << uPD98401_IA_TGT_SHIFT) | addr,CMR); -} - - -static u32 zpeekl(struct zatm_dev *zatm_dev,u32 addr) -{ - zwait(); - zout(uPD98401_IND_ACC | uPD98401_IA_BALL | uPD98401_IA_RW | - (uPD98401_IA_TGT_CM << uPD98401_IA_TGT_SHIFT) | addr,CMR); - zwait(); - return zin(CER); -} - - -/*------------------------------- free lists --------------------------------*/ - - -/* - * Free buffer head structure: - * [0] pointer to buffer (for SAR) - * [1] buffer descr link pointer (for SAR) - * [2] back pointer to skb (for poll_rx) - * [3] data - * ... - */ - -struct rx_buffer_head { - u32 buffer; /* pointer to buffer (for SAR) */ - u32 link; /* buffer descriptor link pointer (for SAR) */ - struct sk_buff *skb; /* back pointer to skb (for poll_rx) */ -}; - - -static void refill_pool(struct atm_dev *dev,int pool) -{ - struct zatm_dev *zatm_dev; - struct sk_buff *skb; - struct rx_buffer_head *first; - unsigned long flags; - int align,offset,free,count,size; - - EVENT("refill_pool\n",0,0); - zatm_dev = ZATM_DEV(dev); - size = (64 << (pool <= ZATM_AAL5_POOL_BASE ? 0 : - pool-ZATM_AAL5_POOL_BASE))+sizeof(struct rx_buffer_head); - if (size < PAGE_SIZE) { - align = 32; /* for 32 byte alignment */ - offset = sizeof(struct rx_buffer_head); - } - else { - align = 4096; - offset = zatm_dev->pool_info[pool].offset+ - sizeof(struct rx_buffer_head); - } - size += align; - spin_lock_irqsave(&zatm_dev->lock, flags); - free = zpeekl(zatm_dev,zatm_dev->pool_base+2*pool) & - uPD98401_RXFP_REMAIN; - spin_unlock_irqrestore(&zatm_dev->lock, flags); - if (free >= zatm_dev->pool_info[pool].low_water) return; - EVENT("starting ... POOL: 0x%x, 0x%x\n", - zpeekl(zatm_dev,zatm_dev->pool_base+2*pool), - zpeekl(zatm_dev,zatm_dev->pool_base+2*pool+1)); - EVENT("dummy: 0x%08lx, 0x%08lx\n",dummy[0],dummy[1]); - count = 0; - first = NULL; - while (free < zatm_dev->pool_info[pool].high_water) { - struct rx_buffer_head *head; - - skb = alloc_skb(size,GFP_ATOMIC); - if (!skb) { - printk(KERN_WARNING DEV_LABEL "(Itf %d): got no new " - "skb (%d) with %d free\n",dev->number,size,free); - break; - } - skb_reserve(skb,(unsigned char *) ((((unsigned long) skb->data+ - align+offset-1) & ~(unsigned long) (align-1))-offset)- - skb->data); - head = (struct rx_buffer_head *) skb->data; - skb_reserve(skb,sizeof(struct rx_buffer_head)); - if (!first) first = head; - count++; - head->buffer = virt_to_bus(skb->data); - head->link = 0; - head->skb = skb; - EVENT("enq skb 0x%08lx/0x%08lx\n",(unsigned long) skb, - (unsigned long) head); - spin_lock_irqsave(&zatm_dev->lock, flags); - if (zatm_dev->last_free[pool]) - ((struct rx_buffer_head *) (zatm_dev->last_free[pool]-> - data))[-1].link = virt_to_bus(head); - zatm_dev->last_free[pool] = skb; - skb_queue_tail(&zatm_dev->pool[pool],skb); - spin_unlock_irqrestore(&zatm_dev->lock, flags); - free++; - } - if (first) { - spin_lock_irqsave(&zatm_dev->lock, flags); - zwait(); - zout(virt_to_bus(first),CER); - zout(uPD98401_ADD_BAT | (pool << uPD98401_POOL_SHIFT) | count, - CMR); - spin_unlock_irqrestore(&zatm_dev->lock, flags); - EVENT ("POOL: 0x%x, 0x%x\n", - zpeekl(zatm_dev,zatm_dev->pool_base+2*pool), - zpeekl(zatm_dev,zatm_dev->pool_base+2*pool+1)); - EVENT("dummy: 0x%08lx, 0x%08lx\n",dummy[0],dummy[1]); - } -} - - -static void drain_free(struct atm_dev *dev,int pool) -{ - skb_queue_purge(&ZATM_DEV(dev)->pool[pool]); -} - - -static int pool_index(int max_pdu) -{ - int i; - - if (max_pdu % ATM_CELL_PAYLOAD) - printk(KERN_ERR DEV_LABEL ": driver error in pool_index: " - "max_pdu is %d\n",max_pdu); - if (max_pdu > 65536) return -1; - for (i = 0; (64 << i) < max_pdu; i++); - return i+ZATM_AAL5_POOL_BASE; -} - - -/* use_pool isn't reentrant */ - - -static void use_pool(struct atm_dev *dev,int pool) -{ - struct zatm_dev *zatm_dev; - unsigned long flags; - int size; - - zatm_dev = ZATM_DEV(dev); - if (!(zatm_dev->pool_info[pool].ref_count++)) { - skb_queue_head_init(&zatm_dev->pool[pool]); - size = pool-ZATM_AAL5_POOL_BASE; - if (size < 0) size = 0; /* 64B... */ - else if (size > 10) size = 10; /* ... 64kB */ - spin_lock_irqsave(&zatm_dev->lock, flags); - zpokel(zatm_dev,((zatm_dev->pool_info[pool].low_water/4) << - uPD98401_RXFP_ALERT_SHIFT) | - (1 << uPD98401_RXFP_BTSZ_SHIFT) | - (size << uPD98401_RXFP_BFSZ_SHIFT), - zatm_dev->pool_base+pool*2); - zpokel(zatm_dev,(unsigned long) dummy,zatm_dev->pool_base+ - pool*2+1); - spin_unlock_irqrestore(&zatm_dev->lock, flags); - zatm_dev->last_free[pool] = NULL; - refill_pool(dev,pool); - } - DPRINTK("pool %d: %d\n",pool,zatm_dev->pool_info[pool].ref_count); -} - - -static void unuse_pool(struct atm_dev *dev,int pool) -{ - if (!(--ZATM_DEV(dev)->pool_info[pool].ref_count)) - drain_free(dev,pool); -} - -/*----------------------------------- RX ------------------------------------*/ - - -#if 0 -static void exception(struct atm_vcc *vcc) -{ - static int count = 0; - struct zatm_dev *zatm_dev = ZATM_DEV(vcc->dev); - struct zatm_vcc *zatm_vcc = ZATM_VCC(vcc); - unsigned long *qrp; - int i; - - if (count++ > 2) return; - for (i = 0; i < 8; i++) - printk("TX%d: 0x%08lx\n",i, - zpeekl(zatm_dev,zatm_vcc->tx_chan*VC_SIZE/4+i)); - for (i = 0; i < 5; i++) - printk("SH%d: 0x%08lx\n",i, - zpeekl(zatm_dev,uPD98401_IM(zatm_vcc->shaper)+16*i)); - qrp = (unsigned long *) zpeekl(zatm_dev,zatm_vcc->tx_chan*VC_SIZE/4+ - uPD98401_TXVC_QRP); - printk("qrp=0x%08lx\n",(unsigned long) qrp); - for (i = 0; i < 4; i++) printk("QRP[%d]: 0x%08lx",i,qrp[i]); -} -#endif - - -static const char *err_txt[] = { - "No error", - "RX buf underflow", - "RX FIFO overrun", - "Maximum len violation", - "CRC error", - "User abort", - "Length violation", - "T1 error", - "Deactivated", - "???", - "???", - "???", - "???", - "???", - "???", - "???" -}; - - -static void poll_rx(struct atm_dev *dev,int mbx) -{ - struct zatm_dev *zatm_dev; - unsigned long pos; - u32 x; - int error; - - EVENT("poll_rx\n",0,0); - zatm_dev = ZATM_DEV(dev); - pos = (zatm_dev->mbx_start[mbx] & ~0xffffUL) | zin(MTA(mbx)); - while (x = zin(MWA(mbx)), (pos & 0xffff) != x) { - u32 *here; - struct sk_buff *skb; - struct atm_vcc *vcc; - int cells,size,chan; - - EVENT("MBX: host 0x%lx, nic 0x%x\n",pos,x); - here = (u32 *) pos; - if (((pos += 16) & 0xffff) == zatm_dev->mbx_end[mbx]) - pos = zatm_dev->mbx_start[mbx]; - cells = here[0] & uPD98401_AAL5_SIZE; -#if 0 -printk("RX IND: 0x%x, 0x%x, 0x%x, 0x%x\n",here[0],here[1],here[2],here[3]); -{ -unsigned long *x; - printk("POOL: 0x%08x, 0x%08x\n",zpeekl(zatm_dev, - zatm_dev->pool_base), - zpeekl(zatm_dev,zatm_dev->pool_base+1)); - x = (unsigned long *) here[2]; - printk("[0..3] = 0x%08lx, 0x%08lx, 0x%08lx, 0x%08lx\n", - x[0],x[1],x[2],x[3]); -} -#endif - error = 0; - if (here[3] & uPD98401_AAL5_ERR) { - error = (here[3] & uPD98401_AAL5_ES) >> - uPD98401_AAL5_ES_SHIFT; - if (error == uPD98401_AAL5_ES_DEACT || - error == uPD98401_AAL5_ES_FREE) continue; - } -EVENT("error code 0x%x/0x%x\n",(here[3] & uPD98401_AAL5_ES) >> - uPD98401_AAL5_ES_SHIFT,error); - skb = ((struct rx_buffer_head *) bus_to_virt(here[2]))->skb; - __net_timestamp(skb); -#if 0 -printk("[-3..0] 0x%08lx 0x%08lx 0x%08lx 0x%08lx\n",((unsigned *) skb->data)[-3], - ((unsigned *) skb->data)[-2],((unsigned *) skb->data)[-1], - ((unsigned *) skb->data)[0]); -#endif - EVENT("skb 0x%lx, here 0x%lx\n",(unsigned long) skb, - (unsigned long) here); -#if 0 -printk("dummy: 0x%08lx, 0x%08lx\n",dummy[0],dummy[1]); -#endif - size = error ? 0 : ntohs(((__be16 *) skb->data)[cells* - ATM_CELL_PAYLOAD/sizeof(u16)-3]); - EVENT("got skb 0x%lx, size %d\n",(unsigned long) skb,size); - chan = (here[3] & uPD98401_AAL5_CHAN) >> - uPD98401_AAL5_CHAN_SHIFT; - if (chan < zatm_dev->chans && zatm_dev->rx_map[chan]) { - int pos; - vcc = zatm_dev->rx_map[chan]; - pos = ZATM_VCC(vcc)->pool; - if (skb == zatm_dev->last_free[pos]) - zatm_dev->last_free[pos] = NULL; - skb_unlink(skb, zatm_dev->pool + pos); - } - else { - printk(KERN_ERR DEV_LABEL "(itf %d): RX indication " - "for non-existing channel\n",dev->number); - size = 0; - vcc = NULL; - event_dump(); - } - if (error) { - static unsigned long silence = 0; - static int last_error = 0; - - if (error != last_error || - time_after(jiffies, silence) || silence == 0){ - printk(KERN_WARNING DEV_LABEL "(itf %d): " - "chan %d error %s\n",dev->number,chan, - err_txt[error]); - last_error = error; - silence = (jiffies+2*HZ)|1; - } - size = 0; - } - if (size && (size > cells*ATM_CELL_PAYLOAD-ATM_AAL5_TRAILER || - size <= (cells-1)*ATM_CELL_PAYLOAD-ATM_AAL5_TRAILER)) { - printk(KERN_ERR DEV_LABEL "(itf %d): size %d with %d " - "cells\n",dev->number,size,cells); - size = 0; - event_dump(); - } - if (size > ATM_MAX_AAL5_PDU) { - printk(KERN_ERR DEV_LABEL "(itf %d): size too big " - "(%d)\n",dev->number,size); - size = 0; - event_dump(); - } - if (!size) { - dev_kfree_skb_irq(skb); - if (vcc) atomic_inc(&vcc->stats->rx_err); - continue; - } - if (!atm_charge(vcc,skb->truesize)) { - dev_kfree_skb_irq(skb); - continue; - } - skb->len = size; - ATM_SKB(skb)->vcc = vcc; - vcc->push(vcc,skb); - atomic_inc(&vcc->stats->rx); - } - zout(pos & 0xffff,MTA(mbx)); -#if 0 /* probably a stupid idea */ - refill_pool(dev,zatm_vcc->pool); - /* maybe this saves us a few interrupts */ -#endif -} - - -static int open_rx_first(struct atm_vcc *vcc) -{ - struct zatm_dev *zatm_dev; - struct zatm_vcc *zatm_vcc; - unsigned long flags; - unsigned short chan; - int cells; - - DPRINTK("open_rx_first (0x%x)\n",inb_p(0xc053)); - zatm_dev = ZATM_DEV(vcc->dev); - zatm_vcc = ZATM_VCC(vcc); - zatm_vcc->rx_chan = 0; - if (vcc->qos.rxtp.traffic_class == ATM_NONE) return 0; - if (vcc->qos.aal == ATM_AAL5) { - if (vcc->qos.rxtp.max_sdu > 65464) - vcc->qos.rxtp.max_sdu = 65464; - /* fix this - we may want to receive 64kB SDUs - later */ - cells = DIV_ROUND_UP(vcc->qos.rxtp.max_sdu + ATM_AAL5_TRAILER, - ATM_CELL_PAYLOAD); - zatm_vcc->pool = pool_index(cells*ATM_CELL_PAYLOAD); - } - else { - cells = 1; - zatm_vcc->pool = ZATM_AAL0_POOL; - } - if (zatm_vcc->pool < 0) return -EMSGSIZE; - spin_lock_irqsave(&zatm_dev->lock, flags); - zwait(); - zout(uPD98401_OPEN_CHAN,CMR); - zwait(); - DPRINTK("0x%x 0x%x\n",zin(CMR),zin(CER)); - chan = (zin(CMR) & uPD98401_CHAN_ADDR) >> uPD98401_CHAN_ADDR_SHIFT; - spin_unlock_irqrestore(&zatm_dev->lock, flags); - DPRINTK("chan is %d\n",chan); - if (!chan) return -EAGAIN; - use_pool(vcc->dev,zatm_vcc->pool); - DPRINTK("pool %d\n",zatm_vcc->pool); - /* set up VC descriptor */ - spin_lock_irqsave(&zatm_dev->lock, flags); - zpokel(zatm_dev,zatm_vcc->pool << uPD98401_RXVC_POOL_SHIFT, - chan*VC_SIZE/4); - zpokel(zatm_dev,uPD98401_RXVC_OD | (vcc->qos.aal == ATM_AAL5 ? - uPD98401_RXVC_AR : 0) | cells,chan*VC_SIZE/4+1); - zpokel(zatm_dev,0,chan*VC_SIZE/4+2); - zatm_vcc->rx_chan = chan; - zatm_dev->rx_map[chan] = vcc; - spin_unlock_irqrestore(&zatm_dev->lock, flags); - return 0; -} - - -static int open_rx_second(struct atm_vcc *vcc) -{ - struct zatm_dev *zatm_dev; - struct zatm_vcc *zatm_vcc; - unsigned long flags; - int pos,shift; - - DPRINTK("open_rx_second (0x%x)\n",inb_p(0xc053)); - zatm_dev = ZATM_DEV(vcc->dev); - zatm_vcc = ZATM_VCC(vcc); - if (!zatm_vcc->rx_chan) return 0; - spin_lock_irqsave(&zatm_dev->lock, flags); - /* should also handle VPI @@@ */ - pos = vcc->vci >> 1; - shift = (1-(vcc->vci & 1)) << 4; - zpokel(zatm_dev,(zpeekl(zatm_dev,pos) & ~(0xffff << shift)) | - ((zatm_vcc->rx_chan | uPD98401_RXLT_ENBL) << shift),pos); - spin_unlock_irqrestore(&zatm_dev->lock, flags); - return 0; -} - - -static void close_rx(struct atm_vcc *vcc) -{ - struct zatm_dev *zatm_dev; - struct zatm_vcc *zatm_vcc; - unsigned long flags; - int pos,shift; - - zatm_vcc = ZATM_VCC(vcc); - zatm_dev = ZATM_DEV(vcc->dev); - if (!zatm_vcc->rx_chan) return; - DPRINTK("close_rx\n"); - /* disable receiver */ - if (vcc->vpi != ATM_VPI_UNSPEC && vcc->vci != ATM_VCI_UNSPEC) { - spin_lock_irqsave(&zatm_dev->lock, flags); - pos = vcc->vci >> 1; - shift = (1-(vcc->vci & 1)) << 4; - zpokel(zatm_dev,zpeekl(zatm_dev,pos) & ~(0xffff << shift),pos); - zwait(); - zout(uPD98401_NOP,CMR); - zwait(); - zout(uPD98401_NOP,CMR); - spin_unlock_irqrestore(&zatm_dev->lock, flags); - } - spin_lock_irqsave(&zatm_dev->lock, flags); - zwait(); - zout(uPD98401_DEACT_CHAN | uPD98401_CHAN_RT | (zatm_vcc->rx_chan << - uPD98401_CHAN_ADDR_SHIFT),CMR); - zwait(); - udelay(10); /* why oh why ... ? */ - zout(uPD98401_CLOSE_CHAN | uPD98401_CHAN_RT | (zatm_vcc->rx_chan << - uPD98401_CHAN_ADDR_SHIFT),CMR); - zwait(); - if (!(zin(CMR) & uPD98401_CHAN_ADDR)) - printk(KERN_CRIT DEV_LABEL "(itf %d): can't close RX channel " - "%d\n",vcc->dev->number,zatm_vcc->rx_chan); - spin_unlock_irqrestore(&zatm_dev->lock, flags); - zatm_dev->rx_map[zatm_vcc->rx_chan] = NULL; - zatm_vcc->rx_chan = 0; - unuse_pool(vcc->dev,zatm_vcc->pool); -} - - -static int start_rx(struct atm_dev *dev) -{ - struct zatm_dev *zatm_dev; - int i; - - DPRINTK("start_rx\n"); - zatm_dev = ZATM_DEV(dev); - zatm_dev->rx_map = kcalloc(zatm_dev->chans, - sizeof(*zatm_dev->rx_map), - GFP_KERNEL); - if (!zatm_dev->rx_map) return -ENOMEM; - /* set VPI/VCI split (use all VCIs and give what's left to VPIs) */ - zpokel(zatm_dev,(1 << dev->ci_range.vci_bits)-1,uPD98401_VRR); - /* prepare free buffer pools */ - for (i = 0; i <= ZATM_LAST_POOL; i++) { - zatm_dev->pool_info[i].ref_count = 0; - zatm_dev->pool_info[i].rqa_count = 0; - zatm_dev->pool_info[i].rqu_count = 0; - zatm_dev->pool_info[i].low_water = LOW_MARK; - zatm_dev->pool_info[i].high_water = HIGH_MARK; - zatm_dev->pool_info[i].offset = 0; - zatm_dev->pool_info[i].next_off = 0; - zatm_dev->pool_info[i].next_cnt = 0; - zatm_dev->pool_info[i].next_thres = OFF_CNG_THRES; - } - return 0; -} - - -/*----------------------------------- TX ------------------------------------*/ - - -static int do_tx(struct sk_buff *skb) -{ - struct atm_vcc *vcc; - struct zatm_dev *zatm_dev; - struct zatm_vcc *zatm_vcc; - u32 *dsc; - unsigned long flags; - - EVENT("do_tx\n",0,0); - DPRINTK("sending skb %p\n",skb); - vcc = ATM_SKB(skb)->vcc; - zatm_dev = ZATM_DEV(vcc->dev); - zatm_vcc = ZATM_VCC(vcc); - EVENT("iovcnt=%d\n",skb_shinfo(skb)->nr_frags,0); - spin_lock_irqsave(&zatm_dev->lock, flags); - if (!skb_shinfo(skb)->nr_frags) { - if (zatm_vcc->txing == RING_ENTRIES-1) { - spin_unlock_irqrestore(&zatm_dev->lock, flags); - return RING_BUSY; - } - zatm_vcc->txing++; - dsc = zatm_vcc->ring+zatm_vcc->ring_curr; - zatm_vcc->ring_curr = (zatm_vcc->ring_curr+RING_WORDS) & - (RING_ENTRIES*RING_WORDS-1); - dsc[1] = 0; - dsc[2] = skb->len; - dsc[3] = virt_to_bus(skb->data); - mb(); - dsc[0] = uPD98401_TXPD_V | uPD98401_TXPD_DP | uPD98401_TXPD_SM - | (vcc->qos.aal == ATM_AAL5 ? uPD98401_TXPD_AAL5 : 0 | - (ATM_SKB(skb)->atm_options & ATM_ATMOPT_CLP ? - uPD98401_CLPM_1 : uPD98401_CLPM_0)); - EVENT("dsc (0x%lx)\n",(unsigned long) dsc,0); - } - else { -printk("NONONONOO!!!!\n"); - dsc = NULL; -#if 0 - u32 *put; - int i; - - dsc = kmalloc(uPD98401_TXPD_SIZE * 2 + - uPD98401_TXBD_SIZE * ATM_SKB(skb)->iovcnt, GFP_ATOMIC); - if (!dsc) { - if (vcc->pop) - vcc->pop(vcc, skb); - else - dev_kfree_skb_irq(skb); - return -EAGAIN; - } - /* @@@ should check alignment */ - put = dsc+8; - dsc[0] = uPD98401_TXPD_V | uPD98401_TXPD_DP | - (vcc->aal == ATM_AAL5 ? uPD98401_TXPD_AAL5 : 0 | - (ATM_SKB(skb)->atm_options & ATM_ATMOPT_CLP ? - uPD98401_CLPM_1 : uPD98401_CLPM_0)); - dsc[1] = 0; - dsc[2] = ATM_SKB(skb)->iovcnt * uPD98401_TXBD_SIZE; - dsc[3] = virt_to_bus(put); - for (i = 0; i < ATM_SKB(skb)->iovcnt; i++) { - *put++ = ((struct iovec *) skb->data)[i].iov_len; - *put++ = virt_to_bus(((struct iovec *) - skb->data)[i].iov_base); - } - put[-2] |= uPD98401_TXBD_LAST; -#endif - } - ZATM_PRV_DSC(skb) = dsc; - skb_queue_tail(&zatm_vcc->tx_queue,skb); - DPRINTK("QRP=0x%08lx\n",zpeekl(zatm_dev,zatm_vcc->tx_chan*VC_SIZE/4+ - uPD98401_TXVC_QRP)); - zwait(); - zout(uPD98401_TX_READY | (zatm_vcc->tx_chan << - uPD98401_CHAN_ADDR_SHIFT),CMR); - spin_unlock_irqrestore(&zatm_dev->lock, flags); - EVENT("done\n",0,0); - return 0; -} - - -static inline void dequeue_tx(struct atm_vcc *vcc) -{ - struct zatm_vcc *zatm_vcc; - struct sk_buff *skb; - - EVENT("dequeue_tx\n",0,0); - zatm_vcc = ZATM_VCC(vcc); - skb = skb_dequeue(&zatm_vcc->tx_queue); - if (!skb) { - printk(KERN_CRIT DEV_LABEL "(itf %d): dequeue_tx but not " - "txing\n",vcc->dev->number); - return; - } -#if 0 /* @@@ would fail on CLP */ -if (*ZATM_PRV_DSC(skb) != (uPD98401_TXPD_V | uPD98401_TXPD_DP | - uPD98401_TXPD_SM | uPD98401_TXPD_AAL5)) printk("@#*$!!!! (%08x)\n", - *ZATM_PRV_DSC(skb)); -#endif - *ZATM_PRV_DSC(skb) = 0; /* mark as invalid */ - zatm_vcc->txing--; - if (vcc->pop) vcc->pop(vcc,skb); - else dev_kfree_skb_irq(skb); - while ((skb = skb_dequeue(&zatm_vcc->backlog))) - if (do_tx(skb) == RING_BUSY) { - skb_queue_head(&zatm_vcc->backlog,skb); - break; - } - atomic_inc(&vcc->stats->tx); - wake_up(&zatm_vcc->tx_wait); -} - - -static void poll_tx(struct atm_dev *dev,int mbx) -{ - struct zatm_dev *zatm_dev; - unsigned long pos; - u32 x; - - EVENT("poll_tx\n",0,0); - zatm_dev = ZATM_DEV(dev); - pos = (zatm_dev->mbx_start[mbx] & ~0xffffUL) | zin(MTA(mbx)); - while (x = zin(MWA(mbx)), (pos & 0xffff) != x) { - int chan; - -#if 1 - u32 data,*addr; - - EVENT("MBX: host 0x%lx, nic 0x%x\n",pos,x); - addr = (u32 *) pos; - data = *addr; - chan = (data & uPD98401_TXI_CONN) >> uPD98401_TXI_CONN_SHIFT; - EVENT("addr = 0x%lx, data = 0x%08x,",(unsigned long) addr, - data); - EVENT("chan = %d\n",chan,0); -#else -NO ! - chan = (zatm_dev->mbx_start[mbx][pos >> 2] & uPD98401_TXI_CONN) - >> uPD98401_TXI_CONN_SHIFT; -#endif - if (chan < zatm_dev->chans && zatm_dev->tx_map[chan]) - dequeue_tx(zatm_dev->tx_map[chan]); - else { - printk(KERN_CRIT DEV_LABEL "(itf %d): TX indication " - "for non-existing channel %d\n",dev->number,chan); - event_dump(); - } - if (((pos += 4) & 0xffff) == zatm_dev->mbx_end[mbx]) - pos = zatm_dev->mbx_start[mbx]; - } - zout(pos & 0xffff,MTA(mbx)); -} - - -/* - * BUG BUG BUG: Doesn't handle "new-style" rate specification yet. - */ - -static int alloc_shaper(struct atm_dev *dev,int *pcr,int min,int max,int ubr) -{ - struct zatm_dev *zatm_dev; - unsigned long flags; - unsigned long i,m,c; - int shaper; - - DPRINTK("alloc_shaper (min = %d, max = %d)\n",min,max); - zatm_dev = ZATM_DEV(dev); - if (!zatm_dev->free_shapers) return -EAGAIN; - for (shaper = 0; !((zatm_dev->free_shapers >> shaper) & 1); shaper++); - zatm_dev->free_shapers &= ~1 << shaper; - if (ubr) { - c = 5; - i = m = 1; - zatm_dev->ubr_ref_cnt++; - zatm_dev->ubr = shaper; - *pcr = 0; - } - else { - if (min) { - if (min <= 255) { - i = min; - m = ATM_OC3_PCR; - } - else { - i = 255; - m = ATM_OC3_PCR*255/min; - } - } - else { - if (max > zatm_dev->tx_bw) max = zatm_dev->tx_bw; - if (max <= 255) { - i = max; - m = ATM_OC3_PCR; - } - else { - i = 255; - m = DIV_ROUND_UP(ATM_OC3_PCR*255, max); - } - } - if (i > m) { - printk(KERN_CRIT DEV_LABEL "shaper algorithm botched " - "[%d,%d] -> i=%ld,m=%ld\n",min,max,i,m); - m = i; - } - *pcr = i*ATM_OC3_PCR/m; - c = 20; /* @@@ should use max_cdv ! */ - if ((min && *pcr < min) || (max && *pcr > max)) return -EINVAL; - if (zatm_dev->tx_bw < *pcr) return -EAGAIN; - zatm_dev->tx_bw -= *pcr; - } - spin_lock_irqsave(&zatm_dev->lock, flags); - DPRINTK("i = %d, m = %d, PCR = %d\n",i,m,*pcr); - zpokel(zatm_dev,(i << uPD98401_IM_I_SHIFT) | m,uPD98401_IM(shaper)); - zpokel(zatm_dev,c << uPD98401_PC_C_SHIFT,uPD98401_PC(shaper)); - zpokel(zatm_dev,0,uPD98401_X(shaper)); - zpokel(zatm_dev,0,uPD98401_Y(shaper)); - zpokel(zatm_dev,uPD98401_PS_E,uPD98401_PS(shaper)); - spin_unlock_irqrestore(&zatm_dev->lock, flags); - return shaper; -} - - -static void dealloc_shaper(struct atm_dev *dev,int shaper) -{ - struct zatm_dev *zatm_dev; - unsigned long flags; - - zatm_dev = ZATM_DEV(dev); - if (shaper == zatm_dev->ubr) { - if (--zatm_dev->ubr_ref_cnt) return; - zatm_dev->ubr = -1; - } - spin_lock_irqsave(&zatm_dev->lock, flags); - zpokel(zatm_dev,zpeekl(zatm_dev,uPD98401_PS(shaper)) & ~uPD98401_PS_E, - uPD98401_PS(shaper)); - spin_unlock_irqrestore(&zatm_dev->lock, flags); - zatm_dev->free_shapers |= 1 << shaper; -} - - -static void close_tx(struct atm_vcc *vcc) -{ - struct zatm_dev *zatm_dev; - struct zatm_vcc *zatm_vcc; - unsigned long flags; - int chan; - - zatm_vcc = ZATM_VCC(vcc); - zatm_dev = ZATM_DEV(vcc->dev); - chan = zatm_vcc->tx_chan; - if (!chan) return; - DPRINTK("close_tx\n"); - if (skb_peek(&zatm_vcc->backlog)) { - printk("waiting for backlog to drain ...\n"); - event_dump(); - wait_event(zatm_vcc->tx_wait, !skb_peek(&zatm_vcc->backlog)); - } - if (skb_peek(&zatm_vcc->tx_queue)) { - printk("waiting for TX queue to drain ...\n"); - event_dump(); - wait_event(zatm_vcc->tx_wait, !skb_peek(&zatm_vcc->tx_queue)); - } - spin_lock_irqsave(&zatm_dev->lock, flags); -#if 0 - zwait(); - zout(uPD98401_DEACT_CHAN | (chan << uPD98401_CHAN_ADDR_SHIFT),CMR); -#endif - zwait(); - zout(uPD98401_CLOSE_CHAN | (chan << uPD98401_CHAN_ADDR_SHIFT),CMR); - zwait(); - if (!(zin(CMR) & uPD98401_CHAN_ADDR)) - printk(KERN_CRIT DEV_LABEL "(itf %d): can't close TX channel " - "%d\n",vcc->dev->number,chan); - spin_unlock_irqrestore(&zatm_dev->lock, flags); - zatm_vcc->tx_chan = 0; - zatm_dev->tx_map[chan] = NULL; - if (zatm_vcc->shaper != zatm_dev->ubr) { - zatm_dev->tx_bw += vcc->qos.txtp.min_pcr; - dealloc_shaper(vcc->dev,zatm_vcc->shaper); - } - kfree(zatm_vcc->ring); -} - - -static int open_tx_first(struct atm_vcc *vcc) -{ - struct zatm_dev *zatm_dev; - struct zatm_vcc *zatm_vcc; - unsigned long flags; - u32 *loop; - unsigned short chan; - int unlimited; - - DPRINTK("open_tx_first\n"); - zatm_dev = ZATM_DEV(vcc->dev); - zatm_vcc = ZATM_VCC(vcc); - zatm_vcc->tx_chan = 0; - if (vcc->qos.txtp.traffic_class == ATM_NONE) return 0; - spin_lock_irqsave(&zatm_dev->lock, flags); - zwait(); - zout(uPD98401_OPEN_CHAN,CMR); - zwait(); - DPRINTK("0x%x 0x%x\n",zin(CMR),zin(CER)); - chan = (zin(CMR) & uPD98401_CHAN_ADDR) >> uPD98401_CHAN_ADDR_SHIFT; - spin_unlock_irqrestore(&zatm_dev->lock, flags); - DPRINTK("chan is %d\n",chan); - if (!chan) return -EAGAIN; - unlimited = vcc->qos.txtp.traffic_class == ATM_UBR && - (!vcc->qos.txtp.max_pcr || vcc->qos.txtp.max_pcr == ATM_MAX_PCR || - vcc->qos.txtp.max_pcr >= ATM_OC3_PCR); - if (unlimited && zatm_dev->ubr != -1) zatm_vcc->shaper = zatm_dev->ubr; - else { - int pcr; - - if (unlimited) vcc->qos.txtp.max_sdu = ATM_MAX_AAL5_PDU; - if ((zatm_vcc->shaper = alloc_shaper(vcc->dev,&pcr, - vcc->qos.txtp.min_pcr,vcc->qos.txtp.max_pcr,unlimited)) - < 0) { - close_tx(vcc); - return zatm_vcc->shaper; - } - if (pcr > ATM_OC3_PCR) pcr = ATM_OC3_PCR; - vcc->qos.txtp.min_pcr = vcc->qos.txtp.max_pcr = pcr; - } - zatm_vcc->tx_chan = chan; - skb_queue_head_init(&zatm_vcc->tx_queue); - init_waitqueue_head(&zatm_vcc->tx_wait); - /* initialize ring */ - zatm_vcc->ring = kzalloc(RING_SIZE,GFP_KERNEL); - if (!zatm_vcc->ring) return -ENOMEM; - loop = zatm_vcc->ring+RING_ENTRIES*RING_WORDS; - loop[0] = uPD98401_TXPD_V; - loop[1] = loop[2] = 0; - loop[3] = virt_to_bus(zatm_vcc->ring); - zatm_vcc->ring_curr = 0; - zatm_vcc->txing = 0; - skb_queue_head_init(&zatm_vcc->backlog); - zpokel(zatm_dev,virt_to_bus(zatm_vcc->ring), - chan*VC_SIZE/4+uPD98401_TXVC_QRP); - return 0; -} - - -static int open_tx_second(struct atm_vcc *vcc) -{ - struct zatm_dev *zatm_dev; - struct zatm_vcc *zatm_vcc; - unsigned long flags; - - DPRINTK("open_tx_second\n"); - zatm_dev = ZATM_DEV(vcc->dev); - zatm_vcc = ZATM_VCC(vcc); - if (!zatm_vcc->tx_chan) return 0; - /* set up VC descriptor */ - spin_lock_irqsave(&zatm_dev->lock, flags); - zpokel(zatm_dev,0,zatm_vcc->tx_chan*VC_SIZE/4); - zpokel(zatm_dev,uPD98401_TXVC_L | (zatm_vcc->shaper << - uPD98401_TXVC_SHP_SHIFT) | (vcc->vpi << uPD98401_TXVC_VPI_SHIFT) | - vcc->vci,zatm_vcc->tx_chan*VC_SIZE/4+1); - zpokel(zatm_dev,0,zatm_vcc->tx_chan*VC_SIZE/4+2); - spin_unlock_irqrestore(&zatm_dev->lock, flags); - zatm_dev->tx_map[zatm_vcc->tx_chan] = vcc; - return 0; -} - - -static int start_tx(struct atm_dev *dev) -{ - struct zatm_dev *zatm_dev; - int i; - - DPRINTK("start_tx\n"); - zatm_dev = ZATM_DEV(dev); - zatm_dev->tx_map = kmalloc_array(zatm_dev->chans, - sizeof(*zatm_dev->tx_map), - GFP_KERNEL); - if (!zatm_dev->tx_map) return -ENOMEM; - zatm_dev->tx_bw = ATM_OC3_PCR; - zatm_dev->free_shapers = (1 << NR_SHAPERS)-1; - zatm_dev->ubr = -1; - zatm_dev->ubr_ref_cnt = 0; - /* initialize shapers */ - for (i = 0; i < NR_SHAPERS; i++) zpokel(zatm_dev,0,uPD98401_PS(i)); - return 0; -} - - -/*------------------------------- interrupts --------------------------------*/ - - -static irqreturn_t zatm_int(int irq,void *dev_id) -{ - struct atm_dev *dev; - struct zatm_dev *zatm_dev; - u32 reason; - int handled = 0; - - dev = dev_id; - zatm_dev = ZATM_DEV(dev); - while ((reason = zin(GSR))) { - handled = 1; - EVENT("reason 0x%x\n",reason,0); - if (reason & uPD98401_INT_PI) { - EVENT("PHY int\n",0,0); - dev->phy->interrupt(dev); - } - if (reason & uPD98401_INT_RQA) { - unsigned long pools; - int i; - - pools = zin(RQA); - EVENT("RQA (0x%08x)\n",pools,0); - for (i = 0; pools; i++) { - if (pools & 1) { - refill_pool(dev,i); - zatm_dev->pool_info[i].rqa_count++; - } - pools >>= 1; - } - } - if (reason & uPD98401_INT_RQU) { - unsigned long pools; - int i; - pools = zin(RQU); - printk(KERN_WARNING DEV_LABEL "(itf %d): RQU 0x%08lx\n", - dev->number,pools); - event_dump(); - for (i = 0; pools; i++) { - if (pools & 1) { - refill_pool(dev,i); - zatm_dev->pool_info[i].rqu_count++; - } - pools >>= 1; - } - } - /* don't handle RD */ - if (reason & uPD98401_INT_SPE) - printk(KERN_ALERT DEV_LABEL "(itf %d): system parity " - "error at 0x%08x\n",dev->number,zin(ADDR)); - if (reason & uPD98401_INT_CPE) - printk(KERN_ALERT DEV_LABEL "(itf %d): control memory " - "parity error at 0x%08x\n",dev->number,zin(ADDR)); - if (reason & uPD98401_INT_SBE) { - printk(KERN_ALERT DEV_LABEL "(itf %d): system bus " - "error at 0x%08x\n",dev->number,zin(ADDR)); - event_dump(); - } - /* don't handle IND */ - if (reason & uPD98401_INT_MF) { - printk(KERN_CRIT DEV_LABEL "(itf %d): mailbox full " - "(0x%x)\n",dev->number,(reason & uPD98401_INT_MF) - >> uPD98401_INT_MF_SHIFT); - event_dump(); - /* @@@ should try to recover */ - } - if (reason & uPD98401_INT_MM) { - if (reason & 1) poll_rx(dev,0); - if (reason & 2) poll_rx(dev,1); - if (reason & 4) poll_tx(dev,2); - if (reason & 8) poll_tx(dev,3); - } - /* @@@ handle RCRn */ - } - return IRQ_RETVAL(handled); -} - - -/*----------------------------- (E)EPROM access -----------------------------*/ - - -static void eprom_set(struct zatm_dev *zatm_dev, unsigned long value, - unsigned short cmd) -{ - int error; - - if ((error = pci_write_config_dword(zatm_dev->pci_dev,cmd,value))) - printk(KERN_ERR DEV_LABEL ": PCI write failed (0x%02x)\n", - error); -} - - -static unsigned long eprom_get(struct zatm_dev *zatm_dev, unsigned short cmd) -{ - unsigned int value; - int error; - - if ((error = pci_read_config_dword(zatm_dev->pci_dev,cmd,&value))) - printk(KERN_ERR DEV_LABEL ": PCI read failed (0x%02x)\n", - error); - return value; -} - - -static void eprom_put_bits(struct zatm_dev *zatm_dev, unsigned long data, - int bits, unsigned short cmd) -{ - unsigned long value; - int i; - - for (i = bits-1; i >= 0; i--) { - value = ZEPROM_CS | (((data >> i) & 1) ? ZEPROM_DI : 0); - eprom_set(zatm_dev,value,cmd); - eprom_set(zatm_dev,value | ZEPROM_SK,cmd); - eprom_set(zatm_dev,value,cmd); - } -} - - -static void eprom_get_byte(struct zatm_dev *zatm_dev, unsigned char *byte, - unsigned short cmd) -{ - int i; - - *byte = 0; - for (i = 8; i; i--) { - eprom_set(zatm_dev,ZEPROM_CS,cmd); - eprom_set(zatm_dev,ZEPROM_CS | ZEPROM_SK,cmd); - *byte <<= 1; - if (eprom_get(zatm_dev,cmd) & ZEPROM_DO) *byte |= 1; - eprom_set(zatm_dev,ZEPROM_CS,cmd); - } -} - - -static int eprom_try_esi(struct atm_dev *dev, unsigned short cmd, int offset, - int swap) -{ - unsigned char buf[ZEPROM_SIZE]; - struct zatm_dev *zatm_dev; - int i; - - zatm_dev = ZATM_DEV(dev); - for (i = 0; i < ZEPROM_SIZE; i += 2) { - eprom_set(zatm_dev,ZEPROM_CS,cmd); /* select EPROM */ - eprom_put_bits(zatm_dev,ZEPROM_CMD_READ,ZEPROM_CMD_LEN,cmd); - eprom_put_bits(zatm_dev,i >> 1,ZEPROM_ADDR_LEN,cmd); - eprom_get_byte(zatm_dev,buf+i+swap,cmd); - eprom_get_byte(zatm_dev,buf+i+1-swap,cmd); - eprom_set(zatm_dev,0,cmd); /* deselect EPROM */ - } - memcpy(dev->esi,buf+offset,ESI_LEN); - return memcmp(dev->esi,"\0\0\0\0\0",ESI_LEN); /* assumes ESI_LEN == 6 */ -} - - -static void eprom_get_esi(struct atm_dev *dev) -{ - if (eprom_try_esi(dev,ZEPROM_V1_REG,ZEPROM_V1_ESI_OFF,1)) return; - (void) eprom_try_esi(dev,ZEPROM_V2_REG,ZEPROM_V2_ESI_OFF,0); -} - - -/*--------------------------------- entries ---------------------------------*/ - - -static int zatm_init(struct atm_dev *dev) -{ - struct zatm_dev *zatm_dev; - struct pci_dev *pci_dev; - unsigned short command; - int error,i,last; - unsigned long t0,t1,t2; - - DPRINTK(">zatm_init\n"); - zatm_dev = ZATM_DEV(dev); - spin_lock_init(&zatm_dev->lock); - pci_dev = zatm_dev->pci_dev; - zatm_dev->base = pci_resource_start(pci_dev, 0); - zatm_dev->irq = pci_dev->irq; - if ((error = pci_read_config_word(pci_dev,PCI_COMMAND,&command))) { - printk(KERN_ERR DEV_LABEL "(itf %d): init error 0x%02x\n", - dev->number,error); - return -EINVAL; - } - if ((error = pci_write_config_word(pci_dev,PCI_COMMAND, - command | PCI_COMMAND_IO | PCI_COMMAND_MASTER))) { - printk(KERN_ERR DEV_LABEL "(itf %d): can't enable IO (0x%02x)" - "\n",dev->number,error); - return -EIO; - } - eprom_get_esi(dev); - printk(KERN_NOTICE DEV_LABEL "(itf %d): rev.%d,base=0x%x,irq=%d,", - dev->number,pci_dev->revision,zatm_dev->base,zatm_dev->irq); - /* reset uPD98401 */ - zout(0,SWR); - while (!(zin(GSR) & uPD98401_INT_IND)); - zout(uPD98401_GMR_ONE /*uPD98401_BURST4*/,GMR); - last = MAX_CRAM_SIZE; - for (i = last-RAM_INCREMENT; i >= 0; i -= RAM_INCREMENT) { - zpokel(zatm_dev,0x55555555,i); - if (zpeekl(zatm_dev,i) != 0x55555555) last = i; - else { - zpokel(zatm_dev,0xAAAAAAAA,i); - if (zpeekl(zatm_dev,i) != 0xAAAAAAAA) last = i; - else zpokel(zatm_dev,i,i); - } - } - for (i = 0; i < last; i += RAM_INCREMENT) - if (zpeekl(zatm_dev,i) != i) break; - zatm_dev->mem = i << 2; - while (i) zpokel(zatm_dev,0,--i); - /* reset again to rebuild memory pointers */ - zout(0,SWR); - while (!(zin(GSR) & uPD98401_INT_IND)); - zout(uPD98401_GMR_ONE | uPD98401_BURST8 | uPD98401_BURST4 | - uPD98401_BURST2 | uPD98401_GMR_PM | uPD98401_GMR_DR,GMR); - /* TODO: should shrink allocation now */ - printk("mem=%dkB,%s (",zatm_dev->mem >> 10,zatm_dev->copper ? "UTP" : - "MMF"); - for (i = 0; i < ESI_LEN; i++) - printk("%02X%s",dev->esi[i],i == ESI_LEN-1 ? ")\n" : "-"); - do { - unsigned long flags; - - spin_lock_irqsave(&zatm_dev->lock, flags); - t0 = zpeekl(zatm_dev,uPD98401_TSR); - udelay(10); - t1 = zpeekl(zatm_dev,uPD98401_TSR); - udelay(1010); - t2 = zpeekl(zatm_dev,uPD98401_TSR); - spin_unlock_irqrestore(&zatm_dev->lock, flags); - } - while (t0 > t1 || t1 > t2); /* loop if wrapping ... */ - zatm_dev->khz = t2-2*t1+t0; - printk(KERN_NOTICE DEV_LABEL "(itf %d): uPD98401 %d.%d at %d.%03d " - "MHz\n",dev->number, - (zin(VER) & uPD98401_MAJOR) >> uPD98401_MAJOR_SHIFT, - zin(VER) & uPD98401_MINOR,zatm_dev->khz/1000,zatm_dev->khz % 1000); - return uPD98402_init(dev); -} - - -static int zatm_start(struct atm_dev *dev) -{ - struct zatm_dev *zatm_dev = ZATM_DEV(dev); - struct pci_dev *pdev = zatm_dev->pci_dev; - unsigned long curr; - int pools,vccs,rx; - int error, i, ld; - - DPRINTK("zatm_start\n"); - zatm_dev->rx_map = zatm_dev->tx_map = NULL; - for (i = 0; i < NR_MBX; i++) - zatm_dev->mbx_start[i] = 0; - error = request_irq(zatm_dev->irq, zatm_int, IRQF_SHARED, DEV_LABEL, dev); - if (error < 0) { - printk(KERN_ERR DEV_LABEL "(itf %d): IRQ%d is already in use\n", - dev->number,zatm_dev->irq); - goto done; - } - /* define memory regions */ - pools = NR_POOLS; - if (NR_SHAPERS*SHAPER_SIZE > pools*POOL_SIZE) - pools = NR_SHAPERS*SHAPER_SIZE/POOL_SIZE; - vccs = (zatm_dev->mem-NR_SHAPERS*SHAPER_SIZE-pools*POOL_SIZE)/ - (2*VC_SIZE+RX_SIZE); - ld = -1; - for (rx = 1; rx < vccs; rx <<= 1) ld++; - dev->ci_range.vpi_bits = 0; /* @@@ no VPI for now */ - dev->ci_range.vci_bits = ld; - dev->link_rate = ATM_OC3_PCR; - zatm_dev->chans = vccs; /* ??? */ - curr = rx*RX_SIZE/4; - DPRINTK("RX pool 0x%08lx\n",curr); - zpokel(zatm_dev,curr,uPD98401_PMA); /* receive pool */ - zatm_dev->pool_base = curr; - curr += pools*POOL_SIZE/4; - DPRINTK("Shapers 0x%08lx\n",curr); - zpokel(zatm_dev,curr,uPD98401_SMA); /* shapers */ - curr += NR_SHAPERS*SHAPER_SIZE/4; - DPRINTK("Free 0x%08lx\n",curr); - zpokel(zatm_dev,curr,uPD98401_TOS); /* free pool */ - printk(KERN_INFO DEV_LABEL "(itf %d): %d shapers, %d pools, %d RX, " - "%ld VCs\n",dev->number,NR_SHAPERS,pools,rx, - (zatm_dev->mem-curr*4)/VC_SIZE); - /* create mailboxes */ - for (i = 0; i < NR_MBX; i++) { - void *mbx; - dma_addr_t mbx_dma; - - if (!mbx_entries[i]) - continue; - mbx = dma_alloc_coherent(&pdev->dev, - 2 * MBX_SIZE(i), &mbx_dma, GFP_KERNEL); - if (!mbx) { - error = -ENOMEM; - goto out; - } - /* - * Alignment provided by dma_alloc_coherent() isn't enough - * for this device. - */ - if (((unsigned long)mbx ^ mbx_dma) & 0xffff) { - printk(KERN_ERR DEV_LABEL "(itf %d): system " - "bus incompatible with driver\n", dev->number); - dma_free_coherent(&pdev->dev, 2*MBX_SIZE(i), mbx, mbx_dma); - error = -ENODEV; - goto out; - } - DPRINTK("mbx@0x%08lx-0x%08lx\n", mbx, mbx + MBX_SIZE(i)); - zatm_dev->mbx_start[i] = (unsigned long)mbx; - zatm_dev->mbx_dma[i] = mbx_dma; - zatm_dev->mbx_end[i] = (zatm_dev->mbx_start[i] + MBX_SIZE(i)) & - 0xffff; - zout(mbx_dma >> 16, MSH(i)); - zout(mbx_dma, MSL(i)); - zout(zatm_dev->mbx_end[i], MBA(i)); - zout((unsigned long)mbx & 0xffff, MTA(i)); - zout((unsigned long)mbx & 0xffff, MWA(i)); - } - error = start_tx(dev); - if (error) - goto out; - error = start_rx(dev); - if (error) - goto out_tx; - error = dev->phy->start(dev); - if (error) - goto out_rx; - zout(0xffffffff,IMR); /* enable interrupts */ - /* enable TX & RX */ - zout(zin(GMR) | uPD98401_GMR_SE | uPD98401_GMR_RE,GMR); -done: - return error; - -out_rx: - kfree(zatm_dev->rx_map); -out_tx: - kfree(zatm_dev->tx_map); -out: - while (i-- > 0) { - dma_free_coherent(&pdev->dev, 2 * MBX_SIZE(i), - (void *)zatm_dev->mbx_start[i], - zatm_dev->mbx_dma[i]); - } - free_irq(zatm_dev->irq, dev); - goto done; -} - - -static void zatm_close(struct atm_vcc *vcc) -{ - DPRINTK(">zatm_close\n"); - if (!ZATM_VCC(vcc)) return; - clear_bit(ATM_VF_READY,&vcc->flags); - close_rx(vcc); - EVENT("close_tx\n",0,0); - close_tx(vcc); - DPRINTK("zatm_close: done waiting\n"); - /* deallocate memory */ - kfree(ZATM_VCC(vcc)); - vcc->dev_data = NULL; - clear_bit(ATM_VF_ADDR,&vcc->flags); -} - - -static int zatm_open(struct atm_vcc *vcc) -{ - struct zatm_vcc *zatm_vcc; - short vpi = vcc->vpi; - int vci = vcc->vci; - int error; - - DPRINTK(">zatm_open\n"); - if (!test_bit(ATM_VF_PARTIAL,&vcc->flags)) - vcc->dev_data = NULL; - if (vci != ATM_VPI_UNSPEC && vpi != ATM_VCI_UNSPEC) - set_bit(ATM_VF_ADDR,&vcc->flags); - if (vcc->qos.aal != ATM_AAL5) return -EINVAL; /* @@@ AAL0 */ - DPRINTK(DEV_LABEL "(itf %d): open %d.%d\n",vcc->dev->number,vcc->vpi, - vcc->vci); - if (!test_bit(ATM_VF_PARTIAL,&vcc->flags)) { - zatm_vcc = kmalloc(sizeof(*zatm_vcc), GFP_KERNEL); - if (!zatm_vcc) { - clear_bit(ATM_VF_ADDR,&vcc->flags); - return -ENOMEM; - } - vcc->dev_data = zatm_vcc; - ZATM_VCC(vcc)->tx_chan = 0; /* for zatm_close after open_rx */ - if ((error = open_rx_first(vcc))) { - zatm_close(vcc); - return error; - } - if ((error = open_tx_first(vcc))) { - zatm_close(vcc); - return error; - } - } - if (vci == ATM_VPI_UNSPEC || vpi == ATM_VCI_UNSPEC) return 0; - if ((error = open_rx_second(vcc))) { - zatm_close(vcc); - return error; - } - if ((error = open_tx_second(vcc))) { - zatm_close(vcc); - return error; - } - set_bit(ATM_VF_READY,&vcc->flags); - return 0; -} - - -static int zatm_change_qos(struct atm_vcc *vcc,struct atm_qos *qos,int flags) -{ - printk("Not yet implemented\n"); - return -ENOSYS; - /* @@@ */ -} - - -static int zatm_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg) -{ - struct zatm_dev *zatm_dev; - unsigned long flags; - - zatm_dev = ZATM_DEV(dev); - switch (cmd) { - case ZATM_GETPOOLZ: - if (!capable(CAP_NET_ADMIN)) return -EPERM; - fallthrough; - case ZATM_GETPOOL: - { - struct zatm_pool_info info; - int pool; - - if (get_user(pool, - &((struct zatm_pool_req __user *) arg)->pool_num)) - return -EFAULT; - if (pool < 0 || pool > ZATM_LAST_POOL) - return -EINVAL; - pool = array_index_nospec(pool, - ZATM_LAST_POOL + 1); - spin_lock_irqsave(&zatm_dev->lock, flags); - info = zatm_dev->pool_info[pool]; - if (cmd == ZATM_GETPOOLZ) { - zatm_dev->pool_info[pool].rqa_count = 0; - zatm_dev->pool_info[pool].rqu_count = 0; - } - spin_unlock_irqrestore(&zatm_dev->lock, flags); - return copy_to_user( - &((struct zatm_pool_req __user *) arg)->info, - &info,sizeof(info)) ? -EFAULT : 0; - } - case ZATM_SETPOOL: - { - struct zatm_pool_info info; - int pool; - - if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (get_user(pool, - &((struct zatm_pool_req __user *) arg)->pool_num)) - return -EFAULT; - if (pool < 0 || pool > ZATM_LAST_POOL) - return -EINVAL; - pool = array_index_nospec(pool, - ZATM_LAST_POOL + 1); - if (copy_from_user(&info, - &((struct zatm_pool_req __user *) arg)->info, - sizeof(info))) return -EFAULT; - if (!info.low_water) - info.low_water = zatm_dev-> - pool_info[pool].low_water; - if (!info.high_water) - info.high_water = zatm_dev-> - pool_info[pool].high_water; - if (!info.next_thres) - info.next_thres = zatm_dev-> - pool_info[pool].next_thres; - if (info.low_water >= info.high_water || - info.low_water < 0) - return -EINVAL; - spin_lock_irqsave(&zatm_dev->lock, flags); - zatm_dev->pool_info[pool].low_water = - info.low_water; - zatm_dev->pool_info[pool].high_water = - info.high_water; - zatm_dev->pool_info[pool].next_thres = - info.next_thres; - spin_unlock_irqrestore(&zatm_dev->lock, flags); - return 0; - } - default: - if (!dev->phy->ioctl) return -ENOIOCTLCMD; - return dev->phy->ioctl(dev,cmd,arg); - } -} - -static int zatm_send(struct atm_vcc *vcc,struct sk_buff *skb) -{ - int error; - - EVENT(">zatm_send 0x%lx\n",(unsigned long) skb,0); - if (!ZATM_VCC(vcc)->tx_chan || !test_bit(ATM_VF_READY,&vcc->flags)) { - if (vcc->pop) vcc->pop(vcc,skb); - else dev_kfree_skb(skb); - return -EINVAL; - } - if (!skb) { - printk(KERN_CRIT "!skb in zatm_send ?\n"); - if (vcc->pop) vcc->pop(vcc,skb); - return -EINVAL; - } - ATM_SKB(skb)->vcc = vcc; - error = do_tx(skb); - if (error != RING_BUSY) return error; - skb_queue_tail(&ZATM_VCC(vcc)->backlog,skb); - return 0; -} - - -static void zatm_phy_put(struct atm_dev *dev,unsigned char value, - unsigned long addr) -{ - struct zatm_dev *zatm_dev; - - zatm_dev = ZATM_DEV(dev); - zwait(); - zout(value,CER); - zout(uPD98401_IND_ACC | uPD98401_IA_B0 | - (uPD98401_IA_TGT_PHY << uPD98401_IA_TGT_SHIFT) | addr,CMR); -} - - -static unsigned char zatm_phy_get(struct atm_dev *dev,unsigned long addr) -{ - struct zatm_dev *zatm_dev; - - zatm_dev = ZATM_DEV(dev); - zwait(); - zout(uPD98401_IND_ACC | uPD98401_IA_B0 | uPD98401_IA_RW | - (uPD98401_IA_TGT_PHY << uPD98401_IA_TGT_SHIFT) | addr,CMR); - zwait(); - return zin(CER) & 0xff; -} - - -static const struct atmdev_ops ops = { - .open = zatm_open, - .close = zatm_close, - .ioctl = zatm_ioctl, - .send = zatm_send, - .phy_put = zatm_phy_put, - .phy_get = zatm_phy_get, - .change_qos = zatm_change_qos, -}; - -static int zatm_init_one(struct pci_dev *pci_dev, - const struct pci_device_id *ent) -{ - struct atm_dev *dev; - struct zatm_dev *zatm_dev; - int ret = -ENOMEM; - - zatm_dev = kmalloc(sizeof(*zatm_dev), GFP_KERNEL); - if (!zatm_dev) { - printk(KERN_EMERG "%s: memory shortage\n", DEV_LABEL); - goto out; - } - - dev = atm_dev_register(DEV_LABEL, &pci_dev->dev, &ops, -1, NULL); - if (!dev) - goto out_free; - - ret = pci_enable_device(pci_dev); - if (ret < 0) - goto out_deregister; - - ret = pci_request_regions(pci_dev, DEV_LABEL); - if (ret < 0) - goto out_disable; - - ret = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); - if (ret < 0) - goto out_release; - - zatm_dev->pci_dev = pci_dev; - dev->dev_data = zatm_dev; - zatm_dev->copper = (int)ent->driver_data; - if ((ret = zatm_init(dev)) || (ret = zatm_start(dev))) - goto out_release; - - pci_set_drvdata(pci_dev, dev); - zatm_dev->more = zatm_boards; - zatm_boards = dev; - ret = 0; -out: - return ret; - -out_release: - pci_release_regions(pci_dev); -out_disable: - pci_disable_device(pci_dev); -out_deregister: - atm_dev_deregister(dev); -out_free: - kfree(zatm_dev); - goto out; -} - - -MODULE_LICENSE("GPL"); - -static const struct pci_device_id zatm_pci_tbl[] = { - { PCI_VDEVICE(ZEITNET, PCI_DEVICE_ID_ZEITNET_1221), ZATM_COPPER }, - { PCI_VDEVICE(ZEITNET, PCI_DEVICE_ID_ZEITNET_1225), 0 }, - { 0, } -}; -MODULE_DEVICE_TABLE(pci, zatm_pci_tbl); - -static struct pci_driver zatm_driver = { - .name = DEV_LABEL, - .id_table = zatm_pci_tbl, - .probe = zatm_init_one, -}; - -static int __init zatm_init_module(void) -{ - return pci_register_driver(&zatm_driver); -} - -module_init(zatm_init_module); -/* module_exit not defined so not unloadable */ diff --git a/drivers/atm/zatm.h b/drivers/atm/zatm.h deleted file mode 100644 index 8204369fe825..000000000000 --- a/drivers/atm/zatm.h +++ /dev/null @@ -1,104 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* drivers/atm/zatm.h - ZeitNet ZN122x device driver declarations */ - -/* Written 1995-1998 by Werner Almesberger, EPFL LRC/ICA */ - - -#ifndef DRIVER_ATM_ZATM_H -#define DRIVER_ATM_ZATM_H - -#include -#include -#include -#include -#include - - -#define DEV_LABEL "zatm" - -#define MAX_AAL5_PDU 10240 /* allocate for AAL5 PDUs of this size */ -#define MAX_RX_SIZE_LD 14 /* ceil(log2((MAX_AAL5_PDU+47)/48)) */ - -#define LOW_MARK 12 /* start adding new buffers if less than 12 */ -#define HIGH_MARK 30 /* stop adding buffers after reaching 30 */ -#define OFF_CNG_THRES 5 /* threshold for offset changes */ - -#define RX_SIZE 2 /* RX lookup entry size (in bytes) */ -#define NR_POOLS 32 /* number of free buffer pointers */ -#define POOL_SIZE 8 /* buffer entry size (in bytes) */ -#define NR_SHAPERS 16 /* number of shapers */ -#define SHAPER_SIZE 4 /* shaper entry size (in bytes) */ -#define VC_SIZE 32 /* VC dsc (TX or RX) size (in bytes) */ - -#define RING_ENTRIES 32 /* ring entries (without back pointer) */ -#define RING_WORDS 4 /* ring element size */ -#define RING_SIZE (sizeof(unsigned long)*(RING_ENTRIES+1)*RING_WORDS) - -#define NR_MBX 4 /* four mailboxes */ -#define MBX_RX_0 0 /* mailbox indices */ -#define MBX_RX_1 1 -#define MBX_TX_0 2 -#define MBX_TX_1 3 - -struct zatm_vcc { - /*-------------------------------- RX part */ - int rx_chan; /* RX channel, 0 if none */ - int pool; /* free buffer pool */ - /*-------------------------------- TX part */ - int tx_chan; /* TX channel, 0 if none */ - int shaper; /* shaper, <0 if none */ - struct sk_buff_head tx_queue; /* list of buffers in transit */ - wait_queue_head_t tx_wait; /* for close */ - u32 *ring; /* transmit ring */ - int ring_curr; /* current write position */ - int txing; /* number of transmits in progress */ - struct sk_buff_head backlog; /* list of buffers waiting for ring */ -}; - -struct zatm_dev { - /*-------------------------------- TX part */ - int tx_bw; /* remaining bandwidth */ - u32 free_shapers; /* bit set */ - int ubr; /* UBR shaper; -1 if none */ - int ubr_ref_cnt; /* number of VCs using UBR shaper */ - /*-------------------------------- RX part */ - int pool_ref[NR_POOLS]; /* free buffer pool usage counters */ - volatile struct sk_buff *last_free[NR_POOLS]; - /* last entry in respective pool */ - struct sk_buff_head pool[NR_POOLS];/* free buffer pools */ - struct zatm_pool_info pool_info[NR_POOLS]; /* pool information */ - /*-------------------------------- maps */ - struct atm_vcc **tx_map; /* TX VCCs */ - struct atm_vcc **rx_map; /* RX VCCs */ - int chans; /* map size, must be 2^n */ - /*-------------------------------- mailboxes */ - unsigned long mbx_start[NR_MBX];/* start addresses */ - dma_addr_t mbx_dma[NR_MBX]; - u16 mbx_end[NR_MBX]; /* end offset (in bytes) */ - /*-------------------------------- other pointers */ - u32 pool_base; /* Free buffer pool dsc (word addr) */ - /*-------------------------------- ZATM links */ - struct atm_dev *more; /* other ZATM devices */ - /*-------------------------------- general information */ - int mem; /* RAM on board (in bytes) */ - int khz; /* timer clock */ - int copper; /* PHY type */ - unsigned char irq; /* IRQ */ - unsigned int base; /* IO base address */ - struct pci_dev *pci_dev; /* PCI stuff */ - spinlock_t lock; -}; - - -#define ZATM_DEV(d) ((struct zatm_dev *) (d)->dev_data) -#define ZATM_VCC(d) ((struct zatm_vcc *) (d)->dev_data) - - -struct zatm_skb_prv { - struct atm_skb_data _; /* reserved */ - u32 *dsc; /* pointer to skb's descriptor */ -}; - -#define ZATM_PRV_DSC(skb) (((struct zatm_skb_prv *) (skb)->cb)->dsc) - -#endif diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h deleted file mode 100644 index 5135027b93c1..000000000000 --- a/include/uapi/linux/atm_zatm.h +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* atm_zatm.h - Driver-specific declarations of the ZATM driver (for use by - driver-specific utilities) */ - -/* Written 1995-1999 by Werner Almesberger, EPFL LRC/ICA */ - - -#ifndef LINUX_ATM_ZATM_H -#define LINUX_ATM_ZATM_H - -/* - * Note: non-kernel programs including this file must also include - * sys/types.h for struct timeval - */ - -#include -#include - -#define ZATM_GETPOOL _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc) - /* get pool statistics */ -#define ZATM_GETPOOLZ _IOW('a',ATMIOC_SARPRV+2,struct atmif_sioc) - /* get statistics and zero */ -#define ZATM_SETPOOL _IOW('a',ATMIOC_SARPRV+3,struct atmif_sioc) - /* set pool parameters */ - -struct zatm_pool_info { - int ref_count; /* free buffer pool usage counters */ - int low_water,high_water; /* refill parameters */ - int rqa_count,rqu_count; /* queue condition counters */ - int offset,next_off; /* alignment optimizations: offset */ - int next_cnt,next_thres; /* repetition counter and threshold */ -}; - -struct zatm_pool_req { - int pool_num; /* pool number */ - struct zatm_pool_info info; /* actual information */ -}; - -#define ZATM_OAM_POOL 0 /* free buffer pool for OAM cells */ -#define ZATM_AAL0_POOL 1 /* free buffer pool for AAL0 cells */ -#define ZATM_AAL5_POOL_BASE 2 /* first AAL5 free buffer pool */ -#define ZATM_LAST_POOL ZATM_AAL5_POOL_BASE+10 /* max. 64 kB */ - -#define ZATM_TIMER_HISTORY_SIZE 16 /* number of timer adjustments to - record; must be 2^n */ - -#endif -- cgit v1.2.3 From 6fd1d51cfa253b5ee7dae18d7cf1df830e9b6137 Mon Sep 17 00:00:00 2001 From: Erin MacNeil Date: Wed, 27 Apr 2022 16:02:37 -0400 Subject: net: SO_RCVMARK socket option for SO_MARK with recvmsg() Adding a new socket option, SO_RCVMARK, to indicate that SO_MARK should be included in the ancillary data returned by recvmsg(). Renamed the sock_recv_ts_and_drops() function to sock_recv_cmsgs(). Signed-off-by: Erin MacNeil Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Acked-by: Marc Kleine-Budde Link: https://lore.kernel.org/r/20220427200259.2564-1-lnx.erin@gmail.com Signed-off-by: Jakub Kicinski --- arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 1 + include/net/sock.h | 18 ++++++++++-------- include/uapi/asm-generic/socket.h | 2 ++ net/atm/common.c | 2 +- net/bluetooth/af_bluetooth.c | 4 ++-- net/can/bcm.c | 2 +- net/can/j1939/socket.c | 2 +- net/can/raw.c | 2 +- net/core/sock.c | 7 +++++++ net/ieee802154/socket.c | 4 ++-- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/udp.c | 2 +- net/key/af_key.c | 2 +- net/mctp/af_mctp.c | 2 +- net/packet/af_packet.c | 2 +- net/sctp/socket.c | 2 +- net/socket.c | 15 ++++++++++++--- tools/include/uapi/asm-generic/socket.h | 2 ++ 23 files changed, 56 insertions(+), 27 deletions(-) (limited to 'include/uapi') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 7d81535893af..739891b94136 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -135,6 +135,8 @@ #define SO_TXREHASH 74 +#define SO_RCVMARK 75 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 1d55e57b8466..18f3d95ecfec 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -146,6 +146,8 @@ #define SO_TXREHASH 74 +#define SO_RCVMARK 75 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 654061e0964e..f486d3dfb6bb 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -127,6 +127,8 @@ #define SO_TXREHASH 0x4048 +#define SO_RCVMARK 0x4049 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 666f81e617ea..2fda57a3ea86 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -128,6 +128,7 @@ #define SO_TXREHASH 0x0053 +#define SO_RCVMARK 0x0054 #if !defined(__KERNEL__) diff --git a/include/net/sock.h b/include/net/sock.h index f9f8ecae0f8d..663041b92c21 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -893,6 +893,7 @@ enum sock_flags { SOCK_TXTIME, SOCK_XDP, /* XDP is attached */ SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */ + SOCK_RCVMARK, /* Receive SO_MARK ancillary data with packet */ }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) @@ -2647,20 +2648,21 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) __sock_recv_wifi_status(msg, sk, skb); } -void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb); +void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb); #define SK_DEFAULT_STAMP (-1L * NSEC_PER_SEC) -static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb) +static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) { -#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \ - (1UL << SOCK_RCVTSTAMP)) +#define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \ + (1UL << SOCK_RCVTSTAMP) | \ + (1UL << SOCK_RCVMARK)) #define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \ SOF_TIMESTAMPING_RAW_HARDWARE) - if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY) - __sock_recv_ts_and_drops(msg, sk, skb); + if (sk->sk_flags & FLAGS_RECV_CMSGS || sk->sk_tsflags & TSFLAGS_ANY) + __sock_recv_cmsgs(msg, sk, skb); else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP))) sock_write_timestamp(sk, skb->tstamp); else if (unlikely(sk->sk_stamp == SK_DEFAULT_STAMP)) diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 467ca2f28760..638230899e98 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -130,6 +130,8 @@ #define SO_TXREHASH 74 +#define SO_RCVMARK 75 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/net/atm/common.c b/net/atm/common.c index d0c8ab7ff8f6..f7019df41c3e 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -553,7 +553,7 @@ int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, error = skb_copy_datagram_msg(skb, 0, msg, copied); if (error) return error; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (!(flags & MSG_PEEK)) { pr_debug("%d -= %d\n", atomic_read(&sk->sk_rmem_alloc), diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 62705734343b..b506409bb498 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -280,7 +280,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, skb_reset_transport_header(skb); err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err == 0) { - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name && bt_sk(sk)->skb_msg_name) bt_sk(sk)->skb_msg_name(skb, msg->msg_name, @@ -384,7 +384,7 @@ int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, copied += chunk; size -= chunk; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (!(flags & MSG_PEEK)) { int skb_len = skb_headlen(skb); diff --git a/net/can/bcm.c b/net/can/bcm.c index 64c07e650bb4..65ee1b784a30 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1647,7 +1647,7 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, return err; } - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { __sockaddr_check_size(BCM_MIN_NAMELEN); diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 0bb4fd3f6264..f5ecfdcf57b2 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -841,7 +841,7 @@ static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg, paddr->can_addr.j1939.pgn = skcb->addr.pgn; } - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); msg->msg_flags |= skcb->msg_flags; skb_free_datagram(sk, skb); diff --git a/net/can/raw.c b/net/can/raw.c index 0cf728dcff36..b7dbb57557f3 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -866,7 +866,7 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, return err; } - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { __sockaddr_check_size(RAW_MIN_NAMELEN); diff --git a/net/core/sock.c b/net/core/sock.c index a0f3989de3d6..770408fd935f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1311,6 +1311,9 @@ set_sndbuf: __sock_set_mark(sk, val); break; + case SO_RCVMARK: + sock_valbool_flag(sk, SOCK_RCVMARK, valbool); + break; case SO_RXQ_OVFL: sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); @@ -1737,6 +1740,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_mark; break; + case SO_RCVMARK: + v.val = sock_flag(sk, SOCK_RCVMARK); + break; + case SO_RXQ_OVFL: v.val = sock_flag(sk, SOCK_RXQ_OVFL); break; diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index f24852814fa3..718fb77bb372 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -328,7 +328,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (err) goto done; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (flags & MSG_TRUNC) copied = skb->len; @@ -718,7 +718,7 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (err) goto done; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (saddr) { /* Clear the implicit padding in struct sockaddr_ieee802154 diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4056b0da85ea..bbd717805b10 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -783,7 +783,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (err) goto done; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); /* Copy the address. */ if (sin) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index aa8545ca6964..9d5071c79c95 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1909,7 +1909,7 @@ try_again: UDP_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS, is_udplite); - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); /* Copy the address. */ if (sin) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0d7c13d33d1a..3b7cbd522b54 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -512,7 +512,7 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, *addr_len = sizeof(*sin6); } - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (np->rxopt.all) ip6_datagram_recv_ctl(sk, msg, skb); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 688af6f809fe..3fc97d4621ac 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -391,7 +391,7 @@ try_again: if (!peeking) SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS); - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); /* Copy the address. */ if (msg->msg_name) { diff --git a/net/key/af_key.c b/net/key/af_key.c index d09ec26b1081..175a162eec58 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3711,7 +3711,7 @@ static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (err) goto out_free; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); err = (flags & MSG_TRUNC) ? skb->len : copied; diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 221863afc4b1..c2fc2a7b2528 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -238,7 +238,7 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (rc < 0) goto out_free; - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (addr) { struct mctp_skb_cb *cb = mctp_cb(skb); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index fd31334cf688..677f9cfa9660 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3477,7 +3477,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sll->sll_protocol = skb->protocol; } - sock_recv_ts_and_drops(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { const size_t max_len = min(sizeof(skb->cb), diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3e3fe923bed5..6d37d2dfb3da 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2128,7 +2128,7 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, head_skb = event->chunk->head_skb; else head_skb = skb; - sock_recv_ts_and_drops(msg, sk, head_skb); + sock_recv_cmsgs(msg, sk, head_skb); if (sctp_ulpevent_is_notification(event)) { msg->msg_flags |= MSG_NOTIFICATION; sp->pf->event_msgname(event, msg->msg_name, addr_len); diff --git a/net/socket.c b/net/socket.c index 6887840682bb..f0c39c874665 100644 --- a/net/socket.c +++ b/net/socket.c @@ -930,13 +930,22 @@ static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount); } -void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, - struct sk_buff *skb) +static void sock_recv_mark(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) +{ + if (sock_flag(sk, SOCK_RCVMARK) && skb) + put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), + &skb->mark); +} + +void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) { sock_recv_timestamp(msg, sk, skb); sock_recv_drops(msg, sk, skb); + sock_recv_mark(msg, sk, skb); } -EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); +EXPORT_SYMBOL_GPL(__sock_recv_cmsgs); INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *, size_t, int)); diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index 77f7c1638eb1..8756df13be50 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -119,6 +119,8 @@ #define SO_DETACH_REUSEPORT_BPF 68 +#define SO_RCVMARK 75 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) -- cgit v1.2.3 From 3254e0b9eb5649ffaa48717ebc9c593adc4ee6a9 Mon Sep 17 00:00:00 2001 From: Alexandru Tachici Date: Fri, 29 Apr 2022 18:34:31 +0300 Subject: ethtool: Add 10base-T1L link mode entry Add entry for the 10base-T1L full duplex mode. Reviewed-by: Andrew Lunn Reviewed-by: Oleksij Rempel Signed-off-by: Alexandru Tachici Signed-off-by: David S. Miller --- drivers/net/phy/phy-core.c | 3 ++- drivers/net/phy/phy_device.c | 3 ++- drivers/net/phy/phylink.c | 4 +++- include/linux/phy.h | 2 +- include/uapi/linux/ethtool.h | 1 + net/ethtool/common.c | 3 +++ 6 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 2001f3329133..1f2531a1a876 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -13,7 +13,7 @@ */ const char *phy_speed_to_str(int speed) { - BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 92, + BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 93, "Enum ethtool_link_mode_bit_indices and phylib are out of sync. " "If a speed or mode has been added please update phy_speed_to_str " "and the PHY settings array.\n"); @@ -176,6 +176,7 @@ static const struct phy_setting settings[] = { /* 10M */ PHY_SETTING( 10, FULL, 10baseT_Full ), PHY_SETTING( 10, HALF, 10baseT_Half ), + PHY_SETTING( 10, FULL, 10baseT1L_Full ), }; #undef PHY_SETTING diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index f867042b2eb4..1369daeded14 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -90,8 +90,9 @@ const int phy_10_100_features_array[4] = { }; EXPORT_SYMBOL_GPL(phy_10_100_features_array); -const int phy_basic_t1_features_array[2] = { +const int phy_basic_t1_features_array[3] = { ETHTOOL_LINK_MODE_TP_BIT, + ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, ETHTOOL_LINK_MODE_100baseT1_Full_BIT, }; EXPORT_SYMBOL_GPL(phy_basic_t1_features_array); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 33c285252584..d707604d1d5a 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -168,8 +168,10 @@ static void phylink_caps_to_linkmodes(unsigned long *linkmodes, if (caps & MAC_10HD) __set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, linkmodes); - if (caps & MAC_10FD) + if (caps & MAC_10FD) { __set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, linkmodes); + __set_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, linkmodes); + } if (caps & MAC_100HD) { __set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, linkmodes); diff --git a/include/linux/phy.h b/include/linux/phy.h index 36ca2b5c2253..b12af9e2f389 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -65,7 +65,7 @@ extern const int phy_basic_ports_array[3]; extern const int phy_fibre_port_array[1]; extern const int phy_all_ports_features_array[7]; extern const int phy_10_100_features_array[4]; -extern const int phy_basic_t1_features_array[2]; +extern const int phy_basic_t1_features_array[3]; extern const int phy_gbit_features_array[2]; extern const int phy_10gbit_features_array[1]; diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 7bc4b8def12c..e0f0ee9bc89e 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1691,6 +1691,7 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT = 89, ETHTOOL_LINK_MODE_100baseFX_Half_BIT = 90, ETHTOOL_LINK_MODE_100baseFX_Full_BIT = 91, + ETHTOOL_LINK_MODE_10baseT1L_Full_BIT = 92, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS }; diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 0c5210015911..566adf85e658 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -201,6 +201,7 @@ const char link_mode_names[][ETH_GSTRING_LEN] = { __DEFINE_LINK_MODE_NAME(400000, CR4, Full), __DEFINE_LINK_MODE_NAME(100, FX, Half), __DEFINE_LINK_MODE_NAME(100, FX, Full), + __DEFINE_LINK_MODE_NAME(10, T1L, Full), }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -236,6 +237,7 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_T1 1 #define __LINK_MODE_LANES_X 1 #define __LINK_MODE_LANES_FX 1 +#define __LINK_MODE_LANES_T1L 1 #define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \ [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \ @@ -349,6 +351,7 @@ const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(400000, CR4, Full), __DEFINE_LINK_MODE_PARAMS(100, FX, Half), __DEFINE_LINK_MODE_PARAMS(100, FX, Full), + __DEFINE_LINK_MODE_PARAMS(10, T1L, Full), }; static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS); -- cgit v1.2.3 From 909b4f2bf764a903e9183111368f1509f9b40e6d Mon Sep 17 00:00:00 2001 From: Alexandru Tachici Date: Fri, 29 Apr 2022 18:34:32 +0300 Subject: net: phy: Add 10-BaseT1L registers The 802.3gc specification defines the 10-BaseT1L link mode for ethernet trafic on twisted wire pair. PMA status register can be used to detect if the phy supports 2.4 V TX level and PCS control register can be used to enable/disable PCS level loopback. Reviewed-by: Andrew Lunn Signed-off-by: Alexandru Tachici Signed-off-by: David S. Miller --- include/uapi/linux/mdio.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index c54e6eae5366..0b2eba36dd7c 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -67,6 +67,9 @@ #define MDIO_PCS_10GBRT_STAT2 33 /* 10GBASE-R/-T PCS status 2 */ #define MDIO_AN_10GBT_CTRL 32 /* 10GBASE-T auto-negotiation control */ #define MDIO_AN_10GBT_STAT 33 /* 10GBASE-T auto-negotiation status */ +#define MDIO_B10L_PMA_CTRL 2294 /* 10BASE-T1L PMA control */ +#define MDIO_PMA_10T1L_STAT 2295 /* 10BASE-T1L PMA status */ +#define MDIO_PCS_10T1L_CTRL 2278 /* 10BASE-T1L PCS control */ /* LASI (Link Alarm Status Interrupt) registers, defined by XENPAK MSA. */ #define MDIO_PMA_LASI_RXCTRL 0x9000 /* RX_ALARM control */ @@ -268,6 +271,28 @@ #define MDIO_AN_10GBT_STAT_MS 0x4000 /* Master/slave config */ #define MDIO_AN_10GBT_STAT_MSFLT 0x8000 /* Master/slave config fault */ +/* 10BASE-T1L PMA control */ +#define MDIO_PMA_10T1L_CTRL_LB_EN 0x0001 /* Enable loopback mode */ +#define MDIO_PMA_10T1L_CTRL_EEE_EN 0x0400 /* Enable EEE mode */ +#define MDIO_PMA_10T1L_CTRL_LOW_POWER 0x0800 /* Low-power mode */ +#define MDIO_PMA_10T1L_CTRL_2V4_EN 0x1000 /* Enable 2.4 Vpp operating mode */ +#define MDIO_PMA_10T1L_CTRL_TX_DIS 0x4000 /* Transmit disable */ +#define MDIO_PMA_10T1L_CTRL_PMA_RST 0x8000 /* MA reset */ + +/* 10BASE-T1L PMA status register. */ +#define MDIO_PMA_10T1L_STAT_LINK 0x0001 /* PMA receive link up */ +#define MDIO_PMA_10T1L_STAT_FAULT 0x0002 /* Fault condition detected */ +#define MDIO_PMA_10T1L_STAT_POLARITY 0x0004 /* Receive polarity is reversed */ +#define MDIO_PMA_10T1L_STAT_RECV_FAULT 0x0200 /* Able to detect fault on receive path */ +#define MDIO_PMA_10T1L_STAT_EEE 0x0400 /* PHY has EEE ability */ +#define MDIO_PMA_10T1L_STAT_LOW_POWER 0x0800 /* PMA has low-power ability */ +#define MDIO_PMA_10T1L_STAT_2V4_ABLE 0x1000 /* PHY has 2.4 Vpp operating mode ability */ +#define MDIO_PMA_10T1L_STAT_LB_ABLE 0x2000 /* PHY has loopback ability */ + +/* 10BASE-T1L PCS control register. */ +#define MDIO_PCS_10T1L_CTRL_LB 0x4000 /* Enable PCS level loopback mode */ +#define MDIO_PCS_10T1L_CTRL_RESET 0x8000 /* PCS reset */ + /* EEE Supported/Advertisement/LP Advertisement registers. * * EEE capability Register (3.20), Advertisement (7.60) and -- cgit v1.2.3 From 1b020e448e0fb67bcb04ee0f778d413045f965d3 Mon Sep 17 00:00:00 2001 From: Alexandru Tachici Date: Fri, 29 Apr 2022 18:34:33 +0300 Subject: net: phy: Add BaseT1 auto-negotiation registers Added BASE-T1 AN advertisement register (Registers 7.514, 7.515, and 7.516) and BASE-T1 AN LP Base Page ability register (Registers 7.517, 7.518, and 7.519). Reviewed-by: Andrew Lunn Signed-off-by: Alexandru Tachici Signed-off-by: David S. Miller --- include/uapi/linux/mdio.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index 0b2eba36dd7c..fa3515257f54 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -70,6 +70,14 @@ #define MDIO_B10L_PMA_CTRL 2294 /* 10BASE-T1L PMA control */ #define MDIO_PMA_10T1L_STAT 2295 /* 10BASE-T1L PMA status */ #define MDIO_PCS_10T1L_CTRL 2278 /* 10BASE-T1L PCS control */ +#define MDIO_AN_T1_CTRL 512 /* BASE-T1 AN control */ +#define MDIO_AN_T1_STAT 513 /* BASE-T1 AN status */ +#define MDIO_AN_T1_ADV_L 514 /* BASE-T1 AN advertisement register [15:0] */ +#define MDIO_AN_T1_ADV_M 515 /* BASE-T1 AN advertisement register [31:16] */ +#define MDIO_AN_T1_ADV_H 516 /* BASE-T1 AN advertisement register [47:32] */ +#define MDIO_AN_T1_LP_L 517 /* BASE-T1 AN LP Base Page ability register [15:0] */ +#define MDIO_AN_T1_LP_M 518 /* BASE-T1 AN LP Base Page ability register [31:16] */ +#define MDIO_AN_T1_LP_H 519 /* BASE-T1 AN LP Base Page ability register [47:32] */ /* LASI (Link Alarm Status Interrupt) registers, defined by XENPAK MSA. */ #define MDIO_PMA_LASI_RXCTRL 0x9000 /* RX_ALARM control */ @@ -293,6 +301,38 @@ #define MDIO_PCS_10T1L_CTRL_LB 0x4000 /* Enable PCS level loopback mode */ #define MDIO_PCS_10T1L_CTRL_RESET 0x8000 /* PCS reset */ +/* BASE-T1 auto-negotiation advertisement register [15:0] */ +#define MDIO_AN_T1_ADV_L_PAUSE_CAP ADVERTISE_PAUSE_CAP +#define MDIO_AN_T1_ADV_L_PAUSE_ASYM ADVERTISE_PAUSE_ASYM +#define MDIO_AN_T1_ADV_L_FORCE_MS 0x1000 /* Force Master/slave Configuration */ +#define MDIO_AN_T1_ADV_L_REMOTE_FAULT ADVERTISE_RFAULT +#define MDIO_AN_T1_ADV_L_ACK ADVERTISE_LPACK +#define MDIO_AN_T1_ADV_L_NEXT_PAGE_REQ ADVERTISE_NPAGE + +/* BASE-T1 auto-negotiation advertisement register [31:16] */ +#define MDIO_AN_T1_ADV_M_B10L 0x4000 /* device is compatible with 10BASE-T1L */ +#define MDIO_AN_T1_ADV_M_MST 0x0010 /* advertise master preference */ + +/* BASE-T1 auto-negotiation advertisement register [47:32] */ +#define MDIO_AN_T1_ADV_H_10L_TX_HI_REQ 0x1000 /* 10BASE-T1L High Level Transmit Request */ +#define MDIO_AN_T1_ADV_H_10L_TX_HI 0x2000 /* 10BASE-T1L High Level Transmit Ability */ + +/* BASE-T1 AN LP Base Page ability register [15:0] */ +#define MDIO_AN_T1_LP_L_PAUSE_CAP LPA_PAUSE_CAP +#define MDIO_AN_T1_LP_L_PAUSE_ASYM LPA_PAUSE_ASYM +#define MDIO_AN_T1_LP_L_FORCE_MS 0x1000 /* LP Force Master/slave Configuration */ +#define MDIO_AN_T1_LP_L_REMOTE_FAULT LPA_RFAULT +#define MDIO_AN_T1_LP_L_ACK LPA_LPACK +#define MDIO_AN_T1_LP_L_NEXT_PAGE_REQ LPA_NPAGE + +/* BASE-T1 AN LP Base Page ability register [31:16] */ +#define MDIO_AN_T1_LP_M_MST 0x0010 /* LP master preference */ +#define MDIO_AN_T1_LP_M_B10L 0x4000 /* LP is compatible with 10BASE-T1L */ + +/* BASE-T1 AN LP Base Page ability register [47:32] */ +#define MDIO_AN_T1_LP_H_10L_TX_HI_REQ 0x1000 /* 10BASE-T1L High Level LP Transmit Request */ +#define MDIO_AN_T1_LP_H_10L_TX_HI 0x2000 /* 10BASE-T1L High Level LP Transmit Ability */ + /* EEE Supported/Advertisement/LP Advertisement registers. * * EEE capability Register (3.20), Advertisement (7.60) and -- cgit v1.2.3 From 3da8ffd8545f62fec85a48a3c637b2f427974f11 Mon Sep 17 00:00:00 2001 From: Alexandru Tachici Date: Fri, 29 Apr 2022 18:34:34 +0300 Subject: net: phy: Add 10BASE-T1L support in phy-c45 This patch is needed because the BASE-T1 uses different registers for status, control and advertisement to those already employed in the existing phy-c45 functions. Where required, genphy_c45 functions will now check whether the device supports BASE-T1 and use the specific registers instead: 45.2.7.19 BASE-T1 AN control register, 45.2.7.20 BASE-T1 AN status, 45.2.7.21 BASE-T1 AN advertisement register, 45.2.7.22 BASE-T1 AN LP Base Page ability register, 45.2.1.185 BASE-T1 PMA/PMD control register. Tested-by: Oleksij Rempel Signed-off-by: Alexandru Tachici Signed-off-by: David S. Miller --- drivers/net/phy/phy-c45.c | 257 ++++++++++++++++++++++++++++++++++++++++++- drivers/net/phy/phy_device.c | 1 + include/linux/mdio.h | 70 ++++++++++++ include/linux/phy.h | 3 + include/uapi/linux/mdio.h | 10 ++ 5 files changed, 336 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index db709d30bf84..eefdd67d5556 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -8,6 +8,25 @@ #include #include +/** + * genphy_c45_baset1_able - checks if the PMA has BASE-T1 extended abilities + * @phydev: target phy_device struct + */ +static bool genphy_c45_baset1_able(struct phy_device *phydev) +{ + int val; + + if (phydev->pma_extable == -ENODATA) { + val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_EXTABLE); + if (val < 0) + return false; + + phydev->pma_extable = val; + } + + return !!(phydev->pma_extable & MDIO_PMA_EXTABLE_BT1); +} + /** * genphy_c45_pma_can_sleep - checks if the PMA have sleep support * @phydev: target phy_device struct @@ -80,7 +99,10 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev) switch (phydev->speed) { case SPEED_10: - ctrl2 |= MDIO_PMA_CTRL2_10BT; + if (genphy_c45_baset1_able(phydev)) + ctrl2 |= MDIO_PMA_CTRL2_BASET1; + else + ctrl2 |= MDIO_PMA_CTRL2_10BT; break; case SPEED_100: ctrl1 |= MDIO_PMA_CTRL1_SPEED100; @@ -118,10 +140,95 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev) if (ret < 0) return ret; + if (genphy_c45_baset1_able(phydev)) { + int ctl = 0; + + switch (phydev->master_slave_set) { + case MASTER_SLAVE_CFG_MASTER_PREFERRED: + case MASTER_SLAVE_CFG_MASTER_FORCE: + ctl = MDIO_PMA_PMD_BT1_CTRL_CFG_MST; + break; + case MASTER_SLAVE_CFG_SLAVE_FORCE: + case MASTER_SLAVE_CFG_SLAVE_PREFERRED: + case MASTER_SLAVE_CFG_UNKNOWN: + case MASTER_SLAVE_CFG_UNSUPPORTED: + break; + default: + phydev_warn(phydev, "Unsupported Master/Slave mode\n"); + return -EOPNOTSUPP; + } + + ret = phy_modify_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_PMD_BT1_CTRL, + MDIO_PMA_PMD_BT1_CTRL_CFG_MST, ctl); + if (ret < 0) + return ret; + } + return genphy_c45_an_disable_aneg(phydev); } EXPORT_SYMBOL_GPL(genphy_c45_pma_setup_forced); +/* Sets master/slave preference and supported technologies. + * The preference is set in the BIT(4) of BASE-T1 AN + * advertisement register 7.515 and whether the status + * is forced or not, it is set in the BIT(12) of BASE-T1 + * AN advertisement register 7.514. + * Sets 10BASE-T1L Ability BIT(14) in BASE-T1 autonegotiation + * advertisement register [31:16] if supported. + */ +static int genphy_c45_baset1_an_config_aneg(struct phy_device *phydev) +{ + int changed = 0; + u16 adv_l = 0; + u16 adv_m = 0; + int ret; + + switch (phydev->master_slave_set) { + case MASTER_SLAVE_CFG_MASTER_FORCE: + case MASTER_SLAVE_CFG_SLAVE_FORCE: + adv_l |= MDIO_AN_T1_ADV_L_FORCE_MS; + break; + case MASTER_SLAVE_CFG_MASTER_PREFERRED: + case MASTER_SLAVE_CFG_SLAVE_PREFERRED: + break; + default: + break; + } + + switch (phydev->master_slave_set) { + case MASTER_SLAVE_CFG_MASTER_FORCE: + case MASTER_SLAVE_CFG_MASTER_PREFERRED: + adv_m |= MDIO_AN_T1_ADV_M_MST; + break; + case MASTER_SLAVE_CFG_SLAVE_FORCE: + case MASTER_SLAVE_CFG_SLAVE_PREFERRED: + break; + default: + break; + } + + adv_l |= linkmode_adv_to_mii_t1_adv_l_t(phydev->advertising); + + ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_T1_ADV_L, + (MDIO_AN_T1_ADV_L_FORCE_MS | MDIO_AN_T1_ADV_L_PAUSE_CAP + | MDIO_AN_T1_ADV_L_PAUSE_ASYM), adv_l); + if (ret < 0) + return ret; + if (ret > 0) + changed = 1; + + adv_m |= linkmode_adv_to_mii_t1_adv_m_t(phydev->advertising); + + ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_T1_ADV_M, + MDIO_AN_T1_ADV_M_MST | MDIO_AN_T1_ADV_M_B10L, adv_m); + if (ret < 0) + return ret; + if (ret > 0) + changed = 1; + + return changed; +} + /** * genphy_c45_an_config_aneg - configure advertisement registers * @phydev: target phy_device struct @@ -141,6 +248,9 @@ int genphy_c45_an_config_aneg(struct phy_device *phydev) changed = genphy_config_eee_advert(phydev); + if (genphy_c45_baset1_able(phydev)) + return genphy_c45_baset1_an_config_aneg(phydev); + adv = linkmode_adv_to_mii_adv_t(phydev->advertising); ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE, @@ -178,8 +288,12 @@ EXPORT_SYMBOL_GPL(genphy_c45_an_config_aneg); */ int genphy_c45_an_disable_aneg(struct phy_device *phydev) { + u16 reg = MDIO_CTRL1; - return phy_clear_bits_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1, + if (genphy_c45_baset1_able(phydev)) + reg = MDIO_AN_T1_CTRL; + + return phy_clear_bits_mmd(phydev, MDIO_MMD_AN, reg, MDIO_AN_CTRL1_ENABLE | MDIO_AN_CTRL1_RESTART); } EXPORT_SYMBOL_GPL(genphy_c45_an_disable_aneg); @@ -194,7 +308,12 @@ EXPORT_SYMBOL_GPL(genphy_c45_an_disable_aneg); */ int genphy_c45_restart_aneg(struct phy_device *phydev) { - return phy_set_bits_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1, + u16 reg = MDIO_CTRL1; + + if (genphy_c45_baset1_able(phydev)) + reg = MDIO_AN_T1_CTRL; + + return phy_set_bits_mmd(phydev, MDIO_MMD_AN, reg, MDIO_AN_CTRL1_ENABLE | MDIO_AN_CTRL1_RESTART); } EXPORT_SYMBOL_GPL(genphy_c45_restart_aneg); @@ -210,11 +329,15 @@ EXPORT_SYMBOL_GPL(genphy_c45_restart_aneg); */ int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart) { + u16 reg = MDIO_CTRL1; int ret; + if (genphy_c45_baset1_able(phydev)) + reg = MDIO_AN_T1_CTRL; + if (!restart) { /* Configure and restart aneg if it wasn't set before */ - ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1); + ret = phy_read_mmd(phydev, MDIO_MMD_AN, reg); if (ret < 0) return ret; @@ -242,7 +365,13 @@ EXPORT_SYMBOL_GPL(genphy_c45_check_and_restart_aneg); */ int genphy_c45_aneg_done(struct phy_device *phydev) { - int val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1); + int reg = MDIO_STAT1; + int val; + + if (genphy_c45_baset1_able(phydev)) + reg = MDIO_AN_T1_STAT; + + val = phy_read_mmd(phydev, MDIO_MMD_AN, reg); return val < 0 ? val : val & MDIO_AN_STAT1_COMPLETE ? 1 : 0; } @@ -307,6 +436,49 @@ int genphy_c45_read_link(struct phy_device *phydev) } EXPORT_SYMBOL_GPL(genphy_c45_read_link); +/* Read the Clause 45 defined BASE-T1 AN (7.513) status register to check + * if autoneg is complete. If so read the BASE-T1 Autonegotiation + * Advertisement registers filling in the link partner advertisement, + * pause and asym_pause members in phydev. + */ +static int genphy_c45_baset1_read_lpa(struct phy_device *phydev) +{ + int val; + + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_T1_STAT); + if (val < 0) + return val; + + if (!(val & MDIO_AN_STAT1_COMPLETE)) { + linkmode_clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, phydev->lp_advertising); + mii_t1_adv_l_mod_linkmode_t(phydev->lp_advertising, 0); + mii_t1_adv_m_mod_linkmode_t(phydev->lp_advertising, 0); + + phydev->pause = 0; + phydev->asym_pause = 0; + + return 0; + } + + linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, phydev->lp_advertising, 1); + + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_T1_LP_L); + if (val < 0) + return val; + + mii_t1_adv_l_mod_linkmode_t(phydev->lp_advertising, val); + phydev->pause = val & MDIO_AN_T1_ADV_L_PAUSE_CAP ? 1 : 0; + phydev->asym_pause = val & MDIO_AN_T1_ADV_L_PAUSE_ASYM ? 1 : 0; + + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_T1_LP_M); + if (val < 0) + return val; + + mii_t1_adv_m_mod_linkmode_t(phydev->lp_advertising, val); + + return 0; +} + /** * genphy_c45_read_lpa - read the link partner advertisement and pause * @phydev: target phy_device struct @@ -321,6 +493,9 @@ int genphy_c45_read_lpa(struct phy_device *phydev) { int val; + if (genphy_c45_baset1_able(phydev)) + return genphy_c45_baset1_read_lpa(phydev); + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1); if (val < 0) return val; @@ -399,6 +574,17 @@ int genphy_c45_read_pma(struct phy_device *phydev) phydev->duplex = DUPLEX_FULL; + if (genphy_c45_baset1_able(phydev)) { + val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_PMD_BT1_CTRL); + if (val < 0) + return val; + + if (MDIO_PMA_PMD_BT1_CTRL_CFG_MST) + phydev->master_slave_state = MASTER_SLAVE_STATE_MASTER; + else + phydev->master_slave_state = MASTER_SLAVE_STATE_SLAVE; + } + return 0; } EXPORT_SYMBOL_GPL(genphy_c45_read_pma); @@ -530,12 +716,67 @@ int genphy_c45_pma_read_abilities(struct phy_device *phydev) phydev->supported, val & MDIO_PMA_NG_EXTABLE_5GBT); } + + if (val & MDIO_PMA_EXTABLE_BT1) { + val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_PMD_BT1); + if (val < 0) + return val; + + linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, + phydev->supported, + val & MDIO_PMA_PMD_BT1_B10L_ABLE); + + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_T1_STAT); + if (val < 0) + return val; + + linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + phydev->supported, + val & MDIO_AN_STAT1_ABLE); + } } return 0; } EXPORT_SYMBOL_GPL(genphy_c45_pma_read_abilities); +/* Read master/slave preference from registers. + * The preference is read from the BIT(4) of BASE-T1 AN + * advertisement register 7.515 and whether the preference + * is forced or not, it is read from BASE-T1 AN advertisement + * register 7.514. + */ +static int genphy_c45_baset1_read_status(struct phy_device *phydev) +{ + int ret; + int cfg; + + phydev->master_slave_get = MASTER_SLAVE_CFG_UNKNOWN; + phydev->master_slave_state = MASTER_SLAVE_STATE_UNKNOWN; + + ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_T1_ADV_L); + if (ret < 0) + return ret; + + cfg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_T1_ADV_M); + if (cfg < 0) + return cfg; + + if (ret & MDIO_AN_T1_ADV_L_FORCE_MS) { + if (cfg & MDIO_AN_T1_ADV_M_MST) + phydev->master_slave_get = MASTER_SLAVE_CFG_MASTER_FORCE; + else + phydev->master_slave_get = MASTER_SLAVE_CFG_SLAVE_FORCE; + } else { + if (cfg & MDIO_AN_T1_ADV_M_MST) + phydev->master_slave_get = MASTER_SLAVE_CFG_MASTER_PREFERRED; + else + phydev->master_slave_get = MASTER_SLAVE_CFG_SLAVE_PREFERRED; + } + + return 0; +} + /** * genphy_c45_read_status - read PHY status * @phydev: target phy_device struct @@ -560,6 +801,12 @@ int genphy_c45_read_status(struct phy_device *phydev) if (ret) return ret; + if (genphy_c45_baset1_able(phydev)) { + ret = genphy_c45_baset1_read_status(phydev); + if (ret < 0) + return ret; + } + phy_resolve_aneg_linkmode(phydev); } else { ret = genphy_c45_read_pma(phydev); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 1369daeded14..431a8719c635 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -600,6 +600,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, dev->autoneg = AUTONEG_ENABLE; + dev->pma_extable = -ENODATA; dev->is_c45 = is_c45; dev->phy_id = phy_id; if (c45_ids) diff --git a/include/linux/mdio.h b/include/linux/mdio.h index ecac96d52e01..00177567cfef 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -340,6 +340,76 @@ static inline void mii_10gbt_stat_mod_linkmode_lpa_t(unsigned long *advertising, advertising, lpa & MDIO_AN_10GBT_STAT_LP10G); } +/** + * mii_t1_adv_l_mod_linkmode_t + * @advertising: target the linkmode advertisement settings + * @lpa: value of the BASE-T1 Autonegotiation Advertisement [15:0] Register + * + * A small helper function that translates BASE-T1 Autonegotiation + * Advertisement [15:0] Register bits to linkmode advertisement settings. + * Other bits in advertising aren't changed. + */ +static inline void mii_t1_adv_l_mod_linkmode_t(unsigned long *advertising, u32 lpa) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising, + lpa & MDIO_AN_T1_ADV_L_PAUSE_CAP); + linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising, + lpa & MDIO_AN_T1_ADV_L_PAUSE_ASYM); +} + +/** + * mii_t1_adv_m_mod_linkmode_t + * @advertising: target the linkmode advertisement settings + * @lpa: value of the BASE-T1 Autonegotiation Advertisement [31:16] Register + * + * A small helper function that translates BASE-T1 Autonegotiation + * Advertisement [31:16] Register bits to linkmode advertisement settings. + * Other bits in advertising aren't changed. + */ +static inline void mii_t1_adv_m_mod_linkmode_t(unsigned long *advertising, u32 lpa) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, + advertising, lpa & MDIO_AN_T1_ADV_M_B10L); +} + +/** + * linkmode_adv_to_mii_t1_adv_l_t + * @advertising: the linkmode advertisement settings + * + * A small helper function that translates linkmode advertisement + * settings to phy autonegotiation advertisements for the + * BASE-T1 Autonegotiation Advertisement [15:0] Register. + */ +static inline u32 linkmode_adv_to_mii_t1_adv_l_t(unsigned long *advertising) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising)) + result |= MDIO_AN_T1_ADV_L_PAUSE_CAP; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising)) + result |= MDIO_AN_T1_ADV_L_PAUSE_ASYM; + + return result; +} + +/** + * linkmode_adv_to_mii_t1_adv_m_t + * @advertising: the linkmode advertisement settings + * + * A small helper function that translates linkmode advertisement + * settings to phy autonegotiation advertisements for the + * BASE-T1 Autonegotiation Advertisement [31:16] Register. + */ +static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, advertising)) + result |= MDIO_AN_T1_ADV_M_B10L; + + return result; +} + int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); int __mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum, diff --git a/include/linux/phy.h b/include/linux/phy.h index b12af9e2f389..2d12054932ba 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -570,6 +570,7 @@ struct macsec_ops; * @autoneg_complete: Flag auto negotiation of the link has completed * @mdix: Current crossover * @mdix_ctrl: User setting of crossover + * @pma_extable: Cached value of PMA/PMD Extended Abilities Register * @interrupts: Flag interrupts have been enabled * @interface: enum phy_interface_t value * @skb: Netlink message for cable diagnostics @@ -698,6 +699,8 @@ struct phy_device { u8 mdix; u8 mdix_ctrl; + int pma_extable; + void (*phy_link_change)(struct phy_device *phydev, bool up); void (*adjust_link)(struct net_device *dev); diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index fa3515257f54..75b7257a51e1 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -70,6 +70,7 @@ #define MDIO_B10L_PMA_CTRL 2294 /* 10BASE-T1L PMA control */ #define MDIO_PMA_10T1L_STAT 2295 /* 10BASE-T1L PMA status */ #define MDIO_PCS_10T1L_CTRL 2278 /* 10BASE-T1L PCS control */ +#define MDIO_PMA_PMD_BT1 18 /* BASE-T1 PMA/PMD extended ability */ #define MDIO_AN_T1_CTRL 512 /* BASE-T1 AN control */ #define MDIO_AN_T1_STAT 513 /* BASE-T1 AN status */ #define MDIO_AN_T1_ADV_L 514 /* BASE-T1 AN advertisement register [15:0] */ @@ -78,6 +79,7 @@ #define MDIO_AN_T1_LP_L 517 /* BASE-T1 AN LP Base Page ability register [15:0] */ #define MDIO_AN_T1_LP_M 518 /* BASE-T1 AN LP Base Page ability register [31:16] */ #define MDIO_AN_T1_LP_H 519 /* BASE-T1 AN LP Base Page ability register [47:32] */ +#define MDIO_PMA_PMD_BT1_CTRL 2100 /* BASE-T1 PMA/PMD control register */ /* LASI (Link Alarm Status Interrupt) registers, defined by XENPAK MSA. */ #define MDIO_PMA_LASI_RXCTRL 0x9000 /* RX_ALARM control */ @@ -170,6 +172,7 @@ #define MDIO_PMA_CTRL2_10BT 0x000f /* 10BASE-T type */ #define MDIO_PMA_CTRL2_2_5GBT 0x0030 /* 2.5GBaseT type */ #define MDIO_PMA_CTRL2_5GBT 0x0031 /* 5GBaseT type */ +#define MDIO_PMA_CTRL2_BASET1 0x003D /* BASE-T1 type */ #define MDIO_PCS_CTRL2_TYPE 0x0003 /* PCS type selection */ #define MDIO_PCS_CTRL2_10GBR 0x0000 /* 10GBASE-R type */ #define MDIO_PCS_CTRL2_10GBX 0x0001 /* 10GBASE-X type */ @@ -223,6 +226,7 @@ #define MDIO_PMA_EXTABLE_1000BKX 0x0040 /* 1000BASE-KX ability */ #define MDIO_PMA_EXTABLE_100BTX 0x0080 /* 100BASE-TX ability */ #define MDIO_PMA_EXTABLE_10BT 0x0100 /* 10BASE-T ability */ +#define MDIO_PMA_EXTABLE_BT1 0x0800 /* BASE-T1 ability */ #define MDIO_PMA_EXTABLE_NBT 0x4000 /* 2.5/5GBASE-T ability */ /* PHY XGXS lane state register. */ @@ -301,6 +305,9 @@ #define MDIO_PCS_10T1L_CTRL_LB 0x4000 /* Enable PCS level loopback mode */ #define MDIO_PCS_10T1L_CTRL_RESET 0x8000 /* PCS reset */ +/* BASE-T1 PMA/PMD extended ability register. */ +#define MDIO_PMA_PMD_BT1_B10L_ABLE 0x0004 /* 10BASE-T1L Ability */ + /* BASE-T1 auto-negotiation advertisement register [15:0] */ #define MDIO_AN_T1_ADV_L_PAUSE_CAP ADVERTISE_PAUSE_CAP #define MDIO_AN_T1_ADV_L_PAUSE_ASYM ADVERTISE_PAUSE_ASYM @@ -333,6 +340,9 @@ #define MDIO_AN_T1_LP_H_10L_TX_HI_REQ 0x1000 /* 10BASE-T1L High Level LP Transmit Request */ #define MDIO_AN_T1_LP_H_10L_TX_HI 0x2000 /* 10BASE-T1L High Level LP Transmit Ability */ +/* BASE-T1 PMA/PMD control register */ +#define MDIO_PMA_PMD_BT1_CTRL_CFG_MST 0x4000 /* MASTER-SLAVE config value */ + /* EEE Supported/Advertisement/LP Advertisement registers. * * EEE capability Register (3.20), Advertisement (7.60) and -- cgit v1.2.3 From 41b3c69bf9414375319290c59f198ff5c71d273f Mon Sep 17 00:00:00 2001 From: Kishen Maloor Date: Mon, 2 May 2022 13:52:36 -0700 Subject: mptcp: expose server_side attribute in MPTCP netlink events This change records the 'server_side' attribute of MPTCP_EVENT_CREATED and MPTCP_EVENT_ESTABLISHED events to inform their recipient about the Client/Server role of the running MPTCP application. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/246 Acked-by: Paolo Abeni Signed-off-by: Kishen Maloor Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 1 + net/mptcp/pm_netlink.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 9690efedb5fa..e41ea01a94bb 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -188,6 +188,7 @@ enum mptcp_event_attr { MPTCP_ATTR_IF_IDX, /* s32 */ MPTCP_ATTR_RESET_REASON,/* u32 */ MPTCP_ATTR_RESET_FLAGS, /* u32 */ + MPTCP_ATTR_SERVER_SIDE, /* u8 */ __MPTCP_ATTR_AFTER_LAST }; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index eeaa96bcae6c..a4430c576ce9 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1985,6 +1985,9 @@ static int mptcp_event_created(struct sk_buff *skb, if (err) return err; + if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, READ_ONCE(msk->pm.server_side))) + return -EMSGSIZE; + return mptcp_event_add_subflow(skb, ssk); } -- cgit v1.2.3 From 9ab4807c84a4aacfc9b4f79cc81254035e0ec361 Mon Sep 17 00:00:00 2001 From: Kishen Maloor Date: Tue, 3 May 2022 19:38:52 -0700 Subject: mptcp: netlink: Add MPTCP_PM_CMD_ANNOUNCE This change adds a MPTCP netlink interface for issuing ADD_ADDR advertisements over the chosen MPTCP connection from a userspace path manager. The command requires the following parameters: { token, { loc_id, family, daddr4 | daddr6 [, dport] } [, if_idx], flags[signal] }. Acked-by: Paolo Abeni Signed-off-by: Kishen Maloor Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/uapi/linux/mptcp.h | 2 ++ net/mptcp/pm_netlink.c | 16 ++++++++---- net/mptcp/pm_userspace.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++ net/mptcp/protocol.h | 7 ++++++ 4 files changed, 81 insertions(+), 5 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index e41ea01a94bb..ac66c1263f02 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -55,6 +55,7 @@ enum { MPTCP_PM_ATTR_ADDR, /* nested address */ MPTCP_PM_ATTR_RCV_ADD_ADDRS, /* u32 */ MPTCP_PM_ATTR_SUBFLOWS, /* u32 */ + MPTCP_PM_ATTR_TOKEN, /* u32 */ __MPTCP_PM_ATTR_MAX }; @@ -93,6 +94,7 @@ enum { MPTCP_PM_CMD_SET_LIMITS, MPTCP_PM_CMD_GET_LIMITS, MPTCP_PM_CMD_SET_FLAGS, + MPTCP_PM_CMD_ANNOUNCE, __MPTCP_PM_CMD_AFTER_LAST }; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 7d9bed536966..dbe5ccd95ac5 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -352,8 +352,8 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, return entry; } -static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, - const struct mptcp_pm_addr_entry *entry) +bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, + const struct mptcp_pm_addr_entry *entry) { struct mptcp_pm_add_entry *add_entry = NULL; struct sock *sk = (struct sock *)msk; @@ -1094,6 +1094,7 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = { NLA_POLICY_NESTED(mptcp_pm_addr_policy), [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, }, [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, }; void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) @@ -1203,9 +1204,9 @@ static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[], return err; } -static int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, - bool require_family, - struct mptcp_pm_addr_entry *entry) +int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, + bool require_family, + struct mptcp_pm_addr_entry *entry) { struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; int err; @@ -2198,6 +2199,11 @@ static const struct genl_small_ops mptcp_pm_ops[] = { .doit = mptcp_nl_cmd_set_flags, .flags = GENL_ADMIN_PERM, }, + { + .cmd = MPTCP_PM_CMD_ANNOUNCE, + .doit = mptcp_nl_cmd_announce, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_family mptcp_genl_family __ro_after_init = { diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 910116b0f5b9..347184a9157b 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -119,3 +119,64 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry); } + +int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct nlattr *addr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct mptcp_pm_addr_entry addr_val; + struct mptcp_sock *msk; + int err = -EINVAL; + u32 token_val; + + if (!addr || !token) { + GENL_SET_ERR_MSG(info, "missing required inputs"); + return err; + } + + token_val = nla_get_u32(token); + + msk = mptcp_token_get_sock(sock_net(skb->sk), token_val); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return err; + } + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto announce_err; + } + + err = mptcp_pm_parse_entry(addr, info, true, &addr_val); + if (err < 0) { + GENL_SET_ERR_MSG(info, "error parsing local address"); + goto announce_err; + } + + if (addr_val.addr.id == 0 || !(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + GENL_SET_ERR_MSG(info, "invalid addr id or flags"); + goto announce_err; + } + + err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val); + if (err < 0) { + GENL_SET_ERR_MSG(info, "did not match address and id"); + goto announce_err; + } + + lock_sock((struct sock *)msk); + spin_lock_bh(&msk->pm.lock); + + if (mptcp_pm_alloc_anno_list(msk, &addr_val)) { + mptcp_pm_announce_addr(msk, &addr_val.addr, false); + mptcp_pm_nl_addr_send_ack(msk); + } + + spin_unlock_bh(&msk->pm.lock); + release_sock((struct sock *)msk); + + err = 0; + announce_err: + sock_put((struct sock *)msk); + return err; +} diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 7257dc7aed43..de645efbc806 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -11,6 +11,7 @@ #include #include #include +#include #define MPTCP_SUPPORTED_VERSION 1 @@ -755,6 +756,9 @@ u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum); void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); void mptcp_pm_data_reset(struct mptcp_sock *msk); +int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, + bool require_family, + struct mptcp_pm_addr_entry *entry); void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); @@ -775,6 +779,8 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq); +bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, + const struct mptcp_pm_addr_entry *entry); void mptcp_pm_free_anno_list(struct mptcp_sock *msk); bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); struct mptcp_pm_add_entry * @@ -798,6 +804,7 @@ int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list * int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry); void mptcp_free_local_addr_list(struct mptcp_sock *msk); +int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info); void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); -- cgit v1.2.3 From d9a4594edabf125dc17dfd52acc722c3de1cb44c Mon Sep 17 00:00:00 2001 From: Kishen Maloor Date: Tue, 3 May 2022 19:38:54 -0700 Subject: mptcp: netlink: Add MPTCP_PM_CMD_REMOVE This change adds a MPTCP netlink command for issuing a REMOVE_ADDR signal for an address over the chosen MPTCP connection from a userspace path manager. The command requires the following parameters: {token, loc_id}. Acked-by: Paolo Abeni Signed-off-by: Kishen Maloor Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/uapi/linux/mptcp.h | 2 ++ net/mptcp/pm_netlink.c | 10 ++++++-- net/mptcp/pm_userspace.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++ net/mptcp/protocol.h | 4 +++ 4 files changed, 76 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index ac66c1263f02..11f9fa001a3c 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -56,6 +56,7 @@ enum { MPTCP_PM_ATTR_RCV_ADD_ADDRS, /* u32 */ MPTCP_PM_ATTR_SUBFLOWS, /* u32 */ MPTCP_PM_ATTR_TOKEN, /* u32 */ + MPTCP_PM_ATTR_LOC_ID, /* u8 */ __MPTCP_PM_ATTR_MAX }; @@ -95,6 +96,7 @@ enum { MPTCP_PM_CMD_GET_LIMITS, MPTCP_PM_CMD_SET_FLAGS, MPTCP_PM_CMD_ANNOUNCE, + MPTCP_PM_CMD_REMOVE, __MPTCP_PM_CMD_AFTER_LAST }; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index dbe5ccd95ac5..a26750f19f65 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1095,6 +1095,7 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = { [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, }, [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, }, }; void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) @@ -1504,8 +1505,8 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) return ret; } -static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, - struct list_head *rm_list) +void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, + struct list_head *rm_list) { struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; struct mptcp_pm_addr_entry *entry; @@ -2204,6 +2205,11 @@ static const struct genl_small_ops mptcp_pm_ops[] = { .doit = mptcp_nl_cmd_announce, .flags = GENL_ADMIN_PERM, }, + { + .cmd = MPTCP_PM_CMD_REMOVE, + .doit = mptcp_nl_cmd_remove, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_family mptcp_genl_family __ro_after_init = { diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 347184a9157b..3a42c9e66126 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -180,3 +180,65 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) sock_put((struct sock *)msk); return err; } + +int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; + struct mptcp_pm_addr_entry *match = NULL; + struct mptcp_pm_addr_entry *entry; + struct mptcp_sock *msk; + LIST_HEAD(free_list); + int err = -EINVAL; + u32 token_val; + u8 id_val; + + if (!id || !token) { + GENL_SET_ERR_MSG(info, "missing required inputs"); + return err; + } + + id_val = nla_get_u8(id); + token_val = nla_get_u32(token); + + msk = mptcp_token_get_sock(sock_net(skb->sk), token_val); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return err; + } + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto remove_err; + } + + lock_sock((struct sock *)msk); + + list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + if (entry->addr.id == id_val) { + match = entry; + break; + } + } + + if (!match) { + GENL_SET_ERR_MSG(info, "address with specified id not found"); + release_sock((struct sock *)msk); + goto remove_err; + } + + list_move(&match->list, &free_list); + + mptcp_pm_remove_addrs_and_subflows(msk, &free_list); + + release_sock((struct sock *)msk); + + list_for_each_entry_safe(match, entry, &free_list, list) { + sock_kfree_s((struct sock *)msk, match, sizeof(*match)); + } + + err = 0; + remove_err: + sock_put((struct sock *)msk); + return err; +} diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index de645efbc806..4026aa3df7f4 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -801,10 +801,14 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); +void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, + struct list_head *rm_list); + int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry); void mptcp_free_local_addr_list(struct mptcp_sock *msk); int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info); +int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info); void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); -- cgit v1.2.3 From 702c2f646d42cfd9e31133d68a8283fea48fd810 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 3 May 2022 19:38:56 -0700 Subject: mptcp: netlink: allow userspace-driven subflow establishment This allows userspace to tell kernel to add a new subflow to an existing mptcp connection. Userspace provides the token to identify the mptcp-level connection that needs a change in active subflows and the local and remote addresses of the new or the to-be-removed subflow. MPTCP_PM_CMD_SUBFLOW_CREATE requires the following parameters: { token, { loc_id, family, loc_addr4 | loc_addr6 }, { family, rem_addr4 | rem_addr6, rem_port } MPTCP_PM_CMD_SUBFLOW_DESTROY requires the following parameters: { token, { family, loc_addr4 | loc_addr6, loc_port }, { family, rem_addr4 | rem_addr6, rem_port } Acked-by: Paolo Abeni Co-developed-by: Kishen Maloor Signed-off-by: Kishen Maloor Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/uapi/linux/mptcp.h | 3 + net/mptcp/pm_netlink.c | 22 ++++++ net/mptcp/pm_userspace.c | 185 +++++++++++++++++++++++++++++++++++++++++++++ net/mptcp/protocol.h | 4 + 4 files changed, 214 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 11f9fa001a3c..921963589904 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -57,6 +57,7 @@ enum { MPTCP_PM_ATTR_SUBFLOWS, /* u32 */ MPTCP_PM_ATTR_TOKEN, /* u32 */ MPTCP_PM_ATTR_LOC_ID, /* u8 */ + MPTCP_PM_ATTR_ADDR_REMOTE, /* nested address */ __MPTCP_PM_ATTR_MAX }; @@ -97,6 +98,8 @@ enum { MPTCP_PM_CMD_SET_FLAGS, MPTCP_PM_CMD_ANNOUNCE, MPTCP_PM_CMD_REMOVE, + MPTCP_PM_CMD_SUBFLOW_CREATE, + MPTCP_PM_CMD_SUBFLOW_DESTROY, __MPTCP_PM_CMD_AFTER_LAST }; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index a26750f19f65..e099f2a12504 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1096,6 +1096,8 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = { [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, }, + [MPTCP_PM_ATTR_ADDR_REMOTE] = + NLA_POLICY_NESTED(mptcp_pm_addr_policy), }; void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) @@ -1205,6 +1207,16 @@ static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[], return err; } +int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, + struct mptcp_addr_info *addr) +{ + struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; + + memset(addr, 0, sizeof(*addr)); + + return mptcp_pm_parse_pm_addr_attr(tb, attr, info, addr, true); +} + int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, bool require_family, struct mptcp_pm_addr_entry *entry) @@ -2210,6 +2222,16 @@ static const struct genl_small_ops mptcp_pm_ops[] = { .doit = mptcp_nl_cmd_remove, .flags = GENL_ADMIN_PERM, }, + { + .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE, + .doit = mptcp_nl_cmd_sf_create, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY, + .doit = mptcp_nl_cmd_sf_destroy, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_family mptcp_genl_family __ro_after_init = { diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 3a42c9e66126..f56378e4f597 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -242,3 +242,188 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) sock_put((struct sock *)msk); return err; } + +int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct mptcp_addr_info addr_r; + struct mptcp_addr_info addr_l; + struct mptcp_sock *msk; + int err = -EINVAL; + struct sock *sk; + u32 token_val; + + if (!laddr || !raddr || !token) { + GENL_SET_ERR_MSG(info, "missing required inputs"); + return err; + } + + token_val = nla_get_u32(token); + + msk = mptcp_token_get_sock(genl_info_net(info), token_val); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return err; + } + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto create_err; + } + + err = mptcp_pm_parse_addr(laddr, info, &addr_l); + if (err < 0) { + NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); + goto create_err; + } + + if (addr_l.id == 0) { + NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local addr id"); + goto create_err; + } + + err = mptcp_pm_parse_addr(raddr, info, &addr_r); + if (err < 0) { + NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr"); + goto create_err; + } + + sk = &msk->sk.icsk_inet.sk; + lock_sock(sk); + + err = __mptcp_subflow_connect(sk, &addr_l, &addr_r); + + release_sock(sk); + + create_err: + sock_put((struct sock *)msk); + return err; +} + +static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk, + const struct mptcp_addr_info *local, + const struct mptcp_addr_info *remote) +{ + struct sock *sk = &msk->sk.icsk_inet.sk; + struct mptcp_subflow_context *subflow; + struct sock *found = NULL; + + if (local->family != remote->family) + return NULL; + + lock_sock(sk); + + mptcp_for_each_subflow(msk, subflow) { + const struct inet_sock *issk; + struct sock *ssk; + + ssk = mptcp_subflow_tcp_sock(subflow); + + if (local->family != ssk->sk_family) + continue; + + issk = inet_sk(ssk); + + switch (ssk->sk_family) { + case AF_INET: + if (issk->inet_saddr != local->addr.s_addr || + issk->inet_daddr != remote->addr.s_addr) + continue; + break; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + case AF_INET6: { + const struct ipv6_pinfo *pinfo = inet6_sk(ssk); + + if (!ipv6_addr_equal(&local->addr6, &pinfo->saddr) || + !ipv6_addr_equal(&remote->addr6, &ssk->sk_v6_daddr)) + continue; + break; + } +#endif + default: + continue; + } + + if (issk->inet_sport == local->port && + issk->inet_dport == remote->port) { + found = ssk; + goto found; + } + } + +found: + release_sock(sk); + + return found; +} + +int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; + struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct mptcp_addr_info addr_l; + struct mptcp_addr_info addr_r; + struct mptcp_sock *msk; + struct sock *sk, *ssk; + int err = -EINVAL; + u32 token_val; + + if (!laddr || !raddr || !token) { + GENL_SET_ERR_MSG(info, "missing required inputs"); + return err; + } + + token_val = nla_get_u32(token); + + msk = mptcp_token_get_sock(genl_info_net(info), token_val); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return err; + } + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + goto destroy_err; + } + + err = mptcp_pm_parse_addr(laddr, info, &addr_l); + if (err < 0) { + NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); + goto destroy_err; + } + + err = mptcp_pm_parse_addr(raddr, info, &addr_r); + if (err < 0) { + NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr"); + goto destroy_err; + } + + if (addr_l.family != addr_r.family) { + GENL_SET_ERR_MSG(info, "address families do not match"); + goto destroy_err; + } + + if (!addr_l.port || !addr_r.port) { + GENL_SET_ERR_MSG(info, "missing local or remote port"); + goto destroy_err; + } + + sk = &msk->sk.icsk_inet.sk; + ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r); + if (ssk) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + + mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN); + mptcp_close_ssk(sk, ssk, subflow); + err = 0; + } else { + err = -ESRCH; + } + + destroy_err: + sock_put((struct sock *)msk); + return err; +} diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 4026aa3df7f4..f542aeaa5b09 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -756,6 +756,8 @@ u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum); void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); void mptcp_pm_data_reset(struct mptcp_sock *msk); +int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, + struct mptcp_addr_info *addr); int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, bool require_family, struct mptcp_pm_addr_entry *entry); @@ -809,6 +811,8 @@ int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, void mptcp_free_local_addr_list(struct mptcp_sock *msk); int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info); int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info); +int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info); +int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info); void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); -- cgit v1.2.3 From 36f8423597000bd7d5e48b7b306e1d0958e72359 Mon Sep 17 00:00:00 2001 From: Muna Sinada Date: Wed, 23 Mar 2022 15:46:35 -0700 Subject: cfg80211: support disabling EHT mode Allow userspace to disable EHT mode during association. Signed-off-by: Muna Sinada Signed-off-by: Aloka Dixit Link: https://lore.kernel.org/r/20220323224636.20211-1-quic_alokad@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 2 ++ net/wireless/nl80211.c | 7 +++++++ 3 files changed, 11 insertions(+) (limited to 'include/uapi') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index cd1212113901..6a3e3f0a8615 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2735,6 +2735,7 @@ struct cfg80211_auth_request { * userspace if this flag is set. Only applicable for cfg80211_connect() * request (connect callback). * @ASSOC_REQ_DISABLE_HE: Disable HE + * @ASSOC_REQ_DISABLE_EHT: Disable EHT */ enum cfg80211_assoc_req_flags { ASSOC_REQ_DISABLE_HT = BIT(0), @@ -2742,6 +2743,7 @@ enum cfg80211_assoc_req_flags { ASSOC_REQ_USE_RRM = BIT(2), CONNECT_REQ_EXTERNAL_AUTH_SUPPORT = BIT(3), ASSOC_REQ_DISABLE_HE = BIT(4), + ASSOC_REQ_DISABLE_EHT = BIT(5), }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 0568a79097b8..d9490e3062a7 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3175,6 +3175,8 @@ enum nl80211_attrs { NL80211_ATTR_EHT_CAPABILITY, + NL80211_ATTR_DISABLE_EHT, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 174f254ee947..2c64baae9863 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -791,6 +791,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { NLA_POLICY_RANGE(NLA_BINARY, NL80211_EHT_MIN_CAPABILITY_LEN, NL80211_EHT_MAX_CAPABILITY_LEN), + [NL80211_ATTR_DISABLE_EHT] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -10378,6 +10379,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HE])) req.flags |= ASSOC_REQ_DISABLE_HE; + if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_EHT])) + req.flags |= ASSOC_REQ_DISABLE_EHT; + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) memcpy(&req.vht_capa_mask, nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]), @@ -11166,6 +11170,9 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HE])) connect.flags |= ASSOC_REQ_DISABLE_HE; + if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_EHT])) + connect.flags |= ASSOC_REQ_DISABLE_EHT; + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) memcpy(&connect.vht_capa_mask, nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]), -- cgit v1.2.3 From c4a67a21a6d255ddcbaa076c0412aad73c7e0c02 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 4 May 2022 08:40:37 -0700 Subject: Revert "Merge branch 'mlxsw-line-card-model'" This reverts commit 5e927a9f4b9f29d78a7c7d66ea717bb5c8bbad8e, reversing changes made to cfc1d91a7d78cf9de25b043d81efcc16966d55b3. The discussion is still ongoing so let's remove the uAPI until the discussion settles. Link: https://lore.kernel.org/all/20220425090021.32e9a98f@kernel.org/ Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/20220504154037.539442-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../networking/devlink/devlink-linecard.rst | 4 - Documentation/networking/devlink/mlxsw.rst | 33 --- drivers/net/ethernet/mellanox/mlxsw/core.h | 1 - .../net/ethernet/mellanox/mlxsw/core_linecards.c | 237 +--------------- drivers/net/ethernet/mellanox/mlxsw/reg.h | 87 +----- include/net/devlink.h | 18 +- include/uapi/linux/devlink.h | 5 - net/core/devlink.c | 303 +-------------------- .../drivers/net/mlxsw/devlink_linecard.sh | 61 ----- 9 files changed, 10 insertions(+), 739 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/networking/devlink/devlink-linecard.rst b/Documentation/networking/devlink/devlink-linecard.rst index a98b468ad479..6c0b8928bc13 100644 --- a/Documentation/networking/devlink/devlink-linecard.rst +++ b/Documentation/networking/devlink/devlink-linecard.rst @@ -14,7 +14,6 @@ system. Following operations are provided: * Get a list of supported line card types. * Provision of a slot with specific line card type. * Get and monitor of line card state and its change. - * Get information about line card versions and devices. Line card according to the type may contain one or more gearboxes to mux the lanes with certain speed to multiple ports with lanes @@ -121,6 +120,3 @@ Example usage # Set slot 8 to be unprovisioned: $ devlink lc set pci/0000:01:00.0 lc 8 notype - - # Set info for slot 8: - $ devlink lc info pci/0000:01:00.0 lc 8 diff --git a/Documentation/networking/devlink/mlxsw.rst b/Documentation/networking/devlink/mlxsw.rst index 0af345680510..cf857cb4ba8f 100644 --- a/Documentation/networking/devlink/mlxsw.rst +++ b/Documentation/networking/devlink/mlxsw.rst @@ -58,39 +58,6 @@ The ``mlxsw`` driver reports the following versions - running - Three digit firmware version -Line card info versions -======================= - -The ``mlxsw`` driver reports the following versions for line cards - -.. list-table:: devlink line card info versions implemented - :widths: 5 5 90 - - * - Name - - Type - - Description - * - ``hw.revision`` - - fixed - - The hardware revision for this line card - * - ``ini.version`` - - running - - Version of line card INI loaded - -Line card device info versions -============================== - -The ``mlxsw`` driver reports the following versions for line card devices - -.. list-table:: devlink line card device info versions implemented - :widths: 5 5 90 - - * - Name - - Type - - Description - * - ``fw.version`` - - running - - Three digit firmware version - Driver-specific Traps ===================== diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index d008282d7f2e..c2a891287047 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -581,7 +581,6 @@ struct mlxsw_linecard { active:1; u16 hw_revision; u16 ini_version; - struct list_head device_list; }; struct mlxsw_linecard_types_info; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c index 2abd31a62776..5c9869dcf674 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c @@ -87,191 +87,11 @@ static const char *mlxsw_linecard_type_name(struct mlxsw_linecard *linecard) return linecard->name; } -struct mlxsw_linecard_device_info { - u16 fw_major; - u16 fw_minor; - u16 fw_sub_minor; -}; - -struct mlxsw_linecard_device { - struct list_head list; - u8 index; - struct mlxsw_linecard *linecard; - struct devlink_linecard_device *devlink_device; - struct mlxsw_linecard_device_info info; -}; - -static struct mlxsw_linecard_device * -mlxsw_linecard_device_lookup(struct mlxsw_linecard *linecard, u8 index) -{ - struct mlxsw_linecard_device *device; - - list_for_each_entry(device, &linecard->device_list, list) - if (device->index == index) - return device; - return NULL; -} - -static int mlxsw_linecard_device_attach(struct mlxsw_core *mlxsw_core, - struct mlxsw_linecard *linecard, - u8 device_index, bool flash_owner) -{ - struct mlxsw_linecard_device *device; - int err; - - device = kzalloc(sizeof(*device), GFP_KERNEL); - if (!device) - return -ENOMEM; - device->index = device_index; - device->linecard = linecard; - - device->devlink_device = devlink_linecard_device_create(linecard->devlink_linecard, - device_index, device); - if (IS_ERR(device->devlink_device)) { - err = PTR_ERR(device->devlink_device); - goto err_devlink_linecard_device_attach; - } - - list_add_tail(&device->list, &linecard->device_list); - return 0; - -err_devlink_linecard_device_attach: - kfree(device); - return err; -} - -static void mlxsw_linecard_device_detach(struct mlxsw_core *mlxsw_core, - struct mlxsw_linecard *linecard, - struct mlxsw_linecard_device *device) -{ - list_del(&device->list); - devlink_linecard_device_destroy(linecard->devlink_linecard, - device->devlink_device); - kfree(device); -} - -static void mlxsw_linecard_devices_detach(struct mlxsw_linecard *linecard) -{ - struct mlxsw_core *mlxsw_core = linecard->linecards->mlxsw_core; - struct mlxsw_linecard_device *device, *tmp; - - list_for_each_entry_safe(device, tmp, &linecard->device_list, list) - mlxsw_linecard_device_detach(mlxsw_core, linecard, device); -} - -static int mlxsw_linecard_devices_attach(struct mlxsw_linecard *linecard) -{ - struct mlxsw_core *mlxsw_core = linecard->linecards->mlxsw_core; - u8 msg_seq = 0; - int err; - - do { - char mddq_pl[MLXSW_REG_MDDQ_LEN]; - bool flash_owner; - bool data_valid; - u8 device_index; - - mlxsw_reg_mddq_device_info_pack(mddq_pl, linecard->slot_index, - msg_seq); - err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mddq), mddq_pl); - if (err) - return err; - mlxsw_reg_mddq_device_info_unpack(mddq_pl, &msg_seq, - &data_valid, &flash_owner, - &device_index, NULL, - NULL, NULL); - if (!data_valid) - break; - err = mlxsw_linecard_device_attach(mlxsw_core, linecard, - device_index, flash_owner); - if (err) - goto rollback; - } while (msg_seq); - - return 0; - -rollback: - mlxsw_linecard_devices_detach(linecard); - return err; -} - -static void mlxsw_linecard_device_update(struct mlxsw_linecard *linecard, - u8 device_index, - struct mlxsw_linecard_device_info *info) -{ - struct mlxsw_linecard_device *device; - - device = mlxsw_linecard_device_lookup(linecard, device_index); - if (!device) - return; - device->info = *info; -} - -static int mlxsw_linecard_devices_update(struct mlxsw_linecard *linecard) -{ - struct mlxsw_core *mlxsw_core = linecard->linecards->mlxsw_core; - u8 msg_seq = 0; - - do { - struct mlxsw_linecard_device_info info; - char mddq_pl[MLXSW_REG_MDDQ_LEN]; - bool data_valid; - u8 device_index; - int err; - - mlxsw_reg_mddq_device_info_pack(mddq_pl, linecard->slot_index, - msg_seq); - err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mddq), mddq_pl); - if (err) - return err; - mlxsw_reg_mddq_device_info_unpack(mddq_pl, &msg_seq, - &data_valid, NULL, - &device_index, - &info.fw_major, - &info.fw_minor, - &info.fw_sub_minor); - if (!data_valid) - break; - mlxsw_linecard_device_update(linecard, device_index, &info); - } while (msg_seq); - - return 0; -} - -static int -mlxsw_linecard_device_info_get(struct devlink_linecard_device *devlink_linecard_device, - void *priv, struct devlink_info_req *req, - struct netlink_ext_ack *extack) -{ - struct mlxsw_linecard_device *device = priv; - struct mlxsw_linecard_device_info *info; - struct mlxsw_linecard *linecard; - char buf[32]; - - linecard = device->linecard; - mutex_lock(&linecard->lock); - if (!linecard->active) { - mutex_unlock(&linecard->lock); - return 0; - } - - info = &device->info; - - sprintf(buf, "%u.%u.%u", info->fw_major, info->fw_minor, - info->fw_sub_minor); - mutex_unlock(&linecard->lock); - - return devlink_info_version_running_put(req, - DEVLINK_INFO_VERSION_GENERIC_FW, - buf); -} - static void mlxsw_linecard_provision_fail(struct mlxsw_linecard *linecard) { linecard->provisioned = false; linecard->ready = false; linecard->active = false; - mlxsw_linecard_devices_detach(linecard); devlink_linecard_provision_fail(linecard->devlink_linecard); } @@ -412,7 +232,6 @@ mlxsw_linecard_provision_set(struct mlxsw_linecard *linecard, u8 card_type, { struct mlxsw_linecards *linecards = linecard->linecards; const char *type; - int err; type = mlxsw_linecard_types_lookup(linecards, card_type); mlxsw_linecard_status_event_done(linecard, @@ -430,11 +249,6 @@ mlxsw_linecard_provision_set(struct mlxsw_linecard *linecard, u8 card_type, return PTR_ERR(type); } } - err = mlxsw_linecard_devices_attach(linecard); - if (err) { - mlxsw_linecard_provision_fail(linecard); - return err; - } linecard->provisioned = true; linecard->hw_revision = hw_revision; linecard->ini_version = ini_version; @@ -447,7 +261,6 @@ static void mlxsw_linecard_provision_clear(struct mlxsw_linecard *linecard) mlxsw_linecard_status_event_done(linecard, MLXSW_LINECARD_STATUS_EVENT_TYPE_UNPROVISION); linecard->provisioned = false; - mlxsw_linecard_devices_detach(linecard); devlink_linecard_provision_clear(linecard->devlink_linecard); } @@ -479,18 +292,11 @@ static int mlxsw_linecard_ready_clear(struct mlxsw_linecard *linecard) return 0; } -static int mlxsw_linecard_active_set(struct mlxsw_linecard *linecard) +static void mlxsw_linecard_active_set(struct mlxsw_linecard *linecard) { - int err; - - err = mlxsw_linecard_devices_update(linecard); - if (err) - return err; - mlxsw_linecard_active_ops_call(linecard); linecard->active = true; devlink_linecard_activate(linecard->devlink_linecard); - return 0; } static void mlxsw_linecard_active_clear(struct mlxsw_linecard *linecard) @@ -539,11 +345,8 @@ static int mlxsw_linecard_status_process(struct mlxsw_linecards *linecards, goto out; } - if (active && linecard->active != active) { - err = mlxsw_linecard_active_set(linecard); - if (err) - goto out; - } + if (active && linecard->active != active) + mlxsw_linecard_active_set(linecard); if (!active && linecard->active != active) mlxsw_linecard_active_clear(linecard); @@ -934,44 +737,12 @@ static void mlxsw_linecard_types_get(struct devlink_linecard *devlink_linecard, *type_priv = ini_file; } -static int -mlxsw_linecard_info_get(struct devlink_linecard *devlink_linecard, void *priv, - struct devlink_info_req *req, - struct netlink_ext_ack *extack) -{ - struct mlxsw_linecard *linecard = priv; - char buf[32]; - int err; - - mutex_lock(&linecard->lock); - if (!linecard->provisioned) { - err = 0; - goto unlock; - } - - sprintf(buf, "%d", linecard->hw_revision); - err = devlink_info_version_fixed_put(req, "hw.revision", buf); - if (err) - goto unlock; - - sprintf(buf, "%d", linecard->ini_version); - err = devlink_info_version_running_put(req, "ini.version", buf); - if (err) - goto unlock; - -unlock: - mutex_unlock(&linecard->lock); - return err; -} - static const struct devlink_linecard_ops mlxsw_linecard_ops = { .provision = mlxsw_linecard_provision, .unprovision = mlxsw_linecard_unprovision, .same_provision = mlxsw_linecard_same_provision, .types_count = mlxsw_linecard_types_count, .types_get = mlxsw_linecard_types_get, - .info_get = mlxsw_linecard_info_get, - .device_info_get = mlxsw_linecard_device_info_get, }; struct mlxsw_linecard_status_event { @@ -1069,7 +840,6 @@ static int mlxsw_linecard_init(struct mlxsw_core *mlxsw_core, linecard->slot_index = slot_index; linecard->linecards = linecards; mutex_init(&linecard->lock); - INIT_LIST_HEAD(&linecard->device_list); devlink_linecard = devlink_linecard_create(priv_to_devlink(mlxsw_core), slot_index, &mlxsw_linecard_ops, @@ -1115,7 +885,6 @@ static void mlxsw_linecard_fini(struct mlxsw_core *mlxsw_core, mlxsw_core_flush_owq(); if (linecard->active) mlxsw_linecard_active_clear(linecard); - mlxsw_linecard_devices_detach(linecard); devlink_linecard_destroy(linecard->devlink_linecard); mutex_destroy(&linecard->lock); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 078e3aa04383..93af6c974ece 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -11643,11 +11643,7 @@ MLXSW_ITEM32(reg, mddq, sie, 0x00, 31, 1); enum mlxsw_reg_mddq_query_type { MLXSW_REG_MDDQ_QUERY_TYPE_SLOT_INFO = 1, - MLXSW_REG_MDDQ_QUERY_TYPE_DEVICE_INFO, /* If there are no devices - * on the slot, data_valid - * will be '0'. - */ - MLXSW_REG_MDDQ_QUERY_TYPE_SLOT_NAME, + MLXSW_REG_MDDQ_QUERY_TYPE_SLOT_NAME = 3, }; /* reg_mddq_query_type @@ -11661,28 +11657,6 @@ MLXSW_ITEM32(reg, mddq, query_type, 0x00, 16, 8); */ MLXSW_ITEM32(reg, mddq, slot_index, 0x00, 0, 4); -/* reg_mddq_response_msg_seq - * Response message sequential number. For a specific request, the response - * message sequential number is the following one. In addition, the last - * message should be 0. - * Access: RO - */ -MLXSW_ITEM32(reg, mddq, response_msg_seq, 0x04, 16, 8); - -/* reg_mddq_request_msg_seq - * Request message sequential number. - * The first message number should be 0. - * Access: Index - */ -MLXSW_ITEM32(reg, mddq, request_msg_seq, 0x04, 0, 8); - -/* reg_mddq_data_valid - * If set, the data in the data field is valid and contain the information - * for the queried index. - * Access: RO - */ -MLXSW_ITEM32(reg, mddq, data_valid, 0x08, 31, 1); - /* reg_mddq_slot_info_provisioned * If set, the INI file is applied and the card is provisioned. * Access: RO @@ -11769,65 +11743,6 @@ mlxsw_reg_mddq_slot_info_unpack(const char *payload, u8 *p_slot_index, *p_card_type = mlxsw_reg_mddq_slot_info_card_type_get(payload); } -/* reg_mddq_device_info_flash_owner - * If set, the device is the flash owner. Otherwise, a shared flash - * is used by this device (another device is the flash owner). - * Access: RO - */ -MLXSW_ITEM32(reg, mddq, device_info_flash_owner, 0x10, 30, 1); - -/* reg_mddq_device_info_device_index - * Device index. The first device should number 0. - * Access: RO - */ -MLXSW_ITEM32(reg, mddq, device_info_device_index, 0x10, 0, 8); - -/* reg_mddq_device_info_fw_major - * Major FW version number. - * Access: RO - */ -MLXSW_ITEM32(reg, mddq, device_info_fw_major, 0x14, 16, 16); - -/* reg_mddq_device_info_fw_minor - * Minor FW version number. - * Access: RO - */ -MLXSW_ITEM32(reg, mddq, device_info_fw_minor, 0x18, 16, 16); - -/* reg_mddq_device_info_fw_sub_minor - * Sub-minor FW version number. - * Access: RO - */ -MLXSW_ITEM32(reg, mddq, device_info_fw_sub_minor, 0x18, 0, 16); - -static inline void -mlxsw_reg_mddq_device_info_pack(char *payload, u8 slot_index, - u8 request_msg_seq) -{ - __mlxsw_reg_mddq_pack(payload, slot_index, - MLXSW_REG_MDDQ_QUERY_TYPE_DEVICE_INFO); - mlxsw_reg_mddq_request_msg_seq_set(payload, request_msg_seq); -} - -static inline void -mlxsw_reg_mddq_device_info_unpack(const char *payload, u8 *p_response_msg_seq, - bool *p_data_valid, bool *p_flash_owner, - u8 *p_device_index, u16 *p_fw_major, - u16 *p_fw_minor, u16 *p_fw_sub_minor) -{ - *p_response_msg_seq = mlxsw_reg_mddq_response_msg_seq_get(payload); - *p_data_valid = mlxsw_reg_mddq_data_valid_get(payload); - if (p_flash_owner) - *p_flash_owner = mlxsw_reg_mddq_device_info_flash_owner_get(payload); - *p_device_index = mlxsw_reg_mddq_device_info_device_index_get(payload); - if (p_fw_major) - *p_fw_major = mlxsw_reg_mddq_device_info_fw_major_get(payload); - if (p_fw_minor) - *p_fw_minor = mlxsw_reg_mddq_device_info_fw_minor_get(payload); - if (p_fw_sub_minor) - *p_fw_sub_minor = mlxsw_reg_mddq_device_info_fw_sub_minor_get(payload); -} - #define MLXSW_REG_MDDQ_SLOT_ASCII_NAME_LEN 20 /* reg_mddq_slot_ascii_name diff --git a/include/net/devlink.h b/include/net/devlink.h index 062895973656..2a2a2a0c93f7 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -150,9 +150,6 @@ struct devlink_port_new_attrs { sfnum_valid:1; }; -struct devlink_info_req; -struct devlink_linecard_device; - /** * struct devlink_linecard_ops - Linecard operations * @provision: callback to provision the linecard slot with certain @@ -171,8 +168,6 @@ struct devlink_linecard_device; * provisioned. * @types_count: callback to get number of supported types * @types_get: callback to get next type in list - * @info_get: callback to get linecard info - * @device_info_get: callback to get linecard device info */ struct devlink_linecard_ops { int (*provision)(struct devlink_linecard *linecard, void *priv, @@ -187,12 +182,6 @@ struct devlink_linecard_ops { void (*types_get)(struct devlink_linecard *linecard, void *priv, unsigned int index, const char **type, const void **type_priv); - int (*info_get)(struct devlink_linecard *linecard, void *priv, - struct devlink_info_req *req, - struct netlink_ext_ack *extack); - int (*device_info_get)(struct devlink_linecard_device *device, - void *priv, struct devlink_info_req *req, - struct netlink_ext_ack *extack); }; struct devlink_sb_pool_info { @@ -639,6 +628,7 @@ struct devlink_flash_update_params { #define DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK BIT(1) struct devlink_region; +struct devlink_info_req; /** * struct devlink_region_ops - Region operations @@ -1588,12 +1578,6 @@ struct devlink_linecard * devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index, const struct devlink_linecard_ops *ops, void *priv); void devlink_linecard_destroy(struct devlink_linecard *linecard); -struct devlink_linecard_device * -devlink_linecard_device_create(struct devlink_linecard *linecard, - unsigned int device_index, void *priv); -void -devlink_linecard_device_destroy(struct devlink_linecard *linecard, - struct devlink_linecard_device *linecard_device); void devlink_linecard_provision_set(struct devlink_linecard *linecard, const char *type); void devlink_linecard_provision_clear(struct devlink_linecard *linecard); diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index fb8c3864457f..b3d40a5d72ff 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -136,8 +136,6 @@ enum devlink_command { DEVLINK_CMD_LINECARD_NEW, DEVLINK_CMD_LINECARD_DEL, - DEVLINK_CMD_LINECARD_INFO_GET, /* can dump */ - /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 @@ -577,9 +575,6 @@ enum devlink_attr { DEVLINK_ATTR_LINECARD_STATE, /* u8 */ DEVLINK_ATTR_LINECARD_TYPE, /* string */ DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES, /* nested */ - DEVLINK_ATTR_LINECARD_DEVICE_LIST, /* nested */ - DEVLINK_ATTR_LINECARD_DEVICE, /* nested */ - DEVLINK_ATTR_LINECARD_DEVICE_INDEX, /* u32 */ /* add new attributes above here, update the policy in devlink.c */ diff --git a/net/core/devlink.c b/net/core/devlink.c index 5f441a0e34f4..5cc88490f18f 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -83,11 +83,10 @@ struct devlink_linecard { const struct devlink_linecard_ops *ops; void *priv; enum devlink_linecard_state state; - struct mutex state_lock; /* Protects state and device_list */ + struct mutex state_lock; /* Protects state */ const char *type; struct devlink_linecard_type *types; unsigned int types_count; - struct list_head device_list; }; /** @@ -2059,56 +2058,6 @@ struct devlink_linecard_type { const void *priv; }; -struct devlink_linecard_device { - struct list_head list; - unsigned int index; - void *priv; -}; - -static int -devlink_nl_linecard_device_fill(struct sk_buff *msg, - struct devlink_linecard_device *linecard_device) -{ - struct nlattr *attr; - - attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE); - if (!attr) - return -EMSGSIZE; - if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_DEVICE_INDEX, - linecard_device->index)) { - nla_nest_cancel(msg, attr); - return -EMSGSIZE; - } - nla_nest_end(msg, attr); - - return 0; -} - -static int devlink_nl_linecard_devices_fill(struct sk_buff *msg, - struct devlink_linecard *linecard) -{ - struct devlink_linecard_device *linecard_device; - struct nlattr *attr; - int err; - - if (list_empty(&linecard->device_list)) - return 0; - - attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE_LIST); - if (!attr) - return -EMSGSIZE; - list_for_each_entry(linecard_device, &linecard->device_list, list) { - err = devlink_nl_linecard_device_fill(msg, linecard_device); - if (err) { - nla_nest_cancel(msg, attr); - return err; - } - } - nla_nest_end(msg, attr); - - return 0; -} - static int devlink_nl_linecard_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_linecard *linecard, @@ -2119,7 +2068,6 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg, struct devlink_linecard_type *linecard_type; struct nlattr *attr; void *hdr; - int err; int i; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); @@ -2152,10 +2100,6 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg, nla_nest_end(msg, attr); } - err = devlink_nl_linecard_devices_fill(msg, linecard); - if (err) - goto nla_put_failure; - genlmsg_end(msg, hdr); return 0; @@ -2425,191 +2369,6 @@ static int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb, return 0; } -struct devlink_info_req { - struct sk_buff *msg; -}; - -static int -devlink_nl_linecard_device_info_fill(struct sk_buff *msg, - struct devlink_linecard *linecard, - struct devlink_linecard_device *linecard_device, - struct netlink_ext_ack *extack) -{ - struct nlattr *attr; - - attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE); - if (!attr) - return -EMSGSIZE; - if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_DEVICE_INDEX, - linecard_device->index)) { - nla_nest_cancel(msg, attr); - return -EMSGSIZE; - } - if (linecard->ops->device_info_get) { - struct devlink_info_req req; - int err; - - req.msg = msg; - err = linecard->ops->device_info_get(linecard_device, - linecard_device->priv, - &req, extack); - if (err) { - nla_nest_cancel(msg, attr); - return err; - } - } - nla_nest_end(msg, attr); - - return 0; -} - -static int devlink_nl_linecard_devices_info_fill(struct sk_buff *msg, - struct devlink_linecard *linecard, - struct netlink_ext_ack *extack) -{ - struct devlink_linecard_device *linecard_device; - struct nlattr *attr; - int err; - - if (list_empty(&linecard->device_list)) - return 0; - - attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE_LIST); - if (!attr) - return -EMSGSIZE; - list_for_each_entry(linecard_device, &linecard->device_list, list) { - err = devlink_nl_linecard_device_info_fill(msg, linecard, - linecard_device, - extack); - if (err) { - nla_nest_cancel(msg, attr); - return err; - } - } - nla_nest_end(msg, attr); - - return 0; -} - -static int -devlink_nl_linecard_info_fill(struct sk_buff *msg, struct devlink *devlink, - struct devlink_linecard *linecard, - enum devlink_command cmd, u32 portid, - u32 seq, int flags, struct netlink_ext_ack *extack) -{ - struct devlink_info_req req; - void *hdr; - int err; - - hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); - if (!hdr) - return -EMSGSIZE; - - err = -EMSGSIZE; - if (devlink_nl_put_handle(msg, devlink)) - goto nla_put_failure; - if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index)) - goto nla_put_failure; - - req.msg = msg; - err = linecard->ops->info_get(linecard, linecard->priv, &req, extack); - if (err) - goto nla_put_failure; - - err = devlink_nl_linecard_devices_info_fill(msg, linecard, extack); - if (err) - goto nla_put_failure; - - genlmsg_end(msg, hdr); - return 0; - -nla_put_failure: - genlmsg_cancel(msg, hdr); - return err; -} - -static int devlink_nl_cmd_linecard_info_get_doit(struct sk_buff *skb, - struct genl_info *info) -{ - struct devlink_linecard *linecard = info->user_ptr[1]; - struct devlink *devlink = linecard->devlink; - struct sk_buff *msg; - int err; - - if (!linecard->ops->info_get) - return -EOPNOTSUPP; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; - - mutex_lock(&linecard->state_lock); - err = devlink_nl_linecard_info_fill(msg, devlink, linecard, - DEVLINK_CMD_LINECARD_INFO_GET, - info->snd_portid, info->snd_seq, 0, - info->extack); - mutex_unlock(&linecard->state_lock); - if (err) { - nlmsg_free(msg); - return err; - } - - return genlmsg_reply(msg, info); -} - -static int devlink_nl_cmd_linecard_info_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) -{ - struct devlink_linecard *linecard; - struct devlink *devlink; - int start = cb->args[0]; - unsigned long index; - int idx = 0; - int err = 0; - - mutex_lock(&devlink_mutex); - xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { - if (!devlink_try_get(devlink)) - continue; - - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - goto retry; - - mutex_lock(&devlink->linecards_lock); - list_for_each_entry(linecard, &devlink->linecard_list, list) { - if (idx < start || !linecard->ops->info_get) { - idx++; - continue; - } - mutex_lock(&linecard->state_lock); - err = devlink_nl_linecard_info_fill(msg, devlink, linecard, - DEVLINK_CMD_LINECARD_INFO_GET, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI, - cb->extack); - mutex_unlock(&linecard->state_lock); - if (err) { - mutex_unlock(&devlink->linecards_lock); - devlink_put(devlink); - goto out; - } - idx++; - } - mutex_unlock(&devlink->linecards_lock); -retry: - devlink_put(devlink); - } -out: - mutex_unlock(&devlink_mutex); - - if (err != -EMSGSIZE) - return err; - - cb->args[0] = idx; - return msg->len; -} - static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_sb *devlink_sb, enum devlink_command cmd, u32 portid, @@ -6602,6 +6361,10 @@ out_dev: return err; } +struct devlink_info_req { + struct sk_buff *msg; +}; + int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name) { return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name); @@ -9321,13 +9084,6 @@ static const struct genl_small_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD, }, - { - .cmd = DEVLINK_CMD_LINECARD_INFO_GET, - .doit = devlink_nl_cmd_linecard_info_get_doit, - .dumpit = devlink_nl_cmd_linecard_info_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD, - /* can be retrieved by unprivileged users */ - }, { .cmd = DEVLINK_CMD_SB_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, @@ -10508,7 +10264,6 @@ devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index, linecard->priv = priv; linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; mutex_init(&linecard->state_lock); - INIT_LIST_HEAD(&linecard->device_list); err = devlink_linecard_types_init(linecard); if (err) { @@ -10536,7 +10291,6 @@ void devlink_linecard_destroy(struct devlink_linecard *linecard) struct devlink *devlink = linecard->devlink; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL); - WARN_ON(!list_empty(&linecard->device_list)); mutex_lock(&devlink->linecards_lock); list_del(&linecard->list); devlink_linecard_types_fini(linecard); @@ -10545,52 +10299,6 @@ void devlink_linecard_destroy(struct devlink_linecard *linecard) } EXPORT_SYMBOL_GPL(devlink_linecard_destroy); -/** - * devlink_linecard_device_create - Create a device on linecard - * - * @linecard: devlink linecard - * @device_index: index of the linecard device - * @priv: user priv pointer - * - * Return: Line card device structure or an ERR_PTR() encoded error code. - */ -struct devlink_linecard_device * -devlink_linecard_device_create(struct devlink_linecard *linecard, - unsigned int device_index, void *priv) -{ - struct devlink_linecard_device *linecard_device; - - linecard_device = kzalloc(sizeof(*linecard_device), GFP_KERNEL); - if (!linecard_device) - return ERR_PTR(-ENOMEM); - linecard_device->index = device_index; - linecard_device->priv = priv; - mutex_lock(&linecard->state_lock); - list_add_tail(&linecard_device->list, &linecard->device_list); - devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); - mutex_unlock(&linecard->state_lock); - return linecard_device; -} -EXPORT_SYMBOL_GPL(devlink_linecard_device_create); - -/** - * devlink_linecard_device_destroy - Destroy device on linecard - * - * @linecard: devlink linecard - * @linecard_device: devlink linecard device - */ -void -devlink_linecard_device_destroy(struct devlink_linecard *linecard, - struct devlink_linecard_device *linecard_device) -{ - mutex_lock(&linecard->state_lock); - list_del(&linecard_device->list); - devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); - mutex_unlock(&linecard->state_lock); - kfree(linecard_device); -} -EXPORT_SYMBOL_GPL(devlink_linecard_device_destroy); - /** * devlink_linecard_provision_set - Set provisioning on linecard * @@ -10623,7 +10331,6 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_set); void devlink_linecard_provision_clear(struct devlink_linecard *linecard) { mutex_lock(&linecard->state_lock); - WARN_ON(!list_empty(&linecard->device_list)); linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; linecard->type = NULL; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh index 53a65f416770..08a922d8b86a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh @@ -152,7 +152,6 @@ unprovision_test() LC_16X100G_TYPE="16x100G" LC_16X100G_PORT_COUNT=16 -LC_16X100G_DEVICE_COUNT=4 supported_types_check() { @@ -178,42 +177,6 @@ supported_types_check() check_err $? "16X100G not found between supported types of linecard $lc" } -lc_info_check() -{ - local lc=$1 - local fixed_hw_revision - local running_ini_version - - fixed_hw_revision=$(devlink lc -v info $DEVLINK_DEV lc $lc -j | \ - jq -e -r '.[][][].versions.fixed."hw.revision"') - check_err $? "Failed to get linecard $lc fixed.hw.revision" - log_info "Linecard $lc fixed.hw.revision: \"$fixed_hw_revision\"" - running_ini_version=$(devlink lc -v info $DEVLINK_DEV lc $lc -j | \ - jq -e -r '.[][][].versions.running."ini.version"') - check_err $? "Failed to get linecard $lc running.ini.version" - log_info "Linecard $lc running.ini.version: \"$running_ini_version\"" -} - -lc_devices_check() -{ - local lc=$1 - local expected_device_count=$2 - local device_count - local device - - device_count=$(devlink lc show $DEVLINK_DEV lc $lc -j | \ - jq -e -r ".[][][].devices |length") - check_err $? "Failed to get linecard $lc device count" - [ $device_count != 0 ] - check_err $? "No device found on linecard $lc" - [ $device_count == $expected_device_count ] - check_err $? "Unexpected device count on linecard $lc (got $expected_device_count, expected $device_count)" - for (( device=0; device Link: https://lore.kernel.org/r/20220430074844.69214-2-fankaixi.li@bytedance.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 4 ++++ net/core/filter.c | 9 +++++++++ tools/include/uapi/linux/bpf.h | 4 ++++ 3 files changed, 17 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 444fe6f1cf35..95a3d1ff6255 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5604,6 +5604,10 @@ struct bpf_tunnel_key { __u8 tunnel_ttl; __u16 tunnel_ext; /* Padding, future use. */ __u32 tunnel_label; + union { + __u32 local_ipv4; + __u32 local_ipv6[4]; + }; }; /* user accessible mirror of in-kernel xfrm_state. diff --git a/net/core/filter.c b/net/core/filter.c index b741b9f7e6a9..fe0da529d00f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4498,6 +4498,7 @@ BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key if (unlikely(size != sizeof(struct bpf_tunnel_key))) { err = -EINVAL; switch (size) { + case offsetof(struct bpf_tunnel_key, local_ipv6[0]): case offsetof(struct bpf_tunnel_key, tunnel_label): case offsetof(struct bpf_tunnel_key, tunnel_ext): goto set_compat; @@ -4523,10 +4524,14 @@ set_compat: if (flags & BPF_F_TUNINFO_IPV6) { memcpy(to->remote_ipv6, &info->key.u.ipv6.src, sizeof(to->remote_ipv6)); + memcpy(to->local_ipv6, &info->key.u.ipv6.dst, + sizeof(to->local_ipv6)); to->tunnel_label = be32_to_cpu(info->key.label); } else { to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3); + to->local_ipv4 = be32_to_cpu(info->key.u.ipv4.dst); + memset(&to->local_ipv6[1], 0, sizeof(__u32) * 3); to->tunnel_label = 0; } @@ -4597,6 +4602,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, return -EINVAL; if (unlikely(size != sizeof(struct bpf_tunnel_key))) { switch (size) { + case offsetof(struct bpf_tunnel_key, local_ipv6[0]): case offsetof(struct bpf_tunnel_key, tunnel_label): case offsetof(struct bpf_tunnel_key, tunnel_ext): case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): @@ -4639,10 +4645,13 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, info->mode |= IP_TUNNEL_INFO_IPV6; memcpy(&info->key.u.ipv6.dst, from->remote_ipv6, sizeof(from->remote_ipv6)); + memcpy(&info->key.u.ipv6.src, from->local_ipv6, + sizeof(from->local_ipv6)); info->key.label = cpu_to_be32(from->tunnel_label) & IPV6_FLOWLABEL_MASK; } else { info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); + info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4); } return 0; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 444fe6f1cf35..95a3d1ff6255 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5604,6 +5604,10 @@ struct bpf_tunnel_key { __u8 tunnel_ttl; __u16 tunnel_ext; /* Padding, future use. */ __u32 tunnel_label; + union { + __u32 local_ipv4; + __u32 local_ipv6[4]; + }; }; /* user accessible mirror of in-kernel xfrm_state. -- cgit v1.2.3 From f7e0beaf39d3868dc700d4954b26cf8443c5d423 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Tue, 10 May 2022 13:59:19 -0700 Subject: bpf, x86: Generate trampolines from bpf_tramp_links Replace struct bpf_tramp_progs with struct bpf_tramp_links to collect struct bpf_tramp_link(s) for a trampoline. struct bpf_tramp_link extends bpf_link to act as a linked list node. arch_prepare_bpf_trampoline() accepts a struct bpf_tramp_links to collects all bpf_tramp_link(s) that a trampoline should call. Change BPF trampoline and bpf_struct_ops to pass bpf_tramp_links instead of bpf_tramp_progs. Signed-off-by: Kui-Feng Lee Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220510205923.3206889-2-kuifeng@fb.com --- arch/x86/net/bpf_jit_comp.c | 36 +++++++++++---------- include/linux/bpf.h | 36 ++++++++++++++------- include/linux/bpf_types.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/bpf_struct_ops.c | 71 +++++++++++++++++++++++++++------------- kernel/bpf/syscall.c | 23 +++++-------- kernel/bpf/trampoline.c | 73 ++++++++++++++++++++++++------------------ net/bpf/bpf_dummy_struct_ops.c | 24 +++++++++++--- tools/bpf/bpftool/link.c | 1 + tools/include/uapi/linux/bpf.h | 1 + 10 files changed, 164 insertions(+), 103 deletions(-) (limited to 'include/uapi') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 16b6efacf7c6..38eb43159230 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1762,10 +1762,12 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args, } static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, - struct bpf_prog *p, int stack_size, bool save_ret) + struct bpf_tramp_link *l, int stack_size, + bool save_ret) { u8 *prog = *pprog; u8 *jmp_insn; + struct bpf_prog *p = l->link.prog; /* arg1: mov rdi, progs[i] */ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); @@ -1850,14 +1852,14 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) } static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, - struct bpf_tramp_progs *tp, int stack_size, + struct bpf_tramp_links *tl, int stack_size, bool save_ret) { int i; u8 *prog = *pprog; - for (i = 0; i < tp->nr_progs; i++) { - if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, + for (i = 0; i < tl->nr_links; i++) { + if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, save_ret)) return -EINVAL; } @@ -1866,7 +1868,7 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, } static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, - struct bpf_tramp_progs *tp, int stack_size, + struct bpf_tramp_links *tl, int stack_size, u8 **branches) { u8 *prog = *pprog; @@ -1877,8 +1879,8 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, */ emit_mov_imm32(&prog, false, BPF_REG_0, 0); emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); - for (i = 0; i < tp->nr_progs; i++) { - if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, true)) + for (i = 0; i < tl->nr_links; i++) { + if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, true)) return -EINVAL; /* mod_ret prog stored return value into [rbp - 8]. Emit: @@ -1980,14 +1982,14 @@ static bool is_valid_bpf_tramp_flags(unsigned int flags) */ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_tramp_progs *tprogs, + struct bpf_tramp_links *tlinks, void *orig_call) { int ret, i, nr_args = m->nr_args; int regs_off, ip_off, args_off, stack_size = nr_args * 8; - struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY]; - struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT]; - struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; + struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; + struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; + struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; u8 **branches = NULL; u8 *prog; bool save_ret; @@ -2078,13 +2080,13 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i } } - if (fentry->nr_progs) + if (fentry->nr_links) if (invoke_bpf(m, &prog, fentry, regs_off, flags & BPF_TRAMP_F_RET_FENTRY_RET)) return -EINVAL; - if (fmod_ret->nr_progs) { - branches = kcalloc(fmod_ret->nr_progs, sizeof(u8 *), + if (fmod_ret->nr_links) { + branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *), GFP_KERNEL); if (!branches) return -ENOMEM; @@ -2111,7 +2113,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i prog += X86_PATCH_SIZE; } - if (fmod_ret->nr_progs) { + if (fmod_ret->nr_links) { /* From Intel 64 and IA-32 Architectures Optimization * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler * Coding Rule 11: All branch targets should be 16-byte @@ -2121,12 +2123,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i /* Update the branches saved in invoke_bpf_mod_ret with the * aligned address of do_fexit. */ - for (i = 0; i < fmod_ret->nr_progs; i++) + for (i = 0; i < fmod_ret->nr_links; i++) emit_cond_near_jump(&branches[i], prog, branches[i], X86_JNE); } - if (fexit->nr_progs) + if (fexit->nr_links) if (invoke_bpf(m, &prog, fexit, regs_off, false)) { ret = -EINVAL; goto cleanup; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 551b7198ae8a..75e0110a65e1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -723,11 +723,11 @@ struct btf_func_model { /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 */ -#define BPF_MAX_TRAMP_PROGS 38 +#define BPF_MAX_TRAMP_LINKS 38 -struct bpf_tramp_progs { - struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS]; - int nr_progs; +struct bpf_tramp_links { + struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS]; + int nr_links; }; /* Different use cases for BPF trampoline: @@ -753,7 +753,7 @@ struct bpf_tramp_progs { struct bpf_tramp_image; int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_tramp_progs *tprogs, + struct bpf_tramp_links *tlinks, void *orig_call); /* these two functions are called from generated trampoline */ u64 notrace __bpf_prog_enter(struct bpf_prog *prog); @@ -852,9 +852,10 @@ static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( { return bpf_func(ctx, insnsi); } + #ifdef CONFIG_BPF_JIT -int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr); -int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr); +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); @@ -905,12 +906,12 @@ int bpf_jit_charge_modmem(u32 size); void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else -static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, +static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { return -ENOTSUPP; } -static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog, +static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { return -ENOTSUPP; @@ -1009,7 +1010,6 @@ struct bpf_prog_aux { bool tail_call_reachable; bool xdp_has_frags; bool use_bpf_prog_pack; - struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -1096,6 +1096,18 @@ struct bpf_link_ops { struct bpf_link_info *info); }; +struct bpf_tramp_link { + struct bpf_link link; + struct hlist_node tramp_hlist; +}; + +struct bpf_tracing_link { + struct bpf_tramp_link link; + enum bpf_attach_type attach_type; + struct bpf_trampoline *trampoline; + struct bpf_prog *tgt_prog; +}; + struct bpf_link_primer { struct bpf_link *link; struct file *file; @@ -1133,8 +1145,8 @@ bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value); -int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs, - struct bpf_prog *prog, +int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, + struct bpf_tramp_link *link, const struct btf_func_model *model, void *image, void *image_end); static inline bool bpf_try_module_get(const void *data, struct module *owner) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 3e24ad0c4b3c..2b9112b80171 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -141,3 +141,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf) #endif BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE_MULTI, kprobe_multi) +BPF_LINK_TYPE(BPF_LINK_TYPE_STRUCT_OPS, struct_ops) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 95a3d1ff6255..3d032ea1b6a3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1013,6 +1013,7 @@ enum bpf_link_type { BPF_LINK_TYPE_XDP = 6, BPF_LINK_TYPE_PERF_EVENT = 7, BPF_LINK_TYPE_KPROBE_MULTI = 8, + BPF_LINK_TYPE_STRUCT_OPS = 9, MAX_BPF_LINK_TYPE, }; diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 3a0103ad97bc..d9a3c9207240 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -33,15 +33,15 @@ struct bpf_struct_ops_map { const struct bpf_struct_ops *st_ops; /* protect map_update */ struct mutex lock; - /* progs has all the bpf_prog that is populated + /* link has all the bpf_links that is populated * to the func ptr of the kernel's struct * (in kvalue.data). */ - struct bpf_prog **progs; + struct bpf_link **links; /* image is a page that has all the trampolines * that stores the func args before calling the bpf_prog. * A PAGE_SIZE "image" is enough to store all trampoline for - * "progs[]". + * "links[]". */ void *image; /* uvalue->data stores the kernel struct @@ -283,9 +283,9 @@ static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map) u32 i; for (i = 0; i < btf_type_vlen(t); i++) { - if (st_map->progs[i]) { - bpf_prog_put(st_map->progs[i]); - st_map->progs[i] = NULL; + if (st_map->links[i]) { + bpf_link_put(st_map->links[i]); + st_map->links[i] = NULL; } } } @@ -316,18 +316,34 @@ static int check_zero_holes(const struct btf_type *t, void *data) return 0; } -int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs, - struct bpf_prog *prog, +static void bpf_struct_ops_link_release(struct bpf_link *link) +{ +} + +static void bpf_struct_ops_link_dealloc(struct bpf_link *link) +{ + struct bpf_tramp_link *tlink = container_of(link, struct bpf_tramp_link, link); + + kfree(tlink); +} + +const struct bpf_link_ops bpf_struct_ops_link_lops = { + .release = bpf_struct_ops_link_release, + .dealloc = bpf_struct_ops_link_dealloc, +}; + +int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, + struct bpf_tramp_link *link, const struct btf_func_model *model, void *image, void *image_end) { u32 flags; - tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; - tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; + tlinks[BPF_TRAMP_FENTRY].links[0] = link; + tlinks[BPF_TRAMP_FENTRY].nr_links = 1; flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0; return arch_prepare_bpf_trampoline(NULL, image, image_end, - model, flags, tprogs, NULL); + model, flags, tlinks, NULL); } static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, @@ -338,7 +354,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, struct bpf_struct_ops_value *uvalue, *kvalue; const struct btf_member *member; const struct btf_type *t = st_ops->type; - struct bpf_tramp_progs *tprogs = NULL; + struct bpf_tramp_links *tlinks = NULL; void *udata, *kdata; int prog_fd, err = 0; void *image, *image_end; @@ -362,8 +378,8 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, if (uvalue->state || refcount_read(&uvalue->refcnt)) return -EINVAL; - tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL); - if (!tprogs) + tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL); + if (!tlinks) return -ENOMEM; uvalue = (struct bpf_struct_ops_value *)st_map->uvalue; @@ -386,6 +402,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, for_each_member(i, t, member) { const struct btf_type *mtype, *ptype; struct bpf_prog *prog; + struct bpf_tramp_link *link; u32 moff; moff = __btf_member_bit_offset(t, member) / 8; @@ -439,16 +456,26 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, err = PTR_ERR(prog); goto reset_unlock; } - st_map->progs[i] = prog; if (prog->type != BPF_PROG_TYPE_STRUCT_OPS || prog->aux->attach_btf_id != st_ops->type_id || prog->expected_attach_type != i) { + bpf_prog_put(prog); err = -EINVAL; goto reset_unlock; } - err = bpf_struct_ops_prepare_trampoline(tprogs, prog, + link = kzalloc(sizeof(*link), GFP_USER); + if (!link) { + bpf_prog_put(prog); + err = -ENOMEM; + goto reset_unlock; + } + bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, + &bpf_struct_ops_link_lops, prog); + st_map->links[i] = &link->link; + + err = bpf_struct_ops_prepare_trampoline(tlinks, link, &st_ops->func_models[i], image, image_end); if (err < 0) @@ -491,7 +518,7 @@ reset_unlock: memset(uvalue, 0, map->value_size); memset(kvalue, 0, map->value_size); unlock: - kfree(tprogs); + kfree(tlinks); mutex_unlock(&st_map->lock); return err; } @@ -546,9 +573,9 @@ static void bpf_struct_ops_map_free(struct bpf_map *map) { struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; - if (st_map->progs) + if (st_map->links) bpf_struct_ops_map_put_progs(st_map); - bpf_map_area_free(st_map->progs); + bpf_map_area_free(st_map->links); bpf_jit_free_exec(st_map->image); bpf_map_area_free(st_map->uvalue); bpf_map_area_free(st_map); @@ -597,11 +624,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) map = &st_map->map; st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE); - st_map->progs = - bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_prog *), + st_map->links = + bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_links *), NUMA_NO_NODE); st_map->image = bpf_jit_alloc_exec(PAGE_SIZE); - if (!st_map->uvalue || !st_map->progs || !st_map->image) { + if (!st_map->uvalue || !st_map->links || !st_map->image) { bpf_struct_ops_map_free(map); return ERR_PTR(-ENOMEM); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 50164d324eaf..2dc582773344 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2864,19 +2864,12 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd) } EXPORT_SYMBOL(bpf_link_get_from_fd); -struct bpf_tracing_link { - struct bpf_link link; - enum bpf_attach_type attach_type; - struct bpf_trampoline *trampoline; - struct bpf_prog *tgt_prog; -}; - static void bpf_tracing_link_release(struct bpf_link *link) { struct bpf_tracing_link *tr_link = - container_of(link, struct bpf_tracing_link, link); + container_of(link, struct bpf_tracing_link, link.link); - WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog, + WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link, tr_link->trampoline)); bpf_trampoline_put(tr_link->trampoline); @@ -2889,7 +2882,7 @@ static void bpf_tracing_link_release(struct bpf_link *link) static void bpf_tracing_link_dealloc(struct bpf_link *link) { struct bpf_tracing_link *tr_link = - container_of(link, struct bpf_tracing_link, link); + container_of(link, struct bpf_tracing_link, link.link); kfree(tr_link); } @@ -2898,7 +2891,7 @@ static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, struct seq_file *seq) { struct bpf_tracing_link *tr_link = - container_of(link, struct bpf_tracing_link, link); + container_of(link, struct bpf_tracing_link, link.link); seq_printf(seq, "attach_type:\t%d\n", @@ -2909,7 +2902,7 @@ static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, struct bpf_link_info *info) { struct bpf_tracing_link *tr_link = - container_of(link, struct bpf_tracing_link, link); + container_of(link, struct bpf_tracing_link, link.link); info->tracing.attach_type = tr_link->attach_type; bpf_trampoline_unpack_key(tr_link->trampoline->key, @@ -2990,7 +2983,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, err = -ENOMEM; goto out_put_prog; } - bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING, + bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING, &bpf_tracing_link_lops, prog); link->attach_type = prog->expected_attach_type; @@ -3060,11 +3053,11 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, tgt_prog = prog->aux->dst_prog; } - err = bpf_link_prime(&link->link, &link_primer); + err = bpf_link_prime(&link->link.link, &link_primer); if (err) goto out_unlock; - err = bpf_trampoline_link_prog(prog, tr); + err = bpf_trampoline_link_prog(&link->link, tr); if (err) { bpf_link_cleanup(&link_primer); link = NULL; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index ada97751ae1b..d5e6bc5517cb 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -168,30 +168,30 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) return ret; } -static struct bpf_tramp_progs * +static struct bpf_tramp_links * bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg) { - const struct bpf_prog_aux *aux; - struct bpf_tramp_progs *tprogs; - struct bpf_prog **progs; + struct bpf_tramp_link *link; + struct bpf_tramp_links *tlinks; + struct bpf_tramp_link **links; int kind; *total = 0; - tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL); - if (!tprogs) + tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL); + if (!tlinks) return ERR_PTR(-ENOMEM); for (kind = 0; kind < BPF_TRAMP_MAX; kind++) { - tprogs[kind].nr_progs = tr->progs_cnt[kind]; + tlinks[kind].nr_links = tr->progs_cnt[kind]; *total += tr->progs_cnt[kind]; - progs = tprogs[kind].progs; + links = tlinks[kind].links; - hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) { - *ip_arg |= aux->prog->call_get_func_ip; - *progs++ = aux->prog; + hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) { + *ip_arg |= link->link.prog->call_get_func_ip; + *links++ = link; } } - return tprogs; + return tlinks; } static void __bpf_tramp_image_put_deferred(struct work_struct *work) @@ -330,14 +330,14 @@ out: static int bpf_trampoline_update(struct bpf_trampoline *tr) { struct bpf_tramp_image *im; - struct bpf_tramp_progs *tprogs; + struct bpf_tramp_links *tlinks; u32 flags = BPF_TRAMP_F_RESTORE_REGS; bool ip_arg = false; int err, total; - tprogs = bpf_trampoline_get_progs(tr, &total, &ip_arg); - if (IS_ERR(tprogs)) - return PTR_ERR(tprogs); + tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg); + if (IS_ERR(tlinks)) + return PTR_ERR(tlinks); if (total == 0) { err = unregister_fentry(tr, tr->cur_image->image); @@ -353,15 +353,15 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) goto out; } - if (tprogs[BPF_TRAMP_FEXIT].nr_progs || - tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs) + if (tlinks[BPF_TRAMP_FEXIT].nr_links || + tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; if (ip_arg) flags |= BPF_TRAMP_F_IP_ARG; err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE, - &tr->func.model, flags, tprogs, + &tr->func.model, flags, tlinks, tr->func.addr); if (err < 0) goto out; @@ -381,7 +381,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) tr->cur_image = im; tr->selector++; out: - kfree(tprogs); + kfree(tlinks); return err; } @@ -407,13 +407,14 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog) } } -int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { enum bpf_tramp_prog_type kind; + struct bpf_tramp_link *link_exiting; int err = 0; int cnt; - kind = bpf_attach_type_to_tramp(prog); + kind = bpf_attach_type_to_tramp(link->link.prog); mutex_lock(&tr->mutex); if (tr->extension_prog) { /* cannot attach fentry/fexit if extension prog is attached. @@ -429,25 +430,33 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) err = -EBUSY; goto out; } - tr->extension_prog = prog; + tr->extension_prog = link->link.prog; err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL, - prog->bpf_func); + link->link.prog->bpf_func); goto out; } - if (cnt >= BPF_MAX_TRAMP_PROGS) { + if (cnt >= BPF_MAX_TRAMP_LINKS) { err = -E2BIG; goto out; } - if (!hlist_unhashed(&prog->aux->tramp_hlist)) { + if (!hlist_unhashed(&link->tramp_hlist)) { /* prog already linked */ err = -EBUSY; goto out; } - hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]); + hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) { + if (link_exiting->link.prog != link->link.prog) + continue; + /* prog already linked */ + err = -EBUSY; + goto out; + } + + hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]); tr->progs_cnt[kind]++; err = bpf_trampoline_update(tr); if (err) { - hlist_del_init(&prog->aux->tramp_hlist); + hlist_del_init(&link->tramp_hlist); tr->progs_cnt[kind]--; } out: @@ -456,12 +465,12 @@ out: } /* bpf_trampoline_unlink_prog() should never fail. */ -int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { enum bpf_tramp_prog_type kind; int err; - kind = bpf_attach_type_to_tramp(prog); + kind = bpf_attach_type_to_tramp(link->link.prog); mutex_lock(&tr->mutex); if (kind == BPF_TRAMP_REPLACE) { WARN_ON_ONCE(!tr->extension_prog); @@ -470,7 +479,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) tr->extension_prog = NULL; goto out; } - hlist_del_init(&prog->aux->tramp_hlist); + hlist_del_init(&link->tramp_hlist); tr->progs_cnt[kind]--; err = bpf_trampoline_update(tr); out: @@ -635,7 +644,7 @@ void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr) int __weak arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_tramp_progs *tprogs, + struct bpf_tramp_links *tlinks, void *orig_call) { return -ENOTSUPP; diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index d0e54e30658a..e78dadfc5829 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -72,13 +72,16 @@ static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args) args->args[3], args->args[4]); } +extern const struct bpf_link_ops bpf_struct_ops_link_lops; + int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { const struct bpf_struct_ops *st_ops = &bpf_bpf_dummy_ops; const struct btf_type *func_proto; struct bpf_dummy_ops_test_args *args; - struct bpf_tramp_progs *tprogs; + struct bpf_tramp_links *tlinks; + struct bpf_tramp_link *link = NULL; void *image = NULL; unsigned int op_idx; int prog_ret; @@ -92,8 +95,8 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, if (IS_ERR(args)) return PTR_ERR(args); - tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL); - if (!tprogs) { + tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL); + if (!tlinks) { err = -ENOMEM; goto out; } @@ -105,8 +108,17 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, } set_vm_flush_reset_perms(image); + link = kzalloc(sizeof(*link), GFP_USER); + if (!link) { + err = -ENOMEM; + goto out; + } + /* prog doesn't take the ownership of the reference from caller */ + bpf_prog_inc(prog); + bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_link_lops, prog); + op_idx = prog->expected_attach_type; - err = bpf_struct_ops_prepare_trampoline(tprogs, prog, + err = bpf_struct_ops_prepare_trampoline(tlinks, link, &st_ops->func_models[op_idx], image, image + PAGE_SIZE); if (err < 0) @@ -124,7 +136,9 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, out: kfree(args); bpf_jit_free_exec(image); - kfree(tprogs); + if (link) + bpf_link_put(&link->link); + kfree(tlinks); return err; } diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 8fb0116f9136..6353a789322b 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -23,6 +23,7 @@ static const char * const link_type_name[] = { [BPF_LINK_TYPE_XDP] = "xdp", [BPF_LINK_TYPE_PERF_EVENT] = "perf_event", [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi", + [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops", }; static struct hashmap *link_table; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 95a3d1ff6255..3d032ea1b6a3 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1013,6 +1013,7 @@ enum bpf_link_type { BPF_LINK_TYPE_XDP = 6, BPF_LINK_TYPE_PERF_EVENT = 7, BPF_LINK_TYPE_KPROBE_MULTI = 8, + BPF_LINK_TYPE_STRUCT_OPS = 9, MAX_BPF_LINK_TYPE, }; -- cgit v1.2.3 From 2fcc82411e74e5e6aba336561cf56fb899bfae4e Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Tue, 10 May 2022 13:59:21 -0700 Subject: bpf, x86: Attach a cookie to fentry/fexit/fmod_ret/lsm. Pass a cookie along with BPF_LINK_CREATE requests. Add a bpf_cookie field to struct bpf_tracing_link to attach a cookie. The cookie of a bpf_tracing_link is available by calling bpf_get_attach_cookie when running the BPF program of the attached link. The value of a cookie will be set at bpf_tramp_run_ctx by the trampoline of the link. Signed-off-by: Kui-Feng Lee Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220510205923.3206889-4-kuifeng@fb.com --- arch/x86/net/bpf_jit_comp.c | 5 +++-- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 9 +++++++++ kernel/bpf/bpf_lsm.c | 17 +++++++++++++++++ kernel/bpf/syscall.c | 12 ++++++++---- kernel/bpf/trampoline.c | 7 +++++-- kernel/trace/bpf_trace.c | 17 +++++++++++++++++ tools/include/uapi/linux/bpf.h | 9 +++++++++ 8 files changed, 69 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 1fbc5cf1c7a7..a2b6d197c226 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1769,9 +1769,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, u8 *jmp_insn; int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); struct bpf_prog *p = l->link.prog; + u64 cookie = l->cookie; - /* mov rdi, 0 */ - emit_mov_imm64(&prog, BPF_REG_1, 0, 0); + /* mov rdi, cookie */ + emit_mov_imm64(&prog, BPF_REG_1, (long) cookie >> 32, (u32) (long) cookie); /* Prepare struct bpf_tramp_run_ctx. * diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 256fb802e580..aba7ded56436 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1102,6 +1102,7 @@ struct bpf_link_ops { struct bpf_tramp_link { struct bpf_link link; struct hlist_node tramp_hlist; + u64 cookie; }; struct bpf_tracing_link { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3d032ea1b6a3..bc7f89948f54 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1490,6 +1490,15 @@ union bpf_attr { __aligned_u64 addrs; __aligned_u64 cookies; } kprobe_multi; + struct { + /* this is overlaid with the target_btf_id above. */ + __u32 target_btf_id; + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 cookie; + } tracing; }; } link_create; diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 064eccba641d..c1351df9f7ee 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -117,6 +117,21 @@ static const struct bpf_func_proto bpf_ima_file_hash_proto = { .allowed = bpf_ima_inode_hash_allowed, }; +BPF_CALL_1(bpf_get_attach_cookie, void *, ctx) +{ + struct bpf_trace_run_ctx *run_ctx; + + run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx); + return run_ctx->bpf_cookie; +} + +static const struct bpf_func_proto bpf_get_attach_cookie_proto = { + .func = bpf_get_attach_cookie, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + static const struct bpf_func_proto * bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -141,6 +156,8 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL; case BPF_FUNC_ima_file_hash: return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL; + case BPF_FUNC_get_attach_cookie: + return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto : NULL; default: return tracing_prog_func_proto(func_id, prog); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d48165fccf49..72e53489165d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2921,7 +2921,8 @@ static const struct bpf_link_ops bpf_tracing_link_lops = { static int bpf_tracing_prog_attach(struct bpf_prog *prog, int tgt_prog_fd, - u32 btf_id) + u32 btf_id, + u64 bpf_cookie) { struct bpf_link_primer link_primer; struct bpf_prog *tgt_prog = NULL; @@ -2986,6 +2987,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING, &bpf_tracing_link_lops, prog); link->attach_type = prog->expected_attach_type; + link->link.cookie = bpf_cookie; mutex_lock(&prog->aux->dst_mutex); @@ -3271,7 +3273,7 @@ static int bpf_raw_tp_link_attach(struct bpf_prog *prog, tp_name = prog->aux->attach_func_name; break; } - return bpf_tracing_prog_attach(prog, 0, 0); + return bpf_tracing_prog_attach(prog, 0, 0, 0); case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0) @@ -4524,7 +4526,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) case BPF_PROG_TYPE_EXT: ret = bpf_tracing_prog_attach(prog, attr->link_create.target_fd, - attr->link_create.target_btf_id); + attr->link_create.target_btf_id, + attr->link_create.tracing.cookie); break; case BPF_PROG_TYPE_LSM: case BPF_PROG_TYPE_TRACING: @@ -4539,7 +4542,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) else ret = bpf_tracing_prog_attach(prog, attr->link_create.target_fd, - attr->link_create.target_btf_id); + attr->link_create.target_btf_id, + attr->link_create.tracing.cookie); break; case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_SK_LOOKUP: diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index baf1b65d523e..0e9b3aefc34a 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -30,9 +30,12 @@ static DEFINE_MUTEX(trampoline_mutex); bool bpf_prog_has_trampoline(const struct bpf_prog *prog) { enum bpf_attach_type eatype = prog->expected_attach_type; + enum bpf_prog_type ptype = prog->type; - return eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT || - eatype == BPF_MODIFY_RETURN; + return (ptype == BPF_PROG_TYPE_TRACING && + (eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT || + eatype == BPF_MODIFY_RETURN)) || + (ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC); } void *bpf_jit_alloc_exec_page(void) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 7fd11c17558d..2eaac094caf8 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1091,6 +1091,21 @@ static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = { .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_1(bpf_get_attach_cookie_tracing, void *, ctx) +{ + struct bpf_trace_run_ctx *run_ctx; + + run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx); + return run_ctx->bpf_cookie; +} + +static const struct bpf_func_proto bpf_get_attach_cookie_proto_tracing = { + .func = bpf_get_attach_cookie_tracing, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags) { #ifndef CONFIG_X86 @@ -1719,6 +1734,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL; case BPF_FUNC_get_func_arg_cnt: return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_cnt_proto : NULL; + case BPF_FUNC_get_attach_cookie: + return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto_tracing : NULL; default: fn = raw_tp_prog_func_proto(func_id, prog); if (!fn && prog->expected_attach_type == BPF_TRACE_ITER) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3d032ea1b6a3..bc7f89948f54 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1490,6 +1490,15 @@ union bpf_attr { __aligned_u64 addrs; __aligned_u64 cookies; } kprobe_multi; + struct { + /* this is overlaid with the target_btf_id above. */ + __u32 target_btf_id; + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 cookie; + } tracing; }; } link_create; -- cgit v1.2.3 From 07343110b293456d30393e89b86c4dee1ac051c8 Mon Sep 17 00:00:00 2001 From: Feng Zhou Date: Wed, 11 May 2022 17:38:53 +0800 Subject: bpf: add bpf_map_lookup_percpu_elem for percpu map Add new ebpf helpers bpf_map_lookup_percpu_elem. The implementation method is relatively simple, refer to the implementation method of map_lookup_elem of percpu map, increase the parameters of cpu, and obtain it according to the specified cpu. Signed-off-by: Feng Zhou Link: https://lore.kernel.org/r/20220511093854.411-2-zhoufeng.zf@bytedance.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ include/uapi/linux/bpf.h | 9 +++++++++ kernel/bpf/arraymap.c | 15 +++++++++++++++ kernel/bpf/core.c | 1 + kernel/bpf/hashtab.c | 32 ++++++++++++++++++++++++++++++++ kernel/bpf/helpers.c | 18 ++++++++++++++++++ kernel/bpf/verifier.c | 17 +++++++++++++++-- kernel/trace/bpf_trace.c | 2 ++ tools/include/uapi/linux/bpf.h | 9 +++++++++ 9 files changed, 103 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3ded8711457f..5061ccd8b2dc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -89,6 +89,7 @@ struct bpf_map_ops { int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); int (*map_pop_elem)(struct bpf_map *map, void *value); int (*map_peek_elem)(struct bpf_map *map, void *value); + void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, @@ -2184,6 +2185,7 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_map_push_elem_proto; extern const struct bpf_func_proto bpf_map_pop_elem_proto; extern const struct bpf_func_proto bpf_map_peek_elem_proto; +extern const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bc7f89948f54..0210f85131b3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5164,6 +5164,14 @@ union bpf_attr { * if not NULL, is a reference which must be released using its * corresponding release function, or moved into a BPF map before * program exit. + * + * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu) + * Description + * Perform a lookup in *percpu map* for an entry associated to + * *key* on *cpu*. + * Return + * Map value associated to *key* on *cpu*, or **NULL** if no entry + * was found or *cpu* is invalid. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5361,6 +5369,7 @@ union bpf_attr { FN(skb_set_tstamp), \ FN(ima_file_hash), \ FN(kptr_xchg), \ + FN(map_lookup_percpu_elem), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 724613da6576..fe40d3b9458f 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -243,6 +243,20 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) return this_cpu_ptr(array->pptrs[index & array->index_mask]); } +static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 index = *(u32 *)key; + + if (cpu >= nr_cpu_ids) + return NULL; + + if (unlikely(index >= array->map.max_entries)) + return NULL; + + return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu); +} + int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) { struct bpf_array *array = container_of(map, struct bpf_array, map); @@ -725,6 +739,7 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_lookup_elem = percpu_array_map_lookup_elem, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, + .map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem, .map_seq_show_elem = percpu_array_map_seq_show_elem, .map_check_btf = array_map_check_btf, .map_lookup_batch = generic_map_lookup_batch, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 13e9dbeeedf3..76f68d0a7ae8 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2619,6 +2619,7 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_map_push_elem_proto __weak; const struct bpf_func_proto bpf_map_pop_elem_proto __weak; const struct bpf_func_proto bpf_map_peek_elem_proto __weak; +const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto __weak; const struct bpf_func_proto bpf_spin_lock_proto __weak; const struct bpf_func_proto bpf_spin_unlock_proto __weak; const struct bpf_func_proto bpf_jiffies64_proto __weak; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 705841279d16..17fb69c0e0dc 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -2199,6 +2199,20 @@ static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key) return NULL; } +static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) +{ + struct htab_elem *l; + + if (cpu >= nr_cpu_ids) + return NULL; + + l = __htab_map_lookup_elem(map, key); + if (l) + return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); + else + return NULL; +} + static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key) { struct htab_elem *l = __htab_map_lookup_elem(map, key); @@ -2211,6 +2225,22 @@ static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key) return NULL; } +static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) +{ + struct htab_elem *l; + + if (cpu >= nr_cpu_ids) + return NULL; + + l = __htab_map_lookup_elem(map, key); + if (l) { + bpf_lru_node_set_ref(&l->lru_node); + return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); + } + + return NULL; +} + int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) { struct htab_elem *l; @@ -2300,6 +2330,7 @@ const struct bpf_map_ops htab_percpu_map_ops = { .map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem, .map_update_elem = htab_percpu_map_update_elem, .map_delete_elem = htab_map_delete_elem, + .map_lookup_percpu_elem = htab_percpu_map_lookup_percpu_elem, .map_seq_show_elem = htab_percpu_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, @@ -2318,6 +2349,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = { .map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem, .map_update_elem = htab_lru_percpu_map_update_elem, .map_delete_elem = htab_lru_map_delete_elem, + .map_lookup_percpu_elem = htab_lru_percpu_map_lookup_percpu_elem, .map_seq_show_elem = htab_percpu_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 3e709fed5306..d5f104a39092 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -119,6 +119,22 @@ const struct bpf_func_proto bpf_map_peek_elem_proto = { .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, }; +BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu) +{ + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu); +} + +const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto = { + .func = bpf_map_lookup_percpu_elem, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_MAP_KEY, + .arg3_type = ARG_ANYTHING, +}; + const struct bpf_func_proto bpf_get_prandom_u32_proto = { .func = bpf_user_rnd_u32, .gpl_only = false, @@ -1420,6 +1436,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_map_pop_elem_proto; case BPF_FUNC_map_peek_elem: return &bpf_map_peek_elem_proto; + case BPF_FUNC_map_lookup_percpu_elem: + return &bpf_map_lookup_percpu_elem_proto; case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; case BPF_FUNC_get_smp_processor_id: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c27fee73a2cb..05c1b6656824 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6137,6 +6137,12 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, map->map_type != BPF_MAP_TYPE_BLOOM_FILTER) goto error; break; + case BPF_FUNC_map_lookup_percpu_elem: + if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY && + map->map_type != BPF_MAP_TYPE_PERCPU_HASH && + map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH) + goto error; + break; case BPF_FUNC_sk_storage_get: case BPF_FUNC_sk_storage_delete: if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) @@ -6750,7 +6756,8 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, func_id != BPF_FUNC_map_pop_elem && func_id != BPF_FUNC_map_peek_elem && func_id != BPF_FUNC_for_each_map_elem && - func_id != BPF_FUNC_redirect_map) + func_id != BPF_FUNC_redirect_map && + func_id != BPF_FUNC_map_lookup_percpu_elem) return 0; if (map == NULL) { @@ -13810,7 +13817,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env) insn->imm == BPF_FUNC_map_pop_elem || insn->imm == BPF_FUNC_map_peek_elem || insn->imm == BPF_FUNC_redirect_map || - insn->imm == BPF_FUNC_for_each_map_elem)) { + insn->imm == BPF_FUNC_for_each_map_elem || + insn->imm == BPF_FUNC_map_lookup_percpu_elem)) { aux = &env->insn_aux_data[i + delta]; if (bpf_map_ptr_poisoned(aux)) goto patch_call_imm; @@ -13859,6 +13867,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env) bpf_callback_t callback_fn, void *callback_ctx, u64 flags))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem, + (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL)); patch_map_ops_generic: switch (insn->imm) { @@ -13886,6 +13896,9 @@ patch_map_ops_generic: case BPF_FUNC_for_each_map_elem: insn->imm = BPF_CALL_IMM(ops->map_for_each_callback); continue; + case BPF_FUNC_map_lookup_percpu_elem: + insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem); + continue; } goto patch_call_imm; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 2eaac094caf8..7141ca8a1c2d 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1197,6 +1197,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_map_pop_elem_proto; case BPF_FUNC_map_peek_elem: return &bpf_map_peek_elem_proto; + case BPF_FUNC_map_lookup_percpu_elem: + return &bpf_map_lookup_percpu_elem_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; case BPF_FUNC_ktime_get_boot_ns: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index bc7f89948f54..0210f85131b3 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5164,6 +5164,14 @@ union bpf_attr { * if not NULL, is a reference which must be released using its * corresponding release function, or moved into a BPF map before * program exit. + * + * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu) + * Description + * Perform a lookup in *percpu map* for an entry associated to + * *key* on *cpu*. + * Return + * Map value associated to *key* on *cpu*, or **NULL** if no entry + * was found or *cpu* is invalid. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5361,6 +5369,7 @@ union bpf_attr { FN(skb_set_tstamp), \ FN(ima_file_hash), \ FN(kptr_xchg), \ + FN(map_lookup_percpu_elem), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 89527be8d8d672773eeaec910118a6e84fb597e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 May 2022 11:33:56 -0700 Subject: net: add IFLA_TSO_{MAX_SIZE|SEGS} attributes New netlink attributes IFLA_TSO_MAX_SIZE and IFLA_TSO_MAX_SEGS are used to report to user-space the device TSO limits. ip -d link sh dev eth1 ... tso_max_size 65536 tso_max_segs 65535 Signed-off-by: Eric Dumazet Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 2 ++ net/core/rtnetlink.c | 6 ++++++ tools/include/uapi/linux/if_link.h | 2 ++ 3 files changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d1e600816b82..5f58dcfe2787 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -368,6 +368,8 @@ enum { IFLA_PARENT_DEV_NAME, IFLA_PARENT_DEV_BUS_NAME, IFLA_GRO_MAX_SIZE, + IFLA_TSO_MAX_SIZE, + IFLA_TSO_MAX_SEGS, __IFLA_MAX }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bdc891326102..f35cc21298ac 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1064,6 +1064,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_GSO_MAX_SEGS */ + nla_total_size(4) /* IFLA_GSO_MAX_SIZE */ + nla_total_size(4) /* IFLA_GRO_MAX_SIZE */ + + nla_total_size(4) /* IFLA_TSO_MAX_SIZE */ + + nla_total_size(4) /* IFLA_TSO_MAX_SEGS */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ @@ -1769,6 +1771,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) || + nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) || + nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) || #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif @@ -1922,6 +1926,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), [IFLA_PARENT_DEV_NAME] = { .type = NLA_NUL_STRING }, [IFLA_GRO_MAX_SIZE] = { .type = NLA_U32 }, + [IFLA_TSO_MAX_SIZE] = { .type = NLA_REJECT }, + [IFLA_TSO_MAX_SEGS] = { .type = NLA_REJECT }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index e1ba2d51b717..b339bf2196ca 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -348,6 +348,8 @@ enum { IFLA_PARENT_DEV_NAME, IFLA_PARENT_DEV_BUS_NAME, IFLA_GRO_MAX_SIZE, + IFLA_TSO_MAX_SIZE, + IFLA_TSO_MAX_SEGS, __IFLA_MAX }; -- cgit v1.2.3 From 9f39d36530e5678d092d53c5c2c60d82b4dcc169 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sat, 7 May 2022 13:55:58 +0200 Subject: can: isotp: add support for transmission without flow control Usually the ISO 15765-2 protocol is a point-to-point protocol to transfer segmented PDUs to a dedicated receiver. This receiver sends a flow control message to specify protocol options and timings (e.g. block size / STmin). The so called functional addressing communication allows a 1:N communication but is limited to a single frame length. This new CAN_ISOTP_CF_BROADCAST allows an unconfirmed 1:N communication with PDU length that would not fit into a single frame. This feature is not covered by the ISO 15765-2 standard. Link: https://lore.kernel.org/all/20220507115558.19065-1-socketcan@hartkopp.net Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/isotp.h | 25 ++++++----- net/can/isotp.c | 100 ++++++++++++++++++++++++++++++++--------- 2 files changed, 92 insertions(+), 33 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h index 590f8aea2b6d..439c982f7e81 100644 --- a/include/uapi/linux/can/isotp.h +++ b/include/uapi/linux/can/isotp.h @@ -124,18 +124,19 @@ struct can_isotp_ll_options { /* flags for isotp behaviour */ -#define CAN_ISOTP_LISTEN_MODE 0x001 /* listen only (do not send FC) */ -#define CAN_ISOTP_EXTEND_ADDR 0x002 /* enable extended addressing */ -#define CAN_ISOTP_TX_PADDING 0x004 /* enable CAN frame padding tx path */ -#define CAN_ISOTP_RX_PADDING 0x008 /* enable CAN frame padding rx path */ -#define CAN_ISOTP_CHK_PAD_LEN 0x010 /* check received CAN frame padding */ -#define CAN_ISOTP_CHK_PAD_DATA 0x020 /* check received CAN frame padding */ -#define CAN_ISOTP_HALF_DUPLEX 0x040 /* half duplex error state handling */ -#define CAN_ISOTP_FORCE_TXSTMIN 0x080 /* ignore stmin from received FC */ -#define CAN_ISOTP_FORCE_RXSTMIN 0x100 /* ignore CFs depending on rx stmin */ -#define CAN_ISOTP_RX_EXT_ADDR 0x200 /* different rx extended addressing */ -#define CAN_ISOTP_WAIT_TX_DONE 0x400 /* wait for tx completion */ -#define CAN_ISOTP_SF_BROADCAST 0x800 /* 1-to-N functional addressing */ +#define CAN_ISOTP_LISTEN_MODE 0x0001 /* listen only (do not send FC) */ +#define CAN_ISOTP_EXTEND_ADDR 0x0002 /* enable extended addressing */ +#define CAN_ISOTP_TX_PADDING 0x0004 /* enable CAN frame padding tx path */ +#define CAN_ISOTP_RX_PADDING 0x0008 /* enable CAN frame padding rx path */ +#define CAN_ISOTP_CHK_PAD_LEN 0x0010 /* check received CAN frame padding */ +#define CAN_ISOTP_CHK_PAD_DATA 0x0020 /* check received CAN frame padding */ +#define CAN_ISOTP_HALF_DUPLEX 0x0040 /* half duplex error state handling */ +#define CAN_ISOTP_FORCE_TXSTMIN 0x0080 /* ignore stmin from received FC */ +#define CAN_ISOTP_FORCE_RXSTMIN 0x0100 /* ignore CFs depending on rx stmin */ +#define CAN_ISOTP_RX_EXT_ADDR 0x0200 /* different rx extended addressing */ +#define CAN_ISOTP_WAIT_TX_DONE 0x0400 /* wait for tx completion */ +#define CAN_ISOTP_SF_BROADCAST 0x0800 /* 1-to-N functional addressing */ +#define CAN_ISOTP_CF_BROADCAST 0x1000 /* 1-to-N transmission w/o FC */ /* protocol machine default values */ diff --git a/net/can/isotp.c b/net/can/isotp.c index 35a1ae61744c..2caeeae8ec16 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -104,6 +104,7 @@ MODULE_ALIAS("can-proto-6"); #define FC_CONTENT_SZ 3 /* flow control content size in byte (FS/BS/STmin) */ #define ISOTP_CHECK_PADDING (CAN_ISOTP_CHK_PAD_LEN | CAN_ISOTP_CHK_PAD_DATA) +#define ISOTP_ALL_BC_FLAGS (CAN_ISOTP_SF_BROADCAST | CAN_ISOTP_CF_BROADCAST) /* Flow Status given in FC frame */ #define ISOTP_FC_CTS 0 /* clear to send */ @@ -159,6 +160,23 @@ static inline struct isotp_sock *isotp_sk(const struct sock *sk) return (struct isotp_sock *)sk; } +static u32 isotp_bc_flags(struct isotp_sock *so) +{ + return so->opt.flags & ISOTP_ALL_BC_FLAGS; +} + +static bool isotp_register_rxid(struct isotp_sock *so) +{ + /* no broadcast modes => register rx_id for FC frame reception */ + return (isotp_bc_flags(so) == 0); +} + +static bool isotp_register_txecho(struct isotp_sock *so) +{ + /* all modes but SF_BROADCAST register for tx echo skbs */ + return (isotp_bc_flags(so) != CAN_ISOTP_SF_BROADCAST); +} + static enum hrtimer_restart isotp_rx_timer_handler(struct hrtimer *hrtimer) { struct isotp_sock *so = container_of(hrtimer, struct isotp_sock, @@ -803,7 +821,6 @@ static void isotp_create_fframe(struct canfd_frame *cf, struct isotp_sock *so, cf->data[i] = so->tx.buf[so->tx.idx++]; so->tx.sn = 1; - so->tx.state = ISOTP_WAIT_FIRST_FC; } static void isotp_rcv_echo(struct sk_buff *skb, void *data) @@ -936,7 +953,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0; /* does the given data fit into a single frame for SF_BROADCAST? */ - if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) && + if ((isotp_bc_flags(so) == CAN_ISOTP_SF_BROADCAST) && (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) { err = -EINVAL; goto err_out_drop; @@ -1000,12 +1017,41 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) /* don't enable wait queue for a single frame transmission */ wait_tx_done = 0; } else { - /* send first frame and wait for FC */ + /* send first frame */ isotp_create_fframe(cf, so, ae); - /* start timeout for FC */ - hrtimer_sec = 1; + if (isotp_bc_flags(so) == CAN_ISOTP_CF_BROADCAST) { + /* set timer for FC-less operation (STmin = 0) */ + if (so->opt.flags & CAN_ISOTP_FORCE_TXSTMIN) + so->tx_gap = ktime_set(0, so->force_tx_stmin); + else + so->tx_gap = ktime_set(0, so->frame_txtime); + + /* disable wait for FCs due to activated block size */ + so->txfc.bs = 0; + + /* cfecho should have been zero'ed by init */ + if (so->cfecho) + pr_notice_once("can-isotp: no fc cfecho %08X\n", + so->cfecho); + + /* set consecutive frame echo tag */ + so->cfecho = *(u32 *)cf->data; + + /* switch directly to ISOTP_SENDING state */ + so->tx.state = ISOTP_SENDING; + + /* start timeout for unlikely lost echo skb */ + hrtimer_sec = 2; + } else { + /* standard flow control check */ + so->tx.state = ISOTP_WAIT_FIRST_FC; + + /* start timeout for FC */ + hrtimer_sec = 1; + } + hrtimer_start(&so->txtimer, ktime_set(hrtimer_sec, 0), HRTIMER_MODE_REL_SOFT); } @@ -1025,6 +1071,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (hrtimer_sec) hrtimer_cancel(&so->txtimer); + /* reset consecutive frame echo tag */ + so->cfecho = 0; + goto err_out_drop; } @@ -1120,15 +1169,17 @@ static int isotp_release(struct socket *sock) lock_sock(sk); /* remove current filters & unregister */ - if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST))) { + if (so->bound && isotp_register_txecho(so)) { if (so->ifindex) { struct net_device *dev; dev = dev_get_by_index(net, so->ifindex); if (dev) { - can_rx_unregister(net, dev, so->rxid, - SINGLE_MASK(so->rxid), - isotp_rcv, sk); + if (isotp_register_rxid(so)) + can_rx_unregister(net, dev, so->rxid, + SINGLE_MASK(so->rxid), + isotp_rcv, sk); + can_rx_unregister(net, dev, so->txid, SINGLE_MASK(so->txid), isotp_rcv_echo, sk); @@ -1164,7 +1215,6 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) canid_t tx_id, rx_id; int err = 0; int notify_enetdown = 0; - int do_rx_reg = 1; if (len < ISOTP_MIN_NAMELEN) return -EINVAL; @@ -1192,12 +1242,8 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) goto out; } - /* do not register frame reception for functional addressing */ - if (so->opt.flags & CAN_ISOTP_SF_BROADCAST) - do_rx_reg = 0; - - /* do not validate rx address for functional addressing */ - if (do_rx_reg && rx_id == tx_id) { + /* ensure different CAN IDs when the rx_id is to be registered */ + if (isotp_register_rxid(so) && rx_id == tx_id) { err = -EADDRNOTAVAIL; goto out; } @@ -1222,10 +1268,11 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) ifindex = dev->ifindex; - if (do_rx_reg) { + if (isotp_register_rxid(so)) can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id), isotp_rcv, sk, "isotp", sk); + if (isotp_register_txecho(so)) { /* no consecutive frame echo skb in flight */ so->cfecho = 0; @@ -1294,6 +1341,15 @@ static int isotp_setsockopt_locked(struct socket *sock, int level, int optname, if (!(so->opt.flags & CAN_ISOTP_RX_EXT_ADDR)) so->opt.rx_ext_address = so->opt.ext_address; + /* these broadcast flags are not allowed together */ + if (isotp_bc_flags(so) == ISOTP_ALL_BC_FLAGS) { + /* CAN_ISOTP_SF_BROADCAST is prioritized */ + so->opt.flags &= ~CAN_ISOTP_CF_BROADCAST; + + /* give user feedback on wrong config attempt */ + ret = -EINVAL; + } + /* check for frame_txtime changes (0 => no changes) */ if (so->opt.frame_txtime) { if (so->opt.frame_txtime == CAN_ISOTP_FRAME_TXTIME_ZERO) @@ -1444,10 +1500,12 @@ static void isotp_notify(struct isotp_sock *so, unsigned long msg, case NETDEV_UNREGISTER: lock_sock(sk); /* remove current filters & unregister */ - if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST))) { - can_rx_unregister(dev_net(dev), dev, so->rxid, - SINGLE_MASK(so->rxid), - isotp_rcv, sk); + if (so->bound && isotp_register_txecho(so)) { + if (isotp_register_rxid(so)) + can_rx_unregister(dev_net(dev), dev, so->rxid, + SINGLE_MASK(so->rxid), + isotp_rcv, sk); + can_rx_unregister(dev_net(dev), dev, so->txid, SINGLE_MASK(so->txid), isotp_rcv_echo, sk); -- cgit v1.2.3 From c1318b39c7d36bd5139a9c71044ff2b2d3c6f9d8 Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Wed, 18 May 2022 12:27:31 +0300 Subject: tls: Add opt-in zerocopy mode of sendfile() TLS device offload copies sendfile data to a bounce buffer before transmitting. It allows to maintain the valid MAC on TLS records when the file contents change and a part of TLS record has to be retransmitted on TCP level. In many common use cases (like serving static files over HTTPS) the file contents are not changed on the fly. In many use cases breaking the connection is totally acceptable if the file is changed during transmission, because it would be received corrupted in any case. This commit allows to optimize performance for such use cases to providing a new optional mode of TLS sendfile(), in which the extra copy is skipped. Removing this copy improves performance significantly, as TLS and TCP sendfile perform the same operations, and the only overhead is TLS header/trailer insertion. The new mode can only be enabled with the new socket option named TLS_TX_ZEROCOPY_SENDFILE on per-socket basis. It preserves backwards compatibility with existing applications that rely on the copying behavior. The new mode is safe, meaning that unsolicited modifications of the file being sent can't break integrity of the kernel. The worst thing that can happen is sending a corrupted TLS record, which is in any case not forbidden when using regular TCP sockets. Sockets other than TLS device offload are not affected by the new socket option. The actual status of zerocopy sendfile can be queried with sock_diag. Performance numbers in a single-core test with 24 HTTPS streams on nginx, under 100% CPU load: * non-zerocopy: 33.6 Gbit/s * zerocopy: 79.92 Gbit/s CPU: Intel(R) Xeon(R) Platinum 8380 CPU @ 2.30GHz Signed-off-by: Boris Pismenny Signed-off-by: Tariq Toukan Signed-off-by: Maxim Mikityanskiy Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20220518092731.1243494-1-maximmi@nvidia.com Signed-off-by: Paolo Abeni --- include/net/tls.h | 1 + include/uapi/linux/tls.h | 2 ++ net/tls/tls_device.c | 53 ++++++++++++++++++++++++++++++++++------------ net/tls/tls_main.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 13 deletions(-) (limited to 'include/uapi') diff --git a/include/net/tls.h b/include/net/tls.h index b59f0a63292b..8017f1703447 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -238,6 +238,7 @@ struct tls_context { u8 tx_conf:3; u8 rx_conf:3; + u8 zerocopy_sendfile:1; int (*push_pending_record)(struct sock *sk, int flags); void (*sk_write_space)(struct sock *sk); diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index 5f38be0ec0f3..ac39328eabe7 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -39,6 +39,7 @@ /* TLS socket options */ #define TLS_TX 1 /* Set transmit parameters */ #define TLS_RX 2 /* Set receive parameters */ +#define TLS_TX_ZEROCOPY_SENDFILE 3 /* transmit zerocopy sendfile */ /* Supported versions */ #define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) @@ -160,6 +161,7 @@ enum { TLS_INFO_CIPHER, TLS_INFO_TXCONF, TLS_INFO_RXCONF, + TLS_INFO_ZC_SENDFILE, __TLS_INFO_MAX, }; #define TLS_INFO_MAX (__TLS_INFO_MAX - 1) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index bca00521ebc1..ec6f4b699a2b 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -411,10 +411,16 @@ static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i) return 0; } +union tls_iter_offset { + struct iov_iter *msg_iter; + int offset; +}; + static int tls_push_data(struct sock *sk, - struct iov_iter *msg_iter, + union tls_iter_offset iter_offset, size_t size, int flags, - unsigned char record_type) + unsigned char record_type, + struct page *zc_page) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; @@ -480,12 +486,21 @@ handle_error: } record = ctx->open_record; - copy = min_t(size_t, size, (pfrag->size - pfrag->offset)); - copy = min_t(size_t, copy, (max_open_record_len - record->len)); - if (copy) { + copy = min_t(size_t, size, max_open_record_len - record->len); + if (copy && zc_page) { + struct page_frag zc_pfrag; + + zc_pfrag.page = zc_page; + zc_pfrag.offset = iter_offset.offset; + zc_pfrag.size = copy; + tls_append_frag(record, &zc_pfrag, copy); + } else if (copy) { + copy = min_t(size_t, copy, pfrag->size - pfrag->offset); + rc = tls_device_copy_data(page_address(pfrag->page) + - pfrag->offset, copy, msg_iter); + pfrag->offset, copy, + iter_offset.msg_iter); if (rc) goto handle_error; tls_append_frag(record, pfrag, copy); @@ -540,6 +555,7 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { unsigned char record_type = TLS_RECORD_TYPE_DATA; struct tls_context *tls_ctx = tls_get_ctx(sk); + union tls_iter_offset iter; int rc; mutex_lock(&tls_ctx->tx_lock); @@ -551,8 +567,8 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) goto out; } - rc = tls_push_data(sk, &msg->msg_iter, size, - msg->msg_flags, record_type); + iter.msg_iter = &msg->msg_iter; + rc = tls_push_data(sk, iter, size, msg->msg_flags, record_type, NULL); out: release_sock(sk); @@ -564,7 +580,8 @@ int tls_device_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct iov_iter msg_iter; + union tls_iter_offset iter_offset; + struct iov_iter msg_iter; char *kaddr; struct kvec iov; int rc; @@ -580,12 +597,20 @@ int tls_device_sendpage(struct sock *sk, struct page *page, goto out; } + if (tls_ctx->zerocopy_sendfile) { + iter_offset.offset = offset; + rc = tls_push_data(sk, iter_offset, size, + flags, TLS_RECORD_TYPE_DATA, page); + goto out; + } + kaddr = kmap(page); iov.iov_base = kaddr + offset; iov.iov_len = size; iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size); - rc = tls_push_data(sk, &msg_iter, size, - flags, TLS_RECORD_TYPE_DATA); + iter_offset.msg_iter = &msg_iter; + rc = tls_push_data(sk, iter_offset, size, flags, TLS_RECORD_TYPE_DATA, + NULL); kunmap(page); out: @@ -656,10 +681,12 @@ EXPORT_SYMBOL(tls_get_record); static int tls_device_push_pending_record(struct sock *sk, int flags) { - struct iov_iter msg_iter; + union tls_iter_offset iter; + struct iov_iter msg_iter; iov_iter_kvec(&msg_iter, WRITE, NULL, 0, 0); - return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA); + iter.msg_iter = &msg_iter; + return tls_push_data(sk, iter, 0, flags, TLS_RECORD_TYPE_DATA, NULL); } void tls_device_write_space(struct sock *sk, struct tls_context *ctx) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 7b2b0e7ffee4..b91ddc110786 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -513,6 +513,26 @@ out: return rc; } +static int do_tls_getsockopt_tx_zc(struct sock *sk, char __user *optval, + int __user *optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + unsigned int value; + int len; + + if (get_user(len, optlen)) + return -EFAULT; + + if (len != sizeof(value)) + return -EINVAL; + + value = ctx->zerocopy_sendfile; + if (copy_to_user(optval, &value, sizeof(value))) + return -EFAULT; + + return 0; +} + static int do_tls_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen) { @@ -524,6 +544,9 @@ static int do_tls_getsockopt(struct sock *sk, int optname, rc = do_tls_getsockopt_conf(sk, optval, optlen, optname == TLS_TX); break; + case TLS_TX_ZEROCOPY_SENDFILE: + rc = do_tls_getsockopt_tx_zc(sk, optval, optlen); + break; default: rc = -ENOPROTOOPT; break; @@ -675,6 +698,26 @@ err_crypto_info: return rc; } +static int do_tls_setsockopt_tx_zc(struct sock *sk, sockptr_t optval, + unsigned int optlen) +{ + struct tls_context *ctx = tls_get_ctx(sk); + unsigned int value; + + if (sockptr_is_null(optval) || optlen != sizeof(value)) + return -EINVAL; + + if (copy_from_sockptr(&value, optval, sizeof(value))) + return -EFAULT; + + if (value > 1) + return -EINVAL; + + ctx->zerocopy_sendfile = value; + + return 0; +} + static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen) { @@ -688,6 +731,11 @@ static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval, optname == TLS_TX); release_sock(sk); break; + case TLS_TX_ZEROCOPY_SENDFILE: + lock_sock(sk); + rc = do_tls_setsockopt_tx_zc(sk, optval, optlen); + release_sock(sk); + break; default: rc = -ENOPROTOOPT; break; @@ -921,6 +969,12 @@ static int tls_get_info(const struct sock *sk, struct sk_buff *skb) if (err) goto nla_failure; + if (ctx->tx_conf == TLS_HW && ctx->zerocopy_sendfile) { + err = nla_put_flag(skb, TLS_INFO_ZC_SENDFILE); + if (err) + goto nla_failure; + } + rcu_read_unlock(); nla_nest_end(skb, start); return 0; @@ -940,6 +994,7 @@ static size_t tls_get_info_size(const struct sock *sk) nla_total_size(sizeof(u16)) + /* TLS_INFO_CIPHER */ nla_total_size(sizeof(u16)) + /* TLS_INFO_RXCONF */ nla_total_size(sizeof(u16)) + /* TLS_INFO_TXCONF */ + nla_total_size(0) + /* TLS_INFO_ZC_SENDFILE */ 0; return size; -- cgit v1.2.3 From 3bc253c2e652cf5f12cd8c00d80d8ec55d67d1a7 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 19 May 2022 16:30:10 -0700 Subject: bpf: Add bpf_skc_to_mptcp_sock_proto This patch implements a new struct bpf_func_proto, named bpf_skc_to_mptcp_sock_proto. Define a new bpf_id BTF_SOCK_TYPE_MPTCP, and a new helper bpf_skc_to_mptcp_sock(), which invokes another new helper bpf_mptcp_sock_from_subflow() in net/mptcp/bpf.c to get struct mptcp_sock from a given subflow socket. v2: Emit BTF type, add func_id checks in verifier.c and bpf_trace.c, remove build check for CONFIG_BPF_JIT v5: Drop EXPORT_SYMBOL (Martin) Co-developed-by: Nicolas Rybowski Co-developed-by: Matthieu Baerts Signed-off-by: Nicolas Rybowski Signed-off-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220519233016.105670-2-mathew.j.martineau@linux.intel.com --- include/linux/bpf.h | 1 + include/linux/btf_ids.h | 3 ++- include/net/mptcp.h | 6 ++++++ include/uapi/linux/bpf.h | 7 +++++++ kernel/bpf/verifier.c | 1 + kernel/trace/bpf_trace.c | 2 ++ net/core/filter.c | 18 ++++++++++++++++++ net/mptcp/Makefile | 2 ++ net/mptcp/bpf.c | 21 +++++++++++++++++++++ scripts/bpf_doc.py | 2 ++ tools/include/uapi/linux/bpf.h | 7 +++++++ 11 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 net/mptcp/bpf.c (limited to 'include/uapi') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c107392b0ba7..a3ef078401cf 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2231,6 +2231,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; extern const struct bpf_func_proto bpf_skc_to_unix_sock_proto; +extern const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_snprintf_proto; diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index bc5d9cc34e4c..335a19092368 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -178,7 +178,8 @@ extern struct btf_id_set name; BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock) enum { #define BTF_SOCK_TYPE(name, str) name, diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 8b1afd6f5cc4..2ba09de955c7 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -284,4 +284,10 @@ static inline int mptcpv6_init(void) { return 0; } static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { } #endif +#if defined(CONFIG_MPTCP) && defined(CONFIG_BPF_SYSCALL) +struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk); +#else +static inline struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) { return NULL; } +#endif + #endif /* __NET_MPTCP_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0210f85131b3..56688bee20d9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5172,6 +5172,12 @@ union bpf_attr { * Return * Map value associated to *key* on *cpu*, or **NULL** if no entry * was found or *cpu* is invalid. + * + * struct mptcp_sock *bpf_skc_to_mptcp_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *mptcp_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5370,6 +5376,7 @@ union bpf_attr { FN(ima_file_hash), \ FN(kptr_xchg), \ FN(map_lookup_percpu_elem), \ + FN(skc_to_mptcp_sock), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9b59581026f8..14e8c17d3d8d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -509,6 +509,7 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id) func_id == BPF_FUNC_skc_to_tcp_sock || func_id == BPF_FUNC_skc_to_tcp6_sock || func_id == BPF_FUNC_skc_to_udp6_sock || + func_id == BPF_FUNC_skc_to_mptcp_sock || func_id == BPF_FUNC_skc_to_tcp_timewait_sock || func_id == BPF_FUNC_skc_to_tcp_request_sock; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 7141ca8a1c2d..10b157a6d73e 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1705,6 +1705,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skc_to_udp6_sock_proto; case BPF_FUNC_skc_to_unix_sock: return &bpf_skc_to_unix_sock_proto; + case BPF_FUNC_skc_to_mptcp_sock: + return &bpf_skc_to_mptcp_sock_proto; case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_tracing_proto; case BPF_FUNC_sk_storage_delete: diff --git a/net/core/filter.c b/net/core/filter.c index fe0da529d00f..5af58eb48587 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -78,6 +78,7 @@ #include #include #include +#include static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id); @@ -11281,6 +11282,20 @@ const struct bpf_func_proto bpf_skc_to_unix_sock_proto = { .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UNIX], }; +BPF_CALL_1(bpf_skc_to_mptcp_sock, struct sock *, sk) +{ + BTF_TYPE_EMIT(struct mptcp_sock); + return (unsigned long)bpf_mptcp_sock_from_subflow(sk); +} + +const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = { + .func = bpf_skc_to_mptcp_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, + .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP], +}; + BPF_CALL_1(bpf_sock_from_file, struct file *, file) { return (unsigned long)sock_from_file(file); @@ -11323,6 +11338,9 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id) case BPF_FUNC_skc_to_unix_sock: func = &bpf_skc_to_unix_sock_proto; break; + case BPF_FUNC_skc_to_mptcp_sock: + func = &bpf_skc_to_mptcp_sock_proto; + break; case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index e54daceac58b..99dddf08ca73 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -10,3 +10,5 @@ obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o mptcp_crypto_test-objs := crypto_test.o mptcp_token_test-objs := token_test.o obj-$(CONFIG_MPTCP_KUNIT_TEST) += mptcp_crypto_test.o mptcp_token_test.o + +obj-$(CONFIG_BPF_SYSCALL) += bpf.o diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c new file mode 100644 index 000000000000..5a0a84ad94af --- /dev/null +++ b/net/mptcp/bpf.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Multipath TCP + * + * Copyright (c) 2020, Tessares SA. + * Copyright (c) 2022, SUSE. + * + * Author: Nicolas Rybowski + */ + +#define pr_fmt(fmt) "MPTCP: " fmt + +#include +#include "protocol.h" + +struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) +{ + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) + return mptcp_sk(mptcp_subflow_ctx(sk)->conn); + + return NULL; +} diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 096625242475..d5452f7eb996 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -633,6 +633,7 @@ class PrinterHelpers(Printer): 'struct socket', 'struct file', 'struct bpf_timer', + 'struct mptcp_sock', ] known_types = { '...', @@ -682,6 +683,7 @@ class PrinterHelpers(Printer): 'struct socket', 'struct file', 'struct bpf_timer', + 'struct mptcp_sock', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0210f85131b3..56688bee20d9 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5172,6 +5172,12 @@ union bpf_attr { * Return * Map value associated to *key* on *cpu*, or **NULL** if no entry * was found or *cpu* is invalid. + * + * struct mptcp_sock *bpf_skc_to_mptcp_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *mptcp_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5370,6 +5376,7 @@ union bpf_attr { FN(ima_file_hash), \ FN(kptr_xchg), \ FN(map_lookup_percpu_elem), \ + FN(skc_to_mptcp_sock), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 97e03f521050c092919591e668107b3d69c5f426 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 23 May 2022 14:07:07 -0700 Subject: bpf: Add verifier support for dynptrs This patch adds the bulk of the verifier work for supporting dynamic pointers (dynptrs) in bpf. A bpf_dynptr is opaque to the bpf program. It is a 16-byte structure defined internally as: struct bpf_dynptr_kern { void *data; u32 size; u32 offset; } __aligned(8); The upper 8 bits of *size* is reserved (it contains extra metadata about read-only status and dynptr type). Consequently, a dynptr only supports memory less than 16 MB. There are different types of dynptrs (eg malloc, ringbuf, ...). In this patchset, the most basic one, dynptrs to a bpf program's local memory, is added. For now only local memory that is of reg type PTR_TO_MAP_VALUE is supported. In the verifier, dynptr state information will be tracked in stack slots. When the program passes in an uninitialized dynptr (ARG_PTR_TO_DYNPTR | MEM_UNINIT), the stack slots corresponding to the frame pointer where the dynptr resides at are marked STACK_DYNPTR. For helper functions that take in initialized dynptrs (eg bpf_dynptr_read + bpf_dynptr_write which are added later in this patchset), the verifier enforces that the dynptr has been initialized properly by checking that their corresponding stack slots have been marked as STACK_DYNPTR. The 6th patch in this patchset adds test cases that the verifier should successfully reject, such as for example attempting to use a dynptr after doing a direct write into it inside the bpf program. Signed-off-by: Joanne Koong Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20220523210712.3641569-2-joannelkoong@gmail.com --- include/linux/bpf.h | 28 ++++++ include/linux/bpf_verifier.h | 18 ++++ include/uapi/linux/bpf.h | 5 ++ kernel/bpf/verifier.c | 188 ++++++++++++++++++++++++++++++++++++++++- scripts/bpf_doc.py | 2 + tools/include/uapi/linux/bpf.h | 5 ++ 6 files changed, 243 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a9b1875212f6..b26c8176b9e0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -392,10 +392,15 @@ enum bpf_type_flag { MEM_UNINIT = BIT(7 + BPF_BASE_TYPE_BITS), + /* DYNPTR points to memory local to the bpf program. */ + DYNPTR_TYPE_LOCAL = BIT(8 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; +#define DYNPTR_TYPE_FLAG_MASK DYNPTR_TYPE_LOCAL + /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -438,6 +443,7 @@ enum bpf_arg_type { ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ + ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */ __BPF_ARG_TYPE_MAX, /* Extended arg_types. */ @@ -2376,4 +2382,26 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, u32 **bin_buf, u32 num_args); void bpf_bprintf_cleanup(void); +/* the implementation of the opaque uapi struct bpf_dynptr */ +struct bpf_dynptr_kern { + void *data; + /* Size represents the number of usable bytes of dynptr data. + * If for example the offset is at 4 for a local dynptr whose data is + * of type u64, the number of usable bytes is 4. + * + * The upper 8 bits are reserved. It is as follows: + * Bits 0 - 23 = size + * Bits 24 - 30 = dynptr type + * Bit 31 = whether dynptr is read-only + */ + u32 size; + u32 offset; +} __aligned(8); + +enum bpf_dynptr_type { + BPF_DYNPTR_TYPE_INVALID, + /* Points to memory that is local to the bpf program */ + BPF_DYNPTR_TYPE_LOCAL, +}; + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 1f1e7f2ea967..af5b2135215e 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -72,6 +72,18 @@ struct bpf_reg_state { u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */ + /* For dynptr stack slots */ + struct { + enum bpf_dynptr_type type; + /* A dynptr is 16 bytes so it takes up 2 stack slots. + * We need to track which slot is the first slot + * to protect against cases where the user may try to + * pass in an address starting at the second slot of the + * dynptr. + */ + bool first_slot; + } dynptr; + /* Max size from any of the above. */ struct { unsigned long raw1; @@ -174,9 +186,15 @@ enum bpf_stack_slot_type { STACK_SPILL, /* register spilled into stack */ STACK_MISC, /* BPF program wrote some data into this slot */ STACK_ZERO, /* BPF program wrote constant zero */ + /* A dynptr is stored in this stack slot. The type of dynptr + * is stored in bpf_stack_state->spilled_ptr.dynptr.type + */ + STACK_DYNPTR, }; #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ +#define BPF_DYNPTR_SIZE sizeof(struct bpf_dynptr_kern) +#define BPF_DYNPTR_NR_SLOTS (BPF_DYNPTR_SIZE / BPF_REG_SIZE) struct bpf_stack_state { struct bpf_reg_state spilled_ptr; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 56688bee20d9..610944cb3389 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6528,6 +6528,11 @@ struct bpf_timer { __u64 :64; } __attribute__((aligned(8))); +struct bpf_dynptr { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 45153cbc2bd6..1fd0b81c3fec 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -259,6 +259,7 @@ struct bpf_call_arg_meta { u32 ret_btf_id; u32 subprogno; struct bpf_map_value_off_desc *kptr_off_desc; + u8 uninit_dynptr_regno; }; struct btf *btf_vmlinux; @@ -581,6 +582,7 @@ static char slot_type_char[] = { [STACK_SPILL] = 'r', [STACK_MISC] = 'm', [STACK_ZERO] = '0', + [STACK_DYNPTR] = 'd', }; static void print_liveness(struct bpf_verifier_env *env, @@ -596,6 +598,25 @@ static void print_liveness(struct bpf_verifier_env *env, verbose(env, "D"); } +static int get_spi(s32 off) +{ + return (-off - 1) / BPF_REG_SIZE; +} + +static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots) +{ + int allocated_slots = state->allocated_stack / BPF_REG_SIZE; + + /* We need to check that slots between [spi - nr_slots + 1, spi] are + * within [0, allocated_stack). + * + * Please note that the spi grows downwards. For example, a dynptr + * takes the size of two stack slots; the first slot will be at + * spi and the second slot will be at spi - 1. + */ + return spi - nr_slots + 1 >= 0 && spi < allocated_slots; +} + static struct bpf_func_state *func(struct bpf_verifier_env *env, const struct bpf_reg_state *reg) { @@ -647,6 +668,108 @@ static void mark_verifier_state_scratched(struct bpf_verifier_env *env) env->scratched_stack_slots = ~0ULL; } +static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) +{ + switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { + case DYNPTR_TYPE_LOCAL: + return BPF_DYNPTR_TYPE_LOCAL; + default: + return BPF_DYNPTR_TYPE_INVALID; + } +} + +static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + enum bpf_arg_type arg_type, int insn_idx) +{ + struct bpf_func_state *state = func(env, reg); + enum bpf_dynptr_type type; + int spi, i; + + spi = get_spi(reg->off); + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) + return -EINVAL; + + for (i = 0; i < BPF_REG_SIZE; i++) { + state->stack[spi].slot_type[i] = STACK_DYNPTR; + state->stack[spi - 1].slot_type[i] = STACK_DYNPTR; + } + + type = arg_to_dynptr_type(arg_type); + if (type == BPF_DYNPTR_TYPE_INVALID) + return -EINVAL; + + state->stack[spi].spilled_ptr.dynptr.first_slot = true; + state->stack[spi].spilled_ptr.dynptr.type = type; + state->stack[spi - 1].spilled_ptr.dynptr.type = type; + + return 0; +} + +static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi, i; + + spi = get_spi(reg->off); + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) + return -EINVAL; + + for (i = 0; i < BPF_REG_SIZE; i++) { + state->stack[spi].slot_type[i] = STACK_INVALID; + state->stack[spi - 1].slot_type[i] = STACK_INVALID; + } + + state->stack[spi].spilled_ptr.dynptr.first_slot = false; + state->stack[spi].spilled_ptr.dynptr.type = 0; + state->stack[spi - 1].spilled_ptr.dynptr.type = 0; + + return 0; +} + +static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + int i; + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) + return true; + + for (i = 0; i < BPF_REG_SIZE; i++) { + if (state->stack[spi].slot_type[i] == STACK_DYNPTR || + state->stack[spi - 1].slot_type[i] == STACK_DYNPTR) + return false; + } + + return true; +} + +static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + enum bpf_arg_type arg_type) +{ + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + int i; + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) || + !state->stack[spi].spilled_ptr.dynptr.first_slot) + return false; + + for (i = 0; i < BPF_REG_SIZE; i++) { + if (state->stack[spi].slot_type[i] != STACK_DYNPTR || + state->stack[spi - 1].slot_type[i] != STACK_DYNPTR) + return false; + } + + /* ARG_PTR_TO_DYNPTR takes any type of dynptr */ + if (arg_type == ARG_PTR_TO_DYNPTR) + return true; + + return state->stack[spi].spilled_ptr.dynptr.type == arg_to_dynptr_type(arg_type); +} + /* The reg state of a pointer or a bounded scalar was saved when * it was spilled to the stack. */ @@ -5400,6 +5523,11 @@ static bool arg_type_is_release(enum bpf_arg_type type) return type & OBJ_RELEASE; } +static bool arg_type_is_dynptr(enum bpf_arg_type type) +{ + return base_type(type) == ARG_PTR_TO_DYNPTR; +} + static int int_ptr_type_to_size(enum bpf_arg_type type) { if (type == ARG_PTR_TO_INT) @@ -5539,6 +5667,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, [ARG_PTR_TO_TIMER] = &timer_types, [ARG_PTR_TO_KPTR] = &kptr_types, + [ARG_PTR_TO_DYNPTR] = &stack_ptr_types, }; static int check_reg_type(struct bpf_verifier_env *env, u32 regno, @@ -5628,8 +5757,13 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, bool fixed_off_ok = false; switch ((u32)type) { - case SCALAR_VALUE: /* Pointer types where reg offset is explicitly allowed: */ + case PTR_TO_STACK: + if (arg_type_is_dynptr(arg_type) && reg->off % BPF_REG_SIZE) { + verbose(env, "cannot pass in dynptr at an offset\n"); + return -EINVAL; + } + fallthrough; case PTR_TO_PACKET: case PTR_TO_PACKET_META: case PTR_TO_MAP_KEY: @@ -5639,7 +5773,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, case PTR_TO_MEM | MEM_ALLOC: case PTR_TO_BUF: case PTR_TO_BUF | MEM_RDONLY: - case PTR_TO_STACK: + case SCALAR_VALUE: /* Some of the argument types nevertheless require a * zero register offset. */ @@ -5837,6 +5971,36 @@ skip_type_check: bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta); + } else if (arg_type_is_dynptr(arg_type)) { + if (arg_type & MEM_UNINIT) { + if (!is_dynptr_reg_valid_uninit(env, reg)) { + verbose(env, "Dynptr has to be an uninitialized dynptr\n"); + return -EINVAL; + } + + /* We only support one dynptr being uninitialized at the moment, + * which is sufficient for the helper functions we have right now. + */ + if (meta->uninit_dynptr_regno) { + verbose(env, "verifier internal error: multiple uninitialized dynptr args\n"); + return -EFAULT; + } + + meta->uninit_dynptr_regno = regno; + } else if (!is_dynptr_reg_valid_init(env, reg, arg_type)) { + const char *err_extra = ""; + + switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { + case DYNPTR_TYPE_LOCAL: + err_extra = "local "; + break; + default: + break; + } + verbose(env, "Expected an initialized %sdynptr as arg #%d\n", + err_extra, arg + 1); + return -EINVAL; + } } else if (arg_type_is_alloc_size(arg_type)) { if (!tnum_is_const(reg->var_off)) { verbose(env, "R%d is not a known constant'\n", @@ -6970,9 +7134,27 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs = cur_regs(env); + if (meta.uninit_dynptr_regno) { + /* we write BPF_DW bits (8 bytes) at a time */ + for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) { + err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno, + i, BPF_DW, BPF_WRITE, -1, false); + if (err) + return err; + } + + err = mark_stack_slots_dynptr(env, ®s[meta.uninit_dynptr_regno], + fn->arg_type[meta.uninit_dynptr_regno - BPF_REG_1], + insn_idx); + if (err) + return err; + } + if (meta.release_regno) { err = -EINVAL; - if (meta.ref_obj_id) + if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) + err = unmark_stack_slots_dynptr(env, ®s[meta.release_regno]); + else if (meta.ref_obj_id) err = release_reference(env, meta.ref_obj_id); /* meta.ref_obj_id can only be 0 if register that is meant to be * released is NULL, which must be > R0. diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index d5452f7eb996..855b937e7585 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -634,6 +634,7 @@ class PrinterHelpers(Printer): 'struct file', 'struct bpf_timer', 'struct mptcp_sock', + 'struct bpf_dynptr', ] known_types = { '...', @@ -684,6 +685,7 @@ class PrinterHelpers(Printer): 'struct file', 'struct bpf_timer', 'struct mptcp_sock', + 'struct bpf_dynptr', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 56688bee20d9..610944cb3389 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6528,6 +6528,11 @@ struct bpf_timer { __u64 :64; } __attribute__((aligned(8))); +struct bpf_dynptr { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. -- cgit v1.2.3 From 263ae152e96253f40c2c276faad8629e096b3bad Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 23 May 2022 14:07:08 -0700 Subject: bpf: Add bpf_dynptr_from_mem for local dynptrs This patch adds a new api bpf_dynptr_from_mem: long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr); which initializes a dynptr to point to a bpf program's local memory. For now only local memory that is of reg type PTR_TO_MAP_VALUE is supported. Signed-off-by: Joanne Koong Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220523210712.3641569-3-joannelkoong@gmail.com --- include/uapi/linux/bpf.h | 12 ++++++++ kernel/bpf/helpers.c | 65 ++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 6 ++++ tools/include/uapi/linux/bpf.h | 12 ++++++++ 4 files changed, 95 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 610944cb3389..9be3644457dd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5178,6 +5178,17 @@ union bpf_attr { * Dynamically cast a *sk* pointer to a *mptcp_sock* pointer. * Return * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Get a dynptr to local memory *data*. + * + * *data* must be a ptr to a map value. + * The maximum *size* supported is DYNPTR_MAX_SIZE. + * *flags* is currently unused. + * Return + * 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE, + * -EINVAL if flags is not 0. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5377,6 +5388,7 @@ union bpf_attr { FN(kptr_xchg), \ FN(map_lookup_percpu_elem), \ FN(skc_to_mptcp_sock), \ + FN(dynptr_from_mem), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index bad96131a510..d3e935c2e25e 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1412,6 +1412,69 @@ const struct bpf_func_proto bpf_kptr_xchg_proto = { .arg2_btf_id = BPF_PTR_POISON, }; +/* Since the upper 8 bits of dynptr->size is reserved, the + * maximum supported size is 2^24 - 1. + */ +#define DYNPTR_MAX_SIZE ((1UL << 24) - 1) +#define DYNPTR_TYPE_SHIFT 28 + +static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type) +{ + ptr->size |= type << DYNPTR_TYPE_SHIFT; +} + +static int bpf_dynptr_check_size(u32 size) +{ + return size > DYNPTR_MAX_SIZE ? -E2BIG : 0; +} + +static void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, + enum bpf_dynptr_type type, u32 offset, u32 size) +{ + ptr->data = data; + ptr->offset = offset; + ptr->size = size; + bpf_dynptr_set_type(ptr, type); +} + +static void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) +{ + memset(ptr, 0, sizeof(*ptr)); +} + +BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr) +{ + int err; + + err = bpf_dynptr_check_size(size); + if (err) + goto error; + + /* flags is currently unsupported */ + if (flags) { + err = -EINVAL; + goto error; + } + + bpf_dynptr_init(ptr, data, BPF_DYNPTR_TYPE_LOCAL, 0, size); + + return 0; + +error: + bpf_dynptr_set_null(ptr); + return err; +} + +const struct bpf_func_proto bpf_dynptr_from_mem_proto = { + .func = bpf_dynptr_from_mem, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, +}; + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_get_current_task_btf_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; @@ -1466,6 +1529,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_loop_proto; case BPF_FUNC_strncmp: return &bpf_strncmp_proto; + case BPF_FUNC_dynptr_from_mem: + return &bpf_dynptr_from_mem_proto; default: break; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1fd0b81c3fec..b657d46f886e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7204,6 +7204,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_loop_callback_state); break; + case BPF_FUNC_dynptr_from_mem: + if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) { + verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n", + reg_type_str(env, regs[BPF_REG_1].type)); + return -EACCES; + } } if (err) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 610944cb3389..9be3644457dd 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5178,6 +5178,17 @@ union bpf_attr { * Dynamically cast a *sk* pointer to a *mptcp_sock* pointer. * Return * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Get a dynptr to local memory *data*. + * + * *data* must be a ptr to a map value. + * The maximum *size* supported is DYNPTR_MAX_SIZE. + * *flags* is currently unused. + * Return + * 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE, + * -EINVAL if flags is not 0. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5377,6 +5388,7 @@ union bpf_attr { FN(kptr_xchg), \ FN(map_lookup_percpu_elem), \ FN(skc_to_mptcp_sock), \ + FN(dynptr_from_mem), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From bc34dee65a65e9c920c420005b8a43f2a721a458 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 23 May 2022 14:07:09 -0700 Subject: bpf: Dynptr support for ring buffers Currently, our only way of writing dynamically-sized data into a ring buffer is through bpf_ringbuf_output but this incurs an extra memcpy cost. bpf_ringbuf_reserve + bpf_ringbuf_commit avoids this extra memcpy, but it can only safely support reservation sizes that are statically known since the verifier cannot guarantee that the bpf program won’t access memory outside the reserved space. The bpf_dynptr abstraction allows for dynamically-sized ring buffer reservations without the extra memcpy. There are 3 new APIs: long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr); void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags); void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags); These closely follow the functionalities of the original ringbuf APIs. For example, all ringbuffer dynptrs that have been reserved must be either submitted or discarded before the program exits. Signed-off-by: Joanne Koong Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20220523210712.3641569-4-joannelkoong@gmail.com --- include/linux/bpf.h | 15 +++++++- include/linux/bpf_verifier.h | 2 ++ include/uapi/linux/bpf.h | 35 +++++++++++++++++++ kernel/bpf/helpers.c | 14 +++++--- kernel/bpf/ringbuf.c | 78 ++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 52 ++++++++++++++++++++++++++-- tools/include/uapi/linux/bpf.h | 35 +++++++++++++++++++ 7 files changed, 223 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b26c8176b9e0..c72321b6f306 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -395,11 +395,14 @@ enum bpf_type_flag { /* DYNPTR points to memory local to the bpf program. */ DYNPTR_TYPE_LOCAL = BIT(8 + BPF_BASE_TYPE_BITS), + /* DYNPTR points to a ringbuf record. */ + DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; -#define DYNPTR_TYPE_FLAG_MASK DYNPTR_TYPE_LOCAL +#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF) /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -2231,6 +2234,9 @@ extern const struct bpf_func_proto bpf_ringbuf_reserve_proto; extern const struct bpf_func_proto bpf_ringbuf_submit_proto; extern const struct bpf_func_proto bpf_ringbuf_discard_proto; extern const struct bpf_func_proto bpf_ringbuf_query_proto; +extern const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto; +extern const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto; +extern const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto; extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; @@ -2402,6 +2408,13 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_INVALID, /* Points to memory that is local to the bpf program */ BPF_DYNPTR_TYPE_LOCAL, + /* Underlying data is a ringbuf record */ + BPF_DYNPTR_TYPE_RINGBUF, }; +void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, + enum bpf_dynptr_type type, u32 offset, u32 size); +void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); +int bpf_dynptr_check_size(u32 size); + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index af5b2135215e..e8439f6cbe57 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -100,6 +100,8 @@ struct bpf_reg_state { * for the purpose of tracking that it's freed. * For PTR_TO_SOCKET this is used to share which pointers retain the * same reference to the socket, to determine proper reference freeing. + * For stack slots that are dynptrs, this is used to track references to + * the dynptr to determine proper reference freeing. */ u32 id; /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9be3644457dd..081a55540aa5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5189,6 +5189,38 @@ union bpf_attr { * Return * 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE, * -EINVAL if flags is not 0. + * + * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Reserve *size* bytes of payload in a ring buffer *ringbuf* + * through the dynptr interface. *flags* must be 0. + * + * Please note that a corresponding bpf_ringbuf_submit_dynptr or + * bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the + * reservation fails. This is enforced by the verifier. + * Return + * 0 on success, or a negative error in case of failure. + * + * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Submit reserved ring buffer sample, pointed to by *data*, + * through the dynptr interface. This is a no-op if the dynptr is + * invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_submit'. + * Return + * Nothing. Always succeeds. + * + * void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Discard reserved ring buffer sample through the dynptr + * interface. This is a no-op if the dynptr is invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_discard'. + * Return + * Nothing. Always succeeds. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5389,6 +5421,9 @@ union bpf_attr { FN(map_lookup_percpu_elem), \ FN(skc_to_mptcp_sock), \ FN(dynptr_from_mem), \ + FN(ringbuf_reserve_dynptr), \ + FN(ringbuf_submit_dynptr), \ + FN(ringbuf_discard_dynptr), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index d3e935c2e25e..abb08999ff56 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1423,13 +1423,13 @@ static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_typ ptr->size |= type << DYNPTR_TYPE_SHIFT; } -static int bpf_dynptr_check_size(u32 size) +int bpf_dynptr_check_size(u32 size) { return size > DYNPTR_MAX_SIZE ? -E2BIG : 0; } -static void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, - enum bpf_dynptr_type type, u32 offset, u32 size) +void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, + enum bpf_dynptr_type type, u32 offset, u32 size) { ptr->data = data; ptr->offset = offset; @@ -1437,7 +1437,7 @@ static void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, bpf_dynptr_set_type(ptr, type); } -static void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) +void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) { memset(ptr, 0, sizeof(*ptr)); } @@ -1523,6 +1523,12 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_ringbuf_discard_proto; case BPF_FUNC_ringbuf_query: return &bpf_ringbuf_query_proto; + case BPF_FUNC_ringbuf_reserve_dynptr: + return &bpf_ringbuf_reserve_dynptr_proto; + case BPF_FUNC_ringbuf_submit_dynptr: + return &bpf_ringbuf_submit_dynptr_proto; + case BPF_FUNC_ringbuf_discard_dynptr: + return &bpf_ringbuf_discard_dynptr_proto; case BPF_FUNC_for_each_map_elem: return &bpf_for_each_map_elem_proto; case BPF_FUNC_loop: diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 311264ab80c4..ded4faeca192 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -475,3 +475,81 @@ const struct bpf_func_proto bpf_ringbuf_query_proto = { .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_ANYTHING, }; + +BPF_CALL_4(bpf_ringbuf_reserve_dynptr, struct bpf_map *, map, u32, size, u64, flags, + struct bpf_dynptr_kern *, ptr) +{ + struct bpf_ringbuf_map *rb_map; + void *sample; + int err; + + if (unlikely(flags)) { + bpf_dynptr_set_null(ptr); + return -EINVAL; + } + + err = bpf_dynptr_check_size(size); + if (err) { + bpf_dynptr_set_null(ptr); + return err; + } + + rb_map = container_of(map, struct bpf_ringbuf_map, map); + + sample = __bpf_ringbuf_reserve(rb_map->rb, size); + if (!sample) { + bpf_dynptr_set_null(ptr); + return -EINVAL; + } + + bpf_dynptr_init(ptr, sample, BPF_DYNPTR_TYPE_RINGBUF, 0, size); + + return 0; +} + +const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto = { + .func = bpf_ringbuf_reserve_dynptr, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_UNINIT, +}; + +BPF_CALL_2(bpf_ringbuf_submit_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags) +{ + if (!ptr->data) + return 0; + + bpf_ringbuf_commit(ptr->data, flags, false /* discard */); + + bpf_dynptr_set_null(ptr); + + return 0; +} + +const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto = { + .func = bpf_ringbuf_submit_dynptr, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | OBJ_RELEASE, + .arg2_type = ARG_ANYTHING, +}; + +BPF_CALL_2(bpf_ringbuf_discard_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags) +{ + if (!ptr->data) + return 0; + + bpf_ringbuf_commit(ptr->data, flags, true /* discard */); + + bpf_dynptr_set_null(ptr); + + return 0; +} + +const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto = { + .func = bpf_ringbuf_discard_dynptr, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | OBJ_RELEASE, + .arg2_type = ARG_ANYTHING, +}; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b657d46f886e..8be140351966 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -187,6 +187,9 @@ struct bpf_verifier_stack_elem { POISON_POINTER_DELTA)) #define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV)) +static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx); +static int release_reference(struct bpf_verifier_env *env, int ref_obj_id); + static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) { return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON; @@ -673,17 +676,24 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { case DYNPTR_TYPE_LOCAL: return BPF_DYNPTR_TYPE_LOCAL; + case DYNPTR_TYPE_RINGBUF: + return BPF_DYNPTR_TYPE_RINGBUF; default: return BPF_DYNPTR_TYPE_INVALID; } } +static bool dynptr_type_refcounted(enum bpf_dynptr_type type) +{ + return type == BPF_DYNPTR_TYPE_RINGBUF; +} + static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg, enum bpf_arg_type arg_type, int insn_idx) { struct bpf_func_state *state = func(env, reg); enum bpf_dynptr_type type; - int spi, i; + int spi, i, id; spi = get_spi(reg->off); @@ -703,6 +713,16 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_ state->stack[spi].spilled_ptr.dynptr.type = type; state->stack[spi - 1].spilled_ptr.dynptr.type = type; + if (dynptr_type_refcounted(type)) { + /* The id is used to track proper releasing */ + id = acquire_reference_state(env, insn_idx); + if (id < 0) + return id; + + state->stack[spi].spilled_ptr.id = id; + state->stack[spi - 1].spilled_ptr.id = id; + } + return 0; } @@ -721,6 +741,13 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re state->stack[spi - 1].slot_type[i] = STACK_INVALID; } + /* Invalidate any slices associated with this dynptr */ + if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) { + release_reference(env, state->stack[spi].spilled_ptr.id); + state->stack[spi].spilled_ptr.id = 0; + state->stack[spi - 1].spilled_ptr.id = 0; + } + state->stack[spi].spilled_ptr.dynptr.first_slot = false; state->stack[spi].spilled_ptr.dynptr.type = 0; state->stack[spi - 1].spilled_ptr.dynptr.type = 0; @@ -5859,7 +5886,16 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, skip_type_check: if (arg_type_is_release(arg_type)) { - if (!reg->ref_obj_id && !register_is_null(reg)) { + if (arg_type_is_dynptr(arg_type)) { + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) || + !state->stack[spi].spilled_ptr.id) { + verbose(env, "arg %d is an unacquired reference\n", regno); + return -EINVAL; + } + } else if (!reg->ref_obj_id && !register_is_null(reg)) { verbose(env, "R%d must be referenced when passed to release function\n", regno); return -EINVAL; @@ -5994,9 +6030,13 @@ skip_type_check: case DYNPTR_TYPE_LOCAL: err_extra = "local "; break; + case DYNPTR_TYPE_RINGBUF: + err_extra = "ringbuf "; + break; default: break; } + verbose(env, "Expected an initialized %sdynptr as arg #%d\n", err_extra, arg + 1); return -EINVAL; @@ -6122,7 +6162,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_RINGBUF: if (func_id != BPF_FUNC_ringbuf_output && func_id != BPF_FUNC_ringbuf_reserve && - func_id != BPF_FUNC_ringbuf_query) + func_id != BPF_FUNC_ringbuf_query && + func_id != BPF_FUNC_ringbuf_reserve_dynptr && + func_id != BPF_FUNC_ringbuf_submit_dynptr && + func_id != BPF_FUNC_ringbuf_discard_dynptr) goto error; break; case BPF_MAP_TYPE_STACK_TRACE: @@ -6238,6 +6281,9 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_FUNC_ringbuf_output: case BPF_FUNC_ringbuf_reserve: case BPF_FUNC_ringbuf_query: + case BPF_FUNC_ringbuf_reserve_dynptr: + case BPF_FUNC_ringbuf_submit_dynptr: + case BPF_FUNC_ringbuf_discard_dynptr: if (map->map_type != BPF_MAP_TYPE_RINGBUF) goto error; break; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9be3644457dd..081a55540aa5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5189,6 +5189,38 @@ union bpf_attr { * Return * 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE, * -EINVAL if flags is not 0. + * + * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Reserve *size* bytes of payload in a ring buffer *ringbuf* + * through the dynptr interface. *flags* must be 0. + * + * Please note that a corresponding bpf_ringbuf_submit_dynptr or + * bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the + * reservation fails. This is enforced by the verifier. + * Return + * 0 on success, or a negative error in case of failure. + * + * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Submit reserved ring buffer sample, pointed to by *data*, + * through the dynptr interface. This is a no-op if the dynptr is + * invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_submit'. + * Return + * Nothing. Always succeeds. + * + * void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Discard reserved ring buffer sample through the dynptr + * interface. This is a no-op if the dynptr is invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_discard'. + * Return + * Nothing. Always succeeds. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5389,6 +5421,9 @@ union bpf_attr { FN(map_lookup_percpu_elem), \ FN(skc_to_mptcp_sock), \ FN(dynptr_from_mem), \ + FN(ringbuf_reserve_dynptr), \ + FN(ringbuf_submit_dynptr), \ + FN(ringbuf_discard_dynptr), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 13bbbfbea7598ea9f8d9c3d73bf053bb57f9c4b2 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 23 May 2022 14:07:10 -0700 Subject: bpf: Add bpf_dynptr_read and bpf_dynptr_write This patch adds two helper functions, bpf_dynptr_read and bpf_dynptr_write: long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset); long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len); The dynptr passed into these functions must be valid dynptrs that have been initialized. Signed-off-by: Joanne Koong Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220523210712.3641569-5-joannelkoong@gmail.com --- include/uapi/linux/bpf.h | 19 ++++++++++ kernel/bpf/helpers.c | 78 ++++++++++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 19 ++++++++++ 3 files changed, 116 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 081a55540aa5..efe2505650e6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5221,6 +5221,23 @@ union bpf_attr { * 'bpf_ringbuf_discard'. * Return * Nothing. Always succeeds. + * + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) + * Description + * Read *len* bytes from *src* into *dst*, starting from *offset* + * into *src*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *src*'s data, -EINVAL if *src* is an invalid dynptr. + * + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) + * Description + * Write *len* bytes from *src* into *dst*, starting from *offset* + * into *dst*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* + * is a read-only dynptr. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5424,6 +5441,8 @@ union bpf_attr { FN(ringbuf_reserve_dynptr), \ FN(ringbuf_submit_dynptr), \ FN(ringbuf_discard_dynptr), \ + FN(dynptr_read), \ + FN(dynptr_write), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index abb08999ff56..8cef3fb0d143 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1417,12 +1417,24 @@ const struct bpf_func_proto bpf_kptr_xchg_proto = { */ #define DYNPTR_MAX_SIZE ((1UL << 24) - 1) #define DYNPTR_TYPE_SHIFT 28 +#define DYNPTR_SIZE_MASK 0xFFFFFF +#define DYNPTR_RDONLY_BIT BIT(31) + +static bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr) +{ + return ptr->size & DYNPTR_RDONLY_BIT; +} static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type) { ptr->size |= type << DYNPTR_TYPE_SHIFT; } +static u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr) +{ + return ptr->size & DYNPTR_SIZE_MASK; +} + int bpf_dynptr_check_size(u32 size) { return size > DYNPTR_MAX_SIZE ? -E2BIG : 0; @@ -1442,6 +1454,16 @@ void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) memset(ptr, 0, sizeof(*ptr)); } +static int bpf_dynptr_check_off_len(struct bpf_dynptr_kern *ptr, u32 offset, u32 len) +{ + u32 size = bpf_dynptr_get_size(ptr); + + if (len > size || offset > size - len) + return -E2BIG; + + return 0; +} + BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr) { int err; @@ -1475,6 +1497,58 @@ const struct bpf_func_proto bpf_dynptr_from_mem_proto = { .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, }; +BPF_CALL_4(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src, u32, offset) +{ + int err; + + if (!src->data) + return -EINVAL; + + err = bpf_dynptr_check_off_len(src, offset, len); + if (err) + return err; + + memcpy(dst, src->data + src->offset + offset, len); + + return 0; +} + +const struct bpf_func_proto bpf_dynptr_read_proto = { + .func = bpf_dynptr_read, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, + .arg3_type = ARG_PTR_TO_DYNPTR, + .arg4_type = ARG_ANYTHING, +}; + +BPF_CALL_4(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src, u32, len) +{ + int err; + + if (!dst->data || bpf_dynptr_is_rdonly(dst)) + return -EINVAL; + + err = bpf_dynptr_check_off_len(dst, offset, len); + if (err) + return err; + + memcpy(dst->data + dst->offset + offset, src, len); + + return 0; +} + +const struct bpf_func_proto bpf_dynptr_write_proto = { + .func = bpf_dynptr_write, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_DYNPTR, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_CONST_SIZE_OR_ZERO, +}; + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_get_current_task_btf_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; @@ -1537,6 +1611,10 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_strncmp_proto; case BPF_FUNC_dynptr_from_mem: return &bpf_dynptr_from_mem_proto; + case BPF_FUNC_dynptr_read: + return &bpf_dynptr_read_proto; + case BPF_FUNC_dynptr_write: + return &bpf_dynptr_write_proto; default: break; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 081a55540aa5..efe2505650e6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5221,6 +5221,23 @@ union bpf_attr { * 'bpf_ringbuf_discard'. * Return * Nothing. Always succeeds. + * + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) + * Description + * Read *len* bytes from *src* into *dst*, starting from *offset* + * into *src*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *src*'s data, -EINVAL if *src* is an invalid dynptr. + * + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) + * Description + * Write *len* bytes from *src* into *dst*, starting from *offset* + * into *dst*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* + * is a read-only dynptr. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5424,6 +5441,8 @@ union bpf_attr { FN(ringbuf_reserve_dynptr), \ FN(ringbuf_submit_dynptr), \ FN(ringbuf_discard_dynptr), \ + FN(dynptr_read), \ + FN(dynptr_write), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 34d4ef5775f776ec4b0d53a02d588bf3195cada6 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 23 May 2022 14:07:11 -0700 Subject: bpf: Add dynptr data slices This patch adds a new helper function void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len); which returns a pointer to the underlying data of a dynptr. *len* must be a statically known value. The bpf program may access the returned data slice as a normal buffer (eg can do direct reads and writes), since the verifier associates the length with the returned pointer, and enforces that no out of bounds accesses occur. Signed-off-by: Joanne Koong Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220523210712.3641569-6-joannelkoong@gmail.com --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 12 ++++++++++++ kernel/bpf/helpers.c | 28 ++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 23 +++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 12 ++++++++++++ 5 files changed, 76 insertions(+) (limited to 'include/uapi') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c72321b6f306..a7080c86fa76 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -488,6 +488,7 @@ enum bpf_return_type { RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM, + RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index efe2505650e6..f4009dbdf62d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5238,6 +5238,17 @@ union bpf_attr { * 0 on success, -E2BIG if *offset* + *len* exceeds the length * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* * is a read-only dynptr. + * + * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) + * Description + * Get a pointer to the underlying dynptr data. + * + * *len* must be a statically known value. The returned data slice + * is invalidated whenever the dynptr is invalidated. + * Return + * Pointer to the underlying dynptr data, NULL if the dynptr is + * read-only, if the dynptr is invalid, or if the offset and length + * is out of bounds. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5443,6 +5454,7 @@ union bpf_attr { FN(ringbuf_discard_dynptr), \ FN(dynptr_read), \ FN(dynptr_write), \ + FN(dynptr_data), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 8cef3fb0d143..225806a02efb 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1549,6 +1549,32 @@ const struct bpf_func_proto bpf_dynptr_write_proto = { .arg4_type = ARG_CONST_SIZE_OR_ZERO, }; +BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len) +{ + int err; + + if (!ptr->data) + return 0; + + err = bpf_dynptr_check_off_len(ptr, offset, len); + if (err) + return 0; + + if (bpf_dynptr_is_rdonly(ptr)) + return 0; + + return (unsigned long)(ptr->data + ptr->offset + offset); +} + +const struct bpf_func_proto bpf_dynptr_data_proto = { + .func = bpf_dynptr_data, + .gpl_only = false, + .ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL, + .arg1_type = ARG_PTR_TO_DYNPTR, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_CONST_ALLOC_SIZE_OR_ZERO, +}; + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_get_current_task_btf_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; @@ -1615,6 +1641,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_dynptr_read_proto; case BPF_FUNC_dynptr_write: return &bpf_dynptr_write_proto; + case BPF_FUNC_dynptr_data: + return &bpf_dynptr_data_proto; default: break; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8be140351966..aedac2ac02b9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5832,6 +5832,14 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, return __check_ptr_off_reg(env, reg, regno, fixed_off_ok); } +static u32 stack_slot_get_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + + return state->stack[spi].spilled_ptr.id; +} + static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_call_arg_meta *meta, const struct bpf_func_proto *fn) @@ -7384,6 +7392,21 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].id = id; /* For release_reference() */ regs[BPF_REG_0].ref_obj_id = id; + } else if (func_id == BPF_FUNC_dynptr_data) { + int dynptr_id = 0, i; + + /* Find the id of the dynptr we're acquiring a reference to */ + for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { + if (arg_type_is_dynptr(fn->arg_type[i])) { + if (dynptr_id) { + verbose(env, "verifier internal error: multiple dynptr args in func\n"); + return -EFAULT; + } + dynptr_id = stack_slot_get_id(env, ®s[BPF_REG_1 + i]); + } + } + /* For release_reference() */ + regs[BPF_REG_0].ref_obj_id = dynptr_id; } do_refine_retval_range(regs, fn->ret_type, func_id, &meta); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index efe2505650e6..f4009dbdf62d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5238,6 +5238,17 @@ union bpf_attr { * 0 on success, -E2BIG if *offset* + *len* exceeds the length * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* * is a read-only dynptr. + * + * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) + * Description + * Get a pointer to the underlying dynptr data. + * + * *len* must be a statically known value. The returned data slice + * is invalidated whenever the dynptr is invalidated. + * Return + * Pointer to the underlying dynptr data, NULL if the dynptr is + * read-only, if the dynptr is invalid, or if the offset and length + * is out of bounds. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5443,6 +5454,7 @@ union bpf_attr { FN(ringbuf_discard_dynptr), \ FN(dynptr_read), \ FN(dynptr_write), \ + FN(dynptr_data), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3