| author | Linus Torvalds | 2021-11-03 08:05:59 -0700 |
|---|---|---|
| committer | Linus Torvalds | 2021-11-03 08:05:59 -0700 |
| commit | 25edbc383b72c2364c7b339245c1c5db84e615e1 (patch) | |
| tree | 6f1924c768c6926a51bc53125612fe6141222da6 | |
| parent | ff0700f03609b9f0defacd4ce96d9519d721e0a2 (diff) | |
| parent | f1a090f09f42be5a5542009f0be310fdb3e768fc (diff) | |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"A typical collection of patches this cycle, mostly fixing with a few
new features:
- Fixes from static tools: clang warnings, dead code, unused
variables, coccinelle sweeps, etc.
- Driver bug fixes and minor improvements in rxe, bnxt_re, hfi1,
mlx5, irdma, qedr
- rtrs ULP bug fixes and improvements
- Additional counters for bnxt_re
- Support verbs CQ notifications in EFA
- Continued reworking and fixing of rxe
- netlink control to enable/disable optional device counters (a driver-side sketch of the new counter descriptors follows this message)
- rxe now can use AH objects for its UD path, fixing various bugs in
the process
- Add DMABUF support to EFA"
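
Several of the items above (the additional bnxt_re counters and the netlink control for optional counters) rest on the reworked hw-stats core visible in the diff below: counters are described by `struct rdma_stat_desc`, optional ones carry `IB_STAT_FLAG_OPTIONAL`, and the per-port stats object is allocated with `rdma_alloc_hw_stats_struct()`. The following is only a minimal driver-side sketch of that pattern with hypothetical `foo_*` names; the real code is in the core and bnxt_re hunks further down.

```c
#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

enum {
	FOO_RX_PKTS,
	FOO_RX_DEBUG_DROPS,	/* optional: hidden from sysfs, toggled via nldev */
	FOO_NUM_COUNTERS,
};

static const struct rdma_stat_desc foo_stat_descs[] = {
	[FOO_RX_PKTS]        = { .name = "rx_pkts" },
	[FOO_RX_DEBUG_DROPS] = { .name = "rx_debug_drops",
				 .flags = IB_STAT_FLAG_OPTIONAL },
};

static struct rdma_hw_stats *foo_alloc_hw_port_stats(struct ib_device *ibdev,
						      u32 port_num)
{
	/* The core keeps the descs pointer rather than copying the strings,
	 * so the array must stay valid for the lifetime of the stats object.
	 */
	return rdma_alloc_hw_stats_struct(foo_stat_descs,
					  ARRAY_SIZE(foo_stat_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
```

A driver would point its `.alloc_hw_port_stats` op at a helper like this, much as `bnxt_re_ib_alloc_hw_port_stats()` does in the bnxt_re hunk below.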
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (103 commits)
RDMA/core: Require the driver to set the IOVA correctly during rereg_mr
RDMA/bnxt_re: Remove unsupported bnxt_re_modify_ah callback
RDMA/irdma: optimize rx path by removing unnecessary copy
RDMA/qed: Use helper function to set GUIDs
RDMA/hns: Use the core code to manage the fixed mmap entries
IB/opa_vnic: Rebranding of OPA VNIC driver to Cornelis Networks
IB/qib: Rebranding of qib driver to Cornelis Networks
IB/hfi1: Rebranding of hfi1 driver to Cornelis Networks
RDMA/bnxt_re: Use helper function to set GUIDs
RDMA/bnxt_re: Fix kernel panic when trying to access bnxt_re_stat_descs
RDMA/qedr: Fix NULL deref for query_qp on the GSI QP
RDMA/hns: Modify the value of MAX_LP_MSG_LEN to meet hardware compatibility
RDMA/hns: Fix initial arm_st of CQ
RDMA/rxe: Make rxe_type_info static const
RDMA/rxe: Use 'bitmap_zalloc()' when applicable
RDMA/rxe: Save a few bytes from struct rxe_pool
RDMA/irdma: Remove the unused variable local_qp
RDMA/core: Fix missed initialization of rdma_hw_stats::lock
RDMA/efa: Add support for dmabuf memory regions
RDMA/umem: Allow pinned dmabuf umem usage
...
127 files changed, 3603 insertions, 1894 deletions
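
The "Add DMABUF support to EFA" item builds on a new core helper shown in the umem_dmabuf.c hunk below: `ib_umem_dmabuf_get_pinned()` attaches to the dma-buf, pins it and maps it up front, so the importer never has to handle move_notify. Below is a hedged sketch of how a driver's `reg_user_mr_dmabuf` callback might use it; `foo_reg_user_mr_dmabuf` and `foo_create_hw_mkey` are hypothetical, and the actual EFA code is not part of the hunks shown in this section.

```c
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>

/* Hypothetical device-specific registration step, declared only to keep the
 * sketch self-contained.
 */
static struct ib_mr *foo_create_hw_mkey(struct ib_pd *pd, struct ib_umem *umem,
					u64 iova, int access);

static struct ib_mr *foo_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
					    u64 length, u64 iova, int fd,
					    int access, struct ib_udata *udata)
{
	struct ib_umem_dmabuf *umem_dmabuf;
	struct ib_mr *mr;

	/* Attach + pin + map in one call; the exporter cannot move the buffer
	 * afterwards, so the "unsupported" move_notify stub added in this
	 * merge is never invoked.
	 */
	umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, offset, length,
						fd, access);
	if (IS_ERR(umem_dmabuf))
		return ERR_CAST(umem_dmabuf);

	mr = foo_create_hw_mkey(pd, &umem_dmabuf->umem, iova, access);
	if (IS_ERR(mr))
		ib_umem_release(&umem_dmabuf->umem);
	return mr;
}
```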
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 704ce595542c..835ac54d4a24 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -453,7 +453,7 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, id_priv->id.device = cma_dev->device; id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); - list_add_tail(&id_priv->list, &cma_dev->id_list); + list_add_tail(&id_priv->device_item, &cma_dev->id_list); trace_cm_id_attach(id_priv, cma_dev->device); } @@ -470,7 +470,7 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv, static void cma_release_dev(struct rdma_id_private *id_priv) { mutex_lock(&lock); - list_del(&id_priv->list); + list_del_init(&id_priv->device_item); cma_dev_put(id_priv->cma_dev); id_priv->cma_dev = NULL; id_priv->id.device = NULL; @@ -854,6 +854,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler, init_completion(&id_priv->comp); refcount_set(&id_priv->refcount, 1); mutex_init(&id_priv->handler_mutex); + INIT_LIST_HEAD(&id_priv->device_item); INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); @@ -1647,7 +1648,7 @@ static struct rdma_id_private *cma_find_listener( return id_priv; list_for_each_entry(id_priv_dev, &id_priv->listen_list, - listen_list) { + listen_item) { if (id_priv_dev->id.device == cm_id->device && cma_match_net_dev(&id_priv_dev->id, net_dev, req)) @@ -1756,14 +1757,15 @@ static void _cma_cancel_listens(struct rdma_id_private *id_priv) * Remove from listen_any_list to prevent added devices from spawning * additional listen requests. */ - list_del(&id_priv->list); + list_del_init(&id_priv->listen_any_item); while (!list_empty(&id_priv->listen_list)) { - dev_id_priv = list_entry(id_priv->listen_list.next, - struct rdma_id_private, listen_list); + dev_id_priv = + list_first_entry(&id_priv->listen_list, + struct rdma_id_private, listen_item); /* sync with device removal to avoid duplicate destruction */ - list_del_init(&dev_id_priv->list); - list_del(&dev_id_priv->listen_list); + list_del_init(&dev_id_priv->device_item); + list_del_init(&dev_id_priv->listen_item); mutex_unlock(&lock); rdma_destroy_id(&dev_id_priv->id); @@ -2564,7 +2566,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv, ret = rdma_listen(&dev_id_priv->id, id_priv->backlog); if (ret) goto err_listen; - list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); + list_add_tail(&dev_id_priv->listen_item, &id_priv->listen_list); return 0; err_listen: /* Caller must destroy this after releasing lock */ @@ -2580,13 +2582,13 @@ static int cma_listen_on_all(struct rdma_id_private *id_priv) int ret; mutex_lock(&lock); - list_add_tail(&id_priv->list, &listen_any_list); + list_add_tail(&id_priv->listen_any_item, &listen_any_list); list_for_each_entry(cma_dev, &dev_list, list) { ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); if (ret) { /* Prevent racing with cma_process_remove() */ if (to_destroy) - list_del_init(&to_destroy->list); + list_del_init(&to_destroy->device_item); goto err_listen; } } @@ -4895,7 +4897,7 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event, mutex_lock(&lock); list_for_each_entry(cma_dev, &dev_list, list) - list_for_each_entry(id_priv, &cma_dev->id_list, list) { + list_for_each_entry(id_priv, &cma_dev->id_list, device_item) { ret = cma_netdev_change(ndev, id_priv); if (ret) goto out; @@ -4955,10 
+4957,10 @@ static void cma_process_remove(struct cma_device *cma_dev) mutex_lock(&lock); while (!list_empty(&cma_dev->id_list)) { struct rdma_id_private *id_priv = list_first_entry( - &cma_dev->id_list, struct rdma_id_private, list); + &cma_dev->id_list, struct rdma_id_private, device_item); - list_del(&id_priv->listen_list); - list_del_init(&id_priv->list); + list_del_init(&id_priv->listen_item); + list_del_init(&id_priv->device_item); cma_id_get(id_priv); mutex_unlock(&lock); @@ -5035,7 +5037,7 @@ static int cma_add_one(struct ib_device *device) mutex_lock(&lock); list_add_tail(&cma_dev->list, &dev_list); - list_for_each_entry(id_priv, &listen_any_list, list) { + list_for_each_entry(id_priv, &listen_any_list, listen_any_item) { ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy); if (ret) goto free_listen; diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h index f92f101ea981..757a0ef79872 100644 --- a/drivers/infiniband/core/cma_priv.h +++ b/drivers/infiniband/core/cma_priv.h @@ -55,8 +55,15 @@ struct rdma_id_private { struct rdma_bind_list *bind_list; struct hlist_node node; - struct list_head list; /* listen_any_list or cma_device.list */ - struct list_head listen_list; /* per device listens */ + union { + struct list_head device_item; /* On cma_device->id_list */ + struct list_head listen_any_item; /* On listen_any_list */ + }; + union { + /* On rdma_id_private->listen_list */ + struct list_head listen_item; + struct list_head listen_list; + }; struct cma_device *cma_dev; struct list_head mc_list; diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index df9e6c5e4ddf..af59486fe418 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -106,6 +106,38 @@ static int __rdma_counter_bind_qp(struct rdma_counter *counter, return ret; } +int rdma_counter_modify(struct ib_device *dev, u32 port, + unsigned int index, bool enable) +{ + struct rdma_hw_stats *stats; + int ret = 0; + + if (!dev->ops.modify_hw_stat) + return -EOPNOTSUPP; + + stats = ib_get_hw_stats_port(dev, port); + if (!stats || index >= stats->num_counters || + !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) + return -EINVAL; + + mutex_lock(&stats->lock); + + if (enable != test_bit(index, stats->is_disabled)) + goto out; + + ret = dev->ops.modify_hw_stat(dev, port, index, enable); + if (ret) + goto out; + + if (enable) + clear_bit(index, stats->is_disabled); + else + set_bit(index, stats->is_disabled); +out: + mutex_unlock(&stats->lock); + return ret; +} + static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port, struct ib_qp *qp, enum rdma_nl_counter_mode mode) @@ -165,7 +197,7 @@ static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port, return counter; err_mode: - kfree(counter->stats); + rdma_free_hw_stats_struct(counter->stats); err_stats: rdma_restrack_put(&counter->res); kfree(counter); @@ -186,7 +218,7 @@ static void rdma_counter_free(struct rdma_counter *counter) mutex_unlock(&port_counter->lock); rdma_restrack_del(&counter->res); - kfree(counter->stats); + rdma_free_hw_stats_struct(counter->stats); kfree(counter); } @@ -618,7 +650,7 @@ void rdma_counter_init(struct ib_device *dev) fail: for (i = port; i >= rdma_start_port(dev); i--) { port_counter = &dev->port_data[port].port_counter; - kfree(port_counter->hstats); + rdma_free_hw_stats_struct(port_counter->hstats); port_counter->hstats = NULL; mutex_destroy(&port_counter->lock); } @@ -631,7 +663,7 @@ void 
rdma_counter_release(struct ib_device *dev) rdma_for_each_port(dev, port) { port_counter = &dev->port_data[port].port_counter; - kfree(port_counter->hstats); + rdma_free_hw_stats_struct(port_counter->hstats); mutex_destroy(&port_counter->lock); } } diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index f4814bb7f082..22a4adda7981 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2676,6 +2676,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, modify_cq); SET_DEVICE_OP(dev_ops, modify_device); SET_DEVICE_OP(dev_ops, modify_flow_action_esp); + SET_DEVICE_OP(dev_ops, modify_hw_stat); SET_DEVICE_OP(dev_ops, modify_port); SET_DEVICE_OP(dev_ops, modify_qp); SET_DEVICE_OP(dev_ops, modify_srq); diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 54f4feb604d8..358a2db38d23 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -762,7 +762,7 @@ int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version) { struct sk_buff *skb = NULL; struct nlmsghdr *nlh; - const char *err_str = ""; + const char *err_str; int ret = -EINVAL; skb = iwpm_create_nlmsg(RDMA_NL_IWPM_HELLO, &nlh, nl_client); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index e9b4b2cccaa0..fedc0fa6ebf9 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -154,6 +154,8 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 }, [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, [RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -968,14 +970,21 @@ static int fill_stat_counter_hwcounters(struct sk_buff *msg, if (!table_attr) return -EMSGSIZE; - for (i = 0; i < st->num_counters; i++) - if (rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i])) + mutex_lock(&st->lock); + for (i = 0; i < st->num_counters; i++) { + if (test_bit(i, st->is_disabled)) + continue; + if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name, + st->value[i])) goto err; + } + mutex_unlock(&st->lock); nla_nest_end(msg, table_attr); return 0; err: + mutex_unlock(&st->lock); nla_nest_cancel(msg, table_attr); return -EMSGSIZE; } @@ -1888,24 +1897,111 @@ static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } +static int nldev_stat_set_mode_doit(struct sk_buff *msg, + struct netlink_ext_ack *extack, + struct nlattr *tb[], + struct ib_device *device, u32 port) +{ + u32 mode, mask = 0, qpn, cntn = 0; + int ret; + + /* Currently only counter for QP is supported */ + if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) + return -EINVAL; + + mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]); + if (mode == RDMA_COUNTER_MODE_AUTO) { + if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) + mask = nla_get_u32( + tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); + return rdma_counter_set_auto_mode(device, port, mask, extack); + } + + if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) + return -EINVAL; + + qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); + if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { + cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); + ret = rdma_counter_bind_qpn(device, port, qpn, cntn); + 
if (ret) + return ret; + } else { + ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn); + if (ret) + return ret; + } + + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { + ret = -EMSGSIZE; + goto err_fill; + } + + return 0; + +err_fill: + rdma_counter_unbind_qpn(device, port, qpn, cntn); + return ret; +} + +static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[], + struct ib_device *device, + u32 port) +{ + struct rdma_hw_stats *stats; + int rem, i, index, ret = 0; + struct nlattr *entry_attr; + unsigned long *target; + + stats = ib_get_hw_stats_port(device, port); + if (!stats) + return -EINVAL; + + target = kcalloc(BITS_TO_LONGS(stats->num_counters), + sizeof(*stats->is_disabled), GFP_KERNEL); + if (!target) + return -ENOMEM; + + nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS], + rem) { + index = nla_get_u32(entry_attr); + if ((index >= stats->num_counters) || + !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) { + ret = -EINVAL; + goto out; + } + + set_bit(index, target); + } + + for (i = 0; i < stats->num_counters; i++) { + if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL)) + continue; + + ret = rdma_counter_modify(device, port, i, test_bit(i, target)); + if (ret) + goto out; + } + +out: + kfree(target); + return ret; +} + static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { - u32 index, port, mode, mask = 0, qpn, cntn = 0; struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; struct ib_device *device; struct sk_buff *msg; + u32 index, port; int ret; - ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, - nldev_policy, extack); - /* Currently only counter for QP is supported */ - if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] || - !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || - !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE]) - return -EINVAL; - - if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) + ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, + extack); + if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || + !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); @@ -1916,59 +2012,49 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); if (!rdma_is_port_valid(device, port)) { ret = -EINVAL; - goto err; + goto err_put_device; + } + + if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] && + !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { + ret = -EINVAL; + goto err_put_device; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto err; + goto err_put_device; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_SET), 0, 0); + if (fill_nldev_handle(msg, device) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { + ret = -EMSGSIZE; + goto err_free_msg; + } - mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]); - if (mode == RDMA_COUNTER_MODE_AUTO) { - if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) - mask = nla_get_u32( - tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); - ret = rdma_counter_set_auto_mode(device, port, mask, extack); - if (ret) - goto err_msg; - } else { - if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) - goto err_msg; - qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); - if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { - cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); - ret = rdma_counter_bind_qpn(device, port, 
qpn, cntn); - } else { - ret = rdma_counter_bind_qpn_alloc(device, port, - qpn, &cntn); - } + if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) { + ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port); if (ret) - goto err_msg; + goto err_free_msg; + } - if (fill_nldev_handle(msg, device) || - nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || - nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || - nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { - ret = -EMSGSIZE; - goto err_fill; - } + if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { + ret = nldev_stat_set_counter_dynamic_doit(tb, device, port); + if (ret) + goto err_free_msg; } nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); -err_fill: - rdma_counter_unbind_qpn(device, port, qpn, cntn); -err_msg: +err_free_msg: nlmsg_free(msg); -err: +err_put_device: ib_device_put(device); return ret; } @@ -2103,9 +2189,13 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, goto err_stats; } for (i = 0; i < num_cnts; i++) { + if (test_bit(i, stats->is_disabled)) + continue; + v = stats->value[i] + rdma_counter_get_hwstat_value(device, port, i); - if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) { + if (rdma_nl_stat_hwcounter_entry(msg, + stats->descs[i].name, v)) { ret = -EMSGSIZE; goto err_table; } @@ -2253,6 +2343,99 @@ static int nldev_stat_get_dumpit(struct sk_buff *skb, return ret; } +static int nldev_stat_get_counter_status_doit(struct sk_buff *skb, + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry; + struct rdma_hw_stats *stats; + struct ib_device *device; + struct sk_buff *msg; + u32 devid, port; + int ret, i; + + ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); + if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || + !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) + return -EINVAL; + + devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + device = ib_device_get_by_index(sock_net(skb->sk), devid); + if (!device) + return -EINVAL; + + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); + if (!rdma_is_port_valid(device, port)) { + ret = -EINVAL; + goto err; + } + + stats = ib_get_hw_stats_port(device, port); + if (!stats) { + ret = -EINVAL; + goto err; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto err; + } + + nlh = nlmsg_put( + msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS), + 0, 0); + + ret = -EMSGSIZE; + if (fill_nldev_handle(msg, device) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) + goto err_msg; + + table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); + if (!table) + goto err_msg; + + mutex_lock(&stats->lock); + for (i = 0; i < stats->num_counters; i++) { + entry = nla_nest_start(msg, + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY); + if (!entry) + goto err_msg_table; + + if (nla_put_string(msg, + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, + stats->descs[i].name) || + nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i)) + goto err_msg_entry; + + if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) && + (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC, + !test_bit(i, stats->is_disabled)))) + goto err_msg_entry; + + nla_nest_end(msg, entry); + } + mutex_unlock(&stats->lock); + + nla_nest_end(msg, table); + nlmsg_end(msg, nlh); + ib_device_put(device); + return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); + +err_msg_entry: + 
nla_nest_cancel(msg, entry); +err_msg_table: + mutex_unlock(&stats->lock); + nla_nest_cancel(msg, table); +err_msg: + nlmsg_free(msg); +err: + ib_device_put(device); + return ret; +} + static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { .doit = nldev_get_doit, @@ -2342,6 +2525,9 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { .dump = nldev_res_get_mr_raw_dumpit, .flags = RDMA_NL_ADMIN_PERM, }, + [RDMA_NLDEV_CMD_STAT_GET_STATUS] = { + .doit = nldev_stat_get_counter_status_doit, + }, }; void __init nldev_init(void) diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 5221cce65675..5a3bd41b331c 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -282,15 +282,22 @@ static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg, ib_dma_unmap_sg(dev, sg, sg_cnt, dir); } -static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg, - u32 sg_cnt, enum dma_data_direction dir) +static int rdma_rw_map_sgtable(struct ib_device *dev, struct sg_table *sgt, + enum dma_data_direction dir) { - if (is_pci_p2pdma_page(sg_page(sg))) { + int nents; + + if (is_pci_p2pdma_page(sg_page(sgt->sgl))) { if (WARN_ON_ONCE(ib_uses_virt_dma(dev))) return 0; - return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); + nents = pci_p2pdma_map_sg(dev->dma_device, sgt->sgl, + sgt->orig_nents, dir); + if (!nents) + return -EIO; + sgt->nents = nents; + return 0; } - return ib_dma_map_sg(dev, sg, sg_cnt, dir); + return ib_dma_map_sgtable_attrs(dev, sgt, dir, 0); } /** @@ -313,12 +320,16 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, u64 remote_addr, u32 rkey, enum dma_data_direction dir) { struct ib_device *dev = qp->pd->device; + struct sg_table sgt = { + .sgl = sg, + .orig_nents = sg_cnt, + }; int ret; - ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir); - if (!ret) - return -ENOMEM; - sg_cnt = ret; + ret = rdma_rw_map_sgtable(dev, &sgt, dir); + if (ret) + return ret; + sg_cnt = sgt.nents; /* * Skip to the S/G entry that sg_offset falls into: @@ -354,7 +365,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, return ret; out_unmap_sg: - rdma_rw_unmap_sg(dev, sg, sg_cnt, dir); + rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir); return ret; } EXPORT_SYMBOL(rdma_rw_ctx_init); @@ -385,6 +396,14 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, struct ib_device *dev = qp->pd->device; u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device, qp->integrity_en); + struct sg_table sgt = { + .sgl = sg, + .orig_nents = sg_cnt, + }; + struct sg_table prot_sgt = { + .sgl = prot_sg, + .orig_nents = prot_sg_cnt, + }; struct ib_rdma_wr *rdma_wr; int count = 0, ret; @@ -394,18 +413,14 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return -EINVAL; } - ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir); - if (!ret) - return -ENOMEM; - sg_cnt = ret; + ret = rdma_rw_map_sgtable(dev, &sgt, dir); + if (ret) + return ret; if (prot_sg_cnt) { - ret = rdma_rw_map_sg(dev, prot_sg, prot_sg_cnt, dir); - if (!ret) { - ret = -ENOMEM; + ret = rdma_rw_map_sgtable(dev, &prot_sgt, dir); + if (ret) goto out_unmap_sg; - } - prot_sg_cnt = ret; } ctx->type = RDMA_RW_SIG_MR; @@ -426,10 +441,11 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs)); - ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sg_cnt, NULL, prot_sg, - 
prot_sg_cnt, NULL, SZ_4K); + ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sgt.nents, NULL, prot_sg, + prot_sgt.nents, NULL, SZ_4K); if (unlikely(ret)) { - pr_err("failed to map PI sg (%u)\n", sg_cnt + prot_sg_cnt); + pr_err("failed to map PI sg (%u)\n", + sgt.nents + prot_sgt.nents); goto out_destroy_sig_mr; } @@ -468,10 +484,10 @@ out_destroy_sig_mr: out_free_ctx: kfree(ctx->reg); out_unmap_prot_sg: - if (prot_sg_cnt) - rdma_rw_unmap_sg(dev, prot_sg, prot_sg_cnt, dir); + if (prot_sgt.nents) + rdma_rw_unmap_sg(dev, prot_sgt.sgl, prot_sgt.orig_nents, dir); out_unmap_sg: - rdma_rw_unmap_sg(dev, sg, sg_cnt, dir); + rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir); return ret; } EXPORT_SYMBOL(rdma_rw_ctx_signature_init); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index c00f8e28aab7..74ecd7456a11 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -2262,7 +2262,6 @@ err1: void ib_sa_cleanup(void) { cancel_delayed_work(&ib_nl_timed_work); - flush_workqueue(ib_nl_wq); destroy_workqueue(ib_nl_wq); mcast_cleanup(); ib_unregister_client(&sa_client); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 6146c3c1cbe5..a3f84b50c46a 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -755,9 +755,9 @@ static void ib_port_release(struct kobject *kobj) for (i = 0; i != ARRAY_SIZE(port->groups); i++) kfree(port->groups[i].attrs); if (port->hw_stats_data) - kfree(port->hw_stats_data->stats); + rdma_free_hw_stats_struct(port->hw_stats_data->stats); kfree(port->hw_stats_data); - kfree(port); + kvfree(port); } static void ib_port_gid_attr_release(struct kobject *kobj) @@ -895,7 +895,7 @@ alloc_hw_stats_device(struct ib_device *ibdev) stats = ibdev->ops.alloc_hw_device_stats(ibdev); if (!stats) return ERR_PTR(-ENOMEM); - if (!stats->names || stats->num_counters <= 0) + if (!stats->descs || stats->num_counters <= 0) goto err_free_stats; /* @@ -911,7 +911,6 @@ alloc_hw_stats_device(struct ib_device *ibdev) if (!data->group.attrs) goto err_free_data; - mutex_init(&stats->lock); data->group.name = "hw_counters"; data->stats = stats; return data; @@ -919,14 +918,14 @@ alloc_hw_stats_device(struct ib_device *ibdev) err_free_data: kfree(data); err_free_stats: - kfree(stats); + rdma_free_hw_stats_struct(stats); return ERR_PTR(-ENOMEM); } void ib_device_release_hw_stats(struct hw_stats_device_data *data) { kfree(data->group.attrs); - kfree(data->stats); + rdma_free_hw_stats_struct(data->stats); kfree(data); } @@ -934,7 +933,8 @@ int ib_setup_device_attrs(struct ib_device *ibdev) { struct hw_stats_device_attribute *attr; struct hw_stats_device_data *data; - int i, ret; + bool opstat_skipped = false; + int i, ret, pos = 0; data = alloc_hw_stats_device(ibdev); if (IS_ERR(data)) { @@ -955,16 +955,23 @@ int ib_setup_device_attrs(struct ib_device *ibdev) data->stats->timestamp = jiffies; for (i = 0; i < data->stats->num_counters; i++) { - attr = &data->attrs[i]; + if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) { + opstat_skipped = true; + continue; + } + + WARN_ON(opstat_skipped); + attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); - attr->attr.attr.name = data->stats->names[i]; + attr->attr.attr.name = data->stats->descs[i].name; attr->attr.attr.mode = 0444; attr->attr.show = hw_stat_device_show; attr->show = show_hw_stats; - data->group.attrs[i] = &attr->attr.attr; + data->group.attrs[pos] = &attr->attr.attr; + pos++; } - attr = &data->attrs[i]; + attr = 
&data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = "lifespan"; attr->attr.attr.mode = 0644; @@ -972,7 +979,7 @@ int ib_setup_device_attrs(struct ib_device *ibdev) attr->show = show_stats_lifespan; attr->attr.store = hw_stat_device_store; attr->store = set_stats_lifespan; - data->group.attrs[i] = &attr->attr.attr; + data->group.attrs[pos] = &attr->attr.attr; for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++) if (!ibdev->groups[i]) { ibdev->groups[i] = &data->group; @@ -994,7 +1001,7 @@ alloc_hw_stats_port(struct ib_port *port, struct attribute_group *group) stats = ibdev->ops.alloc_hw_port_stats(port->ibdev, port->port_num); if (!stats) return ERR_PTR(-ENOMEM); - if (!stats->names || stats->num_counters <= 0) + if (!stats->descs || stats->num_counters <= 0) goto err_free_stats; /* @@ -1010,7 +1017,6 @@ alloc_hw_stats_port(struct ib_port *port, struct attribute_group *group) if (!group->attrs) goto err_free_data; - mutex_init(&stats->lock); group->name = "hw_counters"; data->stats = stats; return data; @@ -1018,7 +1024,7 @@ alloc_hw_stats_port(struct ib_port *port, struct attribute_group *group) err_free_data: kfree(data); err_free_stats: - kfree(stats); + rdma_free_hw_stats_struct(stats); return ERR_PTR(-ENOMEM); } @@ -1027,7 +1033,8 @@ static int setup_hw_port_stats(struct ib_port *port, { struct hw_stats_port_attribute *attr; struct hw_stats_port_data *data; - int i, ret; + bool opstat_skipped = false; + int i, ret, pos = 0; data = alloc_hw_stats_port(port, group); if (IS_ERR(data)) @@ -1045,16 +1052,23 @@ static int setup_hw_port_stats(struct ib_port *port, data->stats->timestamp = jiffies; for (i = 0; i < data->stats->num_counters; i++) { - attr = &data->attrs[i]; + if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) { + opstat_skipped = true; + continue; + } + + WARN_ON(opstat_skipped); + attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); - attr->attr.attr.name = data->stats->names[i]; + attr->attr.attr.name = data->stats->descs[i].name; attr->attr.attr.mode = 0444; attr->attr.show = hw_stat_port_show; attr->show = show_hw_stats; - group->attrs[i] = &attr->attr.attr; + group->attrs[pos] = &attr->attr.attr; + pos++; } - attr = &data->attrs[i]; + attr = &data->attrs[pos]; sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = "lifespan"; attr->attr.attr.mode = 0644; @@ -1062,7 +1076,7 @@ static int setup_hw_port_stats(struct ib_port *port, attr->show = show_stats_lifespan; attr->attr.store = hw_stat_port_store; attr->store = set_stats_lifespan; - group->attrs[i] = &attr->attr.attr; + group->attrs[pos] = &attr->attr.attr; port->hw_stats_data = data; return 0; @@ -1189,7 +1203,7 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num, struct ib_port *p; int ret; - p = kzalloc(struct_size(p, attrs_list, + p = kvzalloc(struct_size(p, attrs_list, attr->gid_tbl_len + attr->pkey_tbl_len), GFP_KERNEL); if (!p) diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c index e824baf4640d..372c21cd2927 100644 --- a/drivers/infiniband/core/umem_dmabuf.c +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -163,12 +163,63 @@ out_release_dmabuf: } EXPORT_SYMBOL(ib_umem_dmabuf_get); +static void +ib_umem_dmabuf_unsupported_move_notify(struct dma_buf_attachment *attach) +{ + struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv; + + ibdev_warn_ratelimited(umem_dmabuf->umem.ibdev, + "Invalidate callback should not be called when memory is pinned\n"); +} + +static struct dma_buf_attach_ops 
ib_umem_dmabuf_attach_pinned_ops = { + .allow_peer2peer = true, + .move_notify = ib_umem_dmabuf_unsupported_move_notify, +}; + +struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device, + unsigned long offset, + size_t size, int fd, + int access) +{ + struct ib_umem_dmabuf *umem_dmabuf; + int err; + + umem_dmabuf = ib_umem_dmabuf_get(device, offset, size, fd, access, + &ib_umem_dmabuf_attach_pinned_ops); + if (IS_ERR(umem_dmabuf)) + return umem_dmabuf; + + dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); + err = dma_buf_pin(umem_dmabuf->attach); + if (err) + goto err_release; + umem_dmabuf->pinned = 1; + + err = ib_umem_dmabuf_map_pages(umem_dmabuf); + if (err) + goto err_unpin; + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + + return umem_dmabuf; + +err_unpin: + dma_buf_unpin(umem_dmabuf->attach); +err_release: + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + ib_umem_release(&umem_dmabuf->umem); + return ERR_PTR(err); +} +EXPORT_SYMBOL(ib_umem_dmabuf_get_pinned); + void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf) { struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf; dma_resv_lock(dmabuf->resv, NULL); ib_umem_dmabuf_unmap_pages(umem_dmabuf); + if (umem_dmabuf->pinned) + dma_buf_unpin(umem_dmabuf->attach); dma_resv_unlock(dmabuf->resv); dma_buf_detach(dmabuf, umem_dmabuf->attach); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 740e6b2efe0e..d1345d76d9b1 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -837,11 +837,8 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) new_mr->device = new_pd->device; new_mr->pd = new_pd; new_mr->type = IB_MR_TYPE_USER; - new_mr->dm = NULL; - new_mr->sig_attrs = NULL; new_mr->uobject = uobj; atomic_inc(&new_pd->usecnt); - new_mr->iova = cmd.hca_va; new_uobj->object = new_mr; rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 89a2b21976d6..692d5ff657df 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2976,3 +2976,52 @@ bool __rdma_block_iter_next(struct ib_block_iter *biter) return true; } EXPORT_SYMBOL(__rdma_block_iter_next); + +/** + * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct + * for the drivers. 
+ * @descs: array of static descriptors + * @num_counters: number of elements in array + * @lifespan: milliseconds between updates + */ +struct rdma_hw_stats *rdma_alloc_hw_stats_struct( + const struct rdma_stat_desc *descs, int num_counters, + unsigned long lifespan) +{ + struct rdma_hw_stats *stats; + + stats = kzalloc(struct_size(stats, value, num_counters), GFP_KERNEL); + if (!stats) + return NULL; + + stats->is_disabled = kcalloc(BITS_TO_LONGS(num_counters), + sizeof(*stats->is_disabled), GFP_KERNEL); + if (!stats->is_disabled) + goto err; + + stats->descs = descs; + stats->num_counters = num_counters; + stats->lifespan = msecs_to_jiffies(lifespan); + mutex_init(&stats->lock); + + return stats; + +err: + kfree(stats); + return NULL; +} +EXPORT_SYMBOL(rdma_alloc_hw_stats_struct); + +/** + * rdma_free_hw_stats_struct - Helper function to release rdma_hw_stats + * @stats: statistics to release + */ +void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats) +{ + if (!stats) + return; + + kfree(stats->is_disabled); + kfree(stats); +} +EXPORT_SYMBOL(rdma_free_hw_stats_struct); diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index ba26d8e6a9c2..79401e6c6aa9 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -39,22 +39,13 @@ #ifndef __BNXT_RE_H__ #define __BNXT_RE_H__ +#include "hw_counters.h" #define ROCE_DRV_MODULE_NAME "bnxt_re" #define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver" -#define BNXT_RE_PAGE_SHIFT_4K (12) -#define BNXT_RE_PAGE_SHIFT_8K (13) -#define BNXT_RE_PAGE_SHIFT_64K (16) -#define BNXT_RE_PAGE_SHIFT_2M (21) -#define BNXT_RE_PAGE_SHIFT_8M (23) -#define BNXT_RE_PAGE_SHIFT_1G (30) -#define BNXT_RE_PAGE_SIZE_4K BIT(BNXT_RE_PAGE_SHIFT_4K) -#define BNXT_RE_PAGE_SIZE_8K BIT(BNXT_RE_PAGE_SHIFT_8K) -#define BNXT_RE_PAGE_SIZE_64K BIT(BNXT_RE_PAGE_SHIFT_64K) -#define BNXT_RE_PAGE_SIZE_2M BIT(BNXT_RE_PAGE_SHIFT_2M) -#define BNXT_RE_PAGE_SIZE_8M BIT(BNXT_RE_PAGE_SHIFT_8M) -#define BNXT_RE_PAGE_SIZE_1G BIT(BNXT_RE_PAGE_SHIFT_1G) +#define BNXT_RE_PAGE_SHIFT_1G (30) +#define BNXT_RE_PAGE_SIZE_SUPPORTED 0x7FFFF000 /* 4kb - 1G */ #define BNXT_RE_MAX_MR_SIZE_LOW BIT_ULL(BNXT_RE_PAGE_SHIFT_1G) #define BNXT_RE_MAX_MR_SIZE_HIGH BIT_ULL(39) @@ -177,15 +168,17 @@ struct bnxt_re_dev { atomic_t srq_count; atomic_t mr_count; atomic_t mw_count; + atomic_t ah_count; + atomic_t pd_count; /* Max of 2 lossless traffic class supported per port */ u16 cosq[2]; /* QP for for handling QP1 packets */ struct bnxt_re_gsi_context gsi_ctx; + struct bnxt_re_stats stats; atomic_t nq_alloc_cnt; u32 is_virtfn; u32 num_vfs; - struct bnxt_qplib_roce_stats stats; }; #define to_bnxt_re_dev(ptr, member) \ diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 7ba07797845c..825d512799d9 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -57,69 +57,208 @@ #include "bnxt_re.h" #include "hw_counters.h" -static const char * const bnxt_re_stat_name[] = { - [BNXT_RE_ACTIVE_QP] = "active_qps", - [BNXT_RE_ACTIVE_SRQ] = "active_srqs", - [BNXT_RE_ACTIVE_CQ] = "active_cqs", - [BNXT_RE_ACTIVE_MR] = "active_mrs", - [BNXT_RE_ACTIVE_MW] = "active_mws", - [BNXT_RE_RX_PKTS] = "rx_pkts", - [BNXT_RE_RX_BYTES] = "rx_bytes", - [BNXT_RE_TX_PKTS] = "tx_pkts", - [BNXT_RE_TX_BYTES] = "tx_bytes", - [BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors", - [BNXT_RE_RX_DROPS] = "rx_roce_drops", - [BNXT_RE_RX_DISCARDS] = "rx_roce_discards", 
- [BNXT_RE_TO_RETRANSMITS] = "to_retransmits", - [BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd", - [BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded", - [BNXT_RE_RNR_NAKS_RCVD] = "rnr_naks_rcvd", - [BNXT_RE_MISSING_RESP] = "missing_resp", - [BNXT_RE_UNRECOVERABLE_ERR] = "unrecoverable_err", - [BNXT_RE_BAD_RESP_ERR] = "bad_resp_err", - [BNXT_RE_LOCAL_QP_OP_ERR] = "local_qp_op_err", - [BNXT_RE_LOCAL_PROTECTION_ERR] = "local_protection_err", - [BNXT_RE_MEM_MGMT_OP_ERR] = "mem_mgmt_op_err", - [BNXT_RE_REMOTE_INVALID_REQ_ERR] = "remote_invalid_req_err", - [BNXT_RE_REMOTE_ACCESS_ERR] = "remote_access_err", - [BNXT_RE_REMOTE_OP_ERR] = "remote_op_err", - [BNXT_RE_DUP_REQ] = "dup_req", - [BNXT_RE_RES_EXCEED_MAX] = "res_exceed_max", - [BNXT_RE_RES_LENGTH_MISMATCH] = "res_length_mismatch", - [BNXT_RE_RES_EXCEEDS_WQE] = "res_exceeds_wqe", - [BNXT_RE_RES_OPCODE_ERR] = "res_opcode_err", - [BNXT_RE_RES_RX_INVALID_RKEY] = "res_rx_invalid_rkey", - [BNXT_RE_RES_RX_DOMAIN_ERR] = "res_rx_domain_err", - [BNXT_RE_RES_RX_NO_PERM] = "res_rx_no_perm", - [BNXT_RE_RES_RX_RANGE_ERR] = "res_rx_range_err", - [BNXT_RE_RES_TX_INVALID_RKEY] = "res_tx_invalid_rkey", - [BNXT_RE_RES_TX_DOMAIN_ERR] = "res_tx_domain_err", - [BNXT_RE_RES_TX_NO_PERM] = "res_tx_no_perm", - [BNXT_RE_RES_TX_RANGE_ERR] = "res_tx_range_err", - [BNXT_RE_RES_IRRQ_OFLOW] = "res_irrq_oflow", - [BNXT_RE_RES_UNSUP_OPCODE] = "res_unsup_opcode", - [BNXT_RE_RES_UNALIGNED_ATOMIC] = "res_unaligned_atomic", - [BNXT_RE_RES_REM_INV_ERR] = "res_rem_inv_err", - [BNXT_RE_RES_MEM_ERROR] = "res_mem_err", - [BNXT_RE_RES_SRQ_ERR] = "res_srq_err", - [BNXT_RE_RES_CMP_ERR] = "res_cmp_err", - [BNXT_RE_RES_INVALID_DUP_RKEY] = "res_invalid_dup_rkey", - [BNXT_RE_RES_WQE_FORMAT_ERR] = "res_wqe_format_err", - [BNXT_RE_RES_CQ_LOAD_ERR] = "res_cq_load_err", - [BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err", - [BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err", - [BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err", - [BNXT_RE_OUT_OF_SEQ_ERR] = "oos_drop_count" +static const struct rdma_stat_desc bnxt_re_stat_descs[] = { + [BNXT_RE_ACTIVE_PD].name = "active_pds", + [BNXT_RE_ACTIVE_AH].name = "active_ahs", + [BNXT_RE_ACTIVE_QP].name = "active_qps", + [BNXT_RE_ACTIVE_SRQ].name = "active_srqs", + [BNXT_RE_ACTIVE_CQ].name = "active_cqs", + [BNXT_RE_ACTIVE_MR].name = "active_mrs", + [BNXT_RE_ACTIVE_MW].name = "active_mws", + [BNXT_RE_RX_PKTS].name = "rx_pkts", + [BNXT_RE_RX_BYTES].name = "rx_bytes", + [BNXT_RE_TX_PKTS].name = "tx_pkts", + [BNXT_RE_TX_BYTES].name = "tx_bytes", + [BNXT_RE_RECOVERABLE_ERRORS].name = "recoverable_errors", + [BNXT_RE_RX_ERRORS].name = "rx_roce_errors", + [BNXT_RE_RX_DISCARDS].name = "rx_roce_discards", + [BNXT_RE_TO_RETRANSMITS].name = "to_retransmits", + [BNXT_RE_SEQ_ERR_NAKS_RCVD].name = "seq_err_naks_rcvd", + [BNXT_RE_MAX_RETRY_EXCEEDED].name = "max_retry_exceeded", + [BNXT_RE_RNR_NAKS_RCVD].name = "rnr_naks_rcvd", + [BNXT_RE_MISSING_RESP].name = "missing_resp", + [BNXT_RE_UNRECOVERABLE_ERR].name = "unrecoverable_err", + [BNXT_RE_BAD_RESP_ERR].name = "bad_resp_err", + [BNXT_RE_LOCAL_QP_OP_ERR].name = "local_qp_op_err", + [BNXT_RE_LOCAL_PROTECTION_ERR].name = "local_protection_err", + [BNXT_RE_MEM_MGMT_OP_ERR].name = "mem_mgmt_op_err", + [BNXT_RE_REMOTE_INVALID_REQ_ERR].name = "remote_invalid_req_err", + [BNXT_RE_REMOTE_ACCESS_ERR].name = "remote_access_err", + [BNXT_RE_REMOTE_OP_ERR].name = "remote_op_err", + [BNXT_RE_DUP_REQ].name = "dup_req", + [BNXT_RE_RES_EXCEED_MAX].name = "res_exceed_max", + [BNXT_RE_RES_LENGTH_MISMATCH].name = "res_length_mismatch", + 
[BNXT_RE_RES_EXCEEDS_WQE].name = "res_exceeds_wqe", + [BNXT_RE_RES_OPCODE_ERR].name = "res_opcode_err", + [BNXT_RE_RES_RX_INVALID_RKEY].name = "res_rx_invalid_rkey", + [BNXT_RE_RES_RX_DOMAIN_ERR].name = "res_rx_domain_err", + [BNXT_RE_RES_RX_NO_PERM].name = "res_rx_no_perm", + [BNXT_RE_RES_RX_RANGE_ERR].name = "res_rx_range_err", + [BNXT_RE_RES_TX_INVALID_RKEY].name = "res_tx_invalid_rkey", + [BNXT_RE_RES_TX_DOMAIN_ERR].name = "res_tx_domain_err", + [BNXT_RE_RES_TX_NO_PERM].name = "res_tx_no_perm", + [BNXT_RE_RES_TX_RANGE_ERR].name = "res_tx_range_err", + [BNXT_RE_RES_IRRQ_OFLOW].name = "res_irrq_oflow", + [BNXT_RE_RES_UNSUP_OPCODE].name = "res_unsup_opcode", + [BNXT_RE_RES_UNALIGNED_ATOMIC].name = "res_unaligned_atomic", + [BNXT_RE_RES_REM_INV_ERR].name = "res_rem_inv_err", + [BNXT_RE_RES_MEM_ERROR].name = "res_mem_err", + [BNXT_RE_RES_SRQ_ERR].name = "res_srq_err", + [BNXT_RE_RES_CMP_ERR].name = "res_cmp_err", + [BNXT_RE_RES_INVALID_DUP_RKEY].name = "res_invalid_dup_rkey", + [BNXT_RE_RES_WQE_FORMAT_ERR].name = "res_wqe_format_err", + [BNXT_RE_RES_CQ_LOAD_ERR].name = "res_cq_load_err", + [BNXT_RE_RES_SRQ_LOAD_ERR].name = "res_srq_load_err", + [BNXT_RE_RES_TX_PCI_ERR].name = "res_tx_pci_err", + [BNXT_RE_RES_RX_PCI_ERR].name = "res_rx_pci_err", + [BNXT_RE_OUT_OF_SEQ_ERR].name = "oos_drop_count", + [BNXT_RE_TX_ATOMIC_REQ].name = "tx_atomic_req", + [BNXT_RE_TX_READ_REQ].name = "tx_read_req", + [BNXT_RE_TX_READ_RES].name = "tx_read_resp", + [BNXT_RE_TX_WRITE_REQ].name = "tx_write_req", + [BNXT_RE_TX_SEND_REQ].name = "tx_send_req", + [BNXT_RE_RX_ATOMIC_REQ].name = "rx_atomic_req", + [BNXT_RE_RX_READ_REQ].name = "rx_read_req", + [BNXT_RE_RX_READ_RESP].name = "rx_read_resp", + [BNXT_RE_RX_WRITE_REQ].name = "rx_write_req", + [BNXT_RE_RX_SEND_REQ].name = "rx_send_req", + [BNXT_RE_RX_ROCE_GOOD_PKTS].name = "rx_roce_good_pkts", + [BNXT_RE_RX_ROCE_GOOD_BYTES].name = "rx_roce_good_bytes", + [BNXT_RE_OOB].name = "rx_out_of_buffer" }; +static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev, + struct rdma_hw_stats *stats, + struct bnxt_qplib_ext_stat *s) +{ + stats->value[BNXT_RE_TX_ATOMIC_REQ] = s->tx_atomic_req; + stats->value[BNXT_RE_TX_READ_REQ] = s->tx_read_req; + stats->value[BNXT_RE_TX_READ_RES] = s->tx_read_res; + stats->value[BNXT_RE_TX_WRITE_REQ] = s->tx_write_req; + stats->value[BNXT_RE_TX_SEND_REQ] = s->tx_send_req; + stats->value[BNXT_RE_RX_ATOMIC_REQ] = s->rx_atomic_req; + stats->value[BNXT_RE_RX_READ_REQ] = s->rx_read_req; + stats->value[BNXT_RE_RX_READ_RESP] = s->rx_read_res; + stats->value[BNXT_RE_RX_WRITE_REQ] = s->rx_write_req; + stats->value[BNXT_RE_RX_SEND_REQ] = s->rx_send_req; + stats->value[BNXT_RE_RX_ROCE_GOOD_PKTS] = s->rx_roce_good_pkts; + stats->value[BNXT_RE_RX_ROCE_GOOD_BYTES] = s->rx_roce_good_bytes; + stats->value[BNXT_RE_OOB] = s->rx_out_of_buffer; +} + +static int bnxt_re_get_ext_stat(struct bnxt_re_dev *rdev, + struct rdma_hw_stats *stats) +{ + struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat; + u32 fid; + int rc; + + fid = PCI_FUNC(rdev->en_dev->pdev->devfn); + rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat); + if (rc) + goto done; + bnxt_re_copy_ext_stats(rdev, stats, estat); + +done: + return rc; +} + +static void bnxt_re_copy_err_stats(struct bnxt_re_dev *rdev, + struct rdma_hw_stats *stats, + struct bnxt_qplib_roce_stats *err_s) +{ + stats->value[BNXT_RE_TO_RETRANSMITS] = + err_s->to_retransmits; + stats->value[BNXT_RE_SEQ_ERR_NAKS_RCVD] = + err_s->seq_err_naks_rcvd; + stats->value[BNXT_RE_MAX_RETRY_EXCEEDED] = + err_s->max_retry_exceeded; + 
stats->value[BNXT_RE_RNR_NAKS_RCVD] = + err_s->rnr_naks_rcvd; + stats->value[BNXT_RE_MISSING_RESP] = + err_s->missing_resp; + stats->value[BNXT_RE_UNRECOVERABLE_ERR] = + err_s->unrecoverable_err; + stats->value[BNXT_RE_BAD_RESP_ERR] = + err_s->bad_resp_err; + stats->value[BNXT_RE_LOCAL_QP_OP_ERR] = + err_s->local_qp_op_err; + stats->value[BNXT_RE_LOCAL_PROTECTION_ERR] = + err_s->local_protection_err; + stats->value[BNXT_RE_MEM_MGMT_OP_ERR] = + err_s->mem_mgmt_op_err; + stats->value[BNXT_RE_REMOTE_INVALID_REQ_ERR] = + err_s->remote_invalid_req_err; + stats->value[BNXT_RE_REMOTE_ACCESS_ERR] = + err_s->remote_access_err; + stats->value[BNXT_RE_REMOTE_OP_ERR] = + err_s->remote_op_err; + stats->value[BNXT_RE_DUP_REQ] = + err_s->dup_req; + stats->value[BNXT_RE_RES_EXCEED_MAX] = + err_s->res_exceed_max; + stats->value[BNXT_RE_RES_LENGTH_MISMATCH] = + err_s->res_length_mismatch; + stats->value[BNXT_RE_RES_EXCEEDS_WQE] = + err_s->res_exceeds_wqe; + stats->value[BNXT_RE_RES_OPCODE_ERR] = + err_s->res_opcode_err; + stats->value[BNXT_RE_RES_RX_INVALID_RKEY] = + err_s->res_rx_invalid_rkey; + stats->value[BNXT_RE_RES_RX_DOMAIN_ERR] = + err_s->res_rx_domain_err; + stats->value[BNXT_RE_RES_RX_NO_PERM] = + err_s->res_rx_no_perm; + stats->value[BNXT_RE_RES_RX_RANGE_ERR] = + err_s->res_rx_range_err; + stats->value[BNXT_RE_RES_TX_INVALID_RKEY] = + err_s->res_tx_invalid_rkey; + stats->value[BNXT_RE_RES_TX_DOMAIN_ERR] = + err_s->res_tx_domain_err; + stats->value[BNXT_RE_RES_TX_NO_PERM] = + err_s->res_tx_no_perm; + stats->value[BNXT_RE_RES_TX_RANGE_ERR] = + err_s->res_tx_range_err; + stats->value[BNXT_RE_RES_IRRQ_OFLOW] = + err_s->res_irrq_oflow; + stats->value[BNXT_RE_RES_UNSUP_OPCODE] = + err_s->res_unsup_opcode; + stats->value[BNXT_RE_RES_UNALIGNED_ATOMIC] = + err_s->res_unaligned_atomic; + stats->value[BNXT_RE_RES_REM_INV_ERR] = + err_s->res_rem_inv_err; + stats->value[BNXT_RE_RES_MEM_ERROR] = + err_s->res_mem_error; + stats->value[BNXT_RE_RES_SRQ_ERR] = + err_s->res_srq_err; + stats->value[BNXT_RE_RES_CMP_ERR] = + err_s->res_cmp_err; + stats->value[BNXT_RE_RES_INVALID_DUP_RKEY] = + err_s->res_invalid_dup_rkey; + stats->value[BNXT_RE_RES_WQE_FORMAT_ERR] = + err_s->res_wqe_format_err; + stats->value[BNXT_RE_RES_CQ_LOAD_ERR] = + err_s->res_cq_load_err; + stats->value[BNXT_RE_RES_SRQ_LOAD_ERR] = + err_s->res_srq_load_err; + stats->value[BNXT_RE_RES_TX_PCI_ERR] = + err_s->res_tx_pci_err; + stats->value[BNXT_RE_RES_RX_PCI_ERR] = + err_s->res_rx_pci_err; + stats->value[BNXT_RE_OUT_OF_SEQ_ERR] = + err_s->res_oos_drop_count; +} + int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u32 port, int index) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); - struct ctx_hw_stats *bnxt_re_stats = rdev->qplib_ctx.stats.dma; + struct ctx_hw_stats *hw_stats = NULL; + struct bnxt_qplib_roce_stats *err_s = NULL; int rc = 0; + hw_stats = rdev->qplib_ctx.stats.dma; if (!port || !stats) return -EINVAL; @@ -128,118 +267,61 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, stats->value[BNXT_RE_ACTIVE_CQ] = atomic_read(&rdev->cq_count); stats->value[BNXT_RE_ACTIVE_MR] = atomic_read(&rdev->mr_count); stats->value[BNXT_RE_ACTIVE_MW] = atomic_read(&rdev->mw_count); - if (bnxt_re_stats) { + stats->value[BNXT_RE_ACTIVE_PD] = atomic_read(&rdev->pd_count); + stats->value[BNXT_RE_ACTIVE_AH] = atomic_read(&rdev->ah_count); + + if (hw_stats) { stats->value[BNXT_RE_RECOVERABLE_ERRORS] = - le64_to_cpu(bnxt_re_stats->tx_bcast_pkts); - stats->value[BNXT_RE_RX_DROPS] = - 
le64_to_cpu(bnxt_re_stats->rx_error_pkts); + le64_to_cpu(hw_stats->tx_bcast_pkts); + stats->value[BNXT_RE_RX_ERRORS] = + le64_to_cpu(hw_stats->rx_error_pkts); stats->value[BNXT_RE_RX_DISCARDS] = - le64_to_cpu(bnxt_re_stats->rx_discard_pkts); + le64_to_cpu(hw_stats->rx_discard_pkts); stats->value[BNXT_RE_RX_PKTS] = - le64_to_cpu(bnxt_re_stats->rx_ucast_pkts); + le64_to_cpu(hw_stats->rx_ucast_pkts); stats->value[BNXT_RE_RX_BYTES] = - le64_to_cpu(bnxt_re_stats->rx_ucast_bytes); + le64_to_cpu(hw_stats->rx_ucast_bytes); stats->value[BNXT_RE_TX_PKTS] = - le64_to_cpu(bnxt_re_stats->tx_ucast_pkts); + le64_to_cpu(hw_stats->tx_ucast_pkts); stats->value[BNXT_RE_TX_BYTES] = - le64_to_cpu(bnxt_re_stats->tx_ucast_bytes); + le64_to_cpu(hw_stats->tx_ucast_bytes); } + err_s = &rdev->stats.rstat.errs; if (test_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags)) { - rc = bnxt_qplib_get_roce_stats(&rdev->rcfw, &rdev->stats); - if (rc) + rc = bnxt_qplib_get_roce_stats(&rdev->rcfw, err_s); + if (rc) { clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags); - stats->value[BNXT_RE_TO_RETRANSMITS] = - rdev->stats.to_retransmits; - stats->value[BNXT_RE_SEQ_ERR_NAKS_RCVD] = - rdev->stats.seq_err_naks_rcvd; - stats->value[BNXT_RE_MAX_RETRY_EXCEEDED] = - rdev->stats.max_retry_exceeded; - stats->value[BNXT_RE_RNR_NAKS_RCVD] = - rdev->stats.rnr_naks_rcvd; - stats->value[BNXT_RE_MISSING_RESP] = - rdev->stats.missing_resp; - stats->value[BNXT_RE_UNRECOVERABLE_ERR] = - rdev->stats.unrecoverable_err; - stats->value[BNXT_RE_BAD_RESP_ERR] = - rdev->stats.bad_resp_err; - stats->value[BNXT_RE_LOCAL_QP_OP_ERR] = - rdev->stats.local_qp_op_err; - stats->value[BNXT_RE_LOCAL_PROTECTION_ERR] = - rdev->stats.local_protection_err; - stats->value[BNXT_RE_MEM_MGMT_OP_ERR] = - rdev->stats.mem_mgmt_op_err; - stats->value[BNXT_RE_REMOTE_INVALID_REQ_ERR] = - rdev->stats.remote_invalid_req_err; - stats->value[BNXT_RE_REMOTE_ACCESS_ERR] = - rdev->stats.remote_access_err; - stats->value[BNXT_RE_REMOTE_OP_ERR] = - rdev->stats.remote_op_err; - stats->value[BNXT_RE_DUP_REQ] = - rdev->stats.dup_req; - stats->value[BNXT_RE_RES_EXCEED_MAX] = - rdev->stats.res_exceed_max; - stats->value[BNXT_RE_RES_LENGTH_MISMATCH] = - rdev->stats.res_length_mismatch; - stats->value[BNXT_RE_RES_EXCEEDS_WQE] = - rdev->stats.res_exceeds_wqe; - stats->value[BNXT_RE_RES_OPCODE_ERR] = - rdev->stats.res_opcode_err; - stats->value[BNXT_RE_RES_RX_INVALID_RKEY] = - rdev->stats.res_rx_invalid_rkey; - stats->value[BNXT_RE_RES_RX_DOMAIN_ERR] = - rdev->stats.res_rx_domain_err; - stats->value[BNXT_RE_RES_RX_NO_PERM] = - rdev->stats.res_rx_no_perm; - stats->value[BNXT_RE_RES_RX_RANGE_ERR] = - rdev->stats.res_rx_range_err; - stats->value[BNXT_RE_RES_TX_INVALID_RKEY] = - rdev->stats.res_tx_invalid_rkey; - stats->value[BNXT_RE_RES_TX_DOMAIN_ERR] = - rdev->stats.res_tx_domain_err; - stats->value[BNXT_RE_RES_TX_NO_PERM] = - rdev->stats.res_tx_no_perm; - stats->value[BNXT_RE_RES_TX_RANGE_ERR] = - rdev->stats.res_tx_range_err; - stats->value[BNXT_RE_RES_IRRQ_OFLOW] = - rdev->stats.res_irrq_oflow; - stats->value[BNXT_RE_RES_UNSUP_OPCODE] = - rdev->stats.res_unsup_opcode; - stats->value[BNXT_RE_RES_UNALIGNED_ATOMIC] = - rdev->stats.res_unaligned_atomic; - stats->value[BNXT_RE_RES_REM_INV_ERR] = - rdev->stats.res_rem_inv_err; - stats->value[BNXT_RE_RES_MEM_ERROR] = - rdev->stats.res_mem_error; - stats->value[BNXT_RE_RES_SRQ_ERR] = - rdev->stats.res_srq_err; - stats->value[BNXT_RE_RES_CMP_ERR] = - rdev->stats.res_cmp_err; - stats->value[BNXT_RE_RES_INVALID_DUP_RKEY] = - 
rdev->stats.res_invalid_dup_rkey; - stats->value[BNXT_RE_RES_WQE_FORMAT_ERR] = - rdev->stats.res_wqe_format_err; - stats->value[BNXT_RE_RES_CQ_LOAD_ERR] = - rdev->stats.res_cq_load_err; - stats->value[BNXT_RE_RES_SRQ_LOAD_ERR] = - rdev->stats.res_srq_load_err; - stats->value[BNXT_RE_RES_TX_PCI_ERR] = - rdev->stats.res_tx_pci_err; - stats->value[BNXT_RE_RES_RX_PCI_ERR] = - rdev->stats.res_rx_pci_err; - stats->value[BNXT_RE_OUT_OF_SEQ_ERR] = - rdev->stats.res_oos_drop_count; + goto done; + } + if (_is_ext_stats_supported(rdev->dev_attr.dev_cap_flags) && + !rdev->is_virtfn) { + rc = bnxt_re_get_ext_stat(rdev, stats); + if (rc) { + clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, + &rdev->flags); + goto done; + } + } + bnxt_re_copy_err_stats(rdev, stats, err_s); } - return ARRAY_SIZE(bnxt_re_stat_name); +done: + return bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + BNXT_RE_NUM_EXT_COUNTERS : BNXT_RE_NUM_STD_COUNTERS; } struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num) { - BUILD_BUG_ON(ARRAY_SIZE(bnxt_re_stat_name) != BNXT_RE_NUM_COUNTERS); + struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); + int num_counters = 0; + + if (bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) + num_counters = BNXT_RE_NUM_EXT_COUNTERS; + else + num_counters = BNXT_RE_NUM_STD_COUNTERS; - return rdma_alloc_hw_stats_struct(bnxt_re_stat_name, - ARRAY_SIZE(bnxt_re_stat_name), + return rdma_alloc_hw_stats_struct(bnxt_re_stat_descs, num_counters, RDMA_HW_STATS_DEFAULT_LIFESPAN); } diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h index 6f2d2f91d9ff..7943b2c393e4 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.h +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h @@ -41,6 +41,8 @@ #define __BNXT_RE_HW_STATS_H__ enum bnxt_re_hw_stats { + BNXT_RE_ACTIVE_PD, + BNXT_RE_ACTIVE_AH, BNXT_RE_ACTIVE_QP, BNXT_RE_ACTIVE_SRQ, BNXT_RE_ACTIVE_CQ, @@ -51,7 +53,7 @@ enum bnxt_re_hw_stats { BNXT_RE_TX_PKTS, BNXT_RE_TX_BYTES, BNXT_RE_RECOVERABLE_ERRORS, - BNXT_RE_RX_DROPS, + BNXT_RE_RX_ERRORS, BNXT_RE_RX_DISCARDS, BNXT_RE_TO_RETRANSMITS, BNXT_RE_SEQ_ERR_NAKS_RCVD, @@ -93,7 +95,31 @@ enum bnxt_re_hw_stats { BNXT_RE_RES_TX_PCI_ERR, BNXT_RE_RES_RX_PCI_ERR, BNXT_RE_OUT_OF_SEQ_ERR, - BNXT_RE_NUM_COUNTERS + BNXT_RE_TX_ATOMIC_REQ, + BNXT_RE_TX_READ_REQ, + BNXT_RE_TX_READ_RES, + BNXT_RE_TX_WRITE_REQ, + BNXT_RE_TX_SEND_REQ, + BNXT_RE_RX_ATOMIC_REQ, + BNXT_RE_RX_READ_REQ, + BNXT_RE_RX_READ_RESP, + BNXT_RE_RX_WRITE_REQ, + BNXT_RE_RX_SEND_REQ, + BNXT_RE_RX_ROCE_GOOD_PKTS, + BNXT_RE_RX_ROCE_GOOD_BYTES, + BNXT_RE_OOB, + BNXT_RE_NUM_EXT_COUNTERS +}; + +#define BNXT_RE_NUM_STD_COUNTERS (BNXT_RE_OUT_OF_SEQ_ERR + 1) + +struct bnxt_re_rstat { + struct bnxt_qplib_roce_stats errs; + struct bnxt_qplib_ext_stat ext_stat; +}; + +struct bnxt_re_stats { + struct bnxt_re_rstat rstat; }; struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 408dfbcc47b5..29cc0d14399a 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -41,6 +41,7 @@ #include <linux/pci.h> #include <linux/netdevice.h> #include <linux/if_ether.h> +#include <net/addrconf.h> #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> @@ -130,10 +131,10 @@ int bnxt_re_query_device(struct ib_device *ibdev, memcpy(&ib_attr->fw_ver, dev_attr->fw_ver, min(sizeof(dev_attr->fw_ver), sizeof(ib_attr->fw_ver))); - 
bnxt_qplib_get_guid(rdev->netdev->dev_addr, - (u8 *)&ib_attr->sys_image_guid); + addrconf_addr_eui48((u8 *)&ib_attr->sys_image_guid, + rdev->netdev->dev_addr); ib_attr->max_mr_size = BNXT_RE_MAX_MR_SIZE; - ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M; + ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_SUPPORTED; ib_attr->vendor_id = rdev->en_dev->pdev->vendor; ib_attr->vendor_part_id = rdev->en_dev->pdev->device; @@ -541,9 +542,12 @@ int bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata) bnxt_re_destroy_fence_mr(pd); - if (pd->qplib_pd.id) - bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, - &pd->qplib_pd); + if (pd->qplib_pd.id) { + if (!bnxt_qplib_dealloc_pd(&rdev->qplib_res, + &rdev->qplib_res.pd_tbl, + &pd->qplib_pd)) + atomic_dec(&rdev->pd_count); + } return 0; } @@ -595,6 +599,8 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) if (bnxt_re_create_fence_mr(pd)) ibdev_warn(&rdev->ibdev, "Failed to create Fence-MR\n"); + atomic_inc(&rdev->pd_count); + return 0; dbfail: bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, @@ -611,6 +617,8 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, !(flags & RDMA_DESTROY_AH_SLEEPABLE)); + atomic_dec(&rdev->ah_count); + return 0; } @@ -695,15 +703,11 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr, wmb(); /* make sure cache is updated. */ spin_unlock_irqrestore(&uctx->sh_lock, flag); } + atomic_inc(&rdev->ah_count); return 0; } -int bnxt_re_modify_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) -{ - return 0; -} - int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) { struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); @@ -760,6 +764,7 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) bnxt_qplib_destroy_ah(&rdev->qplib_res, &gsi_sah->qplib_ah, true); + atomic_dec(&rdev->ah_count); bnxt_qplib_clean_qp(&qp->qplib_qp); ibdev_dbg(&rdev->ibdev, "Destroy the shadow QP\n"); @@ -1006,6 +1011,7 @@ static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah "Failed to allocate HW AH for Shadow QP"); goto fail; } + atomic_inc(&rdev->ah_count); return ah; @@ -2478,7 +2484,8 @@ static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr, wqe->frmr.l_key = wr->key; wqe->frmr.length = wr->mr->length; - wqe->frmr.pbl_pg_sz_log = (wr->mr->page_size >> PAGE_SHIFT_4K) - 1; + wqe->frmr.pbl_pg_sz_log = ilog2(PAGE_SIZE >> PAGE_SHIFT_4K); + wqe->frmr.pg_sz_log = ilog2(wr->mr->page_size >> PAGE_SHIFT_4K); wqe->frmr.va = wr->mr->iova; return 0; } @@ -3354,8 +3361,11 @@ static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp, struct ib_wc *wc, struct bnxt_qplib_cqe *cqe) { + struct bnxt_re_dev *rdev; + u16 vlan_id = 0; u8 nw_type; + rdev = qp->rdev; wc->opcode = IB_WC_RECV; wc->status = __rc_to_ib_wc_status(cqe->status); @@ -3367,9 +3377,12 @@ static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp, memcpy(wc->smac, cqe->smac, ETH_ALEN); wc->wc_flags |= IB_WC_WITH_SMAC; if (cqe->flags & CQ_RES_UD_FLAGS_META_FORMAT_VLAN) { - wc->vlan_id = (cqe->cfa_meta & 0xFFF); - if (wc->vlan_id < 0x1000) - wc->wc_flags |= IB_WC_WITH_VLAN; + vlan_id = (cqe->cfa_meta & 0xFFF); + } + /* Mark only if vlan_id is non zero */ + if (vlan_id && bnxt_re_check_if_vlan_valid(rdev, vlan_id)) { + wc->vlan_id = vlan_id; + wc->wc_flags |= IB_WC_WITH_VLAN; } nw_type = (cqe->flags & CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK) >> CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT; @@ -3798,7 +3811,7 @@ struct 
ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, mr->qplib_mr.va = virt_addr; page_size = ib_umem_find_best_pgsz( - umem, BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M, virt_addr); + umem, BNXT_RE_PAGE_SIZE_SUPPORTED, virt_addr); if (!page_size) { ibdev_err(&rdev->ibdev, "umem page size unsupported!"); rc = -EFAULT; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index b5c6e0f4f877..94326267f9bb 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -166,7 +166,6 @@ int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); int bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); -int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); int bnxt_re_create_srq(struct ib_srq *srq, diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 66268e41b470..b44944fb9b24 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -127,6 +127,8 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) rdev->qplib_res.cctx = rdev->chip_ctx; rdev->rcfw.res = &rdev->qplib_res; + rdev->qplib_res.dattr = &rdev->dev_attr; + rdev->qplib_res.is_vf = BNXT_VF(bp); bnxt_re_set_drv_mode(rdev, wqe_mode); if (bnxt_qplib_determine_atomics(en_dev->pdev)) @@ -523,7 +525,8 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, u32 fw_stats_ctx_id) { struct bnxt_en_dev *en_dev = rdev->en_dev; - struct hwrm_stat_ctx_free_input req = {0}; + struct hwrm_stat_ctx_free_input req = {}; + struct hwrm_stat_ctx_free_output resp = {}; struct bnxt_fw_msg fw_msg; int rc = -EINVAL; @@ -537,8 +540,8 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1); req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id); - bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&req, - sizeof(req), DFLT_HWRM_CMD_TIMEOUT); + bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, + sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x", @@ -693,7 +696,6 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .get_port_immutable = bnxt_re_get_port_immutable, .map_mr_sg = bnxt_re_map_mr_sg, .mmap = bnxt_re_mmap, - .modify_ah = bnxt_re_modify_ah, .modify_qp = bnxt_re_modify_qp, .modify_srq = bnxt_re_modify_srq, .poll_cq = bnxt_re_poll_cq, @@ -727,7 +729,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) strlen(BNXT_RE_DESC) + 5); ibdev->phys_port_cnt = 1; - bnxt_qplib_get_guid(rdev->netdev->dev_addr, (u8 *)&ibdev->node_guid); + addrconf_addr_eui48((u8 *)&ibdev->node_guid, rdev->netdev->dev_addr); ibdev->num_comp_vectors = rdev->num_msix - 1; ibdev->dev.parent = &rdev->en_dev->pdev->dev; @@ -777,6 +779,8 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev, atomic_set(&rdev->srq_count, 0); atomic_set(&rdev->mr_count, 0); atomic_set(&rdev->mw_count, 0); + atomic_set(&rdev->ah_count, 0); + atomic_set(&rdev->pd_count, 0); rdev->cosq[0] = 0xFFFF; rdev->cosq[1] = 0xFFFF; @@ -1725,7 +1729,7 @@ static int bnxt_re_netdev_event(struct 
notifier_block *notifier, } if (sch_work) { /* Allocate for the deferred task */ - re_work = kzalloc(sizeof(*re_work), GFP_ATOMIC); + re_work = kzalloc(sizeof(*re_work), GFP_KERNEL); if (re_work) { get_device(&rdev->ibdev.dev); re_work->rdev = rdev; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index d4d4959c2434..ca88849559bf 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -707,12 +707,13 @@ int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, int rc = 0; RCFW_CMD_PREP(req, QUERY_SRQ, cmd_flags); - req.srq_cid = cpu_to_le32(srq->id); /* Configure the request */ sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); if (!sbuf) return -ENOMEM; + req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS; + req.srq_cid = cpu_to_le32(srq->id); sb = sbuf->sb; rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, (void *)sbuf, 0); @@ -1049,6 +1050,9 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION; if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED; + if (_is_ext_stats_supported(res->dattr->dev_cap_flags) && !res->is_vf) + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED; + req.qp_flags = cpu_to_le32(qp_flags); /* ORRQ and IRRQ */ @@ -2851,6 +2855,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, struct cq_base *hw_cqe; u32 sw_cons, raw_cons; int budget, rc = 0; + u8 type; raw_cons = cq->hwq.cons; budget = num_cqes; @@ -2869,7 +2874,8 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, */ dma_rmb(); /* From the device's respective CQE format to qplib_wc*/ - switch (hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) { + type = hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK; + switch (type) { case CQ_BASE_CQE_TYPE_REQ: rc = bnxt_qplib_cq_process_req(cq, (struct cq_req *)hw_cqe, @@ -2916,8 +2922,9 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, /* Error while processing the CQE, just skip to the * next one */ - dev_err(&cq->hwq.pdev->dev, - "process_cqe error rc = 0x%x\n", rc); + if (type != CQ_BASE_CQE_TYPE_TERMINAL) + dev_err(&cq->hwq.pdev->dev, + "process_cqe error rc = 0x%x\n", rc); } raw_cons++; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 5d384def5e5f..3de854727460 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -78,7 +78,7 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) if (!test_bit(cbit, cmdq->cmdq_bitmap)) goto done; do { - mdelay(1); /* 1m sec */ + udelay(1); bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet); } while (test_bit(cbit, cmdq->cmdq_bitmap) && --count); done: @@ -848,13 +848,13 @@ struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf( { struct bnxt_qplib_rcfw_sbuf *sbuf; - sbuf = kzalloc(sizeof(*sbuf), GFP_ATOMIC); + sbuf = kzalloc(sizeof(*sbuf), GFP_KERNEL); if (!sbuf) return NULL; sbuf->size = size; sbuf->sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf->size, - &sbuf->dma_addr, GFP_ATOMIC); + &sbuf->dma_addr, GFP_KERNEL); if (!sbuf->sb) goto bail; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 9474c0046582..82faa4e4cda8 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ 
b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -96,7 +96,7 @@ static inline void bnxt_qplib_set_cmd_slots(struct cmdq_base *req) #define RCFW_MAX_COOKIE_VALUE 0x7FFF #define RCFW_CMD_IS_BLOCKING 0x8000 -#define RCFW_BLOCKED_CMD_WAIT_COUNT 0x4E20 +#define RCFW_BLOCKED_CMD_WAIT_COUNT 20000000UL /* 20 sec */ #define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 44282a8cdd4f..bc1ba4b51ba4 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -228,15 +228,16 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, npages++; } - if (npages == MAX_PBL_LVL_0_PGS) { + if (npages == MAX_PBL_LVL_0_PGS && !hwq_attr->sginfo->nopte) { /* This request is Level 0, map PTE */ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], hwq_attr->sginfo); if (rc) goto fail; hwq->level = PBL_LVL_0; + goto done; } - if (npages > MAX_PBL_LVL_0_PGS) { + if (npages >= MAX_PBL_LVL_0_PGS) { if (npages > MAX_PBL_LVL_1_PGS) { u32 flag = (hwq_attr->type == HWQ_TYPE_L2_CMPL) ? 0 : PTU_PTE_VALID; @@ -571,23 +572,6 @@ fail: return rc; } -/* GUID */ -void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid) -{ - u8 mac[ETH_ALEN]; - - /* MAC-48 to EUI-64 mapping */ - memcpy(mac, dev_addr, ETH_ALEN); - guid[0] = mac[0] ^ 2; - guid[1] = mac[1]; - guid[2] = mac[2]; - guid[3] = 0xff; - guid[4] = 0xfe; - guid[5] = mac[3]; - guid[6] = mac[4]; - guid[7] = mac[5]; -} - static void bnxt_qplib_free_sgid_tbl(struct bnxt_qplib_res *res, struct bnxt_qplib_sgid_tbl *sgid_tbl) { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 91031502e8f5..e1411a2352a7 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -253,14 +253,15 @@ struct bnxt_qplib_ctx { struct bnxt_qplib_res { struct pci_dev *pdev; struct bnxt_qplib_chip_ctx *cctx; + struct bnxt_qplib_dev_attr *dattr; struct net_device *netdev; - struct bnxt_qplib_rcfw *rcfw; struct bnxt_qplib_pd_tbl pd_tbl; struct bnxt_qplib_sgid_tbl sgid_tbl; struct bnxt_qplib_pkey_tbl pkey_tbl; struct bnxt_qplib_dpi_tbl dpi_tbl; bool prio; + bool is_vf; }; static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) @@ -345,7 +346,6 @@ void bnxt_qplib_free_hwq(struct bnxt_qplib_res *res, struct bnxt_qplib_hwq *hwq); int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, struct bnxt_qplib_hwq_attr *hwq_attr); -void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid); int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pd_tbl, struct bnxt_qplib_pd *pd); int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res, @@ -450,4 +450,10 @@ static inline void bnxt_qplib_ring_nq_db(struct bnxt_qplib_db_info *info, else bnxt_qplib_ring_db32(info, arm); } + +static inline bool _is_ext_stats_supported(u16 dev_cap_flags) +{ + return dev_cap_flags & + CREQ_QUERY_FUNC_RESP_SB_EXT_STATS; +} #endif /* __BNXT_QPLIB_RES_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 3d9259632eb3..379e715ebd30 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -161,6 +161,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->l2_db_size = (sb->l2_db_space_size + 1) * (0x01 << RCFW_DBR_BASE_PAGE_SHIFT); attr->max_sgid = BNXT_QPLIB_NUM_GIDS_SUPPORTED; + attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags); bnxt_qplib_query_version(rcfw, attr->fw_ver); @@ -286,8 
+287,8 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, } int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, - struct bnxt_qplib_gid *gid, u8 *smac, u16 vlan_id, - bool update, u32 *index) + struct bnxt_qplib_gid *gid, const u8 *smac, + u16 vlan_id, bool update, u32 *index) { struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl, struct bnxt_qplib_res, @@ -378,7 +379,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_gid *gid, u16 gid_idx, - u8 *smac) + const u8 *smac) { struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl, struct bnxt_qplib_res, @@ -869,3 +870,53 @@ bail: bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); return rc; } + +int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid, + struct bnxt_qplib_ext_stat *estat) +{ + struct creq_query_roce_stats_ext_resp resp = {}; + struct creq_query_roce_stats_ext_resp_sb *sb; + struct cmdq_query_roce_stats_ext req = {}; + struct bnxt_qplib_rcfw_sbuf *sbuf; + u16 cmd_flags = 0; + int rc; + + sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); + if (!sbuf) { + dev_err(&rcfw->pdev->dev, + "SP: QUERY_ROCE_STATS_EXT alloc sb failed"); + return -ENOMEM; + } + + RCFW_CMD_PREP(req, QUERY_ROCE_STATS_EXT, cmd_flags); + + req.resp_size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); + req.resp_addr = cpu_to_le64(sbuf->dma_addr); + req.function_id = cpu_to_le32(fid); + req.flags = cpu_to_le16(CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID); + + rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, + (void *)&resp, (void *)sbuf, 0); + if (rc) + goto bail; + + sb = sbuf->sb; + estat->tx_atomic_req = le64_to_cpu(sb->tx_atomic_req_pkts); + estat->tx_read_req = le64_to_cpu(sb->tx_read_req_pkts); + estat->tx_read_res = le64_to_cpu(sb->tx_read_res_pkts); + estat->tx_write_req = le64_to_cpu(sb->tx_write_req_pkts); + estat->tx_send_req = le64_to_cpu(sb->tx_send_req_pkts); + estat->rx_atomic_req = le64_to_cpu(sb->rx_atomic_req_pkts); + estat->rx_read_req = le64_to_cpu(sb->rx_read_req_pkts); + estat->rx_read_res = le64_to_cpu(sb->rx_read_res_pkts); + estat->rx_write_req = le64_to_cpu(sb->rx_write_req_pkts); + estat->rx_send_req = le64_to_cpu(sb->rx_send_req_pkts); + estat->rx_roce_good_pkts = le64_to_cpu(sb->rx_roce_good_pkts); + estat->rx_roce_good_bytes = le64_to_cpu(sb->rx_roce_good_bytes); + estat->rx_out_of_buffer = le64_to_cpu(sb->rx_out_of_buffer_pkts); + estat->rx_out_of_sequence = le64_to_cpu(sb->rx_out_of_sequence_pkts); + +bail: + bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); + return rc; +} diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 260104783691..a18f568cb23e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -71,6 +71,7 @@ struct bnxt_qplib_dev_attr { u32 l2_db_size; u8 tqm_alloc_reqs[MAX_TQM_ALLOC_REQ]; bool is_atomic; + u16 dev_cap_flags; }; struct bnxt_qplib_pd { @@ -219,16 +220,41 @@ struct bnxt_qplib_roce_stats { /* port 3 active qps */ }; +struct bnxt_qplib_ext_stat { + u64 tx_atomic_req; + u64 tx_read_req; + u64 tx_read_res; + u64 tx_write_req; + u64 tx_send_req; + u64 tx_roce_pkts; + u64 tx_roce_bytes; + u64 rx_atomic_req; + u64 rx_read_req; + u64 rx_read_res; + u64 rx_write_req; + u64 rx_send_req; + u64 rx_roce_pkts; + u64 rx_roce_bytes; + u64 rx_roce_good_pkts; + u64 rx_roce_good_bytes; + u64 rx_out_of_buffer; + u64 rx_out_of_sequence; + u64 tx_cnp; + u64 rx_cnp; + u64 rx_ecn_marked; +}; + int 
bnxt_qplib_get_sgid(struct bnxt_qplib_res *res, struct bnxt_qplib_sgid_tbl *sgid_tbl, int index, struct bnxt_qplib_gid *gid); int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_gid *gid, u16 vlan_id, bool update); int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, - struct bnxt_qplib_gid *gid, u8 *mac, u16 vlan_id, + struct bnxt_qplib_gid *gid, const u8 *mac, u16 vlan_id, bool update, u32 *index); int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, - struct bnxt_qplib_gid *gid, u16 gid_idx, u8 *smac); + struct bnxt_qplib_gid *gid, u16 gid_idx, + const u8 *smac); int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res, struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index, u16 *pkey); @@ -263,4 +289,7 @@ int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res, int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids); int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_roce_stats *stats); +int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid, + struct bnxt_qplib_ext_stat *estat); + #endif /* __BNXT_QPLIB_SP_H__*/ diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index 3e40e0d76efd..ecb719098b75 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -1102,6 +1102,7 @@ struct cmdq_base { #define CMDQ_BASE_OPCODE_MODIFY_CC 0x8cUL #define CMDQ_BASE_OPCODE_QUERY_CC 0x8dUL #define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS 0x8eUL + #define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS_EXT 0x92UL u8 cmd_size; __le16 flags; __le16 cookie; @@ -1127,6 +1128,10 @@ struct cmdq_create_qp { #define CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL #define CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED 0x8UL #define CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED 0x10UL + #define CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED 0x80UL + #define CMDQ_CREATE_QP_QP_FLAGS_LAST \ + CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED + u8 type; #define CMDQ_CREATE_QP_TYPE_RC 0x2UL #define CMDQ_CREATE_QP_TYPE_UD 0x4UL @@ -2848,6 +2853,7 @@ struct creq_query_func_resp_sb { __le16 max_qp_wr; __le16 dev_cap_flags; #define CREQ_QUERY_FUNC_RESP_SB_DEV_CAP_FLAGS_RESIZE_QP 0x1UL + #define CREQ_QUERY_FUNC_RESP_SB_EXT_STATS 0x10UL __le32 max_cq; __le32 max_cqe; __le32 max_pd; @@ -3087,6 +3093,85 @@ struct creq_query_roce_stats_resp_sb { __le64 active_qp_count_p3; }; +/* cmdq_query_roce_stats_ext (size:192b/24B) */ +struct cmdq_query_roce_stats_ext { + u8 opcode; + #define CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS 0x92UL + #define CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_LAST \ + CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS + u8 cmd_size; + __le16 flags; + #define CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_COLLECTION_ID 0x1UL + #define CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID 0x2UL + __le16 cookie; + u8 resp_size; + u8 collection_id; + __le64 resp_addr; + __le32 function_id; + #define CMDQ_QUERY_ROCE_STATS_EXT_PF_NUM_MASK 0xffUL + #define CMDQ_QUERY_ROCE_STATS_EXT_PF_NUM_SFT 0 + #define CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_MASK 0xffff00UL + #define CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_SFT 8 + #define CMDQ_QUERY_ROCE_STATS_EXT_VF_VALID 0x1000000UL + __le32 reserved32; +}; + +/* creq_query_roce_stats_ext_resp (size:128b/16B) */ +struct creq_query_roce_stats_ext_resp { + u8 type; + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_MASK 0x3fUL + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_SFT 0 + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_QP_EVENT 0x38UL + #define 
CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_LAST \ + CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_QP_EVENT + u8 status; + __le16 cookie; + __le32 size; + u8 v; + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_V 0x1UL + u8 event; + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_QUERY_ROCE_STATS_EXT 0x92UL + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_LAST \ + CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_QUERY_ROCE_STATS_EXT + u8 reserved48[6]; +}; + +/* creq_query_roce_stats_ext_resp_sb (size:1536b/192B) */ +struct creq_query_roce_stats_ext_resp_sb { + u8 opcode; + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_QUERY_ROCE_STATS_EXT 0x92UL + #define CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_LAST \ + CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_QUERY_ROCE_STATS_EXT + u8 status; + __le16 cookie; + __le16 flags; + u8 resp_size; + u8 rsvd; + __le64 tx_atomic_req_pkts; + __le64 tx_read_req_pkts; + __le64 tx_read_res_pkts; + __le64 tx_write_req_pkts; + __le64 tx_send_req_pkts; + __le64 tx_roce_pkts; + __le64 tx_roce_bytes; + __le64 rx_atomic_req_pkts; + __le64 rx_read_req_pkts; + __le64 rx_read_res_pkts; + __le64 rx_write_req_pkts; + __le64 rx_send_req_pkts; + __le64 rx_roce_pkts; + __le64 rx_roce_bytes; + __le64 rx_roce_good_pkts; + __le64 rx_roce_good_bytes; + __le64 rx_out_of_buffer_pkts; + __le64 rx_out_of_sequence_pkts; + __le64 tx_cnp_pkts; + __le64 rx_cnp_pkts; + __le64 rx_ecn_marked_pkts; + __le64 tx_cnp_bytes; + __le64 rx_cnp_bytes; +}; + /* QP error notification event (16 bytes) */ struct creq_qp_error_notification { u8 type; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 291471d12197..913f39ee4416 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -4464,6 +4464,5 @@ int __init c4iw_cm_init(void) void c4iw_cm_term(void) { WARN_ON(!list_empty(&timeout_list)); - flush_workqueue(workq); destroy_workqueue(workq); } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 541dbcf22d0e..80970a1738f8 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -1562,7 +1562,6 @@ static void __exit c4iw_exit_module(void) kfree(ctx); } mutex_unlock(&dev_mutex); - flush_workqueue(reg_workq); destroy_workqueue(reg_workq); cxgb4_unregister_uld(CXGB4_ULD_RDMA); c4iw_cm_term(); diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index e7337662aff8..0c8fd5a85fcb 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -366,23 +366,23 @@ enum counters { NR_COUNTERS }; -static const char * const names[] = { - [IP4INSEGS] = "ip4InSegs", - [IP4OUTSEGS] = "ip4OutSegs", - [IP4RETRANSSEGS] = "ip4RetransSegs", - [IP4OUTRSTS] = "ip4OutRsts", - [IP6INSEGS] = "ip6InSegs", - [IP6OUTSEGS] = "ip6OutSegs", - [IP6RETRANSSEGS] = "ip6RetransSegs", - [IP6OUTRSTS] = "ip6OutRsts" +static const struct rdma_stat_desc cxgb4_descs[] = { + [IP4INSEGS].name = "ip4InSegs", + [IP4OUTSEGS].name = "ip4OutSegs", + [IP4RETRANSSEGS].name = "ip4RetransSegs", + [IP4OUTRSTS].name = "ip4OutRsts", + [IP6INSEGS].name = "ip6InSegs", + [IP6OUTSEGS].name = "ip6OutSegs", + [IP6RETRANSSEGS].name = "ip6RetransSegs", + [IP6OUTRSTS].name = "ip6OutRsts" }; static struct rdma_hw_stats *c4iw_alloc_device_stats(struct ib_device *ibdev) { - BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS); + BUILD_BUG_ON(ARRAY_SIZE(cxgb4_descs) != NR_COUNTERS); /* FIXME: these look like port stats */ - return rdma_alloc_hw_stats_struct(names, NR_COUNTERS, + return 
rdma_alloc_hw_stats_struct(cxgb4_descs, NR_COUNTERS, RDMA_HW_STATS_DEFAULT_LIFESPAN); } diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 87b1dadeb7fe..7352a1f5d811 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -20,14 +20,14 @@ #define EFA_IRQNAME_SIZE 40 -/* 1 for AENQ + ADMIN */ -#define EFA_NUM_MSIX_VEC 1 #define EFA_MGMNT_MSIX_VEC_IDX 0 +#define EFA_COMP_EQS_VEC_BASE 1 struct efa_irq { irq_handler_t handler; void *data; u32 irqn; + u32 vector; cpumask_t affinity_hint_mask; char name[EFA_IRQNAME_SIZE]; }; @@ -61,6 +61,13 @@ struct efa_dev { struct efa_irq admin_irq; struct efa_stats stats; + + /* Array of completion EQs */ + struct efa_eq *eqs; + unsigned int neqs; + + /* Only stores CQs with interrupts enabled */ + struct xarray cqs_xa; }; struct efa_ucontext { @@ -84,8 +91,11 @@ struct efa_cq { dma_addr_t dma_addr; void *cpu_addr; struct rdma_user_mmap_entry *mmap_entry; + struct rdma_user_mmap_entry *db_mmap_entry; size_t size; u16 cq_idx; + /* NULL when no interrupts requested */ + struct efa_eq *eq; }; struct efa_qp { @@ -116,6 +126,11 @@ struct efa_ah { u8 id[EFA_GID_SIZE]; }; +struct efa_eq { + struct efa_com_eq eeq; + struct efa_irq irq; +}; + int efa_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *udata); @@ -139,6 +154,10 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, + u64 length, u64 virt_addr, + int fd, int access_flags, + struct ib_udata *udata); int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num, struct ib_port_immutable *immutable); diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index fa38b34eddb8..0b0b93b529f3 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -28,7 +28,9 @@ enum efa_admin_aq_opcode { EFA_ADMIN_DEALLOC_PD = 15, EFA_ADMIN_ALLOC_UAR = 16, EFA_ADMIN_DEALLOC_UAR = 17, - EFA_ADMIN_MAX_OPCODE = 17, + EFA_ADMIN_CREATE_EQ = 18, + EFA_ADMIN_DESTROY_EQ = 19, + EFA_ADMIN_MAX_OPCODE = 19, }; enum efa_admin_aq_feature_id { @@ -38,6 +40,7 @@ enum efa_admin_aq_feature_id { EFA_ADMIN_QUEUE_ATTR = 4, EFA_ADMIN_HW_HINTS = 5, EFA_ADMIN_HOST_INFO = 6, + EFA_ADMIN_EVENT_QUEUE_ATTR = 7, }; /* QP transport type */ @@ -430,8 +433,8 @@ struct efa_admin_create_cq_cmd { /* * 4:0 : reserved5 - MBZ * 5 : interrupt_mode_enabled - if set, cq operates - * in interrupt mode (i.e. CQ events and MSI-X are - * generated), otherwise - polling + * in interrupt mode (i.e. CQ events and EQ elements + * are generated), otherwise - polling * 6 : virt - If set, ring base address is virtual * (IOVA returned by MR registration) * 7 : reserved6 - MBZ @@ -448,8 +451,11 @@ struct efa_admin_create_cq_cmd { /* completion queue depth in # of entries. 
must be power of 2 */ u16 cq_depth; - /* msix vector assigned to this cq */ - u32 msix_vector_idx; + /* EQ number assigned to this cq */ + u16 eqn; + + /* MBZ */ + u16 reserved; /* * CQ ring base address, virtual or physical depending on 'virt' @@ -480,6 +486,15 @@ struct efa_admin_create_cq_resp { /* actual cq depth in number of entries */ u16 cq_actual_depth; + + /* CQ doorbell address, as offset to PCIe DB BAR */ + u32 db_offset; + + /* + * 0 : db_valid - If set, doorbell offset is valid. + * Always set when interrupts are requested. + */ + u32 flags; }; struct efa_admin_destroy_cq_cmd { @@ -669,6 +684,17 @@ struct efa_admin_feature_queue_attr_desc { u16 max_tx_batch; }; +struct efa_admin_event_queue_attr_desc { + /* The maximum number of event queues supported */ + u32 max_eq; + + /* Maximum number of EQEs per Event Queue */ + u32 max_eq_depth; + + /* Supported events bitmask */ + u32 event_bitmask; +}; + struct efa_admin_feature_aenq_desc { /* bitmask for AENQ groups the device can report */ u32 supported_groups; @@ -727,6 +753,8 @@ struct efa_admin_get_feature_resp { struct efa_admin_feature_queue_attr_desc queue_attr; + struct efa_admin_event_queue_attr_desc event_queue_attr; + struct efa_admin_hw_hints hw_hints; } u; }; @@ -810,6 +838,60 @@ struct efa_admin_dealloc_uar_resp { struct efa_admin_acq_common_desc acq_common_desc; }; +struct efa_admin_create_eq_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + /* Size of the EQ in entries, must be power of 2 */ + u16 depth; + + /* MSI-X table entry index */ + u8 msix_vec; + + /* + * 4:0 : entry_size_words - size of EQ entry in + * 32-bit words + * 7:5 : reserved - MBZ + */ + u8 caps; + + /* EQ ring base address */ + struct efa_common_mem_addr ba; + + /* + * Enabled events on this EQ + * 0 : completion_events - Enable completion events + * 31:1 : reserved - MBZ + */ + u32 event_bitmask; + + /* MBZ */ + u32 reserved; +}; + +struct efa_admin_create_eq_resp { + struct efa_admin_acq_common_desc acq_common_desc; + + /* EQ number */ + u16 eqn; + + /* MBZ */ + u16 reserved; +}; + +struct efa_admin_destroy_eq_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + /* EQ number */ + u16 eqn; + + /* MBZ */ + u16 reserved; +}; + +struct efa_admin_destroy_eq_resp { + struct efa_admin_acq_common_desc acq_common_desc; +}; + /* asynchronous event notification groups */ enum efa_admin_aenq_group { EFA_ADMIN_FATAL_ERROR = 1, @@ -899,10 +981,18 @@ struct efa_admin_host_info { #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) #define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) +/* create_cq_resp */ +#define EFA_ADMIN_CREATE_CQ_RESP_DB_VALID_MASK BIT(0) + /* feature_device_attr_desc */ #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1) +/* create_eq_cmd */ +#define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) +#define EFA_ADMIN_CREATE_EQ_CMD_VIRT_MASK BIT(6) +#define EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS_MASK BIT(0) + /* host_info */ #define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK GENMASK(7, 0) #define EFA_ADMIN_HOST_INFO_DRIVER_SUB_MINOR_MASK GENMASK(15, 8) diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h index 78ff9389ae25..83f20c38a840 100644 --- a/drivers/infiniband/hw/efa/efa_admin_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_defs.h @@ -118,6 +118,43 @@ struct efa_admin_aenq_entry { u32 inline_data_w4[12]; }; +enum efa_admin_eqe_event_type { + 
EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION = 0, +}; + +/* Completion event */ +struct efa_admin_comp_event { + /* CQ number */ + u16 cqn; + + /* MBZ */ + u16 reserved; + + /* MBZ */ + u32 reserved2; +}; + +/* Event Queue Element */ +struct efa_admin_eqe { + /* + * 0 : phase + * 8:1 : event_type - Event type + * 31:9 : reserved - MBZ + */ + u32 common; + + /* MBZ */ + u32 reserved; + + union { + /* Event data */ + u32 event_data[2]; + + /* Completion Event */ + struct efa_admin_comp_event comp_event; + } u; +}; + /* aq_common_desc */ #define EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) #define EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0) @@ -131,4 +168,8 @@ struct efa_admin_aenq_entry { /* aenq_common_desc */ #define EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) +/* eqe */ +#define EFA_ADMIN_EQE_PHASE_MASK BIT(0) +#define EFA_ADMIN_EQE_EVENT_TYPE_MASK GENMASK(8, 1) + #endif /* _EFA_ADMIN_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 0d523ad736c7..16a24a05fc2a 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -56,11 +56,19 @@ static const char *efa_com_cmd_str(u8 cmd) EFA_CMD_STR_CASE(DEALLOC_PD); EFA_CMD_STR_CASE(ALLOC_UAR); EFA_CMD_STR_CASE(DEALLOC_UAR); + EFA_CMD_STR_CASE(CREATE_EQ); + EFA_CMD_STR_CASE(DESTROY_EQ); default: return "unknown command opcode"; } #undef EFA_CMD_STR_CASE } +void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low) +{ + *addr_low = lower_32_bits(addr); + *addr_high = upper_32_bits(addr); +} + static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset) { struct efa_com_mmio_read *mmio_read = &edev->mmio_read; @@ -1081,3 +1089,159 @@ int efa_com_dev_reset(struct efa_com_dev *edev, return 0; } + +static int efa_com_create_eq(struct efa_com_dev *edev, + struct efa_com_create_eq_params *params, + struct efa_com_create_eq_result *result) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_create_eq_resp resp = {}; + struct efa_admin_create_eq_cmd cmd = {}; + int err; + + cmd.aq_common_descriptor.opcode = EFA_ADMIN_CREATE_EQ; + EFA_SET(&cmd.caps, EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS, + params->entry_size_in_bytes / 4); + cmd.depth = params->depth; + cmd.event_bitmask = params->event_bitmask; + cmd.msix_vec = params->msix_vec; + + efa_com_set_dma_addr(params->dma_addr, &cmd.ba.mem_addr_high, + &cmd.ba.mem_addr_low); + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) { + ibdev_err_ratelimited(edev->efa_dev, + "Failed to create eq[%d]\n", err); + return err; + } + + result->eqn = resp.eqn; + + return 0; +} + +static void efa_com_destroy_eq(struct efa_com_dev *edev, + struct efa_com_destroy_eq_params *params) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_destroy_eq_resp resp = {}; + struct efa_admin_destroy_eq_cmd cmd = {}; + int err; + + cmd.aq_common_descriptor.opcode = EFA_ADMIN_DESTROY_EQ; + cmd.eqn = params->eqn; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) + ibdev_err_ratelimited(edev->efa_dev, + "Failed to destroy EQ-%u [%d]\n", cmd.eqn, + err); +} + +static void efa_com_arm_eq(struct efa_com_dev *edev, struct efa_com_eq *eeq) +{ + u32 val = 0; + + EFA_SET(&val, EFA_REGS_EQ_DB_EQN, eeq->eqn); + EFA_SET(&val, EFA_REGS_EQ_DB_ARM, 1); + + writel(val, edev->reg_bar + EFA_REGS_EQ_DB_OFF); +} + +void 
efa_com_eq_comp_intr_handler(struct efa_com_dev *edev, + struct efa_com_eq *eeq) +{ + struct efa_admin_eqe *eqe; + u32 processed = 0; + u8 phase; + u32 ci; + + ci = eeq->cc & (eeq->depth - 1); + phase = eeq->phase; + eqe = &eeq->eqes[ci]; + + /* Go over all the events */ + while ((READ_ONCE(eqe->common) & EFA_ADMIN_EQE_PHASE_MASK) == phase) { + /* + * Do not read the rest of the completion entry before the + * phase bit was validated + */ + dma_rmb(); + + eeq->cb(eeq, eqe); + + /* Get next event entry */ + ci++; + processed++; + + if (ci == eeq->depth) { + ci = 0; + phase = !phase; + } + + eqe = &eeq->eqes[ci]; + } + + eeq->cc += processed; + eeq->phase = phase; + efa_com_arm_eq(eeq->edev, eeq); +} + +void efa_com_eq_destroy(struct efa_com_dev *edev, struct efa_com_eq *eeq) +{ + struct efa_com_destroy_eq_params params = { + .eqn = eeq->eqn, + }; + + efa_com_destroy_eq(edev, ¶ms); + dma_free_coherent(edev->dmadev, eeq->depth * sizeof(*eeq->eqes), + eeq->eqes, eeq->dma_addr); +} + +int efa_com_eq_init(struct efa_com_dev *edev, struct efa_com_eq *eeq, + efa_eqe_handler cb, u16 depth, u8 msix_vec) +{ + struct efa_com_create_eq_params params = {}; + struct efa_com_create_eq_result result = {}; + int err; + + params.depth = depth; + params.entry_size_in_bytes = sizeof(*eeq->eqes); + EFA_SET(¶ms.event_bitmask, + EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS, 1); + params.msix_vec = msix_vec; + + eeq->eqes = dma_alloc_coherent(edev->dmadev, + params.depth * sizeof(*eeq->eqes), + ¶ms.dma_addr, GFP_KERNEL); + if (!eeq->eqes) + return -ENOMEM; + + err = efa_com_create_eq(edev, ¶ms, &result); + if (err) + goto err_free_coherent; + + eeq->eqn = result.eqn; + eeq->edev = edev; + eeq->dma_addr = params.dma_addr; + eeq->phase = 1; + eeq->depth = params.depth; + eeq->cb = cb; + efa_com_arm_eq(edev, eeq); + + return 0; + +err_free_coherent: + dma_free_coherent(edev->dmadev, params.depth * sizeof(*eeq->eqes), + eeq->eqes, params.dma_addr); + return err; +} diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h index 5e4c88877ddb..77282234ce68 100644 --- a/drivers/infiniband/hw/efa/efa_com.h +++ b/drivers/infiniband/hw/efa/efa_com.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #ifndef _EFA_COM_H_ @@ -80,6 +80,9 @@ struct efa_com_admin_queue { }; struct efa_aenq_handlers; +struct efa_com_eq; +typedef void (*efa_eqe_handler)(struct efa_com_eq *eeq, + struct efa_admin_eqe *eqe); struct efa_com_aenq { struct efa_admin_aenq_entry *entries; @@ -112,6 +115,33 @@ struct efa_com_dev { struct efa_com_mmio_read mmio_read; }; +struct efa_com_eq { + struct efa_com_dev *edev; + struct efa_admin_eqe *eqes; + dma_addr_t dma_addr; + u32 cc; /* Consumer counter */ + u16 eqn; + u16 depth; + u8 phase; + efa_eqe_handler cb; +}; + +struct efa_com_create_eq_params { + dma_addr_t dma_addr; + u32 event_bitmask; + u16 depth; + u8 entry_size_in_bytes; + u8 msix_vec; +}; + +struct efa_com_create_eq_result { + u16 eqn; +}; + +struct efa_com_destroy_eq_params { + u16 eqn; +}; + typedef void (*efa_aenq_handler)(void *data, struct efa_admin_aenq_entry *aenq_e); @@ -121,9 +151,13 @@ struct efa_aenq_handlers { efa_aenq_handler unimplemented_handler; }; +void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); int efa_com_admin_init(struct efa_com_dev *edev, struct efa_aenq_handlers *aenq_handlers); void efa_com_admin_destroy(struct efa_com_dev *edev); +int efa_com_eq_init(struct efa_com_dev *edev, struct efa_com_eq *eeq, + efa_eqe_handler cb, u16 depth, u8 msix_vec); +void efa_com_eq_destroy(struct efa_com_dev *edev, struct efa_com_eq *eeq); int efa_com_dev_reset(struct efa_com_dev *edev, enum efa_regs_reset_reason_types reset_reason); void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling); @@ -140,5 +174,7 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, struct efa_admin_acq_entry *comp, size_t comp_size); void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data); +void efa_com_eq_comp_intr_handler(struct efa_com_dev *edev, + struct efa_com_eq *eeq); #endif /* _EFA_COM_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index f752ef64159c..fb405da4e1db 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -1,17 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #include "efa_com.h" #include "efa_com_cmd.h" -void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low) -{ - *addr_low = lower_32_bits(addr); - *addr_high = upper_32_bits(addr); -} - int efa_com_create_qp(struct efa_com_dev *edev, struct efa_com_create_qp_params *params, struct efa_com_create_qp_result *res) @@ -157,7 +151,7 @@ int efa_com_create_cq(struct efa_com_dev *edev, struct efa_com_create_cq_params *params, struct efa_com_create_cq_result *result) { - struct efa_admin_create_cq_resp cmd_completion; + struct efa_admin_create_cq_resp cmd_completion = {}; struct efa_admin_create_cq_cmd create_cmd = {}; struct efa_com_admin_queue *aq = &edev->aq; int err; @@ -169,6 +163,11 @@ int efa_com_create_cq(struct efa_com_dev *edev, create_cmd.cq_depth = params->cq_depth; create_cmd.num_sub_cqs = params->num_sub_cqs; create_cmd.uar = params->uarn; + if (params->interrupt_mode_enabled) { + EFA_SET(&create_cmd.cq_caps_1, + EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED, 1); + create_cmd.eqn = params->eqn; + } efa_com_set_dma_addr(params->dma_addr, &create_cmd.cq_ba.mem_addr_high, @@ -187,6 +186,9 @@ int efa_com_create_cq(struct efa_com_dev *edev, result->cq_idx = cmd_completion.cq_idx; result->actual_depth = params->cq_depth; + result->db_off = cmd_completion.db_offset; + result->db_valid = EFA_GET(&cmd_completion.flags, + EFA_ADMIN_CREATE_CQ_RESP_DB_VALID); return 0; } @@ -497,6 +499,23 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, sizeof(resp.u.network_attr.addr)); result->mtu = resp.u.network_attr.mtu; + if (efa_com_check_supported_feature_id(edev, + EFA_ADMIN_EVENT_QUEUE_ATTR)) { + err = efa_com_get_feature(edev, &resp, + EFA_ADMIN_EVENT_QUEUE_ATTR); + if (err) { + ibdev_err_ratelimited( + edev->efa_dev, + "Failed to get event queue attributes %d\n", + err); + return err; + } + + result->max_eq = resp.u.event_queue_attr.max_eq; + result->max_eq_depth = resp.u.event_queue_attr.max_eq_depth; + result->event_bitmask = resp.u.event_queue_attr.event_bitmask; + } + return 0; } diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index eea4ebfbe6ec..c33010bbf9e8 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #ifndef _EFA_COM_CMD_H_ @@ -73,7 +73,9 @@ struct efa_com_create_cq_params { u16 cq_depth; u16 num_sub_cqs; u16 uarn; + u16 eqn; u8 entry_size_in_bytes; + bool interrupt_mode_enabled; }; struct efa_com_create_cq_result { @@ -81,6 +83,8 @@ struct efa_com_create_cq_result { u16 cq_idx; /* actual cq depth in # of entries */ u16 actual_depth; + u32 db_off; + bool db_valid; }; struct efa_com_destroy_cq_params { @@ -125,6 +129,9 @@ struct efa_com_get_device_attr_result { u32 max_llq_size; u32 max_rdma_size; u32 device_caps; + u32 max_eq; + u32 max_eq_depth; + u32 event_bitmask; /* EQ events bitmask */ u16 sub_cqs_per_cq; u16 max_sq_sge; u16 max_rq_sge; @@ -260,7 +267,6 @@ union efa_com_get_stats_result { struct efa_com_rdma_read_stats rdma_read_stats; }; -void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); int efa_com_create_qp(struct efa_com_dev *edev, struct efa_com_create_qp_params *params, struct efa_com_create_qp_result *res); diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 417dea5f90cf..94b94cca4870 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -67,6 +67,47 @@ static void efa_release_bars(struct efa_dev *dev, int bars_mask) pci_release_selected_regions(pdev, release_bars); } +static void efa_process_comp_eqe(struct efa_dev *dev, struct efa_admin_eqe *eqe) +{ + u16 cqn = eqe->u.comp_event.cqn; + struct efa_cq *cq; + + /* Safe to load as we're in irq and removal calls synchronize_irq() */ + cq = xa_load(&dev->cqs_xa, cqn); + if (unlikely(!cq)) { + ibdev_err_ratelimited(&dev->ibdev, + "Completion event on non-existent CQ[%u]", + cqn); + return; + } + + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); +} + +static void efa_process_eqe(struct efa_com_eq *eeq, struct efa_admin_eqe *eqe) +{ + struct efa_dev *dev = container_of(eeq->edev, struct efa_dev, edev); + + if (likely(EFA_GET(&eqe->common, EFA_ADMIN_EQE_EVENT_TYPE) == + EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION)) + efa_process_comp_eqe(dev, eqe); + else + ibdev_err_ratelimited(&dev->ibdev, + "Unknown event type received %lu", + EFA_GET(&eqe->common, + EFA_ADMIN_EQE_EVENT_TYPE)); +} + +static irqreturn_t efa_intr_msix_comp(int irq, void *data) +{ + struct efa_eq *eq = data; + struct efa_com_dev *edev = eq->eeq.edev; + + efa_com_eq_comp_intr_handler(edev, &eq->eeq); + + return IRQ_HANDLED; +} + static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data) { struct efa_dev *dev = data; @@ -77,26 +118,43 @@ static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data) return IRQ_HANDLED; } -static int efa_request_mgmnt_irq(struct efa_dev *dev) +static int efa_request_irq(struct efa_dev *dev, struct efa_irq *irq) { - struct efa_irq *irq; int err; - irq = &dev->admin_irq; err = request_irq(irq->irqn, irq->handler, 0, irq->name, irq->data); if (err) { - dev_err(&dev->pdev->dev, "Failed to request admin irq (%d)\n", - err); + dev_err(&dev->pdev->dev, "Failed to request irq %s (%d)\n", + irq->name, err); return err; } - dev_dbg(&dev->pdev->dev, "Set affinity hint of mgmnt irq to %*pbl (irq vector: %d)\n", - nr_cpumask_bits, &irq->affinity_hint_mask, irq->irqn); irq_set_affinity_hint(irq->irqn, &irq->affinity_hint_mask); return 0; } +static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq, + int vector) +{ + u32 cpu; + + cpu = vector - EFA_COMP_EQS_VEC_BASE; + snprintf(eq->irq.name, EFA_IRQNAME_SIZE, "efa-comp%d@pci:%s", cpu, + pci_name(dev->pdev)); + eq->irq.handler = efa_intr_msix_comp; + eq->irq.data = eq; + 
eq->irq.vector = vector; + eq->irq.irqn = pci_irq_vector(dev->pdev, vector); + cpumask_set_cpu(cpu, &eq->irq.affinity_hint_mask); +} + +static void efa_free_irq(struct efa_dev *dev, struct efa_irq *irq) +{ + irq_set_affinity_hint(irq->irqn, NULL); + free_irq(irq->irqn, irq->data); +} + static void efa_setup_mgmnt_irq(struct efa_dev *dev) { u32 cpu; @@ -105,8 +163,9 @@ static void efa_setup_mgmnt_irq(struct efa_dev *dev) "efa-mgmnt@pci:%s", pci_name(dev->pdev)); dev->admin_irq.handler = efa_intr_msix_mgmnt; dev->admin_irq.data = dev; - dev->admin_irq.irqn = - pci_irq_vector(dev->pdev, dev->admin_msix_vector_idx); + dev->admin_irq.vector = dev->admin_msix_vector_idx; + dev->admin_irq.irqn = pci_irq_vector(dev->pdev, + dev->admin_msix_vector_idx); cpu = cpumask_first(cpu_online_mask); cpumask_set_cpu(cpu, &dev->admin_irq.affinity_hint_mask); @@ -115,20 +174,11 @@ static void efa_setup_mgmnt_irq(struct efa_dev *dev) dev->admin_irq.name); } -static void efa_free_mgmnt_irq(struct efa_dev *dev) -{ - struct efa_irq *irq; - - irq = &dev->admin_irq; - irq_set_affinity_hint(irq->irqn, NULL); - free_irq(irq->irqn, irq->data); -} - static int efa_set_mgmnt_irq(struct efa_dev *dev) { efa_setup_mgmnt_irq(dev); - return efa_request_mgmnt_irq(dev); + return efa_request_irq(dev, &dev->admin_irq); } static int efa_request_doorbell_bar(struct efa_dev *dev) @@ -234,6 +284,72 @@ static void efa_set_host_info(struct efa_dev *dev) dma_free_coherent(&dev->pdev->dev, bufsz, hinf, hinf_dma); } +static void efa_destroy_eq(struct efa_dev *dev, struct efa_eq *eq) +{ + efa_com_eq_destroy(&dev->edev, &eq->eeq); + efa_free_irq(dev, &eq->irq); +} + +static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u8 msix_vec) +{ + int err; + + efa_setup_comp_irq(dev, eq, msix_vec); + err = efa_request_irq(dev, &eq->irq); + if (err) + return err; + + err = efa_com_eq_init(&dev->edev, &eq->eeq, efa_process_eqe, + dev->dev_attr.max_eq_depth, msix_vec); + if (err) + goto err_free_comp_irq; + + return 0; + +err_free_comp_irq: + efa_free_irq(dev, &eq->irq); + return err; +} + +static int efa_create_eqs(struct efa_dev *dev) +{ + unsigned int neqs = dev->dev_attr.max_eq; + int err; + int i; + + neqs = min_t(unsigned int, neqs, num_online_cpus()); + dev->neqs = neqs; + dev->eqs = kcalloc(neqs, sizeof(*dev->eqs), GFP_KERNEL); + if (!dev->eqs) + return -ENOMEM; + + for (i = 0; i < neqs; i++) { + err = efa_create_eq(dev, &dev->eqs[i], + i + EFA_COMP_EQS_VEC_BASE); + if (err) + goto err_destroy_eqs; + } + + return 0; + +err_destroy_eqs: + for (i--; i >= 0; i--) + efa_destroy_eq(dev, &dev->eqs[i]); + kfree(dev->eqs); + + return err; +} + +static void efa_destroy_eqs(struct efa_dev *dev) +{ + int i; + + for (i = 0; i < dev->neqs; i++) + efa_destroy_eq(dev, &dev->eqs[i]); + + kfree(dev->eqs); +} + static const struct ib_device_ops efa_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_EFA, @@ -264,6 +380,7 @@ static const struct ib_device_ops efa_dev_ops = { .query_port = efa_query_port, .query_qp = efa_query_qp, .reg_user_mr = efa_reg_mr, + .reg_user_mr_dmabuf = efa_reg_user_mr_dmabuf, INIT_RDMA_OBJ_SIZE(ib_ah, efa_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, efa_cq, ibcq), @@ -300,23 +417,29 @@ static int efa_ib_device_add(struct efa_dev *dev) if (err) goto err_release_doorbell_bar; + err = efa_create_eqs(dev); + if (err) + goto err_release_doorbell_bar; + efa_set_host_info(dev); dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED; dev->ibdev.phys_port_cnt = 1; - dev->ibdev.num_comp_vectors = 1; + dev->ibdev.num_comp_vectors = dev->neqs ?: 1; 
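
The EFA changes above add per-vector completion event queues that the handler drains by checking a phase bit in each entry instead of reading back a shared consumer index. Below is a minimal userspace C sketch of that consumption scheme; the names (struct eq_entry, struct eq, eq_poll) are illustrative assumptions rather than the driver's types, and the kernel's DMA barriers and doorbell write are omitted.

#include <stdio.h>
#include <stdint.h>

struct eq_entry {
	uint32_t common;   /* bit 0: phase; bits 8:1: event type */
	uint32_t data;
};

struct eq {
	struct eq_entry *ring;
	uint32_t depth;    /* power of two */
	uint32_t cc;       /* consumer counter, only ever increments */
	uint8_t phase;     /* phase value that marks a valid entry */
};

/* Drain every entry whose phase bit matches the expected phase. */
static void eq_poll(struct eq *q)
{
	uint32_t ci = q->cc & (q->depth - 1);
	uint8_t phase = q->phase;

	while ((q->ring[ci].common & 1u) == phase) {
		printf("event type %u, data %u\n",
		       (q->ring[ci].common >> 1) & 0xffu, q->ring[ci].data);
		q->cc++;
		if (++ci == q->depth) {
			/* wrap: the producer flips the phase it writes */
			ci = 0;
			phase = !phase;
		}
	}
	q->phase = phase;
}

int main(void)
{
	struct eq_entry ring[8] = {0};
	struct eq q = { .ring = ring, .depth = 8, .cc = 0, .phase = 1 };

	/* producer posts three entries in phase 1; the rest stay phase 0 */
	ring[0] = (struct eq_entry){ .common = (5u << 1) | 1u, .data = 100 };
	ring[1] = (struct eq_entry){ .common = (5u << 1) | 1u, .data = 101 };
	ring[2] = (struct eq_entry){ .common = (5u << 1) | 1u, .data = 102 };

	eq_poll(&q);   /* prints the three valid entries and stops */
	return 0;
}

Because the producer toggles the phase it writes on every wrap, a stale entry from the previous lap never matches the consumer's expected phase, which is what lets the consumer walk the ring without a shared write-back index.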
dev->ibdev.dev.parent = &pdev->dev; ib_set_device_ops(&dev->ibdev, &efa_dev_ops); err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); if (err) - goto err_release_doorbell_bar; + goto err_destroy_eqs; ibdev_info(&dev->ibdev, "IB device registered\n"); return 0; +err_destroy_eqs: + efa_destroy_eqs(dev); err_release_doorbell_bar: efa_release_doorbell_bar(dev); return err; @@ -324,9 +447,10 @@ err_release_doorbell_bar: static void efa_ib_device_remove(struct efa_dev *dev) { - efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); ibdev_info(&dev->ibdev, "Unregister ib device\n"); ib_unregister_device(&dev->ibdev); + efa_destroy_eqs(dev); + efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); efa_release_doorbell_bar(dev); } @@ -339,8 +463,12 @@ static int efa_enable_msix(struct efa_dev *dev) { int msix_vecs, irq_num; - /* Reserve the max msix vectors we might need */ - msix_vecs = EFA_NUM_MSIX_VEC; + /* + * Reserve the max msix vectors we might need, one vector is reserved + * for admin. + */ + msix_vecs = min_t(int, pci_msix_vec_count(dev->pdev), + num_online_cpus() + 1); dev_dbg(&dev->pdev->dev, "Trying to enable MSI-X, vectors %d\n", msix_vecs); @@ -421,6 +549,7 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev) edev->efa_dev = dev; edev->dmadev = &pdev->dev; dev->pdev = pdev; + xa_init(&dev->cqs_xa); bars = pci_select_bars(pdev, IORESOURCE_MEM) & EFA_BASE_BAR_MASK; err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); @@ -476,7 +605,7 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev) return dev; err_free_mgmnt_irq: - efa_free_mgmnt_irq(dev); + efa_free_irq(dev, &dev->admin_irq); err_disable_msix: efa_disable_msix(dev); err_reg_read_destroy: @@ -499,11 +628,12 @@ static void efa_remove_device(struct pci_dev *pdev) edev = &dev->edev; efa_com_admin_destroy(edev); - efa_free_mgmnt_irq(dev); + efa_free_irq(dev, &dev->admin_irq); efa_disable_msix(dev); efa_com_mmio_reg_read_destroy(edev); devm_iounmap(&pdev->dev, edev->reg_bar); efa_release_bars(dev, EFA_BASE_BAR_MASK); + xa_destroy(&dev->cqs_xa); ib_dealloc_device(&dev->ibdev); pci_disable_device(pdev); } diff --git a/drivers/infiniband/hw/efa/efa_regs_defs.h b/drivers/infiniband/hw/efa/efa_regs_defs.h index 4017982fe13b..714ae6258800 100644 --- a/drivers/infiniband/hw/efa/efa_regs_defs.h +++ b/drivers/infiniband/hw/efa/efa_regs_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_REGS_H_ @@ -42,6 +42,7 @@ enum efa_regs_reset_reason_types { #define EFA_REGS_MMIO_REG_READ_OFF 0x5c #define EFA_REGS_MMIO_RESP_LO_OFF 0x60 #define EFA_REGS_MMIO_RESP_HI_OFF 0x64 +#define EFA_REGS_EQ_DB_OFF 0x68 /* version register */ #define EFA_REGS_VERSION_MINOR_VERSION_MASK 0xff @@ -93,4 +94,8 @@ enum efa_regs_reset_reason_types { #define EFA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff #define EFA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000 +/* eq_db register */ +#define EFA_REGS_EQ_DB_EQN_MASK 0xffff +#define EFA_REGS_EQ_DB_ARM_MASK 0x80000000 + #endif /* _EFA_REGS_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index e5f9d90aad5e..ecfe70eb5efb 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -3,6 +3,8 @@ * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ +#include <linux/dma-buf.h> +#include <linux/dma-resv.h> #include <linux/vmalloc.h> #include <linux/log2.h> @@ -60,13 +62,14 @@ struct efa_user_mmap_entry { op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \ #define EFA_STATS_ENUM(ename, name) ename, -#define EFA_STATS_STR(ename, name) [ename] = name, +#define EFA_STATS_STR(ename, nam) \ + [ename].name = nam, enum efa_hw_device_stats { EFA_DEFINE_DEVICE_STATS(EFA_STATS_ENUM) }; -static const char *const efa_device_stats_names[] = { +static const struct rdma_stat_desc efa_device_stats_descs[] = { EFA_DEFINE_DEVICE_STATS(EFA_STATS_STR) }; @@ -74,7 +77,7 @@ enum efa_hw_port_stats { EFA_DEFINE_PORT_STATS(EFA_STATS_ENUM) }; -static const char *const efa_port_stats_names[] = { +static const struct rdma_stat_desc efa_port_stats_descs[] = { EFA_DEFINE_PORT_STATS(EFA_STATS_STR) }; @@ -245,6 +248,9 @@ int efa_query_device(struct ib_device *ibdev, if (EFA_DEV_CAP(dev, RNR_RETRY)) resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY; + if (dev->neqs) + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS; + err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (err) { @@ -984,6 +990,12 @@ static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx) return efa_com_destroy_cq(&dev->edev, ¶ms); } +static void efa_cq_user_mmap_entries_remove(struct efa_cq *cq) +{ + rdma_user_mmap_entry_remove(cq->db_mmap_entry); + rdma_user_mmap_entry_remove(cq->mmap_entry); +} + int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibcq->device); @@ -993,15 +1005,25 @@ int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n", cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr); - rdma_user_mmap_entry_remove(cq->mmap_entry); + efa_cq_user_mmap_entries_remove(cq); efa_destroy_cq_idx(dev, cq->cq_idx); + if (cq->eq) { + xa_erase(&dev->cqs_xa, cq->cq_idx); + synchronize_irq(cq->eq->irq.irqn); + } efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, DMA_FROM_DEVICE); return 0; } +static struct efa_eq *efa_vec2eq(struct efa_dev *dev, int vec) +{ + return &dev->eqs[vec]; +} + static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, - struct efa_ibv_create_cq_resp *resp) + struct efa_ibv_create_cq_resp *resp, + bool db_valid) { resp->q_mmap_size = cq->size; cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, @@ -1011,6 +1033,21 @@ static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, if (!cq->mmap_entry) return -ENOMEM; + if (db_valid) { + cq->db_mmap_entry = + efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, + dev->db_bar_addr + resp->db_off, + PAGE_SIZE, EFA_MMAP_IO_NC, + &resp->db_mmap_key); + if (!cq->db_mmap_entry) { + rdma_user_mmap_entry_remove(cq->mmap_entry); + return -ENOMEM; + } + + resp->db_off &= ~PAGE_MASK; + resp->comp_mask |= EFA_CREATE_CQ_RESP_DB_OFF; + } + return 0; } @@ -1019,8 +1056,8 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, { struct efa_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct efa_ucontext, ibucontext); + struct efa_com_create_cq_params params = {}; struct efa_ibv_create_cq_resp resp = {}; - struct efa_com_create_cq_params params; struct efa_com_create_cq_result result; struct ib_device *ibdev = ibcq->device; struct efa_dev *dev = to_edev(ibdev); @@ -1065,7 +1102,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_out; } - if (cmd.comp_mask || 
!is_reserved_cleared(cmd.reserved_50)) { + if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_58)) { ibdev_dbg(ibdev, "Incompatible ABI params, unknown fields in udata\n"); err = -EINVAL; @@ -1101,29 +1138,45 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, params.dma_addr = cq->dma_addr; params.entry_size_in_bytes = cmd.cq_entry_size; params.num_sub_cqs = cmd.num_sub_cqs; + if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) { + cq->eq = efa_vec2eq(dev, attr->comp_vector); + params.eqn = cq->eq->eeq.eqn; + params.interrupt_mode_enabled = true; + } + err = efa_com_create_cq(&dev->edev, ¶ms, &result); if (err) goto err_free_mapped; + resp.db_off = result.db_off; resp.cq_idx = result.cq_idx; cq->cq_idx = result.cq_idx; cq->ibcq.cqe = result.actual_depth; WARN_ON_ONCE(entries != result.actual_depth); - err = cq_mmap_entries_setup(dev, cq, &resp); + err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid); if (err) { ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n", cq->cq_idx); goto err_destroy_cq; } + if (cq->eq) { + err = xa_err(xa_store(&dev->cqs_xa, cq->cq_idx, cq, GFP_KERNEL)); + if (err) { + ibdev_dbg(ibdev, "Failed to store cq[%u] in xarray\n", + cq->cq_idx); + goto err_remove_mmap; + } + } + if (udata->outlen) { err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (err) { ibdev_dbg(ibdev, "Failed to copy udata for create_cq\n"); - goto err_remove_mmap; + goto err_xa_erase; } } @@ -1132,8 +1185,11 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return 0; +err_xa_erase: + if (cq->eq) + xa_erase(&dev->cqs_xa, cq->cq_idx); err_remove_mmap: - rdma_user_mmap_entry_remove(cq->mmap_entry); + efa_cq_user_mmap_entries_remove(cq); err_destroy_cq: efa_destroy_cq_idx(dev, cq->cq_idx); err_free_mapped: @@ -1490,26 +1546,18 @@ static int efa_create_pbl(struct efa_dev *dev, return 0; } -struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, - u64 virt_addr, int access_flags, - struct ib_udata *udata) +static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags, + struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibpd->device); - struct efa_com_reg_mr_params params = {}; - struct efa_com_reg_mr_result result = {}; - struct pbl_context pbl; int supp_access_flags; - unsigned int pg_sz; struct efa_mr *mr; - int inline_size; - int err; if (udata && udata->inlen && !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) { ibdev_dbg(&dev->ibdev, "Incompatible ABI params, udata not cleared\n"); - err = -EINVAL; - goto err_out; + return ERR_PTR(-EINVAL); } supp_access_flags = @@ -1521,23 +1569,26 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, ibdev_dbg(&dev->ibdev, "Unsupported access flags[%#x], supported[%#x]\n", access_flags, supp_access_flags); - err = -EOPNOTSUPP; - goto err_out; + return ERR_PTR(-EOPNOTSUPP); } mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) { - err = -ENOMEM; - goto err_out; - } + if (!mr) + return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(ibpd->device, start, length, access_flags); - if (IS_ERR(mr->umem)) { - err = PTR_ERR(mr->umem); - ibdev_dbg(&dev->ibdev, - "Failed to pin and map user space memory[%d]\n", err); - goto err_free; - } + return mr; +} + +static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start, + u64 length, u64 virt_addr, int access_flags) +{ + struct efa_dev *dev = to_edev(ibpd->device); + struct efa_com_reg_mr_params params = {}; + struct efa_com_reg_mr_result result = {}; + struct pbl_context 
pbl; + unsigned int pg_sz; + int inline_size; + int err; params.pd = to_epd(ibpd)->pdn; params.iova = virt_addr; @@ -1548,10 +1599,9 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, dev->dev_attr.page_size_cap, virt_addr); if (!pg_sz) { - err = -EOPNOTSUPP; ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n", dev->dev_attr.page_size_cap); - goto err_unmap; + return -EOPNOTSUPP; } params.page_shift = order_base_2(pg_sz); @@ -1565,21 +1615,21 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, if (params.page_num <= inline_size) { err = efa_create_inline_pbl(dev, mr, ¶ms); if (err) - goto err_unmap; + return err; err = efa_com_register_mr(&dev->edev, ¶ms, &result); if (err) - goto err_unmap; + return err; } else { err = efa_create_pbl(dev, &pbl, mr, ¶ms); if (err) - goto err_unmap; + return err; err = efa_com_register_mr(&dev->edev, ¶ms, &result); pbl_destroy(dev, &pbl); if (err) - goto err_unmap; + return err; } mr->ibmr.lkey = result.l_key; @@ -1587,9 +1637,78 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, mr->ibmr.length = length; ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey); + return 0; +} + +struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, + u64 length, u64 virt_addr, + int fd, int access_flags, + struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibpd->device); + struct ib_umem_dmabuf *umem_dmabuf; + struct efa_mr *mr; + int err; + + mr = efa_alloc_mr(ibpd, access_flags, udata); + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + goto err_out; + } + + umem_dmabuf = ib_umem_dmabuf_get_pinned(ibpd->device, start, length, fd, + access_flags); + if (IS_ERR(umem_dmabuf)) { + err = PTR_ERR(umem_dmabuf); + ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%d]\n", err); + goto err_free; + } + + mr->umem = &umem_dmabuf->umem; + err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags); + if (err) + goto err_release; + + return &mr->ibmr; + +err_release: + ib_umem_release(mr->umem); +err_free: + kfree(mr); +err_out: + atomic64_inc(&dev->stats.reg_mr_err); + return ERR_PTR(err); +} + +struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata) +{ + struct efa_dev *dev = to_edev(ibpd->device); + struct efa_mr *mr; + int err; + + mr = efa_alloc_mr(ibpd, access_flags, udata); + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + goto err_out; + } + + mr->umem = ib_umem_get(ibpd->device, start, length, access_flags); + if (IS_ERR(mr->umem)) { + err = PTR_ERR(mr->umem); + ibdev_dbg(&dev->ibdev, + "Failed to pin and map user space memory[%d]\n", err); + goto err_free; + } + + err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags); + if (err) + goto err_release; + return &mr->ibmr; -err_unmap: +err_release: ib_umem_release(mr->umem); err_free: kfree(mr); @@ -1906,15 +2025,15 @@ int efa_destroy_ah(struct ib_ah *ibah, u32 flags) struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num) { - return rdma_alloc_hw_stats_struct(efa_port_stats_names, - ARRAY_SIZE(efa_port_stats_names), + return rdma_alloc_hw_stats_struct(efa_port_stats_descs, + ARRAY_SIZE(efa_port_stats_descs), RDMA_HW_STATS_DEFAULT_LIFESPAN); } struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev) { - return rdma_alloc_hw_stats_struct(efa_device_stats_names, - ARRAY_SIZE(efa_device_stats_names), + return rdma_alloc_hw_stats_struct(efa_device_stats_descs, + 
ARRAY_SIZE(efa_device_stats_descs), RDMA_HW_STATS_DEFAULT_LIFESPAN); } @@ -1939,7 +2058,7 @@ static int efa_fill_device_stats(struct efa_dev *dev, stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err); stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err); - return ARRAY_SIZE(efa_device_stats_names); + return ARRAY_SIZE(efa_device_stats_descs); } static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats, @@ -1988,7 +2107,7 @@ static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats, stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err; stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes; - return ARRAY_SIZE(efa_port_stats_names); + return ARRAY_SIZE(efa_port_stats_descs); } int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig index 519866b30a13..6eb739052121 100644 --- a/drivers/infiniband/hw/hfi1/Kconfig +++ b/drivers/infiniband/hw/hfi1/Kconfig @@ -1,12 +1,12 @@ # SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_HFI1 - tristate "Intel OPA Gen1 support" + tristate "Cornelis OPX Gen1 support" depends on X86_64 && INFINIBAND_RDMAVT && I2C select MMU_NOTIFIER select CRC32 select I2C_ALGOBIT help - This is a low-level driver for Intel OPA Gen1 adapter. + This is a low-level driver for Cornelis OPX Gen1 adapter. config HFI1_DEBUG_SDMA_ORDER bool "HFI1 SDMA Order debug" depends on INFINIBAND_HFI1 diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 37273dc0c03c..ec37f4fd8e96 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* * Copyright(c) 2015 - 2020 Intel Corporation. + * Copyright(c) 2021 Cornelis Networks. */ /* @@ -14918,7 +14919,7 @@ static int obtain_boardname(struct hfi1_devdata *dd) { /* generic board description */ const char generic[] = - "Intel Omni-Path Host Fabric Interface Adapter 100 Series"; + "Cornelis Omni-Path Host Fabric Interface Adapter 100 Series"; unsigned long size; int ret; diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index de411884386b..61f341c3005c 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* * Copyright(c) 2015-2020 Intel Corporation. + * Copyright(c) 2021 Cornelis Networks. */ #include <linux/spinlock.h> @@ -56,7 +57,7 @@ module_param_cb(cap_mask, &cap_ops, &hfi1_cap_mask, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(cap_mask, "Bit mask of enabled/disabled HW features"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("Intel Omni-Path Architecture driver"); +MODULE_DESCRIPTION("Cornelis Omni-Path Express driver"); /* * MAX_PKT_RCV is the max # if packets processed per receive interrupt. diff --git a/drivers/infiniband/hw/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c index f275dd1abed8..e8ed05516bf2 100644 --- a/drivers/infiniband/hw/hfi1/efivar.c +++ b/drivers/infiniband/hw/hfi1/efivar.c @@ -3,7 +3,9 @@ * Copyright(c) 2015, 2016 Intel Corporation. 
*/ -#include <linux/ctype.h> +#include <linux/string.h> +#include <linux/string_helpers.h> + #include "efivar.h" /* GUID for HFI1 variables in EFI */ @@ -112,7 +114,6 @@ int read_hfi1_efi_var(struct hfi1_devdata *dd, const char *kind, char prefix_name[64]; char name[64]; int result; - int i; /* create a common prefix */ snprintf(prefix_name, sizeof(prefix_name), "%04x:%02x:%02x.%x", @@ -128,10 +129,7 @@ int read_hfi1_efi_var(struct hfi1_devdata *dd, const char *kind, * variable. */ if (result) { - /* Converting to uppercase */ - for (i = 0; prefix_name[i]; i++) - if (isalpha(prefix_name[i])) - prefix_name[i] = toupper(prefix_name[i]); + string_upper(prefix_name, prefix_name); snprintf(name, sizeof(name), "%s-%s", prefix_name, kind); result = read_efi_var(name, size, return_data); } diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index e3679d076eaa..dbd1c31830b9 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* * Copyright(c) 2015 - 2020 Intel Corporation. + * Copyright(c) 2021 Cornelis Networks. */ #include <linux/pci.h> @@ -1342,7 +1343,7 @@ static void remove_one(struct pci_dev *); static int init_one(struct pci_dev *, const struct pci_device_id *); static void shutdown_one(struct pci_dev *); -#define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: " +#define DRIVER_LOAD_MSG "Cornelis " DRIVER_NAME " loaded: " #define PFX DRIVER_NAME ": " const struct pci_device_id hfi1_pci_tbl[] = { diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h index 2cff38b105ac..909122934246 100644 --- a/drivers/infiniband/hw/hfi1/ipoib.h +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -44,22 +44,52 @@ union hfi1_ipoib_flow { }; /** + * struct ipoib_txreq - IPOIB transmit descriptor + * @txreq: sdma transmit request + * @sdma_hdr: 9b ib headers + * @sdma_status: status returned by sdma engine + * @complete: non-zero implies complete + * @priv: ipoib netdev private data + * @txq: txq on which skb was output + * @skb: skb to send + */ +struct ipoib_txreq { + struct sdma_txreq txreq; + struct hfi1_sdma_header sdma_hdr; + int sdma_status; + int complete; + struct hfi1_ipoib_dev_priv *priv; + struct hfi1_ipoib_txq *txq; + struct sk_buff *skb; +}; + +/** * struct hfi1_ipoib_circ_buf - List of items to be processed - * @items: ring of items - * @head: ring head - * @tail: ring tail + * @items: ring of items each a power of two size * @max_items: max items + 1 that the ring can contain - * @producer_lock: producer sync lock - * @consumer_lock: consumer sync lock + * @shift: log2 of size for getting txreq + * @sent_txreqs: count of txreqs posted to sdma + * @tail: ring tail + * @stops: count of stops of queue + * @ring_full: ring has been filled + * @no_desc: descriptor shortage seen + * @complete_txreqs: count of txreqs completed by sdma + * @head: ring head */ -struct ipoib_txreq; struct hfi1_ipoib_circ_buf { - struct ipoib_txreq **items; - unsigned long head; - unsigned long tail; - unsigned long max_items; - spinlock_t producer_lock; /* head sync lock */ - spinlock_t consumer_lock; /* tail sync lock */ + void *items; + u32 max_items; + u32 shift; + /* consumer cache line */ + u64 ____cacheline_aligned_in_smp sent_txreqs; + u32 avail; + u32 tail; + atomic_t stops; + atomic_t ring_full; + atomic_t no_desc; + /* producer cache line */ + u64 ____cacheline_aligned_in_smp complete_txreqs; + u32 head; }; /** @@ -68,33 +98,24 @@ struct hfi1_ipoib_circ_buf { 
* @sde: sdma engine * @tx_list: tx request list * @sent_txreqs: count of txreqs posted to sdma - * @stops: count of stops of queue - * @ring_full: ring has been filled - * @no_desc: descriptor shortage seen * @flow: tracks when list needs to be flushed for a flow change * @q_idx: ipoib Tx queue index * @pkts_sent: indicator packets have been sent from this queue * @wait: iowait structure - * @complete_txreqs: count of txreqs completed by sdma * @napi: pointer to tx napi interface * @tx_ring: ring of ipoib txreqs to be reaped by napi callback */ struct hfi1_ipoib_txq { + struct napi_struct napi; struct hfi1_ipoib_dev_priv *priv; struct sdma_engine *sde; struct list_head tx_list; - u64 sent_txreqs; - atomic_t stops; - atomic_t ring_full; - atomic_t no_desc; union hfi1_ipoib_flow flow; u8 q_idx; bool pkts_sent; struct iowait wait; - atomic64_t ____cacheline_aligned_in_smp complete_txreqs; - struct napi_struct *napi; - struct hfi1_ipoib_circ_buf tx_ring; + struct hfi1_ipoib_circ_buf ____cacheline_aligned_in_smp tx_ring; }; struct hfi1_ipoib_dev_priv { @@ -102,15 +123,12 @@ struct hfi1_ipoib_dev_priv { struct net_device *netdev; struct ib_device *device; struct hfi1_ipoib_txq *txqs; - struct kmem_cache *txreq_cache; - struct napi_struct *tx_napis; + const struct net_device_ops *netdev_ops; + struct rvt_qp *qp; + u32 qkey; u16 pkey; u16 pkey_index; - u32 qkey; u8 port_num; - - const struct net_device_ops *netdev_ops; - struct rvt_qp *qp; }; /* hfi1 ipoib rdma netdev's private data structure */ diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c index e594a961f513..e1a2b02bbd91 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_main.c +++ b/drivers/infiniband/hw/hfi1/ipoib_main.c @@ -11,7 +11,7 @@ #include "ipoib.h" #include "hfi.h" -static u32 qpn_from_mac(u8 *mac_arr) +static u32 qpn_from_mac(const u8 *mac_arr) { return (u32)mac_arr[1] << 16 | mac_arr[2] << 8 | mac_arr[3]; } diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index 15b0cb0f363f..f4010890309f 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -22,24 +22,6 @@ #define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size) #define CIRC_PREV(val, size) CIRC_ADD(val, -1, size) -/** - * struct ipoib_txreq - IPOIB transmit descriptor - * @txreq: sdma transmit request - * @sdma_hdr: 9b ib headers - * @sdma_status: status returned by sdma engine - * @priv: ipoib netdev private data - * @txq: txq on which skb was output - * @skb: skb to send - */ -struct ipoib_txreq { - struct sdma_txreq txreq; - struct hfi1_sdma_header sdma_hdr; - int sdma_status; - struct hfi1_ipoib_dev_priv *priv; - struct hfi1_ipoib_txq *txq; - struct sk_buff *skb; -}; - struct ipoib_txparms { struct hfi1_devdata *dd; struct rdma_ah_attr *ah_attr; @@ -51,28 +33,34 @@ struct ipoib_txparms { u8 entropy; }; -static u64 hfi1_ipoib_txreqs(const u64 sent, const u64 completed) +static struct ipoib_txreq * +hfi1_txreq_from_idx(struct hfi1_ipoib_circ_buf *r, u32 idx) +{ + return (struct ipoib_txreq *)(r->items + (idx << r->shift)); +} + +static u32 hfi1_ipoib_txreqs(const u64 sent, const u64 completed) { return sent - completed; } static u64 hfi1_ipoib_used(struct hfi1_ipoib_txq *txq) { - return hfi1_ipoib_txreqs(txq->sent_txreqs, - atomic64_read(&txq->complete_txreqs)); + return hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs, + txq->tx_ring.complete_txreqs); } static void hfi1_ipoib_stop_txq(struct hfi1_ipoib_txq *txq) { trace_hfi1_txq_stop(txq); - if 
(atomic_inc_return(&txq->stops) == 1) + if (atomic_inc_return(&txq->tx_ring.stops) == 1) netif_stop_subqueue(txq->priv->netdev, txq->q_idx); } static void hfi1_ipoib_wake_txq(struct hfi1_ipoib_txq *txq) { trace_hfi1_txq_wake(txq); - if (atomic_dec_and_test(&txq->stops)) + if (atomic_dec_and_test(&txq->tx_ring.stops)) netif_wake_subqueue(txq->priv->netdev, txq->q_idx); } @@ -90,9 +78,9 @@ static uint hfi1_ipoib_ring_lwat(struct hfi1_ipoib_txq *txq) static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq) { - ++txq->sent_txreqs; + ++txq->tx_ring.sent_txreqs; if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq) && - !atomic_xchg(&txq->ring_full, 1)) { + !atomic_xchg(&txq->tx_ring.ring_full, 1)) { trace_hfi1_txq_full(txq); hfi1_ipoib_stop_txq(txq); } @@ -117,7 +105,7 @@ static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq) * to protect against ring overflow. */ if (hfi1_ipoib_used(txq) < hfi1_ipoib_ring_lwat(txq) && - atomic_xchg(&txq->ring_full, 0)) { + atomic_xchg(&txq->tx_ring.ring_full, 0)) { trace_hfi1_txq_xmit_unstopped(txq); hfi1_ipoib_wake_txq(txq); } @@ -125,7 +113,7 @@ static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq) static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget) { - struct hfi1_ipoib_dev_priv *priv = tx->priv; + struct hfi1_ipoib_dev_priv *priv = tx->txq->priv; if (likely(!tx->sdma_status)) { dev_sw_netstats_tx_add(priv->netdev, 1, tx->skb->len); @@ -139,97 +127,73 @@ static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget) } napi_consume_skb(tx->skb, budget); + tx->skb = NULL; sdma_txclean(priv->dd, &tx->txreq); - kmem_cache_free(priv->txreq_cache, tx); } -static int hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq, int budget) +static void hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq) { struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring; - unsigned long head; - unsigned long tail; - unsigned int max_tx; - int work_done; - int tx_count; - - spin_lock_bh(&tx_ring->consumer_lock); - - /* Read index before reading contents at that index. */ - head = smp_load_acquire(&tx_ring->head); - tail = tx_ring->tail; - max_tx = tx_ring->max_items; - - work_done = min_t(int, CIRC_CNT(head, tail, max_tx), budget); + int i; + struct ipoib_txreq *tx; - for (tx_count = work_done; tx_count; tx_count--) { - hfi1_ipoib_free_tx(tx_ring->items[tail], budget); - tail = CIRC_NEXT(tail, max_tx); + for (i = 0; i < tx_ring->max_items; i++) { + tx = hfi1_txreq_from_idx(tx_ring, i); + tx->complete = 0; + dev_kfree_skb_any(tx->skb); + tx->skb = NULL; + sdma_txclean(txq->priv->dd, &tx->txreq); } + tx_ring->head = 0; + tx_ring->tail = 0; + tx_ring->complete_txreqs = 0; + tx_ring->sent_txreqs = 0; + tx_ring->avail = hfi1_ipoib_ring_hwat(txq); +} - atomic64_add(work_done, &txq->complete_txreqs); +static int hfi1_ipoib_poll_tx_ring(struct napi_struct *napi, int budget) +{ + struct hfi1_ipoib_txq *txq = + container_of(napi, struct hfi1_ipoib_txq, napi); + struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring; + u32 head = tx_ring->head; + u32 max_tx = tx_ring->max_items; + int work_done; + struct ipoib_txreq *tx = hfi1_txreq_from_idx(tx_ring, head); - /* Finished freeing tx items so store the tail value. 
*/ - smp_store_release(&tx_ring->tail, tail); + trace_hfi1_txq_poll(txq); + for (work_done = 0; work_done < budget; work_done++) { + /* See hfi1_ipoib_sdma_complete() */ + if (!smp_load_acquire(&tx->complete)) + break; + tx->complete = 0; + trace_hfi1_tx_produce(tx, head); + hfi1_ipoib_free_tx(tx, budget); + head = CIRC_NEXT(head, max_tx); + tx = hfi1_txreq_from_idx(tx_ring, head); + } + tx_ring->complete_txreqs += work_done; - spin_unlock_bh(&tx_ring->consumer_lock); + /* Finished freeing tx items so store the head value. */ + smp_store_release(&tx_ring->head, head); hfi1_ipoib_check_queue_stopped(txq); - return work_done; -} - -static int hfi1_ipoib_process_tx_ring(struct napi_struct *napi, int budget) -{ - struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(napi->dev); - struct hfi1_ipoib_txq *txq = &priv->txqs[napi - priv->tx_napis]; - - int work_done = hfi1_ipoib_drain_tx_ring(txq, budget); - if (work_done < budget) napi_complete_done(napi, work_done); return work_done; } -static void hfi1_ipoib_add_tx(struct ipoib_txreq *tx) -{ - struct hfi1_ipoib_circ_buf *tx_ring = &tx->txq->tx_ring; - unsigned long head; - unsigned long tail; - size_t max_tx; - - spin_lock(&tx_ring->producer_lock); - - head = tx_ring->head; - tail = READ_ONCE(tx_ring->tail); - max_tx = tx_ring->max_items; - - if (likely(CIRC_SPACE(head, tail, max_tx))) { - tx_ring->items[head] = tx; - - /* Finish storing txreq before incrementing head. */ - smp_store_release(&tx_ring->head, CIRC_ADD(head, 1, max_tx)); - napi_schedule_irqoff(tx->txq->napi); - } else { - struct hfi1_ipoib_txq *txq = tx->txq; - struct hfi1_ipoib_dev_priv *priv = tx->priv; - - /* Ring was full */ - hfi1_ipoib_free_tx(tx, 0); - atomic64_inc(&txq->complete_txreqs); - dd_dev_dbg(priv->dd, "txq %d full.\n", txq->q_idx); - } - - spin_unlock(&tx_ring->producer_lock); -} - static void hfi1_ipoib_sdma_complete(struct sdma_txreq *txreq, int status) { struct ipoib_txreq *tx = container_of(txreq, struct ipoib_txreq, txreq); + trace_hfi1_txq_complete(tx->txq); tx->sdma_status = status; - - hfi1_ipoib_add_tx(tx); + /* see hfi1_ipoib_poll_tx_ring */ + smp_store_release(&tx->complete, 1); + napi_schedule_irqoff(&tx->txq->napi); } static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx, @@ -291,7 +255,7 @@ static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx, static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx, struct ipoib_txparms *txp) { - struct hfi1_ipoib_dev_priv *priv = tx->priv; + struct hfi1_ipoib_dev_priv *priv = tx->txq->priv; struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr; struct sk_buff *skb = tx->skb; struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp); @@ -362,7 +326,7 @@ static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx, ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(txp->dqpn); - ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->sent_txreqs)); + ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->tx_ring.sent_txreqs)); /* Build the deth */ ohdr->u.ud.deth[0] = cpu_to_be32(priv->qkey); @@ -385,19 +349,32 @@ static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev, struct ipoib_txparms *txp) { struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + struct hfi1_ipoib_txq *txq = txp->txq; struct ipoib_txreq *tx; + struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring; + u32 tail = tx_ring->tail; int ret; - tx = kmem_cache_alloc_node(priv->txreq_cache, - GFP_ATOMIC, - priv->dd->node); - if (unlikely(!tx)) - return ERR_PTR(-ENOMEM); + if (unlikely(!tx_ring->avail)) { + u32 head; 
+ + if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq)) + /* This shouldn't happen with a stopped queue */ + return ERR_PTR(-ENOMEM); + /* See hfi1_ipoib_poll_tx_ring() */ + head = smp_load_acquire(&tx_ring->head); + tx_ring->avail = + min_t(u32, hfi1_ipoib_ring_hwat(txq), + CIRC_CNT(head, tail, tx_ring->max_items)); + } else { + tx_ring->avail--; + } + tx = hfi1_txreq_from_idx(tx_ring, tail); + trace_hfi1_txq_alloc_tx(txq); /* so that we can test if the sdma descriptors are there */ tx->txreq.num_desc = 0; - tx->priv = priv; - tx->txq = txp->txq; + tx->txq = txq; tx->skb = skb; INIT_LIST_HEAD(&tx->txreq.list); @@ -405,21 +382,20 @@ static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev, ret = hfi1_ipoib_build_tx_desc(tx, txp); if (likely(!ret)) { - if (txp->txq->flow.as_int != txp->flow.as_int) { - txp->txq->flow.tx_queue = txp->flow.tx_queue; - txp->txq->flow.sc5 = txp->flow.sc5; - txp->txq->sde = + if (txq->flow.as_int != txp->flow.as_int) { + txq->flow.tx_queue = txp->flow.tx_queue; + txq->flow.sc5 = txp->flow.sc5; + txq->sde = sdma_select_engine_sc(priv->dd, txp->flow.tx_queue, txp->flow.sc5); - trace_hfi1_flow_switch(txp->txq); + trace_hfi1_flow_switch(txq); } return tx; } sdma_txclean(priv->dd, &tx->txreq); - kmem_cache_free(priv->txreq_cache, tx); return ERR_PTR(ret); } @@ -480,8 +456,8 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev, struct sk_buff *skb, struct ipoib_txparms *txp) { - struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); struct hfi1_ipoib_txq *txq = txp->txq; + struct hfi1_ipoib_circ_buf *tx_ring; struct ipoib_txreq *tx; int ret; @@ -499,10 +475,14 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev, return NETDEV_TX_OK; } + tx_ring = &txq->tx_ring; + trace_hfi1_tx_consume(tx, tx_ring->tail); + /* consume tx */ + smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items)); ret = hfi1_ipoib_submit_tx(txq, tx); if (likely(!ret)) { tx_ok: - trace_sdma_output_ibhdr(tx->priv->dd, + trace_sdma_output_ibhdr(txq->priv->dd, &tx->sdma_hdr.hdr, ib_is_sc5(txp->flow.sc5)); hfi1_ipoib_check_queue_depth(txq); @@ -514,9 +494,10 @@ tx_ok: if (ret == -EBUSY || ret == -ECOMM) goto tx_ok; - sdma_txclean(priv->dd, &tx->txreq); - dev_kfree_skb_any(skb); - kmem_cache_free(priv->txreq_cache, tx); + /* mark complete and kick napi tx */ + smp_store_release(&tx->complete, 1); + napi_schedule(&tx->txq->napi); + ++dev->stats.tx_carrier_errors; return NETDEV_TX_OK; @@ -527,6 +508,7 @@ static int hfi1_ipoib_send_dma_list(struct net_device *dev, struct ipoib_txparms *txp) { struct hfi1_ipoib_txq *txq = txp->txq; + struct hfi1_ipoib_circ_buf *tx_ring; struct ipoib_txreq *tx; /* Has the flow change ? 
*/ @@ -556,11 +538,15 @@ static int hfi1_ipoib_send_dma_list(struct net_device *dev, return NETDEV_TX_OK; } + tx_ring = &txq->tx_ring; + trace_hfi1_tx_consume(tx, tx_ring->tail); + /* consume tx */ + smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items)); list_add_tail(&tx->txreq.list, &txq->tx_list); hfi1_ipoib_check_queue_depth(txq); - trace_sdma_output_ibhdr(tx->priv->dd, + trace_sdma_output_ibhdr(txq->priv->dd, &tx->sdma_hdr.hdr, ib_is_sc5(txp->flow.sc5)); @@ -646,7 +632,7 @@ static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde, if (list_empty(&txq->wait.list)) { struct hfi1_ibport *ibp = &sde->ppd->ibport_data; - if (!atomic_xchg(&txq->no_desc, 1)) { + if (!atomic_xchg(&txq->tx_ring.no_desc, 1)) { trace_hfi1_txq_queued(txq); hfi1_ipoib_stop_txq(txq); } @@ -689,45 +675,29 @@ static void hfi1_ipoib_flush_txq(struct work_struct *work) if (likely(dev->reg_state == NETREG_REGISTERED) && likely(!hfi1_ipoib_flush_tx_list(dev, txq))) - if (atomic_xchg(&txq->no_desc, 0)) + if (atomic_xchg(&txq->tx_ring.no_desc, 0)) hfi1_ipoib_wake_txq(txq); } int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) { struct net_device *dev = priv->netdev; - char buf[HFI1_IPOIB_TXREQ_NAME_LEN]; - unsigned long tx_ring_size; + u32 tx_ring_size, tx_item_size; int i; /* * Ring holds 1 less than tx_ring_size * Round up to next power of 2 in order to hold at least tx_queue_len */ - tx_ring_size = roundup_pow_of_two((unsigned long)dev->tx_queue_len + 1); - - snprintf(buf, sizeof(buf), "hfi1_%u_ipoib_txreq_cache", priv->dd->unit); - priv->txreq_cache = kmem_cache_create(buf, - sizeof(struct ipoib_txreq), - 0, - 0, - NULL); - if (!priv->txreq_cache) - return -ENOMEM; - - priv->tx_napis = kcalloc_node(dev->num_tx_queues, - sizeof(struct napi_struct), - GFP_KERNEL, - priv->dd->node); - if (!priv->tx_napis) - goto free_txreq_cache; + tx_ring_size = roundup_pow_of_two(dev->tx_queue_len + 1); + tx_item_size = roundup_pow_of_two(sizeof(struct ipoib_txreq)); priv->txqs = kcalloc_node(dev->num_tx_queues, sizeof(struct hfi1_ipoib_txq), GFP_KERNEL, priv->dd->node); if (!priv->txqs) - goto free_tx_napis; + return -ENOMEM; for (i = 0; i < dev->num_tx_queues; i++) { struct hfi1_ipoib_txq *txq = &priv->txqs[i]; @@ -743,10 +713,9 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) txq->priv = priv; txq->sde = NULL; INIT_LIST_HEAD(&txq->tx_list); - atomic64_set(&txq->complete_txreqs, 0); - atomic_set(&txq->stops, 0); - atomic_set(&txq->ring_full, 0); - atomic_set(&txq->no_desc, 0); + atomic_set(&txq->tx_ring.stops, 0); + atomic_set(&txq->tx_ring.ring_full, 0); + atomic_set(&txq->tx_ring.no_desc, 0); txq->q_idx = i; txq->flow.tx_queue = 0xff; txq->flow.sc5 = 0xff; @@ -756,19 +725,17 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) priv->dd->node); txq->tx_ring.items = - kcalloc_node(tx_ring_size, - sizeof(struct ipoib_txreq *), + kcalloc_node(tx_ring_size, tx_item_size, GFP_KERNEL, priv->dd->node); if (!txq->tx_ring.items) goto free_txqs; - spin_lock_init(&txq->tx_ring.producer_lock); - spin_lock_init(&txq->tx_ring.consumer_lock); txq->tx_ring.max_items = tx_ring_size; + txq->tx_ring.shift = ilog2(tx_ring_size); + txq->tx_ring.avail = hfi1_ipoib_ring_hwat(txq); - txq->napi = &priv->tx_napis[i]; - netif_tx_napi_add(dev, txq->napi, - hfi1_ipoib_process_tx_ring, + netif_tx_napi_add(dev, &txq->napi, + hfi1_ipoib_poll_tx_ring, NAPI_POLL_WEIGHT); } @@ -778,20 +745,12 @@ free_txqs: for (i--; i >= 0; i--) { struct hfi1_ipoib_txq *txq = &priv->txqs[i]; - netif_napi_del(txq->napi); + 
netif_napi_del(&txq->napi); kfree(txq->tx_ring.items); } kfree(priv->txqs); priv->txqs = NULL; - -free_tx_napis: - kfree(priv->tx_napis); - priv->tx_napis = NULL; - -free_txreq_cache: - kmem_cache_destroy(priv->txreq_cache); - priv->txreq_cache = NULL; return -ENOMEM; } @@ -799,7 +758,6 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq) { struct sdma_txreq *txreq; struct sdma_txreq *txreq_tmp; - atomic64_t *complete_txreqs = &txq->complete_txreqs; list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) { struct ipoib_txreq *tx = @@ -808,16 +766,16 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq) list_del(&txreq->list); sdma_txclean(txq->priv->dd, &tx->txreq); dev_kfree_skb_any(tx->skb); - kmem_cache_free(txq->priv->txreq_cache, tx); - atomic64_inc(complete_txreqs); + tx->skb = NULL; + txq->tx_ring.complete_txreqs++; } if (hfi1_ipoib_used(txq)) dd_dev_warn(txq->priv->dd, - "txq %d not empty found %llu requests\n", + "txq %d not empty found %u requests\n", txq->q_idx, - hfi1_ipoib_txreqs(txq->sent_txreqs, - atomic64_read(complete_txreqs))); + hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs, + txq->tx_ring.complete_txreqs)); } void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv) @@ -830,19 +788,13 @@ void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv) iowait_cancel_work(&txq->wait); iowait_sdma_drain(&txq->wait); hfi1_ipoib_drain_tx_list(txq); - netif_napi_del(txq->napi); - (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items); + netif_napi_del(&txq->napi); + hfi1_ipoib_drain_tx_ring(txq); kfree(txq->tx_ring.items); } kfree(priv->txqs); priv->txqs = NULL; - - kfree(priv->tx_napis); - priv->tx_napis = NULL; - - kmem_cache_destroy(priv->txreq_cache); - priv->txreq_cache = NULL; } void hfi1_ipoib_napi_tx_enable(struct net_device *dev) @@ -853,7 +805,7 @@ void hfi1_ipoib_napi_tx_enable(struct net_device *dev) for (i = 0; i < dev->num_tx_queues; i++) { struct hfi1_ipoib_txq *txq = &priv->txqs[i]; - napi_enable(txq->napi); + napi_enable(&txq->napi); } } @@ -865,8 +817,8 @@ void hfi1_ipoib_napi_tx_disable(struct net_device *dev) for (i = 0; i < dev->num_tx_queues; i++) { struct hfi1_ipoib_txq *txq = &priv->txqs[i]; - napi_disable(txq->napi); - (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items); + napi_disable(&txq->napi); + hfi1_ipoib_drain_tx_ring(txq); } } @@ -874,23 +826,23 @@ void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q) { struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); struct hfi1_ipoib_txq *txq = &priv->txqs[q]; - u64 completed = atomic64_read(&txq->complete_txreqs); dd_dev_info(priv->dd, "timeout txq %p q %u stopped %u stops %d no_desc %d ring_full %d\n", txq, q, __netif_subqueue_stopped(dev, txq->q_idx), - atomic_read(&txq->stops), - atomic_read(&txq->no_desc), - atomic_read(&txq->ring_full)); + atomic_read(&txq->tx_ring.stops), + atomic_read(&txq->tx_ring.no_desc), + atomic_read(&txq->tx_ring.ring_full)); dd_dev_info(priv->dd, "sde %p engine %u\n", txq->sde, txq->sde ? 
txq->sde->this_idx : 0); dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int); dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n", - txq->sent_txreqs, completed, hfi1_ipoib_used(txq)); - dd_dev_info(priv->dd, "tx_queue_len %u max_items %lu\n", + txq->tx_ring.sent_txreqs, txq->tx_ring.complete_txreqs, + hfi1_ipoib_used(txq)); + dd_dev_info(priv->dd, "tx_queue_len %u max_items %u\n", dev->tx_queue_len, txq->tx_ring.max_items); - dd_dev_info(priv->dd, "head %lu tail %lu\n", + dd_dev_info(priv->dd, "head %u tail %u\n", txq->tx_ring.head, txq->tx_ring.tail); dd_dev_info(priv->dd, "wait queued %u\n", !list_empty(&txq->wait.list)); diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h index 7318aa6150b5..ed1b9e1e4b17 100644 --- a/drivers/infiniband/hw/hfi1/trace_tx.h +++ b/drivers/infiniband/hw/hfi1/trace_tx.h @@ -917,20 +917,22 @@ DECLARE_EVENT_CLASS(/* AIP */ __entry->tail = txq->tx_ring.tail; __entry->idx = txq->q_idx; __entry->used = - txq->sent_txreqs - - atomic64_read(&txq->complete_txreqs); + txq->tx_ring.sent_txreqs - + txq->tx_ring.complete_txreqs; __entry->flow = txq->flow.as_int; - __entry->stops = atomic_read(&txq->stops); - __entry->no_desc = atomic_read(&txq->no_desc); + __entry->stops = atomic_read(&txq->tx_ring.stops); + __entry->no_desc = atomic_read(&txq->tx_ring.no_desc); __entry->stopped = __netif_subqueue_stopped(txq->priv->netdev, txq->q_idx); ), TP_printk(/* print */ - "[%s] txq %llx idx %u sde %llx head %lx tail %lx flow %x used %u stops %d no_desc %d stopped %u", + "[%s] txq %llx idx %u sde %llx:%u cpu %d head %lx tail %lx flow %x used %u stops %d no_desc %d stopped %u", __get_str(dev), (unsigned long long)__entry->txq, __entry->idx, (unsigned long long)__entry->sde, + __entry->sde ? __entry->sde->this_idx : 0, + __entry->sde ? __entry->sde->cpu : 0, __entry->head, __entry->tail, __entry->flow, @@ -995,6 +997,65 @@ DEFINE_EVENT(/* xmit_unstopped */ TP_ARGS(txq) ); +DECLARE_EVENT_CLASS(/* AIP */ + hfi1_ipoib_tx_template, + TP_PROTO(struct ipoib_txreq *tx, u32 idx), + TP_ARGS(tx, idx), + TP_STRUCT__entry(/* entry */ + DD_DEV_ENTRY(tx->txq->priv->dd) + __field(struct ipoib_txreq *, tx) + __field(struct hfi1_ipoib_txq *, txq) + __field(struct sk_buff *, skb) + __field(ulong, idx) + ), + TP_fast_assign(/* assign */ + DD_DEV_ASSIGN(tx->txq->priv->dd); + __entry->tx = tx; + __entry->skb = tx->skb; + __entry->txq = tx->txq; + __entry->idx = idx; + ), + TP_printk(/* print */ + "[%s] tx %llx txq %llx,%u skb %llx idx %lu", + __get_str(dev), + (unsigned long long)__entry->tx, + (unsigned long long)__entry->txq, + __entry->txq ? 
__entry->txq->q_idx : 0, + (unsigned long long)__entry->skb, + __entry->idx + ) +); + +DEFINE_EVENT(/* produce */ + hfi1_ipoib_tx_template, hfi1_tx_produce, + TP_PROTO(struct ipoib_txreq *tx, u32 idx), + TP_ARGS(tx, idx) +); + +DEFINE_EVENT(/* consume */ + hfi1_ipoib_tx_template, hfi1_tx_consume, + TP_PROTO(struct ipoib_txreq *tx, u32 idx), + TP_ARGS(tx, idx) +); + +DEFINE_EVENT(/* alloc_tx */ + hfi1_ipoib_txq_template, hfi1_txq_alloc_tx, + TP_PROTO(struct hfi1_ipoib_txq *txq), + TP_ARGS(txq) +); + +DEFINE_EVENT(/* poll */ + hfi1_ipoib_txq_template, hfi1_txq_poll, + TP_PROTO(struct hfi1_ipoib_txq *txq), + TP_ARGS(txq) +); + +DEFINE_EVENT(/* complete */ + hfi1_ipoib_txq_template, hfi1_txq_complete, + TP_PROTO(struct hfi1_ipoib_txq *txq), + TP_ARGS(txq) +); + #endif /* __HFI1_TRACE_TX_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 0c86e9d354f8..186d30291260 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -692,8 +692,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, * Allocate the node first so we can handle a potential * failure before we've programmed anything. */ - node = kzalloc(sizeof(*node) + (sizeof(struct page *) * npages), - GFP_KERNEL); + node = kzalloc(struct_size(node, pages, npages), GFP_KERNEL); if (!node) return -ENOMEM; @@ -713,7 +712,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, node->dma_addr = phys; node->grp = grp; node->freed = false; - memcpy(node->pages, pages, sizeof(struct page *) * npages); + memcpy(node->pages, pages, flex_array_size(node, pages, npages)); if (fd->use_mn) { ret = mmu_interval_notifier_insert( diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 26bea51869bf..ed9fa0d84e9e 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1602,8 +1602,8 @@ static const char * const driver_cntr_names[] = { }; static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */ -static const char **dev_cntr_names; -static const char **port_cntr_names; +static struct rdma_stat_desc *dev_cntr_descs; +static struct rdma_stat_desc *port_cntr_descs; int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names); static int num_dev_cntrs; static int num_port_cntrs; @@ -1614,13 +1614,12 @@ static int cntr_names_initialized; * strings. Optionally some entries can be reserved in the array to hold extra * external strings. 
*/ -static int init_cntr_names(const char *names_in, - const size_t names_len, - int num_extra_names, - int *num_cntrs, - const char ***cntr_names) +static int init_cntr_names(const char *names_in, const size_t names_len, + int num_extra_names, int *num_cntrs, + struct rdma_stat_desc **cntr_descs) { - char *names_out, *p, **q; + struct rdma_stat_desc *q; + char *names_out, *p; int i, n; n = 0; @@ -1628,26 +1627,28 @@ static int init_cntr_names(const char *names_in, if (names_in[i] == '\n') n++; - names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len, - GFP_KERNEL); + names_out = + kmalloc((n + num_extra_names) * sizeof(struct rdma_stat_desc) + + names_len, + GFP_KERNEL); if (!names_out) { *num_cntrs = 0; - *cntr_names = NULL; + *cntr_descs = NULL; return -ENOMEM; } - p = names_out + (n + num_extra_names) * sizeof(char *); + p = names_out + (n + num_extra_names) * sizeof(struct rdma_stat_desc); memcpy(p, names_in, names_len); - q = (char **)names_out; + q = (struct rdma_stat_desc *)names_out; for (i = 0; i < n; i++) { - q[i] = p; + q[i].name = p; p = strchr(p, '\n'); *p++ = '\0'; } *num_cntrs = n; - *cntr_names = (const char **)names_out; + *cntr_descs = (struct rdma_stat_desc *)names_out; return 0; } @@ -1661,18 +1662,18 @@ static int init_counters(struct ib_device *ibdev) goto out_unlock; err = init_cntr_names(dd->cntrnames, dd->cntrnameslen, num_driver_cntrs, - &num_dev_cntrs, &dev_cntr_names); + &num_dev_cntrs, &dev_cntr_descs); if (err) goto out_unlock; for (i = 0; i < num_driver_cntrs; i++) - dev_cntr_names[num_dev_cntrs + i] = driver_cntr_names[i]; + dev_cntr_descs[num_dev_cntrs + i].name = driver_cntr_names[i]; err = init_cntr_names(dd->portcntrnames, dd->portcntrnameslen, 0, - &num_port_cntrs, &port_cntr_names); + &num_port_cntrs, &port_cntr_descs); if (err) { - kfree(dev_cntr_names); - dev_cntr_names = NULL; + kfree(dev_cntr_descs); + dev_cntr_descs = NULL; goto out_unlock; } cntr_names_initialized = 1; @@ -1686,7 +1687,7 @@ static struct rdma_hw_stats *hfi1_alloc_hw_device_stats(struct ib_device *ibdev) { if (init_counters(ibdev)) return NULL; - return rdma_alloc_hw_stats_struct(dev_cntr_names, + return rdma_alloc_hw_stats_struct(dev_cntr_descs, num_dev_cntrs + num_driver_cntrs, RDMA_HW_STATS_DEFAULT_LIFESPAN); } @@ -1696,7 +1697,7 @@ static struct rdma_hw_stats *hfi_alloc_hw_port_stats(struct ib_device *ibdev, { if (init_counters(ibdev)) return NULL; - return rdma_alloc_hw_stats_struct(port_cntr_names, num_port_cntrs, + return rdma_alloc_hw_stats_struct(port_cntr_descs, num_port_cntrs, RDMA_HW_STATS_DEFAULT_LIFESPAN); } @@ -1921,10 +1922,10 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd) verbs_txreq_exit(dev); mutex_lock(&cntr_names_lock); - kfree(dev_cntr_names); - kfree(port_cntr_names); - dev_cntr_names = NULL; - port_cntr_names = NULL; + kfree(dev_cntr_descs); + kfree(port_cntr_descs); + dev_cntr_descs = NULL; + port_cntr_descs = NULL; cntr_names_initialized = 0; mutex_unlock(&cntr_names_lock); } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 9467c39e3d28..43e17d61cb63 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -225,11 +225,24 @@ struct hns_roce_uar { unsigned long logic_idx; }; +enum hns_roce_mmap_type { + HNS_ROCE_MMAP_TYPE_DB = 1, + HNS_ROCE_MMAP_TYPE_TPTR, +}; + +struct hns_user_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; + enum hns_roce_mmap_type mmap_type; + u64 address; +}; + struct hns_roce_ucontext { 
struct ib_ucontext ibucontext; struct hns_roce_uar uar; struct list_head page_list; struct mutex page_mutex; + struct hns_user_mmap_entry *db_mmap_entry; + struct hns_user_mmap_entry *tptr_mmap_entry; }; struct hns_roce_pd { @@ -898,7 +911,8 @@ struct hns_roce_hw { bool (*chk_mbox_avail)(struct hns_roce_dev *hr_dev, bool *is_busy); int (*set_gid)(struct hns_roce_dev *hr_dev, u32 port, int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr); - int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr); + int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, + const u8 *addr); void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port, enum ib_mtu mtu); int (*write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, @@ -1049,6 +1063,12 @@ static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq) return container_of(ibsrq, struct hns_roce_srq, ibsrq); } +static inline struct hns_user_mmap_entry * +to_hns_mmap(struct rdma_user_mmap_entry *rdma_entry) +{ + return container_of(rdma_entry, struct hns_user_mmap_entry, rdma_entry); +} + static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest) { writeq(*(u64 *)val, dest); @@ -1259,4 +1279,8 @@ int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq); +struct hns_user_mmap_entry * +hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, + size_t length, + enum hns_roce_mmap_type mmap_type); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index e0f59b8d7d5d..f4af3992ba95 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -90,11 +90,11 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, unsigned long flags = 0; void *wqe = NULL; __le32 doorbell[2]; + const u8 *smac; int ret = 0; int loopback; u32 wqe_idx; int nreq; - u8 *smac; if (unlikely(ibqp->qp_type != IB_QPT_GSI && ibqp->qp_type != IB_QPT_RC)) { @@ -154,7 +154,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, UD_SEND_WQE_U32_8_DMAC_5_S, ah->av.mac[5]); - smac = (u8 *)hr_dev->dev_addr[qp->port]; + smac = (const u8 *)hr_dev->dev_addr[qp->port]; loopback = ether_addr_equal_unaligned(ah->av.mac, smac) ? 1 : 0; roce_set_bit(ud_sq_wqe->u32_8, @@ -1782,7 +1782,7 @@ static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u32 port, } static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, - u8 *addr) + const u8 *addr) { u32 reg_smac_l; u16 reg_smac_h; @@ -2743,12 +2743,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, __le32 doorbell[2] = {0}; u64 *mtts_2 = NULL; int ret = -EINVAL; + const u8 *smac; u64 sq_ba = 0; u64 rq_ba = 0; u32 port; u32 port_num; u8 *dmac; - u8 *smac; if (!check_qp_state(cur_state, new_state)) { ibdev_err(ibqp->device, @@ -2947,7 +2947,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, port = (attr_mask & IB_QP_PORT) ? 
(attr->port_num - 1) : hr_qp->port; - smac = (u8 *)hr_dev->dev_addr[port]; + smac = (const u8 *)hr_dev->dev_addr[port]; /* when dmac equals smac or loop_idc is 1, it should loopback */ if (ether_addr_equal_unaligned(dmac, smac) || hr_dev->loop_idc == 0x1) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d5f3faa1627a..9bfbaddd1763 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1165,32 +1165,22 @@ static int hns_roce_alloc_cmq_desc(struct hns_roce_dev *hr_dev, { int size = ring->desc_num * sizeof(struct hns_roce_cmq_desc); - ring->desc = kzalloc(size, GFP_KERNEL); + ring->desc = dma_alloc_coherent(hr_dev->dev, size, + &ring->desc_dma_addr, GFP_KERNEL); if (!ring->desc) return -ENOMEM; - ring->desc_dma_addr = dma_map_single(hr_dev->dev, ring->desc, size, - DMA_BIDIRECTIONAL); - if (dma_mapping_error(hr_dev->dev, ring->desc_dma_addr)) { - ring->desc_dma_addr = 0; - kfree(ring->desc); - ring->desc = NULL; - - return -ENOMEM; - } - return 0; } static void hns_roce_free_cmq_desc(struct hns_roce_dev *hr_dev, struct hns_roce_v2_cmq_ring *ring) { - dma_unmap_single(hr_dev->dev, ring->desc_dma_addr, - ring->desc_num * sizeof(struct hns_roce_cmq_desc), - DMA_BIDIRECTIONAL); + dma_free_coherent(hr_dev->dev, + ring->desc_num * sizeof(struct hns_roce_cmq_desc), + ring->desc, ring->desc_dma_addr); ring->desc_dma_addr = 0; - kfree(ring->desc); } static int init_csq(struct hns_roce_dev *hr_dev, @@ -2992,7 +2982,7 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u32 port, } static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, - u8 *addr) + const u8 *addr) { struct hns_roce_cmq_desc desc; struct hns_roce_cfg_smac_tb *smac_tb = @@ -3328,7 +3318,7 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, memset(cq_context, 0, sizeof(*cq_context)); hr_reg_write(cq_context, CQC_CQ_ST, V2_CQ_STATE_VALID); - hr_reg_write(cq_context, CQC_ARM_ST, REG_NXT_CEQE); + hr_reg_write(cq_context, CQC_ARM_ST, NO_ARMED); hr_reg_write(cq_context, CQC_SHIFT, ilog2(hr_cq->cq_depth)); hr_reg_write(cq_context, CQC_CEQN, hr_cq->vector); hr_reg_write(cq_context, CQC_CQN, hr_cq->cqn); @@ -4318,10 +4308,10 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, dma_addr_t trrl_ba; dma_addr_t irrl_ba; enum ib_mtu ib_mtu; + const u8 *smac; u8 lp_pktn_ini; u64 *mtts; u8 *dmac; - u8 *smac; u32 port; int mtu; int ret; @@ -4374,7 +4364,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, port = (attr_mask & IB_QP_PORT) ? 
(attr->port_num - 1) : hr_qp->port; - smac = (u8 *)hr_dev->dev_addr[port]; + smac = (const u8 *)hr_dev->dev_addr[port]; dmac = (u8 *)attr->ah_attr.roce.dmac; /* when dmac equals smac or loop_idc is 1, it should loopback */ if (ether_addr_equal_unaligned(dmac, smac) || @@ -4399,8 +4389,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, mtu = ib_mtu_enum_to_int(ib_mtu); if (WARN_ON(mtu <= 0)) return -EINVAL; -#define MAX_LP_MSG_LEN 65536 - /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 64KB */ +#define MAX_LP_MSG_LEN 16384 + /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 16KB */ lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu); if (WARN_ON(lp_pktn_ini >= 0xF)) return -EINVAL; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 5d39bd08582a..4194b626f3c6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -42,7 +42,8 @@ #include "hns_roce_device.h" #include "hns_roce_hem.h" -static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port, u8 *addr) +static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port, + const u8 *addr) { u8 phy_port; u32 i; @@ -291,6 +292,79 @@ static int hns_roce_modify_device(struct ib_device *ib_dev, int mask, return 0; } +struct hns_user_mmap_entry * +hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, + size_t length, + enum hns_roce_mmap_type mmap_type) +{ + struct hns_user_mmap_entry *entry; + int ret; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return NULL; + + entry->address = address; + entry->mmap_type = mmap_type; + + ret = rdma_user_mmap_entry_insert_exact( + ucontext, &entry->rdma_entry, length, + mmap_type == HNS_ROCE_MMAP_TYPE_DB ? 0 : 1); + if (ret) { + kfree(entry); + return NULL; + } + + return entry; +} + +static void hns_roce_dealloc_uar_entry(struct hns_roce_ucontext *context) +{ + if (context->db_mmap_entry) + rdma_user_mmap_entry_remove( + &context->db_mmap_entry->rdma_entry); + + if (context->tptr_mmap_entry) + rdma_user_mmap_entry_remove( + &context->tptr_mmap_entry->rdma_entry); +} + +static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx) +{ + struct hns_roce_ucontext *context = to_hr_ucontext(uctx); + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device); + u64 address; + int ret; + + address = context->uar.pfn << PAGE_SHIFT; + context->db_mmap_entry = hns_roce_user_mmap_entry_insert( + uctx, address, PAGE_SIZE, HNS_ROCE_MMAP_TYPE_DB); + if (!context->db_mmap_entry) + return -ENOMEM; + + if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size) + return 0; + + /* + * FIXME: using io_remap_pfn_range on the dma address returned + * by dma_alloc_coherent is totally wrong. 
+ */ + context->tptr_mmap_entry = + hns_roce_user_mmap_entry_insert(uctx, hr_dev->tptr_dma_addr, + hr_dev->tptr_size, + HNS_ROCE_MMAP_TYPE_TPTR); + if (!context->tptr_mmap_entry) { + ret = -ENOMEM; + goto err; + } + + return 0; + +err: + hns_roce_dealloc_uar_entry(context); + return ret; +} + static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { @@ -309,6 +383,10 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, if (ret) goto error_fail_uar_alloc; + ret = hns_roce_alloc_uar_entry(uctx); + if (ret) + goto error_fail_uar_entry; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) { INIT_LIST_HEAD(&context->page_list); @@ -325,6 +403,9 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, return 0; error_fail_copy_to_udata: + hns_roce_dealloc_uar_entry(context); + +error_fail_uar_entry: ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx); error_fail_uar_alloc: @@ -336,39 +417,43 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext); struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device); + hns_roce_dealloc_uar_entry(context); + ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx); } -static int hns_roce_mmap(struct ib_ucontext *context, - struct vm_area_struct *vma) +static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma) { - struct hns_roce_dev *hr_dev = to_hr_dev(context->device); - - switch (vma->vm_pgoff) { - case 0: - return rdma_user_mmap_io(context, vma, - to_hr_ucontext(context)->uar.pfn, - PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot), - NULL); - - /* vm_pgoff: 1 -- TPTR */ - case 1: - if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size) - return -EINVAL; - /* - * FIXME: using io_remap_pfn_range on the dma address returned - * by dma_alloc_coherent is totally wrong. 
- */ - return rdma_user_mmap_io(context, vma, - hr_dev->tptr_dma_addr >> PAGE_SHIFT, - hr_dev->tptr_size, - vma->vm_page_prot, - NULL); + struct rdma_user_mmap_entry *rdma_entry; + struct hns_user_mmap_entry *entry; + phys_addr_t pfn; + pgprot_t prot; + int ret; - default: + rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff); + if (!rdma_entry) return -EINVAL; - } + + entry = to_hns_mmap(rdma_entry); + pfn = entry->address >> PAGE_SHIFT; + prot = vma->vm_page_prot; + + if (entry->mmap_type != HNS_ROCE_MMAP_TYPE_TPTR) + prot = pgprot_noncached(prot); + + ret = rdma_user_mmap_io(uctx, vma, pfn, rdma_entry->npages * PAGE_SIZE, + prot, rdma_entry); + + rdma_user_mmap_entry_put(rdma_entry); + + return ret; +} + +static void hns_roce_free_mmap(struct rdma_user_mmap_entry *rdma_entry) +{ + struct hns_user_mmap_entry *entry = to_hns_mmap(rdma_entry); + + kfree(entry); } static int hns_roce_port_immutable(struct ib_device *ib_dev, u32 port_num, @@ -444,6 +529,7 @@ static const struct ib_device_ops hns_roce_dev_ops = { .get_link_layer = hns_roce_get_link_layer, .get_port_immutable = hns_roce_port_immutable, .mmap = hns_roce_mmap, + .mmap_free = hns_roce_free_mmap, .modify_device = hns_roce_modify_device, .modify_qp = hns_roce_modify_qp, .query_ah = hns_roce_query_ah, diff --git a/drivers/infiniband/hw/irdma/cm.h b/drivers/infiniband/hw/irdma/cm.h index d03cd29333ea..3bf42728e9b7 100644 --- a/drivers/infiniband/hw/irdma/cm.h +++ b/drivers/infiniband/hw/irdma/cm.h @@ -159,14 +159,6 @@ enum irdma_cm_event_type { IRDMA_CM_EVENT_ABORTED, }; -struct irdma_bth { /* Base Trasnport Header */ - u8 opcode; - u8 flags; - __be16 pkey; - __be32 qpn; - __be32 apsn; -}; - struct ietf_mpa_v1 { u8 key[IETF_MPA_KEY_SIZE]; u8 flags; @@ -397,7 +389,7 @@ int irdma_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int irdma_create_listen(struct iw_cm_id *cm_id, int backlog); int irdma_destroy_listen(struct iw_cm_id *cm_id); -int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, u8 *mac); +int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, const u8 *mac); void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr, struct irdma_cm_info *nfo, bool disconnect_all); @@ -406,7 +398,7 @@ int irdma_cm_stop(struct irdma_device *dev); bool irdma_ipv4_is_lpb(u32 loc_addr, u32 rem_addr); bool irdma_ipv6_is_lpb(u32 *loc_addr, u32 *rem_addr); int irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, bool ipv4, - u8 *mac_addr, u32 action); + const u8 *mac_addr, u32 action); void irdma_if_notify(struct irdma_device *iwdev, struct net_device *netdev, u32 *ipaddr, bool ipv4, bool ifup); bool irdma_port_in_use(struct irdma_cm_core *cm_core, u16 port); diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index f1e5515256e0..7264f8c2f7d5 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -1420,44 +1420,6 @@ void irdma_sc_send_lsmm(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size, } /** - * irdma_sc_send_lsmm_nostag - for privilege qp - * @qp: sc qp struct - * @lsmm_buf: buffer with lsmm message - * @size: size of lsmm buffer - */ -void irdma_sc_send_lsmm_nostag(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size) -{ - __le64 *wqe; - u64 hdr; - struct irdma_qp_uk *qp_uk; - - qp_uk = &qp->qp_uk; - wqe = qp_uk->sq_base->elem; - - set_64bit_val(wqe, 0, (uintptr_t)lsmm_buf); - - if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1) - 
set_64bit_val(wqe, 8, - FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, size)); - else - set_64bit_val(wqe, 8, - FIELD_PREP(IRDMAQPSQ_FRAG_LEN, size) | - FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity)); - set_64bit_val(wqe, 16, 0); - - hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_RDMA_SEND) | - FIELD_PREP(IRDMAQPSQ_STREAMMODE, 1) | - FIELD_PREP(IRDMAQPSQ_WAITFORRCVPDU, 1) | - FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity); - dma_wmb(); /* make sure WQE is written before valid bit is set */ - - set_64bit_val(wqe, 24, hdr); - - print_hex_dump_debug("WQE: SEND_LSMM_NOSTAG WQE", DUMP_PREFIX_OFFSET, - 16, 8, wqe, IRDMA_QP_WQE_MIN_SIZE, false); -} - -/** * irdma_sc_send_rtt - send last read0 or write0 * @qp: sc qp struct * @read: Do read0 or write0 @@ -2501,7 +2463,6 @@ static inline void irdma_sc_cq_ack(struct irdma_sc_cq *cq) enum irdma_status_code irdma_sc_cq_init(struct irdma_sc_cq *cq, struct irdma_cq_init_info *info) { - enum irdma_status_code ret_code; u32 pble_obj_cnt; pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; @@ -2513,9 +2474,7 @@ enum irdma_status_code irdma_sc_cq_init(struct irdma_sc_cq *cq, cq->ceq_id = info->ceq_id; info->cq_uk_init_info.cqe_alloc_db = cq->dev->cq_arm_db; info->cq_uk_init_info.cq_ack_db = cq->dev->cq_ack_db; - ret_code = irdma_uk_cq_init(&cq->cq_uk, &info->cq_uk_init_info); - if (ret_code) - return ret_code; + irdma_uk_cq_init(&cq->cq_uk, &info->cq_uk_init_info); cq->virtual_map = info->virtual_map; cq->pbl_chunk_size = info->pbl_chunk_size; diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 7de525a5ccf8..4108dcabece2 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -1057,7 +1057,7 @@ static enum irdma_status_code irdma_alloc_set_mac(struct irdma_device *iwdev) &iwdev->mac_ip_table_idx); if (!status) { status = irdma_add_local_mac_entry(iwdev->rf, - (u8 *)iwdev->netdev->dev_addr, + (const u8 *)iwdev->netdev->dev_addr, (u8)iwdev->mac_ip_table_idx); if (status) irdma_del_local_mac_entry(iwdev->rf, @@ -2191,7 +2191,7 @@ void irdma_del_local_mac_entry(struct irdma_pci_f *rf, u16 idx) * @mac_addr: pointer to mac address * @idx: the index of the mac ip address to add */ -int irdma_add_local_mac_entry(struct irdma_pci_f *rf, u8 *mac_addr, u16 idx) +int irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 idx) { struct irdma_local_mac_entry_info *info; struct irdma_cqp *iwcqp = &rf->cqp; @@ -2362,7 +2362,8 @@ void irdma_del_apbvt(struct irdma_device *iwdev, * @ipv4: flag inicating IPv4 * @action: add, delete or modify */ -void irdma_manage_arp_cache(struct irdma_pci_f *rf, unsigned char *mac_addr, +void irdma_manage_arp_cache(struct irdma_pci_f *rf, + const unsigned char *mac_addr, u32 *ip_addr, bool ipv4, u32 action) { struct irdma_add_arp_cache_entry_info *info; diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index b678fe712447..91a497139ba3 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -467,7 +467,8 @@ void irdma_qp_rem_ref(struct ib_qp *ibqp); void irdma_free_lsmm_rsrc(struct irdma_qp *iwqp); struct ib_qp *irdma_get_qp(struct ib_device *ibdev, int qpn); void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask); -void irdma_manage_arp_cache(struct irdma_pci_f *rf, unsigned char *mac_addr, +void irdma_manage_arp_cache(struct irdma_pci_f *rf, + const unsigned char *mac_addr, u32 *ip_addr, bool ipv4, u32 action); struct irdma_apbvt_entry *irdma_add_apbvt(struct 
irdma_device *iwdev, u16 port); void irdma_del_apbvt(struct irdma_device *iwdev, @@ -479,7 +480,7 @@ void irdma_free_cqp_request(struct irdma_cqp *cqp, void irdma_put_cqp_request(struct irdma_cqp *cqp, struct irdma_cqp_request *cqp_request); int irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx); -int irdma_add_local_mac_entry(struct irdma_pci_f *rf, u8 *mac_addr, u16 idx); +int irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 idx); void irdma_del_local_mac_entry(struct irdma_pci_f *rf, u16 idx); u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf); diff --git a/drivers/infiniband/hw/irdma/osdep.h b/drivers/infiniband/hw/irdma/osdep.h index b2ab52335ca6..63d8bb3a6903 100644 --- a/drivers/infiniband/hw/irdma/osdep.h +++ b/drivers/infiniband/hw/irdma/osdep.h @@ -37,7 +37,6 @@ struct irdma_hw; struct irdma_pci_f; struct ib_device *to_ibdev(struct irdma_sc_dev *dev); -u8 __iomem *irdma_get_hw_addr(void *dev); void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp); enum irdma_status_code irdma_vf_wait_vchnl_resp(struct irdma_sc_dev *dev); bool irdma_vf_clear_to_send(struct irdma_sc_dev *dev); diff --git a/drivers/infiniband/hw/irdma/protos.h b/drivers/infiniband/hw/irdma/protos.h index 78f598fdbccf..a17c0ffb0cc8 100644 --- a/drivers/infiniband/hw/irdma/protos.h +++ b/drivers/infiniband/hw/irdma/protos.h @@ -37,8 +37,6 @@ void irdma_hw_stats_read_all(struct irdma_vsi_pestat *stats, enum irdma_status_code irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd, struct irdma_ws_node_info *node_info); -enum irdma_status_code irdma_cqp_up_map_cmd(struct irdma_sc_dev *dev, u8 cmd, - struct irdma_up_info *map_info); enum irdma_status_code irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_ceq *sc_ceq, u8 op); enum irdma_status_code irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, diff --git a/drivers/infiniband/hw/irdma/trace_cm.h b/drivers/infiniband/hw/irdma/trace_cm.h index bcf10ec427d6..f633fb343328 100644 --- a/drivers/infiniband/hw/irdma/trace_cm.h +++ b/drivers/infiniband/hw/irdma/trace_cm.h @@ -144,7 +144,7 @@ DEFINE_EVENT(tos_template, irdma_dcb_tos, DECLARE_EVENT_CLASS(qhash_template, TP_PROTO(struct irdma_device *iwdev, struct irdma_cm_listener *listener, - char *dev_addr), + const char *dev_addr), TP_ARGS(iwdev, listener, dev_addr), TP_STRUCT__entry(__field(struct irdma_device *, iwdev) __field(u16, lport) @@ -173,12 +173,14 @@ DECLARE_EVENT_CLASS(qhash_template, DEFINE_EVENT(qhash_template, irdma_add_mqh_6, TP_PROTO(struct irdma_device *iwdev, - struct irdma_cm_listener *listener, char *dev_addr), + struct irdma_cm_listener *listener, + const char *dev_addr), TP_ARGS(iwdev, listener, dev_addr)); DEFINE_EVENT(qhash_template, irdma_add_mqh_4, TP_PROTO(struct irdma_device *iwdev, - struct irdma_cm_listener *listener, char *dev_addr), + struct irdma_cm_listener *listener, + const char *dev_addr), TP_ARGS(iwdev, listener, dev_addr)); TRACE_EVENT(irdma_addr_resolve, diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 874bc25a938b..9483bb3e10ea 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -852,7 +852,6 @@ struct irdma_roce_offload_info { u16 err_rq_idx; u32 qkey; u32 dest_qp; - u32 local_qp; u8 roce_tver; u8 ack_credits; u8 err_rq_idx_valid; @@ -1256,7 +1255,7 @@ enum irdma_status_code irdma_sc_qp_modify(struct irdma_sc_qp *qp, u64 scratch, bool post_sq); void irdma_sc_send_lsmm(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size, irdma_stag 
stag); -void irdma_sc_send_lsmm_nostag(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size); + void irdma_sc_send_rtt(struct irdma_sc_qp *qp, bool read); void irdma_sc_qp_setctx(struct irdma_sc_qp *qp, __le64 *qp_ctx, struct irdma_qp_host_ctx_info *info); diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index 9b544a3b1288..57a9444e9ea7 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -13,16 +13,16 @@ * @sge: sge length and stag * @valid: The wqe valid */ -static void irdma_set_fragment(__le64 *wqe, u32 offset, struct irdma_sge *sge, +static void irdma_set_fragment(__le64 *wqe, u32 offset, struct ib_sge *sge, u8 valid) { if (sge) { set_64bit_val(wqe, offset, - FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->tag_off)); + FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->addr)); set_64bit_val(wqe, offset + 8, FIELD_PREP(IRDMAQPSQ_VALID, valid) | - FIELD_PREP(IRDMAQPSQ_FRAG_LEN, sge->len) | - FIELD_PREP(IRDMAQPSQ_FRAG_STAG, sge->stag)); + FIELD_PREP(IRDMAQPSQ_FRAG_LEN, sge->length) | + FIELD_PREP(IRDMAQPSQ_FRAG_STAG, sge->lkey)); } else { set_64bit_val(wqe, offset, 0); set_64bit_val(wqe, offset + 8, @@ -38,14 +38,14 @@ static void irdma_set_fragment(__le64 *wqe, u32 offset, struct irdma_sge *sge, * @valid: wqe valid flag */ static void irdma_set_fragment_gen_1(__le64 *wqe, u32 offset, - struct irdma_sge *sge, u8 valid) + struct ib_sge *sge, u8 valid) { if (sge) { set_64bit_val(wqe, offset, - FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->tag_off)); + FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->addr)); set_64bit_val(wqe, offset + 8, - FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, sge->len) | - FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_STAG, sge->stag)); + FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, sge->length) | + FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_STAG, sge->lkey)); } else { set_64bit_val(wqe, offset, 0); set_64bit_val(wqe, offset + 8, 0); @@ -289,7 +289,7 @@ enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp, return IRDMA_ERR_INVALID_FRAG_COUNT; for (i = 0; i < op_info->num_lo_sges; i++) - total_size += op_info->lo_sg_list[i].len; + total_size += op_info->lo_sg_list[i].length; read_fence |= info->read_fence; @@ -310,7 +310,7 @@ enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp, irdma_clr_wqes(qp, wqe_idx); set_64bit_val(wqe, 16, - FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); + FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr)); if (info->imm_data_valid) { set_64bit_val(wqe, 0, @@ -339,7 +339,7 @@ enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp, ++addl_frag_cnt; } - hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.stag) | + hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt) | @@ -391,7 +391,7 @@ enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp, return IRDMA_ERR_INVALID_FRAG_COUNT; for (i = 0; i < op_info->num_lo_sges; i++) - total_size += op_info->lo_sg_list[i].len; + total_size += op_info->lo_sg_list[i].length; ret_code = irdma_fragcnt_to_quanta_sq(op_info->num_lo_sges, &quanta); if (ret_code) @@ -426,8 +426,8 @@ enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp, ++addl_frag_cnt; } set_64bit_val(wqe, 16, - FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); - hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.stag) | + FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr)); + hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, 
op_info->rem_addr.lkey) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | FIELD_PREP(IRDMAQPSQ_OPCODE, @@ -477,7 +477,7 @@ enum irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp, return IRDMA_ERR_INVALID_FRAG_COUNT; for (i = 0; i < op_info->num_sges; i++) - total_size += op_info->sg_list[i].len; + total_size += op_info->sg_list[i].length; if (info->imm_data_valid) frag_cnt = op_info->num_sges + 1; @@ -705,9 +705,9 @@ irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *in read_fence |= info->read_fence; set_64bit_val(wqe, 16, - FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.tag_off)); + FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr)); - hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.stag) | + hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, op_info->len) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt ? 1 : 0) | @@ -826,7 +826,7 @@ irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, u64 hdr; u32 wqe_idx; bool local_fence = false; - struct irdma_sge sge = {}; + struct ib_sge sge = {}; info->push_wqe = qp->push_db ? true : false; op_info = &info->op.inv_local_stag; @@ -839,7 +839,7 @@ irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, irdma_clr_wqes(qp, wqe_idx); - sge.stag = op_info->target_stag; + sge.lkey = op_info->target_stag; qp->wqe_ops.iw_set_fragment(wqe, 0, &sge, 0); set_64bit_val(wqe, 16, 0); @@ -867,63 +867,6 @@ irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, } /** - * irdma_uk_mw_bind - bind Memory Window - * @qp: hw qp ptr - * @info: post sq information - * @post_sq: flag to post sq - */ -enum irdma_status_code irdma_uk_mw_bind(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq) -{ - __le64 *wqe; - struct irdma_bind_window *op_info; - u64 hdr; - u32 wqe_idx; - bool local_fence = false; - - info->push_wqe = qp->push_db ? true : false; - op_info = &info->op.bind_window; - local_fence |= info->local_fence; - - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA, - 0, info); - if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; - - irdma_clr_wqes(qp, wqe_idx); - - qp->wqe_ops.iw_set_mw_bind_wqe(wqe, op_info); - - hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMA_OP_TYPE_BIND_MW) | - FIELD_PREP(IRDMAQPSQ_STAGRIGHTS, - ((op_info->ena_reads << 2) | (op_info->ena_writes << 3))) | - FIELD_PREP(IRDMAQPSQ_VABASEDTO, - (op_info->addressing_type == IRDMA_ADDR_TYPE_VA_BASED ? 1 : 0)) | - FIELD_PREP(IRDMAQPSQ_MEMWINDOWTYPE, - (op_info->mem_window_type_1 ? 
1 : 0)) | - FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | - FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | - FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) | - FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | - FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity); - - dma_wmb(); /* make sure WQE is populated before valid bit is set */ - - set_64bit_val(wqe, 24, hdr); - - if (info->push_wqe) { - irdma_qp_push_wqe(qp, wqe, IRDMA_QP_WQE_MIN_QUANTA, wqe_idx, - post_sq); - } else { - if (post_sq) - irdma_uk_qp_post_wr(qp); - } - - return 0; -} - -/** * irdma_uk_post_receive - post receive wqe * @qp: hw qp ptr * @info: post rq information @@ -1503,8 +1446,8 @@ enum irdma_status_code irdma_uk_qp_init(struct irdma_qp_uk *qp, * @cq: hw cq * @info: hw cq initialization info */ -enum irdma_status_code irdma_uk_cq_init(struct irdma_cq_uk *cq, - struct irdma_cq_uk_init_info *info) +void irdma_uk_cq_init(struct irdma_cq_uk *cq, + struct irdma_cq_uk_init_info *info) { cq->cq_base = info->cq_base; cq->cq_id = info->cq_id; @@ -1515,8 +1458,6 @@ enum irdma_status_code irdma_uk_cq_init(struct irdma_cq_uk *cq, cq->avoid_mem_cflct = info->avoid_mem_cflct; IRDMA_RING_INIT(cq->cq_ring, cq->cq_size); cq->polarity = 1; - - return 0; } /** diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index 3dcbb1fbf2c6..3c811fb88404 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -16,7 +16,6 @@ #define irdma_access_privileges u32 #define irdma_physical_fragment u64 #define irdma_address_list u64 * -#define irdma_sgl struct irdma_sge * #define IRDMA_MAX_MR_SIZE 0x200000000000ULL @@ -151,12 +150,6 @@ struct irdma_cq_uk; struct irdma_qp_uk_init_info; struct irdma_cq_uk_init_info; -struct irdma_sge { - irdma_tagged_offset tag_off; - u32 len; - irdma_stag stag; -}; - struct irdma_ring { u32 head; u32 tail; @@ -172,7 +165,7 @@ struct irdma_extended_cqe { }; struct irdma_post_send { - irdma_sgl sg_list; + struct ib_sge *sg_list; u32 num_sges; u32 qkey; u32 dest_qp; @@ -189,26 +182,26 @@ struct irdma_post_inline_send { struct irdma_post_rq_info { u64 wr_id; - irdma_sgl sg_list; + struct ib_sge *sg_list; u32 num_sges; }; struct irdma_rdma_write { - irdma_sgl lo_sg_list; + struct ib_sge *lo_sg_list; u32 num_lo_sges; - struct irdma_sge rem_addr; + struct ib_sge rem_addr; }; struct irdma_inline_rdma_write { void *data; u32 len; - struct irdma_sge rem_addr; + struct ib_sge rem_addr; }; struct irdma_rdma_read { - irdma_sgl lo_sg_list; + struct ib_sge *lo_sg_list; u32 num_lo_sges; - struct irdma_sge rem_addr; + struct ib_sge rem_addr; }; struct irdma_bind_window { @@ -283,9 +276,7 @@ enum irdma_status_code irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, enum irdma_status_code irdma_uk_inline_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq); -enum irdma_status_code irdma_uk_mw_bind(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq); + enum irdma_status_code irdma_uk_post_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq); enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp, @@ -306,7 +297,7 @@ enum irdma_status_code irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, struct irdma_wqe_uk_ops { void (*iw_copy_inline_data)(u8 *dest, u8 *src, u32 len, u8 polarity); u16 (*iw_inline_data_size_to_quanta)(u32 data_size); - void (*iw_set_fragment)(__le64 *wqe, u32 offset, struct irdma_sge *sge, + void (*iw_set_fragment)(__le64 *wqe, u32 offset, struct ib_sge *sge, u8 valid); void 
(*iw_set_mw_bind_wqe)(__le64 *wqe, struct irdma_bind_window *op_info); @@ -318,8 +309,8 @@ void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq, enum irdma_cmpl_notify cq_notify); void irdma_uk_cq_resize(struct irdma_cq_uk *cq, void *cq_base, int size); void irdma_uk_cq_set_resized_cnt(struct irdma_cq_uk *qp, u16 cnt); -enum irdma_status_code irdma_uk_cq_init(struct irdma_cq_uk *cq, - struct irdma_cq_uk_init_info *info); +void irdma_uk_cq_init(struct irdma_cq_uk *cq, + struct irdma_cq_uk_init_info *info); enum irdma_status_code irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info); struct irdma_sq_uk_wr_trk_info { @@ -369,7 +360,6 @@ struct irdma_qp_uk { bool rq_flush_complete:1; /* Indicates flush was seen and RQ was empty after the flush */ bool destroy_pending:1; /* Indicates the QP is being destroyed */ void *back_qp; - spinlock_t *lock; u8 dbg_rq_flushed; u8 sq_flush_seen; u8 rq_flush_seen; diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index ac91ea5296db..8b42c43fc14f 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -11,7 +11,7 @@ * @action: modify, delete or add */ int irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, bool ipv4, - u8 *mac_addr, u32 action) + const u8 *mac_addr, u32 action) { unsigned long flags; int arp_index; @@ -77,7 +77,7 @@ int irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, bool ipv4, * @ipv4: IPv4 flag * @mac: MAC address */ -int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, u8 *mac) +int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, const u8 *mac) { int arpidx; @@ -768,17 +768,6 @@ struct ib_qp *irdma_get_qp(struct ib_device *device, int qpn) } /** - * irdma_get_hw_addr - return hw addr - * @par: points to shared dev - */ -u8 __iomem *irdma_get_hw_addr(void *par) -{ - struct irdma_sc_dev *dev = par; - - return dev->hw->hw_addr; -} - -/** * irdma_remove_cqp_head - return head entry and remove * @dev: device */ @@ -2060,40 +2049,6 @@ exit: } /** - * irdma_cqp_up_map_cmd - Set the up-up mapping - * @dev: pointer to device structure - * @cmd: map command - * @map_info: pointer to up map info - */ -enum irdma_status_code irdma_cqp_up_map_cmd(struct irdma_sc_dev *dev, u8 cmd, - struct irdma_up_info *map_info) -{ - struct irdma_pci_f *rf = dev_to_rf(dev); - struct irdma_cqp *iwcqp = &rf->cqp; - struct irdma_sc_cqp *cqp = &iwcqp->sc_cqp; - struct irdma_cqp_request *cqp_request; - struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; - - cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, false); - if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; - - cqp_info = &cqp_request->info; - memset(cqp_info, 0, sizeof(*cqp_info)); - cqp_info->cqp_cmd = cmd; - cqp_info->post_sq = 1; - cqp_info->in.u.up_map.info = *map_info; - cqp_info->in.u.up_map.cqp = cqp; - cqp_info->in.u.up_map.scratch = (uintptr_t)cqp_request; - - status = irdma_handle_cqp_op(rf, cqp_request); - irdma_put_cqp_request(&rf->cqp, cqp_request); - - return status; -} - -/** * irdma_ah_cqp_op - perform an AH cqp operation * @rf: RDMA PCI function * @sc_ah: address handle diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 102dc9342f2a..0f66e809d418 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -833,7 +833,6 @@ static int irdma_create_qp(struct ib_qp *ibqp, qp = &iwqp->sc_qp; qp->qp_uk.back_qp = iwqp; - qp->qp_uk.lock = &iwqp->lock; qp->push_idx = 
IRDMA_INVALID_PUSH_PAGE_INDEX; iwqp->iwdev = iwdev; @@ -1198,7 +1197,6 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, av->attrs = attr->ah_attr; rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid); rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &attr->ah_attr.grh.dgid); - roce_info->local_qp = ibqp->qp_num; if (av->sgid_addr.saddr.sa_family == AF_INET6) { __be32 *daddr = av->dgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32; @@ -3041,24 +3039,6 @@ done: } /** - * irdma_copy_sg_list - copy sg list for qp - * @sg_list: copied into sg_list - * @sgl: copy from sgl - * @num_sges: count of sg entries - */ -static void irdma_copy_sg_list(struct irdma_sge *sg_list, struct ib_sge *sgl, - int num_sges) -{ - unsigned int i; - - for (i = 0; (i < num_sges) && (i < IRDMA_MAX_WQ_FRAGMENT_COUNT); i++) { - sg_list[i].tag_off = sgl[i].addr; - sg_list[i].len = sgl[i].length; - sg_list[i].stag = sgl[i].lkey; - } -} - -/** * irdma_post_send - kernel application wr * @ibqp: qp ptr for wr * @ib_wr: work request ptr @@ -3134,8 +3114,7 @@ static int irdma_post_send(struct ib_qp *ibqp, ret = irdma_uk_inline_send(ukqp, &info, false); } else { info.op.send.num_sges = ib_wr->num_sge; - info.op.send.sg_list = (struct irdma_sge *) - ib_wr->sg_list; + info.op.send.sg_list = ib_wr->sg_list; if (iwqp->ibqp.qp_type == IB_QPT_UD || iwqp->ibqp.qp_type == IB_QPT_GSI) { ah = to_iwah(ud_wr(ib_wr)->ah); @@ -3170,15 +3149,18 @@ static int irdma_post_send(struct ib_qp *ibqp, if (ib_wr->send_flags & IB_SEND_INLINE) { info.op.inline_rdma_write.data = (void *)(uintptr_t)ib_wr->sg_list[0].addr; - info.op.inline_rdma_write.len = ib_wr->sg_list[0].length; - info.op.inline_rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; - info.op.inline_rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey; + info.op.inline_rdma_write.len = + ib_wr->sg_list[0].length; + info.op.inline_rdma_write.rem_addr.addr = + rdma_wr(ib_wr)->remote_addr; + info.op.inline_rdma_write.rem_addr.lkey = + rdma_wr(ib_wr)->rkey; ret = irdma_uk_inline_rdma_write(ukqp, &info, false); } else { info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list; info.op.rdma_write.num_lo_sges = ib_wr->num_sge; - info.op.rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; - info.op.rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey; + info.op.rdma_write.rem_addr.addr = rdma_wr(ib_wr)->remote_addr; + info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey; ret = irdma_uk_rdma_write(ukqp, &info, false); } @@ -3199,8 +3181,8 @@ static int irdma_post_send(struct ib_qp *ibqp, break; } info.op_type = IRDMA_OP_TYPE_RDMA_READ; - info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; - info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey; + info.op.rdma_read.rem_addr.addr = rdma_wr(ib_wr)->remote_addr; + info.op.rdma_read.rem_addr.lkey = rdma_wr(ib_wr)->rkey; info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list; info.op.rdma_read.num_lo_sges = ib_wr->num_sge; @@ -3287,7 +3269,6 @@ static int irdma_post_recv(struct ib_qp *ibqp, struct irdma_qp *iwqp; struct irdma_qp_uk *ukqp; struct irdma_post_rq_info post_recv = {}; - struct irdma_sge sg_list[IRDMA_MAX_WQ_FRAGMENT_COUNT]; enum irdma_status_code ret = 0; unsigned long flags; int err = 0; @@ -3302,8 +3283,7 @@ static int irdma_post_recv(struct ib_qp *ibqp, while (ib_wr) { post_recv.num_sges = ib_wr->num_sge; post_recv.wr_id = ib_wr->wr_id; - irdma_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge); - post_recv.sg_list = sg_list; + post_recv.sg_list = ib_wr->sg_list; ret = 
irdma_uk_post_receive(ukqp, &post_recv); if (ret) { ibdev_dbg(&iwqp->iwdev->ibdev, @@ -3651,89 +3631,89 @@ static int irdma_iw_port_immutable(struct ib_device *ibdev, u32 port_num, return 0; } -static const char *const irdma_hw_stat_names[] = { +static const struct rdma_stat_desc irdma_hw_stat_descs[] = { /* 32bit names */ - [IRDMA_HW_STAT_INDEX_RXVLANERR] = "rxVlanErrors", - [IRDMA_HW_STAT_INDEX_IP4RXDISCARD] = "ip4InDiscards", - [IRDMA_HW_STAT_INDEX_IP4RXTRUNC] = "ip4InTruncatedPkts", - [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE] = "ip4OutNoRoutes", - [IRDMA_HW_STAT_INDEX_IP6RXDISCARD] = "ip6InDiscards", - [IRDMA_HW_STAT_INDEX_IP6RXTRUNC] = "ip6InTruncatedPkts", - [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE] = "ip6OutNoRoutes", - [IRDMA_HW_STAT_INDEX_TCPRTXSEG] = "tcpRetransSegs", - [IRDMA_HW_STAT_INDEX_TCPRXOPTERR] = "tcpInOptErrors", - [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR] = "tcpInProtoErrors", - [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED] = "cnpHandled", - [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED] = "cnpIgnored", - [IRDMA_HW_STAT_INDEX_TXNPCNPSENT] = "cnpSent", + [IRDMA_HW_STAT_INDEX_RXVLANERR].name = "rxVlanErrors", + [IRDMA_HW_STAT_INDEX_IP4RXDISCARD].name = "ip4InDiscards", + [IRDMA_HW_STAT_INDEX_IP4RXTRUNC].name = "ip4InTruncatedPkts", + [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE].name = "ip4OutNoRoutes", + [IRDMA_HW_STAT_INDEX_IP6RXDISCARD].name = "ip6InDiscards", + [IRDMA_HW_STAT_INDEX_IP6RXTRUNC].name = "ip6InTruncatedPkts", + [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE].name = "ip6OutNoRoutes", + [IRDMA_HW_STAT_INDEX_TCPRTXSEG].name = "tcpRetransSegs", + [IRDMA_HW_STAT_INDEX_TCPRXOPTERR].name = "tcpInOptErrors", + [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR].name = "tcpInProtoErrors", + [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED].name = "cnpHandled", + [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED].name = "cnpIgnored", + [IRDMA_HW_STAT_INDEX_TXNPCNPSENT].name = "cnpSent", /* 64bit names */ - [IRDMA_HW_STAT_INDEX_IP4RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4InOctets", - [IRDMA_HW_STAT_INDEX_IP4RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4InPkts", - [IRDMA_HW_STAT_INDEX_IP4RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4InReasmRqd", - [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4InMcastOctets", - [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4InMcastPkts", - [IRDMA_HW_STAT_INDEX_IP4TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4OutOctets", - [IRDMA_HW_STAT_INDEX_IP4TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4OutPkts", - [IRDMA_HW_STAT_INDEX_IP4TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4OutSegRqd", - [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4OutMcastOctets", - [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip4OutMcastPkts", - [IRDMA_HW_STAT_INDEX_IP6RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6RXOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6InOctets", - 
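The irdma hunks above retire the driver-private struct irdma_sge (tag_off/len/stag) in favour of the core struct ib_sge, which is why irdma_copy_sg_list() disappears and post_send()/post_recv() can hand ib_wr->sg_list to the user-kernel layer unchanged. A minimal sketch of the field correspondence; the legacy struct is reproduced here purely for illustration, since the patch deletes it.

#include <linux/types.h>
#include <rdma/ib_verbs.h>	/* struct ib_sge: addr/length/lkey */

/* Layout removed by this patch, kept here only for comparison. */
struct irdma_sge_legacy {
	u64 tag_off;	/* maps to ib_sge.addr   */
	u32 len;	/* maps to ib_sge.length */
	u32 stag;	/* maps to ib_sge.lkey   */
};

/* The per-element translation irdma_copy_sg_list() used to perform;
 * after this conversion the ib_sge array is consumed directly.
 */
static inline void irdma_sge_from_ib(struct irdma_sge_legacy *dst,
				     const struct ib_sge *src)
{
	dst->tag_off = src->addr;
	dst->len = src->length;
	dst->stag = src->lkey;
}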
[IRDMA_HW_STAT_INDEX_IP6RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6RXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6InPkts", - [IRDMA_HW_STAT_INDEX_IP6RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6RXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6InReasmRqd", - [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6InMcastOctets", - [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6InMcastPkts", - [IRDMA_HW_STAT_INDEX_IP6TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6TXOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6OutOctets", - [IRDMA_HW_STAT_INDEX_IP6TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6TXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6OutPkts", - [IRDMA_HW_STAT_INDEX_IP6TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6TXFRAGS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6OutSegRqd", - [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6OutMcastOctets", - [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "ip6OutMcastPkts", - [IRDMA_HW_STAT_INDEX_TCPRXSEGS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_TCPRXSEGS + IRDMA_HW_STAT_INDEX_MAX_32].name = "tcpInSegs", - [IRDMA_HW_STAT_INDEX_TCPTXSEG + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_TCPTXSEG + IRDMA_HW_STAT_INDEX_MAX_32].name = "tcpOutSegs", - [IRDMA_HW_STAT_INDEX_RDMARXRDS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_RDMARXRDS + IRDMA_HW_STAT_INDEX_MAX_32].name = "iwInRdmaReads", - [IRDMA_HW_STAT_INDEX_RDMARXSNDS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_RDMARXSNDS + IRDMA_HW_STAT_INDEX_MAX_32].name = "iwInRdmaSends", - [IRDMA_HW_STAT_INDEX_RDMARXWRS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_RDMARXWRS + IRDMA_HW_STAT_INDEX_MAX_32].name = "iwInRdmaWrites", - [IRDMA_HW_STAT_INDEX_RDMATXRDS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_RDMATXRDS + IRDMA_HW_STAT_INDEX_MAX_32].name = "iwOutRdmaReads", - [IRDMA_HW_STAT_INDEX_RDMATXSNDS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_RDMATXSNDS + IRDMA_HW_STAT_INDEX_MAX_32].name = "iwOutRdmaSends", - [IRDMA_HW_STAT_INDEX_RDMATXWRS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_RDMATXWRS + IRDMA_HW_STAT_INDEX_MAX_32].name = "iwOutRdmaWrites", - [IRDMA_HW_STAT_INDEX_RDMAVBND + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_RDMAVBND + IRDMA_HW_STAT_INDEX_MAX_32].name = "iwRdmaBnd", - [IRDMA_HW_STAT_INDEX_RDMAVINV + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_RDMAVINV + IRDMA_HW_STAT_INDEX_MAX_32].name = "iwRdmaInv", - [IRDMA_HW_STAT_INDEX_UDPRXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_UDPRXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "RxUDP", - [IRDMA_HW_STAT_INDEX_UDPTXPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = + [IRDMA_HW_STAT_INDEX_UDPTXPKTS + IRDMA_HW_STAT_INDEX_MAX_32].name = "TxUDP", - [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS + IRDMA_HW_STAT_INDEX_MAX_32] = - "RxECNMrkd", + [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS + IRDMA_HW_STAT_INDEX_MAX_32] + .name = "RxECNMrkd", }; static void irdma_get_dev_fw_str(struct ib_device *dev, char *str) @@ -3757,10 +3737,10 @@ static struct rdma_hw_stats *irdma_alloc_hw_port_stats(struct ib_device *ibdev, 
IRDMA_HW_STAT_INDEX_MAX_64; unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN; - BUILD_BUG_ON(ARRAY_SIZE(irdma_hw_stat_names) != + BUILD_BUG_ON(ARRAY_SIZE(irdma_hw_stat_descs) != (IRDMA_HW_STAT_INDEX_MAX_32 + IRDMA_HW_STAT_INDEX_MAX_64)); - return rdma_alloc_hw_stats_struct(irdma_hw_stat_names, num_counters, + return rdma_alloc_hw_stats_struct(irdma_hw_stat_descs, num_counters, lifespan); } @@ -4330,7 +4310,7 @@ static enum rdma_link_layer irdma_get_link_layer(struct ib_device *ibdev, static __be64 irdma_mac_to_guid(struct net_device *ndev) { - unsigned char *mac = ndev->dev_addr; + const unsigned char *mac = ndev->dev_addr; __be64 guid; unsigned char *dst = (unsigned char *)&guid; diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 571d9c542024..e2e1f5daddc4 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -822,10 +822,8 @@ void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev) } spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags); } - for (i = 0 ; i < dev->num_ports; i++) { - flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); + for (i = 0 ; i < dev->num_ports; i++) destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); - } ib_sa_unregister_client(dev->sriov.alias_guid.sa_client); kfree(dev->sriov.alias_guid.sa_client); } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f3fa2fe6a88a..ceca05982f61 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2105,10 +2105,10 @@ mlx4_ib_alloc_hw_device_stats(struct ib_device *ibdev) struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_diag_counters *diag = dev->diag_counters; - if (!diag[0].name) + if (!diag[0].descs) return NULL; - return rdma_alloc_hw_stats_struct(diag[0].name, diag[0].num_counters, + return rdma_alloc_hw_stats_struct(diag[0].descs, diag[0].num_counters, RDMA_HW_STATS_DEFAULT_LIFESPAN); } @@ -2118,10 +2118,10 @@ mlx4_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num) struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_diag_counters *diag = dev->diag_counters; - if (!diag[1].name) + if (!diag[1].descs) return NULL; - return rdma_alloc_hw_stats_struct(diag[1].name, diag[1].num_counters, + return rdma_alloc_hw_stats_struct(diag[1].descs, diag[1].num_counters, RDMA_HW_STATS_DEFAULT_LIFESPAN); } @@ -2151,10 +2151,8 @@ static int mlx4_ib_get_hw_stats(struct ib_device *ibdev, } static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev, - const char ***name, - u32 **offset, - u32 *num, - bool port) + struct rdma_stat_desc **pdescs, + u32 **offset, u32 *num, bool port) { u32 num_counters; @@ -2166,46 +2164,46 @@ static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev, if (!port) num_counters += ARRAY_SIZE(diag_device_only); - *name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL); - if (!*name) + *pdescs = kcalloc(num_counters, sizeof(struct rdma_stat_desc), + GFP_KERNEL); + if (!*pdescs) return -ENOMEM; *offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL); if (!*offset) - goto err_name; + goto err; *num = num_counters; return 0; -err_name: - kfree(*name); +err: + kfree(*pdescs); return -ENOMEM; } static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev, - const char **name, - u32 *offset, - bool port) + struct rdma_stat_desc *descs, + u32 *offset, bool port) { int i; int j; for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) { - name[i] = diag_basic[i].name; + 
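This is the first of several conversions in the series (irdma above, mlx4, mlx5 and rxe below) from plain counter-name arrays to struct rdma_stat_desc, which carries a flags word and a driver-private pointer alongside each name. A hedged sketch of the new pattern for a made-up provider; only rdma_stat_desc, rdma_alloc_hw_stats_struct() and RDMA_HW_STATS_DEFAULT_LIFESPAN are real core symbols, the foo_* names are invented.

#include <linux/build_bug.h>
#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

enum { FOO_CNT_TX_PKTS, FOO_CNT_RX_PKTS, FOO_NUM_COUNTERS };	/* hypothetical */

static const struct rdma_stat_desc foo_stat_descs[] = {
	[FOO_CNT_TX_PKTS].name = "tx_pkts",
	[FOO_CNT_RX_PKTS].name = "rx_pkts",
};

static struct rdma_hw_stats *foo_alloc_hw_port_stats(struct ib_device *ibdev,
						     u32 port_num)
{
	BUILD_BUG_ON(ARRAY_SIZE(foo_stat_descs) != FOO_NUM_COUNTERS);

	return rdma_alloc_hw_stats_struct(foo_stat_descs,
					  ARRAY_SIZE(foo_stat_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}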
descs[i].name = diag_basic[i].name; offset[i] = diag_basic[i].offset; } if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) { for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) { - name[j] = diag_ext[i].name; + descs[j].name = diag_ext[i].name; offset[j] = diag_ext[i].offset; } } if (!port) { for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) { - name[j] = diag_device_only[i].name; + descs[j].name = diag_device_only[i].name; offset[j] = diag_device_only[i].offset; } } @@ -2233,13 +2231,13 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) if (i && !per_port) continue; - ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name, + ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].descs, &diag[i].offset, &diag[i].num_counters, i); if (ret) goto err_alloc; - mlx4_ib_fill_diag_counters(ibdev, diag[i].name, + mlx4_ib_fill_diag_counters(ibdev, diag[i].descs, diag[i].offset, i); } @@ -2249,7 +2247,7 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) err_alloc: if (i) { - kfree(diag[i - 1].name); + kfree(diag[i - 1].descs); kfree(diag[i - 1].offset); } @@ -2262,7 +2260,7 @@ static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev) for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) { kfree(ibdev->diag_counters[i].offset); - kfree(ibdev->diag_counters[i].name); + kfree(ibdev->diag_counters[i].descs); } } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index c60f6e9ac640..d84023b4b1b8 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -601,7 +601,7 @@ struct mlx4_ib_counters { #define MLX4_DIAG_COUNTERS_TYPES 2 struct mlx4_ib_diag_counters { - const char **name; + struct rdma_stat_desc *descs; u32 *offset; u32 num_counters; }; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index aea4182f33a4..b17d6ebc5b70 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1099,8 +1099,10 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) qp->flags |= MLX4_IB_QP_NETIF; - else + else { + err = -EINVAL; goto err; + } } err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp); diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 224ba36f2946..945758f39523 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -12,6 +12,7 @@ struct mlx5_ib_counter { const char *name; size_t offset; + u32 type; }; #define INIT_Q_COUNTER(_name) \ @@ -75,6 +76,21 @@ static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), }; +#define INIT_OP_COUNTER(_name, _type) \ + { .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type} + +static const struct mlx5_ib_counter basic_op_cnts[] = { + INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS), +}; + +static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = { + INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS), +}; + +static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = { + INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS), +}; + static int mlx5_ib_read_counters(struct ib_counters *counters, struct ib_counters_read_attr *read_attr, struct uverbs_attr_bundle *attrs) @@ -161,17 +177,34 @@ u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num) return cnts->set_id; } +static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts) +{ + struct 
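The small mlx4 qp.c hunk above also fixes an error path: when device-managed steering is not available, create_qp_common() previously jumped to its error label without setting err, so an unsupported NETIF request could be reported as success. The bug class, reduced to a hypothetical helper:

#include <linux/errno.h>

/* Illustration only; foo_setup() is not a real function. */
static int foo_setup(bool feature_supported)
{
	int err = 0;

	if (!feature_supported) {
		err = -EINVAL;	/* the line the fix adds; without it the
				 * goto below could return 0 */
		goto err;
	}

	return 0;
err:
	return err;
}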
rdma_hw_stats *stats; + u32 num_hw_counters; + int i; + + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + stats = rdma_alloc_hw_stats_struct(cnts->descs, + num_hw_counters + + cnts->num_op_counters, + RDMA_HW_STATS_DEFAULT_LIFESPAN); + if (!stats) + return NULL; + + for (i = 0; i < cnts->num_op_counters; i++) + set_bit(num_hw_counters + i, stats->is_disabled); + + return stats; +} + static struct rdma_hw_stats * mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev) { struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct mlx5_ib_counters *cnts = &dev->port[0].cnts; - return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); + return do_alloc_stats(cnts); } static struct rdma_hw_stats * @@ -180,11 +213,7 @@ mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num) struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts; - return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); + return do_alloc_stats(cnts); } static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, @@ -241,9 +270,9 @@ free: return ret; } -static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, - struct rdma_hw_stats *stats, - u32 port_num, int index) +static int do_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u32 port_num, int index) { struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); @@ -295,6 +324,88 @@ done: return num_counters; } +static int do_get_op_stat(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u32 port_num, int index) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts; + const struct mlx5_ib_op_fc *opfcs; + u64 packets = 0, bytes; + u32 type; + int ret; + + cnts = get_counters(dev, port_num - 1); + opfcs = cnts->opfcs; + type = *(u32 *)cnts->descs[index].priv; + if (type >= MLX5_IB_OPCOUNTER_MAX) + return -EINVAL; + + if (!opfcs[type].fc) + goto out; + + ret = mlx5_fc_query(dev->mdev, opfcs[type].fc, + &packets, &bytes); + if (ret) + return ret; + +out: + stats->value[index] = packets; + return index; +} + +static int do_get_op_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u32 port_num) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts; + int index, ret, num_hw_counters; + + cnts = get_counters(dev, port_num - 1); + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + for (index = num_hw_counters; + index < (num_hw_counters + cnts->num_op_counters); index++) { + ret = do_get_op_stat(ibdev, stats, port_num, index); + if (ret != index) + return ret; + } + + return cnts->num_op_counters; +} + +static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u32 port_num, int index) +{ + int num_counters, num_hw_counters, num_op_counters; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + const struct mlx5_ib_counters *cnts; + + cnts = get_counters(dev, port_num - 1); + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + num_counters = num_hw_counters + cnts->num_op_counters; + + if (index < 0 || index > num_counters) + return -EINVAL; + else if (index > 0 && index < 
num_hw_counters) + return do_get_hw_stats(ibdev, stats, port_num, index); + else if (index >= num_hw_counters && index < num_counters) + return do_get_op_stat(ibdev, stats, port_num, index); + + num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index); + if (num_hw_counters < 0) + return num_hw_counters; + + num_op_counters = do_get_op_stats(ibdev, stats, port_num); + if (num_op_counters < 0) + return num_op_counters; + + return num_hw_counters + num_op_counters; +} + static struct rdma_hw_stats * mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) { @@ -302,11 +413,7 @@ mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port - 1); - return rdma_alloc_hw_stats_struct(cnts->names, - cnts->num_q_counters + - cnts->num_cong_counters + - cnts->num_ext_ppcnt_counters, - RDMA_HW_STATS_DEFAULT_LIFESPAN); + return do_alloc_stats(cnts); } static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) @@ -371,67 +478,89 @@ static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) return mlx5_ib_qp_set_counter(qp, NULL); } - static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, - const char **names, - size_t *offsets) + struct rdma_stat_desc *descs, size_t *offsets) { int i; int j = 0; for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { - names[j] = basic_q_cnts[i].name; + descs[j].name = basic_q_cnts[i].name; offsets[j] = basic_q_cnts[i].offset; } if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { - names[j] = out_of_seq_q_cnts[i].name; + descs[j].name = out_of_seq_q_cnts[i].name; offsets[j] = out_of_seq_q_cnts[i].offset; } } if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { - names[j] = retrans_q_cnts[i].name; + descs[j].name = retrans_q_cnts[i].name; offsets[j] = retrans_q_cnts[i].offset; } } if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { - names[j] = extended_err_cnts[i].name; + descs[j].name = extended_err_cnts[i].name; offsets[j] = extended_err_cnts[i].offset; } } if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { - names[j] = roce_accl_cnts[i].name; + descs[j].name = roce_accl_cnts[i].name; offsets[j] = roce_accl_cnts[i].offset; } } if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { - names[j] = cong_cnts[i].name; + descs[j].name = cong_cnts[i].name; offsets[j] = cong_cnts[i].offset; } } if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) { - names[j] = ext_ppcnt_cnts[i].name; + descs[j].name = ext_ppcnt_cnts[i].name; offsets[j] = ext_ppcnt_cnts[i].offset; } } + + for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) { + descs[j].name = basic_op_cnts[i].name; + descs[j].flags |= IB_STAT_FLAG_OPTIONAL; + descs[j].priv = &basic_op_cnts[i].type; + } + + if (MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_receive_rdma.bth_opcode)) { + for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) { + descs[j].name = rdmarx_cnp_op_cnts[i].name; + descs[j].flags |= IB_STAT_FLAG_OPTIONAL; + descs[j].priv = &rdmarx_cnp_op_cnts[i].type; + } + } + + if (MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_transmit_rdma.bth_opcode)) { + for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) { + descs[j].name = rdmatx_cnp_op_cnts[i].name; + 
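The reworked mlx5_ib_get_hw_stats() above splits the counter index space into two ranges: the existing queue/congestion/PPCNT counters occupy [0, num_hw_counters) and the new optional op counters occupy [num_hw_counters, num_counters), with index 0 used as the update-everything path. Roughly, and only as a sketch of that dispatch (the enum and helper below are invented):

enum foo_stat_kind { FOO_STAT_ALL, FOO_STAT_HW, FOO_STAT_OP, FOO_STAT_BAD };

/* num_hw = q + cong + ext_ppcnt counters, num_op = optional counters */
static enum foo_stat_kind foo_classify_stat_index(int index, int num_hw,
						  int num_op)
{
	if (index < 0 || index > num_hw + num_op)
		return FOO_STAT_BAD;
	if (index == 0)
		return FOO_STAT_ALL;	/* refresh both ranges */
	if (index < num_hw)
		return FOO_STAT_HW;	/* firmware/PPCNT backed */
	return FOO_STAT_OP;		/* flow-counter backed, optional */
}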
descs[j].flags |= IB_STAT_FLAG_OPTIONAL; + descs[j].priv = &rdmatx_cnp_op_cnts[i].type; + } + } } static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, struct mlx5_ib_counters *cnts) { - u32 num_counters; + u32 num_counters, num_op_counters; num_counters = ARRAY_SIZE(basic_q_cnts); @@ -457,20 +586,34 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); num_counters += ARRAY_SIZE(ext_ppcnt_cnts); } - cnts->names = kcalloc(num_counters, sizeof(*cnts->names), GFP_KERNEL); - if (!cnts->names) + + num_op_counters = ARRAY_SIZE(basic_op_cnts); + + if (MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_receive_rdma.bth_opcode)) + num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts); + + if (MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_transmit_rdma.bth_opcode)) + num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts); + + cnts->num_op_counters = num_op_counters; + num_counters += num_op_counters; + cnts->descs = kcalloc(num_counters, + sizeof(struct rdma_stat_desc), GFP_KERNEL); + if (!cnts->descs) return -ENOMEM; cnts->offsets = kcalloc(num_counters, sizeof(*cnts->offsets), GFP_KERNEL); if (!cnts->offsets) - goto err_names; + goto err; return 0; -err_names: - kfree(cnts->names); - cnts->names = NULL; +err: + kfree(cnts->descs); + cnts->descs = NULL; return -ENOMEM; } @@ -478,7 +621,7 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; int num_cnt_ports; - int i; + int i, j; num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; @@ -491,8 +634,20 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) dev->port[i].cnts.set_id); mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in); } - kfree(dev->port[i].cnts.names); + kfree(dev->port[i].cnts.descs); kfree(dev->port[i].cnts.offsets); + + for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) { + if (!dev->port[i].cnts.opfcs[j].fc) + continue; + + if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) + mlx5_ib_fs_remove_op_fc(dev, + &dev->port[i].cnts.opfcs[j], j); + mlx5_fc_destroy(dev->mdev, + dev->port[i].cnts.opfcs[j].fc); + dev->port[i].cnts.opfcs[j].fc = NULL; + } } } @@ -514,7 +669,7 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) if (err) goto err_alloc; - mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, + mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs, dev->port[i].cnts.offsets); MLX5_SET(alloc_q_counter_in, in, uid, @@ -672,6 +827,56 @@ void mlx5_ib_counters_clear_description(struct ib_counters *counters) mutex_unlock(&mcounters->mcntrs_mutex); } +static int mlx5_ib_modify_stat(struct ib_device *device, u32 port, + unsigned int index, bool enable) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_ib_counters *cnts; + struct mlx5_ib_op_fc *opfc; + u32 num_hw_counters, type; + int ret; + + cnts = &dev->port[port - 1].cnts; + num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; + if (index < num_hw_counters || + index >= (num_hw_counters + cnts->num_op_counters)) + return -EINVAL; + + if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) + return -EINVAL; + + type = *(u32 *)cnts->descs[index].priv; + if (type >= MLX5_IB_OPCOUNTER_MAX) + return -EINVAL; + + opfc = &cnts->opfcs[type]; + + if (enable) { + if (opfc->fc) + return -EEXIST; + + opfc->fc = mlx5_fc_create(dev->mdev, false); + if (IS_ERR(opfc->fc)) + return PTR_ERR(opfc->fc); + + ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type); + if (ret) { + 
mlx5_fc_destroy(dev->mdev, opfc->fc); + opfc->fc = NULL; + } + return ret; + } + + if (!opfc->fc) + return -EINVAL; + + mlx5_ib_fs_remove_op_fc(dev, opfc, type); + mlx5_fc_destroy(dev->mdev, opfc->fc); + opfc->fc = NULL; + return 0; +} + static const struct ib_device_ops hw_stats_ops = { .alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats, .get_hw_stats = mlx5_ib_get_hw_stats, @@ -680,6 +885,8 @@ static const struct ib_device_ops hw_stats_ops = { .counter_dealloc = mlx5_ib_counter_dealloc, .counter_alloc_stats = mlx5_ib_counter_alloc_stats, .counter_update_stats = mlx5_ib_counter_update_stats, + .modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ? + mlx5_ib_modify_stat : NULL, }; static const struct ib_device_ops hw_switchdev_stats_ops = { diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 5fbc0a8454b9..b780185d9dc6 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -10,12 +10,14 @@ #include <rdma/uverbs_std_types.h> #include <rdma/mlx5_user_ioctl_cmds.h> #include <rdma/mlx5_user_ioctl_verbs.h> +#include <rdma/ib_hdrs.h> #include <rdma/ib_umem.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/fs_helpers.h> #include <linux/mlx5/accel.h> #include <linux/mlx5/eswitch.h> +#include <net/inet_ecn.h> #include "mlx5_ib.h" #include "counters.h" #include "devx.h" @@ -847,6 +849,191 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, return prio; } +enum { + RDMA_RX_ECN_OPCOUNTER_PRIO, + RDMA_RX_CNP_OPCOUNTER_PRIO, +}; + +enum { + RDMA_TX_CNP_OPCOUNTER_PRIO, +}; + +static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_flow_spec *spec) +{ + if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, + ft_field_support.source_vhca_port) || + !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, + ft_field_support.source_vhca_port)) + return -EOPNOTSUPP; + + MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria, + misc_parameters.source_vhca_port); + MLX5_SET(fte_match_param, &spec->match_value, + misc_parameters.source_vhca_port, port_num); + + return 0; +} + +static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_flow_spec *spec, int ipv) +{ + if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, + ft_field_support.outer_ip_version)) + return -EOPNOTSUPP; + + if (mlx5_core_mp_enabled(dev->mdev) && + set_vhca_port_spec(dev, port_num, spec)) + return -EOPNOTSUPP; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.ip_ecn); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn, + INET_ECN_CE); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, + ipv); + + spec->match_criteria_enable = + get_match_criteria_enable(spec->match_criteria); + + return 0; +} + +static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_flow_spec *spec) +{ + if (mlx5_core_mp_enabled(dev->mdev) && + set_vhca_port_spec(dev, port_num, spec)) + return -EOPNOTSUPP; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters.bth_opcode); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode, + IB_BTH_OPCODE_CNP); + + spec->match_criteria_enable = + get_match_criteria_enable(spec->match_criteria); + + return 0; +} + +int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_ib_op_fc *opfc, + enum mlx5_ib_optional_counter_type type) +{ + enum mlx5_flow_namespace_type 
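Together, IB_STAT_FLAG_OPTIONAL in the descriptors, the disabled bits set in do_alloc_stats() and the new .modify_hw_stat device op shown above let a driver publish counters that cost resources only while enabled; mlx5 creates and destroys a flow counter in mlx5_ib_modify_stat() and gates the op behind CONFIG_INFINIBAND_USER_ACCESS. A minimal sketch of the driver-side contract for a hypothetical device (the foo_* names and state flag are invented; the op signature and descriptor fields come from the hunks above):

#include <rdma/ib_verbs.h>

static bool foo_rare_event_enabled;	/* hypothetical per-port state */

static const struct rdma_stat_desc foo_port_descs[] = {
	{ .name = "frames" },					/* always on */
	{ .name = "rare_event", .flags = IB_STAT_FLAG_OPTIONAL },
};

/* Invoked by the core when userspace toggles an optional counter. */
static int foo_modify_stat(struct ib_device *device, u32 port,
			   unsigned int index, bool enable)
{
	if (index != 1)			/* only descriptor [1] is optional */
		return -EINVAL;
	if (enable && foo_rare_event_enabled)
		return -EEXIST;

	/* allocate or release whatever backs the counter here */
	foo_rare_event_enabled = enable;
	return 0;
}

static const struct ib_device_ops foo_stats_ops = {
	/* .alloc_hw_port_stats and .get_hw_stats omitted from the sketch */
	.modify_hw_stat = foo_modify_stat,
};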
fn_type; + int priority, i, err, spec_num; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_destination dst; + struct mlx5_flow_namespace *ns; + struct mlx5_ib_flow_prio *prio; + struct mlx5_flow_spec *spec; + + spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + switch (type) { + case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS: + if (set_ecn_ce_spec(dev, port_num, &spec[0], + MLX5_FS_IPV4_VERSION) || + set_ecn_ce_spec(dev, port_num, &spec[1], + MLX5_FS_IPV6_VERSION)) { + err = -EOPNOTSUPP; + goto free; + } + spec_num = 2; + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_ECN_OPCOUNTER_PRIO; + break; + + case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS: + if (!MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_receive_rdma.bth_opcode) || + set_cnp_spec(dev, port_num, &spec[0])) { + err = -EOPNOTSUPP; + goto free; + } + spec_num = 1; + fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS; + priority = RDMA_RX_CNP_OPCOUNTER_PRIO; + break; + + case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS: + if (!MLX5_CAP_FLOWTABLE(dev->mdev, + ft_field_support_2_nic_transmit_rdma.bth_opcode) || + set_cnp_spec(dev, port_num, &spec[0])) { + err = -EOPNOTSUPP; + goto free; + } + spec_num = 1; + fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS; + priority = RDMA_TX_CNP_OPCOUNTER_PRIO; + break; + + default: + err = -EOPNOTSUPP; + goto free; + } + + ns = mlx5_get_flow_namespace(dev->mdev, fn_type); + if (!ns) { + err = -EOPNOTSUPP; + goto free; + } + + prio = &dev->flow_db->opfcs[type]; + if (!prio->flow_table) { + prio = _get_prio(ns, prio, priority, + dev->num_ports * MAX_OPFC_RULES, 1, 0); + if (IS_ERR(prio)) { + err = PTR_ERR(prio); + goto free; + } + } + + dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dst.counter_id = mlx5_fc_id(opfc->fc); + + flow_act.action = + MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW; + + for (i = 0; i < spec_num; i++) { + opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i], + &flow_act, &dst, 1); + if (IS_ERR(opfc->rule[i])) { + err = PTR_ERR(opfc->rule[i]); + goto del_rules; + } + } + prio->refcount += spec_num; + kfree(spec); + + return 0; + +del_rules: + for (i -= 1; i >= 0; i--) + mlx5_del_flow_rules(opfc->rule[i]); + put_flow_table(dev, prio, false); +free: + kfree(spec); + return err; +} + +void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev, + struct mlx5_ib_op_fc *opfc, + enum mlx5_ib_optional_counter_type type) +{ + int i; + + for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) { + mlx5_del_flow_rules(opfc->rule[i]); + put_flow_table(dev, &dev->flow_db->opfcs[type], true); + } +} + static void set_underlay_qp(struct mlx5_ib_dev *dev, struct mlx5_flow_spec *spec, u32 underlay_qpn) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e462e368c353..e636e954f6bf 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -263,6 +263,14 @@ struct mlx5_ib_pp { struct mlx5_core_dev *mdev; }; +enum mlx5_ib_optional_counter_type { + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS, + MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS, + MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS, + + MLX5_IB_OPCOUNTER_MAX, +}; + struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT]; @@ -271,6 +279,7 @@ struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio fdb; struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT]; + struct mlx5_ib_flow_prio 
opfcs[MLX5_IB_OPCOUNTER_MAX]; struct mlx5_flow_table *lag_demux_ft; /* Protect flow steering bypass flow tables * when add/del flow rules. @@ -804,15 +813,32 @@ struct mlx5_ib_resources { struct mlx5_ib_port_resources ports[2]; }; +#define MAX_OPFC_RULES 2 + +struct mlx5_ib_op_fc { + struct mlx5_fc *fc; + struct mlx5_flow_handle *rule[MAX_OPFC_RULES]; +}; + struct mlx5_ib_counters { - const char **names; + struct rdma_stat_desc *descs; size_t *offsets; u32 num_q_counters; u32 num_cong_counters; u32 num_ext_ppcnt_counters; + u32 num_op_counters; u16 set_id; + struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX]; }; +int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, + struct mlx5_ib_op_fc *opfc, + enum mlx5_ib_optional_counter_type type); + +void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev, + struct mlx5_ib_op_fc *opfc, + enum mlx5_ib_optional_counter_type type); + struct mlx5_ib_multiport_info; struct mlx5_ib_multiport { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index d2044df30394..157d862fb864 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -605,29 +605,21 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, /* Return a MR already available in the cache */ static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent) { - struct mlx5_ib_dev *dev = req_ent->dev; struct mlx5_ib_mr *mr = NULL; struct mlx5_cache_ent *ent = req_ent; - /* Try larger MR pools from the cache to satisfy the allocation */ - for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) { - mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order, - ent - dev->cache.ent); - - spin_lock_irq(&ent->lock); - if (!list_empty(&ent->head)) { - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, - list); - list_del(&mr->list); - ent->available_mrs--; - queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); - mlx5_clear_mr(mr); - return mr; - } + spin_lock_irq(&ent->lock); + if (!list_empty(&ent->head)) { + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + list_del(&mr->list); + ent->available_mrs--; queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); + mlx5_clear_mr(mr); + return mr; } + queue_adjust_cache_locked(ent); + spin_unlock_irq(&ent->lock); req_ent->miss++; return NULL; } diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index b1e2725f4586..91eb615b89ee 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1691,20 +1691,26 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, xa_lock(&dev->odp_mkeys); mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey)); - if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR) + if (!mmkey || mmkey->key != lkey) { + mr = ERR_PTR(-ENOENT); goto end; + } + if (mmkey->type != MLX5_MKEY_MR) { + mr = ERR_PTR(-EINVAL); + goto end; + } mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); if (mr->ibmr.pd != pd) { - mr = NULL; + mr = ERR_PTR(-EPERM); goto end; } /* prefetch with write-access must be supported by the MR */ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && !mr->umem->writable) { - mr = NULL; + mr = ERR_PTR(-EPERM); goto end; } @@ -1736,7 +1742,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) destroy_prefetch_work(work); } -static bool init_prefetch_work(struct ib_pd *pd, +static int init_prefetch_work(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 pf_flags, struct prefetch_mr_work *work, 
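The odp.c hunks above convert get_prefetchable_mr() from "NULL means failure" to distinct ERR_PTR() codes (-ENOENT, -EINVAL, -EPERM), which init_prefetch_work() and mlx5_ib_prefetch_sg_list() (continued just below) now propagate instead of collapsing every failure into -EINVAL or -ENOENT. The underlying kernel idiom, with invented names:

#include <linux/err.h>
#include <linux/slab.h>

struct foo { int key; };			/* hypothetical object */

static struct foo *foo_lookup(int key, bool writable, bool want_write)
{
	struct foo *obj;

	if (key < 0)
		return ERR_PTR(-ENOENT);	/* nothing registered here */
	if (want_write && !writable)
		return ERR_PTR(-EPERM);		/* found, but not permitted */

	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	if (!obj)
		return ERR_PTR(-ENOMEM);
	obj->key = key;
	return obj;
}

static int foo_use(int key, bool want_write)
{
	struct foo *obj = foo_lookup(key, true, want_write);

	if (IS_ERR(obj))
		return PTR_ERR(obj);		/* precise reason, not a catch-all */
	kfree(obj);
	return 0;
}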
struct ib_sge *sg_list, u32 num_sge) @@ -1747,17 +1753,19 @@ static bool init_prefetch_work(struct ib_pd *pd, work->pf_flags = pf_flags; for (i = 0; i < num_sge; ++i) { - work->frags[i].io_virt = sg_list[i].addr; - work->frags[i].length = sg_list[i].length; - work->frags[i].mr = - get_prefetchable_mr(pd, advice, sg_list[i].lkey); - if (!work->frags[i].mr) { + struct mlx5_ib_mr *mr; + + mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); + if (IS_ERR(mr)) { work->num_sge = i; - return false; + return PTR_ERR(mr); } + work->frags[i].io_virt = sg_list[i].addr; + work->frags[i].length = sg_list[i].length; + work->frags[i].mr = mr; } work->num_sge = num_sge; - return true; + return 0; } static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, @@ -1773,8 +1781,8 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, struct mlx5_ib_mr *mr; mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); - if (!mr) - return -ENOENT; + if (IS_ERR(mr)) + return PTR_ERR(mr); ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length, &bytes_mapped, pf_flags); if (ret < 0) { @@ -1794,6 +1802,7 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, { u32 pf_flags = 0; struct prefetch_mr_work *work; + int rc; if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; @@ -1809,9 +1818,10 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, if (!work) return -ENOMEM; - if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) { + rc = init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge); + if (rc) { destroy_prefetch_work(work); - return -EINVAL; + return rc; } queue_work(system_unbound_wq, &work->work); return 0; diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index dc203f3d0f25..65ce6d0f1885 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -228,7 +228,6 @@ static const struct ib_device_ops qedr_dev_ops = { .query_srq = qedr_query_srq, .reg_user_mr = qedr_reg_user_mr, .req_notify_cq = qedr_arm_cq, - .resize_cq = qedr_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, qedr_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, qedr_cq, ibcq), diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index dcb3653db72d..9100009f0a23 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1052,16 +1052,6 @@ err0: return -EINVAL; } -int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata) -{ - struct qedr_dev *dev = get_qedr_dev(ibcq->device); - struct qedr_cq *cq = get_qedr_cq(ibcq); - - DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq); - - return 0; -} - #define QEDR_DESTROY_CQ_MAX_ITERATIONS (10) #define QEDR_DESTROY_CQ_ITER_DURATION (10) @@ -2744,15 +2734,18 @@ int qedr_query_qp(struct ib_qp *ibqp, int rc = 0; memset(¶ms, 0, sizeof(params)); - - rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, ¶ms); - if (rc) - goto err; - memset(qp_attr, 0, sizeof(*qp_attr)); memset(qp_init_attr, 0, sizeof(*qp_init_attr)); - qp_attr->qp_state = qedr_get_ibqp_state(params.state); + if (qp->qp_type != IB_QPT_GSI) { + rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, ¶ms); + if (rc) + goto err; + qp_attr->qp_state = qedr_get_ibqp_state(params.state); + } else { + qp_attr->qp_state = qedr_get_ibqp_state(QED_ROCE_QP_STATE_RTS); + } + qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state); qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu); qp_attr->path_mig_state = IB_MIG_MIGRATED; diff --git a/drivers/infiniband/hw/qedr/verbs.h 
b/drivers/infiniband/hw/qedr/verbs.h index 031687dafc61..081753df79ef 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -53,7 +53,6 @@ int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata); int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata); int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int qedr_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs, diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 84fc4dcc5399..bf3fa12fe935 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2021 Cornelis Networks. All rights reserved. * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. @@ -62,8 +63,8 @@ MODULE_PARM_DESC(compat_ddr_negotiate, "Attempt pre-IBTA 1.2 DDR speed negotiation"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Intel <ibsupport@intel.com>"); -MODULE_DESCRIPTION("Intel IB driver"); +MODULE_AUTHOR("Cornelis <support@cornelisnetworks.com>"); +MODULE_DESCRIPTION("Cornelis IB driver"); /* * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c index 398c4c00b932..18a70850b738 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.c +++ b/drivers/infiniband/hw/usnic/usnic_fwd.c @@ -103,7 +103,7 @@ void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev) kfree(ufdev); } -void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]) +void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, const char mac[ETH_ALEN]) { spin_lock(&ufdev->lock); memcpy(&ufdev->mac, mac, sizeof(ufdev->mac)); diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h index f0b71d593da5..a91200886922 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.h +++ b/drivers/infiniband/hw/usnic/usnic_fwd.h @@ -74,7 +74,7 @@ struct usnic_filter_action { struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev); void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev); -void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]); +void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, const char mac[ETH_ALEN]); void usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr); void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev); void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev); diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index da2e867a1ed9..38c7b6fb39d7 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -101,11 +101,29 @@ void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr) struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) { + struct rxe_ah *ah; + u32 ah_num; + if (!pkt || !pkt->qp) return NULL; if (qp_type(pkt->qp) == IB_QPT_RC || qp_type(pkt->qp) == IB_QPT_UC) return &pkt->qp->pri_av; - return (pkt->wqe) ? 
&pkt->wqe->av : NULL; + if (!pkt->wqe) + return NULL; + + ah_num = pkt->wqe->wr.wr.ud.ah_num; + if (ah_num) { + /* only new user provider or kernel client */ + ah = rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num); + if (!ah || ah->ah_num != ah_num || rxe_ah_pd(ah) != pkt->qp->pd) { + pr_warn("Unable to find AH matching ah_num\n"); + return NULL; + } + return &ah->av; + } + + /* only old user provider for UD sends*/ + return &pkt->wqe->wr.wr.ud.av; } diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index d2d802c776fd..d771ba8449a1 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -142,10 +142,7 @@ static inline enum comp_state get_wqe(struct rxe_qp *qp, /* we come here whether or not we found a response packet to see if * there are any posted WQEs */ - if (qp->is_user) - wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_USER); - else - wqe = queue_head(qp->sq.queue, QUEUE_TYPE_KERNEL); + wqe = queue_head(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT); *wqe_p = wqe; /* no WQE or requester has not started it yet */ @@ -383,30 +380,35 @@ static inline enum comp_state do_atomic(struct rxe_qp *qp, static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe, struct rxe_cqe *cqe) { + struct ib_wc *wc = &cqe->ibwc; + struct ib_uverbs_wc *uwc = &cqe->uibwc; + memset(cqe, 0, sizeof(*cqe)); if (!qp->is_user) { - struct ib_wc *wc = &cqe->ibwc; - - wc->wr_id = wqe->wr.wr_id; - wc->status = wqe->status; - wc->opcode = wr_to_wc_opcode(wqe->wr.opcode); - if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || - wqe->wr.opcode == IB_WR_SEND_WITH_IMM) - wc->wc_flags = IB_WC_WITH_IMM; - wc->byte_len = wqe->dma.length; - wc->qp = &qp->ibqp; + wc->wr_id = wqe->wr.wr_id; + wc->status = wqe->status; + wc->qp = &qp->ibqp; } else { - struct ib_uverbs_wc *uwc = &cqe->uibwc; - - uwc->wr_id = wqe->wr.wr_id; - uwc->status = wqe->status; - uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode); - if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || - wqe->wr.opcode == IB_WR_SEND_WITH_IMM) - uwc->wc_flags = IB_WC_WITH_IMM; - uwc->byte_len = wqe->dma.length; - uwc->qp_num = qp->ibqp.qp_num; + uwc->wr_id = wqe->wr.wr_id; + uwc->status = wqe->status; + uwc->qp_num = qp->ibqp.qp_num; + } + + if (wqe->status == IB_WC_SUCCESS) { + if (!qp->is_user) { + wc->opcode = wr_to_wc_opcode(wqe->wr.opcode); + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + wc->wc_flags = IB_WC_WITH_IMM; + wc->byte_len = wqe->dma.length; + } else { + uwc->opcode = wr_to_wc_opcode(wqe->wr.opcode); + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + uwc->wc_flags = IB_WC_WITH_IMM; + uwc->byte_len = wqe->dma.length; + } } } @@ -432,10 +434,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) if (post) make_send_cqe(qp, wqe, &cqe); - if (qp->is_user) - advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_USER); - else - advance_consumer(qp->sq.queue, QUEUE_TYPE_KERNEL); + queue_advance_consumer(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT); if (post) rxe_cq_post(qp->scq, &cqe, 0); @@ -539,7 +538,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify) wqe->status = IB_WC_WR_FLUSH_ERR; do_complete(qp, wqe); } else { - advance_consumer(q, q->type); + queue_advance_consumer(q, q->type); } } } diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index aef288f164fd..6848426c074f 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ 
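With the rxe_get_av() rework above, a UD send that carries wr.wr.ud.ah_num is resolved to a real AH object from the AH pool (with PD and index sanity checks), and the address vector embedded in the WQE is kept only as a fallback for older user-space providers. Kernel consumers are unaffected at the verbs level; posting a UD send still looks roughly like the sketch below, where the qp, ah, remote QPN/qkey and SGE are assumed to be set up elsewhere.

#include <rdma/ib_verbs.h>

/* Minimal UD send using an AH object; error handling trimmed. */
static int post_ud_send(struct ib_qp *qp, struct ib_ah *ah,
			u32 remote_qpn, u32 remote_qkey, struct ib_sge *sge)
{
	struct ib_ud_wr wr = {
		.wr = {
			.opcode	    = IB_WR_SEND,
			.send_flags = IB_SEND_SIGNALED,
			.sg_list    = sge,
			.num_sge    = 1,
		},
		.ah	     = ah,
		.remote_qpn  = remote_qpn,
		.remote_qkey = remote_qkey,
	};
	const struct ib_send_wr *bad_wr;

	return ib_post_send(qp, &wr.wr, &bad_wr);
}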
b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -25,11 +25,7 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, } if (cq) { - if (cq->is_user) - count = queue_count(cq->queue, QUEUE_TYPE_TO_USER); - else - count = queue_count(cq->queue, QUEUE_TYPE_KERNEL); - + count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT); if (cqe < count) { pr_warn("cqe(%d) < current # elements in queue (%d)", cqe, count); @@ -65,7 +61,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, int err; enum queue_type type; - type = uresp ? QUEUE_TYPE_TO_USER : QUEUE_TYPE_KERNEL; + type = QUEUE_TYPE_TO_CLIENT; cq->queue = rxe_queue_init(rxe, &cqe, sizeof(struct rxe_cqe), type); if (!cq->queue) { @@ -81,8 +77,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, return err; } - if (uresp) - cq->is_user = 1; + cq->is_user = uresp; cq->is_dying = false; @@ -117,11 +112,7 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) spin_lock_irqsave(&cq->cq_lock, flags); - if (cq->is_user) - full = queue_full(cq->queue, QUEUE_TYPE_TO_USER); - else - full = queue_full(cq->queue, QUEUE_TYPE_KERNEL); - + full = queue_full(cq->queue, QUEUE_TYPE_TO_CLIENT); if (unlikely(full)) { spin_unlock_irqrestore(&cq->cq_lock, flags); if (cq->ibcq.event_handler) { @@ -134,17 +125,10 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) return -EBUSY; } - if (cq->is_user) - addr = producer_addr(cq->queue, QUEUE_TYPE_TO_USER); - else - addr = producer_addr(cq->queue, QUEUE_TYPE_KERNEL); - + addr = queue_producer_addr(cq->queue, QUEUE_TYPE_TO_CLIENT); memcpy(addr, cqe, sizeof(*cqe)); - if (cq->is_user) - advance_producer(cq->queue, QUEUE_TYPE_TO_USER); - else - advance_producer(cq->queue, QUEUE_TYPE_KERNEL); + queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT); spin_unlock_irqrestore(&cq->cq_lock, flags); diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c index d5ceb706d964..a012522b577a 100644 --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c @@ -6,22 +6,22 @@ #include "rxe.h" #include "rxe_hw_counters.h" -static const char * const rxe_counter_name[] = { - [RXE_CNT_SENT_PKTS] = "sent_pkts", - [RXE_CNT_RCVD_PKTS] = "rcvd_pkts", - [RXE_CNT_DUP_REQ] = "duplicate_request", - [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_seq_request", - [RXE_CNT_RCV_RNR] = "rcvd_rnr_err", - [RXE_CNT_SND_RNR] = "send_rnr_err", - [RXE_CNT_RCV_SEQ_ERR] = "rcvd_seq_err", - [RXE_CNT_COMPLETER_SCHED] = "ack_deferred", - [RXE_CNT_RETRY_EXCEEDED] = "retry_exceeded_err", - [RXE_CNT_RNR_RETRY_EXCEEDED] = "retry_rnr_exceeded_err", - [RXE_CNT_COMP_RETRY] = "completer_retry_err", - [RXE_CNT_SEND_ERR] = "send_err", - [RXE_CNT_LINK_DOWNED] = "link_downed", - [RXE_CNT_RDMA_SEND] = "rdma_sends", - [RXE_CNT_RDMA_RECV] = "rdma_recvs", +static const struct rdma_stat_desc rxe_counter_descs[] = { + [RXE_CNT_SENT_PKTS].name = "sent_pkts", + [RXE_CNT_RCVD_PKTS].name = "rcvd_pkts", + [RXE_CNT_DUP_REQ].name = "duplicate_request", + [RXE_CNT_OUT_OF_SEQ_REQ].name = "out_of_seq_request", + [RXE_CNT_RCV_RNR].name = "rcvd_rnr_err", + [RXE_CNT_SND_RNR].name = "send_rnr_err", + [RXE_CNT_RCV_SEQ_ERR].name = "rcvd_seq_err", + [RXE_CNT_COMPLETER_SCHED].name = "ack_deferred", + [RXE_CNT_RETRY_EXCEEDED].name = "retry_exceeded_err", + [RXE_CNT_RNR_RETRY_EXCEEDED].name = "retry_rnr_exceeded_err", + [RXE_CNT_COMP_RETRY].name = "completer_retry_err", + [RXE_CNT_SEND_ERR].name = "send_err", + [RXE_CNT_LINK_DOWNED].name = 
"link_downed", + [RXE_CNT_RDMA_SEND].name = "rdma_sends", + [RXE_CNT_RDMA_RECV].name = "rdma_recvs", }; int rxe_ib_get_hw_stats(struct ib_device *ibdev, @@ -34,18 +34,18 @@ int rxe_ib_get_hw_stats(struct ib_device *ibdev, if (!port || !stats) return -EINVAL; - for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_name); cnt++) + for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_descs); cnt++) stats->value[cnt] = atomic64_read(&dev->stats_counters[cnt]); - return ARRAY_SIZE(rxe_counter_name); + return ARRAY_SIZE(rxe_counter_descs); } struct rdma_hw_stats *rxe_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num) { - BUILD_BUG_ON(ARRAY_SIZE(rxe_counter_name) != RXE_NUM_OF_COUNTERS); + BUILD_BUG_ON(ARRAY_SIZE(rxe_counter_descs) != RXE_NUM_OF_COUNTERS); - return rdma_alloc_hw_stats_struct(rxe_counter_name, - ARRAY_SIZE(rxe_counter_name), + return rdma_alloc_hw_stats_struct(rxe_counter_descs, + ARRAY_SIZE(rxe_counter_descs), RDMA_HW_STATS_DEFAULT_LIFESPAN); } diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index f0c954575bde..1ca43b859d80 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -86,6 +86,8 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key, int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length); int advance_dma_data(struct rxe_dma_info *dma, unsigned int length); int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey); +int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe); +int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr); int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); void rxe_mr_cleanup(struct rxe_pool_entry *arg); diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 5890a8246216..53271df10e47 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -24,17 +24,22 @@ u8 rxe_get_next_key(u32 last_key) int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) { + struct rxe_map_set *set = mr->cur_map_set; + switch (mr->type) { - case RXE_MR_TYPE_DMA: + case IB_MR_TYPE_DMA: return 0; - case RXE_MR_TYPE_MR: - if (iova < mr->iova || length > mr->length || - iova > mr->iova + mr->length - length) + case IB_MR_TYPE_USER: + case IB_MR_TYPE_MEM_REG: + if (iova < set->iova || length > set->length || + iova > set->iova + set->length - length) return -EFAULT; return 0; default: + pr_warn("%s: mr type (%d) not supported\n", + __func__, mr->type); return -EFAULT; } } @@ -48,48 +53,101 @@ static void rxe_mr_init(int access, struct rxe_mr *mr) u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1); u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0; - mr->ibmr.lkey = lkey; - mr->ibmr.rkey = rkey; + /* set ibmr->l/rkey and also copy into private l/rkey + * for user MRs these will always be the same + * for cases where caller 'owns' the key portion + * they may be different until REG_MR WQE is executed. 
+ */ + mr->lkey = mr->ibmr.lkey = lkey; + mr->rkey = mr->ibmr.rkey = rkey; + mr->state = RXE_MR_STATE_INVALID; - mr->type = RXE_MR_TYPE_NONE; mr->map_shift = ilog2(RXE_BUF_PER_MAP); } -static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf) +static void rxe_mr_free_map_set(int num_map, struct rxe_map_set *set) { int i; - int num_map; - struct rxe_map **map = mr->map; - num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP; + for (i = 0; i < num_map; i++) + kfree(set->map[i]); - mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL); - if (!mr->map) - goto err1; + kfree(set->map); + kfree(set); +} + +static int rxe_mr_alloc_map_set(int num_map, struct rxe_map_set **setp) +{ + int i; + struct rxe_map_set *set; + + set = kmalloc(sizeof(*set), GFP_KERNEL); + if (!set) + goto err_out; + + set->map = kmalloc_array(num_map, sizeof(struct rxe_map *), GFP_KERNEL); + if (!set->map) + goto err_free_set; for (i = 0; i < num_map; i++) { - mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL); - if (!mr->map[i]) - goto err2; + set->map[i] = kmalloc(sizeof(struct rxe_map), GFP_KERNEL); + if (!set->map[i]) + goto err_free_map; } + *setp = set; + + return 0; + +err_free_map: + for (i--; i >= 0; i--) + kfree(set->map[i]); + + kfree(set->map); +err_free_set: + kfree(set); +err_out: + return -ENOMEM; +} + +/** + * rxe_mr_alloc() - Allocate memory map array(s) for MR + * @mr: Memory region + * @num_buf: Number of buffer descriptors to support + * @both: If non zero allocate both mr->map and mr->next_map + * else just allocate mr->map. Used for fast MRs + * + * Return: 0 on success else an error + */ +static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf, int both) +{ + int ret; + int num_map; + BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP)); + num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP; mr->map_shift = ilog2(RXE_BUF_PER_MAP); mr->map_mask = RXE_BUF_PER_MAP - 1; - mr->num_buf = num_buf; - mr->num_map = num_map; mr->max_buf = num_map * RXE_BUF_PER_MAP; + mr->num_map = num_map; - return 0; + ret = rxe_mr_alloc_map_set(num_map, &mr->cur_map_set); + if (ret) + goto err_out; -err2: - for (i--; i >= 0; i--) - kfree(mr->map[i]); + if (both) { + ret = rxe_mr_alloc_map_set(num_map, &mr->next_map_set); + if (ret) { + rxe_mr_free_map_set(mr->num_map, mr->cur_map_set); + goto err_out; + } + } - kfree(mr->map); -err1: + return 0; + +err_out: return -ENOMEM; } @@ -100,12 +158,13 @@ void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr) mr->ibmr.pd = &pd->ibpd; mr->access = access; mr->state = RXE_MR_STATE_VALID; - mr->type = RXE_MR_TYPE_DMA; + mr->type = IB_MR_TYPE_DMA; } int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, int access, struct rxe_mr *mr) { + struct rxe_map_set *set; struct rxe_map **map; struct rxe_phys_buf *buf = NULL; struct ib_umem *umem; @@ -113,7 +172,6 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, int num_buf; void *vaddr; int err; - int i; umem = ib_umem_get(pd->ibpd.device, start, length, access); if (IS_ERR(umem)) { @@ -127,18 +185,20 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, rxe_mr_init(access, mr); - err = rxe_mr_alloc(mr, num_buf); + err = rxe_mr_alloc(mr, num_buf, 0); if (err) { pr_warn("%s: Unable to allocate memory for map\n", __func__); goto err_release_umem; } - mr->page_shift = PAGE_SHIFT; - mr->page_mask = PAGE_SIZE - 1; + set = mr->cur_map_set; + set->page_shift = PAGE_SHIFT; + set->page_mask = PAGE_SIZE - 1; + + num_buf = 0; + map = set->map; - num_buf = 0; - map = 
mr->map; if (length > 0) { buf = map[0]->buf; @@ -161,26 +221,24 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, buf->size = PAGE_SIZE; num_buf++; buf++; - } } mr->ibmr.pd = &pd->ibpd; mr->umem = umem; mr->access = access; - mr->length = length; - mr->iova = iova; - mr->va = start; - mr->offset = ib_umem_offset(umem); mr->state = RXE_MR_STATE_VALID; - mr->type = RXE_MR_TYPE_MR; + mr->type = IB_MR_TYPE_USER; + + set->length = length; + set->iova = iova; + set->va = start; + set->offset = ib_umem_offset(umem); return 0; err_cleanup_map: - for (i = 0; i < mr->num_map; i++) - kfree(mr->map[i]); - kfree(mr->map); + rxe_mr_free_map_set(mr->num_map, mr->cur_map_set); err_release_umem: ib_umem_release(umem); err_out: @@ -191,19 +249,17 @@ int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr) { int err; - rxe_mr_init(0, mr); + /* always allow remote access for FMRs */ + rxe_mr_init(IB_ACCESS_REMOTE, mr); - /* In fastreg, we also set the rkey */ - mr->ibmr.rkey = mr->ibmr.lkey; - - err = rxe_mr_alloc(mr, max_pages); + err = rxe_mr_alloc(mr, max_pages, 1); if (err) goto err1; mr->ibmr.pd = &pd->ibpd; mr->max_buf = max_pages; mr->state = RXE_MR_STATE_FREE; - mr->type = RXE_MR_TYPE_MR; + mr->type = IB_MR_TYPE_MEM_REG; return 0; @@ -214,21 +270,24 @@ err1: static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out, size_t *offset_out) { - size_t offset = iova - mr->iova + mr->offset; + struct rxe_map_set *set = mr->cur_map_set; + size_t offset = iova - set->iova + set->offset; int map_index; int buf_index; u64 length; + struct rxe_map *map; - if (likely(mr->page_shift)) { - *offset_out = offset & mr->page_mask; - offset >>= mr->page_shift; + if (likely(set->page_shift)) { + *offset_out = offset & set->page_mask; + offset >>= set->page_shift; *n_out = offset & mr->map_mask; *m_out = offset >> mr->map_shift; } else { map_index = 0; buf_index = 0; - length = mr->map[map_index]->buf[buf_index].size; + map = set->map[map_index]; + length = map->buf[buf_index].size; while (offset >= length) { offset -= length; @@ -238,7 +297,8 @@ static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out, map_index++; buf_index = 0; } - length = mr->map[map_index]->buf[buf_index].size; + map = set->map[map_index]; + length = map->buf[buf_index].size; } *m_out = map_index; @@ -259,7 +319,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length) goto out; } - if (!mr->map) { + if (!mr->cur_map_set) { addr = (void *)(uintptr_t)iova; goto out; } @@ -272,13 +332,13 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length) lookup_iova(mr, iova, &m, &n, &offset); - if (offset + length > mr->map[m]->buf[n].size) { + if (offset + length > mr->cur_map_set->map[m]->buf[n].size) { pr_warn("crosses page boundary\n"); addr = NULL; goto out; } - addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset; + addr = (void *)(uintptr_t)mr->cur_map_set->map[m]->buf[n].addr + offset; out: return addr; @@ -302,7 +362,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, if (length == 0) return 0; - if (mr->type == RXE_MR_TYPE_DMA) { + if (mr->type == IB_MR_TYPE_DMA) { u8 *src, *dest; src = (dir == RXE_TO_MR_OBJ) ? 
addr : ((void *)(uintptr_t)iova); @@ -314,7 +374,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, return 0; } - WARN_ON_ONCE(!mr->map); + WARN_ON_ONCE(!mr->cur_map_set); err = mr_check_range(mr, iova, length); if (err) { @@ -324,7 +384,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, lookup_iova(mr, iova, &m, &i, &offset); - map = mr->map + m; + map = mr->cur_map_set->map + m; buf = map[0]->buf + i; while (length > 0) { @@ -507,8 +567,8 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key, if (!mr) return NULL; - if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) || - (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) || + if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) || + (type == RXE_LOOKUP_REMOTE && mr->rkey != key) || mr_pd(mr) != pd || (access && !(access & mr->access)) || mr->state != RXE_MR_STATE_VALID)) { rxe_drop_ref(mr); @@ -531,9 +591,9 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey) goto err; } - if (rkey != mr->ibmr.rkey) { - pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n", - __func__, rkey, mr->ibmr.rkey); + if (rkey != mr->rkey) { + pr_err("%s: rkey (%#x) doesn't match mr->rkey (%#x)\n", + __func__, rkey, mr->rkey); ret = -EINVAL; goto err_drop_ref; } @@ -545,6 +605,12 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey) goto err_drop_ref; } + if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) { + pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type); + ret = -EINVAL; + goto err_drop_ref; + } + mr->state = RXE_MR_STATE_FREE; ret = 0; @@ -554,6 +620,67 @@ err: return ret; } +/* user can (re)register fast MR by executing a REG_MR WQE. + * user is expected to hold a reference on the ib mr until the + * WQE completes. + * Once a fast MR is created this is the only way to change the + * private keys. It is the responsibility of the user to maintain + * the ib mr keys in sync with rxe mr keys. + */ +int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr); + u32 key = wqe->wr.wr.reg.key & 0xff; + u32 access = wqe->wr.wr.reg.access; + struct rxe_map_set *set; + + /* user can only register MR in free state */ + if (unlikely(mr->state != RXE_MR_STATE_FREE)) { + pr_warn("%s: mr->lkey = 0x%x not free\n", + __func__, mr->lkey); + return -EINVAL; + } + + /* user can only register mr with qp in same protection domain */ + if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) { + pr_warn("%s: qp->pd and mr->pd don't match\n", + __func__); + return -EINVAL; + } + + mr->access = access; + mr->lkey = (mr->lkey & ~0xff) | key; + mr->rkey = (access & IB_ACCESS_REMOTE) ? 
mr->lkey : 0; + mr->state = RXE_MR_STATE_VALID; + + set = mr->cur_map_set; + mr->cur_map_set = mr->next_map_set; + mr->cur_map_set->iova = wqe->wr.wr.reg.mr->iova; + mr->next_map_set = set; + + return 0; +} + +int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct rxe_mr *mr = to_rmr(ibmr); + struct rxe_map_set *set = mr->next_map_set; + struct rxe_map *map; + struct rxe_phys_buf *buf; + + if (unlikely(set->nbuf == mr->num_buf)) + return -ENOMEM; + + map = set->map[set->nbuf / RXE_BUF_PER_MAP]; + buf = &map->buf[set->nbuf % RXE_BUF_PER_MAP]; + + buf->addr = addr; + buf->size = ibmr->page_size; + set->nbuf++; + + return 0; +} + int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct rxe_mr *mr = to_rmr(ibmr); @@ -564,7 +691,7 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) return -EINVAL; } - mr->state = RXE_MR_STATE_ZOMBIE; + mr->state = RXE_MR_STATE_INVALID; rxe_drop_ref(mr_pd(mr)); rxe_drop_index(mr); rxe_drop_ref(mr); @@ -575,14 +702,12 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) void rxe_mr_cleanup(struct rxe_pool_entry *arg) { struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem); - int i; ib_umem_release(mr->umem); - if (mr->map) { - for (i = 0; i < mr->num_map; i++) - kfree(mr->map[i]); + if (mr->cur_map_set) + rxe_mr_free_map_set(mr->num_map, mr->cur_map_set); - kfree(mr->map); - } + if (mr->next_map_set) + rxe_mr_free_map_set(mr->num_map, mr->next_map_set); } diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c index 5ba77df7598e..9534a7fe1a98 100644 --- a/drivers/infiniband/sw/rxe/rxe_mw.c +++ b/drivers/infiniband/sw/rxe/rxe_mw.c @@ -21,7 +21,7 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) } rxe_add_index(mw); - ibmw->rkey = (mw->pelem.index << 8) | rxe_get_next_key(-1); + mw->rkey = ibmw->rkey = (mw->pelem.index << 8) | rxe_get_next_key(-1); mw->state = (mw->ibmw.type == IB_MW_TYPE_2) ? 
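Splitting the MR mapping into cur_map_set and next_map_set gives fast-registration MRs a small double buffer: rxe_mr_set_page() only ever writes the staging set, so an in-flight mapping is never modified, and rxe_reg_fast_mr() above promotes the staging set when the REG_MR WQE runs. The promotion step, condensed (locking and the state/PD checks shown in the hunk are omitted):

/* rxe_reg_fast_mr(): make the set built by rxe_mr_set_page() current */
struct rxe_map_set *tmp = mr->cur_map_set;

mr->cur_map_set = mr->next_map_set;
mr->cur_map_set->iova = wqe->wr.wr.reg.mr->iova;
mr->next_map_set = tmp;		/* old mapping becomes the next staging set */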
RXE_MW_STATE_FREE : RXE_MW_STATE_VALID; spin_lock_init(&mw->lock); @@ -71,6 +71,8 @@ int rxe_dealloc_mw(struct ib_mw *ibmw) static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, struct rxe_mw *mw, struct rxe_mr *mr) { + u32 key = wqe->wr.wr.mw.rkey & 0xff; + if (mw->ibmw.type == IB_MW_TYPE_1) { if (unlikely(mw->state != RXE_MW_STATE_VALID)) { pr_err_once( @@ -108,7 +110,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, } } - if (unlikely((wqe->wr.wr.mw.rkey & 0xff) == (mw->ibmw.rkey & 0xff))) { + if (unlikely(key == (mw->rkey & 0xff))) { pr_err_once("attempt to bind MW with same key\n"); return -EINVAL; } @@ -140,15 +142,15 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, /* C10-75 */ if (mw->access & IB_ZERO_BASED) { - if (unlikely(wqe->wr.wr.mw.length > mr->length)) { + if (unlikely(wqe->wr.wr.mw.length > mr->cur_map_set->length)) { pr_err_once( "attempt to bind a ZB MW outside of the MR\n"); return -EINVAL; } } else { - if (unlikely((wqe->wr.wr.mw.addr < mr->iova) || + if (unlikely((wqe->wr.wr.mw.addr < mr->cur_map_set->iova) || ((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) > - (mr->iova + mr->length)))) { + (mr->cur_map_set->iova + mr->cur_map_set->length)))) { pr_err_once( "attempt to bind a VA MW outside of the MR\n"); return -EINVAL; @@ -161,13 +163,9 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, struct rxe_mw *mw, struct rxe_mr *mr) { - u32 rkey; - u32 new_rkey; - - rkey = mw->ibmw.rkey; - new_rkey = (rkey & 0xffffff00) | (wqe->wr.wr.mw.rkey & 0x000000ff); + u32 key = wqe->wr.wr.mw.rkey & 0xff; - mw->ibmw.rkey = new_rkey; + mw->rkey = (mw->rkey & ~0xff) | key; mw->access = wqe->wr.wr.mw.access; mw->state = RXE_MW_STATE_VALID; mw->addr = wqe->wr.wr.mw.addr; @@ -197,29 +195,29 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe) struct rxe_mw *mw; struct rxe_mr *mr; struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + u32 mw_rkey = wqe->wr.wr.mw.mw_rkey; + u32 mr_lkey = wqe->wr.wr.mw.mr_lkey; unsigned long flags; - mw = rxe_pool_get_index(&rxe->mw_pool, - wqe->wr.wr.mw.mw_rkey >> 8); + mw = rxe_pool_get_index(&rxe->mw_pool, mw_rkey >> 8); if (unlikely(!mw)) { ret = -EINVAL; goto err; } - if (unlikely(mw->ibmw.rkey != wqe->wr.wr.mw.mw_rkey)) { + if (unlikely(mw->rkey != mw_rkey)) { ret = -EINVAL; goto err_drop_mw; } if (likely(wqe->wr.wr.mw.length)) { - mr = rxe_pool_get_index(&rxe->mr_pool, - wqe->wr.wr.mw.mr_lkey >> 8); + mr = rxe_pool_get_index(&rxe->mr_pool, mr_lkey >> 8); if (unlikely(!mr)) { ret = -EINVAL; goto err_drop_mw; } - if (unlikely(mr->ibmr.lkey != wqe->wr.wr.mw.mr_lkey)) { + if (unlikely(mr->lkey != mr_lkey)) { ret = -EINVAL; goto err_drop_mr; } @@ -292,7 +290,7 @@ int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey) goto err; } - if (rkey != mw->ibmw.rkey) { + if (rkey != mw->rkey) { ret = -EINVAL; goto err_drop_ref; } @@ -323,7 +321,7 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey) if (!mw) return NULL; - if (unlikely((rxe_mw_rkey(mw) != rkey) || rxe_mw_pd(mw) != pd || + if (unlikely((mw->rkey != rkey) || rxe_mw_pd(mw) != pd || (mw->ibmw.type == IB_MW_TYPE_2 && mw->qp != qp) || (mw->length == 0) || (access && !(access & mw->access)) || diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h index e02f039b8c44..8f9aaaf260f2 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.h +++ b/drivers/infiniband/sw/rxe/rxe_opcode.h @@ 
-22,7 +22,6 @@ enum rxe_wr_mask { WR_LOCAL_OP_MASK = BIT(5), WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK, - WR_READ_WRITE_OR_SEND_MASK = WR_READ_OR_WRITE_MASK | WR_SEND_MASK, WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK, WR_ATOMIC_OR_READ_MASK = WR_ATOMIC_MASK | WR_READ_MASK, }; @@ -82,8 +81,9 @@ enum rxe_hdr_mask { RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12), - RXE_READ_OR_ATOMIC = (RXE_READ_MASK | RXE_ATOMIC_MASK), - RXE_WRITE_OR_SEND = (RXE_WRITE_MASK | RXE_SEND_MASK), + RXE_READ_OR_ATOMIC_MASK = (RXE_READ_MASK | RXE_ATOMIC_MASK), + RXE_WRITE_OR_SEND_MASK = (RXE_WRITE_MASK | RXE_SEND_MASK), + RXE_READ_OR_WRITE_MASK = (RXE_READ_MASK | RXE_WRITE_MASK), }; #define OPCODE_NONE (-1) diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 742e6ec93686..918270e34a35 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -9,6 +9,8 @@ #include <uapi/rdma/rdma_user_rxe.h> +#define DEFAULT_MAX_VALUE (1 << 20) + static inline enum ib_mtu rxe_mtu_int_to_enum(int mtu) { if (mtu < 256) @@ -37,7 +39,7 @@ static inline enum ib_mtu eth_mtu_int_to_enum(int mtu) enum rxe_device_param { RXE_MAX_MR_SIZE = -1ull, RXE_PAGE_SIZE_CAP = 0xfffff000, - RXE_MAX_QP_WR = 0x4000, + RXE_MAX_QP_WR = DEFAULT_MAX_VALUE, RXE_DEVICE_CAP_FLAGS = IB_DEVICE_BAD_PKEY_CNTR | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_AUTO_PATH_MIG @@ -58,42 +60,44 @@ enum rxe_device_param { RXE_MAX_INLINE_DATA = RXE_MAX_WQE_SIZE - sizeof(struct rxe_send_wqe), RXE_MAX_SGE_RD = 32, - RXE_MAX_CQ = 16384, + RXE_MAX_CQ = DEFAULT_MAX_VALUE, RXE_MAX_LOG_CQE = 15, - RXE_MAX_PD = 0x7ffc, + RXE_MAX_PD = DEFAULT_MAX_VALUE, RXE_MAX_QP_RD_ATOM = 128, RXE_MAX_RES_RD_ATOM = 0x3f000, RXE_MAX_QP_INIT_RD_ATOM = 128, RXE_MAX_MCAST_GRP = 8192, RXE_MAX_MCAST_QP_ATTACH = 56, RXE_MAX_TOT_MCAST_QP_ATTACH = 0x70000, - RXE_MAX_AH = 100, - RXE_MAX_SRQ_WR = 0x4000, + RXE_MAX_AH = (1<<15) - 1, /* 32Ki - 1 */ + RXE_MIN_AH_INDEX = 1, + RXE_MAX_AH_INDEX = RXE_MAX_AH, + RXE_MAX_SRQ_WR = DEFAULT_MAX_VALUE, RXE_MIN_SRQ_WR = 1, RXE_MAX_SRQ_SGE = 27, RXE_MIN_SRQ_SGE = 1, RXE_MAX_FMR_PAGE_LIST_LEN = 512, - RXE_MAX_PKEYS = 1, + RXE_MAX_PKEYS = 64, RXE_LOCAL_CA_ACK_DELAY = 15, - RXE_MAX_UCONTEXT = 512, + RXE_MAX_UCONTEXT = DEFAULT_MAX_VALUE, RXE_NUM_PORT = 1, - RXE_MAX_QP = 0x10000, RXE_MIN_QP_INDEX = 16, - RXE_MAX_QP_INDEX = 0x00020000, + RXE_MAX_QP_INDEX = DEFAULT_MAX_VALUE, + RXE_MAX_QP = DEFAULT_MAX_VALUE - RXE_MIN_QP_INDEX, - RXE_MAX_SRQ = 0x00001000, RXE_MIN_SRQ_INDEX = 0x00020001, - RXE_MAX_SRQ_INDEX = 0x00040000, + RXE_MAX_SRQ_INDEX = DEFAULT_MAX_VALUE, + RXE_MAX_SRQ = DEFAULT_MAX_VALUE - RXE_MIN_SRQ_INDEX, - RXE_MAX_MR = 0x00001000, - RXE_MAX_MW = 0x00001000, RXE_MIN_MR_INDEX = 0x00000001, - RXE_MAX_MR_INDEX = 0x00010000, + RXE_MAX_MR_INDEX = DEFAULT_MAX_VALUE, + RXE_MAX_MR = DEFAULT_MAX_VALUE - RXE_MIN_MR_INDEX, RXE_MIN_MW_INDEX = 0x00010001, RXE_MAX_MW_INDEX = 0x00020000, + RXE_MAX_MW = 0x00001000, RXE_MAX_PKT_PER_ACK = 64, @@ -113,7 +117,7 @@ enum rxe_device_param { /* default/initial rxe port parameters */ enum rxe_port_param { RXE_PORT_GID_TBL_LEN = 1024, - RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP, + RXE_PORT_PORT_CAP_FLAGS = IB_PORT_CM_SUP, RXE_PORT_MAX_MSG_SZ = 0x800000, RXE_PORT_BAD_PKEY_CNTR = 0, RXE_PORT_QKEY_VIOL_CNTR = 0, diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index ffa8420b4765..2e80bb6aa957 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -7,9 +7,17 @@ 
#include "rxe.h" #include "rxe_loc.h" -/* info about object pools - */ -struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { +static const struct rxe_type_info { + const char *name; + size_t size; + size_t elem_offset; + void (*cleanup)(struct rxe_pool_entry *obj); + enum rxe_pool_flags flags; + u32 min_index; + u32 max_index; + size_t key_offset; + size_t key_size; +} rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_UC] = { .name = "rxe-uc", .size = sizeof(struct rxe_ucontext), @@ -26,7 +34,9 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { .name = "rxe-ah", .size = sizeof(struct rxe_ah), .elem_offset = offsetof(struct rxe_ah, pelem), - .flags = RXE_POOL_NO_ALLOC, + .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, + .min_index = RXE_MIN_AH_INDEX, + .max_index = RXE_MAX_AH_INDEX, }, [RXE_TYPE_SRQ] = { .name = "rxe-srq", @@ -58,8 +68,8 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { .elem_offset = offsetof(struct rxe_mr, pelem), .cleanup = rxe_mr_cleanup, .flags = RXE_POOL_INDEX, - .max_index = RXE_MAX_MR_INDEX, .min_index = RXE_MIN_MR_INDEX, + .max_index = RXE_MAX_MR_INDEX, }, [RXE_TYPE_MW] = { .name = "rxe-mw", @@ -67,8 +77,8 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { .elem_offset = offsetof(struct rxe_mw, pelem), .cleanup = rxe_mw_cleanup, .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, - .max_index = RXE_MAX_MW_INDEX, .min_index = RXE_MIN_MW_INDEX, + .max_index = RXE_MAX_MW_INDEX, }, [RXE_TYPE_MC_GRP] = { .name = "rxe-mc_grp", @@ -94,7 +104,6 @@ static inline const char *pool_name(struct rxe_pool *pool) static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) { int err = 0; - size_t size; if ((max - min + 1) < pool->max_elem) { pr_warn("not enough indices for max_elem\n"); @@ -105,16 +114,12 @@ static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) pool->index.max_index = max; pool->index.min_index = min; - size = BITS_TO_LONGS(max - min + 1) * sizeof(long); - pool->index.table = kmalloc(size, GFP_KERNEL); + pool->index.table = bitmap_zalloc(max - min + 1, GFP_KERNEL); if (!pool->index.table) { err = -ENOMEM; goto out; } - pool->index.table_size = size; - bitmap_zero(pool->index.table, max - min + 1); - out: return err; } @@ -166,7 +171,7 @@ void rxe_pool_cleanup(struct rxe_pool *pool) pr_warn("%s pool destroyed with unfree'd elem\n", pool_name(pool)); - kfree(pool->index.table); + bitmap_free(pool->index.table); } static u32 alloc_index(struct rxe_pool *pool) @@ -327,7 +332,7 @@ void __rxe_drop_index(struct rxe_pool_entry *elem) void *rxe_alloc_locked(struct rxe_pool *pool) { - struct rxe_type_info *info = &rxe_type_info[pool->type]; + const struct rxe_type_info *info = &rxe_type_info[pool->type]; struct rxe_pool_entry *elem; u8 *obj; @@ -352,7 +357,7 @@ out_cnt: void *rxe_alloc(struct rxe_pool *pool) { - struct rxe_type_info *info = &rxe_type_info[pool->type]; + const struct rxe_type_info *info = &rxe_type_info[pool->type]; struct rxe_pool_entry *elem; u8 *obj; @@ -395,7 +400,7 @@ void rxe_elem_release(struct kref *kref) struct rxe_pool_entry *elem = container_of(kref, struct rxe_pool_entry, ref_cnt); struct rxe_pool *pool = elem->pool; - struct rxe_type_info *info = &rxe_type_info[pool->type]; + const struct rxe_type_info *info = &rxe_type_info[pool->type]; u8 *obj; if (pool->cleanup) @@ -411,7 +416,7 @@ void rxe_elem_release(struct kref *kref) void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index) { - struct rxe_type_info *info = &rxe_type_info[pool->type]; + const struct rxe_type_info *info = &rxe_type_info[pool->type]; 
struct rb_node *node; struct rxe_pool_entry *elem; u8 *obj; @@ -453,7 +458,7 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key) { - struct rxe_type_info *info = &rxe_type_info[pool->type]; + const struct rxe_type_info *info = &rxe_type_info[pool->type]; struct rb_node *node; struct rxe_pool_entry *elem; u8 *obj; diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 1feca1bffced..8ecd9f870aea 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -32,20 +32,6 @@ enum rxe_elem_type { struct rxe_pool_entry; -struct rxe_type_info { - const char *name; - size_t size; - size_t elem_offset; - void (*cleanup)(struct rxe_pool_entry *obj); - enum rxe_pool_flags flags; - u32 max_index; - u32 min_index; - size_t key_offset; - size_t key_size; -}; - -extern struct rxe_type_info rxe_type_info[]; - struct rxe_pool_entry { struct rxe_pool *pool; struct kref ref_cnt; @@ -74,7 +60,6 @@ struct rxe_pool { struct { struct rb_root tree; unsigned long *table; - size_t table_size; u32 last; u32 max_index; u32 min_index; diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 1ab6af7ddb25..975321812c87 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -190,8 +190,6 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, INIT_LIST_HEAD(&qp->grp_list); - skb_queue_head_init(&qp->send_pkts); - spin_lock_init(&qp->grp_lock); spin_lock_init(&qp->state_lock); @@ -231,7 +229,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, qp->sq.max_inline = init->cap.max_inline_data = wqe_size; wqe_size += sizeof(struct rxe_send_wqe); - type = uresp ? QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL; + type = QUEUE_TYPE_FROM_CLIENT; qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, wqe_size, type); if (!qp->sq.queue) @@ -248,12 +246,8 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, return err; } - if (qp->is_user) - qp->req.wqe_index = producer_index(qp->sq.queue, - QUEUE_TYPE_FROM_USER); - else - qp->req.wqe_index = producer_index(qp->sq.queue, - QUEUE_TYPE_KERNEL); + qp->req.wqe_index = queue_get_producer(qp->sq.queue, + QUEUE_TYPE_FROM_CLIENT); qp->req.state = QP_STATE_RESET; qp->req.opcode = -1; @@ -293,7 +287,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n", qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size); - type = uresp ? 
QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL; + type = QUEUE_TYPE_FROM_CLIENT; qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, wqe_size, type); if (!qp->rq.queue) @@ -313,8 +307,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, spin_lock_init(&qp->rq.producer_lock); spin_lock_init(&qp->rq.consumer_lock); - qp->rq.is_user = qp->is_user; - skb_queue_head_init(&qp->resp_pkts); rxe_init_task(rxe, &qp->resp.task, qp, diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c index 72d95398e604..6e6e023c1b45 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.c +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -111,17 +111,33 @@ err1: static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q, unsigned int num_elem) { - if (!queue_empty(q, q->type) && (num_elem < queue_count(q, q->type))) + enum queue_type type = q->type; + u32 prod; + u32 cons; + + if (!queue_empty(q, q->type) && (num_elem < queue_count(q, type))) return -EINVAL; - while (!queue_empty(q, q->type)) { - memcpy(producer_addr(new_q, new_q->type), - consumer_addr(q, q->type), - new_q->elem_size); - advance_producer(new_q, new_q->type); - advance_consumer(q, q->type); + prod = queue_get_producer(new_q, type); + cons = queue_get_consumer(q, type); + + while (!queue_empty(q, type)) { + memcpy(queue_addr_from_index(new_q, prod), + queue_addr_from_index(q, cons), new_q->elem_size); + prod = queue_next_index(new_q, prod); + cons = queue_next_index(q, cons); } + new_q->buf->producer_index = prod; + q->buf->consumer_index = cons; + + /* update private index copies */ + if (type == QUEUE_TYPE_TO_CLIENT) + new_q->index = new_q->buf->producer_index; + else + q->index = q->buf->consumer_index; + + /* exchange rxe_queue headers */ swap(*q, *new_q); return 0; diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h index 2702b0e55fc3..6227112ef7a2 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.h +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -10,34 +10,47 @@ /* for definition of shared struct rxe_queue_buf */ #include <uapi/rdma/rdma_user_rxe.h> -/* implements a simple circular buffer that can optionally be - * shared between user space and the kernel and can be resized - * the requested element size is rounded up to a power of 2 - * and the number of elements in the buffer is also rounded - * up to a power of 2. Since the queue is empty when the - * producer and consumer indices match the maximum capacity - * of the queue is one less than the number of element slots +/* Implements a simple circular buffer that is shared between user + * and the driver and can be resized. The requested element size is + * rounded up to a power of 2 and the number of elements in the buffer + * is also rounded up to a power of 2. Since the queue is empty when + * the producer and consumer indices match the maximum capacity of the + * queue is one less than the number of element slots. * * Notes: - * - Kernel space indices are always masked off to q->index_mask - * before storing so do not need to be checked on reads. - * - User space indices may be out of range and must be - * masked before use when read. - * - The kernel indices for shared queues must not be written - * by user space so a local copy is used and a shared copy is - * stored when the local copy changes. + * - The driver indices are always masked off to q->index_mask + * before storing so do not need to be checked on reads. 
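The comment being rewritten here states the new queue model: the four queue_type values name who produces and who consumes, the generic queue_get_producer()/queue_get_consumer() helpers that follow pick either the trusted local index or the shared one accordingly, and only the index owned by the other side is read with smp_load_acquire() and published with smp_store_release(). A stripped-down sketch of the driver-as-consumer case (QUEUE_TYPE_FROM_CLIENT), assuming the usual power-of-two index_mask:

/* driver consumes; the client-owned producer index is not trusted */
static bool demo_queue_empty(struct rxe_queue *q)
{
	u32 prod = smp_load_acquire(&q->buf->producer_index); /* client owned */
	u32 cons = q->index;                                   /* local copy   */

	return ((prod - cons) & q->index_mask) == 0;
}

static void demo_advance_consumer(struct rxe_queue *q)
{
	u32 cons = (q->index + 1) & q->index_mask;

	q->index = cons;
	/* publish to the client with release semantics */
	smp_store_release(&q->buf->consumer_index, cons);
}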
+ * - The user whether user space or kernel is generally + * not trusted so its parameters are masked to make sure + * they do not access the queue out of bounds on reads. + * - The driver indices for queues must not be written + * by user so a local copy is used and a shared copy is + * stored when the local copy is changed. * - By passing the type in the parameter list separate from q - * the compiler can eliminate the switch statement when the - * actual queue type is known when the function is called. - * In the performance path this is done. In less critical - * paths just q->type is passed. + * the compiler can eliminate the switch statement when the + * actual queue type is known when the function is called at + * compile time. + * - These queues are lock free. The user and driver must protect + * changes to their end of the queues with locks if more than one + * CPU can be accessing it at the same time. */ -/* type of queue */ +/** + * enum queue_type - type of queue + * @QUEUE_TYPE_TO_CLIENT: Queue is written by rxe driver and + * read by client. Used by rxe driver only. + * @QUEUE_TYPE_FROM_CLIENT: Queue is written by client and + * read by rxe driver. Used by rxe driver only. + * @QUEUE_TYPE_TO_DRIVER: Queue is written by client and + * read by rxe driver. Used by kernel client only. + * @QUEUE_TYPE_FROM_DRIVER: Queue is written by rxe driver and + * read by client. Used by kernel client only. + */ enum queue_type { - QUEUE_TYPE_KERNEL, - QUEUE_TYPE_TO_USER, - QUEUE_TYPE_FROM_USER, + QUEUE_TYPE_TO_CLIENT, + QUEUE_TYPE_FROM_CLIENT, + QUEUE_TYPE_TO_DRIVER, + QUEUE_TYPE_FROM_DRIVER, }; struct rxe_queue { @@ -69,238 +82,171 @@ struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem, int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, unsigned int elem_size, struct ib_udata *udata, struct mminfo __user *outbuf, - /* Protect producers while resizing queue */ - spinlock_t *producer_lock, - /* Protect consumers while resizing queue */ - spinlock_t *consumer_lock); + spinlock_t *producer_lock, spinlock_t *consumer_lock); void rxe_queue_cleanup(struct rxe_queue *queue); -static inline int next_index(struct rxe_queue *q, int index) +static inline u32 queue_next_index(struct rxe_queue *q, int index) { - return (index + 1) & q->buf->index_mask; + return (index + 1) & q->index_mask; } -static inline int queue_empty(struct rxe_queue *q, enum queue_type type) +static inline u32 queue_get_producer(const struct rxe_queue *q, + enum queue_type type) { u32 prod; - u32 cons; switch (type) { - case QUEUE_TYPE_FROM_USER: - /* protect user space index */ + case QUEUE_TYPE_FROM_CLIENT: + /* protect user index */ prod = smp_load_acquire(&q->buf->producer_index); - cons = q->index; break; - case QUEUE_TYPE_TO_USER: + case QUEUE_TYPE_TO_CLIENT: prod = q->index; - /* protect user space index */ - cons = smp_load_acquire(&q->buf->consumer_index); break; - case QUEUE_TYPE_KERNEL: + case QUEUE_TYPE_FROM_DRIVER: + /* protect driver index */ + prod = smp_load_acquire(&q->buf->producer_index); + break; + case QUEUE_TYPE_TO_DRIVER: prod = q->buf->producer_index; - cons = q->buf->consumer_index; break; } - return ((prod - cons) & q->index_mask) == 0; + return prod; } -static inline int queue_full(struct rxe_queue *q, enum queue_type type) +static inline u32 queue_get_consumer(const struct rxe_queue *q, + enum queue_type type) { - u32 prod; u32 cons; switch (type) { - case QUEUE_TYPE_FROM_USER: - /* protect user space index */ - prod = smp_load_acquire(&q->buf->producer_index); + case 
QUEUE_TYPE_FROM_CLIENT: cons = q->index; break; - case QUEUE_TYPE_TO_USER: - prod = q->index; - /* protect user space index */ + case QUEUE_TYPE_TO_CLIENT: + /* protect user index */ cons = smp_load_acquire(&q->buf->consumer_index); break; - case QUEUE_TYPE_KERNEL: - prod = q->buf->producer_index; + case QUEUE_TYPE_FROM_DRIVER: cons = q->buf->consumer_index; break; + case QUEUE_TYPE_TO_DRIVER: + /* protect driver index */ + cons = smp_load_acquire(&q->buf->consumer_index); + break; } - return ((prod + 1 - cons) & q->index_mask) == 0; + return cons; } -static inline unsigned int queue_count(const struct rxe_queue *q, - enum queue_type type) +static inline int queue_empty(struct rxe_queue *q, enum queue_type type) { - u32 prod; - u32 cons; - - switch (type) { - case QUEUE_TYPE_FROM_USER: - /* protect user space index */ - prod = smp_load_acquire(&q->buf->producer_index); - cons = q->index; - break; - case QUEUE_TYPE_TO_USER: - prod = q->index; - /* protect user space index */ - cons = smp_load_acquire(&q->buf->consumer_index); - break; - case QUEUE_TYPE_KERNEL: - prod = q->buf->producer_index; - cons = q->buf->consumer_index; - break; - } + u32 prod = queue_get_producer(q, type); + u32 cons = queue_get_consumer(q, type); - return (prod - cons) & q->index_mask; + return ((prod - cons) & q->index_mask) == 0; } -static inline void advance_producer(struct rxe_queue *q, enum queue_type type) +static inline int queue_full(struct rxe_queue *q, enum queue_type type) { - u32 prod; + u32 prod = queue_get_producer(q, type); + u32 cons = queue_get_consumer(q, type); - switch (type) { - case QUEUE_TYPE_FROM_USER: - pr_warn_once("Normally kernel should not write user space index\n"); - /* protect user space index */ - prod = smp_load_acquire(&q->buf->producer_index); - prod = (prod + 1) & q->index_mask; - /* same */ - smp_store_release(&q->buf->producer_index, prod); - break; - case QUEUE_TYPE_TO_USER: - prod = q->index; - q->index = (prod + 1) & q->index_mask; - q->buf->producer_index = q->index; - break; - case QUEUE_TYPE_KERNEL: - prod = q->buf->producer_index; - q->buf->producer_index = (prod + 1) & q->index_mask; - break; - } + return ((prod + 1 - cons) & q->index_mask) == 0; } -static inline void advance_consumer(struct rxe_queue *q, enum queue_type type) +static inline u32 queue_count(const struct rxe_queue *q, + enum queue_type type) { - u32 cons; + u32 prod = queue_get_producer(q, type); + u32 cons = queue_get_consumer(q, type); - switch (type) { - case QUEUE_TYPE_FROM_USER: - cons = q->index; - q->index = (cons + 1) & q->index_mask; - q->buf->consumer_index = q->index; - break; - case QUEUE_TYPE_TO_USER: - pr_warn_once("Normally kernel should not write user space index\n"); - /* protect user space index */ - cons = smp_load_acquire(&q->buf->consumer_index); - cons = (cons + 1) & q->index_mask; - /* same */ - smp_store_release(&q->buf->consumer_index, cons); - break; - case QUEUE_TYPE_KERNEL: - cons = q->buf->consumer_index; - q->buf->consumer_index = (cons + 1) & q->index_mask; - break; - } + return (prod - cons) & q->index_mask; } -static inline void *producer_addr(struct rxe_queue *q, enum queue_type type) +static inline void queue_advance_producer(struct rxe_queue *q, + enum queue_type type) { u32 prod; switch (type) { - case QUEUE_TYPE_FROM_USER: - /* protect user space index */ - prod = smp_load_acquire(&q->buf->producer_index); - prod &= q->index_mask; + case QUEUE_TYPE_FROM_CLIENT: + pr_warn("%s: attempt to advance client index\n", + __func__); break; - case QUEUE_TYPE_TO_USER: + case 
QUEUE_TYPE_TO_CLIENT: prod = q->index; + prod = (prod + 1) & q->index_mask; + q->index = prod; + /* protect user index */ + smp_store_release(&q->buf->producer_index, prod); + break; + case QUEUE_TYPE_FROM_DRIVER: + pr_warn("%s: attempt to advance driver index\n", + __func__); break; - case QUEUE_TYPE_KERNEL: + case QUEUE_TYPE_TO_DRIVER: prod = q->buf->producer_index; + prod = (prod + 1) & q->index_mask; + q->buf->producer_index = prod; break; } - - return q->buf->data + (prod << q->log2_elem_size); } -static inline void *consumer_addr(struct rxe_queue *q, enum queue_type type) +static inline void queue_advance_consumer(struct rxe_queue *q, + enum queue_type type) { u32 cons; switch (type) { - case QUEUE_TYPE_FROM_USER: + case QUEUE_TYPE_FROM_CLIENT: cons = q->index; + cons = (cons + 1) & q->index_mask; + q->index = cons; + /* protect user index */ + smp_store_release(&q->buf->consumer_index, cons); break; - case QUEUE_TYPE_TO_USER: - /* protect user space index */ - cons = smp_load_acquire(&q->buf->consumer_index); - cons &= q->index_mask; + case QUEUE_TYPE_TO_CLIENT: + pr_warn("%s: attempt to advance client index\n", + __func__); break; - case QUEUE_TYPE_KERNEL: + case QUEUE_TYPE_FROM_DRIVER: cons = q->buf->consumer_index; + cons = (cons + 1) & q->index_mask; + q->buf->consumer_index = cons; + break; + case QUEUE_TYPE_TO_DRIVER: + pr_warn("%s: attempt to advance driver index\n", + __func__); break; } - - return q->buf->data + (cons << q->log2_elem_size); } -static inline unsigned int producer_index(struct rxe_queue *q, - enum queue_type type) +static inline void *queue_producer_addr(struct rxe_queue *q, + enum queue_type type) { - u32 prod; + u32 prod = queue_get_producer(q, type); - switch (type) { - case QUEUE_TYPE_FROM_USER: - /* protect user space index */ - prod = smp_load_acquire(&q->buf->producer_index); - prod &= q->index_mask; - break; - case QUEUE_TYPE_TO_USER: - prod = q->index; - break; - case QUEUE_TYPE_KERNEL: - prod = q->buf->producer_index; - break; - } - - return prod; + return q->buf->data + (prod << q->log2_elem_size); } -static inline unsigned int consumer_index(struct rxe_queue *q, - enum queue_type type) +static inline void *queue_consumer_addr(struct rxe_queue *q, + enum queue_type type) { - u32 cons; - - switch (type) { - case QUEUE_TYPE_FROM_USER: - cons = q->index; - break; - case QUEUE_TYPE_TO_USER: - /* protect user space index */ - cons = smp_load_acquire(&q->buf->consumer_index); - cons &= q->index_mask; - break; - case QUEUE_TYPE_KERNEL: - cons = q->buf->consumer_index; - break; - } + u32 cons = queue_get_consumer(q, type); - return cons; + return q->buf->data + (cons << q->log2_elem_size); } -static inline void *addr_from_index(struct rxe_queue *q, - unsigned int index) +static inline void *queue_addr_from_index(struct rxe_queue *q, u32 index) { return q->buf->data + ((index & q->index_mask) - << q->buf->log2_elem_size); + << q->log2_elem_size); } -static inline unsigned int index_from_addr(const struct rxe_queue *q, +static inline u32 queue_index_from_addr(const struct rxe_queue *q, const void *addr) { return (((u8 *)addr - q->buf->data) >> q->log2_elem_size) @@ -309,7 +255,7 @@ static inline unsigned int index_from_addr(const struct rxe_queue *q, static inline void *queue_head(struct rxe_queue *q, enum queue_type type) { - return queue_empty(q, type) ? NULL : consumer_addr(q, type); + return queue_empty(q, type) ? 
NULL : queue_consumer_addr(q, type); } #endif /* RXE_QUEUE_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 3894197a82f6..0c9d2af15f3d 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -49,21 +49,16 @@ static void req_retry(struct rxe_qp *qp) unsigned int cons; unsigned int prod; - if (qp->is_user) { - cons = consumer_index(q, QUEUE_TYPE_FROM_USER); - prod = producer_index(q, QUEUE_TYPE_FROM_USER); - } else { - cons = consumer_index(q, QUEUE_TYPE_KERNEL); - prod = producer_index(q, QUEUE_TYPE_KERNEL); - } + cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT); + prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT); qp->req.wqe_index = cons; qp->req.psn = qp->comp.psn; qp->req.opcode = -1; for (wqe_index = cons; wqe_index != prod; - wqe_index = next_index(q, wqe_index)) { - wqe = addr_from_index(qp->sq.queue, wqe_index); + wqe_index = queue_next_index(q, wqe_index)) { + wqe = queue_addr_from_index(qp->sq.queue, wqe_index); mask = wr_opcode_mask(wqe->wr.opcode, qp); if (wqe->state == wqe_state_posted) @@ -121,15 +116,9 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) unsigned int cons; unsigned int prod; - if (qp->is_user) { - wqe = queue_head(q, QUEUE_TYPE_FROM_USER); - cons = consumer_index(q, QUEUE_TYPE_FROM_USER); - prod = producer_index(q, QUEUE_TYPE_FROM_USER); - } else { - wqe = queue_head(q, QUEUE_TYPE_KERNEL); - cons = consumer_index(q, QUEUE_TYPE_KERNEL); - prod = producer_index(q, QUEUE_TYPE_KERNEL); - } + wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT); + cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT); + prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT); if (unlikely(qp->req.state == QP_STATE_DRAIN)) { /* check to see if we are drained; @@ -170,7 +159,7 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) if (index == prod) return NULL; - wqe = addr_from_index(q, index); + wqe = queue_addr_from_index(q, index); if (unlikely((qp->req.state == QP_STATE_DRAIN || qp->req.state == QP_STATE_DRAINED) && @@ -390,9 +379,8 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, /* length from start of bth to end of icrc */ paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; - /* pkt->hdr, rxe, port_num and mask are initialized in ifc - * layer - */ + /* pkt->hdr, port_num and mask are initialized in ifc layer */ + pkt->rxe = rxe; pkt->opcode = opcode; pkt->qp = qp; pkt->psn = qp->req.psn; @@ -402,6 +390,9 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, /* init skb */ av = rxe_get_av(pkt); + if (!av) + return NULL; + skb = rxe_init_packet(rxe, av, paylen, pkt); if (unlikely(!skb)) return NULL; @@ -472,7 +463,7 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (err) return err; - if (pkt->mask & RXE_WRITE_OR_SEND) { + if (pkt->mask & RXE_WRITE_OR_SEND_MASK) { if (wqe->wr.send_flags & IB_SEND_INLINE) { u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset]; @@ -560,7 +551,8 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, qp->req.opcode = pkt->opcode; if (pkt->mask & RXE_END_MASK) - qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); + qp->req.wqe_index = queue_next_index(qp->sq.queue, + qp->req.wqe_index); qp->need_req_skb = 0; @@ -572,7 +564,6 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) { u8 opcode = wqe->wr.opcode; - struct rxe_mr *mr; u32 rkey; int ret; @@ -590,14 +581,11 @@ 
static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) } break; case IB_WR_REG_MR: - mr = to_rmr(wqe->wr.wr.reg.mr); - rxe_add_ref(mr); - mr->state = RXE_MR_STATE_VALID; - mr->access = wqe->wr.wr.reg.access; - mr->ibmr.lkey = wqe->wr.wr.reg.key; - mr->ibmr.rkey = wqe->wr.wr.reg.key; - mr->iova = wqe->wr.wr.reg.mr->iova; - rxe_drop_ref(mr); + ret = rxe_reg_fast_mr(qp, wqe); + if (unlikely(ret)) { + wqe->status = IB_WC_LOC_QP_OP_ERR; + return ret; + } break; case IB_WR_BIND_MW: ret = rxe_bind_mw(qp, wqe); @@ -614,7 +602,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; - qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); + qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); if ((wqe->wr.send_flags & IB_SEND_SIGNALED) || qp->sq_sig_type == IB_SIGNAL_ALL_WR) @@ -645,7 +633,8 @@ next_wqe: goto exit; if (unlikely(qp->req.state == QP_STATE_RESET)) { - qp->req.wqe_index = consumer_index(q, q->type); + qp->req.wqe_index = queue_get_consumer(q, + QUEUE_TYPE_FROM_CLIENT); qp->req.opcode = -1; qp->req.need_rd_atomic = 0; qp->req.wait_psn = 0; @@ -691,13 +680,13 @@ next_wqe: } mask = rxe_opcode[opcode].mask; - if (unlikely(mask & RXE_READ_OR_ATOMIC)) { + if (unlikely(mask & RXE_READ_OR_ATOMIC_MASK)) { if (check_init_depth(qp, wqe)) goto exit; } mtu = get_mtu(qp); - payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0; + payload = (mask & RXE_WRITE_OR_SEND_MASK) ? wqe->dma.resid : 0; if (payload > mtu) { if (qp_type(qp) == IB_QPT_UD) { /* C10-93.1.1: If the total sum of all the buffer lengths specified for a @@ -711,7 +700,7 @@ next_wqe: wqe->last_psn = qp->req.psn; qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; qp->req.opcode = IB_OPCODE_UD_SEND_ONLY; - qp->req.wqe_index = next_index(qp->sq.queue, + qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 5501227ddc65..e8f435fa6e4d 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -303,10 +303,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp) spin_lock_bh(&srq->rq.consumer_lock); - if (qp->is_user) - wqe = queue_head(q, QUEUE_TYPE_FROM_USER); - else - wqe = queue_head(q, QUEUE_TYPE_KERNEL); + wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT); if (!wqe) { spin_unlock_bh(&srq->rq.consumer_lock); return RESPST_ERR_RNR; @@ -322,13 +319,8 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp) memcpy(&qp->resp.srq_wqe, wqe, size); qp->resp.wqe = &qp->resp.srq_wqe.wqe; - if (qp->is_user) { - advance_consumer(q, QUEUE_TYPE_FROM_USER); - count = queue_count(q, QUEUE_TYPE_FROM_USER); - } else { - advance_consumer(q, QUEUE_TYPE_KERNEL); - count = queue_count(q, QUEUE_TYPE_KERNEL); - } + queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT); + count = queue_count(q, QUEUE_TYPE_FROM_CLIENT); if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) { srq->limit = 0; @@ -357,12 +349,8 @@ static enum resp_states check_resource(struct rxe_qp *qp, qp->resp.status = IB_WC_WR_FLUSH_ERR; return RESPST_COMPLETE; } else if (!srq) { - if (qp->is_user) - qp->resp.wqe = queue_head(qp->rq.queue, - QUEUE_TYPE_FROM_USER); - else - qp->resp.wqe = queue_head(qp->rq.queue, - QUEUE_TYPE_KERNEL); + qp->resp.wqe = queue_head(qp->rq.queue, + QUEUE_TYPE_FROM_CLIENT); if (qp->resp.wqe) { qp->resp.status = IB_WC_WR_FLUSH_ERR; 
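On the responder side, all of the receive-queue accesses in these hunks collapse to the single QUEUE_TYPE_FROM_CLIENT case, since the driver is always the consumer there regardless of whether the client is a user process or a kernel ULP. The resulting pattern is the usual head-then-advance pair; simplified:

struct rxe_recv_wqe *wqe;

while ((wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT))) {
	/* hand the WQE to the completion / flush path */
	queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT);
}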
return RESPST_COMPLETE; @@ -374,7 +362,7 @@ static enum resp_states check_resource(struct rxe_qp *qp, } } - if (pkt->mask & RXE_READ_OR_ATOMIC) { + if (pkt->mask & RXE_READ_OR_ATOMIC_MASK) { /* it is the requesters job to not send * too many read/atomic ops, we just * recycle the responder resource queue @@ -389,12 +377,8 @@ static enum resp_states check_resource(struct rxe_qp *qp, if (srq) return get_srq_wqe(qp); - if (qp->is_user) - qp->resp.wqe = queue_head(qp->rq.queue, - QUEUE_TYPE_FROM_USER); - else - qp->resp.wqe = queue_head(qp->rq.queue, - QUEUE_TYPE_KERNEL); + qp->resp.wqe = queue_head(qp->rq.queue, + QUEUE_TYPE_FROM_CLIENT); return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR; } @@ -429,7 +413,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp, enum resp_states state; int access; - if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) { + if (pkt->mask & RXE_READ_OR_WRITE_MASK) { if (pkt->mask & RXE_RETH_MASK) { qp->resp.va = reth_va(pkt); qp->resp.offset = 0; @@ -450,7 +434,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp, } /* A zero-byte op is not required to set an addr or rkey. */ - if ((pkt->mask & (RXE_READ_MASK | RXE_WRITE_OR_SEND)) && + if ((pkt->mask & RXE_READ_OR_WRITE_MASK) && (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) { return RESPST_EXECUTE; @@ -876,7 +860,6 @@ static enum resp_states do_complete(struct rxe_qp *qp, wc->opcode = (pkt->mask & RXE_IMMDT_MASK && pkt->mask & RXE_WRITE_MASK) ? IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV; - wc->vendor_err = 0; wc->byte_len = (pkt->mask & RXE_IMMDT_MASK && pkt->mask & RXE_WRITE_MASK) ? qp->resp.length : wqe->dma.length - wqe->dma.resid; @@ -897,8 +880,6 @@ static enum resp_states do_complete(struct rxe_qp *qp, uwc->ex.invalidate_rkey = ieth_rkey(pkt); } - uwc->qp_num = qp->ibqp.qp_num; - if (pkt->mask & RXE_DETH_MASK) uwc->src_qp = deth_sqp(pkt); @@ -930,18 +911,13 @@ static enum resp_states do_complete(struct rxe_qp *qp, if (pkt->mask & RXE_DETH_MASK) wc->src_qp = deth_sqp(pkt); - wc->qp = &qp->ibqp; wc->port_num = qp->attr.port_num; } } /* have copy for srq and reference for !srq */ - if (!qp->srq) { - if (qp->is_user) - advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_USER); - else - advance_consumer(qp->rq.queue, QUEUE_TYPE_KERNEL); - } + if (!qp->srq) + queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT); qp->resp.wqe = NULL; @@ -1213,7 +1189,7 @@ static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify) return; while (!qp->srq && q && queue_head(q, q->type)) - advance_consumer(q, q->type); + queue_advance_consumer(q, q->type); } int rxe_responder(void *arg) diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index 610c98d24b5c..eb1c4c3b3a78 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -86,14 +86,13 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, srq->srq_num = srq->pelem.index; srq->rq.max_wr = init->attr.max_wr; srq->rq.max_sge = init->attr.max_sge; - srq->rq.is_user = srq->is_user; srq_wqe_size = rcv_wqe_size(srq->rq.max_sge); spin_lock_init(&srq->rq.producer_lock); spin_lock_init(&srq->rq.consumer_lock); - type = uresp ? 
QUEUE_TYPE_FROM_USER : QUEUE_TYPE_KERNEL; + type = QUEUE_TYPE_FROM_CLIENT; q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type); if (!q) { diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 267b5a9c345d..0aa0d7e52773 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -29,13 +29,10 @@ static int rxe_query_port(struct ib_device *dev, u32 port_num, struct ib_port_attr *attr) { struct rxe_dev *rxe = to_rdev(dev); - struct rxe_port *port; int rc; - port = &rxe->port; - /* *attr being zeroed by the caller, avoid zeroing it here */ - *attr = port->attr; + *attr = rxe->port.attr; mutex_lock(&rxe->usdev_lock); rc = ib_get_eth_speed(dev, port_num, &attr->active_speed, @@ -161,9 +158,19 @@ static int rxe_create_ah(struct ib_ah *ibah, struct ib_udata *udata) { - int err; struct rxe_dev *rxe = to_rdev(ibah->device); struct rxe_ah *ah = to_rah(ibah); + struct rxe_create_ah_resp __user *uresp = NULL; + int err; + + if (udata) { + /* test if new user provider */ + if (udata->outlen >= sizeof(*uresp)) + uresp = udata->outbuf; + ah->is_user = true; + } else { + ah->is_user = false; + } err = rxe_av_chk_attr(rxe, init_attr->ah_attr); if (err) @@ -173,6 +180,24 @@ static int rxe_create_ah(struct ib_ah *ibah, if (err) return err; + /* create index > 0 */ + rxe_add_index(ah); + ah->ah_num = ah->pelem.index; + + if (uresp) { + /* only if new user provider */ + err = copy_to_user(&uresp->ah_num, &ah->ah_num, + sizeof(uresp->ah_num)); + if (err) { + rxe_drop_index(ah); + rxe_drop_ref(ah); + return -EFAULT; + } + } else if (ah->is_user) { + /* only if old user provider */ + ah->ah_num = 0; + } + rxe_init_av(init_attr->ah_attr, &ah->av); return 0; } @@ -205,6 +230,7 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags) { struct rxe_ah *ah = to_rah(ibah); + rxe_drop_index(ah); rxe_drop_ref(ah); return 0; } @@ -218,11 +244,7 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) int num_sge = ibwr->num_sge; int full; - if (rq->is_user) - full = queue_full(rq->queue, QUEUE_TYPE_FROM_USER); - else - full = queue_full(rq->queue, QUEUE_TYPE_KERNEL); - + full = queue_full(rq->queue, QUEUE_TYPE_TO_DRIVER); if (unlikely(full)) { err = -ENOMEM; goto err1; @@ -237,11 +259,7 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) for (i = 0; i < num_sge; i++) length += ibwr->sg_list[i].length; - if (rq->is_user) - recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_FROM_USER); - else - recv_wqe = producer_addr(rq->queue, QUEUE_TYPE_KERNEL); - + recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_TO_DRIVER); recv_wqe->wr_id = ibwr->wr_id; recv_wqe->num_sge = num_sge; @@ -254,10 +272,7 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) recv_wqe->dma.cur_sge = 0; recv_wqe->dma.sge_offset = 0; - if (rq->is_user) - advance_producer(rq->queue, QUEUE_TYPE_FROM_USER); - else - advance_producer(rq->queue, QUEUE_TYPE_KERNEL); + queue_advance_producer(rq->queue, QUEUE_TYPE_TO_DRIVER); return 0; @@ -281,9 +296,6 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, if (udata->outlen < sizeof(*uresp)) return -EINVAL; uresp = udata->outbuf; - srq->is_user = true; - } else { - srq->is_user = false; } err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK); @@ -522,8 +534,11 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) { + 
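rxe_create_ah() in the hunk above also shows how the driver tells old and new user providers apart: a provider that can accept the AH number passes a response buffer at least sizeof(struct rxe_create_ah_resp) long, and only then is ah_num copied back; otherwise ah_num is cleared and the old embedded-AV path keeps working. Condensed, with the index/ref cleanup from the hunk omitted:

struct rxe_create_ah_resp __user *uresp = NULL;

if (udata && udata->outlen >= sizeof(*uresp))
	uresp = udata->outbuf;		/* provider understands ah_num */

/* after allocating the AH and its index */
if (uresp) {
	if (copy_to_user(&uresp->ah_num, &ah->ah_num, sizeof(uresp->ah_num)))
		return -EFAULT;		/* the hunk drops the index and ref first */
} else if (ah->is_user) {
	ah->ah_num = 0;			/* old provider: keep using the WQE AV */
}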
struct ib_ah *ibah = ud_wr(ibwr)->ah; + wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn; wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey; + wr->wr.ud.ah_num = to_rah(ibah)->ah_num; if (qp_type(qp) == IB_QPT_GSI) wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index; if (wr->opcode == IB_WR_SEND_WITH_IMM) @@ -595,11 +610,6 @@ static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, return; } - if (qp_type(qp) == IB_QPT_UD || - qp_type(qp) == IB_QPT_SMI || - qp_type(qp) == IB_QPT_GSI) - memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av)); - if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) copy_inline_data_to_wqe(wqe, ibwr); else @@ -633,27 +643,17 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, spin_lock_irqsave(&qp->sq.sq_lock, flags); - if (qp->is_user) - full = queue_full(sq->queue, QUEUE_TYPE_FROM_USER); - else - full = queue_full(sq->queue, QUEUE_TYPE_KERNEL); + full = queue_full(sq->queue, QUEUE_TYPE_TO_DRIVER); if (unlikely(full)) { spin_unlock_irqrestore(&qp->sq.sq_lock, flags); return -ENOMEM; } - if (qp->is_user) - send_wqe = producer_addr(sq->queue, QUEUE_TYPE_FROM_USER); - else - send_wqe = producer_addr(sq->queue, QUEUE_TYPE_KERNEL); - + send_wqe = queue_producer_addr(sq->queue, QUEUE_TYPE_TO_DRIVER); init_send_wqe(qp, ibwr, mask, length, send_wqe); - if (qp->is_user) - advance_producer(sq->queue, QUEUE_TYPE_FROM_USER); - else - advance_producer(sq->queue, QUEUE_TYPE_KERNEL); + queue_advance_producer(sq->queue, QUEUE_TYPE_TO_DRIVER); spin_unlock_irqrestore(&qp->sq.sq_lock, flags); @@ -845,18 +845,12 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) spin_lock_irqsave(&cq->cq_lock, flags); for (i = 0; i < num_entries; i++) { - if (cq->is_user) - cqe = queue_head(cq->queue, QUEUE_TYPE_TO_USER); - else - cqe = queue_head(cq->queue, QUEUE_TYPE_KERNEL); + cqe = queue_head(cq->queue, QUEUE_TYPE_FROM_DRIVER); if (!cqe) break; memcpy(wc++, &cqe->ibwc, sizeof(*wc)); - if (cq->is_user) - advance_consumer(cq->queue, QUEUE_TYPE_TO_USER); - else - advance_consumer(cq->queue, QUEUE_TYPE_KERNEL); + queue_advance_consumer(cq->queue, QUEUE_TYPE_FROM_DRIVER); } spin_unlock_irqrestore(&cq->cq_lock, flags); @@ -868,10 +862,7 @@ static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt) struct rxe_cq *cq = to_rcq(ibcq); int count; - if (cq->is_user) - count = queue_count(cq->queue, QUEUE_TYPE_TO_USER); - else - count = queue_count(cq->queue, QUEUE_TYPE_KERNEL); + count = queue_count(cq->queue, QUEUE_TYPE_FROM_DRIVER); return (count > wc_cnt) ? 
wc_cnt : count; } @@ -887,10 +878,7 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) if (cq->notify != IB_CQ_NEXT_COMP) cq->notify = flags & IB_CQ_SOLICITED_MASK; - if (cq->is_user) - empty = queue_empty(cq->queue, QUEUE_TYPE_TO_USER); - else - empty = queue_empty(cq->queue, QUEUE_TYPE_KERNEL); + empty = queue_empty(cq->queue, QUEUE_TYPE_FROM_DRIVER); if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty) ret = 1; @@ -987,41 +975,26 @@ err1: return ERR_PTR(err); } -static int rxe_set_page(struct ib_mr *ibmr, u64 addr) -{ - struct rxe_mr *mr = to_rmr(ibmr); - struct rxe_map *map; - struct rxe_phys_buf *buf; - - if (unlikely(mr->nbuf == mr->num_buf)) - return -ENOMEM; - - map = mr->map[mr->nbuf / RXE_BUF_PER_MAP]; - buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP]; - - buf->addr = addr; - buf->size = ibmr->page_size; - mr->nbuf++; - - return 0; -} - +/* build next_map_set from scatterlist + * The IB_WR_REG_MR WR will swap map_sets + */ static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { struct rxe_mr *mr = to_rmr(ibmr); + struct rxe_map_set *set = mr->next_map_set; int n; - mr->nbuf = 0; + set->nbuf = 0; - n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page); + n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_mr_set_page); - mr->va = ibmr->iova; - mr->iova = ibmr->iova; - mr->length = ibmr->length; - mr->page_shift = ilog2(ibmr->page_size); - mr->page_mask = ibmr->page_size - 1; - mr->offset = mr->iova & mr->page_mask; + set->va = ibmr->iova; + set->iova = ibmr->iova; + set->length = ibmr->length; + set->page_shift = ilog2(ibmr->page_size); + set->page_mask = ibmr->page_size - 1; + set->offset = set->iova & set->page_mask; return n; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index ac2a2148027f..35e041450090 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -46,8 +46,9 @@ struct rxe_pd { struct rxe_ah { struct ib_ah ibah; struct rxe_pool_entry pelem; - struct rxe_pd *pd; struct rxe_av av; + bool is_user; + int ah_num; }; struct rxe_cqe { @@ -64,7 +65,7 @@ struct rxe_cq { spinlock_t cq_lock; u8 notify; bool is_dying; - int is_user; + bool is_user; struct tasklet_struct comp_task; }; @@ -77,7 +78,6 @@ enum wqe_state { }; struct rxe_sq { - bool is_user; int max_wr; int max_sge; int max_inline; @@ -86,7 +86,6 @@ struct rxe_sq { }; struct rxe_rq { - bool is_user; int max_wr; int max_sge; spinlock_t producer_lock; /* guard queue producer */ @@ -100,7 +99,6 @@ struct rxe_srq { struct rxe_pd *pd; struct rxe_rq rq; u32 srq_num; - bool is_user; int limit; int error; @@ -240,7 +238,6 @@ struct rxe_qp { struct sk_buff_head req_pkts; struct sk_buff_head resp_pkts; - struct sk_buff_head send_pkts; struct rxe_req_info req; struct rxe_comp_info comp; @@ -267,18 +264,11 @@ struct rxe_qp { }; enum rxe_mr_state { - RXE_MR_STATE_ZOMBIE, RXE_MR_STATE_INVALID, RXE_MR_STATE_FREE, RXE_MR_STATE_VALID, }; -enum rxe_mr_type { - RXE_MR_TYPE_NONE, - RXE_MR_TYPE_DMA, - RXE_MR_TYPE_MR, -}; - enum rxe_mr_copy_dir { RXE_TO_MR_OBJ, RXE_FROM_MR_OBJ, @@ -300,6 +290,17 @@ struct rxe_map { struct rxe_phys_buf buf[RXE_BUF_PER_MAP]; }; +struct rxe_map_set { + struct rxe_map **map; + u64 va; + u64 iova; + size_t length; + u32 offset; + u32 nbuf; + int page_shift; + int page_mask; +}; + static inline int rkey_is_mw(u32 rkey) { u32 index = rkey >> 8; @@ -313,28 +314,24 @@ struct rxe_mr { struct ib_umem *umem; + u32 lkey; + u32 rkey; enum 
rxe_mr_state state; - enum rxe_mr_type type; - u64 va; - u64 iova; - size_t length; - u32 offset; + enum ib_mr_type type; int access; - int page_shift; - int page_mask; int map_shift; int map_mask; u32 num_buf; - u32 nbuf; u32 max_buf; u32 num_map; atomic_t num_mw; - struct rxe_map **map; + struct rxe_map_set *cur_map_set; + struct rxe_map_set *next_map_set; }; enum rxe_mw_state { @@ -350,6 +347,7 @@ struct rxe_mw { enum rxe_mw_state state; struct rxe_qp *qp; /* Type 2 only */ struct rxe_mr *mr; + u32 rkey; int access; u64 addr; u64 length; @@ -469,19 +467,14 @@ static inline struct rxe_mw *to_rmw(struct ib_mw *mw) return mw ? container_of(mw, struct rxe_mw, ibmw) : NULL; } -static inline struct rxe_pd *mr_pd(struct rxe_mr *mr) +static inline struct rxe_pd *rxe_ah_pd(struct rxe_ah *ah) { - return to_rpd(mr->ibmr.pd); + return to_rpd(ah->ibah.pd); } -static inline u32 mr_lkey(struct rxe_mr *mr) -{ - return mr->ibmr.lkey; -} - -static inline u32 mr_rkey(struct rxe_mr *mr) +static inline struct rxe_pd *mr_pd(struct rxe_mr *mr) { - return mr->ibmr.rkey; + return to_rpd(mr->ibmr.pd); } static inline struct rxe_pd *rxe_mw_pd(struct rxe_mw *mw) @@ -489,11 +482,6 @@ static inline struct rxe_pd *rxe_mw_pd(struct rxe_mw *mw) return to_rpd(mw->ibmw.pd); } -static inline u32 rxe_mw_rkey(struct rxe_mw *mw) -{ - return mw->ibmw.rkey; -} - int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name); void rxe_mc_cleanup(struct rxe_pool_entry *arg); diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 7a5ed86ffc9f..7acdd3c3a599 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1951,8 +1951,6 @@ int siw_cm_init(void) void siw_cm_exit(void) { - if (siw_cm_wq) { - flush_workqueue(siw_cm_wq); + if (siw_cm_wq) destroy_workqueue(siw_cm_wq); - } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 684c2ddb16f5..fd9d7f2c4d64 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1583,6 +1583,7 @@ int ipoib_cm_dev_init(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); int max_srq_sge, i; + u8 addr; INIT_LIST_HEAD(&priv->cm.passive_ids); INIT_LIST_HEAD(&priv->cm.reap_list); @@ -1636,7 +1637,8 @@ int ipoib_cm_dev_init(struct net_device *dev) } } - priv->dev->dev_addr[0] = IPOIB_FLAGS_RC; + addr = IPOIB_FLAGS_RC; + dev_addr_mod(dev, 0, &addr, 1); return 0; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index ceabfb0b0a83..2c3dca41d3bd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1057,13 +1057,11 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) { union ib_gid search_gid; union ib_gid gid0; - union ib_gid *netdev_gid; int err; u16 index; u32 port; bool ret = false; - netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4); if (rdma_query_gid(priv->ca, priv->port, 0, &gid0)) return false; @@ -1073,7 +1071,8 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) * to do it later */ priv->local_gid.global.subnet_prefix = gid0.global.subnet_prefix; - netdev_gid->global.subnet_prefix = gid0.global.subnet_prefix; + dev_addr_mod(priv->dev, 4, (u8 *)&gid0.global.subnet_prefix, + sizeof(gid0.global.subnet_prefix)); search_gid.global.subnet_prefix = gid0.global.subnet_prefix; search_gid.global.interface_id = priv->local_gid.global.interface_id; @@ -1135,8 +1134,8 @@ static 
bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) if (!test_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags)) { memcpy(&priv->local_gid, &gid0, sizeof(priv->local_gid)); - memcpy(priv->dev->dev_addr + 4, &gid0, - sizeof(priv->local_gid)); + dev_addr_mod(priv->dev, 4, (u8 *)&gid0, + sizeof(priv->local_gid)); ret = true; } } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 0aa8629fdf62..9934b8bd7f56 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1696,6 +1696,7 @@ static void ipoib_dev_uninit_default(struct net_device *dev) static int ipoib_dev_init_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); + u8 addr_mod[3]; ipoib_napi_add(dev); @@ -1723,9 +1724,10 @@ static int ipoib_dev_init_default(struct net_device *dev) } /* after qp created set dev address */ - priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; - priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff; - priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff; + addr_mod[0] = (priv->qp->qp_num >> 16) & 0xff; + addr_mod[1] = (priv->qp->qp_num >> 8) & 0xff; + addr_mod[2] = (priv->qp->qp_num) & 0xff; + dev_addr_mod(priv->dev, 1, addr_mod, sizeof(addr_mod)); return 0; @@ -1886,8 +1888,7 @@ static int ipoib_parent_init(struct net_device *ndev) priv->ca->name, priv->port, result); return result; } - memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, - sizeof(union ib_gid)); + dev_addr_mod(priv->dev, 4, priv->local_gid.raw, sizeof(union ib_gid)); SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent); priv->dev->dev_port = priv->port - 1; @@ -1908,8 +1909,8 @@ static void ipoib_child_init(struct net_device *ndev) memcpy(&priv->local_gid, priv->dev->dev_addr + 4, sizeof(priv->local_gid)); else { - memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, - INFINIBAND_ALEN); + __dev_addr_set(priv->dev, ppriv->dev->dev_addr, + INFINIBAND_ALEN); memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid)); } @@ -1997,7 +1998,6 @@ static void ipoib_ndo_uninit(struct net_device *dev) if (priv->wq) { /* See ipoib_mcast_carrier_on_task() */ WARN_ON(test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)); - flush_workqueue(priv->wq); destroy_workqueue(priv->wq); priv->wq = NULL; } @@ -2327,7 +2327,7 @@ static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid) memcpy(&priv->local_gid.global.interface_id, &gid->global.interface_id, sizeof(gid->global.interface_id)); - memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid)); + dev_addr_mod(netdev, 4, (u8 *)&priv->local_gid, sizeof(priv->local_gid)); clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); netif_addr_unlock_bh(netdev); diff --git a/drivers/infiniband/ulp/opa_vnic/Kconfig b/drivers/infiniband/ulp/opa_vnic/Kconfig index e84248587187..4d43d055fa8e 100644 --- a/drivers/infiniband/ulp/opa_vnic/Kconfig +++ b/drivers/infiniband/ulp/opa_vnic/Kconfig @@ -1,9 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_OPA_VNIC - tristate "Intel OPA VNIC support" + tristate "Cornelis OPX VNIC support" depends on X86_64 && INFINIBAND help - This is Omni-Path (OPA) Virtual Network Interface Controller (VNIC) + This is Omni-Path Express (OPX) Virtual Network Interface Controller (VNIC) driver for Ethernet over Omni-Path feature. It implements the HW independent VNIC functionality. It interfaces with Linux stack for data path and IB MAD for the control path. 
diff --git a/drivers/infiniband/ulp/opa_vnic/Makefile b/drivers/infiniband/ulp/opa_vnic/Makefile index a8c21d140ccb..196183817cdc 100644 --- a/drivers/infiniband/ulp/opa_vnic/Makefile +++ b/drivers/infiniband/ulp/opa_vnic/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only -# Makefile - Intel Omni-Path Virtual Network Controller driver +# Makefile - Cornelis Omni-Path Express Virtual Network Controller driver # Copyright(c) 2017, Intel Corporation. +# Copyright(c) 2021, Cornelis Networks. # obj-$(CONFIG_INFINIBAND_OPA_VNIC) += opa_vnic.o diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index cecf0f7cadf9..21c6cea8b1db 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -1,5 +1,6 @@ /* * Copyright(c) 2017 Intel Corporation. + * Copyright(c) 2021 Cornelis Networks. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -46,7 +47,7 @@ */ /* - * This file contains OPA Virtual Network Interface Controller (VNIC) + * This file contains OPX Virtual Network Interface Controller (VNIC) * Ethernet Management Agent (EMA) driver */ @@ -1051,5 +1052,5 @@ static void opa_vnic_deinit(void) module_exit(opa_vnic_deinit); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Intel Corporation"); -MODULE_DESCRIPTION("Intel OPA Virtual Network driver"); +MODULE_AUTHOR("Cornelis Networks"); +MODULE_DESCRIPTION("Cornelis OPX Virtual Network driver"); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c index 5e780bdd763d..f7e459fe68be 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c @@ -37,46 +37,50 @@ void rtrs_clt_inc_failover_cnt(struct rtrs_clt_stats *stats) s->rdma.failover_cnt++; } -int rtrs_clt_stats_migration_cnt_to_str(struct rtrs_clt_stats *stats, - char *buf, size_t len) +int rtrs_clt_stats_migration_from_cnt_to_str(struct rtrs_clt_stats *stats, char *buf) { struct rtrs_clt_stats_pcpu *s; size_t used; int cpu; - used = scnprintf(buf, len, " "); - for_each_possible_cpu(cpu) - used += scnprintf(buf + used, len - used, " CPU%u", cpu); - - used += scnprintf(buf + used, len - used, "\nfrom:"); + used = 0; for_each_possible_cpu(cpu) { s = per_cpu_ptr(stats->pcpu_stats, cpu); - used += scnprintf(buf + used, len - used, " %d", + used += sysfs_emit_at(buf, used, "%d ", atomic_read(&s->cpu_migr.from)); } - used += scnprintf(buf + used, len - used, "\nto :"); + used += sysfs_emit_at(buf, used, "\n"); + + return used; +} + +int rtrs_clt_stats_migration_to_cnt_to_str(struct rtrs_clt_stats *stats, char *buf) +{ + struct rtrs_clt_stats_pcpu *s; + + size_t used; + int cpu; + + used = 0; for_each_possible_cpu(cpu) { s = per_cpu_ptr(stats->pcpu_stats, cpu); - used += scnprintf(buf + used, len - used, " %d", - s->cpu_migr.to); + used += sysfs_emit_at(buf, used, "%d ", s->cpu_migr.to); } - used += scnprintf(buf + used, len - used, "\n"); + + used += sysfs_emit_at(buf, used, "\n"); return used; } -int rtrs_clt_stats_reconnects_to_str(struct rtrs_clt_stats *stats, char *buf, - size_t len) +int rtrs_clt_stats_reconnects_to_str(struct rtrs_clt_stats *stats, char *buf) { - return scnprintf(buf, len, "%d %d\n", - stats->reconnects.successful_cnt, - stats->reconnects.fail_cnt); + return sysfs_emit(buf, "%d %d\n", stats->reconnects.successful_cnt, + stats->reconnects.fail_cnt); } -ssize_t 
rtrs_clt_stats_rdma_to_str(struct rtrs_clt_stats *stats, - char *page, size_t len) +ssize_t rtrs_clt_stats_rdma_to_str(struct rtrs_clt_stats *stats, char *page) { struct rtrs_clt_stats_rdma sum; struct rtrs_clt_stats_rdma *r; @@ -94,16 +98,15 @@ ssize_t rtrs_clt_stats_rdma_to_str(struct rtrs_clt_stats *stats, sum.failover_cnt += r->failover_cnt; } - return scnprintf(page, len, "%llu %llu %llu %llu %u %llu\n", + return sysfs_emit(page, "%llu %llu %llu %llu %u %llu\n", sum.dir[READ].cnt, sum.dir[READ].size_total, sum.dir[WRITE].cnt, sum.dir[WRITE].size_total, atomic_read(&stats->inflight), sum.failover_cnt); } -ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *s, - char *page, size_t len) +ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *s, char *page) { - return scnprintf(page, len, "echo 1 to reset all statistics\n"); + return sysfs_emit(page, "echo 1 to reset all statistics\n"); } int rtrs_clt_reset_rdma_stats(struct rtrs_clt_stats *stats, bool enable) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index 4ee592ccf979..0e69180c3771 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -296,8 +296,12 @@ static struct kobj_attribute rtrs_clt_remove_path_attr = __ATTR(remove_path, 0644, rtrs_clt_remove_path_show, rtrs_clt_remove_path_store); -STAT_ATTR(struct rtrs_clt_stats, cpu_migration, - rtrs_clt_stats_migration_cnt_to_str, +STAT_ATTR(struct rtrs_clt_stats, cpu_migration_from, + rtrs_clt_stats_migration_from_cnt_to_str, + rtrs_clt_reset_cpu_migr_stats); + +STAT_ATTR(struct rtrs_clt_stats, cpu_migration_to, + rtrs_clt_stats_migration_to_cnt_to_str, rtrs_clt_reset_cpu_migr_stats); STAT_ATTR(struct rtrs_clt_stats, reconnects, @@ -313,7 +317,8 @@ STAT_ATTR(struct rtrs_clt_stats, reset_all, rtrs_clt_reset_all_stats); static struct attribute *rtrs_clt_stats_attrs[] = { - &cpu_migration_attr.attr, + &cpu_migration_from_attr.attr, + &cpu_migration_to_attr.attr, &reconnects_attr.attr, &rdma_attr.attr, &reset_all_attr.attr, diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index bc8824b4ee0d..15c0077dd27e 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2788,6 +2788,12 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, struct rtrs_clt *clt; int err, i; + if (strchr(sessname, '/') || strchr(sessname, '.')) { + pr_err("sessname cannot contain / and .\n"); + err = -EINVAL; + goto out; + } + clt = alloc_clt(sessname, paths_num, port, pdu_sz, ops->priv, ops->link_ev, reconnect_delay_sec, diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index 9dc819885ec7..9afffccff973 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -224,19 +224,18 @@ void rtrs_clt_update_all_stats(struct rtrs_clt_io_req *req, int dir); int rtrs_clt_reset_rdma_lat_distr_stats(struct rtrs_clt_stats *stats, bool enable); ssize_t rtrs_clt_stats_rdma_lat_distr_to_str(struct rtrs_clt_stats *stats, - char *page, size_t len); + char *page); int rtrs_clt_reset_cpu_migr_stats(struct rtrs_clt_stats *stats, bool enable); -int rtrs_clt_stats_migration_cnt_to_str(struct rtrs_clt_stats *stats, char *buf, - size_t len); +int rtrs_clt_stats_migration_from_cnt_to_str(struct rtrs_clt_stats *stats, char *buf); +int rtrs_clt_stats_migration_to_cnt_to_str(struct rtrs_clt_stats *stats, char *buf); int rtrs_clt_reset_reconnects_stat(struct 
rtrs_clt_stats *stats, bool enable); -int rtrs_clt_stats_reconnects_to_str(struct rtrs_clt_stats *stats, char *buf, - size_t len); +int rtrs_clt_stats_reconnects_to_str(struct rtrs_clt_stats *stats, char *buf); int rtrs_clt_reset_rdma_stats(struct rtrs_clt_stats *stats, bool enable); ssize_t rtrs_clt_stats_rdma_to_str(struct rtrs_clt_stats *stats, - char *page, size_t len); + char *page); int rtrs_clt_reset_all_stats(struct rtrs_clt_stats *stats, bool enable); ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *stats, - char *page, size_t len); + char *page); /* rtrs-clt-sysfs.c */ diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index d12ddfa50747..78eac9a4f703 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -398,7 +398,7 @@ static ssize_t get_value##_show(struct kobject *kobj, \ { \ type *stats = container_of(kobj, type, kobj_stats); \ \ - return print(stats, page, PAGE_SIZE); \ + return print(stats, page); \ } #define STAT_ATTR(type, stat, print, reset) \ diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c index 12c374b5eb6e..44b1c1652131 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c @@ -23,8 +23,7 @@ int rtrs_srv_reset_rdma_stats(struct rtrs_srv_stats *stats, bool enable) return -EINVAL; } -ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, - char *page, size_t len) +ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, char *page) { struct rtrs_srv_stats_rdma_stats *r = &stats->rdma_stats; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 20efd44297fb..9c43ce5ba1c1 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -102,7 +102,7 @@ static ssize_t rtrs_srv_src_addr_show(struct kobject *kobj, sess = container_of(kobj, struct rtrs_srv_sess, kobj); cnt = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, page, PAGE_SIZE); - return cnt + scnprintf(page + cnt, PAGE_SIZE - cnt, "\n"); + return cnt + sysfs_emit_at(page, cnt, "\n"); } static struct kobj_attribute rtrs_srv_src_addr_attr = diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 716ef7b23558..7df71f8cf149 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -803,6 +803,11 @@ static int process_info_req(struct rtrs_srv_con *con, return err; } + if (strchr(msg->sessname, '/') || strchr(msg->sessname, '.')) { + rtrs_err(s, "sessname cannot contain / and .\n"); + return -EINVAL; + } + if (exist_sessname(sess->srv->ctx, msg->sessname, &sess->srv->paths_uuid)) { rtrs_err(s, "sessname is duplicated: %s\n", msg->sessname); @@ -1766,6 +1771,7 @@ static struct rtrs_srv_sess *__alloc_sess(struct rtrs_srv *srv, strscpy(sess->s.sessname, str, sizeof(sess->s.sessname)); sess->s.con_num = con_num; + sess->s.irq_con_num = con_num; sess->s.recon_cnt = recon_cnt; uuid_copy(&sess->s.uuid, uuid); spin_lock_init(&sess->state_lock); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h index 9d8d2a91a235..7d403c12faf3 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h @@ -136,8 +136,7 @@ static inline void rtrs_srv_update_rdma_stats(struct rtrs_srv_stats *s, /* functions which are implemented in rtrs-srv-stats.c */ int 
rtrs_srv_reset_rdma_stats(struct rtrs_srv_stats *stats, bool enable); -ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, - char *page, size_t len); +ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, char *page); int rtrs_srv_reset_all_stats(struct rtrs_srv_stats *stats, bool enable); ssize_t rtrs_srv_reset_all_help(struct rtrs_srv_stats *stats, char *page, size_t len); diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index ca542e477d38..37952c8e768c 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -222,13 +222,23 @@ static void qp_event_handler(struct ib_event *ev, void *ctx) } } +static bool is_pollqueue(struct rtrs_con *con) +{ + return con->cid >= con->sess->irq_con_num; +} + static int create_cq(struct rtrs_con *con, int cq_vector, int nr_cqe, enum ib_poll_context poll_ctx) { struct rdma_cm_id *cm_id = con->cm_id; struct ib_cq *cq; - cq = ib_cq_pool_get(cm_id->device, nr_cqe, cq_vector, poll_ctx); + if (is_pollqueue(con)) + cq = ib_alloc_cq(cm_id->device, con, nr_cqe, cq_vector, + poll_ctx); + else + cq = ib_cq_pool_get(cm_id->device, nr_cqe, cq_vector, poll_ctx); + if (IS_ERR(cq)) { rtrs_err(con->sess, "Creating completion queue failed, errno: %ld\n", PTR_ERR(cq)); @@ -269,6 +279,17 @@ static int create_qp(struct rtrs_con *con, struct ib_pd *pd, return ret; } +static void destroy_cq(struct rtrs_con *con) +{ + if (con->cq) { + if (is_pollqueue(con)) + ib_free_cq(con->cq); + else + ib_cq_pool_put(con->cq, con->nr_cqe); + } + con->cq = NULL; +} + int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, u32 max_send_sge, int cq_vector, int nr_cqe, u32 max_send_wr, u32 max_recv_wr, @@ -283,8 +304,7 @@ int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, err = create_qp(con, sess->dev->ib_pd, max_send_wr, max_recv_wr, max_send_sge); if (err) { - ib_cq_pool_put(con->cq, con->nr_cqe); - con->cq = NULL; + destroy_cq(con); return err; } con->sess = sess; @@ -299,10 +319,7 @@ void rtrs_cq_qp_destroy(struct rtrs_con *con) rdma_destroy_qp(con->cm_id); con->qp = NULL; } - if (con->cq) { - ib_cq_pool_put(con->cq, con->nr_cqe); - con->cq = NULL; - } + destroy_cq(con); } EXPORT_SYMBOL_GPL(rtrs_cq_qp_destroy); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 84297cc1b509..ea1efdecc88c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -474,11 +474,13 @@ int mlx5i_dev_init(struct net_device *dev) { struct mlx5e_priv *priv = mlx5i_epriv(dev); struct mlx5i_priv *ipriv = priv->ppriv; + u8 addr_mod[3]; /* Set dev address using underlay QP */ - dev->dev_addr[1] = (ipriv->qpn >> 16) & 0xff; - dev->dev_addr[2] = (ipriv->qpn >> 8) & 0xff; - dev->dev_addr[3] = (ipriv->qpn) & 0xff; + addr_mod[0] = (ipriv->qpn >> 16) & 0xff; + addr_mod[1] = (ipriv->qpn >> 8) & 0xff; + addr_mod[2] = (ipriv->qpn) & 0xff; + dev_addr_mod(dev, 1, addr_mod, sizeof(addr_mod)); /* Add QPN to net-device mapping to HT */ mlx5i_pkey_add_qpn(dev, ipriv->qpn); diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 7f3e84b8622d..23b668de4640 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -19,6 +19,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/string.h> +#include <net/addrconf.h> #include "qed.h" #include "qed_cxt.h" 
#include "qed_hsi.h" @@ -410,18 +411,6 @@ static void qed_rdma_free(struct qed_hwfn *p_hwfn) qed_rdma_resc_free(p_hwfn); } -static void qed_rdma_get_guid(struct qed_hwfn *p_hwfn, u8 *guid) -{ - guid[0] = p_hwfn->hw_info.hw_mac_addr[0] ^ 2; - guid[1] = p_hwfn->hw_info.hw_mac_addr[1]; - guid[2] = p_hwfn->hw_info.hw_mac_addr[2]; - guid[3] = 0xff; - guid[4] = 0xfe; - guid[5] = p_hwfn->hw_info.hw_mac_addr[3]; - guid[6] = p_hwfn->hw_info.hw_mac_addr[4]; - guid[7] = p_hwfn->hw_info.hw_mac_addr[5]; -} - static void qed_rdma_init_events(struct qed_hwfn *p_hwfn, struct qed_rdma_start_in_params *params) { @@ -449,7 +438,9 @@ static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn, dev->fw_ver = (FW_MAJOR_VERSION << 24) | (FW_MINOR_VERSION << 16) | (FW_REVISION_VERSION << 8) | (FW_ENGINEERING_VERSION); - qed_rdma_get_guid(p_hwfn, (u8 *)&dev->sys_image_guid); + addrconf_addr_eui48((u8 *)&dev->sys_image_guid, + p_hwfn->hw_info.hw_mac_addr); + dev->node_guid = dev->sys_image_guid; dev->max_sge = min_t(u32, RDMA_MAX_SGE_PER_SQ_WQE, diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 02c2eb874da6..42a323a73c61 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -86,8 +86,8 @@ struct dma_buf_ops { * @pin: * * This is called by dma_buf_pin() and lets the exporter know that the - * DMA-buf can't be moved any more. The exporter should pin the buffer - * into system memory to make sure it is generally accessible by other + * DMA-buf can't be moved any more. Ideally, the exporter should + * pin the buffer so that it is generally accessible by all * devices. * * This is called with the &dmabuf.resv object locked and is mutual diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h index 7e542205861c..8ae07c0ecdf7 100644 --- a/include/rdma/ib_hdrs.h +++ b/include/rdma/ib_hdrs.h @@ -232,6 +232,7 @@ static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr) #define IB_BTH_SE_SHIFT 23 #define IB_BTH_TVER_MASK 0xf #define IB_BTH_TVER_SHIFT 16 +#define IB_BTH_OPCODE_CNP 0x81 static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr) { diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 5ae9dff74dac..92a673cd9b4f 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -38,6 +38,7 @@ struct ib_umem_dmabuf { unsigned long first_sg_offset; unsigned long last_sg_trim; void *private; + u8 pinned : 1; }; static inline struct ib_umem_dmabuf *to_ib_umem_dmabuf(struct ib_umem *umem) @@ -139,6 +140,10 @@ struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, unsigned long offset, size_t size, int fd, int access, const struct dma_buf_attach_ops *ops); +struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device, + unsigned long offset, + size_t size, int fd, + int access); int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf); void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf); void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf); @@ -179,6 +184,12 @@ struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device, { return ERR_PTR(-EOPNOTSUPP); } +static inline struct ib_umem_dmabuf * +ib_umem_dmabuf_get_pinned(struct ib_device *device, unsigned long offset, + size_t size, int fd, int access) +{ + return ERR_PTR(-EOPNOTSUPP); +} static inline int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) { return -EOPNOTSUPP; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4b50d9a3018a..6e9ad656ecb7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h 
@@ -545,6 +545,22 @@ enum ib_port_speed { IB_SPEED_NDR = 128, }; +enum ib_stat_flag { + IB_STAT_FLAG_OPTIONAL = 1 << 0, +}; + +/** + * struct rdma_stat_desc + * @name - The name of the counter + * @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL + * @priv - Driver private information; Core code should not use + */ +struct rdma_stat_desc { + const char *name; + unsigned int flags; + const void *priv; +}; + /** * struct rdma_hw_stats * @lock - Mutex to protect parallel write access to lifespan and values @@ -555,8 +571,10 @@ enum ib_port_speed { * should be before being updated again. Stored in jiffies, defaults * to 10 milliseconds, drivers can override the default be specifying * their own value during their allocation routine. - * @name - Array of pointers to static names used for the counters in - * directory. + * @descs - Array of pointers to static descriptors used for the counters + * in directory. + * @is_disabled - A bitmap to indicate each counter is currently disabled + * or not. * @num_counters - How many hardware counters there are. If name is * shorter than this number, a kernel oops will result. Driver authors * are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters) @@ -568,36 +586,19 @@ struct rdma_hw_stats { struct mutex lock; /* Protect lifespan and values[] */ unsigned long timestamp; unsigned long lifespan; - const char * const *names; + const struct rdma_stat_desc *descs; + unsigned long *is_disabled; int num_counters; u64 value[]; }; #define RDMA_HW_STATS_DEFAULT_LIFESPAN 10 -/** - * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct - * for drivers. - * @names - Array of static const char * - * @num_counters - How many elements in array - * @lifespan - How many milliseconds between updates - */ -static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( - const char * const *names, int num_counters, - unsigned long lifespan) -{ - struct rdma_hw_stats *stats; - stats = kzalloc(sizeof(*stats) + num_counters * sizeof(u64), - GFP_KERNEL); - if (!stats) - return NULL; - stats->names = names; - stats->num_counters = num_counters; - stats->lifespan = msecs_to_jiffies(lifespan); - - return stats; -} +struct rdma_hw_stats *rdma_alloc_hw_stats_struct( + const struct rdma_stat_desc *descs, int num_counters, + unsigned long lifespan); +void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats); /* Define bits for the various functionality this port needs to be supported by * the core. @@ -2570,6 +2571,13 @@ struct ib_device_ops { struct rdma_hw_stats *stats, u32 port, int index); /** + * modify_hw_stat - Modify the counter configuration + * @enable: true/false when enable/disable a counter + * Return codes - 0 on success or error code otherwise. + */ + int (*modify_hw_stat)(struct ib_device *device, u32 port, + unsigned int counter_index, bool enable); + /** * Allows rdma drivers to add their own restrack attributes. 
*/ int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr); @@ -2906,6 +2914,15 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, size_t length, u32 min_pgoff, u32 max_pgoff); +static inline int +rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext, + struct rdma_user_mmap_entry *entry, + size_t length, u32 pgoff) +{ + return rdma_user_mmap_entry_insert_range(ucontext, entry, length, pgoff, + pgoff); +} + struct rdma_user_mmap_entry * rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, unsigned long pgoff); @@ -4097,8 +4114,13 @@ static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev, enum dma_data_direction direction, unsigned long dma_attrs) { + int nents; + if (ib_uses_virt_dma(dev)) { - ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents); + nents = ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents); + if (!nents) + return -EIO; + sgt->nents = nents; return 0; } return dma_map_sgtable(dev->dma_device, sgt, direction, dma_attrs); diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h index 0295b22cd1cd..45d5481a7846 100644 --- a/include/rdma/rdma_counter.h +++ b/include/rdma/rdma_counter.h @@ -63,4 +63,6 @@ int rdma_counter_get_mode(struct ib_device *dev, u32 port, enum rdma_nl_counter_mode *mode, enum rdma_nl_counter_mask *mask); +int rdma_counter_modify(struct ib_device *dev, u32 port, + unsigned int index, bool enable); #endif /* _RDMA_COUNTER_H_ */ diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h index f89fbb5b1e8d..08035ccf1fff 100644 --- a/include/uapi/rdma/efa-abi.h +++ b/include/uapi/rdma/efa-abi.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #ifndef EFA_ABI_USER_H @@ -52,11 +52,20 @@ struct efa_ibv_alloc_pd_resp { __u8 reserved_30[2]; }; +enum { + EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL = 1 << 0, +}; + struct efa_ibv_create_cq { __u32 comp_mask; __u32 cq_entry_size; __u16 num_sub_cqs; - __u8 reserved_50[6]; + __u8 flags; + __u8 reserved_58[5]; +}; + +enum { + EFA_CREATE_CQ_RESP_DB_OFF = 1 << 0, }; struct efa_ibv_create_cq_resp { @@ -65,7 +74,9 @@ struct efa_ibv_create_cq_resp { __aligned_u64 q_mmap_key; __aligned_u64 q_mmap_size; __u16 cq_idx; - __u8 reserved_d0[6]; + __u8 reserved_d0[2]; + __u32 db_off; + __aligned_u64 db_mmap_key; }; enum { @@ -106,6 +117,7 @@ struct efa_ibv_create_ah_resp { enum { EFA_QUERY_DEVICE_CAPS_RDMA_READ = 1 << 0, EFA_QUERY_DEVICE_CAPS_RNR_RETRY = 1 << 1, + EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS = 1 << 2, }; struct efa_ibv_ex_query_device_resp { diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 75a1ae2311d8..e50c357367db 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -297,6 +297,8 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_RES_SRQ_GET, /* can dump */ + RDMA_NLDEV_CMD_STAT_GET_STATUS, + RDMA_NLDEV_NUM_OPS }; @@ -549,6 +551,9 @@ enum rdma_nldev_attr { RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, /* u8 */ + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, /* u32 */ + RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC, /* u8 */ + /* * Always the end */ diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h index 7f44d54bb0ab..f09c5c9e3dd5 100644 --- a/include/uapi/rdma/rdma_user_rxe.h +++ b/include/uapi/rdma/rdma_user_rxe.h @@ -98,6 +98,10 @@ struct rxe_send_wr { __u32 remote_qpn; __u32 remote_qkey; __u16 pkey_index; + __u16 reserved; + __u32 ah_num; + __u32 pad[4]; + struct rxe_av av; } ud; struct { __aligned_u64 addr; @@ -148,7 +152,6 @@ struct rxe_dma_info { struct rxe_send_wqe { struct rxe_send_wr wr; - struct rxe_av av; __u32 status; __u32 state; __aligned_u64 iova; @@ -168,6 +171,11 @@ struct rxe_recv_wqe { struct rxe_dma_info dma; }; +struct rxe_create_ah_resp { + __u32 ah_num; + __u32 reserved; +}; + struct rxe_create_cq_resp { struct mminfo mi; }; |