diff options
author | Linus Torvalds | 2016-10-04 12:08:55 -0700 |
---|---|---|
committer | Linus Torvalds | 2016-10-04 12:08:55 -0700 |
commit | ce866e2d182b9e00e29e7a2eed4db908257d7f79 (patch) | |
tree | 18f6c1aaa41cfbc6df3e4fb09965e90c045d5064 | |
parent | 19fe416532f798e199f04d25816b1bd36e48d6fe (diff) | |
parent | 61347fa6087884305ea4a3a04501839fdb68dc76 (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull hdi1 rdma driver updates from Doug Ledford:
"This is the first pull request of the 4.9 merge window for the RDMA
subsystem. It is only the hfi1 driver. It had dependencies on code
that only landed late in the 4.7-rc cycle (around 4.7-rc7), so putting
this with my other for-next code would have create an ugly merge of
lot of 4.7-rc stuff. For that reason, it's being submitted
individually. It's been through 0day and linux-next"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (37 commits)
IB/rdmavt: Trivial function comment corrected.
IB/hfi1: Fix trace of atomic ack
IB/hfi1: Update SMA ingress checks for response packets
IB/hfi1: Use EPROM platform configuration read
IB/hfi1: Add ability to read platform config from the EPROM
IB/hfi1: Restore EPROM read ability
IB/hfi1: Document new sysfs entries for hfi1 driver
IB/hfi1: Add new debugfs sdma_cpu_list file
IB/hfi1: Add irq affinity notification handler
IB/hfi1: Add a new VL sysfs attribute for sdma engines
IB/hfi1: Add sysfs interface for affinity setup
IB/hfi1: Fix resource release in context allocation
IB/hfi1: Remove unused variable from devdata
IB/hfi1: Cleanup tasklet refs in comments
IB/hfi1: Adjust hardware buffering parameter
IB/hfi1: Act on external device timeout
IB/hfi1: Fix defered ack race with qp destroy
IB/hfi1: Combine shift copy and byte copy for SGE reads
IB/hfi1: Do not read more than a SGE length
IB/hfi1: Extend i2c timeout
...
48 files changed, 1660 insertions, 899 deletions
diff --git a/Documentation/infiniband/sysfs.txt b/Documentation/infiniband/sysfs.txt index 45bcafe6ff8a..77570d16b170 100644 --- a/Documentation/infiniband/sysfs.txt +++ b/Documentation/infiniband/sysfs.txt @@ -89,6 +89,36 @@ HFI1 nctxts - number of allowed contexts (PSM2) chip_reset - diagnostic (root only) boardversion - board version + + sdma<N>/ - one directory per sdma engine (0 - 15) + sdma<N>/cpu_list - read-write, list of cpus for user-process to sdma + engine assignment. + sdma<N>/vl - read-only, vl the sdma engine maps to. + + The new interface will give the user control on the affinity settings + for the hfi1 device. + As an example, to set an sdma engine irq affinity and thread affinity + of a user processes to use the sdma engine, which is "near" in terms + of NUMA configuration, or physical cpu location, the user will do: + + echo "3" > /proc/irq/<N>/smp_affinity_list + echo "4-7" > /sys/devices/.../sdma3/cpu_list + cat /sys/devices/.../sdma3/vl + 0 + echo "8" > /proc/irq/<M>/smp_affinity_list + echo "9-12" > /sys/devices/.../sdma4/cpu_list + cat /sys/devices/.../sdma4/vl + 1 + + to make sure that when a process runs on cpus 4,5,6, or 7, + and uses vl=0, then sdma engine 3 is selected by the driver, + and also the interrupt of the sdma engine 3 is steered to cpu 3. + Similarly, when a process runs on cpus 9,10,11, or 12 and sets vl=1, + then engine 4 will be selected and the irq of the sdma engine 4 is + steered to cpu 8. + This assumes that in the above N is the irq number of "sdma3", + and M is irq number of "sdma4" in the /proc/interrupts file. + ports/1/ CCMgtA/ cc_settings_bin - CCA tables used by PSM2 diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 0566393e5aba..a26a9a0bfc41 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -47,6 +47,7 @@ #include <linux/topology.h> #include <linux/cpumask.h> #include <linux/module.h> +#include <linux/interrupt.h> #include "hfi.h" #include "affinity.h" @@ -55,7 +56,7 @@ struct hfi1_affinity_node_list node_affinity = { .list = LIST_HEAD_INIT(node_affinity.list), - .lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock), + .lock = __MUTEX_INITIALIZER(node_affinity.lock) }; /* Name of IRQ types, indexed by enum irq_type */ @@ -159,14 +160,14 @@ void node_affinity_destroy(void) struct list_head *pos, *q; struct hfi1_affinity_node *entry; - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); list_for_each_safe(pos, q, &node_affinity.list) { entry = list_entry(pos, struct hfi1_affinity_node, list); list_del(pos); kfree(entry); } - spin_unlock(&node_affinity.lock); + mutex_unlock(&node_affinity.lock); kfree(hfi1_per_node_cntr); } @@ -233,9 +234,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) if (cpumask_first(local_mask) >= nr_cpu_ids) local_mask = topology_core_cpumask(0); - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); /* * If this is the first time this NUMA node's affinity is used, @@ -246,6 +246,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) if (!entry) { dd_dev_err(dd, "Unable to allocate global affinity node\n"); + mutex_unlock(&node_affinity.lock); return -ENOMEM; } init_cpu_mask_set(&entry->def_intr); @@ -302,15 +303,113 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) &entry->general_intr_mask); } - spin_lock(&node_affinity.lock); node_affinity_add_tail(entry); - spin_unlock(&node_affinity.lock); } - + mutex_unlock(&node_affinity.lock); return 0; } -int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) +/* + * Function updates the irq affinity hint for msix after it has been changed + * by the user using the /proc/irq interface. This function only accepts + * one cpu in the mask. + */ +static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu) +{ + struct sdma_engine *sde = msix->arg; + struct hfi1_devdata *dd = sde->dd; + struct hfi1_affinity_node *entry; + struct cpu_mask_set *set; + int i, old_cpu; + + if (cpu > num_online_cpus() || cpu == sde->cpu) + return; + + mutex_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + if (!entry) + goto unlock; + + old_cpu = sde->cpu; + sde->cpu = cpu; + cpumask_clear(&msix->mask); + cpumask_set_cpu(cpu, &msix->mask); + dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n", + msix->msix.vector, irq_type_names[msix->type], + sde->this_idx, cpu); + irq_set_affinity_hint(msix->msix.vector, &msix->mask); + + /* + * Set the new cpu in the hfi1_affinity_node and clean + * the old cpu if it is not used by any other IRQ + */ + set = &entry->def_intr; + cpumask_set_cpu(cpu, &set->mask); + cpumask_set_cpu(cpu, &set->used); + for (i = 0; i < dd->num_msix_entries; i++) { + struct hfi1_msix_entry *other_msix; + + other_msix = &dd->msix_entries[i]; + if (other_msix->type != IRQ_SDMA || other_msix == msix) + continue; + + if (cpumask_test_cpu(old_cpu, &other_msix->mask)) + goto unlock; + } + cpumask_clear_cpu(old_cpu, &set->mask); + cpumask_clear_cpu(old_cpu, &set->used); +unlock: + mutex_unlock(&node_affinity.lock); +} + +static void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + int cpu = cpumask_first(mask); + struct hfi1_msix_entry *msix = container_of(notify, + struct hfi1_msix_entry, + notify); + + /* Only one CPU configuration supported currently */ + hfi1_update_sdma_affinity(msix, cpu); +} + +static void hfi1_irq_notifier_release(struct kref *ref) +{ + /* + * This is required by affinity notifier. We don't have anything to + * free here. + */ +} + +static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix) +{ + struct irq_affinity_notify *notify = &msix->notify; + + notify->irq = msix->msix.vector; + notify->notify = hfi1_irq_notifier_notify; + notify->release = hfi1_irq_notifier_release; + + if (irq_set_affinity_notifier(notify->irq, notify)) + pr_err("Failed to register sdma irq affinity notifier for irq %d\n", + notify->irq); +} + +static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix) +{ + struct irq_affinity_notify *notify = &msix->notify; + + if (irq_set_affinity_notifier(notify->irq, NULL)) + pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n", + notify->irq); +} + +/* + * Function sets the irq affinity for msix. + * It *must* be called with node_affinity.lock held. + */ +static int get_irq_affinity(struct hfi1_devdata *dd, + struct hfi1_msix_entry *msix) { int ret; cpumask_var_t diff; @@ -328,9 +427,7 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) if (!ret) return -ENOMEM; - spin_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); switch (msix->type) { case IRQ_SDMA: @@ -360,7 +457,6 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) * finds its CPU here. */ if (cpu == -1 && set) { - spin_lock(&node_affinity.lock); if (cpumask_equal(&set->mask, &set->used)) { /* * We've used up all the CPUs, bump up the generation @@ -372,17 +468,6 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) cpumask_andnot(diff, &set->mask, &set->used); cpu = cpumask_first(diff); cpumask_set_cpu(cpu, &set->used); - spin_unlock(&node_affinity.lock); - } - - switch (msix->type) { - case IRQ_SDMA: - sde->cpu = cpu; - break; - case IRQ_GENERAL: - case IRQ_RCVCTXT: - case IRQ_OTHER: - break; } cpumask_set_cpu(cpu, &msix->mask); @@ -391,10 +476,25 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) extra, cpu); irq_set_affinity_hint(msix->msix.vector, &msix->mask); + if (msix->type == IRQ_SDMA) { + sde->cpu = cpu; + hfi1_setup_sdma_notifier(msix); + } + free_cpumask_var(diff); return 0; } +int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) +{ + int ret; + + mutex_lock(&node_affinity.lock); + ret = get_irq_affinity(dd, msix); + mutex_unlock(&node_affinity.lock); + return ret; +} + void hfi1_put_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) { @@ -402,13 +502,13 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd; struct hfi1_affinity_node *entry; - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); switch (msix->type) { case IRQ_SDMA: set = &entry->def_intr; + hfi1_cleanup_sdma_notifier(msix); break; case IRQ_GENERAL: /* Don't do accounting for general contexts */ @@ -420,21 +520,21 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, set = &entry->rcv_intr; break; default: + mutex_unlock(&node_affinity.lock); return; } if (set) { - spin_lock(&node_affinity.lock); cpumask_andnot(&set->used, &set->used, &msix->mask); if (cpumask_empty(&set->used) && set->gen) { set->gen--; cpumask_copy(&set->used, &set->mask); } - spin_unlock(&node_affinity.lock); } irq_set_affinity_hint(msix->msix.vector, NULL); cpumask_clear(&msix->mask); + mutex_unlock(&node_affinity.lock); } /* This should be called with node_affinity.lock held */ @@ -535,7 +635,7 @@ int hfi1_get_proc_affinity(int node) if (!ret) goto free_available_mask; - spin_lock(&affinity->lock); + mutex_lock(&affinity->lock); /* * If we've used all available HW threads, clear the mask and start * overloading. @@ -643,7 +743,8 @@ int hfi1_get_proc_affinity(int node) cpu = -1; else cpumask_set_cpu(cpu, &set->used); - spin_unlock(&affinity->lock); + + mutex_unlock(&affinity->lock); hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu); free_cpumask_var(intrs_mask); @@ -664,19 +765,17 @@ void hfi1_put_proc_affinity(int cpu) if (cpu < 0) return; - spin_lock(&affinity->lock); + + mutex_lock(&affinity->lock); cpumask_clear_cpu(cpu, &set->used); hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu); if (cpumask_empty(&set->used) && set->gen) { set->gen--; cpumask_copy(&set->used, &set->mask); } - spin_unlock(&affinity->lock); + mutex_unlock(&affinity->lock); } -/* Prevents concurrent reads and writes of the sdma_affinity attrib */ -static DEFINE_MUTEX(sdma_affinity_mutex); - int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, size_t count) { @@ -684,16 +783,19 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, cpumask_var_t mask; int ret, i; - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); - if (!entry) - return -EINVAL; + if (!entry) { + ret = -EINVAL; + goto unlock; + } ret = zalloc_cpumask_var(&mask, GFP_KERNEL); - if (!ret) - return -ENOMEM; + if (!ret) { + ret = -ENOMEM; + goto unlock; + } ret = cpulist_parse(buf, mask); if (ret) @@ -705,13 +807,11 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, goto out; } - mutex_lock(&sdma_affinity_mutex); /* reset the SDMA interrupt affinity details */ init_cpu_mask_set(&entry->def_intr); cpumask_copy(&entry->def_intr.mask, mask); - /* - * Reassign the affinity for each SDMA interrupt. - */ + + /* Reassign the affinity for each SDMA interrupt. */ for (i = 0; i < dd->num_msix_entries; i++) { struct hfi1_msix_entry *msix; @@ -719,14 +819,15 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, if (msix->type != IRQ_SDMA) continue; - ret = hfi1_get_irq_affinity(dd, msix); + ret = get_irq_affinity(dd, msix); if (ret) break; } - mutex_unlock(&sdma_affinity_mutex); out: free_cpumask_var(mask); +unlock: + mutex_unlock(&node_affinity.lock); return ret ? ret : strnlen(buf, PAGE_SIZE); } @@ -734,15 +835,15 @@ int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf) { struct hfi1_affinity_node *entry; - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); - if (!entry) + if (!entry) { + mutex_unlock(&node_affinity.lock); return -EINVAL; + } - mutex_lock(&sdma_affinity_mutex); cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask); - mutex_unlock(&sdma_affinity_mutex); + mutex_unlock(&node_affinity.lock); return strnlen(buf, PAGE_SIZE); } diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index 8879cf7a8cac..b89ea3c0ee1a 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -121,8 +121,7 @@ struct hfi1_affinity_node_list { int num_core_siblings; int num_online_nodes; int num_online_cpus; - /* protect affinity node list */ - spinlock_t lock; + struct mutex lock; /* protects affinity nodes */ }; int node_affinity_init(void); diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index cc38004cea42..9bf5f23544d4 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -971,7 +971,9 @@ static struct flag_table dc8051_info_err_flags[] = { FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1), FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2), FLAG_ENTRY0("Failed LNI(ConfigLT)", FAILED_LNI_CONFIGLT), - FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT) + FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT), + FLAG_ENTRY0("External Device Request Timeout", + EXTERNAL_DEVICE_REQ_TIMEOUT), }; /* @@ -6825,7 +6827,6 @@ void handle_link_up(struct work_struct *work) set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0, OPA_LINKDOWN_REASON_SPEED_POLICY); set_link_state(ppd, HLS_DN_OFFLINE); - tune_serdes(ppd); start_link(ppd); } } @@ -6998,12 +6999,10 @@ void handle_link_down(struct work_struct *work) * If there is no cable attached, turn the DC off. Otherwise, * start the link bring up. */ - if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd)) { + if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd)) dc_shutdown(ppd->dd); - } else { - tune_serdes(ppd); + else start_link(ppd); - } } void handle_link_bounce(struct work_struct *work) @@ -7016,7 +7015,6 @@ void handle_link_bounce(struct work_struct *work) */ if (ppd->host_link_state & HLS_UP) { set_link_state(ppd, HLS_DN_OFFLINE); - tune_serdes(ppd); start_link(ppd); } else { dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n", @@ -7531,7 +7529,6 @@ done: set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0, OPA_LINKDOWN_REASON_WIDTH_POLICY); set_link_state(ppd, HLS_DN_OFFLINE); - tune_serdes(ppd); start_link(ppd); } } @@ -9161,6 +9158,12 @@ set_local_link_attributes_fail: */ int start_link(struct hfi1_pportdata *ppd) { + /* + * Tune the SerDes to a ballpark setting for optimal signal and bit + * error rate. Needs to be done before starting the link. + */ + tune_serdes(ppd); + if (!ppd->link_enabled) { dd_dev_info(ppd->dd, "%s: stopping link start because link is disabled\n", @@ -9401,8 +9404,6 @@ void qsfp_event(struct work_struct *work) */ set_qsfp_int_n(ppd, 1); - tune_serdes(ppd); - start_link(ppd); } @@ -9544,11 +9545,6 @@ static void try_start_link(struct hfi1_pportdata *ppd) } ppd->qsfp_retry_count = 0; - /* - * Tune the SerDes to a ballpark setting for optimal signal and bit - * error rate. Needs to be done before starting the link. - */ - tune_serdes(ppd); start_link(ppd); } @@ -9718,12 +9714,12 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd) hfi1_put_tid(dd, i, PT_INVALID, 0, 0); } -struct hfi1_message_header *hfi1_get_msgheader( - struct hfi1_devdata *dd, __le32 *rhf_addr) +struct ib_header *hfi1_get_msgheader( + struct hfi1_devdata *dd, __le32 *rhf_addr) { u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr)); - return (struct hfi1_message_header *) + return (struct ib_header *) (rhf_addr - dd->rhf_offset + offset); } @@ -11559,10 +11555,10 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) { /* reset the tail and hdr addresses, and sequence count */ write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR, - rcd->rcvhdrq_phys); + rcd->rcvhdrq_dma); if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, - rcd->rcvhdrqtailaddr_phys); + rcd->rcvhdrqtailaddr_dma); rcd->seq_cnt = 1; /* reset the cached receive header queue head value */ @@ -11627,9 +11623,9 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) * update with a dummy tail address and then disable * receive context. */ - if (dd->rcvhdrtail_dummy_physaddr) { + if (dd->rcvhdrtail_dummy_dma) { write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, - dd->rcvhdrtail_dummy_physaddr); + dd->rcvhdrtail_dummy_dma); /* Enabling RcvCtxtCtrl.TailUpd is intentional. */ rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; } @@ -11640,7 +11636,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK; if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK; - if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys) + if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_dma) rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; if (op & HFI1_RCVCTRL_TAILUPD_DIS) { /* See comment on RcvCtxtCtrl.TailUpd above */ @@ -11712,7 +11708,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) * so it doesn't contain an address that is invalid. */ write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, - dd->rcvhdrtail_dummy_physaddr); + dd->rcvhdrtail_dummy_dma); } u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp) @@ -13389,9 +13385,9 @@ static void init_rbufs(struct hfi1_devdata *dd) /* * Give up after 1ms - maximum wait time. * - * RBuf size is 148KiB. Slowest possible is PCIe Gen1 x1 at + * RBuf size is 136KiB. Slowest possible is PCIe Gen1 x1 at * 250MB/s bandwidth. Lower rate to 66% for overhead to get: - * 148 KB / (66% * 250MB/s) = 920us + * 136 KB / (66% * 250MB/s) = 844us */ if (count++ > 500) { dd_dev_err(dd, @@ -14570,6 +14566,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_cleanup; + /* call before get_platform_config(), after init_chip_resources() */ + ret = eprom_init(dd); + if (ret) + goto bail_free_rcverr; + /* Needs to be called before hfi1_firmware_init */ get_platform_config(dd); @@ -14690,10 +14691,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_free_cntrs; - ret = eprom_init(dd); - if (ret) - goto bail_free_rcverr; - goto bail; bail_free_rcverr: diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index e29573769efc..92345259a8f4 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -82,7 +82,7 @@ */ #define CM_VAU 3 /* HFI link credit count, AKA receive buffer depth (RBUF_DEPTH) */ -#define CM_GLOBAL_CREDITS 0x940 +#define CM_GLOBAL_CREDITS 0x880 /* Number of PKey entries in the HW */ #define MAX_PKEY_VALUES 16 @@ -254,12 +254,14 @@ #define FAILED_LNI_VERIFY_CAP2 BIT(10) #define FAILED_LNI_CONFIGLT BIT(11) #define HOST_HANDSHAKE_TIMEOUT BIT(12) +#define EXTERNAL_DEVICE_REQ_TIMEOUT BIT(13) #define FAILED_LNI (FAILED_LNI_POLLING | FAILED_LNI_DEBOUNCE \ | FAILED_LNI_ESTBCOMM | FAILED_LNI_OPTEQ \ | FAILED_LNI_VERIFY_CAP1 \ | FAILED_LNI_VERIFY_CAP2 \ - | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT) + | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT \ + | EXTERNAL_DEVICE_REQ_TIMEOUT) /* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG - host message flags */ #define HOST_REQ_DONE BIT(0) @@ -1336,7 +1338,7 @@ enum { u64 get_all_cpu_total(u64 __percpu *cntr); void hfi1_start_cleanup(struct hfi1_devdata *dd); void hfi1_clear_tids(struct hfi1_ctxtdata *rcd); -struct hfi1_message_header *hfi1_get_msgheader( +struct ib_header *hfi1_get_msgheader( struct hfi1_devdata *dd, __le32 *rhf_addr); int hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index fcc9c217a97a..da7be21bedb4 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -320,14 +320,6 @@ struct diag_pkt { /* RHF receive type error - bypass packet errors */ #define RHF_RTE_BYPASS_NO_ERR 0x0 -/* - * This structure contains the first field common to all protocols - * that employ this chip. - */ -struct hfi1_message_header { - __be16 lrh[4]; -}; - /* IB - LRH header constants */ #define HFI1_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */ #define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */ diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 5e9be16f6cd3..632ba21759ab 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -933,6 +933,43 @@ static const struct counter_info port_cntr_ops[] = { DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write), }; +static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos) +{ + if (*pos >= num_online_cpus()) + return NULL; + + return pos; +} + +static void *_sdma_cpu_list_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + ++*pos; + if (*pos >= num_online_cpus()) + return NULL; + + return pos; +} + +static void _sdma_cpu_list_seq_stop(struct seq_file *s, void *v) +{ + /* nothing allocated */ +} + +static int _sdma_cpu_list_seq_show(struct seq_file *s, void *v) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + loff_t *spos = v; + loff_t i = *spos; + + sdma_seqfile_dump_cpu_list(s, dd, (unsigned long)i); + return 0; +} + +DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list); +DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list) +DEBUGFS_FILE_OPS(sdma_cpu_list); + void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) { char name[sizeof("port0counters") + 1]; @@ -961,6 +998,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd); + DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd); /* dev counter files */ for (i = 0; i < ARRAY_SIZE(cntr_ops); i++) DEBUGFS_FILE_CREATE(cntr_ops[i].name, diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 303f10555729..6563e4d38b80 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -276,7 +276,7 @@ inline int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded) static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, struct hfi1_packet *packet) { - struct hfi1_message_header *rhdr = packet->hdr; + struct ib_header *rhdr = packet->hdr; u32 rte = rhf_rcv_type_err(packet->rhf); int lnh = be16_to_cpu(rhdr->lrh[0]) & 3; struct hfi1_ibport *ibp = &ppd->ibport_data; @@ -288,10 +288,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, if (packet->rhf & RHF_TID_ERR) { /* For TIDERR and RC QPs preemptively schedule a NAK */ - struct hfi1_ib_header *hdr = (struct hfi1_ib_header *)rhdr; - struct hfi1_other_headers *ohdr = NULL; + struct ib_other_headers *ohdr = NULL; u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */ - u16 lid = be16_to_cpu(hdr->lrh[1]); + u16 lid = be16_to_cpu(rhdr->lrh[1]); u32 qp_num; u32 rcv_flags = 0; @@ -301,14 +300,14 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, /* Check for GRH */ if (lnh == HFI1_LRH_BTH) { - ohdr = &hdr->u.oth; + ohdr = &rhdr->u.oth; } else if (lnh == HFI1_LRH_GRH) { u32 vtf; - ohdr = &hdr->u.l.oth; - if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) + ohdr = &rhdr->u.l.oth; + if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) goto drop; - vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow); + vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow); if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) goto drop; rcv_flags |= HFI1_HAS_GRH; @@ -344,7 +343,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, case IB_QPT_RC: hfi1_rc_hdrerr( rcd, - hdr, + rhdr, rcv_flags, qp); break; @@ -452,8 +451,8 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct hfi1_ib_header *hdr = pkt->hdr; - struct hfi1_other_headers *ohdr = pkt->ohdr; + struct ib_header *hdr = pkt->hdr; + struct ib_other_headers *ohdr = pkt->ohdr; struct ib_grh *grh = NULL; u32 rqpn = 0, bth1; u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]); @@ -487,7 +486,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, return; } - sc = hdr2sc((struct hfi1_message_header *)hdr, pkt->rhf); + sc = hdr2sc(hdr, pkt->rhf); bth1 = be32_to_cpu(ohdr->bth[1]); if (do_cnp && (bth1 & HFI1_FECN_SMASK)) { @@ -599,8 +598,8 @@ static void __prescan_rxq(struct hfi1_packet *packet) __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + dd->rhf_offset; struct rvt_qp *qp; - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr; + struct ib_header *hdr; + struct ib_other_headers *ohdr; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; @@ -616,8 +615,8 @@ static void __prescan_rxq(struct hfi1_packet *packet) if (etype != RHF_RCV_TYPE_IB) goto next; - hdr = (struct hfi1_ib_header *) - hfi1_get_msgheader(dd, rhf_addr); + hdr = hfi1_get_msgheader(dd, rhf_addr); + lnh = be16_to_cpu(hdr->lrh[0]) & 3; if (lnh == HFI1_LRH_BTH) @@ -892,8 +891,8 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, struct hfi1_devdata *dd) { struct work_struct *lsaw = &rcd->ppd->linkstate_active_work; - struct hfi1_message_header *hdr = hfi1_get_msgheader(packet->rcd->dd, - packet->rhf_addr); + struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, + packet->rhf_addr); u8 etype = rhf_rcv_type(packet->rhf); if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) { diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c index 36b77943cbfd..e70c223801b4 100644 --- a/drivers/infiniband/hw/hfi1/eprom.c +++ b/drivers/infiniband/hw/hfi1/eprom.c @@ -49,7 +49,26 @@ #include "common.h" #include "eprom.h" +/* + * The EPROM is logically divided into three partitions: + * partition 0: the first 128K, visible from PCI ROM BAR + * partition 1: 4K config file (sector size) + * partition 2: the rest + */ +#define P0_SIZE (128 * 1024) +#define P1_SIZE (4 * 1024) +#define P1_START P0_SIZE +#define P2_START (P0_SIZE + P1_SIZE) + +/* controller page size, in bytes */ +#define EP_PAGE_SIZE 256 +#define EP_PAGE_MASK (EP_PAGE_SIZE - 1) +#define EP_PAGE_DWORDS (EP_PAGE_SIZE / sizeof(u32)) + +/* controller commands */ #define CMD_SHIFT 24 +#define CMD_NOP (0) +#define CMD_READ_DATA(addr) ((0x03 << CMD_SHIFT) | addr) #define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT)) /* controller interface speeds */ @@ -61,6 +80,90 @@ * Double it for safety. */ #define EPROM_TIMEOUT 80000 /* ms */ + +/* + * Read a 256 byte (64 dword) EPROM page. + * All callers have verified the offset is at a page boundary. + */ +static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result) +{ + int i; + + write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset)); + for (i = 0; i < EP_PAGE_DWORDS; i++) + result[i] = (u32)read_csr(dd, ASIC_EEP_DATA); + write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */ +} + +/* + * Read length bytes starting at offset from the start of the EPROM. + */ +static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, void *dest) +{ + u32 buffer[EP_PAGE_DWORDS]; + u32 end; + u32 start_offset; + u32 read_start; + u32 bytes; + + if (len == 0) + return 0; + + end = start + len; + + /* + * Make sure the read range is not outside of the controller read + * command address range. Note that '>' is correct below - the end + * of the range is OK if it stops at the limit, but no higher. + */ + if (end > (1 << CMD_SHIFT)) + return -EINVAL; + + /* read the first partial page */ + start_offset = start & EP_PAGE_MASK; + if (start_offset) { + /* partial starting page */ + + /* align and read the page that contains the start */ + read_start = start & ~EP_PAGE_MASK; + read_page(dd, read_start, buffer); + + /* the rest of the page is available data */ + bytes = EP_PAGE_SIZE - start_offset; + + if (len <= bytes) { + /* end is within this page */ + memcpy(dest, (u8 *)buffer + start_offset, len); + return 0; + } + + memcpy(dest, (u8 *)buffer + start_offset, bytes); + + start += bytes; + len -= bytes; + dest += bytes; + } + /* start is now page aligned */ + + /* read whole pages */ + while (len >= EP_PAGE_SIZE) { + read_page(dd, start, buffer); + memcpy(dest, buffer, EP_PAGE_SIZE); + + start += EP_PAGE_SIZE; + len -= EP_PAGE_SIZE; + dest += EP_PAGE_SIZE; + } + + /* read the last partial page */ + if (len) { + read_page(dd, start, buffer); + memcpy(dest, buffer, len); + } + + return 0; +} + /* * Initialize the EPROM handler. */ @@ -100,3 +203,85 @@ int eprom_init(struct hfi1_devdata *dd) done_asic: return ret; } + +/* magic character sequence that trails an image */ +#define IMAGE_TRAIL_MAGIC "egamiAPO" + +/* + * Read all of partition 1. The actual file is at the front. Adjust + * the returned size if a trailing image magic is found. + */ +static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, + u32 *size) +{ + void *buffer; + void *p; + u32 length; + int ret; + + buffer = kmalloc(P1_SIZE, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + ret = read_length(dd, P1_START, P1_SIZE, buffer); + if (ret) { + kfree(buffer); + return ret; + } + + /* scan for image magic that may trail the actual data */ + p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE); + if (p) + length = p - buffer; + else + length = P1_SIZE; + + *data = buffer; + *size = length; + return 0; +} + +/* + * Read the platform configuration file from the EPROM. + * + * On success, an allocated buffer containing the data and its size are + * returned. It is up to the caller to free this buffer. + * + * Return value: + * 0 - success + * -ENXIO - no EPROM is available + * -EBUSY - not able to acquire access to the EPROM + * -ENOENT - no recognizable file written + * -ENOMEM - buffer could not be allocated + */ +int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size) +{ + u32 directory[EP_PAGE_DWORDS]; /* aligned buffer */ + int ret; + + if (!dd->eprom_available) + return -ENXIO; + + ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT); + if (ret) + return -EBUSY; + + /* read the last page of P0 for the EPROM format magic */ + ret = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, directory); + if (ret) + goto done; + + /* last dword of P0 contains a magic indicator */ + if (directory[EP_PAGE_DWORDS - 1] == 0) { + /* partition format */ + ret = read_partition_platform_config(dd, data, size); + goto done; + } + + /* nothing recognized */ + ret = -ENOENT; + +done: + release_chip_resource(dd, CR_EPROM); + return ret; +} diff --git a/drivers/infiniband/hw/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h index d41f0b1afb15..e774184f1643 100644 --- a/drivers/infiniband/hw/hfi1/eprom.h +++ b/drivers/infiniband/hw/hfi1/eprom.h @@ -45,8 +45,8 @@ * */ -struct hfi1_cmd; struct hfi1_devdata; int eprom_init(struct hfi1_devdata *dd); -int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd); +int eprom_read_platform_config(struct hfi1_devdata *dd, void **buf_ret, + u32 *size_ret); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 7e03ccd2554d..677efa0e8cd6 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -58,7 +58,6 @@ #include "trace.h" #include "user_sdma.h" #include "user_exp_rcv.h" -#include "eprom.h" #include "aspm.h" #include "mmu_rb.h" @@ -440,9 +439,10 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd; - unsigned long flags, pfn; + unsigned long flags; u64 token = vma->vm_pgoff << PAGE_SHIFT, memaddr = 0; + void *memvirt = NULL; u8 subctxt, mapio = 0, vmf = 0, type; ssize_t memlen = 0; int ret = 0; @@ -493,7 +493,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) * second or third page allocated for credit returns (if number * of enabled contexts > 64 and 128 respectively). */ - memaddr = dd->cr_base[uctxt->numa_id].pa + + memvirt = dd->cr_base[uctxt->numa_id].va; + memaddr = virt_to_phys(memvirt) + (((u64)uctxt->sc->hw_free - (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK); memlen = PAGE_SIZE; @@ -508,8 +509,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) mapio = 1; break; case RCV_HDRQ: - memaddr = uctxt->rcvhdrq_phys; memlen = uctxt->rcvhdrq_size; + memvirt = uctxt->rcvhdrq; break; case RCV_EGRBUF: { unsigned long addr; @@ -533,14 +534,21 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) vma->vm_flags &= ~VM_MAYWRITE; addr = vma->vm_start; for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) { + memlen = uctxt->egrbufs.buffers[i].len; + memvirt = uctxt->egrbufs.buffers[i].addr; ret = remap_pfn_range( vma, addr, - uctxt->egrbufs.buffers[i].phys >> PAGE_SHIFT, - uctxt->egrbufs.buffers[i].len, + /* + * virt_to_pfn() does the same, but + * it's not available on x86_64 + * when CONFIG_MMU is enabled. + */ + PFN_DOWN(__pa(memvirt)), + memlen, vma->vm_page_prot); if (ret < 0) goto done; - addr += uctxt->egrbufs.buffers[i].len; + addr += memlen; } ret = 0; goto done; @@ -596,8 +604,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) ret = -EPERM; goto done; } - memaddr = uctxt->rcvhdrqtailaddr_phys; memlen = PAGE_SIZE; + memvirt = (void *)uctxt->rcvhdrtail_kvaddr; flags &= ~VM_MAYWRITE; break; case SUBCTXT_UREGS: @@ -650,16 +658,24 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n", ctxt, subctxt, type, mapio, vmf, memaddr, memlen, vma->vm_end - vma->vm_start, vma->vm_flags); - pfn = (unsigned long)(memaddr >> PAGE_SHIFT); if (vmf) { - vma->vm_pgoff = pfn; + vma->vm_pgoff = PFN_DOWN(memaddr); vma->vm_ops = &vm_ops; ret = 0; } else if (mapio) { - ret = io_remap_pfn_range(vma, vma->vm_start, pfn, memlen, + ret = io_remap_pfn_range(vma, vma->vm_start, + PFN_DOWN(memaddr), + memlen, vma->vm_page_prot); + } else if (memvirt) { + ret = remap_pfn_range(vma, vma->vm_start, + PFN_DOWN(__pa(memvirt)), + memlen, + vma->vm_page_prot); } else { - ret = remap_pfn_range(vma, vma->vm_start, pfn, memlen, + ret = remap_pfn_range(vma, vma->vm_start, + PFN_DOWN(memaddr), + memlen, vma->vm_page_prot); } done: @@ -961,14 +977,16 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, */ uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize, uctxt->dd->node); - if (!uctxt->sc) - return -ENOMEM; - + if (!uctxt->sc) { + ret = -ENOMEM; + goto ctxdata_free; + } hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index, uctxt->sc->hw_context); ret = sc_enable(uctxt->sc); if (ret) - return ret; + goto ctxdata_free; + /* * Setup shared context resources if the user-level has requested * shared contexts and this is the 'master' process. @@ -982,7 +1000,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, * send context because it will be done during file close */ if (ret) - return ret; + goto ctxdata_free; } uctxt->userversion = uinfo->userversion; uctxt->flags = hfi1_cap_mask; /* save current flag state */ @@ -1002,6 +1020,11 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, fd->uctxt = uctxt; return 0; + +ctxdata_free: + dd->rcd[ctxt] = NULL; + hfi1_free_ctxtdata(dd, uctxt); + return ret; } static int init_subctxts(struct hfi1_ctxtdata *uctxt, @@ -1260,7 +1283,7 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len) uctxt->rcvhdrq); binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt, fd->subctxt, - uctxt->egrbufs.rcvtids[0].phys); + uctxt->egrbufs.rcvtids[0].dma); binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt, fd->subctxt, 0); /* diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 325ec211370f..7eef11b316ff 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -64,6 +64,8 @@ #include <linux/kthread.h> #include <linux/i2c.h> #include <linux/i2c-algo-bit.h> +#include <rdma/ib_hdrs.h> +#include <linux/rhashtable.h> #include <rdma/rdma_vt.h> #include "chip_registers.h" @@ -171,12 +173,12 @@ struct ctxt_eager_bufs { u32 threshold; /* head update threshold */ struct eager_buffer { void *addr; - dma_addr_t phys; + dma_addr_t dma; ssize_t len; } *buffers; struct { void *addr; - dma_addr_t phys; + dma_addr_t dma; } *rcvtids; }; @@ -207,8 +209,8 @@ struct hfi1_ctxtdata { /* size of each of the rcvhdrq entries */ u16 rcvhdrqentsize; /* mmap of hdrq, must fit in 44 bits */ - dma_addr_t rcvhdrq_phys; - dma_addr_t rcvhdrqtailaddr_phys; + dma_addr_t rcvhdrq_dma; + dma_addr_t rcvhdrqtailaddr_dma; struct ctxt_eager_bufs egrbufs; /* this receive context's assigned PIO ACK send context */ struct send_context *sc; @@ -350,7 +352,7 @@ struct hfi1_packet { struct hfi1_ctxtdata *rcd; __le32 *rhf_addr; struct rvt_qp *qp; - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; u64 rhf; u32 maxcnt; u32 rhqoff; @@ -529,6 +531,7 @@ struct hfi1_msix_entry { void *arg; char name[MAX_NAME_SIZE]; cpumask_t mask; + struct irq_affinity_notify notify; }; /* per-SL CCA information */ @@ -1060,8 +1063,6 @@ struct hfi1_devdata { u8 psxmitwait_supported; /* cycle length of PS* counters in HW (in picoseconds) */ u16 psxmitwait_check_rate; - /* high volume overflow errors deferred to tasklet */ - struct tasklet_struct error_tasklet; /* MSI-X information */ struct hfi1_msix_entry *msix_entries; @@ -1164,7 +1165,7 @@ struct hfi1_devdata { /* receive context tail dummy address */ __le64 *rcvhdrtail_dummy_kvaddr; - dma_addr_t rcvhdrtail_dummy_physaddr; + dma_addr_t rcvhdrtail_dummy_dma; bool eprom_available; /* true if EPROM is available for this device */ bool aspm_supported; /* Does HW support ASPM */ @@ -1175,6 +1176,7 @@ struct hfi1_devdata { atomic_t aspm_disabled_cnt; struct hfi1_affinity *affinity; + struct rhashtable sdma_rht; struct kobject kobj; }; @@ -1268,7 +1270,7 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd) void receive_interrupt_work(struct work_struct *work); /* extract service channel from header and rhf */ -static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf) +static inline int hdr2sc(struct ib_header *hdr, u64 rhf) { return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) | ((!!(rhf_dc_info(rhf))) << 4); @@ -1603,7 +1605,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { - struct hfi1_other_headers *ohdr = pkt->ohdr; + struct ib_other_headers *ohdr = pkt->ohdr; u32 bth1; bth1 = be32_to_cpu(ohdr->bth[1]); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 384b43d2fd49..60db61536fed 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -336,6 +336,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, } return rcd; bail: + dd->rcd[ctxt] = NULL; kfree(rcd->egrbufs.rcvtids); kfree(rcd->egrbufs.buffers); kfree(rcd); @@ -709,7 +710,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) /* allocate dummy tail memory for all receive contexts */ dd->rcvhdrtail_dummy_kvaddr = dma_zalloc_coherent( &dd->pcidev->dev, sizeof(u64), - &dd->rcvhdrtail_dummy_physaddr, + &dd->rcvhdrtail_dummy_dma, GFP_KERNEL); if (!dd->rcvhdrtail_dummy_kvaddr) { @@ -942,12 +943,12 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) if (rcd->rcvhdrq) { dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size, - rcd->rcvhdrq, rcd->rcvhdrq_phys); + rcd->rcvhdrq, rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; if (rcd->rcvhdrtail_kvaddr) { dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, (void *)rcd->rcvhdrtail_kvaddr, - rcd->rcvhdrqtailaddr_phys); + rcd->rcvhdrqtailaddr_dma); rcd->rcvhdrtail_kvaddr = NULL; } } @@ -956,11 +957,11 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) kfree(rcd->egrbufs.rcvtids); for (e = 0; e < rcd->egrbufs.alloced; e++) { - if (rcd->egrbufs.buffers[e].phys) + if (rcd->egrbufs.buffers[e].dma) dma_free_coherent(&dd->pcidev->dev, rcd->egrbufs.buffers[e].len, rcd->egrbufs.buffers[e].addr, - rcd->egrbufs.buffers[e].phys); + rcd->egrbufs.buffers[e].dma); } kfree(rcd->egrbufs.buffers); @@ -1354,7 +1355,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) if (dd->rcvhdrtail_dummy_kvaddr) { dma_free_coherent(&dd->pcidev->dev, sizeof(u64), (void *)dd->rcvhdrtail_dummy_kvaddr, - dd->rcvhdrtail_dummy_physaddr); + dd->rcvhdrtail_dummy_dma); dd->rcvhdrtail_dummy_kvaddr = NULL; } @@ -1577,7 +1578,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) u64 reg; if (!rcd->rcvhdrq) { - dma_addr_t phys_hdrqtail; + dma_addr_t dma_hdrqtail; gfp_t gfp_flags; /* @@ -1590,7 +1591,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? GFP_USER : GFP_KERNEL; rcd->rcvhdrq = dma_zalloc_coherent( - &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys, + &dd->pcidev->dev, amt, &rcd->rcvhdrq_dma, gfp_flags | __GFP_COMP); if (!rcd->rcvhdrq) { @@ -1602,11 +1603,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) { rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent( - &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, + &dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail, gfp_flags); if (!rcd->rcvhdrtail_kvaddr) goto bail_free; - rcd->rcvhdrqtailaddr_phys = phys_hdrqtail; + rcd->rcvhdrqtailaddr_dma = dma_hdrqtail; } rcd->rcvhdrq_size = amt; @@ -1634,7 +1635,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) * before enabling any receive context */ write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR, - dd->rcvhdrtail_dummy_physaddr); + dd->rcvhdrtail_dummy_dma); return 0; @@ -1645,7 +1646,7 @@ bail_free: vfree(rcd->user_event_mask); rcd->user_event_mask = NULL; dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq, - rcd->rcvhdrq_phys); + rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; bail: return -ENOMEM; @@ -1706,15 +1707,15 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) rcd->egrbufs.buffers[idx].addr = dma_zalloc_coherent(&dd->pcidev->dev, rcd->egrbufs.rcvtid_size, - &rcd->egrbufs.buffers[idx].phys, + &rcd->egrbufs.buffers[idx].dma, gfp_flags); if (rcd->egrbufs.buffers[idx].addr) { rcd->egrbufs.buffers[idx].len = rcd->egrbufs.rcvtid_size; rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].addr = rcd->egrbufs.buffers[idx].addr; - rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].phys = - rcd->egrbufs.buffers[idx].phys; + rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].dma = + rcd->egrbufs.buffers[idx].dma; rcd->egrbufs.alloced++; alloced_bytes += rcd->egrbufs.rcvtid_size; idx++; @@ -1755,14 +1756,14 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) for (i = 0, j = 0, offset = 0; j < idx; i++) { if (i >= rcd->egrbufs.count) break; - rcd->egrbufs.rcvtids[i].phys = - rcd->egrbufs.buffers[j].phys + offset; + rcd->egrbufs.rcvtids[i].dma = + rcd->egrbufs.buffers[j].dma + offset; rcd->egrbufs.rcvtids[i].addr = rcd->egrbufs.buffers[j].addr + offset; rcd->egrbufs.alloced++; - if ((rcd->egrbufs.buffers[j].phys + offset + + if ((rcd->egrbufs.buffers[j].dma + offset + new_size) == - (rcd->egrbufs.buffers[j].phys + + (rcd->egrbufs.buffers[j].dma + rcd->egrbufs.buffers[j].len)) { j++; offset = 0; @@ -1814,7 +1815,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) for (idx = 0; idx < rcd->egrbufs.alloced; idx++) { hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER, - rcd->egrbufs.rcvtids[idx].phys, order); + rcd->egrbufs.rcvtids[idx].dma, order); cond_resched(); } goto bail; @@ -1826,9 +1827,9 @@ bail_rcvegrbuf_phys: dma_free_coherent(&dd->pcidev->dev, rcd->egrbufs.buffers[idx].len, rcd->egrbufs.buffers[idx].addr, - rcd->egrbufs.buffers[idx].phys); + rcd->egrbufs.buffers[idx].dma); rcd->egrbufs.buffers[idx].addr = NULL; - rcd->egrbufs.buffers[idx].phys = 0; + rcd->egrbufs.buffers[idx].dma = 0; rcd->egrbufs.buffers[idx].len = 0; } bail: diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 7ffc14f21523..9487c9bb8920 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1013,7 +1013,6 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, * offline. */ set_link_state(ppd, HLS_DN_OFFLINE); - tune_serdes(ppd); start_link(ppd); } else { set_link_state(ppd, link_state); @@ -1407,12 +1406,6 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) if (key == okey) continue; /* - * Don't update pkeys[2], if an HFI port without MgmtAllowed - * by neighbor is a switch. - */ - if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1) - continue; - /* * The SM gives us the complete PKey table. We have * to ensure that we put the PKeys in the matching * slots. diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index ac1bf4a73571..50a3a36d9363 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -551,11 +551,11 @@ static inline u32 group_size(u32 group) } /* - * Obtain the credit return addresses, kernel virtual and physical, for the + * Obtain the credit return addresses, kernel virtual and bus, for the * given sc. * * To understand this routine: - * o va and pa are arrays of struct credit_return. One for each physical + * o va and dma are arrays of struct credit_return. One for each physical * send context, per NUMA. * o Each send context always looks in its relative location in a struct * credit_return for its credit return. @@ -563,14 +563,14 @@ static inline u32 group_size(u32 group) * with the same value. Use the address of the first send context in the * group. */ -static void cr_group_addresses(struct send_context *sc, dma_addr_t *pa) +static void cr_group_addresses(struct send_context *sc, dma_addr_t *dma) { u32 gc = group_context(sc->hw_context, sc->group); u32 index = sc->hw_context & 0x7; sc->hw_free = &sc->dd->cr_base[sc->node].va[gc].cr[index]; - *pa = (unsigned long) - &((struct credit_return *)sc->dd->cr_base[sc->node].pa)[gc]; + *dma = (unsigned long) + &((struct credit_return *)sc->dd->cr_base[sc->node].dma)[gc]; } /* @@ -710,7 +710,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, { struct send_context_info *sci; struct send_context *sc = NULL; - dma_addr_t pa; + dma_addr_t dma; unsigned long flags; u64 reg; u32 thresh; @@ -763,7 +763,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, sc->sw_index = sw_index; sc->hw_context = hw_context; - cr_group_addresses(sc, &pa); + cr_group_addresses(sc, &dma); sc->credits = sci->credits; /* PIO Send Memory Address details */ @@ -805,7 +805,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, ((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT))); /* set up credit return */ - reg = pa & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK); + reg = dma & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK); write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), reg); /* @@ -2064,7 +2064,7 @@ int init_credit_return(struct hfi1_devdata *dd) dd->cr_base[i].va = dma_zalloc_coherent( &dd->pcidev->dev, bytes, - &dd->cr_base[i].pa, + &dd->cr_base[i].dma, GFP_KERNEL); if (!dd->cr_base[i].va) { set_dev_node(&dd->pcidev->dev, dd->node); @@ -2097,7 +2097,7 @@ void free_credit_return(struct hfi1_devdata *dd) TXE_NUM_CONTEXTS * sizeof(struct credit_return), dd->cr_base[i].va, - dd->cr_base[i].pa); + dd->cr_base[i].dma); } } kfree(dd->cr_base); diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 464cbd27b975..e709eaf743b5 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -154,7 +154,7 @@ struct credit_return { /* NUMA indexed credit return array */ struct credit_return_base { struct credit_return *va; - dma_addr_t pa; + dma_addr_t dma; }; /* send context configuration sizes (one per type) */ diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index 3a1ef3056282..aa7773643107 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -165,9 +165,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, preempt_enable(); } -/* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */ -#define USE_SHIFTS 1 -#ifdef USE_SHIFTS /* * Handle carry bytes using shifts and masks. * @@ -187,150 +184,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, #define mshift(x) (8 * (x)) /* - * Read nbytes bytes from "from" and return them in the LSB bytes - * of pbuf->carry. Other bytes are zeroed. Any previous value - * pbuf->carry is lost. - * - * NOTES: - * o do not read from from if nbytes is zero - * o from may _not_ be u64 aligned - * o nbytes must not span a QW boundary - */ -static inline void read_low_bytes(struct pio_buf *pbuf, const void *from, - unsigned int nbytes) -{ - unsigned long off; - - if (nbytes == 0) { - pbuf->carry.val64 = 0; - } else { - /* align our pointer */ - off = (unsigned long)from & 0x7; - from = (void *)((unsigned long)from & ~0x7l); - pbuf->carry.val64 = ((*(u64 *)from) - << zshift(nbytes + off))/* zero upper bytes */ - >> zshift(nbytes); /* place at bottom */ - } - pbuf->carry_bytes = nbytes; -} - -/* - * Read nbytes bytes from "from" and put them at the next significant bytes - * of pbuf->carry. Unused bytes are zeroed. It is expected that the extra - * read does not overfill carry. - * - * NOTES: - * o from may _not_ be u64 aligned - * o nbytes may span a QW boundary - */ -static inline void read_extra_bytes(struct pio_buf *pbuf, - const void *from, unsigned int nbytes) -{ - unsigned long off = (unsigned long)from & 0x7; - unsigned int room, xbytes; - - /* align our pointer */ - from = (void *)((unsigned long)from & ~0x7l); - - /* check count first - don't read anything if count is zero */ - while (nbytes) { - /* find the number of bytes in this u64 */ - room = 8 - off; /* this u64 has room for this many bytes */ - xbytes = min(room, nbytes); - - /* - * shift down to zero lower bytes, shift up to zero upper - * bytes, shift back down to move into place - */ - pbuf->carry.val64 |= (((*(u64 *)from) - >> mshift(off)) - << zshift(xbytes)) - >> zshift(xbytes + pbuf->carry_bytes); - off = 0; - pbuf->carry_bytes += xbytes; - nbytes -= xbytes; - from += sizeof(u64); - } -} - -/* - * Zero extra bytes from the end of pbuf->carry. - * - * NOTES: - * o zbytes <= old_bytes - */ -static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes) -{ - unsigned int remaining; - - if (zbytes == 0) /* nothing to do */ - return; - - remaining = pbuf->carry_bytes - zbytes; /* remaining bytes */ - - /* NOTE: zshift only guaranteed to work if remaining != 0 */ - if (remaining) - pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining)) - >> zshift(remaining); - else - pbuf->carry.val64 = 0; - pbuf->carry_bytes = remaining; -} - -/* - * Write a quad word using parts of pbuf->carry and the next 8 bytes of src. - * Put the unused part of the next 8 bytes of src into the LSB bytes of - * pbuf->carry with the upper bytes zeroed.. - * - * NOTES: - * o result must keep unused bytes zeroed - * o src must be u64 aligned - */ -static inline void merge_write8( - struct pio_buf *pbuf, - void __iomem *dest, - const void *src) -{ - u64 new, temp; - - new = *(u64 *)src; - temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes)); - writeq(temp, dest); - pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes); -} - -/* - * Write a quad word using all bytes of carry. - */ -static inline void carry8_write8(union mix carry, void __iomem *dest) -{ - writeq(carry.val64, dest); -} - -/* - * Write a quad word using all the valid bytes of carry. If carry - * has zero valid bytes, nothing is written. - * Returns 0 on nothing written, non-zero on quad word written. - */ -static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest) -{ - if (pbuf->carry_bytes) { - /* unused bytes are always kept zeroed, so just write */ - writeq(pbuf->carry.val64, dest); - return 1; - } - - return 0; -} - -#else /* USE_SHIFTS */ -/* - * Handle carry bytes using byte copies. - * - * NOTE: the value the unused portion of carry is left uninitialized. - */ - -/* * Jump copy - no-loop copy for < 8 bytes. */ static inline void jcopy(u8 *dest, const u8 *src, u32 n) @@ -338,18 +191,25 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n) switch (n) { case 7: *dest++ = *src++; + /* fall through */ case 6: *dest++ = *src++; + /* fall through */ case 5: *dest++ = *src++; + /* fall through */ case 4: *dest++ = *src++; + /* fall through */ case 3: *dest++ = *src++; + /* fall through */ case 2: *dest++ = *src++; + /* fall through */ case 1: *dest++ = *src++; + /* fall through */ } } @@ -365,6 +225,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n) static inline void read_low_bytes(struct pio_buf *pbuf, const void *from, unsigned int nbytes) { + pbuf->carry.val64 = 0; jcopy(&pbuf->carry.val8[0], from, nbytes); pbuf->carry_bytes = nbytes; } @@ -385,40 +246,31 @@ static inline void read_extra_bytes(struct pio_buf *pbuf, } /* - * Zero extra bytes from the end of pbuf->carry. - * - * We do not care about the value of unused bytes in carry, so just - * reduce the byte count. + * Write a quad word using parts of pbuf->carry and the next 8 bytes of src. + * Put the unused part of the next 8 bytes of src into the LSB bytes of + * pbuf->carry with the upper bytes zeroed.. * * NOTES: - * o zbytes <= old_bytes - */ -static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes) -{ - pbuf->carry_bytes -= zbytes; -} - -/* - * Write a quad word using parts of pbuf->carry and the next 8 bytes of src. - * Put the unused part of the next 8 bytes of src into the low bytes of - * pbuf->carry. + * o result must keep unused bytes zeroed + * o src must be u64 aligned */ static inline void merge_write8( struct pio_buf *pbuf, - void *dest, + void __iomem *dest, const void *src) { - u32 remainder = 8 - pbuf->carry_bytes; + u64 new, temp; - jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder); - writeq(pbuf->carry.val64, dest); - jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes); + new = *(u64 *)src; + temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes)); + writeq(temp, dest); + pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes); } /* * Write a quad word using all bytes of carry. */ -static inline void carry8_write8(union mix carry, void *dest) +static inline void carry8_write8(union mix carry, void __iomem *dest) { writeq(carry.val64, dest); } @@ -428,20 +280,16 @@ static inline void carry8_write8(union mix carry, void *dest) * has zero valid bytes, nothing is written. * Returns 0 on nothing written, non-zero on quad word written. */ -static inline int carry_write8(struct pio_buf *pbuf, void *dest) +static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest) { if (pbuf->carry_bytes) { - u64 zero = 0; - - jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero, - 8 - pbuf->carry_bytes); + /* unused bytes are always kept zeroed, so just write */ writeq(pbuf->carry.val64, dest); return 1; } return 0; } -#endif /* USE_SHIFTS */ /* * Segmented PIO Copy - start @@ -550,8 +398,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) { void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64)); void __iomem *dend; /* 8-byte data end */ - unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3; - unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7; + unsigned long qw_to_write = nbytes >> 3; + unsigned long bytes_left = nbytes & 0x7; /* calculate 8-byte data end */ dend = dest + (qw_to_write * sizeof(u64)); @@ -621,16 +469,46 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) dest += sizeof(u64); } - /* adjust carry */ - if (pbuf->carry_bytes < bytes_left) { - /* need to read more */ - read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes); + pbuf->qw_written += qw_to_write; + + /* handle carry and left-over bytes */ + if (pbuf->carry_bytes + bytes_left >= 8) { + unsigned long nread; + + /* there is enough to fill another qw - fill carry */ + nread = 8 - pbuf->carry_bytes; + read_extra_bytes(pbuf, from, nread); + + /* + * One more write - but need to make sure dest is correct. + * Check for wrap and the possibility the write + * should be in SOP space. + * + * The two checks immediately below cannot both be true, hence + * the else. If we have wrapped, we cannot still be within the + * first block. Conversely, if we are still in the first block, + * we cannot have wrapped. We do the wrap check first as that + * is more likely. + */ + /* adjust if we have wrapped */ + if (dest >= pbuf->end) + dest -= pbuf->size; + /* jump to the SOP range if within the first block */ + else if (pbuf->qw_written < PIO_BLOCK_QWS) + dest += SOP_DISTANCE; + + /* flush out full carry */ + carry8_write8(pbuf->carry, dest); + pbuf->qw_written++; + + /* now adjust and read the rest of the bytes into carry */ + bytes_left -= nread; + from += nread; /* from is now not aligned */ + read_low_bytes(pbuf, from, bytes_left); } else { - /* remove invalid bytes */ - zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left); + /* not enough to fill another qw, append the rest to carry */ + read_extra_bytes(pbuf, from, bytes_left); } - - pbuf->qw_written += qw_to_write; } /* diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 965c8aef0c60..202433178864 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -47,29 +47,39 @@ #include "hfi.h" #include "efivar.h" +#include "eprom.h" void get_platform_config(struct hfi1_devdata *dd) { int ret = 0; unsigned long size = 0; u8 *temp_platform_config = NULL; + u32 esize; + + ret = eprom_read_platform_config(dd, (void **)&temp_platform_config, + &esize); + if (!ret) { + /* success */ + size = esize; + goto success; + } + /* fail, try EFI variable */ ret = read_hfi1_efi_var(dd, "configuration", &size, (void **)&temp_platform_config); - if (ret) { - dd_dev_info(dd, - "%s: Failed to get platform config from UEFI, falling back to request firmware\n", - __func__); - /* fall back to request firmware */ - platform_config_load = 1; - goto bail; - } + if (!ret) + goto success; + + dd_dev_info(dd, + "%s: Failed to get platform config from UEFI, falling back to request firmware\n", + __func__); + /* fall back to request firmware */ + platform_config_load = 1; + return; +success: dd->platform_config.data = temp_platform_config; dd->platform_config.size = size; - -bail: - /* exit */; } void free_platform_config(struct hfi1_devdata *dd) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 4e4d8317c281..9fc75e7e8781 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -202,8 +202,7 @@ static void flush_iowait(struct rvt_qp *qp) write_seqlock_irqsave(&dev->iowait_lock, flags); if (!list_empty(&priv->s_iowait.list)) { list_del_init(&priv->s_iowait.list); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } write_sequnlock_irqrestore(&dev->iowait_lock, flags); } @@ -450,13 +449,14 @@ static void qp_pio_drain(struct rvt_qp *qp) */ void hfi1_schedule_send(struct rvt_qp *qp) { + lockdep_assert_held(&qp->s_lock); if (hfi1_send_ok(qp)) _hfi1_schedule_send(qp); } /** - * hfi1_get_credit - flush the send work queue of a QP - * @qp: the qp who's send work queue to flush + * hfi1_get_credit - handle credit in aeth + * @qp: the qp * @aeth: the Acknowledge Extended Transport Header * * The QP s_lock should be held. @@ -465,6 +465,7 @@ void hfi1_get_credit(struct rvt_qp *qp, u32 aeth) { u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK; + lockdep_assert_held(&qp->s_lock); /* * If the credit is invalid, we can send * as many packets as we like. Otherwise, we have to @@ -503,8 +504,7 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag) } spin_unlock_irqrestore(&qp->s_lock, flags); /* Notify hfi1_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } static int iowait_sleep( @@ -544,7 +544,7 @@ static int iowait_sleep( qp->s_flags |= RVT_S_WAIT_DMA_DESC; list_add_tail(&priv->s_iowait.list, &sde->dmawait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); } write_sequnlock(&dev->iowait_lock); qp->s_flags &= ~RVT_S_BUSY; @@ -808,6 +808,13 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, kfree(priv); return ERR_PTR(-ENOMEM); } + iowait_init( + &priv->s_iowait, + 1, + _hfi1_do_send, + iowait_sleep, + iowait_wakeup, + iowait_sdma_drained); setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp); qp->s_timer.function = hfi1_rc_timeout; return priv; @@ -848,6 +855,7 @@ unsigned free_all_qps(struct rvt_dev_info *rdi) void flush_qp_waiters(struct rvt_qp *qp) { + lockdep_assert_held(&qp->s_lock); flush_iowait(qp); hfi1_stop_rc_timers(qp); } @@ -873,13 +881,6 @@ void notify_qp_reset(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; - iowait_init( - &priv->s_iowait, - 1, - _hfi1_do_send, - iowait_sleep, - iowait_wakeup, - iowait_sdma_drained); priv->r_adefered = 0; clear_ahg(qp); } @@ -963,8 +964,7 @@ void notify_error_qp(struct rvt_qp *qp) if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) { qp->s_flags &= ~RVT_S_ANY_WAIT_IO; list_del_init(&priv->s_iowait.list); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } write_sequnlock(&dev->iowait_lock); diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 4e95ad810847..1869f639c3ae 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -161,7 +161,7 @@ static struct hfi1_i2c_bus *init_i2c_bus(struct hfi1_devdata *dd, bus->algo.getsda = hfi1_getsda; bus->algo.getscl = hfi1_getscl; bus->algo.udelay = 5; - bus->algo.timeout = usecs_to_jiffies(50); + bus->algo.timeout = usecs_to_jiffies(100000); bus->algo.data = bus; bus->adapter.owner = THIS_MODULE; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 5da190e6011b..8bc5013f39a1 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -55,7 +55,7 @@ #include "trace.h" /* cut down ridiculously long IB macro names */ -#define OP(x) IB_OPCODE_RC_##x +#define OP(x) RC_OP(x) /** * hfi1_add_retry_timer - add/start a retry timer @@ -68,6 +68,7 @@ static inline void hfi1_add_retry_timer(struct rvt_qp *qp) struct ib_qp *ibqp = &qp->ibqp; struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + lockdep_assert_held(&qp->s_lock); qp->s_flags |= RVT_S_TIMER; /* 4.096 usec. * (1 << qp->timeout) */ qp->s_timer.expires = jiffies + qp->timeout_jiffies + @@ -86,6 +87,7 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to) { struct hfi1_qp_priv *priv = qp->priv; + lockdep_assert_held(&qp->s_lock); qp->s_flags |= RVT_S_WAIT_RNR; qp->s_timer.expires = jiffies + usecs_to_jiffies(to); add_timer(&priv->s_rnr_timer); @@ -103,6 +105,7 @@ static inline void hfi1_mod_retry_timer(struct rvt_qp *qp) struct ib_qp *ibqp = &qp->ibqp; struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + lockdep_assert_held(&qp->s_lock); qp->s_flags |= RVT_S_TIMER; /* 4.096 usec. * (1 << qp->timeout) */ mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies + @@ -120,6 +123,7 @@ static inline int hfi1_stop_retry_timer(struct rvt_qp *qp) { int rval = 0; + lockdep_assert_held(&qp->s_lock); /* Remove QP from retry */ if (qp->s_flags & RVT_S_TIMER) { qp->s_flags &= ~RVT_S_TIMER; @@ -138,6 +142,7 @@ void hfi1_stop_rc_timers(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; + lockdep_assert_held(&qp->s_lock); /* Remove QP from all timers */ if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); @@ -158,6 +163,7 @@ static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp) int rval = 0; struct hfi1_qp_priv *priv = qp->priv; + lockdep_assert_held(&qp->s_lock); /* Remove QP from rnr timer */ if (qp->s_flags & RVT_S_WAIT_RNR) { qp->s_flags &= ~RVT_S_WAIT_RNR; @@ -178,18 +184,6 @@ void hfi1_del_timers_sync(struct rvt_qp *qp) del_timer_sync(&priv->s_rnr_timer); } -/* only opcode mask for adaptive pio */ -const u32 rc_only_opcode = - BIT(OP(SEND_ONLY) & 0x1f) | - BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) | - BIT(OP(RDMA_WRITE_ONLY & 0x1f)) | - BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)) | - BIT(OP(RDMA_READ_REQUEST & 0x1f)) | - BIT(OP(ACKNOWLEDGE & 0x1f)) | - BIT(OP(ATOMIC_ACKNOWLEDGE & 0x1f)) | - BIT(OP(COMPARE_SWAP & 0x1f)) | - BIT(OP(FETCH_ADD & 0x1f)); - static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 psn, u32 pmtu) { @@ -216,7 +210,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, * Note the QP s_lock must be held. */ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, - struct hfi1_other_headers *ohdr, + struct ib_other_headers *ohdr, struct hfi1_pkt_state *ps) { struct rvt_ack_entry *e; @@ -228,6 +222,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, u32 pmtu = qp->pmtu; struct hfi1_qp_priv *priv = qp->priv; + lockdep_assert_held(&qp->s_lock); /* Don't send an ACK if we aren't supposed to. */ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) goto bail; @@ -299,10 +294,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, len = 0; qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); ohdr->u.at.aeth = hfi1_compute_aeth(qp); - ohdr->u.at.atomic_ack_eth[0] = - cpu_to_be32(e->atomic_data >> 32); - ohdr->u.at.atomic_ack_eth[1] = - cpu_to_be32(e->atomic_data); + ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth); hwords += sizeof(ohdr->u.at) / sizeof(u32); bth2 = mask_psn(e->psn); e->sent = 1; @@ -390,7 +382,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_sge_state *ss; struct rvt_swqe *wqe; /* header size in 32-bit words LRH+BTH = (8+12)/4. */ @@ -403,6 +395,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) int middle = 0; int delta; + lockdep_assert_held(&qp->s_lock); ps->s_txreq = get_txreq(ps->dev, qp); if (IS_ERR(ps->s_txreq)) goto bail_no_tx; @@ -566,8 +559,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->rdma_wr.remote_addr); + put_ib_reth_vaddr( + wqe->rdma_wr.remote_addr, + &ohdr->u.rc.reth); ohdr->u.rc.reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); @@ -608,8 +602,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; } - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->rdma_wr.remote_addr); + put_ib_reth_vaddr( + wqe->rdma_wr.remote_addr, + &ohdr->u.rc.reth); ohdr->u.rc.reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); @@ -640,20 +635,18 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->atomic_wr.swap); - ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->atomic_wr.compare_add); + put_ib_ateth_swap(wqe->atomic_wr.swap, + &ohdr->u.atomic_eth); + put_ib_ateth_compare(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); } else { qp->s_state = OP(FETCH_ADD); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->atomic_wr.compare_add); - ohdr->u.atomic_eth.compare_data = 0; + put_ib_ateth_swap(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); + put_ib_ateth_compare(0, &ohdr->u.atomic_eth); } - ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( - wqe->atomic_wr.remote_addr >> 32); - ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( - wqe->atomic_wr.remote_addr); + put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr, + &ohdr->u.atomic_eth); ohdr->u.atomic_eth.rkey = cpu_to_be32( wqe->atomic_wr.rkey); hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); @@ -779,8 +772,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) * See restart_rc(). */ len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu; - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->rdma_wr.remote_addr + len); + put_ib_reth_vaddr( + wqe->rdma_wr.remote_addr + len, + &ohdr->u.rc.reth); ohdr->u.rc.reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len); @@ -841,7 +835,7 @@ bail_no_tx: * * This is called from hfi1_rc_rcv() and handle_receive_interrupt(). * Note that RDMA reads and atomics are handled in the - * send side QP state and tasklet. + * send side QP state and send engine. */ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, int is_fecn) @@ -856,8 +850,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, u32 vl, plen; struct send_context *sc; struct pio_buf *pbuf; - struct hfi1_ib_header hdr; - struct hfi1_other_headers *ohdr; + struct ib_header hdr; + struct ib_other_headers *ohdr; unsigned long flags; /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ @@ -917,7 +911,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, if (!pbuf) { /* * We have no room to send at the moment. Pass - * responsibility for sending the ACK to the send tasklet + * responsibility for sending the ACK to the send engine * so that when enough buffer space becomes available, * the ACK is sent ahead of other outgoing packets. */ @@ -932,16 +926,19 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, return; queue_ack: - this_cpu_inc(*ibp->rvp.rc_qacks); spin_lock_irqsave(&qp->s_lock, flags); + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) + goto unlock; + this_cpu_inc(*ibp->rvp.rc_qacks); qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING; qp->s_nak_state = qp->r_nak_state; qp->s_ack_psn = qp->r_ack_psn; if (is_fecn) qp->s_flags |= RVT_S_ECN; - /* Schedule the send tasklet. */ + /* Schedule the send engine. */ hfi1_schedule_send(qp); +unlock: spin_unlock_irqrestore(&qp->s_lock, flags); } @@ -960,6 +957,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn) struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n); u32 opcode; + lockdep_assert_held(&qp->s_lock); qp->s_cur = n; /* @@ -1027,7 +1025,7 @@ done: qp->s_psn = psn; /* * Set RVT_S_WAIT_PSN as rc_complete() may start the timer - * asynchronously before the send tasklet can get scheduled. + * asynchronously before the send engine can get scheduled. * Doing it in hfi1_make_rc_req() is too late. */ if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) && @@ -1045,6 +1043,8 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait) struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked); struct hfi1_ibport *ibp; + lockdep_assert_held(&qp->r_lock); + lockdep_assert_held(&qp->s_lock); if (qp->s_retry == 0) { if (qp->s_mig_state == IB_MIG_ARMED) { hfi1_migrate_qp(qp); @@ -1121,6 +1121,7 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) struct rvt_swqe *wqe; u32 n = qp->s_last; + lockdep_assert_held(&qp->s_lock); /* Find the work request corresponding to the given PSN. */ for (;;) { wqe = rvt_get_swqe_ptr(qp, n); @@ -1141,15 +1142,16 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) /* * This should be called with the QP s_lock held and interrupts disabled. */ -void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr) +void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_swqe *wqe; struct ib_wc wc; unsigned i; u32 opcode; u32 psn; + lockdep_assert_held(&qp->s_lock); if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; @@ -1241,6 +1243,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct ib_wc wc; unsigned i; + lockdep_assert_held(&qp->s_lock); /* * Don't decrement refcount and don't generate a * completion if the SWQE is being resent until the send @@ -1340,6 +1343,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, int diff; unsigned long to; + lockdep_assert_held(&qp->s_lock); /* * Note that NAKs implicitly ACK outstanding SEND and RDMA write * requests and implicitly NAK RDMA read and atomic requests issued @@ -1389,7 +1393,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_SEND; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } @@ -1555,6 +1559,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, { struct rvt_swqe *wqe; + lockdep_assert_held(&qp->s_lock); /* Remove QP from retry timer */ hfi1_stop_rc_timers(qp); @@ -1573,7 +1578,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_SEND; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } } @@ -1595,7 +1600,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, * Called at interrupt level. */ static void rc_rcv_resp(struct hfi1_ibport *ibp, - struct hfi1_other_headers *ohdr, + struct ib_other_headers *ohdr, void *data, u32 tlen, struct rvt_qp *qp, u32 opcode, u32 psn, u32 hdrsize, u32 pmtu, struct hfi1_ctxtdata *rcd) @@ -1649,14 +1654,10 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, case OP(ATOMIC_ACKNOWLEDGE): case OP(RDMA_READ_RESPONSE_FIRST): aeth = be32_to_cpu(ohdr->u.aeth); - if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { - __be32 *p = ohdr->u.at.atomic_ack_eth; - - val = ((u64)be32_to_cpu(p[0]) << 32) | - be32_to_cpu(p[1]); - } else { + if (opcode == OP(ATOMIC_ACKNOWLEDGE)) + val = ib_u64_get(&ohdr->u.at.atomic_ack_eth); + else val = 0; - } if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) || opcode != OP(RDMA_READ_RESPONSE_FIRST)) goto ack_done; @@ -1782,7 +1783,7 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd, { if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } } @@ -1796,8 +1797,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp) return; list_del_init(&qp->rspwait); qp->r_flags &= ~RVT_R_RSP_NAK; - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } /** @@ -1815,7 +1815,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp) * Return 1 if no more processing is needed; otherwise return 0 to * schedule a response to be sent. */ -static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, +static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data, struct rvt_qp *qp, u32 opcode, u32 psn, int diff, struct hfi1_ctxtdata *rcd) { @@ -1923,7 +1923,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, } if (len != 0) { u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); + u64 vaddr = get_ib_reth_vaddr(reth); int ok; ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, @@ -1946,7 +1946,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, case OP(FETCH_ADD): { /* * If we didn't find the atomic request in the ack queue - * or the send tasklet is already backed up to send an + * or the send engine is already backed up to send an * earlier entry, we can ignore this request. */ if (!e || e->opcode != (u8)opcode || old_req) @@ -2123,13 +2123,13 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, void hfi1_rc_rcv(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct hfi1_other_headers *ohdr = packet->ohdr; + struct ib_other_headers *ohdr = packet->ohdr; u32 bth0, opcode; u32 hdrsize = packet->hlen; u32 psn; @@ -2143,6 +2143,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) int copy_last = 0; u32 rkey; + lockdep_assert_held(&qp->r_lock); bth0 = be32_to_cpu(ohdr->bth[0]); if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0)) return; @@ -2342,7 +2343,7 @@ send_last: qp->r_sge.sg_list = NULL; if (qp->r_len != 0) { u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); + u64 vaddr = get_ib_reth_vaddr(reth); int ok; /* Check rkey & NAK */ @@ -2397,7 +2398,7 @@ send_last: len = be32_to_cpu(reth->length); if (len) { u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); + u64 vaddr = get_ib_reth_vaddr(reth); int ok; /* Check rkey & NAK */ @@ -2432,7 +2433,7 @@ send_last: qp->r_nak_state = 0; qp->r_head_ack_queue = next; - /* Schedule the send tasklet. */ + /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; hfi1_schedule_send(qp); @@ -2469,8 +2470,7 @@ send_last: e->rdma_sge.mr = NULL; } ateth = &ohdr->u.atomic_eth; - vaddr = ((u64)be32_to_cpu(ateth->vaddr[0]) << 32) | - be32_to_cpu(ateth->vaddr[1]); + vaddr = get_ib_ateth_vaddr(ateth); if (unlikely(vaddr & (sizeof(u64) - 1))) goto nack_inv_unlck; rkey = be32_to_cpu(ateth->rkey); @@ -2481,11 +2481,11 @@ send_last: goto nack_acc_unlck; /* Perform atomic OP and save result. */ maddr = (atomic64_t *)qp->r_sge.sge.vaddr; - sdata = be64_to_cpu(ateth->swap_data); + sdata = get_ib_ateth_swap(ateth); e->atomic_data = (opcode == OP(FETCH_ADD)) ? (u64)atomic64_add_return(sdata, maddr) - sdata : (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr, - be64_to_cpu(ateth->compare_data), + get_ib_ateth_compare(ateth), sdata); rvt_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; @@ -2499,7 +2499,7 @@ send_last: qp->r_nak_state = 0; qp->r_head_ack_queue = next; - /* Schedule the send tasklet. */ + /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; hfi1_schedule_send(qp); @@ -2575,12 +2575,12 @@ send_ack: void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, - struct hfi1_ib_header *hdr, + struct ib_header *hdr, u32 rcv_flags, struct rvt_qp *qp) { int has_grh = rcv_flags & HFI1_HAS_GRH; - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); int diff; u32 opcode; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 48d5094f98e2..a1576aea4756 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -262,7 +262,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) * * The s_lock will be acquired around the hfi1_migrate_qp() call. */ -int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, +int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0) { __be64 guid; @@ -352,7 +352,7 @@ err: * * This is called from hfi1_do_send() to * forward a WQE addressed to the same HFI. - * Note that although we are single threaded due to the tasklet, we still + * Note that although we are single threaded due to the send engine, we still * have to protect against post_send(). We don't have to worry about * receive interrupts since this is a connected protocol and all packets * will pass through here. @@ -765,7 +765,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) } } -void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, +void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2, int middle, struct hfi1_pkt_state *ps) { @@ -846,7 +846,7 @@ void _hfi1_do_send(struct work_struct *work) * @work: contains a pointer to the QP * * Process entries in the send work queue until credit or queue is - * exhausted. Only allow one CPU to send a packet per QP (tasklet). + * exhausted. Only allow one CPU to send a packet per QP. * Otherwise, two threads could send packets out of order. */ void hfi1_do_send(struct rvt_qp *qp) @@ -909,7 +909,7 @@ void hfi1_do_send(struct rvt_qp *qp) spin_unlock_irqrestore(&qp->s_lock, ps.flags); /* * If the packet cannot be sent now, return and - * the send tasklet will be woken up later. + * the send engine will be woken up later. */ if (hfi1_verbs_send(qp, &ps)) return; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index f9befc05b349..fd39bcaa062d 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -726,6 +726,34 @@ u16 sdma_get_descq_cnt(void) } /** + * sdma_engine_get_vl() - return vl for a given sdma engine + * @sde: sdma engine + * + * This function returns the vl mapped to a given engine, or an error if + * the mapping can't be found. The mapping fields are protected by RCU. + */ +int sdma_engine_get_vl(struct sdma_engine *sde) +{ + struct hfi1_devdata *dd = sde->dd; + struct sdma_vl_map *m; + u8 vl; + + if (sde->this_idx >= TXE_NUM_SDMA_ENGINES) + return -EINVAL; + + rcu_read_lock(); + m = rcu_dereference(dd->sdma_map); + if (unlikely(!m)) { + rcu_read_unlock(); + return -EINVAL; + } + vl = m->engine_to_vl[sde->this_idx]; + rcu_read_unlock(); + + return vl; +} + +/** * sdma_select_engine_vl() - select sdma engine * @dd: devdata * @selector: a spreading factor @@ -788,6 +816,326 @@ struct sdma_engine *sdma_select_engine_sc( return sdma_select_engine_vl(dd, selector, vl); } +struct sdma_rht_map_elem { + u32 mask; + u8 ctr; + struct sdma_engine *sde[0]; +}; + +struct sdma_rht_node { + unsigned long cpu_id; + struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED]; + struct rhash_head node; +}; + +#define NR_CPUS_HINT 192 + +static const struct rhashtable_params sdma_rht_params = { + .nelem_hint = NR_CPUS_HINT, + .head_offset = offsetof(struct sdma_rht_node, node), + .key_offset = offsetof(struct sdma_rht_node, cpu_id), + .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id), + .max_size = NR_CPUS, + .min_size = 8, + .automatic_shrinking = true, +}; + +/* + * sdma_select_user_engine() - select sdma engine based on user setup + * @dd: devdata + * @selector: a spreading factor + * @vl: this vl + * + * This function returns an sdma engine for a user sdma request. + * User defined sdma engine affinity setting is honored when applicable, + * otherwise system default sdma engine mapping is used. To ensure correct + * ordering, the mapping from <selector, vl> to sde must remain unchanged. + */ +struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, + u32 selector, u8 vl) +{ + struct sdma_rht_node *rht_node; + struct sdma_engine *sde = NULL; + const struct cpumask *current_mask = tsk_cpus_allowed(current); + unsigned long cpu_id; + + /* + * To ensure that always the same sdma engine(s) will be + * selected make sure the process is pinned to this CPU only. + */ + if (cpumask_weight(current_mask) != 1) + goto out; + + cpu_id = smp_processor_id(); + rcu_read_lock(); + rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id, + sdma_rht_params); + + if (rht_node && rht_node->map[vl]) { + struct sdma_rht_map_elem *map = rht_node->map[vl]; + + sde = map->sde[selector & map->mask]; + } + rcu_read_unlock(); + + if (sde) + return sde; + +out: + return sdma_select_engine_vl(dd, selector, vl); +} + +static void sdma_populate_sde_map(struct sdma_rht_map_elem *map) +{ + int i; + + for (i = 0; i < roundup_pow_of_two(map->ctr ? : 1) - map->ctr; i++) + map->sde[map->ctr + i] = map->sde[i]; +} + +static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map, + struct sdma_engine *sde) +{ + unsigned int i, pow; + + /* only need to check the first ctr entries for a match */ + for (i = 0; i < map->ctr; i++) { + if (map->sde[i] == sde) { + memmove(&map->sde[i], &map->sde[i + 1], + (map->ctr - i - 1) * sizeof(map->sde[0])); + map->ctr--; + pow = roundup_pow_of_two(map->ctr ? : 1); + map->mask = pow - 1; + sdma_populate_sde_map(map); + break; + } + } +} + +/* + * Prevents concurrent reads and writes of the sdma engine cpu_mask + */ +static DEFINE_MUTEX(process_to_sde_mutex); + +ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, + size_t count) +{ + struct hfi1_devdata *dd = sde->dd; + cpumask_var_t mask, new_mask; + unsigned long cpu; + int ret, vl, sz; + + vl = sdma_engine_get_vl(sde); + if (unlikely(vl < 0)) + return -EINVAL; + + ret = zalloc_cpumask_var(&mask, GFP_KERNEL); + if (!ret) + return -ENOMEM; + + ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL); + if (!ret) { + free_cpumask_var(mask); + return -ENOMEM; + } + ret = cpulist_parse(buf, mask); + if (ret) + goto out_free; + + if (!cpumask_subset(mask, cpu_online_mask)) { + dd_dev_warn(sde->dd, "Invalid CPU mask\n"); + ret = -EINVAL; + goto out_free; + } + + sz = sizeof(struct sdma_rht_map_elem) + + (TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *)); + + mutex_lock(&process_to_sde_mutex); + + for_each_cpu(cpu, mask) { + struct sdma_rht_node *rht_node; + + /* Check if we have this already mapped */ + if (cpumask_test_cpu(cpu, &sde->cpu_mask)) { + cpumask_set_cpu(cpu, new_mask); + continue; + } + + rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu, + sdma_rht_params); + if (!rht_node) { + rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL); + if (!rht_node) { + ret = -ENOMEM; + goto out; + } + + rht_node->map[vl] = kzalloc(sz, GFP_KERNEL); + if (!rht_node->map[vl]) { + kfree(rht_node); + ret = -ENOMEM; + goto out; + } + rht_node->cpu_id = cpu; + rht_node->map[vl]->mask = 0; + rht_node->map[vl]->ctr = 1; + rht_node->map[vl]->sde[0] = sde; + + ret = rhashtable_insert_fast(&dd->sdma_rht, + &rht_node->node, + sdma_rht_params); + if (ret) { + kfree(rht_node->map[vl]); + kfree(rht_node); + dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n", + cpu); + goto out; + } + + } else { + int ctr, pow; + + /* Add new user mappings */ + if (!rht_node->map[vl]) + rht_node->map[vl] = kzalloc(sz, GFP_KERNEL); + + if (!rht_node->map[vl]) { + ret = -ENOMEM; + goto out; + } + + rht_node->map[vl]->ctr++; + ctr = rht_node->map[vl]->ctr; + rht_node->map[vl]->sde[ctr - 1] = sde; + pow = roundup_pow_of_two(ctr); + rht_node->map[vl]->mask = pow - 1; + + /* Populate the sde map table */ + sdma_populate_sde_map(rht_node->map[vl]); + } + cpumask_set_cpu(cpu, new_mask); + } + + /* Clean up old mappings */ + for_each_cpu(cpu, cpu_online_mask) { + struct sdma_rht_node *rht_node; + + /* Don't cleanup sdes that are set in the new mask */ + if (cpumask_test_cpu(cpu, mask)) + continue; + + rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu, + sdma_rht_params); + if (rht_node) { + bool empty = true; + int i; + + /* Remove mappings for old sde */ + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) + if (rht_node->map[i]) + sdma_cleanup_sde_map(rht_node->map[i], + sde); + + /* Free empty hash table entries */ + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) { + if (!rht_node->map[i]) + continue; + + if (rht_node->map[i]->ctr) { + empty = false; + break; + } + } + + if (empty) { + ret = rhashtable_remove_fast(&dd->sdma_rht, + &rht_node->node, + sdma_rht_params); + WARN_ON(ret); + + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) + kfree(rht_node->map[i]); + + kfree(rht_node); + } + } + } + + cpumask_copy(&sde->cpu_mask, new_mask); +out: + mutex_unlock(&process_to_sde_mutex); +out_free: + free_cpumask_var(mask); + free_cpumask_var(new_mask); + return ret ? : strnlen(buf, PAGE_SIZE); +} + +ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf) +{ + mutex_lock(&process_to_sde_mutex); + if (cpumask_empty(&sde->cpu_mask)) + snprintf(buf, PAGE_SIZE, "%s\n", "empty"); + else + cpumap_print_to_pagebuf(true, buf, &sde->cpu_mask); + mutex_unlock(&process_to_sde_mutex); + return strnlen(buf, PAGE_SIZE); +} + +static void sdma_rht_free(void *ptr, void *arg) +{ + struct sdma_rht_node *rht_node = ptr; + int i; + + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) + kfree(rht_node->map[i]); + + kfree(rht_node); +} + +/** + * sdma_seqfile_dump_cpu_list() - debugfs dump the cpu to sdma mappings + * @s: seq file + * @dd: hfi1_devdata + * @cpuid: cpu id + * + * This routine dumps the process to sde mappings per cpu + */ +void sdma_seqfile_dump_cpu_list(struct seq_file *s, + struct hfi1_devdata *dd, + unsigned long cpuid) +{ + struct sdma_rht_node *rht_node; + int i, j; + + rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid, + sdma_rht_params); + if (!rht_node) + return; + + seq_printf(s, "cpu%3lu: ", cpuid); + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) { + if (!rht_node->map[i] || !rht_node->map[i]->ctr) + continue; + + seq_printf(s, " vl%d: [", i); + + for (j = 0; j < rht_node->map[i]->ctr; j++) { + if (!rht_node->map[i]->sde[j]) + continue; + + if (j > 0) + seq_puts(s, ","); + + seq_printf(s, " sdma%2d", + rht_node->map[i]->sde[j]->this_idx); + } + seq_puts(s, " ]"); + } + + seq_puts(s, "\n"); +} + /* * Free the indicated map struct */ @@ -1161,6 +1509,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) dd->num_sdma = num_engines; if (sdma_map_init(dd, port, ppd->vls_operational, NULL)) goto bail; + + if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params)) + goto bail; + dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma); return 0; @@ -1252,6 +1604,7 @@ void sdma_exit(struct hfi1_devdata *dd) sdma_finalput(&sde->state); } sdma_clean(dd, dd->num_sdma); + rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL); } /* @@ -2086,6 +2439,11 @@ nodesc: * @sde: sdma engine to use * @wait: wait structure to use when full (may be NULL) * @tx_list: list of sdma_txreqs to submit + * @count: pointer to a u32 which, after return will contain the total number of + * sdma_txreqs removed from the tx_list. This will include sdma_txreqs + * whose SDMA descriptors are submitted to the ring and the sdma_txreqs + * which are added to SDMA engine flush list if the SDMA engine state is + * not running. * * The call submits the list into the ring. * @@ -2100,18 +2458,18 @@ nodesc: * side locking. * * Return: - * > 0 - Success (value is number of sdma_txreq's submitted), + * 0 - Success, * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL) * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state */ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait, - struct list_head *tx_list) + struct list_head *tx_list, u32 *count_out) { struct sdma_txreq *tx, *tx_next; int ret = 0; unsigned long flags; u16 tail = INVALID_TAIL; - int count = 0; + u32 submit_count = 0, flush_count = 0, total_count; spin_lock_irqsave(&sde->tail_lock, flags); retry: @@ -2127,33 +2485,34 @@ retry: } list_del_init(&tx->list); tail = submit_tx(sde, tx); - count++; + submit_count++; if (tail != INVALID_TAIL && - (count & SDMA_TAIL_UPDATE_THRESH) == 0) { + (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) { sdma_update_tail(sde, tail); tail = INVALID_TAIL; } } update_tail: + total_count = submit_count + flush_count; if (wait) - iowait_sdma_add(wait, count); + iowait_sdma_add(wait, total_count); if (tail != INVALID_TAIL) sdma_update_tail(sde, tail); spin_unlock_irqrestore(&sde->tail_lock, flags); - return ret == 0 ? count : ret; + *count_out = total_count; + return ret; unlock_noconn: spin_lock(&sde->flushlist_lock); list_for_each_entry_safe(tx, tx_next, tx_list, list) { tx->wait = wait; list_del_init(&tx->list); - if (wait) - iowait_sdma_inc(wait); tx->next_descq_idx = 0; #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER tx->sn = sde->tail_sn++; trace_hfi1_sdma_in_sn(sde, tx->sn); #endif list_add_tail(&tx->list, &sde->flushlist); + flush_count++; if (wait) { wait->tx_count++; wait->count += tx->num_desc; diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 8f50c99fe711..56257ea3598f 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -413,6 +413,8 @@ struct sdma_engine { spinlock_t flushlist_lock; /* private: */ struct list_head flushlist; + struct cpumask cpu_mask; + struct kobject kobj; }; int sdma_init(struct hfi1_devdata *dd, u8 port); @@ -847,7 +849,8 @@ int sdma_send_txreq(struct sdma_engine *sde, struct sdma_txreq *tx); int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait, - struct list_head *tx_list); + struct list_head *tx_list, + u32 *count); int sdma_ahg_alloc(struct sdma_engine *sde); void sdma_ahg_free(struct sdma_engine *sde, int ahg_index); @@ -1058,7 +1061,15 @@ struct sdma_engine *sdma_select_engine_vl( u32 selector, u8 vl); +struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, + u32 selector, u8 vl); +ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf); +ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, + size_t count); +int sdma_engine_get_vl(struct sdma_engine *sde); void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *); +void sdma_seqfile_dump_cpu_list(struct seq_file *s, struct hfi1_devdata *dd, + unsigned long cpuid); #ifdef CONFIG_SDMA_VERBOSITY void sdma_dumpstate(struct sdma_engine *); diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 74c84c655f7e..edba22461a9c 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -766,13 +766,95 @@ bail: return ret; } +struct sde_attribute { + struct attribute attr; + ssize_t (*show)(struct sdma_engine *sde, char *buf); + ssize_t (*store)(struct sdma_engine *sde, const char *buf, size_t cnt); +}; + +static ssize_t sde_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct sde_attribute *sde_attr = + container_of(attr, struct sde_attribute, attr); + struct sdma_engine *sde = + container_of(kobj, struct sdma_engine, kobj); + + if (!sde_attr->show) + return -EINVAL; + + return sde_attr->show(sde, buf); +} + +static ssize_t sde_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct sde_attribute *sde_attr = + container_of(attr, struct sde_attribute, attr); + struct sdma_engine *sde = + container_of(kobj, struct sdma_engine, kobj); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!sde_attr->store) + return -EINVAL; + + return sde_attr->store(sde, buf, count); +} + +static const struct sysfs_ops sde_sysfs_ops = { + .show = sde_show, + .store = sde_store, +}; + +static struct kobj_type sde_ktype = { + .sysfs_ops = &sde_sysfs_ops, +}; + +#define SDE_ATTR(_name, _mode, _show, _store) \ + struct sde_attribute sde_attr_##_name = \ + __ATTR(_name, _mode, _show, _store) + +static ssize_t sde_show_cpu_to_sde_map(struct sdma_engine *sde, char *buf) +{ + return sdma_get_cpu_to_sde_map(sde, buf); +} + +static ssize_t sde_store_cpu_to_sde_map(struct sdma_engine *sde, + const char *buf, size_t count) +{ + return sdma_set_cpu_to_sde_map(sde, buf, count); +} + +static ssize_t sde_show_vl(struct sdma_engine *sde, char *buf) +{ + int vl; + + vl = sdma_engine_get_vl(sde); + if (vl < 0) + return vl; + + return snprintf(buf, PAGE_SIZE, "%d\n", vl); +} + +static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO, + sde_show_cpu_to_sde_map, + sde_store_cpu_to_sde_map); +static SDE_ATTR(vl, S_IRUGO, sde_show_vl, NULL); + +static struct sde_attribute *sde_attribs[] = { + &sde_attr_cpu_list, + &sde_attr_vl +}; + /* * Register and create our files in /sys/class/infiniband. */ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd) { struct ib_device *dev = &dd->verbs_dev.rdi.ibdev; - int i, ret; + struct device *class_dev = &dev->dev; + int i, j, ret; for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) { ret = device_create_file(&dev->dev, hfi1_attributes[i]); @@ -780,10 +862,29 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd) goto bail; } + for (i = 0; i < dd->num_sdma; i++) { + ret = kobject_init_and_add(&dd->per_sdma[i].kobj, + &sde_ktype, &class_dev->kobj, + "sdma%d", i); + if (ret) + goto bail; + + for (j = 0; j < ARRAY_SIZE(sde_attribs); j++) { + ret = sysfs_create_file(&dd->per_sdma[i].kobj, + &sde_attribs[j]->attr); + if (ret) + goto bail; + } + } + return 0; bail: for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) device_remove_file(&dev->dev, hfi1_attributes[i]); + + for (i = 0; i < dd->num_sdma; i++) + kobject_del(&dd->per_sdma[i].kobj); + return ret; } diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 4cfb13771897..01f525cd985a 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -47,9 +47,9 @@ #define CREATE_TRACE_POINTS #include "trace.h" -u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr) +u8 ibhdr_exhdr_len(struct ib_header *hdr) { - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; u8 opcode; u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); @@ -67,16 +67,11 @@ u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr) #define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x" #define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x" #define IETH_PRN "ieth rkey 0x%.8x" -#define ATOMICACKETH_PRN "origdata %lld" -#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld" +#define ATOMICACKETH_PRN "origdata %llx" +#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx" #define OP(transport, op) IB_OPCODE_## transport ## _ ## op -static u64 ib_u64_get(__be32 *p) -{ - return ((u64)be32_to_cpu(p[0]) << 32) | be32_to_cpu(p[1]); -} - static const char *parse_syndrome(u8 syndrome) { switch (syndrome >> 5) { @@ -113,8 +108,7 @@ const char *parse_everbs_hdrs( case OP(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE): case OP(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE): trace_seq_printf(p, RETH_PRN " " IMM_PRN, - (unsigned long long)ib_u64_get( - (__be32 *)&eh->rc.reth.vaddr), + get_ib_reth_vaddr(&eh->rc.reth), be32_to_cpu(eh->rc.reth.rkey), be32_to_cpu(eh->rc.reth.length), be32_to_cpu(eh->rc.imm_data)); @@ -126,8 +120,7 @@ const char *parse_everbs_hdrs( case OP(RC, RDMA_WRITE_ONLY): case OP(UC, RDMA_WRITE_ONLY): trace_seq_printf(p, RETH_PRN, - (unsigned long long)ib_u64_get( - (__be32 *)&eh->rc.reth.vaddr), + get_ib_reth_vaddr(&eh->rc.reth), be32_to_cpu(eh->rc.reth.rkey), be32_to_cpu(eh->rc.reth.length)); break; @@ -145,20 +138,16 @@ const char *parse_everbs_hdrs( be32_to_cpu(eh->at.aeth) >> 24, parse_syndrome(be32_to_cpu(eh->at.aeth) >> 24), be32_to_cpu(eh->at.aeth) & HFI1_MSN_MASK, - (unsigned long long) - ib_u64_get(eh->at.atomic_ack_eth)); + ib_u64_get(&eh->at.atomic_ack_eth)); break; /* atomiceth */ case OP(RC, COMPARE_SWAP): case OP(RC, FETCH_ADD): trace_seq_printf(p, ATOMICETH_PRN, - (unsigned long long)ib_u64_get( - eh->atomic_eth.vaddr), + get_ib_ateth_vaddr(&eh->atomic_eth), eh->atomic_eth.rkey, - (unsigned long long)ib_u64_get( - (__be32 *)&eh->atomic_eth.swap_data), - (unsigned long long)ib_u64_get( - (__be32 *)&eh->atomic_eth.compare_data)); + get_ib_ateth_swap(&eh->atomic_eth), + get_ib_ateth_compare(&eh->atomic_eth)); break; /* deth */ case OP(UD, SEND_ONLY): diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h index 31654bbac1cf..26ae789e47cf 100644 --- a/drivers/infiniband/hw/hfi1/trace_ctxts.h +++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h @@ -67,9 +67,9 @@ TRACE_EVENT(hfi1_uctxtdata, __field(u64, hw_free) __field(void __iomem *, piobase) __field(u16, rcvhdrq_cnt) - __field(u64, rcvhdrq_phys) + __field(u64, rcvhdrq_dma) __field(u32, eager_cnt) - __field(u64, rcvegr_phys) + __field(u64, rcvegr_dma) ), TP_fast_assign(DD_DEV_ASSIGN(dd); __entry->ctxt = uctxt->ctxt; @@ -77,10 +77,9 @@ TRACE_EVENT(hfi1_uctxtdata, __entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free); __entry->piobase = uctxt->sc->base_addr; __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt; - __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys; + __entry->rcvhdrq_dma = uctxt->rcvhdrq_dma; __entry->eager_cnt = uctxt->egrbufs.alloced; - __entry->rcvegr_phys = - uctxt->egrbufs.rcvtids[0].phys; + __entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma; ), TP_printk("[%s] ctxt %u " UCTXT_FMT, __get_str(dev), @@ -89,9 +88,9 @@ TRACE_EVENT(hfi1_uctxtdata, __entry->hw_free, __entry->piobase, __entry->rcvhdrq_cnt, - __entry->rcvhdrq_phys, + __entry->rcvhdrq_dma, __entry->eager_cnt, - __entry->rcvegr_phys + __entry->rcvegr_dma ) ); diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index c3e41aed0034..382fcda3a5f6 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -55,7 +55,7 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_ibhdrs -u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr); +u8 ibhdr_exhdr_len(struct ib_header *hdr); const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) @@ -74,7 +74,7 @@ __print_symbolic(lrh, \ DECLARE_EVENT_CLASS(hfi1_ibhdr_template, TP_PROTO(struct hfi1_devdata *dd, - struct hfi1_ib_header *hdr), + struct ib_header *hdr), TP_ARGS(dd, hdr), TP_STRUCT__entry( DD_DEV_ENTRY(dd) @@ -102,7 +102,7 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template, __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr)) ), TP_fast_assign( - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; DD_DEV_ASSIGN(dd); /* LRH */ @@ -185,19 +185,19 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template, ); DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), TP_ARGS(dd, hdr)); DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), TP_ARGS(dd, hdr)); DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), TP_ARGS(dd, hdr)); DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), TP_ARGS(dd, hdr)); #endif /* __HFI1_TRACE_IBHDRS_H */ diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 9ba1f615ec95..11e02b228922 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -260,7 +260,7 @@ TRACE_EVENT(hfi1_mmu_invalidate, TRACE_EVENT(snoop_capture, TP_PROTO(struct hfi1_devdata *dd, int hdr_len, - struct hfi1_ib_header *hdr, + struct ib_header *hdr, int data_len, void *data), TP_ARGS(dd, hdr_len, hdr, data_len, data), @@ -279,7 +279,7 @@ TRACE_EVENT(snoop_capture, __dynamic_array(u8, raw_pkt, data_len) ), TP_fast_assign( - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); if (__entry->lnh == HFI1_LRH_BTH) diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index a726d96d185f..5e6d1bac4914 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -50,14 +50,7 @@ #include "qp.h" /* cut down ridiculously long IB macro names */ -#define OP(x) IB_OPCODE_UC_##x - -/* only opcode mask for adaptive pio */ -const u32 uc_only_opcode = - BIT(OP(SEND_ONLY) & 0x1f) | - BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) | - BIT(OP(RDMA_WRITE_ONLY & 0x1f)) | - BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)); +#define OP(x) UC_OP(x) /** * hfi1_make_uc_req - construct a request packet (SEND, RDMA write) @@ -70,7 +63,7 @@ const u32 uc_only_opcode = int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_swqe *wqe; u32 hwords = 5; u32 bth0 = 0; @@ -304,12 +297,12 @@ bail_no_tx: void hfi1_uc_rcv(struct hfi1_packet *packet) { struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; - struct hfi1_other_headers *ohdr = packet->ohdr; + struct ib_other_headers *ohdr = packet->ohdr; u32 bth0, opcode; u32 hdrsize = packet->hlen; u32 psn; diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index f01e8e1d62d3..97ae24b6314c 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -271,7 +271,7 @@ drop: int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct ib_ah_attr *ah_attr; struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; @@ -510,8 +510,8 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 bth0, plen, vl, hwords = 5; u16 lrh0; u8 sl = ibp->sc_to_sl[sc5]; - struct hfi1_ib_header hdr; - struct hfi1_other_headers *ohdr; + struct ib_header hdr; + struct ib_other_headers *ohdr; struct pio_buf *pbuf; struct send_context *ctxt = qp_to_send_context(qp, sc5); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); @@ -559,8 +559,8 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, /* * opa_smp_check() - Do the regular pkey checking, and the additional - * checks for SMPs specified in OPAv1 rev 0.90, section 9.10.26 - * ("SMA Packet Checks"). + * checks for SMPs specified in OPAv1 rev 1.0, 9/19/2016 update, section + * 9.10.25 ("SMA Packet Checks"). * * Note that: * - Checks are done using the pkey directly from the packet's BTH, @@ -603,23 +603,28 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, /* * SMPs fall into one of four (disjoint) categories: - * SMA request, SMA response, trap, or trap repress. - * Our response depends, in part, on which type of - * SMP we're processing. + * SMA request, SMA response, SMA trap, or SMA trap repress. + * Our response depends, in part, on which type of SMP we're + * processing. * - * If this is not an SMA request, or trap repress: - * - accept MAD if the port is running an SM - * - pkey == FULL_MGMT_P_KEY => - * reply with unsupported method (i.e., just mark - * the smp's status field here, and let it be - * processed normally) - * - pkey != LIM_MGMT_P_KEY => - * increment port recv constraint errors, drop MAD - * If this is an SMA request or trap repress: + * If this is an SMA response, skip the check here. + * + * If this is an SMA request or SMA trap repress: * - pkey != FULL_MGMT_P_KEY => * increment port recv constraint errors, drop MAD + * + * Otherwise: + * - accept if the port is running an SM + * - drop MAD if it's an SMA trap + * - pkey == FULL_MGMT_P_KEY => + * reply with unsupported method + * - pkey != FULL_MGMT_P_KEY => + * increment port recv constraint errors, drop MAD */ switch (smp->method) { + case IB_MGMT_METHOD_GET_RESP: + case IB_MGMT_METHOD_REPORT_RESP: + break; case IB_MGMT_METHOD_GET: case IB_MGMT_METHOD_SET: case IB_MGMT_METHOD_REPORT: @@ -629,23 +634,17 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, return 1; } break; - case IB_MGMT_METHOD_SEND: - case IB_MGMT_METHOD_TRAP: - case IB_MGMT_METHOD_GET_RESP: - case IB_MGMT_METHOD_REPORT_RESP: + default: if (ibp->rvp.port_cap_flags & IB_PORT_SM) return 0; + if (smp->method == IB_MGMT_METHOD_TRAP) + return 1; if (pkey == FULL_MGMT_P_KEY) { smp->status |= IB_SMP_UNSUP_METHOD; return 0; } - if (pkey != LIM_MGMT_P_KEY) { - ingress_pkey_table_fail(ppd, pkey, slid); - return 1; - } - break; - default: - break; + ingress_pkey_table_fail(ppd, pkey, slid); + return 1; } return 0; } @@ -665,7 +664,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, */ void hfi1_ud_rcv(struct hfi1_packet *packet) { - struct hfi1_other_headers *ohdr = packet->ohdr; + struct ib_other_headers *ohdr = packet->ohdr; int opcode; u32 hdrsize = packet->hlen; struct ib_wc wc; @@ -675,13 +674,13 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) int mgmt_pkey_idx = -1; struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; bool has_grh = rcv_flags & HFI1_HAS_GRH; - u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf); + u8 sc5 = hdr2sc(hdr, packet->rhf); u32 bth1; u8 sl_from_sc, sl; u16 slid; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 1694037d1eee..a761f804111e 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -548,7 +548,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, u8 opcode, sc, vl; int req_queued = 0; u16 dlid; - u8 selector; + u32 selector; if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) { hfi1_cdbg( @@ -753,12 +753,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, dlid = be16_to_cpu(req->hdr.lrh[1]); selector = dlid_to_selector(dlid); + selector += uctxt->ctxt + fd->subctxt; + req->sde = sdma_select_user_engine(dd, selector, vl); - /* Have to select the engine */ - req->sde = sdma_select_engine_vl(dd, - (u32)(uctxt->ctxt + fd->subctxt + - selector), - vl); if (!req->sde || !sdma_running(req->sde)) { ret = -ECOMM; goto free_req; @@ -894,7 +891,7 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len) static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) { - int ret = 0; + int ret = 0, count; unsigned npkts = 0; struct user_sdma_txreq *tx = NULL; struct hfi1_user_sdma_pkt_q *pq = NULL; @@ -1090,23 +1087,18 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) npkts++; } dosend: - ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps); - if (list_empty(&req->txps)) { - req->seqsubmitted = req->seqnum; - if (req->seqnum == req->info.npkts) { - set_bit(SDMA_REQ_SEND_DONE, &req->flags); - /* - * The txreq has already been submitted to the HW queue - * so we can free the AHG entry now. Corruption will not - * happen due to the sequential manner in which - * descriptors are processed. - */ - if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) - sdma_ahg_free(req->sde, req->ahg_idx); - } - } else if (ret > 0) { - req->seqsubmitted += ret; - ret = 0; + ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count); + req->seqsubmitted += count; + if (req->seqsubmitted == req->info.npkts) { + set_bit(SDMA_REQ_SEND_DONE, &req->flags); + /* + * The txreq has already been submitted to the HW queue + * so we can free the AHG entry now. Corruption will not + * happen due to the sequential manner in which + * descriptors are processed. + */ + if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) + sdma_ahg_free(req->sde, req->ahg_idx); } return ret; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 2b359540901d..f2f6b5a78e0e 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -76,7 +76,7 @@ static unsigned int hfi1_max_ahs = 0xFFFF; module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO); MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support"); -unsigned int hfi1_max_cqes = 0x2FFFF; +unsigned int hfi1_max_cqes = 0x2FFFFF; module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO); MODULE_PARM_DESC(max_cqes, "Maximum number of completion queue entries to support"); @@ -89,7 +89,7 @@ unsigned int hfi1_max_qp_wrs = 0x3FFF; module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO); MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support"); -unsigned int hfi1_max_qps = 16384; +unsigned int hfi1_max_qps = 32768; module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO); MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support"); @@ -335,7 +335,7 @@ const u8 hdr_len_by_opcode[256] = { [IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = 12 + 8 + 4, [IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = 12 + 8 + 4, [IB_OPCODE_RC_ACKNOWLEDGE] = 12 + 8 + 4, - [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4, + [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4 + 8, [IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28, [IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28, [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4, @@ -403,6 +403,28 @@ static const opcode_handler opcode_handler_tbl[256] = { [IB_OPCODE_CNP] = &hfi1_cnp_rcv }; +#define OPMASK 0x1f + +static const u32 pio_opmask[BIT(3)] = { + /* RC */ + [IB_OPCODE_RC >> 5] = + BIT(RC_OP(SEND_ONLY) & OPMASK) | + BIT(RC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) | + BIT(RC_OP(RDMA_WRITE_ONLY) & OPMASK) | + BIT(RC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK) | + BIT(RC_OP(RDMA_READ_REQUEST) & OPMASK) | + BIT(RC_OP(ACKNOWLEDGE) & OPMASK) | + BIT(RC_OP(ATOMIC_ACKNOWLEDGE) & OPMASK) | + BIT(RC_OP(COMPARE_SWAP) & OPMASK) | + BIT(RC_OP(FETCH_ADD) & OPMASK), + /* UC */ + [IB_OPCODE_UC >> 5] = + BIT(UC_OP(SEND_ONLY) & OPMASK) | + BIT(UC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) | + BIT(UC_OP(RDMA_WRITE_ONLY) & OPMASK) | + BIT(UC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK), +}; + /* * System image GUID. */ @@ -567,7 +589,7 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet) void hfi1_ib_rcv(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; u32 tlen = packet->tlen; struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_ibport *ibp = &ppd->ibport_data; @@ -719,7 +741,7 @@ static void verbs_sdma_complete( if (tx->wqe) { hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS); } else if (qp->ibqp.qp_type == IB_QPT_RC) { - struct hfi1_ib_header *hdr; + struct ib_header *hdr; hdr = &tx->phdr.hdr; hfi1_rc_send_complete(qp, hdr); @@ -748,7 +770,7 @@ static int wait_kmem(struct hfi1_ibdev *dev, qp->s_flags |= RVT_S_WAIT_KMEM; list_add_tail(&priv->s_iowait.list, &dev->memwait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); } write_sequnlock(&dev->iowait_lock); qp->s_flags &= ~RVT_S_BUSY; @@ -959,7 +981,7 @@ static int pio_wait(struct rvt_qp *qp, was_empty = list_empty(&sc->piowait); list_add_tail(&priv->s_iowait.list, &sc->piowait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); /* counting: only call wantpiobuf_intr if first user */ if (was_empty) hfi1_sc_wantpiobuf_intr(sc, 1); @@ -1200,7 +1222,7 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_ib_header *h = &tx->phdr.hdr; + struct ib_header *h = &tx->phdr.hdr; if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA))) return dd->process_pio_send; @@ -1210,22 +1232,18 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, case IB_QPT_GSI: case IB_QPT_UD: break; - case IB_QPT_RC: - if (piothreshold && - qp->s_cur_size <= min(piothreshold, qp->pmtu) && - (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) && - iowait_sdma_pending(&priv->s_iowait) == 0 && - !sdma_txreq_built(&tx->txreq)) - return dd->process_pio_send; - break; case IB_QPT_UC: + case IB_QPT_RC: { + u8 op = get_opcode(h); + if (piothreshold && qp->s_cur_size <= min(piothreshold, qp->pmtu) && - (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) && + (BIT(op & OPMASK) & pio_opmask[op >> 5]) && iowait_sdma_pending(&priv->s_iowait) == 0 && !sdma_txreq_built(&tx->txreq)) return dd->process_pio_send; break; + } default: break; } @@ -1244,8 +1262,8 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_other_headers *ohdr; - struct hfi1_ib_header *hdr; + struct ib_other_headers *ohdr; + struct ib_header *hdr; send_routine sr; int ret; u8 lnh; @@ -1754,7 +1772,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) { struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; struct rvt_qp *qp = packet->qp; u32 lqpn, rqpn = 0; u16 rlid = 0; @@ -1781,7 +1799,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) return; } - sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf); + sc5 = hdr2sc(hdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; lqpn = qp->ibqp.qp_num; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index d1b101c54828..1c3815d89eb7 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -60,6 +60,7 @@ #include <rdma/ib_pack.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_mad.h> +#include <rdma/ib_hdrs.h> #include <rdma/rdma_vt.h> #include <rdma/rdmavt_qp.h> #include <rdma/rdmavt_cq.h> @@ -80,16 +81,6 @@ struct hfi1_packet; */ #define HFI1_UVERBS_ABI_VERSION 2 -#define IB_SEQ_NAK (3 << 29) - -/* AETH NAK opcode values */ -#define IB_RNR_NAK 0x20 -#define IB_NAK_PSN_ERROR 0x60 -#define IB_NAK_INVALID_REQUEST 0x61 -#define IB_NAK_REMOTE_ACCESS_ERROR 0x62 -#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 -#define IB_NAK_INVALID_RD_REQUEST 0x64 - /* IB Performance Manager status values */ #define IB_PMA_SAMPLE_STATUS_DONE 0x00 #define IB_PMA_SAMPLE_STATUS_STARTED 0x01 @@ -104,80 +95,16 @@ struct hfi1_packet; #define HFI1_VENDOR_IPG cpu_to_be16(0xFFA0) -#define IB_BTH_REQ_ACK BIT(31) -#define IB_BTH_SOLICITED BIT(23) -#define IB_BTH_MIG_REQ BIT(22) - -#define IB_GRH_VERSION 6 -#define IB_GRH_VERSION_MASK 0xF -#define IB_GRH_VERSION_SHIFT 28 -#define IB_GRH_TCLASS_MASK 0xFF -#define IB_GRH_TCLASS_SHIFT 20 -#define IB_GRH_FLOW_MASK 0xFFFFF -#define IB_GRH_FLOW_SHIFT 0 -#define IB_GRH_NEXT_HDR 0x1B - #define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) +#define RC_OP(x) IB_OPCODE_RC_##x +#define UC_OP(x) IB_OPCODE_UC_##x + /* flags passed by hfi1_ib_rcv() */ enum { HFI1_HAS_GRH = (1 << 0), }; -struct ib_reth { - __be64 vaddr; - __be32 rkey; - __be32 length; -} __packed; - -struct ib_atomic_eth { - __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ - __be32 rkey; - __be64 swap_data; - __be64 compare_data; -} __packed; - -union ib_ehdrs { - struct { - __be32 deth[2]; - __be32 imm_data; - } ud; - struct { - struct ib_reth reth; - __be32 imm_data; - } rc; - struct { - __be32 aeth; - __be32 atomic_ack_eth[2]; - } at; - __be32 imm_data; - __be32 aeth; - __be32 ieth; - struct ib_atomic_eth atomic_eth; -} __packed; - -struct hfi1_other_headers { - __be32 bth[3]; - union ib_ehdrs u; -} __packed; - -/* - * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes - * long (72 w/ imm_data). Only the first 56 bytes of the IB header - * will be in the eager header buffer. The remaining 12 or 16 bytes - * are in the data buffer. - */ -struct hfi1_ib_header { - __be16 lrh[4]; - union { - struct { - struct ib_grh grh; - struct hfi1_other_headers oth; - } l; - struct hfi1_other_headers oth; - } u; -} __packed; - struct hfi1_ahg_info { u32 ahgdesc[2]; u16 tx_flags; @@ -187,7 +114,7 @@ struct hfi1_ahg_info { struct hfi1_sdma_header { __le64 pbc; - struct hfi1_ib_header hdr; + struct ib_header hdr; } __packed; /* @@ -386,7 +313,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet); void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, - struct hfi1_ib_header *hdr, + struct ib_header *hdr, u32 rcv_flags, struct rvt_qp *qp); @@ -400,7 +327,7 @@ void hfi1_rc_timeout(unsigned long arg); void hfi1_del_timers_sync(struct rvt_qp *qp); void hfi1_stop_rc_timers(struct rvt_qp *qp); -void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr); +void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr); void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err); @@ -423,7 +350,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); extern const u32 rc_only_opcode; extern const u32 uc_only_opcode; -static inline u8 get_opcode(struct hfi1_ib_header *h) +static inline u8 get_opcode(struct ib_header *h) { u16 lnh = be16_to_cpu(h->lrh[0]) & 3; @@ -433,13 +360,13 @@ static inline u8 get_opcode(struct hfi1_ib_header *h) return be32_to_cpu(h->u.l.oth.bth[0]) >> 24; } -int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, +int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, struct ib_global_route *grh, u32 hwords, u32 nwords); -void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, +void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2, int middle, struct hfi1_pkt_state *ps); diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index d8fb056526f8..094ab829ec42 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -109,7 +109,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, qp->s_flags |= RVT_S_WAIT_TX; list_add_tail(&priv->s_iowait.list, &dev->txwait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); } qp->s_flags &= ~RVT_S_BUSY; } diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index bbf0a163aeab..a3e21a25cea5 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -52,6 +52,7 @@ #include <linux/kref.h> #include <linux/sched.h> #include <linux/kthread.h> +#include <rdma/ib_hdrs.h> #include <rdma/rdma_vt.h> #include "qib_common.h" @@ -1131,7 +1132,6 @@ extern spinlock_t qib_devs_lock; extern struct qib_devdata *qib_lookup(int unit); extern u32 qib_cpulist_count; extern unsigned long *qib_cpulist; -extern u16 qpt_mask; extern unsigned qib_cc_table_size; int qib_init(struct qib_devdata *, int); diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 67ee6438cf59..728e0a030d2e 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -319,8 +319,8 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, ret = 1; else if (eflags == QLOGIC_IB_RHF_H_TIDERR) { /* For TIDERR and RC QPs premptively schedule a NAK */ - struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr; - struct qib_other_headers *ohdr = NULL; + struct ib_header *hdr = (struct ib_header *)rhdr; + struct ib_other_headers *ohdr = NULL; struct qib_ibport *ibp = &ppd->ibport_data; struct qib_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; @@ -588,8 +588,7 @@ move_along: qib_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } bail: diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index ce4034071f9c..ded27172320e 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1415,7 +1415,7 @@ static void flush_fifo(struct qib_pportdata *ppd) u32 *hdr; u64 pbc; const unsigned hdrwords = 7; - static struct qib_ib_header ibhdr = { + static struct ib_header ibhdr = { .lrh[0] = cpu_to_be16(0xF000 | QIB_LRH_BTH), .lrh[1] = IB_LID_PERMISSIVE, .lrh[2] = cpu_to_be16(hdrwords + SIZE_OF_CRC), diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index f9b8cd2354d1..99d31efe4c2f 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -41,14 +41,6 @@ #include "qib.h" -/* - * mask field which was present in now deleted qib_qpn_table - * is not present in rvt_qpn_table. Defining the same field - * as qpt_mask here instead of adding the mask field to - * rvt_qpn_table. - */ -u16 qpt_mask; - static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, unsigned off) { @@ -57,7 +49,7 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, static inline unsigned find_next_offset(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, unsigned off, - unsigned n) + unsigned n, u16 qpt_mask) { if (qpt_mask) { off++; @@ -179,6 +171,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi); struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata, verbs_dev); + u16 qpt_mask = dd->qpn_mask; if (type == IB_QPT_SMI || type == IB_QPT_GSI) { unsigned n; @@ -215,7 +208,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, goto bail; } offset = find_next_offset(qpt, map, offset, - dd->n_krcv_queues); + dd->n_krcv_queues, qpt_mask); qpn = mk_qpn(qpt, map, offset); /* * This test differs from alloc_pidmap(). diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 444028a3582a..2097512e75aa 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -75,7 +75,7 @@ static void start_timer(struct rvt_qp *qp) * Note the QP s_lock must be held. */ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp, - struct qib_other_headers *ohdr, u32 pmtu) + struct ib_other_headers *ohdr, u32 pmtu) { struct rvt_ack_entry *e; u32 hwords; @@ -154,10 +154,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp, len = 0; qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); ohdr->u.at.aeth = qib_compute_aeth(qp); - ohdr->u.at.atomic_ack_eth[0] = - cpu_to_be32(e->atomic_data >> 32); - ohdr->u.at.atomic_ack_eth[1] = - cpu_to_be32(e->atomic_data); + ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth); hwords += sizeof(ohdr->u.at) / sizeof(u32); bth2 = e->psn & QIB_PSN_MASK; e->sent = 1; @@ -234,7 +231,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) { struct qib_qp_priv *priv = qp->priv; struct qib_ibdev *dev = to_idev(qp->ibqp.device); - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_sge_state *ss; struct rvt_swqe *wqe; u32 hwords; @@ -444,20 +441,18 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) } if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->atomic_wr.swap); - ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->atomic_wr.compare_add); + put_ib_ateth_swap(wqe->atomic_wr.swap, + &ohdr->u.atomic_eth); + put_ib_ateth_swap(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); } else { qp->s_state = OP(FETCH_ADD); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->atomic_wr.compare_add); - ohdr->u.atomic_eth.compare_data = 0; + put_ib_ateth_swap(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); + put_ib_ateth_swap(0, &ohdr->u.atomic_eth); } - ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( - wqe->atomic_wr.remote_addr >> 32); - ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( - wqe->atomic_wr.remote_addr); + put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr, + &ohdr->u.atomic_eth); ohdr->u.atomic_eth.rkey = cpu_to_be32( wqe->atomic_wr.rkey); hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); @@ -632,8 +627,8 @@ void qib_send_rc_ack(struct rvt_qp *qp) u32 hwords; u32 pbufn; u32 __iomem *piobuf; - struct qib_ib_header hdr; - struct qib_other_headers *ohdr; + struct ib_header hdr; + struct ib_other_headers *ohdr; u32 control; unsigned long flags; @@ -942,9 +937,9 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) /* * This should be called with the QP s_lock held and interrupts disabled. */ -void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr) +void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_swqe *wqe; struct ib_wc wc; unsigned i; @@ -1177,7 +1172,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, qib_restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_SEND; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } @@ -1361,7 +1356,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn, qib_restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_SEND; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } } @@ -1383,7 +1378,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn, * Called at interrupt level. */ static void qib_rc_rcv_resp(struct qib_ibport *ibp, - struct qib_other_headers *ohdr, + struct ib_other_headers *ohdr, void *data, u32 tlen, struct rvt_qp *qp, u32 opcode, @@ -1463,12 +1458,9 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, case OP(ATOMIC_ACKNOWLEDGE): case OP(RDMA_READ_RESPONSE_FIRST): aeth = be32_to_cpu(ohdr->u.aeth); - if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { - __be32 *p = ohdr->u.at.atomic_ack_eth; - - val = ((u64) be32_to_cpu(p[0]) << 32) | - be32_to_cpu(p[1]); - } else + if (opcode == OP(ATOMIC_ACKNOWLEDGE)) + val = ib_u64_get(&ohdr->u.at.atomic_ack_eth); + else val = 0; if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) || opcode != OP(RDMA_READ_RESPONSE_FIRST)) @@ -1608,7 +1600,7 @@ bail: * Return 1 if no more processing is needed; otherwise return 0 to * schedule a response to be sent. */ -static int qib_rc_rcv_error(struct qib_other_headers *ohdr, +static int qib_rc_rcv_error(struct ib_other_headers *ohdr, void *data, struct rvt_qp *qp, u32 opcode, @@ -1640,7 +1632,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, */ if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } } @@ -1848,11 +1840,11 @@ static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n) * for the given QP. * Called at interrupt level. */ -void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, +void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { struct qib_ibport *ibp = &rcd->ppd->ibport_data; - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; u32 opcode; u32 hdrsize; u32 psn; @@ -2177,8 +2169,7 @@ send_last: e->rdma_sge.mr = NULL; } ateth = &ohdr->u.atomic_eth; - vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | - be32_to_cpu(ateth->vaddr[1]); + vaddr = get_ib_ateth_vaddr(ateth); if (unlikely(vaddr & (sizeof(u64) - 1))) goto nack_inv_unlck; rkey = be32_to_cpu(ateth->rkey); @@ -2189,11 +2180,11 @@ send_last: goto nack_acc_unlck; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; - sdata = be64_to_cpu(ateth->swap_data); + sdata = get_ib_ateth_swap(ateth); e->atomic_data = (opcode == OP(FETCH_ADD)) ? (u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, - be64_to_cpu(ateth->compare_data), + get_ib_ateth_compare(ateth), sdata); rvt_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; @@ -2233,7 +2224,7 @@ rnr_nak: /* Queue RNR NAK for later */ if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } return; @@ -2245,7 +2236,7 @@ nack_op_err: /* Queue NAK for later */ if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } return; @@ -2259,7 +2250,7 @@ nack_inv: /* Queue NAK for later */ if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } return; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index b67779256297..de1bde5950f5 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -265,7 +265,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) * * The s_lock will be acquired around the qib_migrate_qp() call. */ -int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, +int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0) { __be64 guid; @@ -680,7 +680,7 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, return sizeof(struct ib_grh) / sizeof(u32); } -void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr, +void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2) { struct qib_qp_priv *priv = qp->priv; diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 1d61bd04f449..5b2d483451ad 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -48,7 +48,7 @@ int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags) { struct qib_qp_priv *priv = qp->priv; - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_swqe *wqe; u32 hwords; u32 bth0; @@ -236,10 +236,10 @@ bail: * for the given QP. * Called at interrupt level. */ -void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, +void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; u32 opcode; u32 hdrsize; u32 psn; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 10d062561bd9..f45cad1198b0 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -245,7 +245,7 @@ drop: int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags) { struct qib_qp_priv *priv = qp->priv; - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct ib_ah_attr *ah_attr; struct qib_pportdata *ppd; struct qib_ibport *ibp; @@ -435,10 +435,10 @@ static unsigned qib_lookup_pkey(struct qib_ibport *ibp, u16 pkey) * for the given QP. * Called at interrupt level. */ -void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, +void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; int opcode; u32 hdrsize; u32 pad; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index fd1dfbce5539..876ebb442d38 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -313,7 +313,7 @@ static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length) * for the given QP. * Called at interrupt level. */ -static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, +static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { struct qib_ibport *ibp = &rcd->ppd->ibport_data; @@ -366,10 +366,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) { struct qib_pportdata *ppd = rcd->ppd; struct qib_ibport *ibp = &ppd->ibport_data; - struct qib_ib_header *hdr = rhdr; + struct ib_header *hdr = rhdr; struct qib_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_qp *qp; u32 qp_num; int lnh; @@ -841,7 +841,7 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status) if (tx->wqe) qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS); else if (qp->ibqp.qp_type == IB_QPT_RC) { - struct qib_ib_header *hdr; + struct ib_header *hdr; if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) hdr = &tx->align_buf->hdr; @@ -889,7 +889,7 @@ static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp) return ret; } -static int qib_verbs_send_dma(struct rvt_qp *qp, struct qib_ib_header *hdr, +static int qib_verbs_send_dma(struct rvt_qp *qp, struct ib_header *hdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len, u32 plen, u32 dwords) { @@ -1025,7 +1025,7 @@ static int no_bufs_available(struct rvt_qp *qp) return ret; } -static int qib_verbs_send_pio(struct rvt_qp *qp, struct qib_ib_header *ibhdr, +static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len, u32 plen, u32 dwords) { @@ -1133,7 +1133,7 @@ done: * Return zero if packet is sent or queued OK. * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise. */ -int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr, +int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len) { struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device); @@ -1606,8 +1606,6 @@ int qib_register_ib_device(struct qib_devdata *dd) /* Only need to initialize non-zero fields. */ setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev); - qpt_mask = dd->qpn_mask; - INIT_LIST_HEAD(&dev->piowait); INIT_LIST_HEAD(&dev->dmawait); INIT_LIST_HEAD(&dev->txwait); diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 736ced684842..94fd30fdedac 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -45,6 +45,7 @@ #include <linux/completion.h> #include <rdma/ib_pack.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_hdrs.h> #include <rdma/rdma_vt.h> #include <rdma/rdmavt_cq.h> @@ -63,16 +64,6 @@ struct qib_verbs_txreq; */ #define QIB_UVERBS_ABI_VERSION 2 -#define IB_SEQ_NAK (3 << 29) - -/* AETH NAK opcode values */ -#define IB_RNR_NAK 0x20 -#define IB_NAK_PSN_ERROR 0x60 -#define IB_NAK_INVALID_REQUEST 0x61 -#define IB_NAK_REMOTE_ACCESS_ERROR 0x62 -#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 -#define IB_NAK_INVALID_RD_REQUEST 0x64 - /* IB Performance Manager status values */ #define IB_PMA_SAMPLE_STATUS_DONE 0x00 #define IB_PMA_SAMPLE_STATUS_STARTED 0x01 @@ -87,22 +78,9 @@ struct qib_verbs_txreq; #define QIB_VENDOR_IPG cpu_to_be16(0xFFA0) -#define IB_BTH_REQ_ACK (1 << 31) -#define IB_BTH_SOLICITED (1 << 23) -#define IB_BTH_MIG_REQ (1 << 22) - /* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */ #define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26) -#define IB_GRH_VERSION 6 -#define IB_GRH_VERSION_MASK 0xF -#define IB_GRH_VERSION_SHIFT 28 -#define IB_GRH_TCLASS_MASK 0xFF -#define IB_GRH_TCLASS_SHIFT 20 -#define IB_GRH_FLOW_MASK 0xFFFFF -#define IB_GRH_FLOW_SHIFT 0 -#define IB_GRH_NEXT_HDR 0x1B - #define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) /* Values for set/get portinfo VLCap OperationalVLs */ @@ -129,61 +107,9 @@ static inline int qib_num_vls(int vls) } } -struct ib_reth { - __be64 vaddr; - __be32 rkey; - __be32 length; -} __packed; - -struct ib_atomic_eth { - __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ - __be32 rkey; - __be64 swap_data; - __be64 compare_data; -} __packed; - -struct qib_other_headers { - __be32 bth[3]; - union { - struct { - __be32 deth[2]; - __be32 imm_data; - } ud; - struct { - struct ib_reth reth; - __be32 imm_data; - } rc; - struct { - __be32 aeth; - __be32 atomic_ack_eth[2]; - } at; - __be32 imm_data; - __be32 aeth; - __be32 ieth; - struct ib_atomic_eth atomic_eth; - } u; -} __packed; - -/* - * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes - * long (72 w/ imm_data). Only the first 56 bytes of the IB header - * will be in the eager header buffer. The remaining 12 or 16 bytes - * are in the data buffer. - */ -struct qib_ib_header { - __be16 lrh[4]; - union { - struct { - struct ib_grh grh; - struct qib_other_headers oth; - } l; - struct qib_other_headers oth; - } u; -} __packed; - struct qib_pio_header { __le32 pbc[2]; - struct qib_ib_header hdr; + struct ib_header hdr; } __packed; /* @@ -191,7 +117,7 @@ struct qib_pio_header { * is made common. */ struct qib_qp_priv { - struct qib_ib_header *s_hdr; /* next packet header to send */ + struct ib_header *s_hdr; /* next packet header to send */ struct list_head iowait; /* link for wait PIO buf */ atomic_t s_dma_busy; struct qib_verbs_txreq *s_tx; @@ -376,7 +302,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail); void qib_put_txreq(struct qib_verbs_txreq *tx); -int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr, +int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len); void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, @@ -384,10 +310,10 @@ void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release); -void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, +void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp); -void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, +void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp); int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); @@ -398,13 +324,13 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid); void qib_rc_rnr_retry(unsigned long arg); -void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr); +void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr); void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err); int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr); -void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, +void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp); void mr_rcu_callback(struct rcu_head *list); @@ -413,13 +339,13 @@ int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only); void qib_migrate_qp(struct rvt_qp *qp); -int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, +int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, struct ib_global_route *grh, u32 hwords, u32 nwords); -void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr, +void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2); void _qib_do_send(struct work_struct *work); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 870b4f212fbc..6500c3b5a89c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -488,60 +488,23 @@ static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); if (removed) { synchronize_rcu(); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } /** - * reset_qp - initialize the QP state to the reset state - * @qp: the QP to reset + * rvt_init_qp - initialize the QP state to the reset state + * @qp: the QP to init or reinit * @type: the QP type - * r and s lock are required to be held by the caller + * + * This function is called from both rvt_create_qp() and + * rvt_reset_qp(). The difference is that the reset + * patch the necessary locks to protect against concurent + * access. */ -static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, - enum ib_qp_type type) - __releases(&qp->s_lock) - __releases(&qp->s_hlock) - __releases(&qp->r_lock) - __acquires(&qp->r_lock) - __acquires(&qp->s_hlock) - __acquires(&qp->s_lock) +static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) { - if (qp->state != IB_QPS_RESET) { - qp->state = IB_QPS_RESET; - - /* Let drivers flush their waitlist */ - rdi->driver_f.flush_qp_waiters(qp); - qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); - spin_unlock(&qp->s_lock); - spin_unlock(&qp->s_hlock); - spin_unlock_irq(&qp->r_lock); - - /* Stop the send queue and the retry timer */ - rdi->driver_f.stop_send_queue(qp); - - /* Wait for things to stop */ - rdi->driver_f.quiesce_qp(qp); - - /* take qp out the hash and wait for it to be unused */ - rvt_remove_qp(rdi, qp); - wait_event(qp->wait, !atomic_read(&qp->refcount)); - - /* grab the lock b/c it was locked at call time */ - spin_lock_irq(&qp->r_lock); - spin_lock(&qp->s_hlock); - spin_lock(&qp->s_lock); - - rvt_clear_mr_refs(qp, 1); - } - - /* - * Let the driver do any tear down it needs to for a qp - * that has been reset - */ - rdi->driver_f.notify_qp_reset(qp); - qp->remote_qpn = 0; qp->qkey = 0; qp->qp_access_flags = 0; @@ -587,6 +550,60 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, } /** + * rvt_reset_qp - initialize the QP state to the reset state + * @qp: the QP to reset + * @type: the QP type + * + * r_lock, s_hlock, and s_lock are required to be held by the caller + */ +static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) + __must_hold(&qp->s_lock) + __must_hold(&qp->s_hlock) + __must_hold(&qp->r_lock) +{ + lockdep_assert_held(&qp->r_lock); + lockdep_assert_held(&qp->s_hlock); + lockdep_assert_held(&qp->s_lock); + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + + /* Let drivers flush their waitlist */ + rdi->driver_f.flush_qp_waiters(qp); + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); + spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); + spin_unlock_irq(&qp->r_lock); + + /* Stop the send queue and the retry timer */ + rdi->driver_f.stop_send_queue(qp); + + /* Wait for things to stop */ + rdi->driver_f.quiesce_qp(qp); + + /* take qp out the hash and wait for it to be unused */ + rvt_remove_qp(rdi, qp); + wait_event(qp->wait, !atomic_read(&qp->refcount)); + + /* grab the lock b/c it was locked at call time */ + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); + spin_lock(&qp->s_lock); + + rvt_clear_mr_refs(qp, 1); + /* + * Let the driver do any tear down or re-init it needs to for + * a qp that has been reset + */ + rdi->driver_f.notify_qp_reset(qp); + } + rvt_init_qp(rdi, qp, type); + lockdep_assert_held(&qp->r_lock); + lockdep_assert_held(&qp->s_hlock); + lockdep_assert_held(&qp->s_lock); +} + +/** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for * @init_attr: the attributes of the queue pair @@ -766,7 +783,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } qp->ibqp.qp_num = err; qp->port_num = init_attr->port_num; - rvt_reset_qp(rdi, qp, init_attr->qp_type); + rvt_init_qp(rdi, qp, init_attr->qp_type); break; default: @@ -906,6 +923,8 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) int ret = 0; struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + lockdep_assert_held(&qp->r_lock); + lockdep_assert_held(&qp->s_lock); if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET) goto bail; @@ -980,7 +999,7 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1]; unsigned long flags; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); if (qp->ibqp.qp_num <= 1) { @@ -997,7 +1016,7 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) } /** - * qib_modify_qp - modify the attributes of a queue pair + * rvt_modify_qp - modify the attributes of a queue pair * @ibqp: the queue pair who's attributes we're modifying * @attr: the new attributes * @attr_mask: the mask of attributes to modify diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h new file mode 100644 index 000000000000..408439fe911e --- /dev/null +++ b/include/rdma/ib_hdrs.h @@ -0,0 +1,178 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef IB_HDRS_H +#define IB_HDRS_H + +#include <linux/types.h> +#include <asm/unaligned.h> +#include <rdma/ib_verbs.h> + +#define IB_SEQ_NAK (3 << 29) + +/* AETH NAK opcode values */ +#define IB_RNR_NAK 0x20 +#define IB_NAK_PSN_ERROR 0x60 +#define IB_NAK_INVALID_REQUEST 0x61 +#define IB_NAK_REMOTE_ACCESS_ERROR 0x62 +#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 +#define IB_NAK_INVALID_RD_REQUEST 0x64 + +#define IB_BTH_REQ_ACK BIT(31) +#define IB_BTH_SOLICITED BIT(23) +#define IB_BTH_MIG_REQ BIT(22) + +#define IB_GRH_VERSION 6 +#define IB_GRH_VERSION_MASK 0xF +#define IB_GRH_VERSION_SHIFT 28 +#define IB_GRH_TCLASS_MASK 0xFF +#define IB_GRH_TCLASS_SHIFT 20 +#define IB_GRH_FLOW_MASK 0xFFFFF +#define IB_GRH_FLOW_SHIFT 0 +#define IB_GRH_NEXT_HDR 0x1B + +struct ib_reth { + __be64 vaddr; /* potentially unaligned */ + __be32 rkey; + __be32 length; +} __packed; + +struct ib_atomic_eth { + __be64 vaddr; /* potentially unaligned */ + __be32 rkey; + __be64 swap_data; /* potentially unaligned */ + __be64 compare_data; /* potentially unaligned */ +} __packed; + +union ib_ehdrs { + struct { + __be32 deth[2]; + __be32 imm_data; + } ud; + struct { + struct ib_reth reth; + __be32 imm_data; + } rc; + struct { + __be32 aeth; + __be64 atomic_ack_eth; /* potentially unaligned */ + } __packed at; + __be32 imm_data; + __be32 aeth; + __be32 ieth; + struct ib_atomic_eth atomic_eth; +} __packed; + +struct ib_other_headers { + __be32 bth[3]; + union ib_ehdrs u; +} __packed; + +struct ib_header { + __be16 lrh[4]; + union { + struct { + struct ib_grh grh; + struct ib_other_headers oth; + } l; + struct ib_other_headers oth; + } u; +} __packed; + +/* accessors for unaligned __be64 items */ + +static inline u64 ib_u64_get(__be64 *p) +{ + return get_unaligned_be64(p); +} + +static inline void ib_u64_put(u64 val, __be64 *p) +{ + put_unaligned_be64(val, p); +} + +static inline u64 get_ib_reth_vaddr(struct ib_reth *reth) +{ + return ib_u64_get(&reth->vaddr); +} + +static inline void put_ib_reth_vaddr(u64 val, struct ib_reth *reth) +{ + ib_u64_put(val, &reth->vaddr); +} + +static inline u64 get_ib_ateth_vaddr(struct ib_atomic_eth *ateth) +{ + return ib_u64_get(&ateth->vaddr); +} + +static inline void put_ib_ateth_vaddr(u64 val, struct ib_atomic_eth *ateth) +{ + ib_u64_put(val, &ateth->vaddr); +} + +static inline u64 get_ib_ateth_swap(struct ib_atomic_eth *ateth) +{ + return ib_u64_get(&ateth->swap_data); +} + +static inline void put_ib_ateth_swap(u64 val, struct ib_atomic_eth *ateth) +{ + ib_u64_put(val, &ateth->swap_data); +} + +static inline u64 get_ib_ateth_compare(struct ib_atomic_eth *ateth) +{ + return ib_u64_get(&ateth->compare_data); +} + +static inline void put_ib_ateth_compare(u64 val, struct ib_atomic_eth *ateth) +{ + ib_u64_put(val, &ateth->compare_data); +} + +#endif /* IB_HDRS_H */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index bd34d0b56bf7..2c5183ef0243 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -466,6 +466,25 @@ static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n) } /** + * rvt_get_qp - get a QP reference + * @qp - the QP to hold + */ +static inline void rvt_get_qp(struct rvt_qp *qp) +{ + atomic_inc(&qp->refcount); +} + +/** + * rvt_put_qp - release a QP reference + * @qp - the QP to release + */ +static inline void rvt_put_qp(struct rvt_qp *qp) +{ + if (qp && atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); +} + +/** * rvt_qp_wqe_reserve - reserve operation * @qp - the rvt qp * @wqe - the send wqe |