aboutsummaryrefslogtreecommitdiff
path: root/drivers/misc/habanalabs/goya/goya.c
diff options
context:
space:
mode:
authorLinus Torvalds2019-07-11 15:34:05 -0700
committerLinus Torvalds2019-07-11 15:34:05 -0700
commit97ff4ca46d3279134cec49752de8c5a62dc68460 (patch)
tree3b562d5890b2fc358ca55a935395cc7938155406 /drivers/misc/habanalabs/goya/goya.c
parent4832a4dada1a2baefac76b70e4f3a78e71a7c35c (diff)
parent2f4281f4dce12440727ab770683cfb83eab62a26 (diff)
Merge tag 'char-misc-5.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
Pull char / misc driver updates from Greg KH: "Here is the "large" pull request for char and misc and other assorted smaller driver subsystems for 5.3-rc1. It seems that this tree is becoming the funnel point of lots of smaller driver subsystems, which is fine for me, but that's why it is getting larger over time and does not just contain stuff under drivers/char/ and drivers/misc. Lots of small updates all over the place here from different driver subsystems: - habana driver updates - coresight driver updates - documentation file movements and updates - Android binder fixes and updates - extcon driver updates - google firmware driver updates - fsi driver updates - smaller misc and char driver updates - soundwire driver updates - nvmem driver updates - w1 driver fixes All of these have been in linux-next for a while with no reported issues" * tag 'char-misc-5.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (188 commits) coresight: Do not default to CPU0 for missing CPU phandle dt-bindings: coresight: Change CPU phandle to required property ocxl: Allow contexts to be attached with a NULL mm fsi: sbefifo: Don't fail operations when in SBE IPL state coresight: tmc: Smatch: Fix potential NULL pointer dereference coresight: etm3x: Smatch: Fix potential NULL pointer dereference coresight: Potential uninitialized variable in probe() coresight: etb10: Do not call smp_processor_id from preemptible coresight: tmc-etf: Do not call smp_processor_id from preemptible coresight: tmc-etr: alloc_perf_buf: Do not call smp_processor_id from preemptible coresight: tmc-etr: Do not call smp_processor_id() from preemptible docs: misc-devices: convert files without extension to ReST fpga: dfl: fme: align PR buffer size per PR datawidth fpga: dfl: fme: remove copy_to_user() in ioctl for PR fpga: dfl-fme-mgr: fix FME_PR_INTFC_ID register address. intel_th: msu: Start read iterator from a non-empty window intel_th: msu: Split sgt array and pointer in multiwindow mode intel_th: msu: Support multipage blocks intel_th: pci: Add Ice Lake NNPI support intel_th: msu: Fix single mode with disabled IOMMU ...
Diffstat (limited to 'drivers/misc/habanalabs/goya/goya.c')
-rw-r--r--drivers/misc/habanalabs/goya/goya.c635
1 files changed, 449 insertions, 186 deletions
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 02d116b01a1a..75294ec65257 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -14,6 +14,8 @@
#include <linux/genalloc.h>
#include <linux/hwmon.h>
#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/iommu.h>
+#include <linux/seq_file.h>
/*
* GOYA security scheme:
@@ -89,6 +91,30 @@
#define GOYA_CB_POOL_CB_CNT 512
#define GOYA_CB_POOL_CB_SIZE 0x20000 /* 128KB */
+#define IS_QM_IDLE(engine, qm_glbl_sts0) \
+ (((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
+#define IS_DMA_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(DMA, qm_glbl_sts0)
+#define IS_TPC_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(TPC, qm_glbl_sts0)
+#define IS_MME_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(MME, qm_glbl_sts0)
+
+#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
+ (((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
+ engine##_CMDQ_IDLE_MASK)
+#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
+ IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
+#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
+ IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)
+
+#define IS_DMA_IDLE(dma_core_sts0) \
+ !((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)
+
+#define IS_TPC_IDLE(tpc_cfg_sts) \
+ (((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)
+
+#define IS_MME_IDLE(mme_arch_sts) \
+ (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
+
+
static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
"goya cq 4", "goya cpu eq"
@@ -297,6 +323,11 @@ static u32 goya_all_events[] = {
GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
};
+static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
+static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
+static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
+static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
+
void goya_get_fixed_properties(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -467,7 +498,7 @@ static int goya_early_init(struct hl_device *hdev)
prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
- rc = hl_pci_init(hdev, 39);
+ rc = hl_pci_init(hdev, 48);
if (rc)
return rc;
@@ -539,9 +570,36 @@ int goya_late_init(struct hl_device *hdev)
struct asic_fixed_properties *prop = &hdev->asic_prop;
int rc;
+ goya_fetch_psoc_frequency(hdev);
+
+ rc = goya_mmu_clear_pgt_range(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to clear MMU page tables range %d\n", rc);
+ return rc;
+ }
+
+ rc = goya_mmu_set_dram_default_page(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
+ return rc;
+ }
+
+ rc = goya_mmu_add_mappings_for_device_cpu(hdev);
+ if (rc)
+ return rc;
+
+ rc = goya_init_cpu_queues(hdev);
+ if (rc)
+ return rc;
+
+ rc = goya_test_cpu_queue(hdev);
+ if (rc)
+ return rc;
+
rc = goya_armcp_info_get(hdev);
if (rc) {
- dev_err(hdev->dev, "Failed to get armcp info\n");
+ dev_err(hdev->dev, "Failed to get armcp info %d\n", rc);
return rc;
}
@@ -553,33 +611,15 @@ int goya_late_init(struct hl_device *hdev)
rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
if (rc) {
- dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
+ dev_err(hdev->dev,
+ "Failed to enable PCI access from CPU %d\n", rc);
return rc;
}
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
- goya_fetch_psoc_frequency(hdev);
-
- rc = goya_mmu_clear_pgt_range(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
- goto disable_pci_access;
- }
-
- rc = goya_mmu_set_dram_default_page(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to set DRAM default page\n");
- goto disable_pci_access;
- }
-
return 0;
-
-disable_pci_access:
- hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
-
- return rc;
}
/*
@@ -655,7 +695,10 @@ static int goya_sw_init(struct hl_device *hdev)
goto free_dma_pool;
}
- hdev->cpu_accessible_dma_pool = gen_pool_create(HL_CPU_PKT_SHIFT, -1);
+ dev_dbg(hdev->dev, "cpu accessible memory at bus address 0x%llx\n",
+ hdev->cpu_accessible_dma_address);
+
+ hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
if (!hdev->cpu_accessible_dma_pool) {
dev_err(hdev->dev,
"Failed to create CPU accessible DMA pool\n");
@@ -786,7 +829,6 @@ static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
else
sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
- WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + reg_off, lower_32_bits(sob_addr));
WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
}
@@ -973,9 +1015,9 @@ int goya_init_cpu_queues(struct hl_device *hdev)
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_3, upper_32_bits(eq->bus_address));
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8,
- lower_32_bits(hdev->cpu_accessible_dma_address));
+ lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9,
- upper_32_bits(hdev->cpu_accessible_dma_address));
+ upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES);
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4, HL_EQ_SIZE_IN_BYTES);
@@ -1001,7 +1043,7 @@ int goya_init_cpu_queues(struct hl_device *hdev)
if (err) {
dev_err(hdev->dev,
- "Failed to communicate with ARM CPU (ArmCP timeout)\n");
+ "Failed to setup communication with device CPU\n");
return -EIO;
}
@@ -2061,10 +2103,12 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
goya_disable_external_queues(hdev);
goya_disable_internal_queues(hdev);
- if (hard_reset)
+ if (hard_reset) {
goya_disable_msix(hdev);
- else
+ goya_mmu_remove_device_cpu_mappings(hdev);
+ } else {
goya_sync_irqs(hdev);
+ }
}
/*
@@ -2277,14 +2321,14 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
goya_read_device_fw_version(hdev, FW_COMP_UBOOT);
goya_read_device_fw_version(hdev, FW_COMP_PREBOOT);
- if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
- goto out;
-
if (!hdev->fw_loading) {
dev_info(hdev->dev, "Skip loading FW\n");
goto out;
}
+ if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
+ goto out;
+
rc = goya_push_linux_to_device(hdev);
if (rc)
return rc;
@@ -2466,34 +2510,11 @@ static int goya_hw_init(struct hl_device *hdev)
if (rc)
goto disable_queues;
- rc = goya_init_cpu_queues(hdev);
- if (rc) {
- dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
- rc);
- goto disable_msix;
- }
-
- /*
- * Check if we managed to set the DMA mask to more then 32 bits. If so,
- * let's try to increase it again because in Goya we set the initial
- * dma mask to less then 39 bits so that the allocation of the memory
- * area for the device's cpu will be under 39 bits
- */
- if (hdev->dma_mask > 32) {
- rc = hl_pci_set_dma_mask(hdev, 48);
- if (rc)
- goto disable_pci_access;
- }
-
/* Perform read from the device to flush all MSI-X configuration */
val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
return 0;
-disable_pci_access:
- hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
-disable_msix:
- goya_disable_msix(hdev);
disable_queues:
goya_disable_internal_queues(hdev);
goya_disable_external_queues(hdev);
@@ -2629,7 +2650,6 @@ static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
u32 db_reg_offset, db_value;
- bool invalid_queue = false;
switch (hw_queue_id) {
case GOYA_QUEUE_ID_DMA_0:
@@ -2653,10 +2673,7 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
break;
case GOYA_QUEUE_ID_CPU_PQ:
- if (hdev->cpu_queues_enable)
- db_reg_offset = mmCPU_IF_PF_PQ_PI;
- else
- invalid_queue = true;
+ db_reg_offset = mmCPU_IF_PF_PQ_PI;
break;
case GOYA_QUEUE_ID_MME:
@@ -2696,12 +2713,8 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
break;
default:
- invalid_queue = true;
- }
-
- if (invalid_queue) {
/* Should never get here */
- dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
+ dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
hw_queue_id);
return;
}
@@ -2808,7 +2821,6 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
dma_addr_t fence_dma_addr;
struct hl_cb *cb;
u32 tmp, timeout;
- char buf[16] = {};
int rc;
if (hdev->pldm)
@@ -2816,10 +2828,9 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
else
timeout = HL_DEVICE_TIMEOUT_USEC;
- if (!hdev->asic_funcs->is_device_idle(hdev, buf, sizeof(buf))) {
+ if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
dev_err_ratelimited(hdev->dev,
- "Can't send KMD job on QMAN0 because %s is busy\n",
- buf);
+ "Can't send KMD job on QMAN0 because the device is not idle\n");
return -EBUSY;
}
@@ -2831,16 +2842,8 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
return -ENOMEM;
}
- *fence_ptr = 0;
-
goya_qman0_set_security(hdev, true);
- /*
- * goya cs parser saves space for 2xpacket_msg_prot at end of CB. For
- * synchronized kernel jobs we only need space for 1 packet_msg_prot
- */
- job->job_cb_size -= sizeof(struct packet_msg_prot);
-
cb = job->patched_cb;
fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
@@ -2860,14 +2863,14 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
goto free_fence_ptr;
}
- rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) fence_ptr, timeout,
- &tmp);
+ rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
+ (tmp == GOYA_QMAN0_FENCE_VAL), 1000, timeout);
hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
- if ((rc) || (tmp != GOYA_QMAN0_FENCE_VAL)) {
- dev_err(hdev->dev, "QMAN0 Job hasn't finished in time\n");
- rc = -ETIMEDOUT;
+ if (rc == -ETIMEDOUT) {
+ dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
+ goto free_fence_ptr;
}
free_fence_ptr:
@@ -2941,20 +2944,19 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
goto free_pkt;
}
- rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) fence_ptr,
- GOYA_TEST_QUEUE_WAIT_USEC, &tmp);
+ rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
+ 1000, GOYA_TEST_QUEUE_WAIT_USEC);
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
- if ((!rc) && (tmp == fence_val)) {
- dev_info(hdev->dev,
- "queue test on H/W queue %d succeeded\n",
- hw_queue_id);
- } else {
+ if (rc == -ETIMEDOUT) {
dev_err(hdev->dev,
"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
- rc = -EINVAL;
+ rc = -EIO;
+ } else {
+ dev_info(hdev->dev, "queue test on H/W queue %d succeeded\n",
+ hw_queue_id);
}
free_pkt:
@@ -2990,12 +2992,6 @@ int goya_test_queues(struct hl_device *hdev)
ret_val = -EINVAL;
}
- if (hdev->cpu_queues_enable) {
- rc = goya_test_cpu_queue(hdev);
- if (rc)
- ret_val = -EINVAL;
- }
-
return ret_val;
}
@@ -3028,7 +3024,13 @@ static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
dma_addr_t *dma_handle)
{
- return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
+ void *vaddr;
+
+ vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
+ *dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
+ VA_CPU_ACCESSIBLE_MEM_ADDR;
+
+ return vaddr;
}
void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
@@ -3907,8 +3909,8 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
return goya_parse_cb_no_mmu(hdev, parser);
}
-void goya_add_end_of_cb_packets(u64 kernel_address, u32 len, u64 cq_addr,
- u32 cq_val, u32 msix_vec)
+void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
+ u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec)
{
struct packet_msg_prot *cq_pkt;
u32 tmp;
@@ -3939,6 +3941,11 @@ void goya_update_eq_ci(struct hl_device *hdev, u32 val)
void goya_restore_phase_topology(struct hl_device *hdev)
{
+
+}
+
+static void goya_clear_sm_regs(struct hl_device *hdev)
+{
int i, num_of_sob_in_longs, num_of_mon_in_longs;
num_of_sob_in_longs =
@@ -3958,10 +3965,11 @@ void goya_restore_phase_topology(struct hl_device *hdev)
}
/*
- * goya_debugfs_read32 - read a 32bit value from a given device address
+ * goya_debugfs_read32 - read a 32bit value from a given device or a host mapped
+ * address.
*
* @hdev: pointer to hl_device structure
- * @addr: address in device
+ * @addr: device or host mapped address
* @val: returned value
*
* In case of DDR address that is not mapped into the default aperture that
@@ -4002,6 +4010,10 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
}
if (ddr_bar_addr == U64_MAX)
rc = -EIO;
+
+ } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
+ *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
+
} else {
rc = -EFAULT;
}
@@ -4010,10 +4022,11 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
}
/*
- * goya_debugfs_write32 - write a 32bit value to a given device address
+ * goya_debugfs_write32 - write a 32bit value to a given device or a host mapped
+ * address.
*
* @hdev: pointer to hl_device structure
- * @addr: address in device
+ * @addr: device or host mapped address
* @val: returned value
*
* In case of DDR address that is not mapped into the default aperture that
@@ -4054,6 +4067,10 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
}
if (ddr_bar_addr == U64_MAX)
rc = -EIO;
+
+ } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
+ *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
+
} else {
rc = -EFAULT;
}
@@ -4086,6 +4103,47 @@ static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
static const char *_goya_get_event_desc(u16 event_type)
{
switch (event_type) {
+ case GOYA_ASYNC_EVENT_ID_PCIE_IF:
+ return "PCIe_if";
+ case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
+ return "TPC%d_ecc";
+ case GOYA_ASYNC_EVENT_ID_MME_ECC:
+ return "MME_ecc";
+ case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
+ return "MME_ecc_ext";
+ case GOYA_ASYNC_EVENT_ID_MMU_ECC:
+ return "MMU_ecc";
+ case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
+ return "DMA_macro";
+ case GOYA_ASYNC_EVENT_ID_DMA_ECC:
+ return "DMA_ecc";
+ case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
+ return "CPU_if_ecc";
+ case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
+ return "PSOC_mem";
+ case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
+ return "PSOC_coresight";
+ case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
+ return "SRAM%d";
+ case GOYA_ASYNC_EVENT_ID_GIC500:
+ return "GIC500";
+ case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
+ return "PLL%d";
+ case GOYA_ASYNC_EVENT_ID_AXI_ECC:
+ return "AXI_ecc";
+ case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
+ return "L2_ram_ecc";
+ case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
+ return "PSOC_gpio_05_sw_reset";
+ case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
+ return "PSOC_gpio_10_vrhot_icrit";
case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
return "PCIe_dec";
case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
@@ -4128,6 +4186,17 @@ static const char *_goya_get_event_desc(u16 event_type)
return "DMA%d_qm";
case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
return "DMA%d_ch";
+ case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
+ return "TPC%d_bmon_spmu";
+ case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
+ return "DMA_bm_ch%d";
default:
return "N/A";
}
@@ -4138,6 +4207,25 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
u8 index;
switch (event_type) {
+ case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
+ index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
+ snprintf(desc, size, _goya_get_event_desc(event_type), index);
+ break;
+ case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
+ index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
+ snprintf(desc, size, _goya_get_event_desc(event_type), index);
+ break;
+ case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
+ index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
+ snprintf(desc, size, _goya_get_event_desc(event_type), index);
+ break;
case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
@@ -4176,6 +4264,21 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
snprintf(desc, size, _goya_get_event_desc(event_type), index);
break;
+ case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
+ index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
+ snprintf(desc, size, _goya_get_event_desc(event_type), index);
+ break;
+ case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
+ index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
+ snprintf(desc, size, _goya_get_event_desc(event_type), index);
+ break;
default:
snprintf(desc, size, _goya_get_event_desc(event_type));
break;
@@ -4226,7 +4329,8 @@ static void goya_print_mmu_error_info(struct hl_device *hdev)
}
}
-static void goya_print_irq_info(struct hl_device *hdev, u16 event_type)
+static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
+ bool razwi)
{
char desc[20] = "";
@@ -4234,8 +4338,10 @@ static void goya_print_irq_info(struct hl_device *hdev, u16 event_type)
dev_err(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
event_type, desc);
- goya_print_razwi_info(hdev);
- goya_print_mmu_error_info(hdev);
+ if (razwi) {
+ goya_print_razwi_info(hdev);
+ goya_print_mmu_error_info(hdev);
+ }
}
static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
@@ -4339,19 +4445,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
case GOYA_ASYNC_EVENT_ID_GIC500:
- case GOYA_ASYNC_EVENT_ID_PLL0:
- case GOYA_ASYNC_EVENT_ID_PLL1:
- case GOYA_ASYNC_EVENT_ID_PLL3:
- case GOYA_ASYNC_EVENT_ID_PLL4:
- case GOYA_ASYNC_EVENT_ID_PLL5:
- case GOYA_ASYNC_EVENT_ID_PLL6:
+ case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
case GOYA_ASYNC_EVENT_ID_AXI_ECC:
case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
- dev_err(hdev->dev,
- "Received H/W interrupt %d, reset the chip\n",
- event_type);
+ goya_print_irq_info(hdev, event_type, false);
hl_device_reset(hdev, true, false);
break;
@@ -4382,7 +4481,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
- goya_print_irq_info(hdev, event_type);
+ goya_print_irq_info(hdev, event_type, true);
goya_unmask_irq(hdev, event_type);
break;
@@ -4394,12 +4493,9 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
- case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0:
- case GOYA_ASYNC_EVENT_ID_DMA_BM_CH1:
- case GOYA_ASYNC_EVENT_ID_DMA_BM_CH2:
- case GOYA_ASYNC_EVENT_ID_DMA_BM_CH3:
- case GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
- dev_info(hdev->dev, "Received H/W interrupt %d\n", event_type);
+ case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
+ goya_print_irq_info(hdev, event_type, false);
+ goya_unmask_irq(hdev, event_type);
break;
default:
@@ -4418,36 +4514,47 @@ void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
return goya->events_stat;
}
-static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u32 size,
+static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
u64 val, bool is_dram)
{
struct packet_lin_dma *lin_dma_pkt;
struct hl_cs_job *job;
u32 cb_size, ctl;
struct hl_cb *cb;
- int rc;
+ int rc, lin_dma_pkts_cnt;
- cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+ lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
+ cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
+ sizeof(struct packet_msg_prot);
+ cb = hl_cb_kernel_create(hdev, cb_size);
if (!cb)
- return -EFAULT;
+ return -ENOMEM;
lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
- memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
- cb_size = sizeof(*lin_dma_pkt);
-
- ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
- (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
- (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
- (1 << GOYA_PKT_CTL_RB_SHIFT) |
- (1 << GOYA_PKT_CTL_MB_SHIFT));
- ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
- GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
- lin_dma_pkt->ctl = cpu_to_le32(ctl);
+ do {
+ memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
+
+ ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
+ (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
+ (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
+ (1 << GOYA_PKT_CTL_RB_SHIFT) |
+ (1 << GOYA_PKT_CTL_MB_SHIFT));
+ ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
+ GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
+ lin_dma_pkt->ctl = cpu_to_le32(ctl);
+
+ lin_dma_pkt->src_addr = cpu_to_le64(val);
+ lin_dma_pkt->dst_addr = cpu_to_le64(addr);
+ if (lin_dma_pkts_cnt > 1)
+ lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
+ else
+ lin_dma_pkt->tsize = cpu_to_le32(size);
- lin_dma_pkt->src_addr = cpu_to_le64(val);
- lin_dma_pkt->dst_addr = cpu_to_le64(addr);
- lin_dma_pkt->tsize = cpu_to_le32(size);
+ size -= SZ_2G;
+ addr += SZ_2G;
+ lin_dma_pkt++;
+ } while (--lin_dma_pkts_cnt);
job = hl_cs_allocate_job(hdev, true);
if (!job) {
@@ -4462,8 +4569,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u32 size,
job->user_cb_size = cb_size;
job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
job->patched_cb = job->user_cb;
- job->job_cb_size = job->user_cb_size +
- sizeof(struct packet_msg_prot) * 2;
+ job->job_cb_size = job->user_cb_size;
hl_debugfs_add_job(hdev, job);
@@ -4485,10 +4591,12 @@ release_cb:
int goya_context_switch(struct hl_device *hdev, u32 asid)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 addr = prop->sram_base_address;
+ u64 addr = prop->sram_base_address, sob_addr;
u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
u64 val = 0x7777777777777777ull;
- int rc;
+ int rc, dma_id;
+ u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
+ mmDMA_CH_0_WR_COMP_ADDR_LO;
rc = goya_memset_device_memory(hdev, addr, size, val, false);
if (rc) {
@@ -4496,13 +4604,27 @@ int goya_context_switch(struct hl_device *hdev, u32 asid)
return rc;
}
+ /* we need to reset registers that the user is allowed to change */
+ sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
+ WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));
+
+ for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
+ sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
+ (dma_id - 1) * 4;
+ WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
+ lower_32_bits(sob_addr));
+ }
+
WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
+
goya_mmu_prepare(hdev, asid);
+ goya_clear_sm_regs(hdev);
+
return 0;
}
-int goya_mmu_clear_pgt_range(struct hl_device *hdev)
+static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct goya_device *goya = hdev->asic_specific;
@@ -4516,7 +4638,7 @@ int goya_mmu_clear_pgt_range(struct hl_device *hdev)
return goya_memset_device_memory(hdev, addr, size, 0, true);
}
-int goya_mmu_set_dram_default_page(struct hl_device *hdev)
+static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
@@ -4529,7 +4651,123 @@ int goya_mmu_set_dram_default_page(struct hl_device *hdev)
return goya_memset_device_memory(hdev, addr, size, val, true);
}
-void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
+static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct goya_device *goya = hdev->asic_specific;
+ s64 off, cpu_off;
+ int rc;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+ return 0;
+
+ for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
+ rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
+ prop->dram_base_address + off, PAGE_SIZE_2MB);
+ if (rc) {
+ dev_err(hdev->dev, "Map failed for address 0x%llx\n",
+ prop->dram_base_address + off);
+ goto unmap;
+ }
+ }
+
+ if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
+ rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
+ hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Map failed for CPU accessible memory\n");
+ off -= PAGE_SIZE_2MB;
+ goto unmap;
+ }
+ } else {
+ for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
+ rc = hl_mmu_map(hdev->kernel_ctx,
+ VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
+ hdev->cpu_accessible_dma_address + cpu_off,
+ PAGE_SIZE_4KB);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Map failed for CPU accessible memory\n");
+ cpu_off -= PAGE_SIZE_4KB;
+ goto unmap_cpu;
+ }
+ }
+ }
+
+ goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
+ goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
+ WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
+ WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);
+
+ /* Make sure configuration is flushed to device */
+ RREG32(mmCPU_IF_AWUSER_OVR_EN);
+
+ goya->device_cpu_mmu_mappings_done = true;
+
+ return 0;
+
+unmap_cpu:
+ for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
+ if (hl_mmu_unmap(hdev->kernel_ctx,
+ VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
+ PAGE_SIZE_4KB))
+ dev_warn_ratelimited(hdev->dev,
+ "failed to unmap address 0x%llx\n",
+ VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
+unmap:
+ for (; off >= 0 ; off -= PAGE_SIZE_2MB)
+ if (hl_mmu_unmap(hdev->kernel_ctx,
+ prop->dram_base_address + off, PAGE_SIZE_2MB))
+ dev_warn_ratelimited(hdev->dev,
+ "failed to unmap address 0x%llx\n",
+ prop->dram_base_address + off);
+
+ return rc;
+}
+
+void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct goya_device *goya = hdev->asic_specific;
+ u32 off, cpu_off;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+ return;
+
+ if (!goya->device_cpu_mmu_mappings_done)
+ return;
+
+ WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
+ WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);
+
+ if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
+ if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
+ PAGE_SIZE_2MB))
+ dev_warn(hdev->dev,
+ "Failed to unmap CPU accessible memory\n");
+ } else {
+ for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
+ if (hl_mmu_unmap(hdev->kernel_ctx,
+ VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
+ PAGE_SIZE_4KB))
+ dev_warn_ratelimited(hdev->dev,
+ "failed to unmap address 0x%llx\n",
+ VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
+ }
+
+ for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
+ if (hl_mmu_unmap(hdev->kernel_ctx,
+ prop->dram_base_address + off, PAGE_SIZE_2MB))
+ dev_warn_ratelimited(hdev->dev,
+ "Failed to unmap address 0x%llx\n",
+ prop->dram_base_address + off);
+
+ goya->device_cpu_mmu_mappings_done = false;
+}
+
+static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
{
struct goya_device *goya = hdev->asic_specific;
int i;
@@ -4676,57 +4914,82 @@ int goya_armcp_info_get(struct hl_device *hdev)
return 0;
}
-static bool goya_is_device_idle(struct hl_device *hdev, char *buf, size_t size)
+static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
+ struct seq_file *s)
{
- u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg;
+ const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
+ const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
+ u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
+ mme_arch_sts;
+ bool is_idle = true, is_eng_idle;
+ u64 offset;
int i;
+ if (s)
+ seq_puts(s, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n"
+ "--- ------- ------------ -------------\n");
+
offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
for (i = 0 ; i < DMA_MAX_NUM ; i++) {
- dma_qm_reg = mmDMA_QM_0_GLBL_STS0 + i * offset;
+ qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
+ dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
+ is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
+ IS_DMA_IDLE(dma_core_sts0);
+ is_idle &= is_eng_idle;
- if ((RREG32(dma_qm_reg) & DMA_QM_IDLE_MASK) !=
- DMA_QM_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "DMA%d_QM", i);
+ if (mask)
+ *mask |= !is_eng_idle << (GOYA_ENGINE_ID_DMA_0 + i);
+ if (s)
+ seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
+ qm_glbl_sts0, dma_core_sts0);
}
+ if (s)
+ seq_puts(s,
+ "\nTPC is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 CFG_STATUS\n"
+ "--- ------- ------------ -------------- ----------\n");
+
offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
for (i = 0 ; i < TPC_MAX_NUM ; i++) {
- tpc_qm_reg = mmTPC0_QM_GLBL_STS0 + i * offset;
- tpc_cmdq_reg = mmTPC0_CMDQ_GLBL_STS0 + i * offset;
- tpc_cfg_reg = mmTPC0_CFG_STATUS + i * offset;
-
- if ((RREG32(tpc_qm_reg) & TPC_QM_IDLE_MASK) !=
- TPC_QM_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "TPC%d_QM", i);
-
- if ((RREG32(tpc_cmdq_reg) & TPC_CMDQ_IDLE_MASK) !=
- TPC_CMDQ_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "TPC%d_CMDQ", i);
-
- if ((RREG32(tpc_cfg_reg) & TPC_CFG_IDLE_MASK) !=
- TPC_CFG_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "TPC%d_CFG", i);
- }
-
- if ((RREG32(mmMME_QM_GLBL_STS0) & MME_QM_IDLE_MASK) !=
- MME_QM_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "MME_QM");
-
- if ((RREG32(mmMME_CMDQ_GLBL_STS0) & MME_CMDQ_IDLE_MASK) !=
- MME_CMDQ_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "MME_CMDQ");
-
- if ((RREG32(mmMME_ARCH_STATUS) & MME_ARCH_IDLE_MASK) !=
- MME_ARCH_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "MME_ARCH");
-
- if (RREG32(mmMME_SHADOW_0_STATUS) & MME_SHADOW_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "MME");
-
- return true;
+ qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
+ cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
+ tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
+ is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
+ IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
+ IS_TPC_IDLE(tpc_cfg_sts);
+ is_idle &= is_eng_idle;
+
+ if (mask)
+ *mask |= !is_eng_idle << (GOYA_ENGINE_ID_TPC_0 + i);
+ if (s)
+ seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
+ qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
+ }
+
+ if (s)
+ seq_puts(s,
+ "\nMME is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 ARCH_STATUS\n"
+ "--- ------- ------------ -------------- -----------\n");
+
+ qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
+ cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
+ mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
+ is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
+ IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
+ IS_MME_IDLE(mme_arch_sts);
+ is_idle &= is_eng_idle;
+
+ if (mask)
+ *mask |= !is_eng_idle << GOYA_ENGINE_ID_MME_0;
+ if (s) {
+ seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
+ cmdq_glbl_sts0, mme_arch_sts);
+ seq_puts(s, "\n");
+ }
+
+ return is_idle;
}
static void goya_hw_queues_lock(struct hl_device *hdev)