From 9864bb4801331daa48514face9d0f4861e4d485b Mon Sep 17 00:00:00 2001 From: Li Li Date: Thu, 26 May 2022 15:00:18 -0700 Subject: Binder: add TF_UPDATE_TXN to replace outdated txn When the target process is busy, incoming oneway transactions are queued in the async_todo list. If the clients continue sending extra oneway transactions while the target process is frozen, this queue can become too large to accommodate new transactions. That's why binder driver introduced ONEWAY_SPAM_DETECTION to detect this situation. It's helpful to debug the async binder buffer exhausting issue, but the issue itself isn't solved directly. In real cases applications are designed to send oneway transactions repeatedly, delivering updated inforamtion to the target process. Typical examples are Wi-Fi signal strength and some real time sensor data. Even if the apps might only care about the lastet information, all outdated oneway transactions are still accumulated there until the frozen process is thawed later. For this kind of situations, there's no existing method to skip those outdated transactions and deliver the latest one only. This patch introduces a new transaction flag TF_UPDATE_TXN. To use it, use apps can set this new flag along with TF_ONE_WAY. When such an oneway transaction is to be queued into the async_todo list of a frozen process, binder driver will check if any previous pending transactions can be superseded by comparing their code, flags and target node. If such an outdated pending transaction is found, the latest transaction will supersede that outdated one. This effectively prevents the async binder buffer running out and saves unnecessary binder read workloads. Acked-by: Todd Kjos Signed-off-by: Li Li Link: https://lore.kernel.org/r/20220526220018.3334775-2-dualli@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/android/binder.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 986333cf5bbe..e72e4de8f452 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -287,6 +287,7 @@ enum transaction_flags { TF_STATUS_CODE = 0x08, /* contents are a 32-bit status code */ TF_ACCEPT_FDS = 0x10, /* allow replies with file descriptors */ TF_CLEAR_BUF = 0x20, /* clear buffer on txn complete */ + TF_UPDATE_TXN = 0x40, /* update the outdated pending async txn */ }; struct binder_transaction_data { -- cgit v1.2.3 From 2acd21cd00ce635adfec5a8725a0c342812bffb4 Mon Sep 17 00:00:00 2001 From: Dan Rapaport Date: Mon, 30 May 2022 14:11:45 +0300 Subject: habanalabs: align ioctl uapi structures to 64-bit The compiler is padding the members of the struct to be aligned to 64-bit. The content of the padded bytes is and not zeroed explicitly, hence might copy undefined data. We add a padding member to the struct to get a zeroed 64-bit align struct. Signed-off-by: Dan Rapaport Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 52540d5b4fc9..6d2ccc09dcf2 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -949,6 +949,7 @@ struct hl_cs_in { /* Context ID - Currently not in use */ __u32 ctx_id; + __u8 pad[4]; }; struct hl_cs_out { -- cgit v1.2.3 From a7d6c35bcd6b5389eb680daff5839339bd8206e0 Mon Sep 17 00:00:00 2001 From: Tal Cohen Date: Wed, 11 May 2022 18:02:39 +0300 Subject: habanalabs/gaudi: collect undefined opcode error info when an undefined opcode error occurres, the driver collects the relevant information from the Qman and stores it inside the hdev data structure. An event fd indication is sent towards the user space. Note: another commit shall be followed which will add support to read the error info by an ioctl. Signed-off-by: Tal Cohen Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 13 +-- drivers/misc/habanalabs/common/habanalabs.h | 40 ++++++++- drivers/misc/habanalabs/common/habanalabs_drv.c | 1 + drivers/misc/habanalabs/gaudi/gaudi.c | 108 +++++++++++++++++++----- include/uapi/misc/habanalabs.h | 8 +- 5 files changed, 138 insertions(+), 32 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 38e1ad432e51..0f804ecb6caa 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1531,10 +1531,11 @@ out_err: return rc; } -static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event) +static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask) { mutex_lock(¬ifier_event->lock); - notifier_event->events_mask |= event; + notifier_event->events_mask |= event_mask; + if (notifier_event->eventfd) eventfd_signal(notifier_event->eventfd, 1); @@ -1545,17 +1546,17 @@ static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 * hl_notifier_event_send_all - notify all user processes via eventfd * * @hdev: pointer to habanalabs device structure - * @event: the occurred event + * @event_mask: the occurred event/s * Returns 0 for success or an error on failure. */ -void hl_notifier_event_send_all(struct hl_device *hdev, u64 event) +void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask) { struct hl_fpriv *hpriv; mutex_lock(&hdev->fpriv_list_lock); list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) - hl_notifier_event_send(&hpriv->notifier_event, event); + hl_notifier_event_send(&hpriv->notifier_event, event_mask); mutex_unlock(&hdev->fpriv_list_lock); @@ -1563,7 +1564,7 @@ void hl_notifier_event_send_all(struct hl_device *hdev, u64 event) mutex_lock(&hdev->fpriv_ctrl_list_lock); list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node) - hl_notifier_event_send(&hpriv->notifier_event, event); + hl_notifier_event_send(&hpriv->notifier_event, event_mask); mutex_unlock(&hdev->fpriv_ctrl_list_lock); } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 1ab64e8a05c6..3a0f6dca8361 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2644,14 +2644,48 @@ struct razwi_info { u8 type; }; +#define MAX_QMAN_STREAMS_INFO 4 +#define OPCODE_INFO_MAX_ADDR_SIZE 8 +/** + * struct undefined_opcode_info - info about last undefined opcode error + * @timestamp: timestamp of the undefined opcode error + * @cb_addr_streams: CB addresses (per stream) that are currently exists in the PQ + * entiers. In case all streams array entries are + * filled with values, it means the execution was in Lower-CP. + * @cq_addr: the address of the current handled command buffer + * @cq_size: the size of the current handled command buffer + * @cb_addr_streams_len: num of streams - actual len of cb_addr_streams array. + * should be equal to 1 incase of undefined opcode + * in Upper-CP (specific stream) and equal to 4 incase + * of undefined opcode in Lower-CP. + * @engine_id: engine-id that the error occurred on + * @stream_id: the stream id the error occurred on. In case the stream equals to + * MAX_QMAN_STREAMS_INFO it means the error occurred on a Lower-CP. + * @write_enable: if set, writing to undefined opcode parameters in the structure + * is enable so the first (root cause) undefined opcode will not be + * overwritten. + */ +struct undefined_opcode_info { + ktime_t timestamp; + u64 cb_addr_streams[MAX_QMAN_STREAMS_INFO][OPCODE_INFO_MAX_ADDR_SIZE]; + u64 cq_addr; + u32 cq_size; + u32 cb_addr_streams_len; + u32 engine_id; + u32 stream_id; + bool write_enable; +}; + /** * struct last_error_session_info - info about last session errors occurred. * @cs_timeout: CS timeout error last information. * @razwi: razwi last information. + * @undef_opcode: undefined opcode information */ struct last_error_session_info { - struct cs_timeout_info cs_timeout; - struct razwi_info razwi; + struct cs_timeout_info cs_timeout; + struct razwi_info razwi; + struct undefined_opcode_info undef_opcode; }; /** @@ -3159,7 +3193,7 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization); int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sensors_arr); -void hl_notifier_event_send_all(struct hl_device *hdev, u64 event); +void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask); int hl_sysfs_init(struct hl_device *hdev); void hl_sysfs_fini(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index e617cc394ff7..d02533666746 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -198,6 +198,7 @@ int hl_device_open(struct inode *inode, struct file *filp) atomic_set(&hdev->last_error.cs_timeout.write_enable, 1); atomic_set(&hdev->last_error.razwi.write_enable, 1); + hdev->last_error.undef_opcode.write_enable = true; hdev->open_counter++; hdev->last_successful_open_jif = jiffies; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 72b0d145e853..ec9f0a93cbe2 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -443,6 +443,38 @@ static s64 gaudi_state_dump_specs_props[] = { [SP_NUM_CORES] = 1, }; +static const int gaudi_queue_id_to_engine_id[] = { + [GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0, + [GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1, + [GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE, + [GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2, + [GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3, + [GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4, + [GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5, + [GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6, + [GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7, + [GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0, + [GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_1, + [GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0, + [GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1, + [GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2, + [GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3, + [GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4, + [GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5, + [GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6, + [GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7, + [GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0, + [GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1, + [GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2, + [GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3, + [GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4, + [GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5, + [GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6, + [GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7, + [GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8, + [GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9, +}; + /* The order here is opposite to the order of the indexing in the h/w. * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc. */ @@ -6989,14 +7021,15 @@ static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len) } /** - * gaudi_print_sw_config_stream_data - print SW config stream data + * gaudi_handle_sw_config_stream_data - print SW config stream data * * @hdev: pointer to the habanalabs device structure * @stream: the QMAN's stream * @qman_base: base address of QMAN registers block + * @event_mask: mask of the last events occurred */ -static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream, - u64 qman_base) +static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream, + u64 qman_base, u64 event_mask) { u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; u32 cq_ptr_lo_off, size; @@ -7014,24 +7047,32 @@ static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream size = RREG32(cq_tsize); dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n", stream, cq_ptr, size); + + if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { + hdev->last_error.undef_opcode.cq_addr = cq_ptr; + hdev->last_error.undef_opcode.cq_size = size; + hdev->last_error.undef_opcode.stream_id = stream; + } } /** - * gaudi_print_last_pqes_on_err - print last PQEs on error + * gaudi_handle_last_pqes_on_err - print last PQEs on error * * @hdev: pointer to the habanalabs device structure * @qid_base: first QID of the QMAN (out of 4 streams) * @stream: the QMAN's stream * @qman_base: base address of QMAN registers block + * @event_mask: mask of the last events occurred * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE) */ -static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, +static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base, + u64 event_mask, bool pr_sw_conf) { u32 ci, qm_ci_stream_off, queue_len; struct hl_hw_queue *q; - u64 pq_ci; + u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE]; int i; q = &hdev->kernel_queues[qid_base + stream]; @@ -7046,16 +7087,16 @@ static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, hdev->asic_funcs->hw_queues_lock(hdev); if (pr_sw_conf) - gaudi_print_sw_config_stream_data(hdev, stream, qman_base); + gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); ci = RREG32(pq_ci); /* we should start printing form ci -1 */ ci = gaudi_queue_idx_dec(ci, queue_len); + memset(addr, 0, sizeof(addr)); for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { struct hl_bd *bd; - u64 addr; u32 len; bd = q->kernel_address; @@ -7066,52 +7107,68 @@ static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, if (!len) break; - addr = le64_to_cpu(bd->ptr); + addr[i] = le64_to_cpu(bd->ptr); dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n", - stream, ci, addr, len); + stream, ci, addr[i], len); /* get previous ci, wrap if needed */ ci = gaudi_queue_idx_dec(ci, queue_len); } + if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { + struct undefined_opcode_info *undef_opcode = &hdev->last_error.undef_opcode; + u32 arr_idx = undef_opcode->cb_addr_streams_len; + + if (arr_idx == 0) { + undef_opcode->timestamp = ktime_get(); + undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base]; + } + + memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr)); + undef_opcode->cb_addr_streams_len++; + } + hdev->asic_funcs->hw_queues_unlock(hdev); } /** - * print_qman_data_on_err - extract QMAN data on error + * handle_qman_data_on_err - extract QMAN data on error * * @hdev: pointer to the habanalabs device structure * @qid_base: first QID of the QMAN (out of 4 streams) * @stream: the QMAN's stream * @qman_base: base address of QMAN registers block + * @event_mask: mask of the last events occurred * * This function attempt to exatract as much data as possible on QMAN error. * On upper CP print the SW config stream data and last 8 PQEs. * On lower CP print SW config data and last PQEs of ALL 4 upper CPs */ -static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, - u32 stream, u64 qman_base) +static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base, + u32 stream, u64 qman_base, u64 event_mask) { u32 i; if (stream != QMAN_STREAMS) { - gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, - true); + gaudi_handle_last_pqes_on_err(hdev, qid_base, stream, + qman_base, event_mask, true); return; } - gaudi_print_sw_config_stream_data(hdev, stream, qman_base); + /* handle Lower-CP */ + gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); for (i = 0; i < QMAN_STREAMS; i++) - gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base, - false); + gaudi_handle_last_pqes_on_err(hdev, qid_base, i, + qman_base, event_mask, false); } static void gaudi_handle_qman_err_generic(struct hl_device *hdev, const char *qm_name, u64 qman_base, - u32 qid_base) + u32 qid_base, + u64 *event_mask) { u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val; u64 glbl_sts_addr, arb_err_addr; @@ -7142,12 +7199,21 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev, glbl_sts_clr_val |= BIT(j); } } + /* check for undefined opcode */ + if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK && + hdev->last_error.undef_opcode.write_enable) { + memset(&hdev->last_error.undef_opcode, 0, + sizeof(hdev->last_error.undef_opcode)); + + hdev->last_error.undef_opcode.write_enable = false; + *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; + } /* Write 1 clear errors */ if (!hdev->stop_on_err) WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); else - print_qman_data_on_err(hdev, qid_base, i, qman_base); + handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask); } arb_err_val = RREG32(arb_err_addr); @@ -7385,7 +7451,7 @@ static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e return; } - gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base); + gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask); } static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 6d2ccc09dcf2..c94b89cf1ec1 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -1402,9 +1402,13 @@ struct hl_debug_args { /* * Notifier event values - for the notification mechanism and the HL_INFO_GET_EVENTS command * - * HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event + * HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event + * HL_NOTIFIER_EVENT_UNDEFINED_OPCODE - Indicates undefined operation code + * HL_NOTIFIER_EVENT_DEVICE_RESET - Indicates device requires a reset */ -#define HL_NOTIFIER_EVENT_TPC_ASSERT (1 << 0) +#define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0) +#define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1) +#define HL_NOTIFIER_EVENT_DEVICE_RESET (1ULL << 2) /* * Various information operations such as: -- cgit v1.2.3 From 647469148360dc873405acc6fcf63772e9e401f4 Mon Sep 17 00:00:00 2001 From: Tal Cohen Date: Thu, 12 May 2022 11:59:41 +0300 Subject: habanalabs: expose undefined opcode status via info ioctl The info ioctl retrieves information on the last undefined opcode occurred. Signed-off-by: Tal Cohen Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 25 +++++++++++++++++++ include/uapi/misc/habanalabs.h | 30 +++++++++++++++++++++++ 2 files changed, 55 insertions(+) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index c7864d6bb0a1..fe7ed46cd1c5 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -610,6 +610,28 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args) return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; } +static int undefined_opcode_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + u32 max_size = args->return_size; + struct hl_info_undefined_opcode_event info = {0}; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + info.timestamp = ktime_to_ns(hdev->last_error.undef_opcode.timestamp); + info.engine_id = hdev->last_error.undef_opcode.engine_id; + info.cq_addr = hdev->last_error.undef_opcode.cq_addr; + info.cq_size = hdev->last_error.undef_opcode.cq_size; + info.stream_id = hdev->last_error.undef_opcode.stream_id; + info.cb_addr_streams_len = hdev->last_error.undef_opcode.cb_addr_streams_len; + memcpy(info.cb_addr_streams, hdev->last_error.undef_opcode.cb_addr_streams, + sizeof(info.cb_addr_streams)); + + return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; +} + static int dev_mem_alloc_page_sizes_info(struct hl_fpriv *hpriv, struct hl_info_args *args) { void __user *out = (void __user *) (uintptr_t) args->return_pointer; @@ -718,6 +740,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_RAZWI_EVENT: return razwi_info(hpriv, args); + case HL_INFO_UNDEFINED_OPCODE_EVENT: + return undefined_opcode_info(hpriv, args); + case HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES: return dev_mem_alloc_page_sizes_info(hpriv, args); diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index c94b89cf1ec1..5f9a6097f5f3 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -352,6 +352,7 @@ enum hl_server_type { * HL_INFO_REGISTER_EVENTFD - Register eventfd for event notifications. * HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd * HL_INFO_GET_EVENTS - Retrieve the last occurred events + * HL_INFO_UNDEFINED_OPCODE_EVENT - Retrieve last undefined opcode error information. */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 @@ -380,6 +381,7 @@ enum hl_server_type { #define HL_INFO_REGISTER_EVENTFD 28 #define HL_INFO_UNREGISTER_EVENTFD 29 #define HL_INFO_GET_EVENTS 30 +#define HL_INFO_UNDEFINED_OPCODE_EVENT 31 #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 @@ -656,6 +658,34 @@ struct hl_info_razwi_event { __u8 pad[2]; }; +#define MAX_QMAN_STREAMS_INFO 4 +#define OPCODE_INFO_MAX_ADDR_SIZE 8 +/** + * struct hl_info_undefined_opcode_event - info about last undefined opcode error + * @timestamp: timestamp of the undefined opcode error + * @cb_addr_streams: CB addresses (per stream) that are currently exists in the PQ + * entiers. In case all streams array entries are + * filled with values, it means the execution was in Lower-CP. + * @cq_addr: the address of the current handled command buffer + * @cq_size: the size of the current handled command buffer + * @cb_addr_streams_len: num of streams - actual len of cb_addr_streams array. + * should be equal to 1 incase of undefined opcode + * in Upper-CP (specific stream) and equal to 4 incase + * of undefined opcode in Lower-CP. + * @engine_id: engine-id that the error occurred on + * @stream_id: the stream id the error occurred on. In case the stream equals to + * MAX_QMAN_STREAMS_INFO it means the error occurred on a Lower-CP. + */ +struct hl_info_undefined_opcode_event { + __s64 timestamp; + __u64 cb_addr_streams[MAX_QMAN_STREAMS_INFO][OPCODE_INFO_MAX_ADDR_SIZE]; + __u64 cq_addr; + __u32 cq_size; + __u32 cb_addr_streams_len; + __u32 engine_id; + __u32 stream_id; +}; + /** * struct hl_info_dev_memalloc_page_sizes - valid page sizes in device mem alloc information. * @page_order_bitmask: bitmap in which a set bit represents the order of the supported page size -- cgit v1.2.3 From fa9deaca2f9123b1749ea81bbeb12778645af580 Mon Sep 17 00:00:00 2001 From: Tal Cohen Date: Thu, 19 May 2022 18:00:55 +0300 Subject: habanalabs: send an event notification when CS timeout occurs The Driver needs to inform the User process whenever one of its CS is timed out. The Driver shall recognize the CS timeout and shall send an eventfd notification, towards user space, whenever a timeout is expired on a CS. Signed-off-by: Tal Cohen Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../misc/habanalabs/common/command_submission.c | 26 ++++++++++++++-------- include/uapi/misc/habanalabs.h | 2 ++ 2 files changed, 19 insertions(+), 9 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 47b49cbf67ab..cbb7c29966ff 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -797,10 +797,11 @@ out: static void cs_timedout(struct work_struct *work) { struct hl_device *hdev; + u64 event_mask; int rc; struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work); - bool skip_reset_on_timeout = cs->skip_reset_on_timeout; + bool skip_reset_on_timeout = cs->skip_reset_on_timeout, device_reset = false; rc = cs_get_unless_zero(cs); if (!rc) @@ -811,9 +812,15 @@ static void cs_timedout(struct work_struct *work) return; } - /* Mark the CS is timed out so we won't try to cancel its TDR */ - if (likely(!skip_reset_on_timeout)) + if (likely(!skip_reset_on_timeout)) { + if (hdev->reset_on_lockup) + device_reset = true; + else + hdev->reset_info.needs_reset = true; + + /* Mark the CS is timed out so we won't try to cancel its TDR */ cs->timedout = true; + } hdev = cs->ctx->hdev; @@ -822,6 +829,11 @@ static void cs_timedout(struct work_struct *work) if (rc) { hdev->last_error.cs_timeout.timestamp = ktime_get(); hdev->last_error.cs_timeout.seq = cs->sequence; + + event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT | + HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT; + + hl_notifier_event_send_all(hdev, event_mask); } switch (cs->type) { @@ -856,12 +868,8 @@ static void cs_timedout(struct work_struct *work) cs_put(cs); - if (likely(!skip_reset_on_timeout)) { - if (hdev->reset_on_lockup) - hl_device_reset(hdev, HL_DRV_RESET_TDR); - else - hdev->reset_info.needs_reset = true; - } + if (device_reset) + hl_device_reset(hdev, HL_DRV_RESET_TDR); } static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 5f9a6097f5f3..18f86d259421 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -1435,10 +1435,12 @@ struct hl_debug_args { * HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event * HL_NOTIFIER_EVENT_UNDEFINED_OPCODE - Indicates undefined operation code * HL_NOTIFIER_EVENT_DEVICE_RESET - Indicates device requires a reset + * HL_NOTIFIER_EVENT_CS_TIMEOUT - Indicates CS timeout error */ #define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0) #define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1) #define HL_NOTIFIER_EVENT_DEVICE_RESET (1ULL << 2) +#define HL_NOTIFIER_EVENT_CS_TIMEOUT (1ULL << 3) /* * Various information operations such as: -- cgit v1.2.3 From 67a54d5de2c348562b745c5029daa9e5207514c9 Mon Sep 17 00:00:00 2001 From: Tal Cohen Date: Thu, 9 Jun 2022 18:08:31 +0300 Subject: habanalabs/gaudi: notify user process on device unavailable When a device error occurs, user process would like to get some indication on the error by reading some device HW info. If the device is unavailable, user process can't perform any HW device reading. Signed-off-by: Tal Cohen Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 5 ++++- include/uapi/misc/habanalabs.h | 12 +++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 1156ec7dacc1..939d2636b9ed 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -8063,7 +8063,10 @@ reset_device: if (hdev->asic_prop.fw_security_enabled && !reset_direct) { flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag; - event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; + + /* notify on device unavailable while the reset triggered by fw */ + event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET | + HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE); } else if (hdev->hard_reset_on_fw_events) { flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag; event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 18f86d259421..78aecea4684d 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -1432,15 +1432,17 @@ struct hl_debug_args { /* * Notifier event values - for the notification mechanism and the HL_INFO_GET_EVENTS command * - * HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event - * HL_NOTIFIER_EVENT_UNDEFINED_OPCODE - Indicates undefined operation code - * HL_NOTIFIER_EVENT_DEVICE_RESET - Indicates device requires a reset - * HL_NOTIFIER_EVENT_CS_TIMEOUT - Indicates CS timeout error + * HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event + * HL_NOTIFIER_EVENT_UNDEFINED_OPCODE - Indicates undefined operation code + * HL_NOTIFIER_EVENT_DEVICE_RESET - Indicates device requires a reset + * HL_NOTIFIER_EVENT_CS_TIMEOUT - Indicates CS timeout error + * HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE - Indicates device is unavailable */ #define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0) #define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1) #define HL_NOTIFIER_EVENT_DEVICE_RESET (1ULL << 2) -#define HL_NOTIFIER_EVENT_CS_TIMEOUT (1ULL << 3) +#define HL_NOTIFIER_EVENT_CS_TIMEOUT (1ULL << 3) +#define HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE (1ULL << 4) /* * Various information operations such as: -- cgit v1.2.3 From 5125aa3368892dd9dd8bca3d64e88b11bc3490a4 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Fri, 24 Jun 2022 13:36:10 +0300 Subject: habanalabs/goya: move dma direction enum to uapi file The values in this enum are not used by h/w but are a contract between userspace and the kernel driver so they must be defined in the uapi file. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 28 +++++++++++----------- .../misc/habanalabs/include/goya/goya_packets.h | 12 ---------- include/uapi/misc/habanalabs.h | 27 +++++++++++++++++++++ 3 files changed, 41 insertions(+), 26 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 25b1e3e139e8..411a4be09aa6 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -3403,7 +3403,7 @@ static int goya_validate_dma_pkt_host(struct hl_device *hdev, { u64 device_memory_addr, addr; enum dma_data_direction dir; - enum goya_dma_direction user_dir; + enum hl_goya_dma_direction user_dir; bool sram_addr = true; bool skip_host_mem_pin = false; bool user_memset; @@ -3419,7 +3419,7 @@ static int goya_validate_dma_pkt_host(struct hl_device *hdev, GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT; switch (user_dir) { - case DMA_HOST_TO_DRAM: + case HL_DMA_HOST_TO_DRAM: dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n"); dir = DMA_TO_DEVICE; sram_addr = false; @@ -3429,7 +3429,7 @@ static int goya_validate_dma_pkt_host(struct hl_device *hdev, skip_host_mem_pin = true; break; - case DMA_DRAM_TO_HOST: + case HL_DMA_DRAM_TO_HOST: dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n"); dir = DMA_FROM_DEVICE; sram_addr = false; @@ -3437,7 +3437,7 @@ static int goya_validate_dma_pkt_host(struct hl_device *hdev, device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); break; - case DMA_HOST_TO_SRAM: + case HL_DMA_HOST_TO_SRAM: dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n"); dir = DMA_TO_DEVICE; addr = le64_to_cpu(user_dma_pkt->src_addr); @@ -3446,14 +3446,14 @@ static int goya_validate_dma_pkt_host(struct hl_device *hdev, skip_host_mem_pin = true; break; - case DMA_SRAM_TO_HOST: + case HL_DMA_SRAM_TO_HOST: dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n"); dir = DMA_FROM_DEVICE; addr = le64_to_cpu(user_dma_pkt->dst_addr); device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); break; default: - dev_err(hdev->dev, "DMA direction is undefined\n"); + dev_err(hdev->dev, "DMA direction %d is unsupported/undefined\n", user_dir); return -EFAULT; } @@ -3505,14 +3505,14 @@ static int goya_validate_dma_pkt_no_host(struct hl_device *hdev, struct packet_lin_dma *user_dma_pkt) { u64 sram_memory_addr, dram_memory_addr; - enum goya_dma_direction user_dir; + enum hl_goya_dma_direction user_dir; u32 ctl; ctl = le32_to_cpu(user_dma_pkt->ctl); user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >> GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT; - if (user_dir == DMA_DRAM_TO_SRAM) { + if (user_dir == HL_DMA_DRAM_TO_SRAM) { dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n"); dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); @@ -3549,7 +3549,7 @@ static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev, struct hl_cs_parser *parser, struct packet_lin_dma *user_dma_pkt) { - enum goya_dma_direction user_dir; + enum hl_goya_dma_direction user_dir; u32 ctl; int rc; @@ -3574,7 +3574,7 @@ static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev, return -EINVAL; } - if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM)) + if ((user_dir == HL_DMA_DRAM_TO_SRAM) || (user_dir == HL_DMA_SRAM_TO_DRAM)) rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt); else rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt); @@ -3781,7 +3781,7 @@ static int goya_patch_dma_packet(struct hl_device *hdev, u32 count, dma_desc_cnt; u64 len, len_next; dma_addr_t dma_addr, dma_addr_next; - enum goya_dma_direction user_dir; + enum hl_goya_dma_direction user_dir; u64 device_memory_addr, addr; enum dma_data_direction dir; struct sg_table *sgt; @@ -3797,14 +3797,14 @@ static int goya_patch_dma_packet(struct hl_device *hdev, user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >> GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT; - if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) || + if ((user_dir == HL_DMA_DRAM_TO_SRAM) || (user_dir == HL_DMA_SRAM_TO_DRAM) || (user_dma_pkt->tsize == 0)) { memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt)); *new_dma_pkt_size = sizeof(*new_dma_pkt); return 0; } - if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) { + if ((user_dir == HL_DMA_HOST_TO_DRAM) || (user_dir == HL_DMA_HOST_TO_SRAM)) { addr = le64_to_cpu(user_dma_pkt->src_addr); device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); dir = DMA_TO_DEVICE; @@ -4804,7 +4804,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) | (1 << GOYA_PKT_CTL_RB_SHIFT) | (1 << GOYA_PKT_CTL_MB_SHIFT)); - ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) << + ctl |= (is_dram ? HL_DMA_HOST_TO_DRAM : HL_DMA_HOST_TO_SRAM) << GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT; lin_dma_pkt->ctl = cpu_to_le32(ctl); diff --git a/drivers/misc/habanalabs/include/goya/goya_packets.h b/drivers/misc/habanalabs/include/goya/goya_packets.h index 50ce5175b63a..896799204fb0 100644 --- a/drivers/misc/habanalabs/include/goya/goya_packets.h +++ b/drivers/misc/habanalabs/include/goya/goya_packets.h @@ -28,18 +28,6 @@ enum packet_id { PACKET_HEADER_PACKET_ID_SHIFT) + 1 }; -enum goya_dma_direction { - DMA_HOST_TO_DRAM, - DMA_HOST_TO_SRAM, - DMA_DRAM_TO_SRAM, - DMA_SRAM_TO_DRAM, - DMA_SRAM_TO_HOST, - DMA_DRAM_TO_HOST, - DMA_DRAM_TO_DRAM, - DMA_SRAM_TO_SRAM, - DMA_ENUM_MAX -}; - #define GOYA_PKT_CTL_OPCODE_SHIFT 24 #define GOYA_PKT_CTL_OPCODE_MASK 0x1F000000 diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 78aecea4684d..41a0fa345e4e 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -275,6 +275,33 @@ enum hl_gaudi_pll_index { HL_GAUDI_PLL_MAX }; +/** + * enum hl_goya_dma_direction - Direction of DMA operation inside a LIN_DMA packet that is + * submitted to the GOYA's DMA QMAN. This attribute is not relevant + * to the H/W but the kernel driver use it to parse the packet's + * addresses and patch/validate them. + * @HL_DMA_HOST_TO_DRAM: DMA operation from Host memory to GOYA's DDR. + * @HL_DMA_HOST_TO_SRAM: DMA operation from Host memory to GOYA's SRAM. + * @HL_DMA_DRAM_TO_SRAM: DMA operation from GOYA's DDR to GOYA's SRAM. + * @HL_DMA_SRAM_TO_DRAM: DMA operation from GOYA's SRAM to GOYA's DDR. + * @HL_DMA_SRAM_TO_HOST: DMA operation from GOYA's SRAM to Host memory. + * @HL_DMA_DRAM_TO_HOST: DMA operation from GOYA's DDR to Host memory. + * @HL_DMA_DRAM_TO_DRAM: DMA operation from GOYA's DDR to GOYA's DDR. + * @HL_DMA_SRAM_TO_SRAM: DMA operation from GOYA's SRAM to GOYA's SRAM. + * @HL_DMA_ENUM_MAX: number of values in enum + */ +enum hl_goya_dma_direction { + HL_DMA_HOST_TO_DRAM, + HL_DMA_HOST_TO_SRAM, + HL_DMA_DRAM_TO_SRAM, + HL_DMA_SRAM_TO_DRAM, + HL_DMA_SRAM_TO_HOST, + HL_DMA_DRAM_TO_HOST, + HL_DMA_DRAM_TO_DRAM, + HL_DMA_SRAM_TO_SRAM, + HL_DMA_ENUM_MAX +}; + /** * enum hl_device_status - Device status information. * @HL_DEVICE_STATUS_OPERATIONAL: Device is operational. -- cgit v1.2.3 From 97c6d22fa4bd54cccff39e862893327eef1bbfa8 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Fri, 24 Jun 2022 13:38:57 +0300 Subject: uapi: habanalabs: add gaudi2 defines Add the new defines for GAUDI2 uapi interface. It includes the following: 1. Enums of engines and PLLs. 2. New information in the info IOCTL that is retrieved by the driver. 3. Update comments regarding the CB/CS/wait for CS ioctls. 4. New fields in the debug IOCTL for configuring the profiler for Gaudi2. There is no new IOCTL. Some of the changes are also relevant for Greco (which will be upstreamed later this year). When ever it says "Greco and onwards", it means it is also for Gaudi2. Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 455 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 435 insertions(+), 20 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 41a0fa345e4e..77b89c537ee8 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -184,6 +184,285 @@ enum gaudi_queue_id { GAUDI_QUEUE_ID_SIZE }; +/* + * In GAUDI2 we have two modes of operation in regard to queues: + * 1. Legacy mode, where each QMAN exposes 4 streams to the user + * 2. F/W mode, where we use F/W to schedule the JOBS to the different queues. + * + * When in legacy mode, the user sends the queue id per JOB according to + * enum gaudi2_queue_id below. + * + * When in F/W mode, the user sends a stream id per Command Submission. The + * stream id is a running number from 0 up to (N-1), where N is the number + * of streams the F/W exposes and is passed to the user in + * struct hl_info_hw_ip_info + */ + +enum gaudi2_queue_id { + GAUDI2_QUEUE_ID_PDMA_0_0 = 0, + GAUDI2_QUEUE_ID_PDMA_0_1 = 1, + GAUDI2_QUEUE_ID_PDMA_0_2 = 2, + GAUDI2_QUEUE_ID_PDMA_0_3 = 3, + GAUDI2_QUEUE_ID_PDMA_1_0 = 4, + GAUDI2_QUEUE_ID_PDMA_1_1 = 5, + GAUDI2_QUEUE_ID_PDMA_1_2 = 6, + GAUDI2_QUEUE_ID_PDMA_1_3 = 7, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0 = 8, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1 = 9, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2 = 10, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3 = 11, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0 = 12, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1 = 13, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2 = 14, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3 = 15, + GAUDI2_QUEUE_ID_DCORE0_MME_0_0 = 16, + GAUDI2_QUEUE_ID_DCORE0_MME_0_1 = 17, + GAUDI2_QUEUE_ID_DCORE0_MME_0_2 = 18, + GAUDI2_QUEUE_ID_DCORE0_MME_0_3 = 19, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 = 20, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_1 = 21, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_2 = 22, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_3 = 23, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_0 = 24, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_1 = 25, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_2 = 26, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_3 = 27, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_0 = 28, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_1 = 29, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_2 = 30, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_3 = 31, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_0 = 32, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_1 = 33, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_2 = 34, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_3 = 35, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_0 = 36, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_1 = 37, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_2 = 38, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_3 = 39, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_0 = 40, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_1 = 41, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_2 = 42, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_3 = 43, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 = 44, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_1 = 45, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_2 = 46, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_3 = 47, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0 = 48, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1 = 49, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2 = 50, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3 = 51, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0 = 52, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1 = 53, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2 = 54, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3 = 55, + GAUDI2_QUEUE_ID_DCORE1_MME_0_0 = 56, + GAUDI2_QUEUE_ID_DCORE1_MME_0_1 = 57, + GAUDI2_QUEUE_ID_DCORE1_MME_0_2 = 58, + GAUDI2_QUEUE_ID_DCORE1_MME_0_3 = 59, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 = 60, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_1 = 61, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_2 = 62, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_3 = 63, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_0 = 64, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_1 = 65, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_2 = 66, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_3 = 67, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_0 = 68, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_1 = 69, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_2 = 70, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_3 = 71, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_0 = 72, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_1 = 73, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_2 = 74, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_3 = 75, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_0 = 76, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_1 = 77, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_2 = 78, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_3 = 79, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_0 = 80, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_1 = 81, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_2 = 82, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_3 = 83, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0 = 84, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1 = 85, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2 = 86, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3 = 87, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0 = 88, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1 = 89, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2 = 90, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3 = 91, + GAUDI2_QUEUE_ID_DCORE2_MME_0_0 = 92, + GAUDI2_QUEUE_ID_DCORE2_MME_0_1 = 93, + GAUDI2_QUEUE_ID_DCORE2_MME_0_2 = 94, + GAUDI2_QUEUE_ID_DCORE2_MME_0_3 = 95, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 = 96, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_1 = 97, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_2 = 98, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_3 = 99, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_0 = 100, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_1 = 101, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_2 = 102, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_3 = 103, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_0 = 104, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_1 = 105, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_2 = 106, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_3 = 107, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_0 = 108, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_1 = 109, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_2 = 110, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_3 = 111, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_0 = 112, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_1 = 113, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_2 = 114, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_3 = 115, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_0 = 116, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_1 = 117, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_2 = 118, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_3 = 119, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0 = 120, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1 = 121, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2 = 122, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3 = 123, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0 = 124, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1 = 125, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2 = 126, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3 = 127, + GAUDI2_QUEUE_ID_DCORE3_MME_0_0 = 128, + GAUDI2_QUEUE_ID_DCORE3_MME_0_1 = 129, + GAUDI2_QUEUE_ID_DCORE3_MME_0_2 = 130, + GAUDI2_QUEUE_ID_DCORE3_MME_0_3 = 131, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 = 132, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_1 = 133, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_2 = 134, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_3 = 135, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_0 = 136, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_1 = 137, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_2 = 138, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_3 = 139, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_0 = 140, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_1 = 141, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_2 = 142, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_3 = 143, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_0 = 144, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_1 = 145, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_2 = 146, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_3 = 147, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_0 = 148, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_1 = 149, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_2 = 150, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_3 = 151, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_0 = 152, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_1 = 153, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_2 = 154, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_3 = 155, + GAUDI2_QUEUE_ID_NIC_0_0 = 156, + GAUDI2_QUEUE_ID_NIC_0_1 = 157, + GAUDI2_QUEUE_ID_NIC_0_2 = 158, + GAUDI2_QUEUE_ID_NIC_0_3 = 159, + GAUDI2_QUEUE_ID_NIC_1_0 = 160, + GAUDI2_QUEUE_ID_NIC_1_1 = 161, + GAUDI2_QUEUE_ID_NIC_1_2 = 162, + GAUDI2_QUEUE_ID_NIC_1_3 = 163, + GAUDI2_QUEUE_ID_NIC_2_0 = 164, + GAUDI2_QUEUE_ID_NIC_2_1 = 165, + GAUDI2_QUEUE_ID_NIC_2_2 = 166, + GAUDI2_QUEUE_ID_NIC_2_3 = 167, + GAUDI2_QUEUE_ID_NIC_3_0 = 168, + GAUDI2_QUEUE_ID_NIC_3_1 = 169, + GAUDI2_QUEUE_ID_NIC_3_2 = 170, + GAUDI2_QUEUE_ID_NIC_3_3 = 171, + GAUDI2_QUEUE_ID_NIC_4_0 = 172, + GAUDI2_QUEUE_ID_NIC_4_1 = 173, + GAUDI2_QUEUE_ID_NIC_4_2 = 174, + GAUDI2_QUEUE_ID_NIC_4_3 = 175, + GAUDI2_QUEUE_ID_NIC_5_0 = 176, + GAUDI2_QUEUE_ID_NIC_5_1 = 177, + GAUDI2_QUEUE_ID_NIC_5_2 = 178, + GAUDI2_QUEUE_ID_NIC_5_3 = 179, + GAUDI2_QUEUE_ID_NIC_6_0 = 180, + GAUDI2_QUEUE_ID_NIC_6_1 = 181, + GAUDI2_QUEUE_ID_NIC_6_2 = 182, + GAUDI2_QUEUE_ID_NIC_6_3 = 183, + GAUDI2_QUEUE_ID_NIC_7_0 = 184, + GAUDI2_QUEUE_ID_NIC_7_1 = 185, + GAUDI2_QUEUE_ID_NIC_7_2 = 186, + GAUDI2_QUEUE_ID_NIC_7_3 = 187, + GAUDI2_QUEUE_ID_NIC_8_0 = 188, + GAUDI2_QUEUE_ID_NIC_8_1 = 189, + GAUDI2_QUEUE_ID_NIC_8_2 = 190, + GAUDI2_QUEUE_ID_NIC_8_3 = 191, + GAUDI2_QUEUE_ID_NIC_9_0 = 192, + GAUDI2_QUEUE_ID_NIC_9_1 = 193, + GAUDI2_QUEUE_ID_NIC_9_2 = 194, + GAUDI2_QUEUE_ID_NIC_9_3 = 195, + GAUDI2_QUEUE_ID_NIC_10_0 = 196, + GAUDI2_QUEUE_ID_NIC_10_1 = 197, + GAUDI2_QUEUE_ID_NIC_10_2 = 198, + GAUDI2_QUEUE_ID_NIC_10_3 = 199, + GAUDI2_QUEUE_ID_NIC_11_0 = 200, + GAUDI2_QUEUE_ID_NIC_11_1 = 201, + GAUDI2_QUEUE_ID_NIC_11_2 = 202, + GAUDI2_QUEUE_ID_NIC_11_3 = 203, + GAUDI2_QUEUE_ID_NIC_12_0 = 204, + GAUDI2_QUEUE_ID_NIC_12_1 = 205, + GAUDI2_QUEUE_ID_NIC_12_2 = 206, + GAUDI2_QUEUE_ID_NIC_12_3 = 207, + GAUDI2_QUEUE_ID_NIC_13_0 = 208, + GAUDI2_QUEUE_ID_NIC_13_1 = 209, + GAUDI2_QUEUE_ID_NIC_13_2 = 210, + GAUDI2_QUEUE_ID_NIC_13_3 = 211, + GAUDI2_QUEUE_ID_NIC_14_0 = 212, + GAUDI2_QUEUE_ID_NIC_14_1 = 213, + GAUDI2_QUEUE_ID_NIC_14_2 = 214, + GAUDI2_QUEUE_ID_NIC_14_3 = 215, + GAUDI2_QUEUE_ID_NIC_15_0 = 216, + GAUDI2_QUEUE_ID_NIC_15_1 = 217, + GAUDI2_QUEUE_ID_NIC_15_2 = 218, + GAUDI2_QUEUE_ID_NIC_15_3 = 219, + GAUDI2_QUEUE_ID_NIC_16_0 = 220, + GAUDI2_QUEUE_ID_NIC_16_1 = 221, + GAUDI2_QUEUE_ID_NIC_16_2 = 222, + GAUDI2_QUEUE_ID_NIC_16_3 = 223, + GAUDI2_QUEUE_ID_NIC_17_0 = 224, + GAUDI2_QUEUE_ID_NIC_17_1 = 225, + GAUDI2_QUEUE_ID_NIC_17_2 = 226, + GAUDI2_QUEUE_ID_NIC_17_3 = 227, + GAUDI2_QUEUE_ID_NIC_18_0 = 228, + GAUDI2_QUEUE_ID_NIC_18_1 = 229, + GAUDI2_QUEUE_ID_NIC_18_2 = 230, + GAUDI2_QUEUE_ID_NIC_18_3 = 231, + GAUDI2_QUEUE_ID_NIC_19_0 = 232, + GAUDI2_QUEUE_ID_NIC_19_1 = 233, + GAUDI2_QUEUE_ID_NIC_19_2 = 234, + GAUDI2_QUEUE_ID_NIC_19_3 = 235, + GAUDI2_QUEUE_ID_NIC_20_0 = 236, + GAUDI2_QUEUE_ID_NIC_20_1 = 237, + GAUDI2_QUEUE_ID_NIC_20_2 = 238, + GAUDI2_QUEUE_ID_NIC_20_3 = 239, + GAUDI2_QUEUE_ID_NIC_21_0 = 240, + GAUDI2_QUEUE_ID_NIC_21_1 = 241, + GAUDI2_QUEUE_ID_NIC_21_2 = 242, + GAUDI2_QUEUE_ID_NIC_21_3 = 243, + GAUDI2_QUEUE_ID_NIC_22_0 = 244, + GAUDI2_QUEUE_ID_NIC_22_1 = 245, + GAUDI2_QUEUE_ID_NIC_22_2 = 246, + GAUDI2_QUEUE_ID_NIC_22_3 = 247, + GAUDI2_QUEUE_ID_NIC_23_0 = 248, + GAUDI2_QUEUE_ID_NIC_23_1 = 249, + GAUDI2_QUEUE_ID_NIC_23_2 = 250, + GAUDI2_QUEUE_ID_NIC_23_3 = 251, + GAUDI2_QUEUE_ID_ROT_0_0 = 252, + GAUDI2_QUEUE_ID_ROT_0_1 = 253, + GAUDI2_QUEUE_ID_ROT_0_2 = 254, + GAUDI2_QUEUE_ID_ROT_0_3 = 255, + GAUDI2_QUEUE_ID_ROT_1_0 = 256, + GAUDI2_QUEUE_ID_ROT_1_1 = 257, + GAUDI2_QUEUE_ID_ROT_1_2 = 258, + GAUDI2_QUEUE_ID_ROT_1_3 = 259, + GAUDI2_QUEUE_ID_CPU_PQ = 260, + GAUDI2_QUEUE_ID_SIZE +}; + /* * Engine Numbering * @@ -242,6 +521,85 @@ enum gaudi_engine_id { GAUDI_ENGINE_ID_SIZE }; +enum gaudi2_engine_id { + GAUDI2_DCORE0_ENGINE_ID_EDMA_0 = 0, + GAUDI2_DCORE0_ENGINE_ID_EDMA_1, + GAUDI2_DCORE0_ENGINE_ID_MME, + GAUDI2_DCORE0_ENGINE_ID_TPC_0, + GAUDI2_DCORE0_ENGINE_ID_TPC_1, + GAUDI2_DCORE0_ENGINE_ID_TPC_2, + GAUDI2_DCORE0_ENGINE_ID_TPC_3, + GAUDI2_DCORE0_ENGINE_ID_TPC_4, + GAUDI2_DCORE0_ENGINE_ID_TPC_5, + GAUDI2_DCORE0_ENGINE_ID_DEC_0, + GAUDI2_DCORE0_ENGINE_ID_DEC_1, + GAUDI2_DCORE1_ENGINE_ID_EDMA_0, + GAUDI2_DCORE1_ENGINE_ID_EDMA_1, + GAUDI2_DCORE1_ENGINE_ID_MME, + GAUDI2_DCORE1_ENGINE_ID_TPC_0, + GAUDI2_DCORE1_ENGINE_ID_TPC_1, + GAUDI2_DCORE1_ENGINE_ID_TPC_2, + GAUDI2_DCORE1_ENGINE_ID_TPC_3, + GAUDI2_DCORE1_ENGINE_ID_TPC_4, + GAUDI2_DCORE1_ENGINE_ID_TPC_5, + GAUDI2_DCORE1_ENGINE_ID_DEC_0, + GAUDI2_DCORE1_ENGINE_ID_DEC_1, + GAUDI2_DCORE2_ENGINE_ID_EDMA_0, + GAUDI2_DCORE2_ENGINE_ID_EDMA_1, + GAUDI2_DCORE2_ENGINE_ID_MME, + GAUDI2_DCORE2_ENGINE_ID_TPC_0, + GAUDI2_DCORE2_ENGINE_ID_TPC_1, + GAUDI2_DCORE2_ENGINE_ID_TPC_2, + GAUDI2_DCORE2_ENGINE_ID_TPC_3, + GAUDI2_DCORE2_ENGINE_ID_TPC_4, + GAUDI2_DCORE2_ENGINE_ID_TPC_5, + GAUDI2_DCORE2_ENGINE_ID_DEC_0, + GAUDI2_DCORE2_ENGINE_ID_DEC_1, + GAUDI2_DCORE3_ENGINE_ID_EDMA_0, + GAUDI2_DCORE3_ENGINE_ID_EDMA_1, + GAUDI2_DCORE3_ENGINE_ID_MME, + GAUDI2_DCORE3_ENGINE_ID_TPC_0, + GAUDI2_DCORE3_ENGINE_ID_TPC_1, + GAUDI2_DCORE3_ENGINE_ID_TPC_2, + GAUDI2_DCORE3_ENGINE_ID_TPC_3, + GAUDI2_DCORE3_ENGINE_ID_TPC_4, + GAUDI2_DCORE3_ENGINE_ID_TPC_5, + GAUDI2_DCORE3_ENGINE_ID_DEC_0, + GAUDI2_DCORE3_ENGINE_ID_DEC_1, + GAUDI2_DCORE0_ENGINE_ID_TPC_6, + GAUDI2_ENGINE_ID_PDMA_0, + GAUDI2_ENGINE_ID_PDMA_1, + GAUDI2_ENGINE_ID_ROT_0, + GAUDI2_ENGINE_ID_ROT_1, + GAUDI2_PCIE_ENGINE_ID_DEC_0, + GAUDI2_PCIE_ENGINE_ID_DEC_1, + GAUDI2_ENGINE_ID_NIC0_0, + GAUDI2_ENGINE_ID_NIC0_1, + GAUDI2_ENGINE_ID_NIC1_0, + GAUDI2_ENGINE_ID_NIC1_1, + GAUDI2_ENGINE_ID_NIC2_0, + GAUDI2_ENGINE_ID_NIC2_1, + GAUDI2_ENGINE_ID_NIC3_0, + GAUDI2_ENGINE_ID_NIC3_1, + GAUDI2_ENGINE_ID_NIC4_0, + GAUDI2_ENGINE_ID_NIC4_1, + GAUDI2_ENGINE_ID_NIC5_0, + GAUDI2_ENGINE_ID_NIC5_1, + GAUDI2_ENGINE_ID_NIC6_0, + GAUDI2_ENGINE_ID_NIC6_1, + GAUDI2_ENGINE_ID_NIC7_0, + GAUDI2_ENGINE_ID_NIC7_1, + GAUDI2_ENGINE_ID_NIC8_0, + GAUDI2_ENGINE_ID_NIC8_1, + GAUDI2_ENGINE_ID_NIC9_0, + GAUDI2_ENGINE_ID_NIC9_1, + GAUDI2_ENGINE_ID_NIC10_0, + GAUDI2_ENGINE_ID_NIC10_1, + GAUDI2_ENGINE_ID_NIC11_0, + GAUDI2_ENGINE_ID_NIC11_1, + GAUDI2_ENGINE_ID_SIZE +}; + /* * ASIC specific PLL index * @@ -275,6 +633,22 @@ enum hl_gaudi_pll_index { HL_GAUDI_PLL_MAX }; +enum hl_gaudi2_pll_index { + HL_GAUDI2_CPU_PLL = 0, + HL_GAUDI2_PCI_PLL, + HL_GAUDI2_SRAM_PLL, + HL_GAUDI2_HBM_PLL, + HL_GAUDI2_NIC_PLL, + HL_GAUDI2_DMA_PLL, + HL_GAUDI2_MESH_PLL, + HL_GAUDI2_MME_PLL, + HL_GAUDI2_TPC_PLL, + HL_GAUDI2_IF_PLL, + HL_GAUDI2_VID_PLL, + HL_GAUDI2_MSS_PLL, + HL_GAUDI2_PLL_MAX +}; + /** * enum hl_goya_dma_direction - Direction of DMA operation inside a LIN_DMA packet that is * submitted to the GOYA's DMA QMAN. This attribute is not relevant @@ -326,7 +700,8 @@ enum hl_server_type { HL_SERVER_GAUDI_HLS1 = 1, HL_SERVER_GAUDI_HLS1H = 2, HL_SERVER_GAUDI_TYPE1 = 3, - HL_SERVER_GAUDI_TYPE2 = 4 + HL_SERVER_GAUDI_TYPE2 = 4, + HL_SERVER_GAUDI2_HLS2 = 5 }; /* Opcode for management ioctl @@ -428,8 +803,10 @@ enum hl_server_type { * @device_id: PCI device ID of the ASIC. * @module_id: Module ID of the ASIC for mezzanine cards in servers * (From OCP spec). + * @decoder_enabled_mask: Bit-mask that represents which decoders are enabled. * @first_available_interrupt_id: The first available interrupt ID for the user * to be used when it works with user interrupts. + * Relevant for Gaudi2 and later. * @server_type: Server type that the Gaudi ASIC is currently installed in. * The value is according to enum hl_server_type * @cpld_version: CPLD version on the board. @@ -441,9 +818,15 @@ enum hl_server_type { * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant * for Goya/Gaudi only. * @dram_enabled: Whether the DRAM is enabled. + * @mme_master_slave_mode: Indicate whether the MME is working in master/slave + * configuration. Relevant for Greco and later. * @cpucp_version: The CPUCP f/w version. * @card_name: The card name as passed by the f/w. + * @tpc_enabled_mask_ext: Bit-mask that represents which TPCs are enabled. + * Relevant for Greco and later. * @dram_page_size: The DRAM physical page size. + * @edma_enabled_mask: Bit-mask that represents which EDMAs are enabled. + * Relevant for Gaudi2 and later. * @number_of_user_interrupts: The number of interrupts that are available to the userspace * application to use. Relevant for Gaudi2 and later. * @device_mem_alloc_default_page_size: default page size used in device memory allocation. @@ -456,7 +839,7 @@ struct hl_info_hw_ip_info { __u32 num_of_events; __u32 device_id; __u32 module_id; - __u32 reserved; + __u32 decoder_enabled_mask; __u16 first_available_interrupt_id; __u16 server_type; __u32 cpld_version; @@ -466,12 +849,13 @@ struct hl_info_hw_ip_info { __u32 psoc_pci_pll_div_factor; __u8 tpc_enabled_mask; __u8 dram_enabled; - __u8 pad[2]; + __u8 reserved; + __u8 mme_master_slave_mode; __u8 cpucp_version[HL_INFO_VERSION_MAX_LEN]; __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN]; - __u64 reserved2; + __u64 tpc_enabled_mask_ext; __u64 dram_page_size; - __u32 reserved3; + __u32 edma_enabled_mask; __u16 number_of_user_interrupts; __u16 pad2; __u64 reserved4; @@ -821,16 +1205,16 @@ union hl_cb_args { /* HL_CS_CHUNK_FLAGS_ values * * HL_CS_CHUNK_FLAGS_USER_ALLOC_CB: - * Indicates if the CB was allocated and mapped by userspace. - * User allocated CB is a command buffer allocated by the user, via malloc - * (or similar). After allocating the CB, the user invokes “memory ioctl” - * to map the user memory into a device virtual address. The user provides - * this address via the cb_handle field. The interface provides the - * ability to create a large CBs, Which aren’t limited to - * “HL_MAX_CB_SIZE”. Therefore, it increases the PCI-DMA queues - * throughput. This CB allocation method also reduces the use of Linux - * DMA-able memory pool. Which are limited and used by other Linux - * sub-systems. + * Indicates if the CB was allocated and mapped by userspace + * (relevant to greco and above). User allocated CB is a command buffer, + * allocated by the user, via malloc (or similar). After allocating the + * CB, the user invokes - “memory ioctl” to map the user memory into a + * device virtual address. The user provides this address via the + * cb_handle field. The interface provides the ability to create a + * large CBs, Which aren’t limited to “HL_MAX_CB_SIZE”. Therefore, it + * increases the PCI-DMA queues throughput. This CB allocation method + * also reduces the use of Linux DMA-able memory pool. Which are limited + * and used by other Linux sub-systems. */ #define HL_CS_CHUNK_FLAGS_USER_ALLOC_CB 0x1 @@ -840,12 +1224,17 @@ union hl_cb_args { */ struct hl_cs_chunk { union { - /* For external queue, this represents a Handle of CB on the + /* Goya/Gaudi: + * For external queue, this represents a Handle of CB on the * Host. * For internal queue in Goya, this represents an SRAM or * a DRAM address of the internal CB. In Gaudi, this might also * represent a mapped host address of the CB. * + * Greco onwards: + * For H/W queue, this represents either a Handle of CB on the + * Host, or an SRAM, a DRAM, or a mapped host address of the CB. + * * A mapped host address is in the device address space, after * a host address was mapped by the device MMU. */ @@ -910,11 +1299,12 @@ struct hl_cs_chunk { __u32 pad[10]; }; -/* SIGNAL and WAIT/COLLECTIVE_WAIT flags are mutually exclusive */ +/* SIGNAL/WAIT/COLLECTIVE_WAIT flags are mutually exclusive */ #define HL_CS_FLAGS_FORCE_RESTORE 0x1 #define HL_CS_FLAGS_SIGNAL 0x2 #define HL_CS_FLAGS_WAIT 0x4 #define HL_CS_FLAGS_COLLECTIVE_WAIT 0x8 + #define HL_CS_FLAGS_TIMESTAMP 0x20 #define HL_CS_FLAGS_STAGED_SUBMISSION 0x40 #define HL_CS_FLAGS_STAGED_SUBMISSION_FIRST 0x80 @@ -1174,14 +1564,19 @@ union hl_wait_cs_args { /* Opcode to allocate device memory */ #define HL_MEM_OP_ALLOC 0 + /* Opcode to free previously allocated device memory */ #define HL_MEM_OP_FREE 1 + /* Opcode to map host and device memory */ #define HL_MEM_OP_MAP 2 + /* Opcode to unmap previously mapped host and device memory */ #define HL_MEM_OP_UNMAP 3 + /* Opcode to map a hw block */ #define HL_MEM_OP_MAP_BLOCK 4 + /* Opcode to create DMA-BUF object for an existing device memory allocation * and to export an FD of that DMA-BUF back to the caller */ @@ -1400,7 +1795,16 @@ struct hl_debug_params_bmon { /* Trace source ID */ __u32 id; - __u32 pad; + + /* Control register */ + __u32 control; + + /* Two more address ranges that the user can request to filter */ + __u64 start_addr2; + __u64 end_addr2; + + __u64 start_addr3; + __u64 end_addr3; }; struct hl_debug_params_spmu { @@ -1409,7 +1813,11 @@ struct hl_debug_params_spmu { /* Number of event types selection */ __u32 event_types_num; - __u32 pad; + + /* TRC configuration register values */ + __u32 pmtrc_val; + __u32 trc_ctrl_host_val; + __u32 trc_en_host_val; }; /* Opcode for ETR component */ @@ -1524,16 +1932,23 @@ struct hl_debug_args { * (or if its the first CS for this context). The user can also order the * driver to run the "restore" phase explicitly * + * Goya/Gaudi: * There are two types of queues - external and internal. External queues * are DMA queues which transfer data from/to the Host. All other queues are * internal. The driver will get completion notifications from the device only * on JOBS which are enqueued in the external queues. * + * Greco onwards: + * There is a single type of queue for all types of engines, either DMA engines + * for transfers from/to the host or inside the device, or compute engines. + * The driver will get completion notifications from the device for all queues. + * * For jobs on external queues, the user needs to create command buffers * through the CB ioctl and give the CB's handle to the CS ioctl. For jobs on * internal queues, the user needs to prepare a "command buffer" with packets * on either the device SRAM/DRAM or the host, and give the device address of * that buffer to the CS ioctl. + * For jobs on H/W queues both options of command buffers are valid. * * This IOCTL is asynchronous in regard to the actual execution of the CS. This * means it returns immediately after ALL the JOBS were enqueued on their @@ -1542,7 +1957,7 @@ struct hl_debug_args { * * Upon successful enqueue, the IOCTL returns a sequence number which the user * can use with the "Wait for CS" IOCTL to check whether the handle's CS - * external JOBS have been completed. Note that if the CS has internal JOBS + * non-internal JOBS have been completed. Note that if the CS has internal JOBS * which can execute AFTER the external JOBS have finished, the driver might * report that the CS has finished executing BEFORE the internal JOBS have * actually finished executing. -- cgit v1.2.3 From 1a6609cdd496b1d2183189674962035903025976 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Tue, 28 Jun 2022 21:05:28 +0300 Subject: habanalabs: naming refactor of user interrupt flow Current naming convention can be misleading. Hence renaming some variables and defines in order to be more explicit. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 6 +++--- drivers/misc/habanalabs/common/habanalabs.h | 8 ++++---- drivers/misc/habanalabs/common/irq.c | 12 ++++++------ drivers/misc/habanalabs/gaudi2/gaudi2.c | 6 +++--- include/uapi/misc/habanalabs.h | 7 +++++-- 5 files changed, 21 insertions(+), 18 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index e91ca31d4930..275dcb69a40e 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -1080,7 +1080,7 @@ void hl_release_pending_user_interrupts(struct hl_device *hdev) wake_pending_user_interrupt_threads(interrupt); } - interrupt = &hdev->common_user_interrupt; + interrupt = &hdev->common_user_cq_interrupt; wake_pending_user_interrupt_threads(interrupt); } @@ -3373,8 +3373,8 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) int_idx = interrupt_id - first_interrupt + prop->user_dec_intr_count; interrupt = &hdev->user_interrupt[int_idx]; - } else if (interrupt_id == HL_COMMON_USER_INTERRUPT_ID) { - interrupt = &hdev->common_user_interrupt; + } else if (interrupt_id == HL_COMMON_USER_CQ_INTERRUPT_ID) { + interrupt = &hdev->common_user_cq_interrupt; } else { dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id); return -EINVAL; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 8c38c2c1b1dc..9b2451f3619a 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -76,7 +76,7 @@ struct hl_fpriv; #define HL_INVALID_QUEUE UINT_MAX -#define HL_COMMON_USER_INTERRUPT_ID 0xFFF +#define HL_COMMON_USER_CQ_INTERRUPT_ID 0xFFF #define HL_STATE_DUMP_HIST_LEN 5 @@ -2952,8 +2952,8 @@ struct hl_reset_info { * @user_interrupt: array of hl_user_interrupt. upon the corresponding user * interrupt, driver will monitor the list of fences * registered to this interrupt. - * @common_user_interrupt: common user interrupt for all user interrupts. - * upon any user interrupt, driver will monitor the + * @common_user_cq_interrupt: common user CQ interrupt for all user CQ interrupts. + * upon any user CQ interrupt, driver will monitor the * list of fences registered to this common structure. * @shadow_cs_queue: pointer to a shadow queue that holds pointers to * outstanding command submissions. @@ -3118,7 +3118,7 @@ struct hl_device { enum hl_asic_type asic_type; struct hl_cq *completion_queue; struct hl_user_interrupt *user_interrupt; - struct hl_user_interrupt common_user_interrupt; + struct hl_user_interrupt common_user_cq_interrupt; struct hl_cs **shadow_cs_queue; struct workqueue_struct **cq_wq; struct workqueue_struct *eq_wq; diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c index c1088377d1de..fd8f2bd9020e 100644 --- a/drivers/misc/habanalabs/common/irq.c +++ b/drivers/misc/habanalabs/common/irq.c @@ -269,7 +269,7 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi return 0; } -static void handle_user_cq(struct hl_device *hdev, struct hl_user_interrupt *user_cq) +static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interrupt *intr) { struct hl_user_pending_interrupt *pend, *temp_pend; struct list_head *ts_reg_free_list_head = NULL; @@ -291,8 +291,8 @@ static void handle_user_cq(struct hl_device *hdev, struct hl_user_interrupt *use if (!job) return; - spin_lock(&user_cq->wait_list_lock); - list_for_each_entry_safe(pend, temp_pend, &user_cq->wait_list_head, wait_list_node) { + spin_lock(&intr->wait_list_lock); + list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, wait_list_node) { if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) || !pend->cq_kernel_addr) { if (pend->ts_reg_info.buf) { @@ -309,7 +309,7 @@ static void handle_user_cq(struct hl_device *hdev, struct hl_user_interrupt *use } } } - spin_unlock(&user_cq->wait_list_lock); + spin_unlock(&intr->wait_list_lock); if (ts_reg_free_list_head) { INIT_WORK(&job->free_obj, hl_ts_free_objects); @@ -339,10 +339,10 @@ irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg) */ if (!user_int->is_decoder) /* Handle user cq interrupts registered on all interrupts */ - handle_user_cq(hdev, &hdev->common_user_interrupt); + handle_user_interrupt(hdev, &hdev->common_user_cq_interrupt); /* Handle user cq or decoder interrupts registered on this specific irq */ - handle_user_cq(hdev, user_int); + handle_user_interrupt(hdev, user_int); return IRQ_HANDLED; } diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 93373290e894..8f012076363d 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -2891,9 +2891,9 @@ static void gaudi2_user_interrupt_setup(struct hl_device *hdev) struct asic_fixed_properties *prop = &hdev->asic_prop; int i, j, k; - /* Initialize common user interrupt */ - HL_USR_INTR_STRUCT_INIT(hdev->common_user_interrupt, hdev, HL_COMMON_USER_INTERRUPT_ID, - false); + /* Initialize common user CQ interrupt */ + HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev, + HL_COMMON_USER_CQ_INTERRUPT_ID, false); /* User interrupts structure holds both decoder and user interrupts from various engines. * We first initialize the decoder interrupts and then we add the user interrupts. diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 77b89c537ee8..4ee24a3a13e9 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -1442,6 +1442,7 @@ union hl_cs_args { #define HL_WAIT_CS_FLAGS_INTERRUPT 0x2 #define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 +#define HL_WAIT_CS_FLAGS_ANY_CQ_INTERRUPT 0xFFF00000 #define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 #define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ 0x10 #define HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT 0x20 @@ -1491,8 +1492,10 @@ struct hl_wait_cs_in { /* HL_WAIT_CS_FLAGS_* * If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include - * interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK, in order - * not to specify an interrupt id ,set mask to all 1s. + * interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK + * + * in order to wait for any CQ interrupt, set interrupt value to + * HL_WAIT_CS_FLAGS_ANY_CQ_INTERRUPT. */ __u32 flags; -- cgit v1.2.3 From d6a66d59609fc45afc91149e13dddafb8faff0d6 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Tue, 28 Jun 2022 18:34:58 +0300 Subject: habanalabs: add support for common decoder interrupts User application should be able to get notification for any decoder completion. Hence, we introduce a new interface in which a user can wait for all current decoder pending interrupts. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 5 +++++ drivers/misc/habanalabs/common/habanalabs.h | 3 +++ drivers/misc/habanalabs/common/irq.c | 9 +++------ drivers/misc/habanalabs/gaudi2/gaudi2.c | 4 ++++ include/uapi/misc/habanalabs.h | 4 ++++ 5 files changed, 19 insertions(+), 6 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 275dcb69a40e..eb5f1aee15fc 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -1082,6 +1082,9 @@ void hl_release_pending_user_interrupts(struct hl_device *hdev) interrupt = &hdev->common_user_cq_interrupt; wake_pending_user_interrupt_threads(interrupt); + + interrupt = &hdev->common_decoder_interrupt; + wake_pending_user_interrupt_threads(interrupt); } static void job_wq_completion(struct work_struct *work) @@ -3375,6 +3378,8 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) } else if (interrupt_id == HL_COMMON_USER_CQ_INTERRUPT_ID) { interrupt = &hdev->common_user_cq_interrupt; + } else if (interrupt_id == HL_COMMON_DEC_INTERRUPT_ID) { + interrupt = &hdev->common_decoder_interrupt; } else { dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id); return -EINVAL; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 9b2451f3619a..7e84f2ce49ae 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -77,6 +77,7 @@ struct hl_fpriv; #define HL_INVALID_QUEUE UINT_MAX #define HL_COMMON_USER_CQ_INTERRUPT_ID 0xFFF +#define HL_COMMON_DEC_INTERRUPT_ID 0xFFE #define HL_STATE_DUMP_HIST_LEN 5 @@ -2955,6 +2956,7 @@ struct hl_reset_info { * @common_user_cq_interrupt: common user CQ interrupt for all user CQ interrupts. * upon any user CQ interrupt, driver will monitor the * list of fences registered to this common structure. + * @common_decoder_interrupt: common decoder interrupt for all user decoder interrupts. * @shadow_cs_queue: pointer to a shadow queue that holds pointers to * outstanding command submissions. * @cq_wq: work queues of completion queues for executing work in process @@ -3119,6 +3121,7 @@ struct hl_device { struct hl_cq *completion_queue; struct hl_user_interrupt *user_interrupt; struct hl_user_interrupt common_user_cq_interrupt; + struct hl_user_interrupt common_decoder_interrupt; struct hl_cs **shadow_cs_queue; struct workqueue_struct **cq_wq; struct workqueue_struct *eq_wq; diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c index fd8f2bd9020e..d60dafb03a8e 100644 --- a/drivers/misc/habanalabs/common/irq.c +++ b/drivers/misc/habanalabs/common/irq.c @@ -333,12 +333,9 @@ irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg) struct hl_user_interrupt *user_int = arg; struct hl_device *hdev = user_int->hdev; - /* If the interrupt is not a decoder interrupt, it means the interrupt - * belongs to a user cq. In that case, before handling it, we need to handle the common - * user cq - */ - if (!user_int->is_decoder) - /* Handle user cq interrupts registered on all interrupts */ + if (user_int->is_decoder) + handle_user_interrupt(hdev, &hdev->common_decoder_interrupt); + else handle_user_interrupt(hdev, &hdev->common_user_cq_interrupt); /* Handle user cq or decoder interrupts registered on this specific irq */ diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 8f012076363d..83da4247e71b 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -2895,6 +2895,10 @@ static void gaudi2_user_interrupt_setup(struct hl_device *hdev) HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev, HL_COMMON_USER_CQ_INTERRUPT_ID, false); + /* Initialize common decoder interrupt */ + HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev, + HL_COMMON_DEC_INTERRUPT_ID, true); + /* User interrupts structure holds both decoder and user interrupts from various engines. * We first initialize the decoder interrupts and then we add the user interrupts. * The only limitation is that the last decoder interrupt id must be smaller diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 4ee24a3a13e9..8c6ab71e7831 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -1443,6 +1443,7 @@ union hl_cs_args { #define HL_WAIT_CS_FLAGS_INTERRUPT 0x2 #define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 #define HL_WAIT_CS_FLAGS_ANY_CQ_INTERRUPT 0xFFF00000 +#define HL_WAIT_CS_FLAGS_ANY_DEC_INTERRUPT 0xFFE00000 #define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 #define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ 0x10 #define HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT 0x20 @@ -1496,6 +1497,9 @@ struct hl_wait_cs_in { * * in order to wait for any CQ interrupt, set interrupt value to * HL_WAIT_CS_FLAGS_ANY_CQ_INTERRUPT. + * + * in order to wait for any decoder interrupt, set interrupt value to + * HL_WAIT_CS_FLAGS_ANY_DEC_INTERRUPT. */ __u32 flags; -- cgit v1.2.3 From e3b20f3ee452e3ce1077822b2558846eb4fe571f Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 7 Jul 2022 11:42:15 +0300 Subject: habanalabs: add status of reset after device release The user might want to know the device is in reset after device release, which is not an erroneous event as a regular reset. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 17 +++++++++++------ drivers/misc/habanalabs/common/habanalabs_drv.c | 6 +++++- include/uapi/misc/habanalabs.h | 5 ++++- 3 files changed, 20 insertions(+), 8 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 5bc291c11e9b..19c049046383 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -271,16 +271,20 @@ enum hl_device_status hl_device_status(struct hl_device *hdev) { enum hl_device_status status; - if (hdev->reset_info.in_reset) - status = HL_DEVICE_STATUS_IN_RESET; - else if (hdev->reset_info.needs_reset) + if (hdev->reset_info.in_reset) { + if (hdev->reset_info.is_in_soft_reset) + status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE; + else + status = HL_DEVICE_STATUS_IN_RESET; + } else if (hdev->reset_info.needs_reset) { status = HL_DEVICE_STATUS_NEEDS_RESET; - else if (hdev->disabled) + } else if (hdev->disabled) { status = HL_DEVICE_STATUS_MALFUNCTION; - else if (!hdev->init_done) + } else if (!hdev->init_done) { status = HL_DEVICE_STATUS_IN_DEVICE_CREATION; - else + } else { status = HL_DEVICE_STATUS_OPERATIONAL; + } return status; } @@ -296,6 +300,7 @@ bool hl_device_operational(struct hl_device *hdev, switch (current_status) { case HL_DEVICE_STATUS_IN_RESET: + case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: case HL_DEVICE_STATUS_MALFUNCTION: case HL_DEVICE_STATUS_NEEDS_RESET: return false; diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index d900bae86168..f733ead605e7 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -165,7 +165,8 @@ int hl_device_open(struct inode *inode, struct file *filp) "Can't open %s because it is %s\n", dev_name(hdev->dev), hdev->status[status]); - if (status == HL_DEVICE_STATUS_IN_RESET) + if (status == HL_DEVICE_STATUS_IN_RESET || + status == HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE) rc = -EAGAIN; else rc = -EPERM; @@ -395,6 +396,9 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev) strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX); strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION], "in device creation", HL_STR_MAX); + strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE], + "in reset after device release", HL_STR_MAX); + /* First, we must find out which ASIC are we handling. This is needed * to configure the behavior of the driver (kernel parameters) diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 8c6ab71e7831..5d06d5c74dd1 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -684,6 +684,8 @@ enum hl_goya_dma_direction { * @HL_DEVICE_STATUS_NEEDS_RESET: Device needs reset because auto reset was disabled. * @HL_DEVICE_STATUS_IN_DEVICE_CREATION: Device is operational but its creation is still in * progress. + * @HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: Device is currently during reset that was + * triggered because the user released the device * @HL_DEVICE_STATUS_LAST: Last status. */ enum hl_device_status { @@ -692,7 +694,8 @@ enum hl_device_status { HL_DEVICE_STATUS_MALFUNCTION, HL_DEVICE_STATUS_NEEDS_RESET, HL_DEVICE_STATUS_IN_DEVICE_CREATION, - HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION + HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE, + HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE }; enum hl_server_type { -- cgit v1.2.3