From d8dd7b0a81cc192ef5d30ec76ed6f6d35a1a7cf5 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 16 Feb 2019 00:39:23 +0200 Subject: habanalabs: implement INFO IOCTL This patch implements the INFO IOCTL. That IOCTL is used by the user to query information that is relevant/needed by the user in order to submit deep learning jobs to Goya. The information is divided into several categories, such as H/W IP, Events that happened, DDR usage and more. Reviewed-by: Mike Rapoport Signed-off-by: Oded Gabbay Signed-off-by: Greg Kroah-Hartman --- drivers/misc/habanalabs/goya/goya.c | 6 ++ drivers/misc/habanalabs/habanalabs.h | 2 + drivers/misc/habanalabs/habanalabs_ioctl.c | 126 +++++++++++++++++++++++++++++ include/uapi/misc/habanalabs.h | 75 ++++++++++++++++- 4 files changed, 208 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 89b82b989966..bf3f76f1aeae 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5131,6 +5131,11 @@ static void goya_hw_queues_unlock(struct hl_device *hdev) spin_unlock(&goya->hw_queues_lock); } +static u32 goya_get_pci_id(struct hl_device *hdev) +{ + return hdev->pdev->device; +} + int goya_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) { struct goya_device *goya = hdev->asic_specific; @@ -5232,6 +5237,7 @@ static const struct hl_asic_funcs goya_funcs = { .soft_reset_late_init = goya_soft_reset_late_init, .hw_queues_lock = goya_hw_queues_lock, .hw_queues_unlock = goya_hw_queues_unlock, + .get_pci_id = goya_get_pci_id, .get_eeprom_data = goya_get_eeprom_data, .send_cpu_message = goya_send_cpu_message, .get_hw_state = goya_get_hw_state diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 03085e7a12dd..02de4a2cab27 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -470,6 +470,7 @@ enum hl_pll_frequency { * @soft_reset_late_init: perform certain actions needed after soft reset. * @hw_queues_lock: acquire H/W queues lock. * @hw_queues_unlock: release H/W queues lock. + * @get_pci_id: retrieve PCI ID. * @get_eeprom_data: retrieve EEPROM data from F/W. * @send_cpu_message: send buffer to ArmCP. * @get_hw_state: retrieve the H/W state @@ -539,6 +540,7 @@ struct hl_asic_funcs { int (*soft_reset_late_init)(struct hl_device *hdev); void (*hw_queues_lock)(struct hl_device *hdev); void (*hw_queues_unlock)(struct hl_device *hdev); + u32 (*get_pci_id)(struct hl_device *hdev); int (*get_eeprom_data)(struct hl_device *hdev, void *data, size_t max_size); int (*send_cpu_message)(struct hl_device *hdev, u32 *msg, diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c index 6e4dc5b5e696..12408d3302e9 100644 --- a/drivers/misc/habanalabs/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/habanalabs_ioctl.c @@ -12,10 +12,136 @@ #include #include +static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_hw_ip_info hw_ip = {0}; + u32 size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 sram_kmd_size, dram_kmd_size; + + if ((!size) || (!out)) + return -EINVAL; + + sram_kmd_size = (prop->sram_user_base_address - + prop->sram_base_address); + dram_kmd_size = (prop->dram_user_base_address - + prop->dram_base_address); + + hw_ip.device_id = hdev->asic_funcs->get_pci_id(hdev); + hw_ip.sram_base_address = prop->sram_user_base_address; + hw_ip.dram_base_address = prop->dram_user_base_address; + hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask; + hw_ip.sram_size = prop->sram_size - sram_kmd_size; + hw_ip.dram_size = prop->dram_size - dram_kmd_size; + if (hw_ip.dram_size > 0) + hw_ip.dram_enabled = 1; + hw_ip.num_of_events = prop->num_of_events; + memcpy(hw_ip.armcp_version, + prop->armcp_info.armcp_version, VERSION_MAX_LEN); + hw_ip.armcp_cpld_version = prop->armcp_info.cpld_version; + hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr; + hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf; + hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od; + hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor; + + return copy_to_user(out, &hw_ip, + min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0; +} + +static int hw_events_info(struct hl_device *hdev, struct hl_info_args *args) +{ + u32 size, max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + void *arr; + + if ((!max_size) || (!out)) + return -EINVAL; + + arr = hdev->asic_funcs->get_events_stat(hdev, &size); + + return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0; +} + +static int dram_usage_info(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_dram_usage dram_usage = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 dram_kmd_size; + + if ((!max_size) || (!out)) + return -EINVAL; + + dram_kmd_size = (prop->dram_user_base_address - + prop->dram_base_address); + dram_usage.dram_free_mem = (prop->dram_size - dram_kmd_size) - + atomic64_read(&hdev->dram_used_mem); + dram_usage.ctx_dram_mem = atomic64_read(&hdev->user_ctx->dram_phys_mem); + + return copy_to_user(out, &dram_usage, + min((size_t) max_size, sizeof(dram_usage))) ? -EFAULT : 0; +} + +static int hw_idle(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_hw_idle hw_idle = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev); + + return copy_to_user(out, &hw_idle, + min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0; +} + +static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data) +{ + struct hl_info_args *args = data; + struct hl_device *hdev = hpriv->hdev; + int rc; + + if (hl_device_disabled_or_in_reset(hdev)) { + dev_err(hdev->dev, + "Device is disabled or in reset. Can't execute INFO IOCTL\n"); + return -EBUSY; + } + + switch (args->op) { + case HL_INFO_HW_IP_INFO: + rc = hw_ip_info(hdev, args); + break; + + case HL_INFO_HW_EVENTS: + rc = hw_events_info(hdev, args); + break; + + case HL_INFO_DRAM_USAGE: + rc = dram_usage_info(hdev, args); + break; + + case HL_INFO_HW_IDLE: + rc = hw_idle(hdev, args); + break; + + default: + dev_err(hdev->dev, "Invalid request %d\n", args->op); + rc = -ENOTTY; + break; + } + + return rc; +} + #define HL_IOCTL_DEF(ioctl, _func) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func} static const struct hl_ioctl_desc hl_ioctls[] = { + HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl), HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl), HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl), HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl), diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 9015043887d1..4afc1891ece8 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -45,6 +45,62 @@ enum goya_queue_id { GOYA_QUEUE_ID_SIZE }; +/* Opcode for management ioctl */ +#define HL_INFO_HW_IP_INFO 0 +#define HL_INFO_HW_EVENTS 1 +#define HL_INFO_DRAM_USAGE 2 +#define HL_INFO_HW_IDLE 3 + +#define HL_INFO_VERSION_MAX_LEN 128 + +struct hl_info_hw_ip_info { + __u64 sram_base_address; + __u64 dram_base_address; + __u64 dram_size; + __u32 sram_size; + __u32 num_of_events; + __u32 device_id; /* PCI Device ID */ + __u32 reserved[3]; + __u32 armcp_cpld_version; + __u32 psoc_pci_pll_nr; + __u32 psoc_pci_pll_nf; + __u32 psoc_pci_pll_od; + __u32 psoc_pci_pll_div_factor; + __u8 tpc_enabled_mask; + __u8 dram_enabled; + __u8 pad[2]; + __u8 armcp_version[HL_INFO_VERSION_MAX_LEN]; +}; + +struct hl_info_dram_usage { + __u64 dram_free_mem; + __u64 ctx_dram_mem; +}; + +struct hl_info_hw_idle { + __u32 is_idle; + __u32 pad; +}; + +struct hl_info_args { + /* Location of relevant struct in userspace */ + __u64 return_pointer; + /* + * The size of the return value. Just like "size" in "snprintf", + * it limits how many bytes the kernel can write + * + * For hw_events array, the size should be + * hl_info_hw_ip_info.num_of_events * sizeof(__u32) + */ + __u32 return_size; + + /* HL_INFO_* */ + __u32 op; + + /* Context ID - Currently not in use */ + __u32 ctx_id; + __u32 pad; +}; /* Opcode to create a new command buffer */ #define HL_CB_OP_CREATE 0 @@ -264,6 +320,23 @@ union hl_mem_args { struct hl_mem_out out; }; +/* + * Various information operations such as: + * - H/W IP information + * - Current dram usage + * + * The user calls this IOCTL with an opcode that describes the required + * information. The user should supply a pointer to a user-allocated memory + * chunk, which will be filled by the driver with the requested information. + * + * The user supplies the maximum amount of size to copy into the user's memory, + * in order to prevent data corruption in case of differences between the + * definitions of structures in kernel and userspace, e.g. in case of old + * userspace and new kernel driver + */ +#define HL_IOCTL_INFO \ + _IOWR('H', 0x01, struct hl_info_args) + /* * Command Buffer * - Request a Command Buffer @@ -365,7 +438,7 @@ union hl_mem_args { #define HL_IOCTL_MEMORY \ _IOWR('H', 0x05, union hl_mem_args) -#define HL_COMMAND_START 0x02 +#define HL_COMMAND_START 0x01 #define HL_COMMAND_END 0x06 #endif /* HABANALABS_H_ */ -- cgit v1.2.3