diff options
author | Linus Torvalds | 2019-11-27 10:53:50 -0800 |
---|---|---|
committer | Linus Torvalds | 2019-11-27 10:53:50 -0800 |
commit | 8f56e4ebe05c26c30e167519273843476e39e244 (patch) | |
tree | c9b76dca50074b3f2dd5d4e54e211daf5f866c46 /drivers/misc | |
parent | 59274c7164807d27b24e6c068dfe734f7bea4623 (diff) | |
parent | b78cda795ac83333293f1bfa3165572a47e550c2 (diff) |
Merge tag 'char-misc-5.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
Pull char/misc driver updates from Greg KH:
"Here is the big set of char/misc and other driver patches for 5.5-rc1
Loads of different things in here, this feels like the catch-all of
driver subsystems these days. Full details are in the shortlog, but
nothing major overall, just lots of driver updates and additions.
All of these have been in linux-next for a while with no reported
issues"
* tag 'char-misc-5.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (198 commits)
char: Fix Kconfig indentation, continued
habanalabs: add more protection of device during reset
habanalabs: flush EQ workers in hard reset
habanalabs: make the reset code more consistent
habanalabs: expose reset counters via existing INFO IOCTL
habanalabs: make code more concise
habanalabs: use defines for F/W files
habanalabs: remove prints on successful device initialization
habanalabs: remove unnecessary checks
habanalabs: invalidate MMU cache only once
habanalabs: skip VA block list update in reset flow
habanalabs: optimize MMU unmap
habanalabs: prevent read/write from/to the device during hard reset
habanalabs: split MMU properties to PCI/DRAM
habanalabs: re-factor MMU masks and documentation
habanalabs: type specific MMU cache invalidation
habanalabs: re-factor memory module code
habanalabs: export uapi defines to user-space
habanalabs: don't print error when queues are full
habanalabs: increase max jobs number to 512
...
Diffstat (limited to 'drivers/misc')
49 files changed, 2660 insertions, 677 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index c55b63750757..7f0d48f406e3 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -8,7 +8,6 @@ menu "Misc devices" config SENSORS_LIS3LV02D tristate depends on INPUT - select INPUT_POLLDEV config AD525X_DPOT tristate "Analog Devices Digital Potentiometers" @@ -326,14 +325,14 @@ config SENSORS_TSL2550 will be called tsl2550. config SENSORS_BH1770 - tristate "BH1770GLC / SFH7770 combined ALS - Proximity sensor" - depends on I2C - ---help--- - Say Y here if you want to build a driver for BH1770GLC (ROHM) or + tristate "BH1770GLC / SFH7770 combined ALS - Proximity sensor" + depends on I2C + ---help--- + Say Y here if you want to build a driver for BH1770GLC (ROHM) or SFH7770 (Osram) combined ambient light and proximity sensor chip. - To compile this driver as a module, choose M here: the - module will be called bh1770glc. If unsure, say N here. + To compile this driver as a module, choose M here: the + module will be called bh1770glc. If unsure, say N here. config SENSORS_APDS990X tristate "APDS990X combined als and proximity sensors" @@ -438,8 +437,8 @@ config PCI_ENDPOINT_TEST select CRC32 tristate "PCI Endpoint Test driver" ---help--- - Enable this configuration option to enable the host side test driver - for PCI Endpoint. + Enable this configuration option to enable the host side test driver + for PCI Endpoint. config XILINX_SDFEC tristate "Xilinx SDFEC 16" diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c index 08b5b639d77f..7de7840f613c 100644 --- a/drivers/misc/atmel_tclib.c +++ b/drivers/misc/atmel_tclib.c @@ -109,7 +109,6 @@ static int __init tc_probe(struct platform_device *pdev) struct atmel_tc *tc; struct clk *clk; int irq; - struct resource *r; unsigned int i; if (of_get_child_count(pdev->dev.of_node)) @@ -133,8 +132,7 @@ static int __init tc_probe(struct platform_device *pdev) if (IS_ERR(tc->slow_clk)) return PTR_ERR(tc->slow_clk); - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - tc->regs = devm_ioremap_resource(&pdev->dev, r); + tc->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(tc->regs)) return PTR_ERR(tc->regs); diff --git a/drivers/misc/cardreader/Makefile b/drivers/misc/cardreader/Makefile index d9bff5a2217e..1f56267ed2f4 100644 --- a/drivers/misc/cardreader/Makefile +++ b/drivers/misc/cardreader/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_MISC_ALCOR_PCI) += alcor_pci.o obj-$(CONFIG_MISC_RTSX_PCI) += rtsx_pci.o -rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o rts5260.o +rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o rts5260.o rts5261.o obj-$(CONFIG_MISC_RTSX_USB) += rtsx_usb.o diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c index 40a6d199f2ea..4214f02a17fd 100644 --- a/drivers/misc/cardreader/rts5260.c +++ b/drivers/misc/cardreader/rts5260.c @@ -191,7 +191,6 @@ static int sd_set_sample_push_timing_sd30(struct rtsx_pcr *pcr) static int rts5260_card_power_on(struct rtsx_pcr *pcr, int card) { - int err = 0; struct rtsx_cr_option *option = &pcr->option; if (option->ocp_en) @@ -231,7 +230,7 @@ static int rts5260_card_power_on(struct rtsx_pcr *pcr, int card) rtsx_pci_write_register(pcr, REG_PRE_RW_MODE, EN_INFINITE_MODE, 0); - return err; + return 0; } static int rts5260_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c new file mode 100644 index 000000000000..32dcec2e9dfd --- /dev/null +++ b/drivers/misc/cardreader/rts5261.c @@ -0,0 +1,792 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2018-2019 Realtek Semiconductor Corp. All rights reserved. + * + * Author: + * Rui FENG <rui_feng@realsil.com.cn> + * Wei WANG <wei_wang@realsil.com.cn> + */ + +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/rtsx_pci.h> + +#include "rts5261.h" +#include "rtsx_pcr.h" + +static u8 rts5261_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & IC_VERSION_MASK; +} + +static void rts5261_fill_driving(struct rtsx_pcr *pcr, u8 voltage) +{ + u8 driving_3v3[4][3] = { + {0x13, 0x13, 0x13}, + {0x96, 0x96, 0x96}, + {0x7F, 0x7F, 0x7F}, + {0x96, 0x96, 0x96}, + }; + u8 driving_1v8[4][3] = { + {0x99, 0x99, 0x99}, + {0x3A, 0x3A, 0x3A}, + {0xE6, 0xE6, 0xE6}, + {0xB3, 0xB3, 0xB3}, + }; + u8 (*driving)[3], drive_sel; + + if (voltage == OUTPUT_3V3) { + driving = driving_3v3; + drive_sel = pcr->sd30_drive_sel_3v3; + } else { + driving = driving_1v8; + drive_sel = pcr->sd30_drive_sel_1v8; + } + + rtsx_pci_write_register(pcr, SD30_CLK_DRIVE_SEL, + 0xFF, driving[drive_sel][0]); + + rtsx_pci_write_register(pcr, SD30_CMD_DRIVE_SEL, + 0xFF, driving[drive_sel][1]); + + rtsx_pci_write_register(pcr, SD30_DAT_DRIVE_SEL, + 0xFF, driving[drive_sel][2]); +} + +static void rtsx5261_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + u32 reg; + /* 0x814~0x817 */ + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + + if (!rts5261_vendor_setting_valid(reg)) { + pcr_dbg(pcr, "skip fetch vendor setting\n"); + return; + } + + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rts5261_reg_to_card_drive_sel(reg); + + if (rts5261_reg_check_reverse_socket(reg)) + pcr->flags |= PCR_REVERSE_SOCKET; + + /* 0x724~0x727 */ + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + pcr->aspm_en = rts5261_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = rts5261_reg_to_sd30_drive_sel_1v8(reg); + pcr->sd30_drive_sel_3v3 = rts5261_reg_to_sd30_drive_sel_3v3(reg); +} + +static void rts5261_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) +{ + /* Set relink_time to 0 */ + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, + RELINK_TIME_MASK, 0); + + if (pm_state == HOST_ENTER_S3) + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, + D3_DELINK_MODE_EN, D3_DELINK_MODE_EN); + + rtsx_pci_write_register(pcr, RTS5261_REG_FPDCTL, + SSC_POWER_DOWN, SSC_POWER_DOWN); +} + +static int rts5261_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, + LED_SHINE_MASK, LED_SHINE_EN); +} + +static int rts5261_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, + LED_SHINE_MASK, LED_SHINE_DISABLE); +} + +static int rts5261_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, + 0x02, 0x02); +} + +static int rts5261_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, + 0x02, 0x00); +} + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5261_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5261_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + 0, +}; + +static int rts5261_sd_set_sample_push_timing_sd30(struct rtsx_pcr *pcr) +{ + rtsx_pci_write_register(pcr, SD_CFG1, SD_MODE_SELECT_MASK + | SD_ASYNC_FIFO_NOT_RST, SD_30_MODE | SD_ASYNC_FIFO_NOT_RST); + rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, CLK_LOW_FREQ); + rtsx_pci_write_register(pcr, CARD_CLK_SOURCE, 0xFF, + CRC_VAR_CLK0 | SD30_FIX_CLK | SAMPLE_VAR_CLK1); + rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); + + return 0; +} + +static int rts5261_card_power_on(struct rtsx_pcr *pcr, int card) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) + rtsx_pci_enable_ocp(pcr); + + + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG1, + RTS5261_LDO1_TUNE_MASK, RTS5261_LDO1_33); + rtsx_pci_write_register(pcr, RTS5261_LDO1233318_POW_CTL, + RTS5261_LDO1_POWERON, RTS5261_LDO1_POWERON); + + rtsx_pci_write_register(pcr, RTS5261_LDO1233318_POW_CTL, + RTS5261_LDO3318_POWERON, RTS5261_LDO3318_POWERON); + + msleep(20); + + rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, SD_OUTPUT_EN); + + /* Initialize SD_CFG1 register */ + rtsx_pci_write_register(pcr, SD_CFG1, 0xFF, + SD_CLK_DIVIDE_128 | SD_20_MODE | SD_BUS_WIDTH_1BIT); + + rtsx_pci_write_register(pcr, SD_SAMPLE_POINT_CTL, + 0xFF, SD20_RX_POS_EDGE); + rtsx_pci_write_register(pcr, SD_PUSH_POINT_CTL, 0xFF, 0); + rtsx_pci_write_register(pcr, CARD_STOP, SD_STOP | SD_CLR_ERR, + SD_STOP | SD_CLR_ERR); + + /* Reset SD_CFG3 register */ + rtsx_pci_write_register(pcr, SD_CFG3, SD30_CLK_END_EN, 0); + rtsx_pci_write_register(pcr, REG_SD_STOP_SDCLK_CFG, + SD30_CLK_STOP_CFG_EN | SD30_CLK_STOP_CFG1 | + SD30_CLK_STOP_CFG0, 0); + + if (pcr->extra_caps & EXTRA_CAPS_SD_SDR50 || + pcr->extra_caps & EXTRA_CAPS_SD_SDR104) + rts5261_sd_set_sample_push_timing_sd30(pcr); + + return 0; +} + +static int rts5261_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + u16 val = 0; + + rtsx_pci_write_register(pcr, RTS5261_CARD_PWR_CTL, + RTS5261_PUPDC, RTS5261_PUPDC); + + switch (voltage) { + case OUTPUT_3V3: + rtsx_pci_read_phy_register(pcr, PHY_TUNE, &val); + val |= PHY_TUNE_SDBUS_33; + err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, val); + if (err < 0) + return err; + + rtsx_pci_write_register(pcr, RTS5261_DV3318_CFG, + RTS5261_DV3318_TUNE_MASK, RTS5261_DV3318_33); + rtsx_pci_write_register(pcr, SD_PAD_CTL, + SD_IO_USING_1V8, 0); + break; + case OUTPUT_1V8: + rtsx_pci_read_phy_register(pcr, PHY_TUNE, &val); + val &= ~PHY_TUNE_SDBUS_33; + err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, val); + if (err < 0) + return err; + + rtsx_pci_write_register(pcr, RTS5261_DV3318_CFG, + RTS5261_DV3318_TUNE_MASK, RTS5261_DV3318_18); + rtsx_pci_write_register(pcr, SD_PAD_CTL, + SD_IO_USING_1V8, SD_IO_USING_1V8); + break; + default: + return -EINVAL; + } + + /* set pad drive */ + rts5261_fill_driving(pcr, voltage); + + return 0; +} + +static void rts5261_stop_cmd(struct rtsx_pcr *pcr) +{ + rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD); + rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA); + rtsx_pci_write_register(pcr, RTS5260_DMA_RST_CTL_0, + RTS5260_DMA_RST | RTS5260_ADMA3_RST, + RTS5260_DMA_RST | RTS5260_ADMA3_RST); + rtsx_pci_write_register(pcr, RBCTL, RB_FLUSH, RB_FLUSH); +} + +static void rts5261_card_before_power_off(struct rtsx_pcr *pcr) +{ + rts5261_stop_cmd(pcr); + rts5261_switch_output_voltage(pcr, OUTPUT_3V3); + +} + +static void rts5261_enable_ocp(struct rtsx_pcr *pcr) +{ + u8 val = 0; + + val = SD_OCP_INT_EN | SD_DETECT_EN; + rtsx_pci_write_register(pcr, REG_OCPCTL, 0xFF, val); + +} + +static void rts5261_disable_ocp(struct rtsx_pcr *pcr) +{ + u8 mask = 0; + + mask = SD_OCP_INT_EN | SD_DETECT_EN; + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN, 0); + +} + +static int rts5261_card_power_off(struct rtsx_pcr *pcr, int card) +{ + int err = 0; + + rts5261_card_before_power_off(pcr); + err = rtsx_pci_write_register(pcr, RTS5261_LDO1233318_POW_CTL, + RTS5261_LDO_POWERON_MASK, 0); + + if (pcr->option.ocp_en) + rtsx_pci_disable_ocp(pcr); + + return err; +} + +static void rts5261_init_ocp(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) { + u8 mask, val; + + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN, + RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN); + + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_THD_MASK, option->sd_800mA_ocp_thd); + + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_LMT_THD_MASK, + RTS5261_LDO1_LMT_THD_2000); + + mask = SD_OCP_GLITCH_MASK; + val = pcr->hw_param.ocp_glitch; + rtsx_pci_write_register(pcr, REG_OCPGLITCH, mask, val); + + rts5261_enable_ocp(pcr); + } else { + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN, 0); + } +} + +static void rts5261_clear_ocpstat(struct rtsx_pcr *pcr) +{ + u8 mask = 0; + u8 val = 0; + + mask = SD_OCP_INT_CLR | SD_OC_CLR; + val = SD_OCP_INT_CLR | SD_OC_CLR; + + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, val); + + udelay(10); + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + +} + +static void rts5261_process_ocp(struct rtsx_pcr *pcr) +{ + if (!pcr->option.ocp_en) + return; + + rtsx_pci_get_ocpstat(pcr, &pcr->ocp_stat); + + if (pcr->ocp_stat & (SD_OC_NOW | SD_OC_EVER)) { + rts5261_card_power_off(pcr, RTSX_SD_CARD); + rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, 0); + rts5261_clear_ocpstat(pcr); + pcr->ocp_stat = 0; + } + +} + +static int rts5261_init_from_hw(struct rtsx_pcr *pcr) +{ + int retval; + u32 lval, i; + u8 valid, efuse_valid, tmp; + + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + REG_EFUSE_POR | REG_EFUSE_POWER_MASK, + REG_EFUSE_POR | REG_EFUSE_POWERON); + udelay(1); + rtsx_pci_write_register(pcr, RTS5261_EFUSE_ADDR, + RTS5261_EFUSE_ADDR_MASK, 0x00); + rtsx_pci_write_register(pcr, RTS5261_EFUSE_CTL, + RTS5261_EFUSE_ENABLE | RTS5261_EFUSE_MODE_MASK, + RTS5261_EFUSE_ENABLE); + + /* Wait transfer end */ + for (i = 0; i < MAX_RW_REG_CNT; i++) { + rtsx_pci_read_register(pcr, RTS5261_EFUSE_CTL, &tmp); + if ((tmp & 0x80) == 0) + break; + } + rtsx_pci_read_register(pcr, RTS5261_EFUSE_READ_DATA, &tmp); + efuse_valid = ((tmp & 0x0C) >> 2); + pcr_dbg(pcr, "Load efuse valid: 0x%x\n", efuse_valid); + + if (efuse_valid == 0) { + retval = rtsx_pci_read_config_dword(pcr, + PCR_SETTING_REG2, &lval); + if (retval != 0) + pcr_dbg(pcr, "read 0x814 DW fail\n"); + pcr_dbg(pcr, "DW from 0x814: 0x%x\n", lval); + /* 0x816 */ + valid = (u8)((lval >> 16) & 0x03); + pcr_dbg(pcr, "0x816: %d\n", valid); + } + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + REG_EFUSE_POR, 0); + pcr_dbg(pcr, "Disable efuse por!\n"); + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, &lval); + lval = lval & 0x00FFFFFF; + retval = rtsx_pci_write_config_dword(pcr, PCR_SETTING_REG2, lval); + if (retval != 0) + pcr_dbg(pcr, "write config fail\n"); + + return retval; +} + +static void rts5261_init_from_cfg(struct rtsx_pcr *pcr) +{ + u32 lval; + struct rtsx_cr_option *option = &pcr->option; + + rtsx_pci_read_config_dword(pcr, PCR_ASPM_SETTING_REG1, &lval); + + if (lval & ASPM_L1_1_EN_MASK) + rtsx_set_dev_flag(pcr, ASPM_L1_1_EN); + else + rtsx_clear_dev_flag(pcr, ASPM_L1_1_EN); + + if (lval & ASPM_L1_2_EN_MASK) + rtsx_set_dev_flag(pcr, ASPM_L1_2_EN); + else + rtsx_clear_dev_flag(pcr, ASPM_L1_2_EN); + + if (lval & PM_L1_1_EN_MASK) + rtsx_set_dev_flag(pcr, PM_L1_1_EN); + else + rtsx_clear_dev_flag(pcr, PM_L1_1_EN); + + if (lval & PM_L1_2_EN_MASK) + rtsx_set_dev_flag(pcr, PM_L1_2_EN); + else + rtsx_clear_dev_flag(pcr, PM_L1_2_EN); + + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0xFF, 0); + if (option->ltr_en) { + u16 val; + + pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &val); + if (val & PCI_EXP_DEVCTL2_LTR_EN) { + option->ltr_enabled = true; + option->ltr_active = true; + rtsx_set_ltr_latency(pcr, option->ltr_active_latency); + } else { + option->ltr_enabled = false; + } + } + + if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN + | PM_L1_1_EN | PM_L1_2_EN)) + option->force_clkreq_0 = false; + else + option->force_clkreq_0 = true; +} + +static int rts5261_extra_init_hw(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG1, + CD_RESUME_EN_MASK, CD_RESUME_EN_MASK); + + rts5261_init_from_cfg(pcr); + rts5261_init_from_hw(pcr); + + /* power off efuse */ + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + REG_EFUSE_POWER_MASK, REG_EFUSE_POWEROFF); + rtsx_pci_write_register(pcr, L1SUB_CONFIG1, + AUX_CLK_ACTIVE_SEL_MASK, MAC_CKSW_DONE); + rtsx_pci_write_register(pcr, L1SUB_CONFIG3, 0xFF, 0); + + rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG4, + RTS5261_AUX_CLK_16M_EN, 0); + + /* Release PRSNT# */ + rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG4, + RTS5261_FORCE_PRSNT_LOW, 0); + rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, + FUNC_FORCE_UPME_XMT_DBG, FUNC_FORCE_UPME_XMT_DBG); + + rtsx_pci_write_register(pcr, PCLK_CTL, + PCLK_MODE_SEL, PCLK_MODE_SEL); + + rtsx_pci_write_register(pcr, PM_EVENT_DEBUG, PME_DEBUG_0, PME_DEBUG_0); + rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, CLK_PM_EN, CLK_PM_EN); + + /* LED shine disabled, set initial shine cycle period */ + rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x0F, 0x02); + + /* Configure driving */ + rts5261_fill_driving(pcr, OUTPUT_3V3); + + /* + * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced + * to drive low, and we forcibly request clock. + */ + if (option->force_clkreq_0) + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); + else + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); + + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00); + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL); + + /* Clear Enter RTD3_cold Information*/ + rtsx_pci_write_register(pcr, RTS5261_FW_CTL, + RTS5261_INFORM_RTD3_COLD, 0); + + return 0; +} + +static void rts5261_enable_aspm(struct rtsx_pcr *pcr, bool enable) +{ + struct rtsx_cr_option *option = &pcr->option; + u8 val = 0; + + if (pcr->aspm_enabled == enable) + return; + + if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) { + val = pcr->aspm_en; + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + ASPM_MASK_NEG, val); + } else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) { + u8 mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0; + + val = FORCE_ASPM_CTL0; + val |= (pcr->aspm_en & 0x02); + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); + val = pcr->aspm_en; + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + ASPM_MASK_NEG, val); + } + pcr->aspm_enabled = enable; + +} + +static void rts5261_disable_aspm(struct rtsx_pcr *pcr, bool enable) +{ + struct rtsx_cr_option *option = &pcr->option; + u8 val = 0; + + if (pcr->aspm_enabled == enable) + return; + + if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) { + val = 0; + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + ASPM_MASK_NEG, val); + } else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) { + u8 mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0; + + val = 0; + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + ASPM_MASK_NEG, val); + val = FORCE_ASPM_CTL0; + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); + } + rtsx_pci_write_register(pcr, SD_CFG1, SD_ASYNC_FIFO_NOT_RST, 0); + udelay(10); + pcr->aspm_enabled = enable; +} + +static void rts5261_set_aspm(struct rtsx_pcr *pcr, bool enable) +{ + if (enable) + rts5261_enable_aspm(pcr, true); + else + rts5261_disable_aspm(pcr, false); +} + +static void rts5261_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active) +{ + struct rtsx_cr_option *option = &pcr->option; + int aspm_L1_1, aspm_L1_2; + u8 val = 0; + + aspm_L1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN); + aspm_L1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN); + + if (active) { + /* run, latency: 60us */ + if (aspm_L1_1) + val = option->ltr_l1off_snooze_sspwrgate; + } else { + /* l1off, latency: 300us */ + if (aspm_L1_2) + val = option->ltr_l1off_sspwrgate; + } + + rtsx_set_l1off_sub(pcr, val); +} + +static const struct pcr_ops rts5261_pcr_ops = { + .fetch_vendor_settings = rtsx5261_fetch_vendor_settings, + .turn_on_led = rts5261_turn_on_led, + .turn_off_led = rts5261_turn_off_led, + .extra_init_hw = rts5261_extra_init_hw, + .enable_auto_blink = rts5261_enable_auto_blink, + .disable_auto_blink = rts5261_disable_auto_blink, + .card_power_on = rts5261_card_power_on, + .card_power_off = rts5261_card_power_off, + .switch_output_voltage = rts5261_switch_output_voltage, + .force_power_down = rts5261_force_power_down, + .stop_cmd = rts5261_stop_cmd, + .set_aspm = rts5261_set_aspm, + .set_l1off_cfg_sub_d0 = rts5261_set_l1off_cfg_sub_d0, + .enable_ocp = rts5261_enable_ocp, + .disable_ocp = rts5261_disable_ocp, + .init_ocp = rts5261_init_ocp, + .process_ocp = rts5261_process_ocp, + .clear_ocpstat = rts5261_clear_ocpstat, +}; + +static inline u8 double_ssc_depth(u8 depth) +{ + return ((depth > 1) ? (depth - 1) : depth); +} + +int rts5261_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk) +{ + int err, clk; + u8 n, clk_divider, mcu_cnt, div; + static const u8 depth[] = { + [RTSX_SSC_DEPTH_4M] = RTS5261_SSC_DEPTH_4M, + [RTSX_SSC_DEPTH_2M] = RTS5261_SSC_DEPTH_2M, + [RTSX_SSC_DEPTH_1M] = RTS5261_SSC_DEPTH_1M, + [RTSX_SSC_DEPTH_500K] = RTS5261_SSC_DEPTH_512K, + }; + + if (initial_mode) { + /* We use 250k(around) here, in initial stage */ + clk_divider = SD_CLK_DIVIDE_128; + card_clock = 30000000; + } else { + clk_divider = SD_CLK_DIVIDE_0; + } + err = rtsx_pci_write_register(pcr, SD_CFG1, + SD_CLK_DIVIDE_MASK, clk_divider); + if (err < 0) + return err; + + card_clock /= 1000000; + pcr_dbg(pcr, "Switch card clock to %dMHz\n", card_clock); + + clk = card_clock; + if (!initial_mode && double_clk) + clk = card_clock * 2; + pcr_dbg(pcr, "Internal SSC clock: %dMHz (cur_clock = %d)\n", + clk, pcr->cur_clock); + + if (clk == pcr->cur_clock) + return 0; + + if (pcr->ops->conv_clk_and_div_n) + n = (u8)pcr->ops->conv_clk_and_div_n(clk, CLK_TO_DIV_N); + else + n = (u8)(clk - 4); + if ((clk <= 4) || (n > 396)) + return -EINVAL; + + mcu_cnt = (u8)(125/clk + 3); + if (mcu_cnt > 15) + mcu_cnt = 15; + + div = CLK_DIV_1; + while ((n < MIN_DIV_N_PCR - 4) && (div < CLK_DIV_8)) { + if (pcr->ops->conv_clk_and_div_n) { + int dbl_clk = pcr->ops->conv_clk_and_div_n(n, + DIV_N_TO_CLK) * 2; + n = (u8)pcr->ops->conv_clk_and_div_n(dbl_clk, + CLK_TO_DIV_N); + } else { + n = (n + 4) * 2 - 4; + } + div++; + } + + n = (n / 2); + pcr_dbg(pcr, "n = %d, div = %d\n", n, div); + + ssc_depth = depth[ssc_depth]; + if (double_clk) + ssc_depth = double_ssc_depth(ssc_depth); + + if (ssc_depth) { + if (div == CLK_DIV_2) { + if (ssc_depth > 1) + ssc_depth -= 1; + else + ssc_depth = RTS5261_SSC_DEPTH_8M; + } else if (div == CLK_DIV_4) { + if (ssc_depth > 2) + ssc_depth -= 2; + else + ssc_depth = RTS5261_SSC_DEPTH_8M; + } else if (div == CLK_DIV_8) { + if (ssc_depth > 3) + ssc_depth -= 3; + else + ssc_depth = RTS5261_SSC_DEPTH_8M; + } + } else { + ssc_depth = 0; + } + pcr_dbg(pcr, "ssc_depth = %d\n", ssc_depth); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_CTL, + CLK_LOW_FREQ, CLK_LOW_FREQ); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, + 0xFF, (div << 4) | mcu_cnt); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, + SSC_DEPTH_MASK, ssc_depth); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB); + if (vpclk) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK1_CTL, + PHASE_NOT_RESET, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK1_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + } + + err = rtsx_pci_send_cmd(pcr, 2000); + if (err < 0) + return err; + + /* Wait SSC clock stable */ + udelay(SSC_CLOCK_STABLE_WAIT); + err = rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); + if (err < 0) + return err; + + pcr->cur_clock = clk; + return 0; + +} + +void rts5261_init_params(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + struct rtsx_hw_param *hw_param = &pcr->hw_param; + + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 1; + pcr->ops = &rts5261_pcr_ops; + + pcr->flags = 0; + pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; + pcr->aspm_en = ASPM_L1_EN; + pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 27, 16); + pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); + + pcr->ic_version = rts5261_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5261_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5261_sd_pull_ctl_disable_tbl; + + pcr->reg_pm_ctrl3 = RTS5261_AUTOLOAD_CFG3; + + option->dev_flags = (LTR_L1SS_PWR_GATE_CHECK_CARD_EN + | LTR_L1SS_PWR_GATE_EN); + option->ltr_en = true; + + /* init latency of active, idle, L1OFF to 60us, 300us, 3ms */ + option->ltr_active_latency = LTR_ACTIVE_LATENCY_DEF; + option->ltr_idle_latency = LTR_IDLE_LATENCY_DEF; + option->ltr_l1off_latency = LTR_L1OFF_LATENCY_DEF; + option->l1_snooze_delay = L1_SNOOZE_DELAY_DEF; + option->ltr_l1off_sspwrgate = 0x7F; + option->ltr_l1off_snooze_sspwrgate = 0x78; + option->dev_aspm_mode = DEV_ASPM_DYNAMIC; + + option->ocp_en = 1; + hw_param->interrupt_en |= SD_OC_INT_EN; + hw_param->ocp_glitch = SD_OCP_GLITCH_800U; + option->sd_800mA_ocp_thd = RTS5261_LDO1_OCP_THD_1040; +} diff --git a/drivers/misc/cardreader/rts5261.h b/drivers/misc/cardreader/rts5261.h new file mode 100644 index 000000000000..ebfdd236a553 --- /dev/null +++ b/drivers/misc/cardreader/rts5261.h @@ -0,0 +1,233 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2018-2019 Realtek Semiconductor Corp. All rights reserved. + * + * Author: + * Rui FENG <rui_feng@realsil.com.cn> + * Wei WANG <wei_wang@realsil.com.cn> + */ +#ifndef RTS5261_H +#define RTS5261_H + +/*New add*/ +#define rts5261_vendor_setting_valid(reg) ((reg) & 0x010000) +#define rts5261_reg_to_aspm(reg) (((reg) >> 28) ^ 0x03) +#define rts5261_reg_check_reverse_socket(reg) ((reg) & 0x04) +#define rts5261_reg_to_card_drive_sel(reg) ((((reg) >> 6) & 0x01) << 6) +#define rts5261_reg_to_sd30_drive_sel_1v8(reg) (((reg) >> 22) ^ 0x03) +#define rts5261_reg_to_sd30_drive_sel_3v3(reg) (((reg) >> 16) ^ 0x03) + + +#define RTS5261_AUTOLOAD_CFG0 0xFF7B +#define RTS5261_AUTOLOAD_CFG1 0xFF7C +#define RTS5261_AUTOLOAD_CFG2 0xFF7D +#define RTS5261_AUTOLOAD_CFG3 0xFF7E +#define RTS5261_AUTOLOAD_CFG4 0xFF7F +#define RTS5261_FORCE_PRSNT_LOW (1 << 6) +#define RTS5261_AUX_CLK_16M_EN (1 << 5) + +#define RTS5261_REG_VREF 0xFE97 +#define RTS5261_PWD_SUSPND_EN (1 << 4) + +#define RTS5261_PAD_H3L1 0xFF79 +#define PAD_GPIO_H3L1 (1 << 3) + +/* SSC_CTL2 0xFC12 */ +#define RTS5261_SSC_DEPTH_MASK 0x07 +#define RTS5261_SSC_DEPTH_DISALBE 0x00 +#define RTS5261_SSC_DEPTH_8M 0x01 +#define RTS5261_SSC_DEPTH_4M 0x02 +#define RTS5261_SSC_DEPTH_2M 0x03 +#define RTS5261_SSC_DEPTH_1M 0x04 +#define RTS5261_SSC_DEPTH_512K 0x05 +#define RTS5261_SSC_DEPTH_256K 0x06 +#define RTS5261_SSC_DEPTH_128K 0x07 + +/* efuse control register*/ +#define RTS5261_EFUSE_CTL 0xFC30 +#define RTS5261_EFUSE_ENABLE 0x80 +/* EFUSE_MODE: 0=READ 1=PROGRAM */ +#define RTS5261_EFUSE_MODE_MASK 0x40 +#define RTS5261_EFUSE_PROGRAM 0x40 + +#define RTS5261_EFUSE_ADDR 0xFC31 +#define RTS5261_EFUSE_ADDR_MASK 0x3F + +#define RTS5261_EFUSE_WRITE_DATA 0xFC32 +#define RTS5261_EFUSE_READ_DATA 0xFC34 + +/* DMACTL 0xFE2C */ +#define RTS5261_DMA_PACK_SIZE_MASK 0xF0 + +/* FW config info register */ +#define RTS5261_FW_CFG_INFO0 0xFF50 +#define RTS5261_FW_EXPRESS_TEST_MASK (0x01<<0) +#define RTS5261_FW_EA_MODE_MASK (0x01<<5) + +/* FW config register */ +#define RTS5261_FW_CFG0 0xFF54 +#define RTS5261_FW_ENTER_EXPRESS (0x01<<0) + +#define RTS5261_FW_CFG1 0xFF55 +#define RTS5261_SYS_CLK_SEL_MCU_CLK (0x01<<7) +#define RTS5261_CRC_CLK_SEL_MCU_CLK (0x01<<6) +#define RTS5261_FAKE_MCU_CLOCK_GATING (0x01<<5) +/*MCU_bus_mode_sel: 0=real 8051 1=fake mcu*/ +#define RTS5261_MCU_BUS_SEL_MASK (0x01<<4) +/*MCU_clock_sel:VerA 00=aux16M 01=aux400K 1x=REFCLK100M*/ +/*MCU_clock_sel:VerB 00=aux400K 01=aux16M 10=REFCLK100M*/ +#define RTS5261_MCU_CLOCK_SEL_MASK (0x03<<2) +#define RTS5261_MCU_CLOCK_SEL_16M (0x01<<2) +#define RTS5261_MCU_CLOCK_GATING (0x01<<1) +#define RTS5261_DRIVER_ENABLE_FW (0x01<<0) + +/* FW status register */ +#define RTS5261_FW_STATUS 0xFF56 +#define RTS5261_EXPRESS_LINK_FAIL_MASK (0x01<<7) + +/* FW control register */ +#define RTS5261_FW_CTL 0xFF5F +#define RTS5261_INFORM_RTD3_COLD (0x01<<5) + +#define RTS5261_REG_FPDCTL 0xFF60 + +#define RTS5261_REG_LDO12_CFG 0xFF6E +#define RTS5261_LDO12_VO_TUNE_MASK (0x07<<1) +#define RTS5261_LDO12_115 (0x03<<1) +#define RTS5261_LDO12_120 (0x04<<1) +#define RTS5261_LDO12_125 (0x05<<1) +#define RTS5261_LDO12_130 (0x06<<1) +#define RTS5261_LDO12_135 (0x07<<1) + +/* LDO control register */ +#define RTS5261_CARD_PWR_CTL 0xFD50 +#define RTS5261_SD_CLK_ISO (0x01<<7) +#define RTS5261_PAD_SD_DAT_FW_CTRL (0x01<<6) +#define RTS5261_PUPDC (0x01<<5) +#define RTS5261_SD_CMD_ISO (0x01<<4) +#define RTS5261_SD_DAT_ISO_MASK (0x0F<<0) + +#define RTS5261_LDO1233318_POW_CTL 0xFF70 +#define RTS5261_LDO3318_POWERON (0x01<<3) +#define RTS5261_LDO3_POWERON (0x01<<2) +#define RTS5261_LDO2_POWERON (0x01<<1) +#define RTS5261_LDO1_POWERON (0x01<<0) +#define RTS5261_LDO_POWERON_MASK (0x0F<<0) + +#define RTS5261_DV3318_CFG 0xFF71 +#define RTS5261_DV3318_TUNE_MASK (0x07<<4) +#define RTS5261_DV3318_18 (0x02<<4) +#define RTS5261_DV3318_19 (0x04<<4) +#define RTS5261_DV3318_33 (0x07<<4) + +#define RTS5261_LDO1_CFG0 0xFF72 +#define RTS5261_LDO1_OCP_THD_MASK (0x07<<5) +#define RTS5261_LDO1_OCP_EN (0x01<<4) +#define RTS5261_LDO1_OCP_LMT_THD_MASK (0x03<<2) +#define RTS5261_LDO1_OCP_LMT_EN (0x01<<1) + +/* CRD6603-433 190319 request changed */ +#define RTS5261_LDO1_OCP_THD_740 (0x00<<5) +#define RTS5261_LDO1_OCP_THD_800 (0x01<<5) +#define RTS5261_LDO1_OCP_THD_860 (0x02<<5) +#define RTS5261_LDO1_OCP_THD_920 (0x03<<5) +#define RTS5261_LDO1_OCP_THD_980 (0x04<<5) +#define RTS5261_LDO1_OCP_THD_1040 (0x05<<5) +#define RTS5261_LDO1_OCP_THD_1100 (0x06<<5) +#define RTS5261_LDO1_OCP_THD_1160 (0x07<<5) + +#define RTS5261_LDO1_LMT_THD_450 (0x00<<2) +#define RTS5261_LDO1_LMT_THD_1000 (0x01<<2) +#define RTS5261_LDO1_LMT_THD_1500 (0x02<<2) +#define RTS5261_LDO1_LMT_THD_2000 (0x03<<2) + +#define RTS5261_LDO1_CFG1 0xFF73 +#define RTS5261_LDO1_TUNE_MASK (0x07<<1) +#define RTS5261_LDO1_18 (0x05<<1) +#define RTS5261_LDO1_33 (0x07<<1) +#define RTS5261_LDO1_PWD_MASK (0x01<<0) + +#define RTS5261_LDO2_CFG0 0xFF74 +#define RTS5261_LDO2_OCP_THD_MASK (0x07<<5) +#define RTS5261_LDO2_OCP_EN (0x01<<4) +#define RTS5261_LDO2_OCP_LMT_THD_MASK (0x03<<2) +#define RTS5261_LDO2_OCP_LMT_EN (0x01<<1) + +#define RTS5261_LDO2_OCP_THD_620 (0x00<<5) +#define RTS5261_LDO2_OCP_THD_650 (0x01<<5) +#define RTS5261_LDO2_OCP_THD_680 (0x02<<5) +#define RTS5261_LDO2_OCP_THD_720 (0x03<<5) +#define RTS5261_LDO2_OCP_THD_750 (0x04<<5) +#define RTS5261_LDO2_OCP_THD_780 (0x05<<5) +#define RTS5261_LDO2_OCP_THD_810 (0x06<<5) +#define RTS5261_LDO2_OCP_THD_840 (0x07<<5) + +#define RTS5261_LDO2_CFG1 0xFF75 +#define RTS5261_LDO2_TUNE_MASK (0x07<<1) +#define RTS5261_LDO2_18 (0x05<<1) +#define RTS5261_LDO2_33 (0x07<<1) +#define RTS5261_LDO2_PWD_MASK (0x01<<0) + +#define RTS5261_LDO3_CFG0 0xFF76 +#define RTS5261_LDO3_OCP_THD_MASK (0x07<<5) +#define RTS5261_LDO3_OCP_EN (0x01<<4) +#define RTS5261_LDO3_OCP_LMT_THD_MASK (0x03<<2) +#define RTS5261_LDO3_OCP_LMT_EN (0x01<<1) + +#define RTS5261_LDO3_OCP_THD_620 (0x00<<5) +#define RTS5261_LDO3_OCP_THD_650 (0x01<<5) +#define RTS5261_LDO3_OCP_THD_680 (0x02<<5) +#define RTS5261_LDO3_OCP_THD_720 (0x03<<5) +#define RTS5261_LDO3_OCP_THD_750 (0x04<<5) +#define RTS5261_LDO3_OCP_THD_780 (0x05<<5) +#define RTS5261_LDO3_OCP_THD_810 (0x06<<5) +#define RTS5261_LDO3_OCP_THD_840 (0x07<<5) + +#define RTS5261_LDO3_CFG1 0xFF77 +#define RTS5261_LDO3_TUNE_MASK (0x07<<1) +#define RTS5261_LDO3_18 (0x05<<1) +#define RTS5261_LDO3_33 (0x07<<1) +#define RTS5261_LDO3_PWD_MASK (0x01<<0) + +#define RTS5261_REG_PME_FORCE_CTL 0xFF78 +#define FORCE_PM_CONTROL 0x20 +#define FORCE_PM_VALUE 0x10 +#define REG_EFUSE_BYPASS 0x08 +#define REG_EFUSE_POR 0x04 +#define REG_EFUSE_POWER_MASK 0x03 +#define REG_EFUSE_POWERON 0x03 +#define REG_EFUSE_POWEROFF 0x00 + + +/* Single LUN, support SD/SD EXPRESS */ +#define DEFAULT_SINGLE 0 +#define SD_LUN 1 +#define SD_EXPRESS_LUN 2 + +/* For Change_FPGA_SSCClock Function */ +#define MULTIPLY_BY_1 0x00 +#define MULTIPLY_BY_2 0x01 +#define MULTIPLY_BY_3 0x02 +#define MULTIPLY_BY_4 0x03 +#define MULTIPLY_BY_5 0x04 +#define MULTIPLY_BY_6 0x05 +#define MULTIPLY_BY_7 0x06 +#define MULTIPLY_BY_8 0x07 +#define MULTIPLY_BY_9 0x08 +#define MULTIPLY_BY_10 0x09 + +#define DIVIDE_BY_2 0x01 +#define DIVIDE_BY_3 0x02 +#define DIVIDE_BY_4 0x03 +#define DIVIDE_BY_5 0x04 +#define DIVIDE_BY_6 0x05 +#define DIVIDE_BY_7 0x06 +#define DIVIDE_BY_8 0x07 +#define DIVIDE_BY_9 0x08 +#define DIVIDE_BY_10 0x09 + +int rts5261_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk); + +#endif /* RTS5261_H */ diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c index b4a66b64f742..fd7b2167103d 100644 --- a/drivers/misc/cardreader/rtsx_pcr.c +++ b/drivers/misc/cardreader/rtsx_pcr.c @@ -22,6 +22,7 @@ #include <asm/unaligned.h> #include "rtsx_pcr.h" +#include "rts5261.h" static bool msi_en = true; module_param(msi_en, bool, S_IRUGO | S_IWUSR); @@ -34,9 +35,6 @@ static struct mfd_cell rtsx_pcr_cells[] = { [RTSX_SD_CARD] = { .name = DRV_NAME_RTSX_PCI_SDMMC, }, - [RTSX_MS_CARD] = { - .name = DRV_NAME_RTSX_PCI_MS, - }, }; static const struct pci_device_id rtsx_pci_ids[] = { @@ -51,6 +49,7 @@ static const struct pci_device_id rtsx_pci_ids[] = { { PCI_DEVICE(0x10EC, 0x524A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { PCI_DEVICE(0x10EC, 0x525A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { PCI_DEVICE(0x10EC, 0x5260), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5261), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { 0, } }; @@ -438,8 +437,16 @@ static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr, if (end) option |= RTSX_SG_END; - val = ((u64)addr << 32) | ((u64)len << 12) | option; + if (PCI_PID(pcr) == PID_5261) { + if (len > 0xFFFF) + val = ((u64)addr << 32) | (((u64)len & 0xFFFF) << 16) + | (((u64)len >> 16) << 6) | option; + else + val = ((u64)addr << 32) | ((u64)len << 16) | option; + } else { + val = ((u64)addr << 32) | ((u64)len << 12) | option; + } put_unaligned_le64(val, ptr); pcr->sgi++; } @@ -684,7 +691,6 @@ int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card) else return -EINVAL; - return rtsx_pci_set_pull_ctl(pcr, tbl); } EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_disable); @@ -735,6 +741,10 @@ int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, [RTSX_SSC_DEPTH_250K] = SSC_DEPTH_250K, }; + if (PCI_PID(pcr) == PID_5261) + return rts5261_pci_switch_clock(pcr, card_clock, + ssc_depth, initial_mode, double_clk, vpclk); + if (initial_mode) { /* We use 250k(around) here, in initial stage */ clk_divider = SD_CLK_DIVIDE_128; @@ -1253,7 +1263,15 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) rtsx_pci_enable_bus_int(pcr); /* Power on SSC */ - err = rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, 0); + if (PCI_PID(pcr) == PID_5261) { + /* Gating real mcu clock */ + err = rtsx_pci_write_register(pcr, RTS5261_FW_CFG1, + RTS5261_MCU_CLOCK_GATING, 0); + err = rtsx_pci_write_register(pcr, RTS5261_REG_FPDCTL, + SSC_POWER_DOWN, 0); + } else { + err = rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, 0); + } if (err < 0) return err; @@ -1283,7 +1301,12 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) /* Enable SSC Clock */ rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, 0xFF, SSC_8X_EN | SSC_SEL_4M); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, 0x12); + if (PCI_PID(pcr) == PID_5261) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, + RTS5261_SSC_DEPTH_2M); + else + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, 0x12); + /* Disable cd_pwr_save */ rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x16, 0x10); /* Clear Link Ready Interrupt */ @@ -1314,6 +1337,7 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) case PID_524A: case PID_525A: case PID_5260: + case PID_5261: rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, 1, 1); break; default: @@ -1393,9 +1417,14 @@ static int rtsx_pci_init_chip(struct rtsx_pcr *pcr) case 0x5286: rtl8402_init_params(pcr); break; + case 0x5260: rts5260_init_params(pcr); break; + + case 0x5261: + rts5261_init_params(pcr); + break; } pcr_dbg(pcr, "PID: 0x%04x, IC version: 0x%02x\n", diff --git a/drivers/misc/cardreader/rtsx_pcr.h b/drivers/misc/cardreader/rtsx_pcr.h index 98f729263dc1..ed391df52f4f 100644 --- a/drivers/misc/cardreader/rtsx_pcr.h +++ b/drivers/misc/cardreader/rtsx_pcr.h @@ -53,6 +53,7 @@ void rts524a_init_params(struct rtsx_pcr *pcr); void rts525a_init_params(struct rtsx_pcr *pcr); void rtl8411b_init_params(struct rtsx_pcr *pcr); void rts5260_init_params(struct rtsx_pcr *pcr); +void rts5261_init_params(struct rtsx_pcr *pcr); static inline u8 map_sd_drive(int idx) { diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c index 2cfe3d4ae144..226b5efa6a77 100644 --- a/drivers/misc/eeprom/eeprom.c +++ b/drivers/misc/eeprom/eeprom.c @@ -175,6 +175,10 @@ static int eeprom_probe(struct i2c_client *client, } } + /* Let the users know they are using deprecated driver */ + dev_notice(&client->dev, + "eeprom driver is deprecated, please use at24 instead\n"); + /* create the sysfs eeprom file */ return sysfs_create_bin_file(&client->dev.kobj, &eeprom_attr); } diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 1b1a794d639d..ae4ee27a63c4 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -32,8 +32,9 @@ #define FASTRPC_CTX_MAX (256) #define FASTRPC_INIT_HANDLE 1 #define FASTRPC_CTXID_MASK (0xFF0) -#define INIT_FILELEN_MAX (64 * 1024 * 1024) +#define INIT_FILELEN_MAX (2 * 1024 * 1024) #define FASTRPC_DEVICE_NAME "fastrpc" +#define ADSP_MMAP_ADD_PAGES 0x1000 /* Retrives number of input buffers from the scalars parameter */ #define REMOTE_SCALARS_INBUFS(sc) (((sc) >> 16) & 0x0ff) @@ -66,6 +67,8 @@ /* Remote Method id table */ #define FASTRPC_RMID_INIT_ATTACH 0 #define FASTRPC_RMID_INIT_RELEASE 1 +#define FASTRPC_RMID_INIT_MMAP 4 +#define FASTRPC_RMID_INIT_MUNMAP 5 #define FASTRPC_RMID_INIT_CREATE 6 #define FASTRPC_RMID_INIT_CREATE_ATTR 7 #define FASTRPC_RMID_INIT_CREATE_STATIC 8 @@ -89,6 +92,23 @@ struct fastrpc_remote_arg { u64 len; }; +struct fastrpc_mmap_rsp_msg { + u64 vaddr; +}; + +struct fastrpc_mmap_req_msg { + s32 pgid; + u32 flags; + u64 vaddr; + s32 num; +}; + +struct fastrpc_munmap_req_msg { + s32 pgid; + u64 vaddr; + u64 size; +}; + struct fastrpc_msg { int pid; /* process group id */ int tid; /* thread id */ @@ -123,6 +143,9 @@ struct fastrpc_buf { /* Lock for dma buf attachments */ struct mutex lock; struct list_head attachments; + /* mmap support */ + struct list_head node; /* list of user requested mmaps */ + uintptr_t raddr; }; struct fastrpc_dma_buf_attachment { @@ -192,6 +215,7 @@ struct fastrpc_user { struct list_head user; struct list_head maps; struct list_head pending; + struct list_head mmaps; struct fastrpc_channel_ctx *cctx; struct fastrpc_session_ctx *sctx; @@ -269,6 +293,7 @@ static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev, return -ENOMEM; INIT_LIST_HEAD(&buf->attachments); + INIT_LIST_HEAD(&buf->node); mutex_init(&buf->lock); buf->fl = fl; @@ -276,6 +301,7 @@ static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev, buf->phys = 0; buf->size = size; buf->dev = dev; + buf->raddr = 0; buf->virt = dma_alloc_coherent(dev, buf->size, (dma_addr_t *)&buf->phys, GFP_KERNEL); @@ -934,8 +960,13 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, if (err) goto bail; - /* Wait for remote dsp to respond or time out */ - err = wait_for_completion_interruptible(&ctx->work); + if (kernel) { + if (!wait_for_completion_timeout(&ctx->work, 10 * HZ)) + err = -ETIMEDOUT; + } else { + err = wait_for_completion_interruptible(&ctx->work); + } + if (err) goto bail; @@ -954,12 +985,13 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, } bail: - /* We are done with this compute context, remove it from pending list */ - spin_lock(&fl->lock); - list_del(&ctx->node); - spin_unlock(&fl->lock); - fastrpc_context_put(ctx); - + if (err != -ERESTARTSYS && err != -ETIMEDOUT) { + /* We are done with this compute context */ + spin_lock(&fl->lock); + list_del(&ctx->node); + spin_unlock(&fl->lock); + fastrpc_context_put(ctx); + } if (err) dev_dbg(fl->sctx->dev, "Error: Invoke Failed %d\n", err); @@ -1131,6 +1163,7 @@ static int fastrpc_device_release(struct inode *inode, struct file *file) struct fastrpc_channel_ctx *cctx = fl->cctx; struct fastrpc_invoke_ctx *ctx, *n; struct fastrpc_map *map, *m; + struct fastrpc_buf *buf, *b; unsigned long flags; fastrpc_release_current_dsp_process(fl); @@ -1152,6 +1185,11 @@ static int fastrpc_device_release(struct inode *inode, struct file *file) fastrpc_map_put(map); } + list_for_each_entry_safe(buf, b, &fl->mmaps, node) { + list_del(&buf->node); + fastrpc_buf_free(buf); + } + fastrpc_session_free(cctx, fl->sctx); fastrpc_channel_ctx_put(cctx); @@ -1180,6 +1218,7 @@ static int fastrpc_device_open(struct inode *inode, struct file *filp) mutex_init(&fl->mutex); INIT_LIST_HEAD(&fl->pending); INIT_LIST_HEAD(&fl->maps); + INIT_LIST_HEAD(&fl->mmaps); INIT_LIST_HEAD(&fl->user); fl->tgid = current->tgid; fl->cctx = cctx; @@ -1285,6 +1324,148 @@ static int fastrpc_invoke(struct fastrpc_user *fl, char __user *argp) return err; } +static int fastrpc_req_munmap_impl(struct fastrpc_user *fl, + struct fastrpc_req_munmap *req) +{ + struct fastrpc_invoke_args args[1] = { [0] = { 0 } }; + struct fastrpc_buf *buf, *b; + struct fastrpc_munmap_req_msg req_msg; + struct device *dev = fl->sctx->dev; + int err; + u32 sc; + + spin_lock(&fl->lock); + list_for_each_entry_safe(buf, b, &fl->mmaps, node) { + if ((buf->raddr == req->vaddrout) && (buf->size == req->size)) + break; + buf = NULL; + } + spin_unlock(&fl->lock); + + if (!buf) { + dev_err(dev, "mmap not in list\n"); + return -EINVAL; + } + + req_msg.pgid = fl->tgid; + req_msg.size = buf->size; + req_msg.vaddr = buf->raddr; + + args[0].ptr = (u64) (uintptr_t) &req_msg; + args[0].length = sizeof(req_msg); + + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MUNMAP, 1, 0); + err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, + &args[0]); + if (!err) { + dev_dbg(dev, "unmmap\tpt 0x%09lx OK\n", buf->raddr); + spin_lock(&fl->lock); + list_del(&buf->node); + spin_unlock(&fl->lock); + fastrpc_buf_free(buf); + } else { + dev_err(dev, "unmmap\tpt 0x%09lx ERROR\n", buf->raddr); + } + + return err; +} + +static int fastrpc_req_munmap(struct fastrpc_user *fl, char __user *argp) +{ + struct fastrpc_req_munmap req; + + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + + return fastrpc_req_munmap_impl(fl, &req); +} + +static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp) +{ + struct fastrpc_invoke_args args[3] = { [0 ... 2] = { 0 } }; + struct fastrpc_buf *buf = NULL; + struct fastrpc_mmap_req_msg req_msg; + struct fastrpc_mmap_rsp_msg rsp_msg; + struct fastrpc_req_munmap req_unmap; + struct fastrpc_phy_page pages; + struct fastrpc_req_mmap req; + struct device *dev = fl->sctx->dev; + int err; + u32 sc; + + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + + if (req.flags != ADSP_MMAP_ADD_PAGES) { + dev_err(dev, "flag not supported 0x%x\n", req.flags); + return -EINVAL; + } + + if (req.vaddrin) { + dev_err(dev, "adding user allocated pages is not supported\n"); + return -EINVAL; + } + + err = fastrpc_buf_alloc(fl, fl->sctx->dev, req.size, &buf); + if (err) { + dev_err(dev, "failed to allocate buffer\n"); + return err; + } + + req_msg.pgid = fl->tgid; + req_msg.flags = req.flags; + req_msg.vaddr = req.vaddrin; + req_msg.num = sizeof(pages); + + args[0].ptr = (u64) (uintptr_t) &req_msg; + args[0].length = sizeof(req_msg); + + pages.addr = buf->phys; + pages.size = buf->size; + + args[1].ptr = (u64) (uintptr_t) &pages; + args[1].length = sizeof(pages); + + args[2].ptr = (u64) (uintptr_t) &rsp_msg; + args[2].length = sizeof(rsp_msg); + + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MMAP, 2, 1); + err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, + &args[0]); + if (err) { + dev_err(dev, "mmap error (len 0x%08llx)\n", buf->size); + goto err_invoke; + } + + /* update the buffer to be able to deallocate the memory on the DSP */ + buf->raddr = (uintptr_t) rsp_msg.vaddr; + + /* let the client know the address to use */ + req.vaddrout = rsp_msg.vaddr; + + spin_lock(&fl->lock); + list_add_tail(&buf->node, &fl->mmaps); + spin_unlock(&fl->lock); + + if (copy_to_user((void __user *)argp, &req, sizeof(req))) { + /* unmap the memory and release the buffer */ + req_unmap.vaddrout = buf->raddr; + req_unmap.size = buf->size; + fastrpc_req_munmap_impl(fl, &req_unmap); + return -EFAULT; + } + + dev_dbg(dev, "mmap\t\tpt 0x%09lx OK [len 0x%08llx]\n", + buf->raddr, buf->size); + + return 0; + +err_invoke: + fastrpc_buf_free(buf); + + return err; +} + static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -1305,6 +1486,12 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, case FASTRPC_IOCTL_ALLOC_DMA_BUFF: err = fastrpc_dmabuf_alloc(fl, argp); break; + case FASTRPC_IOCTL_MMAP: + err = fastrpc_req_mmap(fl, argp); + break; + case FASTRPC_IOCTL_MUNMAP: + err = fastrpc_req_munmap(fl, argp); + break; default: err = -ENOTTY; break; @@ -1430,8 +1617,8 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev) return -ENOMEM; data->miscdev.minor = MISC_DYNAMIC_MINOR; - data->miscdev.name = kasprintf(GFP_KERNEL, "fastrpc-%s", - domains[domain_id]); + data->miscdev.name = devm_kasprintf(rdev, GFP_KERNEL, "fastrpc-%s", + domains[domain_id]); data->miscdev.fops = &fastrpc_fops; err = misc_register(&data->miscdev); if (err) diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index a9ac045dcfde..8850f475a413 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -65,6 +65,18 @@ static void cs_put(struct hl_cs *cs) kref_put(&cs->refcount, cs_do_release); } +static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job) +{ + /* + * Patched CB is created for external queues jobs, and for H/W queues + * jobs if the user CB was allocated by driver and MMU is disabled. + */ + return (job->queue_type == QUEUE_TYPE_EXT || + (job->queue_type == QUEUE_TYPE_HW && + job->is_kernel_allocated_cb && + !hdev->mmu_enable)); +} + /* * cs_parser - parse the user command submission * @@ -91,11 +103,13 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job) parser.patched_cb = NULL; parser.user_cb = job->user_cb; parser.user_cb_size = job->user_cb_size; - parser.ext_queue = job->ext_queue; + parser.queue_type = job->queue_type; + parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb; job->patched_cb = NULL; rc = hdev->asic_funcs->cs_parser(hdev, &parser); - if (job->ext_queue) { + + if (is_cb_patched(hdev, job)) { if (!rc) { job->patched_cb = parser.patched_cb; job->job_cb_size = parser.patched_cb_size; @@ -124,7 +138,7 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job) { struct hl_cs *cs = job->cs; - if (job->ext_queue) { + if (is_cb_patched(hdev, job)) { hl_userptr_delete_list(hdev, &job->userptr_list); /* @@ -140,6 +154,19 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job) } } + /* For H/W queue jobs, if a user CB was allocated by driver and MMU is + * enabled, the user CB isn't released in cs_parser() and thus should be + * released here. + */ + if (job->queue_type == QUEUE_TYPE_HW && + job->is_kernel_allocated_cb && hdev->mmu_enable) { + spin_lock(&job->user_cb->lock); + job->user_cb->cs_cnt--; + spin_unlock(&job->user_cb->lock); + + hl_cb_put(job->user_cb); + } + /* * This is the only place where there can be multiple threads * modifying the list at the same time @@ -150,7 +177,8 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job) hl_debugfs_remove_job(hdev, job); - if (job->ext_queue) + if (job->queue_type == QUEUE_TYPE_EXT || + job->queue_type == QUEUE_TYPE_HW) cs_put(cs); kfree(job); @@ -387,18 +415,13 @@ static void job_wq_completion(struct work_struct *work) free_job(hdev, job); } -static struct hl_cb *validate_queue_index(struct hl_device *hdev, - struct hl_cb_mgr *cb_mgr, - struct hl_cs_chunk *chunk, - bool *ext_queue) +static int validate_queue_index(struct hl_device *hdev, + struct hl_cs_chunk *chunk, + enum hl_queue_type *queue_type, + bool *is_kernel_allocated_cb) { struct asic_fixed_properties *asic = &hdev->asic_prop; struct hw_queue_properties *hw_queue_prop; - u32 cb_handle; - struct hl_cb *cb; - - /* Assume external queue */ - *ext_queue = true; hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; @@ -406,20 +429,29 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev, (hw_queue_prop->type == QUEUE_TYPE_NA)) { dev_err(hdev->dev, "Queue index %d is invalid\n", chunk->queue_index); - return NULL; + return -EINVAL; } if (hw_queue_prop->driver_only) { dev_err(hdev->dev, "Queue index %d is restricted for the kernel driver\n", chunk->queue_index); - return NULL; - } else if (hw_queue_prop->type == QUEUE_TYPE_INT) { - *ext_queue = false; - return (struct hl_cb *) (uintptr_t) chunk->cb_handle; + return -EINVAL; } - /* Retrieve CB object */ + *queue_type = hw_queue_prop->type; + *is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb; + + return 0; +} + +static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev, + struct hl_cb_mgr *cb_mgr, + struct hl_cs_chunk *chunk) +{ + struct hl_cb *cb; + u32 cb_handle; + cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT); cb = hl_cb_get(hdev, cb_mgr, cb_handle); @@ -444,7 +476,8 @@ release_cb: return NULL; } -struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue) +struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, + enum hl_queue_type queue_type, bool is_kernel_allocated_cb) { struct hl_cs_job *job; @@ -452,12 +485,14 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue) if (!job) return NULL; - job->ext_queue = ext_queue; + job->queue_type = queue_type; + job->is_kernel_allocated_cb = is_kernel_allocated_cb; - if (job->ext_queue) { + if (is_cb_patched(hdev, job)) INIT_LIST_HEAD(&job->userptr_list); + + if (job->queue_type == QUEUE_TYPE_EXT) INIT_WORK(&job->finish_work, job_wq_completion); - } return job; } @@ -470,7 +505,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, struct hl_cs_job *job; struct hl_cs *cs; struct hl_cb *cb; - bool ext_queue_present = false; + bool int_queues_only = true; u32 size_to_copy; int rc, i, parse_cnt; @@ -514,23 +549,33 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, /* Validate ALL the CS chunks before submitting the CS */ for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) { struct hl_cs_chunk *chunk = &cs_chunk_array[i]; - bool ext_queue; + enum hl_queue_type queue_type; + bool is_kernel_allocated_cb; + + rc = validate_queue_index(hdev, chunk, &queue_type, + &is_kernel_allocated_cb); + if (rc) + goto free_cs_object; - cb = validate_queue_index(hdev, &hpriv->cb_mgr, chunk, - &ext_queue); - if (ext_queue) { - ext_queue_present = true; + if (is_kernel_allocated_cb) { + cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk); if (!cb) { rc = -EINVAL; goto free_cs_object; } + } else { + cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle; } - job = hl_cs_allocate_job(hdev, ext_queue); + if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW) + int_queues_only = false; + + job = hl_cs_allocate_job(hdev, queue_type, + is_kernel_allocated_cb); if (!job) { dev_err(hdev->dev, "Failed to allocate a new job\n"); rc = -ENOMEM; - if (ext_queue) + if (is_kernel_allocated_cb) goto release_cb; else goto free_cs_object; @@ -540,7 +585,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, job->cs = cs; job->user_cb = cb; job->user_cb_size = chunk->cb_size; - if (job->ext_queue) + if (is_kernel_allocated_cb) job->job_cb_size = cb->size; else job->job_cb_size = chunk->cb_size; @@ -553,10 +598,11 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, /* * Increment CS reference. When CS reference is 0, CS is * done and can be signaled to user and free all its resources - * Only increment for JOB on external queues, because only - * for those JOBs we get completion + * Only increment for JOB on external or H/W queues, because + * only for those JOBs we get completion */ - if (job->ext_queue) + if (job->queue_type == QUEUE_TYPE_EXT || + job->queue_type == QUEUE_TYPE_HW) cs_get(cs); hl_debugfs_add_job(hdev, job); @@ -570,9 +616,9 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, } } - if (!ext_queue_present) { + if (int_queues_only) { dev_err(hdev->dev, - "Reject CS %d.%llu because no external queues jobs\n", + "Reject CS %d.%llu because only internal queues jobs are present\n", cs->ctx->asid, cs->sequence); rc = -EINVAL; goto free_cs_object; @@ -580,9 +626,10 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, rc = hl_hw_queue_schedule_cs(cs); if (rc) { - dev_err(hdev->dev, - "Failed to submit CS %d.%llu to H/W queues, error %d\n", - cs->ctx->asid, cs->sequence, rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to submit CS %d.%llu to H/W queues, error %d\n", + cs->ctx->asid, cs->sequence, rc); goto free_cs_object; } diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index 87f37ac31ccd..20413e350343 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -307,45 +307,57 @@ static inline u64 get_hop0_addr(struct hl_ctx *ctx) (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); } -static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr) +static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, + u64 virt_addr, u64 mask, u64 shift) { return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & HOP0_MASK) >> HOP0_SHIFT); + ((virt_addr & mask) >> shift); } -static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr) +static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) { - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & HOP1_MASK) >> HOP1_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask, + mmu_specs->hop0_shift); } -static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr) +static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) { - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & HOP2_MASK) >> HOP2_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask, + mmu_specs->hop1_shift); } -static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr) +static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) { - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & HOP3_MASK) >> HOP3_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask, + mmu_specs->hop2_shift); } -static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr) +static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) { - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & HOP4_MASK) >> HOP4_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask, + mmu_specs->hop3_shift); +} + +static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask, + mmu_specs->hop4_shift); } static inline u64 get_next_hop_addr(u64 curr_pte) { if (curr_pte & PAGE_PRESENT_MASK) - return curr_pte & PHYS_ADDR_MASK; + return curr_pte & HOP_PHYS_ADDR_MASK; else return ULLONG_MAX; } @@ -355,7 +367,10 @@ static int mmu_show(struct seq_file *s, void *data) struct hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; struct hl_ctx *ctx; + bool is_dram_addr; u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0, hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0, @@ -377,33 +392,39 @@ static int mmu_show(struct seq_file *s, void *data) return 0; } + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->va_space_dram_start_address, + prop->va_space_dram_end_address); + + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + mutex_lock(&ctx->mmu_lock); /* the following lookup is copied from unmap() in mmu.c */ hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr); + hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr); hop1_addr = get_next_hop_addr(hop0_pte); if (hop1_addr == ULLONG_MAX) goto not_mapped; - hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr); + hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr); hop2_addr = get_next_hop_addr(hop1_pte); if (hop2_addr == ULLONG_MAX) goto not_mapped; - hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr); + hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr); hop3_addr = get_next_hop_addr(hop2_pte); if (hop3_addr == ULLONG_MAX) goto not_mapped; - hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr); + hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr); if (!(hop3_pte & LAST_MASK)) { @@ -412,7 +433,8 @@ static int mmu_show(struct seq_file *s, void *data) if (hop4_addr == ULLONG_MAX) goto not_mapped; - hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr); + hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, + virt_addr); hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr); if (!(hop4_pte & PAGE_PRESENT_MASK)) goto not_mapped; @@ -506,6 +528,12 @@ static int engines_show(struct seq_file *s, void *data) struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, + "Can't check device idle during reset\n"); + return 0; + } + hdev->asic_funcs->is_device_idle(hdev, NULL, s); return 0; @@ -534,41 +562,50 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u64 *phys_addr) { struct hl_ctx *ctx = hdev->compute_ctx; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; u64 hop_addr, hop_pte_addr, hop_pte; - u64 offset_mask = HOP4_MASK | OFFSET_MASK; + u64 offset_mask = HOP4_MASK | FLAGS_MASK; int rc = 0; + bool is_dram_addr; if (!ctx) { dev_err(hdev->dev, "no ctx available\n"); return -EINVAL; } + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->va_space_dram_start_address, + prop->va_space_dram_end_address); + + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + mutex_lock(&ctx->mmu_lock); /* hop 0 */ hop_addr = get_hop0_addr(ctx); - hop_pte_addr = get_hop0_pte_addr(ctx, hop_addr, virt_addr); + hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); /* hop 1 */ hop_addr = get_next_hop_addr(hop_pte); if (hop_addr == ULLONG_MAX) goto not_mapped; - hop_pte_addr = get_hop1_pte_addr(ctx, hop_addr, virt_addr); + hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); /* hop 2 */ hop_addr = get_next_hop_addr(hop_pte); if (hop_addr == ULLONG_MAX) goto not_mapped; - hop_pte_addr = get_hop2_pte_addr(ctx, hop_addr, virt_addr); + hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); /* hop 3 */ hop_addr = get_next_hop_addr(hop_pte); if (hop_addr == ULLONG_MAX) goto not_mapped; - hop_pte_addr = get_hop3_pte_addr(ctx, hop_addr, virt_addr); + hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); if (!(hop_pte & LAST_MASK)) { @@ -576,10 +613,11 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, hop_addr = get_next_hop_addr(hop_pte); if (hop_addr == ULLONG_MAX) goto not_mapped; - hop_pte_addr = get_hop4_pte_addr(ctx, hop_addr, virt_addr); + hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr, + virt_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - offset_mask = OFFSET_MASK; + offset_mask = FLAGS_MASK; } if (!(hop_pte & PAGE_PRESENT_MASK)) @@ -608,6 +646,11 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf, u32 val; ssize_t rc; + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't read during reset\n"); + return 0; + } + if (*ppos) return 0; @@ -637,6 +680,11 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, u32 value; ssize_t rc; + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't write during reset\n"); + return 0; + } + rc = kstrtouint_from_user(buf, count, 16, &value); if (rc) return rc; diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 459fee70a597..b155e9549076 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -42,12 +42,10 @@ static void hpriv_release(struct kref *ref) { struct hl_fpriv *hpriv; struct hl_device *hdev; - struct hl_ctx *ctx; hpriv = container_of(ref, struct hl_fpriv, refcount); hdev = hpriv->hdev; - ctx = hpriv->ctx; put_pid(hpriv->taskpid); @@ -889,13 +887,19 @@ again: /* Go over all the queues, release all CS and their jobs */ hl_cs_rollback_all(hdev); - /* Kill processes here after CS rollback. This is because the process - * can't really exit until all its CSs are done, which is what we - * do in cs rollback - */ - if (from_hard_reset_thread) + if (hard_reset) { + /* Kill processes here after CS rollback. This is because the + * process can't really exit until all its CSs are done, which + * is what we do in cs rollback + */ device_kill_open_processes(hdev); + /* Flush the Event queue workers to make sure no other thread is + * reading or writing to registers during the reset + */ + flush_workqueue(hdev->eq_wq); + } + /* Release kernel context */ if ((hard_reset) && (hl_ctx_put(hdev->kernel_ctx) == 1)) hdev->kernel_ctx = NULL; diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c index ea2ca67fbfbf..f5bd03171dac 100644 --- a/drivers/misc/habanalabs/firmware_if.c +++ b/drivers/misc/habanalabs/firmware_if.c @@ -143,10 +143,7 @@ int hl_fw_test_cpu_queue(struct hl_device *hdev) sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result); if (!rc) { - if (result == ARMCP_PACKET_FENCE_VAL) - dev_info(hdev->dev, - "queue test on CPU queue succeeded\n"); - else + if (result != ARMCP_PACKET_FENCE_VAL) dev_err(hdev->dev, "CPU queue test failed (0x%08lX)\n", result); } else { diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 6fba14b81f90..c8d16aa4382c 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -72,6 +72,9 @@ * */ +#define GOYA_UBOOT_FW_FILE "habanalabs/goya/goya-u-boot.bin" +#define GOYA_LINUX_FW_FILE "habanalabs/goya/goya-fit.itb" + #define GOYA_MMU_REGS_NUM 63 #define GOYA_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */ @@ -337,17 +340,20 @@ void goya_get_fixed_properties(struct hl_device *hdev) for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) { prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; prop->hw_queues_props[i].driver_only = 0; + prop->hw_queues_props[i].requires_kernel_cb = 1; } for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) { prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; prop->hw_queues_props[i].driver_only = 1; + prop->hw_queues_props[i].requires_kernel_cb = 0; } for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES + NUMBER_OF_INT_HW_QUEUES; i++) { prop->hw_queues_props[i].type = QUEUE_TYPE_INT; prop->hw_queues_props[i].driver_only = 0; + prop->hw_queues_props[i].requires_kernel_cb = 0; } for (; i < HL_MAX_QUEUES; i++) @@ -377,6 +383,23 @@ void goya_get_fixed_properties(struct hl_device *hdev) prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE; prop->dram_page_size = PAGE_SIZE_2MB; + prop->dmmu.hop0_shift = HOP0_SHIFT; + prop->dmmu.hop1_shift = HOP1_SHIFT; + prop->dmmu.hop2_shift = HOP2_SHIFT; + prop->dmmu.hop3_shift = HOP3_SHIFT; + prop->dmmu.hop4_shift = HOP4_SHIFT; + prop->dmmu.hop0_mask = HOP0_MASK; + prop->dmmu.hop1_mask = HOP1_MASK; + prop->dmmu.hop2_mask = HOP2_MASK; + prop->dmmu.hop3_mask = HOP3_MASK; + prop->dmmu.hop4_mask = HOP4_MASK; + prop->dmmu.huge_page_size = PAGE_SIZE_2MB; + + /* No difference between PMMU and DMMU except of page size */ + memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu)); + prop->dmmu.page_size = PAGE_SIZE_2MB; + prop->pmmu.page_size = PAGE_SIZE_4KB; + prop->va_space_host_start_address = VA_HOST_SPACE_START; prop->va_space_host_end_address = VA_HOST_SPACE_END; prop->va_space_dram_start_address = VA_DDR_SPACE_START; @@ -393,6 +416,9 @@ void goya_get_fixed_properties(struct hl_device *hdev) prop->tpc_enabled_mask = TPC_ENABLED_MASK; prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; + + strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME, + CARD_NAME_MAX_LEN); } /* @@ -1454,6 +1480,9 @@ static void goya_init_golden_registers(struct hl_device *hdev) 1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT); WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset, 1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT); + + WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset, + ICACHE_FETCH_LINE_NUM, 2); } WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT); @@ -1533,7 +1562,6 @@ static void goya_init_mme_cmdq(struct hl_device *hdev) u32 mtr_base_lo, mtr_base_hi; u32 so_base_lo, so_base_hi; u32 gic_base_lo, gic_base_hi; - u64 qman_base_addr; mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); @@ -1545,9 +1573,6 @@ static void goya_init_mme_cmdq(struct hl_device *hdev) gic_base_hi = upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); - qman_base_addr = hdev->asic_prop.sram_base_address + - MME_QMAN_BASE_OFFSET; - WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo); WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi); WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo); @@ -2141,13 +2166,11 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset) */ static int goya_push_uboot_to_device(struct hl_device *hdev) { - char fw_name[200]; void __iomem *dst; - snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin"); dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET; - return hl_fw_push_fw_to_device(hdev, fw_name, dst); + return hl_fw_push_fw_to_device(hdev, GOYA_UBOOT_FW_FILE, dst); } /* @@ -2160,13 +2183,11 @@ static int goya_push_uboot_to_device(struct hl_device *hdev) */ static int goya_push_linux_to_device(struct hl_device *hdev) { - char fw_name[200]; void __iomem *dst; - snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb"); dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET; - return hl_fw_push_fw_to_device(hdev, fw_name, dst); + return hl_fw_push_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst); } static int goya_pldm_init_cpu(struct hl_device *hdev) @@ -2291,6 +2312,10 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout) 10000, cpu_timeout); + /* Read U-Boot version now in case we will later fail */ + goya_read_device_fw_version(hdev, FW_COMP_UBOOT); + goya_read_device_fw_version(hdev, FW_COMP_PREBOOT); + if (rc) { dev_err(hdev->dev, "Error in ARM u-boot!"); switch (status) { @@ -2328,6 +2353,11 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout) "ARM status %d - u-boot stopped by user\n", status); break; + case CPU_BOOT_STATUS_TS_INIT_FAIL: + dev_err(hdev->dev, + "ARM status %d - Thermal Sensor initialization failed\n", + status); + break; default: dev_err(hdev->dev, "ARM status %d - Invalid status code\n", @@ -2337,10 +2367,6 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout) return -EIO; } - /* Read U-Boot version now in case we will later fail */ - goya_read_device_fw_version(hdev, FW_COMP_UBOOT); - goya_read_device_fw_version(hdev, FW_COMP_PREBOOT); - if (!hdev->fw_loading) { dev_info(hdev->dev, "Skip loading FW\n"); goto out; @@ -2453,7 +2479,8 @@ int goya_mmu_init(struct hl_device *hdev) WREG32_AND(mmSTLB_STLB_FEATURE_EN, (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK)); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true); + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, + VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK); WREG32(mmMMU_MMU_ENABLE, 1); WREG32(mmMMU_SPI_MASK, 0xF); @@ -2978,9 +3005,6 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id) "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n", hw_queue_id, (unsigned long long) fence_dma_addr, tmp); rc = -EIO; - } else { - dev_info(hdev->dev, "queue test on H/W queue %d succeeded\n", - hw_queue_id); } free_pkt: @@ -3925,7 +3949,7 @@ static int goya_parse_cb_no_ext_queue(struct hl_device *hdev, return 0; dev_err(hdev->dev, - "Internal CB address %px + 0x%x is not in SRAM nor in DRAM\n", + "Internal CB address 0x%px + 0x%x is not in SRAM nor in DRAM\n", parser->user_cb, parser->user_cb_size); return -EFAULT; @@ -3935,7 +3959,7 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) { struct goya_device *goya = hdev->asic_specific; - if (!parser->ext_queue) + if (parser->queue_type == QUEUE_TYPE_INT) return goya_parse_cb_no_ext_queue(hdev, parser); if (goya->hw_cap_initialized & HW_CAP_MMU) @@ -4606,7 +4630,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, lin_dma_pkt++; } while (--lin_dma_pkts_cnt); - job = hl_cs_allocate_job(hdev, true); + job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); if (!job) { dev_err(hdev->dev, "Failed to allocate a new job\n"); rc = -ENOMEM; @@ -4835,13 +4859,15 @@ static void goya_mmu_prepare(struct hl_device *hdev, u32 asid) goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid); } -static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard) +static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, + u32 flags) { struct goya_device *goya = hdev->asic_specific; u32 status, timeout_usec; int rc; - if (!(goya->hw_cap_initialized & HW_CAP_MMU)) + if (!(goya->hw_cap_initialized & HW_CAP_MMU) || + hdev->hard_reset_pending) return; /* no need in L1 only invalidation in Goya */ @@ -4880,7 +4906,8 @@ static void goya_mmu_invalidate_cache_range(struct hl_device *hdev, u32 status, timeout_usec, inv_data, pi; int rc; - if (!(goya->hw_cap_initialized & HW_CAP_MMU)) + if (!(goya->hw_cap_initialized & HW_CAP_MMU) || + hdev->hard_reset_pending) return; /* no need in L1 only invalidation in Goya */ @@ -5137,7 +5164,8 @@ static const struct hl_asic_funcs goya_funcs = { .init_iatu = goya_init_iatu, .rreg = hl_rreg, .wreg = hl_wreg, - .halt_coresight = goya_halt_coresight + .halt_coresight = goya_halt_coresight, + .get_clk_rate = goya_get_clk_rate }; /* diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 89b6574f8e4f..c3230cb6e25c 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -233,4 +233,6 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr); void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev); +int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); + #endif /* GOYAP_H_ */ diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c index b4d406af1bed..c1ee6e2b5dff 100644 --- a/drivers/misc/habanalabs/goya/goya_coresight.c +++ b/drivers/misc/habanalabs/goya/goya_coresight.c @@ -8,6 +8,7 @@ #include "goyaP.h" #include "include/goya/goya_coresight.h" #include "include/goya/asic_reg/goya_regs.h" +#include "include/goya/asic_reg/goya_masks.h" #include <uapi/misc/habanalabs.h> @@ -377,33 +378,32 @@ static int goya_config_etr(struct hl_device *hdev, struct hl_debug_params *params) { struct hl_debug_params_etr *input; - u64 base_reg = mmPSOC_ETR_BASE - CFG_BASE; u32 val; int rc; - WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK); + WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK); - val = RREG32(base_reg + 0x304); + val = RREG32(mmPSOC_ETR_FFCR); val |= 0x1000; - WREG32(base_reg + 0x304, val); + WREG32(mmPSOC_ETR_FFCR, val); val |= 0x40; - WREG32(base_reg + 0x304, val); + WREG32(mmPSOC_ETR_FFCR, val); - rc = goya_coresight_timeout(hdev, base_reg + 0x304, 6, false); + rc = goya_coresight_timeout(hdev, mmPSOC_ETR_FFCR, 6, false); if (rc) { dev_err(hdev->dev, "Failed to %s ETR on timeout, error %d\n", params->enable ? "enable" : "disable", rc); return rc; } - rc = goya_coresight_timeout(hdev, base_reg + 0xC, 2, true); + rc = goya_coresight_timeout(hdev, mmPSOC_ETR_STS, 2, true); if (rc) { dev_err(hdev->dev, "Failed to %s ETR on timeout, error %d\n", params->enable ? "enable" : "disable", rc); return rc; } - WREG32(base_reg + 0x20, 0); + WREG32(mmPSOC_ETR_CTL, 0); if (params->enable) { input = params->input; @@ -423,25 +423,26 @@ static int goya_config_etr(struct hl_device *hdev, return -EINVAL; } - WREG32(base_reg + 0x34, 0x3FFC); - WREG32(base_reg + 0x4, input->buffer_size); - WREG32(base_reg + 0x28, input->sink_mode); - WREG32(base_reg + 0x110, 0x700); - WREG32(base_reg + 0x118, + WREG32(mmPSOC_ETR_BUFWM, 0x3FFC); + WREG32(mmPSOC_ETR_RSZ, input->buffer_size); + WREG32(mmPSOC_ETR_MODE, input->sink_mode); + WREG32(mmPSOC_ETR_AXICTL, + 0x700 | PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT); + WREG32(mmPSOC_ETR_DBALO, lower_32_bits(input->buffer_address)); - WREG32(base_reg + 0x11C, + WREG32(mmPSOC_ETR_DBAHI, upper_32_bits(input->buffer_address)); - WREG32(base_reg + 0x304, 3); - WREG32(base_reg + 0x308, 0xA); - WREG32(base_reg + 0x20, 1); + WREG32(mmPSOC_ETR_FFCR, 3); + WREG32(mmPSOC_ETR_PSCR, 0xA); + WREG32(mmPSOC_ETR_CTL, 1); } else { - WREG32(base_reg + 0x34, 0); - WREG32(base_reg + 0x4, 0x400); - WREG32(base_reg + 0x118, 0); - WREG32(base_reg + 0x11C, 0); - WREG32(base_reg + 0x308, 0); - WREG32(base_reg + 0x28, 0); - WREG32(base_reg + 0x304, 0); + WREG32(mmPSOC_ETR_BUFWM, 0); + WREG32(mmPSOC_ETR_RSZ, 0x400); + WREG32(mmPSOC_ETR_DBALO, 0); + WREG32(mmPSOC_ETR_DBAHI, 0); + WREG32(mmPSOC_ETR_PSCR, 0); + WREG32(mmPSOC_ETR_MODE, 0); + WREG32(mmPSOC_ETR_FFCR, 0); if (params->output_size >= sizeof(u64)) { u32 rwp, rwphi; @@ -451,8 +452,8 @@ static int goya_config_etr(struct hl_device *hdev, * the buffer is set in the RWP register (lower 32 * bits), and in the RWPHI register (upper 8 bits). */ - rwp = RREG32(base_reg + 0x18); - rwphi = RREG32(base_reg + 0x3c) & 0xff; + rwp = RREG32(mmPSOC_ETR_RWP); + rwphi = RREG32(mmPSOC_ETR_RWPHI) & 0xff; *(u64 *) params->output = ((u64) rwphi << 32) | rwp; } } diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c index a2a700c3d597..b2ebc01e27f4 100644 --- a/drivers/misc/habanalabs/goya/goya_hwmgr.c +++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c @@ -32,6 +32,37 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq) } } +int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) +{ + long value; + + if (hl_device_disabled_or_in_reset(hdev)) + return -ENODEV; + + value = hl_get_frequency(hdev, MME_PLL, false); + + if (value < 0) { + dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", + value); + return value; + } + + *max_clk = (value / 1000 / 1000); + + value = hl_get_frequency(hdev, MME_PLL, true); + + if (value < 0) { + dev_err(hdev->dev, + "Failed to retrieve device current clock %ld\n", + value); + return value; + } + + *cur_clk = (value / 1000 / 1000); + + return 0; +} + static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 75862be53c60..00c949f4ccd1 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -40,8 +40,6 @@ #define HL_MAX_QUEUES 128 -#define HL_MAX_JOBS_PER_CS 64 - /* MUST BE POWER OF 2 and larger than 1 */ #define HL_MAX_PENDING_CS 64 @@ -85,12 +83,15 @@ struct hl_fpriv; * @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's * memories and/or operates the compute engines. * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU. + * @QUEUE_TYPE_HW: queue of DMA and compute engines jobs, for which completion + * notifications are sent by H/W. */ enum hl_queue_type { QUEUE_TYPE_NA, QUEUE_TYPE_EXT, QUEUE_TYPE_INT, - QUEUE_TYPE_CPU + QUEUE_TYPE_CPU, + QUEUE_TYPE_HW }; /** @@ -98,10 +99,13 @@ enum hl_queue_type { * @type: queue type. * @driver_only: true if only the driver is allowed to send a job to this queue, * false otherwise. + * @requires_kernel_cb: true if a CB handle must be provided for jobs on this + * queue, false otherwise (a CB address must be provided). */ struct hw_queue_properties { enum hl_queue_type type; u8 driver_only; + u8 requires_kernel_cb; }; /** @@ -110,8 +114,8 @@ struct hw_queue_properties { * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address. */ enum vm_type_t { - VM_TYPE_USERPTR, - VM_TYPE_PHYS_PACK + VM_TYPE_USERPTR = 0x1, + VM_TYPE_PHYS_PACK = 0x2 }; /** @@ -127,12 +131,44 @@ enum hl_device_hw_state { }; /** + * struct hl_mmu_properties - ASIC specific MMU address translation properties. + * @hop0_shift: shift of hop 0 mask. + * @hop1_shift: shift of hop 1 mask. + * @hop2_shift: shift of hop 2 mask. + * @hop3_shift: shift of hop 3 mask. + * @hop4_shift: shift of hop 4 mask. + * @hop0_mask: mask to get the PTE address in hop 0. + * @hop1_mask: mask to get the PTE address in hop 1. + * @hop2_mask: mask to get the PTE address in hop 2. + * @hop3_mask: mask to get the PTE address in hop 3. + * @hop4_mask: mask to get the PTE address in hop 4. + * @page_size: default page size used to allocate memory. + * @huge_page_size: page size used to allocate memory with huge pages. + */ +struct hl_mmu_properties { + u64 hop0_shift; + u64 hop1_shift; + u64 hop2_shift; + u64 hop3_shift; + u64 hop4_shift; + u64 hop0_mask; + u64 hop1_mask; + u64 hop2_mask; + u64 hop3_mask; + u64 hop4_mask; + u32 page_size; + u32 huge_page_size; +}; + +/** * struct asic_fixed_properties - ASIC specific immutable properties. * @hw_queues_props: H/W queues properties. * @armcp_info: received various information from ArmCP regarding the H/W, e.g. * available sensors. * @uboot_ver: F/W U-boot version. * @preboot_ver: F/W Preboot version. + * @dmmu: DRAM MMU address translation properties. + * @pmmu: PCI (host) MMU address translation properties. * @sram_base_address: SRAM physical start address. * @sram_end_address: SRAM physical end address. * @sram_user_base_address - SRAM physical start address for user access. @@ -169,53 +205,55 @@ enum hl_device_hw_state { * @psoc_pci_pll_nf: PCI PLL NF value. * @psoc_pci_pll_od: PCI PLL OD value. * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value. - * @completion_queues_count: number of completion queues. * @high_pll: high PLL frequency used by the device. * @cb_pool_cb_cnt: number of CBs in the CB pool. * @cb_pool_cb_size: size of each CB in the CB pool. * @tpc_enabled_mask: which TPCs are enabled. + * @completion_queues_count: number of completion queues. */ struct asic_fixed_properties { struct hw_queue_properties hw_queues_props[HL_MAX_QUEUES]; - struct armcp_info armcp_info; - char uboot_ver[VERSION_MAX_LEN]; - char preboot_ver[VERSION_MAX_LEN]; - u64 sram_base_address; - u64 sram_end_address; - u64 sram_user_base_address; - u64 dram_base_address; - u64 dram_end_address; - u64 dram_user_base_address; - u64 dram_size; - u64 dram_pci_bar_size; - u64 max_power_default; - u64 va_space_host_start_address; - u64 va_space_host_end_address; - u64 va_space_dram_start_address; - u64 va_space_dram_end_address; - u64 dram_size_for_default_page_mapping; - u64 pcie_dbi_base_address; - u64 pcie_aux_dbi_reg_addr; - u64 mmu_pgt_addr; - u64 mmu_dram_default_page_addr; - u32 mmu_pgt_size; - u32 mmu_pte_size; - u32 mmu_hop_table_size; - u32 mmu_hop0_tables_total_size; - u32 dram_page_size; - u32 cfg_size; - u32 sram_size; - u32 max_asid; - u32 num_of_events; - u32 psoc_pci_pll_nr; - u32 psoc_pci_pll_nf; - u32 psoc_pci_pll_od; - u32 psoc_pci_pll_div_factor; - u32 high_pll; - u32 cb_pool_cb_cnt; - u32 cb_pool_cb_size; - u8 completion_queues_count; - u8 tpc_enabled_mask; + struct armcp_info armcp_info; + char uboot_ver[VERSION_MAX_LEN]; + char preboot_ver[VERSION_MAX_LEN]; + struct hl_mmu_properties dmmu; + struct hl_mmu_properties pmmu; + u64 sram_base_address; + u64 sram_end_address; + u64 sram_user_base_address; + u64 dram_base_address; + u64 dram_end_address; + u64 dram_user_base_address; + u64 dram_size; + u64 dram_pci_bar_size; + u64 max_power_default; + u64 va_space_host_start_address; + u64 va_space_host_end_address; + u64 va_space_dram_start_address; + u64 va_space_dram_end_address; + u64 dram_size_for_default_page_mapping; + u64 pcie_dbi_base_address; + u64 pcie_aux_dbi_reg_addr; + u64 mmu_pgt_addr; + u64 mmu_dram_default_page_addr; + u32 mmu_pgt_size; + u32 mmu_pte_size; + u32 mmu_hop_table_size; + u32 mmu_hop0_tables_total_size; + u32 dram_page_size; + u32 cfg_size; + u32 sram_size; + u32 max_asid; + u32 num_of_events; + u32 psoc_pci_pll_nr; + u32 psoc_pci_pll_nf; + u32 psoc_pci_pll_od; + u32 psoc_pci_pll_div_factor; + u32 high_pll; + u32 cb_pool_cb_cnt; + u32 cb_pool_cb_size; + u8 tpc_enabled_mask; + u8 completion_queues_count; }; /** @@ -236,8 +274,6 @@ struct hl_dma_fence { * Command Buffers */ -#define HL_MAX_CB_SIZE 0x200000 /* 2MB */ - /** * struct hl_cb_mgr - describes a Command Buffer Manager. * @cb_lock: protects cb_handles. @@ -481,8 +517,8 @@ enum hl_pll_frequency { * @get_events_stat: retrieve event queue entries histogram. * @read_pte: read MMU page table entry from DRAM. * @write_pte: write MMU page table entry to DRAM. - * @mmu_invalidate_cache: flush MMU STLB cache, either with soft (L1 only) or - * hard (L0 & L1) flush. + * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft + * (L1 only) or hard (L0 & L1) flush. * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with * ASID-VA-size mask. * @send_heartbeat: send is-alive packet to ArmCP and verify response. @@ -502,6 +538,7 @@ enum hl_pll_frequency { * @rreg: Read a register. Needed for simulator support. * @wreg: Write a register. Needed for simulator support. * @halt_coresight: stop the ETF and ETR traces. + * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -562,7 +599,8 @@ struct hl_asic_funcs { u32 *size); u64 (*read_pte)(struct hl_device *hdev, u64 addr); void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val); - void (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard); + void (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard, + u32 flags); void (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, u32 asid, u64 va, u64 size); int (*send_heartbeat)(struct hl_device *hdev); @@ -584,6 +622,7 @@ struct hl_asic_funcs { u32 (*rreg)(struct hl_device *hdev, u32 reg); void (*wreg)(struct hl_device *hdev, u32 reg, u32 val); void (*halt_coresight)(struct hl_device *hdev); + int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); }; @@ -688,7 +727,7 @@ struct hl_ctx_mgr { * @sgt: pointer to the scatter-gather table that holds the pages. * @dir: for DMA unmapping, the direction must be supplied, so save it. * @debugfs_list: node in debugfs list of command submissions. - * @addr: user-space virtual pointer to the start of the memory area. + * @addr: user-space virtual address of the start of the memory area. * @size: size of the memory area to pin & map. * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise. */ @@ -752,11 +791,14 @@ struct hl_cs { * @userptr_list: linked-list of userptr mappings that belong to this job and * wait for completion. * @debugfs_list: node in debugfs list of command submission jobs. + * @queue_type: the type of the H/W queue this job is submitted to. * @id: the id of this job inside a CS. * @hw_queue_id: the id of the H/W queue this job is submitted to. * @user_cb_size: the actual size of the CB we got from the user. * @job_cb_size: the actual size of the CB that we put on the queue. - * @ext_queue: whether the job is for external queue or internal queue. + * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a + * handle to a kernel-allocated CB object, false + * otherwise (SRAM/DRAM/host address). */ struct hl_cs_job { struct list_head cs_node; @@ -766,39 +808,44 @@ struct hl_cs_job { struct work_struct finish_work; struct list_head userptr_list; struct list_head debugfs_list; + enum hl_queue_type queue_type; u32 id; u32 hw_queue_id; u32 user_cb_size; u32 job_cb_size; - u8 ext_queue; + u8 is_kernel_allocated_cb; }; /** - * struct hl_cs_parser - command submission paerser properties. + * struct hl_cs_parser - command submission parser properties. * @user_cb: the CB we got from the user. * @patched_cb: in case of patching, this is internal CB which is submitted on * the queue instead of the CB we got from the IOCTL. * @job_userptr_list: linked-list of userptr mappings that belong to the related * job and wait for completion. * @cs_sequence: the sequence number of the related CS. + * @queue_type: the type of the H/W queue this job is submitted to. * @ctx_id: the ID of the context the related CS belongs to. * @hw_queue_id: the id of the H/W queue this job is submitted to. * @user_cb_size: the actual size of the CB we got from the user. * @patched_cb_size: the size of the CB after parsing. - * @ext_queue: whether the job is for external queue or internal queue. * @job_id: the id of the related job inside the related CS. + * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a + * handle to a kernel-allocated CB object, false + * otherwise (SRAM/DRAM/host address). */ struct hl_cs_parser { struct hl_cb *user_cb; struct hl_cb *patched_cb; struct list_head *job_userptr_list; u64 cs_sequence; + enum hl_queue_type queue_type; u32 ctx_id; u32 hw_queue_id; u32 user_cb_size; u32 patched_cb_size; - u8 ext_queue; u8 job_id; + u8 is_kernel_allocated_cb; }; @@ -1048,9 +1095,10 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); #define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT #define REG_FIELD_MASK(reg, field) reg##_##field##_MASK -#define WREG32_FIELD(reg, field, val) \ - WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | \ - (val) << REG_FIELD_SHIFT(reg, field)) +#define WREG32_FIELD(reg, offset, field, val) \ + WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & \ + ~REG_FIELD_MASK(reg, field)) | \ + (val) << REG_FIELD_SHIFT(reg, field)) /* Timeout should be longer when working with simulator but cap the * increased timeout to some maximum @@ -1501,7 +1549,8 @@ int hl_cb_pool_init(struct hl_device *hdev); int hl_cb_pool_fini(struct hl_device *hdev); void hl_cs_rollback_all(struct hl_device *hdev); -struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue); +struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, + enum hl_queue_type queue_type, bool is_kernel_allocated_cb); void goya_set_asic_funcs(struct hl_device *hdev); @@ -1513,7 +1562,7 @@ void hl_vm_fini(struct hl_device *hdev); int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, struct hl_userptr *userptr); -int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr); +void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr); void hl_userptr_delete_list(struct hl_device *hdev, struct list_head *userptr_list); bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size, diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c index 66d9c710073c..6474b868ef27 100644 --- a/drivers/misc/habanalabs/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/habanalabs_ioctl.c @@ -60,11 +60,16 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask; hw_ip.sram_size = prop->sram_size - sram_kmd_size; hw_ip.dram_size = prop->dram_size - dram_kmd_size; - if (hw_ip.dram_size > 0) + if (hw_ip.dram_size > PAGE_SIZE) hw_ip.dram_enabled = 1; hw_ip.num_of_events = prop->num_of_events; - memcpy(hw_ip.armcp_version, - prop->armcp_info.armcp_version, VERSION_MAX_LEN); + + memcpy(hw_ip.armcp_version, prop->armcp_info.armcp_version, + min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN)); + + memcpy(hw_ip.card_name, prop->armcp_info.card_name, + min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN)); + hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version); hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr; hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf; @@ -179,17 +184,14 @@ static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args) goto out; } - if (output) { - if (copy_to_user((void __user *) (uintptr_t) args->output_ptr, - output, - args->output_size)) { - dev_err(hdev->dev, - "copy to user failed in debug ioctl\n"); - rc = -EFAULT; - goto out; - } + if (output && copy_to_user((void __user *) (uintptr_t) args->output_ptr, + output, args->output_size)) { + dev_err(hdev->dev, "copy to user failed in debug ioctl\n"); + rc = -EFAULT; + goto out; } + out: kfree(params); kfree(output); @@ -221,6 +223,41 @@ static int device_utilization(struct hl_device *hdev, struct hl_info_args *args) min((size_t) max_size, sizeof(device_util))) ? -EFAULT : 0; } +static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_clk_rate clk_rate = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz, + &clk_rate.max_clk_rate_mhz); + if (rc) + return rc; + + return copy_to_user(out, &clk_rate, + min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0; +} + +static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_reset_count reset_count = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + reset_count.hard_reset_cnt = hdev->hard_reset_cnt; + reset_count.soft_reset_cnt = hdev->soft_reset_cnt; + + return copy_to_user(out, &reset_count, + min((size_t) max_size, sizeof(reset_count))) ? -EFAULT : 0; +} + static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, struct device *dev) { @@ -239,6 +276,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_DEVICE_STATUS: return device_status_info(hdev, args); + case HL_INFO_RESET_COUNT: + return get_reset_count(hdev, args); + default: break; } @@ -271,6 +311,10 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, rc = hw_events_info(hdev, true, args); break; + case HL_INFO_CLK_RATE: + rc = get_clk_rate(hdev, args); + break; + default: dev_err(dev, "Invalid request %d\n", args->op); rc = -ENOTTY; @@ -406,9 +450,8 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg, retcode = func(hpriv, kdata); - if (cmd & IOC_OUT) - if (copy_to_user((void __user *)arg, kdata, usize)) - retcode = -EFAULT; + if ((cmd & IOC_OUT) && copy_to_user((void __user *)arg, kdata, usize)) + retcode = -EFAULT; out_err: if (retcode) diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c index 55b383b2a116..91579dde9262 100644 --- a/drivers/misc/habanalabs/hw_queue.c +++ b/drivers/misc/habanalabs/hw_queue.c @@ -58,8 +58,8 @@ out: } /* - * ext_queue_submit_bd - Submit a buffer descriptor to an external queue - * + * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a + * H/W queue. * @hdev: pointer to habanalabs device structure * @q: pointer to habanalabs queue structure * @ctl: BD's control word @@ -73,8 +73,8 @@ out: * This function must be called when the scheduler mutex is taken * */ -static void ext_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q, - u32 ctl, u32 len, u64 ptr) +static void ext_and_hw_queue_submit_bd(struct hl_device *hdev, + struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr) { struct hl_bd *bd; @@ -174,6 +174,45 @@ static int int_queue_sanity_checks(struct hl_device *hdev, } /* + * hw_queue_sanity_checks() - Perform some sanity checks on a H/W queue. + * @hdev: Pointer to hl_device structure. + * @q: Pointer to hl_hw_queue structure. + * @num_of_entries: How many entries to check for space. + * + * Perform the following: + * - Make sure we have enough space in the completion queue. + * This check also ensures that there is enough space in the h/w queue, as + * both queues are of the same size. + * - Reserve space in the completion queue (needs to be reversed if there + * is a failure down the road before the actual submission of work). + * + * Both operations are done using the "free_slots_cnt" field of the completion + * queue. The CI counters of the queue and the completion queue are not + * needed/used for the H/W queue type. + */ +static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q, + int num_of_entries) +{ + atomic_t *free_slots = + &hdev->completion_queue[q->hw_queue_id].free_slots_cnt; + + /* + * Check we have enough space in the completion queue. + * Add -1 to counter (decrement) unless counter was already 0. + * In that case, CQ is full so we can't submit a new CB. + * atomic_add_unless will return 0 if counter was already 0. + */ + if (atomic_add_negative(num_of_entries * -1, free_slots)) { + dev_dbg(hdev->dev, "No space for %d entries on CQ %d\n", + num_of_entries, q->hw_queue_id); + atomic_add(num_of_entries, free_slots); + return -EAGAIN; + } + + return 0; +} + +/* * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion * * @hdev: pointer to hl_device structure @@ -188,7 +227,7 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, u32 cb_size, u64 cb_ptr) { struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id]; - int rc; + int rc = 0; /* * The CPU queue is a synchronous queue with an effective depth of @@ -206,11 +245,18 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, goto out; } - rc = ext_queue_sanity_checks(hdev, q, 1, false); - if (rc) - goto out; + /* + * hl_hw_queue_send_cb_no_cmpl() is called for queues of a H/W queue + * type only on init phase, when the queues are empty and being tested, + * so there is no need for sanity checks. + */ + if (q->queue_type != QUEUE_TYPE_HW) { + rc = ext_queue_sanity_checks(hdev, q, 1, false); + if (rc) + goto out; + } - ext_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr); + ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr); out: if (q->queue_type != QUEUE_TYPE_CPU) @@ -220,14 +266,14 @@ out: } /* - * ext_hw_queue_schedule_job - submit an JOB to an external queue + * ext_queue_schedule_job - submit a JOB to an external queue * * @job: pointer to the job that needs to be submitted to the queue * * This function must be called when the scheduler mutex is taken * */ -static void ext_hw_queue_schedule_job(struct hl_cs_job *job) +static void ext_queue_schedule_job(struct hl_cs_job *job) { struct hl_device *hdev = job->cs->ctx->hdev; struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; @@ -260,7 +306,7 @@ static void ext_hw_queue_schedule_job(struct hl_cs_job *job) * H/W queues is done under the scheduler mutex * * No need to check if CQ is full because it was already - * checked in hl_queue_sanity_checks + * checked in ext_queue_sanity_checks */ cq = &hdev->completion_queue[q->hw_queue_id]; cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry); @@ -274,18 +320,18 @@ static void ext_hw_queue_schedule_job(struct hl_cs_job *job) cq->pi = hl_cq_inc_ptr(cq->pi); - ext_queue_submit_bd(hdev, q, ctl, len, ptr); + ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr); } /* - * int_hw_queue_schedule_job - submit an JOB to an internal queue + * int_queue_schedule_job - submit a JOB to an internal queue * * @job: pointer to the job that needs to be submitted to the queue * * This function must be called when the scheduler mutex is taken * */ -static void int_hw_queue_schedule_job(struct hl_cs_job *job) +static void int_queue_schedule_job(struct hl_cs_job *job) { struct hl_device *hdev = job->cs->ctx->hdev; struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; @@ -308,6 +354,60 @@ static void int_hw_queue_schedule_job(struct hl_cs_job *job) } /* + * hw_queue_schedule_job - submit a JOB to a H/W queue + * + * @job: pointer to the job that needs to be submitted to the queue + * + * This function must be called when the scheduler mutex is taken + * + */ +static void hw_queue_schedule_job(struct hl_cs_job *job) +{ + struct hl_device *hdev = job->cs->ctx->hdev; + struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; + struct hl_cq *cq; + u64 ptr; + u32 offset, ctl, len; + + /* + * Upon PQE completion, COMP_DATA is used as the write data to the + * completion queue (QMAN HBW message), and COMP_OFFSET is used as the + * write address offset in the SM block (QMAN LBW message). + * The write address offset is calculated as "COMP_OFFSET << 2". + */ + offset = job->cs->sequence & (HL_MAX_PENDING_CS - 1); + ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) | + ((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK); + + len = job->job_cb_size; + + /* + * A patched CB is created only if a user CB was allocated by driver and + * MMU is disabled. If MMU is enabled, the user CB should be used + * instead. If the user CB wasn't allocated by driver, assume that it + * holds an address. + */ + if (job->patched_cb) + ptr = job->patched_cb->bus_address; + else if (job->is_kernel_allocated_cb) + ptr = job->user_cb->bus_address; + else + ptr = (u64) (uintptr_t) job->user_cb; + + /* + * No need to protect pi_offset because scheduling to the + * H/W queues is done under the scheduler mutex + * + * No need to check if CQ is full because it was already + * checked in hw_queue_sanity_checks + */ + cq = &hdev->completion_queue[q->hw_queue_id]; + cq->pi = hl_cq_inc_ptr(cq->pi); + + ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr); +} + +/* * hl_hw_queue_schedule_cs - schedule a command submission * * @job : pointer to the CS @@ -330,23 +430,34 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) } q = &hdev->kernel_queues[0]; - /* This loop assumes all external queues are consecutive */ for (i = 0, cq_cnt = 0 ; i < HL_MAX_QUEUES ; i++, q++) { - if (q->queue_type == QUEUE_TYPE_EXT) { - if (cs->jobs_in_queue_cnt[i]) { + if (cs->jobs_in_queue_cnt[i]) { + switch (q->queue_type) { + case QUEUE_TYPE_EXT: rc = ext_queue_sanity_checks(hdev, q, - cs->jobs_in_queue_cnt[i], true); - if (rc) - goto unroll_cq_resv; - cq_cnt++; - } - } else if (q->queue_type == QUEUE_TYPE_INT) { - if (cs->jobs_in_queue_cnt[i]) { + cs->jobs_in_queue_cnt[i], true); + break; + case QUEUE_TYPE_INT: rc = int_queue_sanity_checks(hdev, q, - cs->jobs_in_queue_cnt[i]); - if (rc) - goto unroll_cq_resv; + cs->jobs_in_queue_cnt[i]); + break; + case QUEUE_TYPE_HW: + rc = hw_queue_sanity_checks(hdev, q, + cs->jobs_in_queue_cnt[i]); + break; + default: + dev_err(hdev->dev, "Queue type %d is invalid\n", + q->queue_type); + rc = -EINVAL; + break; } + + if (rc) + goto unroll_cq_resv; + + if (q->queue_type == QUEUE_TYPE_EXT || + q->queue_type == QUEUE_TYPE_HW) + cq_cnt++; } } @@ -373,21 +484,30 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) } list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) - if (job->ext_queue) - ext_hw_queue_schedule_job(job); - else - int_hw_queue_schedule_job(job); + switch (job->queue_type) { + case QUEUE_TYPE_EXT: + ext_queue_schedule_job(job); + break; + case QUEUE_TYPE_INT: + int_queue_schedule_job(job); + break; + case QUEUE_TYPE_HW: + hw_queue_schedule_job(job); + break; + default: + break; + } cs->submitted = true; goto out; unroll_cq_resv: - /* This loop assumes all external queues are consecutive */ q = &hdev->kernel_queues[0]; for (i = 0 ; (i < HL_MAX_QUEUES) && (cq_cnt > 0) ; i++, q++) { - if ((q->queue_type == QUEUE_TYPE_EXT) && - (cs->jobs_in_queue_cnt[i])) { + if ((q->queue_type == QUEUE_TYPE_EXT || + q->queue_type == QUEUE_TYPE_HW) && + cs->jobs_in_queue_cnt[i]) { atomic_t *free_slots = &hdev->completion_queue[i].free_slots_cnt; atomic_add(cs->jobs_in_queue_cnt[i], free_slots); @@ -414,8 +534,8 @@ void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id) q->ci = hl_queue_inc_ptr(q->ci); } -static int ext_and_cpu_hw_queue_init(struct hl_device *hdev, - struct hl_hw_queue *q, bool is_cpu_queue) +static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, + bool is_cpu_queue) { void *p; int rc; @@ -465,7 +585,7 @@ free_queue: return rc; } -static int int_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) { void *p; @@ -485,18 +605,38 @@ static int int_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) return 0; } -static int cpu_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +{ + return ext_and_cpu_queue_init(hdev, q, true); +} + +static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) { - return ext_and_cpu_hw_queue_init(hdev, q, true); + return ext_and_cpu_queue_init(hdev, q, false); } -static int ext_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) { - return ext_and_cpu_hw_queue_init(hdev, q, false); + void *p; + + p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, + HL_QUEUE_SIZE_IN_BYTES, + &q->bus_address, + GFP_KERNEL | __GFP_ZERO); + if (!p) + return -ENOMEM; + + q->kernel_address = (u64) (uintptr_t) p; + + /* Make sure read/write pointers are initialized to start of queue */ + q->ci = 0; + q->pi = 0; + + return 0; } /* - * hw_queue_init - main initialization function for H/W queue object + * queue_init - main initialization function for H/W queue object * * @hdev: pointer to hl_device device structure * @q: pointer to hl_hw_queue queue structure @@ -505,7 +645,7 @@ static int ext_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) * Allocate dma-able memory for the queue and initialize fields * Returns 0 on success */ -static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, +static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q, u32 hw_queue_id) { int rc; @@ -516,21 +656,20 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, switch (q->queue_type) { case QUEUE_TYPE_EXT: - rc = ext_hw_queue_init(hdev, q); + rc = ext_queue_init(hdev, q); break; - case QUEUE_TYPE_INT: - rc = int_hw_queue_init(hdev, q); + rc = int_queue_init(hdev, q); break; - case QUEUE_TYPE_CPU: - rc = cpu_hw_queue_init(hdev, q); + rc = cpu_queue_init(hdev, q); + break; + case QUEUE_TYPE_HW: + rc = hw_queue_init(hdev, q); break; - case QUEUE_TYPE_NA: q->valid = 0; return 0; - default: dev_crit(hdev->dev, "wrong queue type %d during init\n", q->queue_type); @@ -554,7 +693,7 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, * * Free the queue memory */ -static void hw_queue_fini(struct hl_device *hdev, struct hl_hw_queue *q) +static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q) { if (!q->valid) return; @@ -612,7 +751,7 @@ int hl_hw_queues_create(struct hl_device *hdev) i < HL_MAX_QUEUES ; i++, q_ready_cnt++, q++) { q->queue_type = asic->hw_queues_props[i].type; - rc = hw_queue_init(hdev, q, i); + rc = queue_init(hdev, q, i); if (rc) { dev_err(hdev->dev, "failed to initialize queue %d\n", i); @@ -624,7 +763,7 @@ int hl_hw_queues_create(struct hl_device *hdev) release_queues: for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++) - hw_queue_fini(hdev, q); + queue_fini(hdev, q); kfree(hdev->kernel_queues); @@ -637,7 +776,7 @@ void hl_hw_queues_destroy(struct hl_device *hdev) int i; for (i = 0, q = hdev->kernel_queues ; i < HL_MAX_QUEUES ; i++, q++) - hw_queue_fini(hdev, q); + queue_fini(hdev, q); kfree(hdev->kernel_queues); } diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h index 8618891d5afa..3c44ef3a23ed 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h @@ -260,4 +260,6 @@ #define DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT #define DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT +#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT 1 + #endif /* ASIC_REG_GOYA_MASKS_H_ */ diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h index 19b0f0ef1d0b..fce490e6a231 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h @@ -84,6 +84,7 @@ #include "tpc6_rtr_regs.h" #include "tpc7_nrtr_regs.h" #include "tpc0_eml_cfg_regs.h" +#include "psoc_etr_regs.h" #include "psoc_global_conf_masks.h" #include "dma_macro_masks.h" diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_etr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_etr_regs.h new file mode 100644 index 000000000000..b7c33e025db5 --- /dev/null +++ b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_etr_regs.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2016-2018 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +/************************************ + ** This is an auto-generated file ** + ** DO NOT EDIT BELOW ** + ************************************/ + +#ifndef ASIC_REG_PSOC_ETR_REGS_H_ +#define ASIC_REG_PSOC_ETR_REGS_H_ + +/* + ***************************************** + * PSOC_ETR (Prototype: ETR) + ***************************************** + */ + +#define mmPSOC_ETR_RSZ 0x2C43004 + +#define mmPSOC_ETR_STS 0x2C4300C + +#define mmPSOC_ETR_RRD 0x2C43010 + +#define mmPSOC_ETR_RRP 0x2C43014 + +#define mmPSOC_ETR_RWP 0x2C43018 + +#define mmPSOC_ETR_TRG 0x2C4301C + +#define mmPSOC_ETR_CTL 0x2C43020 + +#define mmPSOC_ETR_RWD 0x2C43024 + +#define mmPSOC_ETR_MODE 0x2C43028 + +#define mmPSOC_ETR_LBUFLEVEL 0x2C4302C + +#define mmPSOC_ETR_CBUFLEVEL 0x2C43030 + +#define mmPSOC_ETR_BUFWM 0x2C43034 + +#define mmPSOC_ETR_RRPHI 0x2C43038 + +#define mmPSOC_ETR_RWPHI 0x2C4303C + +#define mmPSOC_ETR_AXICTL 0x2C43110 + +#define mmPSOC_ETR_DBALO 0x2C43118 + +#define mmPSOC_ETR_DBAHI 0x2C4311C + +#define mmPSOC_ETR_FFSR 0x2C43300 + +#define mmPSOC_ETR_FFCR 0x2C43304 + +#define mmPSOC_ETR_PSCR 0x2C43308 + +#define mmPSOC_ETR_ITMISCOP0 0x2C43EE0 + +#define mmPSOC_ETR_ITTRFLIN 0x2C43EE8 + +#define mmPSOC_ETR_ITATBDATA0 0x2C43EEC + +#define mmPSOC_ETR_ITATBCTR2 0x2C43EF0 + +#define mmPSOC_ETR_ITATBCTR1 0x2C43EF4 + +#define mmPSOC_ETR_ITATBCTR0 0x2C43EF8 + +#define mmPSOC_ETR_ITCTRL 0x2C43F00 + +#define mmPSOC_ETR_CLAIMSET 0x2C43FA0 + +#define mmPSOC_ETR_CLAIMCLR 0x2C43FA4 + +#define mmPSOC_ETR_LAR 0x2C43FB0 + +#define mmPSOC_ETR_LSR 0x2C43FB4 + +#define mmPSOC_ETR_AUTHSTATUS 0x2C43FB8 + +#define mmPSOC_ETR_DEVID 0x2C43FC8 + +#define mmPSOC_ETR_DEVTYPE 0x2C43FCC + +#define mmPSOC_ETR_PERIPHID4 0x2C43FD0 + +#define mmPSOC_ETR_PERIPHID5 0x2C43FD4 + +#define mmPSOC_ETR_PERIPHID6 0x2C43FD8 + +#define mmPSOC_ETR_PERIPHID7 0x2C43FDC + +#define mmPSOC_ETR_PERIPHID0 0x2C43FE0 + +#define mmPSOC_ETR_PERIPHID1 0x2C43FE4 + +#define mmPSOC_ETR_PERIPHID2 0x2C43FE8 + +#define mmPSOC_ETR_PERIPHID3 0x2C43FEC + +#define mmPSOC_ETR_COMPID0 0x2C43FF0 + +#define mmPSOC_ETR_COMPID1 0x2C43FF4 + +#define mmPSOC_ETR_COMPID2 0x2C43FF8 + +#define mmPSOC_ETR_COMPID3 0x2C43FFC + +#endif /* ASIC_REG_PSOC_ETR_REGS_H_ */ diff --git a/drivers/misc/habanalabs/include/hl_boot_if.h b/drivers/misc/habanalabs/include/hl_boot_if.h index 4cd04c090285..2853a2de8cf6 100644 --- a/drivers/misc/habanalabs/include/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/hl_boot_if.h @@ -20,6 +20,8 @@ enum cpu_boot_status { CPU_BOOT_STATUS_DRAM_INIT_FAIL, CPU_BOOT_STATUS_FIT_CORRUPTED, CPU_BOOT_STATUS_UBOOT_NOT_READY, + CPU_BOOT_STATUS_RESERVED, + CPU_BOOT_STATUS_TS_INIT_FAIL, }; enum kmd_msg { diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h index 71ea3c3e8ba3..a6851a9d3f03 100644 --- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h +++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h @@ -12,18 +12,16 @@ #define PAGE_SHIFT_2MB 21 #define PAGE_SIZE_2MB (_AC(1, UL) << PAGE_SHIFT_2MB) #define PAGE_SIZE_4KB (_AC(1, UL) << PAGE_SHIFT_4KB) -#define PAGE_MASK_2MB (~(PAGE_SIZE_2MB - 1)) #define PAGE_PRESENT_MASK 0x0000000000001ull #define SWAP_OUT_MASK 0x0000000000004ull #define LAST_MASK 0x0000000000800ull -#define PHYS_ADDR_MASK 0xFFFFFFFFFFFFF000ull #define HOP0_MASK 0x3000000000000ull #define HOP1_MASK 0x0FF8000000000ull #define HOP2_MASK 0x0007FC0000000ull #define HOP3_MASK 0x000003FE00000ull #define HOP4_MASK 0x00000001FF000ull -#define OFFSET_MASK 0x0000000000FFFull +#define FLAGS_MASK 0x0000000000FFFull #define HOP0_SHIFT 48 #define HOP1_SHIFT 39 @@ -31,8 +29,7 @@ #define HOP3_SHIFT 21 #define HOP4_SHIFT 12 -#define PTE_PHYS_ADDR_SHIFT 12 -#define PTE_PHYS_ADDR_MASK ~OFFSET_MASK +#define HOP_PHYS_ADDR_MASK (~FLAGS_MASK) #define HL_PTE_SIZE sizeof(u64) #define HOP_TABLE_SIZE PAGE_SIZE_4KB diff --git a/drivers/misc/habanalabs/include/qman_if.h b/drivers/misc/habanalabs/include/qman_if.h index bf59bbe27fdc..0fdb49188ed7 100644 --- a/drivers/misc/habanalabs/include/qman_if.h +++ b/drivers/misc/habanalabs/include/qman_if.h @@ -23,6 +23,8 @@ struct hl_bd { #define HL_BD_SIZE sizeof(struct hl_bd) /* + * S/W CTL FIELDS. + * * BD_CTL_REPEAT_VALID tells the CP whether the repeat field in the BD CTL is * valid. 1 means the repeat field is valid, 0 means not-valid, * i.e. repeat == 1 @@ -34,6 +36,16 @@ struct hl_bd { #define BD_CTL_SHADOW_INDEX_MASK 0x00000FFF /* + * H/W CTL FIELDS + */ + +#define BD_CTL_COMP_OFFSET_SHIFT 16 +#define BD_CTL_COMP_OFFSET_MASK 0x00FF0000 + +#define BD_CTL_COMP_DATA_SHIFT 0 +#define BD_CTL_COMP_DATA_MASK 0x0000FFFF + +/* * COMPLETION QUEUE */ diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index 365fb0cb8dff..6c72cb4eff54 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -13,7 +13,6 @@ #include <linux/slab.h> #include <linux/genalloc.h> -#define PGS_IN_2MB_PAGE (PAGE_SIZE_2MB >> PAGE_SHIFT) #define HL_MMU_DEBUG 0 /* @@ -159,20 +158,19 @@ pages_pack_err: } /* - * get_userptr_from_host_va - initialize userptr structure from given host - * virtual address - * - * @hdev : habanalabs device structure - * @args : parameters containing the virtual address and size - * @p_userptr : pointer to result userptr structure + * dma_map_host_va - DMA mapping of the given host virtual address. + * @hdev: habanalabs device structure + * @addr: the host virtual address of the memory area + * @size: the size of the memory area + * @p_userptr: pointer to result userptr structure * * This function does the following: * - Allocate userptr structure * - Pin the given host memory using the userptr structure * - Perform DMA mapping to have the DMA addresses of the pages */ -static int get_userptr_from_host_va(struct hl_device *hdev, - struct hl_mem_in *args, struct hl_userptr **p_userptr) +static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size, + struct hl_userptr **p_userptr) { struct hl_userptr *userptr; int rc; @@ -183,8 +181,7 @@ static int get_userptr_from_host_va(struct hl_device *hdev, goto userptr_err; } - rc = hl_pin_host_memory(hdev, args->map_host.host_virt_addr, - args->map_host.mem_size, userptr); + rc = hl_pin_host_memory(hdev, addr, size, userptr); if (rc) { dev_err(hdev->dev, "Failed to pin host memory\n"); goto pin_err; @@ -215,16 +212,16 @@ userptr_err: } /* - * free_userptr - free userptr structure - * - * @hdev : habanalabs device structure - * @userptr : userptr to free + * dma_unmap_host_va - DMA unmapping of the given host virtual address. + * @hdev: habanalabs device structure + * @userptr: userptr to free * * This function does the following: * - Unpins the physical pages * - Frees the userptr structure */ -static void free_userptr(struct hl_device *hdev, struct hl_userptr *userptr) +static void dma_unmap_host_va(struct hl_device *hdev, + struct hl_userptr *userptr) { hl_unpin_host_memory(hdev, userptr); kfree(userptr); @@ -253,10 +250,9 @@ static void dram_pg_pool_do_release(struct kref *ref) } /* - * free_phys_pg_pack - free physical page pack - * - * @hdev : habanalabs device structure - * @phys_pg_pack : physical page pack to free + * free_phys_pg_pack - free physical page pack + * @hdev: habanalabs device structure + * @phys_pg_pack: physical page pack to free * * This function does the following: * - For DRAM memory only, iterate over the pack and free each physical block @@ -264,7 +260,7 @@ static void dram_pg_pool_do_release(struct kref *ref) * - Free the hl_vm_phys_pg_pack structure */ static void free_phys_pg_pack(struct hl_device *hdev, - struct hl_vm_phys_pg_pack *phys_pg_pack) + struct hl_vm_phys_pg_pack *phys_pg_pack) { struct hl_vm *vm = &hdev->vm; u64 i; @@ -519,8 +515,8 @@ static inline int add_va_block(struct hl_device *hdev, * - Return the start address of the virtual block */ static u64 get_va_block(struct hl_device *hdev, - struct hl_va_range *va_range, u64 size, u64 hint_addr, - bool is_userptr) + struct hl_va_range *va_range, u64 size, u64 hint_addr, + bool is_userptr) { struct hl_vm_va_block *va_block, *new_va_block = NULL; u64 valid_start, valid_size, prev_start, prev_end, page_mask, @@ -528,18 +524,17 @@ static u64 get_va_block(struct hl_device *hdev, u32 page_size; bool add_prev = false; - if (is_userptr) { + if (is_userptr) /* * We cannot know if the user allocated memory with huge pages * or not, hence we continue with the biggest possible * granularity. */ - page_size = PAGE_SIZE_2MB; - page_mask = PAGE_MASK_2MB; - } else { - page_size = hdev->asic_prop.dram_page_size; - page_mask = ~((u64)page_size - 1); - } + page_size = hdev->asic_prop.pmmu.huge_page_size; + else + page_size = hdev->asic_prop.dmmu.page_size; + + page_mask = ~((u64)page_size - 1); mutex_lock(&va_range->lock); @@ -549,7 +544,6 @@ static u64 get_va_block(struct hl_device *hdev, /* calc the first possible aligned addr */ valid_start = va_block->start; - if (valid_start & (page_size - 1)) { valid_start &= page_mask; valid_start += page_size; @@ -561,7 +555,6 @@ static u64 get_va_block(struct hl_device *hdev, if (valid_size >= size && (!new_va_block || valid_size < res_valid_size)) { - new_va_block = va_block; res_valid_start = valid_start; res_valid_size = valid_size; @@ -631,11 +624,10 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr) /* * init_phys_pg_pack_from_userptr - initialize physical page pack from host - * memory - * - * @ctx : current context - * @userptr : userptr to initialize from - * @pphys_pg_pack : res pointer + * memory + * @ctx: current context + * @userptr: userptr to initialize from + * @pphys_pg_pack: result pointer * * This function does the following: * - Pin the physical pages related to the given virtual block @@ -643,16 +635,19 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr) * virtual block */ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, - struct hl_userptr *userptr, - struct hl_vm_phys_pg_pack **pphys_pg_pack) + struct hl_userptr *userptr, + struct hl_vm_phys_pg_pack **pphys_pg_pack) { + struct hl_mmu_properties *mmu_prop = &ctx->hdev->asic_prop.pmmu; struct hl_vm_phys_pg_pack *phys_pg_pack; struct scatterlist *sg; dma_addr_t dma_addr; u64 page_mask, total_npages; - u32 npages, page_size = PAGE_SIZE; + u32 npages, page_size = PAGE_SIZE, + huge_page_size = mmu_prop->huge_page_size; bool first = true, is_huge_page_opt = true; int rc, i, j; + u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size); phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); if (!phys_pg_pack) @@ -675,14 +670,14 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, total_npages += npages; - if ((npages % PGS_IN_2MB_PAGE) || - (dma_addr & (PAGE_SIZE_2MB - 1))) + if ((npages % pgs_in_huge_page) || + (dma_addr & (huge_page_size - 1))) is_huge_page_opt = false; } if (is_huge_page_opt) { - page_size = PAGE_SIZE_2MB; - total_npages /= PGS_IN_2MB_PAGE; + page_size = huge_page_size; + do_div(total_npages, pgs_in_huge_page); } page_mask = ~(((u64) page_size) - 1); @@ -714,7 +709,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, dma_addr += page_size; if (is_huge_page_opt) - npages -= PGS_IN_2MB_PAGE; + npages -= pgs_in_huge_page; else npages--; } @@ -731,19 +726,18 @@ page_pack_arr_mem_err: } /* - * map_phys_page_pack - maps the physical page pack - * - * @ctx : current context - * @vaddr : start address of the virtual area to map from - * @phys_pg_pack : the pack of physical pages to map to + * map_phys_pg_pack - maps the physical page pack. + * @ctx: current context + * @vaddr: start address of the virtual area to map from + * @phys_pg_pack: the pack of physical pages to map to * * This function does the following: * - Maps each chunk of virtual memory to matching physical chunk * - Stores number of successful mappings in the given argument - * - Returns 0 on success, error code otherwise. + * - Returns 0 on success, error code otherwise */ -static int map_phys_page_pack(struct hl_ctx *ctx, u64 vaddr, - struct hl_vm_phys_pg_pack *phys_pg_pack) +static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, + struct hl_vm_phys_pg_pack *phys_pg_pack) { struct hl_device *hdev = ctx->hdev; u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i; @@ -783,6 +777,36 @@ err: return rc; } +/* + * unmap_phys_pg_pack - unmaps the physical page pack + * @ctx: current context + * @vaddr: start address of the virtual area to unmap + * @phys_pg_pack: the pack of physical pages to unmap + */ +static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, + struct hl_vm_phys_pg_pack *phys_pg_pack) +{ + struct hl_device *hdev = ctx->hdev; + u64 next_vaddr, i; + u32 page_size; + + page_size = phys_pg_pack->page_size; + next_vaddr = vaddr; + + for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) { + if (hl_mmu_unmap(ctx, next_vaddr, page_size)) + dev_warn_ratelimited(hdev->dev, + "unmap failed for vaddr: 0x%llx\n", next_vaddr); + + /* + * unmapping on Palladium can be really long, so avoid a CPU + * soft lockup bug by sleeping a little between unmapping pages + */ + if (hdev->pldm) + usleep_range(500, 1000); + } +} + static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *paddr) { @@ -839,7 +863,10 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, *device_addr = 0; if (is_userptr) { - rc = get_userptr_from_host_va(hdev, args, &userptr); + u64 addr = args->map_host.host_virt_addr, + size = args->map_host.mem_size; + + rc = dma_map_host_va(hdev, addr, size, &userptr); if (rc) { dev_err(hdev->dev, "failed to get userptr from va\n"); return rc; @@ -850,7 +877,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, if (rc) { dev_err(hdev->dev, "unable to init page pack for vaddr 0x%llx\n", - args->map_host.host_virt_addr); + addr); goto init_page_pack_err; } @@ -909,7 +936,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, mutex_lock(&ctx->mmu_lock); - rc = map_phys_page_pack(ctx, ret_vaddr, phys_pg_pack); + rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack); if (rc) { mutex_unlock(&ctx->mmu_lock); dev_err(hdev->dev, "mapping page pack failed for handle %u\n", @@ -917,7 +944,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, goto map_err; } - hdev->asic_funcs->mmu_invalidate_cache(hdev, false); + hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type); mutex_unlock(&ctx->mmu_lock); @@ -955,7 +982,7 @@ shared_err: free_phys_pg_pack(hdev, phys_pg_pack); init_page_pack_err: if (is_userptr) - free_userptr(hdev, userptr); + dma_unmap_host_va(hdev, userptr); return rc; } @@ -965,20 +992,20 @@ init_page_pack_err: * * @ctx : current context * @vaddr : device virtual address to unmap + * @ctx_free : true if in context free flow, false otherwise. * * This function does the following: * - Unmap the physical pages related to the given virtual address * - return the device virtual block to the virtual block list */ -static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) +static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free) { struct hl_device *hdev = ctx->hdev; struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; struct hl_vm_hash_node *hnode = NULL; struct hl_userptr *userptr = NULL; + struct hl_va_range *va_range; enum vm_type_t *vm_type; - u64 next_vaddr, i; - u32 page_size; bool is_userptr; int rc; @@ -1003,9 +1030,10 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) if (*vm_type == VM_TYPE_USERPTR) { is_userptr = true; + va_range = &ctx->host_va_range; userptr = hnode->ptr; rc = init_phys_pg_pack_from_userptr(ctx, userptr, - &phys_pg_pack); + &phys_pg_pack); if (rc) { dev_err(hdev->dev, "unable to init page pack for vaddr 0x%llx\n", @@ -1014,6 +1042,7 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) } } else if (*vm_type == VM_TYPE_PHYS_PACK) { is_userptr = false; + va_range = &ctx->dram_va_range; phys_pg_pack = hnode->ptr; } else { dev_warn(hdev->dev, @@ -1029,42 +1058,41 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) goto mapping_cnt_err; } - page_size = phys_pg_pack->page_size; - vaddr &= ~(((u64) page_size) - 1); - - next_vaddr = vaddr; + vaddr &= ~(((u64) phys_pg_pack->page_size) - 1); mutex_lock(&ctx->mmu_lock); - for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) { - if (hl_mmu_unmap(ctx, next_vaddr, page_size)) - dev_warn_ratelimited(hdev->dev, - "unmap failed for vaddr: 0x%llx\n", next_vaddr); - - /* unmapping on Palladium can be really long, so avoid a CPU - * soft lockup bug by sleeping a little between unmapping pages - */ - if (hdev->pldm) - usleep_range(500, 1000); - } + unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true); + /* + * During context free this function is called in a loop to clean all + * the context mappings. Hence the cache invalidation can be called once + * at the loop end rather than for each iteration + */ + if (!ctx_free) + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, *vm_type); mutex_unlock(&ctx->mmu_lock); - if (add_va_block(hdev, - is_userptr ? &ctx->host_va_range : &ctx->dram_va_range, - vaddr, - vaddr + phys_pg_pack->total_size - 1)) - dev_warn(hdev->dev, "add va block failed for vaddr: 0x%llx\n", - vaddr); + /* + * No point in maintaining the free VA block list if the context is + * closing as the list will be freed anyway + */ + if (!ctx_free) { + rc = add_va_block(hdev, va_range, vaddr, + vaddr + phys_pg_pack->total_size - 1); + if (rc) + dev_warn(hdev->dev, + "add va block failed for vaddr: 0x%llx\n", + vaddr); + } atomic_dec(&phys_pg_pack->mapping_cnt); kfree(hnode); if (is_userptr) { free_phys_pg_pack(hdev, phys_pg_pack); - free_userptr(hdev, userptr); + dma_unmap_host_va(hdev, userptr); } return 0; @@ -1189,8 +1217,8 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) break; case HL_MEM_OP_UNMAP: - rc = unmap_device_va(ctx, - args->in.unmap.device_virt_addr); + rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr, + false); break; default: @@ -1203,20 +1231,72 @@ out: return rc; } +static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, + u32 npages, u64 start, u32 offset, + struct hl_userptr *userptr) +{ + int rc; + + if (!access_ok((void __user *) (uintptr_t) addr, size)) { + dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr); + return -EFAULT; + } + + userptr->vec = frame_vector_create(npages); + if (!userptr->vec) { + dev_err(hdev->dev, "Failed to create frame vector\n"); + return -ENOMEM; + } + + rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE, + userptr->vec); + + if (rc != npages) { + dev_err(hdev->dev, + "Failed to map host memory, user ptr probably wrong\n"); + if (rc < 0) + goto destroy_framevec; + rc = -EFAULT; + goto put_framevec; + } + + if (frame_vector_to_pages(userptr->vec) < 0) { + dev_err(hdev->dev, + "Failed to translate frame vector to pages\n"); + rc = -EFAULT; + goto put_framevec; + } + + rc = sg_alloc_table_from_pages(userptr->sgt, + frame_vector_pages(userptr->vec), + npages, offset, size, GFP_ATOMIC); + if (rc < 0) { + dev_err(hdev->dev, "failed to create SG table from pages\n"); + goto put_framevec; + } + + return 0; + +put_framevec: + put_vaddr_frames(userptr->vec); +destroy_framevec: + frame_vector_destroy(userptr->vec); + return rc; +} + /* - * hl_pin_host_memory - pins a chunk of host memory - * - * @hdev : pointer to the habanalabs device structure - * @addr : the user-space virtual address of the memory area - * @size : the size of the memory area - * @userptr : pointer to hl_userptr structure + * hl_pin_host_memory - pins a chunk of host memory. + * @hdev: pointer to the habanalabs device structure + * @addr: the host virtual address of the memory area + * @size: the size of the memory area + * @userptr: pointer to hl_userptr structure * * This function does the following: * - Pins the physical pages - * - Create a SG list from those pages + * - Create an SG list from those pages */ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, - struct hl_userptr *userptr) + struct hl_userptr *userptr) { u64 start, end; u32 npages, offset; @@ -1227,11 +1307,6 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, return -EINVAL; } - if (!access_ok((void __user *) (uintptr_t) addr, size)) { - dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr); - return -EFAULT; - } - /* * If the combination of the address and size requested for this memory * region causes an integer overflow, return error. @@ -1244,6 +1319,14 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, return -EINVAL; } + /* + * This function can be called also from data path, hence use atomic + * always as it is not a big allocation. + */ + userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC); + if (!userptr->sgt) + return -ENOMEM; + start = addr & PAGE_MASK; offset = addr & ~PAGE_MASK; end = PAGE_ALIGN(addr + size); @@ -1254,42 +1337,12 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, userptr->dma_mapped = false; INIT_LIST_HEAD(&userptr->job_node); - userptr->vec = frame_vector_create(npages); - if (!userptr->vec) { - dev_err(hdev->dev, "Failed to create frame vector\n"); - return -ENOMEM; - } - - rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE, - userptr->vec); - - if (rc != npages) { - dev_err(hdev->dev, - "Failed to map host memory, user ptr probably wrong\n"); - if (rc < 0) - goto destroy_framevec; - rc = -EFAULT; - goto put_framevec; - } - - if (frame_vector_to_pages(userptr->vec) < 0) { + rc = get_user_memory(hdev, addr, size, npages, start, offset, + userptr); + if (rc) { dev_err(hdev->dev, - "Failed to translate frame vector to pages\n"); - rc = -EFAULT; - goto put_framevec; - } - - userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC); - if (!userptr->sgt) { - rc = -ENOMEM; - goto put_framevec; - } - - rc = sg_alloc_table_from_pages(userptr->sgt, - frame_vector_pages(userptr->vec), - npages, offset, size, GFP_ATOMIC); - if (rc < 0) { - dev_err(hdev->dev, "failed to create SG table from pages\n"); + "failed to get user memory for address 0x%llx\n", + addr); goto free_sgt; } @@ -1299,34 +1352,28 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, free_sgt: kfree(userptr->sgt); -put_framevec: - put_vaddr_frames(userptr->vec); -destroy_framevec: - frame_vector_destroy(userptr->vec); return rc; } /* - * hl_unpin_host_memory - unpins a chunk of host memory - * - * @hdev : pointer to the habanalabs device structure - * @userptr : pointer to hl_userptr structure + * hl_unpin_host_memory - unpins a chunk of host memory. + * @hdev: pointer to the habanalabs device structure + * @userptr: pointer to hl_userptr structure * * This function does the following: * - Unpins the physical pages related to the host memory * - Free the SG list */ -int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) +void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) { struct page **pages; hl_debugfs_remove_userptr(hdev, userptr); if (userptr->dma_mapped) - hdev->asic_funcs->hl_dma_unmap_sg(hdev, - userptr->sgt->sgl, - userptr->sgt->nents, - userptr->dir); + hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl, + userptr->sgt->nents, + userptr->dir); pages = frame_vector_pages(userptr->vec); if (!IS_ERR(pages)) { @@ -1342,8 +1389,6 @@ int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) sg_free_table(userptr->sgt); kfree(userptr->sgt); - - return 0; } /* @@ -1542,43 +1587,16 @@ int hl_vm_ctx_init(struct hl_ctx *ctx) * @hdev : pointer to the habanalabs structure * va_range : pointer to virtual addresses range * - * This function initializes the following: - * - Checks that the given range contains the whole initial range + * This function does the following: * - Frees the virtual addresses block list and its lock */ static void hl_va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range) { - struct hl_vm_va_block *va_block; - - if (list_empty(&va_range->list)) { - dev_warn(hdev->dev, - "va list should not be empty on cleanup!\n"); - goto out; - } - - if (!list_is_singular(&va_range->list)) { - dev_warn(hdev->dev, - "va list should not contain multiple blocks on cleanup!\n"); - goto free_va_list; - } - - va_block = list_first_entry(&va_range->list, typeof(*va_block), node); - - if (va_block->start != va_range->start_addr || - va_block->end != va_range->end_addr) { - dev_warn(hdev->dev, - "wrong va block on cleanup, from 0x%llx to 0x%llx\n", - va_block->start, va_block->end); - goto free_va_list; - } - -free_va_list: mutex_lock(&va_range->lock); clear_va_list_locked(hdev, &va_range->list); mutex_unlock(&va_range->lock); -out: mutex_destroy(&va_range->lock); } @@ -1613,21 +1631,31 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) hl_debugfs_remove_ctx_mem_hash(hdev, ctx); - if (!hash_empty(ctx->mem_hash)) - dev_notice(hdev->dev, "ctx is freed while it has va in use\n"); + /* + * Clearly something went wrong on hard reset so no point in printing + * another side effect error + */ + if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash)) + dev_notice(hdev->dev, + "ctx %d is freed while it has va in use\n", + ctx->asid); hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) { dev_dbg(hdev->dev, "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n", hnode->vaddr, ctx->asid); - unmap_device_va(ctx, hnode->vaddr); + unmap_device_va(ctx, hnode->vaddr, true); } + /* invalidate the cache once after the unmapping loop */ + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR); + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK); + spin_lock(&vm->idr_lock); idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i) if (phys_pg_list->asid == ctx->asid) { dev_dbg(hdev->dev, - "page list 0x%p of asid %d is still alive\n", + "page list 0x%px of asid %d is still alive\n", phys_pg_list, ctx->asid); atomic64_sub(phys_pg_list->total_size, &hdev->dram_used_mem); diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c index 176c315836f1..6262b26e2086 100644 --- a/drivers/misc/habanalabs/mmu.c +++ b/drivers/misc/habanalabs/mmu.c @@ -25,10 +25,9 @@ static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr) return pgt_info; } -static void free_hop(struct hl_ctx *ctx, u64 hop_addr) +static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info) { struct hl_device *hdev = ctx->hdev; - struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr); gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr, hdev->asic_prop.mmu_hop_table_size); @@ -37,6 +36,13 @@ static void free_hop(struct hl_ctx *ctx, u64 hop_addr) kfree(pgt_info); } +static void free_hop(struct hl_ctx *ctx, u64 hop_addr) +{ + struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr); + + _free_hop(ctx, pgt_info); +} + static u64 alloc_hop(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; @@ -105,8 +111,8 @@ static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val) * clear the 12 LSBs and translate the shadow hop to its associated * physical hop, and add back the original 12 LSBs. */ - u64 phys_val = get_phys_addr(ctx, val & PTE_PHYS_ADDR_MASK) | - (val & OFFSET_MASK); + u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) | + (val & FLAGS_MASK); ctx->hdev->asic_funcs->write_pte(ctx->hdev, get_phys_addr(ctx, shadow_pte_addr), @@ -159,7 +165,7 @@ static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr) */ num_of_ptes_left = pgt_info->num_of_ptes; if (!num_of_ptes_left) - free_hop(ctx, hop_addr); + _free_hop(ctx, pgt_info); return num_of_ptes_left; } @@ -171,35 +177,50 @@ static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, ((virt_addr & mask) >> shift); } -static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) +static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) { - return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP0_MASK, HOP0_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask, + mmu_prop->hop0_shift); } -static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) +static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) { - return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP1_MASK, HOP1_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask, + mmu_prop->hop1_shift); } -static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) +static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) { - return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP2_MASK, HOP2_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask, + mmu_prop->hop2_shift); } -static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) +static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) { - return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP3_MASK, HOP3_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask, + mmu_prop->hop3_shift); } -static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) +static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) { - return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP4_MASK, HOP4_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask, + mmu_prop->hop4_shift); } static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) { if (curr_pte & PAGE_PRESENT_MASK) - return curr_pte & PHYS_ADDR_MASK; + return curr_pte & HOP_PHYS_ADDR_MASK; else return ULLONG_MAX; } @@ -288,23 +309,23 @@ static int dram_default_mapping_init(struct hl_ctx *ctx) } /* need only pte 0 in hops 0 and 1 */ - pte_val = (hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop0_addr, pte_val); - pte_val = (hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop1_addr, pte_val); get_pte(ctx, hop1_addr); hop2_pte_addr = hop2_addr; for (i = 0 ; i < num_of_hop3 ; i++) { - pte_val = (ctx->dram_default_hops[i] & PTE_PHYS_ADDR_MASK) | + pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop2_pte_addr, pte_val); get_pte(ctx, hop2_addr); hop2_pte_addr += HL_PTE_SIZE; } - pte_val = (prop->mmu_dram_default_page_addr & PTE_PHYS_ADDR_MASK) | + pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK | PAGE_PRESENT_MASK; for (i = 0 ; i < num_of_hop3 ; i++) { @@ -400,8 +421,6 @@ int hl_mmu_init(struct hl_device *hdev) if (!hdev->mmu_enable) return 0; - /* MMU H/W init was already done in device hw_init() */ - hdev->mmu_pgt_pool = gen_pool_create(__ffs(prop->mmu_hop_table_size), -1); @@ -427,6 +446,8 @@ int hl_mmu_init(struct hl_device *hdev) goto err_pool_add; } + /* MMU H/W init will be done in device hw_init() */ + return 0; err_pool_add: @@ -450,10 +471,10 @@ void hl_mmu_fini(struct hl_device *hdev) if (!hdev->mmu_enable) return; + /* MMU H/W fini was already done in device hw_fini() */ + kvfree(hdev->mmu_shadow_hop0); gen_pool_destroy(hdev->mmu_pgt_pool); - - /* MMU H/W fini will be done in device hw_fini() */ } /** @@ -501,36 +522,36 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx) dram_default_mapping_fini(ctx); if (!hash_empty(ctx->mmu_shadow_hash)) - dev_err(hdev->dev, "ctx is freed while it has pgts in use\n"); + dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n", + ctx->asid); hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) { - dev_err(hdev->dev, + dev_err_ratelimited(hdev->dev, "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n", pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes); - free_hop(ctx, pgt_info->shadow_addr); + _free_hop(ctx, pgt_info); } mutex_destroy(&ctx->mmu_lock); } -static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) +static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr) { struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; u64 hop0_addr = 0, hop0_pte_addr = 0, hop1_addr = 0, hop1_pte_addr = 0, hop2_addr = 0, hop2_pte_addr = 0, hop3_addr = 0, hop3_pte_addr = 0, hop4_addr = 0, hop4_pte_addr = 0, curr_pte; - bool is_dram_addr, is_huge, clear_hop3 = true; + bool is_huge, clear_hop3 = true; - is_dram_addr = hl_mem_area_inside_range(virt_addr, PAGE_SIZE_2MB, - prop->va_space_dram_start_address, - prop->va_space_dram_end_address); + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr); + hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; @@ -539,7 +560,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) if (hop1_addr == ULLONG_MAX) goto not_mapped; - hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr); + hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; @@ -548,7 +569,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) if (hop2_addr == ULLONG_MAX) goto not_mapped; - hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr); + hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; @@ -557,7 +578,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) if (hop3_addr == ULLONG_MAX) goto not_mapped; - hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr); + hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; @@ -575,7 +596,8 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) if (hop4_addr == ULLONG_MAX) goto not_mapped; - hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr); + hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, + virt_addr); curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; @@ -584,7 +606,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) if (hdev->dram_default_page_mapping && is_dram_addr) { u64 default_pte = (prop->mmu_dram_default_page_addr & - PTE_PHYS_ADDR_MASK) | LAST_MASK | + HOP_PHYS_ADDR_MASK) | LAST_MASK | PAGE_PRESENT_MASK; if (curr_pte == default_pte) { dev_err(hdev->dev, @@ -667,25 +689,36 @@ not_mapped: int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size) { struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; u64 real_virt_addr; u32 real_page_size, npages; int i, rc; + bool is_dram_addr; if (!hdev->mmu_enable) return 0; + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->va_space_dram_start_address, + prop->va_space_dram_end_address); + + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + /* - * The H/W handles mapping of 4KB/2MB page. Hence if the host page size - * is bigger, we break it to sub-pages and unmap them separately. + * The H/W handles mapping of specific page sizes. Hence if the page + * size is bigger, we break it to sub-pages and unmap them separately. */ - if ((page_size % PAGE_SIZE_2MB) == 0) { - real_page_size = PAGE_SIZE_2MB; - } else if ((page_size % PAGE_SIZE_4KB) == 0) { - real_page_size = PAGE_SIZE_4KB; + if ((page_size % mmu_prop->huge_page_size) == 0) { + real_page_size = mmu_prop->huge_page_size; + } else if ((page_size % mmu_prop->page_size) == 0) { + real_page_size = mmu_prop->page_size; } else { dev_err(hdev->dev, - "page size of %u is not 4KB nor 2MB aligned, can't unmap\n", - page_size); + "page size of %u is not %uKB nor %uMB aligned, can't unmap\n", + page_size, + mmu_prop->page_size >> 10, + mmu_prop->huge_page_size >> 20); return -EFAULT; } @@ -694,7 +727,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size) real_virt_addr = virt_addr; for (i = 0 ; i < npages ; i++) { - rc = _hl_mmu_unmap(ctx, real_virt_addr); + rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr); if (rc) return rc; @@ -705,10 +738,11 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size) } static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, - u32 page_size) + u32 page_size, bool is_dram_addr) { struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; u64 hop0_addr = 0, hop0_pte_addr = 0, hop1_addr = 0, hop1_pte_addr = 0, hop2_addr = 0, hop2_pte_addr = 0, @@ -716,21 +750,19 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, hop4_addr = 0, hop4_pte_addr = 0, curr_pte = 0; bool hop1_new = false, hop2_new = false, hop3_new = false, - hop4_new = false, is_huge, is_dram_addr; + hop4_new = false, is_huge; int rc = -ENOMEM; + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + /* - * This mapping function can map a 4KB/2MB page. For 2MB page there are - * only 3 hops rather than 4. Currently the DRAM allocation uses 2MB - * pages only but user memory could have been allocated with one of the - * two page sizes. Since this is a common code for all the three cases, - * we need this hugs page check. + * This mapping function can map a page or a huge page. For huge page + * there are only 3 hops rather than 4. Currently the DRAM allocation + * uses huge pages only but user memory could have been allocated with + * one of the two page sizes. Since this is a common code for all the + * three cases, we need this hugs page check. */ - is_huge = page_size == PAGE_SIZE_2MB; - - is_dram_addr = hl_mem_area_inside_range(virt_addr, page_size, - prop->va_space_dram_start_address, - prop->va_space_dram_end_address); + is_huge = page_size == mmu_prop->huge_page_size; if (is_dram_addr && !is_huge) { dev_err(hdev->dev, "DRAM mapping should use huge pages only\n"); @@ -738,28 +770,28 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, } hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr); + hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new); if (hop1_addr == ULLONG_MAX) goto err; - hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr); + hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new); if (hop2_addr == ULLONG_MAX) goto err; - hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr); + hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new); if (hop3_addr == ULLONG_MAX) goto err; - hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr); + hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; if (!is_huge) { @@ -767,13 +799,14 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, if (hop4_addr == ULLONG_MAX) goto err; - hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr); + hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, + virt_addr); curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; } if (hdev->dram_default_page_mapping && is_dram_addr) { u64 default_pte = (prop->mmu_dram_default_page_addr & - PTE_PHYS_ADDR_MASK) | LAST_MASK | + HOP_PHYS_ADDR_MASK) | LAST_MASK | PAGE_PRESENT_MASK; if (curr_pte != default_pte) { @@ -813,7 +846,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, goto err; } - curr_pte = (phys_addr & PTE_PHYS_ADDR_MASK) | LAST_MASK + curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK | PAGE_PRESENT_MASK; if (is_huge) @@ -823,25 +856,25 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, if (hop1_new) { curr_pte = - (hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop0_pte_addr, curr_pte); } if (hop2_new) { curr_pte = - (hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop1_pte_addr, curr_pte); get_pte(ctx, hop1_addr); } if (hop3_new) { curr_pte = - (hop3_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + (hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop2_pte_addr, curr_pte); get_pte(ctx, hop2_addr); } if (!is_huge) { if (hop4_new) { - curr_pte = (hop4_addr & PTE_PHYS_ADDR_MASK) | + curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop3_pte_addr, curr_pte); get_pte(ctx, hop3_addr); @@ -890,25 +923,36 @@ err: int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) { struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; u64 real_virt_addr, real_phys_addr; u32 real_page_size, npages; int i, rc, mapped_cnt = 0; + bool is_dram_addr; if (!hdev->mmu_enable) return 0; + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->va_space_dram_start_address, + prop->va_space_dram_end_address); + + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + /* - * The H/W handles mapping of 4KB/2MB page. Hence if the host page size - * is bigger, we break it to sub-pages and map them separately. + * The H/W handles mapping of specific page sizes. Hence if the page + * size is bigger, we break it to sub-pages and map them separately. */ - if ((page_size % PAGE_SIZE_2MB) == 0) { - real_page_size = PAGE_SIZE_2MB; - } else if ((page_size % PAGE_SIZE_4KB) == 0) { - real_page_size = PAGE_SIZE_4KB; + if ((page_size % mmu_prop->huge_page_size) == 0) { + real_page_size = mmu_prop->huge_page_size; + } else if ((page_size % mmu_prop->page_size) == 0) { + real_page_size = mmu_prop->page_size; } else { dev_err(hdev->dev, - "page size of %u is not 4KB nor 2MB aligned, can't map\n", - page_size); + "page size of %u is not %dKB nor %dMB aligned, can't unmap\n", + page_size, + mmu_prop->page_size >> 10, + mmu_prop->huge_page_size >> 20); return -EFAULT; } @@ -923,7 +967,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) for (i = 0 ; i < npages ; i++) { rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr, - real_page_size); + real_page_size, is_dram_addr); if (rc) goto err; @@ -937,7 +981,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) err: real_virt_addr = virt_addr; for (i = 0 ; i < mapped_cnt ; i++) { - if (_hl_mmu_unmap(ctx, real_virt_addr)) + if (_hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr)) dev_warn_ratelimited(hdev->dev, "failed to unmap va: 0x%llx\n", real_virt_addr); diff --git a/drivers/misc/hpilo.h b/drivers/misc/hpilo.h index 94dfb9e40e29..1aa433a7f66c 100644 --- a/drivers/misc/hpilo.h +++ b/drivers/misc/hpilo.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/drivers/char/hpilo.h * diff --git a/drivers/misc/ibmvmc.h b/drivers/misc/ibmvmc.h index e140ada8fe2c..0e1756fffeae 100644 --- a/drivers/misc/ibmvmc.h +++ b/drivers/misc/ibmvmc.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0+ - * +/* SPDX-License-Identifier: GPL-2.0+ */ +/* * linux/drivers/misc/ibmvmc.h * * IBM Power Systems Virtual Management Channel Support. diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c index 057d7bbde402..dd65cedf3b12 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.c +++ b/drivers/misc/lis3lv02d/lis3lv02d.c @@ -16,7 +16,7 @@ #include <linux/types.h> #include <linux/platform_device.h> #include <linux/interrupt.h> -#include <linux/input-polldev.h> +#include <linux/input.h> #include <linux/delay.h> #include <linux/wait.h> #include <linux/poll.h> @@ -434,23 +434,23 @@ int lis3lv02d_poweron(struct lis3lv02d *lis3) EXPORT_SYMBOL_GPL(lis3lv02d_poweron); -static void lis3lv02d_joystick_poll(struct input_polled_dev *pidev) +static void lis3lv02d_joystick_poll(struct input_dev *input) { - struct lis3lv02d *lis3 = pidev->private; + struct lis3lv02d *lis3 = input_get_drvdata(input); int x, y, z; mutex_lock(&lis3->mutex); lis3lv02d_get_xyz(lis3, &x, &y, &z); - input_report_abs(pidev->input, ABS_X, x); - input_report_abs(pidev->input, ABS_Y, y); - input_report_abs(pidev->input, ABS_Z, z); - input_sync(pidev->input); + input_report_abs(input, ABS_X, x); + input_report_abs(input, ABS_Y, y); + input_report_abs(input, ABS_Z, z); + input_sync(input); mutex_unlock(&lis3->mutex); } -static void lis3lv02d_joystick_open(struct input_polled_dev *pidev) +static int lis3lv02d_joystick_open(struct input_dev *input) { - struct lis3lv02d *lis3 = pidev->private; + struct lis3lv02d *lis3 = input_get_drvdata(input); if (lis3->pm_dev) pm_runtime_get_sync(lis3->pm_dev); @@ -461,12 +461,14 @@ static void lis3lv02d_joystick_open(struct input_polled_dev *pidev) * Update coordinates for the case where poll interval is 0 and * the chip in running purely under interrupt control */ - lis3lv02d_joystick_poll(pidev); + lis3lv02d_joystick_poll(input); + + return 0; } -static void lis3lv02d_joystick_close(struct input_polled_dev *pidev) +static void lis3lv02d_joystick_close(struct input_dev *input) { - struct lis3lv02d *lis3 = pidev->private; + struct lis3lv02d *lis3 = input_get_drvdata(input); atomic_set(&lis3->wake_thread, 0); if (lis3->pm_dev) @@ -497,7 +499,7 @@ out: static void lis302dl_interrupt_handle_click(struct lis3lv02d *lis3) { - struct input_dev *dev = lis3->idev->input; + struct input_dev *dev = lis3->idev; u8 click_src; mutex_lock(&lis3->mutex); @@ -677,26 +679,19 @@ int lis3lv02d_joystick_enable(struct lis3lv02d *lis3) if (lis3->idev) return -EINVAL; - lis3->idev = input_allocate_polled_device(); - if (!lis3->idev) + input_dev = input_allocate_device(); + if (!input_dev) return -ENOMEM; - lis3->idev->poll = lis3lv02d_joystick_poll; - lis3->idev->open = lis3lv02d_joystick_open; - lis3->idev->close = lis3lv02d_joystick_close; - lis3->idev->poll_interval = MDPS_POLL_INTERVAL; - lis3->idev->poll_interval_min = MDPS_POLL_MIN; - lis3->idev->poll_interval_max = MDPS_POLL_MAX; - lis3->idev->private = lis3; - input_dev = lis3->idev->input; - input_dev->name = "ST LIS3LV02DL Accelerometer"; input_dev->phys = DRIVER_NAME "/input0"; input_dev->id.bustype = BUS_HOST; input_dev->id.vendor = 0; input_dev->dev.parent = &lis3->pdev->dev; - set_bit(EV_ABS, input_dev->evbit); + input_dev->open = lis3lv02d_joystick_open; + input_dev->close = lis3lv02d_joystick_close; + max_val = (lis3->mdps_max_val * lis3->scale) / LIS3_ACCURACY; if (lis3->whoami == WAI_12B) { fuzz = LIS3_DEFAULT_FUZZ_12B; @@ -712,17 +707,32 @@ int lis3lv02d_joystick_enable(struct lis3lv02d *lis3) input_set_abs_params(input_dev, ABS_Y, -max_val, max_val, fuzz, flat); input_set_abs_params(input_dev, ABS_Z, -max_val, max_val, fuzz, flat); + input_set_drvdata(input_dev, lis3); + lis3->idev = input_dev; + + err = input_setup_polling(input_dev, lis3lv02d_joystick_poll); + if (err) + goto err_free_input; + + input_set_poll_interval(input_dev, MDPS_POLL_INTERVAL); + input_set_min_poll_interval(input_dev, MDPS_POLL_MIN); + input_set_max_poll_interval(input_dev, MDPS_POLL_MAX); + lis3->mapped_btns[0] = lis3lv02d_get_axis(abs(lis3->ac.x), btns); lis3->mapped_btns[1] = lis3lv02d_get_axis(abs(lis3->ac.y), btns); lis3->mapped_btns[2] = lis3lv02d_get_axis(abs(lis3->ac.z), btns); - err = input_register_polled_device(lis3->idev); - if (err) { - input_free_polled_device(lis3->idev); - lis3->idev = NULL; - } + err = input_register_device(lis3->idev); + if (err) + goto err_free_input; + return 0; + +err_free_input: + input_free_device(input_dev); + lis3->idev = NULL; return err; + } EXPORT_SYMBOL_GPL(lis3lv02d_joystick_enable); @@ -738,8 +748,7 @@ void lis3lv02d_joystick_disable(struct lis3lv02d *lis3) if (lis3->irq) misc_deregister(&lis3->miscdev); - input_unregister_polled_device(lis3->idev); - input_free_polled_device(lis3->idev); + input_unregister_device(lis3->idev); lis3->idev = NULL; } EXPORT_SYMBOL_GPL(lis3lv02d_joystick_disable); @@ -895,10 +904,9 @@ static void lis3lv02d_8b_configure(struct lis3lv02d *lis3, (p->click_thresh_y << 4)); if (lis3->idev) { - struct input_dev *input_dev = lis3->idev->input; - input_set_capability(input_dev, EV_KEY, BTN_X); - input_set_capability(input_dev, EV_KEY, BTN_Y); - input_set_capability(input_dev, EV_KEY, BTN_Z); + input_set_capability(lis3->idev, EV_KEY, BTN_X); + input_set_capability(lis3->idev, EV_KEY, BTN_Y); + input_set_capability(lis3->idev, EV_KEY, BTN_Z); } } diff --git a/drivers/misc/lis3lv02d/lis3lv02d.h b/drivers/misc/lis3lv02d/lis3lv02d.h index 1b0c99883c57..c394c0b08519 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.h +++ b/drivers/misc/lis3lv02d/lis3lv02d.h @@ -6,7 +6,7 @@ * Copyright (C) 2008-2009 Eric Piel */ #include <linux/platform_device.h> -#include <linux/input-polldev.h> +#include <linux/input.h> #include <linux/regulator/consumer.h> #include <linux/miscdevice.h> @@ -281,7 +281,7 @@ struct lis3lv02d { * (1/1000th of earth gravity) */ - struct input_polled_dev *idev; /* input device */ + struct input_dev *idev; /* input device */ struct platform_device *pdev; /* platform device */ struct regulator_bulk_data regulators[2]; atomic_t count; /* interrupt count after last read */ diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c index 0a2b99e1af45..9ad9c01ddf41 100644 --- a/drivers/misc/mei/bus-fixup.c +++ b/drivers/misc/mei/bus-fixup.c @@ -46,8 +46,6 @@ static const uuid_le mei_nfc_info_guid = MEI_UUID_NFC_INFO; */ static void number_of_connections(struct mei_cl_device *cldev) { - dev_dbg(&cldev->dev, "running hook %s\n", __func__); - if (cldev->me_cl->props.max_number_of_connections > 1) cldev->do_match = 0; } @@ -59,8 +57,6 @@ static void number_of_connections(struct mei_cl_device *cldev) */ static void blacklist(struct mei_cl_device *cldev) { - dev_dbg(&cldev->dev, "running hook %s\n", __func__); - cldev->do_match = 0; } @@ -71,8 +67,6 @@ static void blacklist(struct mei_cl_device *cldev) */ static void whitelist(struct mei_cl_device *cldev) { - dev_dbg(&cldev->dev, "running hook %s\n", __func__); - cldev->do_match = 1; } @@ -256,7 +250,6 @@ static void mei_wd(struct mei_cl_device *cldev) { struct pci_dev *pdev = to_pci_dev(cldev->dev.parent); - dev_dbg(&cldev->dev, "running hook %s\n", __func__); if (pdev->device == MEI_DEV_ID_WPT_LP || pdev->device == MEI_DEV_ID_SPT || pdev->device == MEI_DEV_ID_SPT_H) @@ -410,8 +403,6 @@ static void mei_nfc(struct mei_cl_device *cldev) bus = cldev->bus; - dev_dbg(&cldev->dev, "running hook %s\n", __func__); - mutex_lock(&bus->device_lock); /* we need to connect to INFO GUID */ cl = mei_cl_alloc_linked(bus); diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 985bd4fd3328..a0a495c95e3c 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -791,11 +791,44 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *a, } static DEVICE_ATTR_RO(modalias); +static ssize_t max_conn_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + u8 maxconn = mei_me_cl_max_conn(cldev->me_cl); + + return scnprintf(buf, PAGE_SIZE, "%d", maxconn); +} +static DEVICE_ATTR_RO(max_conn); + +static ssize_t fixed_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + u8 fixed = mei_me_cl_fixed(cldev->me_cl); + + return scnprintf(buf, PAGE_SIZE, "%d", fixed); +} +static DEVICE_ATTR_RO(fixed); + +static ssize_t max_len_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + u32 maxlen = mei_me_cl_max_len(cldev->me_cl); + + return scnprintf(buf, PAGE_SIZE, "%u", maxlen); +} +static DEVICE_ATTR_RO(max_len); + static struct attribute *mei_cldev_attrs[] = { &dev_attr_name.attr, &dev_attr_uuid.attr, &dev_attr_version.attr, &dev_attr_modalias.attr, + &dev_attr_max_conn.attr, + &dev_attr_fixed.attr, + &dev_attr_max_len.attr, NULL, }; ATTRIBUTE_GROUPS(mei_cldev); @@ -873,15 +906,16 @@ static const struct device_type mei_cl_device_type = { /** * mei_cl_bus_set_name - set device name for me client device + * <controller>-<client device> + * Example: 0000:00:16.0-55213584-9a29-4916-badf-0fb7ed682aeb * * @cldev: me client device */ static inline void mei_cl_bus_set_name(struct mei_cl_device *cldev) { - dev_set_name(&cldev->dev, "mei:%s:%pUl:%02X", - cldev->name, - mei_me_cl_uuid(cldev->me_cl), - mei_me_cl_ver(cldev->me_cl)); + dev_set_name(&cldev->dev, "%s-%pUl", + dev_name(cldev->bus->dev), + mei_me_cl_uuid(cldev->me_cl)); } /** diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h index c1f9e810cf81..2f8954def591 100644 --- a/drivers/misc/mei/client.h +++ b/drivers/misc/mei/client.h @@ -69,6 +69,42 @@ static inline u8 mei_me_cl_ver(const struct mei_me_client *me_cl) return me_cl->props.protocol_version; } +/** + * mei_me_cl_max_conn - return me client max number of connections + * + * @me_cl: me client + * + * Return: me client max number of connections + */ +static inline u8 mei_me_cl_max_conn(const struct mei_me_client *me_cl) +{ + return me_cl->props.max_number_of_connections; +} + +/** + * mei_me_cl_fixed - return me client fixed address, if any + * + * @me_cl: me client + * + * Return: me client fixed address + */ +static inline u8 mei_me_cl_fixed(const struct mei_me_client *me_cl) +{ + return me_cl->props.fixed_address; +} + +/** + * mei_me_cl_max_len - return me client max msg length + * + * @me_cl: me client + * + * Return: me client max msg length + */ +static inline u32 mei_me_cl_max_len(const struct mei_me_client *me_cl) +{ + return me_cl->props.max_msg_length; +} + /* * MEI IO Functions */ diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index c09f8bb49495..7cd67fb2365d 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -81,6 +81,7 @@ #define MEI_DEV_ID_CMP_LP 0x02e0 /* Comet Point LP */ #define MEI_DEV_ID_CMP_LP_3 0x02e4 /* Comet Point LP 3 (iTouch) */ +#define MEI_DEV_ID_CMP_V 0xA3BA /* Comet Point Lake V */ #define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */ @@ -162,7 +163,8 @@ access to ME_CBD */ #define ME_IS_HRA 0x00000002 /* ME Interrupt Enable HRA - host read only access to ME_IE */ #define ME_IE_HRA 0x00000001 - +/* TRC control shadow register */ +#define ME_TRC 0x00000030 /* H_HPG_CSR register bits */ #define H_HPG_CSR_PGIHEXR 0x00000001 diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index c4f6991d3028..668418d7ea77 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2003-2018, Intel Corporation. All rights reserved. + * Copyright (c) 2003-2019, Intel Corporation. All rights reserved. * Intel Management Engine Interface (Intel MEI) Linux driver */ @@ -173,6 +173,27 @@ static inline void mei_me_d0i3c_write(struct mei_device *dev, u32 reg) } /** + * mei_me_trc_status - read trc status register + * + * @dev: mei device + * @trc: trc status register value + * + * Return: 0 on success, error otherwise + */ +static int mei_me_trc_status(struct mei_device *dev, u32 *trc) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (!hw->cfg->hw_trc_supported) + return -EOPNOTSUPP; + + *trc = mei_me_reg_read(hw, ME_TRC); + trace_mei_reg_read(dev->dev, "ME_TRC", ME_TRC, *trc); + + return 0; +} + +/** * mei_me_fw_status - read fw status register from pci config space * * @dev: mei device @@ -183,20 +204,19 @@ static inline void mei_me_d0i3c_write(struct mei_device *dev, u32 reg) static int mei_me_fw_status(struct mei_device *dev, struct mei_fw_status *fw_status) { - struct pci_dev *pdev = to_pci_dev(dev->dev); struct mei_me_hw *hw = to_me_hw(dev); const struct mei_fw_status *fw_src = &hw->cfg->fw_status; int ret; int i; - if (!fw_status) + if (!fw_status || !hw->read_fws) return -EINVAL; fw_status->count = fw_src->count; for (i = 0; i < fw_src->count && i < MEI_FW_STATUS_MAX; i++) { - ret = pci_read_config_dword(pdev, fw_src->status[i], - &fw_status->status[i]); - trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HSF_X", + ret = hw->read_fws(dev, fw_src->status[i], + &fw_status->status[i]); + trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HFS_X", fw_src->status[i], fw_status->status[i]); if (ret) @@ -210,19 +230,26 @@ static int mei_me_fw_status(struct mei_device *dev, * mei_me_hw_config - configure hw dependent settings * * @dev: mei device + * + * Return: + * * -EINVAL when read_fws is not set + * * 0 on success + * */ -static void mei_me_hw_config(struct mei_device *dev) +static int mei_me_hw_config(struct mei_device *dev) { - struct pci_dev *pdev = to_pci_dev(dev->dev); struct mei_me_hw *hw = to_me_hw(dev); u32 hcsr, reg; + if (WARN_ON(!hw->read_fws)) + return -EINVAL; + /* Doesn't change in runtime */ hcsr = mei_hcsr_read(dev); hw->hbuf_depth = (hcsr & H_CBD) >> 24; reg = 0; - pci_read_config_dword(pdev, PCI_CFG_HFS_1, ®); + hw->read_fws(dev, PCI_CFG_HFS_1, ®); trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HFS_1", PCI_CFG_HFS_1, reg); hw->d0i3_supported = ((reg & PCI_CFG_HFS_1_D0I3_MSK) == PCI_CFG_HFS_1_D0I3_MSK); @@ -233,6 +260,8 @@ static void mei_me_hw_config(struct mei_device *dev) if (reg & H_D0I3C_I3) hw->pg_state = MEI_PG_ON; } + + return 0; } /** @@ -269,7 +298,7 @@ static inline void me_intr_disable(struct mei_device *dev, u32 hcsr) } /** - * mei_me_intr_clear - clear and stop interrupts + * me_intr_clear - clear and stop interrupts * * @dev: the device structure * @hcsr: supplied hcsr register value @@ -323,9 +352,9 @@ static void mei_me_intr_disable(struct mei_device *dev) */ static void mei_me_synchronize_irq(struct mei_device *dev) { - struct pci_dev *pdev = to_pci_dev(dev->dev); + struct mei_me_hw *hw = to_me_hw(dev); - synchronize_irq(pdev->irq); + synchronize_irq(hw->irq); } /** @@ -1294,6 +1323,7 @@ end: static const struct mei_hw_ops mei_me_hw_ops = { + .trc_status = mei_me_trc_status, .fw_status = mei_me_fw_status, .pg_state = mei_me_pg_state, @@ -1384,6 +1414,9 @@ static bool mei_me_fw_type_sps(struct pci_dev *pdev) .dma_size[DMA_DSCR_DEVICE] = SZ_128K, \ .dma_size[DMA_DSCR_CTRL] = PAGE_SIZE +#define MEI_CFG_TRC \ + .hw_trc_supported = 1 + /* ICH Legacy devices */ static const struct mei_cfg mei_me_ich_cfg = { MEI_CFG_ICH_HFS, @@ -1432,6 +1465,14 @@ static const struct mei_cfg mei_me_pch12_cfg = { MEI_CFG_DMA_128, }; +/* Tiger Lake and newer devices */ +static const struct mei_cfg mei_me_pch15_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, + MEI_CFG_DMA_128, + MEI_CFG_TRC, +}; + /* * mei_cfg_list - A list of platform platform specific configurations. * Note: has to be synchronized with enum mei_cfg_idx. @@ -1446,6 +1487,7 @@ static const struct mei_cfg *const mei_cfg_list[] = { [MEI_ME_PCH8_CFG] = &mei_me_pch8_cfg, [MEI_ME_PCH8_SPS_CFG] = &mei_me_pch8_sps_cfg, [MEI_ME_PCH12_CFG] = &mei_me_pch12_cfg, + [MEI_ME_PCH15_CFG] = &mei_me_pch15_cfg, }; const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx) @@ -1461,19 +1503,19 @@ const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx) /** * mei_me_dev_init - allocates and initializes the mei device structure * - * @pdev: The pci device structure + * @parent: device associated with physical device (pci/platform) * @cfg: per device generation config * * Return: The mei_device pointer on success, NULL on failure. */ -struct mei_device *mei_me_dev_init(struct pci_dev *pdev, +struct mei_device *mei_me_dev_init(struct device *parent, const struct mei_cfg *cfg) { struct mei_device *dev; struct mei_me_hw *hw; int i; - dev = devm_kzalloc(&pdev->dev, sizeof(struct mei_device) + + dev = devm_kzalloc(parent, sizeof(struct mei_device) + sizeof(struct mei_me_hw), GFP_KERNEL); if (!dev) return NULL; @@ -1483,7 +1525,7 @@ struct mei_device *mei_me_dev_init(struct pci_dev *pdev, for (i = 0; i < DMA_DSCR_NUM; i++) dev->dr_dscr[i].size = cfg->dma_size[i]; - mei_device_init(dev, &pdev->dev, &mei_me_hw_ops); + mei_device_init(dev, parent, &mei_me_hw_ops); hw->cfg = cfg; dev->fw_f_fw_ver_supported = cfg->fw_ver_supported; diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h index 1d8794828cbc..4a8d4dcd5a91 100644 --- a/drivers/misc/mei/hw-me.h +++ b/drivers/misc/mei/hw-me.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (c) 2012-2018, Intel Corporation. All rights reserved. + * Copyright (c) 2012-2019, Intel Corporation. All rights reserved. * Intel Management Engine Interface (Intel MEI) Linux driver */ @@ -21,12 +21,14 @@ * @quirk_probe: device exclusion quirk * @dma_size: device DMA buffers size * @fw_ver_supported: is fw version retrievable from FW + * @hw_trc_supported: does the hw support trc register */ struct mei_cfg { const struct mei_fw_status fw_status; bool (*quirk_probe)(struct pci_dev *pdev); size_t dma_size[DMA_DSCR_NUM]; u32 fw_ver_supported:1; + u32 hw_trc_supported:1; }; @@ -42,16 +44,20 @@ struct mei_cfg { * * @cfg: per device generation config and ops * @mem_addr: io memory address + * @irq: irq number * @pg_state: power gating state * @d0i3_supported: di03 support * @hbuf_depth: depth of hardware host/write buffer in slots + * @read_fws: read FW status register handler */ struct mei_me_hw { const struct mei_cfg *cfg; void __iomem *mem_addr; + int irq; enum mei_pg_state pg_state; bool d0i3_supported; u8 hbuf_depth; + int (*read_fws)(const struct mei_device *dev, int where, u32 *val); }; #define to_me_hw(dev) (struct mei_me_hw *)((dev)->hw) @@ -74,6 +80,7 @@ struct mei_me_hw { * servers platforms with quirk for * SPS firmware exclusion. * @MEI_ME_PCH12_CFG: Platform Controller Hub Gen12 and newer + * @MEI_ME_PCH15_CFG: Platform Controller Hub Gen15 and newer * @MEI_ME_NUM_CFG: Upper Sentinel. */ enum mei_cfg_idx { @@ -86,12 +93,13 @@ enum mei_cfg_idx { MEI_ME_PCH8_CFG, MEI_ME_PCH8_SPS_CFG, MEI_ME_PCH12_CFG, + MEI_ME_PCH15_CFG, MEI_ME_NUM_CFG, }; const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx); -struct mei_device *mei_me_dev_init(struct pci_dev *pdev, +struct mei_device *mei_me_dev_init(struct device *parent, const struct mei_cfg *cfg); int mei_me_pg_enter_sync(struct mei_device *dev); diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c index 5e58656b8e19..785b260b3ae9 100644 --- a/drivers/misc/mei/hw-txe.c +++ b/drivers/misc/mei/hw-txe.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2013-2014, Intel Corporation. All rights reserved. + * Copyright (c) 2013-2019, Intel Corporation. All rights reserved. * Intel Management Engine Interface (Intel MEI) Linux driver */ @@ -660,14 +660,16 @@ static int mei_txe_fw_status(struct mei_device *dev, } /** - * mei_txe_hw_config - configure hardware at the start of the devices + * mei_txe_hw_config - configure hardware at the start of the devices * * @dev: the device structure * * Configure hardware at the start of the device should be done only * once at the device probe time + * + * Return: always 0 */ -static void mei_txe_hw_config(struct mei_device *dev) +static int mei_txe_hw_config(struct mei_device *dev) { struct mei_txe_hw *hw = to_txe_hw(dev); @@ -677,6 +679,8 @@ static void mei_txe_hw_config(struct mei_device *dev) dev_dbg(dev->dev, "aliveness_resp = 0x%08x, readiness = 0x%08x.\n", hw->aliveness, hw->readiness); + + return 0; } /** diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index b9fef773e71b..bcee77768b91 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2012-2018, Intel Corporation. All rights reserved. + * Copyright (c) 2012-2019, Intel Corporation. All rights reserved. * Intel Management Engine Interface (Intel MEI) Linux driver */ @@ -190,7 +190,9 @@ int mei_start(struct mei_device *dev) /* acknowledge interrupt and stop interrupts */ mei_clear_interrupts(dev); - mei_hw_config(dev); + ret = mei_hw_config(dev); + if (ret) + goto err; dev_dbg(dev->dev, "reset in start the mei device.\n"); diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index 7310b476323c..4ef6e37caafc 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -701,6 +701,29 @@ static int mei_fasync(int fd, struct file *file, int band) } /** + * trc_show - mei device trc attribute show method + * + * @device: device pointer + * @attr: attribute pointer + * @buf: char out buffer + * + * Return: number of the bytes printed into buf or error + */ +static ssize_t trc_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mei_device *dev = dev_get_drvdata(device); + u32 trc; + int ret; + + ret = mei_trc_status(dev, &trc); + if (ret) + return ret; + return sprintf(buf, "%08X\n", trc); +} +static DEVICE_ATTR_RO(trc); + +/** * fw_status_show - mei device fw_status attribute show method * * @device: device pointer @@ -887,6 +910,7 @@ static struct attribute *mei_attrs[] = { &dev_attr_tx_queue_limit.attr, &dev_attr_fw_ver.attr, &dev_attr_dev_state.attr, + &dev_attr_trc.attr, NULL }; ATTRIBUTE_GROUPS(mei); diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index 0f2141178299..76f8ff5ff974 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (c) 2003-2018, Intel Corporation. All rights reserved. + * Copyright (c) 2003-2019, Intel Corporation. All rights reserved. * Intel Management Engine Interface (Intel MEI) Linux driver */ @@ -260,6 +260,7 @@ struct mei_cl { * @hw_config : configure hw * * @fw_status : get fw status registers + * @trc_status : get trc status register * @pg_state : power gating state of the device * @pg_in_transition : is device now in pg transition * @pg_is_enabled : is power gating enabled @@ -287,9 +288,11 @@ struct mei_hw_ops { bool (*hw_is_ready)(struct mei_device *dev); int (*hw_reset)(struct mei_device *dev, bool enable); int (*hw_start)(struct mei_device *dev); - void (*hw_config)(struct mei_device *dev); + int (*hw_config)(struct mei_device *dev); int (*fw_status)(struct mei_device *dev, struct mei_fw_status *fw_sts); + int (*trc_status)(struct mei_device *dev, u32 *trc); + enum mei_pg_state (*pg_state)(struct mei_device *dev); bool (*pg_in_transition)(struct mei_device *dev); bool (*pg_is_enabled)(struct mei_device *dev); @@ -614,9 +617,9 @@ void mei_irq_compl_handler(struct mei_device *dev, struct list_head *cmpl_list); */ -static inline void mei_hw_config(struct mei_device *dev) +static inline int mei_hw_config(struct mei_device *dev) { - dev->ops->hw_config(dev); + return dev->ops->hw_config(dev); } static inline enum mei_pg_state mei_pg_state(struct mei_device *dev) @@ -711,6 +714,13 @@ static inline int mei_count_full_read_slots(struct mei_device *dev) return dev->ops->rdbuf_full_slots(dev); } +static inline int mei_trc_status(struct mei_device *dev, u32 *trc) +{ + if (dev->ops->trc_status) + return dev->ops->trc_status(dev, trc); + return -EOPNOTSUPP; +} + static inline int mei_fw_status(struct mei_device *dev, struct mei_fw_status *fw_status) { diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 3dca63eddaa0..c845b7e40f26 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -98,12 +98,13 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP_3, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_V, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH15_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_MCC_4, MEI_ME_PCH8_CFG)}, /* required last entry */ @@ -120,6 +121,13 @@ static inline void mei_me_set_pm_domain(struct mei_device *dev) {} static inline void mei_me_unset_pm_domain(struct mei_device *dev) {} #endif /* CONFIG_PM */ +static int mei_me_read_fws(const struct mei_device *dev, int where, u32 *val) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + + return pci_read_config_dword(pdev, where, val); +} + /** * mei_me_quirk_probe - probe for devices that doesn't valid ME interface * @@ -191,13 +199,15 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* allocates and initializes the mei dev structure */ - dev = mei_me_dev_init(pdev, cfg); + dev = mei_me_dev_init(&pdev->dev, cfg); if (!dev) { err = -ENOMEM; goto end; } hw = to_me_hw(dev); hw->mem_addr = pcim_iomap_table(pdev)[0]; + hw->irq = pdev->irq; + hw->read_fws = mei_me_read_fws; pci_enable_msi(pdev); diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig index 948f45bbf135..b6841ba6d922 100644 --- a/drivers/misc/mic/Kconfig +++ b/drivers/misc/mic/Kconfig @@ -1,8 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only menu "Intel MIC & related support" -comment "Intel MIC Bus Driver" - config INTEL_MIC_BUS tristate "Intel MIC Bus Driver" depends on 64BIT && PCI && X86 @@ -18,8 +16,6 @@ config INTEL_MIC_BUS OS and tools for MIC to use with this driver are available from <http://software.intel.com/en-us/mic-developer>. -comment "SCIF Bus Driver" - config SCIF_BUS tristate "SCIF Bus Driver" depends on 64BIT && PCI && X86 @@ -35,8 +31,6 @@ config SCIF_BUS OS and tools for MIC to use with this driver are available from <http://software.intel.com/en-us/mic-developer>. -comment "VOP Bus Driver" - config VOP_BUS tristate "VOP Bus Driver" help @@ -51,8 +45,6 @@ config VOP_BUS OS and tools for MIC to use with this driver are available from <http://software.intel.com/en-us/mic-developer>. -comment "Intel MIC Host Driver" - config INTEL_MIC_HOST tristate "Intel MIC Host Driver" depends on 64BIT && PCI && X86 @@ -71,8 +63,6 @@ config INTEL_MIC_HOST OS and tools for MIC to use with this driver are available from <http://software.intel.com/en-us/mic-developer>. -comment "Intel MIC Card Driver" - config INTEL_MIC_CARD tristate "Intel MIC Card Driver" depends on 64BIT && X86 @@ -90,8 +80,6 @@ config INTEL_MIC_CARD For more information see <http://software.intel.com/en-us/mic-developer>. -comment "SCIF Driver" - config SCIF tristate "SCIF Driver" depends on 64BIT && PCI && X86 && SCIF_BUS && IOMMU_SUPPORT @@ -110,8 +98,6 @@ config SCIF OS and tools for MIC to use with this driver are available from <http://software.intel.com/en-us/mic-developer>. -comment "Intel MIC Coprocessor State Management (COSM) Drivers" - config MIC_COSM tristate "Intel MIC Coprocessor State Management (COSM) Drivers" depends on 64BIT && PCI && X86 && SCIF @@ -128,8 +114,6 @@ config MIC_COSM OS and tools for MIC to use with this driver are available from <http://software.intel.com/en-us/mic-developer>. -comment "VOP Driver" - config VOP tristate "VOP Driver" depends on VOP_BUS diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h index 97415afd79f3..345bf843a38e 100644 --- a/drivers/misc/ocxl/ocxl_internal.h +++ b/drivers/misc/ocxl/ocxl_internal.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright 2017 IBM Corp. #ifndef _OCXL_INTERNAL_H_ #define _OCXL_INTERNAL_H_ diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h index 024f417e7e01..17e21cb2addd 100644 --- a/drivers/misc/ocxl/trace.h +++ b/drivers/misc/ocxl/trace.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright 2017 IBM Corp. #undef TRACE_SYSTEM #define TRACE_SYSTEM ocxl diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c index 3a8d76d1ccae..2817f4751306 100644 --- a/drivers/misc/sgi-gru/gruprocfs.c +++ b/drivers/misc/sgi-gru/gruprocfs.c @@ -119,7 +119,7 @@ static int mcs_statistics_show(struct seq_file *s, void *p) "cch_interrupt_sync", "cch_deallocate", "tfh_write_only", "tfh_write_restart", "tgh_invalidate"}; - seq_printf(s, "%-20s%12s%12s%12s\n", "#id", "count", "aver-clks", "max-clks"); + seq_puts(s, "#id count aver-clks max-clks\n"); for (op = 0; op < mcsop_last; op++) { count = atomic_long_read(&mcs_op_statistics[op].count); total = atomic_long_read(&mcs_op_statistics[op].total); @@ -165,8 +165,7 @@ static int cch_seq_show(struct seq_file *file, void *data) const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" }; if (gid == 0) - seq_printf(file, "#%5s%5s%6s%7s%9s%6s%8s%8s\n", "gid", "bid", - "ctx#", "asid", "pid", "cbrs", "dsbytes", "mode"); + seq_puts(file, "# gid bid ctx# asid pid cbrs dsbytes mode\n"); if (gru) for (i = 0; i < GRU_NUM_CCH; i++) { ts = gru->gs_gts[i]; @@ -191,10 +190,8 @@ static int gru_seq_show(struct seq_file *file, void *data) struct gru_state *gru = GID_TO_GRU(gid); if (gid == 0) { - seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid", - "ctx", "cbr", "dsr", "ctx", "cbr", "dsr"); - seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy", - "busy", "busy", "free", "free", "free"); + seq_puts(file, "# gid nid ctx cbr dsr ctx cbr dsr\n"); + seq_puts(file, "# busy busy busy free free free\n"); } if (gru) { ctxfree = GRU_NUM_CCH - gru->gs_active_contexts; diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index 7d9e23aa0b92..2ae9948a91e1 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -708,7 +708,6 @@ EXPORT_SYMBOL_GPL(st_unregister); */ static int st_tty_open(struct tty_struct *tty) { - int err = 0; struct st_data_s *st_gdata; pr_info("%s ", __func__); @@ -731,7 +730,8 @@ static int st_tty_open(struct tty_struct *tty) */ st_kim_complete(st_gdata->kim_data); pr_debug("done %s", __func__); - return err; + + return 0; } static void st_tty_close(struct tty_struct *tty) |