From 6cc070bdf07c8f6d5955d43da0560c9e5fd203b1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 May 2019 15:28:39 +0300 Subject: net/mlx5: potential error pointer dereference in error handling The error handling was a bit flipped around. If the mlx5_create_flow_group() function failed then it would have resulted in dereferencing "fg" when it was an error pointer. Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic") Signed-off-by: Dan Carpenter Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 86f77456f873..401441aefbcb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -106,10 +106,10 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) return 0; -destroy_flow_table: - mlx5_destroy_flow_table(ft); destroy_flow_group: mlx5_destroy_flow_group(fg); +destroy_flow_table: + mlx5_destroy_flow_table(ft); free: kvfree(spec); kvfree(flow_group_in); -- cgit v1.2.3 From 0b9055a112fd86c07b9d4857b61019485ec6526f Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 29 May 2019 22:50:24 +0000 Subject: net/mlx5: Add core dump register access HW bits Add Firmware core dump registers and HW definitions. 
Signed-off-by: Moshe Shemesh Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5a27246db883..b5431f7d97cb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -107,6 +107,7 @@ enum { MLX5_REG_FPGA_CAP = 0x4022, MLX5_REG_FPGA_CTRL = 0x4023, MLX5_REG_FPGA_ACCESS_REG = 0x4024, + MLX5_REG_CORE_DUMP = 0x402e, MLX5_REG_PCAP = 0x5001, MLX5_REG_PMTU = 0x5003, MLX5_REG_PTYS = 0x5004, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5e74305e2e57..7ee422e38826 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -715,7 +715,9 @@ struct mlx5_ifc_qos_cap_bits { }; struct mlx5_ifc_debug_cap_bits { - u8 reserved_at_0[0x20]; + u8 core_dump_general[0x1]; + u8 core_dump_qp[0x1]; + u8 reserved_at_2[0x1e]; u8 reserved_at_20[0x2]; u8 stall_detect[0x1]; @@ -2531,6 +2533,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_e_switch_cap_bits e_switch_cap; struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap; struct mlx5_ifc_qos_cap_bits qos_cap; + struct mlx5_ifc_debug_cap_bits debug_cap; struct mlx5_ifc_fpga_cap_bits fpga_cap; u8 reserved_at_0[0x8000]; }; @@ -8546,6 +8549,18 @@ struct mlx5_ifc_qcam_reg_bits { u8 reserved_at_1c0[0x80]; }; +struct mlx5_ifc_core_dump_reg_bits { + u8 reserved_at_0[0x18]; + u8 core_dump_type[0x8]; + + u8 reserved_at_20[0x30]; + u8 vhca_id[0x10]; + + u8 reserved_at_60[0x8]; + u8 qpn[0x18]; + u8 reserved_at_80[0x180]; +}; + struct mlx5_ifc_pcap_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; -- cgit v1.2.3 From c6d4e45d3b44b71227588c2f76615380b3961f96 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Wed, 29 May 2019 22:50:29 +0000 Subject: net/mlx5: Introduce termination table bits Termination table is a flow table with a termination flag. 
The flag allows the firmware to assume that the specified actions are the last actions list. This assumption allows the FW to safely perform potential looping logic (e.g. hairpin). Introduce the bits for this attribute. Signed-off-by: Eli Britstein Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 3 +++ include/linux/mlx5/fs.h | 1 + include/linux/mlx5/mlx5_ifc.h | 6 ++++-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 013b1ca4a791..bb24c3797218 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -147,6 +147,7 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, { int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); int en_decap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + int term = !!(ft->flags & MLX5_FLOW_TABLE_TERMINATION); u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0}; u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; struct mlx5_core_dev *dev = ns->dev; @@ -167,6 +168,8 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, en_decap); MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, en_encap); + MLX5_SET(create_flow_table_in, in, flow_table_context.termination_table, + term); switch (ft->op_mod) { case FS_FT_OP_MOD_NORMAL: diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index e690ba0f965c..2ddaa97f2179 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -47,6 +47,7 @@ enum { enum { MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT = BIT(0), MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1), + MLX5_FLOW_TABLE_TERMINATION = BIT(2), }; #define LEFTOVERS_RULE_NUM 2 diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 7ee422e38826..feaa909bf14f 100644 --- a/include/linux/mlx5/mlx5_ifc.h 
+++ b/include/linux/mlx5/mlx5_ifc.h @@ -382,7 +382,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reformat_and_modify_action[0x1]; u8 reserved_at_15[0x2]; u8 table_miss_action_domain[0x1]; - u8 reserved_at_18[0x8]; + u8 termination_table[0x1]; + u8 reserved_at_19[0x7]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; u8 log_max_modify_header_context[0x8]; @@ -7239,7 +7240,8 @@ struct mlx5_ifc_create_flow_table_out_bits { struct mlx5_ifc_flow_table_context_bits { u8 reformat_en[0x1]; u8 decap_en[0x1]; - u8 reserved_at_2[0x2]; + u8 reserved_at_2[0x1]; + u8 termination_table[0x1]; u8 table_miss_action[0x4]; u8 level[0x8]; u8 reserved_at_10[0x8]; -- cgit v1.2.3 From cd56f929e6a547180f889a4def370bdd6d48d223 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Wed, 29 May 2019 22:50:34 +0000 Subject: net/mlx5: E-Switch, Replace host_params event with functions_changed event To support sriov on an E-Switch manager, num_vfs is queried from the firmware whenever the E-Switch manager is notified by an esw_functions_changed event. Replace the host_params event with the esw_functions_changed event, which reflects more appropriate naming. While at it, also correct the num_vfs type from int to u16 as expected by the function mlx5_esw_query_functions(). 
Signed-off-by: Vu Pham Reviewed-by: Parav Pandit Reviewed-by: Bodong Wang Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 +- drivers/net/ethernet/mellanox/mlx5/core/ecpf.c | 27 --------- drivers/net/ethernet/mellanox/mlx5/core/ecpf.h | 4 -- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 3 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 32 +++++++++- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 6 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 69 +++++++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/events.c | 4 +- include/linux/mlx5/device.h | 2 +- include/linux/mlx5/mlx5_ifc.h | 6 +- 10 files changed, 86 insertions(+), 71 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 937ba4bcb056..7d3aec98e31f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -316,7 +316,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: case MLX5_CMD_OP_DEALLOC_MEMIC: case MLX5_CMD_OP_PAGE_FAULT_RESUME: - case MLX5_CMD_OP_QUERY_HOST_PARAMS: + case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -628,7 +628,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); - MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS); + MLX5_COMMAND_STR_CASE(QUERY_ESW_FUNCTIONS); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 4746f2d28fb6..1bcf8b8f9713 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -83,30 +83,3 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev) mlx5_peer_pf_cleanup(dev); } - -static int 
mlx5_query_host_params_context(struct mlx5_core_dev *dev, - u32 *out, int outlen) -{ - u32 in[MLX5_ST_SZ_DW(query_host_params_in)] = {}; - - MLX5_SET(query_host_params_in, in, opcode, - MLX5_CMD_OP_QUERY_HOST_PARAMS); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); -} - -int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) -{ - u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {}; - int err; - - err = mlx5_query_host_params_context(dev, out, sizeof(out)); - if (err) - return err; - - *num_vf = MLX5_GET(query_host_params_out, out, - host_params_context.host_num_of_vfs); - mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf); - - return 0; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h index 346372df218f..d3d7a00a02ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -16,7 +16,6 @@ enum { bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); int mlx5_ec_init(struct mlx5_core_dev *dev); void mlx5_ec_cleanup(struct mlx5_core_dev *dev); -int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf); #else /* CONFIG_MLX5_ESWITCH */ @@ -24,9 +23,6 @@ static inline bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} -static inline int -mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) -{ return -EOPNOTSUPP; } #endif /* CONFIG_MLX5_ESWITCH */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 23883d1fa22f..052bd70e4aa6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -534,7 +534,8 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev) async_event_mask |= (1ull << 
MLX5_EVENT_TYPE_MONITOR_COUNTER); if (mlx5_core_is_ecpf_esw_manager(dev)) - async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE); + async_event_mask |= + (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED); return async_event_mask; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 9ea0ccfe5ef5..d8935232964a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1686,13 +1686,41 @@ static int eswitch_vport_event(struct notifier_block *nb, return NOTIFY_OK; } +static int query_esw_functions(struct mlx5_core_dev *dev, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {0}; + + MLX5_SET(query_esw_functions_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_FUNCTIONS); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {0}; + int err; + + err = query_esw_functions(dev, out, sizeof(out)); + if (err) + return err; + + *num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + esw_debug(dev, "host_num_of_vfs=%d\n", *num_vfs); + + return 0; +} + /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { - int vf_nvports = 0, total_nvports = 0; struct mlx5_vport *vport; + int total_nvports = 0; + u16 vf_nvports = 0; int err; int i, enabled_events; @@ -1712,7 +1740,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (mode == SRIOV_OFFLOADS) { if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - err = mlx5_query_host_params_num_vfs(esw->dev, &vf_nvports); + err = mlx5_esw_query_functions(esw->dev, &vf_nvports); if (err) return err; total_nvports = esw->total_vports; diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ed3fad689ec9..320dd83dd301 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -190,7 +190,7 @@ struct mlx5_host_work { struct mlx5_eswitch *esw; }; -struct mlx5_host_info { +struct mlx5_esw_functions { struct mlx5_nb nb; u16 num_vfs; }; @@ -219,7 +219,7 @@ struct mlx5_eswitch { int mode; int nvports; u16 manager_vport; - struct mlx5_host_info host_info; + struct mlx5_esw_functions esw_funcs; }; void esw_offloads_cleanup(struct mlx5_eswitch *esw); @@ -386,6 +386,8 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1); +int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(__dev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e09ae27485ee..83689678b400 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -41,7 +41,6 @@ #include "en.h" #include "fs_core.h" #include "lib/devcom.h" -#include "ecpf.h" #include "lib/eq.h" /* There are two match-all miss flows, one for unicast dst mac and @@ -1782,57 +1781,79 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_prio_tag_acls_cleanup(esw); } -static void esw_host_params_event_handler(struct work_struct *work) +static void esw_functions_changed_event_handler(struct work_struct *work) { struct mlx5_host_work *host_work; struct mlx5_eswitch *esw; - int err, num_vf = 0; + u16 num_vfs = 0; + int err; host_work = container_of(work, struct mlx5_host_work, work); esw = host_work->esw; - err = mlx5_query_host_params_num_vfs(esw->dev, &num_vf); - if (err || num_vf == 
esw->host_info.num_vfs) + err = mlx5_esw_query_functions(esw->dev, &num_vfs); + if (err || num_vfs == esw->esw_funcs.num_vfs) goto out; /* Number of VFs can only change from "0 to x" or "x to 0". */ - if (esw->host_info.num_vfs > 0) { - esw_offloads_unload_vf_reps(esw, esw->host_info.num_vfs); + if (esw->esw_funcs.num_vfs > 0) { + esw_offloads_unload_vf_reps(esw, esw->esw_funcs.num_vfs); } else { - err = esw_offloads_load_vf_reps(esw, num_vf); + err = esw_offloads_load_vf_reps(esw, num_vfs); if (err) goto out; } - esw->host_info.num_vfs = num_vf; + esw->esw_funcs.num_vfs = num_vfs; out: kfree(host_work); } -static int esw_host_params_event(struct notifier_block *nb, - unsigned long type, void *data) +static int esw_functions_changed_event(struct notifier_block *nb, + unsigned long type, void *data) { + struct mlx5_esw_functions *esw_funcs; struct mlx5_host_work *host_work; - struct mlx5_host_info *host_info; struct mlx5_eswitch *esw; host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC); if (!host_work) return NOTIFY_DONE; - host_info = mlx5_nb_cof(nb, struct mlx5_host_info, nb); - esw = container_of(host_info, struct mlx5_eswitch, host_info); + esw_funcs = mlx5_nb_cof(nb, struct mlx5_esw_functions, nb); + esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs); host_work->esw = esw; - INIT_WORK(&host_work->work, esw_host_params_event_handler); + INIT_WORK(&host_work->work, esw_functions_changed_event_handler); queue_work(esw->work_queue, &host_work->work); return NOTIFY_OK; } +static void esw_functions_changed_event_init(struct mlx5_eswitch *esw, + u16 vf_nvports) +{ + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + return; + + MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event, + ESW_FUNCTIONS_CHANGED); + mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb); + esw->esw_funcs.num_vfs = vf_nvports; +} + +static void esw_functions_changed_event_cleanup(struct mlx5_eswitch *esw) +{ + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + return; + + 
mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); + flush_workqueue(esw->work_queue); +} + int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, int total_nvports) { @@ -1848,12 +1869,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, esw_offloads_devcom_init(esw); - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - MLX5_NB_INIT(&esw->host_info.nb, esw_host_params_event, - HOST_PARAMS_CHANGE); - mlx5_eq_notifier_register(esw->dev, &esw->host_info.nb); - esw->host_info.num_vfs = vf_nvports; - } + esw_functions_changed_event_init(esw, vf_nvports); mlx5_rdma_enable_roce(esw->dev); @@ -1887,13 +1903,12 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw) { u16 num_vfs; - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - mlx5_eq_notifier_unregister(esw->dev, &esw->host_info.nb); - flush_workqueue(esw->work_queue); - num_vfs = esw->host_info.num_vfs; - } else { + esw_functions_changed_event_cleanup(esw); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + num_vfs = esw->esw_funcs.num_vfs; + else num_vfs = esw->dev->priv.sriov.num_vfs; - } mlx5_rdma_disable_roce(esw->dev); esw_offloads_devcom_cleanup(esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index a81e8d2168d8..8bcf3426b9c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -108,8 +108,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_STALL_EVENT"; case MLX5_EVENT_TYPE_CMD: return "MLX5_EVENT_TYPE_CMD"; - case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE: - return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE"; + case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED: + return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED"; case MLX5_EVENT_TYPE_PAGE_REQUEST: return "MLX5_EVENT_TYPE_PAGE_REQUEST"; case MLX5_EVENT_TYPE_PAGE_FAULT: diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index fc2b6e807f06..5e760067ac41 100644 --- 
a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -342,7 +342,7 @@ enum mlx5_event { MLX5_EVENT_TYPE_PAGE_FAULT = 0xc, MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd, - MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE = 0xe, + MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED = 0xe, MLX5_EVENT_TYPE_DCT_DRAINED = 0x1c, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index feaa909bf14f..0780242a757a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -155,7 +155,7 @@ enum { MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY = 0x725, MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY = 0x726, MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS = 0x727, - MLX5_CMD_OP_QUERY_HOST_PARAMS = 0x740, + MLX5_CMD_OP_QUERY_ESW_FUNCTIONS = 0x740, MLX5_CMD_OP_QUERY_VPORT_STATE = 0x750, MLX5_CMD_OP_MODIFY_VPORT_STATE = 0x751, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752, @@ -9721,7 +9721,7 @@ struct mlx5_ifc_host_params_context_bits { u8 reserved_at_80[0x180]; }; -struct mlx5_ifc_query_host_params_in_bits { +struct mlx5_ifc_query_esw_functions_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; @@ -9731,7 +9731,7 @@ struct mlx5_ifc_query_host_params_in_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_query_host_params_out_bits { +struct mlx5_ifc_query_esw_functions_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; -- cgit v1.2.3 From 6706a3b94f890145ca09797f748d2b30e1414fd3 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Wed, 29 May 2019 22:50:37 +0000 Subject: net/mlx5: E-Switch, Honor eswitch functions changed event cap Whenever device supports eswitch functions changed event, honor such device setting. Do not limit it to ECPF. 
Signed-off-by: Parav Pandit Signed-off-by: Vu Pham Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 13 +++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 6 +++--- include/linux/mlx5/mlx5_ifc.h | 4 +++- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 052bd70e4aa6..5e9319d3d90c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -533,7 +533,7 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); - if (mlx5_core_is_ecpf_esw_manager(dev)) + if (mlx5_eswitch_is_funcs_handler(dev)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 320dd83dd301..b524813cccac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -406,6 +406,18 @@ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) MLX5_VPORT_ECPF : MLX5_VPORT_PF; } +static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) +{ + /* Ideally device should have the functions changed supported + * capability regardless of it being ECPF or PF wherever such + * event should be processed such as on eswitch manager device. + * However, some ECPF based device might not have this capability + * set. Hence OR for ECPF check to cover such device. 
+ */ + return MLX5_CAP_ESW(dev, esw_functions_changed) || + mlx5_core_is_ecpf_esw_manager(dev); +} + static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw) { /* Uplink always locate at the last element of the array.*/ @@ -500,6 +512,7 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } +static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } #define FDB_MAX_CHAIN 1 #define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 83689678b400..05cb2fffd887 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1836,7 +1836,7 @@ static int esw_functions_changed_event(struct notifier_block *nb, static void esw_functions_changed_event_init(struct mlx5_eswitch *esw, u16 vf_nvports) { - if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + if (!mlx5_eswitch_is_funcs_handler(esw->dev)) return; MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event, @@ -1847,7 +1847,7 @@ static void esw_functions_changed_event_init(struct mlx5_eswitch *esw, static void esw_functions_changed_event_cleanup(struct mlx5_eswitch *esw) { - if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + if (!mlx5_eswitch_is_funcs_handler(esw->dev)) return; mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); @@ -1905,7 +1905,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw) esw_functions_changed_event_cleanup(esw); - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + if (mlx5_eswitch_is_funcs_handler(esw->dev)) num_vfs = esw->esw_funcs.num_vfs; 
else num_vfs = esw->dev->priv.sriov.num_vfs; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 0780242a757a..6513b985c5e9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -665,7 +665,9 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x16]; + u8 reserved_at_5[0x14]; + u8 esw_functions_changed[0x1]; + u8 reserved_at_1a[0x1]; u8 ecpf_vport_exists[0x1]; u8 counter_eswitch_affinity[0x1]; u8 merged_eswitch[0x1]; -- cgit v1.2.3 From c94ff7487754dd23159a8dc47466c0cc82121ebd Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 29 May 2019 22:50:39 +0000 Subject: {IB, net}/mlx5: No need to typecast from void* to mlx5_ib_dev* Avoid typecasting from void* to mlx5_ib_dev* or mlx5e_rep_priv* as it is not needed. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/ib_rep.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index 1d9778da8a50..c995102b0276 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -72,6 +72,6 @@ struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, static inline struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep) { - return (struct mlx5_ib_dev *)rep->rep_if[REP_IB].priv; + return rep->rep_if[REP_IB].priv; } #endif /* __MLX5_IB_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 83b573b1abac..c40c025afd99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -91,7 +91,7 @@ struct mlx5e_rep_priv { static inline struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep 
*rep) { - return (struct mlx5e_rep_priv *)rep->rep_if[REP_ETH].priv; + return rep->rep_if[REP_ETH].priv; } struct mlx5e_neigh { -- cgit v1.2.3 From 8693115af4c24d92b971ad895c5f329761ed5d38 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 29 May 2019 22:50:41 +0000 Subject: {IB,net}/mlx5: Constify rep ops functions pointers Currently for every representor type and for every single vport, a copy of the representor function pointers is stored even though they don't change from one vport to another. Additionally priv data entry for the rep is not passed during registration, but it is copied. It is used (set and cleared) by the user of the reps. As we want to scale vports, to simplify and also to split constants from data, 1. Rename mlx5_eswitch_rep_if to mlx5_eswitch_rep_ops so as to match the _ops suffix of other standard netdev, ibdev ops. 2. Constify the IB and Ethernet rep ops structure. 3. Instead of storing copy of all rep function pointers, store copy per eswitch rep type. 4. Split data and function pointers to mlx5_eswitch_rep_ops and mlx5_eswitch_rep_data. 
Signed-off-by: Parav Pandit Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/ib_rep.c | 19 ++++++----- drivers/infiniband/hw/mlx5/ib_rep.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 15 +++++---- drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 38 ++++++++++------------ include/linux/mlx5/eswitch.h | 20 +++++++----- 7 files changed, 49 insertions(+), 48 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index cbcc40d776b9..22e651cb5534 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -60,7 +60,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) if (!__mlx5_ib_add(ibdev, profile)) return -EINVAL; - rep->rep_if[REP_IB].priv = ibdev; + rep->rep_data[REP_IB].priv = ibdev; return 0; } @@ -70,13 +70,13 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) { struct mlx5_ib_dev *dev; - if (!rep->rep_if[REP_IB].priv || + if (!rep->rep_data[REP_IB].priv || rep->vport != MLX5_VPORT_UPLINK) return; dev = mlx5_ib_rep_to_dev(rep); __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); - rep->rep_if[REP_IB].priv = NULL; + rep->rep_data[REP_IB].priv = NULL; } static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) @@ -84,16 +84,17 @@ static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) return mlx5_ib_rep_to_dev(rep); } +static const struct mlx5_eswitch_rep_ops rep_ops = { + .load = mlx5_ib_vport_rep_load, + .unload = mlx5_ib_vport_rep_unload, + .get_proto_dev = mlx5_ib_vport_get_proto_dev, +}; + void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_eswitch_rep_if rep_if = {}; - - rep_if.load = mlx5_ib_vport_rep_load; - rep_if.unload = mlx5_ib_vport_rep_unload; - rep_if.get_proto_dev = 
mlx5_ib_vport_get_proto_dev; - mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_IB); + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB); } void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index c995102b0276..22adce2d6795 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -72,6 +72,6 @@ struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, static inline struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep) { - return rep->rep_if[REP_IB].priv; + return rep->rep_data[REP_IB].priv; } #endif /* __MLX5_IB_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 91e24f1cead8..33f8f99681a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1752,7 +1752,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) } rpriv->netdev = netdev; - rep->rep_if[REP_ETH].priv = rpriv; + rep->rep_data[REP_ETH].priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); if (rep->vport == MLX5_VPORT_UPLINK) { @@ -1826,16 +1826,17 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) return rpriv->netdev; } +static const struct mlx5_eswitch_rep_ops rep_ops = { + .load = mlx5e_vport_rep_load, + .unload = mlx5e_vport_rep_unload, + .get_proto_dev = mlx5e_vport_rep_get_proto_dev +}; + void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_eswitch_rep_if rep_if = {}; - - rep_if.load = mlx5e_vport_rep_load; - rep_if.unload = mlx5e_vport_rep_unload; - rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH); + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_ETH); } void mlx5e_rep_unregister_vport_reps(struct 
mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index c40c025afd99..e34573fd88c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -91,7 +91,7 @@ struct mlx5e_rep_priv { static inline struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep) { - return rep->rep_if[REP_ETH].priv; + return rep->rep_data[REP_ETH].priv; } struct mlx5e_neigh { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index b524813cccac..135d9a29bbdf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -173,6 +173,7 @@ struct mlx5_esw_offload { struct mutex peer_mutex; DECLARE_HASHTABLE(encap_tbl, 8); DECLARE_HASHTABLE(mod_hdr_tbl, 8); + const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; u64 num_flows; u8 encap; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 05cb2fffd887..d6246ee042fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -332,7 +332,7 @@ static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? 
"pop" : "none"); for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { rep = &esw->offloads.vport_reps[vf_vport]; - if (atomic_read(&rep->rep_if[REP_ETH].state) != REP_LOADED) + if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED) continue; err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); @@ -1276,7 +1276,7 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) ether_addr_copy(rep->hw_id, hw_id); for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) - atomic_set(&rep->rep_if[rep_type].state, + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); } @@ -1286,9 +1286,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u8 rep_type) { - if (atomic_cmpxchg(&rep->rep_if[rep_type].state, + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, REP_LOADED, REP_REGISTERED) == REP_LOADED) - rep->rep_if[rep_type].unload(rep); + esw->offloads.rep_ops[rep_type]->unload(rep); } static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) @@ -1349,11 +1349,11 @@ static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, { int err = 0; - if (atomic_cmpxchg(&rep->rep_if[rep_type].state, + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, REP_REGISTERED, REP_LOADED) == REP_REGISTERED) { - err = rep->rep_if[rep_type].load(esw->dev, rep); + err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep); if (err) - atomic_set(&rep->rep_if[rep_type].state, + atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED); } @@ -2216,21 +2216,17 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) } void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep_if *__rep_if, + const struct mlx5_eswitch_rep_ops *ops, u8 rep_type) { - struct mlx5_eswitch_rep_if *rep_if; + struct mlx5_eswitch_rep_data *rep_data; struct mlx5_eswitch_rep *rep; int i; + esw->offloads.rep_ops[rep_type] = 
ops; mlx5_esw_for_all_reps(esw, i, rep) { - rep_if = &rep->rep_if[rep_type]; - rep_if->load = __rep_if->load; - rep_if->unload = __rep_if->unload; - rep_if->get_proto_dev = __rep_if->get_proto_dev; - rep_if->priv = __rep_if->priv; - - atomic_set(&rep_if->state, REP_REGISTERED); + rep_data = &rep->rep_data[rep_type]; + atomic_set(&rep_data->state, REP_REGISTERED); } } EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); @@ -2245,7 +2241,7 @@ void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type) __unload_reps_all_vport(esw, max_vf, rep_type); mlx5_esw_for_all_reps(esw, i, rep) - atomic_set(&rep->rep_if[rep_type].state, REP_UNREGISTERED); + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); } EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps); @@ -2254,7 +2250,7 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) struct mlx5_eswitch_rep *rep; rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); - return rep->rep_if[rep_type].priv; + return rep->rep_data[rep_type].priv; } void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, @@ -2265,9 +2261,9 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, rep = mlx5_eswitch_get_rep(esw, vport); - if (atomic_read(&rep->rep_if[rep_type].state) == REP_LOADED && - rep->rep_if[rep_type].get_proto_dev) - return rep->rep_if[rep_type].get_proto_dev(rep); + if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && + esw->offloads.rep_ops[rep_type]->get_proto_dev) + return esw->offloads.rep_ops[rep_type]->get_proto_dev(rep); return NULL; } EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 0ca77dd1429c..d81ee4df181c 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -29,17 +29,19 @@ enum { }; struct mlx5_eswitch_rep; -struct mlx5_eswitch_rep_if { - int (*load)(struct mlx5_core_dev *dev, - struct mlx5_eswitch_rep *rep); - void (*unload)(struct mlx5_eswitch_rep *rep); 
- void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep); - void *priv; - atomic_t state; +struct mlx5_eswitch_rep_ops { + int (*load)(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep); + void (*unload)(struct mlx5_eswitch_rep *rep); + void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep); +}; + +struct mlx5_eswitch_rep_data { + void *priv; + atomic_t state; }; struct mlx5_eswitch_rep { - struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES]; + struct mlx5_eswitch_rep_data rep_data[NUM_REP_TYPES]; u16 vport; u8 hw_id[ETH_ALEN]; u16 vlan; @@ -47,7 +49,7 @@ struct mlx5_eswitch_rep { }; void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep_if *rep_if, + const struct mlx5_eswitch_rep_ops *ops, u8 rep_type); void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type); void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, -- cgit v1.2.3 From b8a92577f4be89427f0b150261626ebcc25c498d Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Mon, 10 Jun 2019 23:38:14 +0000 Subject: net/mlx5: Increase wait time for fw initialization Firmware FLR happens sequentially, in some cases, like when destroying a VM that had many VFs, may require waiting much longer than 10 seconds. Increase the timeout to 2 minutes, and print a wait countdown status every 20 seconds. 
Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 61fa1d162d28..8e96c42d3b84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -169,18 +169,28 @@ static struct mlx5_profile profile[] = { #define FW_INIT_TIMEOUT_MILI 2000 #define FW_INIT_WAIT_MS 2 -#define FW_PRE_INIT_TIMEOUT_MILI 10000 +#define FW_PRE_INIT_TIMEOUT_MILI 120000 +#define FW_INIT_WARN_MESSAGE_INTERVAL 20000 -static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili) +static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, + u32 warn_time_mili) { + unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili); unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili); int err = 0; + BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL); + while (fw_initializing(dev)) { if (time_after(jiffies, end)) { err = -EBUSY; break; } + if (warn_time_mili && time_after(jiffies, warn)) { + mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds\n", + jiffies_to_msecs(end - warn) / 1000); + warn = jiffies + msecs_to_jiffies(warn_time_mili); + } msleep(FW_INIT_WAIT_MS); } @@ -911,7 +921,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) /* wait for firmware to accept initialization segments configurations */ - err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI); + err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL); if (err) { mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n", FW_PRE_INIT_TIMEOUT_MILI); @@ -924,7 +934,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) return err; } - err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI); + err = 
wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0); if (err) { mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n", FW_INIT_TIMEOUT_MILI); -- cgit v1.2.3 From ac35dcd6e4bdfadc90d6566ccf9fdc3b8f5e1b23 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Mon, 10 Jun 2019 23:38:16 +0000 Subject: net/mlx5: E-Switch, Handle representors creation in handler context Unified representors creation in esw_functions_changed context handler. Emulate the esw_function_changed event for FW/HW that does not support this event. Signed-off-by: Vu Pham Reviewed-by: Parav Pandit Reviewed-by: Bodong Wang Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 12 +-- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 89 ++++++++++++---------- 2 files changed, 50 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d8935232964a..504c0440b0b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1720,7 +1720,6 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { struct mlx5_vport *vport; int total_nvports = 0; - u16 vf_nvports = 0; int err; int i, enabled_events; @@ -1739,15 +1738,10 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); if (mode == SRIOV_OFFLOADS) { - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - err = mlx5_esw_query_functions(esw->dev, &vf_nvports); - if (err) - return err; + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) total_nvports = esw->total_vports; - } else { - vf_nvports = nvfs; + else total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev); - } } esw->mode = mode; @@ -1761,7 +1755,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); 
mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw, vf_nvports, total_nvports); + err = esw_offloads_init(esw, nvfs, total_nvports); } if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d6246ee042fa..f843d8a35a2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1436,34 +1436,13 @@ err_reps: return err; } -static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) -{ - int err; - - /* Special vports must be loaded first. */ - err = __load_reps_special_vport(esw, rep_type); - if (err) - return err; - - err = __load_reps_vf_vport(esw, nvports, rep_type); - if (err) - goto err_vfs; - - return 0; - -err_vfs: - __unload_reps_special_vport(esw, rep_type); - return err; -} - -static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) +static int esw_offloads_load_special_vport(struct mlx5_eswitch *esw) { u8 rep_type = 0; int err; for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = __load_reps_all_vport(esw, nvports, rep_type); + err = __load_reps_special_vport(esw, rep_type); if (err) goto err_reps; } @@ -1472,7 +1451,7 @@ static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) err_reps: while (rep_type-- > 0) - __unload_reps_all_vport(esw, nvports, rep_type); + __unload_reps_special_vport(esw, rep_type); return err; } @@ -1811,6 +1790,21 @@ out: kfree(host_work); } +static void esw_emulate_event_handler(struct work_struct *work) +{ + struct mlx5_host_work *host_work = + container_of(work, struct mlx5_host_work, work); + struct mlx5_eswitch *esw = host_work->esw; + int err; + + if (esw->esw_funcs.num_vfs) { + err = esw_offloads_load_vf_reps(esw, esw->esw_funcs.num_vfs); + if (err) + esw_warn(esw->dev, "Load vf reps err=%d\n", err); + } + kfree(host_work); +} + static 
int esw_functions_changed_event(struct notifier_block *nb, unsigned long type, void *data) { @@ -1827,7 +1821,11 @@ static int esw_functions_changed_event(struct notifier_block *nb, host_work->esw = esw; - INIT_WORK(&host_work->work, esw_functions_changed_event_handler); + if (mlx5_eswitch_is_funcs_handler(esw->dev)) + INIT_WORK(&host_work->work, + esw_functions_changed_event_handler); + else + INIT_WORK(&host_work->work, esw_emulate_event_handler); queue_work(esw->work_queue, &host_work->work); return NOTIFY_OK; @@ -1836,13 +1834,14 @@ static int esw_functions_changed_event(struct notifier_block *nb, static void esw_functions_changed_event_init(struct mlx5_eswitch *esw, u16 vf_nvports) { - if (!mlx5_eswitch_is_funcs_handler(esw->dev)) - return; - - MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event, - ESW_FUNCTIONS_CHANGED); - mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb); - esw->esw_funcs.num_vfs = vf_nvports; + if (mlx5_eswitch_is_funcs_handler(esw->dev)) { + esw->esw_funcs.num_vfs = 0; + MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event, + ESW_FUNCTIONS_CHANGED); + mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb); + } else { + esw->esw_funcs.num_vfs = vf_nvports; + } } static void esw_functions_changed_event_cleanup(struct mlx5_eswitch *esw) @@ -1863,7 +1862,11 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, if (err) return err; - err = esw_offloads_load_all_reps(esw, vf_nvports); + /* Only load special vports reps. VF reps will be loaded in + * context of functions_changed event handler through real + * or emulated event. + */ + err = esw_offloads_load_special_vport(esw); if (err) goto err_reps; @@ -1873,6 +1876,16 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, mlx5_rdma_enable_roce(esw->dev); + /* Call esw_functions_changed event to load VF reps: + * 1. HW does not support the event then emulate it + * Or + * 2. 
The event was already notified when num_vfs changed + * and eswitch was in legacy mode + */ + esw_functions_changed_event(&esw->esw_funcs.nb.nb, + MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED, + NULL); + return 0; err_reps: @@ -1901,18 +1914,10 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, void esw_offloads_cleanup(struct mlx5_eswitch *esw) { - u16 num_vfs; - esw_functions_changed_event_cleanup(esw); - - if (mlx5_eswitch_is_funcs_handler(esw->dev)) - num_vfs = esw->esw_funcs.num_vfs; - else - num_vfs = esw->dev->priv.sriov.num_vfs; - mlx5_rdma_disable_roce(esw->dev); esw_offloads_devcom_cleanup(esw); - esw_offloads_unload_all_reps(esw, num_vfs); + esw_offloads_unload_all_reps(esw, esw->esw_funcs.num_vfs); esw_offloads_steering_cleanup(esw); } -- cgit v1.2.3 From 10ee82cedb62dd716c44ba7a2c458688638873ab Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Mon, 10 Jun 2019 23:38:18 +0000 Subject: net/mlx5: E-Switch, Return raw output for query esw functions Current function only returns host num of VFs, later patch requires other params such as host maximum num of VFs. Return the raw output so that caller can extract info as needed. 
Signed-off-by: Bodong Wang Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 21 ++------------------- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 7 ++++++- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 5 ++++- 3 files changed, 12 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 504c0440b0b0..a4df109fbeb7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1686,10 +1686,9 @@ static int eswitch_vport_event(struct notifier_block *nb, return NOTIFY_OK; } -static int query_esw_functions(struct mlx5_core_dev *dev, - u32 *out, int outlen) +int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {}; MLX5_SET(query_esw_functions_in, in, opcode, MLX5_CMD_OP_QUERY_ESW_FUNCTIONS); @@ -1697,22 +1696,6 @@ static int query_esw_functions(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } -int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs) -{ - u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {0}; - int err; - - err = query_esw_functions(dev, out, sizeof(out)); - if (err) - return err; - - *num_vfs = MLX5_GET(query_esw_functions_out, out, - host_params_context.host_num_of_vfs); - esw_debug(dev, "host_num_of_vfs=%d\n", *num_vfs); - - return 0; -} - /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 135d9a29bbdf..e03811be771d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -387,7 +387,7 @@ bool mlx5_esw_lag_prereq(struct 
mlx5_core_dev *dev0, bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1); -int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs); +int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen); #define MLX5_DEBUG_ESWITCH_MASK BIT(3) @@ -514,6 +514,11 @@ static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } +static inline int +mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen) +{ + return -EOPNOTSUPP; +} #define FDB_MAX_CHAIN 1 #define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index f843d8a35a2c..1638e4cdeb16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1762,6 +1762,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) static void esw_functions_changed_event_handler(struct work_struct *work) { + u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {}; struct mlx5_host_work *host_work; struct mlx5_eswitch *esw; u16 num_vfs = 0; @@ -1770,7 +1771,9 @@ static void esw_functions_changed_event_handler(struct work_struct *work) host_work = container_of(work, struct mlx5_host_work, work); esw = host_work->esw; - err = mlx5_esw_query_functions(esw->dev, &num_vfs); + err = mlx5_esw_query_functions(esw->dev, out, sizeof(out)); + num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); if (err || num_vfs == esw->esw_funcs.num_vfs) goto out; -- cgit v1.2.3 From 86eec50beaf3a45f6432d491072fa5c54284dbca Mon Sep 17 
00:00:00 2001 From: Bodong Wang Date: Mon, 10 Jun 2019 23:38:19 +0000 Subject: net/mlx5: Support querying max VFs from device For ECPF with eswitch manager privilege, query the host max VF count by querying the device using query_functions command. With this enhancement: 1. flow steering entries are created only for valid vports based on the max VF count of the PF. 2. Driver only queries cap of valid vport. Eswitch requires the max VFs when doing initialization, so do sr-iov init before eswitch init. Signed-off-by: Bodong Wang Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 18 +++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 22 ++++++++++++++++++++++ include/linux/mlx5/driver.h | 7 ++----- include/linux/mlx5/mlx5_ifc.h | 2 +- 4 files changed, 34 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 8e96c42d3b84..720f65bfe6a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -844,32 +844,32 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_rl_cleanup; } - err = mlx5_eswitch_init(dev); + err = mlx5_sriov_init(dev); if (err) { - mlx5_core_err(dev, "Failed to init eswitch %d\n", err); + mlx5_core_err(dev, "Failed to init sriov %d\n", err); goto err_mpfs_cleanup; } - err = mlx5_sriov_init(dev); + err = mlx5_eswitch_init(dev); if (err) { - mlx5_core_err(dev, "Failed to init sriov %d\n", err); - goto err_eswitch_cleanup; + mlx5_core_err(dev, "Failed to init eswitch %d\n", err); + goto err_sriov_cleanup; } err = mlx5_fpga_init(dev); if (err) { mlx5_core_err(dev, "Failed to init fpga device %d\n", err); - goto err_sriov_cleanup; + goto err_eswitch_cleanup; } dev->tracer = mlx5_fw_tracer_create(dev); return 0; -err_sriov_cleanup: - mlx5_sriov_cleanup(dev); err_eswitch_cleanup: mlx5_eswitch_cleanup(dev->priv.eswitch); 
+err_sriov_cleanup: + mlx5_sriov_cleanup(dev); err_mpfs_cleanup: mlx5_mpfs_cleanup(dev); err_rl_cleanup: @@ -893,8 +893,8 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { mlx5_fw_tracer_destroy(dev->tracer); mlx5_fpga_cleanup(dev); - mlx5_sriov_cleanup(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_sriov_cleanup(dev); mlx5_mpfs_cleanup(dev); mlx5_cleanup_rl_table(dev); mlx5_vxlan_destroy(dev->vxlan); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index a249b3c3843d..2eecb831c499 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -208,6 +208,27 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev) mlx5_device_disable_sriov(dev); } +static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {}; + u16 host_total_vfs; + int err; + + if (mlx5_core_is_ecpf_esw_manager(dev)) { + err = mlx5_esw_query_functions(dev, out, sizeof(out)); + host_total_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_total_vfs); + + /* Old FW doesn't support getting total_vfs from esw func + * but supports getting it from pci_sriov. 
+ */ + if (!err && host_total_vfs) + return host_total_vfs; + } + + return pci_sriov_get_totalvfs(dev->pdev); +} + int mlx5_sriov_init(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; @@ -218,6 +239,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) return 0; total_vfs = pci_sriov_get_totalvfs(pdev); + sriov->max_vfs = mlx5_get_max_vfs(dev); sriov->num_vfs = pci_num_vf(pdev); sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); if (!sriov->vfs_ctx) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b5431f7d97cb..64155fe201ee 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -470,6 +470,7 @@ struct mlx5_core_sriov { struct mlx5_vf_context *vfs_ctx; int num_vfs; int enabled_vfs; + u16 max_vfs; }; struct mlx5_fc_stats { @@ -1103,13 +1104,9 @@ static inline bool mlx5_ecpf_vport_exists(struct mlx5_core_dev *dev) return mlx5_core_is_pf(dev) && MLX5_CAP_ESW(dev, ecpf_vport_exists); } -#define MLX5_HOST_PF_MAX_VFS (127u) static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev) { - if (mlx5_core_is_ecpf_esw_manager(dev)) - return MLX5_HOST_PF_MAX_VFS; - else - return pci_sriov_get_totalvfs(dev->pdev); + return dev->priv.sriov.max_vfs; } static inline int mlx5_get_gid_table_len(u16 param) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6513b985c5e9..e3c154b573a2 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9711,7 +9711,7 @@ struct mlx5_ifc_host_params_context_bits { u8 reserved_at_8[0x8]; u8 host_num_of_vfs[0x10]; - u8 reserved_at_20[0x10]; + u8 host_total_vfs[0x10]; u8 host_pci_bus[0x10]; u8 reserved_at_40[0x10]; -- cgit v1.2.3 From 081cc2d7fa5828c8214c41b97352245ca5cc0f58 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:21 +0000 Subject: net/mlx5: Introduce EQ polling budget Multiple EQs may share the same irq in subsequent patches. 
To avoid starvation, a budget is set per EQ's interrupt handler. Because of this change, it is no longer required to check that MLX5_NUM_SPARE_EQE eqes were polled (to detect that arm is required). It is guaranteed that MLX5_NUM_SPARE_EQE > budget, therefore the handler will arm and exit the handler before all the entries in the eq are polled. In the scenario where the handler is out of budget and there are more EQEs to poll, arming the EQ guarantees that the HW will send another interrupt and the handler will be called again. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 55 ++++++++++++++-------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 5e9319d3d90c..28defeaca80a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -61,6 +61,16 @@ enum { MLX5_EQ_DOORBEL_OFFSET = 0x40, }; +/* budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we update + * the ci before we polled all the entries in the EQ. MLX5_NUM_SPARE_EQE is + * used to set the EQ size, budget must be smaller than the EQ size. + */ +enum { + MLX5_EQ_POLLING_BUDGET = 128, +}; + +static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE); + struct mlx5_irq_info { cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; @@ -129,11 +139,16 @@ static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) struct mlx5_eq_comp *eq_comp = eq_ptr; struct mlx5_eq *eq = eq_ptr; struct mlx5_eqe *eqe; - int set_ci = 0; + int num_eqes = 0; u32 cqn = -1; - while ((eqe = next_eqe_sw(eq))) { + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { struct mlx5_core_cq *cq; + /* Make sure we read EQ entry contents after we've * checked the ownership bit. 
*/ @@ -151,20 +166,10 @@ static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) } ++eq->cons_index; - ++set_ci; - /* The HCA will think the queue has overflowed if we - * don't tell it we've been processing events. We - * create our EQs with MLX5_NUM_SPARE_EQE extra - * entries, so we must update our consumer index at - * least that often. - */ - if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq, 0); - set_ci = 0; - } - } + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); +out: eq_update_ci(eq, 1); if (cqn != -1) @@ -197,12 +202,16 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) struct mlx5_eq_table *eqt; struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; - int set_ci = 0; + int num_eqes = 0; dev = eq->dev; eqt = dev->priv.eq_table; - while ((eqe = next_eqe_sw(eq))) { + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { /* * Make sure we read EQ entry contents after we've * checked the ownership bit. @@ -217,20 +226,10 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); ++eq->cons_index; - ++set_ci; - /* The HCA will think the queue has overflowed if we - * don't tell it we've been processing events. We - * create our EQs with MLX5_NUM_SPARE_EQE extra - * entries, so we must update our consumer index at - * least that often. - */ - if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq, 0); - set_ci = 0; - } - } + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); +out: eq_update_ci(eq, 1); return IRQ_HANDLED; -- cgit v1.2.3 From ca390799c2aa03632c294107fa7f647bcbdff428 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:23 +0000 Subject: net/mlx5: Change interrupt handler to call chain notifier Multiple EQs may share the same IRQ in subsequent patches. Instead of calling the IRQ handler directly, the EQ will register to an atomic chain notifier.
The Linux built-in shared IRQ is not used because it forces the caller to disable the IRQ and clear affinity before free_irq() can be called. This patch is the first step in the separation of IRQ and EQ logic. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/odp.c | 10 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 138 +++++++++++++++-------- drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 9 +- include/linux/mlx5/eq.h | 3 +- 5 files changed, 105 insertions(+), 56 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 40eb8be482e4..a043af7ee366 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -920,6 +920,7 @@ struct mlx5_ib_lb_state { }; struct mlx5_ib_pf_eq { + struct notifier_block irq_nb; struct mlx5_ib_dev *dev; struct mlx5_eq *core; struct work_struct work; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 91507a2e9290..ac40a4fd5598 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1488,9 +1488,11 @@ static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq) mlx5_eq_update_ci(eq->core, cc, 1); } -static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr) +static int mlx5_ib_eq_pf_int(struct notifier_block *nb, unsigned long type, + void *data) { - struct mlx5_ib_pf_eq *eq = eq_ptr; + struct mlx5_ib_pf_eq *eq = + container_of(nb, struct mlx5_ib_pf_eq, irq_nb); unsigned long flags; if (spin_trylock_irqsave(&eq->lock, flags)) { @@ -1553,12 +1555,12 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) goto err_mempool; } + eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int; param = (struct mlx5_eq_param) { .index = MLX5_EQ_PFAULT_IDX, .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, .nent = MLX5_IB_NUM_PF_EQE, - .context = eq, - .handler = mlx5_ib_eq_pf_int + .nb = 
&eq->irq_nb, }; eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", &param); if (IS_ERR(eq->core)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 28defeaca80a..590c0fefaa25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -72,16 +72,16 @@ enum { static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE); struct mlx5_irq_info { + struct atomic_notifier_head nh; cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; - void *context; /* dev_id provided to request_irq */ }; struct mlx5_eq_table { struct list_head comp_eqs_list; - struct mlx5_eq pages_eq; - struct mlx5_eq cmd_eq; - struct mlx5_eq async_eq; + struct mlx5_eq_async pages_eq; + struct mlx5_eq_async cmd_eq; + struct mlx5_eq_async async_eq; struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX]; @@ -109,6 +109,31 @@ struct mlx5_eq_table { (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) +static struct mlx5_irq_info *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + + return &eq_table->irq_info[vecidx]; +} + +static int mlx5_irq_attach_nb(struct mlx5_irq_info *irq, + struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&irq->nh, nb); +} + +static int mlx5_irq_detach_nb(struct mlx5_irq_info *irq, + struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&irq->nh, nb); +} + +static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) +{ + atomic_notifier_call_chain(nh, 0, NULL); + return IRQ_HANDLED; +} + static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; @@ -134,10 +159,13 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) return cq; } -static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) +static int mlx5_eq_comp_int(struct notifier_block *nb, + __always_unused
unsigned long action, + __always_unused void *data) { - struct mlx5_eq_comp *eq_comp = eq_ptr; - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_comp *eq_comp = + container_of(nb, struct mlx5_eq_comp, irq_nb); + struct mlx5_eq *eq = &eq_comp->core; struct mlx5_eqe *eqe; int num_eqes = 0; u32 cqn = -1; @@ -175,7 +203,7 @@ out: if (cqn != -1) tasklet_schedule(&eq_comp->tasklet_ctx.task); - return IRQ_HANDLED; + return 0; } /* Some architectures don't latch interrupts when they are disabled, so using @@ -189,16 +217,19 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) disable_irq(eq->core.irqn); count_eqe = eq->core.cons_index; - mlx5_eq_comp_int(eq->core.irqn, eq); + mlx5_eq_comp_int(&eq->irq_nb, 0, NULL); count_eqe = eq->core.cons_index - count_eqe; enable_irq(eq->core.irqn); return count_eqe; } -static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) +static int mlx5_eq_async_int(struct notifier_block *nb, + unsigned long action, void *data) { - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_async *eq_async = + container_of(nb, struct mlx5_eq_async, irq_nb); + struct mlx5_eq *eq = &eq_async->core; struct mlx5_eq_table *eqt; struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; @@ -232,7 +263,7 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) out: eq_update_ci(eq, 1); - return IRQ_HANDLED; + return 0; } static void init_eq_buf(struct mlx5_eq *eq) @@ -254,6 +285,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; + struct mlx5_irq_info *irq_info; u8 vecidx = param->index; __be64 *pas; void *eqc; @@ -261,9 +293,6 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, u32 *in; int err; - if (eq_table->irq_info[vecidx].context) - return -EEXIST; - /* Init CQ table */ memset(cq_table, 0, sizeof(*cq_table)); spin_lock_init(&cq_table->lock); @@ -306,24 +335,31 @@ 
create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, if (err) goto err_in; - snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", - name, pci_name(dev->pdev)); - eq_table->irq_info[vecidx].context = param->context; + irq_info = mlx5_irq_get(dev, vecidx); + ATOMIC_INIT_NOTIFIER_HEAD(&irq_info->nh); + snprintf(irq_info->name, MLX5_MAX_IRQ_NAME, + "%s@pci:%s", name, pci_name(dev->pdev)); eq->vecidx = vecidx; eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(eq->irqn, param->handler, 0, - eq_table->irq_info[vecidx].name, param->context); + eq->irq_nb = param->nb; + + err = request_irq(eq->irqn, mlx5_irq_int_handler, 0, irq_info->name, + &irq_info->nh); if (err) goto err_eq; - err = mlx5_debug_eq_add(dev, eq); + err = mlx5_irq_attach_nb(irq_info, param->nb); if (err) goto err_irq; + err = mlx5_debug_eq_add(dev, eq); + if (err) + goto err_detach; + /* EQs are created in ARMED state */ eq_update_ci(eq, 1); @@ -331,8 +367,11 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, kvfree(in); return 0; +err_detach: + mlx5_irq_detach_nb(irq_info, param->nb); + err_irq: - free_irq(eq->irqn, eq); + free_irq(eq->irqn, &eq_table->irq_info[vecidx].nh); err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -355,9 +394,11 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) mlx5_debug_eq_remove(dev, eq); - free_irq(eq->irqn, irq_info->context); - irq_info->context = NULL; - + err = mlx5_irq_detach_nb(irq_info, eq->irq_nb); + if (err) + mlx5_core_warn(eq->dev, "eq failed to detach from irq. 
err %d", + err); + free_irq(eq->irqn, &eq_table->irq_info[eq->vecidx].nh); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", @@ -479,7 +520,7 @@ static int cq_err_event_notifier(struct notifier_block *nb, /* type == MLX5_EVENT_TYPE_CQ_ERROR */ eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb); - eq = &eqt->async_eq; + eq = &eqt->async_eq.core; eqe = data; cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; @@ -548,14 +589,14 @@ static int create_async_eqs(struct mlx5_core_dev *dev) MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); mlx5_eq_notifier_register(dev, &table->cq_err_nb); + table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .index = MLX5_EQ_CMD_IDX, .mask = 1ull << MLX5_EVENT_TYPE_CMD, .nent = MLX5_NUM_CMD_EQE, - .context = &table->cmd_eq, - .handler = mlx5_eq_async_int, + .nb = &table->cmd_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, &param); + err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq.core, &param); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); goto err0; @@ -563,27 +604,29 @@ static int create_async_eqs(struct mlx5_core_dev *dev) mlx5_cmd_use_events(dev); + table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .index = MLX5_EQ_ASYNC_IDX, .mask = gather_async_events_mask(dev), .nent = MLX5_NUM_ASYNC_EQE, - .context = &table->async_eq, - .handler = mlx5_eq_async_int, + .nb = &table->async_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, &param); + err = create_async_eq(dev, "mlx5_async_eq", + &table->async_eq.core, &param); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); goto err1; } + table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { .index = MLX5_EQ_PAGEREQ_IDX, .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, .nent = /* TODO: sriov max_vf + */ 1, - 
.context = &table->pages_eq, - .handler = mlx5_eq_async_int, + .nb = &table->pages_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param); + err = create_async_eq(dev, "mlx5_pages_eq", + &table->pages_eq.core, &param); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); goto err2; @@ -592,11 +635,11 @@ static int create_async_eqs(struct mlx5_core_dev *dev) return err; err2: - destroy_async_eq(dev, &table->async_eq); + destroy_async_eq(dev, &table->async_eq.core); err1: mlx5_cmd_use_polling(dev); - destroy_async_eq(dev, &table->cmd_eq); + destroy_async_eq(dev, &table->cmd_eq.core); err0: mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; @@ -607,19 +650,19 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = dev->priv.eq_table; int err; - err = destroy_async_eq(dev, &table->pages_eq); + err = destroy_async_eq(dev, &table->pages_eq.core); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); - err = destroy_async_eq(dev, &table->async_eq); + err = destroy_async_eq(dev, &table->async_eq.core); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", err); mlx5_cmd_use_polling(dev); - err = destroy_async_eq(dev, &table->cmd_eq); + err = destroy_async_eq(dev, &table->cmd_eq.core); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", err); @@ -629,17 +672,17 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) { - return &dev->priv.eq_table->async_eq; + return &dev->priv.eq_table->async_eq.core; } void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev) { - synchronize_irq(dev->priv.eq_table->async_eq.irqn); + synchronize_irq(dev->priv.eq_table->async_eq.core.irqn); } void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) { - synchronize_irq(dev->priv.eq_table->cmd_eq.irqn); + synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn); } /* Generic EQ API 
for mlx5_core consumers @@ -837,12 +880,12 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); #endif snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); + eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { .index = vecidx, .mask = 0, .nent = nent, - .context = &eq->core, - .handler = mlx5_eq_comp_int + .nb = &eq->irq_nb, }; err = create_map_eq(dev, &eq->core, name, &param); if (err) { @@ -940,10 +983,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; for (i = max_eqs - 1; i >= 0; i--) { - if (!table->irq_info[i].context) - continue; - free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context); - table->irq_info[i].context = NULL; + free_irq(pci_irq_vector(dev->pdev, i), &table->irq_info[i].nh); } mutex_unlock(&table->lock); pci_free_irq_vectors(dev->pdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index c0fb6d72b695..adbc228bd55d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -34,10 +34,17 @@ struct mlx5_eq { u8 eqn; int nent; struct mlx5_rsc_debug *dbg; + struct notifier_block *irq_nb; /* For destroy only */ +}; + +struct mlx5_eq_async { + struct mlx5_eq core; + struct notifier_block irq_nb; }; struct mlx5_eq_comp { - struct mlx5_eq core; /* Must be first */ + struct mlx5_eq core; + struct notifier_block irq_nb; struct mlx5_eq_tasklet tasklet_ctx; struct list_head list; }; diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index 00045cc4ea11..7909f1ff197c 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -26,8 +26,7 @@ struct mlx5_eq_param { u8 index; int nent; u64 mask; - void *context; - irq_handler_t handler; + struct notifier_block *nb; }; struct mlx5_eq 
* -- cgit v1.2.3 From 24163189da487b4caa751eef4e945c9333aae441 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:25 +0000 Subject: net/mlx5: Separate IRQ request/free from EQ life cycle Instead of requesting IRQ with eq creation, IRQs will be requested before EQ table creation. Instead of freeing the IRQs after EQ destroy, free IRQs after eq table destroy. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/odp.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 121 ++++++++++++++++++--------- include/linux/mlx5/eq.h | 3 +- 3 files changed, 84 insertions(+), 42 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index ac40a4fd5598..7ce7c5bfe685 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1562,7 +1562,7 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) .nent = MLX5_IB_NUM_PF_EQE, .nb = &eq->irq_nb, }; - eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", &param); + eq->core = mlx5_eq_create_generic(dev->mdev, &param); if (IS_ERR(eq->core)) { err = PTR_ERR(eq->core); goto err_wq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 590c0fefaa25..f187169cbe76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -134,6 +134,64 @@ static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) return IRQ_HANDLED; } +static void irq_set_name(char *name, int vecidx) +{ + switch (vecidx) { + case MLX5_EQ_CMD_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_cmd_eq"); + break; + case MLX5_EQ_ASYNC_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async_eq"); + break; + case MLX5_EQ_PAGEREQ_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_pages_eq"); + break; + case MLX5_EQ_PFAULT_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_ib_page_fault_eq"); + break; + 
default: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", + vecidx - MLX5_EQ_VEC_COMP_BASE); + break; + } +} + +static int request_irqs(struct mlx5_core_dev *dev, int nvec) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_eq_table *eq_table; + char name[MLX5_MAX_IRQ_NAME]; + int err; + int i; + + eq_table = priv->eq_table; + for (i = 0; i < nvec; i++) { + struct mlx5_irq_info *irq_info = &eq_table->irq_info[i]; + int irqn = pci_irq_vector(dev->pdev, i); + + irq_set_name(name, i); + ATOMIC_INIT_NOTIFIER_HEAD(&irq_info->nh); + snprintf(irq_info->name, MLX5_MAX_IRQ_NAME, + "%s@pci:%s", name, pci_name(dev->pdev)); + err = request_irq(irqn, mlx5_irq_int_handler, 0, irq_info->name, + &irq_info->nh); + if (err) { + mlx5_core_err(dev, "Failed to request irq\n"); + goto err_request_irq; + } + } + return 0; + +err_request_irq: + for (; i >= 0; i--) { + struct mlx5_irq_info *irq_info = &eq_table->irq_info[i]; + int irqn = pci_irq_vector(dev->pdev, i); + + free_irq(irqn, &irq_info->nh); + } + return err; +} + static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; @@ -278,14 +336,12 @@ static void init_eq_buf(struct mlx5_eq *eq) } static int -create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, +create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - struct mlx5_irq_info *irq_info; u8 vecidx = param->index; __be64 *pas; void *eqc; @@ -335,11 +391,6 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, if (err) goto err_in; - irq_info = mlx5_irq_get(dev, vecidx); - ATOMIC_INIT_NOTIFIER_HEAD(&irq_info->nh); - snprintf(irq_info->name, MLX5_MAX_IRQ_NAME, - "%s@pci:%s", name, pci_name(dev->pdev)); - eq->vecidx = vecidx; eq->eqn = 
MLX5_GET(create_eq_out, out, eq_number); eq->irqn = pci_irq_vector(dev->pdev, vecidx); @@ -347,15 +398,10 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; eq->irq_nb = param->nb; - err = request_irq(eq->irqn, mlx5_irq_int_handler, 0, irq_info->name, - &irq_info->nh); + err = mlx5_irq_attach_nb(mlx5_irq_get(dev, vecidx), param->nb); if (err) goto err_eq; - err = mlx5_irq_attach_nb(irq_info, param->nb); - if (err) - goto err_irq; - err = mlx5_debug_eq_add(dev, eq); if (err) goto err_detach; @@ -368,10 +414,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, return 0; err_detach: - mlx5_irq_detach_nb(irq_info, param->nb); - -err_irq: - free_irq(eq->irqn, &eq_table->irq_info[vecidx].nh); + mlx5_irq_detach_nb(mlx5_irq_get(dev, vecidx), eq->irq_nb); err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -386,19 +429,14 @@ err_buf: static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; - struct mlx5_irq_info *irq_info; int err; - irq_info = &eq_table->irq_info[eq->vecidx]; - mlx5_debug_eq_remove(dev, eq); - err = mlx5_irq_detach_nb(irq_info, eq->irq_nb); + err = mlx5_irq_detach_nb(mlx5_irq_get(dev, eq->vecidx), eq->irq_nb); if (err) mlx5_core_warn(eq->dev, "eq failed to detach from irq. 
err %d", err); - free_irq(eq->irqn, &eq_table->irq_info[eq->vecidx].nh); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", @@ -479,7 +517,7 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) /* Async EQs */ -static int create_async_eq(struct mlx5_core_dev *dev, const char *name, +static int create_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; @@ -491,7 +529,7 @@ static int create_async_eq(struct mlx5_core_dev *dev, const char *name, goto unlock; } - err = create_map_eq(dev, eq, name, param); + err = create_map_eq(dev, eq, param); unlock: mutex_unlock(&eq_table->lock); return err; @@ -596,7 +634,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .nent = MLX5_NUM_CMD_EQE, .nb = &table->cmd_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq.core, &param); + err = create_async_eq(dev, &table->cmd_eq.core, &param); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); goto err0; @@ -611,8 +649,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .nent = MLX5_NUM_ASYNC_EQE, .nb = &table->async_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_async_eq", - &table->async_eq.core, &param); + err = create_async_eq(dev, &table->async_eq.core, &param); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); goto err1; @@ -625,8 +662,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .nent = /* TODO: sriov max_vf + */ 1, .nb = &table->pages_eq.irq_nb, }; - err = create_async_eq(dev, "mlx5_pages_eq", - &table->pages_eq.core, &param); + err = create_async_eq(dev, &table->pages_eq.core, &param); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); goto err2; @@ -689,7 +725,7 @@ void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) * Needed For RDMA ODP EQ for now */ struct mlx5_eq * -mlx5_eq_create_generic(struct mlx5_core_dev *dev, 
const char *name, +mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param) { struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); @@ -698,7 +734,7 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, if (!eq) return ERR_PTR(-ENOMEM); - err = create_async_eq(dev, name, eq, param); + err = create_async_eq(dev, eq, param); if (err) { kvfree(eq); eq = ERR_PTR(err); @@ -845,7 +881,6 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) static int create_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - char name[MLX5_MAX_IRQ_NAME]; struct mlx5_eq_comp *eq; int ncomp_vec; int nent; @@ -879,7 +914,6 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) #ifdef CONFIG_RFS_ACCEL irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); #endif - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { .index = vecidx, @@ -887,7 +921,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) .nent = nent, .nb = &eq->irq_nb, }; - err = create_map_eq(dev, &eq->core, name, &param); + err = create_map_eq(dev, &eq->core, &param); if (err) { kfree(eq); goto clean; @@ -1018,8 +1052,14 @@ static int alloc_irq_vectors(struct mlx5_core_dev *dev) table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; + err = request_irqs(dev, nvec); + if (err) + goto err_free_irqs; + return 0; +err_free_irqs: + pci_free_irq_vectors(dev->pdev); err_free_irq_info: kfree(table->irq_info); return err; @@ -1027,10 +1067,13 @@ err_free_irq_info: static void free_irq_vectors(struct mlx5_core_dev *dev) { - struct mlx5_priv *priv = &dev->priv; + struct mlx5_eq_table *table = dev->priv.eq_table; + int i; + for (i = 0; i < table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; i++) + free_irq(pci_irq_vector(dev->pdev, i), &table->irq_info[i].nh); pci_free_irq_vectors(dev->pdev); - kfree(priv->eq_table->irq_info); + kfree(table->irq_info); } int 
mlx5_eq_table_create(struct mlx5_core_dev *dev) @@ -1039,7 +1082,7 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) err = alloc_irq_vectors(dev); if (err) { - mlx5_core_err(dev, "alloc irq vectors failed\n"); + mlx5_core_err(dev, "Failed to create IRQ vectors\n"); return err; } diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index 7909f1ff197c..73ab658af764 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -30,8 +30,7 @@ struct mlx5_eq_param { }; struct mlx5_eq * -mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, - struct mlx5_eq_param *param); +mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param); int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -- cgit v1.2.3 From 561aa15ad69e9d1e5a8bb277adb3209bf8091ecb Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:27 +0000 Subject: net/mlx5: Separate IRQ data from EQ table data IRQ table should only exist for mlx5_core_dev for PF and VF only. EQ table of mediated devices should hold a pointer to the IRQ table of the parent PCI device. 
Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 125 ++++++++++++++------- drivers/net/ethernet/mellanox/mlx5/core/main.c | 11 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 3 + include/linux/mlx5/driver.h | 3 + 4 files changed, 98 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index f187169cbe76..cdfa35ec02fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -77,6 +77,14 @@ struct mlx5_irq_info { char name[MLX5_MAX_IRQ_NAME]; }; +struct mlx5_irq_table { + struct mlx5_irq_info *irq_info; + int nvec; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif +}; + struct mlx5_eq_table { struct list_head comp_eqs_list; struct mlx5_eq_async pages_eq; @@ -89,11 +97,8 @@ struct mlx5_eq_table { struct mlx5_nb cq_err_nb; struct mutex lock; /* sync async eqs creations */ - int num_comp_vectors; - struct mlx5_irq_info *irq_info; -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; -#endif + int num_comp_eqs; + struct mlx5_irq_table *irq_table; }; #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ @@ -109,11 +114,33 @@ struct mlx5_eq_table { (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) +int mlx5_irq_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *irq_table; + + irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL); + if (!irq_table) + return -ENOMEM; + + dev->priv.irq_table = irq_table; + return 0; +} + +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) +{ + kvfree(dev->priv.irq_table); +} + +static int mlx5_irq_get_num_comp(struct mlx5_irq_table *table) +{ + return table->nvec - MLX5_EQ_VEC_COMP_BASE; +} + static struct mlx5_irq_info *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; + struct 
mlx5_irq_table *irq_table = dev->priv.irq_table; - return &eq_table->irq_info[vecidx]; + return &irq_table->irq_info[vecidx]; } static int mlx5_irq_attach_nb(struct mlx5_irq_info *irq, @@ -158,15 +185,12 @@ static void irq_set_name(char *name, int vecidx) static int request_irqs(struct mlx5_core_dev *dev, int nvec) { - struct mlx5_priv *priv = &dev->priv; - struct mlx5_eq_table *eq_table; char name[MLX5_MAX_IRQ_NAME]; int err; int i; - eq_table = priv->eq_table; for (i = 0; i < nvec; i++) { - struct mlx5_irq_info *irq_info = &eq_table->irq_info[i]; + struct mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); int irqn = pci_irq_vector(dev->pdev, i); irq_set_name(name, i); @@ -184,7 +208,7 @@ static int request_irqs(struct mlx5_core_dev *dev, int nvec) err_request_irq: for (; i >= 0; i--) { - struct mlx5_irq_info *irq_info = &eq_table->irq_info[i]; + struct mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); int irqn = pci_irq_vector(dev->pdev, i); free_irq(irqn, &irq_info->nh); @@ -501,6 +525,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); + eq_table->irq_table = dev->priv.irq_table; return 0; kvfree_eq_table: @@ -796,10 +821,13 @@ EXPORT_SYMBOL(mlx5_eq_update_ci); static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { - struct mlx5_priv *priv = &mdev->priv; int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; + struct mlx5_priv *priv = &mdev->priv; + struct mlx5_irq_info *irq_info; + int irq; + + irq_info = mlx5_irq_get(mdev, vecidx); + irq = pci_irq_vector(mdev->pdev, vecidx); if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); @@ -819,20 +847,22 @@ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { int 
vecidx = MLX5_EQ_VEC_COMP_BASE + i; - struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; + struct mlx5_irq_info *irq_info; + int irq; + irq_info = mlx5_irq_get(mdev, vecidx); + irq = pci_irq_vector(mdev->pdev, vecidx); irq_set_affinity_hint(irq, NULL); free_cpumask_var(irq_info->mask); } static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) { + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); int err; int i; - for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) { + for (i = 0; i < nvec; i++) { err = set_comp_irq_affinity_hint(mdev, i); if (err) goto err_out; @@ -849,9 +879,10 @@ err_out: static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) { + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); int i; - for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) + for (i = 0; i < nvec; i++) clear_comp_irq_affinity_hint(mdev, i); } @@ -863,9 +894,9 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) clear_comp_irqs_affinity_hints(dev); #ifdef CONFIG_RFS_ACCEL - if (table->rmap) { - free_irq_cpu_rmap(table->rmap); - table->rmap = NULL; + if (table->irq_table->rmap) { + free_irq_cpu_rmap(table->irq_table->rmap); + table->irq_table->rmap = NULL; } #endif list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { @@ -882,20 +913,20 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq; - int ncomp_vec; + int ncomp_eqs; int nent; int err; int i; INIT_LIST_HEAD(&table->comp_eqs_list); - ncomp_vec = table->num_comp_vectors; + ncomp_eqs = table->num_comp_eqs; nent = MLX5_COMP_EQ_SIZE; #ifdef CONFIG_RFS_ACCEL - table->rmap = alloc_irq_cpu_rmap(ncomp_vec); - if (!table->rmap) + table->irq_table->rmap = alloc_irq_cpu_rmap(ncomp_eqs); + if (!table->irq_table->rmap) return -ENOMEM; #endif - for (i = 0; i < ncomp_vec; i++) { + for (i = 0; 
i < ncomp_eqs; i++) { int vecidx = i + MLX5_EQ_VEC_COMP_BASE; struct mlx5_eq_param param = {}; @@ -912,7 +943,8 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) (unsigned long)&eq->tasklet_ctx); #ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); + irq_cpu_rmap_add(table->irq_table->rmap, + pci_irq_vector(dev->pdev, vecidx)); #endif eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { @@ -967,22 +999,23 @@ EXPORT_SYMBOL(mlx5_vector2eqn); unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->num_comp_vectors; + return dev->priv.eq_table->num_comp_eqs; } EXPORT_SYMBOL(mlx5_comp_vectors_count); struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { - /* TODO: consider irq_get_affinity_mask(irq) */ - return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; + int vecidx = vector + MLX5_EQ_VEC_COMP_BASE; + + return dev->priv.eq_table->irq_table->irq_info[vecidx].mask; } EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->rmap; + return dev->priv.eq_table->irq_table->rmap; } #endif @@ -1008,16 +1041,17 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) clear_comp_irqs_affinity_hints(dev); #ifdef CONFIG_RFS_ACCEL - if (table->rmap) { - free_irq_cpu_rmap(table->rmap); - table->rmap = NULL; + if (table->irq_table->rmap) { + free_irq_cpu_rmap(table->irq_table->rmap); + table->irq_table->rmap = NULL; } #endif mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ - max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; + max_eqs = table->num_comp_eqs + MLX5_EQ_VEC_COMP_BASE; for (i = max_eqs - 1; i >= 0; i--) { - free_irq(pci_irq_vector(dev->pdev, i), &table->irq_info[i].nh); + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); } mutex_unlock(&table->lock); 
pci_free_irq_vectors(dev->pdev); @@ -1026,7 +1060,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) static int alloc_irq_vectors(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; - struct mlx5_eq_table *table = priv->eq_table; + struct mlx5_irq_table *table = priv->irq_table; int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? MLX5_CAP_GEN(dev, max_num_eqs) : 1 << MLX5_CAP_GEN(dev, log_max_eq); @@ -1050,7 +1084,7 @@ static int alloc_irq_vectors(struct mlx5_core_dev *dev) goto err_free_irq_info; } - table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; + table->nvec = nvec; err = request_irqs(dev, nvec); if (err) @@ -1067,17 +1101,19 @@ err_free_irq_info: static void free_irq_vectors(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_irq_table *table = dev->priv.irq_table; int i; - for (i = 0; i < table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; i++) - free_irq(pci_irq_vector(dev->pdev, i), &table->irq_info[i].nh); + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); pci_free_irq_vectors(dev->pdev); kfree(table->irq_info); } int mlx5_eq_table_create(struct mlx5_core_dev *dev) { + struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; err = alloc_irq_vectors(dev); @@ -1086,6 +1122,9 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) return err; } + eq_table->num_comp_eqs = + mlx5_irq_get_num_comp(eq_table->irq_table); + err = create_async_eqs(dev); if (err) { mlx5_core_err(dev, "Failed to create async EQs\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 720f65bfe6a9..be79dceea3c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -804,10 +804,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_devcom; } + err = mlx5_irq_table_init(dev); + if (err) { + mlx5_core_err(dev, "failed to 
initialize irq table\n"); + goto err_devcom; + } + err = mlx5_eq_table_init(dev); if (err) { mlx5_core_err(dev, "failed to initialize eq\n"); - goto err_devcom; + goto err_irq_cleanup; } err = mlx5_events_init(dev); @@ -883,6 +889,8 @@ err_events_cleanup: mlx5_events_cleanup(dev); err_eq_cleanup: mlx5_eq_table_cleanup(dev); +err_irq_cleanup: + mlx5_irq_table_cleanup(dev); err_devcom: mlx5_devcom_unregister_device(dev->priv.devcom); @@ -905,6 +913,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cq_debugfs_cleanup(dev); mlx5_events_cleanup(dev); mlx5_eq_table_cleanup(dev); + mlx5_irq_table_cleanup(dev); mlx5_devcom_unregister_device(dev->priv.devcom); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 22e69d4813e4..907515f3bfbb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -153,6 +153,9 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); +int mlx5_irq_table_init(struct mlx5_core_dev *dev); +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); + int mlx5_events_init(struct mlx5_core_dev *dev); void mlx5_events_cleanup(struct mlx5_core_dev *dev); void mlx5_events_start(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 64155fe201ee..d8ab633406c2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -492,6 +492,7 @@ struct mlx5_eswitch; struct mlx5_lag; struct mlx5_devcom; struct mlx5_eq_table; +struct mlx5_irq_table; struct mlx5_rate_limit { u32 rate; @@ -521,6 +522,8 @@ struct mlx5_core_roce { }; struct mlx5_priv { + /* IRQ table valid only for real pci devices PF or VF */ + struct mlx5_irq_table *irq_table; struct mlx5_eq_table *eq_table; /* pages stuff */ -- cgit v1.2.3 From 
b79e6beb9c36a1f26116a9a576392647643ac456 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:28 +0000 Subject: net/mlx5: Move IRQ rmap creation to IRQ allocation phase Rmap creation/deletion is part of the IRQ life-cycle. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 82 +++++++++++++++++++--------- 1 file changed, 57 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index cdfa35ec02fa..1ea983c1ec05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -216,6 +216,49 @@ err_request_irq: return err; } +static void irq_clear_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = dev->priv.irq_table; + + free_irq_cpu_rmap(irq_table->rmap); +#endif +} + +static int irq_set_rmap(struct mlx5_core_dev *mdev) +{ + int err = 0; +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = mdev->priv.irq_table; + int num_affinity_vec; + int vecidx; + + num_affinity_vec = mlx5_irq_get_num_comp(irq_table); + irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec); + if (!irq_table->rmap) { + err = -ENOMEM; + mlx5_core_err(mdev, "failed to allocate cpu_rmap. err %d", err); + goto err_out; + } + + vecidx = MLX5_EQ_VEC_COMP_BASE; + for (; vecidx < irq_table->nvec; vecidx++) { + err = irq_cpu_rmap_add(irq_table->rmap, + pci_irq_vector(mdev->pdev, vecidx)); + if (err) { + mlx5_core_err(mdev, "irq_cpu_rmap_add failed. 
err %d", err); + goto err_irq_cpu_rmap_add; + } + } + return 0; + +err_irq_cpu_rmap_add: + irq_clear_rmap(mdev); +err_out: +#endif + return err; +} + static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; @@ -893,12 +936,6 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) clear_comp_irqs_affinity_hints(dev); -#ifdef CONFIG_RFS_ACCEL - if (table->irq_table->rmap) { - free_irq_cpu_rmap(table->irq_table->rmap); - table->irq_table->rmap = NULL; - } -#endif list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); if (destroy_unmap_eq(dev, &eq->core)) @@ -921,11 +958,6 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) INIT_LIST_HEAD(&table->comp_eqs_list); ncomp_eqs = table->num_comp_eqs; nent = MLX5_COMP_EQ_SIZE; -#ifdef CONFIG_RFS_ACCEL - table->irq_table->rmap = alloc_irq_cpu_rmap(ncomp_eqs); - if (!table->irq_table->rmap) - return -ENOMEM; -#endif for (i = 0; i < ncomp_eqs; i++) { int vecidx = i + MLX5_EQ_VEC_COMP_BASE; struct mlx5_eq_param param = {}; @@ -942,10 +974,6 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, (unsigned long)&eq->tasklet_ctx); -#ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(table->irq_table->rmap, - pci_irq_vector(dev->pdev, vecidx)); -#endif eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { .index = vecidx, @@ -1039,14 +1067,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) int i, max_eqs; clear_comp_irqs_affinity_hints(dev); - -#ifdef CONFIG_RFS_ACCEL - if (table->irq_table->rmap) { - free_irq_cpu_rmap(table->irq_table->rmap); - table->irq_table->rmap = NULL; - } -#endif - + irq_clear_rmap(dev); mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ max_eqs = table->num_comp_eqs + MLX5_EQ_VEC_COMP_BASE; for (i = max_eqs - 1; i >= 0; i--) { @@ -1086,13 +1107,19 @@ static int alloc_irq_vectors(struct mlx5_core_dev *dev) table->nvec = 
nvec; + err = irq_set_rmap(dev); + if (err) + goto err_set_rmap; + err = request_irqs(dev, nvec); if (err) - goto err_free_irqs; + goto err_request_irqs; return 0; -err_free_irqs: +err_request_irqs: + irq_clear_rmap(dev); +err_set_rmap: pci_free_irq_vectors(dev->pdev); err_free_irq_info: kfree(table->irq_info); @@ -1104,6 +1131,11 @@ static void free_irq_vectors(struct mlx5_core_dev *dev) struct mlx5_irq_table *table = dev->priv.irq_table; int i; + /* free_irq requires that affinity and rmap will be cleared + * before calling it. This is why there is asymmetry with set_rmap + * which should be called after alloc_irq but before request_irq. + */ + irq_clear_rmap(dev); for (i = 0; i < table->nvec; i++) free_irq(pci_irq_vector(dev->pdev, i), &mlx5_irq_get(dev, i)->nh); -- cgit v1.2.3 From 90426cc00c77e4c11f1d23799de44ecb54e8ff27 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:30 +0000 Subject: net/mlx5: Move IRQ affinity set to IRQ allocation phase Affinity set/clear is part of the IRQ life-cycle. 
Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 1ea983c1ec05..d30bd01cf050 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -934,8 +934,6 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq, *n; - clear_comp_irqs_affinity_hints(dev); - list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); if (destroy_unmap_eq(dev, &eq->core)) @@ -991,12 +989,6 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) list_add_tail(&eq->list, &table->comp_eqs_list); } - err = set_comp_irq_affinity_hints(dev); - if (err) { - mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); - goto clean; - } - return 0; clean: @@ -1078,6 +1070,16 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) pci_free_irq_vectors(dev->pdev); } +static void unrequest_irqs(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int i; + + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); +} + static int alloc_irq_vectors(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; @@ -1115,8 +1117,14 @@ static int alloc_irq_vectors(struct mlx5_core_dev *dev) if (err) goto err_request_irqs; + err = set_comp_irq_affinity_hints(dev); + if (err) + goto err_set_affinity; + return 0; +err_set_affinity: + unrequest_irqs(dev); err_request_irqs: irq_clear_rmap(dev); err_set_rmap: @@ -1136,6 +1144,7 @@ static void free_irq_vectors(struct mlx5_core_dev *dev) * which should be called after alloc_irq but before request_irq. 
*/ irq_clear_rmap(dev); + clear_comp_irqs_affinity_hints(dev); for (i = 0; i < table->nvec; i++) free_irq(pci_irq_vector(dev->pdev, i), &mlx5_irq_get(dev, i)->nh); -- cgit v1.2.3 From e1706e62801e9ad65b1fb6e6eccc69acfa43d16d Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:32 +0000 Subject: net/mlx5: Separate IRQ table creation from EQ table creation IRQ allocation should be part of the IRQ table life-cycle. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 22 +++------------------- drivers/net/ethernet/mellanox/mlx5/core/main.c | 9 +++++++++ .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 ++ 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index d30bd01cf050..daf9bc3155cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -1056,18 +1056,10 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - int i, max_eqs; - clear_comp_irqs_affinity_hints(dev); - irq_clear_rmap(dev); mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ - max_eqs = table->num_comp_eqs + MLX5_EQ_VEC_COMP_BASE; - for (i = max_eqs - 1; i >= 0; i--) { - free_irq(pci_irq_vector(dev->pdev, i), - &mlx5_irq_get(dev, i)->nh); - } + mlx5_irq_table_destroy(dev); mutex_unlock(&table->lock); - pci_free_irq_vectors(dev->pdev); } static void unrequest_irqs(struct mlx5_core_dev *dev) @@ -1080,7 +1072,7 @@ static void unrequest_irqs(struct mlx5_core_dev *dev) &mlx5_irq_get(dev, i)->nh); } -static int alloc_irq_vectors(struct mlx5_core_dev *dev) +int mlx5_irq_table_create(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; struct mlx5_irq_table *table = priv->irq_table; @@ -1134,7 
+1126,7 @@ err_free_irq_info: return err; } -static void free_irq_vectors(struct mlx5_core_dev *dev) +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) { struct mlx5_irq_table *table = dev->priv.irq_table; int i; @@ -1157,12 +1149,6 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; - err = alloc_irq_vectors(dev); - if (err) { - mlx5_core_err(dev, "Failed to create IRQ vectors\n"); - return err; - } - eq_table->num_comp_eqs = mlx5_irq_get_num_comp(eq_table->irq_table); @@ -1182,7 +1168,6 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) err_comp_eqs: destroy_async_eqs(dev); err_async_eqs: - free_irq_vectors(dev); return err; } @@ -1190,7 +1175,6 @@ void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) { destroy_comp_eqs(dev); destroy_async_eqs(dev); - free_irq_vectors(dev); } int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index be79dceea3c3..bfc8c6faedc2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1047,6 +1047,12 @@ static int mlx5_load(struct mlx5_core_dev *dev) mlx5_events_start(dev); mlx5_pagealloc_start(dev); + err = mlx5_irq_table_create(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc IRQs\n"); + goto err_irq_table; + } + err = mlx5_eq_table_create(dev); if (err) { mlx5_core_err(dev, "Failed to create EQs\n"); @@ -1118,6 +1124,8 @@ err_fpga_start: err_fw_tracer: mlx5_eq_table_destroy(dev); err_eq_table: + mlx5_irq_table_destroy(dev); +err_irq_table: mlx5_pagealloc_stop(dev); mlx5_events_stop(dev); mlx5_put_uars_page(dev, dev->priv.uar); @@ -1134,6 +1142,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_fpga_device_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); mlx5_eq_table_destroy(dev); + mlx5_irq_table_destroy(dev); mlx5_pagealloc_stop(dev); 
mlx5_events_stop(dev); mlx5_put_uars_page(dev, dev->priv.uar); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 907515f3bfbb..14f1f63db3e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -155,6 +155,8 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev); int mlx5_irq_table_init(struct mlx5_core_dev *dev); void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); +int mlx5_irq_table_create(struct mlx5_core_dev *dev); +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev); int mlx5_events_init(struct mlx5_core_dev *dev); void mlx5_events_cleanup(struct mlx5_core_dev *dev); -- cgit v1.2.3 From bfb49549ea7993f49c0374295d84a0c7772102a2 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:34 +0000 Subject: net/mlx5: Generalize IRQ interface to work with irq_table IRQ interface should operate within the irq_table context. It should be independent of any EQ data structure. 
The interface that will be exposed: init/cleanup, create/destroy, attach/detach Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 38 +++++++++++++++++++++------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index daf9bc3155cc..80a436b5034a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -143,16 +143,22 @@ static struct mlx5_irq_info *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) return &irq_table->irq_info[vecidx]; } -static int mlx5_irq_attach_nb(struct mlx5_irq_info *irq, +static int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, struct notifier_block *nb) { - return atomic_notifier_chain_register(&irq->nh, nb); + struct mlx5_irq_info *irq_info; + + irq_info = &irq_table->irq_info[vecidx]; + return atomic_notifier_chain_register(&irq_info->nh, nb); } -static int mlx5_irq_detach_nb(struct mlx5_irq_info *irq, +static int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, struct notifier_block *nb) { - return atomic_notifier_chain_unregister(&irq->nh, nb); + struct mlx5_irq_info *irq_info; + + irq_info = &irq_table->irq_info[vecidx]; + return atomic_notifier_chain_unregister(&irq_info->nh, nb); } static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) @@ -465,7 +471,8 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; eq->irq_nb = param->nb; - err = mlx5_irq_attach_nb(mlx5_irq_get(dev, vecidx), param->nb); + err = mlx5_irq_attach_nb(dev->priv.eq_table->irq_table, vecidx, + param->nb); if (err) goto err_eq; @@ -481,7 +488,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, return 0; err_detach: - mlx5_irq_detach_nb(mlx5_irq_get(dev, vecidx), eq->irq_nb); +
mlx5_irq_detach_nb(dev->priv.eq_table->irq_table, vecidx, eq->irq_nb); err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -500,7 +507,8 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) mlx5_debug_eq_remove(dev, eq); - err = mlx5_irq_detach_nb(mlx5_irq_get(dev, eq->vecidx), eq->irq_nb); + err = mlx5_irq_detach_nb(dev->priv.eq_table->irq_table, + eq->vecidx, eq->irq_nb); if (err) mlx5_core_warn(eq->dev, "eq failed to detach from irq. err %d", err); @@ -1023,19 +1031,31 @@ unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_comp_vectors_count); +static struct cpumask * +mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx) +{ + return irq_table->irq_info[vecidx].mask; +} + struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { int vecidx = vector + MLX5_EQ_VEC_COMP_BASE; - return dev->priv.eq_table->irq_table->irq_info[vecidx].mask; + return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table, + vecidx); } EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); #ifdef CONFIG_RFS_ACCEL +static struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table) +{ + return irq_table->rmap; +} + struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->irq_table->rmap; + return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table); } #endif -- cgit v1.2.3 From 256cf690af0668dd4e7c192648d2faf2e7e58788 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:37 +0000 Subject: net/mlx5: Move all IRQ logic to pci_irq.c Finalize IRQ separation and expose irq interface. 
Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 327 ------------------- drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 1 - .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 8 + drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 345 +++++++++++++++++++++ 5 files changed, 354 insertions(+), 329 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 243368dc23db..cf8d2b74a2fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ - transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 80a436b5034a..0c72c122daef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -71,20 +71,6 @@ enum { static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE); -struct mlx5_irq_info { - struct atomic_notifier_head nh; - cpumask_var_t mask; - char name[MLX5_MAX_IRQ_NAME]; -}; - -struct mlx5_irq_table { - struct mlx5_irq_info *irq_info; - int nvec; -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; -#endif -}; - struct mlx5_eq_table { struct list_head comp_eqs_list; struct mlx5_eq_async pages_eq; @@ -114,157 +100,6 @@ struct mlx5_eq_table { (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) -int 
mlx5_irq_table_init(struct mlx5_core_dev *dev) -{ - struct mlx5_irq_table *irq_table; - - irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL); - if (!irq_table) - return -ENOMEM; - - dev->priv.irq_table = irq_table; - return 0; -} - -void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) -{ - kvfree(dev->priv.irq_table); -} - -static int mlx5_irq_get_num_comp(struct mlx5_irq_table *table) -{ - return table->nvec - MLX5_EQ_VEC_COMP_BASE; -} - -static struct mlx5_irq_info *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) -{ - struct mlx5_irq_table *irq_table = dev->priv.irq_table; - - return &irq_table->irq_info[vecidx]; -} - -static int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, - struct notifier_block *nb) -{ - struct mlx5_irq_info *irq_info; - - irq_info = &irq_table->irq_info[vecidx]; - return atomic_notifier_chain_register(&irq_info->nh, nb); -} - -static int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, - struct notifier_block *nb) -{ - struct mlx5_irq_info *irq_info; - - irq_info = &irq_table->irq_info[vecidx]; - return atomic_notifier_chain_unregister(&irq_info->nh, nb); -} - -static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) -{ - atomic_notifier_call_chain(nh, 0, NULL); - return IRQ_HANDLED; -} - -static void irq_set_name(char *name, int vecidx) -{ - switch (vecidx) { - case MLX5_EQ_CMD_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_cmd_eq"); - break; - case MLX5_EQ_ASYNC_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async_eq"); - break; - case MLX5_EQ_PAGEREQ_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_pages_eq"); - break; - case MLX5_EQ_PFAULT_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_ib_page_fault_eq"); - break; - default: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", - vecidx - MLX5_EQ_VEC_COMP_BASE); - break; - } -} - -static int request_irqs(struct mlx5_core_dev *dev, int nvec) -{ - char name[MLX5_MAX_IRQ_NAME]; - int err; - int i; - - for (i = 0; i < nvec; i++) { - struct 
mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); - int irqn = pci_irq_vector(dev->pdev, i); - - irq_set_name(name, i); - ATOMIC_INIT_NOTIFIER_HEAD(&irq_info->nh); - snprintf(irq_info->name, MLX5_MAX_IRQ_NAME, - "%s@pci:%s", name, pci_name(dev->pdev)); - err = request_irq(irqn, mlx5_irq_int_handler, 0, irq_info->name, - &irq_info->nh); - if (err) { - mlx5_core_err(dev, "Failed to request irq\n"); - goto err_request_irq; - } - } - return 0; - -err_request_irq: - for (; i >= 0; i--) { - struct mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); - int irqn = pci_irq_vector(dev->pdev, i); - - free_irq(irqn, &irq_info->nh); - } - return err; -} - -static void irq_clear_rmap(struct mlx5_core_dev *dev) -{ -#ifdef CONFIG_RFS_ACCEL - struct mlx5_irq_table *irq_table = dev->priv.irq_table; - - free_irq_cpu_rmap(irq_table->rmap); -#endif -} - -static int irq_set_rmap(struct mlx5_core_dev *mdev) -{ - int err = 0; -#ifdef CONFIG_RFS_ACCEL - struct mlx5_irq_table *irq_table = mdev->priv.irq_table; - int num_affinity_vec; - int vecidx; - - num_affinity_vec = mlx5_irq_get_num_comp(irq_table); - irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec); - if (!irq_table->rmap) { - err = -ENOMEM; - mlx5_core_err(mdev, "failed to allocate cpu_rmap. err %d", err); - goto err_out; - } - - vecidx = MLX5_EQ_VEC_COMP_BASE; - for (; vecidx < irq_table->nvec; vecidx++) { - err = irq_cpu_rmap_add(irq_table->rmap, - pci_irq_vector(mdev->pdev, vecidx)); - if (err) { - mlx5_core_err(mdev, "irq_cpu_rmap_add failed. 
err %d", err); - goto err_irq_cpu_rmap_add; - } - } - return 0; - -err_irq_cpu_rmap_add: - irq_clear_rmap(mdev); -err_out: -#endif - return err; -} - static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; @@ -868,75 +703,6 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) } EXPORT_SYMBOL(mlx5_eq_update_ci); -/* Completion EQs */ - -static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - struct mlx5_priv *priv = &mdev->priv; - struct mlx5_irq_info *irq_info; - int irq; - - irq_info = mlx5_irq_get(mdev, vecidx); - irq = pci_irq_vector(mdev->pdev, vecidx); - - if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { - mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); - return -ENOMEM; - } - - cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - irq_info->mask); - - if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, irq_info->mask)) - mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); - - return 0; -} - -static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - struct mlx5_irq_info *irq_info; - int irq; - - irq_info = mlx5_irq_get(mdev, vecidx); - irq = pci_irq_vector(mdev->pdev, vecidx); - irq_set_affinity_hint(irq, NULL); - free_cpumask_var(irq_info->mask); -} - -static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) -{ - int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); - int err; - int i; - - for (i = 0; i < nvec; i++) { - err = set_comp_irq_affinity_hint(mdev, i); - if (err) - goto err_out; - } - - return 0; - -err_out: - for (i--; i >= 0; i--) - clear_comp_irq_affinity_hint(mdev, i); - - return err; -} - -static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) -{ - int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); - int i; - - for (i = 0; i < nvec; i++) - clear_comp_irq_affinity_hint(mdev, 
i); -} - static void destroy_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; @@ -1031,12 +797,6 @@ unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_comp_vectors_count); -static struct cpumask * -mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx) -{ - return irq_table->irq_info[vecidx].mask; -} - struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { @@ -1048,11 +808,6 @@ mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); #ifdef CONFIG_RFS_ACCEL -static struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table) -{ - return irq_table->rmap; -} - struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) { return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table); @@ -1082,88 +837,6 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) mutex_unlock(&table->lock); } -static void unrequest_irqs(struct mlx5_core_dev *dev) -{ - struct mlx5_irq_table *table = dev->priv.irq_table; - int i; - - for (i = 0; i < table->nvec; i++) - free_irq(pci_irq_vector(dev->pdev, i), - &mlx5_irq_get(dev, i)->nh); -} - -int mlx5_irq_table_create(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_irq_table *table = priv->irq_table; - int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? 
- MLX5_CAP_GEN(dev, max_num_eqs) : - 1 << MLX5_CAP_GEN(dev, log_max_eq); - int nvec; - int err; - - nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + - MLX5_EQ_VEC_COMP_BASE; - nvec = min_t(int, nvec, num_eqs); - if (nvec <= MLX5_EQ_VEC_COMP_BASE) - return -ENOMEM; - - table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL); - if (!table->irq_info) - return -ENOMEM; - - nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, - nvec, PCI_IRQ_MSIX); - if (nvec < 0) { - err = nvec; - goto err_free_irq_info; - } - - table->nvec = nvec; - - err = irq_set_rmap(dev); - if (err) - goto err_set_rmap; - - err = request_irqs(dev, nvec); - if (err) - goto err_request_irqs; - - err = set_comp_irq_affinity_hints(dev); - if (err) - goto err_set_affinity; - - return 0; - -err_set_affinity: - unrequest_irqs(dev); -err_request_irqs: - irq_clear_rmap(dev); -err_set_rmap: - pci_free_irq_vectors(dev->pdev); -err_free_irq_info: - kfree(table->irq_info); - return err; -} - -void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) -{ - struct mlx5_irq_table *table = dev->priv.irq_table; - int i; - - /* free_irq requires that affinity and rmap will be cleared - * before calling it. This is why there is asymmetry with set_rmap - * which should be called after alloc_irq but before request_irq. 
- */ - irq_clear_rmap(dev); - clear_comp_irqs_affinity_hints(dev); - for (i = 0; i < table->nvec; i++) - free_irq(pci_irq_vector(dev->pdev, i), - &mlx5_irq_get(dev, i)->nh); - pci_free_irq_vectors(dev->pdev); - kfree(table->irq_info); -} - int mlx5_eq_table_create(struct mlx5_core_dev *dev) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index adbc228bd55d..3836c39b2900 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -7,7 +7,6 @@ #include #include -#define MLX5_MAX_IRQ_NAME (32) #define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe)) struct mlx5_eq_tasklet { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 14f1f63db3e3..e0f6783a5f6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -157,6 +157,14 @@ int mlx5_irq_table_init(struct mlx5_core_dev *dev); void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); int mlx5_irq_table_create(struct mlx5_core_dev *dev); void mlx5_irq_table_destroy(struct mlx5_core_dev *dev); +int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb); +int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb); +struct cpumask * +mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx); +struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table); +int mlx5_irq_get_num_comp(struct mlx5_irq_table *table); int mlx5_events_init(struct mlx5_core_dev *dev); void mlx5_events_cleanup(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c new file mode 100644 index 000000000000..75408639d150 --- /dev/null +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -0,0 +1,345 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include +#include +#include +#include +#include "mlx5_core.h" +#ifdef CONFIG_RFS_ACCEL +#include +#endif + +#define MLX5_MAX_IRQ_NAME (32) + +struct mlx5_irq_info { + struct atomic_notifier_head nh; + cpumask_var_t mask; + char name[MLX5_MAX_IRQ_NAME]; +}; + +struct mlx5_irq_table { + struct mlx5_irq_info *irq_info; + int nvec; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif +}; + +int mlx5_irq_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *irq_table; + + irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL); + if (!irq_table) + return -ENOMEM; + + dev->priv.irq_table = irq_table; + return 0; +} + +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) +{ + kvfree(dev->priv.irq_table); +} + +int mlx5_irq_get_num_comp(struct mlx5_irq_table *table) +{ + return table->nvec - MLX5_EQ_VEC_COMP_BASE; +} + +static struct mlx5_irq_info *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) +{ + struct mlx5_irq_table *irq_table = dev->priv.irq_table; + + return &irq_table->irq_info[vecidx]; +} + +int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb) +{ + struct mlx5_irq_info *irq_info; + + irq_info = &irq_table->irq_info[vecidx]; + return atomic_notifier_chain_register(&irq_info->nh, nb); +} + +int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb) +{ + struct mlx5_irq_info *irq_info; + + irq_info = &irq_table->irq_info[vecidx]; + return atomic_notifier_chain_unregister(&irq_info->nh, nb); +} + +static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) +{ + atomic_notifier_call_chain(nh, 0, NULL); + return IRQ_HANDLED; +} + +static void irq_set_name(char *name, int vecidx) +{ + switch (vecidx) { + case MLX5_EQ_CMD_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_cmd_eq"); + break; + case 
MLX5_EQ_ASYNC_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async_eq"); + break; + case MLX5_EQ_PAGEREQ_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_pages_eq"); + break; + case MLX5_EQ_PFAULT_IDX: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_ib_page_fault_eq"); + break; + default: + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", + vecidx - MLX5_EQ_VEC_COMP_BASE); + break; + } +} + +static int request_irqs(struct mlx5_core_dev *dev, int nvec) +{ + char name[MLX5_MAX_IRQ_NAME]; + int err; + int i; + + for (i = 0; i < nvec; i++) { + struct mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); + int irqn = pci_irq_vector(dev->pdev, i); + + irq_set_name(name, i); + ATOMIC_INIT_NOTIFIER_HEAD(&irq_info->nh); + snprintf(irq_info->name, MLX5_MAX_IRQ_NAME, + "%s@pci:%s", name, pci_name(dev->pdev)); + err = request_irq(irqn, mlx5_irq_int_handler, 0, irq_info->name, + &irq_info->nh); + if (err) { + mlx5_core_err(dev, "Failed to request irq\n"); + goto err_request_irq; + } + } + return 0; + +err_request_irq: + for (; i >= 0; i--) { + struct mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); + int irqn = pci_irq_vector(dev->pdev, i); + + free_irq(irqn, &irq_info->nh); + } + return err; +} + +static void irq_clear_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = dev->priv.irq_table; + + free_irq_cpu_rmap(irq_table->rmap); +#endif +} + +static int irq_set_rmap(struct mlx5_core_dev *mdev) +{ + int err = 0; +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = mdev->priv.irq_table; + int num_affinity_vec; + int vecidx; + + num_affinity_vec = mlx5_irq_get_num_comp(irq_table); + irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec); + if (!irq_table->rmap) { + err = -ENOMEM; + mlx5_core_err(mdev, "Failed to allocate cpu_rmap. 
err %d", err); + goto err_out; + } + + vecidx = MLX5_EQ_VEC_COMP_BASE; + for (; vecidx < irq_table->nvec; vecidx++) { + err = irq_cpu_rmap_add(irq_table->rmap, + pci_irq_vector(mdev->pdev, vecidx)); + if (err) { + mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d", + err); + goto err_irq_cpu_rmap_add; + } + } + return 0; + +err_irq_cpu_rmap_add: + irq_clear_rmap(mdev); +err_out: +#endif + return err; +} + +/* Completion IRQ vectors */ + +static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + struct mlx5_irq_info *irq_info; + int irq; + + irq_info = mlx5_irq_get(mdev, vecidx); + irq = pci_irq_vector(mdev->pdev, vecidx); + if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); + return -ENOMEM; + } + + cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node), + irq_info->mask); + + if (IS_ENABLED(CONFIG_SMP) && + irq_set_affinity_hint(irq, irq_info->mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", + irq); + + return 0; +} + +static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + struct mlx5_irq_info *irq_info; + int irq; + + irq_info = mlx5_irq_get(mdev, vecidx); + irq = pci_irq_vector(mdev->pdev, vecidx); + irq_set_affinity_hint(irq, NULL); + free_cpumask_var(irq_info->mask); +} + +static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) +{ + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); + int err; + int i; + + for (i = 0; i < nvec; i++) { + err = set_comp_irq_affinity_hint(mdev, i); + if (err) + goto err_out; + } + + return 0; + +err_out: + for (i--; i >= 0; i--) + clear_comp_irq_affinity_hint(mdev, i); + + return err; +} + +static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) +{ + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); + int i; + + for (i = 0; i < nvec; i++) + clear_comp_irq_affinity_hint(mdev, 
i); +} + +struct cpumask * +mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx) +{ + return irq_table->irq_info[vecidx].mask; +} + +#ifdef CONFIG_RFS_ACCEL +struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table) +{ + return irq_table->rmap; +} +#endif + +static void unrequest_irqs(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int i; + + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); +} + +int mlx5_irq_table_create(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_irq_table *table = priv->irq_table; + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? + MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); + int nvec; + int err; + + nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + + MLX5_EQ_VEC_COMP_BASE; + nvec = min_t(int, nvec, num_eqs); + if (nvec <= MLX5_EQ_VEC_COMP_BASE) + return -ENOMEM; + + table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL); + if (!table->irq_info) + return -ENOMEM; + + nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, + nvec, PCI_IRQ_MSIX); + if (nvec < 0) { + err = nvec; + goto err_free_irq_info; + } + + table->nvec = nvec; + + err = irq_set_rmap(dev); + if (err) + goto err_set_rmap; + + err = request_irqs(dev, nvec); + if (err) + goto err_request_irqs; + + err = set_comp_irq_affinity_hints(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); + goto err_set_affinity; + } + + return 0; + +err_set_affinity: + unrequest_irqs(dev); +err_request_irqs: + irq_clear_rmap(dev); +err_set_rmap: + pci_free_irq_vectors(dev->pdev); +err_free_irq_info: + kfree(table->irq_info); + return err; +} + +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int i; + + /* free_irq requires that affinity and rmap will be cleared + * before calling it. 
This is why there is asymmetry with set_rmap + * which should be called after alloc_irq but before request_irq. + */ + irq_clear_rmap(dev); + clear_comp_irqs_affinity_hints(dev); + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); + pci_free_irq_vectors(dev->pdev); + kfree(table->irq_info); +} + -- cgit v1.2.3 From cf49f41d29467ccec16b12f77475cc217132c572 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:39 +0000 Subject: net/mlx5: Rename mlx5_irq_info to mlx5_irq struct mlx5_irq_info is an active object and not just info. Signed-off-by: Yuval Avnery Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 77 +++++++++++------------ 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 75408639d150..fec861f4fefe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -12,14 +12,14 @@ #define MLX5_MAX_IRQ_NAME (32) -struct mlx5_irq_info { +struct mlx5_irq { struct atomic_notifier_head nh; cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; }; struct mlx5_irq_table { - struct mlx5_irq_info *irq_info; + struct mlx5_irq *irq; int nvec; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rmap; @@ -48,29 +48,29 @@ int mlx5_irq_get_num_comp(struct mlx5_irq_table *table) return table->nvec - MLX5_EQ_VEC_COMP_BASE; } -static struct mlx5_irq_info *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) +static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) { struct mlx5_irq_table *irq_table = dev->priv.irq_table; - return &irq_table->irq_info[vecidx]; + return &irq_table->irq[vecidx]; } int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, struct notifier_block *nb) { - struct mlx5_irq_info *irq_info; + struct mlx5_irq *irq; - irq_info = 
&irq_table->irq_info[vecidx]; - return atomic_notifier_chain_register(&irq_info->nh, nb); + irq = &irq_table->irq[vecidx]; + return atomic_notifier_chain_register(&irq->nh, nb); } int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, struct notifier_block *nb) { - struct mlx5_irq_info *irq_info; + struct mlx5_irq *irq; - irq_info = &irq_table->irq_info[vecidx]; - return atomic_notifier_chain_unregister(&irq_info->nh, nb); + irq = &irq_table->irq[vecidx]; + return atomic_notifier_chain_unregister(&irq->nh, nb); } static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) @@ -108,15 +108,15 @@ static int request_irqs(struct mlx5_core_dev *dev, int nvec) int i; for (i = 0; i < nvec; i++) { - struct mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); + struct mlx5_irq *irq = mlx5_irq_get(dev, i); int irqn = pci_irq_vector(dev->pdev, i); irq_set_name(name, i); - ATOMIC_INIT_NOTIFIER_HEAD(&irq_info->nh); - snprintf(irq_info->name, MLX5_MAX_IRQ_NAME, + ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh); + snprintf(irq->name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); - err = request_irq(irqn, mlx5_irq_int_handler, 0, irq_info->name, - &irq_info->nh); + err = request_irq(irqn, mlx5_irq_int_handler, 0, irq->name, + &irq->nh); if (err) { mlx5_core_err(dev, "Failed to request irq\n"); goto err_request_irq; @@ -126,10 +126,10 @@ static int request_irqs(struct mlx5_core_dev *dev, int nvec) err_request_irq: for (; i >= 0; i--) { - struct mlx5_irq_info *irq_info = mlx5_irq_get(dev, i); + struct mlx5_irq *irq = mlx5_irq_get(dev, i); int irqn = pci_irq_vector(dev->pdev, i); - free_irq(irqn, &irq_info->nh); + free_irq(irqn, &irq->nh); } return err; } @@ -183,23 +183,22 @@ err_out: static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - struct mlx5_irq_info *irq_info; - int irq; + struct mlx5_irq *irq; + int irqn; - irq_info = mlx5_irq_get(mdev, vecidx); - irq = pci_irq_vector(mdev->pdev, vecidx); - if 
(!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { + irq = mlx5_irq_get(mdev, vecidx); + irqn = pci_irq_vector(mdev->pdev, vecidx); + if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); return -ENOMEM; } cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node), - irq_info->mask); - + irq->mask); if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, irq_info->mask)) + irq_set_affinity_hint(irqn, irq->mask)) mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", - irq); + irqn); return 0; } @@ -207,13 +206,13 @@ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - struct mlx5_irq_info *irq_info; - int irq; + struct mlx5_irq *irq; + int irqn; - irq_info = mlx5_irq_get(mdev, vecidx); - irq = pci_irq_vector(mdev->pdev, vecidx); - irq_set_affinity_hint(irq, NULL); - free_cpumask_var(irq_info->mask); + irq = mlx5_irq_get(mdev, vecidx); + irqn = pci_irq_vector(mdev->pdev, vecidx); + irq_set_affinity_hint(irqn, NULL); + free_cpumask_var(irq->mask); } static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) @@ -249,7 +248,7 @@ static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) struct cpumask * mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx) { - return irq_table->irq_info[vecidx].mask; + return irq_table->irq[vecidx].mask; } #ifdef CONFIG_RFS_ACCEL @@ -285,15 +284,15 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev) if (nvec <= MLX5_EQ_VEC_COMP_BASE) return -ENOMEM; - table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL); - if (!table->irq_info) + table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL); + if (!table->irq) return -ENOMEM; nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, nvec, PCI_IRQ_MSIX); if (nvec < 0) { err = nvec; - goto err_free_irq_info; + goto 
err_free_irq; } table->nvec = nvec; @@ -320,8 +319,8 @@ err_request_irqs: irq_clear_rmap(dev); err_set_rmap: pci_free_irq_vectors(dev->pdev); -err_free_irq_info: - kfree(table->irq_info); +err_free_irq: + kfree(table->irq); return err; } @@ -340,6 +339,6 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) free_irq(pci_irq_vector(dev->pdev, i), &mlx5_irq_get(dev, i)->nh); pci_free_irq_vectors(dev->pdev); - kfree(table->irq_info); + kfree(table->irq); } -- cgit v1.2.3 From 81bfa206032a67f0700459a64a5493c246629604 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Mon, 10 Jun 2019 23:38:41 +0000 Subject: net/mlx5: Use a single IRQ for all async EQs The patch modifies the IRQ allocation so that all async EQs are assigned to the same IRQ resulting in more available IRQs for completion EQs. The changes are using the support for IRQ sharing and EQ polling budget that was introduced in previous patches so when the shared interrupt is triggered, the kernel will serially call the handler of each of the sharing EQs with a certain budget of EQEs to poll in order to prevent starvation. 
Signed-off-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/odp.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 19 ++++++------ drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 38 +++++++++-------------- include/linux/mlx5/eq.h | 14 ++------- 4 files changed, 27 insertions(+), 46 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 7ce7c5bfe685..693a0e225093 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1557,7 +1557,7 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_PFAULT_IDX, + .irq_index = 0, .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, .nent = MLX5_IB_NUM_PF_EQE, .nb = &eq->irq_nb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 0c72c122daef..0f5846a34928 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -250,7 +250,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - u8 vecidx = param->index; + u8 vecidx = param->irq_index; __be64 *pas; void *eqc; int inlen; @@ -435,8 +435,9 @@ static int create_async_eq(struct mlx5_core_dev *dev, int err; mutex_lock(&eq_table->lock); - if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) { - err = -ENOSPC; + /* Async EQs must share irq index 0 */ + if (param->irq_index != 0) { + err = -EINVAL; goto unlock; } @@ -540,7 +541,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_CMD_IDX, + .irq_index = 0, .mask = 1ull << MLX5_EVENT_TYPE_CMD, .nent = MLX5_NUM_CMD_EQE, .nb = &table->cmd_eq.irq_nb, @@ -555,7 +556,7 
@@ static int create_async_eqs(struct mlx5_core_dev *dev) table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_ASYNC_IDX, + .irq_index = 0, .mask = gather_async_events_mask(dev), .nent = MLX5_NUM_ASYNC_EQE, .nb = &table->async_eq.irq_nb, @@ -568,7 +569,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_PAGEREQ_IDX, + .irq_index = 0, .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, .nent = /* TODO: sriov max_vf + */ 1, .nb = &table->pages_eq.irq_nb, @@ -731,7 +732,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) ncomp_eqs = table->num_comp_eqs; nent = MLX5_COMP_EQ_SIZE; for (i = 0; i < ncomp_eqs; i++) { - int vecidx = i + MLX5_EQ_VEC_COMP_BASE; + int vecidx = i + MLX5_IRQ_VEC_COMP_BASE; struct mlx5_eq_param param = {}; eq = kzalloc(sizeof(*eq), GFP_KERNEL); @@ -748,7 +749,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { - .index = vecidx, + .irq_index = vecidx, .mask = 0, .nent = nent, .nb = &eq->irq_nb, @@ -800,7 +801,7 @@ EXPORT_SYMBOL(mlx5_comp_vectors_count); struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { - int vecidx = vector + MLX5_EQ_VEC_COMP_BASE; + int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE; return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table, vecidx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index fec861f4fefe..373981a659c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -45,7 +45,7 @@ void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) int mlx5_irq_get_num_comp(struct mlx5_irq_table *table) { - return table->nvec - MLX5_EQ_VEC_COMP_BASE; + return table->nvec - MLX5_IRQ_VEC_COMP_BASE; } 
static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) @@ -81,24 +81,14 @@ static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) static void irq_set_name(char *name, int vecidx) { - switch (vecidx) { - case MLX5_EQ_CMD_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_cmd_eq"); - break; - case MLX5_EQ_ASYNC_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async_eq"); - break; - case MLX5_EQ_PAGEREQ_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_pages_eq"); - break; - case MLX5_EQ_PFAULT_IDX: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_ib_page_fault_eq"); - break; - default: - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", - vecidx - MLX5_EQ_VEC_COMP_BASE); - break; + if (vecidx == 0) { + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async"); + return; } + + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", + vecidx - MLX5_IRQ_VEC_COMP_BASE); + return; } static int request_irqs(struct mlx5_core_dev *dev, int nvec) @@ -159,7 +149,7 @@ static int irq_set_rmap(struct mlx5_core_dev *mdev) goto err_out; } - vecidx = MLX5_EQ_VEC_COMP_BASE; + vecidx = MLX5_IRQ_VEC_COMP_BASE; for (; vecidx < irq_table->nvec; vecidx++) { err = irq_cpu_rmap_add(irq_table->rmap, pci_irq_vector(mdev->pdev, vecidx)); @@ -182,7 +172,7 @@ err_out: static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + int vecidx = MLX5_IRQ_VEC_COMP_BASE + i; struct mlx5_irq *irq; int irqn; @@ -205,7 +195,7 @@ static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) { - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + int vecidx = MLX5_IRQ_VEC_COMP_BASE + i; struct mlx5_irq *irq; int irqn; @@ -279,16 +269,16 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev) int err; nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + - MLX5_EQ_VEC_COMP_BASE; + MLX5_IRQ_VEC_COMP_BASE; nvec = min_t(int, nvec, num_eqs); - if (nvec <= MLX5_EQ_VEC_COMP_BASE) + if 
(nvec <= MLX5_IRQ_VEC_COMP_BASE) return -ENOMEM; table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL); if (!table->irq) return -ENOMEM; - nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, + nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1, nvec, PCI_IRQ_MSIX); if (nvec < 0) { err = nvec; diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index 73ab658af764..4a94e04eff0a 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -4,17 +4,7 @@ #ifndef MLX5_CORE_EQ_H #define MLX5_CORE_EQ_H -enum { - MLX5_EQ_PAGEREQ_IDX = 0, - MLX5_EQ_CMD_IDX = 1, - MLX5_EQ_ASYNC_IDX = 2, - /* reserved to be used by mlx5_core ulps (mlx5e/mlx5_ib) */ - MLX5_EQ_PFAULT_IDX = 3, - MLX5_EQ_MAX_ASYNC_EQS, - /* completion eqs vector indices start here */ - MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS, -}; - +#define MLX5_IRQ_VEC_COMP_BASE 1 #define MLX5_NUM_CMD_EQE (32) #define MLX5_NUM_ASYNC_EQE (0x1000) #define MLX5_NUM_SPARE_EQE (0x80) @@ -23,7 +13,7 @@ struct mlx5_eq; struct mlx5_core_dev; struct mlx5_eq_param { - u8 index; + u8 irq_index; int nent; u64 mask; struct notifier_block *nb; -- cgit v1.2.3 From 1f8a7bee27e63d7c5287719049941e285e54d370 Mon Sep 17 00:00:00 2001 From: Yuval Avnery Date: Mon, 10 Jun 2019 23:38:42 +0000 Subject: net/mlx5: Add EQ enable/disable API Previously, EQ joined the chain notifier on creation. This forced the caller to be ready to handle events before creating the EQ through eq_create_generic interface. To help the caller control when the created EQ will be attached to the IRQ, add enable/disable API. 
Signed-off-by: Yuval Avnery Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/odp.c | 9 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 105 ++++++++++++++++------- drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 1 - include/linux/mlx5/eq.h | 5 +- 4 files changed, 88 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 693a0e225093..12ccee1eb047 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1560,15 +1560,21 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) .irq_index = 0, .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, .nent = MLX5_IB_NUM_PF_EQE, - .nb = &eq->irq_nb, }; eq->core = mlx5_eq_create_generic(dev->mdev, &param); if (IS_ERR(eq->core)) { err = PTR_ERR(eq->core); goto err_wq; } + err = mlx5_eq_enable(dev->mdev, eq->core, &eq->irq_nb); + if (err) { + mlx5_ib_err(dev, "failed to enable odp EQ %d\n", err); + goto err_eq; + } return 0; +err_eq: + mlx5_eq_destroy_generic(dev->mdev, eq->core); err_wq: destroy_workqueue(eq->wq); err_mempool: @@ -1581,6 +1587,7 @@ mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) { int err; + mlx5_eq_disable(dev->mdev, eq->core, &eq->irq_nb); err = mlx5_eq_destroy_generic(dev->mdev, eq->core); cancel_work_sync(&eq->work); destroy_workqueue(eq->wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 0f5846a34928..58fff2f39b38 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -304,27 +304,14 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; - eq->irq_nb = param->nb; - - err = mlx5_irq_attach_nb(dev->priv.eq_table->irq_table, vecidx, - param->nb); - if (err) - goto err_eq; err = mlx5_debug_eq_add(dev, eq); if (err) - goto
err_detach; - - /* EQs are created in ARMED state - */ - eq_update_ci(eq, 1); + goto err_eq; kvfree(in); return 0; -err_detach: - mlx5_irq_detach_nb(dev->priv.eq_table->irq_table, vecidx, eq->irq_nb); - err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -336,17 +323,49 @@ err_buf: return err; } +/** + * mlx5_eq_enable - Enable EQ for receiving EQEs + * @dev - Device which owns the eq + * @eq - EQ to enable + * @nb - notifier call block + * mlx5_eq_enable - must be called after EQ is created in device. + */ +int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int err; + + err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb); + if (!err) + eq_update_ci(eq, 1); + + return err; +} +EXPORT_SYMBOL(mlx5_eq_enable); + +/** + * mlx5_eq_disable - Disable EQ from receiving EQEs + * @dev - Device which owns the eq + * @eq - EQ to disable + * @nb - notifier call block + * mlx5_eq_disable - must be called before EQ is destroyed. + */ +void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + + mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb); +} +EXPORT_SYMBOL(mlx5_eq_disable); + static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { int err; mlx5_debug_eq_remove(dev, eq); - err = mlx5_irq_detach_nb(dev->priv.eq_table->irq_table, - eq->vecidx, eq->irq_nb); - if (err) - mlx5_core_warn(eq->dev, "eq failed to detach from irq.
err %d", - err); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", @@ -544,14 +563,17 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .irq_index = 0, .mask = 1ull << MLX5_EVENT_TYPE_CMD, .nent = MLX5_NUM_CMD_EQE, - .nb = &table->cmd_eq.irq_nb, }; err = create_async_eq(dev, &table->cmd_eq.core, &param); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); goto err0; } - + err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err); + goto err1; + } mlx5_cmd_use_events(dev); table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; @@ -559,12 +581,17 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .irq_index = 0, .mask = gather_async_events_mask(dev), .nent = MLX5_NUM_ASYNC_EQE, - .nb = &table->async_eq.irq_nb, }; err = create_async_eq(dev, &table->async_eq.core, &param); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); - goto err1; + goto err2; + } + err = mlx5_eq_enable(dev, &table->async_eq.core, + &table->async_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable async EQ %d\n", err); + goto err3; } table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; @@ -572,21 +599,31 @@ static int create_async_eqs(struct mlx5_core_dev *dev) .irq_index = 0, .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, .nent = /* TODO: sriov max_vf + */ 1, - .nb = &table->pages_eq.irq_nb, }; err = create_async_eq(dev, &table->pages_eq.core, &param); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); - goto err2; + goto err4; + } + err = mlx5_eq_enable(dev, &table->pages_eq.core, + &table->pages_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err); + goto err5; } return err; -err2: +err5: + destroy_async_eq(dev, &table->pages_eq.core); +err4: + mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); +err3:
destroy_async_eq(dev, &table->async_eq.core); - -err1: +err2: mlx5_cmd_use_polling(dev); + mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); +err1: destroy_async_eq(dev, &table->cmd_eq.core); err0: mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); @@ -598,11 +635,13 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = dev->priv.eq_table; int err; + mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb); err = destroy_async_eq(dev, &table->pages_eq.core); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); + mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); err = destroy_async_eq(dev, &table->async_eq.core); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", @@ -610,6 +649,7 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) mlx5_cmd_use_polling(dev); + mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); err = destroy_async_eq(dev, &table->cmd_eq.core); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", @@ -711,6 +751,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); if (destroy_unmap_eq(dev, &eq->core)) mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", eq->core.eqn); @@ -752,13 +793,19 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) .irq_index = vecidx, .mask = 0, .nent = nent, - .nb = &eq->irq_nb, }; err = create_map_eq(dev, &eq->core, &param); if (err) { kfree(eq); goto clean; } + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); + if (err) { + destroy_unmap_eq(dev, &eq->core); + kfree(eq); + goto clean; + } + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */ list_add_tail(&eq->list, &table->comp_eqs_list); diff --git
a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 3836c39b2900..24bd991a727e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -33,7 +33,6 @@ struct mlx5_eq { u8 eqn; int nent; struct mlx5_rsc_debug *dbg; - struct notifier_block *irq_nb; /* For destroy only */ }; struct mlx5_eq_async { diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h index 4a94e04eff0a..70e16dcfb4c4 100644 --- a/include/linux/mlx5/eq.h +++ b/include/linux/mlx5/eq.h @@ -16,13 +16,16 @@ struct mlx5_eq_param { u8 irq_index; int nent; u64 mask; - struct notifier_block *nb; }; struct mlx5_eq * mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param); int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb); +void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb); struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc); void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm); -- cgit v1.2.3 From 98fdbea550378e0153092bce21261df86a8ccc57 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 12 Jun 2019 15:20:11 +0300 Subject: net/mlx5: Declare more strictly devlink encap mode Devlink has UAPI declaration for encap mode, so there is no need to be loose on the data get/set by drivers. Update call sites to use enum devlink_eswitch_encap_mode instead of plain u8. 
Suggested-by: Parav Pandit Signed-off-by: Leon Romanovsky Acked-by: Jiri Pirko Reviewed-by: Parav Pandit Reviewed-by: Petr Vorel --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 8 +++++--- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 6 ++++-- include/net/devlink.h | 6 ++++-- net/core/devlink.c | 6 ++++-- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index e03811be771d..8b9f2cf58e91 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -176,7 +176,7 @@ struct mlx5_esw_offload { const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; u64 num_flows; - u8 encap; + enum devlink_eswitch_encap_mode encap; }; /* E-Switch MC FDB table hash node */ @@ -357,9 +357,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode); -int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, struct netlink_ext_ack *extack); -int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap); +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1638e4cdeb16..17abb98b48af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2160,7 +2160,8 @@ out: return 0; } -int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); @@ -2209,7 +2210,8 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, return err; } -int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; diff --git a/include/net/devlink.h b/include/net/devlink.h index 1c4adfb4195a..7a34fc586def 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -530,8 +530,10 @@ struct devlink_ops { int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode); int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode, struct netlink_ext_ack *extack); - int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode); - int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode, + int (*eswitch_encap_mode_get)(struct devlink *devlink, + enum devlink_eswitch_encap_mode *p_encap_mode); + int (*eswitch_encap_mode_set)(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap_mode, struct netlink_ext_ack *extack); int (*info_get)(struct devlink *devlink, struct devlink_info_req *req, struct netlink_ext_ack *extack); diff --git a/net/core/devlink.c b/net/core/devlink.c index d43bc52b8840..47ae69363b07 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1552,7 +1552,8 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, u32 seq, int flags) { const struct devlink_ops *ops = devlink->ops; - u8 inline_mode, encap_mode; + enum 
devlink_eswitch_encap_mode encap_mode; + u8 inline_mode; void *hdr; int err = 0; u16 mode; @@ -1628,7 +1629,8 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, { struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; - u8 inline_mode, encap_mode; + enum devlink_eswitch_encap_mode encap_mode; + u8 inline_mode; int err = 0; u16 mode; -- cgit v1.2.3 From 82b11f071936a11094e1c44730030cd3d894e0b4 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 12 Jun 2019 15:20:12 +0300 Subject: net/mlx5: Expose eswitch encap mode Add API to get the current Eswitch encap mode. It will be used in downstream patches to check if flow table can be created with encap support or not. Signed-off-by: Maor Gottlieb Reviewed-by: Petr Vorel Signed-off-by: Leon Romanovsky Reviewed-by: Parav Pandit --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 11 +++++++++++ include/linux/mlx5/eswitch.h | 12 ++++++++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index a4df109fbeb7..12010f85fa35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2457,6 +2457,17 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) } EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); +enum devlink_eswitch_encap_mode +mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw; + + esw = dev->priv.eswitch; + return ESW_ALLOWED(esw) ? 
esw->offloads.encap : + DEVLINK_ESWITCH_ENCAP_MODE_NONE; +} +EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); + bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { if ((dev0->priv.eswitch->mode == SRIOV_NONE && diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index d81ee4df181c..174eec0871d9 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -7,6 +7,7 @@ #define _MLX5_ESWITCH_ #include <linux/mlx5/driver.h> +#include <net/devlink.h> #define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager) @@ -62,4 +63,15 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw); struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn); + +#ifdef CONFIG_MLX5_ESWITCH +enum devlink_eswitch_encap_mode +mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev); +#else /* CONFIG_MLX5_ESWITCH */ +static inline enum devlink_eswitch_encap_mode +mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) +{ + return DEVLINK_ESWITCH_ENCAP_MODE_NONE; +} +#endif /* CONFIG_MLX5_ESWITCH */ #endif -- cgit v1.2.3