From 771addf60ac0a266a023c3e7fcae9a629658b455 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 4 Apr 2017 13:31:41 +0300 Subject: IB/core: Refactor idr to be per uverbs_file The current code creates an idr per type. Since types are currently common for all drivers and known in advance, this was good enough. However, the proposed ioctl based infrastructure allows each driver to declare only some of the common types and declare its own specific types. Thus, we decided to implement idr to be per uverbs_file. Signed-off-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Haggai Eran Reviewed-by: Sean Hefty Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs.h | 19 ++-- drivers/infiniband/core/uverbs_cmd.c | 157 ++++++++++++++++------------------ drivers/infiniband/core/uverbs_main.c | 45 +++------- include/rdma/ib_verbs.h | 1 + 4 files changed, 95 insertions(+), 127 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index e1bedf0bac04..6215735fa98e 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -123,6 +123,10 @@ struct ib_uverbs_file { struct ib_uverbs_event_file *async_file; struct list_head list; int is_closed; + + struct idr idr; + /* spinlock protects write access to idr */ + spinlock_t idr_lock; }; struct ib_uverbs_event { @@ -176,20 +180,7 @@ struct ib_ucq_object { u32 async_events_reported; }; -extern spinlock_t ib_uverbs_idr_lock; -extern struct idr ib_uverbs_pd_idr; -extern struct idr ib_uverbs_mr_idr; -extern struct idr ib_uverbs_mw_idr; -extern struct idr ib_uverbs_ah_idr; -extern struct idr ib_uverbs_cq_idr; -extern struct idr ib_uverbs_qp_idr; -extern struct idr ib_uverbs_srq_idr; -extern struct idr ib_uverbs_xrcd_idr; -extern struct idr ib_uverbs_rule_idr; -extern struct idr ib_uverbs_wq_idr; -extern struct idr ib_uverbs_rwq_ind_tbl_idr; - -void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); +void idr_remove_uobj(struct ib_uobject *uobj); struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, struct ib_device *ib_dev, diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 7b7a76e1279a..03c4f68a88e1 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -120,37 +120,36 @@ static void put_uobj_write(struct ib_uobject *uobj) put_uobj(uobj); } -static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj) +static int idr_add_uobj(struct ib_uobject *uobj) { int ret; idr_preload(GFP_KERNEL); - spin_lock(&ib_uverbs_idr_lock); + spin_lock(&uobj->context->ufile->idr_lock); - ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT); + ret = idr_alloc(&uobj->context->ufile->idr, uobj, 0, 0, GFP_NOWAIT); if (ret >= 0) uobj->id = ret; - spin_unlock(&ib_uverbs_idr_lock); + spin_unlock(&uobj->context->ufile->idr_lock); idr_preload_end(); return ret < 0 ? 
ret : 0; } -void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj) +void idr_remove_uobj(struct ib_uobject *uobj) { - spin_lock(&ib_uverbs_idr_lock); - idr_remove(idr, uobj->id); - spin_unlock(&ib_uverbs_idr_lock); + spin_lock(&uobj->context->ufile->idr_lock); + idr_remove(&uobj->context->ufile->idr, uobj->id); + spin_unlock(&uobj->context->ufile->idr_lock); } -static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, - struct ib_ucontext *context) +static struct ib_uobject *__idr_get_uobj(int id, struct ib_ucontext *context) { struct ib_uobject *uobj; rcu_read_lock(); - uobj = idr_find(idr, id); + uobj = idr_find(&context->ufile->idr, id); if (uobj) { if (uobj->context == context) kref_get(&uobj->ref); @@ -162,12 +161,12 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, return uobj; } -static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, - struct ib_ucontext *context, int nested) +static struct ib_uobject *idr_read_uobj(int id, struct ib_ucontext *context, + int nested) { struct ib_uobject *uobj; - uobj = __idr_get_uobj(idr, id, context); + uobj = __idr_get_uobj(id, context); if (!uobj) return NULL; @@ -183,12 +182,11 @@ static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, return uobj; } -static struct ib_uobject *idr_write_uobj(struct idr *idr, int id, - struct ib_ucontext *context) +static struct ib_uobject *idr_write_uobj(int id, struct ib_ucontext *context) { struct ib_uobject *uobj; - uobj = __idr_get_uobj(idr, id, context); + uobj = __idr_get_uobj(id, context); if (!uobj) return NULL; @@ -201,18 +199,18 @@ static struct ib_uobject *idr_write_uobj(struct idr *idr, int id, return uobj; } -static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context, +static void *idr_read_obj(int id, struct ib_ucontext *context, int nested) { struct ib_uobject *uobj; - uobj = idr_read_uobj(idr, id, context, nested); + uobj = idr_read_uobj(id, context, nested); return uobj ? 
uobj->object : NULL; } static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0); + return idr_read_obj(pd_handle, context, 0); } static void put_pd_read(struct ib_pd *pd) @@ -222,7 +220,7 @@ static void put_pd_read(struct ib_pd *pd) static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested) { - return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested); + return idr_read_obj(cq_handle, context, nested); } static void put_cq_read(struct ib_cq *cq) @@ -232,7 +230,7 @@ static void put_cq_read(struct ib_cq *cq) static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0); + return idr_read_obj(ah_handle, context, 0); } static void put_ah_read(struct ib_ah *ah) @@ -242,12 +240,12 @@ static void put_ah_read(struct ib_ah *ah) static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); + return idr_read_obj(qp_handle, context, 0); } static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0); + return idr_read_obj(wq_handle, context, 0); } static void put_wq_read(struct ib_wq *wq) @@ -258,7 +256,7 @@ static void put_wq_read(struct ib_wq *wq) static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0); + return idr_read_obj(ind_table_handle, context, 0); } static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table) @@ -270,7 +268,7 @@ static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) { struct ib_uobject *uobj; - uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context); + uobj = idr_write_uobj(qp_handle, context); return uobj ? uobj->object : NULL; } @@ -286,7 +284,7 @@ static void put_qp_write(struct ib_qp *qp) static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0); + return idr_read_obj(srq_handle, context, 0); } static void put_srq_read(struct ib_srq *srq) @@ -297,7 +295,7 @@ static void put_srq_read(struct ib_srq *srq) static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context, struct ib_uobject **uobj) { - *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0); + *uobj = idr_read_uobj(xrcd_handle, context, 0); return *uobj ? 
(*uobj)->object : NULL; } @@ -305,7 +303,6 @@ static void put_xrcd_read(struct ib_uobject *uobj) { put_uobj_read(uobj); } - ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, @@ -348,6 +345,8 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, ucontext->device = ib_dev; ucontext->cg_obj = cg_obj; + /* ufile is required when some objects are released */ + ucontext->ufile = file; INIT_LIST_HEAD(&ucontext->pd_list); INIT_LIST_HEAD(&ucontext->mr_list); INIT_LIST_HEAD(&ucontext->mw_list); @@ -591,7 +590,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, atomic_set(&pd->usecnt, 0); uobj->object = pd; - ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj); + ret = idr_add_uobj(uobj); if (ret) goto err_idr; @@ -615,7 +614,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, return in_len; err_copy: - idr_remove_uobj(&ib_uverbs_pd_idr, uobj); + idr_remove_uobj(uobj); err_idr: ib_dealloc_pd(pd); @@ -639,7 +638,7 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext); + uobj = idr_write_uobj(cmd.pd_handle, file->ucontext); if (!uobj) return -EINVAL; pd = uobj->object; @@ -659,7 +658,7 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, uobj->live = 0; put_uobj_write(uobj); - idr_remove_uobj(&ib_uverbs_pd_idr, uobj); + idr_remove_uobj(uobj); mutex_lock(&file->mutex); list_del(&uobj->list); @@ -835,7 +834,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, atomic_set(&obj->refcnt, 0); obj->uobject.object = xrcd; - ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); + ret = idr_add_uobj(&obj->uobject); if (ret) goto err_idr; @@ -879,7 +878,7 @@ err_copy: } err_insert_xrcd: - idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); + idr_remove_uobj(&obj->uobject); err_idr: ib_dealloc_xrcd(xrcd); @@ -913,7 +912,7 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, return -EFAULT; mutex_lock(&file->device->xrcd_tree_mutex); - uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext); + uobj = idr_write_uobj(cmd.xrcd_handle, file->ucontext); if (!uobj) { ret = -EINVAL; goto out; @@ -946,7 +945,7 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, if (inode && !live) xrcd_table_delete(file->device, inode); - idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); + idr_remove_uobj(uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); @@ -1043,7 +1042,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, atomic_inc(&pd->usecnt); uobj->object = mr; - ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj); + ret = idr_add_uobj(uobj); if (ret) goto err_unreg; @@ -1071,7 +1070,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, return in_len; err_copy: - idr_remove_uobj(&ib_uverbs_mr_idr, uobj); + idr_remove_uobj(uobj); err_unreg: ib_dereg_mr(mr); @@ -1119,8 +1118,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) return -EINVAL; - uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, - file->ucontext); + uobj = idr_write_uobj(cmd.mr_handle, file->ucontext); if (!uobj) return -EINVAL; @@ -1189,7 +1187,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext); + uobj = idr_write_uobj(cmd.mr_handle, file->ucontext); if (!uobj) 
return -EINVAL; @@ -1205,8 +1203,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, return ret; ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - - idr_remove_uobj(&ib_uverbs_mr_idr, uobj); + idr_remove_uobj(uobj); mutex_lock(&file->mutex); list_del(&uobj->list); @@ -1271,7 +1268,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, atomic_inc(&pd->usecnt); uobj->object = mw; - ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj); + ret = idr_add_uobj(uobj); if (ret) goto err_unalloc; @@ -1298,7 +1295,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, return in_len; err_copy: - idr_remove_uobj(&ib_uverbs_mw_idr, uobj); + idr_remove_uobj(uobj); err_unalloc: uverbs_dealloc_mw(mw); @@ -1327,7 +1324,7 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext); + uobj = idr_write_uobj(cmd.mw_handle, file->ucontext); if (!uobj) return -EINVAL; @@ -1343,8 +1340,7 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, return ret; ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - - idr_remove_uobj(&ib_uverbs_mw_idr, uobj); + idr_remove_uobj(uobj); mutex_lock(&file->mutex); list_del(&uobj->list); @@ -1463,7 +1459,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, atomic_set(&cq->usecnt, 0); obj->uobject.object = cq; - ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject); + ret = idr_add_uobj(&obj->uobject); if (ret) goto err_free; @@ -1489,7 +1485,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, return obj; err_cb: - idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject); + idr_remove_uobj(&obj->uobject); err_free: ib_destroy_cq(cq); @@ -1763,7 +1759,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); + uobj = idr_write_uobj(cmd.cq_handle, file->ucontext); if (!uobj) return -EINVAL; cq = uobj->object; @@ -1780,8 +1776,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, return ret; ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - - idr_remove_uobj(&ib_uverbs_cq_idr, uobj); + idr_remove_uobj(uobj); mutex_lock(&file->mutex); list_del(&uobj->list); @@ -1994,7 +1989,7 @@ static int create_qp(struct ib_uverbs_file *file, qp->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = qp; - ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + ret = idr_add_uobj(&obj->uevent.uobject); if (ret) goto err_destroy; @@ -2042,7 +2037,7 @@ static int create_qp(struct ib_uverbs_file *file, return 0; err_cb: - idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + idr_remove_uobj(&obj->uevent.uobject); err_destroy: ib_destroy_qp(qp); @@ -2232,7 +2227,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, qp->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = qp; - ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + ret = idr_add_uobj(&obj->uevent.uobject); if (ret) goto err_destroy; @@ -2261,7 +2256,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, return in_len; err_remove: - idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + idr_remove_uobj(&obj->uevent.uobject); err_destroy: ib_destroy_qp(qp); @@ -2557,7 +2552,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, memset(&resp, 0, sizeof resp); - uobj = 
idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext); + uobj = idr_write_uobj(cmd.qp_handle, file->ucontext); if (!uobj) return -EINVAL; qp = uobj->object; @@ -2582,7 +2577,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, if (obj->uxrcd) atomic_dec(&obj->uxrcd->refcnt); - idr_remove_uobj(&ib_uverbs_qp_idr, uobj); + idr_remove_uobj(uobj); mutex_lock(&file->mutex); list_del(&uobj->list); @@ -3048,7 +3043,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, ah->uobject = uobj; uobj->object = ah; - ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj); + ret = idr_add_uobj(uobj); if (ret) goto err_destroy; @@ -3073,7 +3068,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, return in_len; err_copy: - idr_remove_uobj(&ib_uverbs_ah_idr, uobj); + idr_remove_uobj(uobj); err_destroy: ib_destroy_ah(ah); @@ -3101,7 +3096,7 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext); + uobj = idr_write_uobj(cmd.ah_handle, file->ucontext); if (!uobj) return -EINVAL; ah = uobj->object; @@ -3116,8 +3111,7 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, return ret; ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - - idr_remove_uobj(&ib_uverbs_ah_idr, uobj); + idr_remove_uobj(uobj); mutex_lock(&file->mutex); list_del(&uobj->list); @@ -3450,7 +3444,7 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, atomic_inc(&cq->usecnt); wq->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = wq; - err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); + err = idr_add_uobj(&obj->uevent.uobject); if (err) goto destroy_wq; @@ -3477,7 +3471,7 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, return 0; err_copy: - idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); + idr_remove_uobj(&obj->uevent.uobject); destroy_wq: ib_destroy_wq(wq); err_put_cq: @@ -3526,7 +3520,7 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, return -EOPNOTSUPP; resp.response_length = required_resp_len; - uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle, + uobj = idr_write_uobj(cmd.wq_handle, file->ucontext); if (!uobj) return -EINVAL; @@ -3541,7 +3535,7 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, if (ret) return ret; - idr_remove_uobj(&ib_uverbs_wq_idr, uobj); + idr_remove_uobj(uobj); mutex_lock(&file->mutex); list_del(&uobj->list); @@ -3713,7 +3707,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (i = 0; i < num_wq_handles; i++) atomic_inc(&wqs[i]->usecnt); - err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + err = idr_add_uobj(uobj); if (err) goto destroy_ind_tbl; @@ -3741,7 +3735,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, return 0; err_copy: - idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + idr_remove_uobj(uobj); destroy_ind_tbl: ib_destroy_rwq_ind_table(rwq_ind_tbl); err_uobj: @@ -3784,7 +3778,7 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle, + uobj = idr_write_uobj(cmd.ind_tbl_handle, file->ucontext); if (!uobj) return -EINVAL; @@ -3800,7 +3794,7 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, if (ret) return ret; - idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + idr_remove_uobj(uobj); mutex_lock(&file->mutex); list_del(&uobj->list); @@ -3945,7 +3939,7 
@@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, flow_id->uobject = uobj; uobj->object = flow_id; - err = idr_add_uobj(&ib_uverbs_rule_idr, uobj); + err = idr_add_uobj(uobj); if (err) goto destroy_flow; @@ -3970,7 +3964,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, kfree(kern_flow_attr); return 0; err_copy: - idr_remove_uobj(&ib_uverbs_rule_idr, uobj); + idr_remove_uobj(uobj); destroy_flow: ib_destroy_flow(flow_id); err_create: @@ -4007,8 +4001,7 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EINVAL; - uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle, - file->ucontext); + uobj = idr_write_uobj(cmd.flow_handle, file->ucontext); if (!uobj) return -EINVAL; flow_id = uobj->object; @@ -4022,7 +4015,7 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, put_uobj_write(uobj); - idr_remove_uobj(&ib_uverbs_rule_idr, uobj); + idr_remove_uobj(uobj); mutex_lock(&file->mutex); list_del(&uobj->list); @@ -4115,7 +4108,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, atomic_set(&srq->usecnt, 0); obj->uevent.uobject.object = srq; - ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); + ret = idr_add_uobj(&obj->uevent.uobject); if (ret) goto err_destroy; @@ -4149,7 +4142,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, return 0; err_copy: - idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); + idr_remove_uobj(&obj->uevent.uobject); err_destroy: ib_destroy_srq(srq); @@ -4327,7 +4320,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext); + uobj = idr_write_uobj(cmd.srq_handle, file->ucontext); if (!uobj) return -EINVAL; srq = uobj->object; @@ -4350,7 +4343,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, atomic_dec(&us->uxrcd->refcnt); } - idr_remove_uobj(&ib_uverbs_srq_idr, uobj); + idr_remove_uobj(uobj); mutex_lock(&file->mutex); list_del(&uobj->list); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 35c788a32e26..f6812fb6cd0c 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -67,19 +67,6 @@ enum { static struct class *uverbs_class; -DEFINE_SPINLOCK(ib_uverbs_idr_lock); -DEFINE_IDR(ib_uverbs_pd_idr); -DEFINE_IDR(ib_uverbs_mr_idr); -DEFINE_IDR(ib_uverbs_mw_idr); -DEFINE_IDR(ib_uverbs_ah_idr); -DEFINE_IDR(ib_uverbs_cq_idr); -DEFINE_IDR(ib_uverbs_qp_idr); -DEFINE_IDR(ib_uverbs_srq_idr); -DEFINE_IDR(ib_uverbs_xrcd_idr); -DEFINE_IDR(ib_uverbs_rule_idr); -DEFINE_IDR(ib_uverbs_wq_idr); -DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr); - static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -236,7 +223,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { struct ib_ah *ah = uobj->object; - idr_remove_uobj(&ib_uverbs_ah_idr, uobj); + idr_remove_uobj(uobj); ib_destroy_ah(ah); ib_rdmacg_uncharge(&uobj->cg_obj, context->device, RDMACG_RESOURCE_HCA_OBJECT); @@ -247,7 +234,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) { struct ib_mw *mw = uobj->object; - idr_remove_uobj(&ib_uverbs_mw_idr, uobj); + idr_remove_uobj(uobj); uverbs_dealloc_mw(mw); ib_rdmacg_uncharge(&uobj->cg_obj, context->device, RDMACG_RESOURCE_HCA_OBJECT); @@ -257,7 +244,7 @@ 
static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) { struct ib_flow *flow_id = uobj->object; - idr_remove_uobj(&ib_uverbs_rule_idr, uobj); + idr_remove_uobj(uobj); ib_destroy_flow(flow_id); ib_rdmacg_uncharge(&uobj->cg_obj, context->device, RDMACG_RESOURCE_HCA_OBJECT); @@ -269,7 +256,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, struct ib_uqp_object *uqp = container_of(uobj, struct ib_uqp_object, uevent.uobject); - idr_remove_uobj(&ib_uverbs_qp_idr, uobj); + idr_remove_uobj(uobj); if (qp == qp->real_qp) ib_uverbs_detach_umcast(qp, uqp); ib_destroy_qp(qp); @@ -283,7 +270,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object; struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; - idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); + idr_remove_uobj(uobj); ib_destroy_rwq_ind_table(rwq_ind_tbl); kfree(ind_tbl); kfree(uobj); @@ -294,7 +281,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, struct ib_uwq_object *uwq = container_of(uobj, struct ib_uwq_object, uevent.uobject); - idr_remove_uobj(&ib_uverbs_wq_idr, uobj); + idr_remove_uobj(uobj); ib_destroy_wq(wq); ib_uverbs_release_uevent(file, &uwq->uevent); kfree(uwq); @@ -305,7 +292,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, struct ib_uevent_object *uevent = container_of(uobj, struct ib_uevent_object, uobject); - idr_remove_uobj(&ib_uverbs_srq_idr, uobj); + idr_remove_uobj(uobj); ib_destroy_srq(srq); ib_rdmacg_uncharge(&uobj->cg_obj, context->device, RDMACG_RESOURCE_HCA_OBJECT); @@ -319,7 +306,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, struct ib_ucq_object *ucq = container_of(uobj, struct ib_ucq_object, uobject); - idr_remove_uobj(&ib_uverbs_cq_idr, uobj); + idr_remove_uobj(uobj); ib_destroy_cq(cq); ib_rdmacg_uncharge(&uobj->cg_obj, context->device, RDMACG_RESOURCE_HCA_OBJECT); @@ -330,7 +317,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { struct ib_mr *mr = uobj->object; - idr_remove_uobj(&ib_uverbs_mr_idr, uobj); + idr_remove_uobj(uobj); ib_dereg_mr(mr); ib_rdmacg_uncharge(&uobj->cg_obj, context->device, RDMACG_RESOURCE_HCA_OBJECT); @@ -343,7 +330,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, struct ib_uxrcd_object *uxrcd = container_of(uobj, struct ib_uxrcd_object, uobject); - idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); + idr_remove_uobj(uobj); ib_uverbs_dealloc_xrcd(file->device, xrcd); kfree(uxrcd); } @@ -352,7 +339,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { struct ib_pd *pd = uobj->object; - idr_remove_uobj(&ib_uverbs_pd_idr, uobj); + idr_remove_uobj(uobj); ib_dealloc_pd(pd); ib_rdmacg_uncharge(&uobj->cg_obj, context->device, RDMACG_RESOURCE_HCA_OBJECT); @@ -986,6 +973,8 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) } file->device = dev; + spin_lock_init(&file->idr_lock); + idr_init(&file->idr); file->ucontext = NULL; file->async_file = NULL; kref_init(&file->ref); @@ -1023,6 +1012,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) file->ucontext = NULL; } mutex_unlock(&file->cleanup_mutex); + idr_destroy(&file->idr); mutex_lock(&file->device->lists_mutex); if (!file->is_closed) { @@ -1396,13 +1386,6 @@ static void __exit ib_uverbs_cleanup(void) 
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); if (overflow_maj) unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES); - idr_destroy(&ib_uverbs_pd_idr); - idr_destroy(&ib_uverbs_mr_idr); - idr_destroy(&ib_uverbs_mw_idr); - idr_destroy(&ib_uverbs_ah_idr); - idr_destroy(&ib_uverbs_cq_idr); - idr_destroy(&ib_uverbs_qp_idr); - idr_destroy(&ib_uverbs_srq_idr); } module_init(ib_uverbs_init); module_exit(ib_uverbs_cleanup); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0f1813c13687..319e69106a26 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1365,6 +1365,7 @@ struct ib_rdmacg_object { struct ib_ucontext { struct ib_device *device; + struct ib_uverbs_file *ufile; struct list_head pd_list; struct list_head mr_list; struct list_head mw_list; -- cgit v1.2.3 From 3832125624b75b54567be906e9aa67e1343be569 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 4 Apr 2017 13:31:42 +0300 Subject: IB/core: Add support for idr types The new ioctl infrastructure supports driver-specific objects. Each such object type has a hot unplug function, an allocation size and an order of destruction. When a ucontext is created, a new list is created in this ib_ucontext. This list contains all objects created under this ib_ucontext. When an ib_ucontext is destroyed, we traverse this list several times, destroying the various objects in the order given by the object type description. If several object types share the same destruction order, they are destroyed in reverse order of their creation. Adding an object is done in two parts. First, an object is allocated and added to the idr tree. Then, the command's handlers (in downstream patches) can work on this object and fill in its required details. After a successful command, the commit part is called and the user object becomes visible to the ucontext. If the handler fails, alloc_abort should be called. Removing a uobject is done by calling lookup_get with the write flag and finalizing it with destroy_commit. A major change from the previous code is that we actually destroy the kernel object itself in destroy_commit (rather than just the uobject). We must make sure that the idr (per-uverbs-file) and the list (per-ucontext) can be accessed concurrently without corrupting them. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/core/Makefile | 3 +- drivers/infiniband/core/rdma_core.c | 450 ++++++++++++++++++++++++++++++++++++ drivers/infiniband/core/rdma_core.h | 55 +++++ include/rdma/ib_verbs.h | 21 ++ include/rdma/uverbs_types.h | 132 +++++++++++ 5 files changed, 660 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/core/rdma_core.c create mode 100644 drivers/infiniband/core/rdma_core.h create mode 100644 include/rdma/uverbs_types.h diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index e426ac877d19..d29f910d98c9 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -29,4 +29,5 @@ ib_umad-y := user_mad.o ib_ucm-y := ucm.o -ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o +ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ + rdma_core.o diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c new file mode 100644 index 000000000000..1cbc053add34 --- /dev/null +++ b/drivers/infiniband/core/rdma_core.c @@ -0,0 +1,450 @@ +/* + * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved.
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "uverbs.h" +#include "core_priv.h" +#include "rdma_core.h" + +void uverbs_uobject_get(struct ib_uobject *uobject) +{ + kref_get(&uobject->ref); +} + +static void uverbs_uobject_put_ref(struct kref *ref) +{ + struct ib_uobject *uobj = + container_of(ref, struct ib_uobject, ref); + + if (uobj->type->type_class->needs_kfree_rcu) + kfree_rcu(uobj, rcu); + else + kfree(uobj); +} + +void uverbs_uobject_put(struct ib_uobject *uobject) +{ + kref_put(&uobject->ref, uverbs_uobject_put_ref); +} + +static int uverbs_try_lock_object(struct ib_uobject *uobj, bool write) +{ + /* + * When a read is required, we use a positive counter. Each read + * request checks that the value != -1 and increments it. Write + * requires exclusive access, thus we check that the counter is + * zero (nobody claimed this object) and we set it to -1. + * Releasing a read lock is done by simply decreasing the counter. + * As for writes, since only a single write is permitted, setting + * it to zero is enough for releasing it. + */ + if (!write) + return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ? + -EBUSY : 0; + + /* lock is either WRITE or DESTROY - should be exclusive */ + return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY; +} + +static struct ib_uobject *alloc_uobj(struct ib_ucontext *context, + const struct uverbs_obj_type *type) +{ + struct ib_uobject *uobj = kmalloc(type->obj_size, GFP_KERNEL); + + if (!uobj) + return ERR_PTR(-ENOMEM); + /* + * user_handle should be filled by the handler; + * the object is added to the list in the commit stage. + */ + uobj->context = context; + uobj->type = type; + atomic_set(&uobj->usecnt, 0); + kref_init(&uobj->ref); + + return uobj; +} + +static int idr_add_uobj(struct ib_uobject *uobj) +{ + int ret; + + idr_preload(GFP_KERNEL); + spin_lock(&uobj->context->ufile->idr_lock); + + /* + * We start with allocating an idr pointing to NULL. This represents an + * object which isn't initialized yet. We'll replace it later on with + * the real object once we commit.
+ */ + ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0, + min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT); + if (ret >= 0) + uobj->id = ret; + + spin_unlock(&uobj->context->ufile->idr_lock); + idr_preload_end(); + + return ret < 0 ? ret : 0; +} + +/* + * It only removes it from the uobjects list, uverbs_uobject_put() is still + * required. + */ +static void uverbs_idr_remove_uobj(struct ib_uobject *uobj) +{ + spin_lock(&uobj->context->ufile->idr_lock); + idr_remove(&uobj->context->ufile->idr, uobj->id); + spin_unlock(&uobj->context->ufile->idr_lock); +} + +/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */ +static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext, + int id, bool write) +{ + struct ib_uobject *uobj; + + rcu_read_lock(); + /* object won't be released as we're protected in rcu */ + uobj = idr_find(&ucontext->ufile->idr, id); + if (!uobj) { + uobj = ERR_PTR(-ENOENT); + goto free; + } + + uverbs_uobject_get(uobj); +free: + rcu_read_unlock(); + return uobj; +} + +struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext, + int id, bool write) +{ + struct ib_uobject *uobj; + int ret; + + uobj = type->type_class->lookup_get(type, ucontext, id, write); + if (IS_ERR(uobj)) + return uobj; + + if (uobj->type != type) { + ret = -EINVAL; + goto free; + } + + ret = uverbs_try_lock_object(uobj, write); + if (ret) { + WARN(ucontext->cleanup_reason, + "ib_uverbs: Trying to lookup_get while cleanup context\n"); + goto free; + } + + return uobj; +free: + uobj->type->type_class->lookup_put(uobj, write); + uverbs_uobject_put(uobj); + return ERR_PTR(ret); +} + +static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext) +{ + int ret; + struct ib_uobject *uobj; + + uobj = alloc_uobj(ucontext, type); + if (IS_ERR(uobj)) + return uobj; + + ret = idr_add_uobj(uobj); + if (ret) + goto uobj_put; + + ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device, + RDMACG_RESOURCE_HCA_OBJECT); + if (ret) + goto idr_remove; + + return uobj; + +idr_remove: + uverbs_idr_remove_uobj(uobj); +uobj_put: + uverbs_uobject_put(uobj); + return ERR_PTR(ret); +} + +struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext) +{ + return type->type_class->alloc_begin(type, ucontext); +} + +static void uverbs_uobject_add(struct ib_uobject *uobject) +{ + mutex_lock(&uobject->context->uobjects_lock); + list_add(&uobject->list, &uobject->context->uobjects); + mutex_unlock(&uobject->context->uobjects_lock); +} + +static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + const struct uverbs_obj_idr_type *idr_type = + container_of(uobj->type, struct uverbs_obj_idr_type, + type); + int ret = idr_type->destroy_object(uobj, why); + + /* + * We can only fail gracefully if the user requested to destroy the + * object. In the rest of the cases, just remove whatever you can. 
+ */ + if (why == RDMA_REMOVE_DESTROY && ret) + return ret; + + ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, + RDMACG_RESOURCE_HCA_OBJECT); + uverbs_idr_remove_uobj(uobj); + + return ret; +} + +static void lockdep_check(struct ib_uobject *uobj, bool write) +{ +#ifdef CONFIG_LOCKDEP + if (write) + WARN_ON(atomic_read(&uobj->usecnt) > 0); + else + WARN_ON(atomic_read(&uobj->usecnt) == -1); +#endif +} + +static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, + enum rdma_remove_reason why, + bool lock) +{ + int ret; + struct ib_ucontext *ucontext = uobj->context; + + ret = uobj->type->type_class->remove_commit(uobj, why); + if (ret && why == RDMA_REMOVE_DESTROY) { + /* We couldn't remove the object, so just unlock the uobject */ + atomic_set(&uobj->usecnt, 0); + uobj->type->type_class->lookup_put(uobj, true); + } else { + if (lock) + mutex_lock(&ucontext->uobjects_lock); + list_del(&uobj->list); + if (lock) + mutex_unlock(&ucontext->uobjects_lock); + /* put the ref we took when we created the object */ + uverbs_uobject_put(uobj); + } + + return ret; +} + +/* This is called only for user requested DESTROY reasons */ +int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj) +{ + int ret; + struct ib_ucontext *ucontext = uobj->context; + + /* put the ref count we took at lookup_get */ + uverbs_uobject_put(uobj); + /* Cleanup is running. Calling this should have been impossible */ + if (!down_read_trylock(&ucontext->cleanup_rwsem)) { + WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); + return 0; + } + lockdep_check(uobj, true); + ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY, true); + + up_read(&ucontext->cleanup_rwsem); + return ret; +} + +static void alloc_commit_idr_uobject(struct ib_uobject *uobj) +{ + uverbs_uobject_add(uobj); + spin_lock(&uobj->context->ufile->idr_lock); + /* + * We already allocated this IDR with a NULL object, so + * this shouldn't fail. + */ + WARN_ON(idr_replace(&uobj->context->ufile->idr, + uobj, uobj->id)); + spin_unlock(&uobj->context->ufile->idr_lock); +} + +int rdma_alloc_commit_uobject(struct ib_uobject *uobj) +{ + /* Cleanup is running. Calling this should have been impossible */ + if (!down_read_trylock(&uobj->context->cleanup_rwsem)) { + int ret; + + WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n"); + ret = uobj->type->type_class->remove_commit(uobj, + RDMA_REMOVE_DURING_CLEANUP); + if (ret) + pr_warn("ib_uverbs: cleanup of idr object %d failed\n", + uobj->id); + return ret; + } + + uobj->type->type_class->alloc_commit(uobj); + up_read(&uobj->context->cleanup_rwsem); + + return 0; +} + +static void alloc_abort_idr_uobject(struct ib_uobject *uobj) +{ + uverbs_idr_remove_uobj(uobj); + ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, + RDMACG_RESOURCE_HCA_OBJECT); + uverbs_uobject_put(uobj); +} + +void rdma_alloc_abort_uobject(struct ib_uobject *uobj) +{ + uobj->type->type_class->alloc_abort(uobj); +} + +static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool write) +{ +} + +void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool write) +{ + lockdep_check(uobj, write); + uobj->type->type_class->lookup_put(uobj, write); + /* + * In order to unlock an object, either decrease its usecnt for + * read access or zero it in case of write access. See + * uverbs_try_lock_object for locking schema information. 
+ */ + if (!write) + atomic_dec(&uobj->usecnt); + else + atomic_set(&uobj->usecnt, 0); + + uverbs_uobject_put(uobj); +} + +const struct uverbs_obj_type_class uverbs_idr_class = { + .alloc_begin = alloc_begin_idr_uobject, + .lookup_get = lookup_get_idr_uobject, + .alloc_commit = alloc_commit_idr_uobject, + .alloc_abort = alloc_abort_idr_uobject, + .lookup_put = lookup_put_idr_uobject, + .remove_commit = remove_commit_idr_uobject, + /* + * When we destroy an object, we first just lock it for WRITE and + * actually DESTROY it in the finalize stage. So, the problematic + * scenario is when we just started the finalize stage of the + * destruction (nothing was executed yet). Now, the other thread + * fetched the object for READ access, but it didn't lock it yet. + * The DESTROY thread continues and starts destroying the object. + * When the other thread continues - without the RCU, it would + * access freed memory. However, the rcu_read_lock delays the free + * until the rcu_read_lock of the READ operation quits. Since the + * write lock of the object is still taken by the DESTROY flow, the + * READ operation will get -EBUSY and it'll just bail out. + */ + .needs_kfree_rcu = true, +}; + +void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed) +{ + enum rdma_remove_reason reason = device_removed ? + RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE; + unsigned int cur_order = 0; + + ucontext->cleanup_reason = reason; + /* + * Waits for all remove_commit and alloc_commit to finish. Logically, we + * want to hold this forever as the context is going to be destroyed, + * but we'll release it since it causes a "held lock freed" BUG message. + */ + down_write(&ucontext->cleanup_rwsem); + + while (!list_empty(&ucontext->uobjects)) { + struct ib_uobject *obj, *next_obj; + unsigned int next_order = UINT_MAX; + + /* + * This shouldn't run while executing other commands on this + * context. + */ + mutex_lock(&ucontext->uobjects_lock); + list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects, + list) + if (obj->type->destroy_order == cur_order) { + int ret; + + /* + * if we hit this WARN_ON, that means we are + * racing with a lookup_get. + */ + WARN_ON(uverbs_try_lock_object(obj, true)); + ret = _rdma_remove_commit_uobject(obj, reason, + false); + if (ret) + pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n", + obj->id, cur_order); + } else { + next_order = min(next_order, + obj->type->destroy_order); + } + mutex_unlock(&ucontext->uobjects_lock); + cur_order = next_order; + } + up_write(&ucontext->cleanup_rwsem); +} + +void uverbs_initialize_ucontext(struct ib_ucontext *ucontext) +{ + ucontext->cleanup_reason = 0; + mutex_init(&ucontext->uobjects_lock); + INIT_LIST_HEAD(&ucontext->uobjects); + init_rwsem(&ucontext->cleanup_rwsem); +} + diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h new file mode 100644 index 000000000000..ab665a6088ac --- /dev/null +++ b/drivers/infiniband/core/rdma_core.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * Copyright (c) 2005-2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RDMA_CORE_H +#define RDMA_CORE_H + +#include +#include +#include +#include + +/* + * These functions initialize the context and cleanups its uobjects. + * The context has a list of objects which is protected by a mutex + * on the context. initialize_ucontext should be called when we create + * a context. + * cleanup_ucontext removes all uobjects from the context and puts them. + */ +void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed); +void uverbs_initialize_ucontext(struct ib_ucontext *ucontext); + +#endif /* RDMA_CORE_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 319e69106a26..d3efd22943e9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1357,6 +1357,17 @@ struct ib_fmr_attr { struct ib_umem; +enum rdma_remove_reason { + /* Userspace requested uobject deletion. Call could fail */ + RDMA_REMOVE_DESTROY, + /* Context deletion. This call should delete the actual object itself */ + RDMA_REMOVE_CLOSE, + /* Driver is being hot-unplugged. This call should delete the actual object itself */ + RDMA_REMOVE_DRIVER_REMOVE, + /* Context is being cleaned-up, but commit was just completed */ + RDMA_REMOVE_DURING_CLEANUP, +}; + struct ib_rdmacg_object { #ifdef CONFIG_CGROUP_RDMA struct rdma_cgroup *cg; /* owner rdma cgroup */ @@ -1379,6 +1390,13 @@ struct ib_ucontext { struct list_head rwq_ind_tbl_list; int closing; + /* locking the uobjects_list */ + struct mutex uobjects_lock; + struct list_head uobjects; + /* protects cleanup process from other actions */ + struct rw_semaphore cleanup_rwsem; + enum rdma_remove_reason cleanup_reason; + struct pid *tgid; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct rb_root umem_tree; @@ -1409,8 +1427,11 @@ struct ib_uobject { int id; /* index into kernel idr */ struct kref ref; struct rw_semaphore mutex; /* protects .live */ + atomic_t usecnt; /* protects exclusive access */ struct rcu_head rcu; /* kfree_rcu() overhead */ int live; + + const struct uverbs_obj_type *type; }; struct ib_udata { diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h new file mode 100644 index 000000000000..0777e405f22a --- /dev/null +++ b/include/rdma/uverbs_types.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. 
All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _UVERBS_TYPES_ +#define _UVERBS_TYPES_ + +#include +#include + +struct uverbs_obj_type; + +struct uverbs_obj_type_class { + /* + * Get an ib_uobject that corresponds to the given id from ucontext. + * These functions can create or destroy objects if required. + * The action will be finalized only when commit, abort or put fops are + * called. + * The flow of the different actions is: + * [alloc]: Starts with alloc_begin. The handler's logic is then + * executed. If the handler is successful, alloc_commit + * is called and the object is inserted into the repository. + * Once alloc_commit completes the object is visible to + * other threads and userspace. + * Otherwise, alloc_abort is called and the object is + * destroyed. + * [lookup]: Starts with lookup_get which fetches and locks the + * object. After the handler has finished using the object, it + * needs to call lookup_put to unlock it. The write flag + * indicates if the object is locked for exclusive access. + * [remove]: Starts with lookup_get with write flag set. This locks + * the object for exclusive access. If the handler code + * completed successfully, remove_commit is called and + * the ib_uobject is removed from the context's uobjects + * repository and put. The object itself is destroyed as + * well. Once remove succeeds new krefs to the object + * cannot be acquired by other threads or userspace and + * the hardware driver is removed from the object. + * Other krefs on the object may still exist. + * If the handler code failed, lookup_put should be + * called. This callback is used when the context + * is destroyed as well (process termination, + * reset flow). + */ + struct ib_uobject *(*alloc_begin)(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext); + void (*alloc_commit)(struct ib_uobject *uobj); + void (*alloc_abort)(struct ib_uobject *uobj); + + struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext, int id, + bool write); + void (*lookup_put)(struct ib_uobject *uobj, bool write); + /* + * Must be called with the write lock held. If successful uobj is + * invalid on return.
On failure, the uobject is left completely + * unchanged. + */ + int __must_check (*remove_commit)(struct ib_uobject *uobj, + enum rdma_remove_reason why); + u8 needs_kfree_rcu; +}; + +struct uverbs_obj_type { + const struct uverbs_obj_type_class * const type_class; + size_t obj_size; + unsigned int destroy_order; +}; + +/* + * Object type classes which support a detach state (the object is still alive + * but not attached to any context) need to make sure: + * (a) no call through to a driver after a detach is called + * (b) detach isn't called concurrently with context_cleanup + */ + +struct uverbs_obj_idr_type { + /* + * For idr based objects, uverbs_obj_type_class points to the generic + * idr operations. In order to specialize the underlying types (e.g. CQ, + * QPs, etc.), we add destroy_object specific callbacks. + */ + struct uverbs_obj_type type; + + /* Free driver resources from the uobject, make the driver uncallable, + * and move the uobject to the detached state. If the object was + * destroyed by the user's request, a failure should leave the uobject + * completely unchanged. + */ + int __must_check (*destroy_object)(struct ib_uobject *uobj, + enum rdma_remove_reason why); +}; + +struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext, + int id, bool write); +void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool write); +struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext); +void rdma_alloc_abort_uobject(struct ib_uobject *uobj); +int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj); +int rdma_alloc_commit_uobject(struct ib_uobject *uobj); + +#endif -- cgit v1.2.3 From 6be60aed126ccd4dfb4a60d1dc2ecec0bca21b2e Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 4 Apr 2017 13:31:43 +0300 Subject: IB/core: Add idr based standard types This patch adds the standard idr based types. These types are used in downstream patches in order to initialize, destroy and look up standard IB objects which are based on idr objects. An idr object requires filling out several parameters. Its type_class pointer should point to uverbs_idr_class and its size should be at least the size of ib_uobject. We add a macro to make the type declaration easier.
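As an illustration, a declaration using the macro reduces to a destroy callback plus an initializer. This is an editor's sketch, not part of the patch: the "foo" object and its free function are hypothetical, and UVERBS_TYPE_ALLOC_IDR_SZ(size, order) would be used instead when the uobject is embedded in a larger per-object structure (as the standard CQ, QP, SRQ and WQ types below do):

static int uverbs_free_foo(struct ib_uobject *uobject,
			   enum rdma_remove_reason why)
{
	/* destroy the driver object; only RDMA_REMOVE_DESTROY may fail */
	return 0;
}

const struct uverbs_obj_idr_type uverbs_type_attrs_foo = {
	/* plain ib_uobject allocation, destroyed at order 0 */
	.type = UVERBS_TYPE_ALLOC_IDR(0),
	.destroy_object = uverbs_free_foo,
};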
Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Reviewed-by: Sean Hefty Signed-off-by: Doug Ledford --- drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/uverbs.h | 5 +- drivers/infiniband/core/uverbs_cmd.c | 16 +- drivers/infiniband/core/uverbs_main.c | 8 +- drivers/infiniband/core/uverbs_std_types.c | 244 +++++++++++++++++++++++++++++ include/rdma/uverbs_std_types.h | 50 ++++++ include/rdma/uverbs_types.h | 14 ++ 7 files changed, 329 insertions(+), 10 deletions(-) create mode 100644 drivers/infiniband/core/uverbs_std_types.c create mode 100644 include/rdma/uverbs_std_types.h diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index d29f910d98c9..6ebd9ad95010 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -30,4 +30,4 @@ ib_umad-y := user_mad.o ib_ucm-y := ucm.o ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ - rdma_core.o + rdma_core.o uverbs_std_types.o diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 6215735fa98e..cf0519dff3a2 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -201,9 +201,12 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); -void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd); +int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd, + enum rdma_remove_reason why); int uverbs_dealloc_mw(struct ib_mw *mw); +void ib_uverbs_detach_umcast(struct ib_qp *qp, + struct ib_uqp_object *uobj); struct ib_uverbs_flow_spec { union { diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 03c4f68a88e1..79de69dc9f1c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -958,19 +958,25 @@ out: return ret; } -void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, - struct ib_xrcd *xrcd) +int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, + struct ib_xrcd *xrcd, + enum rdma_remove_reason why) { struct inode *inode; + int ret; inode = xrcd->inode; if (inode && !atomic_dec_and_test(&xrcd->usecnt)) - return; + return 0; - ib_dealloc_xrcd(xrcd); + ret = ib_dealloc_xrcd(xrcd); - if (inode) + if (why == RDMA_REMOVE_DESTROY && ret) + atomic_inc(&xrcd->usecnt); + else if (inode) xrcd_table_delete(dev, inode); + + return ret; } ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index f6812fb6cd0c..e1db6782d0d4 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -201,8 +201,8 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file, spin_unlock_irq(&file->async_file->lock); } -static void ib_uverbs_detach_umcast(struct ib_qp *qp, - struct ib_uqp_object *uobj) +void ib_uverbs_detach_umcast(struct ib_qp *qp, + struct ib_uqp_object *uobj) { struct ib_uverbs_mcast_entry *mcast, *tmp; @@ -331,7 +331,9 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, container_of(uobj, struct ib_uxrcd_object, uobject); idr_remove_uobj(uobj); - ib_uverbs_dealloc_xrcd(file->device, xrcd); + ib_uverbs_dealloc_xrcd(file->device, xrcd, + file->ucontext ? 
RDMA_REMOVE_CLOSE : + RDMA_REMOVE_DRIVER_REMOVE); kfree(uxrcd); } mutex_unlock(&file->device->xrcd_tree_mutex); diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c new file mode 100644 index 000000000000..a514556139e7 --- /dev/null +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "rdma_core.h" +#include "uverbs.h" + +int uverbs_free_ah(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + return ib_destroy_ah((struct ib_ah *)uobject->object); +} + +int uverbs_free_flow(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + return ib_destroy_flow((struct ib_flow *)uobject->object); +} + +int uverbs_free_mw(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + return uverbs_dealloc_mw((struct ib_mw *)uobject->object); +} + +int uverbs_free_qp(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_qp *qp = uobject->object; + struct ib_uqp_object *uqp = + container_of(uobject, struct ib_uqp_object, uevent.uobject); + int ret; + + if (why == RDMA_REMOVE_DESTROY) { + if (!list_empty(&uqp->mcast_list)) + return -EBUSY; + } else if (qp == qp->real_qp) { + ib_uverbs_detach_umcast(qp, uqp); + } + + ret = ib_destroy_qp(qp); + if (ret && why == RDMA_REMOVE_DESTROY) + return ret; + + if (uqp->uxrcd) + atomic_dec(&uqp->uxrcd->refcnt); + + ib_uverbs_release_uevent(uobject->context->ufile, &uqp->uevent); + return ret; +} + +int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object; + struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; + int ret; + + ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); + if (!ret || why != RDMA_REMOVE_DESTROY) + kfree(ind_tbl); + return ret; +} + +int uverbs_free_wq(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_wq *wq = uobject->object; + struct ib_uwq_object *uwq = + container_of(uobject, struct ib_uwq_object, uevent.uobject); + int ret; + + ret = ib_destroy_wq(wq); + if (!ret || why != RDMA_REMOVE_DESTROY) + 
ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent); + return ret; +} + +int uverbs_free_srq(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_srq *srq = uobject->object; + struct ib_uevent_object *uevent = + container_of(uobject, struct ib_uevent_object, uobject); + enum ib_srq_type srq_type = srq->srq_type; + int ret; + + ret = ib_destroy_srq(srq); + + if (ret && why == RDMA_REMOVE_DESTROY) + return ret; + + if (srq_type == IB_SRQT_XRC) { + struct ib_usrq_object *us = + container_of(uevent, struct ib_usrq_object, uevent); + + atomic_dec(&us->uxrcd->refcnt); + } + + ib_uverbs_release_uevent(uobject->context->ufile, uevent); + return ret; +} + +int uverbs_free_cq(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_cq *cq = uobject->object; + struct ib_uverbs_event_file *ev_file = cq->cq_context; + struct ib_ucq_object *ucq = + container_of(uobject, struct ib_ucq_object, uobject); + int ret; + + ret = ib_destroy_cq(cq); + if (!ret || why != RDMA_REMOVE_DESTROY) + ib_uverbs_release_ucq(uobject->context->ufile, ev_file, ucq); + return ret; +} + +int uverbs_free_mr(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + return ib_dereg_mr((struct ib_mr *)uobject->object); +} + +int uverbs_free_xrcd(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_xrcd *xrcd = uobject->object; + struct ib_uxrcd_object *uxrcd = + container_of(uobject, struct ib_uxrcd_object, uobject); + int ret; + + mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex); + if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt)) + ret = -EBUSY; + else + ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device, + xrcd, why); + mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex); + + return ret; +} + +int uverbs_free_pd(struct ib_uobject *uobject, + enum rdma_remove_reason why) +{ + struct ib_pd *pd = uobject->object; + + if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt)) + return -EBUSY; + + ib_dealloc_pd((struct ib_pd *)uobject->object); + return 0; +} + +const struct uverbs_obj_idr_type uverbs_type_attrs_cq = { + .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0), + .destroy_object = uverbs_free_cq, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_qp = { + .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0), + .destroy_object = uverbs_free_qp, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_mw = { + .type = UVERBS_TYPE_ALLOC_IDR(0), + .destroy_object = uverbs_free_mw, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_mr = { + /* 1 is used in order to free the MR after all the MWs */ + .type = UVERBS_TYPE_ALLOC_IDR(1), + .destroy_object = uverbs_free_mr, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_srq = { + .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0), + .destroy_object = uverbs_free_srq, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_ah = { + .type = UVERBS_TYPE_ALLOC_IDR(0), + .destroy_object = uverbs_free_ah, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_flow = { + .type = UVERBS_TYPE_ALLOC_IDR(0), + .destroy_object = uverbs_free_flow, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_wq = { + .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0), + .destroy_object = uverbs_free_wq, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table = { + .type = UVERBS_TYPE_ALLOC_IDR(0), + .destroy_object = uverbs_free_rwq_ind_tbl, +}; + +const struct 
uverbs_obj_idr_type uverbs_type_attrs_xrcd = { + .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0), + .destroy_object = uverbs_free_xrcd, +}; + +const struct uverbs_obj_idr_type uverbs_type_attrs_pd = { + /* 2 is used in order to free the PD after MRs */ + .type = UVERBS_TYPE_ALLOC_IDR(2), + .destroy_object = uverbs_free_pd, +}; diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h new file mode 100644 index 000000000000..2edb77648986 --- /dev/null +++ b/include/rdma/uverbs_std_types.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _UVERBS_STD_TYPES__ +#define _UVERBS_STD_TYPES__ + +#include <rdma/uverbs_types.h> + +extern const struct uverbs_obj_idr_type uverbs_type_attrs_cq; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_qp; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_wq; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_srq; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_ah; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_flow; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_mr; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_mw; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_pd; +extern const struct uverbs_obj_idr_type uverbs_type_attrs_xrcd; +#endif + diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 0777e405f22a..66368b5a3006 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -129,4 +129,18 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj); int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj); int rdma_alloc_commit_uobject(struct ib_uobject *uobj); +extern const struct uverbs_obj_type_class uverbs_idr_class; + +#define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \ + sizeof(char)) +#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _order) \ + { \ + .destroy_order = _order, \ + .type_class = &uverbs_idr_class, \ + .obj_size = (_size) + \ + UVERBS_BUILD_BUG_ON((_size) < \ + sizeof(struct ib_uobject)), \ + } +#define UVERBS_TYPE_ALLOC_IDR(_order) \ + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), _order) #endif -- cgit v1.2.3 From fd3c7904db6e05043398aee5c1448682acfb025b Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 4 Apr 2017 13:31:44 +0300 Subject: IB/core: Change idr objects to use the new schema This changes only the handlers which deal with idr-based objects to use the new idr allocation, fetching and destruction schema. This patch consists of the following changes: (1) Allocation, fetching and destruction is done via idr ops. (2) Context initializing and release is done through uverbs_initialize_ucontext and uverbs_cleanup_ucontext. (3) Ditching the live flag. Mostly, this is pretty straightforward. The only place that is a bit trickier is in ib_uverbs_open_qp. Commit [1] added code to check whether the uobject is already live and initialized. This mostly happens because of a race between open_qp and events. We delayed assigning the uobject's pointer in order to eliminate this race without using the live variable.
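To make the converted flow concrete, here is a minimal illustrative sketch (not part of the patch) of the destroy-handler shape the conversion produces; the function name and trimmed-down signature are hypothetical, while uobj_get_type(), uobj_get_write() and uobj_remove_commit() are the helpers the handlers below actually use:

static int example_destroy_handler(struct ib_uverbs_file *file, u32 handle)
{
	struct ib_uobject *uobj;

	/*
	 * Exclusive (write) lookup: a missing handle or an object that is
	 * already being destroyed comes back as an ERR_PTR, so there is no
	 * separate "live" flag left to test.
	 */
	uobj = uobj_get_write(uobj_get_type(pd), handle, file->ucontext);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	/*
	 * Runs the type's destroy_object callback, removes the handle from
	 * the per-file idr and drops the lookup reference.
	 */
	return uobj_remove_commit(uobj);
}

Handlers that still need the uobject contents after removal (destroy_cq, destroy_qp, destroy_wq and destroy_srq below) additionally take their own reference with uverbs_uobject_get() before uobj_remove_commit() and drop it with uverbs_uobject_put() once the response has been built.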
[1] commit a040f95dc819 ("IB/core: Fix XRC race condition in ib_uverbs_open_qp") Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/core/rdma_core.h | 15 + drivers/infiniband/core/uverbs.h | 2 - drivers/infiniband/core/uverbs_cmd.c | 1311 ++++++++------------------- drivers/infiniband/core/uverbs_main.c | 142 +--- include/rdma/ib_verbs.h | 13 - include/rdma/uverbs_std_types.h | 63 ++ 6 files changed, 402 insertions(+), 1144 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index ab665a6088ac..0247bb5e3dd3 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -52,4 +52,19 @@ void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed); void uverbs_initialize_ucontext(struct ib_ucontext *ucontext); +/* + * uverbs_uobject_get is called in order to increase the reference count on + * an uobject. This is useful when a handler wants to keep the uobject's memory + * alive, regardless if this uobject is still alive in the context's objects + * repository. Objects are put via uverbs_uobject_put. + */ +void uverbs_uobject_get(struct ib_uobject *uobject); + +/* + * In order to indicate we no longer needs this uobject, uverbs_uobject_put + * is called. When the reference count is decreased, the uobject is freed. + * For example, this is used when attaching a completion channel to a CQ. + */ +void uverbs_uobject_put(struct ib_uobject *uobject); + #endif /* RDMA_CORE_H */ diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index cf0519dff3a2..3660278b62b0 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -180,8 +180,6 @@ struct ib_ucq_object { u32 async_events_reported; }; -void idr_remove_uobj(struct ib_uobject *uobj); - struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, struct ib_device *ib_dev, int is_async); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 79de69dc9f1c..2f258aaec7b9 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -40,269 +40,13 @@ #include <linux/uaccess.h> +#include <rdma/uverbs_types.h> +#include <rdma/uverbs_std_types.h> +#include "rdma_core.h" + #include "uverbs.h" #include "core_priv.h" -struct uverbs_lock_class { - struct lock_class_key key; - char name[16]; -}; - -static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" }; -static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" }; -static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" }; -static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" }; -static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" }; -static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; -static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; -static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; -static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; -static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" }; -static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" }; - -/* - * The ib_uobject locking scheme is as follows: - * - * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it - * needs to be held during all idr write operations. When an object is - * looked up, a reference must be taken on the object's kref before - * dropping this lock.
For read operations, the rcu_read_lock() - * and rcu_write_lock() but similarly the kref reference is grabbed - * before the rcu_read_unlock(). - * - * - Each object also has an rwsem. This rwsem must be held for - * reading while an operation that uses the object is performed. - * For example, while registering an MR, the associated PD's - * uobject.mutex must be held for reading. The rwsem must be held - * for writing while initializing or destroying an object. - * - * - In addition, each object has a "live" flag. If this flag is not - * set, then lookups of the object will fail even if it is found in - * the idr. This handles a reader that blocks and does not acquire - * the rwsem until after the object is destroyed. The destroy - * operation will set the live flag to 0 and then drop the rwsem; - * this will allow the reader to acquire the rwsem, see that the - * live flag is 0, and then drop the rwsem and its reference to - * object. The underlying storage will not be freed until the last - * reference to the object is dropped. - */ - -static void init_uobj(struct ib_uobject *uobj, u64 user_handle, - struct ib_ucontext *context, struct uverbs_lock_class *c) -{ - uobj->user_handle = user_handle; - uobj->context = context; - kref_init(&uobj->ref); - init_rwsem(&uobj->mutex); - lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name); - uobj->live = 0; -} - -static void release_uobj(struct kref *kref) -{ - kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu); -} - -static void put_uobj(struct ib_uobject *uobj) -{ - kref_put(&uobj->ref, release_uobj); -} - -static void put_uobj_read(struct ib_uobject *uobj) -{ - up_read(&uobj->mutex); - put_uobj(uobj); -} - -static void put_uobj_write(struct ib_uobject *uobj) -{ - up_write(&uobj->mutex); - put_uobj(uobj); -} - -static int idr_add_uobj(struct ib_uobject *uobj) -{ - int ret; - - idr_preload(GFP_KERNEL); - spin_lock(&uobj->context->ufile->idr_lock); - - ret = idr_alloc(&uobj->context->ufile->idr, uobj, 0, 0, GFP_NOWAIT); - if (ret >= 0) - uobj->id = ret; - - spin_unlock(&uobj->context->ufile->idr_lock); - idr_preload_end(); - - return ret < 0 ? ret : 0; -} - -void idr_remove_uobj(struct ib_uobject *uobj) -{ - spin_lock(&uobj->context->ufile->idr_lock); - idr_remove(&uobj->context->ufile->idr, uobj->id); - spin_unlock(&uobj->context->ufile->idr_lock); -} - -static struct ib_uobject *__idr_get_uobj(int id, struct ib_ucontext *context) -{ - struct ib_uobject *uobj; - - rcu_read_lock(); - uobj = idr_find(&context->ufile->idr, id); - if (uobj) { - if (uobj->context == context) - kref_get(&uobj->ref); - else - uobj = NULL; - } - rcu_read_unlock(); - - return uobj; -} - -static struct ib_uobject *idr_read_uobj(int id, struct ib_ucontext *context, - int nested) -{ - struct ib_uobject *uobj; - - uobj = __idr_get_uobj(id, context); - if (!uobj) - return NULL; - - if (nested) - down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING); - else - down_read(&uobj->mutex); - if (!uobj->live) { - put_uobj_read(uobj); - return NULL; - } - - return uobj; -} - -static struct ib_uobject *idr_write_uobj(int id, struct ib_ucontext *context) -{ - struct ib_uobject *uobj; - - uobj = __idr_get_uobj(id, context); - if (!uobj) - return NULL; - - down_write(&uobj->mutex); - if (!uobj->live) { - put_uobj_write(uobj); - return NULL; - } - - return uobj; -} - -static void *idr_read_obj(int id, struct ib_ucontext *context, - int nested) -{ - struct ib_uobject *uobj; - - uobj = idr_read_uobj(id, context, nested); - return uobj ? 
uobj->object : NULL; -} - -static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context) -{ - return idr_read_obj(pd_handle, context, 0); -} - -static void put_pd_read(struct ib_pd *pd) -{ - put_uobj_read(pd->uobject); -} - -static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested) -{ - return idr_read_obj(cq_handle, context, nested); -} - -static void put_cq_read(struct ib_cq *cq) -{ - put_uobj_read(cq->uobject); -} - -static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context) -{ - return idr_read_obj(ah_handle, context, 0); -} - -static void put_ah_read(struct ib_ah *ah) -{ - put_uobj_read(ah->uobject); -} - -static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) -{ - return idr_read_obj(qp_handle, context, 0); -} - -static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context) -{ - return idr_read_obj(wq_handle, context, 0); -} - -static void put_wq_read(struct ib_wq *wq) -{ - put_uobj_read(wq->uobject); -} - -static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle, - struct ib_ucontext *context) -{ - return idr_read_obj(ind_table_handle, context, 0); -} - -static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table) -{ - put_uobj_read(ind_table->uobject); -} - -static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) -{ - struct ib_uobject *uobj; - - uobj = idr_write_uobj(qp_handle, context); - return uobj ? uobj->object : NULL; -} - -static void put_qp_read(struct ib_qp *qp) -{ - put_uobj_read(qp->uobject); -} - -static void put_qp_write(struct ib_qp *qp) -{ - put_uobj_write(qp->uobject); -} - -static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) -{ - return idr_read_obj(srq_handle, context, 0); -} - -static void put_srq_read(struct ib_srq *srq) -{ - put_uobj_read(srq->uobject); -} - -static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context, - struct ib_uobject **uobj) -{ - *uobj = idr_read_uobj(xrcd_handle, context, 0); - return *uobj ? 
(*uobj)->object : NULL; -} - -static void put_xrcd_read(struct ib_uobject *uobj) -{ - put_uobj_read(uobj); -} ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, @@ -347,17 +91,8 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, ucontext->cg_obj = cg_obj; /* ufile is required when some objects are released */ ucontext->ufile = file; - INIT_LIST_HEAD(&ucontext->pd_list); - INIT_LIST_HEAD(&ucontext->mr_list); - INIT_LIST_HEAD(&ucontext->mw_list); - INIT_LIST_HEAD(&ucontext->cq_list); - INIT_LIST_HEAD(&ucontext->qp_list); - INIT_LIST_HEAD(&ucontext->srq_list); - INIT_LIST_HEAD(&ucontext->ah_list); - INIT_LIST_HEAD(&ucontext->wq_list); - INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list); - INIT_LIST_HEAD(&ucontext->xrcd_list); - INIT_LIST_HEAD(&ucontext->rule_list); + uverbs_initialize_ucontext(ucontext); + rcu_read_lock(); ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); rcu_read_unlock(); @@ -564,19 +299,9 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) - return -ENOMEM; - - init_uobj(uobj, 0, file->ucontext, &pd_lock_class); - ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - if (ret) { - kfree(uobj); - return ret; - } - - down_write(&uobj->mutex); + uobj = uobj_alloc(uobj_get_type(pd), file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata); if (IS_ERR(pd)) { @@ -590,10 +315,6 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, atomic_set(&pd->usecnt, 0); uobj->object = pd; - ret = idr_add_uobj(uobj); - if (ret) - goto err_idr; - memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; @@ -603,25 +324,15 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, goto err_copy; } - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->pd_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); + uobj_alloc_commit(uobj); return in_len; err_copy: - idr_remove_uobj(uobj); - -err_idr: ib_dealloc_pd(pd); err: - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - put_uobj_write(uobj); + uobj_alloc_abort(uobj); return ret; } @@ -632,45 +343,19 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, { struct ib_uverbs_dealloc_pd cmd; struct ib_uobject *uobj; - struct ib_pd *pd; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(cmd.pd_handle, file->ucontext); - if (!uobj) - return -EINVAL; - pd = uobj->object; + uobj = uobj_get_write(uobj_get_type(pd), cmd.pd_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - if (atomic_read(&pd->usecnt)) { - ret = -EBUSY; - goto err_put; - } + ret = uobj_remove_commit(uobj); - ret = pd->device->dealloc_pd(uobj->object); - WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); - if (ret) - goto err_put; - - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - - uobj->live = 0; - put_uobj_write(uobj); - - idr_remove_uobj(uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - - return in_len; - -err_put: - put_uobj_write(uobj); - return ret; + return ret ?: in_len; } struct xrcd_table_entry { @@ -807,16 +492,13 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, } } - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if 
(!obj) { - ret = -ENOMEM; + obj = (struct ib_uxrcd_object *)uobj_alloc(uobj_get_type(xrcd), + file->ucontext); + if (IS_ERR(obj)) { + ret = PTR_ERR(obj); goto err_tree_mutex_unlock; } - init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class); - - down_write(&obj->uobject.mutex); - if (!xrcd) { xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata); if (IS_ERR(xrcd)) { @@ -834,10 +516,6 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, atomic_set(&obj->refcnt, 0); obj->uobject.object = xrcd; - ret = idr_add_uobj(&obj->uobject); - if (ret) - goto err_idr; - memset(&resp, 0, sizeof resp); resp.xrcd_handle = obj->uobject.id; @@ -846,7 +524,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, /* create new inode/xrcd table entry */ ret = xrcd_table_insert(file->device, inode, xrcd); if (ret) - goto err_insert_xrcd; + goto err_dealloc_xrcd; } atomic_inc(&xrcd->usecnt); } @@ -860,12 +538,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, if (f.file) fdput(f); - mutex_lock(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list); - mutex_unlock(&file->mutex); - - obj->uobject.live = 1; - up_write(&obj->uobject.mutex); + uobj_alloc_commit(&obj->uobject); mutex_unlock(&file->device->xrcd_tree_mutex); return in_len; @@ -877,14 +550,11 @@ err_copy: atomic_dec(&xrcd->usecnt); } -err_insert_xrcd: - idr_remove_uobj(&obj->uobject); - -err_idr: +err_dealloc_xrcd: ib_dealloc_xrcd(xrcd); err: - put_uobj_write(&obj->uobject); + uobj_alloc_abort(&obj->uobject); err_tree_mutex_unlock: if (f.file) @@ -902,60 +572,20 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, { struct ib_uverbs_close_xrcd cmd; struct ib_uobject *uobj; - struct ib_xrcd *xrcd = NULL; - struct inode *inode = NULL; - struct ib_uxrcd_object *obj; - int live; int ret = 0; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&file->device->xrcd_tree_mutex); - uobj = idr_write_uobj(cmd.xrcd_handle, file->ucontext); - if (!uobj) { - ret = -EINVAL; - goto out; - } - - xrcd = uobj->object; - inode = xrcd->inode; - obj = container_of(uobj, struct ib_uxrcd_object, uobject); - if (atomic_read(&obj->refcnt)) { - put_uobj_write(uobj); - ret = -EBUSY; - goto out; - } - - if (!inode || atomic_dec_and_test(&xrcd->usecnt)) { - ret = ib_dealloc_xrcd(uobj->object); - if (!ret) - uobj->live = 0; + uobj = uobj_get_write(uobj_get_type(xrcd), cmd.xrcd_handle, + file->ucontext); + if (IS_ERR(uobj)) { + mutex_unlock(&file->device->xrcd_tree_mutex); + return PTR_ERR(uobj); } - live = uobj->live; - if (inode && ret) - atomic_inc(&xrcd->usecnt); - - put_uobj_write(uobj); - - if (ret) - goto out; - - if (inode && !live) - xrcd_table_delete(file->device, inode); - - idr_remove_uobj(uobj); - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - ret = in_len; - -out: - mutex_unlock(&file->device->xrcd_tree_mutex); - return ret; + ret = uobj_remove_commit(uobj); + return ret ?: in_len; } int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, @@ -1009,14 +639,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (ret) return ret; - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) - return -ENOMEM; - - init_uobj(uobj, 0, file->ucontext, &mr_lock_class); - down_write(&uobj->mutex); + uobj = uobj_alloc(uobj_get_type(mr), file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto 
err_free; @@ -1030,10 +657,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, goto err_put; } } - ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - if (ret) - goto err_charge; mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, cmd.access_flags, &udata); @@ -1048,9 +671,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, atomic_inc(&pd->usecnt); uobj->object = mr; - ret = idr_add_uobj(uobj); - if (ret) - goto err_unreg; memset(&resp, 0, sizeof resp); resp.lkey = mr->lkey; @@ -1063,32 +683,20 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, goto err_copy; } - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->mr_list); - mutex_unlock(&file->mutex); + uobj_put_obj_read(pd); - uobj->live = 1; - - up_write(&uobj->mutex); + uobj_alloc_commit(uobj); return in_len; err_copy: - idr_remove_uobj(uobj); - -err_unreg: ib_dereg_mr(mr); err_put: - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - -err_charge: - put_pd_read(pd); + uobj_put_obj_read(pd); err_free: - put_uobj_write(uobj); + uobj_alloc_abort(uobj); return ret; } @@ -1124,10 +732,10 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) return -EINVAL; - uobj = idr_write_uobj(cmd.mr_handle, file->ucontext); - - if (!uobj) - return -EINVAL; + uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); mr = uobj->object; @@ -1138,7 +746,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, } if (cmd.flags & IB_MR_REREG_PD) { - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto put_uobjs; @@ -1171,11 +779,10 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, put_uobj_pd: if (cmd.flags & IB_MR_REREG_PD) - put_pd_read(pd); + uobj_put_obj_read(pd); put_uobjs: - - put_uobj_write(mr->uobject); + uobj_put_write(uobj); return ret; } @@ -1186,38 +793,20 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_dereg_mr cmd; - struct ib_mr *mr; struct ib_uobject *uobj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(cmd.mr_handle, file->ucontext); - if (!uobj) - return -EINVAL; - - mr = uobj->object; - - ret = ib_dereg_mr(mr); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); + uobj = uobj_get_write(uobj_get_type(mr), cmd.mr_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - if (ret) - return ret; + ret = uobj_remove_commit(uobj); - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - idr_remove_uobj(uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - - return in_len; + return ret ?: in_len; } ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, @@ -1239,14 +828,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) - return -ENOMEM; + uobj = uobj_alloc(uobj_get_type(mw), file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - init_uobj(uobj, 0, file->ucontext, &mw_lock_class); - down_write(&uobj->mutex); - - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err_free; @@ -1257,11 
+843,6 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); - ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - if (ret) - goto err_charge; - mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata); if (IS_ERR(mw)) { ret = PTR_ERR(mw); @@ -1274,9 +855,6 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, atomic_inc(&pd->usecnt); uobj->object = mw; - ret = idr_add_uobj(uobj); - if (ret) - goto err_unalloc; memset(&resp, 0, sizeof(resp)); resp.rkey = mw->rkey; @@ -1288,32 +866,17 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, goto err_copy; } - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->mw_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); + uobj_put_obj_read(pd); + uobj_alloc_commit(uobj); return in_len; err_copy: - idr_remove_uobj(uobj); - -err_unalloc: uverbs_dealloc_mw(mw); - err_put: - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - -err_charge: - put_pd_read(pd); - + uobj_put_obj_read(pd); err_free: - put_uobj_write(uobj); + uobj_alloc_abort(uobj); return ret; } @@ -1323,38 +886,19 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_dealloc_mw cmd; - struct ib_mw *mw; struct ib_uobject *uobj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = idr_write_uobj(cmd.mw_handle, file->ucontext); - if (!uobj) - return -EINVAL; - - mw = uobj->object; + uobj = uobj_get_write(uobj_get_type(mw), cmd.mw_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - ret = uverbs_dealloc_mw(mw); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) - return ret; - - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - idr_remove_uobj(uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - - return in_len; + ret = uobj_remove_commit(uobj); + return ret ?: in_len; } ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, @@ -1418,12 +962,10 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (cmd->comp_vector >= file->device->num_comp_vectors) return ERR_PTR(-EINVAL); - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return ERR_PTR(-ENOMEM); - - init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class); - down_write(&obj->uobject.mutex); + obj = (struct ib_ucq_object *)uobj_alloc(uobj_get_type(cq), + file->ucontext); + if (IS_ERR(obj)) + return obj; if (cmd->comp_channel >= 0) { ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel); @@ -1433,6 +975,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, } } + obj->uobject.user_handle = cmd->user_handle; obj->uverbs_file = file; obj->comp_events_reported = 0; obj->async_events_reported = 0; @@ -1445,13 +988,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags)) attr.flags = cmd->flags; - ret = ib_rdmacg_try_charge(&obj->uobject.cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - if (ret) - goto err_charge; - - cq = ib_dev->create_cq(ib_dev, &attr, - file->ucontext, uhw); + cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_file; @@ -1465,10 +1002,6 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, 
atomic_set(&cq->usecnt, 0); obj->uobject.object = cq; - ret = idr_add_uobj(&obj->uobject); - if (ret) - goto err_free; - memset(&resp, 0, sizeof resp); resp.base.cq_handle = obj->uobject.id; resp.base.cqe = cq->cqe; @@ -1480,32 +1013,19 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (ret) goto err_cb; - mutex_lock(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->cq_list); - mutex_unlock(&file->mutex); - - obj->uobject.live = 1; - - up_write(&obj->uobject.mutex); + uobj_alloc_commit(&obj->uobject); return obj; err_cb: - idr_remove_uobj(&obj->uobject); - -err_free: ib_destroy_cq(cq); err_file: - ib_rdmacg_uncharge(&obj->uobject.cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - -err_charge: if (ev_file) ib_uverbs_release_ucq(file, ev_file, obj); err: - put_uobj_write(&obj->uobject); + uobj_alloc_abort(&obj->uobject); return ERR_PTR(ret); } @@ -1628,7 +1148,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); if (!cq) return -EINVAL; @@ -1643,7 +1163,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, ret = -EFAULT; out: - put_cq_read(cq); + uobj_put_obj_read(cq); return ret ? ret : in_len; } @@ -1690,7 +1210,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); if (!cq) return -EINVAL; @@ -1722,7 +1242,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, ret = in_len; out_put: - put_cq_read(cq); + uobj_put_obj_read(cq); return ret; } @@ -1737,14 +1257,14 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); if (!cq) return -EINVAL; ib_req_notify_cq(cq, cmd.solicited_only ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); - put_cq_read(cq); + uobj_put_obj_read(cq); return in_len; } @@ -1765,37 +1285,32 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(cmd.cq_handle, file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uobj_get_write(uobj_get_type(cq), cmd.cq_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + + /* + * Make sure we don't free the memory in remove_commit as we still + * needs the uobject memory to create the response. 
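* (This relies on the uverbs_uobject_get()/uverbs_uobject_put() pairing documented in rdma_core.h: the reference taken just below keeps the ib_uobject memory valid across uobj_remove_commit() so the event counters can still be copied into the response, and the later uverbs_uobject_put() releases it.)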
+ */ + uverbs_uobject_get(uobj); cq = uobj->object; ev_file = cq->cq_context; obj = container_of(cq->uobject, struct ib_ucq_object, uobject); - ret = ib_destroy_cq(cq); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); + memset(&resp, 0, sizeof(resp)); - if (ret) + ret = uobj_remove_commit(uobj); + if (ret) { + uverbs_uobject_put(uobj); return ret; + } - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - idr_remove_uobj(uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - ib_uverbs_release_ucq(file, ev_file, obj); - - memset(&resp, 0, sizeof resp); resp.comp_events_reported = obj->comp_events_reported; resp.async_events_reported = obj->async_events_reported; - put_uobj(uobj); - + uverbs_uobject_put(uobj); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) return -EFAULT; @@ -1817,7 +1332,7 @@ static int create_qp(struct ib_uverbs_file *file, struct ib_device *device; struct ib_pd *pd = NULL; struct ib_xrcd *xrcd = NULL; - struct ib_uobject *uninitialized_var(xrcd_uobj); + struct ib_uobject *xrcd_uobj = ERR_PTR(-ENOENT); struct ib_cq *scq = NULL, *rcq = NULL; struct ib_srq *srq = NULL; struct ib_qp *qp; @@ -1831,18 +1346,19 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; - obj = kzalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; + obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp), + file->ucontext); + if (IS_ERR(obj)) + return PTR_ERR(obj); + obj->uxrcd = NULL; + obj->uevent.uobject.user_handle = cmd->user_handle; - init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, - &qp_lock_class); - down_write(&obj->uevent.uobject.mutex); if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + sizeof(cmd->rwq_ind_tbl_handle) && (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { - ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle, - file->ucontext); + ind_tbl = uobj_get_obj_read(rwq_ind_table, + cmd->rwq_ind_tbl_handle, + file->ucontext); if (!ind_tbl) { ret = -EINVAL; goto err_put; @@ -1866,8 +1382,15 @@ static int create_qp(struct ib_uverbs_file *file, has_sq = false; if (cmd->qp_type == IB_QPT_XRC_TGT) { - xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext, - &xrcd_uobj); + xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->pd_handle, + file->ucontext); + + if (IS_ERR(xrcd_uobj)) { + ret = -EINVAL; + goto err_put; + } + + xrcd = (struct ib_xrcd *)xrcd_uobj->object; if (!xrcd) { ret = -EINVAL; goto err_put; @@ -1879,8 +1402,8 @@ static int create_qp(struct ib_uverbs_file *file, cmd->max_recv_sge = 0; } else { if (cmd->is_srq) { - srq = idr_read_srq(cmd->srq_handle, - file->ucontext); + srq = uobj_get_obj_read(srq, cmd->srq_handle, + file->ucontext); if (!srq || srq->srq_type != IB_SRQT_BASIC) { ret = -EINVAL; goto err_put; @@ -1889,8 +1412,8 @@ static int create_qp(struct ib_uverbs_file *file, if (!ind_tbl) { if (cmd->recv_cq_handle != cmd->send_cq_handle) { - rcq = idr_read_cq(cmd->recv_cq_handle, - file->ucontext, 0); + rcq = uobj_get_obj_read(cq, cmd->recv_cq_handle, + file->ucontext); if (!rcq) { ret = -EINVAL; goto err_put; @@ -1900,10 +1423,11 @@ static int create_qp(struct ib_uverbs_file *file, } if (has_sq) - scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); + scq = uobj_get_obj_read(cq, cmd->send_cq_handle, + file->ucontext); if (!ind_tbl) rcq = rcq ?: scq; - pd = idr_read_pd(cmd->pd_handle, file->ucontext); + pd = 
uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext); if (!pd || (!scq && has_sq)) { ret = -EINVAL; goto err_put; @@ -1955,11 +1479,6 @@ static int create_qp(struct ib_uverbs_file *file, goto err_put; } - ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, device, - RDMACG_RESOURCE_HCA_OBJECT); - if (ret) - goto err_put; - if (cmd->qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else @@ -1967,7 +1486,7 @@ static int create_qp(struct ib_uverbs_file *file, if (IS_ERR(qp)) { ret = PTR_ERR(qp); - goto err_create; + goto err_put; } if (cmd->qp_type != IB_QPT_XRC_TGT) { @@ -1995,9 +1514,6 @@ static int create_qp(struct ib_uverbs_file *file, qp->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = qp; - ret = idr_add_uobj(&obj->uevent.uobject); - if (ret) - goto err_destroy; memset(&resp, 0, sizeof resp); resp.base.qpn = qp->qp_num; @@ -2019,54 +1535,41 @@ static int create_qp(struct ib_uverbs_file *file, obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); - put_xrcd_read(xrcd_uobj); + uobj_put_read(xrcd_uobj); } if (pd) - put_pd_read(pd); + uobj_put_obj_read(pd); if (scq) - put_cq_read(scq); + uobj_put_obj_read(scq); if (rcq && rcq != scq) - put_cq_read(rcq); + uobj_put_obj_read(rcq); if (srq) - put_srq_read(srq); + uobj_put_obj_read(srq); if (ind_tbl) - put_rwq_indirection_table_read(ind_tbl); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); - mutex_unlock(&file->mutex); - - obj->uevent.uobject.live = 1; + uobj_put_obj_read(ind_tbl); - up_write(&obj->uevent.uobject.mutex); + uobj_alloc_commit(&obj->uevent.uobject); return 0; err_cb: - idr_remove_uobj(&obj->uevent.uobject); - -err_destroy: ib_destroy_qp(qp); -err_create: - ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, device, - RDMACG_RESOURCE_HCA_OBJECT); - err_put: - if (xrcd) - put_xrcd_read(xrcd_uobj); + if (!IS_ERR(xrcd_uobj)) + uobj_put_read(xrcd_uobj); if (pd) - put_pd_read(pd); + uobj_put_obj_read(pd); if (scq) - put_cq_read(scq); + uobj_put_obj_read(scq); if (rcq && rcq != scq) - put_cq_read(rcq); + uobj_put_obj_read(rcq); if (srq) - put_srq_read(srq); + uobj_put_obj_read(srq); if (ind_tbl) - put_rwq_indirection_table_read(ind_tbl); + uobj_put_obj_read(ind_tbl); - put_uobj_write(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject); return ret; } @@ -2202,17 +1705,22 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; + obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp), + file->ucontext); + if (IS_ERR(obj)) + return PTR_ERR(obj); - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); - down_write(&obj->uevent.uobject.mutex); + xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd.pd_handle, + file->ucontext); + if (IS_ERR(xrcd_uobj)) { + ret = -EINVAL; + goto err_put; + } - xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); + xrcd = (struct ib_xrcd *)xrcd_uobj->object; if (!xrcd) { ret = -EINVAL; - goto err_put; + goto err_xrcd; } attr.event_handler = ib_uverbs_qp_event_handler; @@ -2227,15 +1735,11 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, qp = ib_open_qp(xrcd, &attr); if (IS_ERR(qp)) { ret = PTR_ERR(qp); - goto err_put; + goto err_xrcd; } - qp->uobject = &obj->uevent.uobject; - obj->uevent.uobject.object = qp; - ret = idr_add_uobj(&obj->uevent.uobject); - if (ret) - goto 
err_destroy; + obj->uevent.uobject.user_handle = cmd.user_handle; memset(&resp, 0, sizeof resp); resp.qpn = qp->qp_num; @@ -2244,32 +1748,25 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_remove; + goto err_destroy; } obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); - put_xrcd_read(xrcd_uobj); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); - mutex_unlock(&file->mutex); + qp->uobject = &obj->uevent.uobject; + uobj_put_read(xrcd_uobj); - obj->uevent.uobject.live = 1; - up_write(&obj->uevent.uobject.mutex); + uobj_alloc_commit(&obj->uevent.uobject); return in_len; -err_remove: - idr_remove_uobj(&obj->uevent.uobject); - err_destroy: ib_destroy_qp(qp); - +err_xrcd: + uobj_put_read(xrcd_uobj); err_put: - put_xrcd_read(xrcd_uobj); - put_uobj_write(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject); return ret; } @@ -2295,7 +1792,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, goto out; } - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); if (!qp) { ret = -EINVAL; goto out; @@ -2303,7 +1800,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr); - put_qp_read(qp); + uobj_put_obj_read(qp); if (ret) goto out; @@ -2399,7 +1896,7 @@ static int modify_qp(struct ib_uverbs_file *file, if (!attr) return -ENOMEM; - qp = idr_read_qp(cmd->base.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, cmd->base.qp_handle, file->ucontext); if (!qp) { ret = -EINVAL; goto out; @@ -2471,7 +1968,7 @@ static int modify_qp(struct ib_uverbs_file *file, } release_qp: - put_qp_read(qp); + uobj_put_obj_read(qp); out: kfree(attr); @@ -2558,42 +2055,27 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, memset(&resp, 0, sizeof resp); - uobj = idr_write_uobj(cmd.qp_handle, file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uobj_get_write(uobj_get_type(qp), cmd.qp_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + qp = uobj->object; obj = container_of(uobj, struct ib_uqp_object, uevent.uobject); + /* + * Make sure we don't free the memory in remove_commit as we still + * needs the uobject memory to create the response. 
+ */ + uverbs_uobject_get(uobj); - if (!list_empty(&obj->mcast_list)) { - put_uobj_write(uobj); - return -EBUSY; - } - - ret = ib_destroy_qp(qp); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) + ret = uobj_remove_commit(uobj); + if (ret) { + uverbs_uobject_put(uobj); return ret; - - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - - if (obj->uxrcd) - atomic_dec(&obj->uxrcd->refcnt); - - idr_remove_uobj(uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - ib_uverbs_release_uevent(file, &obj->uevent); + } resp.events_reported = obj->uevent.events_reported; - - put_uobj(uobj); + uverbs_uobject_put(uobj); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) @@ -2637,7 +2119,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, if (!user_wr) return -ENOMEM; - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); if (!qp) goto out; @@ -2673,7 +2155,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, goto out_put; } - ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext); + ud->ah = uobj_get_obj_read(ah, user_wr->wr.ud.ah, + file->ucontext); if (!ud->ah) { kfree(ud); ret = -EINVAL; @@ -2780,11 +2263,11 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, ret = -EFAULT; out_put: - put_qp_read(qp); + uobj_put_obj_read(qp); while (wr) { if (is_ud && ud_wr(wr)->ah) - put_ah_read(ud_wr(wr)->ah); + uobj_put_obj_read(ud_wr(wr)->ah); next = wr->next; kfree(wr); wr = next; @@ -2901,21 +2384,21 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); if (!qp) goto out; resp.bad_wr = 0; ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr); - put_qp_read(qp); - - if (ret) + uobj_put_obj_read(qp); + if (ret) { for (next = wr; next; next = next->next) { ++resp.bad_wr; if (next == bad_wr) break; } + } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) @@ -2951,14 +2434,14 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, if (IS_ERR(wr)) return PTR_ERR(wr); - srq = idr_read_srq(cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext); if (!srq) goto out; resp.bad_wr = 0; ret = srq->device->post_srq_recv(srq, wr, &bad_wr); - put_srq_read(srq); + uobj_put_obj_read(srq); if (ret) for (next = wr; next; next = next->next) { @@ -3005,14 +2488,11 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, (unsigned long)cmd.response + sizeof(resp), in_len - sizeof(cmd), out_len - sizeof(resp)); - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) - return -ENOMEM; - - init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class); - down_write(&uobj->mutex); + uobj = uobj_alloc(uobj_get_type(ah), file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err; @@ -3031,28 +2511,20 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, memset(&attr.dmac, 0, sizeof(attr.dmac)); memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); - ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - if (ret) - goto err_charge; - ah = pd->device->create_ah(pd, &attr, &udata); if (IS_ERR(ah)) { ret = PTR_ERR(ah); - goto 
err_create; + goto err_put; } ah->device = pd->device; ah->pd = pd; atomic_inc(&pd->usecnt); ah->uobject = uobj; + uobj->user_handle = cmd.user_handle; uobj->object = ah; - ret = idr_add_uobj(uobj); - if (ret) - goto err_destroy; - resp.ah_handle = uobj->id; if (copy_to_user((void __user *) (unsigned long) cmd.response, @@ -3061,32 +2533,19 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, goto err_copy; } - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->ah_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); + uobj_put_obj_read(pd); + uobj_alloc_commit(uobj); return in_len; err_copy: - idr_remove_uobj(uobj); - -err_destroy: ib_destroy_ah(ah); -err_create: - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - -err_charge: - put_pd_read(pd); +err_put: + uobj_put_obj_read(pd); err: - put_uobj_write(uobj); + uobj_alloc_abort(uobj); return ret; } @@ -3095,37 +2554,19 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_ah cmd; - struct ib_ah *ah; struct ib_uobject *uobj; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(cmd.ah_handle, file->ucontext); - if (!uobj) - return -EINVAL; - ah = uobj->object; - - ret = ib_destroy_ah(ah); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) - return ret; - - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - idr_remove_uobj(uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); + uobj = uobj_get_write(uobj_get_type(ah), cmd.ah_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - put_uobj(uobj); - - return in_len; + ret = uobj_remove_commit(uobj); + return ret ?: in_len; } ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, @@ -3142,7 +2583,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = idr_write_qp(cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); if (!qp) return -EINVAL; @@ -3171,7 +2612,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, kfree(mcast); out_put: - put_qp_write(qp); + uobj_put_obj_read(qp); return ret ? ret : in_len; } @@ -3190,16 +2631,16 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = idr_write_qp(cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); if (!qp) return -EINVAL; + obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); if (ret) goto out_put; - obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); - list_for_each_entry(mcast, &obj->mcast_list, list) if (cmd.mlid == mcast->lid && !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { @@ -3209,8 +2650,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, } out_put: - put_qp_write(qp); - + uobj_put_obj_read(qp); return ret ? 
ret : in_len; } @@ -3402,20 +2842,18 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - obj = kmalloc(sizeof(*obj), GFP_KERNEL); - if (!obj) - return -ENOMEM; + obj = (struct ib_uwq_object *)uobj_alloc(uobj_get_type(wq), + file->ucontext); + if (IS_ERR(obj)) + return PTR_ERR(obj); - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, - &wq_lock_class); - down_write(&obj->uevent.uobject.mutex); - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext); if (!pd) { err = -EINVAL; goto err_uobj; } - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); if (!cq) { err = -EINVAL; goto err_put_pd; @@ -3450,9 +2888,6 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, atomic_inc(&cq->usecnt); wq->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = wq; - err = idr_add_uobj(&obj->uevent.uobject); - if (err) - goto destroy_wq; memset(&resp, 0, sizeof(resp)); resp.wq_handle = obj->uevent.uobject.id; @@ -3465,27 +2900,19 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, if (err) goto err_copy; - put_pd_read(pd); - put_cq_read(cq); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list); - mutex_unlock(&file->mutex); - - obj->uevent.uobject.live = 1; - up_write(&obj->uevent.uobject.mutex); + uobj_put_obj_read(pd); + uobj_put_obj_read(cq); + uobj_alloc_commit(&obj->uevent.uobject); return 0; err_copy: - idr_remove_uobj(&obj->uevent.uobject); -destroy_wq: ib_destroy_wq(wq); err_put_cq: - put_cq_read(cq); + uobj_put_obj_read(cq); err_put_pd: - put_pd_read(pd); + uobj_put_obj_read(pd); err_uobj: - put_uobj_write(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject); return err; } @@ -3526,31 +2953,27 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, return -EOPNOTSUPP; resp.response_length = required_resp_len; - uobj = idr_write_uobj(cmd.wq_handle, - file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uobj_get_write(uobj_get_type(wq), cmd.wq_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); wq = uobj->object; obj = container_of(uobj, struct ib_uwq_object, uevent.uobject); - ret = ib_destroy_wq(wq); - if (!ret) - uobj->live = 0; + /* + * Make sure we don't free the memory in remove_commit as we still + * needs the uobject memory to create the response. 
+ */ + uverbs_uobject_get(uobj); - put_uobj_write(uobj); - if (ret) + ret = uobj_remove_commit(uobj); + if (ret) { + uverbs_uobject_put(uobj); return ret; + } - idr_remove_uobj(uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - ib_uverbs_release_uevent(file, &obj->uevent); resp.events_reported = obj->uevent.events_reported; - put_uobj(uobj); - + uverbs_uobject_put(uobj); ret = ib_copy_to_udata(ucore, &resp, resp.response_length); if (ret) return ret; @@ -3588,7 +3011,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS)) return -EINVAL; - wq = idr_read_wq(cmd.wq_handle, file->ucontext); + wq = uobj_get_obj_read(wq, cmd.wq_handle, file->ucontext); if (!wq) return -EINVAL; @@ -3599,7 +3022,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, wq_attr.flags_mask = cmd.flags_mask; } ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw); - put_wq_read(wq); + uobj_put_obj_read(wq); return ret; } @@ -3677,7 +3100,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (num_read_wqs = 0; num_read_wqs < num_wq_handles; num_read_wqs++) { - wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext); + wq = uobj_get_obj_read(wq, wqs_handles[num_read_wqs], + file->ucontext); if (!wq) { err = -EINVAL; goto put_wqs; @@ -3686,14 +3110,12 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, wqs[num_read_wqs] = wq; } - uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) { - err = -ENOMEM; + uobj = uobj_alloc(uobj_get_type(rwq_ind_table), file->ucontext); + if (IS_ERR(uobj)) { + err = PTR_ERR(uobj); goto put_wqs; } - init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class); - down_write(&uobj->mutex); init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size; init_attr.ind_tbl = wqs; rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw); @@ -3713,10 +3135,6 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (i = 0; i < num_wq_handles; i++) atomic_inc(&wqs[i]->usecnt); - err = idr_add_uobj(uobj); - if (err) - goto destroy_ind_tbl; - resp.ind_tbl_handle = uobj->id; resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num; resp.response_length = required_resp_len; @@ -3729,26 +3147,18 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, kfree(wqs_handles); for (j = 0; j < num_read_wqs; j++) - put_wq_read(wqs[j]); - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list); - mutex_unlock(&file->mutex); + uobj_put_obj_read(wqs[j]); - uobj->live = 1; - - up_write(&uobj->mutex); + uobj_alloc_commit(uobj); return 0; err_copy: - idr_remove_uobj(uobj); -destroy_ind_tbl: ib_destroy_rwq_ind_table(rwq_ind_tbl); err_uobj: - put_uobj_write(uobj); + uobj_alloc_abort(uobj); put_wqs: for (j = 0; j < num_read_wqs; j++) - put_wq_read(wqs[j]); + uobj_put_obj_read(wqs[j]); err_free: kfree(wqs_handles); kfree(wqs); @@ -3761,10 +3171,8 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, struct ib_udata *uhw) { struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {}; - struct ib_rwq_ind_table *rwq_ind_tbl; struct ib_uobject *uobj; int ret; - struct ib_wq **ind_tbl; size_t required_cmd_sz; required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle); @@ -3784,31 +3192,12 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - uobj = idr_write_uobj(cmd.ind_tbl_handle, - file->ucontext); - if (!uobj) 
- return -EINVAL; - rwq_ind_tbl = uobj->object; - ind_tbl = rwq_ind_tbl->ind_tbl; - - ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) - return ret; - - idr_remove_uobj(uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); + uobj = uobj_get_write(uobj_get_type(rwq_ind_table), cmd.ind_tbl_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - put_uobj(uobj); - kfree(ind_tbl); - return ret; + return uobj_remove_commit(uobj); } int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, @@ -3882,15 +3271,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, kern_flow_attr = &cmd.flow_attr; } - uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) { - err = -ENOMEM; + uobj = uobj_alloc(uobj_get_type(flow), file->ucontext); + if (IS_ERR(uobj)) { + err = PTR_ERR(uobj); goto err_free_attr; } - init_uobj(uobj, 0, file->ucontext, &rule_lock_class); - down_write(&uobj->mutex); - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext); if (!qp) { err = -EINVAL; goto err_uobj; @@ -3931,24 +3318,14 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, err = -EINVAL; goto err_free; } - - err = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - if (err) - goto err_free; - flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER); if (IS_ERR(flow_id)) { err = PTR_ERR(flow_id); - goto err_create; + goto err_free; } flow_id->uobject = uobj; uobj->object = flow_id; - err = idr_add_uobj(uobj); - if (err) - goto destroy_flow; - memset(&resp, 0, sizeof(resp)); resp.flow_handle = uobj->id; @@ -3957,30 +3334,20 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, if (err) goto err_copy; - put_qp_read(qp); - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->rule_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); + uobj_put_obj_read(qp); + uobj_alloc_commit(uobj); kfree(flow_attr); if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); return 0; err_copy: - idr_remove_uobj(uobj); -destroy_flow: ib_destroy_flow(flow_id); -err_create: - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); err_free: kfree(flow_attr); err_put: - put_qp_read(qp); + uobj_put_obj_read(qp); err_uobj: - put_uobj_write(uobj); + uobj_alloc_abort(uobj); err_free_attr: if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); @@ -3993,7 +3360,6 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, struct ib_udata *uhw) { struct ib_uverbs_destroy_flow cmd; - struct ib_flow *flow_id; struct ib_uobject *uobj; int ret; @@ -4007,28 +3373,12 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EINVAL; - uobj = idr_write_uobj(cmd.flow_handle, file->ucontext); - if (!uobj) - return -EINVAL; - flow_id = uobj->object; - - ret = ib_destroy_flow(flow_id); - if (!ret) { - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - uobj->live = 0; - } - - put_uobj_write(uobj); - - idr_remove_uobj(uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); + uobj = uobj_get_write(uobj_get_type(flow), cmd.flow_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + ret = uobj_remove_commit(uobj); return ret; } @@ -4045,31 +3395,37 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, struct ib_srq_init_attr attr; int ret; - obj = 
kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; - - init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class); - down_write(&obj->uevent.uobject.mutex); + obj = (struct ib_usrq_object *)uobj_alloc(uobj_get_type(srq), + file->ucontext); + if (IS_ERR(obj)) + return PTR_ERR(obj); if (cmd->srq_type == IB_SRQT_XRC) { - attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj); - if (!attr.ext.xrc.xrcd) { + xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->xrcd_handle, + file->ucontext); + if (IS_ERR(xrcd_uobj)) { ret = -EINVAL; goto err; } + attr.ext.xrc.xrcd = (struct ib_xrcd *)xrcd_uobj->object; + if (!attr.ext.xrc.xrcd) { + ret = -EINVAL; + goto err_put_xrcd; + } + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); - attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); + attr.ext.xrc.cq = uobj_get_obj_read(cq, cmd->cq_handle, + file->ucontext); if (!attr.ext.xrc.cq) { ret = -EINVAL; goto err_put_xrcd; } } - pd = idr_read_pd(cmd->pd_handle, file->ucontext); + pd = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err_put_cq; @@ -4085,11 +3441,6 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); - ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - if (ret) - goto err_put_cq; - srq = pd->device->create_srq(pd, &attr, udata); if (IS_ERR(srq)) { ret = PTR_ERR(srq); @@ -4114,9 +3465,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, atomic_set(&srq->usecnt, 0); obj->uevent.uobject.object = srq; - ret = idr_add_uobj(&obj->uevent.uobject); - if (ret) - goto err_destroy; + obj->uevent.uobject.user_handle = cmd->user_handle; memset(&resp, 0, sizeof resp); resp.srq_handle = obj->uevent.uobject.id; @@ -4132,44 +3481,32 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, } if (cmd->srq_type == IB_SRQT_XRC) { - put_uobj_read(xrcd_uobj); - put_cq_read(attr.ext.xrc.cq); + uobj_put_read(xrcd_uobj); + uobj_put_obj_read(attr.ext.xrc.cq); } - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list); - mutex_unlock(&file->mutex); - - obj->uevent.uobject.live = 1; - - up_write(&obj->uevent.uobject.mutex); + uobj_put_obj_read(pd); + uobj_alloc_commit(&obj->uevent.uobject); return 0; err_copy: - idr_remove_uobj(&obj->uevent.uobject); - -err_destroy: ib_destroy_srq(srq); err_put: - ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, ib_dev, - RDMACG_RESOURCE_HCA_OBJECT); - put_pd_read(pd); + uobj_put_obj_read(pd); err_put_cq: if (cmd->srq_type == IB_SRQT_XRC) - put_cq_read(attr.ext.xrc.cq); + uobj_put_obj_read(attr.ext.xrc.cq); err_put_xrcd: if (cmd->srq_type == IB_SRQT_XRC) { atomic_dec(&obj->uxrcd->refcnt); - put_uobj_read(xrcd_uobj); + uobj_put_read(xrcd_uobj); } err: - put_uobj_write(&obj->uevent.uobject); + uobj_alloc_abort(&obj->uevent.uobject); return ret; } @@ -4254,7 +3591,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, out_len); - srq = idr_read_srq(cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext); if (!srq) return -EINVAL; @@ -4263,7 +3600,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata); - put_srq_read(srq); + 
uobj_put_obj_read(srq); return ret ? ret : in_len; } @@ -4285,13 +3622,13 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - srq = idr_read_srq(cmd.srq_handle, file->ucontext); + srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext); if (!srq) return -EINVAL; ret = ib_query_srq(srq, &attr); - put_srq_read(srq); + uobj_put_obj_read(srq); if (ret) return ret; @@ -4320,53 +3657,39 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, struct ib_srq *srq; struct ib_uevent_object *obj; int ret = -EINVAL; - struct ib_usrq_object *us; enum ib_srq_type srq_type; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(cmd.srq_handle, file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uobj_get_write(uobj_get_type(srq), cmd.srq_handle, + file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + srq = uobj->object; obj = container_of(uobj, struct ib_uevent_object, uobject); srq_type = srq->srq_type; + /* + * Make sure we don't free the memory in remove_commit as we still + * needs the uobject memory to create the response. + */ + uverbs_uobject_get(uobj); - ret = ib_destroy_srq(srq); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); + memset(&resp, 0, sizeof(resp)); - if (ret) + ret = uobj_remove_commit(uobj); + if (ret) { + uverbs_uobject_put(uobj); return ret; - - ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT); - - if (srq_type == IB_SRQT_XRC) { - us = container_of(obj, struct ib_usrq_object, uevent); - atomic_dec(&us->uxrcd->refcnt); } - - idr_remove_uobj(uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - ib_uverbs_release_uevent(file, obj); - - memset(&resp, 0, sizeof resp); resp.events_reported = obj->events_reported; + uverbs_uobject_put(uobj); + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + return -EFAULT; - put_uobj(uobj); - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - ret = -EFAULT; - - return ret ? ret : in_len; + return in_len; } int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index e1db6782d0d4..7ccb52584be8 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -52,6 +52,7 @@ #include "uverbs.h" #include "core_priv.h" +#include "rdma_core.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace verbs access"); @@ -214,140 +215,11 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp, } static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, - struct ib_ucontext *context) + struct ib_ucontext *context, + bool device_removed) { - struct ib_uobject *uobj, *tmp; - context->closing = 1; - - list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { - struct ib_ah *ah = uobj->object; - - idr_remove_uobj(uobj); - ib_destroy_ah(ah); - ib_rdmacg_uncharge(&uobj->cg_obj, context->device, - RDMACG_RESOURCE_HCA_OBJECT); - kfree(uobj); - } - - /* Remove MWs before QPs, in order to support type 2A MWs. 
*/ - list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) { - struct ib_mw *mw = uobj->object; - - idr_remove_uobj(uobj); - uverbs_dealloc_mw(mw); - ib_rdmacg_uncharge(&uobj->cg_obj, context->device, - RDMACG_RESOURCE_HCA_OBJECT); - kfree(uobj); - } - - list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) { - struct ib_flow *flow_id = uobj->object; - - idr_remove_uobj(uobj); - ib_destroy_flow(flow_id); - ib_rdmacg_uncharge(&uobj->cg_obj, context->device, - RDMACG_RESOURCE_HCA_OBJECT); - kfree(uobj); - } - - list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { - struct ib_qp *qp = uobj->object; - struct ib_uqp_object *uqp = - container_of(uobj, struct ib_uqp_object, uevent.uobject); - - idr_remove_uobj(uobj); - if (qp == qp->real_qp) - ib_uverbs_detach_umcast(qp, uqp); - ib_destroy_qp(qp); - ib_rdmacg_uncharge(&uobj->cg_obj, context->device, - RDMACG_RESOURCE_HCA_OBJECT); - ib_uverbs_release_uevent(file, &uqp->uevent); - kfree(uqp); - } - - list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) { - struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object; - struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; - - idr_remove_uobj(uobj); - ib_destroy_rwq_ind_table(rwq_ind_tbl); - kfree(ind_tbl); - kfree(uobj); - } - - list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) { - struct ib_wq *wq = uobj->object; - struct ib_uwq_object *uwq = - container_of(uobj, struct ib_uwq_object, uevent.uobject); - - idr_remove_uobj(uobj); - ib_destroy_wq(wq); - ib_uverbs_release_uevent(file, &uwq->uevent); - kfree(uwq); - } - - list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { - struct ib_srq *srq = uobj->object; - struct ib_uevent_object *uevent = - container_of(uobj, struct ib_uevent_object, uobject); - - idr_remove_uobj(uobj); - ib_destroy_srq(srq); - ib_rdmacg_uncharge(&uobj->cg_obj, context->device, - RDMACG_RESOURCE_HCA_OBJECT); - ib_uverbs_release_uevent(file, uevent); - kfree(uevent); - } - - list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { - struct ib_cq *cq = uobj->object; - struct ib_uverbs_event_file *ev_file = cq->cq_context; - struct ib_ucq_object *ucq = - container_of(uobj, struct ib_ucq_object, uobject); - - idr_remove_uobj(uobj); - ib_destroy_cq(cq); - ib_rdmacg_uncharge(&uobj->cg_obj, context->device, - RDMACG_RESOURCE_HCA_OBJECT); - ib_uverbs_release_ucq(file, ev_file, ucq); - kfree(ucq); - } - - list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { - struct ib_mr *mr = uobj->object; - - idr_remove_uobj(uobj); - ib_dereg_mr(mr); - ib_rdmacg_uncharge(&uobj->cg_obj, context->device, - RDMACG_RESOURCE_HCA_OBJECT); - kfree(uobj); - } - - mutex_lock(&file->device->xrcd_tree_mutex); - list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) { - struct ib_xrcd *xrcd = uobj->object; - struct ib_uxrcd_object *uxrcd = - container_of(uobj, struct ib_uxrcd_object, uobject); - - idr_remove_uobj(uobj); - ib_uverbs_dealloc_xrcd(file->device, xrcd, - file->ucontext ? 
RDMA_REMOVE_CLOSE : - RDMA_REMOVE_DRIVER_REMOVE); - kfree(uxrcd); - } - mutex_unlock(&file->device->xrcd_tree_mutex); - - list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { - struct ib_pd *pd = uobj->object; - - idr_remove_uobj(uobj); - ib_dealloc_pd(pd); - ib_rdmacg_uncharge(&uobj->cg_obj, context->device, - RDMACG_RESOURCE_HCA_OBJECT); - kfree(uobj); - } - + uverbs_cleanup_ucontext(context, device_removed); put_pid(context->tgid); ib_rdmacg_uncharge(&context->cg_obj, context->device, @@ -592,7 +464,7 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) struct ib_uevent_object *uobj; /* for XRC target qp's, check that qp is live */ - if (!event->element.qp->uobject || !event->element.qp->uobject->live) + if (!event->element.qp->uobject) return; uobj = container_of(event->element.qp->uobject, @@ -1010,7 +882,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) mutex_lock(&file->cleanup_mutex); if (file->ucontext) { - ib_uverbs_cleanup_ucontext(file, file->ucontext); + ib_uverbs_cleanup_ucontext(file, file->ucontext, false); file->ucontext = NULL; } mutex_unlock(&file->cleanup_mutex); @@ -1260,7 +1132,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, * (e.g mmput). */ ib_dev->disassociate_ucontext(ucontext); - ib_uverbs_cleanup_ucontext(file, ucontext); + ib_uverbs_cleanup_ucontext(file, ucontext, true); } mutex_lock(&uverbs_dev->lists_mutex); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d3efd22943e9..2e8f661c900a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1377,17 +1377,6 @@ struct ib_rdmacg_object { struct ib_ucontext { struct ib_device *device; struct ib_uverbs_file *ufile; - struct list_head pd_list; - struct list_head mr_list; - struct list_head mw_list; - struct list_head cq_list; - struct list_head qp_list; - struct list_head srq_list; - struct list_head ah_list; - struct list_head xrcd_list; - struct list_head rule_list; - struct list_head wq_list; - struct list_head rwq_ind_tbl_list; int closing; /* locking the uobjects_list */ @@ -1426,10 +1415,8 @@ struct ib_uobject { struct ib_rdmacg_object cg_obj; /* rdmacg object */ int id; /* index into kernel idr */ struct kref ref; - struct rw_semaphore mutex; /* protects .live */ atomic_t usecnt; /* protects exclusive access */ struct rcu_head rcu; /* kfree_rcu() overhead */ - int live; const struct uverbs_obj_type *type; }; diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 2edb77648986..88856642fdf5 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -46,5 +46,68 @@ extern const struct uverbs_obj_idr_type uverbs_type_attrs_mr; extern const struct uverbs_obj_idr_type uverbs_type_attrs_mw; extern const struct uverbs_obj_idr_type uverbs_type_attrs_pd; extern const struct uverbs_obj_idr_type uverbs_type_attrs_xrcd; + +static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type, + bool write, + struct ib_ucontext *ucontext, + int id) +{ + return rdma_lookup_get_uobject(type, ucontext, id, write); +} + +#define uobj_get_type(_type) uverbs_type_attrs_##_type.type + +#define uobj_get_read(_type, _id, _ucontext) \ + __uobj_get(&(_type), false, _ucontext, _id) + +#define uobj_get_obj_read(_type, _id, _ucontext) \ +({ \ + struct ib_uobject *uobj = \ + __uobj_get(&uobj_get_type(_type), \ + false, _ucontext, _id); \ + \ + (struct ib_##_type *)(IS_ERR(uobj) ? 
NULL : uobj->object); \
+})
+
+#define uobj_get_write(_type, _id, _ucontext) \
+ __uobj_get(&(_type), true, _ucontext, _id)
+
+static inline void uobj_put_read(struct ib_uobject *uobj)
+{
+ rdma_lookup_put_uobject(uobj, false);
+}
+
+#define uobj_put_obj_read(_obj) \
+ uobj_put_read((_obj)->uobject)
+
+static inline void uobj_put_write(struct ib_uobject *uobj)
+{
+ rdma_lookup_put_uobject(uobj, true);
+}
+
+static inline int __must_check uobj_remove_commit(struct ib_uobject *uobj)
+{
+ return rdma_remove_commit_uobject(uobj);
+}
+
+static inline void uobj_alloc_commit(struct ib_uobject *uobj)
+{
+ rdma_alloc_commit_uobject(uobj);
+}
+
+static inline void uobj_alloc_abort(struct ib_uobject *uobj)
+{
+ rdma_alloc_abort_uobject(uobj);
+}
+
+static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext)
+{
+ return rdma_alloc_begin_uobject(type, ucontext);
+}
+
+#define uobj_alloc(_type, ucontext) \
+ __uobj_alloc(&(_type), ucontext)
+
 #endif
-- cgit v1.2.3
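Taken together, the helpers above give every command handler the same two-step shape: look a handle up under its type, use the object, then put it. A minimal sketch of the resulting idiom (example_handler and its qp_handle parameter are illustrative placeholders, not part of this series; the lookup macros come from the uverbs_std_types.h additions just shown):

static int example_handler(struct ib_uverbs_file *file, u32 qp_handle)
{
	struct ib_qp *qp;

	/* shared (read) lookup; returns NULL if the handle is invalid */
	qp = uobj_get_obj_read(qp, qp_handle, file->ucontext);
	if (!qp)
		return -EINVAL;

	/* ... operate on qp while the read reference is held ... */

	uobj_put_obj_read(qp);
	return 0;
}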
From f48b726920d96dcd1860df06143bdea7d6d7dcc3 Mon Sep 17 00:00:00 2001
From: Matan Barak
Date: Tue, 4 Apr 2017 13:31:45 +0300
Subject: IB/core: Add lock to multicast handlers

When two handlers used the same object in the old schema, we blocked the process in the kernel. The new schema just returns -EBUSY. This could lead to different behaviour in applications between the old schema and the new schema. In most cases, using such handlers concurrently could lead to crashing the process. For example, if thread A destroys a QP and thread B modifies it, we could have the destruction happen before the modification. In this case, we are accessing freed memory which could lead to crashing the process. This is true for most cases. However, attaching and detaching a multicast address from a QP concurrently is safe. Therefore, we preserve the original behaviour by adding a lock there.

Signed-off-by: Matan Barak
Reviewed-by: Yishai Hadas
Signed-off-by: Doug Ledford
---
 drivers/infiniband/core/uverbs.h | 2 ++
 drivers/infiniband/core/uverbs_cmd.c | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 3660278b62b0..27c8b98b36f6 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -163,6 +163,8 @@ struct ib_usrq_object {
 struct ib_uqp_object {
 struct ib_uevent_object uevent;
+ /* lock for mcast list */
+ struct mutex mcast_lock;
 struct list_head mcast_list;
 struct ib_uxrcd_object *uxrcd;
};
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 2f258aaec7b9..119c10da7751 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1352,6 +1352,7 @@ static int create_qp(struct ib_uverbs_file *file,
 return PTR_ERR(obj);
 obj->uxrcd = NULL;
 obj->uevent.uobject.user_handle = cmd->user_handle;
+ mutex_init(&obj->mcast_lock);
 if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + sizeof(cmd->rwq_ind_tbl_handle) &&
@@ -2589,6 +2590,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
 obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ mutex_lock(&obj->mcast_lock);
 list_for_each_entry(mcast, &obj->mcast_list, list)
 if (cmd.mlid == mcast->lid && !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
@@ -2612,6 +2614,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
 kfree(mcast);
 out_put:
+ mutex_unlock(&obj->mcast_lock);
 uobj_put_obj_read(qp);
 return ret ? ret : in_len;
@@ -2636,6 +2639,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
 return -EINVAL;
 obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ mutex_lock(&obj->mcast_lock);
 ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
 if (ret)
@@ -2650,6 +2654,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
 }
 out_put:
+ mutex_unlock(&obj->mcast_lock);
 uobj_put_obj_read(qp);
 return ret ? ret : in_len;
}
-- cgit v1.2.3

From cf8966b3477d5e6545393bb4499f2051ea554c62 Mon Sep 17 00:00:00 2001
From: Matan Barak
Date: Tue, 4 Apr 2017 13:31:46 +0300
Subject: IB/core: Add support for fd objects

The completion channel we use in verbs infrastructure is FD based. Previously, we had a separate way to manage this object. Since we strive for a single way to manage any kind of object in this infrastructure, we conceptually treat all objects as subclasses of ib_uobject.

This commit adds the necessary mechanism to support FD based objects like their IDR counterparts. The release of an FD object needs to be synchronized with the release of its context. We use the cleanup_mutex on the uverbs_file for that.

Signed-off-by: Matan Barak
Reviewed-by: Yishai Hadas
Signed-off-by: Doug Ledford
---
 drivers/infiniband/core/rdma_core.c | 177 +++++++++++++++++++++++++++++++++-
 drivers/infiniband/core/rdma_core.h | 8 ++
 drivers/infiniband/core/uverbs.h | 1 +
 drivers/infiniband/core/uverbs_main.c | 4 +-
 include/rdma/ib_verbs.h | 6 ++
 include/rdma/uverbs_types.h | 16 +++
 6 files changed, 210 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 1cbc053add34..e5bdf7f67574 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -153,6 +153,37 @@ free:
 return uobj;
}
+static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type,
+ struct ib_ucontext *ucontext,
+ int id, bool write)
+{
+ struct file *f;
+ struct ib_uobject *uobject;
+ const struct uverbs_obj_fd_type *fd_type =
+ container_of(type, struct uverbs_obj_fd_type, type);
+
+ if (write)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ f = fget(id);
+ if (!f)
+ return ERR_PTR(-EBADF);
+
+ uobject = f->private_data;
+ /*
+ * fget(id) ensures we are not currently running uverbs_close_fd,
+ * and the caller is expected to ensure that uverbs_close_fd is never
+ * done while a call to lookup is possible.
+ */ + if (f->f_op != fd_type->fops) { + fput(f); + return ERR_PTR(-EBADF); + } + + uverbs_uobject_get(uobject); + return uobject; +} + struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, struct ib_ucontext *ucontext, int id, bool write) @@ -211,6 +242,46 @@ uobj_put: return ERR_PTR(ret); } +static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type, + struct ib_ucontext *ucontext) +{ + const struct uverbs_obj_fd_type *fd_type = + container_of(type, struct uverbs_obj_fd_type, type); + int new_fd; + struct ib_uobject *uobj; + struct ib_uobject_file *uobj_file; + struct file *filp; + + new_fd = get_unused_fd_flags(O_CLOEXEC); + if (new_fd < 0) + return ERR_PTR(new_fd); + + uobj = alloc_uobj(ucontext, type); + if (IS_ERR(uobj)) { + put_unused_fd(new_fd); + return uobj; + } + + uobj_file = container_of(uobj, struct ib_uobject_file, uobj); + filp = anon_inode_getfile(fd_type->name, + fd_type->fops, + uobj_file, + fd_type->flags); + if (IS_ERR(filp)) { + put_unused_fd(new_fd); + uverbs_uobject_put(uobj); + return (void *)filp; + } + + uobj_file->uobj.id = new_fd; + uobj_file->uobj.object = filp; + uobj_file->ufile = ucontext->ufile; + INIT_LIST_HEAD(&uobj->list); + kref_get(&uobj_file->ufile->ref); + + return uobj; +} + struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, struct ib_ucontext *ucontext) { @@ -246,6 +317,39 @@ static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, return ret; } +static void alloc_abort_fd_uobject(struct ib_uobject *uobj) +{ + struct ib_uobject_file *uobj_file = + container_of(uobj, struct ib_uobject_file, uobj); + struct file *filp = uobj->object; + int id = uobj_file->uobj.id; + + /* Unsuccessful NEW */ + fput(filp); + put_unused_fd(id); +} + +static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + const struct uverbs_obj_fd_type *fd_type = + container_of(uobj->type, struct uverbs_obj_fd_type, type); + struct ib_uobject_file *uobj_file = + container_of(uobj, struct ib_uobject_file, uobj); + int ret = fd_type->context_closed(uobj_file, why); + + if (why == RDMA_REMOVE_DESTROY && ret) + return ret; + + if (why == RDMA_REMOVE_DURING_CLEANUP) { + alloc_abort_fd_uobject(uobj); + return ret; + } + + uobj_file->uobj.context = NULL; + return ret; +} + static void lockdep_check(struct ib_uobject *uobj, bool write) { #ifdef CONFIG_LOCKDEP @@ -314,6 +418,19 @@ static void alloc_commit_idr_uobject(struct ib_uobject *uobj) spin_unlock(&uobj->context->ufile->idr_lock); } +static void alloc_commit_fd_uobject(struct ib_uobject *uobj) +{ + struct ib_uobject_file *uobj_file = + container_of(uobj, struct ib_uobject_file, uobj); + + uverbs_uobject_add(&uobj_file->uobj); + fd_install(uobj_file->uobj.id, uobj->object); + /* This shouldn't be used anymore. Use the file object instead */ + uobj_file->uobj.id = 0; + /* Get another reference as we export this to the fops */ + uverbs_uobject_get(&uobj_file->uobj); +} + int rdma_alloc_commit_uobject(struct ib_uobject *uobj) { /* Cleanup is running. 
Calling this should have been impossible */ @@ -352,6 +469,15 @@ static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool write) { } +static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool write) +{ + struct file *filp = uobj->object; + + WARN_ON(write); + /* This indirectly calls uverbs_close_fd and free the object */ + fput(filp); +} + void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool write) { lockdep_check(uobj, write); @@ -392,6 +518,39 @@ const struct uverbs_obj_type_class uverbs_idr_class = { .needs_kfree_rcu = true, }; +static void _uverbs_close_fd(struct ib_uobject_file *uobj_file) +{ + struct ib_ucontext *ucontext; + struct ib_uverbs_file *ufile = uobj_file->ufile; + int ret; + + mutex_lock(&uobj_file->ufile->cleanup_mutex); + + /* uobject was either already cleaned up or is cleaned up right now anyway */ + if (!uobj_file->uobj.context || + !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem)) + goto unlock; + + ucontext = uobj_file->uobj.context; + ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE, + true); + up_read(&ucontext->cleanup_rwsem); + if (ret) + pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n"); +unlock: + mutex_unlock(&ufile->cleanup_mutex); +} + +void uverbs_close_fd(struct file *f) +{ + struct ib_uobject_file *uobj_file = f->private_data; + struct kref *uverbs_file_ref = &uobj_file->ufile->ref; + + _uverbs_close_fd(uobj_file); + uverbs_uobject_put(&uobj_file->uobj); + kref_put(uverbs_file_ref, ib_uverbs_release_file); +} + void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed) { enum rdma_remove_reason reason = device_removed ? @@ -412,7 +571,13 @@ void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed) /* * This shouldn't run while executing other commands on this - * context. + * context. Thus, the only thing we should take care of is + * releasing a FD while traversing this list. The FD could be + * closed and released from the _release fop of this FD. + * In order to mitigate this, we add a lock. + * We take and release the lock per order traversal in order + * to let other threads (which might still use the FDs) chance + * to run. */ mutex_lock(&ucontext->uobjects_lock); list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects, @@ -448,3 +613,13 @@ void uverbs_initialize_ucontext(struct ib_ucontext *ucontext) init_rwsem(&ucontext->cleanup_rwsem); } +const struct uverbs_obj_type_class uverbs_fd_class = { + .alloc_begin = alloc_begin_fd_uobject, + .lookup_get = lookup_get_fd_uobject, + .alloc_commit = alloc_commit_fd_uobject, + .alloc_abort = alloc_abort_fd_uobject, + .lookup_put = lookup_put_fd_uobject, + .remove_commit = remove_commit_fd_uobject, + .needs_kfree_rcu = false, +}; + diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 0247bb5e3dd3..1b82e7ff7fe8 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -67,4 +67,12 @@ void uverbs_uobject_get(struct ib_uobject *uobject); */ void uverbs_uobject_put(struct ib_uobject *uobject); +/* Indicate this fd is no longer used by this consumer, but its memory isn't + * necessarily released yet. When the last reference is put, we release the + * memory. After this call is executed, calling uverbs_uobject_get isn't + * allowed. + * This must be called from the release file_operations of the file! 
+ */
+void uverbs_close_fd(struct file *f);
+
 #endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 27c8b98b36f6..5f8a7f222356 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -193,6 +193,7 @@ void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
 struct ib_ucq_object *uobj);
 void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
 struct ib_uevent_object *uobj);
+void ib_uverbs_release_file(struct kref *ref);
 void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
 void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 7ccb52584be8..8ee1d08b3135 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -233,7 +233,7 @@ static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
 complete(&dev->comp);
}
-static void ib_uverbs_release_file(struct kref *ref)
+void ib_uverbs_release_file(struct kref *ref)
{
 struct ib_uverbs_file *file = container_of(ref, struct ib_uverbs_file, ref);
@@ -1132,7 +1132,9 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
 * (e.g mmput).
 */
 ib_dev->disassociate_ucontext(ucontext);
+ mutex_lock(&file->cleanup_mutex);
 ib_uverbs_cleanup_ucontext(file, ucontext, true);
+ mutex_unlock(&file->cleanup_mutex);
 }
 mutex_lock(&uverbs_dev->lists_mutex);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 2e8f661c900a..3a8e05894e9b 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1421,6 +1421,12 @@ struct ib_uobject {
 const struct uverbs_obj_type *type;
};
+struct ib_uobject_file {
+ struct ib_uobject uobj;
+ /* ufile contains the lock between context release and file close */
+ struct ib_uverbs_file *ufile;
+};
+
 struct ib_udata {
 const void __user *inbuf;
 void __user *outbuf;
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 66368b5a3006..58674290fab0 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -129,6 +129,22 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj);
 int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj);
 int rdma_alloc_commit_uobject(struct ib_uobject *uobj);
+struct uverbs_obj_fd_type {
+ /*
+ * In fd based objects, uverbs_obj_type_ops points to generic
+ * fd operations. In order to specialize the underlying types (e.g.
+ * completion_channel), we use fops, name and flags for fd creation.
+ * context_closed is called when the context is closed, either because
+ * the driver was removed or because the process terminated.
+ */
+ struct uverbs_obj_type type;
+ int (*context_closed)(struct ib_uobject_file *uobj_file,
+ enum rdma_remove_reason why);
+ const struct file_operations *fops;
+ const char *name;
+ int flags;
+};
+
 extern const struct uverbs_obj_type_class uverbs_idr_class;
 #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \
 sizeof(char))
-- cgit v1.2.3
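To make the contract concrete, an fd based type is declared by embedding a struct uverbs_obj_type configured for the fd class and filling in the fd-specific fields introduced above. A rough sketch follows; every example_* identifier is a hypothetical placeholder, uverbs_fd_class is the class defined in rdma_core.c above (the next patch in the series exports it and adds a UVERBS_TYPE_ALLOC_FD() helper that generates the .type initializer):

/* hypothetical hot-unplug callback and fops for the sketch */
static int example_context_closed(struct ib_uobject_file *uobj_file,
				  enum rdma_remove_reason why);
static const struct file_operations example_fops;

static const struct uverbs_obj_fd_type example_fd_type = {
	.type = {
		.destroy_order = 0,
		.type_class = &uverbs_fd_class,
		/* must be at least sizeof(struct ib_uobject_file) */
		.obj_size = sizeof(struct ib_uobject_file),
	},
	.context_closed = example_context_closed,
	.fops = &example_fops,
	.name = "[example]",
	.flags = O_RDONLY,
};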
From 1e7710f3f6563940bb6bbc94aa8eadfd344a86af Mon Sep 17 00:00:00 2001
From: Matan Barak
Date: Tue, 4 Apr 2017 13:31:47 +0300
Subject: IB/core: Change completion channel to use the reworked objects schema

This patch adds the standard fd based type - completion_channel. The completion_channel is now prefixed with ib_uobject, similarly to the rest of the uobjects. This requires a few changes:
(1) We define a new completion channel fd based object type.
(2) completion_event and async_event are now two different types. This means they use different fops.
(3) We release the completion_channel exactly as we release other idr based objects.
(4) Since ib_uobjects are already kref-ed, we only add the kref to the async event.

An fd object requires filling out several parameters. Its op pointer should point to uverbs_fd_ops and its size should be at least the size of ib_uobject. We use a macro to make the type declaration easier.

Signed-off-by: Matan Barak
Reviewed-by: Yishai Hadas
Signed-off-by: Doug Ledford
---
 drivers/infiniband/core/uverbs.h | 26 ++-
 drivers/infiniband/core/uverbs_cmd.c | 57 +++---
 drivers/infiniband/core/uverbs_main.c | 279 +++++++++++++++++------------
 drivers/infiniband/core/uverbs_std_types.c | 33 +++-
 include/rdma/uverbs_std_types.h | 1 +
 include/rdma/uverbs_types.h | 9 +
 6 files changed, 257 insertions(+), 148 deletions(-)

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 5f8a7f222356..826f82748718 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -102,17 +102,25 @@ struct ib_uverbs_device {
};
 struct ib_uverbs_event_file {
- struct kref ref;
- int is_async;
- struct ib_uverbs_file *uverbs_file;
 spinlock_t lock;
 int is_closed;
 wait_queue_head_t poll_wait;
 struct fasync_struct *async_queue;
 struct list_head event_list;
+};
+
+struct ib_uverbs_async_event_file {
+ struct ib_uverbs_event_file ev_file;
+ struct ib_uverbs_file *uverbs_file;
+ struct kref ref;
 struct list_head list;
};
+struct ib_uverbs_completion_event_file {
+ struct ib_uobject_file uobj_file;
+ struct ib_uverbs_event_file ev_file;
+};
+
 struct ib_uverbs_file {
 struct kref ref;
 struct mutex mutex;
@@ -120,7 +128,7 @@ struct ib_uverbs_file {
 struct ib_uverbs_device *device;
 struct ib_ucontext *ucontext;
 struct ib_event_handler event_handler;
- struct ib_uverbs_event_file *async_file;
+ struct ib_uverbs_async_event_file *async_file;
 struct list_head list;
 int is_closed;
@@ -182,14 +190,14 @@ struct ib_ucq_object {
 u32 async_events_reported;
};
-struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev,
- int is_async);
+extern const struct file_operations uverbs_event_fops;
+void ib_uverbs_init_event_file(struct ib_uverbs_event_file *ev_file);
+struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
+ struct ib_device *ib_dev);
 void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
-struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
 void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_event_file *ev_file,
+ struct ib_uverbs_completion_event_file *ev_file,
 struct ib_ucq_object *uobj);
 void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
 struct ib_uevent_object *uobj);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 119c10da7751..b9024fa31b18 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -47,6 +47,24 @@
 #include "uverbs.h"
 #include "core_priv.h"
+static struct ib_uverbs_completion_event_file *
+ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
+{
+ struct ib_uobject *uobj = uobj_get_read(uobj_get_type(comp_channel),
+ fd, context);
+ struct ib_uobject_file *uobj_file;
+
+ if (IS_ERR(uobj))
+ return (void *)uobj;
+
+ uverbs_uobject_get(uobj);
+ uobj_put_read(uobj);
+
+ uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
+ return
container_of(uobj_file, struct ib_uverbs_completion_event_file, + uobj_file); +} + ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, @@ -116,7 +134,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, goto err_free; resp.async_fd = ret; - filp = ib_uverbs_alloc_event_file(file, ib_dev, 1); + filp = ib_uverbs_alloc_async_event_file(file, ib_dev); if (IS_ERR(filp)) { ret = PTR_ERR(filp); goto err_fd; @@ -908,8 +926,8 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, { struct ib_uverbs_create_comp_channel cmd; struct ib_uverbs_create_comp_channel_resp resp; - struct file *filp; - int ret; + struct ib_uobject *uobj; + struct ib_uverbs_completion_event_file *ev_file; if (out_len < sizeof resp) return -ENOSPC; @@ -917,25 +935,23 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - ret = get_unused_fd_flags(O_CLOEXEC); - if (ret < 0) - return ret; - resp.fd = ret; + uobj = uobj_alloc(uobj_get_type(comp_channel), file->ucontext); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - filp = ib_uverbs_alloc_event_file(file, ib_dev, 0); - if (IS_ERR(filp)) { - put_unused_fd(resp.fd); - return PTR_ERR(filp); - } + resp.fd = uobj->id; + + ev_file = container_of(uobj, struct ib_uverbs_completion_event_file, + uobj_file.uobj); + ib_uverbs_init_event_file(&ev_file->ev_file); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { - put_unused_fd(resp.fd); - fput(filp); + uobj_alloc_abort(uobj); return -EFAULT; } - fd_install(resp.fd, filp); + uobj_alloc_commit(uobj); return in_len; } @@ -953,7 +969,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, void *context) { struct ib_ucq_object *obj; - struct ib_uverbs_event_file *ev_file = NULL; + struct ib_uverbs_completion_event_file *ev_file = NULL; struct ib_cq *cq; int ret; struct ib_uverbs_ex_create_cq_resp resp; @@ -968,9 +984,10 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, return obj; if (cmd->comp_channel >= 0) { - ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel); - if (!ev_file) { - ret = -EINVAL; + ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, + file->ucontext); + if (IS_ERR(ev_file)) { + ret = PTR_ERR(ev_file); goto err; } } @@ -998,7 +1015,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, cq->uobject = &obj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; - cq->cq_context = ev_file; + cq->cq_context = &ev_file->ev_file; atomic_set(&cq->usecnt, 0); obj->uobject.object = cq; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 8ee1d08b3135..0b0dab89106a 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -156,37 +156,37 @@ static struct kobj_type ib_uverbs_dev_ktype = { .release = ib_uverbs_release_dev, }; -static void ib_uverbs_release_event_file(struct kref *ref) +static void ib_uverbs_release_async_event_file(struct kref *ref) { - struct ib_uverbs_event_file *file = - container_of(ref, struct ib_uverbs_event_file, ref); + struct ib_uverbs_async_event_file *file = + container_of(ref, struct ib_uverbs_async_event_file, ref); kfree(file); } void ib_uverbs_release_ucq(struct ib_uverbs_file *file, - struct ib_uverbs_event_file *ev_file, + struct ib_uverbs_completion_event_file *ev_file, struct ib_ucq_object *uobj) { struct ib_uverbs_event 
*evt, *tmp; if (ev_file) { - spin_lock_irq(&ev_file->lock); + spin_lock_irq(&ev_file->ev_file.lock); list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&ev_file->lock); + spin_unlock_irq(&ev_file->ev_file.lock); - kref_put(&ev_file->ref, ib_uverbs_release_event_file); + uverbs_uobject_put(&ev_file->uobj_file.uobj); } - spin_lock_irq(&file->async_file->lock); + spin_lock_irq(&file->async_file->ev_file.lock); list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&file->async_file->lock); + spin_unlock_irq(&file->async_file->ev_file.lock); } void ib_uverbs_release_uevent(struct ib_uverbs_file *file, @@ -194,12 +194,12 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file, { struct ib_uverbs_event *evt, *tmp; - spin_lock_irq(&file->async_file->lock); + spin_lock_irq(&file->async_file->ev_file.lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&file->async_file->lock); + spin_unlock_irq(&file->async_file->ev_file.lock); } void ib_uverbs_detach_umcast(struct ib_qp *qp, @@ -253,10 +253,12 @@ void ib_uverbs_release_file(struct kref *ref) kfree(file); } -static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, - size_t count, loff_t *pos) +static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_file *file, + struct ib_uverbs_file *uverbs_file, + struct file *filp, char __user *buf, + size_t count, loff_t *pos, + bool is_async) { - struct ib_uverbs_event_file *file = filp->private_data; struct ib_uverbs_event *event; int eventsz; int ret = 0; @@ -275,12 +277,12 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, * and wake_up() guarentee this will see the null set * without using RCU */ - !file->uverbs_file->device->ib_dev))) + !uverbs_file->device->ib_dev))) return -ERESTARTSYS; /* If device was disassociated and no event exists set an error */ if (list_empty(&file->event_list) && - !file->uverbs_file->device->ib_dev) + !uverbs_file->device->ib_dev) return -EIO; spin_lock_irq(&file->lock); @@ -288,7 +290,7 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, event = list_entry(file->event_list.next, struct ib_uverbs_event, list); - if (file->is_async) + if (is_async) eventsz = sizeof (struct ib_uverbs_async_event_desc); else eventsz = sizeof (struct ib_uverbs_comp_event_desc); @@ -318,11 +320,31 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, return ret; } -static unsigned int ib_uverbs_event_poll(struct file *filp, +static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_uverbs_async_event_file *file = filp->private_data; + + return ib_uverbs_event_read(&file->ev_file, file->uverbs_file, filp, + buf, count, pos, true); +} + +static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_uverbs_completion_event_file *comp_ev_file = + filp->private_data; + + return ib_uverbs_event_read(&comp_ev_file->ev_file, + comp_ev_file->uobj_file.ufile, filp, + buf, count, pos, false); +} + +static unsigned int ib_uverbs_event_poll(struct ib_uverbs_event_file *file, + struct file *filp, struct poll_table_struct *wait) { unsigned int pollflags = 0; - struct ib_uverbs_event_file *file = filp->private_data; poll_wait(filp, &file->poll_wait, wait); @@ -334,49 +356,98 @@ static unsigned 
int ib_uverbs_event_poll(struct file *filp, return pollflags; } -static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) +static unsigned int ib_uverbs_async_event_poll(struct file *filp, + struct poll_table_struct *wait) +{ + return ib_uverbs_event_poll(filp->private_data, filp, wait); +} + +static unsigned int ib_uverbs_comp_event_poll(struct file *filp, + struct poll_table_struct *wait) +{ + struct ib_uverbs_completion_event_file *comp_ev_file = + filp->private_data; + + return ib_uverbs_event_poll(&comp_ev_file->ev_file, filp, wait); +} + +static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on) { struct ib_uverbs_event_file *file = filp->private_data; return fasync_helper(fd, filp, on, &file->async_queue); } -static int ib_uverbs_event_close(struct inode *inode, struct file *filp) +static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on) { - struct ib_uverbs_event_file *file = filp->private_data; + struct ib_uverbs_completion_event_file *comp_ev_file = + filp->private_data; + + return fasync_helper(fd, filp, on, &comp_ev_file->ev_file.async_queue); +} + +static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_async_event_file *file = filp->private_data; + struct ib_uverbs_file *uverbs_file = file->uverbs_file; struct ib_uverbs_event *entry, *tmp; int closed_already = 0; - mutex_lock(&file->uverbs_file->device->lists_mutex); - spin_lock_irq(&file->lock); - closed_already = file->is_closed; - file->is_closed = 1; - list_for_each_entry_safe(entry, tmp, &file->event_list, list) { + mutex_lock(&uverbs_file->device->lists_mutex); + spin_lock_irq(&file->ev_file.lock); + closed_already = file->ev_file.is_closed; + file->ev_file.is_closed = 1; + list_for_each_entry_safe(entry, tmp, &file->ev_file.event_list, list) { if (entry->counter) list_del(&entry->obj_list); kfree(entry); } - spin_unlock_irq(&file->lock); + spin_unlock_irq(&file->ev_file.lock); if (!closed_already) { list_del(&file->list); - if (file->is_async) - ib_unregister_event_handler(&file->uverbs_file-> - event_handler); + ib_unregister_event_handler(&uverbs_file->event_handler); + } + mutex_unlock(&uverbs_file->device->lists_mutex); + + kref_put(&uverbs_file->ref, ib_uverbs_release_file); + kref_put(&file->ref, ib_uverbs_release_async_event_file); + + return 0; +} + +static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_completion_event_file *file = filp->private_data; + struct ib_uverbs_event *entry, *tmp; + + spin_lock_irq(&file->ev_file.lock); + list_for_each_entry_safe(entry, tmp, &file->ev_file.event_list, list) { + if (entry->counter) + list_del(&entry->obj_list); + kfree(entry); } - mutex_unlock(&file->uverbs_file->device->lists_mutex); + spin_unlock_irq(&file->ev_file.lock); - kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); - kref_put(&file->ref, ib_uverbs_release_event_file); + uverbs_close_fd(filp); return 0; } -static const struct file_operations uverbs_event_fops = { +const struct file_operations uverbs_event_fops = { .owner = THIS_MODULE, - .read = ib_uverbs_event_read, - .poll = ib_uverbs_event_poll, - .release = ib_uverbs_event_close, - .fasync = ib_uverbs_event_fasync, + .read = ib_uverbs_comp_event_read, + .poll = ib_uverbs_comp_event_poll, + .release = ib_uverbs_comp_event_close, + .fasync = ib_uverbs_comp_event_fasync, + .llseek = no_llseek, +}; + +static const struct file_operations uverbs_async_event_fops = { + .owner = THIS_MODULE, + .read = 
ib_uverbs_async_event_read, + .poll = ib_uverbs_async_event_poll, + .release = ib_uverbs_async_event_close, + .fasync = ib_uverbs_async_event_fasync, .llseek = no_llseek, }; @@ -423,15 +494,15 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, struct ib_uverbs_event *entry; unsigned long flags; - spin_lock_irqsave(&file->async_file->lock, flags); - if (file->async_file->is_closed) { - spin_unlock_irqrestore(&file->async_file->lock, flags); + spin_lock_irqsave(&file->async_file->ev_file.lock, flags); + if (file->async_file->ev_file.is_closed) { + spin_unlock_irqrestore(&file->async_file->ev_file.lock, flags); return; } entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) { - spin_unlock_irqrestore(&file->async_file->lock, flags); + spin_unlock_irqrestore(&file->async_file->ev_file.lock, flags); return; } @@ -440,13 +511,13 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, entry->desc.async.reserved = 0; entry->counter = counter; - list_add_tail(&entry->list, &file->async_file->event_list); + list_add_tail(&entry->list, &file->async_file->ev_file.event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); - spin_unlock_irqrestore(&file->async_file->lock, flags); + spin_unlock_irqrestore(&file->async_file->ev_file.lock, flags); - wake_up_interruptible(&file->async_file->poll_wait); - kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&file->async_file->ev_file.poll_wait); + kill_fasync(&file->async_file->ev_file.async_queue, SIGIO, POLL_IN); } void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) @@ -509,15 +580,23 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler, void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file) { - kref_put(&file->async_file->ref, ib_uverbs_release_event_file); + kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file); file->async_file = NULL; } -struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, - struct ib_device *ib_dev, - int is_async) +void ib_uverbs_init_event_file(struct ib_uverbs_event_file *ev_file) { - struct ib_uverbs_event_file *ev_file; + spin_lock_init(&ev_file->lock); + INIT_LIST_HEAD(&ev_file->event_list); + init_waitqueue_head(&ev_file->poll_wait); + ev_file->is_closed = 0; + ev_file->async_queue = NULL; +} + +struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file, + struct ib_device *ib_dev) +{ + struct ib_uverbs_async_event_file *ev_file; struct file *filp; int ret; @@ -525,16 +604,11 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, if (!ev_file) return ERR_PTR(-ENOMEM); - kref_init(&ev_file->ref); - spin_lock_init(&ev_file->lock); - INIT_LIST_HEAD(&ev_file->event_list); - init_waitqueue_head(&ev_file->poll_wait); + ib_uverbs_init_event_file(&ev_file->ev_file); ev_file->uverbs_file = uverbs_file; kref_get(&ev_file->uverbs_file->ref); - ev_file->async_queue = NULL; - ev_file->is_closed = 0; - - filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops, + kref_init(&ev_file->ref); + filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops, ev_file, O_RDONLY); if (IS_ERR(filp)) goto err_put_refs; @@ -544,64 +618,33 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, &uverbs_file->device->uverbs_events_file_list); mutex_unlock(&uverbs_file->device->lists_mutex); - if (is_async) { - WARN_ON(uverbs_file->async_file); - uverbs_file->async_file = ev_file; - 
kref_get(&uverbs_file->async_file->ref); - INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler, - ib_dev, - ib_uverbs_event_handler); - ret = ib_register_event_handler(&uverbs_file->event_handler); - if (ret) - goto err_put_file; - - /* At that point async file stuff was fully set */ - ev_file->is_async = 1; - } + WARN_ON(uverbs_file->async_file); + uverbs_file->async_file = ev_file; + kref_get(&uverbs_file->async_file->ref); + INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler, + ib_dev, + ib_uverbs_event_handler); + ret = ib_register_event_handler(&uverbs_file->event_handler); + if (ret) + goto err_put_file; + + /* At that point async file stuff was fully set */ return filp; err_put_file: fput(filp); - kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file); + kref_put(&uverbs_file->async_file->ref, + ib_uverbs_release_async_event_file); uverbs_file->async_file = NULL; return ERR_PTR(ret); err_put_refs: kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file); - kref_put(&ev_file->ref, ib_uverbs_release_event_file); + kref_put(&ev_file->ref, ib_uverbs_release_async_event_file); return filp; } -/* - * Look up a completion event file by FD. If lookup is successful, - * takes a ref to the event file struct that it returns; if - * unsuccessful, returns NULL. - */ -struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) -{ - struct ib_uverbs_event_file *ev_file = NULL; - struct fd f = fdget(fd); - - if (!f.file) - return NULL; - - if (f.file->f_op != &uverbs_event_fops) - goto out; - - ev_file = f.file->private_data; - if (ev_file->is_async) { - ev_file = NULL; - goto out; - } - - kref_get(&ev_file->ref); - -out: - fdput(f); - return ev_file; -} - static int verify_command_mask(struct ib_device *ib_dev, __u32 command) { u64 mask; @@ -896,7 +939,8 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) mutex_unlock(&file->device->lists_mutex); if (file->async_file) - kref_put(&file->async_file->ref, ib_uverbs_release_event_file); + kref_put(&file->async_file->ref, + ib_uverbs_release_async_event_file); kref_put(&file->ref, ib_uverbs_release_file); kobject_put(&dev->kobj); @@ -1095,7 +1139,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, struct ib_device *ib_dev) { struct ib_uverbs_file *file; - struct ib_uverbs_event_file *event_file; + struct ib_uverbs_async_event_file *event_file; struct ib_event event; /* Pending running commands to terminate */ @@ -1144,21 +1188,20 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { event_file = list_first_entry(&uverbs_dev-> uverbs_events_file_list, - struct ib_uverbs_event_file, + struct ib_uverbs_async_event_file, list); - spin_lock_irq(&event_file->lock); - event_file->is_closed = 1; - spin_unlock_irq(&event_file->lock); + spin_lock_irq(&event_file->ev_file.lock); + event_file->ev_file.is_closed = 1; + spin_unlock_irq(&event_file->ev_file.lock); list_del(&event_file->list); - if (event_file->is_async) { - ib_unregister_event_handler(&event_file->uverbs_file-> - event_handler); - event_file->uverbs_file->event_handler.device = NULL; - } + ib_unregister_event_handler( + &event_file->uverbs_file->event_handler); + event_file->uverbs_file->event_handler.device = + NULL; - wake_up_interruptible(&event_file->poll_wait); - kill_fasync(&event_file->async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&event_file->ev_file.poll_wait); + kill_fasync(&event_file->ev_file.async_queue, SIGIO, POLL_IN); } 
mutex_unlock(&uverbs_dev->lists_mutex);
}
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index a514556139e7..7f26af5ea066 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -145,7 +145,11 @@ int uverbs_free_cq(struct ib_uobject *uobject,
 ret = ib_destroy_cq(cq);
 if (!ret || why != RDMA_REMOVE_DESTROY)
- ib_uverbs_release_ucq(uobject->context->ufile, ev_file, ucq);
+ ib_uverbs_release_ucq(uobject->context->ufile, ev_file ?
+ container_of(ev_file,
+ struct ib_uverbs_completion_event_file,
+ ev_file) : NULL,
+ ucq);
 return ret;
}
@@ -186,6 +190,33 @@ int uverbs_free_pd(struct ib_uobject *uobject,
 return 0;
}
+int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file,
+ enum rdma_remove_reason why)
+{
+ struct ib_uverbs_completion_event_file *comp_event_file =
+ container_of(uobj_file, struct ib_uverbs_completion_event_file,
+ uobj_file);
+ struct ib_uverbs_event_file *event_file = &comp_event_file->ev_file;
+
+ spin_lock_irq(&event_file->lock);
+ event_file->is_closed = 1;
+ spin_unlock_irq(&event_file->lock);
+
+ if (why == RDMA_REMOVE_DRIVER_REMOVE) {
+ wake_up_interruptible(&event_file->poll_wait);
+ kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
+ }
+ return 0;
+};
+
+const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel = {
+ .type = UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), 0),
+ .context_closed = uverbs_hot_unplug_completion_event_file,
+ .fops = &uverbs_event_fops,
+ .name = "[infinibandevent]",
+ .flags = O_RDONLY,
+};
+
 const struct uverbs_obj_idr_type uverbs_type_attrs_cq = {
 .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0),
 .destroy_object = uverbs_free_cq,
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 88856642fdf5..7771ce966952 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -35,6 +35,7 @@
 #include
+extern const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel;
 extern const struct uverbs_obj_idr_type uverbs_type_attrs_cq;
 extern const struct uverbs_obj_idr_type uverbs_type_attrs_qp;
 extern const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table;
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 58674290fab0..a37692167a45 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -146,9 +146,18 @@ struct uverbs_obj_fd_type {
};
 extern const struct uverbs_obj_type_class uverbs_idr_class;
+extern const struct uverbs_obj_type_class uverbs_fd_class;
 #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \
 sizeof(char))
+#define UVERBS_TYPE_ALLOC_FD(_size, _order) \
+ { \
+ .destroy_order = _order, \
+ .type_class = &uverbs_fd_class, \
+ .obj_size = (_size) + \
+ UVERBS_BUILD_BUG_ON((_size) < \
+ sizeof(struct ib_uobject_file)),\
+ }
 #define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _order) \
 { \
 .destroy_order = _order, \
-- cgit v1.2.3
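From user space none of this restructuring is visible: the fd returned by ibv_create_comp_channel() is simply backed by a uobject now. A minimal consumer of that fd, assuming libibverbs and omitting error handling, looks like:

#include <infiniband/verbs.h>

static int wait_one_completion(struct ibv_context *ctx)
{
	struct ibv_comp_channel *ch = ibv_create_comp_channel(ctx);
	struct ibv_cq *cq = ibv_create_cq(ctx, 16, NULL, ch, 0);
	struct ibv_cq *ev_cq;
	void *ev_ctx;

	ibv_req_notify_cq(cq, 0);
	/* blocks in read() on the channel fd until the CQ posts an event */
	ibv_get_cq_event(ch, &ev_cq, &ev_ctx);
	ibv_ack_cq_events(ev_cq, 1);

	ibv_destroy_cq(cq);
	ibv_destroy_comp_channel(ch);
	return 0;
}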
From cd6ce4a5737829052abc4ffc8befd0adfff8998d Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Thu, 30 Mar 2017 15:56:01 +0100
Subject: IB/hns: Explicitly include linux/of.h

hns_roce_hw_v1.c uses DT interfaces but relies on implicit inclusion of linux/of.h, which means that changes in other headers could break the build, as happened in -next for arm64 today. Add an explicit include.

Signed-off-by: Mark Brown
Signed-off-by: Doug Ledford
---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index b8111b0c8877..5c48dbf43c1a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -33,6 +33,7 @@
 #include
 #include
 #include
+#include <linux/of.h>
 #include
 #include "hns_roce_common.h"
 #include "hns_roce_device.h"
-- cgit v1.2.3

From 771a52584096c45e4565e8aabb596eece9d73d61 Mon Sep 17 00:00:00 2001
From: Shamir Rabinovitch
Date: Wed, 29 Mar 2017 06:21:59 -0400
Subject: IB/IPoIB: ibX: failed to create mcg debug file

When udev renames the netdev devices, the ipoib debugfs entries do not get renamed. As a result, if a subsequent probe of an ipoib device reuses the name, creating a debugfs entry for the new device fails. Also, move ipoib_create_debug_files and ipoib_delete_debug_files into the ipoib netdev event handling in order to avoid any race condition between them.

Fixes: 1732b0ef3b3a ([IPoIB] add path record information in debugfs)
Cc: stable@vger.kernel.org # 2.6.15+
Signed-off-by: Vijay Kumar
Signed-off-by: Shamir Rabinovitch
Reviewed-by: Mark Bloch
Signed-off-by: Doug Ledford
---
 drivers/infiniband/ulp/ipoib/ipoib_fs.c | 3 +++
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 44 +++++++++++++++++++++++++++----
 drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 3 ---
 3 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 6bd5740e2691..09396bd7b02d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -281,8 +281,11 @@ void ipoib_delete_debug_files(struct net_device *dev)
{
 struct ipoib_dev_priv *priv = netdev_priv(dev);
+ WARN_ONCE(!priv->mcg_dentry, "null mcg debug file\n");
+ WARN_ONCE(!priv->path_dentry, "null path debug file\n");
 debugfs_remove(priv->mcg_dentry);
 debugfs_remove(priv->path_dentry);
+ priv->mcg_dentry = priv->path_dentry = NULL;
}
 int ipoib_register_debugfs(void)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index d1d3fb7a6127..b319cc26c9a7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,6 +108,33 @@ static struct ib_client ipoib_client = {
 .get_net_dev_by_params = ipoib_get_net_dev_by_params,
};
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+static int ipoib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct netdev_notifier_info *ni = ptr;
+ struct net_device *dev = ni->dev;
+
+ if (dev->netdev_ops->ndo_open != ipoib_open)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ ipoib_create_debug_files(dev);
+ break;
+ case NETDEV_CHANGENAME:
+ ipoib_delete_debug_files(dev);
+ ipoib_create_debug_files(dev);
+ break;
+ case NETDEV_UNREGISTER:
+ ipoib_delete_debug_files(dev);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+#endif
+
 int ipoib_open(struct net_device *dev)
{
 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -1674,8 +1701,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
 ASSERT_RTNL();
- ipoib_delete_debug_files(dev);
-
 /* Delete any child interfaces first */
 list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
 /* Stop GC on child */
@@ -2090,8 +2115,6 @@ static struct net_device *ipoib_add_port(const char *format,
 goto register_failed;
 }
- ipoib_create_debug_files(priv->dev);
-
 if
(ipoib_cm_add_mode_attr(priv->dev)) goto sysfs_failed; if (ipoib_add_pkey_attr(priv->dev)) @@ -2106,7 +2129,6 @@ static struct net_device *ipoib_add_port(const char *format, return priv->dev; sysfs_failed: - ipoib_delete_debug_files(priv->dev); unregister_netdev(priv->dev); register_failed: @@ -2191,6 +2213,12 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) kfree(dev_list); } +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG +static struct notifier_block ipoib_netdev_notifier = { + .notifier_call = ipoib_netdev_event, +}; +#endif + static int __init ipoib_init_module(void) { int ret; @@ -2243,6 +2271,9 @@ static int __init ipoib_init_module(void) if (ret) goto err_client; +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG + register_netdevice_notifier(&ipoib_netdev_notifier); +#endif return 0; err_client: @@ -2260,6 +2291,9 @@ err_fs: static void __exit ipoib_cleanup_module(void) { +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG + unregister_netdevice_notifier(&ipoib_netdev_notifier); +#endif ipoib_netlink_fini(); ib_unregister_client(&ipoib_client); ib_sa_unregister_client(&ipoib_sa_client); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 3e10e3dac2e7..e543bc745f34 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -86,8 +86,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, goto register_failed; } - ipoib_create_debug_files(priv->dev); - /* RTNL childs don't need proprietary sysfs entries */ if (type == IPOIB_LEGACY_CHILD) { if (ipoib_cm_add_mode_attr(priv->dev)) @@ -108,7 +106,6 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, sysfs_failed: result = -ENOMEM; - ipoib_delete_debug_files(priv->dev); unregister_netdevice(priv->dev); register_failed: -- cgit v1.2.3 From ec8a142327f85ae9aa5ad784520e669a34839941 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Mon, 20 Mar 2017 17:24:39 -0700 Subject: IB/hfi1: Force logical link down If the logical link state does not read as down when the physical link state is offline, force it to down. Reviewed-by: Dennis Dalessandro Reviewed-by: Jakub Byczkowski Signed-off-by: Dean Luick Signed-off-by: Easwar Hariharan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 86 +++++++++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 121a4c920f1b..44322c645469 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1045,6 +1045,7 @@ static void dc_start(struct hfi1_devdata *); static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, unsigned int *np); static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd); +static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms); /* * Error interrupt table entry. 
This is used as input to the interrupt @@ -8891,8 +8892,6 @@ int send_idle_sma(struct hfi1_devdata *dd, u64 message) */ static int do_quick_linkup(struct hfi1_devdata *dd) { - u64 reg; - unsigned long timeout; int ret; lcb_shutdown(dd, 0); @@ -8915,19 +8914,9 @@ static int do_quick_linkup(struct hfi1_devdata *dd) write_csr(dd, DC_LCB_CFG_RUN, 1ull << DC_LCB_CFG_RUN_EN_SHIFT); - /* watch LCB_STS_LINK_TRANSFER_ACTIVE */ - timeout = jiffies + msecs_to_jiffies(10); - while (1) { - reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE); - if (reg) - break; - if (time_after(jiffies, timeout)) { - dd_dev_err(dd, - "timeout waiting for LINK_TRANSFER_ACTIVE\n"); - return -ETIMEDOUT; - } - udelay(2); - } + ret = wait_link_transfer_active(dd, 10); + if (ret) + return ret; write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT); @@ -10082,6 +10071,64 @@ static void check_lni_states(struct hfi1_pportdata *ppd) decode_state_complete(ppd, last_remote_state, "received"); } +/* wait for wait_ms for LINK_TRANSFER_ACTIVE to go to 1 */ +static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms) +{ + u64 reg; + unsigned long timeout; + + /* watch LCB_STS_LINK_TRANSFER_ACTIVE */ + timeout = jiffies + msecs_to_jiffies(wait_ms); + while (1) { + reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE); + if (reg) + break; + if (time_after(jiffies, timeout)) { + dd_dev_err(dd, + "timeout waiting for LINK_TRANSFER_ACTIVE\n"); + return -ETIMEDOUT; + } + udelay(2); + } + return 0; +} + +/* called when the logical link state is not down as it should be */ +static void force_logical_link_state_down(struct hfi1_pportdata *ppd) +{ + struct hfi1_devdata *dd = ppd->dd; + + /* + * Bring link up in LCB loopback + */ + write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1); + write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, + DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK); + + write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0); + write_csr(dd, DC_LCB_CFG_REINIT_AS_SLAVE, 0); + write_csr(dd, DC_LCB_CFG_CNT_FOR_SKIP_STALL, 0x110); + write_csr(dd, DC_LCB_CFG_LOOPBACK, 0x2); + + write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0); + (void)read_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET); + udelay(3); + write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 1); + write_csr(dd, DC_LCB_CFG_RUN, 1ull << DC_LCB_CFG_RUN_EN_SHIFT); + + wait_link_transfer_active(dd, 100); + + /* + * Bring the link down again. + */ + write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1); + write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 0); + write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, 0); + + /* call again to adjust ppd->statusp, if needed */ + get_logical_state(ppd); +} + /* * Helper for set_link_state(). Do not call except from that routine. * Expects ppd->hls_mutex to be held. @@ -10135,15 +10182,18 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) return ret; } - /* make sure the logical state is also down */ - wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000); - /* * Now in charge of LCB - must be after the physical state is * offline.quiet and before host_link_state is changed. */ set_host_lcb_access(dd); write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */ + + /* make sure the logical state is also down */ + ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000); + if (ret) + force_logical_link_state_down(ppd); + ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */ if (ppd->port_type == PORT_TYPE_QSFP && -- cgit v1.2.3 From 0519c520dc38e9950f4032d0f41f2b72b2c3c940 Mon Sep 17 00:00:00 2001 From: Michael J. 
Ruhl Date: Mon, 20 Mar 2017 17:24:45 -0700 Subject: IB/hfi1: Race hazard avoidance in user SDMA driver Set the errcode before the state and add the smp_wmb() to avoid a potential race condition with the user. Reviewed-by: Dennis Dalessandro Signed-off-by: Michael Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index e6811c4edc73..060e374ca37b 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1615,9 +1615,10 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, { hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d", pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret); - cq->comps[idx].status = state; if (state == ERROR) cq->comps[idx].errcode = -ret; + smp_wmb(); /* make sure errcode is visible first */ + cq->comps[idx].status = state; trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt, idx, state, ret); } -- cgit v1.2.3 From 8688426ba6464f7079649f52cf9108856c419415 Mon Sep 17 00:00:00 2001 From: Michael J. Ruhl Date: Mon, 20 Mar 2017 17:24:51 -0700 Subject: IB/hfi1: Cache registers during state change When the LCB is going offline, inopportune port queries can cause benign error messages to be logged. To deal with this, cache the registers just before setting the LCB to offline, allowing queries to return without eliciting the error. Reviewed-by: Dennis Dalessandro Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 58 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 44322c645469..8b8840a676e4 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8344,6 +8344,52 @@ static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data) return 0; } +/* + * Provide a cache for some of the LCB registers in case the LCB is + * unavailable. + * (The LCB is unavailable in certain link states, for example.) + */ +struct lcb_datum { + u32 off; + u64 val; +}; + +static struct lcb_datum lcb_cache[] = { + { DC_LCB_ERR_INFO_RX_REPLAY_CNT, 0}, + { DC_LCB_ERR_INFO_SEQ_CRC_CNT, 0 }, + { DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT, 0 }, +}; + +static void update_lcb_cache(struct hfi1_devdata *dd) +{ + int i; + int ret; + u64 val; + + for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) { + ret = read_lcb_csr(dd, lcb_cache[i].off, &val); + + /* Update if we get good data */ + if (likely(ret != -EBUSY)) + lcb_cache[i].val = val; + } +} + +static int read_lcb_cache(u32 off, u64 *val) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) { + if (lcb_cache[i].off == off) { + *val = lcb_cache[i].val; + return 0; + } + } + + pr_warn("%s bad offset 0x%x\n", __func__, off); + return -1; +} + /* * Read an LCB CSR. Access may not be in host control, so check. * Return 0 on success, -EBUSY on failure. 
@@ -8355,9 +8401,13 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data) /* if up, go through the 8051 for the value */ if (ppd->host_link_state & HLS_UP) return read_lcb_via_8051(dd, addr, data); - /* if going up or down, no access */ - if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE)) - return -EBUSY; + /* if going up or down, check the cache, otherwise, no access */ + if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE)) { + if (read_lcb_cache(addr, data)) + return -EBUSY; + return 0; + } + /* otherwise, host has access */ *data = read_csr(dd, addr); return 0; @@ -10145,6 +10195,8 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) int do_transition; int do_wait; + update_lcb_cache(dd); + previous_state = ppd->host_link_state; ppd->host_link_state = HLS_GOING_OFFLINE; pstate = read_physical_state(dd); -- cgit v1.2.3 From 5a52a7acf7e2a812d2852342992cee3dc22ad25d Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Mon, 20 Mar 2017 17:24:58 -0700 Subject: IB/hfi1: NULL pointer dereference when freeing rhashtable A NULL pointer dereference occurs when the driver is unloaded and the SDMA rhashtable is freed even though rhashtable_init() was never called. Prevent this by changing sdma_rht to be a pointer to a dynamically allocated hash table; a non-NULL pointer then indicates that the hash table was initialized and needs to be destroyed. Fixes: 0cb2aa690c7e ("IB/hfi1: Add sysfs interface for affinity setup") Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 2 +- drivers/infiniband/hw/hfi1/sdma.c | 38 +++++++++++++++++++++++++++----------- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 0808e3c3ba39..b69ab4736c86 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1167,7 +1167,7 @@ struct hfi1_devdata { bool eprom_available; /* true if EPROM is available for this device */ bool aspm_supported; /* Does HW support ASPM */ bool aspm_enabled; /* ASPM state: enabled/disabled */ - struct rhashtable sdma_rht; + struct rhashtable *sdma_rht; struct kobject kobj; }; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 5cde1ecda0fe..d89852b1f984 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -868,7 +868,7 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, cpu_id = smp_processor_id(); rcu_read_lock(); - rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id, + rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id, sdma_rht_params); if (rht_node && rht_node->map[vl]) { @@ -962,7 +962,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, continue; } - rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu, + rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu, sdma_rht_params); if (!rht_node) { rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL); @@ -982,7 +982,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, rht_node->map[vl]->ctr = 1; rht_node->map[vl]->sde[0] = sde; - ret = rhashtable_insert_fast(&dd->sdma_rht, + ret = rhashtable_insert_fast(dd->sdma_rht, &rht_node->node, sdma_rht_params); if (ret) { @@ -1025,7 +1025,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, if (cpumask_test_cpu(cpu, 
mask)) continue; - rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu, + rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu, sdma_rht_params); if (rht_node) { bool empty = true; @@ -1049,7 +1049,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, } if (empty) { - ret = rhashtable_remove_fast(&dd->sdma_rht, + ret = rhashtable_remove_fast(dd->sdma_rht, &rht_node->node, sdma_rht_params); WARN_ON(ret); @@ -1108,7 +1108,7 @@ void sdma_seqfile_dump_cpu_list(struct seq_file *s, struct sdma_rht_node *rht_node; int i, j; - rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid, + rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpuid, sdma_rht_params); if (!rht_node) return; @@ -1322,6 +1322,12 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) synchronize_rcu(); kfree(dd->per_sdma); dd->per_sdma = NULL; + + if (dd->sdma_rht) { + rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL); + kfree(dd->sdma_rht); + dd->sdma_rht = NULL; + } } /** @@ -1341,12 +1347,14 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) { unsigned this_idx; struct sdma_engine *sde; + struct rhashtable *tmp_sdma_rht; u16 descq_cnt; void *curr_head; struct hfi1_pportdata *ppd = dd->pport + port; u32 per_sdma_credits; uint idle_cnt = sdma_idle_cnt; size_t num_engines = dd->chip_sdma_engines; + int ret = -ENOMEM; if (!HFI1_CAP_IS_KSET(SDMA)) { HFI1_CAP_CLEAR(SDMA_AHG); @@ -1378,7 +1386,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) /* alloc memory for array of send engines */ dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL); if (!dd->per_sdma) - return -ENOMEM; + return ret; idle_cnt = ns_to_cclock(dd, idle_cnt); if (!sdma_desct_intr) @@ -1507,18 +1515,27 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) dd->flags |= HFI1_HAS_SEND_DMA; dd->flags |= idle_cnt ? HFI1_HAS_SDMA_TIMEOUT : 0; dd->num_sdma = num_engines; - if (sdma_map_init(dd, port, ppd->vls_operational, NULL)) + ret = sdma_map_init(dd, port, ppd->vls_operational, NULL); + if (ret < 0) + goto bail; + + tmp_sdma_rht = kzalloc(sizeof(*tmp_sdma_rht), GFP_KERNEL); + if (!tmp_sdma_rht) { + ret = -ENOMEM; goto bail; + } - if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params)) + ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params); + if (ret < 0) goto bail; + dd->sdma_rht = tmp_sdma_rht; dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma); return 0; bail: sdma_clean(dd, num_engines); - return -ENOMEM; + return ret; } /** @@ -1604,7 +1621,6 @@ void sdma_exit(struct hfi1_devdata *dd) sdma_finalput(&sde->state); } sdma_clean(dd, dd->num_sdma); - rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL); } /* -- cgit v1.2.3 From 43a474aadbd55252cea2036bac36e3ad159344b2 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 20 Mar 2017 17:25:04 -0700 Subject: IB/rdmavt, IB/hfi1, IB/qib: Make wc opcode translation driver dependent The work to create a completion helper moved the translation of send wqe operations to completion opcodes to rdmavt. This precludes having driver dependent operations. Make the translation driver dependent by doing the translation in the driver prior to the rvt_qp_swqe_complete() call, using translation tables restored to each driver. 
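The building block here is a plain designated-initializer lookup table, indexed by send opcode; slots a driver never posts can simply be omitted, since unlisted entries are zero-filled. A minimal sketch, using a hypothetical driver prefix rather than the real ib_hfi1_wc_opcode/ib_qib_wc_opcode symbols from the diffs below:

	/* hypothetical per-driver wr -> wc opcode translation table */
	static const enum ib_wc_opcode mydrv_wc_opcode[] = {
		[IB_WR_SEND]       = IB_WC_SEND,
		[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
		[IB_WR_RDMA_READ]  = IB_WC_RDMA_READ,
	};

	/* at completion time the driver, not rdmavt, picks the wc opcode */
	rvt_qp_swqe_complete(qp, wqe,
			     mydrv_wc_opcode[wqe->wr.opcode],
			     IB_WC_SUCCESS);

Because each driver owns its table, a device that does not implement, say, memory registration work requests never has to carry a translation entry for it.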
Fixes: Commit f2dc9cdce83c ("IB/rdmavt: Add a send completion helper") Fixes: Commit 0771da5a6e9d ("IB/hfi1,IB/qib: Use new send completion helper") Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 10 ++++++++-- drivers/infiniband/hw/hfi1/ruc.c | 5 ++++- drivers/infiniband/hw/hfi1/verbs.c | 16 ++++++++++++++++ drivers/infiniband/hw/qib/qib_rc.c | 10 ++++++++-- drivers/infiniband/hw/qib/qib_ruc.c | 5 ++++- drivers/infiniband/hw/qib/qib_verbs.c | 13 +++++++++++++ drivers/infiniband/sw/rdmavt/qp.c | 17 ----------------- include/rdma/rdmavt_qp.h | 3 ++- 8 files changed, 55 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 7382be11afca..4649530ac4e8 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1034,7 +1034,10 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) /* see post_send() */ barrier(); rvt_put_swqe(wqe); - rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); + rvt_qp_swqe_complete(qp, + wqe, + ib_hfi1_wc_opcode[wqe->wr.opcode], + IB_WC_SUCCESS); } /* * If we were waiting for sends to complete before re-sending, @@ -1081,7 +1084,10 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, qp->s_last = s_last; /* see post_send() */ barrier(); - rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); + rvt_qp_swqe_complete(qp, + wqe, + ib_hfi1_wc_opcode[wqe->wr.opcode], + IB_WC_SUCCESS); } else { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index aa15bcbfb079..d2eb793b34af 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -920,7 +920,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, qp->ibqp.qp_type == IB_QPT_GSI) atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); - rvt_qp_swqe_complete(qp, wqe, status); + rvt_qp_swqe_complete(qp, + wqe, + ib_hfi1_wc_opcode[wqe->wr.opcode], + status); if (qp->s_acked == old_last) qp->s_acked = last; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 222315fadab1..815cb44b7693 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -296,6 +296,22 @@ static inline bool wss_exceeds_threshold(void) return atomic_read(&wss.total_count) >= wss.threshold; } +/* + * Translate ib_wr_opcode into ib_wc_opcode. 
+ */ +const enum ib_wc_opcode ib_hfi1_wc_opcode[] = { + [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, + [IB_WR_SEND] = IB_WC_SEND, + [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, + [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, + [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, + [IB_WR_SEND_WITH_INV] = IB_WC_SEND, + [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV, + [IB_WR_REG_MR] = IB_WC_REG_MR +}; + /* * Length of header by opcode, 0 --> not supported */ diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 12658e3fe154..023498745b8a 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -938,7 +938,10 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) /* see post_send() */ barrier(); rvt_put_swqe(wqe); - rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); + rvt_qp_swqe_complete(qp, + wqe, + ib_qib_wc_opcode[wqe->wr.opcode], + IB_WC_SUCCESS); } /* * If we were waiting for sends to complete before resending, @@ -983,7 +986,10 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, qp->s_last = s_last; /* see post_send() */ barrier(); - rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); + rvt_qp_swqe_complete(qp, + wqe, + ib_qib_wc_opcode[wqe->wr.opcode], + IB_WC_SUCCESS); } else this_cpu_inc(*ibp->rvp.rc_delayed_comp); diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 17655cc3e6fe..6e1adf709483 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -769,7 +769,10 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, qp->ibqp.qp_type == IB_QPT_GSI) atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); - rvt_qp_swqe_complete(qp, wqe, status); + rvt_qp_swqe_complete(qp, + wqe, + ib_qib_wc_opcode[wqe->wr.opcode], + status); if (qp->s_acked == old_last) qp->s_acked = last; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 83f8b5f24381..e120efefb8d7 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -113,6 +113,19 @@ static unsigned int ib_qib_disable_sma; module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(disable_sma, "Disable the SMA"); +/* + * Translate ib_wr_opcode into ib_wc_opcode. + */ +const enum ib_wc_opcode ib_qib_wc_opcode[] = { + [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, + [IB_WR_SEND] = IB_WC_SEND, + [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, + [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, + [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD +}; + /* * System image GUID. */ diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index f5ad8d4bfb39..28fb7241ae6b 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -117,23 +117,6 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = { }; EXPORT_SYMBOL(ib_rvt_state_ops); -/* - * Translate ib_wr_opcode into ib_wc_opcode. 
- */ -const enum ib_wc_opcode ib_rvt_wc_opcode[] = { - [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, - [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, - [IB_WR_SEND] = IB_WC_SEND, - [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, - [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, - [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, - [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, - [IB_WR_SEND_WITH_INV] = IB_WC_SEND, - [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV, - [IB_WR_REG_MR] = IB_WC_REG_MR -}; -EXPORT_SYMBOL(ib_rvt_wc_opcode); - static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, gfp_t gfp) diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index f3816396c76a..3cdd9e26e96e 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -574,6 +574,7 @@ extern const enum ib_wc_opcode ib_rvt_wc_opcode[]; static inline void rvt_qp_swqe_complete( struct rvt_qp *qp, struct rvt_swqe *wqe, + enum ib_wc_opcode opcode, enum ib_wc_status status) { if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) @@ -586,7 +587,7 @@ static inline void rvt_qp_swqe_complete( memset(&wc, 0, sizeof(wc)); wc.wr_id = wqe->wr.wr_id; wc.status = status; - wc.opcode = ib_rvt_wc_opcode[wqe->wr.opcode]; + wc.opcode = opcode; wc.qp = &qp->ibqp; wc.byte_len = wqe->length; rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, -- cgit v1.2.3 From 2a1b7c8b8e9d6f069e71537a54a670a174e2a8f3 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 20 Mar 2017 17:25:10 -0700 Subject: IB/rdmavt: Add additional fields to post send trace This patch captures additional debugging information in the post send trace. The following fields are added: - wqe - qpt - num_sge - ssn - pid - send_flags These additional fields provide for more focused filtering and triggering. The patch also moves the trace to just before the wqe is posted to get the most accurate information, and future-proofs the code to trace all possible reserved opcodes. 
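Moving the trace matters because of ordering: the tracepoint should observe the wqe only after all slot accounting is final, but before the store that publishes the entry to the consumer. A kernel-style sketch of that single-producer pattern, with hypothetical helper names standing in for the real code in the diff below:

	fill_wqe(wqe, wr);         /* write every field of the entry */
	account_slot(qp, wqe);     /* reserve or consume, e.g. s_avail-- */
	trace_post_wqe(qp, wqe);   /* state traced here is now final */
	smp_wmb();                 /* order the writes above ... */
	qp->s_head = next;         /* ... before publishing the entry */

Tracing before the accounting, as the old code did, could report a stale s_avail for the very wqe being posted.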
Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 2 +- drivers/infiniband/sw/rdmavt/trace_tx.h | 34 ++++++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 28fb7241ae6b..3c55a8b420fa 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1772,11 +1772,11 @@ static int rvt_post_one_wr(struct rvt_qp *qp, 0); qp->s_next_psn = wqe->lpsn + 1; } - trace_rvt_post_one_wr(qp, wqe); if (unlikely(reserved_op)) rvt_qp_wqe_reserve(qp, wqe); else qp->s_avail--; + trace_rvt_post_one_wr(qp, wqe); smp_wmb(); /* see request builders */ qp->s_head = next; diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h index 0e03173662d8..a613a2223751 100644 --- a/drivers/infiniband/sw/rdmavt/trace_tx.h +++ b/drivers/infiniband/sw/rdmavt/trace_tx.h @@ -71,10 +71,20 @@ __print_symbolic(opcode, \ wr_opcode_name(RDMA_READ_WITH_INV), \ wr_opcode_name(LOCAL_INV), \ wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \ - wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD)) + wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD), \ + wr_opcode_name(RESERVED1), \ + wr_opcode_name(RESERVED2), \ + wr_opcode_name(RESERVED3), \ + wr_opcode_name(RESERVED4), \ + wr_opcode_name(RESERVED5), \ + wr_opcode_name(RESERVED6), \ + wr_opcode_name(RESERVED7), \ + wr_opcode_name(RESERVED8), \ + wr_opcode_name(RESERVED9), \ + wr_opcode_name(RESERVED10)) #define POS_PRN \ -"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u" +"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u" TRACE_EVENT( rvt_post_one_wr, @@ -83,7 +93,9 @@ TRACE_EVENT( TP_STRUCT__entry( RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) __field(u64, wr_id) + __field(struct rvt_swqe *, wqe) __field(u32, qpn) + __field(u32, qpt) __field(u32, psn) __field(u32, lpsn) __field(u32, length) @@ -92,11 +104,17 @@ TRACE_EVENT( __field(u32, avail) __field(u32, head) __field(u32, last) + __field(u32, ssn) + __field(int, send_flags) + __field(pid_t, pid) + __field(int, num_sge) ), TP_fast_assign( RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + __entry->wqe = wqe; __entry->wr_id = wqe->wr.wr_id; __entry->qpn = qp->ibqp.qp_num; + __entry->qpt = qp->ibqp.qp_type; __entry->psn = wqe->psn; __entry->lpsn = wqe->lpsn; __entry->length = wqe->length; @@ -105,20 +123,30 @@ TRACE_EVENT( __entry->avail = qp->s_avail; __entry->head = qp->s_head; __entry->last = qp->s_last; + __entry->pid = qp->pid; + __entry->ssn = wqe->ssn; + __entry->send_flags = wqe->wr.send_flags; + __entry->num_sge = wqe->wr.num_sge; ), TP_printk( POS_PRN, __get_str(dev), + __entry->wqe, __entry->wr_id, + __entry->send_flags, __entry->qpn, + __entry->qpt, __entry->psn, __entry->lpsn, + __entry->ssn, __entry->length, __entry->opcode, show_wr_opcode(__entry->opcode), __entry->size, __entry->avail, __entry->head, - __entry->last + __entry->last, + __entry->pid, + __entry->num_sge ) ); -- cgit v1.2.3 From c6ad9482fcb85178c44f0368c39f1324a78b8fc2 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 20 Mar 2017 17:25:16 -0700 Subject: IB/rdmavt: Add tracing for cq entry and poll The following fields are defined for filtering and triggering: - wr_id - status - opcode - qpn - length - idx Reviewed-by: Dennis 
Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/cq.c | 3 + drivers/infiniband/sw/rdmavt/trace.h | 1 + drivers/infiniband/sw/rdmavt/trace_cq.h | 127 ++++++++++++++++++++++++++++++++ 3 files changed, 131 insertions(+) create mode 100644 drivers/infiniband/sw/rdmavt/trace_cq.h diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 7aa7a4e312f1..0ae2ff8cf81e 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -50,6 +50,7 @@ #include #include "cq.h" #include "vt.h" +#include "trace.h" /** * rvt_cq_enter - add a new entry to the completion queue @@ -93,6 +94,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) } return; } + trace_rvt_cq_enter(cq, entry, head); if (cq->ip) { wc->uqueue[head].wr_id = entry->wr_id; wc->uqueue[head].status = entry->status; @@ -482,6 +484,7 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) if (tail == wc->head) break; /* The kernel doesn't need a RMB since it has the lock. */ + trace_rvt_cq_poll(cq, &wc->kqueue[tail], npolled); *entry = wc->kqueue[tail]; if (tail >= cq->ibcq.cqe) tail = 0; diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index e2d23acb6a7d..89554c0e168c 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -52,3 +52,4 @@ #include "trace_qp.h" #include "trace_tx.h" #include "trace_mr.h" +#include "trace_cq.h" diff --git a/drivers/infiniband/sw/rdmavt/trace_cq.h b/drivers/infiniband/sw/rdmavt/trace_cq.h new file mode 100644 index 000000000000..a315850aa9bb --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_cq.h @@ -0,0 +1,127 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_CQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_CQ_H + +#include +#include + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_cq + +#define wc_opcode_name(opcode) { IB_WC_##opcode, #opcode } +#define show_wc_opcode(opcode) \ +__print_symbolic(opcode, \ + wc_opcode_name(SEND), \ + wc_opcode_name(RDMA_WRITE), \ + wc_opcode_name(RDMA_READ), \ + wc_opcode_name(COMP_SWAP), \ + wc_opcode_name(FETCH_ADD), \ + wc_opcode_name(LSO), \ + wc_opcode_name(LOCAL_INV), \ + wc_opcode_name(REG_MR), \ + wc_opcode_name(MASKED_COMP_SWAP), \ + wc_opcode_name(RECV), \ + wc_opcode_name(RECV_RDMA_WITH_IMM)) + +#define CQ_PRN \ +"[%s] idx %u wr_id %llx status %u opcode %u,%s length %u qpn %x" + +DECLARE_EVENT_CLASS( + rvt_cq_entry_template, + TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx), + TP_ARGS(cq, wc, idx), + TP_STRUCT__entry( + RDI_DEV_ENTRY(cq->rdi) + __field(u64, wr_id) + __field(u32, status) + __field(u32, opcode) + __field(u32, qpn) + __field(u32, length) + __field(u32, idx) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(cq->rdi) + __entry->wr_id = wc->wr_id; + __entry->status = wc->status; + __entry->opcode = wc->opcode; + __entry->length = wc->byte_len; + __entry->qpn = wc->qp->qp_num; + __entry->idx = idx; + ), + TP_printk( + CQ_PRN, + __get_str(dev), + __entry->idx, + __entry->wr_id, + __entry->status, + __entry->opcode, show_wc_opcode(__entry->opcode), + __entry->length, + __entry->qpn + ) +); + +DEFINE_EVENT( + rvt_cq_entry_template, rvt_cq_enter, + TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx), + TP_ARGS(cq, wc, idx)); + +DEFINE_EVENT( + rvt_cq_entry_template, rvt_cq_poll, + TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx), + TP_ARGS(cq, wc, idx)); + +#endif /* __RVT_TRACE_CQ_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . 
+#define TRACE_INCLUDE_FILE trace_cq +#include -- cgit v1.2.3 From 9260b3541f3a9159b6f49270077d8669870d5db0 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 20 Mar 2017 17:25:23 -0700 Subject: IB/rdmavt: Add swqe completion trace The following fields are available for filter/trace: - wqe - wr_id - qpn - qpt - length - idx - ssn - (wr)opcode - (wr)send_flags Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 2 ++ drivers/infiniband/hw/hfi1/ruc.c | 2 ++ drivers/infiniband/hw/hfi1/trace_tx.h | 43 +++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 4649530ac4e8..0e5657803a54 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1028,6 +1028,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; s_last = qp->s_last; + trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) s_last = 0; qp->s_last = s_last; @@ -1079,6 +1080,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, rvt_put_swqe(wqe); s_last = qp->s_last; + trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) s_last = 0; qp->s_last = s_last; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index d2eb793b34af..eeb650dde776 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -909,8 +909,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, last = qp->s_last; old_last = last; + trace_hfi1_qp_send_completion(qp, wqe, last); if (++last >= qp->s_size) last = 0; + trace_hfi1_qp_send_completion(qp, wqe, last); qp->s_last = last; /* See post_send() */ barrier(); diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h index 415d6be42c5d..2c9ac57657d3 100644 --- a/drivers/infiniband/hw/hfi1/trace_tx.h +++ b/drivers/infiniband/hw/hfi1/trace_tx.h @@ -633,6 +633,49 @@ DEFINE_EVENT(hfi1_bct_template, bct_get, TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc), TP_ARGS(dd, bc)); +TRACE_EVENT( + hfi1_qp_send_completion, + TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 idx), + TP_ARGS(qp, wqe, idx), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) + __field(struct rvt_swqe *, wqe) + __field(u64, wr_id) + __field(u32, qpn) + __field(u32, qpt) + __field(u32, length) + __field(u32, idx) + __field(u32, ssn) + __field(enum ib_wr_opcode, opcode) + __field(int, send_flags) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) + __entry->wqe = wqe; + __entry->wr_id = wqe->wr.wr_id; + __entry->qpn = qp->ibqp.qp_num; + __entry->qpt = qp->ibqp.qp_type; + __entry->length = wqe->length; + __entry->idx = idx; + __entry->ssn = wqe->ssn; + __entry->opcode = wqe->wr.opcode; + __entry->send_flags = wqe->wr.send_flags; + ), + TP_printk( + "[%s] qpn 0x%x qpt %u wqe %p idx %u wr_id %llx length %u ssn %u opcode %x send_flags %x", + __get_str(dev), + __entry->qpn, + __entry->qpt, + __entry->wqe, + __entry->idx, + __entry->wr_id, + __entry->length, + __entry->ssn, + __entry->opcode, + __entry->send_flags + ) +); + #endif /* __HFI1_TRACE_TX_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 5d6f08afdd394c0b162cefe383093aace3e94e39 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Mon, 20 Mar 2017 17:25:29 -0700 Subject: 
IB/hfi1: Check device id early during init If the wrong device is passed to the driver, it should fail early, without trying to initialize the device only to find out later during init that the device id is invalid. Reviewed-by: Ira Weiny Signed-off-by: Tadeusz Struk Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index f40864e9a3b2..9bfb8ebe28b1 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1425,6 +1425,16 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* First, lock the non-writable module parameters */ HFI1_CAP_LOCK(); + /* Validate dev ids */ + if (!(ent->device == PCI_DEVICE_ID_INTEL0 || + ent->device == PCI_DEVICE_ID_INTEL1)) { + hfi1_early_err(&pdev->dev, + "Failing on unknown Intel deviceid 0x%x\n", + ent->device); + ret = -ENODEV; + goto bail; + } + /* Validate some global module parameters */ ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt); if (ret) @@ -1470,15 +1480,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto bail; - if (!(ent->device == PCI_DEVICE_ID_INTEL0 || - ent->device == PCI_DEVICE_ID_INTEL1)) { - hfi1_early_err(&pdev->dev, - "Failing on unknown Intel deviceid 0x%x\n", - ent->device); - ret = -ENODEV; - goto clean_bail; - } - /* * Do device-specific initialization, function table setup, dd * allocation, etc. -- cgit v1.2.3 From 62eed66e98b4c2286fef2ce5911d8d75b7515f7b Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Mon, 20 Mar 2017 17:25:35 -0700 Subject: IB/hfi1: Protect the global dev_cntr_names and port_cntr_names Protect the global dev_cntr_names and port_cntr_names with a global mutex, as they are allocated and freed in a function called once per device. Otherwise there is a danger of double free and memory leaks. 
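The pattern being applied is a once-only, mutex-guarded lazy initialization of state shared by all devices. A minimal sketch under that assumption (the lock, pointer, and allocator names here are hypothetical):

	static DEFINE_MUTEX(names_lock);
	static const char **names;	/* shared across all devices */

	static int get_names(const char ***out)
	{
		int ret = 0;

		mutex_lock(&names_lock);
		if (!names)			/* first caller initializes */
			names = alloc_names();	/* hypothetical allocator */
		if (!names)
			ret = -ENOMEM;
		else
			*out = names;
		mutex_unlock(&names_lock);
		return ret;
	}

The teardown side takes the same mutex, frees the buffers, and NULLs the pointers so that a later probe can initialize them again, which is what the unregister path in the diff below does.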
Reviewed-by: Dennis Dalessandro Reviewed-by: Easwar Hariharan Signed-off-by: Tadeusz Struk Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/verbs.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 815cb44b7693..8d716547da9d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1540,6 +1540,7 @@ static const char * const driver_cntr_names[] = { "DRIVER_EgrHdrFull" }; +static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */ static const char **dev_cntr_names; static const char **port_cntr_names; static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names); @@ -1594,6 +1595,7 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, { int i, err; + mutex_lock(&cntr_names_lock); if (!cntr_names_initialized) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); @@ -1602,8 +1604,10 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, num_driver_cntrs, &num_dev_cntrs, &dev_cntr_names); - if (err) + if (err) { + mutex_unlock(&cntr_names_lock); return NULL; + } for (i = 0; i < num_driver_cntrs; i++) dev_cntr_names[num_dev_cntrs + i] = @@ -1617,10 +1621,12 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, if (err) { kfree(dev_cntr_names); dev_cntr_names = NULL; + mutex_unlock(&cntr_names_lock); return NULL; } cntr_names_initialized = 1; } + mutex_unlock(&cntr_names_lock); if (!port_num) return rdma_alloc_hw_stats_struct( @@ -1839,9 +1845,13 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd) del_timer_sync(&dev->mem_timer); verbs_txreq_exit(dev); + mutex_lock(&cntr_names_lock); kfree(dev_cntr_names); kfree(port_cntr_names); + dev_cntr_names = NULL; + port_cntr_names = NULL; cntr_names_initialized = 0; + mutex_unlock(&cntr_names_lock); } void hfi1_cnp_rcv(struct hfi1_packet *packet) -- cgit v1.2.3 From fb897ad315643e5dc1092a115b3cec914b66df9d Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Mon, 20 Mar 2017 17:25:42 -0700 Subject: IB/hfi1: Check for QSFP presence before attempting reads Attempting to read the status of a QSFP cable creates noise in the logs and misses out on setting an appropriate Offline/Disabled Reason if the cable is not plugged in. Check for this prior to attempting the read and attendant retries. Fixes: 673b975f1fba ("IB/hfi1: Add QSFP sanity pre-check") Reviewed-by: Dennis Dalessandro Signed-off-by: Easwar Hariharan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 8b8840a676e4..f9d0d8c09785 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9533,8 +9533,11 @@ static int test_qsfp_read(struct hfi1_pportdata *ppd) int ret; u8 status; - /* report success if not a QSFP */ - if (ppd->port_type != PORT_TYPE_QSFP) + /* + * Report success if not a QSFP or, if it is a QSFP, but the cable is + * not present + */ + if (ppd->port_type != PORT_TYPE_QSFP || !qsfp_mod_present(ppd)) return 0; /* read byte 2, the status byte */ -- cgit v1.2.3 From 5e6e94244bba1eb5be3c5ac9ceb3af87280b56d1 Mon Sep 17 00:00:00 2001 From: Michael J. 
Ruhl Date: Mon, 20 Mar 2017 17:25:48 -0700 Subject: IB/hfi1: Add a patch value to the firmware version string The HFI firmware now includes a patch level in its version. Updating the necessary code to include the patch version in the firmware string. Reviewed-by: Easwar Hariharan Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 23 +++++++++++++++-------- drivers/infiniband/hw/hfi1/chip.h | 18 +++++++++++------- drivers/infiniband/hw/hfi1/firmware.c | 14 ++++++++------ drivers/infiniband/hw/hfi1/hfi.h | 9 +++++---- drivers/infiniband/hw/hfi1/verbs.c | 14 ++++++++------ 5 files changed, 47 insertions(+), 31 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index f9d0d8c09785..77f4b41de2b0 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -7166,7 +7166,7 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width, * set the max_rate field in handle_verify_cap until v0.19. */ if ((dd->icode == ICODE_RTL_SILICON) && - (dd->dc8051_ver < dc8051_ver(0, 19))) { + (dd->dc8051_ver < dc8051_ver(0, 19, 0))) { /* max_rate: 0 = 12.5G, 1 = 25G */ switch (max_rate) { case 0: @@ -7351,7 +7351,7 @@ void handle_verify_cap(struct work_struct *work) } ppd->link_speed_active = 0; /* invalid value */ - if (dd->dc8051_ver < dc8051_ver(0, 20)) { + if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) { /* remote_tx_rate: 0 = 12.5G, 1 = 25G */ switch (remote_tx_rate) { case 0: @@ -8422,7 +8422,7 @@ static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data) int ret; if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR || - (dd->dc8051_ver < dc8051_ver(0, 20))) { + (dd->dc8051_ver < dc8051_ver(0, 20, 0))) { if (acquire_lcb_access(dd, 0) == 0) { write_csr(dd, addr, data); release_lcb_access(dd, 0); @@ -8728,13 +8728,20 @@ static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id, & REMOTE_DEVICE_REV_MASK; } -void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b) +void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, + u8 *ver_patch) { u32 frame; read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame); - *ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK; - *ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK; + *ver_major = (frame >> STS_FM_VERSION_MAJOR_SHIFT) & + STS_FM_VERSION_MAJOR_MASK; + *ver_minor = (frame >> STS_FM_VERSION_MINOR_SHIFT) & + STS_FM_VERSION_MINOR_MASK; + + read_8051_config(dd, VERSION_PATCH, GENERAL_CONFIG, &frame); + *ver_patch = (frame >> STS_FM_VERSION_PATCH_SHIFT) & + STS_FM_VERSION_PATCH_MASK; } static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management, @@ -9130,7 +9137,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd) if (ret) goto set_local_link_attributes_fail; - if (dd->dc8051_ver < dc8051_ver(0, 20)) { + if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) { /* set the tx rate to the fastest enabled */ if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G) ppd->local_tx_rate = 1; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 043fd21dc5f3..24df45fc8722 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ 
b/drivers/infiniband/hw/hfi1/chip.h @@ -1,7 +1,7 @@ #ifndef _CHIP_H #define _CHIP_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -394,7 +394,8 @@ #define LAST_REMOTE_STATE_COMPLETE 0x13 #define LINK_QUALITY_INFO 0x14 #define REMOTE_DEVICE_ID 0x15 -#define LINK_DOWN_REASON 0x16 +#define LINK_DOWN_REASON 0x16 /* first byte of offset 0x16 */ +#define VERSION_PATCH 0x16 /* last byte of offset 0x16 */ /* 8051 lane specific register field IDs */ #define TX_EQ_SETTINGS 0x00 @@ -524,10 +525,12 @@ enum { #define SUPPORTED_CRCS (CAP_CRC_14B | CAP_CRC_48B) /* misc status version fields */ -#define STS_FM_VERSION_A_SHIFT 16 -#define STS_FM_VERSION_A_MASK 0xff -#define STS_FM_VERSION_B_SHIFT 24 -#define STS_FM_VERSION_B_MASK 0xff +#define STS_FM_VERSION_MINOR_SHIFT 16 +#define STS_FM_VERSION_MINOR_MASK 0xff +#define STS_FM_VERSION_MAJOR_SHIFT 24 +#define STS_FM_VERSION_MAJOR_MASK 0xff +#define STS_FM_VERSION_PATCH_SHIFT 24 +#define STS_FM_VERSION_PATCH_MASK 0xff /* LCB_CFG_CRC_MODE TX_VAL and RX_VAL CRC mode values */ #define LCB_CRC_16B 0x0 /* 16b CRC */ @@ -698,7 +701,8 @@ void fabric_serdes_reset(struct hfi1_devdata *dd); int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result); /* chip.c */ -void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b); +void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, + u8 *ver_patch); void read_guid(struct hfi1_devdata *dd); int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout); void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason, diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 0dd50cdb039a..4042c11b2742 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -1004,7 +1004,9 @@ static int load_8051_firmware(struct hfi1_devdata *dd, { u64 reg; int ret; - u8 ver_a, ver_b; + u8 ver_major; + u8 ver_minor; + u8 ver_patch; /* * DC Reset sequence @@ -1073,10 +1075,10 @@ static int load_8051_firmware(struct hfi1_devdata *dd, return -ETIMEDOUT; } - read_misc_status(dd, &ver_a, &ver_b); - dd_dev_info(dd, "8051 firmware version %d.%d\n", - (int)ver_b, (int)ver_a); - dd->dc8051_ver = dc8051_ver(ver_b, ver_a); + read_misc_status(dd, &ver_major, &ver_minor, &ver_patch); + dd_dev_info(dd, "8051 firmware version %d.%d.%d\n", + (int)ver_major, (int)ver_minor, (int)ver_patch); + dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch); return 0; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index b69ab4736c86..a31638cc30ff 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1020,7 +1020,7 @@ struct hfi1_devdata { u8 qos_shift; u16 irev; /* implementation revision */ - u16 dc8051_ver; /* 8051 firmware version */ + u32 dc8051_ver; /* 8051 firmware version */ spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */ struct platform_config platform_config; @@ -1173,9 +1173,10 @@ struct hfi1_devdata { }; /* 8051 firmware version helper */ -#define dc8051_ver(a, b) ((a) << 8 | (b)) -#define dc8051_ver_maj(a) ((a & 0xff00) >> 8) -#define dc8051_ver_min(a) (a & 0x00ff) +#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c)) +#define dc8051_ver_maj(a) (((a) & 0xff0000) >> 16) +#define dc8051_ver_min(a) (((a) & 0x00ff00) >> 8) +#define dc8051_ver_patch(a) ((a) & 0x0000ff) /* f_put_tid types */ #define PT_EXPECTED 0 diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 8d716547da9d..928918cc7d80 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1236,12 +1236,14 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) static void hfi1_fill_device_attr(struct hfi1_devdata *dd) { struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; - u16 ver = dd->dc8051_ver; + u32 ver = dd->dc8051_ver; memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props)); - rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) | - (u64)dc8051_ver_min(ver); + rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 32) | + ((u64)(dc8051_ver_min(ver)) << 16) | + (u64)dc8051_ver_patch(ver); + rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | @@ -1520,10 +1522,10 @@ static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str, { struct rvt_dev_info *rdi = ib_to_rvt(ibdev); struct hfi1_ibdev *dev = dev_from_rdi(rdi); - u16 ver = dd_from_dev(dev)->dc8051_ver; + u32 ver = dd_from_dev(dev)->dc8051_ver; - snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver), - dc8051_ver_min(ver)); + snprintf(str, str_len, "%u.%u.%u", dc8051_ver_maj(ver), + dc8051_ver_min(ver), dc8051_ver_patch(ver)); } static const char * const driver_cntr_names[] = { -- cgit v1.2.3 From 5f14e4e6674002f377ba164114eb9a67ba23d864 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Mon, 20 Mar 2017 17:25:55 -0700 Subject: IB/rdmavt, IB/hfi1: Fix timer migration regressions RC timeout counter isn't getting incremented. Increment counter and add the trace for it. 
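Condensed, the guarded timer-expiry path being modified looks as follows (locking and variables as in the diff below, restart handling omitted); note that IB port numbers are 1-based while the rdi->ports[] array is 0-based:

	spin_lock_irqsave(&qp->r_lock, flags);
	spin_lock(&qp->s_lock);
	if (qp->s_flags & RVT_S_TIMER) {	/* timer still armed */
		struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];

		qp->s_flags &= ~RVT_S_TIMER;
		rvp->n_rc_timeouts++;		/* the missing counter */
		del_timer(&qp->s_timer);
		trace_rvt_rc_timeout(qp, qp->s_last_psn + 1);
	}
	spin_unlock(&qp->s_lock);
	spin_unlock_irqrestore(&qp->r_lock, flags);

Checking RVT_S_TIMER under the lock ensures the counter is bumped only when the timer genuinely fired, not when expiry raced with a concurrent stop.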
Fixes: 87c23b4ab018 ("IB/rdmavt: Adding timer logic to rdmavt") Reviewed-by: Brian Welty Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/trace_rc.h | 7 +- drivers/infiniband/sw/rdmavt/qp.c | 6 +- drivers/infiniband/sw/rdmavt/trace.h | 3 +- drivers/infiniband/sw/rdmavt/trace_rc.h | 109 ++++++++++++++++++++++++++++++++ 4 files changed, 117 insertions(+), 8 deletions(-) create mode 100644 drivers/infiniband/sw/rdmavt/trace_rc.h diff --git a/drivers/infiniband/hw/hfi1/trace_rc.h b/drivers/infiniband/hw/hfi1/trace_rc.h index 5ea5005f9f41..8ce476570462 100644 --- a/drivers/infiniband/hw/hfi1/trace_rc.h +++ b/drivers/infiniband/hw/hfi1/trace_rc.h @@ -1,5 +1,5 @@ /* -* Copyright(c) 2015, 2016 Intel Corporation. +* Copyright(c) 2015, 2016, 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -104,11 +104,6 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_ack, TP_ARGS(qp, psn) ); -DEFINE_EVENT(hfi1_rc_template, hfi1_timeout, - TP_PROTO(struct rvt_qp *qp, u32 psn), - TP_ARGS(qp, psn) -); - DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error, TP_PROTO(struct rvt_qp *qp, u32 psn), TP_ARGS(qp, psn) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 3c55a8b420fa..d7dabdfaab8d 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016, 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -2052,8 +2052,12 @@ static void rvt_rc_timeout(unsigned long arg) spin_lock_irqsave(&qp->r_lock, flags); spin_lock(&qp->s_lock); if (qp->s_flags & RVT_S_TIMER) { + struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1]; + qp->s_flags &= ~RVT_S_TIMER; + rvp->n_rc_timeouts++; del_timer(&qp->s_timer); + trace_rvt_rc_timeout(qp, qp->s_last_psn + 1); if (rdi->driver_f.notify_restart_rc) rdi->driver_f.notify_restart_rc(qp, qp->s_last_psn + 1, diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index 89554c0e168c..bb4b1e710f22 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016, 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -53,3 +53,4 @@ #include "trace_tx.h" #include "trace_mr.h" #include "trace_cq.h" +#include "trace_rc.h" diff --git a/drivers/infiniband/sw/rdmavt/trace_rc.h b/drivers/infiniband/sw/rdmavt/trace_rc.h new file mode 100644 index 000000000000..995276933a55 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_rc.h @@ -0,0 +1,109 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_RC_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_RC_H + +#include +#include + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_rc + +DECLARE_EVENT_CLASS(rvt_rc_template, + TP_PROTO(struct rvt_qp *qp, u32 psn), + TP_ARGS(qp, psn), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) + __field(u32, qpn) + __field(u32, s_flags) + __field(u32, psn) + __field(u32, s_psn) + __field(u32, s_next_psn) + __field(u32, s_sending_psn) + __field(u32, s_sending_hpsn) + __field(u32, r_psn) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->s_flags = qp->s_flags; + __entry->psn = psn; + __entry->s_psn = qp->s_psn; + __entry->s_next_psn = qp->s_next_psn; + __entry->s_sending_psn = qp->s_sending_psn; + __entry->s_sending_hpsn = qp->s_sending_hpsn; + __entry->r_psn = qp->r_psn; + ), + TP_printk( + "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x", + __get_str(dev), + __entry->qpn, + __entry->s_flags, + __entry->psn, + __entry->s_psn, + __entry->s_next_psn, + __entry->s_sending_psn, + __entry->s_sending_hpsn, + __entry->r_psn + ) +); + +DEFINE_EVENT(rvt_rc_template, rvt_rc_timeout, + TP_PROTO(struct rvt_qp *qp, u32 psn), + TP_ARGS(qp, psn) +); + +#endif /* __RVT_TRACE_RC_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . 
+#define TRACE_INCLUDE_FILE trace_rc +#include <trace/define_trace.h> -- cgit v1.2.3 From 44dcfa4b182dffed0e1e6535220fcdd12620b9ec Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 20 Mar 2017 17:26:01 -0700 Subject: IB/rdmavt: Avoid resetting wqe send_flags in unreserve The wqe should be read only and in fact the superfluous reset of the RVT_SEND_RESERVE_USED flag causes an issue where reserved operations elicit a bad completion to the ULP. The maintenance of the flag is now entirely within rvt_post_one_wr() where a reserved operation will set the flag and a non-reserved operation will ensure the operation that is about to be posted has the flag reset. Fixes: 856cc4c237ad ("IB/hfi1: Add the capability for reserved operations") Reviewed-by: Don Hiatt Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 7 +++++-- include/rdma/rdmavt_qp.h | 4 +--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index d7dabdfaab8d..728f5f1218c8 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1772,10 +1772,13 @@ static int rvt_post_one_wr(struct rvt_qp *qp, 0); qp->s_next_psn = wqe->lpsn + 1; } - if (unlikely(reserved_op)) + if (unlikely(reserved_op)) { + wqe->wr.send_flags |= RVT_SEND_RESERVE_USED; rvt_qp_wqe_reserve(qp, wqe); - else + } else { + wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED; qp->s_avail--; + } trace_rvt_post_one_wr(qp, wqe); smp_wmb(); /* see request builders */ qp->s_head = next; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 3cdd9e26e96e..e3bb312a2ffa 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_INCQP_H /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016, 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -526,7 +526,6 @@ static inline void rvt_qp_wqe_reserve( struct rvt_qp *qp, struct rvt_swqe *wqe) { - wqe->wr.send_flags |= RVT_SEND_RESERVE_USED; atomic_inc(&qp->s_reserved_used); } @@ -550,7 +549,6 @@ static inline void rvt_qp_wqe_unreserve( struct rvt_swqe *wqe) { if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) { - wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED; atomic_dec(&qp->s_reserved_used); /* insure no compiler re-order up to s_last change */ smp_mb__after_atomic(); -- cgit v1.2.3 From f7b42633720deb5ca8f4bcb175c7dc2933057e7f Mon Sep 17 00:00:00 2001 From: Michael J. Ruhl Date: Mon, 20 Mar 2017 17:26:07 -0700 Subject: IB/hfi1: Ensure VL index is within bounds Improve the safety of the code and ensure the array cannot be indexed out of bounds when picking the CPU for a given SDMA engine. Reviewed-by: Dennis Dalessandro Signed-off-by: Michael J. 
Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/sdma.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index d89852b1f984..bfd0d5187e9b 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -962,6 +962,11 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, continue; } + if (vl >= ARRAY_SIZE(rht_node->map)) { + ret = -EINVAL; + goto out; + } + rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu, sdma_rht_params); if (!rht_node) { -- cgit v1.2.3 From 0181ce31b26021f7c2f9506112a05c847a36f0f2 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Mon, 20 Mar 2017 17:26:14 -0700 Subject: IB/hfi1: Add receive fault injection feature Add fault injection capability: - Drop packets unconditionally (fault_by_packet) - Drop packets based on opcode (fault_by_opcode) This feature reacts to the global FAULT_INJECTION config flag. The following fault tracepoints have been added: - misc/fault_opcode - misc/fault_packet See 'Documentation/fault-injection/fault-injection.txt' for details. Examples: - Dropping packets by opcode: /sys/kernel/debug/hfi1/hfi1_X/fault_opcode # Enable fault echo Y > fault_by_opcode # Set probability of dropping (0-100%) echo 25 > probability # Set opcode echo 0x64 > opcode # Number of times to fault echo 3 > times # An optional mask allows you to fault # a range of opcodes echo 0xf0 > mask /sys/kernel/debug/hfi1/hfi1_X/fault_stats contains a value in parentheses to indicate the number of times each opcode was dropped. - Dropping packets unconditionally /sys/kernel/debug/hfi1/hfi1_X/fault_packet # Enable fault echo Y > fault_by_packet /sys/kernel/debug/hfi1/hfi1_X/fault_packet/fault_stats contains the number of packets dropped.
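To make the opcode/mask semantics concrete: in the hfi1_dbg_fault_opcode() hunk below, a packet is eligible for dropping only when (opcode & mask) equals the configured opcode. With that strict equality, a configured opcode whose bits fall outside the mask (such as the 0x64/0xf0 pair in the example above) selects nothing; a range match needs an already-masked value, e.g. 0x60 with mask 0xf0 covers 0x60-0x6f. A minimal userspace sketch of just that predicate, with illustrative values that are not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* Mirror of the fault-by-opcode predicate used by hfi1_dbg_fault_opcode():
 * a packet may be faulted only when (opcode & mask) == configured opcode.
 */
static int opcode_would_fault(uint8_t opcode, uint8_t cfg, uint8_t mask)
{
	return (uint8_t)(opcode & mask) == cfg;
}

int main(void)
{
	unsigned int op;

	for (op = 0; op < 256; op++)
		if (opcode_would_fault(op, 0x60, 0xf0))
			printf("would fault opcode 0x%02x\n", op);
	return 0;
}

(The real check additionally gates on should_fail(), which is what implements the probability/times knobs.)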
Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/debugfs.c | 222 ++++++++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/debugfs.h | 51 +++++++- drivers/infiniband/hw/hfi1/driver.c | 8 ++ drivers/infiniband/hw/hfi1/trace_misc.h | 48 +++++++ drivers/infiniband/hw/hfi1/verbs.c | 6 + drivers/infiniband/hw/hfi1/verbs.h | 4 + 6 files changed, 336 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 7fe9dd885746..cac6d5256f40 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -51,8 +51,12 @@ #include #include #include +#include +#include +#include #include "hfi.h" +#include "trace.h" #include "debugfs.h" #include "device.h" #include "qp.h" @@ -1063,6 +1067,217 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list); DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list) DEBUGFS_FILE_OPS(sdma_cpu_list); +#ifdef CONFIG_FAULT_INJECTION +static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct hfi1_opcode_stats_perctx *opstats; + + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct hfi1_opcode_stats_perctx *opstats; + + ++*pos; + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void _fault_stats_seq_stop(struct seq_file *s, void *v) +{ +} + +static int _fault_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos = v; + loff_t i = *spos, j; + u64 n_packets = 0, n_bytes = 0; + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + + for (j = 0; j < dd->first_user_ctxt; j++) { + if (!dd->rcd[j]) + continue; + n_packets += dd->rcd[j]->opstats->stats[i].n_packets; + n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes; + } + if (!n_packets && !n_bytes) + return SEQ_SKIP; + if (!ibd->fault_opcode->n_rxfaults[i] && + !ibd->fault_opcode->n_txfaults[i]) + return SEQ_SKIP; + seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i, + (unsigned long long)n_packets, + (unsigned long long)n_bytes, + (unsigned long long)ibd->fault_opcode->n_rxfaults[i], + (unsigned long long)ibd->fault_opcode->n_txfaults[i]); + return 0; +} + +DEBUGFS_SEQ_FILE_OPS(fault_stats); +DEBUGFS_SEQ_FILE_OPEN(fault_stats); +DEBUGFS_FILE_OPS(fault_stats); + +static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd) +{ + debugfs_remove_recursive(ibd->fault_opcode->dir); + kfree(ibd->fault_opcode); + ibd->fault_opcode = NULL; +} + +static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd) +{ + struct dentry *parent = ibd->hfi1_ibdev_dbg; + + ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL); + if (!ibd->fault_opcode) + return -ENOMEM; + + ibd->fault_opcode->attr.interval = 1; + ibd->fault_opcode->attr.require_end = ULONG_MAX; + ibd->fault_opcode->attr.stacktrace_depth = 32; + ibd->fault_opcode->attr.dname = NULL; + ibd->fault_opcode->attr.verbose = 0; + ibd->fault_opcode->fault_by_opcode = false; + ibd->fault_opcode->opcode = 0; + ibd->fault_opcode->mask = 0xff; + + ibd->fault_opcode->dir = + fault_create_debugfs_attr("fault_opcode", + parent, + &ibd->fault_opcode->attr); + if (IS_ERR(ibd->fault_opcode->dir)) { + kfree(ibd->fault_opcode); + return -ENOENT; + } + + DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd); + if 
(!debugfs_create_bool("fault_by_opcode", 0600, + ibd->fault_opcode->dir, + &ibd->fault_opcode->fault_by_opcode)) + goto fail; + if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir, + &ibd->fault_opcode->opcode)) + goto fail; + if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir, + &ibd->fault_opcode->mask)) + goto fail; + + return 0; +fail: + fault_exit_opcode_debugfs(ibd); + return -ENOMEM; +} + +static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd) +{ + debugfs_remove_recursive(ibd->fault_packet->dir); + kfree(ibd->fault_packet); + ibd->fault_packet = NULL; +} + +static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd) +{ + struct dentry *parent = ibd->hfi1_ibdev_dbg; + + ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL); + if (!ibd->fault_packet) + return -ENOMEM; + + ibd->fault_packet->attr.interval = 1; + ibd->fault_packet->attr.require_end = ULONG_MAX; + ibd->fault_packet->attr.stacktrace_depth = 32; + ibd->fault_packet->attr.dname = NULL; + ibd->fault_packet->attr.verbose = 0; + ibd->fault_packet->fault_by_packet = false; + + ibd->fault_packet->dir = + fault_create_debugfs_attr("fault_packet", + parent, + &ibd->fault_opcode->attr); + if (IS_ERR(ibd->fault_packet->dir)) { + kfree(ibd->fault_packet); + return -ENOENT; + } + + if (!debugfs_create_bool("fault_by_packet", 0600, + ibd->fault_packet->dir, + &ibd->fault_packet->fault_by_packet)) + goto fail; + if (!debugfs_create_u64("fault_stats", 0400, + ibd->fault_packet->dir, + &ibd->fault_packet->n_faults)) + goto fail; + + return 0; +fail: + fault_exit_packet_debugfs(ibd); + return -ENOMEM; +} + +static void fault_exit_debugfs(struct hfi1_ibdev *ibd) +{ + fault_exit_opcode_debugfs(ibd); + fault_exit_packet_debugfs(ibd); +} + +static int fault_init_debugfs(struct hfi1_ibdev *ibd) +{ + int ret = 0; + + ret = fault_init_opcode_debugfs(ibd); + if (ret) + return ret; + + ret = fault_init_packet_debugfs(ibd); + if (ret) + fault_exit_opcode_debugfs(ibd); + + return ret; +} + +bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx) +{ + bool ret = false; + struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device); + + if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode) + return false; + if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask)) + return false; + ret = should_fail(&ibd->fault_opcode->attr, 1); + if (ret) { + trace_hfi1_fault_opcode(qp, opcode); + if (rx) + ibd->fault_opcode->n_rxfaults[opcode]++; + else + ibd->fault_opcode->n_txfaults[opcode]++; + } + return ret; +} + +bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) +{ + struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi; + struct hfi1_ibdev *ibd = dev_from_rdi(rdi); + bool ret = false; + + if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet) + return false; + + ret = should_fail(&ibd->fault_packet->attr, 1); + if (ret) { + ++ibd->fault_packet->n_faults; + trace_hfi1_fault_packet(packet); + } + return ret; +} +#endif + void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) { char name[sizeof("port0counters") + 1]; @@ -1112,12 +1327,19 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) !port_cntr_ops[i].ops.write ? 
S_IRUGO : S_IRUGO | S_IWUSR); } + +#ifdef CONFIG_FAULT_INJECTION + fault_init_debugfs(ibd); +#endif } void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) { if (!hfi1_dbg_root) goto out; +#ifdef CONFIG_FAULT_INJECTION + fault_exit_debugfs(ibd); +#endif debugfs_remove(ibd->hfi1_ibdev_link); debugfs_remove_recursive(ibd->hfi1_ibdev_dbg); out: diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h index b6fb6814f1b8..70be5ca14736 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.h +++ b/drivers/infiniband/hw/hfi1/debugfs.h @@ -53,23 +53,68 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd); void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd); void hfi1_dbg_init(void); void hfi1_dbg_exit(void); + +#ifdef CONFIG_FAULT_INJECTION +#include +struct fault_opcode { + struct fault_attr attr; + struct dentry *dir; + bool fault_by_opcode; + u64 n_rxfaults[256]; + u64 n_txfaults[256]; + u8 opcode; + u8 mask; +}; + +struct fault_packet { + struct fault_attr attr; + struct dentry *dir; + bool fault_by_packet; + u64 n_faults; +}; + +bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx); +bool hfi1_dbg_fault_packet(struct hfi1_packet *packet); +#else +static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) +{ + return false; +} + +static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, + u32 opcode, bool rx) +{ + return false; +} +#endif + #else static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) { } -void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) +static inline void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) +{ +} + +static inline void hfi1_dbg_init(void) { } -void hfi1_dbg_init(void) +static inline void hfi1_dbg_exit(void) { } -void hfi1_dbg_exit(void) +static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) { + return false; } +static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, + u32 opcode, bool rx) +{ + return false; +} #endif #endif /* _HFI1_DEBUGFS_H */ diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 3881c951f6af..c0b012f6e11c 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -59,6 +59,7 @@ #include "trace.h" #include "qp.h" #include "sdma.h" +#include "debugfs.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -1354,6 +1355,9 @@ void handle_eflags(struct hfi1_packet *packet) */ int process_receive_ib(struct hfi1_packet *packet) { + if (unlikely(hfi1_dbg_fault_packet(packet))) + return RHF_RCV_CONTINUE; + trace_hfi1_rcvhdr(packet->rcd->ppd->dd, packet->rcd->ctxt, rhf_err_flags(packet->rhf), @@ -1409,6 +1413,8 @@ int process_receive_error(struct hfi1_packet *packet) int kdeth_process_expected(struct hfi1_packet *packet) { + if (unlikely(hfi1_dbg_fault_packet(packet))) + return RHF_RCV_CONTINUE; if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); @@ -1421,6 +1427,8 @@ int kdeth_process_eager(struct hfi1_packet *packet) { if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); + if (unlikely(hfi1_dbg_fault_packet(packet))) + return RHF_RCV_CONTINUE; dd_dev_err(packet->rcd->dd, "Unhandled eager packet received. 
Dropping.\n"); diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h index d308454af7fd..deac77ddaeab 100644 --- a/drivers/infiniband/hw/hfi1/trace_misc.h +++ b/drivers/infiniband/hw/hfi1/trace_misc.h @@ -72,6 +72,54 @@ TRACE_EVENT(hfi1_interrupt, __entry->src) ); +#ifdef CONFIG_FAULT_INJECTION +TRACE_EVENT(hfi1_fault_opcode, + TP_PROTO(struct rvt_qp *qp, u8 opcode), + TP_ARGS(qp, opcode), + TP_STRUCT__entry(DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) + __field(u32, qpn) + __field(u8, opcode) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->opcode = opcode; + ), + TP_printk("[%s] qpn 0x%x opcode 0x%x", + __get_str(dev), __entry->qpn, __entry->opcode) +); + +TRACE_EVENT(hfi1_fault_packet, + TP_PROTO(struct hfi1_packet *packet), + TP_ARGS(packet), + TP_STRUCT__entry(DD_DEV_ENTRY(packet->rcd->ppd->dd) + __field(u64, eflags) + __field(u32, ctxt) + __field(u32, hlen) + __field(u32, tlen) + __field(u32, updegr) + __field(u32, etail) + ), + TP_fast_assign(DD_DEV_ASSIGN(packet->rcd->ppd->dd); + __entry->eflags = rhf_err_flags(packet->rhf); + __entry->ctxt = packet->rcd->ctxt; + __entry->hlen = packet->hlen; + __entry->tlen = packet->tlen; + __entry->updegr = packet->updegr; + __entry->etail = rhf_egr_index(packet->rhf); + ), + TP_printk( + "[%s] ctxt %d eflags 0x%llx hlen %d tlen %d updegr %d etail %d", + __get_str(dev), + __entry->ctxt, + __entry->eflags, + __entry->hlen, + __entry->tlen, + __entry->updegr, + __entry->etail + ) +); +#endif + #endif /* __HFI1_TRACE_MISC_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 928918cc7d80..9f016daba256 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -60,6 +60,7 @@ #include "trace.h" #include "qp.h" #include "verbs_txreq.h" +#include "debugfs.h" static unsigned int hfi1_lkey_table_size = 16; module_param_named(lkey_table_size, hfi1_lkey_table_size, uint, @@ -599,6 +600,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) rcu_read_unlock(); goto drop; } + if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode, + true))) { + rcu_read_unlock(); + goto drop; + } spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(opcode, packet); if (likely(packet_handler)) diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 3a0b589e41c2..2756ec35b054 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -195,6 +195,10 @@ struct hfi1_ibdev { struct dentry *hfi1_ibdev_dbg; /* per HFI symlinks to above */ struct dentry *hfi1_ibdev_link; +#ifdef CONFIG_FAULT_INJECTION + struct fault_opcode *fault_opcode; + struct fault_packet *fault_packet; +#endif #endif }; -- cgit v1.2.3 From 243d9f436f89f95c304011bd32485afc27581986 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Mon, 20 Mar 2017 17:26:20 -0700 Subject: IB/hfi1: Add transmit fault injection feature Add ability to fault packets on transmit by opcode. Dropping by packet can be achieved by setting the mask to 0. In order to drop non-verbs traffic we set PbcInsertHrc to NONE (0x2). The packet will still be delivered to the receiving node but a KHdrHCRCErr (KDETH packet with a bad HCRC) will be triggered and the packet will not be delivered to the correct context. In order to drop regular verbs traffic we set the PbcTestEbp flag. 
The packet will still be delivered to the receiving node but a 'late ebp error' will be triggered and will be dropped. A global toggle (/sys/kernel/debug/hfi1/hfi1_X/fault_suppress_err) has been added to suppress the error messages on the receive node when a packet was faulted on the sending node. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 4 +++ drivers/infiniband/hw/hfi1/debugfs.c | 8 ++++++ drivers/infiniband/hw/hfi1/debugfs.h | 11 ++++++++ drivers/infiniband/hw/hfi1/driver.c | 11 ++++++++ drivers/infiniband/hw/hfi1/verbs.c | 49 ++++++++++++++++++++++++++++++------ drivers/infiniband/hw/hfi1/verbs.h | 1 + include/rdma/ib_pack.h | 2 ++ 7 files changed, 79 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 77f4b41de2b0..79a316acb8f4 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -64,6 +64,7 @@ #include "platform.h" #include "aspm.h" #include "affinity.h" +#include "debugfs.h" #define NUM_IB_PORTS 1 @@ -7898,6 +7899,9 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg) reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK; } + if (unlikely(hfi1_dbg_fault_suppress_err(&dd->verbs_dev))) + reg &= ~DCC_ERR_FLG_LATE_EBP_ERR_SMASK; + /* report any remaining errors */ if (reg) dd_dev_info_ratelimited(dd, "DCC Error: %s\n", diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index cac6d5256f40..dc2c1c993f04 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -1240,6 +1240,11 @@ static int fault_init_debugfs(struct hfi1_ibdev *ibd) return ret; } +bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) +{ + return ibd->fault_suppress_err; +} + bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx) { bool ret = false; @@ -1329,6 +1334,9 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) } #ifdef CONFIG_FAULT_INJECTION + debugfs_create_bool("fault_suppress_err", 0600, + ibd->hfi1_ibdev_dbg, + &ibd->fault_suppress_err); fault_init_debugfs(ibd); #endif } diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h index 70be5ca14736..38c38a98156d 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.h +++ b/drivers/infiniband/hw/hfi1/debugfs.h @@ -75,6 +75,7 @@ struct fault_packet { bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx); bool hfi1_dbg_fault_packet(struct hfi1_packet *packet); +bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd); #else static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) { @@ -86,6 +87,11 @@ static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, { return false; } + +static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) +{ + return false; +} #endif #else @@ -115,6 +121,11 @@ static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, { return false; } + +static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) +{ + return false; +} #endif #endif /* _HFI1_DEBUGFS_H */ diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index c0b012f6e11c..64bdbcef5f05 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1367,6 +1367,11 @@ int process_receive_ib(struct hfi1_packet *packet) packet->updegr, rhf_egr_index(packet->rhf)); + if (unlikely( + 
(hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) && + (packet->rhf & RHF_DC_ERR)))) + return RHF_RCV_CONTINUE; + if (unlikely(rhf_err_flags(packet->rhf))) { handle_eflags(packet); return RHF_RCV_CONTINUE; @@ -1402,6 +1407,12 @@ int process_receive_bypass(struct hfi1_packet *packet) int process_receive_error(struct hfi1_packet *packet) { + /* KHdrHCRCErr -- KDETH packet with a bad HCRC */ + if (unlikely( + hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) && + rhf_rcv_type_err(packet->rhf) == 3)) + return RHF_RCV_CONTINUE; + handle_eflags(packet); if (unlikely(rhf_err_flags(packet->rhf))) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 9f016daba256..070a349afd78 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -518,6 +518,35 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet) return NULL; } +static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc) +{ +#ifdef CONFIG_FAULT_INJECTION + if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP) + /* + * In order to drop non-IB traffic we + * set PbcInsertHrc to NONE (0x2). + * The packet will still be delivered + * to the receiving node but a + * KHdrHCRCErr (KDETH packet with a bad + * HCRC) will be triggered and the + * packet will not be delivered to the + * correct context. + */ + pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT; + else + /* + * In order to drop regular verbs + * traffic we set the PbcTestEbp + * flag. The packet will still be + * delivered to the receiving node but + * a 'late ebp error' will be + * triggered and will be dropped. + */ + pbc |= PBC_TEST_EBP; +#endif + return pbc; +} + /** * hfi1_ib_rcv - process an incoming packet * @packet: data packet information @@ -803,7 +832,6 @@ static int build_verbs_tx_desc( if (ret) goto bail_txadd; } - /* add the ulp payload - if any. 
tx->ss can be NULL for acks */ if (tx->ss) ret = build_verbs_ulp_payload(sde, length, tx); @@ -822,7 +850,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_ibdev *dev = ps->dev; struct hfi1_pportdata *ppd = ps->ppd; struct verbs_txreq *tx; - u64 pbc_flags = 0; u8 sc5 = priv->s_sc; int ret; @@ -831,12 +858,16 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (!sdma_txreq_built(&tx->txreq)) { if (likely(pbc == 0)) { u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); + u8 opcode = get_opcode(&tx->phdr.hdr); + /* No vl15 here */ /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) + pbc = hfi1_fault_tx(qp, opcode, pbc); pbc = create_pbc(ppd, - pbc_flags, + pbc, qp->srate_mbps, vl, plen); @@ -939,7 +970,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u32 plen = hdrwords + dwords + 2; /* includes pbc */ struct hfi1_pportdata *ppd = ps->ppd; u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr; - u64 pbc_flags = 0; u8 sc5; unsigned long flags = 0; struct send_context *sc; @@ -964,9 +994,14 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (likely(pbc == 0)) { u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); + struct verbs_txreq *tx = ps->s_txreq; + u8 opcode = get_opcode(&tx->phdr.hdr); + /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; - pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); + pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) + pbc = hfi1_fault_tx(qp, opcode, pbc); + pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); } if (cb) iowait_pio_inc(&priv->s_iowait); diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 2756ec35b054..6c549e7a25e7 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -198,6 +198,7 @@ struct hfi1_ibdev { #ifdef CONFIG_FAULT_INJECTION struct fault_opcode *fault_opcode; struct fault_packet *fault_packet; + bool fault_suppress_err; #endif #endif }; diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index b13419ce99ff..36655899ee02 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -80,6 +80,8 @@ enum { IB_OPCODE_UD = 0x60, /* per IBTA 1.3 vol 1 Table 38, A10.3.2 */ IB_OPCODE_CNP = 0x80, + /* Manufacturer specific */ + IB_OPCODE_MSP = 0xe0, /* operations -- just used to define real constants */ IB_OPCODE_SEND_FIRST = 0x00, -- cgit v1.2.3 From b58fc8049790ca780fb42bd2faabe9b921aedf44 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 20 Mar 2017 17:26:26 -0700 Subject: IB/hfi1: Eliminate synchronize_rcu() in mr delete The synchronize_rcu() call can be eliminated to improve memory deregistration performance. There are two key fields involved: - The rcu pointer itself - the lkey_published field To close the window between the rcu read of the mregion pointer and the reference count the code should: 1. To lkey/rkey validation (reader) Read the rcu pointer. If the pointer is non-NULL, get a reference. To the current validation tests use a READ_ONCE() on the lkey_published. Upon any failure release the reference. 2. To the remove logic (delete) Insure the published is zeroed prior to setting the pointer to NULL. 
This requires using rcu_assign_pointer() to insure lkey_published is written prior to the NULL. 3. To the insert logic (add) Insure the published is set use an rcu_assign_pointer() to insure the pointer is after all MR fields. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mr.c | 49 ++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index ae30b6838d79..7c869555cf73 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -191,8 +191,9 @@ static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region) tmr = rcu_access_pointer(dev->dma_mr); if (!tmr) { - rcu_assign_pointer(dev->dma_mr, mr); mr->lkey_published = 1; + /* Insure published written first */ + rcu_assign_pointer(dev->dma_mr, mr); rvt_get_mr(mr); } goto success; @@ -224,8 +225,9 @@ static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region) mr->lkey |= 1 << 8; rkt->gen++; } - rcu_assign_pointer(rkt->table[r], mr); mr->lkey_published = 1; + /* Insure published written first */ + rcu_assign_pointer(rkt->table[r], mr); success: spin_unlock_irqrestore(&rkt->lock, flags); out: @@ -253,23 +255,24 @@ static void rvt_free_lkey(struct rvt_mregion *mr) spin_lock_irqsave(&rkt->lock, flags); if (!lkey) { if (mr->lkey_published) { - RCU_INIT_POINTER(dev->dma_mr, NULL); + mr->lkey_published = 0; + /* insure published is written before pointer */ + rcu_assign_pointer(dev->dma_mr, NULL); rvt_put_mr(mr); } } else { if (!mr->lkey_published) goto out; r = lkey >> (32 - dev->dparms.lkey_table_size); - RCU_INIT_POINTER(rkt->table[r], NULL); + mr->lkey_published = 0; + /* insure published is written before pointer */ + rcu_assign_pointer(rkt->table[r], NULL); } - mr->lkey_published = 0; freed++; out: spin_unlock_irqrestore(&rkt->lock, flags); - if (freed) { - synchronize_rcu(); + if (freed) percpu_ref_kill(&mr->refcount); - } } static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd) @@ -822,16 +825,21 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, goto ok; } mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]); - if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || - mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) + if (!mr) goto bail; + rvt_get_mr(mr); + if (!READ_ONCE(mr->lkey_published)) + goto bail_unref; + + if (unlikely(atomic_read(&mr->lkey_invalid) || + mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) + goto bail_unref; off = sge->addr - mr->user_base; if (unlikely(sge->addr < mr->user_base || off + sge->length > mr->length || (mr->access_flags & acc) != acc)) - goto bail; - rvt_get_mr(mr); + goto bail_unref; rcu_read_unlock(); off += mr->offset; @@ -867,6 +875,8 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, isge->n = n; ok: return 1; +bail_unref: + rvt_put_mr(mr); bail: rcu_read_unlock(); return 0; @@ -922,15 +932,20 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, } mr = rcu_dereference(rkt->table[rkey >> rkt->shift]); - if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || - mr->lkey != rkey || qp->ibqp.pd != mr->pd)) + if (!mr) goto bail; + rvt_get_mr(mr); + /* insure mr read is before test */ + if (!READ_ONCE(mr->lkey_published)) + goto bail_unref; + if (unlikely(atomic_read(&mr->lkey_invalid) || + mr->lkey != rkey || qp->ibqp.pd != mr->pd)) + goto bail_unref; off = vaddr - mr->iova; if (unlikely(vaddr < 
mr->iova || off + len > mr->length || (mr->access_flags & acc) == 0)) - goto bail; - rvt_get_mr(mr); + goto bail_unref; rcu_read_unlock(); off += mr->offset; @@ -966,6 +981,8 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, sge->n = n; ok: return 1; +bail_unref: + rvt_put_mr(mr); bail: rcu_read_unlock(); return 0; -- cgit v1.2.3 From 30004b861afd99aebf34237373cb8ee9e890418e Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 18 Apr 2017 12:03:37 +0300 Subject: IB/core: Rename write flag to exclusive in rdma_core We rename the "write" flag to "exclusive", as it is used for both WRITE and DESTROY actions. Fixes: 3832125624b7 ('IB/core: Add support for idr types') Signed-off-by: Matan Barak Reviewed-by: Sean Hefty Signed-off-by: Doug Ledford --- drivers/infiniband/core/rdma_core.c | 60 +++++++++++++++++++------------------ include/rdma/uverbs_types.h | 33 ++++++++++---------- 2 files changed, 48 insertions(+), 45 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index e5bdf7f67574..88d1e596f910 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -44,7 +44,7 @@ void uverbs_uobject_get(struct ib_uobject *uobject) kref_get(&uobject->ref); } -static void uverbs_uobject_put_ref(struct kref *ref) +static void uverbs_uobject_free(struct kref *ref) { struct ib_uobject *uobj = container_of(ref, struct ib_uobject, ref); @@ -57,21 +57,23 @@ static void uverbs_uobject_put_ref(struct kref *ref) void uverbs_uobject_put(struct ib_uobject *uobject) { - kref_put(&uobject->ref, uverbs_uobject_put_ref); + kref_put(&uobject->ref, uverbs_uobject_free); } -static int uverbs_try_lock_object(struct ib_uobject *uobj, bool write) +static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive) { /* - * When a read is required, we use a positive counter. Each read - * request checks that the value != -1 and increment it. Write - * requires an exclusive access, thus we check that the counter is - * zero (nobody claimed this object) and we set it to -1. - * Releasing a read lock is done by simply decreasing the counter. - * As for writes, since only a single write is permitted, setting - * it to zero is enough for releasing it. + * When a shared access is required, we use a positive counter. Each + * shared access request checks that the value != -1 and increments it. + * Exclusive access is required for operations like write or destroy. + * In exclusive access mode, we check that the counter is zero (nobody + * claimed this object) and we set it to -1. Releasing a shared access + * lock is done simply by decreasing the counter. As for exclusive + * access locks, since only a single one of them is allowed + * concurrently, setting the counter to zero is enough for releasing + * this lock. */ - if (!write) + if (!exclusive) return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ? -EBUSY : 0; @@ -135,7 +137,7 @@ static void uverbs_idr_remove_uobj(struct ib_uobject *uobj) /* Returns the ib_uobject or an error. The caller should check for IS_ERR. 
*/ static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type, struct ib_ucontext *ucontext, - int id, bool write) + int id, bool exclusive) { struct ib_uobject *uobj; @@ -155,14 +157,14 @@ free: static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type, struct ib_ucontext *ucontext, - int id, bool write) + int id, bool exclusive) { struct file *f; struct ib_uobject *uobject; const struct uverbs_obj_fd_type *fd_type = container_of(type, struct uverbs_obj_fd_type, type); - if (write) + if (exclusive) return ERR_PTR(-EOPNOTSUPP); f = fget(id); @@ -186,12 +188,12 @@ static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *ty struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, struct ib_ucontext *ucontext, - int id, bool write) + int id, bool exclusive) { struct ib_uobject *uobj; int ret; - uobj = type->type_class->lookup_get(type, ucontext, id, write); + uobj = type->type_class->lookup_get(type, ucontext, id, exclusive); if (IS_ERR(uobj)) return uobj; @@ -200,7 +202,7 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, goto free; } - ret = uverbs_try_lock_object(uobj, write); + ret = uverbs_try_lock_object(uobj, exclusive); if (ret) { WARN(ucontext->cleanup_reason, "ib_uverbs: Trying to lookup_get while cleanup context\n"); @@ -209,7 +211,7 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, return uobj; free: - uobj->type->type_class->lookup_put(uobj, write); + uobj->type->type_class->lookup_put(uobj, exclusive); uverbs_uobject_put(uobj); return ERR_PTR(ret); } @@ -350,10 +352,10 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, return ret; } -static void lockdep_check(struct ib_uobject *uobj, bool write) +static void lockdep_check(struct ib_uobject *uobj, bool exclusive) { #ifdef CONFIG_LOCKDEP - if (write) + if (exclusive) WARN_ON(atomic_read(&uobj->usecnt) > 0); else WARN_ON(atomic_read(&uobj->usecnt) == -1); @@ -465,29 +467,29 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj) uobj->type->type_class->alloc_abort(uobj); } -static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool write) +static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive) { } -static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool write) +static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive) { struct file *filp = uobj->object; - WARN_ON(write); + WARN_ON(exclusive); /* This indirectly calls uverbs_close_fd and free the object */ fput(filp); } -void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool write) +void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive) { - lockdep_check(uobj, write); - uobj->type->type_class->lookup_put(uobj, write); + lockdep_check(uobj, exclusive); + uobj->type->type_class->lookup_put(uobj, exclusive); /* * In order to unlock an object, either decrease its usecnt for - * read access or zero it in case of write access. See + * read access or zero it in case of exclusive access. See * uverbs_try_lock_object for locking schema information. */ - if (!write) + if (!exclusive) atomic_dec(&uobj->usecnt); else atomic_set(&uobj->usecnt, 0); @@ -512,7 +514,7 @@ const struct uverbs_obj_type_class uverbs_idr_class = { * When the other thread continue - without the RCU, it would * access freed memory. However, the rcu_read_lock delays the free * until the rcu_read_lock of the READ operation quits. 
Since the - * write lock of the object is still taken by the DESTROY flow, the + * exclusive lock of the object is still taken by the DESTROY flow, the * READ operation will get -EBUSY and it'll just bail out. */ .needs_kfree_rcu = true, diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index a37692167a45..351ea185df44 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -54,17 +54,18 @@ struct uverbs_obj_type_class { * destroyed. * [lookup]: Starts with lookup_get which fetches and locks the * object. After the handler finished using the object, it - * needs to call lookup_put to unlock it. The write flag - * indicates if the object is locked for exclusive access. - * [remove]: Starts with lookup_get with write flag set. This locks - * the object for exclusive access. If the handler code - * completed successfully, remove_commit is called and - * the ib_uobject is removed from the context's uobjects - * repository and put. The object itself is destroyed as - * well. Once remove succeeds new krefs to the object - * cannot be acquired by other threads or userspace and - * the hardware driver is removed from the object. - * Other krefs on the object may still exist. + * needs to call lookup_put to unlock it. The exclusive + * flag indicates if the object is locked for exclusive + * access. + * [remove]: Starts with lookup_get with exclusive flag set. This + * locks the object for exclusive access. If the handler + * code completed successfully, remove_commit is called + * and the ib_uobject is removed from the context's + * uobjects repository and put. The object itself is + * destroyed as well. Once remove succeeds new krefs to + * the object cannot be acquired by other threads or + * userspace and the hardware driver is removed from the + * object. Other krefs on the object may still exist. * If the handler code failed, lookup_put should be * called. This callback is used when the context * is destroyed as well (process termination, @@ -77,10 +78,10 @@ struct uverbs_obj_type_class { struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type, struct ib_ucontext *ucontext, int id, - bool write); - void (*lookup_put)(struct ib_uobject *uobj, bool write); + bool exclusive); + void (*lookup_put)(struct ib_uobject *uobj, bool exclusive); /* - * Must be called with the write lock held. If successful uobj is + * Must be called with the exclusive lock held. If successful uobj is * invalid on return. On failure uobject is left completely * unchanged */ @@ -121,8 +122,8 @@ struct uverbs_obj_idr_type { struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, struct ib_ucontext *ucontext, - int id, bool write); -void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool write); + int id, bool exclusive); +void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive); struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, struct ib_ucontext *ucontext); void rdma_alloc_abort_uobject(struct ib_uobject *uobj); -- cgit v1.2.3 From f025c48958104868a9fceb04696cdfdb056794c9 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 18 Apr 2017 12:03:38 +0300 Subject: IB/core: Don't pass the lock state to _rdma_remove_commit_uobject The only scenario where this function was called while the lock is already taken is in the context cleanup scenario. Thus, in order not to pass the lock state to this function, we just call the remove logic straight from the cleanup context function. 
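The general shape of this fix deserves a note: a "lock" flag parameter gives one function two different locking contracts, which is what made the cleanup path awkward in the first place. A generic userspace sketch of the before/after, with illustrative names that are not from the patch:

#include <pthread.h>

static pthread_mutex_t uobjects_lock = PTHREAD_MUTEX_INITIALIZER;
static int uobjects_len;

/* Before: the flag silently switches the function's locking contract,
 * and every call site must be audited against it.
 */
static void remove_obj_flagged(int caller_holds_lock)
{
	if (!caller_holds_lock)
		pthread_mutex_lock(&uobjects_lock);
	uobjects_len--;
	if (!caller_holds_lock)
		pthread_mutex_unlock(&uobjects_lock);
}

/* After: a single contract, the function always takes the lock itself.
 * The one caller that already held the lock (the cleanup context)
 * instead performs the unlocked steps directly, as the patch below does.
 */
static void remove_obj(void)
{
	pthread_mutex_lock(&uobjects_lock);
	uobjects_len--;
	pthread_mutex_unlock(&uobjects_lock);
}

int main(void)
{
	remove_obj_flagged(0);
	remove_obj();
	return 0;
}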
Fixes: 3832125624b7 ('IB/core: Add support for idr types') Signed-off-by: Matan Barak Reviewed-by: Sean Hefty Signed-off-by: Doug Ledford --- drivers/infiniband/core/rdma_core.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 88d1e596f910..699a6595e7cf 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -363,8 +363,7 @@ static void lockdep_check(struct ib_uobject *uobj, bool exclusive) } static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, - enum rdma_remove_reason why, - bool lock) + enum rdma_remove_reason why) { int ret; struct ib_ucontext *ucontext = uobj->context; @@ -375,11 +374,9 @@ static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, atomic_set(&uobj->usecnt, 0); uobj->type->type_class->lookup_put(uobj, true); } else { - if (lock) - mutex_lock(&ucontext->uobjects_lock); + mutex_lock(&ucontext->uobjects_lock); list_del(&uobj->list); - if (lock) - mutex_unlock(&ucontext->uobjects_lock); + mutex_unlock(&ucontext->uobjects_lock); /* put the ref we took when we created the object */ uverbs_uobject_put(uobj); } @@ -401,7 +398,7 @@ int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj) return 0; } lockdep_check(uobj, true); - ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY, true); + ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY); up_read(&ucontext->cleanup_rwsem); return ret; @@ -534,8 +531,7 @@ static void _uverbs_close_fd(struct ib_uobject_file *uobj_file) goto unlock; ucontext = uobj_file->uobj.context; - ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE, - true); + ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE); up_read(&ucontext->cleanup_rwsem); if (ret) pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n"); @@ -583,7 +579,7 @@ void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed) */ mutex_lock(&ucontext->uobjects_lock); list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects, - list) + list) { if (obj->type->destroy_order == cur_order) { int ret; @@ -592,15 +588,19 @@ void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed) * racing with a lookup_get. */ WARN_ON(uverbs_try_lock_object(obj, true)); - ret = _rdma_remove_commit_uobject(obj, reason, - false); + ret = obj->type->type_class->remove_commit(obj, + reason); + list_del(&obj->list); if (ret) pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n", obj->id, cur_order); + /* put the ref we took when we created the object */ + uverbs_uobject_put(obj); } else { next_order = min(next_order, obj->type->destroy_order); } + } mutex_unlock(&ucontext->uobjects_lock); cur_order = next_order; } -- cgit v1.2.3 From d9edfc5a4f81165e64d4ad6e423a8554c88dd0de Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 18 Apr 2017 12:03:39 +0300 Subject: IB/core: Nullify ib_uobject during allocation Currently, we initialize all fields of ib_uobject straight after allocation. Therefore, a kmalloc was sufficient. Since ib_uobject could be embedded in a type specific structure, we nullify it to spare programmer errors. 
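A userspace sketch of the failure mode being defended against, using hypothetical structures for illustration only: once the uobject can be embedded in a type-specific wrapper, any field the handler has not reached yet is uninitialized garbage under plain kmalloc(), so an early error path that inspects it misbehaves; kzalloc(), like calloc() below, guarantees such fields read as zero/NULL.

#include <stdlib.h>

struct uobj {
	void *object;	/* filled in later by the handler */
	int   live;	/* cleanup paths test this before teardown */
};

struct cq_uobj {	/* hypothetical type-specific struct embedding uobj */
	struct uobj base;
	unsigned long comp_events;
};

static struct uobj *alloc_uobj(size_t obj_size)
{
	/* calloc stands in for kzalloc: base.object == NULL and
	 * base.live == 0 hold even if an error path runs before the
	 * handler initializes them; malloc (like kmalloc) would hand
	 * the cleanup code indeterminate values instead.
	 */
	return calloc(1, obj_size);
}

int main(void)
{
	struct cq_uobj *cq = (struct cq_uobj *)alloc_uobj(sizeof(*cq));

	if (cq && !cq->base.live)	/* safe: guaranteed zero */
		free(cq);
	return 0;
}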
Fixes: 3832125624b7 ('IB/core: Add support for idr types') Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/core/rdma_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 699a6595e7cf..41c31a2bf093 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -84,7 +84,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive) static struct ib_uobject *alloc_uobj(struct ib_ucontext *context, const struct uverbs_obj_type *type) { - struct ib_uobject *uobj = kmalloc(type->obj_size, GFP_KERNEL); + struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL); if (!uobj) return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From c52d8114d1a58f6fe4bfb9af39d262dd8f21e50e Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 18 Apr 2017 12:03:40 +0300 Subject: IB/core: A small refactor in destroy WQ handler Instead of having uverbs_uobject_put both in the error flow and the good flow, we unite them. Fixes: fd3c7904db6e ('IB/core: Change idr objects to use the new schema') Signed-off-by: Matan Barak Reviewed-by: Sean Hefty Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b9024fa31b18..66cb22e82e24 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2989,18 +2989,12 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, uverbs_uobject_get(uobj); ret = uobj_remove_commit(uobj); - if (ret) { - uverbs_uobject_put(uobj); - return ret; - } - resp.events_reported = obj->uevent.events_reported; uverbs_uobject_put(uobj); - ret = ib_copy_to_udata(ucore, &resp, resp.response_length); if (ret) return ret; - return 0; + return ib_copy_to_udata(ucore, &resp, resp.response_length); } int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, -- cgit v1.2.3 From e0fcc61113c17a2eae0dea7e9e67ac71849475b5 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 18 Apr 2017 12:03:41 +0300 Subject: IB/core: Don't use is_async in event files to infer events size Previously, we inferred the events size in ib_uverbs_event_read by using the is_async flag. Instead of that, we pass the event size directly. 
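The refactor follows a general rule: when a boolean parameter exists only so the callee can derive another value, pass the derived value instead; new variants then need no new flag. A minimal sketch of the resulting call shape, with illustrative types rather than the uverbs ones:

#include <stddef.h>
#include <string.h>

struct async_desc { unsigned long element; unsigned int event_type; };
struct comp_desc  { unsigned long cq_handle; };

/* After the refactor, the caller states the event size, so the common
 * read path no longer enumerates event flavors. Before, the callee
 * picked sizeof(...) from an is_async bool.
 */
static size_t event_read(char *dst, const void *event, size_t eventsz,
			 size_t count)
{
	if (eventsz > count)	/* mirrors the patch's -EINVAL check */
		return 0;
	memcpy(dst, event, eventsz);
	return eventsz;
}

int main(void)
{
	char buf[64];
	struct async_desc a = { 1, 2 };
	struct comp_desc c = { 3 };

	event_read(buf, &a, sizeof(a), sizeof(buf));
	event_read(buf, &c, sizeof(c), sizeof(buf));
	return 0;
}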
Fixes: 1e7710f3f656 ('IB/core: Change completion channel to use the reworked objects schema') Signed-off-by: Matan Barak Reviewed-by: Sean Hefty Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_main.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 0b0dab89106a..4ab0e5d3ce15 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -257,10 +257,9 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_file *file, struct ib_uverbs_file *uverbs_file, struct file *filp, char __user *buf, size_t count, loff_t *pos, - bool is_async) + size_t eventsz) { struct ib_uverbs_event *event; - int eventsz; int ret = 0; spin_lock_irq(&file->lock); @@ -290,11 +289,6 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_file *file, event = list_entry(file->event_list.next, struct ib_uverbs_event, list); - if (is_async) - eventsz = sizeof (struct ib_uverbs_async_event_desc); - else - eventsz = sizeof (struct ib_uverbs_comp_event_desc); - if (eventsz > count) { ret = -EINVAL; event = NULL; @@ -326,7 +320,8 @@ static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf, struct ib_uverbs_async_event_file *file = filp->private_data; return ib_uverbs_event_read(&file->ev_file, file->uverbs_file, filp, - buf, count, pos, true); + buf, count, pos, + sizeof(struct ib_uverbs_async_event_desc)); } static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf, @@ -337,7 +332,8 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf, return ib_uverbs_event_read(&comp_ev_file->ev_file, comp_ev_file->uobj_file.ufile, filp, - buf, count, pos, false); + buf, count, pos, + sizeof(struct ib_uverbs_comp_event_desc)); } static unsigned int ib_uverbs_event_poll(struct ib_uverbs_event_file *file, -- cgit v1.2.3 From db1b5ddd53365a07a7754803bdba370ebb84ba19 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 18 Apr 2017 12:03:42 +0300 Subject: IB/core: Rename uverbs event file structure Previously, ib_uverbs_event_file was suffixed by _file as it contained the actual file information. Since it's now only used as base struct for ib_uverbs_async_event_file and ib_uverbs_completion_event_file, we change its name to ib_uverbs_event_queue. This represents its logical role better. Fixes: 1e7710f3f656 ('IB/core: Change completion channel to use the reworked objects schema') Signed-off-by: Matan Barak Reviewed-by: Sean Hefty Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs.h | 21 ++--- drivers/infiniband/core/uverbs_cmd.c | 8 +- drivers/infiniband/core/uverbs_main.c | 132 ++++++++++++++--------------- drivers/infiniband/core/uverbs_std_types.c | 20 ++--- 4 files changed, 91 insertions(+), 90 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 826f82748718..a3230b6ab766 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -76,12 +76,13 @@ * an asynchronous event queue file is created and released when the * event file is closed. * - * struct ib_uverbs_event_file: One reference is held by the VFS and - * released when the file is closed. For asynchronous event files, - * another reference is held by the corresponding main context file - * and released when that file is closed. 
For completion event files, - * a reference is taken when a CQ is created that uses the file, and - * released when the CQ is destroyed. + * struct ib_uverbs_event_queue: Base structure for + * struct ib_uverbs_async_event_file and struct ib_uverbs_completion_event_file. + * One reference is held by the VFS and released when the file is closed. + * For asynchronous event files, another reference is held by the corresponding + * main context file and released when that file is closed. For completion + * event files, a reference is taken when a CQ is created that uses the file, + * and released when the CQ is destroyed. */ struct ib_uverbs_device { @@ -101,7 +102,7 @@ struct ib_uverbs_device { struct list_head uverbs_events_file_list; }; -struct ib_uverbs_event_file { +struct ib_uverbs_event_queue { spinlock_t lock; int is_closed; wait_queue_head_t poll_wait; @@ -110,7 +111,7 @@ struct ib_uverbs_event_file { }; struct ib_uverbs_async_event_file { - struct ib_uverbs_event_file ev_file; + struct ib_uverbs_event_queue ev_queue; struct ib_uverbs_file *uverbs_file; struct kref ref; struct list_head list; @@ -118,7 +119,7 @@ struct ib_uverbs_async_event_file { struct ib_uverbs_completion_event_file { struct ib_uobject_file uobj_file; - struct ib_uverbs_event_file ev_file; + struct ib_uverbs_event_queue ev_queue; }; struct ib_uverbs_file { @@ -191,7 +192,7 @@ struct ib_ucq_object { }; extern const struct file_operations uverbs_event_fops; -void ib_uverbs_init_event_file(struct ib_uverbs_event_file *ev_file); +void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue); struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file, struct ib_device *ib_dev); void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 66cb22e82e24..e2fee045f03b 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -943,7 +943,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, ev_file = container_of(uobj, struct ib_uverbs_completion_event_file, uobj_file.uobj); - ib_uverbs_init_event_file(&ev_file->ev_file); + ib_uverbs_init_event_queue(&ev_file->ev_queue); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { @@ -1015,7 +1015,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, cq->uobject = &obj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; - cq->cq_context = &ev_file->ev_file; + cq->cq_context = &ev_file->ev_queue; atomic_set(&cq->usecnt, 0); obj->uobject.object = cq; @@ -1296,7 +1296,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, struct ib_uobject *uobj; struct ib_cq *cq; struct ib_ucq_object *obj; - struct ib_uverbs_event_file *ev_file; + struct ib_uverbs_event_queue *ev_queue; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -1313,7 +1313,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, */ uverbs_uobject_get(uobj); cq = uobj->object; - ev_file = cq->cq_context; + ev_queue = cq->cq_context; obj = container_of(cq->uobject, struct ib_ucq_object, uobject); memset(&resp, 0, sizeof(resp)); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 4ab0e5d3ce15..3a9883d1257e 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -171,22 +171,22 @@ void ib_uverbs_release_ucq(struct 
ib_uverbs_file *file,
 	struct ib_uverbs_event *evt, *tmp;
 
 	if (ev_file) {
-		spin_lock_irq(&ev_file->ev_file.lock);
+		spin_lock_irq(&ev_file->ev_queue.lock);
 		list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
 			list_del(&evt->list);
 			kfree(evt);
 		}
-		spin_unlock_irq(&ev_file->ev_file.lock);
+		spin_unlock_irq(&ev_file->ev_queue.lock);
 
 		uverbs_uobject_put(&ev_file->uobj_file.uobj);
 	}
 
-	spin_lock_irq(&file->async_file->ev_file.lock);
+	spin_lock_irq(&file->async_file->ev_queue.lock);
 	list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
 		list_del(&evt->list);
 		kfree(evt);
 	}
-	spin_unlock_irq(&file->async_file->ev_file.lock);
+	spin_unlock_irq(&file->async_file->ev_queue.lock);
 }
 
 void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
@@ -194,12 +194,12 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
 {
 	struct ib_uverbs_event *evt, *tmp;
 
-	spin_lock_irq(&file->async_file->ev_file.lock);
+	spin_lock_irq(&file->async_file->ev_queue.lock);
 	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
 		list_del(&evt->list);
 		kfree(evt);
 	}
-	spin_unlock_irq(&file->async_file->ev_file.lock);
+	spin_unlock_irq(&file->async_file->ev_queue.lock);
 }
 
 void ib_uverbs_detach_umcast(struct ib_qp *qp,
@@ -253,7 +253,7 @@ void ib_uverbs_release_file(struct kref *ref)
 	kfree(file);
 }
 
-static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_file *file,
+static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
 				    struct ib_uverbs_file *uverbs_file,
 				    struct file *filp, char __user *buf,
 				    size_t count, loff_t *pos,
@@ -262,16 +262,16 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_file *file,
 	struct ib_uverbs_event *event;
 	int ret = 0;
 
-	spin_lock_irq(&file->lock);
+	spin_lock_irq(&ev_queue->lock);
 
-	while (list_empty(&file->event_list)) {
-		spin_unlock_irq(&file->lock);
+	while (list_empty(&ev_queue->event_list)) {
+		spin_unlock_irq(&ev_queue->lock);
 
 		if (filp->f_flags & O_NONBLOCK)
 			return -EAGAIN;
 
-		if (wait_event_interruptible(file->poll_wait,
-					     (!list_empty(&file->event_list) ||
+		if (wait_event_interruptible(ev_queue->poll_wait,
+					     (!list_empty(&ev_queue->event_list) ||
 			/* The barriers built into wait_event_interruptible()
 			 * and wake_up() guarentee this will see the null set
 			 * without using RCU
@@ -280,27 +280,27 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_file *file,
 			return -ERESTARTSYS;
 
 		/* If device was disassociated and no event exists set an error */
-		if (list_empty(&file->event_list) &&
+		if (list_empty(&ev_queue->event_list) &&
 		    !uverbs_file->device->ib_dev)
 			return -EIO;
 
-		spin_lock_irq(&file->lock);
+		spin_lock_irq(&ev_queue->lock);
 	}
 
-	event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
+	event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
 
 	if (eventsz > count) {
 		ret = -EINVAL;
 		event = NULL;
 	} else {
-		list_del(file->event_list.next);
+		list_del(ev_queue->event_list.next);
 		if (event->counter) {
 			++(*event->counter);
 			list_del(&event->obj_list);
 		}
 	}
 
-	spin_unlock_irq(&file->lock);
+	spin_unlock_irq(&ev_queue->lock);
 
 	if (event) {
 		if (copy_to_user(buf, event, eventsz))
@@ -319,7 +319,7 @@ static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
 {
 	struct ib_uverbs_async_event_file *file = filp->private_data;
 
-	return ib_uverbs_event_read(&file->ev_file, file->uverbs_file, filp,
+	return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp,
 				    buf, count, pos,
 				    sizeof(struct ib_uverbs_async_event_desc));
 }
@@ -330,24 +330,24 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
 	struct ib_uverbs_completion_event_file *comp_ev_file =
 		filp->private_data;
 
-	return ib_uverbs_event_read(&comp_ev_file->ev_file,
+	return ib_uverbs_event_read(&comp_ev_file->ev_queue,
 				    comp_ev_file->uobj_file.ufile, filp,
 				    buf, count, pos,
 				    sizeof(struct ib_uverbs_comp_event_desc));
 }
 
-static unsigned int ib_uverbs_event_poll(struct ib_uverbs_event_file *file,
+static unsigned int ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
 					 struct file *filp,
 					 struct poll_table_struct *wait)
 {
 	unsigned int pollflags = 0;
 
-	poll_wait(filp, &file->poll_wait, wait);
+	poll_wait(filp, &ev_queue->poll_wait, wait);
 
-	spin_lock_irq(&file->lock);
-	if (!list_empty(&file->event_list))
+	spin_lock_irq(&ev_queue->lock);
+	if (!list_empty(&ev_queue->event_list))
 		pollflags = POLLIN | POLLRDNORM;
-	spin_unlock_irq(&file->lock);
+	spin_unlock_irq(&ev_queue->lock);
 
 	return pollflags;
 }
@@ -364,14 +364,14 @@ static unsigned int ib_uverbs_comp_event_poll(struct file *filp,
 	struct ib_uverbs_completion_event_file *comp_ev_file =
 		filp->private_data;
 
-	return ib_uverbs_event_poll(&comp_ev_file->ev_file, filp, wait);
+	return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
 }
 
 static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
 {
-	struct ib_uverbs_event_file *file = filp->private_data;
+	struct ib_uverbs_event_queue *ev_queue = filp->private_data;
 
-	return fasync_helper(fd, filp, on, &file->async_queue);
+	return fasync_helper(fd, filp, on, &ev_queue->async_queue);
 }
 
 static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
@@ -379,7 +379,7 @@ static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
 	struct ib_uverbs_completion_event_file *comp_ev_file =
 		filp->private_data;
 
-	return fasync_helper(fd, filp, on, &comp_ev_file->ev_file.async_queue);
+	return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
 }
 
 static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
@@ -390,15 +390,15 @@ static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
 	int closed_already = 0;
 
 	mutex_lock(&uverbs_file->device->lists_mutex);
-	spin_lock_irq(&file->ev_file.lock);
-	closed_already = file->ev_file.is_closed;
-	file->ev_file.is_closed = 1;
-	list_for_each_entry_safe(entry, tmp, &file->ev_file.event_list, list) {
+	spin_lock_irq(&file->ev_queue.lock);
+	closed_already = file->ev_queue.is_closed;
+	file->ev_queue.is_closed = 1;
+	list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
 		if (entry->counter)
 			list_del(&entry->obj_list);
 		kfree(entry);
 	}
-	spin_unlock_irq(&file->ev_file.lock);
+	spin_unlock_irq(&file->ev_queue.lock);
 	if (!closed_already) {
 		list_del(&file->list);
 		ib_unregister_event_handler(&uverbs_file->event_handler);
@@ -416,13 +416,13 @@ static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
 	struct ib_uverbs_completion_event_file *file = filp->private_data;
 	struct ib_uverbs_event *entry, *tmp;
 
-	spin_lock_irq(&file->ev_file.lock);
-	list_for_each_entry_safe(entry, tmp, &file->ev_file.event_list, list) {
+	spin_lock_irq(&file->ev_queue.lock);
+	list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
 		if (entry->counter)
 			list_del(&entry->obj_list);
 		kfree(entry);
 	}
-	spin_unlock_irq(&file->ev_file.lock);
+	spin_unlock_irq(&file->ev_queue.lock);
 
 	uverbs_close_fd(filp);
 
@@ -449,23 +449,23 @@ static const struct file_operations uverbs_async_event_fops = {
 
 void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
 {
-	struct ib_uverbs_event_file *file = cq_context;
+	struct ib_uverbs_event_queue *ev_queue = cq_context;
 	struct ib_ucq_object *uobj;
 	struct ib_uverbs_event *entry;
 	unsigned long flags;
 
-	if (!file)
+	if (!ev_queue)
 		return;
 
-	spin_lock_irqsave(&file->lock, flags);
-	if (file->is_closed) {
-		spin_unlock_irqrestore(&file->lock, flags);
+	spin_lock_irqsave(&ev_queue->lock, flags);
+	if (ev_queue->is_closed) {
+		spin_unlock_irqrestore(&ev_queue->lock, flags);
 		return;
 	}
 
 	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
 	if (!entry) {
-		spin_unlock_irqrestore(&file->lock, flags);
+		spin_unlock_irqrestore(&ev_queue->lock, flags);
 		return;
 	}
 
@@ -474,12 +474,12 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
 	entry->desc.comp.cq_handle = cq->uobject->user_handle;
 	entry->counter		   = &uobj->comp_events_reported;
 
-	list_add_tail(&entry->list, &file->event_list);
+	list_add_tail(&entry->list, &ev_queue->event_list);
 	list_add_tail(&entry->obj_list, &uobj->comp_list);
-	spin_unlock_irqrestore(&file->lock, flags);
+	spin_unlock_irqrestore(&ev_queue->lock, flags);
 
-	wake_up_interruptible(&file->poll_wait);
-	kill_fasync(&file->async_queue, SIGIO, POLL_IN);
+	wake_up_interruptible(&ev_queue->poll_wait);
+	kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
 }
 
 static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
@@ -490,15 +490,15 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
 	struct ib_uverbs_event *entry;
 	unsigned long flags;
 
-	spin_lock_irqsave(&file->async_file->ev_file.lock, flags);
-	if (file->async_file->ev_file.is_closed) {
-		spin_unlock_irqrestore(&file->async_file->ev_file.lock, flags);
+	spin_lock_irqsave(&file->async_file->ev_queue.lock, flags);
+	if (file->async_file->ev_queue.is_closed) {
+		spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
 		return;
 	}
 
 	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
 	if (!entry) {
-		spin_unlock_irqrestore(&file->async_file->ev_file.lock, flags);
+		spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
 		return;
 	}
 
@@ -507,13 +507,13 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
 	entry->desc.async.reserved   = 0;
 	entry->counter               = counter;
 
-	list_add_tail(&entry->list, &file->async_file->ev_file.event_list);
+	list_add_tail(&entry->list, &file->async_file->ev_queue.event_list);
 	if (obj_list)
 		list_add_tail(&entry->obj_list, obj_list);
-	spin_unlock_irqrestore(&file->async_file->ev_file.lock, flags);
+	spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
 
-	wake_up_interruptible(&file->async_file->ev_file.poll_wait);
-	kill_fasync(&file->async_file->ev_file.async_queue, SIGIO, POLL_IN);
+	wake_up_interruptible(&file->async_file->ev_queue.poll_wait);
+	kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN);
 }
 
 void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
@@ -580,13 +580,13 @@ void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
 	file->async_file = NULL;
 }
 
-void ib_uverbs_init_event_file(struct ib_uverbs_event_file *ev_file)
+void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
 {
-	spin_lock_init(&ev_file->lock);
-	INIT_LIST_HEAD(&ev_file->event_list);
-	init_waitqueue_head(&ev_file->poll_wait);
-	ev_file->is_closed = 0;
-	ev_file->async_queue = NULL;
+	spin_lock_init(&ev_queue->lock);
+	INIT_LIST_HEAD(&ev_queue->event_list);
+	init_waitqueue_head(&ev_queue->poll_wait);
+	ev_queue->is_closed = 0;
+	ev_queue->async_queue = NULL;
 }
 
 struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
@@ -600,7 +600,7 @@ struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file
 	if (!ev_file)
 		return ERR_PTR(-ENOMEM);
 
-	ib_uverbs_init_event_file(&ev_file->ev_file);
+	ib_uverbs_init_event_queue(&ev_file->ev_queue);
 	ev_file->uverbs_file = uverbs_file;
 	kref_get(&ev_file->uverbs_file->ref);
 	kref_init(&ev_file->ref);
@@ -1186,9 +1186,9 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
 						  uverbs_events_file_list,
 						  struct ib_uverbs_async_event_file,
 						  list);
-		spin_lock_irq(&event_file->ev_file.lock);
-		event_file->ev_file.is_closed = 1;
-		spin_unlock_irq(&event_file->ev_file.lock);
+		spin_lock_irq(&event_file->ev_queue.lock);
+		event_file->ev_queue.is_closed = 1;
+		spin_unlock_irq(&event_file->ev_queue.lock);
 
 		list_del(&event_file->list);
 		ib_unregister_event_handler(
@@ -1196,8 +1196,8 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
 		event_file->uverbs_file->event_handler.device = NULL;
 
-		wake_up_interruptible(&event_file->ev_file.poll_wait);
-		kill_fasync(&event_file->ev_file.async_queue, SIGIO, POLL_IN);
+		wake_up_interruptible(&event_file->ev_queue.poll_wait);
+		kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
 	}
 	mutex_unlock(&uverbs_dev->lists_mutex);
 }
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 7f26af5ea066..e3338b19d6a2 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -138,17 +138,17 @@ int uverbs_free_cq(struct ib_uobject *uobject,
 		   enum rdma_remove_reason why)
 {
 	struct ib_cq *cq = uobject->object;
-	struct ib_uverbs_event_file *ev_file = cq->cq_context;
+	struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
 	struct ib_ucq_object *ucq =
 		container_of(uobject, struct ib_ucq_object, uobject);
 	int ret;
 
 	ret = ib_destroy_cq(cq);
 	if (!ret || why != RDMA_REMOVE_DESTROY)
-		ib_uverbs_release_ucq(uobject->context->ufile, ev_file ?
-				      container_of(ev_file,
+		ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
+				      container_of(ev_queue,
 						   struct ib_uverbs_completion_event_file,
-						   ev_file) : NULL,
+						   ev_queue) : NULL,
 				      ucq);
 	return ret;
 }
@@ -196,15 +196,15 @@ int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file,
 	struct ib_uverbs_completion_event_file *comp_event_file =
 		container_of(uobj_file, struct ib_uverbs_completion_event_file,
 			     uobj_file);
-	struct ib_uverbs_event_file *event_file = &comp_event_file->ev_file;
+	struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue;
 
-	spin_lock_irq(&event_file->lock);
-	event_file->is_closed = 1;
-	spin_unlock_irq(&event_file->lock);
+	spin_lock_irq(&event_queue->lock);
+	event_queue->is_closed = 1;
+	spin_unlock_irq(&event_queue->lock);
 
 	if (why == RDMA_REMOVE_DRIVER_REMOVE) {
-		wake_up_interruptible(&event_file->poll_wait);
-		kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
+		wake_up_interruptible(&event_queue->poll_wait);
+		kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN);
 	}
 	return 0;
 };
-- 
cgit v1.2.3
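
[Editorial note, not part of the patch: every producer touched by this rename
follows the same sequence against an ib_uverbs_event_queue: take the queue
spinlock, bail out if the queue is already marked closed, append the event,
drop the lock, then wake pollers and fasync subscribers. The sketch below
condenses that pattern as seen in ib_uverbs_comp_handler() and
ib_uverbs_async_handler(); the struct fields mirror the patch, while the
helper name queue_one_event() and the elided payload are hypothetical.]

/*
 * Illustrative sketch only. Fields match struct ib_uverbs_event_queue as
 * used in the hunks above; queue_one_event() is a made-up helper.
 */
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

struct ib_uverbs_event_queue {
	spinlock_t lock;
	int is_closed;
	wait_queue_head_t poll_wait;
	struct fasync_struct *async_queue;
	struct list_head event_list;
};

struct ib_uverbs_event {
	struct list_head list;	/* entry in ev_queue->event_list */
	/* event descriptor and reported-counter fields elided */
};

static void queue_one_event(struct ib_uverbs_event_queue *ev_queue,
			    struct ib_uverbs_event *entry)
{
	unsigned long flags;

	spin_lock_irqsave(&ev_queue->lock, flags);
	if (ev_queue->is_closed) {
		/* Queue torn down by close/hot-unplug: drop the event. */
		spin_unlock_irqrestore(&ev_queue->lock, flags);
		kfree(entry);
		return;
	}
	list_add_tail(&entry->list, &ev_queue->event_list);
	spin_unlock_irqrestore(&ev_queue->lock, flags);

	/* Wake blocked readers/pollers, then signal fasync subscribers. */
	wake_up_interruptible(&ev_queue->poll_wait);
	kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}

[The is_closed check under the lock is what lets the close and hot-unplug
paths above mark the queue dead and drain it without racing a late event.
After this rename the structure is only the queue embedded in the async and
completion event files, which is why the accesses in the hunks gain an
.ev_queue member hop.]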