Skip to content

Commit 136e895

Browse files
Sindhu-Devale and gregkh
authored and committed
i40iw: Add support to make destroy QP synchronous
[ Upstream commit f233496 ]

Occasionally ib_write_bw crash is seen due to access of a pd object in
i40iw_sc_qp_destroy after it is freed. Destroy qp is not synchronous in
i40iw and thus the iwqp object could be referencing a pd object that is
freed by ib core as a result of successful return from i40iw_destroy_qp.

Wait in i40iw_destroy_qp till all QP references are released and destroy
the QP and its associated resources before returning. Switch to use the
refcount API vs atomic API for lifetime management of the qp.

 RIP: 0010:i40iw_sc_qp_destroy+0x4b/0x120 [i40iw]
 [...]
 RSP: 0018:ffffb4a7042e3ba8 EFLAGS: 00010002
 RAX: 0000000000000000 RBX: 0000000000000001 RCX: dead000000000122
 RDX: ffffb4a7042e3bac RSI: ffff8b7ef9b1e940 RDI: ffff8b7efbf09080
 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000
 R10: 8080808080808080 R11: 0000000000000010 R12: ffff8b7efbf08050
 R13: 0000000000000001 R14: ffff8b7f15042928 R15: ffff8b7ef9b1e940
 FS: 0000000000000000(0000) GS:ffff8b7f2fa00000(0000) knlGS:0000000000000000
 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000400 CR3: 000000020d60a006 CR4: 00000000001606e0
 Call Trace:
  i40iw_exec_cqp_cmd+0x4d3/0x5c0 [i40iw]
  ? try_to_wake_up+0x1ea/0x5d0
  ? __switch_to_asm+0x40/0x70
  i40iw_process_cqp_cmd+0x95/0xa0 [i40iw]
  i40iw_handle_cqp_op+0x42/0x1a0 [i40iw]
  ? cm_event_handler+0x13c/0x1f0 [iw_cm]
  i40iw_rem_ref+0xa0/0xf0 [i40iw]
  cm_work_handler+0x99c/0xd10 [iw_cm]
  process_one_work+0x1a1/0x360
  worker_thread+0x30/0x380
  ? process_one_work+0x360/0x360
  kthread+0x10c/0x130
  ? kthread_park+0x80/0x80
  ret_from_fork+0x35/0x40

Fixes: d374984 ("i40iw: add files for iwarp interface")
Link: https://lore.kernel.org/r/20200916131811.2077-1-shiraz.saleem@intel.com
Reported-by: Kamal Heib <kheib@redhat.com>
Signed-off-by: Sindhu, Devale <sindhu.devale@intel.com>
Signed-off-by: Shiraz, Saleem <shiraz.saleem@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent 1b881ba commit 136e895

6 files changed

Lines changed: 45 additions & 71 deletions

File tree

drivers/infiniband/hw/i40iw/i40iw.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -409,8 +409,8 @@ static inline struct i40iw_qp *to_iwqp(struct ib_qp *ibqp)
409409
}
410410

411411
/* i40iw.c */
412-
void i40iw_add_ref(struct ib_qp *);
413-
void i40iw_rem_ref(struct ib_qp *);
412+
void i40iw_qp_add_ref(struct ib_qp *ibqp);
413+
void i40iw_qp_rem_ref(struct ib_qp *ibqp);
414414
struct ib_qp *i40iw_get_qp(struct ib_device *, int);
415415

416416
void i40iw_flush_wqes(struct i40iw_device *iwdev,
@@ -554,9 +554,8 @@ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
554554
bool wait);
555555
void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf);
556556
void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp);
557-
void i40iw_free_qp_resources(struct i40iw_device *iwdev,
558-
struct i40iw_qp *iwqp,
559-
u32 qp_num);
557+
void i40iw_free_qp_resources(struct i40iw_qp *iwqp);
558+
560559
enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev,
561560
struct i40iw_dma_mem *memptr,
562561
u32 size, u32 mask);

drivers/infiniband/hw/i40iw/i40iw_cm.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2322,7 +2322,7 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node)
23222322
iwqp = cm_node->iwqp;
23232323
if (iwqp) {
23242324
iwqp->cm_node = NULL;
2325-
i40iw_rem_ref(&iwqp->ibqp);
2325+
i40iw_qp_rem_ref(&iwqp->ibqp);
23262326
cm_node->iwqp = NULL;
23272327
} else if (cm_node->qhash_set) {
23282328
i40iw_get_addr_info(cm_node, &nfo);
@@ -3452,7 +3452,7 @@ void i40iw_cm_disconn(struct i40iw_qp *iwqp)
34523452
kfree(work);
34533453
return;
34543454
}
3455-
i40iw_add_ref(&iwqp->ibqp);
3455+
i40iw_qp_add_ref(&iwqp->ibqp);
34563456
spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
34573457

34583458
work->iwqp = iwqp;
@@ -3623,7 +3623,7 @@ static void i40iw_disconnect_worker(struct work_struct *work)
36233623

36243624
kfree(dwork);
36253625
i40iw_cm_disconn_true(iwqp);
3626-
i40iw_rem_ref(&iwqp->ibqp);
3626+
i40iw_qp_rem_ref(&iwqp->ibqp);
36273627
}
36283628

36293629
/**
@@ -3745,7 +3745,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
37453745
cm_node->lsmm_size = accept.size + conn_param->private_data_len;
37463746
i40iw_cm_init_tsa_conn(iwqp, cm_node);
37473747
cm_id->add_ref(cm_id);
3748-
i40iw_add_ref(&iwqp->ibqp);
3748+
i40iw_qp_add_ref(&iwqp->ibqp);
37493749

37503750
attr.qp_state = IB_QPS_RTS;
37513751
cm_node->qhash_set = false;
@@ -3908,7 +3908,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
39083908
iwqp->cm_node = cm_node;
39093909
cm_node->iwqp = iwqp;
39103910
iwqp->cm_id = cm_id;
3911-
i40iw_add_ref(&iwqp->ibqp);
3911+
i40iw_qp_add_ref(&iwqp->ibqp);
39123912

39133913
if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
39143914
cm_node->state = I40IW_CM_STATE_SYN_SENT;

drivers/infiniband/hw/i40iw/i40iw_hw.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
313313
__func__, info->qp_cq_id);
314314
continue;
315315
}
316-
i40iw_add_ref(&iwqp->ibqp);
316+
i40iw_qp_add_ref(&iwqp->ibqp);
317317
spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
318318
qp = &iwqp->sc_qp;
319319
spin_lock_irqsave(&iwqp->lock, flags);
@@ -426,7 +426,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
426426
break;
427427
}
428428
if (info->qp)
429-
i40iw_rem_ref(&iwqp->ibqp);
429+
i40iw_qp_rem_ref(&iwqp->ibqp);
430430
} while (1);
431431

432432
if (aeqcnt)

drivers/infiniband/hw/i40iw/i40iw_utils.c

Lines changed: 10 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -477,25 +477,6 @@ void i40iw_cleanup_pending_cqp_op(struct i40iw_device *iwdev)
477477
}
478478
}
479479

480-
/**
481-
* i40iw_free_qp - callback after destroy cqp completes
482-
* @cqp_request: cqp request for destroy qp
483-
* @num: not used
484-
*/
485-
static void i40iw_free_qp(struct i40iw_cqp_request *cqp_request, u32 num)
486-
{
487-
struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)cqp_request->param;
488-
struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
489-
struct i40iw_device *iwdev;
490-
u32 qp_num = iwqp->ibqp.qp_num;
491-
492-
iwdev = iwqp->iwdev;
493-
494-
i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
495-
i40iw_free_qp_resources(iwdev, iwqp, qp_num);
496-
i40iw_rem_devusecount(iwdev);
497-
}
498-
499480
/**
500481
* i40iw_wait_event - wait for completion
501482
* @iwdev: iwarp device
@@ -616,60 +597,40 @@ void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev)
616597
}
617598

618599
/**
619-
* i40iw_add_ref - add refcount for qp
600+
* i40iw_qp_add_ref - add refcount for qp
620601
* @ibqp: iwarp qp
621602
*/
622-
void i40iw_add_ref(struct ib_qp *ibqp)
603+
void i40iw_qp_add_ref(struct ib_qp *ibqp)
623604
{
624605
struct i40iw_qp *iwqp = (struct i40iw_qp *)ibqp;
625606

626-
atomic_inc(&iwqp->refcount);
607+
refcount_inc(&iwqp->refcount);
627608
}
628609

629610
/**
630-
* i40iw_rem_ref - rem refcount for qp and free if 0
611+
* i40iw_qp_rem_ref - rem refcount for qp and free if 0
631612
* @ibqp: iwarp qp
632613
*/
633-
void i40iw_rem_ref(struct ib_qp *ibqp)
614+
void i40iw_qp_rem_ref(struct ib_qp *ibqp)
634615
{
635616
struct i40iw_qp *iwqp;
636-
enum i40iw_status_code status;
637-
struct i40iw_cqp_request *cqp_request;
638-
struct cqp_commands_info *cqp_info;
639617
struct i40iw_device *iwdev;
640618
u32 qp_num;
641619
unsigned long flags;
642620

643621
iwqp = to_iwqp(ibqp);
644622
iwdev = iwqp->iwdev;
645623
spin_lock_irqsave(&iwdev->qptable_lock, flags);
646-
if (!atomic_dec_and_test(&iwqp->refcount)) {
624+
if (!refcount_dec_and_test(&iwqp->refcount)) {
647625
spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
648626
return;
649627
}
650628

651629
qp_num = iwqp->ibqp.qp_num;
652630
iwdev->qp_table[qp_num] = NULL;
653631
spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
654-
cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
655-
if (!cqp_request)
656-
return;
657-
658-
cqp_request->callback_fcn = i40iw_free_qp;
659-
cqp_request->param = (void *)&iwqp->sc_qp;
660-
cqp_info = &cqp_request->info;
661-
cqp_info->cqp_cmd = OP_QP_DESTROY;
662-
cqp_info->post_sq = 1;
663-
cqp_info->in.u.qp_destroy.qp = &iwqp->sc_qp;
664-
cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
665-
cqp_info->in.u.qp_destroy.remove_hash_idx = true;
666-
status = i40iw_handle_cqp_op(iwdev, cqp_request);
667-
if (!status)
668-
return;
632+
complete(&iwqp->free_qp);
669633

670-
i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
671-
i40iw_free_qp_resources(iwdev, iwqp, qp_num);
672-
i40iw_rem_devusecount(iwdev);
673634
}
674635

675636
/**
@@ -936,7 +897,7 @@ static void i40iw_terminate_timeout(struct timer_list *t)
936897
struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp;
937898

938899
i40iw_terminate_done(qp, 1);
939-
i40iw_rem_ref(&iwqp->ibqp);
900+
i40iw_qp_rem_ref(&iwqp->ibqp);
940901
}
941902

942903
/**
@@ -948,7 +909,7 @@ void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp)
948909
struct i40iw_qp *iwqp;
949910

950911
iwqp = (struct i40iw_qp *)qp->back_qp;
951-
i40iw_add_ref(&iwqp->ibqp);
912+
i40iw_qp_add_ref(&iwqp->ibqp);
952913
timer_setup(&iwqp->terminate_timer, i40iw_terminate_timeout, 0);
953914
iwqp->terminate_timer.expires = jiffies + HZ;
954915
add_timer(&iwqp->terminate_timer);
@@ -964,7 +925,7 @@ void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp)
964925

965926
iwqp = (struct i40iw_qp *)qp->back_qp;
966927
if (del_timer(&iwqp->terminate_timer))
967-
i40iw_rem_ref(&iwqp->ibqp);
928+
i40iw_qp_rem_ref(&iwqp->ibqp);
968929
}
969930

970931
/**

drivers/infiniband/hw/i40iw/i40iw_verbs.c

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -363,11 +363,11 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va,
363363
* @iwqp: qp ptr (user or kernel)
364364
* @qp_num: qp number assigned
365365
*/
366-
void i40iw_free_qp_resources(struct i40iw_device *iwdev,
367-
struct i40iw_qp *iwqp,
368-
u32 qp_num)
366+
void i40iw_free_qp_resources(struct i40iw_qp *iwqp)
369367
{
370368
struct i40iw_pbl *iwpbl = &iwqp->iwpbl;
369+
struct i40iw_device *iwdev = iwqp->iwdev;
370+
u32 qp_num = iwqp->ibqp.qp_num;
371371

372372
i40iw_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp);
373373
i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp);
@@ -401,6 +401,10 @@ static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq)
401401
static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
402402
{
403403
struct i40iw_qp *iwqp = to_iwqp(ibqp);
404+
struct ib_qp_attr attr;
405+
struct i40iw_device *iwdev = iwqp->iwdev;
406+
407+
memset(&attr, 0, sizeof(attr));
404408

405409
iwqp->destroyed = 1;
406410

@@ -415,7 +419,15 @@ static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
415419
}
416420
}
417421

418-
i40iw_rem_ref(&iwqp->ibqp);
422+
attr.qp_state = IB_QPS_ERR;
423+
i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
424+
i40iw_qp_rem_ref(&iwqp->ibqp);
425+
wait_for_completion(&iwqp->free_qp);
426+
i40iw_cqp_qp_destroy_cmd(&iwdev->sc_dev, &iwqp->sc_qp);
427+
i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
428+
i40iw_free_qp_resources(iwqp);
429+
i40iw_rem_devusecount(iwdev);
430+
419431
return 0;
420432
}
421433

@@ -576,6 +588,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
576588
qp->back_qp = (void *)iwqp;
577589
qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
578590

591+
iwqp->iwdev = iwdev;
579592
iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info;
580593

581594
if (i40iw_allocate_dma_mem(dev->hw,
@@ -600,7 +613,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
600613
goto error;
601614
}
602615

603-
iwqp->iwdev = iwdev;
604616
iwqp->iwpd = iwpd;
605617
iwqp->ibqp.qp_num = qp_num;
606618
qp = &iwqp->sc_qp;
@@ -714,7 +726,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
714726
goto error;
715727
}
716728

717-
i40iw_add_ref(&iwqp->ibqp);
729+
refcount_set(&iwqp->refcount, 1);
718730
spin_lock_init(&iwqp->lock);
719731
iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
720732
iwdev->qp_table[qp_num] = iwqp;
@@ -736,10 +748,11 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
736748
}
737749
init_completion(&iwqp->sq_drained);
738750
init_completion(&iwqp->rq_drained);
751+
init_completion(&iwqp->free_qp);
739752

740753
return &iwqp->ibqp;
741754
error:
742-
i40iw_free_qp_resources(iwdev, iwqp, qp_num);
755+
i40iw_free_qp_resources(iwqp);
743756
return ERR_PTR(err_code);
744757
}
745758

@@ -2637,13 +2650,13 @@ static const struct ib_device_ops i40iw_dev_ops = {
26372650
.get_hw_stats = i40iw_get_hw_stats,
26382651
.get_port_immutable = i40iw_port_immutable,
26392652
.iw_accept = i40iw_accept,
2640-
.iw_add_ref = i40iw_add_ref,
2653+
.iw_add_ref = i40iw_qp_add_ref,
26412654
.iw_connect = i40iw_connect,
26422655
.iw_create_listen = i40iw_create_listen,
26432656
.iw_destroy_listen = i40iw_destroy_listen,
26442657
.iw_get_qp = i40iw_get_qp,
26452658
.iw_reject = i40iw_reject,
2646-
.iw_rem_ref = i40iw_rem_ref,
2659+
.iw_rem_ref = i40iw_qp_rem_ref,
26472660
.map_mr_sg = i40iw_map_mr_sg,
26482661
.mmap = i40iw_mmap,
26492662
.modify_qp = i40iw_modify_qp,

drivers/infiniband/hw/i40iw/i40iw_verbs.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ struct i40iw_qp {
139139
struct i40iw_qp_host_ctx_info ctx_info;
140140
struct i40iwarp_offload_info iwarp_info;
141141
void *allocated_buffer;
142-
atomic_t refcount;
142+
refcount_t refcount;
143143
struct iw_cm_id *cm_id;
144144
void *cm_node;
145145
struct ib_mr *lsmm_mr;
@@ -174,5 +174,6 @@ struct i40iw_qp {
174174
struct i40iw_dma_mem ietf_mem;
175175
struct completion sq_drained;
176176
struct completion rq_drained;
177+
struct completion free_qp;
177178
};
178179
#endif

0 commit comments

Comments
 (0)