Skip to content

Commit 4429f14

Browse files
committed
Merge tag 'block-5.10-2020-11-07' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: - NVMe pull request from Christoph: - revert a nvme_queue size optimization (Keith Busch) - fabrics timeout races fixes (Chao Leng and Sagi Grimberg) - null_blk zone locking fix (Damien) * tag 'block-5.10-2020-11-07' of git://git.kernel.dk/linux-block: null_blk: Fix scheduling in atomic with zoned mode nvme-tcp: avoid repeated request completion nvme-rdma: avoid repeated request completion nvme-tcp: avoid race between time out and tear down nvme-rdma: avoid race between time out and tear down nvme: introduce nvme_sync_io_queues Revert "nvme-pci: remove last_sq_tail"
2 parents e9c02d6 + e1777d0 commit 4429f14

7 files changed

Lines changed: 65 additions & 46 deletions

File tree

drivers/block/null_blk.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ struct nullb_device {
4747
unsigned int nr_zones_closed;
4848
struct blk_zone *zones;
4949
sector_t zone_size_sects;
50-
spinlock_t zone_dev_lock;
50+
spinlock_t zone_lock;
5151
unsigned long *zone_locks;
5252

5353
unsigned long size; /* device size in MB */

drivers/block/null_blk_zoned.c

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,20 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
4646
if (!dev->zones)
4747
return -ENOMEM;
4848

49-
spin_lock_init(&dev->zone_dev_lock);
50-
dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL);
51-
if (!dev->zone_locks) {
52-
kvfree(dev->zones);
53-
return -ENOMEM;
49+
/*
50+
* With memory backing, the zone_lock spinlock needs to be temporarily
51+
* released to avoid scheduling in atomic context. To guarantee zone
52+
* information protection, use a bitmap to lock zones with
53+
* wait_on_bit_lock_io(). Sleeping on the lock is OK as memory backing
54+
* implies that the queue is marked with BLK_MQ_F_BLOCKING.
55+
*/
56+
spin_lock_init(&dev->zone_lock);
57+
if (dev->memory_backed) {
58+
dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL);
59+
if (!dev->zone_locks) {
60+
kvfree(dev->zones);
61+
return -ENOMEM;
62+
}
5463
}
5564

5665
if (dev->zone_nr_conv >= dev->nr_zones) {
@@ -137,12 +146,17 @@ void null_free_zoned_dev(struct nullb_device *dev)
137146

138147
static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno)
139148
{
140-
wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE);
149+
if (dev->memory_backed)
150+
wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE);
151+
spin_lock_irq(&dev->zone_lock);
141152
}
142153

143154
static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno)
144155
{
145-
clear_and_wake_up_bit(zno, dev->zone_locks);
156+
spin_unlock_irq(&dev->zone_lock);
157+
158+
if (dev->memory_backed)
159+
clear_and_wake_up_bit(zno, dev->zone_locks);
146160
}
147161

148162
int null_report_zones(struct gendisk *disk, sector_t sector,
@@ -322,7 +336,6 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
322336
return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
323337

324338
null_lock_zone(dev, zno);
325-
spin_lock(&dev->zone_dev_lock);
326339

327340
switch (zone->cond) {
328341
case BLK_ZONE_COND_FULL:
@@ -375,9 +388,17 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
375388
if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
376389
zone->cond = BLK_ZONE_COND_IMP_OPEN;
377390

378-
spin_unlock(&dev->zone_dev_lock);
391+
/*
392+
* Memory backing allocation may sleep: release the zone_lock spinlock
393+
* to avoid scheduling in atomic context. Zone operation atomicity is
394+
* still guaranteed through the zone_locks bitmap.
395+
*/
396+
if (dev->memory_backed)
397+
spin_unlock_irq(&dev->zone_lock);
379398
ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
380-
spin_lock(&dev->zone_dev_lock);
399+
if (dev->memory_backed)
400+
spin_lock_irq(&dev->zone_lock);
401+
381402
if (ret != BLK_STS_OK)
382403
goto unlock;
383404

@@ -392,7 +413,6 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
392413
ret = BLK_STS_OK;
393414

394415
unlock:
395-
spin_unlock(&dev->zone_dev_lock);
396416
null_unlock_zone(dev, zno);
397417

398418
return ret;
@@ -516,9 +536,7 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
516536
null_lock_zone(dev, i);
517537
zone = &dev->zones[i];
518538
if (zone->cond != BLK_ZONE_COND_EMPTY) {
519-
spin_lock(&dev->zone_dev_lock);
520539
null_reset_zone(dev, zone);
521-
spin_unlock(&dev->zone_dev_lock);
522540
trace_nullb_zone_op(cmd, i, zone->cond);
523541
}
524542
null_unlock_zone(dev, i);
@@ -530,7 +548,6 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
530548
zone = &dev->zones[zone_no];
531549

532550
null_lock_zone(dev, zone_no);
533-
spin_lock(&dev->zone_dev_lock);
534551

535552
switch (op) {
536553
case REQ_OP_ZONE_RESET:
@@ -550,8 +567,6 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
550567
break;
551568
}
552569

553-
spin_unlock(&dev->zone_dev_lock);
554-
555570
if (ret == BLK_STS_OK)
556571
trace_nullb_zone_op(cmd, zone_no, zone->cond);
557572

drivers/nvme/host/core.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4582,16 +4582,20 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
45824582
}
45834583
EXPORT_SYMBOL_GPL(nvme_start_queues);
45844584

4585-
4586-
void nvme_sync_queues(struct nvme_ctrl *ctrl)
4585+
void nvme_sync_io_queues(struct nvme_ctrl *ctrl)
45874586
{
45884587
struct nvme_ns *ns;
45894588

45904589
down_read(&ctrl->namespaces_rwsem);
45914590
list_for_each_entry(ns, &ctrl->namespaces, list)
45924591
blk_sync_queue(ns->queue);
45934592
up_read(&ctrl->namespaces_rwsem);
4593+
}
4594+
EXPORT_SYMBOL_GPL(nvme_sync_io_queues);
45944595

4596+
void nvme_sync_queues(struct nvme_ctrl *ctrl)
4597+
{
4598+
nvme_sync_io_queues(ctrl);
45954599
if (ctrl->admin_q)
45964600
blk_sync_queue(ctrl->admin_q);
45974601
}

drivers/nvme/host/nvme.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,7 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl);
602602
void nvme_start_queues(struct nvme_ctrl *ctrl);
603603
void nvme_kill_queues(struct nvme_ctrl *ctrl);
604604
void nvme_sync_queues(struct nvme_ctrl *ctrl);
605+
void nvme_sync_io_queues(struct nvme_ctrl *ctrl);
605606
void nvme_unfreeze(struct nvme_ctrl *ctrl);
606607
void nvme_wait_freeze(struct nvme_ctrl *ctrl);
607608
int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);

drivers/nvme/host/pci.c

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ struct nvme_queue {
198198
u32 q_depth;
199199
u16 cq_vector;
200200
u16 sq_tail;
201+
u16 last_sq_tail;
201202
u16 cq_head;
202203
u16 qid;
203204
u8 cq_phase;
@@ -455,11 +456,24 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
455456
return 0;
456457
}
457458

458-
static inline void nvme_write_sq_db(struct nvme_queue *nvmeq)
459+
/*
460+
* Write sq tail if we are asked to, or if the next command would wrap.
461+
*/
462+
static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
459463
{
464+
if (!write_sq) {
465+
u16 next_tail = nvmeq->sq_tail + 1;
466+
467+
if (next_tail == nvmeq->q_depth)
468+
next_tail = 0;
469+
if (next_tail != nvmeq->last_sq_tail)
470+
return;
471+
}
472+
460473
if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
461474
nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
462475
writel(nvmeq->sq_tail, nvmeq->q_db);
476+
nvmeq->last_sq_tail = nvmeq->sq_tail;
463477
}
464478

465479
/**
@@ -476,8 +490,7 @@ static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
476490
cmd, sizeof(*cmd));
477491
if (++nvmeq->sq_tail == nvmeq->q_depth)
478492
nvmeq->sq_tail = 0;
479-
if (write_sq)
480-
nvme_write_sq_db(nvmeq);
493+
nvme_write_sq_db(nvmeq, write_sq);
481494
spin_unlock(&nvmeq->sq_lock);
482495
}
483496

@@ -486,7 +499,8 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
486499
struct nvme_queue *nvmeq = hctx->driver_data;
487500

488501
spin_lock(&nvmeq->sq_lock);
489-
nvme_write_sq_db(nvmeq);
502+
if (nvmeq->sq_tail != nvmeq->last_sq_tail)
503+
nvme_write_sq_db(nvmeq, true);
490504
spin_unlock(&nvmeq->sq_lock);
491505
}
492506

@@ -1496,6 +1510,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
14961510
struct nvme_dev *dev = nvmeq->dev;
14971511

14981512
nvmeq->sq_tail = 0;
1513+
nvmeq->last_sq_tail = 0;
14991514
nvmeq->cq_head = 0;
15001515
nvmeq->cq_phase = 1;
15011516
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];

drivers/nvme/host/rdma.c

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,6 @@ struct nvme_rdma_ctrl {
122122
struct sockaddr_storage src_addr;
123123

124124
struct nvme_ctrl ctrl;
125-
struct mutex teardown_lock;
126125
bool use_inline_data;
127126
u32 io_queues[HCTX_MAX_TYPES];
128127
};
@@ -1010,8 +1009,8 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
10101009
static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
10111010
bool remove)
10121011
{
1013-
mutex_lock(&ctrl->teardown_lock);
10141012
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
1013+
blk_sync_queue(ctrl->ctrl.admin_q);
10151014
nvme_rdma_stop_queue(&ctrl->queues[0]);
10161015
if (ctrl->ctrl.admin_tagset) {
10171016
blk_mq_tagset_busy_iter(ctrl->ctrl.admin_tagset,
@@ -1021,16 +1020,15 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
10211020
if (remove)
10221021
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
10231022
nvme_rdma_destroy_admin_queue(ctrl, remove);
1024-
mutex_unlock(&ctrl->teardown_lock);
10251023
}
10261024

10271025
static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
10281026
bool remove)
10291027
{
1030-
mutex_lock(&ctrl->teardown_lock);
10311028
if (ctrl->ctrl.queue_count > 1) {
10321029
nvme_start_freeze(&ctrl->ctrl);
10331030
nvme_stop_queues(&ctrl->ctrl);
1031+
nvme_sync_io_queues(&ctrl->ctrl);
10341032
nvme_rdma_stop_io_queues(ctrl);
10351033
if (ctrl->ctrl.tagset) {
10361034
blk_mq_tagset_busy_iter(ctrl->ctrl.tagset,
@@ -1041,7 +1039,6 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
10411039
nvme_start_queues(&ctrl->ctrl);
10421040
nvme_rdma_destroy_io_queues(ctrl, remove);
10431041
}
1044-
mutex_unlock(&ctrl->teardown_lock);
10451042
}
10461043

10471044
static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
@@ -1976,16 +1973,12 @@ static void nvme_rdma_complete_timed_out(struct request *rq)
19761973
{
19771974
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
19781975
struct nvme_rdma_queue *queue = req->queue;
1979-
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
19801976

1981-
/* fence other contexts that may complete the command */
1982-
mutex_lock(&ctrl->teardown_lock);
19831977
nvme_rdma_stop_queue(queue);
1984-
if (!blk_mq_request_completed(rq)) {
1978+
if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
19851979
nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
19861980
blk_mq_complete_request(rq);
19871981
}
1988-
mutex_unlock(&ctrl->teardown_lock);
19891982
}
19901983

19911984
static enum blk_eh_timer_return
@@ -2320,7 +2313,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
23202313
return ERR_PTR(-ENOMEM);
23212314
ctrl->ctrl.opts = opts;
23222315
INIT_LIST_HEAD(&ctrl->list);
2323-
mutex_init(&ctrl->teardown_lock);
23242316

23252317
if (!(opts->mask & NVMF_OPT_TRSVCID)) {
23262318
opts->trsvcid =

drivers/nvme/host/tcp.c

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,6 @@ struct nvme_tcp_ctrl {
124124
struct sockaddr_storage src_addr;
125125
struct nvme_ctrl ctrl;
126126

127-
struct mutex teardown_lock;
128127
struct work_struct err_work;
129128
struct delayed_work connect_work;
130129
struct nvme_tcp_request async_req;
@@ -1886,8 +1885,8 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
18861885
static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
18871886
bool remove)
18881887
{
1889-
mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock);
18901888
blk_mq_quiesce_queue(ctrl->admin_q);
1889+
blk_sync_queue(ctrl->admin_q);
18911890
nvme_tcp_stop_queue(ctrl, 0);
18921891
if (ctrl->admin_tagset) {
18931892
blk_mq_tagset_busy_iter(ctrl->admin_tagset,
@@ -1897,18 +1896,17 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
18971896
if (remove)
18981897
blk_mq_unquiesce_queue(ctrl->admin_q);
18991898
nvme_tcp_destroy_admin_queue(ctrl, remove);
1900-
mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock);
19011899
}
19021900

19031901
static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
19041902
bool remove)
19051903
{
1906-
mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock);
19071904
if (ctrl->queue_count <= 1)
1908-
goto out;
1905+
return;
19091906
blk_mq_quiesce_queue(ctrl->admin_q);
19101907
nvme_start_freeze(ctrl);
19111908
nvme_stop_queues(ctrl);
1909+
nvme_sync_io_queues(ctrl);
19121910
nvme_tcp_stop_io_queues(ctrl);
19131911
if (ctrl->tagset) {
19141912
blk_mq_tagset_busy_iter(ctrl->tagset,
@@ -1918,8 +1916,6 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
19181916
if (remove)
19191917
nvme_start_queues(ctrl);
19201918
nvme_tcp_destroy_io_queues(ctrl, remove);
1921-
out:
1922-
mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock);
19231919
}
19241920

19251921
static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
@@ -2171,14 +2167,11 @@ static void nvme_tcp_complete_timed_out(struct request *rq)
21712167
struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
21722168
struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
21732169

2174-
/* fence other contexts that may complete the command */
2175-
mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock);
21762170
nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue));
2177-
if (!blk_mq_request_completed(rq)) {
2171+
if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
21782172
nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
21792173
blk_mq_complete_request(rq);
21802174
}
2181-
mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock);
21822175
}
21832176

21842177
static enum blk_eh_timer_return
@@ -2455,7 +2448,6 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
24552448
nvme_tcp_reconnect_ctrl_work);
24562449
INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work);
24572450
INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work);
2458-
mutex_init(&ctrl->teardown_lock);
24592451

24602452
if (!(opts->mask & NVMF_OPT_TRSVCID)) {
24612453
opts->trsvcid =

0 commit comments

Comments
 (0)