diff options
author | Linus Torvalds | 2023-04-26 12:52:58 -0700 |
---|---|---|
committer | Linus Torvalds | 2023-04-26 12:52:58 -0700 |
commit | 9dd6956b38923dc1b7b349ca1eee3c0bb1f0163a (patch) | |
tree | c70bb7d65a50a51686378b6113a8663e0e60d9b8 /drivers | |
parent | 5b9a7bb72fddbc5247f56ede55d485fab7abdf92 (diff) | |
parent | 55793ea54d77719a071b1ccc05a05056e3b5e009 (diff) |
Merge tag 'for-6.4/block-2023-04-21' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- drbd patches, bringing us closer to unifying the out-of-tree version
and the in tree one (Andreas, Christoph)
- support for auto-quiesce for the s390 dasd driver (Stefan)
- MD pull request via Song:
- md/bitmap: Optimal last page size (Jon Derrick)
- Various raid10 fixes (Yu Kuai, Li Nan)
- md: add error_handlers for raid0 and linear (Mariusz Tkaczyk)
- NVMe pull request via Christoph:
- Drop redundant pci_enable_pcie_error_reporting (Bjorn Helgaas)
- Validate nvmet module parameters (Chaitanya Kulkarni)
- Fence TCP socket on receive error (Chris Leech)
- Fix async event trace event (Keith Busch)
- Minor cleanups (Chaitanya Kulkarni, zhenwei pi)
- Fix and cleanup nvmet Identify handling (Damien Le Moal,
Christoph Hellwig)
- Fix double blk_mq_complete_request race in the timeout handler
(Lei Yin)
- Fix irq locking in nvme-fcloop (Ming Lei)
- Remove queue mapping helper for rdma devices (Sagi Grimberg)
- use structured request attribute checks for nbd (Jakub)
- fix blk-crypto race conditions between keyslot management (Eric)
- add sed-opal support for reading read locking range attributes
(Ondrej)
- make fault injection configurable for null_blk (Akinobu)
- clean up the request insertion API (Christoph)
- clean up the queue running API (Christoph)
- blkg config helper cleanups (Tejun)
- lazy init support for blk-iolatency (Tejun)
- various fixes and tweaks to ublk (Ming)
- remove hybrid polling. It hasn't really been useful since we got
async polled IO support, and these days we don't support sync polled
IO at all (Keith)
- misc fixes, cleanups, improvements (Zhong, Ondrej, Colin, Chengming,
Chaitanya, me)
* tag 'for-6.4/block-2023-04-21' of git://git.kernel.dk/linux: (118 commits)
nbd: fix incomplete validation of ioctl arg
ublk: don't return 0 in case of any failure
sed-opal: geometry feature reporting command
null_blk: Always check queue mode setting from configfs
block: ublk: switch to ioctl command encoding
blk-mq: fix the blk_mq_add_to_requeue_list call in blk_kick_flush
block, bfq: Fix division by zero error on zero wsum
fault-inject: fix build error when FAULT_INJECTION_CONFIGFS=y and CONFIGFS_FS=m
block: store bdev->bd_disk->fops->submit_bio state in bdev
block: re-arrange the struct block_device fields for better layout
md/raid5: remove unused working_disks variable
md/raid10: don't call bio_start_io_acct twice for bio which experienced read error
md/raid10: fix memleak of md thread
md/raid10: fix memleak for 'conf->bio_split'
md/raid10: fix leak of 'r10bio->remaining' for recovery
md/raid10: don't BUG_ON() in raise_barrier()
md: fix soft lockup in status_resync
md: add error_handlers for raid0 and linear
md: Use optimal I/O size for last bitmap page
md: Fix types in sb writer
...
Diffstat (limited to 'drivers')
40 files changed, 1008 insertions, 648 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index f79f20430ef7..5b9d4aaebb81 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -385,6 +385,23 @@ config BLK_DEV_UBLK can handle batch more effectively, but task_work_add() isn't exported for module, so ublk has to be built to kernel. +config BLKDEV_UBLK_LEGACY_OPCODES + bool "Support legacy command opcode" + depends on BLK_DEV_UBLK + default y + help + ublk driver started to take plain command encoding, which turns out + one bad way. The traditional ioctl command opcode encodes more + info and basically defines each code uniquely, so opcode conflict + is avoided, and driver can handle wrong command easily, meantime it + may help security subsystem to audit io_uring command. + + Say Y if your application still uses legacy command opcode. + + Say N if you don't want to support legacy command opcode. It is + suggested to enable N if your application(ublk server) switches to + ioctl command encoding. + source "drivers/block/rnbd/Kconfig" endif # BLK_DEV diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 429255876800..64b3a1c76f03 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -735,8 +735,9 @@ static bool update_rs_extent(struct drbd_device *device, return false; } -void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go) +void drbd_advance_rs_marks(struct drbd_peer_device *peer_device, unsigned long still_to_go) { + struct drbd_device *device = peer_device->device; unsigned long now = jiffies; unsigned long last = device->rs_mark_time[device->rs_last_mark]; int next = (device->rs_last_mark + 1) % DRBD_SYNC_MARKS; @@ -819,7 +820,7 @@ static int update_sync_bits(struct drbd_device *device, if (mode == SET_IN_SYNC) { unsigned long still_to_go = drbd_bm_total_weight(device); bool rs_is_done = (still_to_go <= device->rs_failed); - drbd_advance_rs_marks(device, still_to_go); + drbd_advance_rs_marks(first_peer_device(device), still_to_go); if (cleared || rs_is_done) maybe_schedule_on_disk_bitmap_update(device, rs_is_done); } else if (mode == RECORD_RS_FAILED) @@ -843,10 +844,11 @@ static bool plausible_request_size(int size) * called by worker on C_SYNC_TARGET and receiver on SyncSource. * */ -int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, +int __drbd_change_sync(struct drbd_peer_device *peer_device, sector_t sector, int size, enum update_sync_bits_mode mode) { /* Is called from worker and receiver context _only_ */ + struct drbd_device *device = peer_device->device; unsigned long sbnr, ebnr, lbnr; unsigned long count = 0; sector_t esector, nr_sectors; @@ -1009,14 +1011,15 @@ retry: * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN * if there is still application IO going on in this area. */ -int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector) +int drbd_try_rs_begin_io(struct drbd_peer_device *peer_device, sector_t sector) { + struct drbd_device *device = peer_device->device; unsigned int enr = BM_SECT_TO_EXT(sector); const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT; struct lc_element *e; struct bm_extent *bm_ext; int i; - bool throttle = drbd_rs_should_slow_down(device, sector, true); + bool throttle = drbd_rs_should_slow_down(peer_device, sector, true); /* If we need to throttle, a half-locked (only marked BME_NO_WRITES, * not yet BME_LOCKED) extent needs to be kicked out explicitly if we diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 289876ffbc31..6ac8c54b44c7 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1216,7 +1216,9 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned * drbd_bm_read() - Read the whole bitmap from its on disk location. * @device: DRBD device. */ -int drbd_bm_read(struct drbd_device *device) __must_hold(local) +int drbd_bm_read(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) + { return bm_rw(device, BM_AIO_READ, 0); } @@ -1227,7 +1229,8 @@ int drbd_bm_read(struct drbd_device *device) __must_hold(local) * * Will only write pages that have changed since last IO. */ -int drbd_bm_write(struct drbd_device *device) __must_hold(local) +int drbd_bm_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) { return bm_rw(device, 0, 0); } @@ -1238,7 +1241,8 @@ int drbd_bm_write(struct drbd_device *device) __must_hold(local) * * Will write all pages. */ -int drbd_bm_write_all(struct drbd_device *device) __must_hold(local) +int drbd_bm_write_all(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) { return bm_rw(device, BM_AIO_WRITE_ALL_PAGES, 0); } @@ -1264,7 +1268,8 @@ int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_ho * verify is aborted due to a failed peer disk, while local IO continues, or * pending resync acks are still being processed. */ -int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local) +int drbd_bm_write_copy_pages(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) { return bm_rw(device, BM_AIO_COPY_PAGES, 0); } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d89b7d03d4c8..a30a5ed811be 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -66,6 +66,7 @@ extern int drbd_proc_details; struct drbd_device; struct drbd_connection; +struct drbd_peer_device; /* Defines to control fault insertion */ enum { @@ -126,8 +127,8 @@ struct bm_xfer_ctx { unsigned bytes[2]; }; -extern void INFO_bm_xfer_stats(struct drbd_device *device, - const char *direction, struct bm_xfer_ctx *c); +extern void INFO_bm_xfer_stats(struct drbd_peer_device *peer_device, + const char *direction, struct bm_xfer_ctx *c); static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c) { @@ -541,9 +542,10 @@ struct drbd_md_io { struct bm_io_work { struct drbd_work w; + struct drbd_peer_device *peer_device; char *why; enum bm_flag flags; - int (*io_fn)(struct drbd_device *device); + int (*io_fn)(struct drbd_device *device, struct drbd_peer_device *peer_device); void (*done)(struct drbd_device *device, int rv); }; @@ -1041,7 +1043,7 @@ extern int drbd_send_drequest_csum(struct drbd_peer_device *, sector_t sector, enum drbd_packet cmd); extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int size); -extern int drbd_send_bitmap(struct drbd_device *device); +extern int drbd_send_bitmap(struct drbd_device *device, struct drbd_peer_device *peer_device); extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode); extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode); extern int drbd_send_rs_deallocated(struct drbd_peer_device *, struct drbd_peer_request *); @@ -1065,17 +1067,22 @@ extern void drbd_md_clear_flag(struct drbd_device *device, int flags)__must_hold extern int drbd_md_test_flag(struct drbd_backing_dev *, int); extern void drbd_md_mark_dirty(struct drbd_device *device); extern void drbd_queue_bitmap_io(struct drbd_device *device, - int (*io_fn)(struct drbd_device *), + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), void (*done)(struct drbd_device *, int), - char *why, enum bm_flag flags); + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device); extern int drbd_bitmap_io(struct drbd_device *device, - int (*io_fn)(struct drbd_device *), - char *why, enum bm_flag flags); + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device); extern int drbd_bitmap_io_from_worker(struct drbd_device *device, - int (*io_fn)(struct drbd_device *), - char *why, enum bm_flag flags); -extern int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local); -extern int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local); + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device); +extern int drbd_bmio_set_n_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); +extern int drbd_bmio_clear_n_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); /* Meta data layout * @@ -1284,14 +1291,18 @@ extern void _drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e); extern int drbd_bm_test_bit(struct drbd_device *device, unsigned long bitnr); extern int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr); -extern int drbd_bm_read(struct drbd_device *device) __must_hold(local); +extern int drbd_bm_read(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr); -extern int drbd_bm_write(struct drbd_device *device) __must_hold(local); +extern int drbd_bm_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); extern void drbd_bm_reset_al_hints(struct drbd_device *device) __must_hold(local); extern int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local); extern int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local); -extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local); -extern int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local); +extern int drbd_bm_write_all(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); +extern int drbd_bm_write_copy_pages(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); extern size_t drbd_bm_words(struct drbd_device *device); extern unsigned long drbd_bm_bits(struct drbd_device *device); extern sector_t drbd_bm_capacity(struct drbd_device *device); @@ -1422,21 +1433,24 @@ void drbd_resync_after_changed(struct drbd_device *device); extern void drbd_start_resync(struct drbd_device *device, enum drbd_conns side); extern void resume_next_sg(struct drbd_device *device); extern void suspend_other_sg(struct drbd_device *device); -extern int drbd_resync_finished(struct drbd_device *device); +extern int drbd_resync_finished(struct drbd_peer_device *peer_device); /* maybe rather drbd_main.c ? */ extern void *drbd_md_get_buffer(struct drbd_device *device, const char *intent); extern void drbd_md_put_buffer(struct drbd_device *device); extern int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev, sector_t sector, enum req_op op); -extern void drbd_ov_out_of_sync_found(struct drbd_device *, sector_t, int); +extern void drbd_ov_out_of_sync_found(struct drbd_peer_device *peer_device, + sector_t sector, int size); extern void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_backing_dev *bdev, unsigned int *done); -extern void drbd_rs_controller_reset(struct drbd_device *device); +extern void drbd_rs_controller_reset(struct drbd_peer_device *peer_device); -static inline void ov_out_of_sync_print(struct drbd_device *device) +static inline void ov_out_of_sync_print(struct drbd_peer_device *peer_device) { + struct drbd_device *device = peer_device->device; + if (device->ov_last_oos_size) { - drbd_err(device, "Out of sync: start=%llu, size=%lu (sectors)\n", + drbd_err(peer_device, "Out of sync: start=%llu, size=%lu (sectors)\n", (unsigned long long)device->ov_last_oos_start, (unsigned long)device->ov_last_oos_size); } @@ -1475,7 +1489,7 @@ extern int drbd_ack_receiver(struct drbd_thread *thi); extern void drbd_send_ping_wf(struct work_struct *ws); extern void drbd_send_acks_wf(struct work_struct *ws); extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); -extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, +extern bool drbd_rs_should_slow_down(struct drbd_peer_device *peer_device, sector_t sector, bool throttle_if_app_is_waiting); extern int drbd_submit_peer_request(struct drbd_peer_request *peer_req); extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *); @@ -1531,22 +1545,22 @@ extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i extern void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i); extern void drbd_rs_complete_io(struct drbd_device *device, sector_t sector); extern int drbd_rs_begin_io(struct drbd_device *device, sector_t sector); -extern int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector); +extern int drbd_try_rs_begin_io(struct drbd_peer_device *peer_device, sector_t sector); extern void drbd_rs_cancel_all(struct drbd_device *device); extern int drbd_rs_del_all(struct drbd_device *device); -extern void drbd_rs_failed_io(struct drbd_device *device, +extern void drbd_rs_failed_io(struct drbd_peer_device *peer_device, sector_t sector, int size); -extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go); +extern void drbd_advance_rs_marks(struct drbd_peer_device *peer_device, unsigned long still_to_go); enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC }; -extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, +extern int __drbd_change_sync(struct drbd_peer_device *peer_device, sector_t sector, int size, enum update_sync_bits_mode mode); -#define drbd_set_in_sync(device, sector, size) \ - __drbd_change_sync(device, sector, size, SET_IN_SYNC) -#define drbd_set_out_of_sync(device, sector, size) \ - __drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC) -#define drbd_rs_failed_io(device, sector, size) \ - __drbd_change_sync(device, sector, size, RECORD_RS_FAILED) +#define drbd_set_in_sync(peer_device, sector, size) \ + __drbd_change_sync(peer_device, sector, size, SET_IN_SYNC) +#define drbd_set_out_of_sync(peer_device, sector, size) \ + __drbd_change_sync(peer_device, sector, size, SET_OUT_OF_SYNC) +#define drbd_rs_failed_io(peer_device, sector, size) \ + __drbd_change_sync(peer_device, sector, size, RECORD_RS_FAILED) extern void drbd_al_shrink(struct drbd_device *device); extern int drbd_al_initialize(struct drbd_device *, void *); @@ -1918,18 +1932,14 @@ static inline void inc_ap_pending(struct drbd_device *device) atomic_inc(&device->ap_pending_cnt); } -#define ERR_IF_CNT_IS_NEGATIVE(which, func, line) \ - if (atomic_read(&device->which) < 0) \ - drbd_err(device, "in %s:%d: " #which " = %d < 0 !\n", \ - func, line, \ - atomic_read(&device->which)) - -#define dec_ap_pending(device) _dec_ap_pending(device, __func__, __LINE__) -static inline void _dec_ap_pending(struct drbd_device *device, const char *func, int line) +#define dec_ap_pending(device) ((void)expect((device), __dec_ap_pending(device) >= 0)) +static inline int __dec_ap_pending(struct drbd_device *device) { - if (atomic_dec_and_test(&device->ap_pending_cnt)) + int ap_pending_cnt = atomic_dec_return(&device->ap_pending_cnt); + + if (ap_pending_cnt == 0) wake_up(&device->misc_wait); - ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt, func, line); + return ap_pending_cnt; } /* counts how many resync-related answers we still expect from the peer @@ -1938,16 +1948,16 @@ static inline void _dec_ap_pending(struct drbd_device *device, const char *func, * C_SYNC_SOURCE sends P_RS_DATA_REPLY (and expects P_WRITE_ACK with ID_SYNCER) * (or P_NEG_ACK with ID_SYNCER) */ -static inline void inc_rs_pending(struct drbd_device *device) +static inline void inc_rs_pending(struct drbd_peer_device *peer_device) { - atomic_inc(&device->rs_pending_cnt); + atomic_inc(&peer_device->device->rs_pending_cnt); } -#define dec_rs_pending(device) _dec_rs_pending(device, __func__, __LINE__) -static inline void _dec_rs_pending(struct drbd_device *device, const char *func, int line) +#define dec_rs_pending(peer_device) \ + ((void)expect((peer_device), __dec_rs_pending(peer_device) >= 0)) +static inline int __dec_rs_pending(struct drbd_peer_device *peer_device) { - atomic_dec(&device->rs_pending_cnt); - ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt, func, line); + return atomic_dec_return(&peer_device->device->rs_pending_cnt); } /* counts how many answers we still need to send to the peer. @@ -1964,18 +1974,16 @@ static inline void inc_unacked(struct drbd_device *device) atomic_inc(&device->unacked_cnt); } -#define dec_unacked(device) _dec_unacked(device, __func__, __LINE__) -static inline void _dec_unacked(struct drbd_device *device, const char *func, int line) +#define dec_unacked(device) ((void)expect(device, __dec_unacked(device) >= 0)) +static inline int __dec_unacked(struct drbd_device *device) { - atomic_dec(&device->unacked_cnt); - ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); + return atomic_dec_return(&device->unacked_cnt); } -#define sub_unacked(device, n) _sub_unacked(device, n, __func__, __LINE__) -static inline void _sub_unacked(struct drbd_device *device, int n, const char *func, int line) +#define sub_unacked(device, n) ((void)expect(device, __sub_unacked(device) >= 0)) +static inline int __sub_unacked(struct drbd_device *device, int n) { - atomic_sub(n, &device->unacked_cnt); - ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); + return atomic_sub_return(n, &device->unacked_cnt); } static inline bool is_sync_target_state(enum drbd_conns connection_state) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2c764f7ee4a7..83987e7a5ef2 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -231,9 +231,11 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr, } req = list_prepare_entry(tmp, &connection->transfer_log, tl_requests); list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) { + struct drbd_peer_device *peer_device; if (req->epoch != expect_epoch) break; - _req_mod(req, BARRIER_ACKED); + peer_device = conn_peer_device(connection, req->device->vnr); + _req_mod(req, BARRIER_ACKED, peer_device); } spin_unlock_irq(&connection->resource->req_lock); @@ -256,10 +258,13 @@ bail: /* must hold resource->req_lock */ void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what) { + struct drbd_peer_device *peer_device; struct drbd_request *req, *r; - list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests) - _req_mod(req, what); + list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests) { + peer_device = conn_peer_device(connection, req->device->vnr); + _req_mod(req, what, peer_device); + } } void tl_restart(struct drbd_connection *connection, enum drbd_req_event what) @@ -297,7 +302,7 @@ void tl_abort_disk_io(struct drbd_device *device) continue; if (req->device != device) continue; - _req_mod(req, ABORT_DISK_IO); + _req_mod(req, ABORT_DISK_IO, NULL); } spin_unlock_irq(&connection->resource->req_lock); } @@ -1198,10 +1203,11 @@ static int fill_bitmap_rle_bits(struct drbd_device *device, * code upon failure. */ static int -send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c) +send_bitmap_rle_or_plain(struct drbd_peer_device *peer_device, struct bm_xfer_ctx *c) { - struct drbd_socket *sock = &first_peer_device(device)->connection->data; - unsigned int header_size = drbd_header_size(first_peer_device(device)->connection); + struct drbd_device *device = peer_device->device; + struct drbd_socket *sock = &peer_device->connection->data; + unsigned int header_size = drbd_header_size(peer_device->connection); struct p_compressed_bm *p = sock->sbuf + header_size; int len, err; @@ -1212,7 +1218,7 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c) if (len) { dcbp_set_code(p, RLE_VLI_Bits); - err = __send_command(first_peer_device(device)->connection, device->vnr, sock, + err = __send_command(peer_device->connection, device->vnr, sock, P_COMPRESSED_BITMAP, sizeof(*p) + len, NULL, 0); c->packets[0]++; @@ -1233,7 +1239,8 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c) len = num_words * sizeof(*p); if (len) drbd_bm_get_lel(device, c->word_offset, num_words, p); - err = __send_command(first_peer_device(device)->connection, device->vnr, sock, P_BITMAP, len, NULL, 0); + err = __send_command(peer_device->connection, device->vnr, sock, P_BITMAP, + len, NULL, 0); c->word_offset += num_words; c->bit_offset = c->word_offset * BITS_PER_LONG; @@ -1245,7 +1252,7 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c) } if (!err) { if (len == 0) { - INFO_bm_xfer_stats(device, "send", c); + INFO_bm_xfer_stats(peer_device, "send", c); return 0; } else return 1; @@ -1254,7 +1261,8 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c) } /* See the comment at receive_bitmap() */ -static int _drbd_send_bitmap(struct drbd_device *device) +static int _drbd_send_bitmap(struct drbd_device *device, + struct drbd_peer_device *peer_device) { struct bm_xfer_ctx c; int err; @@ -1266,7 +1274,7 @@ static int _drbd_send_bitmap(struct drbd_device *device) if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC)) { drbd_info(device, "Writing the whole bitmap, MDF_FullSync was set.\n"); drbd_bm_set_all(device); - if (drbd_bm_write(device)) { + if (drbd_bm_write(device, peer_device)) { /* write_bm did fail! Leave full sync flag set in Meta P_DATA * but otherwise process as per normal - need to tell other * side that a full resync is required! */ @@ -1285,20 +1293,20 @@ static int _drbd_send_bitmap(struct drbd_device *device) }; do { - err = send_bitmap_rle_or_plain(device, &c); + err = send_bitmap_rle_or_plain(peer_device, &c); } while (err > 0); return err == 0; } -int drbd_send_bitmap(struct drbd_device *device) +int drbd_send_bitmap(struct drbd_device *device, struct drbd_peer_device *peer_device) { - struct drbd_socket *sock = &first_peer_device(device)->connection->data; + struct drbd_socket *sock = &peer_device->connection->data; int err = -1; mutex_lock(&sock->mutex); if (sock->socket) - err = !_drbd_send_bitmap(device); + err = !_drbd_send_bitmap(device, peer_device); mutex_unlock(&sock->mutex); return err; } @@ -3406,7 +3414,9 @@ void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local) * * Sets all bits in the bitmap and writes the whole bitmap to stable storage. */ -int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local) +int drbd_bmio_set_n_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) + { int rv = -EIO; @@ -3414,7 +3424,7 @@ int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local) drbd_md_sync(device); drbd_bm_set_all(device); - rv = drbd_bm_write(device); + rv = drbd_bm_write(device, peer_device); if (!rv) { drbd_md_clear_flag(device, MDF_FULL_SYNC); @@ -3430,11 +3440,13 @@ int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local) * * Clears all bits in the bitmap and writes the whole bitmap to stable storage. */ -int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local) +int drbd_bmio_clear_n_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) + { drbd_resume_al(device); drbd_bm_clear_all(device); - return drbd_bm_write(device); + return drbd_bm_write(device, peer_device); } static int w_bitmap_io(struct drbd_work *w, int unused) @@ -3453,7 +3465,7 @@ static int w_bitmap_io(struct drbd_work *w, int unused) if (get_ldev(device)) { drbd_bm_lock(device, work->why, work->flags); - rv = work->io_fn(device); + rv = work->io_fn(device, work->peer_device); drbd_bm_unlock(device); put_ldev(device); } @@ -3488,11 +3500,12 @@ static int w_bitmap_io(struct drbd_work *w, int unused) * put_ldev(). */ void drbd_queue_bitmap_io(struct drbd_device *device, - int (*io_fn)(struct drbd_device *), + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), void (*done)(struct drbd_device *, int), - char *why, enum bm_flag flags) + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device) { - D_ASSERT(device, current == first_peer_device(device)->connection->worker.task); + D_ASSERT(device, current == peer_device->connection->worker.task); D_ASSERT(device, !test_bit(BITMAP_IO_QUEUED, &device->flags)); D_ASSERT(device, !test_bit(BITMAP_IO, &device->flags)); @@ -3501,6 +3514,7 @@ void drbd_queue_bitmap_io(struct drbd_device *device, drbd_err(device, "FIXME going to queue '%s' but '%s' still pending?\n", why, device->bm_io_work.why); + device->bm_io_work.peer_device = peer_device; device->bm_io_work.io_fn = io_fn; device->bm_io_work.done = done; device->bm_io_work.why = why; @@ -3512,7 +3526,7 @@ void drbd_queue_bitmap_io(struct drbd_device *device, * application IO does not conflict anyways. */ if (flags == BM_LOCKED_CHANGE_ALLOWED || atomic_read(&device->ap_bio_cnt) == 0) { if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags)) - drbd_queue_work(&first_peer_device(device)->connection->sender_work, + drbd_queue_work(&peer_device->connection->sender_work, &device->bm_io_work.w); } spin_unlock_irq(&device->resource->req_lock); @@ -3528,8 +3542,10 @@ void drbd_queue_bitmap_io(struct drbd_device *device, * freezes application IO while that the actual IO operations runs. This * functions MAY NOT be called from worker context. */ -int drbd_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device *), - char *why, enum bm_flag flags) +int drbd_bitmap_io(struct drbd_device *device, + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device) { /* Only suspend io, if some operation is supposed to be locked out */ const bool do_suspend_io = flags & (BM_DONT_CLEAR|BM_DONT_SET|BM_DONT_TEST); @@ -3541,7 +3557,7 @@ int drbd_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device * drbd_suspend_io(device); drbd_bm_lock(device, why, flags); - rv = io_fn(device); + rv = io_fn(device, peer_device); drbd_bm_unlock(device); if (do_suspend_io) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f49f2a5282e1..1a5d3d72d91d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1053,7 +1053,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct la_size_changed ? "size changed" : "md moved"); /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write, - "size changed", BM_LOCKED_MASK); + "size changed", BM_LOCKED_MASK, NULL); /* on-disk bitmap and activity log is authoritative again * (unless there was an IO error meanwhile...) */ @@ -2027,13 +2027,15 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) drbd_info(device, "Assuming that all blocks are out of sync " "(aka FullSync)\n"); if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, - "set_n_write from attaching", BM_LOCKED_MASK)) { + "set_n_write from attaching", BM_LOCKED_MASK, + NULL)) { retcode = ERR_IO_MD_DISK; goto force_diskless_dec; } } else { if (drbd_bitmap_io(device, &drbd_bm_read, - "read from attaching", BM_LOCKED_MASK)) { + "read from attaching", BM_LOCKED_MASK, + NULL)) { retcode = ERR_IO_MD_DISK; goto force_diskless_dec; } @@ -2972,7 +2974,7 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT)); if (retcode >= SS_SUCCESS) { if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, - "set_n_write from invalidate", BM_LOCKED_MASK)) + "set_n_write from invalidate", BM_LOCKED_MASK, NULL)) retcode = ERR_IO_MD_DISK; } } else @@ -3005,11 +3007,12 @@ out: return 0; } -static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local) +static int drbd_bmio_set_susp_al(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) { int rv; - rv = drbd_bmio_set_n_write(device); + rv = drbd_bmio_set_n_write(device, peer_device); drbd_suspend_al(device); return rv; } @@ -3052,7 +3055,7 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) if (retcode >= SS_SUCCESS) { if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al, "set_n_write from invalidate_peer", - BM_LOCKED_SET_ALLOWED)) + BM_LOCKED_SET_ALLOWED, NULL)) retcode = ERR_IO_MD_DISK; } } else @@ -4148,7 +4151,7 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) if (args.clear_bm) { err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write, - "clear_n_write from new_c_uuid", BM_LOCKED_MASK); + "clear_n_write from new_c_uuid", BM_LOCKED_MASK, NULL); if (err) { drbd_err(device, "Writing bitmap failed with %d\n", err); retcode = ERR_IO_MD_DISK; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e197b2a465d2..719a7260c22b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2044,11 +2044,11 @@ static int e_end_resync_block(struct drbd_work *w, int unused) D_ASSERT(device, drbd_interval_empty(&peer_req->i)); if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { - drbd_set_in_sync(device, sector, peer_req->i.size); + drbd_set_in_sync(peer_device, sector, peer_req->i.size); err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req); } else { /* Record failure to sync */ - drbd_rs_failed_io(device, sector, peer_req->i.size); + drbd_rs_failed_io(peer_device, sector, peer_req->i.size); err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req); } @@ -2067,7 +2067,7 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto if (!peer_req) goto fail; - dec_rs_pending(device); + dec_rs_pending(peer_device); inc_unacked(device); /* corresponding dec_unacked() in e_end_resync_block() @@ -2138,7 +2138,7 @@ static int receive_DataReply(struct drbd_connection *connection, struct packet_i err = recv_dless_read(peer_device, req, sector, pi->size); if (!err) - req_mod(req, DATA_RECEIVED); + req_mod(req, DATA_RECEIVED, peer_device); /* else: nothing. handled from drbd_disconnect... * I don't think we may complete this just yet * in case we are "on-disconnect: freeze" */ @@ -2196,7 +2196,7 @@ static void restart_conflicting_writes(struct drbd_device *device, continue; /* as it is RQ_POSTPONED, this will cause it to * be queued on the retry workqueue. */ - __req_mod(req, CONFLICT_RESOLVED, NULL); + __req_mod(req, CONFLICT_RESOLVED, NULL, NULL); } } @@ -2220,7 +2220,7 @@ static int e_end_block(struct drbd_work *w, int cancel) P_RS_WRITE_ACK : P_WRITE_ACK; err = drbd_send_ack(peer_device, pcmd, peer_req); if (pcmd == P_RS_WRITE_ACK) - drbd_set_in_sync(device, sector, peer_req->i.size); + drbd_set_in_sync(peer_device, sector, peer_req->i.size); } else { err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req); /* we expect it to be marked out of sync anyways... @@ -2420,6 +2420,7 @@ static blk_opf_t wire_flags_to_bio(struct drbd_connection *connection, u32 dpf) static void fail_postponed_requests(struct drbd_device *device, sector_t sector, unsigned int size) { + struct drbd_peer_device *peer_device = first_peer_device(device); struct drbd_interval *i; repeat: @@ -2433,7 +2434,7 @@ static void fail_postponed_requests(struct drbd_device *device, sector_t sector, if (!(req->rq_state & RQ_POSTPONED)) continue; req->rq_state &= ~RQ_POSTPONED; - __req_mod(req, NEG_ACKED, &m); + __req_mod(req, NEG_ACKED, peer_device, &m); spin_unlock_irq(&device->resource->req_lock); if (m.bio) complete_master_bio(device, &m); @@ -2690,7 +2691,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * if (device->state.pdsk < D_INCONSISTENT) { /* In case we have the only disk of the cluster, */ - drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); + drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size); peer_req->flags &= ~EE_MAY_SET_IN_SYNC; drbd_al_begin_io(device, &peer_req->i); peer_req->flags |= EE_CALL_AL_COMPLETE_IO; @@ -2729,9 +2730,10 @@ out_interrupted: * The current sync rate used here uses only the most recent two step marks, * to have a short time average so we can react faster. */ -bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, +bool drbd_rs_should_slow_down(struct drbd_peer_device *peer_device, sector_t sector, bool throttle_if_app_is_waiting) { + struct drbd_device *device = peer_device->device; struct lc_element *tmp; bool throttle = drbd_rs_c_min_rate_throttle(device); @@ -2843,7 +2845,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet break; case P_OV_REPLY: verb = 0; - dec_rs_pending(device); + dec_rs_pending(peer_device); drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC); break; default: @@ -2914,7 +2916,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet /* track progress, we may need to throttle */ atomic_add(size >> 9, &device->rs_sect_in); peer_req->w.cb = w_e_end_ov_reply; - dec_rs_pending(device); + dec_rs_pending(peer_device); /* drbd_rs_begin_io done when we sent this request, * but accounting still needs to be done. */ goto submit_for_resync; @@ -2977,7 +2979,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet update_receiver_timing_details(connection, drbd_rs_should_slow_down); if (device->state.peer != R_PRIMARY - && drbd_rs_should_slow_down(device, sector, false)) + && drbd_rs_should_slow_down(peer_device, sector, false)) schedule_timeout_uninterruptible(HZ/10); update_receiver_timing_details(connection, drbd_rs_begin_io); if (drbd_rs_begin_io(device, sector)) @@ -3226,10 +3228,11 @@ static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid, -1096 requires proto 96 */ -static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local) +static int drbd_uuid_compare(struct drbd_peer_device *const peer_device, + enum drbd_role const peer_role, int *rule_nr) __must_hold(local) { - struct drbd_peer_device *const peer_device = first_peer_device(device); - struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; + struct drbd_connection *const connection = peer_device->connection; + struct drbd_device *device = peer_device->device; u64 self, peer; int i, j; @@ -3465,7 +3468,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]); - hg = drbd_uuid_compare(device, peer_role, &rule_nr); + hg = drbd_uuid_compare(peer_device, peer_role, &rule_nr); spin_unlock_irq(&device->ldev->md.uuid_lock); drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr); @@ -3591,7 +3594,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, if (abs(hg) >= 2) { drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake", - BM_LOCKED_SET_ALLOWED)) + BM_LOCKED_SET_ALLOWED, NULL)) return C_MASK; } @@ -4270,7 +4273,7 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n"); drbd_bitmap_io(device, &drbd_bmio_clear_n_write, "clear_n_write from receive_uuids", - BM_LOCKED_TEST_ALLOWED); + BM_LOCKED_TEST_ALLOWED, NULL); _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]); _drbd_uuid_set(device, UI_BITMAP, 0); _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), @@ -4448,7 +4451,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info else if (os.conn >= C_SYNC_SOURCE && peer_state.conn == C_CONNECTED) { if (drbd_bm_total_weight(device) <= device->rs_failed) - drbd_resync_finished(device); + drbd_resync_finished(peer_device); return 0; } } @@ -4456,8 +4459,8 @@ static int receive_state(struct drbd_connection *connection, struct packet_info /* explicit verify finished notification, stop sector reached. */ if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE && peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) { - ov_out_of_sync_print(device); - drbd_resync_finished(device); + ov_out_of_sync_print(peer_device); + drbd_resync_finished(peer_device); return 0; } @@ -4766,11 +4769,11 @@ decode_bitmap_c(struct drbd_peer_device *peer_device, return -EIO; } -void INFO_bm_xfer_stats(struct drbd_device *device, +void INFO_bm_xfer_stats(struct drbd_peer_device *peer_device, const char *direction, struct bm_xfer_ctx *c) { /* what would it take to transfer it "plaintext" */ - unsigned int header_size = drbd_header_size(first_peer_device(device)->connection); + unsigned int header_size = drbd_header_size(peer_device->connection); unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size; unsigned int plain = header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) + @@ -4794,7 +4797,7 @@ void INFO_bm_xfer_stats(struct drbd_device *device, r = 1000; r = 1000 - r; - drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), " + drbd_info(peer_device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), " "total %u; compression: %u.%u%%\n", direction, c->bytes[1], c->packets[1], @@ -4872,12 +4875,12 @@ static int receive_bitmap(struct drbd_connection *connection, struct packet_info goto out; } - INFO_bm_xfer_stats(device, "receive", &c); + INFO_bm_xfer_stats(peer_device, "receive", &c); if (device->state.conn == C_WF_BITMAP_T) { enum drbd_state_rv rv; - err = drbd_send_bitmap(device); + err = drbd_send_bitmap(device, peer_device); if (err) goto out; /* Omit CS_ORDERED with this state transition to avoid deadlocks. */ @@ -4935,7 +4938,7 @@ static int receive_out_of_sync(struct drbd_connection *connection, struct packet drbd_conn_str(device->state.conn)); } - drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); + drbd_set_out_of_sync(peer_device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); return 0; } @@ -4956,7 +4959,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); - dec_rs_pending(device); + dec_rs_pending(peer_device); if (get_ldev(device)) { struct drbd_peer_request *peer_req; @@ -5214,7 +5217,7 @@ static int drbd_disconnected(struct drbd_peer_device *peer_device) if (get_ldev(device)) { drbd_bitmap_io(device, &drbd_bm_write_copy_pages, - "write from disconnected", BM_LOCKED_CHANGE_ALLOWED); + "write from disconnected", BM_LOCKED_CHANGE_ALLOWED, NULL); put_ldev(device); } @@ -5648,22 +5651,23 @@ static int got_IsInSync(struct drbd_connection *connection, struct packet_info * if (get_ldev(device)) { drbd_rs_complete_io(device, sector); - drbd_set_in_sync(device, sector, blksize); + drbd_set_in_sync(peer_device, sector, blksize); /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); put_ldev(device); } - dec_rs_pending(device); + dec_rs_pending(peer_device); atomic_add(blksize >> 9, &device->rs_sect_in); return 0; } static int -validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector, +validate_req_change_req_state(struct drbd_peer_device *peer_device, u64 id, sector_t sector, struct rb_root *root, const char *func, enum drbd_req_event what, bool missing_ok) { + struct drbd_device *device = peer_device->device; struct drbd_request *req; struct bio_and_error m; @@ -5673,7 +5677,7 @@ validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t secto spin_unlock_irq(&device->resource->req_lock); return -EIO; } - __req_mod(req, what, &m); + __req_mod(req, what, peer_device, &m); spin_unlock_irq(&device->resource->req_lock); if (m.bio) @@ -5698,8 +5702,8 @@ static int got_BlockAck(struct drbd_connection *connection, struct packet_info * update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); if (p->block_id == ID_SYNCER) { - drbd_set_in_sync(device, sector, blksize); - dec_rs_pending(device); + drbd_set_in_sync(peer_device, sector, blksize); + dec_rs_pending(peer_device); return 0; } switch (pi->cmd) { @@ -5722,7 +5726,7 @@ static int got_BlockAck(struct drbd_connection *connection, struct packet_info * BUG(); } - return validate_req_change_req_state(device, p->block_id, sector, + return validate_req_change_req_state(peer_device, p->block_id, sector, &device->write_requests, __func__, what, false); } @@ -5744,12 +5748,12 @@ static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); if (p->block_id == ID_SYNCER) { - dec_rs_pending(device); - drbd_rs_failed_io(device, sector, size); + dec_rs_pending(peer_device); + drbd_rs_failed_io(peer_device, sector, size); return 0; } - err = validate_req_change_req_state(device, p->block_id, sector, + err = validate_req_change_req_state(peer_device, p->block_id, sector, &device->write_requests, __func__, NEG_ACKED, true); if (err) { @@ -5758,7 +5762,7 @@ static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi request is no longer in the collision hash. */ /* In Protocol B we might already have got a P_RECV_ACK but then get a P_NEG_ACK afterwards. */ - drbd_set_out_of_sync(device, sector, size); + drbd_set_out_of_sync(peer_device, sector, size); } return 0; } @@ -5780,7 +5784,7 @@ static int got_NegDReply(struct drbd_connection *connection, struct packet_info drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n", (unsigned long long)sector, be32_to_cpu(p->blksize)); - return validate_req_change_req_state(device, p->block_id, sector, + return validate_req_change_req_state(peer_device, p->block_id, sector, &device->read_requests, __func__, NEG_ACKED, false); } @@ -5803,13 +5807,13 @@ static int got_NegRSDReply(struct drbd_connection *connection, struct packet_inf update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); - dec_rs_pending(device); + dec_rs_pending(peer_device); if (get_ldev_if_state(device, D_FAILED)) { drbd_rs_complete_io(device, sector); switch (pi->cmd) { case P_NEG_RS_DREPLY: - drbd_rs_failed_io(device, sector, size); + drbd_rs_failed_io(peer_device, sector, size); break; case P_RS_CANCEL: break; @@ -5866,21 +5870,21 @@ static int got_OVResult(struct drbd_connection *connection, struct packet_info * update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC) - drbd_ov_out_of_sync_found(device, sector, size); + drbd_ov_out_of_sync_found(peer_device, sector, size); else - ov_out_of_sync_print(device); + ov_out_of_sync_print(peer_device); if (!get_ldev(device)) return 0; drbd_rs_complete_io(device, sector); - dec_rs_pending(device); + dec_rs_pending(peer_device); --device->ov_left; /* let's advance progress step marks only for every other megabyte */ if ((device->ov_left & 0x200) == 0x200) - drbd_advance_rs_marks(device, device->ov_left); + drbd_advance_rs_marks(peer_device, device->ov_left); if (device->ov_left == 0) { dw = kmalloc(sizeof(*dw), GFP_NOIO); @@ -5890,8 +5894,8 @@ static int got_OVResult(struct drbd_connection *connection, struct packet_info * drbd_queue_work(&peer_device->connection->sender_work, &dw->w); } else { drbd_err(device, "kmalloc(dw) failed."); - ov_out_of_sync_print(device); - drbd_resync_finished(device); + ov_out_of_sync_print(peer_device); + drbd_resync_finished(peer_device); } } put_ldev(device); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index e36216d50753..380e6584a4ee 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -122,12 +122,13 @@ void drbd_req_destroy(struct kref *kref) * before it even was submitted or sent. * In that case we do not want to touch the bitmap at all. */ + struct drbd_peer_device *peer_device = first_peer_device(device); if ((s & (RQ_POSTPONED|RQ_LOCAL_MASK|RQ_NET_MASK)) != RQ_POSTPONED) { if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK)) - drbd_set_out_of_sync(device, req->i.sector, req->i.size); + drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size); if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS)) - drbd_set_in_sync(device, req->i.sector, req->i.size); + drbd_set_in_sync(peer_device, req->i.sector, req->i.size); } /* one might be tempted to move the drbd_al_complete_io @@ -552,12 +553,15 @@ static inline bool is_pending_write_protocol_A(struct drbd_request *req) * happen "atomically" within the req_lock, * and it enforces that we have to think in a very structured manner * about the "events" that may happen to a request during its life time ... + * + * + * peer_device == NULL means local disk */ int __req_mod(struct drbd_request *req, enum drbd_req_event what, + struct drbd_peer_device *peer_device, struct bio_and_error *m) { struct drbd_device *const device = req->device; - struct drbd_peer_device *const peer_device = first_peer_device(device); struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; struct net_conf *nc; int p, rv = 0; @@ -617,7 +621,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case READ_COMPLETED_WITH_ERROR: - drbd_set_out_of_sync(device, req->i.sector, req->i.size); + drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size); drbd_report_io_error(device, req); __drbd_chk_io_error(device, DRBD_READ_ERROR); fallthrough; @@ -1100,6 +1104,7 @@ static bool drbd_should_send_out_of_sync(union drbd_dev_state s) static int drbd_process_write_request(struct drbd_request *req) { struct drbd_device *device = req->device; + struct drbd_peer_device *peer_device = first_peer_device(device); int remote, send_oos; remote = drbd_should_do_remote(device->state); @@ -1115,7 +1120,7 @@ static int drbd_process_write_request(struct drbd_request *req) /* The only size==0 bios we expect are empty flushes. */ D_ASSERT(device, req->master_bio->bi_opf & REQ_PREFLUSH); if (remote) - _req_mod(req, QUEUE_AS_DRBD_BARRIER); + _req_mod(req, QUEUE_AS_DRBD_BARRIER, peer_device); return remote; } @@ -1125,10 +1130,10 @@ static int drbd_process_write_request(struct drbd_request *req) D_ASSERT(device, !(remote && send_oos)); if (remote) { - _req_mod(req, TO_BE_SENT); - _req_mod(req, QUEUE_FOR_NET_WRITE); - } else if (drbd_set_out_of_sync(device, req->i.sector, req->i.size)) - _req_mod(req, QUEUE_FOR_SEND_OOS); + _req_mod(req, TO_BE_SENT, peer_device); + _req_mod(req, QUEUE_FOR_NET_WRITE, peer_device); + } else if (drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size)) + _req_mod(req, QUEUE_FOR_SEND_OOS, peer_device); return remote; } @@ -1312,6 +1317,7 @@ static void drbd_update_plug(struct drbd_plug_cb *plug, struct drbd_request *req static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req) { struct drbd_resource *resource = device->resource; + struct drbd_peer_device *peer_device = first_peer_device(device); const int rw = bio_data_dir(req->master_bio); struct bio_and_error m = { NULL, }; bool no_remote = false; @@ -1375,8 +1381,8 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request /* We either have a private_bio, or we can read from remote. * Otherwise we had done the goto nodata above. */ if (req->private_bio == NULL) { - _req_mod(req, TO_BE_SENT); - _req_mod(req, QUEUE_FOR_NET_READ); + _req_mod(req, TO_BE_SENT, peer_device); + _req_mod(req, QUEUE_FOR_NET_READ, peer_device); } else no_remote = true; } @@ -1397,7 +1403,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request req->pre_submit_jif = jiffies; list_add_tail(&req->req_pending_local, &device->pending_completion[rw == WRITE]); - _req_mod(req, TO_BE_SUBMITTED); + _req_mod(req, TO_BE_SUBMITTED, NULL); /* but we need to give up the spinlock to submit */ submit_private_bio = true; } else if (no_remote) { diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index b4017b5c3fbc..9ae860e7591b 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -267,6 +267,7 @@ struct bio_and_error { extern void start_new_tl_epoch(struct drbd_connection *connection); extern void drbd_req_destroy(struct kref *kref); extern int __req_mod(struct drbd_request *req, enum drbd_req_event what, + struct drbd_peer_device *peer_device, struct bio_and_error *m); extern void complete_master_bio(struct drbd_device *device, struct bio_and_error *m); @@ -280,14 +281,15 @@ extern void drbd_restart_request(struct drbd_request *req); /* use this if you don't want to deal with calling complete_master_bio() * outside the spinlock, e.g. when walking some list on cleanup. */ -static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what) +static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what, + struct drbd_peer_device *peer_device) { struct drbd_device *device = req->device; struct bio_and_error m; int rv; /* __req_mod possibly frees req, do not touch req after that! */ - rv = __req_mod(req, what, &m); + rv = __req_mod(req, what, peer_device, &m); if (m.bio) complete_master_bio(device, &m); @@ -299,7 +301,8 @@ static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what) * of the lower level driver completion callback, so we need to * spin_lock_irqsave here. */ static inline int req_mod(struct drbd_request *req, - enum drbd_req_event what) + enum drbd_req_event what, + struct drbd_peer_device *peer_device) { unsigned long flags; struct drbd_device *device = req->device; @@ -307,7 +310,7 @@ static inline int req_mod(struct drbd_request *req, int rv; spin_lock_irqsave(&device->resource->req_lock, flags); - rv = __req_mod(req, what, &m); + rv = __req_mod(req, what, peer_device, &m); spin_unlock_irqrestore(&device->resource->req_lock, flags); if (m.bio) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 2aeea295fa28..287a8d1d3f70 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1222,9 +1222,11 @@ void drbd_resume_al(struct drbd_device *device) } /* helper for _drbd_set_state */ -static void set_ov_position(struct drbd_device *device, enum drbd_conns cs) +static void set_ov_position(struct drbd_peer_device *peer_device, enum drbd_conns cs) { - if (first_peer_device(device)->connection->agreed_pro_version < 90) + struct drbd_device *device = peer_device->device; + + if (peer_device->connection->agreed_pro_version < 90) device->ov_start_sector = 0; device->rs_total = drbd_bm_bits(device); device->ov_position = 0; @@ -1387,7 +1389,7 @@ _drbd_set_state(struct drbd_device *device, union drbd_state ns, unsigned long now = jiffies; int i; - set_ov_position(device, ns.conn); + set_ov_position(peer_device, ns.conn); device->rs_start = now; device->rs_last_sect_ev = 0; device->ov_last_oos_size = 0; @@ -1398,7 +1400,7 @@ _drbd_set_state(struct drbd_device *device, union drbd_state ns, device->rs_mark_time[i] = now; } - drbd_rs_controller_reset(device); + drbd_rs_controller_reset(peer_device); if (ns.conn == C_VERIFY_S) { drbd_info(device, "Starting Online Verify from sector %llu\n", @@ -1518,8 +1520,9 @@ static void abw_start_sync(struct drbd_device *device, int rv) } int drbd_bitmap_io_from_worker(struct drbd_device *device, - int (*io_fn)(struct drbd_device *), - char *why, enum bm_flag flags) + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device) { int rv; @@ -1529,7 +1532,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device, atomic_inc(&device->suspend_cnt); drbd_bm_lock(device, why, flags); - rv = io_fn(device); + rv = io_fn(device, peer_device); drbd_bm_unlock(device); drbd_resume_io(device); @@ -1809,7 +1812,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, device->state.conn == C_WF_BITMAP_S) drbd_queue_bitmap_io(device, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)", - BM_LOCKED_TEST_ALLOWED); + BM_LOCKED_TEST_ALLOWED, peer_device); /* Lost contact to peer's copy of the data */ if (lost_contact_to_peer_data(os.pdsk, ns.pdsk)) { @@ -1839,7 +1842,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, * No harm done if the bitmap still changes, * redirtied pages will follow later. */ drbd_bitmap_io_from_worker(device, &drbd_bm_write, - "demote diskless peer", BM_LOCKED_SET_ALLOWED); + "demote diskless peer", BM_LOCKED_SET_ALLOWED, peer_device); put_ldev(device); } @@ -1851,7 +1854,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, /* No changes to the bitmap expected this time, so assert that, * even though no harm was done if it did change. */ drbd_bitmap_io_from_worker(device, &drbd_bm_write, - "demote", BM_LOCKED_TEST_ALLOWED); + "demote", BM_LOCKED_TEST_ALLOWED, peer_device); put_ldev(device); } @@ -1888,7 +1891,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, /* no other bitmap changes expected during this phase */ drbd_queue_bitmap_io(device, &drbd_bmio_set_n_write, &abw_start_sync, - "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED); + "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED, + peer_device); /* first half of local IO error, failure to attach, * or administrative detach */ @@ -2011,7 +2015,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, if ((os.conn > C_CONNECTED && os.conn < C_AHEAD) && (ns.conn == C_CONNECTED || ns.conn >= C_AHEAD) && get_ldev(device)) { drbd_queue_bitmap_io(device, &drbd_bm_write_copy_pages, NULL, - "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED); + "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED, + peer_device); put_ldev(device); } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index f46738040d6b..4352a50fbb3f 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -28,8 +28,8 @@ #include "drbd_protocol.h" #include "drbd_req.h" -static int make_ov_request(struct drbd_device *, int); -static int make_resync_request(struct drbd_device *, int); +static int make_ov_request(struct drbd_peer_device *, int); +static int make_resync_request(struct drbd_peer_device *, int); /* endio handlers: * drbd_md_endio (defined here) @@ -124,7 +124,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l * In case of a write error, send the neg ack anyways. */ if (!__test_and_set_bit(__EE_SEND_WRITE_ACK, &peer_req->flags)) inc_unacked(device); - drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); + drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size); } spin_lock_irqsave(&device->resource->req_lock, flags); @@ -276,7 +276,7 @@ void drbd_request_endio(struct bio *bio) /* not req_mod(), we need irqsave here! */ spin_lock_irqsave(&device->resource->req_lock, flags); - __req_mod(req, what, &m); + __req_mod(req, what, NULL, &m); spin_unlock_irqrestore(&device->resource->req_lock, flags); put_ldev(device); @@ -363,7 +363,7 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) * drbd_alloc_pages due to pp_in_use > max_buffers. */ drbd_free_peer_req(device, peer_req); peer_req = NULL; - inc_rs_pending(device); + inc_rs_pending(peer_device); err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_CSUM_RS_REQUEST); @@ -430,10 +430,10 @@ int w_resync_timer(struct drbd_work *w, int cancel) switch (device->state.conn) { case C_VERIFY_S: - make_ov_request(device, cancel); + make_ov_request(first_peer_device(device), cancel); break; case C_SYNC_TARGET: - make_resync_request(device, cancel); + make_resync_request(first_peer_device(device), cancel); break; } @@ -493,8 +493,9 @@ struct fifo_buffer *fifo_alloc(unsigned int fifo_size) return fb; } -static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in) +static int drbd_rs_controller(struct drbd_peer_device *peer_device, unsigned int sect_in) { + struct drbd_device *device = peer_device->device; struct disk_conf *dc; unsigned int want; /* The number of sectors we want in-flight */ int req_sect; /* Number of sectors to request in this turn */ @@ -545,8 +546,9 @@ static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in) return req_sect; } -static int drbd_rs_number_requests(struct drbd_device *device) +static int drbd_rs_number_requests(struct drbd_peer_device *peer_device) { + struct drbd_device *device = peer_device->device; unsigned int sect_in; /* Number of sectors that came in since the last turn */ int number, mxb; @@ -556,7 +558,7 @@ static int drbd_rs_number_requests(struct drbd_device *device) rcu_read_lock(); mxb = drbd_get_max_buffers(device) / 2; if (rcu_dereference(device->rs_plan_s)->size) { - number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9); + number = drbd_rs_controller(peer_device, sect_in) >> (BM_BLOCK_SHIFT - 9); device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; } else { device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate; @@ -580,9 +582,9 @@ static int drbd_rs_number_requests(struct drbd_device *device) return number; } -static int make_resync_request(struct drbd_device *const device, int cancel) +static int make_resync_request(struct drbd_peer_device *const peer_device, int cancel) { - struct drbd_peer_device *const peer_device = first_peer_device(device); + struct drbd_device *const device = peer_device->device; struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; unsigned long bit; sector_t sector; @@ -598,7 +600,7 @@ static int make_resync_request(struct drbd_device *const device, int cancel) if (device->rs_total == 0) { /* empty resync? */ - drbd_resync_finished(device); + drbd_resync_finished(peer_device); return 0; } @@ -618,7 +620,7 @@ static int make_resync_request(struct drbd_device *const device, int cancel) } max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9; - number = drbd_rs_number_requests(device); + number = drbd_rs_number_requests(peer_device); if (number <= 0) goto requeue; @@ -653,7 +655,7 @@ next_sector: sector = BM_BIT_TO_SECT(bit); - if (drbd_try_rs_begin_io(device, sector)) { + if (drbd_try_rs_begin_io(peer_device, sector)) { device->bm_resync_fo = bit; goto requeue; } @@ -729,13 +731,13 @@ next_sector: } else { int err; - inc_rs_pending(device); + inc_rs_pending(peer_device); err = drbd_send_drequest(peer_device, size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST, sector, size, ID_SYNCER); if (err) { drbd_err(device, "drbd_send_drequest() failed, aborting...\n"); - dec_rs_pending(device); + dec_rs_pending(peer_device); put_ldev(device); return err; } @@ -760,8 +762,9 @@ next_sector: return 0; } -static int make_ov_request(struct drbd_device *device, int cancel) +static int make_ov_request(struct drbd_peer_device *peer_device, int cancel) { + struct drbd_device *device = peer_device->device; int number, i, size; sector_t sector; const sector_t capacity = get_capacity(device->vdisk); @@ -770,7 +773,7 @@ static int make_ov_request(struct drbd_device *device, int cancel) if (unlikely(cancel)) return 1; - number = drbd_rs_number_requests(device); + number = drbd_rs_number_requests(peer_device); sector = device->ov_position; for (i = 0; i < number; i++) { @@ -788,7 +791,7 @@ static int make_ov_request(struct drbd_device *device, int cancel) size = BM_BLOCK_SIZE; - if (drbd_try_rs_begin_io(device, sector)) { + if (drbd_try_rs_begin_io(peer_device, sector)) { device->ov_position = sector; goto requeue; } @@ -796,9 +799,9 @@ static int make_ov_request(struct drbd_device *device, int cancel) if (sector + (size>>9) > capacity) size = (capacity-sector)<<9; - inc_rs_pending(device); + inc_rs_pending(peer_device); if (drbd_send_ov_request(first_peer_device(device), sector, size)) { - dec_rs_pending(device); + dec_rs_pending(peer_device); return 0; } sector += BM_SECT_PER_BIT; @@ -818,8 +821,8 @@ int w_ov_finished(struct drbd_work *w, int cancel) container_of(w, struct drbd_device_work, w); struct drbd_device *device = dw->device; kfree(dw); - ov_out_of_sync_print(device); - drbd_resync_finished(device); + ov_out_of_sync_print(first_peer_device(device)); + drbd_resync_finished(first_peer_device(device)); return 0; } @@ -831,7 +834,7 @@ static int w_resync_finished(struct drbd_work *w, int cancel) struct drbd_device *device = dw->device; kfree(dw); - drbd_resync_finished(device); + drbd_resync_finished(first_peer_device(device)); return 0; } @@ -846,9 +849,10 @@ static void ping_peer(struct drbd_device *device) test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED); } -int drbd_resync_finished(struct drbd_device *device) +int drbd_resync_finished(struct drbd_peer_device *peer_device) { - struct drbd_connection *connection = first_peer_device(device)->connection; + struct drbd_device *device = peer_device->device; + struct drbd_connection *connection = peer_device->connection; unsigned long db, dt, dbdt; unsigned long n_oos; union drbd_state os, ns; @@ -1129,7 +1133,7 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req); } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { if (likely(device->state.pdsk >= D_INCONSISTENT)) { - inc_rs_pending(device); + inc_rs_pending(peer_device); if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req)) err = drbd_send_rs_deallocated(peer_device, peer_req); else @@ -1148,7 +1152,7 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req); /* update resync data with failure */ - drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size); + drbd_rs_failed_io(peer_device, peer_req->i.sector, peer_req->i.size); } dec_unacked(device); @@ -1199,12 +1203,12 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) } if (eq) { - drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size); + drbd_set_in_sync(peer_device, peer_req->i.sector, peer_req->i.size); /* rs_same_csums unit is BM_BLOCK_SIZE */ device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT; err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req); } else { - inc_rs_pending(device); + inc_rs_pending(peer_device); peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */ peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */ kfree(di); @@ -1257,10 +1261,10 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) * drbd_alloc_pages due to pp_in_use > max_buffers. */ drbd_free_peer_req(device, peer_req); peer_req = NULL; - inc_rs_pending(device); + inc_rs_pending(peer_device); err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY); if (err) - dec_rs_pending(device); + dec_rs_pending(peer_device); kfree(digest); out: @@ -1270,15 +1274,16 @@ out: return err; } -void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size) +void drbd_ov_out_of_sync_found(struct drbd_peer_device *peer_device, sector_t sector, int size) { + struct drbd_device *device = peer_device->device; if (device->ov_last_oos_start + device->ov_last_oos_size == sector) { device->ov_last_oos_size += size>>9; } else { device->ov_last_oos_start = sector; device->ov_last_oos_size = size>>9; } - drbd_set_out_of_sync(device, sector, size); + drbd_set_out_of_sync(peer_device, sector, size); } int w_e_end_ov_reply(struct drbd_work *w, int cancel) @@ -1328,9 +1333,9 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) * drbd_alloc_pages due to pp_in_use > max_buffers. */ drbd_free_peer_req(device, peer_req); if (!eq) - drbd_ov_out_of_sync_found(device, sector, size); + drbd_ov_out_of_sync_found(peer_device, sector, size); else - ov_out_of_sync_print(device); + ov_out_of_sync_print(peer_device); err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); @@ -1341,14 +1346,14 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) /* let's advance progress step marks only for every other megabyte */ if ((device->ov_left & 0x200) == 0x200) - drbd_advance_rs_marks(device, device->ov_left); + drbd_advance_rs_marks(peer_device, device->ov_left); stop_sector_reached = verify_can_do_stop_sector(device) && (sector + (size>>9)) >= device->ov_stop_sector; if (device->ov_left == 0 || stop_sector_reached) { - ov_out_of_sync_print(device); - drbd_resync_finished(device); + ov_out_of_sync_print(peer_device); + drbd_resync_finished(peer_device); } return err; @@ -1425,7 +1430,7 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - req_mod(req, SEND_CANCELED); + req_mod(req, SEND_CANCELED, peer_device); return 0; } req->pre_send_jif = jiffies; @@ -1437,7 +1442,7 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel) maybe_send_barrier(connection, req->epoch); err = drbd_send_out_of_sync(peer_device, req); - req_mod(req, OOS_HANDED_TO_NETWORK); + req_mod(req, OOS_HANDED_TO_NETWORK, peer_device); return err; } @@ -1457,7 +1462,7 @@ int w_send_dblock(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - req_mod(req, SEND_CANCELED); + req_mod(req, SEND_CANCELED, peer_device); return 0; } req->pre_send_jif = jiffies; @@ -1467,7 +1472,7 @@ int w_send_dblock(struct drbd_work *w, int cancel) connection->send.current_epoch_writes++; err = drbd_send_dblock(peer_device, req); - req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); + req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK, peer_device); if (do_send_unplug && !err) pd_send_unplug_remote(peer_device); @@ -1490,7 +1495,7 @@ int w_send_read_req(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - req_mod(req, SEND_CANCELED); + req_mod(req, SEND_CANCELED, peer_device); return 0; } req->pre_send_jif = jiffies; @@ -1502,7 +1507,7 @@ int w_send_read_req(struct drbd_work *w, int cancel) err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size, (unsigned long)req); - req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); + req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK, peer_device); if (do_send_unplug && !err) pd_send_unplug_remote(peer_device); @@ -1668,8 +1673,9 @@ void drbd_resync_after_changed(struct drbd_device *device) } while (changed); } -void drbd_rs_controller_reset(struct drbd_device *device) +void drbd_rs_controller_reset(struct drbd_peer_device *peer_device) { + struct drbd_device *device = peer_device->device; struct gendisk *disk = device->ldev->backing_bdev->bd_disk; struct fifo_buffer *plan; @@ -1891,10 +1897,10 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) rcu_read_unlock(); schedule_timeout_interruptible(timeo); } - drbd_resync_finished(device); + drbd_resync_finished(peer_device); } - drbd_rs_controller_reset(device); + drbd_rs_controller_reset(peer_device); /* ns.conn may already be != device->state.conn, * we may have been paused in between, or become paused until * the timer triggers. @@ -1909,8 +1915,9 @@ out: mutex_unlock(device->state_mutex); } -static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done) +static void update_on_disk_bitmap(struct drbd_peer_device *peer_device, bool resync_done) { + struct drbd_device *device = peer_device->device; struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; device->rs_last_bcast = jiffies; @@ -1919,7 +1926,7 @@ static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done) drbd_bm_write_lazy(device, 0); if (resync_done && is_sync_state(device->state.conn)) - drbd_resync_finished(device); + drbd_resync_finished(peer_device); drbd_bcast_event(device, &sib); /* update timestamp, in case it took a while to write out stuff */ @@ -1945,6 +1952,7 @@ static void drbd_ldev_destroy(struct drbd_device *device) static void go_diskless(struct drbd_device *device) { + struct drbd_peer_device *peer_device = first_peer_device(device); D_ASSERT(device, device->state.disk == D_FAILED); /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will * inc/dec it frequently. Once we are D_DISKLESS, no one will touch @@ -1970,7 +1978,7 @@ static void go_diskless(struct drbd_device *device) * Any modifications would not be expected anymore, though. */ if (drbd_bitmap_io_from_worker(device, drbd_bm_write, - "detach", BM_LOCKED_TEST_ALLOWED)) { + "detach", BM_LOCKED_TEST_ALLOWED, peer_device)) { if (test_bit(WAS_READ_ERROR, &device->flags)) { drbd_md_set_flag(device, MDF_FULL_SYNC); drbd_md_sync(device); @@ -2017,7 +2025,7 @@ static void do_device_work(struct drbd_device *device, const unsigned long todo) do_md_sync(device); if (test_bit(RS_DONE, &todo) || test_bit(RS_PROGRESS, &todo)) - update_on_disk_bitmap(device, test_bit(RS_DONE, &todo)); + update_on_disk_bitmap(first_peer_device(device), test_bit(RS_DONE, &todo)); if (test_bit(GO_DISKLESS, &todo)) go_diskless(device); if (test_bit(DESTROY_DISK, &todo)) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 592cfa8b765a..d445fd0934bd 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -325,6 +325,9 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, if (blk_validate_block_size(blksize)) return -EINVAL; + if (bytesize < 0) + return -EINVAL; + nbd->config->bytesize = bytesize; nbd->config->blksize_bits = __ffs(blksize); @@ -1111,6 +1114,9 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, struct nbd_sock *nsock; int err; + /* Arg will be cast to int, check it to avoid overflow */ + if (arg > INT_MAX) + return -EINVAL; sock = nbd_get_socket(nbd, arg, &err); if (!sock) return err; @@ -1934,11 +1940,11 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } } - if (!info->attrs[NBD_ATTR_SOCKETS]) { + if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_SOCKETS)) { pr_err("must specify at least one socket\n"); return -EINVAL; } - if (!info->attrs[NBD_ATTR_SIZE_BYTES]) { + if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_SIZE_BYTES)) { pr_err("must specify a size in bytes for the device\n"); return -EINVAL; } @@ -2123,7 +2129,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; - if (!info->attrs[NBD_ATTR_INDEX]) { + if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_INDEX)) { pr_err("must specify an index to disconnect\n"); return -EINVAL; } @@ -2161,7 +2167,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; - if (!info->attrs[NBD_ATTR_INDEX]) { + if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_INDEX)) { pr_err("must specify a device to reconfigure\n"); return -EINVAL; } @@ -2325,6 +2331,7 @@ static struct genl_family nbd_genl_family __ro_after_init = { .n_small_ops = ARRAY_SIZE(nbd_connect_genl_ops), .resv_start_op = NBD_CMD_STATUS + 1, .maxattr = NBD_ATTR_MAX, + .netnsok = 1, .policy = nbd_attr_policy, .mcgrps = nbd_mcast_grps, .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps), diff --git a/drivers/block/null_blk/Kconfig b/drivers/block/null_blk/Kconfig index 6bf1f8ca20a2..ff23bb9346d0 100644 --- a/drivers/block/null_blk/Kconfig +++ b/drivers/block/null_blk/Kconfig @@ -9,4 +9,4 @@ config BLK_DEV_NULL_BLK config BLK_DEV_NULL_BLK_FAULT_INJECTION bool "Support fault injection for Null test block driver" - depends on BLK_DEV_NULL_BLK && FAULT_INJECTION + depends on BLK_DEV_NULL_BLK && FAULT_INJECTION_CONFIGFS diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 9e6b032c8ecc..b195b8b9fe32 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -250,7 +250,7 @@ static void null_free_device_storage(struct nullb_device *dev, bool is_cache); static inline struct nullb_device *to_nullb_device(struct config_item *item) { - return item ? container_of(item, struct nullb_device, item) : NULL; + return item ? container_of(to_config_group(item), struct nullb_device, group) : NULL; } static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page) @@ -593,8 +593,29 @@ static const struct config_item_type nullb_device_type = { .ct_owner = THIS_MODULE, }; +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + +static void nullb_add_fault_config(struct nullb_device *dev) +{ + fault_config_init(&dev->timeout_config, "timeout_inject"); + fault_config_init(&dev->requeue_config, "requeue_inject"); + fault_config_init(&dev->init_hctx_fault_config, "init_hctx_fault_inject"); + + configfs_add_default_group(&dev->timeout_config.group, &dev->group); + configfs_add_default_group(&dev->requeue_config.group, &dev->group); + configfs_add_default_group(&dev->init_hctx_fault_config.group, &dev->group); +} + +#else + +static void nullb_add_fault_config(struct nullb_device *dev) +{ +} + +#endif + static struct -config_item *nullb_group_make_item(struct config_group *group, const char *name) +config_group *nullb_group_make_group(struct config_group *group, const char *name) { struct nullb_device *dev; @@ -605,9 +626,10 @@ config_item *nullb_group_make_item(struct config_group *group, const char *name) if (!dev) return ERR_PTR(-ENOMEM); - config_item_init_type_name(&dev->item, name, &nullb_device_type); + config_group_init_type_name(&dev->group, name, &nullb_device_type); + nullb_add_fault_config(dev); - return &dev->item; + return &dev->group; } static void @@ -645,7 +667,7 @@ static struct configfs_attribute *nullb_group_attrs[] = { }; static struct configfs_group_operations nullb_group_ops = { - .make_item = nullb_group_make_item, + .make_group = nullb_group_make_group, .drop_item = nullb_group_drop_item, }; @@ -676,6 +698,13 @@ static struct nullb_device *null_alloc_dev(void) dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return NULL; + +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + dev->timeout_config.attr = null_timeout_attr; + dev->requeue_config.attr = null_requeue_attr; + dev->init_hctx_fault_config.attr = null_init_hctx_attr; +#endif + INIT_RADIX_TREE(&dev->data, GFP_ATOMIC); INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC); if (badblocks_init(&dev->badblocks, 0)) { @@ -1030,8 +1059,8 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page) if (!t_page) return -ENOMEM; - src = kmap_atomic(c_page->page); - dst = kmap_atomic(t_page->page); + src = kmap_local_page(c_page->page); + dst = kmap_local_page(t_page->page); for (i = 0; i < PAGE_SECTORS; i += (nullb->dev->blocksize >> SECTOR_SHIFT)) { @@ -1043,8 +1072,8 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page) } } - kunmap_atomic(dst); - kunmap_atomic(src); + kunmap_local(dst); + kunmap_local(src); ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page); null_free_page(ret); @@ -1112,7 +1141,6 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source, size_t temp, count = 0; unsigned int offset; struct nullb_page *t_page; - void *dst, *src; while (count < n) { temp = min_t(size_t, nullb->dev->blocksize, n - count); @@ -1126,11 +1154,7 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source, if (!t_page) return -ENOSPC; - src = kmap_atomic(source); - dst = kmap_atomic(t_page->page); - memcpy(dst + offset, src + off + count, temp); - kunmap_atomic(dst); - kunmap_atomic(src); + memcpy_page(t_page->page, offset, source, off + count, temp); __set_bit(sector & SECTOR_MASK, t_page->bitmap); @@ -1149,7 +1173,6 @@ static int copy_from_nullb(struct nullb *nullb, struct page *dest, size_t temp, count = 0; unsigned int offset; struct nullb_page *t_page; - void *dst, *src; while (count < n) { temp = min_t(size_t, nullb->dev->blocksize, n - count); @@ -1158,16 +1181,11 @@ static int copy_from_nullb(struct nullb *nullb, struct page *dest, t_page = null_lookup_page(nullb, sector, false, !null_cache_active(nullb)); - dst = kmap_atomic(dest); - if (!t_page) { - memset(dst + off + count, 0, temp); - goto next; - } - src = kmap_atomic(t_page->page); - memcpy(dst + off + count, src + offset, temp); - kunmap_atomic(src); -next: - kunmap_atomic(dst); + if (t_page) + memcpy_page(dest, off + count, t_page->page, offset, + temp); + else + zero_user(dest, off + count, temp); count += temp; sector += temp >> SECTOR_SHIFT; @@ -1178,11 +1196,7 @@ next: static void nullb_fill_pattern(struct nullb *nullb, struct page *page, unsigned int len, unsigned int off) { - void *dst; - - dst = kmap_atomic(page); - memset(dst + off, 0xFF, len); - kunmap_atomic(dst); + memset_page(page, off, 0xff, len); } blk_status_t null_handle_discard(struct nullb_device *dev, @@ -1529,24 +1543,48 @@ static void null_submit_bio(struct bio *bio) null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio)); } +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + +static bool should_timeout_request(struct request *rq) +{ + struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct nullb_device *dev = cmd->nq->dev; + + return should_fail(&dev->timeout_config.attr, 1); +} + +static bool should_requeue_request(struct request *rq) +{ + struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct nullb_device *dev = cmd->nq->dev; + + return should_fail(&dev->requeue_config.attr, 1); +} + +static bool should_init_hctx_fail(struct nullb_device *dev) +{ + return should_fail(&dev->init_hctx_fault_config.attr, 1); +} + +#else + static bool should_timeout_request(struct request *rq) { -#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION - if (g_timeout_str[0]) - return should_fail(&null_timeout_attr, 1); -#endif return false; } static bool should_requeue_request(struct request *rq) { -#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION - if (g_requeue_str[0]) - return should_fail(&null_requeue_attr, 1); -#endif return false; } +static bool should_init_hctx_fail(struct nullb_device *dev) +{ + return false; +} + +#endif + static void null_map_queues(struct blk_mq_tag_set *set) { struct nullb *nullb = set->driver_data; @@ -1743,10 +1781,8 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, struct nullb *nullb = hctx->queue->queuedata; struct nullb_queue *nq; -#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION - if (g_init_hctx_str[0] && should_fail(&null_init_hctx_attr, 1)) + if (should_init_hctx_fail(nullb->dev)) return -EFAULT; -#endif nq = &nullb->queues[hctx_idx]; hctx->driver_data = nq; @@ -1964,6 +2000,11 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) static int null_validate_conf(struct nullb_device *dev) { + if (dev->queue_mode == NULL_Q_RQ) { + pr_err("legacy IO path is no longer available\n"); + return -EINVAL; + } + dev->blocksize = round_down(dev->blocksize, 512); dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096); @@ -2066,9 +2107,6 @@ static int null_add_dev(struct nullb_device *dev) if (rv) goto out_cleanup_queues; - if (!null_setup_fault()) - goto out_cleanup_tags; - nullb->tag_set->timeout = 5 * HZ; nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb); if (IS_ERR(nullb->disk)) { @@ -2130,10 +2168,10 @@ static int null_add_dev(struct nullb_device *dev) null_config_discard(nullb); - if (config_item_name(&dev->item)) { + if (config_item_name(&dev->group.cg_item)) { /* Use configfs dir name as the device name */ snprintf(nullb->disk_name, sizeof(nullb->disk_name), - "%s", config_item_name(&dev->item)); + "%s", config_item_name(&dev->group.cg_item)); } else { sprintf(nullb->disk_name, "nullb%d", nullb->index); } @@ -2233,6 +2271,9 @@ static int __init null_init(void) g_home_node = NUMA_NO_NODE; } + if (!null_setup_fault()) + return -EINVAL; + if (g_queue_mode == NULL_Q_RQ) { pr_err("legacy IO path is no longer available\n"); return -EINVAL; diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index eb5972c50be8..929f659dd255 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -69,7 +69,12 @@ enum { struct nullb_device { struct nullb *nullb; - struct config_item item; + struct config_group group; +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + struct fault_config timeout_config; + struct fault_config requeue_config; + struct fault_config init_hctx_fault_config; +#endif struct radix_tree_root data; /* data stored in the disk */ struct radix_tree_root cache; /* disk cache data */ unsigned long flags; /* device flags */ diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 604c1a13c76e..afbef182820b 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -53,7 +53,8 @@ | UBLK_F_NEED_GET_DATA \ | UBLK_F_USER_RECOVERY \ | UBLK_F_USER_RECOVERY_REISSUE \ - | UBLK_F_UNPRIVILEGED_DEV) + | UBLK_F_UNPRIVILEGED_DEV \ + | UBLK_F_CMD_IOCTL_ENCODE) /* All UBLK_PARAM_TYPE_* should be included here */ #define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | \ @@ -298,9 +299,7 @@ static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq) static inline bool ublk_need_get_data(const struct ublk_queue *ubq) { - if (ubq->flags & UBLK_F_NEED_GET_DATA) - return true; - return false; + return ubq->flags & UBLK_F_NEED_GET_DATA; } static struct ublk_device *ublk_get_device(struct ublk_device *ub) @@ -349,25 +348,19 @@ static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id) static inline bool ublk_queue_can_use_recovery_reissue( struct ublk_queue *ubq) { - if ((ubq->flags & UBLK_F_USER_RECOVERY) && - (ubq->flags & UBLK_F_USER_RECOVERY_REISSUE)) - return true; - return false; + return (ubq->flags & UBLK_F_USER_RECOVERY) && + (ubq->flags & UBLK_F_USER_RECOVERY_REISSUE); } static inline bool ublk_queue_can_use_recovery( struct ublk_queue *ubq) { - if (ubq->flags & UBLK_F_USER_RECOVERY) - return true; - return false; + return ubq->flags & UBLK_F_USER_RECOVERY; } static inline bool ublk_can_use_recovery(struct ublk_device *ub) { - if (ub->dev_info.flags & UBLK_F_USER_RECOVERY) - return true; - return false; + return ub->dev_info.flags & UBLK_F_USER_RECOVERY; } static void ublk_free_disk(struct gendisk *disk) @@ -428,10 +421,9 @@ static const struct block_device_operations ub_fops = { #define UBLK_MAX_PIN_PAGES 32 struct ublk_map_data { - const struct ublk_queue *ubq; const struct request *rq; - const struct ublk_io *io; - unsigned max_bytes; + unsigned long ubuf; + unsigned int len; }; struct ublk_io_iter { @@ -488,18 +480,17 @@ static inline unsigned ublk_copy_io_pages(struct ublk_io_iter *data, return done; } -static inline int ublk_copy_user_pages(struct ublk_map_data *data, - bool to_vm) +static int ublk_copy_user_pages(struct ublk_map_data *data, bool to_vm) { const unsigned int gup_flags = to_vm ? FOLL_WRITE : 0; - const unsigned long start_vm = data->io->addr; + const unsigned long start_vm = data->ubuf; unsigned int done = 0; struct ublk_io_iter iter = { .pg_off = start_vm & (PAGE_SIZE - 1), .bio = data->rq->bio, .iter = data->rq->bio->bi_iter, }; - const unsigned int nr_pages = round_up(data->max_bytes + + const unsigned int nr_pages = round_up(data->len + (start_vm & (PAGE_SIZE - 1)), PAGE_SIZE) >> PAGE_SHIFT; while (done < nr_pages) { @@ -512,42 +503,49 @@ static inline int ublk_copy_user_pages(struct ublk_map_data *data, iter.pages); if (iter.nr_pages <= 0) return done == 0 ? iter.nr_pages : done; - len = ublk_copy_io_pages(&iter, data->max_bytes, to_vm); + len = ublk_copy_io_pages(&iter, data->len, to_vm); for (i = 0; i < iter.nr_pages; i++) { if (to_vm) set_page_dirty(iter.pages[i]); put_page(iter.pages[i]); } - data->max_bytes -= len; + data->len -= len; done += iter.nr_pages; } return done; } +static inline bool ublk_need_map_req(const struct request *req) +{ + return ublk_rq_has_data(req) && req_op(req) == REQ_OP_WRITE; +} + +static inline bool ublk_need_unmap_req(const struct request *req) +{ + return ublk_rq_has_data(req) && req_op(req) == REQ_OP_READ; +} + static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req, struct ublk_io *io) { const unsigned int rq_bytes = blk_rq_bytes(req); + /* * no zero copy, we delay copy WRITE request data into ublksrv * context and the big benefit is that pinning pages in current * context is pretty fast, see ublk_pin_user_pages */ - if (req_op(req) != REQ_OP_WRITE && req_op(req) != REQ_OP_FLUSH) - return rq_bytes; - - if (ublk_rq_has_data(req)) { + if (ublk_need_map_req(req)) { struct ublk_map_data data = { - .ubq = ubq, .rq = req, - .io = io, - .max_bytes = rq_bytes, + .ubuf = io->addr, + .len = rq_bytes, }; ublk_copy_user_pages(&data, true); - return rq_bytes - data.max_bytes; + return rq_bytes - data.len; } return rq_bytes; } @@ -558,19 +556,18 @@ static int ublk_unmap_io(const struct ublk_queue *ubq, { const unsigned int rq_bytes = blk_rq_bytes(req); - if (req_op(req) == REQ_OP_READ && ublk_rq_has_data(req)) { + if (ublk_need_unmap_req(req)) { struct ublk_map_data data = { - .ubq = ubq, .rq = req, - .io = io, - .max_bytes = io->res, + .ubuf = io->addr, + .len = io->res, }; WARN_ON_ONCE(io->res > rq_bytes); ublk_copy_user_pages(&data, false); - return io->res - data.max_bytes; + return io->res - data.len; } return rq_bytes; } @@ -655,14 +652,15 @@ static void ublk_complete_rq(struct request *req) struct ublk_queue *ubq = req->mq_hctx->driver_data; struct ublk_io *io = &ubq->ios[req->tag]; unsigned int unmapped_bytes; + blk_status_t res = BLK_STS_OK; /* failed read IO if nothing is read */ if (!io->res && req_op(req) == REQ_OP_READ) io->res = -EIO; if (io->res < 0) { - blk_mq_end_request(req, errno_to_blk_status(io->res)); - return; + res = errno_to_blk_status(io->res); + goto exit; } /* @@ -671,10 +669,8 @@ static void ublk_complete_rq(struct request *req) * * Both the two needn't unmap. */ - if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE) { - blk_mq_end_request(req, BLK_STS_OK); - return; - } + if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE) + goto exit; /* for READ request, writing data in iod->addr to rq buffers */ unmapped_bytes = ublk_unmap_io(ubq, req, io); @@ -691,6 +687,10 @@ static void ublk_complete_rq(struct request *req) blk_mq_requeue_request(req, true); else __blk_mq_end_request(req, BLK_STS_OK); + + return; +exit: + blk_mq_end_request(req, res); } /* @@ -771,9 +771,7 @@ static inline void __ublk_rq_task_work(struct request *req, return; } - if (ublk_need_get_data(ubq) && - (req_op(req) == REQ_OP_WRITE || - req_op(req) == REQ_OP_FLUSH)) { + if (ublk_need_get_data(ubq) && ublk_need_map_req(req)) { /* * We have not handled UBLK_IO_NEED_GET_DATA command yet, * so immepdately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv @@ -1261,6 +1259,19 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, ublk_queue_cmd(ubq, req); } +static inline int ublk_check_cmd_op(u32 cmd_op) +{ + u32 ioc_type = _IOC_TYPE(cmd_op); + + if (IS_ENABLED(CONFIG_BLKDEV_UBLK_LEGACY_OPCODES) && ioc_type != 'u') + return -EOPNOTSUPP; + + if (ioc_type != 'u' && ioc_type != 0) + return -EOPNOTSUPP; + + return 0; +} + static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags, struct ublksrv_io_cmd *ub_cmd) @@ -1303,10 +1314,15 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, * iff the driver have set the UBLK_IO_FLAG_NEED_GET_DATA. */ if ((!!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA)) - ^ (cmd_op == UBLK_IO_NEED_GET_DATA)) + ^ (_IOC_NR(cmd_op) == UBLK_IO_NEED_GET_DATA)) + goto out; + + ret = ublk_check_cmd_op(cmd_op); + if (ret) goto out; - switch (cmd_op) { + ret = -EINVAL; + switch (_IOC_NR(cmd_op)) { case UBLK_IO_FETCH_REQ: /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */ if (ublk_queue_ready(ubq)) { @@ -1770,6 +1786,8 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK)) ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK; + ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE; + /* We are not ready to support zero copy */ ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY; @@ -2128,7 +2146,7 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub, * know if the specified device is created as unprivileged * mode. */ - if (cmd->cmd_op != UBLK_CMD_GET_DEV_INFO2) + if (_IOC_NR(cmd->cmd_op) != UBLK_CMD_GET_DEV_INFO2) return 0; } @@ -2154,7 +2172,7 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub, dev_path[header->dev_path_len] = 0; ret = -EINVAL; - switch (cmd->cmd_op) { + switch (_IOC_NR(cmd->cmd_op)) { case UBLK_CMD_GET_DEV_INFO: case UBLK_CMD_GET_DEV_INFO2: case UBLK_CMD_GET_QUEUE_AFFINITY: @@ -2193,6 +2211,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, { struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; struct ublk_device *ub = NULL; + u32 cmd_op = cmd->cmd_op; int ret = -EINVAL; if (issue_flags & IO_URING_F_NONBLOCK) @@ -2203,22 +2222,22 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, if (!(issue_flags & IO_URING_F_SQE128)) goto out; - if (cmd->cmd_op != UBLK_CMD_ADD_DEV) { + ret = ublk_check_cmd_op(cmd_op); + if (ret) + goto out; + + if (_IOC_NR(cmd_op) != UBLK_CMD_ADD_DEV) { ret = -ENODEV; ub = ublk_get_device_from_id(header->dev_id); if (!ub) goto out; ret = ublk_ctrl_uring_cmd_permission(ub, cmd); - } else { - /* ADD_DEV permission check is done in command handler */ - ret = 0; + if (ret) + goto put_dev; } - if (ret) - goto put_dev; - - switch (cmd->cmd_op) { + switch (_IOC_NR(cmd_op)) { case UBLK_CMD_START_DEV: ret = ublk_ctrl_start_dev(ub, cmd); break; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 2055a758541d..7899f5fb4c13 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1202,21 +1202,12 @@ struct dm_crypto_profile { struct mapped_device *md; }; -struct dm_keyslot_evict_args { - const struct blk_crypto_key *key; - int err; -}; - static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - struct dm_keyslot_evict_args *args = data; - int err; + const struct blk_crypto_key *key = data; - err = blk_crypto_evict_key(dev->bdev, args->key); - if (!args->err) - args->err = err; - /* Always try to evict the key from all devices. */ + blk_crypto_evict_key(dev->bdev, key); return 0; } @@ -1229,7 +1220,6 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile, { struct mapped_device *md = container_of(profile, struct dm_crypto_profile, profile)->md; - struct dm_keyslot_evict_args args = { key }; struct dm_table *t; int srcu_idx; @@ -1242,11 +1232,12 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile, if (!ti->type->iterate_devices) continue; - ti->type->iterate_devices(ti, dm_keyslot_evict_callback, &args); + ti->type->iterate_devices(ti, dm_keyslot_evict_callback, + (void *)key); } dm_put_live_table(md, srcu_idx); - return args.err; + return 0; } static int diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index e7cc6ba1b657..920bb68156d2 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -209,76 +209,99 @@ static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mdde return NULL; } -static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) +static unsigned int optimal_io_size(struct block_device *bdev, + unsigned int last_page_size, + unsigned int io_size) +{ + if (bdev_io_opt(bdev) > bdev_logical_block_size(bdev)) + return roundup(last_page_size, bdev_io_opt(bdev)); + return io_size; +} + +static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size, + sector_t start, sector_t boundary) +{ + if (io_size != opt_size && + start + opt_size / SECTOR_SIZE <= boundary) + return opt_size; + if (start + io_size / SECTOR_SIZE <= boundary) + return io_size; + + /* Overflows boundary */ + return 0; +} + +static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap, + struct page *page) { - struct md_rdev *rdev; struct block_device *bdev; struct mddev *mddev = bitmap->mddev; struct bitmap_storage *store = &bitmap->storage; + sector_t offset = mddev->bitmap_info.offset; + sector_t ps, sboff, doff; + unsigned int size = PAGE_SIZE; + unsigned int opt_size = PAGE_SIZE; + + bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev; + if (page->index == store->file_pages - 1) { + unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1); + + if (last_page_size == 0) + last_page_size = PAGE_SIZE; + size = roundup(last_page_size, bdev_logical_block_size(bdev)); + opt_size = optimal_io_size(bdev, last_page_size, size); + } + + ps = page->index * PAGE_SIZE / SECTOR_SIZE; + sboff = rdev->sb_start + offset; + doff = rdev->data_offset; + + /* Just make sure we aren't corrupting data or metadata */ + if (mddev->external) { + /* Bitmap could be anywhere. */ + if (sboff + ps > doff && + sboff < (doff + mddev->dev_sectors + PAGE_SIZE / SECTOR_SIZE)) + return -EINVAL; + } else if (offset < 0) { + /* DATA BITMAP METADATA */ + size = bitmap_io_size(size, opt_size, offset + ps, 0); + if (size == 0) + /* bitmap runs in to metadata */ + return -EINVAL; + + if (doff + mddev->dev_sectors > sboff) + /* data runs in to bitmap */ + return -EINVAL; + } else if (rdev->sb_start < rdev->data_offset) { + /* METADATA BITMAP DATA */ + size = bitmap_io_size(size, opt_size, sboff + ps, doff); + if (size == 0) + /* bitmap runs in to data */ + return -EINVAL; + } else { + /* DATA METADATA BITMAP - no problems */ + } -restart: - rdev = NULL; - while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { - int size = PAGE_SIZE; - loff_t offset = mddev->bitmap_info.offset; + md_super_write(mddev, rdev, sboff + ps, (int) size, page); + return 0; +} - bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev; +static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) +{ + struct md_rdev *rdev; + struct mddev *mddev = bitmap->mddev; + int ret; - if (page->index == store->file_pages-1) { - int last_page_size = store->bytes & (PAGE_SIZE-1); - if (last_page_size == 0) - last_page_size = PAGE_SIZE; - size = roundup(last_page_size, - bdev_logical_block_size(bdev)); - } - /* Just make sure we aren't corrupting data or - * metadata - */ - if (mddev->external) { - /* Bitmap could be anywhere. */ - if (rdev->sb_start + offset + (page->index - * (PAGE_SIZE/512)) - > rdev->data_offset - && - rdev->sb_start + offset - < (rdev->data_offset + mddev->dev_sectors - + (PAGE_SIZE/512))) - goto bad_alignment; - } else if (offset < 0) { - /* DATA BITMAP METADATA */ - if (offset - + (long)(page->index * (PAGE_SIZE/512)) - + size/512 > 0) - /* bitmap runs in to metadata */ - goto bad_alignment; - if (rdev->data_offset + mddev->dev_sectors - > rdev->sb_start + offset) - /* data runs in to bitmap */ - goto bad_alignment; - } else if (rdev->sb_start < rdev->data_offset) { - /* METADATA BITMAP DATA */ - if (rdev->sb_start - + offset - + page->index*(PAGE_SIZE/512) + size/512 - > rdev->data_offset) - /* bitmap runs in to data */ - goto bad_alignment; - } else { - /* DATA METADATA BITMAP - no problems */ + do { + rdev = NULL; + while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { + ret = __write_sb_page(rdev, bitmap, page); + if (ret) + return ret; } - md_super_write(mddev, rdev, - rdev->sb_start + offset - + page->index * (PAGE_SIZE/512), - size, - page); - } + } while (wait && md_super_wait(mddev) < 0); - if (wait && md_super_wait(mddev) < 0) - goto restart; return 0; - - bad_alignment: - return -EINVAL; } static void md_bitmap_file_kick(struct bitmap *bitmap); diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c index 6e7797b4e738..4eb72b9dd933 100644 --- a/drivers/md/md-linear.c +++ b/drivers/md/md-linear.c @@ -223,7 +223,8 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio) bio_sector < start_sector)) goto out_of_bounds; - if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) { + if (unlikely(is_rdev_broken(tmp_dev->rdev))) { + md_error(mddev, tmp_dev->rdev); bio_io_error(bio); return true; } @@ -270,6 +271,16 @@ static void linear_status (struct seq_file *seq, struct mddev *mddev) seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2); } +static void linear_error(struct mddev *mddev, struct md_rdev *rdev) +{ + if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) { + char *md_name = mdname(mddev); + + pr_crit("md/linear%s: Disk failure on %pg detected, failing array.\n", + md_name, rdev->bdev); + } +} + static void linear_quiesce(struct mddev *mddev, int state) { } @@ -286,6 +297,7 @@ static struct md_personality linear_personality = .hot_add_disk = linear_add, .size = linear_size, .quiesce = linear_quiesce, + .error_handler = linear_error, }; static int __init linear_init (void) diff --git a/drivers/md/md.c b/drivers/md/md.c index 13321dbb5fbc..2003cf86fb8d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -78,7 +78,7 @@ static LIST_HEAD(pers_list); static DEFINE_SPINLOCK(pers_lock); -static struct kobj_type md_ktype; +static const struct kobj_type md_ktype; struct md_cluster_operations *md_cluster_ops; EXPORT_SYMBOL(md_cluster_ops); @@ -3600,7 +3600,7 @@ static const struct sysfs_ops rdev_sysfs_ops = { .show = rdev_attr_show, .store = rdev_attr_store, }; -static struct kobj_type rdev_ktype = { +static const struct kobj_type rdev_ktype = { .release = rdev_free, .sysfs_ops = &rdev_sysfs_ops, .default_groups = rdev_default_groups, @@ -5558,7 +5558,7 @@ static const struct sysfs_ops md_sysfs_ops = { .show = md_attr_show, .store = md_attr_store, }; -static struct kobj_type md_ktype = { +static const struct kobj_type md_ktype = { .release = md_kobj_release, .sysfs_ops = &md_sysfs_ops, .default_groups = md_attr_groups, @@ -7974,6 +7974,9 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev) return; mddev->pers->error_handler(mddev, rdev); + if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR) + return; + if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags)) set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); sysfs_notify_dirent_safe(rdev->sysfs_state); @@ -8029,16 +8032,16 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev) } else if (resync > max_sectors) { resync = max_sectors; } else { - resync -= atomic_read(&mddev->recovery_active); - if (resync < MD_RESYNC_ACTIVE) { - /* - * Resync has started, but the subtraction has - * yielded one of the special values. Force it - * to active to ensure the status reports an - * active resync. - */ + res = atomic_read(&mddev->recovery_active); + /* + * Resync has started, but the subtraction has overflowed or + * yielded one of the special values. Force it to active to + * ensure the status reports an active resync. + */ + if (resync < res || resync - res < MD_RESYNC_ACTIVE) resync = MD_RESYNC_ACTIVE; - } + else + resync -= res; } if (resync == MD_RESYNC_NONE) { diff --git a/drivers/md/md.h b/drivers/md/md.h index e148e3c83b0d..fd8f260ed5f8 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -790,15 +790,9 @@ extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev, struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr); struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev); -static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type) +static inline bool is_rdev_broken(struct md_rdev *rdev) { - if (!disk_live(rdev->bdev->bd_disk)) { - if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags)) - pr_warn("md: %s: %s array has a missing/failed member\n", - mdname(rdev->mddev), md_type); - return true; - } - return false; + return !disk_live(rdev->bdev->bd_disk); } static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index b536befd8898..f8ee9a95e25d 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -569,8 +569,9 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio) return true; } - if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) { + if (unlikely(is_rdev_broken(tmp_dev))) { bio_io_error(bio); + md_error(mddev, tmp_dev); return true; } @@ -592,6 +593,16 @@ static void raid0_status(struct seq_file *seq, struct mddev *mddev) return; } +static void raid0_error(struct mddev *mddev, struct md_rdev *rdev) +{ + if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) { + char *md_name = mdname(mddev); + + pr_crit("md/raid0%s: Disk failure on %pg detected, failing array.\n", + md_name, rdev->bdev); + } +} + static void *raid0_takeover_raid45(struct mddev *mddev) { struct md_rdev *rdev; @@ -767,6 +778,7 @@ static struct md_personality raid0_personality= .size = raid0_size, .takeover = raid0_takeover, .quiesce = raid0_quiesce, + .error_handler = raid0_error, }; static int __init raid0_init (void) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 6c66357f92f5..4fcfcb350d2b 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -952,7 +952,9 @@ static void flush_pending_writes(struct r10conf *conf) static void raise_barrier(struct r10conf *conf, int force) { write_seqlock_irq(&conf->resync_lock); - BUG_ON(force && !conf->barrier); + + if (WARN_ON_ONCE(force && !conf->barrier)) + force = false; /* Wait until no block IO is waiting (unless 'force') */ wait_event_barrier(conf, force || !conf->nr_waiting); @@ -995,11 +997,15 @@ static bool stop_waiting_barrier(struct r10conf *conf) (!bio_list_empty(&bio_list[0]) || !bio_list_empty(&bio_list[1]))) return true; - /* move on if recovery thread is blocked by us */ - if (conf->mddev->thread->tsk == current && - test_bit(MD_RECOVERY_RUNNING, &conf->mddev->recovery) && - conf->nr_queued > 0) + /* + * move on if io is issued from raid10d(), nr_pending is not released + * from original io(see handle_read_error()). All raise barrier is + * blocked until this io is done. + */ + if (conf->mddev->thread->tsk == current) { + WARN_ON_ONCE(atomic_read(&conf->nr_pending) == 0); return true; + } return false; } @@ -1244,7 +1250,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, } slot = r10_bio->read_slot; - if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) + if (!r10_bio->start_time && + blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) r10_bio->start_time = bio_start_io_acct(bio); read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set); @@ -1574,6 +1581,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors) r10_bio->sector = bio->bi_iter.bi_sector; r10_bio->state = 0; r10_bio->read_slot = -1; + r10_bio->start_time = 0; memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->geo.raid_disks); @@ -1626,7 +1634,7 @@ static void raid10_end_discard_request(struct bio *bio) /* * raid10_remove_disk uses smp_mb to make sure rdev is set to * replacement before setting replacement to NULL. It can read - * rdev first without barrier protect even replacment is NULL + * rdev first without barrier protect even replacement is NULL */ smp_rmb(); rdev = conf->mirrors[dev].rdev; @@ -2609,11 +2617,22 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) { struct r10conf *conf = mddev->private; int d; - struct bio *wbio, *wbio2; + struct bio *wbio = r10_bio->devs[1].bio; + struct bio *wbio2 = r10_bio->devs[1].repl_bio; + + /* Need to test wbio2->bi_end_io before we call + * submit_bio_noacct as if the former is NULL, + * the latter is free to free wbio2. + */ + if (wbio2 && !wbio2->bi_end_io) + wbio2 = NULL; if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) { fix_recovery_read_error(r10_bio); - end_sync_request(r10_bio); + if (wbio->bi_end_io) + end_sync_request(r10_bio); + if (wbio2) + end_sync_request(r10_bio); return; } @@ -2622,14 +2641,6 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) * and submit the write request */ d = r10_bio->devs[1].devnum; - wbio = r10_bio->devs[1].bio; - wbio2 = r10_bio->devs[1].repl_bio; - /* Need to test wbio2->bi_end_io before we call - * submit_bio_noacct as if the former is NULL, - * the latter is free to free wbio2. - */ - if (wbio2 && !wbio2->bi_end_io) - wbio2 = NULL; if (wbio->bi_end_io) { atomic_inc(&conf->mirrors[d].rdev->nr_pending); md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio)); @@ -2978,9 +2989,13 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) md_error(mddev, rdev); rdev_dec_pending(rdev, mddev); - allow_barrier(conf); r10_bio->state = 0; raid10_read_request(mddev, r10_bio->master_bio, r10_bio); + /* + * allow_barrier after re-submit to ensure no sync io + * can be issued while regular io pending. + */ + allow_barrier(conf); } static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) @@ -3289,10 +3304,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, sector_t chunk_mask = conf->geo.chunk_mask; int page_idx = 0; - if (!mempool_initialized(&conf->r10buf_pool)) - if (init_resync(conf)) - return 0; - /* * Allow skipping a full rebuild for incremental assembly * of a clean array, like RAID1 does. @@ -3308,6 +3319,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, return mddev->dev_sectors - sector_nr; } + if (!mempool_initialized(&conf->r10buf_pool)) + if (init_resync(conf)) + return 0; + skipped: max_sector = mddev->dev_sectors; if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) || @@ -4004,6 +4019,20 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new) return nc*fc; } +static void raid10_free_conf(struct r10conf *conf) +{ + if (!conf) + return; + + mempool_exit(&conf->r10bio_pool); + kfree(conf->mirrors); + kfree(conf->mirrors_old); + kfree(conf->mirrors_new); + safe_put_page(conf->tmppage); + bioset_exit(&conf->bio_split); + kfree(conf); +} + static struct r10conf *setup_conf(struct mddev *mddev) { struct r10conf *conf = NULL; @@ -4086,13 +4115,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) return conf; out: - if (conf) { - mempool_exit(&conf->r10bio_pool); - kfree(conf->mirrors); - safe_put_page(conf->tmppage); - bioset_exit(&conf->bio_split); - kfree(conf); - } + raid10_free_conf(conf); return ERR_PTR(err); } @@ -4129,6 +4152,9 @@ static int raid10_run(struct mddev *mddev) if (!conf) goto out; + mddev->thread = conf->thread; + conf->thread = NULL; + if (mddev_is_clustered(conf->mddev)) { int fc, fo; @@ -4141,9 +4167,6 @@ static int raid10_run(struct mddev *mddev) } } - mddev->thread = conf->thread; - conf->thread = NULL; - if (mddev->queue) { blk_queue_max_write_zeroes_sectors(mddev->queue, 0); blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); @@ -4283,10 +4306,7 @@ static int raid10_run(struct mddev *mddev) out_free_conf: md_unregister_thread(&mddev->thread); - mempool_exit(&conf->r10bio_pool); - safe_put_page(conf->tmppage); - kfree(conf->mirrors); - kfree(conf); + raid10_free_conf(conf); mddev->private = NULL; out: return -EIO; @@ -4294,15 +4314,7 @@ out: static void raid10_free(struct mddev *mddev, void *priv) { - struct r10conf *conf = priv; - - mempool_exit(&conf->r10bio_pool); - safe_put_page(conf->tmppage); - kfree(conf->mirrors); - kfree(conf->mirrors_old); - kfree(conf->mirrors_new); - bioset_exit(&conf->bio_split); - kfree(conf); + raid10_free_conf(priv); } static void raid10_quiesce(struct mddev *mddev, int quiesce) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7b820b81d8c2..812a12e3e41a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7716,7 +7716,6 @@ static void raid5_set_io_opt(struct r5conf *conf) static int raid5_run(struct mddev *mddev) { struct r5conf *conf; - int working_disks = 0; int dirty_parity_disks = 0; struct md_rdev *rdev; struct md_rdev *journal_dev = NULL; @@ -7912,10 +7911,8 @@ static int raid5_run(struct mddev *mddev) pr_warn("md: cannot handle concurrent replacement and reshape.\n"); goto abort; } - if (test_bit(In_sync, &rdev->flags)) { - working_disks++; + if (test_bit(In_sync, &rdev->flags)) continue; - } /* This disc is not fully in-sync. However if it * just stored parity (beyond the recovery_offset), * when we don't need to be concerned about the diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index b317ce6c4ec3..596bb11eeba5 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -209,16 +209,16 @@ static inline struct apple_nvme *queue_to_apple_nvme(struct apple_nvme_queue *q) { if (q->is_adminq) return container_of(q, struct apple_nvme, adminq); - else - return container_of(q, struct apple_nvme, ioq); + + return container_of(q, struct apple_nvme, ioq); } static unsigned int apple_nvme_queue_depth(struct apple_nvme_queue *q) { if (q->is_adminq) return APPLE_NVME_AQ_DEPTH; - else - return APPLE_ANS_MAX_QUEUE_DEPTH; + + return APPLE_ANS_MAX_QUEUE_DEPTH; } static void apple_nvme_rtkit_crashed(void *cookie) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d6a9bac91a4c..1bfd52eae2ee 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -450,8 +450,8 @@ bool nvme_cancel_request(struct request *req, void *data) dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device, "Cancelling I/O %d", req->tag); - /* don't abort one completed request */ - if (blk_mq_request_completed(req)) + /* don't abort one completed or idle request */ + if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) return true; nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD; @@ -4819,8 +4819,6 @@ static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) u32 aer_notice_type = nvme_aer_subtype(result); bool requeue = true; - trace_nvme_async_event(ctrl, aer_notice_type); - switch (aer_notice_type) { case NVME_AER_NOTICE_NS_CHANGED: set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events); @@ -4856,7 +4854,6 @@ static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) static void nvme_handle_aer_persistent_error(struct nvme_ctrl *ctrl) { - trace_nvme_async_event(ctrl, NVME_AER_ERROR); dev_warn(ctrl->device, "resetting controller due to AER\n"); nvme_reset_ctrl(ctrl); } @@ -4872,6 +4869,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS) return; + trace_nvme_async_event(ctrl, result); switch (aer_type) { case NVME_AER_NOTICE: requeue = nvme_handle_aen_notice(ctrl, result); @@ -4889,7 +4887,6 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, case NVME_AER_SMART: case NVME_AER_CSS: case NVME_AER_VS: - trace_nvme_async_event(ctrl, aer_type); ctrl->aen_result = result; break; default: diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index cd7873de3121..7f25c0fe3a0b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -5,7 +5,6 @@ */ #include <linux/acpi.h> -#include <linux/aer.h> #include <linux/async.h> #include <linux/blkdev.h> #include <linux/blk-mq.h> @@ -2535,7 +2534,6 @@ static int nvme_pci_enable(struct nvme_dev *dev) nvme_map_cmb(dev); - pci_enable_pcie_error_reporting(pdev); pci_save_state(pdev); result = nvme_pci_configure_admin_queue(dev); @@ -2600,10 +2598,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_suspend_io_queues(dev); nvme_suspend_queue(dev, 0); pci_free_irq_vectors(pdev); - if (pci_is_enabled(pdev)) { - pci_disable_pcie_error_reporting(pdev); + if (pci_is_enabled(pdev)) pci_disable_device(pdev); - } nvme_reap_pending_cqes(dev); nvme_cancel_tagset(&dev->ctrl); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index bbad26b82b56..0eb79696fb73 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -12,7 +12,6 @@ #include <linux/string.h> #include <linux/atomic.h> #include <linux/blk-mq.h> -#include <linux/blk-mq-rdma.h> #include <linux/blk-integrity.h> #include <linux/types.h> #include <linux/list.h> @@ -464,7 +463,6 @@ static int nvme_rdma_create_cq(struct ib_device *ibdev, struct nvme_rdma_queue *queue) { int ret, comp_vector, idx = nvme_rdma_queue_idx(queue); - enum ib_poll_context poll_ctx; /* * Spread I/O queues completion vectors according their queue index. @@ -473,15 +471,12 @@ static int nvme_rdma_create_cq(struct ib_device *ibdev, comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors; /* Polling queues need direct cq polling context */ - if (nvme_rdma_poll_queue(queue)) { - poll_ctx = IB_POLL_DIRECT; + if (nvme_rdma_poll_queue(queue)) queue->ib_cq = ib_alloc_cq(ibdev, queue, queue->cq_size, - comp_vector, poll_ctx); - } else { - poll_ctx = IB_POLL_SOFTIRQ; + comp_vector, IB_POLL_DIRECT); + else queue->ib_cq = ib_cq_pool_get(ibdev, queue->cq_size, - comp_vector, poll_ctx); - } + comp_vector, IB_POLL_SOFTIRQ); if (IS_ERR(queue->ib_cq)) { ret = PTR_ERR(queue->ib_cq); @@ -2163,10 +2158,8 @@ static void nvme_rdma_map_queues(struct blk_mq_tag_set *set) ctrl->io_queues[HCTX_TYPE_DEFAULT]; set->map[HCTX_TYPE_READ].queue_offset = 0; } - blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_DEFAULT], - ctrl->device->dev, 0); - blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ], - ctrl->device->dev, 0); + blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); + blk_mq_map_queues(&set->map[HCTX_TYPE_READ]); if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) { /* map dedicated poll queues only if we have queues left */ diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 49c9e7bc9116..bf0230442d57 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -888,6 +888,9 @@ static int nvme_tcp_recv_skb(read_descriptor_t *desc, struct sk_buff *skb, size_t consumed = len; int result; + if (unlikely(!queue->rd_enabled)) + return -EFAULT; + while (len) { switch (nvme_tcp_recv_state(queue)) { case NVME_TCP_RECV_PDU: diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index 6f0eaf6a1528..4fb5922ffdac 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -127,15 +127,12 @@ TRACE_EVENT(nvme_async_event, ), TP_printk("nvme%d: NVME_AEN=%#08x [%s]", __entry->ctrl_id, __entry->result, - __print_symbolic(__entry->result, - aer_name(NVME_AER_NOTICE_NS_CHANGED), - aer_name(NVME_AER_NOTICE_ANA), - aer_name(NVME_AER_NOTICE_FW_ACT_STARTING), - aer_name(NVME_AER_NOTICE_DISC_CHANGED), - aer_name(NVME_AER_ERROR), - aer_name(NVME_AER_SMART), - aer_name(NVME_AER_CSS), - aer_name(NVME_AER_VS)) + __print_symbolic(__entry->result & 0x7, + aer_name(NVME_AER_ERROR), + aer_name(NVME_AER_SMART), + aer_name(NVME_AER_NOTICE), + aer_name(NVME_AER_CSS), + aer_name(NVME_AER_VS)) ) ); diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 80099df37314..39cb570f833d 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -668,21 +668,11 @@ out: nvmet_req_complete(req, status); } -static bool nvmet_handle_identify_desclist(struct nvmet_req *req) +static void nvmet_execute_identify_ctrl_nvm(struct nvmet_req *req) { - switch (req->cmd->identify.csi) { - case NVME_CSI_NVM: - nvmet_execute_identify_desclist(req); - return true; - case NVME_CSI_ZNS: - if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { - nvmet_execute_identify_desclist(req); - return true; - } - return false; - default: - return false; - } + /* Not supported: return zeroes */ + nvmet_req_complete(req, + nvmet_zero_sgl(req, 0, sizeof(struct nvme_id_ctrl_nvm))); } static void nvmet_execute_identify(struct nvmet_req *req) @@ -692,54 +682,49 @@ static void nvmet_execute_identify(struct nvmet_req *req) switch (req->cmd->identify.cns) { case NVME_ID_CNS_NS: + nvmet_execute_identify_ns(req); + return; + case NVME_ID_CNS_CTRL: + nvmet_execute_identify_ctrl(req); + return; + case NVME_ID_CNS_NS_ACTIVE_LIST: + nvmet_execute_identify_nslist(req); + return; + case NVME_ID_CNS_NS_DESC_LIST: + nvmet_execute_identify_desclist(req); + return; + case NVME_ID_CNS_CS_NS: switch (req->cmd->identify.csi) { case NVME_CSI_NVM: - return nvmet_execute_identify_ns(req); - default: + /* Not supported */ break; - } - break; - case NVME_ID_CNS_CS_NS: - if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { - switch (req->cmd->identify.csi) { - case NVME_CSI_ZNS: - return nvmet_execute_identify_cns_cs_ns(req); - default: - break; + case NVME_CSI_ZNS: + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { + nvmet_execute_identify_ns_zns(req); + return; } - } - break; - case NVME_ID_CNS_CTRL: - switch (req->cmd->identify.csi) { - case NVME_CSI_NVM: - return nvmet_execute_identify_ctrl(req); + break; } break; case NVME_ID_CNS_CS_CTRL: - if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { - switch (req->cmd->identify.csi) { - case NVME_CSI_ZNS: - return nvmet_execute_identify_cns_cs_ctrl(req); - default: - break; - } - } - break; - case NVME_ID_CNS_NS_ACTIVE_LIST: switch (req->cmd->identify.csi) { case NVME_CSI_NVM: - return nvmet_execute_identify_nslist(req); - default: + nvmet_execute_identify_ctrl_nvm(req); + return; + case NVME_CSI_ZNS: + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { + nvmet_execute_identify_ctrl_zns(req); + return; + } break; } break; - case NVME_ID_CNS_NS_DESC_LIST: - if (nvmet_handle_identify_desclist(req) == true) - return; - break; } - nvmet_req_cns_error_complete(req); + pr_debug("unhandled identify cns %d on qid %d\n", + req->cmd->identify.cns, req->sq->qid); + req->error_loc = offsetof(struct nvme_identify, cns); + nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR); } /* diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 5c16372f3b53..c780af36c1d4 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -614,10 +614,11 @@ fcloop_fcp_recv_work(struct work_struct *work) struct fcloop_fcpreq *tfcp_req = container_of(work, struct fcloop_fcpreq, fcp_rcv_work); struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + unsigned long flags; int ret = 0; bool aborted = false; - spin_lock_irq(&tfcp_req->reqlock); + spin_lock_irqsave(&tfcp_req->reqlock, flags); switch (tfcp_req->inistate) { case INI_IO_START: tfcp_req->inistate = INI_IO_ACTIVE; @@ -626,11 +627,11 @@ fcloop_fcp_recv_work(struct work_struct *work) aborted = true; break; default: - spin_unlock_irq(&tfcp_req->reqlock); + spin_unlock_irqrestore(&tfcp_req->reqlock, flags); WARN_ON(1); return; } - spin_unlock_irq(&tfcp_req->reqlock); + spin_unlock_irqrestore(&tfcp_req->reqlock, flags); if (unlikely(aborted)) ret = -ECANCELED; @@ -655,8 +656,9 @@ fcloop_fcp_abort_recv_work(struct work_struct *work) container_of(work, struct fcloop_fcpreq, abort_rcv_work); struct nvmefc_fcp_req *fcpreq; bool completed = false; + unsigned long flags; - spin_lock_irq(&tfcp_req->reqlock); + spin_lock_irqsave(&tfcp_req->reqlock, flags); fcpreq = tfcp_req->fcpreq; switch (tfcp_req->inistate) { case INI_IO_ABORTED: @@ -665,11 +667,11 @@ fcloop_fcp_abort_recv_work(struct work_struct *work) completed = true; break; default: - spin_unlock_irq(&tfcp_req->reqlock); + spin_unlock_irqrestore(&tfcp_req->reqlock, flags); WARN_ON(1); return; } - spin_unlock_irq(&tfcp_req->reqlock); + spin_unlock_irqrestore(&tfcp_req->reqlock, flags); if (unlikely(completed)) { /* remove reference taken in original abort downcall */ @@ -681,9 +683,9 @@ fcloop_fcp_abort_recv_work(struct work_struct *work) nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport, &tfcp_req->tgt_fcp_req); - spin_lock_irq(&tfcp_req->reqlock); + spin_lock_irqsave(&tfcp_req->reqlock, flags); tfcp_req->fcpreq = NULL; - spin_unlock_irq(&tfcp_req->reqlock); + spin_unlock_irqrestore(&tfcp_req->reqlock, flags); fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED); /* call_host_done releases reference for abort downcall */ @@ -699,11 +701,12 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work) struct fcloop_fcpreq *tfcp_req = container_of(work, struct fcloop_fcpreq, tio_done_work); struct nvmefc_fcp_req *fcpreq; + unsigned long flags; - spin_lock_irq(&tfcp_req->reqlock); + spin_lock_irqsave(&tfcp_req->reqlock, flags); fcpreq = tfcp_req->fcpreq; tfcp_req->inistate = INI_IO_COMPLETED; - spin_unlock_irq(&tfcp_req->reqlock); + spin_unlock_irqrestore(&tfcp_req->reqlock, flags); fcloop_call_host_done(fcpreq, tfcp_req, tfcp_req->status); } @@ -807,13 +810,14 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, u32 rsplen = 0, xfrlen = 0; int fcp_err = 0, active, aborted; u8 op = tgt_fcpreq->op; + unsigned long flags; - spin_lock_irq(&tfcp_req->reqlock); + spin_lock_irqsave(&tfcp_req->reqlock, flags); fcpreq = tfcp_req->fcpreq; active = tfcp_req->active; aborted = tfcp_req->aborted; tfcp_req->active = true; - spin_unlock_irq(&tfcp_req->reqlock); + spin_unlock_irqrestore(&tfcp_req->reqlock, flags); if (unlikely(active)) /* illegal - call while i/o active */ @@ -821,9 +825,9 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, if (unlikely(aborted)) { /* target transport has aborted i/o prior */ - spin_lock_irq(&tfcp_req->reqlock); + spin_lock_irqsave(&tfcp_req->reqlock, flags); tfcp_req->active = false; - spin_unlock_irq(&tfcp_req->reqlock); + spin_unlock_irqrestore(&tfcp_req->reqlock, flags); tgt_fcpreq->transferred_length = 0; tgt_fcpreq->fcp_error = -ECANCELED; tgt_fcpreq->done(tgt_fcpreq); @@ -880,9 +884,9 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, break; } - spin_lock_irq(&tfcp_req->reqlock); + spin_lock_irqsave(&tfcp_req->reqlock, flags); tfcp_req->active = false; - spin_unlock_irq(&tfcp_req->reqlock); + spin_unlock_irqrestore(&tfcp_req->reqlock, flags); tgt_fcpreq->transferred_length = xfrlen; tgt_fcpreq->fcp_error = fcp_err; @@ -896,15 +900,16 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *tgt_fcpreq) { struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); + unsigned long flags; /* * mark aborted only in case there were 2 threads in transport * (one doing io, other doing abort) and only kills ops posted * after the abort request */ - spin_lock_irq(&tfcp_req->reqlock); + spin_lock_irqsave(&tfcp_req->reqlock, flags); tfcp_req->aborted = true; - spin_unlock_irq(&tfcp_req->reqlock); + spin_unlock_irqrestore(&tfcp_req->reqlock, flags); tfcp_req->status = NVME_SC_INTERNAL; @@ -946,6 +951,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, struct fcloop_ini_fcpreq *inireq = fcpreq->private; struct fcloop_fcpreq *tfcp_req; bool abortio = true; + unsigned long flags; spin_lock(&inireq->inilock); tfcp_req = inireq->tfcp_req; @@ -958,7 +964,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, return; /* break initiator/target relationship for io */ - spin_lock_irq(&tfcp_req->reqlock); + spin_lock_irqsave(&tfcp_req->reqlock, flags); switch (tfcp_req->inistate) { case INI_IO_START: case INI_IO_ACTIVE: @@ -968,11 +974,11 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, abortio = false; break; default: - spin_unlock_irq(&tfcp_req->reqlock); + spin_unlock_irqrestore(&tfcp_req->reqlock, flags); WARN_ON(1); return; } - spin_unlock_irq(&tfcp_req->reqlock); + spin_unlock_irqrestore(&tfcp_req->reqlock, flags); if (abortio) /* leave the reference while the work item is scheduled */ diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 89bedfcd974c..dc60a22646f7 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -581,8 +581,8 @@ bool nvmet_ns_revalidate(struct nvmet_ns *ns); u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts); bool nvmet_bdev_zns_enable(struct nvmet_ns *ns); -void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req); -void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req); +void nvmet_execute_identify_ctrl_zns(struct nvmet_req *req); +void nvmet_execute_identify_ns_zns(struct nvmet_req *req); void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req); void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req); void nvmet_bdev_execute_zone_append(struct nvmet_req *req); @@ -687,14 +687,6 @@ static inline bool nvmet_use_inline_bvec(struct nvmet_req *req) req->sg_cnt <= NVMET_MAX_INLINE_BIOVEC; } -static inline void nvmet_req_cns_error_complete(struct nvmet_req *req) -{ - pr_debug("unhandled identify cns %d on qid %d\n", - req->cmd->identify.cns, req->sq->qid); - req->error_loc = offsetof(struct nvme_identify, cns); - nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR); -} - static inline void nvmet_req_bio_put(struct nvmet_req *req, struct bio *bio) { if (bio != &req->b.inline_bio) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 66e8f9fd0ca7..ed98df72c76b 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -20,6 +20,31 @@ #define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE) +static int param_store_val(const char *str, int *val, int min, int max) +{ + int ret, new_val; + + ret = kstrtoint(str, 10, &new_val); + if (ret) + return -EINVAL; + + if (new_val < min || new_val > max) + return -EINVAL; + + *val = new_val; + return 0; +} + +static int set_params(const char *str, const struct kernel_param *kp) +{ + return param_store_val(str, kp->arg, 0, INT_MAX); +} + +static const struct kernel_param_ops set_param_ops = { + .set = set_params, + .get = param_get_int, +}; + /* Define the socket priority to use for connections were it is desirable * that the NIC consider performing optimized packet processing or filtering. * A non-zero value being sufficient to indicate general consideration of any @@ -27,8 +52,8 @@ * values that may be unique for some NIC implementations. */ static int so_priority; -module_param(so_priority, int, 0644); -MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority"); +device_param_cb(so_priority, &set_param_ops, &so_priority, 0644); +MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority: Default 0"); /* Define a time period (in usecs) that io_work() shall sample an activated * queue before determining it to be idle. This optional module behavior @@ -36,9 +61,10 @@ MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority"); * using advanced interrupt moderation techniques. */ static int idle_poll_period_usecs; -module_param(idle_poll_period_usecs, int, 0644); +device_param_cb(idle_poll_period_usecs, &set_param_ops, + &idle_poll_period_usecs, 0644); MODULE_PARM_DESC(idle_poll_period_usecs, - "nvmet tcp io_work poll till idle time period in usecs"); + "nvmet tcp io_work poll till idle time period in usecs: Default 0"); #define NVMET_TCP_RECV_BUDGET 8 #define NVMET_TCP_SEND_BUDGET 8 diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index 7e4292d88016..5b5c1e481722 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -70,7 +70,7 @@ bool nvmet_bdev_zns_enable(struct nvmet_ns *ns) return true; } -void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req) +void nvmet_execute_identify_ctrl_zns(struct nvmet_req *req) { u8 zasl = req->sq->ctrl->subsys->zasl; struct nvmet_ctrl *ctrl = req->sq->ctrl; @@ -95,9 +95,9 @@ out: nvmet_req_complete(req, status); } -void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req) +void nvmet_execute_identify_ns_zns(struct nvmet_req *req) { - struct nvme_id_ns_zns *id_zns; + struct nvme_id_ns_zns *id_zns = NULL; u64 zsze; u16 status; u32 mar, mor; @@ -118,16 +118,18 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req) if (status) goto done; - if (!bdev_is_zoned(req->ns->bdev)) { - req->error_loc = offsetof(struct nvme_identify, nsid); - goto done; - } - if (nvmet_ns_revalidate(req->ns)) { mutex_lock(&req->ns->subsys->lock); nvmet_ns_changed(req->ns->subsys, req->ns->nsid); mutex_unlock(&req->ns->subsys->lock); } + + if (!bdev_is_zoned(req->ns->bdev)) { + status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + req->error_loc = offsetof(struct nvme_identify, nsid); + goto out; + } + zsze = (bdev_zone_sectors(req->ns->bdev) << 9) >> req->ns->blksize_shift; id_zns->lbafe[0].zsze = cpu_to_le64(zsze); @@ -148,8 +150,8 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req) done: status = nvmet_copy_to_sgl(req, 0, id_zns, sizeof(*id_zns)); - kfree(id_zns); out: + kfree(id_zns); nvmet_req_complete(req, status); } diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index a9c2a8d76c45..9fbfce735d56 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -73,7 +73,8 @@ static void dasd_profile_init(struct dasd_profile *, struct dentry *); static void dasd_profile_exit(struct dasd_profile *); static void dasd_hosts_init(struct dentry *, struct dasd_device *); static void dasd_hosts_exit(struct dasd_device *); - +static int dasd_handle_autoquiesce(struct dasd_device *, struct dasd_ccw_req *, + unsigned int); /* * SECTION: Operations on the device structure. */ @@ -1451,6 +1452,8 @@ int dasd_start_IO(struct dasd_ccw_req *cqr) case -ENODEV: DBF_DEV_EVENT(DBF_WARNING, device, "%s", "start_IO: -ENODEV device gone, retry"); + /* this is equivalent to CC=3 for SSCH report this to EER */ + dasd_handle_autoquiesce(device, cqr, DASD_EER_STARTIO); break; case -EIO: DBF_DEV_EVENT(DBF_WARNING, device, "%s", @@ -1953,6 +1956,16 @@ static void __dasd_device_process_final_queue(struct dasd_device *device, } /* + * check if device should be autoquiesced due to too many timeouts + */ +static void __dasd_device_check_autoquiesce_timeout(struct dasd_device *device, + struct dasd_ccw_req *cqr) +{ + if ((device->default_retries - cqr->retries) >= device->aq_timeouts) + dasd_handle_autoquiesce(device, cqr, DASD_EER_TIMEOUTS); +} + +/* * Take a look at the first request on the ccw queue and check * if it reached its expire time. If so, terminate the IO. */ @@ -1986,6 +1999,7 @@ static void __dasd_device_check_expire(struct dasd_device *device) "remaining\n", cqr, (cqr->expires/HZ), cqr->retries); } + __dasd_device_check_autoquiesce_timeout(device, cqr); } } @@ -2325,7 +2339,7 @@ static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible) /* Non-temporary stop condition will trigger fail fast */ if (device->stopped & ~DASD_STOPPED_PENDING && test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && - (!dasd_eer_enabled(device))) { + !dasd_eer_enabled(device) && device->aq_mask == 0) { cqr->status = DASD_CQR_FAILED; cqr->intrc = -ENOLINK; continue; @@ -2801,20 +2815,18 @@ restart: dasd_log_sense(cqr, &cqr->irb); } - /* First of all call extended error reporting. */ - if (dasd_eer_enabled(base) && - cqr->status == DASD_CQR_FAILED) { - dasd_eer_write(base, cqr, DASD_EER_FATALERROR); - - /* restart request */ + /* + * First call extended error reporting and check for autoquiesce + */ + spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); + if (cqr->status == DASD_CQR_FAILED && + dasd_handle_autoquiesce(base, cqr, DASD_EER_FATALERROR)) { cqr->status = DASD_CQR_FILLED; cqr->retries = 255; - spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); - dasd_device_set_stop_bits(base, DASD_STOPPED_QUIESCE); - spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), - flags); + spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags); goto restart; } + spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags); /* Process finished ERP request. */ if (cqr->refers) { @@ -2856,7 +2868,7 @@ static void __dasd_block_start_head(struct dasd_block *block) /* Non-temporary stop condition will trigger fail fast */ if (block->base->stopped & ~DASD_STOPPED_PENDING && test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && - (!dasd_eer_enabled(block->base))) { + !dasd_eer_enabled(block->base) && block->base->aq_mask == 0) { cqr->status = DASD_CQR_FAILED; cqr->intrc = -ENOLINK; dasd_schedule_block_bh(block); @@ -2941,7 +2953,7 @@ static int _dasd_requeue_request(struct dasd_ccw_req *cqr) return 0; spin_lock_irq(&cqr->dq->lock); req = (struct request *) cqr->callback_data; - blk_mq_requeue_request(req, false); + blk_mq_requeue_request(req, true); spin_unlock_irq(&cqr->dq->lock); return 0; @@ -3670,8 +3682,8 @@ int dasd_generic_last_path_gone(struct dasd_device *device) dev_warn(&device->cdev->dev, "No operational channel path is left " "for the device\n"); DBF_DEV_EVENT(DBF_WARNING, device, "%s", "last path gone"); - /* First of all call extended error reporting. */ - dasd_eer_write(device, NULL, DASD_EER_NOPATH); + /* First call extended error reporting and check for autoquiesce. */ + dasd_handle_autoquiesce(device, NULL, DASD_EER_NOPATH); if (device->state < DASD_STATE_BASIC) return 0; @@ -3803,7 +3815,8 @@ void dasd_generic_path_event(struct ccw_device *cdev, int *path_event) "No verified channel paths remain for the device\n"); DBF_DEV_EVENT(DBF_WARNING, device, "%s", "last verified path gone"); - dasd_eer_write(device, NULL, DASD_EER_NOPATH); + /* First call extended error reporting and check for autoquiesce. */ + dasd_handle_autoquiesce(device, NULL, DASD_EER_NOPATH); dasd_device_set_stop_bits(device, DASD_STOPPED_DC_WAIT); } @@ -3825,7 +3838,8 @@ EXPORT_SYMBOL_GPL(dasd_generic_verify_path); void dasd_generic_space_exhaust(struct dasd_device *device, struct dasd_ccw_req *cqr) { - dasd_eer_write(device, NULL, DASD_EER_NOSPC); + /* First call extended error reporting and check for autoquiesce. */ + dasd_handle_autoquiesce(device, NULL, DASD_EER_NOSPC); if (device->state < DASD_STATE_BASIC) return; @@ -3958,6 +3972,31 @@ void dasd_schedule_requeue(struct dasd_device *device) } EXPORT_SYMBOL(dasd_schedule_requeue); +static int dasd_handle_autoquiesce(struct dasd_device *device, + struct dasd_ccw_req *cqr, + unsigned int reason) +{ + /* in any case write eer message with reason */ + if (dasd_eer_enabled(device)) + dasd_eer_write(device, cqr, reason); + + if (!test_bit(reason, &device->aq_mask)) + return 0; + + /* notify eer about autoquiesce */ + if (dasd_eer_enabled(device)) + dasd_eer_write(device, NULL, DASD_EER_AUTOQUIESCE); + + pr_info("%s: The DASD has been put in the quiesce state\n", + dev_name(&device->cdev->dev)); + dasd_device_set_stop_bits(device, DASD_STOPPED_QUIESCE); + + if (device->features & DASD_FEATURE_REQUEUEQUIESCE) + dasd_schedule_requeue(device); + + return 1; +} + static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device, int rdc_buffer_size, int magic) diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index df17f0f9cb0f..620fab01b710 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -50,6 +50,7 @@ struct dasd_devmap { unsigned short features; struct dasd_device *device; struct dasd_copy_relation *copy; + unsigned int aq_mask; }; /* @@ -1476,6 +1477,128 @@ dasd_eer_store(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(eer_enabled, 0644, dasd_eer_show, dasd_eer_store); /* + * aq_mask controls if the DASD should be quiesced on certain triggers + * The aq_mask attribute is interpreted as bitmap of the DASD_EER_* triggers. + */ +static ssize_t dasd_aq_mask_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct dasd_devmap *devmap; + unsigned int aq_mask = 0; + + devmap = dasd_find_busid(dev_name(dev)); + if (!IS_ERR(devmap)) + aq_mask = devmap->aq_mask; + + return sysfs_emit(buf, "%d\n", aq_mask); +} + +static ssize_t dasd_aq_mask_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct dasd_devmap *devmap; + unsigned int val; + + if (kstrtouint(buf, 0, &val) || val > DASD_EER_VALID) + return -EINVAL; + + devmap = dasd_devmap_from_cdev(to_ccwdev(dev)); + if (IS_ERR(devmap)) + return PTR_ERR(devmap); + + spin_lock(&dasd_devmap_lock); + devmap->aq_mask = val; + if (devmap->device) + devmap->device->aq_mask = devmap->aq_mask; + spin_unlock(&dasd_devmap_lock); + + return count; +} + +static DEVICE_ATTR(aq_mask, 0644, dasd_aq_mask_show, dasd_aq_mask_store); + +/* + * aq_requeue controls if requests are returned to the blocklayer on quiesce + * or if requests are only not started + */ +static ssize_t dasd_aqr_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct dasd_devmap *devmap; + int flag; + + devmap = dasd_find_busid(dev_name(dev)); + if (!IS_ERR(devmap)) + flag = (devmap->features & DASD_FEATURE_REQUEUEQUIESCE) != 0; + else + flag = (DASD_FEATURE_DEFAULT & + DASD_FEATURE_REQUEUEQUIESCE) != 0; + return sysfs_emit(buf, "%d\n", flag); +} + +static ssize_t dasd_aqr_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + bool val; + int rc; + + if (kstrtobool(buf, &val)) + return -EINVAL; + + rc = dasd_set_feature(to_ccwdev(dev), DASD_FEATURE_REQUEUEQUIESCE, val); + + return rc ? : count; +} + +static DEVICE_ATTR(aq_requeue, 0644, dasd_aqr_show, dasd_aqr_store); + +/* + * aq_timeouts controls how much retries have to time out until + * a device gets autoquiesced + */ +static ssize_t +dasd_aq_timeouts_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct dasd_device *device; + int len; + + device = dasd_device_from_cdev(to_ccwdev(dev)); + if (IS_ERR(device)) + return -ENODEV; + len = sysfs_emit(buf, "%u\n", device->aq_timeouts); + dasd_put_device(device); + return len; +} + +static ssize_t +dasd_aq_timeouts_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct dasd_device *device; + unsigned int val; + + device = dasd_device_from_cdev(to_ccwdev(dev)); + if (IS_ERR(device)) + return -ENODEV; + + if ((kstrtouint(buf, 10, &val) != 0) || + val > DASD_RETRIES_MAX || val == 0) { + dasd_put_device(device); + return -EINVAL; + } + + if (val) + device->aq_timeouts = val; + + dasd_put_device(device); + return count; +} + +static DEVICE_ATTR(aq_timeouts, 0644, dasd_aq_timeouts_show, + dasd_aq_timeouts_store); + +/* * expiration time for default requests */ static ssize_t @@ -2324,6 +2447,9 @@ static struct attribute * dasd_attrs[] = { &dev_attr_copy_pair.attr, &dev_attr_copy_role.attr, &dev_attr_ping.attr, + &dev_attr_aq_mask.attr, + &dev_attr_aq_requeue.attr, + &dev_attr_aq_timeouts.attr, NULL, }; diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 1a69f97e88fb..ade1369fe5ed 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2109,6 +2109,7 @@ dasd_eckd_check_characteristics(struct dasd_device *device) device->default_retries = DASD_RETRIES; device->path_thrhld = DASD_ECKD_PATH_THRHLD; device->path_interval = DASD_ECKD_PATH_INTERVAL; + device->aq_timeouts = DASD_RETRIES_MAX; if (private->conf.gneq) { value = 1; diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c index a4cc772208a6..c956de711cf7 100644 --- a/drivers/s390/block/dasd_eer.c +++ b/drivers/s390/block/dasd_eer.c @@ -387,6 +387,7 @@ void dasd_eer_write(struct dasd_device *device, struct dasd_ccw_req *cqr, break; case DASD_EER_NOPATH: case DASD_EER_NOSPC: + case DASD_EER_AUTOQUIESCE: dasd_eer_write_standard_trigger(device, NULL, id); break; case DASD_EER_STATECHANGE: diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 97adc8a7ae6b..33f812f0e515 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -444,22 +444,22 @@ struct dasd_discipline { extern struct dasd_discipline *dasd_diag_discipline_pointer; -/* - * Notification numbers for extended error reporting notifications: - * The DASD_EER_DISABLE notification is sent before a dasd_device (and it's - * eer pointer) is freed. The error reporting module needs to do all necessary - * cleanup steps. - * The DASD_EER_TRIGGER notification sends the actual error reports (triggers). - */ -#define DASD_EER_DISABLE 0 -#define DASD_EER_TRIGGER 1 +/* Trigger IDs for extended error reporting DASD EER and autoquiesce */ +enum eer_trigger { + DASD_EER_FATALERROR = 1, + DASD_EER_NOPATH, + DASD_EER_STATECHANGE, + DASD_EER_PPRCSUSPEND, + DASD_EER_NOSPC, + DASD_EER_TIMEOUTS, + DASD_EER_STARTIO, + + /* enum end marker, only add new trigger above */ + DASD_EER_MAX, + DASD_EER_AUTOQUIESCE = 31, /* internal only */ +}; -/* Trigger IDs for extended error reporting DASD_EER_TRIGGER notification */ -#define DASD_EER_FATALERROR 1 -#define DASD_EER_NOPATH 2 -#define DASD_EER_STATECHANGE 3 -#define DASD_EER_PPRCSUSPEND 4 -#define DASD_EER_NOSPC 5 +#define DASD_EER_VALID ((1U << DASD_EER_MAX) - 1) /* DASD path handling */ @@ -637,6 +637,8 @@ struct dasd_device { struct dasd_format_entry format_entry; struct kset *paths_info; struct dasd_copy_relation *copy; + unsigned long aq_mask; + unsigned int aq_timeouts; }; struct dasd_block { |