diff options
Diffstat (limited to 'drivers')
51 files changed, 1982 insertions, 1135 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index cf0e63dd97da..e54e31b02b88 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -65,39 +65,80 @@ struct drbd_atodb_wait { int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int); +void *drbd_md_get_buffer(struct drbd_conf *mdev) +{ + int r; + + wait_event(mdev->misc_wait, + (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 || + mdev->state.disk <= D_FAILED); + + return r ? NULL : page_address(mdev->md_io_page); +} + +void drbd_md_put_buffer(struct drbd_conf *mdev) +{ + if (atomic_dec_and_test(&mdev->md_io_in_use)) + wake_up(&mdev->misc_wait); +} + +static bool md_io_allowed(struct drbd_conf *mdev) +{ + enum drbd_disk_state ds = mdev->state.disk; + return ds >= D_NEGOTIATING || ds == D_ATTACHING; +} + +void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, + unsigned int *done) +{ + long dt = bdev->dc.disk_timeout * HZ / 10; + if (dt == 0) + dt = MAX_SCHEDULE_TIMEOUT; + + dt = wait_event_timeout(mdev->misc_wait, *done || !md_io_allowed(mdev), dt); + if (dt == 0) + dev_err(DEV, "meta-data IO operation timed out\n"); +} + static int _drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, struct page *page, sector_t sector, int rw, int size) { struct bio *bio; - struct drbd_md_io md_io; int ok; - md_io.mdev = mdev; - init_completion(&md_io.event); - md_io.error = 0; + mdev->md_io.done = 0; + mdev->md_io.error = -ENODEV; if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags)) rw |= REQ_FUA | REQ_FLUSH; rw |= REQ_SYNC; - bio = bio_alloc(GFP_NOIO, 1); + bio = bio_alloc_drbd(GFP_NOIO); bio->bi_bdev = bdev->md_bdev; bio->bi_sector = sector; ok = (bio_add_page(bio, page, size, 0) == size); if (!ok) goto out; - bio->bi_private = &md_io; + bio->bi_private = &mdev->md_io; bio->bi_end_io = drbd_md_io_complete; bio->bi_rw = rw; + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* Corresponding put_ldev in drbd_md_io_complete() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n"); + ok = 0; + goto out; + } + + bio_get(bio); /* one bio_put() is in the completion handler */ + atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */ if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) bio_endio(bio, -EIO); else submit_bio(rw, bio); - wait_for_completion(&md_io.event); - ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0; + wait_until_done_or_disk_failure(mdev, bdev, &mdev->md_io.done); + ok = bio_flagged(bio, BIO_UPTODATE) && mdev->md_io.error == 0; out: bio_put(bio); @@ -111,7 +152,7 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int offset = 0; struct page *iop = mdev->md_io_page; - D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); + D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1); BUG_ON(!bdev->md_bdev); @@ -328,8 +369,13 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) return 1; } - mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ - buffer = (struct al_transaction *)page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */ + if (!buffer) { + dev_err(DEV, "disk failed while waiting for md_io buffer\n"); + complete(&((struct update_al_work *)w)->event); + put_ldev(mdev); + return 1; + } buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); buffer->tr_number = cpu_to_be32(mdev->al_tr_number); @@ -374,7 +420,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE); mdev->al_tr_number++; - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); complete(&((struct update_al_work *)w)->event); put_ldev(mdev); @@ -443,8 +489,9 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) /* lock out all other meta data io for now, * and make sure the page is mapped. */ - mutex_lock(&mdev->md_io_mutex); - buffer = page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); + if (!buffer) + return 0; /* Find the valid transaction in the log */ for (i = 0; i <= mx; i++) { @@ -452,7 +499,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (rv == 0) continue; if (rv == -1) { - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 0; } cnr = be32_to_cpu(buffer->tr_number); @@ -478,7 +525,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!found_valid) { dev_warn(DEV, "No usable activity log found.\n"); - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 1; } @@ -493,7 +540,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) rv = drbd_al_read_tr(mdev, bdev, buffer, i); ERR_IF(rv == 0) goto cancel; if (rv == -1) { - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); return 0; } @@ -534,7 +581,7 @@ cancel: mdev->al_tr_pos = 0; /* ok, we are done with it */ - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n", transactions, active_extents); @@ -671,16 +718,20 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, else ext->rs_failed += count; if (ext->rs_left < ext->rs_failed) { - dev_err(DEV, "BAD! sector=%llus enr=%u rs_left=%d " - "rs_failed=%d count=%d\n", + dev_warn(DEV, "BAD! sector=%llus enr=%u rs_left=%d " + "rs_failed=%d count=%d cstate=%s\n", (unsigned long long)sector, ext->lce.lc_number, ext->rs_left, - ext->rs_failed, count); - dump_stack(); - - lc_put(mdev->resync, &ext->lce); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return; + ext->rs_failed, count, + drbd_conn_str(mdev->state.conn)); + + /* We don't expect to be able to clear more bits + * than have been set when we originally counted + * the set bits to cache that value in ext->rs_left. + * Whatever the reason (disconnect during resync, + * delayed local completion of an application write), + * try to fix it up by recounting here. */ + ext->rs_left = drbd_bm_e_weight(mdev, enr); } } else { /* Normally this element should be in the cache, @@ -1192,6 +1243,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev) put_ldev(mdev); } spin_unlock_irq(&mdev->al_lock); + wake_up(&mdev->al_wait); return 0; } diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 3030201c69d8..b5c5ff53cb57 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -205,7 +205,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev) static void bm_store_page_idx(struct page *page, unsigned long idx) { BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK)); - page_private(page) |= idx; + set_page_private(page, idx); } static unsigned long bm_page_to_idx(struct page *page) @@ -886,12 +886,21 @@ void drbd_bm_clear_all(struct drbd_conf *mdev) struct bm_aio_ctx { struct drbd_conf *mdev; atomic_t in_flight; - struct completion done; + unsigned int done; unsigned flags; #define BM_AIO_COPY_PAGES 1 int error; + struct kref kref; }; +static void bm_aio_ctx_destroy(struct kref *kref) +{ + struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref); + + put_ldev(ctx->mdev); + kfree(ctx); +} + /* bv_page may be a copy, or may be the original */ static void bm_async_io_complete(struct bio *bio, int error) { @@ -930,20 +939,21 @@ static void bm_async_io_complete(struct bio *bio, int error) bm_page_unlock_io(mdev, idx); - /* FIXME give back to page pool */ if (ctx->flags & BM_AIO_COPY_PAGES) - put_page(bio->bi_io_vec[0].bv_page); + mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool); bio_put(bio); - if (atomic_dec_and_test(&ctx->in_flight)) - complete(&ctx->done); + if (atomic_dec_and_test(&ctx->in_flight)) { + ctx->done = 1; + wake_up(&mdev->misc_wait); + kref_put(&ctx->kref, &bm_aio_ctx_destroy); + } } static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) { - /* we are process context. we always get a bio */ - struct bio *bio = bio_alloc(GFP_KERNEL, 1); + struct bio *bio = bio_alloc_drbd(GFP_NOIO); struct drbd_conf *mdev = ctx->mdev; struct drbd_bitmap *b = mdev->bitmap; struct page *page; @@ -966,10 +976,8 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must bm_set_page_unchanged(b->bm_pages[page_nr]); if (ctx->flags & BM_AIO_COPY_PAGES) { - /* FIXME alloc_page is good enough for now, but actually needs - * to use pre-allocated page pool */ void *src, *dest; - page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT); + page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT); dest = kmap_atomic(page); src = kmap_atomic(b->bm_pages[page_nr]); memcpy(dest, src, PAGE_SIZE); @@ -981,6 +989,8 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must bio->bi_bdev = mdev->ldev->md_bdev; bio->bi_sector = on_disk_sector; + /* bio_add_page of a single page to an empty bio will always succeed, + * according to api. Do we want to assert that? */ bio_add_page(bio, page, len, 0); bio->bi_private = ctx; bio->bi_end_io = bm_async_io_complete; @@ -999,14 +1009,9 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must /* * bm_rw: read/write the whole bitmap from/to its on disk location. */ -static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local) +static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local) { - struct bm_aio_ctx ctx = { - .mdev = mdev, - .in_flight = ATOMIC_INIT(1), - .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), - .flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0, - }; + struct bm_aio_ctx *ctx; struct drbd_bitmap *b = mdev->bitmap; int num_pages, i, count = 0; unsigned long now; @@ -1021,7 +1026,27 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id * For lazy writeout, we don't care for ongoing changes to the bitmap, * as we submit copies of pages anyways. */ - if (!ctx.flags) + + ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); + if (!ctx) + return -ENOMEM; + + *ctx = (struct bm_aio_ctx) { + .mdev = mdev, + .in_flight = ATOMIC_INIT(1), + .done = 0, + .flags = flags, + .error = 0, + .kref = { ATOMIC_INIT(2) }, + }; + + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); + kfree(ctx); + return -ENODEV; + } + + if (!ctx->flags) WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); num_pages = b->bm_number_of_pages; @@ -1046,29 +1071,38 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id continue; } } - atomic_inc(&ctx.in_flight); - bm_page_io_async(&ctx, i, rw); + atomic_inc(&ctx->in_flight); + bm_page_io_async(ctx, i, rw); ++count; cond_resched(); } /* - * We initialize ctx.in_flight to one to make sure bm_async_io_complete - * will not complete() early, and decrement / test it here. If there + * We initialize ctx->in_flight to one to make sure bm_async_io_complete + * will not set ctx->done early, and decrement / test it here. If there * are still some bios in flight, we need to wait for them here. + * If all IO is done already (or nothing had been submitted), there is + * no need to wait. Still, we need to put the kref associated with the + * "in_flight reached zero, all done" event. */ - if (!atomic_dec_and_test(&ctx.in_flight)) - wait_for_completion(&ctx.done); + if (!atomic_dec_and_test(&ctx->in_flight)) + wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); + else + kref_put(&ctx->kref, &bm_aio_ctx_destroy); + dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", rw == WRITE ? "WRITE" : "READ", count, jiffies - now); - if (ctx.error) { + if (ctx->error) { dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); drbd_chk_io_error(mdev, 1, true); - err = -EIO; /* ctx.error ? */ + err = -EIO; /* ctx->error ? */ } + if (atomic_read(&ctx->in_flight)) + err = -EIO; /* Disk failed during IO... */ + now = jiffies; if (rw == WRITE) { drbd_md_flush(mdev); @@ -1082,6 +1116,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); + kref_put(&ctx->kref, &bm_aio_ctx_destroy); return err; } @@ -1091,7 +1126,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id */ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) { - return bm_rw(mdev, READ, 0); + return bm_rw(mdev, READ, 0, 0); } /** @@ -1102,7 +1137,7 @@ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) */ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) { - return bm_rw(mdev, WRITE, 0); + return bm_rw(mdev, WRITE, 0, 0); } /** @@ -1112,7 +1147,23 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) */ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local) { - return bm_rw(mdev, WRITE, upper_idx); + return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, upper_idx); +} + +/** + * drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location. + * @mdev: DRBD device. + * + * Will only write pages that have changed since last IO. + * In contrast to drbd_bm_write(), this will copy the bitmap pages + * to temporary writeout pages. It is intended to trigger a full write-out + * while still allowing the bitmap to change, for example if a resync or online + * verify is aborted due to a failed peer disk, while local IO continues, or + * pending resync acks are still being processed. + */ +int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local) +{ + return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0); } @@ -1130,28 +1181,45 @@ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(l */ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) { - struct bm_aio_ctx ctx = { + struct bm_aio_ctx *ctx; + int err; + + if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { + dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); + return 0; + } + + ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); + if (!ctx) + return -ENOMEM; + + *ctx = (struct bm_aio_ctx) { .mdev = mdev, .in_flight = ATOMIC_INIT(1), - .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), + .done = 0, .flags = BM_AIO_COPY_PAGES, + .error = 0, + .kref = { ATOMIC_INIT(2) }, }; - if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { - dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); - return 0; + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); + kfree(ctx); + return -ENODEV; } - bm_page_io_async(&ctx, idx, WRITE_SYNC); - wait_for_completion(&ctx.done); + bm_page_io_async(ctx, idx, WRITE_SYNC); + wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); - if (ctx.error) + if (ctx->error) drbd_chk_io_error(mdev, 1, true); /* that should force detach, so the in memory bitmap will be * gone in a moment as well. */ mdev->bm_writ_cnt++; - return ctx.error; + err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error; + kref_put(&ctx->kref, &bm_aio_ctx_destroy); + return err; } /* NOTE diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8d680562ba73..02f013a073a7 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -712,7 +712,6 @@ struct drbd_request { struct list_head tl_requests; /* ring list in the transfer log */ struct bio *master_bio; /* master bio pointer */ unsigned long rq_state; /* see comments above _req_mod() */ - int seq_num; unsigned long start_time; }; @@ -851,6 +850,7 @@ enum { NEW_CUR_UUID, /* Create new current UUID when thawing IO */ AL_SUSPENDED, /* Activity logging is currently suspended. */ AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ + STATE_SENT, /* Do not change state/UUIDs while this is set */ }; struct drbd_bitmap; /* opaque for drbd_conf */ @@ -862,31 +862,30 @@ enum bm_flag { BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */ /* currently locked for bulk operation */ - BM_LOCKED_MASK = 0x7, + BM_LOCKED_MASK = 0xf, /* in detail, that is: */ BM_DONT_CLEAR = 0x1, BM_DONT_SET = 0x2, BM_DONT_TEST = 0x4, + /* so we can mark it locked for bulk operation, + * and still allow all non-bulk operations */ + BM_IS_LOCKED = 0x8, + /* (test bit, count bit) allowed (common case) */ - BM_LOCKED_TEST_ALLOWED = 0x3, + BM_LOCKED_TEST_ALLOWED = BM_DONT_CLEAR | BM_DONT_SET | BM_IS_LOCKED, /* testing bits, as well as setting new bits allowed, but clearing bits * would be unexpected. Used during bitmap receive. Setting new bits * requires sending of "out-of-sync" information, though. */ - BM_LOCKED_SET_ALLOWED = 0x1, + BM_LOCKED_SET_ALLOWED = BM_DONT_CLEAR | BM_IS_LOCKED, - /* clear is not expected while bitmap is locked for bulk operation */ + /* for drbd_bm_write_copy_pages, everything is allowed, + * only concurrent bulk operations are locked out. */ + BM_LOCKED_CHANGE_ALLOWED = BM_IS_LOCKED, }; - -/* TODO sort members for performance - * MAYBE group them further */ - -/* THINK maybe we actually want to use the default "event/%s" worker threads - * or similar in linux 2.6, which uses per cpu data and threads. - */ struct drbd_work_queue { struct list_head q; struct semaphore s; /* producers up it, worker down()s it */ @@ -938,8 +937,7 @@ struct drbd_backing_dev { }; struct drbd_md_io { - struct drbd_conf *mdev; - struct completion event; + unsigned int done; int error; }; @@ -1022,6 +1020,7 @@ struct drbd_conf { struct drbd_tl_epoch *newest_tle; struct drbd_tl_epoch *oldest_tle; struct list_head out_of_sequence_requests; + struct list_head barrier_acked_requests; struct hlist_head *tl_hash; unsigned int tl_hash_s; @@ -1056,6 +1055,8 @@ struct drbd_conf { struct crypto_hash *csums_tfm; struct crypto_hash *verify_tfm; + unsigned long last_reattach_jif; + unsigned long last_reconnect_jif; struct drbd_thread receiver; struct drbd_thread worker; struct drbd_thread asender; @@ -1094,7 +1095,8 @@ struct drbd_conf { wait_queue_head_t ee_wait; struct page *md_io_page; /* one page buffer for md_io */ struct page *md_io_tmpp; /* for logical_block_size != 512 */ - struct mutex md_io_mutex; /* protects the md_io_buffer */ + struct drbd_md_io md_io; + atomic_t md_io_in_use; /* protects the md_io, md_io_page and md_io_tmpp */ spinlock_t al_lock; wait_queue_head_t al_wait; struct lru_cache *act_log; /* activity log */ @@ -1228,8 +1230,8 @@ extern int drbd_send_uuids(struct drbd_conf *mdev); extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); -extern int _drbd_send_state(struct drbd_conf *mdev); -extern int drbd_send_state(struct drbd_conf *mdev); +extern int drbd_send_state(struct drbd_conf *mdev, union drbd_state s); +extern int drbd_send_current_state(struct drbd_conf *mdev); extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, enum drbd_packets cmd, struct p_header80 *h, size_t size, unsigned msg_flags); @@ -1461,6 +1463,7 @@ extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr); extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local); extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); +extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local); extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr); extern size_t drbd_bm_words(struct drbd_conf *mdev); @@ -1493,11 +1496,38 @@ extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ extern mempool_t *drbd_request_mempool; extern mempool_t *drbd_ee_mempool; -extern struct page *drbd_pp_pool; /* drbd's page pool */ +/* drbd's page pool, used to buffer data received from the peer, + * or data requested by the peer. + * + * This does not have an emergency reserve. + * + * When allocating from this pool, it first takes pages from the pool. + * Only if the pool is depleted will try to allocate from the system. + * + * The assumption is that pages taken from this pool will be processed, + * and given back, "quickly", and then can be recycled, so we can avoid + * frequent calls to alloc_page(), and still will be able to make progress even + * under memory pressure. + */ +extern struct page *drbd_pp_pool; extern spinlock_t drbd_pp_lock; extern int drbd_pp_vacant; extern wait_queue_head_t drbd_pp_wait; +/* We also need a standard (emergency-reserve backed) page pool + * for meta data IO (activity log, bitmap). + * We can keep it global, as long as it is used as "N pages at a time". + * 128 should be plenty, currently we probably can get away with as few as 1. + */ +#define DRBD_MIN_POOL_PAGES 128 +extern mempool_t *drbd_md_io_page_pool; + +/* We also need to make sure we get a bio + * when we need it for housekeeping purposes */ +extern struct bio_set *drbd_md_io_bio_set; +/* to allocate from that set */ +extern struct bio *bio_alloc_drbd(gfp_t gfp_mask); + extern rwlock_t global_state_lock; extern struct drbd_conf *drbd_new_device(unsigned int minor); @@ -1536,8 +1566,12 @@ extern void resume_next_sg(struct drbd_conf *mdev); extern void suspend_other_sg(struct drbd_conf *mdev); extern int drbd_resync_finished(struct drbd_conf *mdev); /* maybe rather drbd_main.c ? */ +extern void *drbd_md_get_buffer(struct drbd_conf *mdev); +extern void drbd_md_put_buffer(struct drbd_conf *mdev); extern int drbd_md_sync_page_io(struct drbd_conf *mdev, - struct drbd_backing_dev *bdev, sector_t sector, int rw); + struct drbd_backing_dev *bdev, sector_t sector, int rw); +extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, + unsigned int *done); extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int); extern void drbd_rs_controller_reset(struct drbd_conf *mdev); @@ -1754,19 +1788,6 @@ static inline struct page *page_chain_next(struct page *page) #define page_chain_for_each_safe(page, n) \ for (; page && ({ n = page_chain_next(page); 1; }); page = n) -static inline int drbd_bio_has_active_page(struct bio *bio) -{ - struct bio_vec *bvec; - int i; - - __bio_for_each_segment(bvec, bio, i, 0) { - if (page_count(bvec->bv_page) > 1) - return 1; - } - - return 0; -} - static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e) { struct page *page = e->pages; @@ -1777,7 +1798,6 @@ static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e) return 0; } - static inline void drbd_state_lock(struct drbd_conf *mdev) { wait_event(mdev->misc_wait, @@ -2230,7 +2250,7 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, * Note: currently we don't support such large bitmaps on 32bit * arch anyways, but no harm done to be prepared for it here. */ - unsigned int shift = mdev->rs_total >= (1ULL << 32) ? 16 : 10; + unsigned int shift = mdev->rs_total > UINT_MAX ? 16 : 10; unsigned long left = *bits_left >> shift; unsigned long total = 1UL + (mdev->rs_total >> shift); unsigned long tmp = 1000UL - left * 1000UL/total; @@ -2306,12 +2326,12 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) case D_OUTDATED: case D_CONSISTENT: case D_UP_TO_DATE: + case D_FAILED: /* disk state is stable as well. */ break; /* no new io accepted during tansitional states */ case D_ATTACHING: - case D_FAILED: case D_NEGOTIATING: case D_UNKNOWN: case D_MASK: diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 211fc44f84be..920ede2829d6 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -139,6 +139,8 @@ struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ mempool_t *drbd_request_mempool; mempool_t *drbd_ee_mempool; +mempool_t *drbd_md_io_page_pool; +struct bio_set *drbd_md_io_bio_set; /* I do not use a standard mempool, because: 1) I want to hand out the pre-allocated objects first. @@ -159,7 +161,24 @@ static const struct block_device_operations drbd_ops = { .release = drbd_release, }; -#define ARRY_SIZE(A) (sizeof(A)/sizeof(A[0])) +static void bio_destructor_drbd(struct bio *bio) +{ + bio_free(bio, drbd_md_io_bio_set); +} + +struct bio *bio_alloc_drbd(gfp_t gfp_mask) +{ + struct bio *bio; + + if (!drbd_md_io_bio_set) + return bio_alloc(gfp_mask, 1); + + bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set); + if (!bio) + return NULL; + bio->bi_destructor = bio_destructor_drbd; + return bio; +} #ifdef __CHECKER__ /* When checking with sparse, and this is an inline function, sparse will @@ -208,6 +227,7 @@ static int tl_init(struct drbd_conf *mdev) mdev->oldest_tle = b; mdev->newest_tle = b; INIT_LIST_HEAD(&mdev->out_of_sequence_requests); + INIT_LIST_HEAD(&mdev->barrier_acked_requests); mdev->tl_hash = NULL; mdev->tl_hash_s = 0; @@ -246,9 +266,7 @@ void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new) new->n_writes = 0; newest_before = mdev->newest_tle; - /* never send a barrier number == 0, because that is special-cased - * when using TCQ for our write ordering code */ - new->br_number = (newest_before->br_number+1) ?: 1; + new->br_number = newest_before->br_number+1; if (mdev->newest_tle != new) { mdev->newest_tle->next = new; mdev->newest_tle = new; @@ -311,7 +329,7 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, These have been list_move'd to the out_of_sequence_requests list in _req_mod(, barrier_acked) above. */ - list_del_init(&b->requests); + list_splice_init(&b->requests, &mdev->barrier_acked_requests); nob = b->next; if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { @@ -411,6 +429,23 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) b = tmp; list_splice(&carry_reads, &b->requests); } + + /* Actions operating on the disk state, also want to work on + requests that got barrier acked. */ + switch (what) { + case fail_frozen_disk_io: + case restart_frozen_disk_io: + list_for_each_safe(le, tle, &mdev->barrier_acked_requests) { + req = list_entry(le, struct drbd_request, tl_requests); + _req_mod(req, what); + } + + case connection_lost_while_pending: + case resend: + break; + default: + dev_err(DEV, "what = %d in _tl_restart()\n", what); + } } @@ -458,6 +493,38 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) } /** + * tl_abort_disk_io() - Abort disk I/O for all requests for a certain mdev in the TL + * @mdev: DRBD device. + */ +void tl_abort_disk_io(struct drbd_conf *mdev) +{ + struct drbd_tl_epoch *b; + struct list_head *le, *tle; + struct drbd_request *req; + + spin_lock_irq(&mdev->req_lock); + b = mdev->oldest_tle; + while (b) { + list_for_each_safe(le, tle, &b->requests) { + req = list_entry(le, struct drbd_request, tl_requests); + if (!(req->rq_state & RQ_LOCAL_PENDING)) + continue; + _req_mod(req, abort_disk_io); + } + b = b->next; + } + + list_for_each_safe(le, tle, &mdev->barrier_acked_requests) { + req = list_entry(le, struct drbd_request, tl_requests); + if (!(req->rq_state & RQ_LOCAL_PENDING)) + continue; + _req_mod(req, abort_disk_io); + } + + spin_unlock_irq(&mdev->req_lock); +} + +/** * cl_wide_st_chg() - true if the state change is a cluster wide one * @mdev: DRBD device. * @os: old (current) state. @@ -470,7 +537,7 @@ static int cl_wide_st_chg(struct drbd_conf *mdev, ((os.role != R_PRIMARY && ns.role == R_PRIMARY) || (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || - (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) || + (os.disk != D_FAILED && ns.disk == D_FAILED))) || (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); } @@ -509,8 +576,16 @@ static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *, union drbd_state, union drbd_state); +enum sanitize_state_warnings { + NO_WARNING, + ABORTED_ONLINE_VERIFY, + ABORTED_RESYNC, + CONNECTION_LOST_NEGOTIATING, + IMPLICITLY_UPGRADED_DISK, + IMPLICITLY_UPGRADED_PDSK, +}; static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, const char **warn_sync_abort); + union drbd_state ns, enum sanitize_state_warnings *warn); int drbd_send_state_req(struct drbd_conf *, union drbd_state, union drbd_state); @@ -785,6 +860,13 @@ is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) rv = SS_IN_TRANSIENT_STATE; + /* While establishing a connection only allow cstate to change. + Delay/refuse role changes, detach attach etc... */ + if (test_bit(STATE_SENT, &mdev->flags) && + !(os.conn == C_WF_REPORT_PARAMS || + (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION))) + rv = SS_IN_TRANSIENT_STATE; + if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) rv = SS_NEED_CONNECTION; @@ -803,6 +885,21 @@ is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, return rv; } +static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn) +{ + static const char *msg_table[] = { + [NO_WARNING] = "", + [ABORTED_ONLINE_VERIFY] = "Online-verify aborted.", + [ABORTED_RESYNC] = "Resync aborted.", + [CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!", + [IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk", + [IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk", + }; + + if (warn != NO_WARNING) + dev_warn(DEV, "%s\n", msg_table[warn]); +} + /** * sanitize_state() - Resolves implicitly necessary additional changes to a state transition * @mdev: DRBD device. @@ -814,11 +911,14 @@ is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, * to D_UNKNOWN. This rule and many more along those lines are in this function. */ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, const char **warn_sync_abort) + union drbd_state ns, enum sanitize_state_warnings *warn) { enum drbd_fencing_p fp; enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; + if (warn) + *warn = NO_WARNING; + fp = FP_DONT_CARE; if (get_ldev(mdev)) { fp = mdev->ldev->dc.fencing; @@ -833,18 +933,13 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow. * If you try to go into some Sync* state, that shall fail (elsewhere). */ if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && - ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN) + ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_CONNECTED) ns.conn = os.conn; /* we cannot fail (again) if we already detached */ if (ns.disk == D_FAILED && os.disk == D_DISKLESS) ns.disk = D_DISKLESS; - /* if we are only D_ATTACHING yet, - * we can (and should) go directly to D_DISKLESS. */ - if (ns.disk == D_FAILED && os.disk == D_ATTACHING) - ns.disk = D_DISKLESS; - /* After C_DISCONNECTING only C_STANDALONE may follow */ if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) ns.conn = os.conn; @@ -863,10 +958,9 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state /* Abort resync if a disk fails/detaches */ if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { - if (warn_sync_abort) - *warn_sync_abort = - os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ? - "Online-verify" : "Resync"; + if (warn) + *warn = os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ? + ABORTED_ONLINE_VERIFY : ABORTED_RESYNC; ns.conn = C_CONNECTED; } @@ -877,7 +971,8 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns.disk = mdev->new_state_tmp.disk; ns.pdsk = mdev->new_state_tmp.pdsk; } else { - dev_alert(DEV, "Connection lost while negotiating, no data!\n"); + if (warn) + *warn = CONNECTION_LOST_NEGOTIATING; ns.disk = D_DISKLESS; ns.pdsk = D_UNKNOWN; } @@ -959,16 +1054,16 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns.disk = disk_max; if (ns.disk < disk_min) { - dev_warn(DEV, "Implicitly set disk from %s to %s\n", - drbd_disk_str(ns.disk), drbd_disk_str(disk_min)); + if (warn) + *warn = IMPLICITLY_UPGRADED_DISK; ns.disk = disk_min; } if (ns.pdsk > pdsk_max) ns.pdsk = pdsk_max; if (ns.pdsk < pdsk_min) { - dev_warn(DEV, "Implicitly set pdsk from %s to %s\n", - drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min)); + if (warn) + *warn = IMPLICITLY_UPGRADED_PDSK; ns.pdsk = pdsk_min; } @@ -1045,12 +1140,12 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, { union drbd_state os; enum drbd_state_rv rv = SS_SUCCESS; - const char *warn_sync_abort = NULL; + enum sanitize_state_warnings ssw; struct after_state_chg_work *ascw; os = mdev->state; - ns = sanitize_state(mdev, os, ns, &warn_sync_abort); + ns = sanitize_state(mdev, os, ns, &ssw); if (ns.i == os.i) return SS_NOTHING_TO_DO; @@ -1076,8 +1171,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, return rv; } - if (warn_sync_abort) - dev_warn(DEV, "%s aborted.\n", warn_sync_abort); + print_sanitize_warnings(mdev, ssw); { char *pbp, pb[300]; @@ -1243,7 +1337,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, drbd_thread_stop_nowait(&mdev->receiver); /* Upon network failure, we need to restart the receiver. */ - if (os.conn > C_TEAR_DOWN && + if (os.conn > C_WF_CONNECTION && ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) drbd_thread_restart_nowait(&mdev->receiver); @@ -1251,6 +1345,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) drbd_resume_al(mdev); + /* remember last connect and attach times so request_timer_fn() won't + * kill newly established sessions while we are still trying to thaw + * previously frozen IO */ + if (os.conn != C_WF_REPORT_PARAMS && ns.conn == C_WF_REPORT_PARAMS) + mdev->last_reconnect_jif = jiffies; + if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) && + ns.disk > D_NEGOTIATING) + mdev->last_reattach_jif = jiffies; + ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); if (ascw) { ascw->os = os; @@ -1354,12 +1457,16 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Here we have the actions that are performed after a state change. This function might sleep */ + if (os.disk <= D_NEGOTIATING && ns.disk > D_NEGOTIATING) + mod_timer(&mdev->request_timer, jiffies + HZ); + nsm.i = -1; if (ns.susp_nod) { if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) what = resend; - if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) + if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) && + ns.disk > D_NEGOTIATING) what = restart_frozen_disk_io; if (what != nothing) @@ -1408,7 +1515,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Do not change the order of the if above and the two below... */ if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ drbd_send_uuids(mdev); - drbd_send_state(mdev); + drbd_send_state(mdev, ns); } /* No point in queuing send_bitmap if we don't have a connection * anymore, so check also the _current_ state, not only the new state @@ -1441,11 +1548,11 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, } if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { - if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) { + if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY && + mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { drbd_uuid_new_current(mdev); drbd_send_uuids(mdev); } - /* D_DISKLESS Peer becomes secondary */ if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) /* We may still be Primary ourselves. @@ -1473,14 +1580,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { drbd_send_sizes(mdev, 0, 0); /* to start sync... */ drbd_send_uuids(mdev); - drbd_send_state(mdev); + drbd_send_state(mdev, ns); } /* We want to pause/continue resync, tell peer. */ if (ns.conn >= C_CONNECTED && ((os.aftr_isp != ns.aftr_isp) || (os.user_isp != ns.user_isp))) - drbd_send_state(mdev); + drbd_send_state(mdev, ns); /* In case one of the isp bits got set, suspend other devices. */ if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && @@ -1490,10 +1597,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Make sure the peer gets informed about eventual state changes (ISP bits) while we were in WFReportParams. */ if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) - drbd_send_state(mdev); + drbd_send_state(mdev, ns); if (os.conn != C_AHEAD && ns.conn == C_AHEAD) - drbd_send_state(mdev); + drbd_send_state(mdev, ns); /* We are in the progress to start a full sync... */ if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || @@ -1513,33 +1620,38 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* first half of local IO error, failure to attach, * or administrative detach */ if (os.disk != D_FAILED && ns.disk == D_FAILED) { - enum drbd_io_error_p eh; - int was_io_error; + enum drbd_io_error_p eh = EP_PASS_ON; + int was_io_error = 0; /* corresponding get_ldev was in __drbd_set_state, to serialize - * our cleanup here with the transition to D_DISKLESS, - * so it is safe to dreference ldev here. */ - eh = mdev->ldev->dc.on_io_error; - was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); - - /* current state still has to be D_FAILED, - * there is only one way out: to D_DISKLESS, - * and that may only happen after our put_ldev below. */ - if (mdev->state.disk != D_FAILED) - dev_err(DEV, - "ASSERT FAILED: disk is %s during detach\n", - drbd_disk_str(mdev->state.disk)); - - if (drbd_send_state(mdev)) - dev_warn(DEV, "Notified peer that I am detaching my disk\n"); - else - dev_err(DEV, "Sending state for detaching disk failed\n"); - - drbd_rs_cancel_all(mdev); - - /* In case we want to get something to stable storage still, - * this may be the last chance. - * Following put_ldev may transition to D_DISKLESS. */ - drbd_md_sync(mdev); + * our cleanup here with the transition to D_DISKLESS. + * But is is still not save to dreference ldev here, since + * we might come from an failed Attach before ldev was set. */ + if (mdev->ldev) { + eh = mdev->ldev->dc.on_io_error; + was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); + + /* Immediately allow completion of all application IO, that waits + for completion from the local disk. */ + tl_abort_disk_io(mdev); + + /* current state still has to be D_FAILED, + * there is only one way out: to D_DISKLESS, + * and that may only happen after our put_ldev below. */ + if (mdev->state.disk != D_FAILED) + dev_err(DEV, + "ASSERT FAILED: disk is %s during detach\n", + drbd_disk_str(mdev->state.disk)); + + if (ns.conn >= C_CONNECTED) + drbd_send_state(mdev, ns); + + drbd_rs_cancel_all(mdev); + + /* In case we want to get something to stable storage still, + * this may be the last chance. + * Following put_ldev may transition to D_DISKLESS. */ + drbd_md_sync(mdev); + } put_ldev(mdev); if (was_io_error && eh == EP_CALL_HELPER) @@ -1561,16 +1673,17 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, mdev->rs_failed = 0; atomic_set(&mdev->rs_pending_cnt, 0); - if (drbd_send_state(mdev)) - dev_warn(DEV, "Notified peer that I'm now diskless.\n"); + if (ns.conn >= C_CONNECTED) + drbd_send_state(mdev, ns); + /* corresponding get_ldev in __drbd_set_state * this may finally trigger drbd_ldev_destroy. */ put_ldev(mdev); } /* Notify peer that I had a local IO error, and did not detached.. */ - if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT) - drbd_send_state(mdev); + if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED) + drbd_send_state(mdev, ns); /* Disks got bigger while they were detached */ if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && @@ -1588,7 +1701,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* sync target done with resync. Explicitly notify peer, even though * it should (at least for non-empty resyncs) already know itself. */ if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) - drbd_send_state(mdev); + drbd_send_state(mdev, ns); + + /* Wake up role changes, that were delayed because of connection establishing */ + if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) { + clear_bit(STATE_SENT, &mdev->flags); + wake_up(&mdev->state_wait); + } /* This triggers bitmap writeout of potentially still unwritten pages * if the resync finished cleanly, or aborted because of peer disk @@ -1598,8 +1717,8 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, * No harm done if some bits change during this phase. */ if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { - drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, - "write from resync_finished", BM_LOCKED_SET_ALLOWED); + drbd_queue_bitmap_io(mdev, &drbd_bm_write_copy_pages, NULL, + "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED); put_ldev(mdev); } @@ -2057,7 +2176,11 @@ int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) D_ASSERT(mdev->state.disk == D_UP_TO_DATE); - uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET; + uuid = mdev->ldev->md.uuid[UI_BITMAP]; + if (uuid && uuid != UUID_JUST_CREATED) + uuid = uuid + UUID_NEW_BM_OFFSET; + else + get_random_bytes(&uuid, sizeof(u64)); drbd_uuid_set(mdev, UI_BITMAP, uuid); drbd_print_uuids(mdev, "updated sync UUID"); drbd_md_sync(mdev); @@ -2089,6 +2212,10 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */ } + /* Never allow old drbd (up to 8.3.7) to see more than 32KiB */ + if (mdev->agreed_pro_version <= 94) + max_bio_size = min_t(int, max_bio_size, DRBD_MAX_SIZE_H80_PACKET); + p.d_size = cpu_to_be64(d_size); p.u_size = cpu_to_be64(u_size); p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); @@ -2102,10 +2229,10 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl } /** - * drbd_send_state() - Sends the drbd state to the peer + * drbd_send_current_state() - Sends the drbd state to the peer * @mdev: DRBD device. */ -int drbd_send_state(struct drbd_conf *mdev) +int drbd_send_current_state(struct drbd_conf *mdev) { struct socket *sock; struct p_state p; @@ -2131,6 +2258,37 @@ int drbd_send_state(struct drbd_conf *mdev) return ok; } +/** + * drbd_send_state() - After a state change, sends the new state to the peer + * @mdev: DRBD device. + * @state: the state to send, not necessarily the current state. + * + * Each state change queues an "after_state_ch" work, which will eventually + * send the resulting new state to the peer. If more state changes happen + * between queuing and processing of the after_state_ch work, we still + * want to send each intermediary state in the order it occurred. + */ +int drbd_send_state(struct drbd_conf *mdev, union drbd_state state) +{ + struct socket *sock; + struct p_state p; + int ok = 0; + + mutex_lock(&mdev->data.mutex); + + p.state = cpu_to_be32(state.i); + sock = mdev->data.socket; + + if (likely(sock != NULL)) { + ok = _drbd_send_cmd(mdev, sock, P_STATE, + (struct p_header80 *)&p, sizeof(p), 0); + } + + mutex_unlock(&mdev->data.mutex); + + return ok; +} + int drbd_send_state_req(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val) { @@ -2615,7 +2773,7 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) struct bio_vec *bvec; int i; /* hint all but last page with MSG_MORE */ - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment(bvec, bio, i) { if (!_drbd_no_send_page(mdev, bvec->bv_page, bvec->bv_offset, bvec->bv_len, i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) @@ -2629,7 +2787,7 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) struct bio_vec *bvec; int i; /* hint all but last page with MSG_MORE */ - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment(bvec, bio, i) { if (!_drbd_send_page(mdev, bvec->bv_page, bvec->bv_offset, bvec->bv_len, i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) @@ -2695,8 +2853,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) p.sector = cpu_to_be64(req->sector); p.block_id = (unsigned long)req; - p.seq_num = cpu_to_be32(req->seq_num = - atomic_add_return(1, &mdev->packet_seq)); + p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq)); dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw); @@ -2987,8 +3144,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->rs_sect_in, 0); atomic_set(&mdev->rs_sect_ev, 0); atomic_set(&mdev->ap_in_flight, 0); + atomic_set(&mdev->md_io_in_use, 0); - mutex_init(&mdev->md_io_mutex); mutex_init(&mdev->data.mutex); mutex_init(&mdev->meta.mutex); sema_init(&mdev->data.work.s, 0); @@ -3126,6 +3283,10 @@ static void drbd_destroy_mempools(void) /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */ + if (drbd_md_io_bio_set) + bioset_free(drbd_md_io_bio_set); + if (drbd_md_io_page_pool) + mempool_destroy(drbd_md_io_page_pool); if (drbd_ee_mempool) mempool_destroy(drbd_ee_mempool); if (drbd_request_mempool) @@ -3139,6 +3300,8 @@ static void drbd_destroy_mempools(void) if (drbd_al_ext_cache) kmem_cache_destroy(drbd_al_ext_cache); + drbd_md_io_bio_set = NULL; + drbd_md_io_page_pool = NULL; drbd_ee_mempool = NULL; drbd_request_mempool = NULL; drbd_ee_cache = NULL; @@ -3162,6 +3325,8 @@ static int drbd_create_mempools(void) drbd_bm_ext_cache = NULL; drbd_al_ext_cache = NULL; drbd_pp_pool = NULL; + drbd_md_io_page_pool = NULL; + drbd_md_io_bio_set = NULL; /* caches */ drbd_request_cache = kmem_cache_create( @@ -3185,6 +3350,16 @@ static int drbd_create_mempools(void) goto Enomem; /* mempools */ +#ifdef COMPAT_HAVE_BIOSET_CREATE + drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0); + if (drbd_md_io_bio_set == NULL) + goto Enomem; +#endif + + drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0); + if (drbd_md_io_page_pool == NULL) + goto Enomem; + drbd_request_mempool = mempool_create(number, mempool_alloc_slab, mempool_free_slab, drbd_request_cache); if (drbd_request_mempool == NULL) @@ -3262,6 +3437,8 @@ static void drbd_delete_device(unsigned int minor) if (!mdev) return; + del_timer_sync(&mdev->request_timer); + /* paranoia asserts */ if (mdev->open_cnt != 0) dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt, @@ -3666,8 +3843,10 @@ void drbd_md_sync(struct drbd_conf *mdev) if (!get_ldev_if_state(mdev, D_FAILED)) return; - mutex_lock(&mdev->md_io_mutex); - buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); + if (!buffer) + goto out; + memset(buffer, 0, 512); buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); @@ -3698,7 +3877,8 @@ void drbd_md_sync(struct drbd_conf *mdev) * since we updated it on metadata. */ mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev); - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); +out: put_ldev(mdev); } @@ -3718,8 +3898,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) if (!get_ldev_if_state(mdev, D_ATTACHING)) return ERR_IO_MD_DISK; - mutex_lock(&mdev->md_io_mutex); - buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); + if (!buffer) + goto out; if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { /* NOTE: can't do normal error processing here as this is @@ -3780,7 +3961,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) mdev->sync_conf.al_extents = 127; err: - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); + out: put_ldev(mdev); return rv; @@ -4183,12 +4365,11 @@ const char *drbd_buildtag(void) static char buildtag[38] = "\0uilt-in"; if (buildtag[0] == 0) { -#ifdef CONFIG_MODULES - if (THIS_MODULE != NULL) - sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion); - else +#ifdef MODULE + sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion); +#else + buildtag[0] = 'b'; #endif - buildtag[0] = 'b'; } return buildtag; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 946166e13953..6d4de6a72e80 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -289,7 +289,7 @@ static int _try_outdate_peer_async(void *data) */ spin_lock_irq(&mdev->req_lock); ns = mdev->state; - if (ns.conn < C_WF_REPORT_PARAMS) { + if (ns.conn < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &mdev->flags)) { ns.pdsk = nps; _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); } @@ -432,7 +432,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) /* if this was forced, we should consider sync */ if (forced) drbd_send_uuids(mdev); - drbd_send_state(mdev); + drbd_send_current_state(mdev); } drbd_md_sync(mdev); @@ -845,9 +845,10 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) Because new from 8.3.8 onwards the peer can use multiple BIOs for a single peer_request */ if (mdev->state.conn >= C_CONNECTED) { - if (mdev->agreed_pro_version < 94) - peer = mdev->peer_max_bio_size; - else if (mdev->agreed_pro_version == 94) + if (mdev->agreed_pro_version < 94) { + peer = min_t(int, mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); + /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */ + } else if (mdev->agreed_pro_version == 94) peer = DRBD_MAX_SIZE_H80_PACKET; else /* drbd 8.3.8 onwards */ peer = DRBD_MAX_BIO_SIZE; @@ -1032,7 +1033,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", (unsigned long long) drbd_get_max_capacity(nbc), (unsigned long long) nbc->dc.disk_size); - retcode = ERR_DISK_TO_SMALL; + retcode = ERR_DISK_TOO_SMALL; goto fail; } @@ -1046,7 +1047,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { - retcode = ERR_MD_DISK_TO_SMALL; + retcode = ERR_MD_DISK_TOO_SMALL; dev_warn(DEV, "refusing attach: md-device too small, " "at least %llu sectors needed for this meta-disk type\n", (unsigned long long) min_md_device_sectors); @@ -1057,7 +1058,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp * (we may currently be R_PRIMARY with no local disk...) */ if (drbd_get_max_capacity(nbc) < drbd_get_capacity(mdev->this_bdev)) { - retcode = ERR_DISK_TO_SMALL; + retcode = ERR_DISK_TOO_SMALL; goto fail; } @@ -1138,7 +1139,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) { dev_warn(DEV, "refusing to truncate a consistent device\n"); - retcode = ERR_DISK_TO_SMALL; + retcode = ERR_DISK_TOO_SMALL; goto force_diskless_dec; } @@ -1336,17 +1337,34 @@ static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, { enum drbd_ret_code retcode; int ret; + struct detach dt = {}; + + if (!detach_from_tags(mdev, nlp->tag_list, &dt)) { + reply->ret_code = ERR_MANDATORY_TAG; + goto out; + } + + if (dt.detach_force) { + drbd_force_state(mdev, NS(disk, D_FAILED)); + reply->ret_code = SS_SUCCESS; + goto out; + } + drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ + drbd_md_get_buffer(mdev); /* make sure there is no in-flight meta-data IO */ retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); + drbd_md_put_buffer(mdev); /* D_FAILED will transition to DISKLESS. */ ret = wait_event_interruptible(mdev->misc_wait, mdev->state.disk != D_FAILED); drbd_resume_io(mdev); + if ((int)retcode == (int)SS_IS_DISKLESS) retcode = SS_NOTHING_TO_DO; if (ret) retcode = ERR_INTR; reply->ret_code = retcode; +out: return 0; } @@ -1711,7 +1729,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, if (rs.no_resync && mdev->agreed_pro_version < 93) { retcode = ERR_NEED_APV_93; - goto fail; + goto fail_ldev; } if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) @@ -1738,6 +1756,10 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, fail: reply->ret_code = retcode; return 0; + + fail_ldev: + put_ldev(mdev); + goto fail; } static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, @@ -1941,6 +1963,7 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl /* If there is still bitmap IO pending, probably because of a previous * resync just being finished, wait for it before requesting a new resync. */ + drbd_suspend_io(mdev); wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); @@ -1959,6 +1982,7 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); } + drbd_resume_io(mdev); reply->ret_code = retcode; return 0; @@ -1980,6 +2004,7 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re /* If there is still bitmap IO pending, probably because of a previous * resync just being finished, wait for it before requesting a new resync. */ + drbd_suspend_io(mdev); wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); @@ -1998,6 +2023,7 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re } else retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S)); } + drbd_resume_io(mdev); reply->ret_code = retcode; return 0; @@ -2170,11 +2196,13 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, /* If there is still bitmap IO pending, e.g. previous resync or verify * just being finished, wait for it before requesting a new resync. */ + drbd_suspend_io(mdev); wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); /* w_make_ov_request expects position to be aligned */ mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); + drbd_resume_io(mdev); return 0; } diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 2959cdfb77f5..869bada2ed06 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -52,7 +52,7 @@ void seq_printf_with_thousands_grouping(struct seq_file *seq, long v) if (unlikely(v >= 1000000)) { /* cool: > GiByte/s */ seq_printf(seq, "%ld,", v / 1000000); - v /= 1000000; + v %= 1000000; seq_printf(seq, "%03ld,%03ld", v/1000, v % 1000); } else if (likely(v >= 1000)) seq_printf(seq, "%ld,%03ld", v/1000, v % 1000); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 436f519bed1c..ea4836e0ae98 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -466,6 +466,7 @@ static int drbd_accept(struct drbd_conf *mdev, const char **what, goto out; } (*newsock)->ops = sock->ops; + __module_get((*newsock)->ops->owner); out: return err; @@ -750,6 +751,7 @@ static int drbd_connect(struct drbd_conf *mdev) { struct socket *s, *sock, *msock; int try, h, ok; + enum drbd_state_rv rv; D_ASSERT(!mdev->data.socket); @@ -888,25 +890,32 @@ retry: } } - if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS) - return 0; - sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; atomic_set(&mdev->packet_seq, 0); mdev->peer_seq = 0; - drbd_thread_start(&mdev->asender); - if (drbd_send_protocol(mdev) == -1) return -1; + set_bit(STATE_SENT, &mdev->flags); drbd_send_sync_param(mdev, &mdev->sync_conf); drbd_send_sizes(mdev, 0, 0); drbd_send_uuids(mdev); - drbd_send_state(mdev); + drbd_send_current_state(mdev); clear_bit(USE_DEGR_WFC_T, &mdev->flags); clear_bit(RESIZE_PENDING, &mdev->flags); + + spin_lock_irq(&mdev->req_lock); + rv = _drbd_set_state(_NS(mdev, conn, C_WF_REPORT_PARAMS), CS_VERBOSE, NULL); + if (mdev->state.conn != C_WF_REPORT_PARAMS) + clear_bit(STATE_SENT, &mdev->flags); + spin_unlock_irq(&mdev->req_lock); + + if (rv < SS_SUCCESS) + return 0; + + drbd_thread_start(&mdev->asender); mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */ return 1; @@ -957,7 +966,7 @@ static void drbd_flush(struct drbd_conf *mdev) rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL, NULL); if (rv) { - dev_err(DEV, "local disk flush failed with status %d\n", rv); + dev_info(DEV, "local disk flush failed with status %d\n", rv); /* would rather check on EOPNOTSUPP, but that is not reliable. * don't try again for ANY return value != 0 * if (rv == -EOPNOTSUPP) */ @@ -1001,13 +1010,14 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, if (epoch_size != 0 && atomic_read(&epoch->active) == 0 && - test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) { + (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) { if (!(ev & EV_CLEANUP)) { spin_unlock(&mdev->epoch_lock); drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size); spin_lock(&mdev->epoch_lock); } - dec_unacked(mdev); + if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) + dec_unacked(mdev); if (mdev->current_epoch != epoch) { next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); @@ -1096,7 +1106,11 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, /* In most cases, we will only need one bio. But in case the lower * level restrictions happen to be different at this offset on this * side than those of the sending peer, we may need to submit the - * request in more than one bio. */ + * request in more than one bio. + * + * Plain bio_alloc is good enough here, this is no DRBD internally + * generated bio, but a bio allocated on behalf of the peer. + */ next_bio: bio = bio_alloc(GFP_NOIO, nr_pages); if (!bio) { @@ -1583,6 +1597,24 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u return ok; } +static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_epoch_entry *data_e) +{ + + struct drbd_epoch_entry *rs_e; + bool rv = 0; + + spin_lock_irq(&mdev->req_lock); + list_for_each_entry(rs_e, &mdev->sync_ee, w.list) { + if (overlaps(data_e->sector, data_e->size, rs_e->sector, rs_e->size)) { + rv = 1; + break; + } + } + spin_unlock_irq(&mdev->req_lock); + + return rv; +} + /* Called from receive_Data. * Synchronize packets on sock with packets on msock. * @@ -1826,6 +1858,9 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned list_add(&e->w.list, &mdev->active_ee); spin_unlock_irq(&mdev->req_lock); + if (mdev->state.conn == C_SYNC_TARGET) + wait_event(mdev->ee_wait, !overlapping_resync_write(mdev, e)); + switch (mdev->net_conf->wire_protocol) { case DRBD_PROT_C: inc_unacked(mdev); @@ -2420,7 +2455,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START]; mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1]; - dev_info(DEV, "Did not got last syncUUID packet, corrected:\n"); + dev_info(DEV, "Lost last syncUUID packet, corrected:\n"); drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); return -1; @@ -2806,10 +2841,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi if (apv >= 88) { if (apv == 88) { - if (data_size > SHARED_SECRET_MAX) { - dev_err(DEV, "verify-alg too long, " - "peer wants %u, accepting only %u byte\n", - data_size, SHARED_SECRET_MAX); + if (data_size > SHARED_SECRET_MAX || data_size == 0) { + dev_err(DEV, "verify-alg of wrong size, " + "peer wants %u, accepting only up to %u byte\n", + data_size, SHARED_SECRET_MAX); return false; } @@ -3168,9 +3203,20 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned os = ns = mdev->state; spin_unlock_irq(&mdev->req_lock); - /* peer says his disk is uptodate, while we think it is inconsistent, - * and this happens while we think we have a sync going on. */ - if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE && + /* If some other part of the code (asender thread, timeout) + * already decided to close the connection again, + * we must not "re-establish" it here. */ + if (os.conn <= C_TEAR_DOWN) + return false; + + /* If this is the "end of sync" confirmation, usually the peer disk + * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits + * set) resync started in PausedSyncT, or if the timing of pause-/ + * unpause-sync events has been "just right", the peer disk may + * transition from D_CONSISTENT to D_UP_TO_DATE as well. + */ + if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) && + real_peer_disk == D_UP_TO_DATE && os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) { /* If we are (becoming) SyncSource, but peer is still in sync * preparation, ignore its uptodate-ness to avoid flapping, it @@ -3288,7 +3334,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned /* Nowadays only used when forcing a node into primary role and setting its disk to UpToDate with that */ drbd_send_uuids(mdev); - drbd_send_state(mdev); + drbd_send_current_state(mdev); } } @@ -3776,6 +3822,13 @@ static void drbd_disconnect(struct drbd_conf *mdev) if (mdev->state.conn == C_STANDALONE) return; + /* We are about to start the cleanup after connection loss. + * Make sure drbd_make_request knows about that. + * Usually we should be in some network failure state already, + * but just in case we are not, we fix it up here. + */ + drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); + /* asender does not clean up anything. it must not interfere, either */ drbd_thread_stop(&mdev->asender); drbd_free_sock(mdev); @@ -3803,8 +3856,6 @@ static void drbd_disconnect(struct drbd_conf *mdev) atomic_set(&mdev->rs_pending_cnt, 0); wake_up(&mdev->misc_wait); - del_timer(&mdev->request_timer); - /* make sure syncer is stopped and w_resume_next_sg queued */ del_timer_sync(&mdev->resync_timer); resync_timer_fn((unsigned long)mdev); @@ -4433,7 +4484,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) if (mdev->state.conn == C_AHEAD && atomic_read(&mdev->ap_in_flight) == 0 && - !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) { + !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) { mdev->start_resync_timer.expires = jiffies + HZ; add_timer(&mdev->start_resync_timer); } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 4a0f314086e5..9c5c84946b05 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -37,6 +37,7 @@ static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req const int rw = bio_data_dir(bio); int cpu; cpu = part_stat_lock(); + part_round_stats(cpu, &mdev->vdisk->part0); part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]); part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio)); part_inc_in_flight(&mdev->vdisk->part0, rw); @@ -214,8 +215,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) { const unsigned long s = req->rq_state; struct drbd_conf *mdev = req->mdev; - /* only WRITES may end up here without a master bio (on barrier ack) */ - int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE; + int rw = req->rq_state & RQ_WRITE ? WRITE : READ; /* we must not complete the master bio, while it is * still being processed by _drbd_send_zc_bio (drbd_send_dblock) @@ -230,7 +230,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) return; if (s & RQ_NET_PENDING) return; - if (s & RQ_LOCAL_PENDING) + if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) return; if (req->master_bio) { @@ -277,6 +277,9 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) req->master_bio = NULL; } + if (s & RQ_LOCAL_PENDING) + return; + if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { /* this is disconnected (local only) operation, * or protocol C P_WRITE_ACK, @@ -429,7 +432,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case completed_ok: - if (bio_data_dir(req->master_bio) == WRITE) + if (req->rq_state & RQ_WRITE) mdev->writ_cnt += req->size>>9; else mdev->read_cnt += req->size>>9; @@ -438,7 +441,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state &= ~RQ_LOCAL_PENDING; _req_may_be_done_not_susp(req, m); - put_ldev(mdev); + break; + + case abort_disk_io: + req->rq_state |= RQ_LOCAL_ABORTED; + if (req->rq_state & RQ_WRITE) + _req_may_be_done_not_susp(req, m); + else + goto goto_queue_for_net_read; break; case write_completed_with_error: @@ -447,7 +457,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, __drbd_chk_io_error(mdev, false); _req_may_be_done_not_susp(req, m); - put_ldev(mdev); break; case read_ahead_completed_with_error: @@ -455,7 +464,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; _req_may_be_done_not_susp(req, m); - put_ldev(mdev); break; case read_completed_with_error: @@ -467,7 +475,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, D_ASSERT(!(req->rq_state & RQ_NET_MASK)); __drbd_chk_io_error(mdev, false); - put_ldev(mdev); + + goto_queue_for_net_read: /* no point in retrying if there is no good remote data, * or we have no connection. */ @@ -556,10 +565,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, drbd_queue_work(&mdev->data.work, &req->w); break; - case oos_handed_to_network: - /* actually the same */ + case read_retry_remote_canceled: case send_canceled: - /* treat it the same */ case send_failed: /* real cleanup will be done from tl_clear. just update flags * so it is no longer marked as on the worker queue */ @@ -589,17 +596,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, } req->rq_state &= ~RQ_NET_QUEUED; req->rq_state |= RQ_NET_SENT; - /* because _drbd_send_zc_bio could sleep, and may want to - * dereference the bio even after the "write_acked_by_peer" and - * "completed_ok" events came in, once we return from - * _drbd_send_zc_bio (drbd_send_dblock), we have to check - * whether it is done already, and end it. */ _req_may_be_done_not_susp(req, m); break; - case read_retry_remote_canceled: + case oos_handed_to_network: + /* Was not set PENDING, no longer QUEUED, so is now DONE + * as far as this connection is concerned. */ req->rq_state &= ~RQ_NET_QUEUED; - /* fall through, in case we raced with drbd_disconnect */ + req->rq_state |= RQ_NET_DONE; + _req_may_be_done_not_susp(req, m); + break; + case connection_lost_while_pending: /* transfer log cleanup after connection loss */ /* assert something? */ @@ -616,8 +623,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, _req_may_be_done(req, m); /* Allowed while state.susp */ break; - case write_acked_by_peer_and_sis: - req->rq_state |= RQ_NET_SIS; case conflict_discarded_by_peer: /* for discarded conflicting writes of multiple primaries, * there is no need to keep anything in the tl, potential @@ -628,18 +633,15 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, (unsigned long long)req->sector, req->size); req->rq_state |= RQ_NET_DONE; /* fall through */ + case write_acked_by_peer_and_sis: case write_acked_by_peer: + if (what == write_acked_by_peer_and_sis) + req->rq_state |= RQ_NET_SIS; /* protocol C; successfully written on peer. - * Nothing to do here. + * Nothing more to do here. * We want to keep the tl in place for all protocols, to cater - * for volatile write-back caches on lower level devices. - * - * A barrier request is expected to have forced all prior - * requests onto stable storage, so completion of a barrier - * request could set NET_DONE right here, and not wait for the - * P_BARRIER_ACK, but that is an unnecessary optimization. */ + * for volatile write-back caches on lower level devices. */ - /* this makes it effectively the same as for: */ case recv_acked_by_peer: /* protocol B; pretends to be successfully written on peer. * see also notes above in handed_over_to_network about @@ -773,6 +775,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns int local, remote, send_oos = 0; int err = -EIO; int ret = 0; + union drbd_state s; /* allocate outside of all locks; */ req = drbd_req_new(mdev, bio); @@ -834,8 +837,9 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns drbd_al_begin_io(mdev, sector); } - remote = remote && drbd_should_do_remote(mdev->state); - send_oos = rw == WRITE && drbd_should_send_oos(mdev->state); + s = mdev->state; + remote = remote && drbd_should_do_remote(s); + send_oos = rw == WRITE && drbd_should_send_oos(s); D_ASSERT(!(remote && send_oos)); if (!(local || remote) && !is_susp(mdev->state)) { @@ -867,7 +871,7 @@ allocate_barrier: if (is_susp(mdev->state)) { /* If we got suspended, use the retry mechanism of - generic_make_request() to restart processing of this + drbd_make_request() to restart processing of this bio. In the next call to drbd_make_request we sleep in inc_ap_bio() */ ret = 1; @@ -1091,7 +1095,6 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) */ D_ASSERT(bio->bi_size > 0); D_ASSERT((bio->bi_size & 0x1ff) == 0); - D_ASSERT(bio->bi_idx == 0); /* to make some things easier, force alignment of requests within the * granularity of our hash tables */ @@ -1099,8 +1102,9 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT; if (likely(s_enr == e_enr)) { - inc_ap_bio(mdev, 1); - drbd_make_request_common(mdev, bio, start_time); + do { + inc_ap_bio(mdev, 1); + } while (drbd_make_request_common(mdev, bio, start_time)); return; } @@ -1196,36 +1200,66 @@ void request_timer_fn(unsigned long data) struct drbd_conf *mdev = (struct drbd_conf *) data; struct drbd_request *req; /* oldest request */ struct list_head *le; - unsigned long et = 0; /* effective timeout = ko_count * timeout */ + unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ + unsigned long now; if (get_net_conf(mdev)) { - et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count; + if (mdev->state.conn >= C_WF_REPORT_PARAMS) + ent = mdev->net_conf->timeout*HZ/10 + * mdev->net_conf->ko_count; put_net_conf(mdev); } - if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) + if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */ + dt = mdev->ldev->dc.disk_timeout * HZ / 10; + put_ldev(mdev); + } + et = min_not_zero(dt, ent); + + if (!et) return; /* Recurring timer stopped */ + now = jiffies; + spin_lock_irq(&mdev->req_lock); le = &mdev->oldest_tle->requests; if (list_empty(le)) { spin_unlock_irq(&mdev->req_lock); - mod_timer(&mdev->request_timer, jiffies + et); + mod_timer(&mdev->request_timer, now + et); return; } le = le->prev; req = list_entry(le, struct drbd_request, tl_requests); - if (time_is_before_eq_jiffies(req->start_time + et)) { - if (req->rq_state & RQ_NET_PENDING) { - dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); - _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL); - } else { - dev_warn(DEV, "Local backing block device frozen?\n"); - mod_timer(&mdev->request_timer, jiffies + et); - } - } else { - mod_timer(&mdev->request_timer, req->start_time + et); - } + /* The request is considered timed out, if + * - we have some effective timeout from the configuration, + * with above state restrictions applied, + * - the oldest request is waiting for a response from the network + * resp. the local disk, + * - the oldest request is in fact older than the effective timeout, + * - the connection was established (resp. disk was attached) + * for longer than the timeout already. + * Note that for 32bit jiffies and very stable connections/disks, + * we may have a wrap around, which is catched by + * !time_in_range(now, last_..._jif, last_..._jif + timeout). + * + * Side effect: once per 32bit wrap-around interval, which means every + * ~198 days with 250 HZ, we have a window where the timeout would need + * to expire twice (worst case) to become effective. Good enough. + */ + if (ent && req->rq_state & RQ_NET_PENDING && + time_after(now, req->start_time + ent) && + !time_in_range(now, mdev->last_reconnect_jif, mdev->last_reconnect_jif + ent)) { + dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); + _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); + } + if (dt && req->rq_state & RQ_LOCAL_PENDING && + time_after(now, req->start_time + dt) && + !time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) { + dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); + __drbd_chk_io_error(mdev, 1); + } + nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et; spin_unlock_irq(&mdev->req_lock); + mod_timer(&mdev->request_timer, nt); } diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 68a234a5fdc5..3d2111919486 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -105,6 +105,7 @@ enum drbd_req_event { read_completed_with_error, read_ahead_completed_with_error, write_completed_with_error, + abort_disk_io, completed_ok, resend, fail_frozen_disk_io, @@ -118,18 +119,21 @@ enum drbd_req_event { * same time, so we should hold the request lock anyways. */ enum drbd_req_state_bits { - /* 210 - * 000: no local possible - * 001: to be submitted + /* 3210 + * 0000: no local possible + * 0001: to be submitted * UNUSED, we could map: 011: submitted, completion still pending - * 110: completed ok - * 010: completed with error + * 0110: completed ok + * 0010: completed with error + * 1001: Aborted (before completion) + * 1x10: Aborted and completed -> free */ __RQ_LOCAL_PENDING, __RQ_LOCAL_COMPLETED, __RQ_LOCAL_OK, + __RQ_LOCAL_ABORTED, - /* 76543 + /* 87654 * 00000: no network possible * 00001: to be send * 00011: to be send, on worker queue @@ -199,8 +203,9 @@ enum drbd_req_state_bits { #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) #define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED) #define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK) +#define RQ_LOCAL_ABORTED (1UL << __RQ_LOCAL_ABORTED) -#define RQ_LOCAL_MASK ((RQ_LOCAL_OK << 1)-1) /* 0x07 */ +#define RQ_LOCAL_MASK ((RQ_LOCAL_ABORTED << 1)-1) #define RQ_NET_PENDING (1UL << __RQ_NET_PENDING) #define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 4d3e6f6213ba..620c70ff2231 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -70,11 +70,29 @@ rwlock_t global_state_lock; void drbd_md_io_complete(struct bio *bio, int error) { struct drbd_md_io *md_io; + struct drbd_conf *mdev; md_io = (struct drbd_md_io *)bio->bi_private; + mdev = container_of(md_io, struct drbd_conf, md_io); + md_io->error = error; - complete(&md_io->event); + /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able + * to timeout on the lower level device, and eventually detach from it. + * If this io completion runs after that timeout expired, this + * drbd_md_put_buffer() may allow us to finally try and re-attach. + * During normal operation, this only puts that extra reference + * down to 1 again. + * Make sure we first drop the reference, and only then signal + * completion, or we may (in drbd_al_read_log()) cycle so fast into the + * next drbd_md_sync_page_io(), that we trigger the + * ASSERT(atomic_read(&mdev->md_io_in_use) == 1) there. + */ + drbd_md_put_buffer(mdev); + md_io->done = 1; + wake_up(&mdev->misc_wait); + bio_put(bio); + put_ldev(mdev); } /* reads on behalf of the partner, @@ -226,6 +244,7 @@ void drbd_endio_pri(struct bio *bio, int error) spin_lock_irqsave(&mdev->req_lock, flags); __req_mod(req, what, &m); spin_unlock_irqrestore(&mdev->req_lock, flags); + put_ldev(mdev); if (m.bio) complete_master_bio(mdev, &m); @@ -290,7 +309,7 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * sg_init_table(&sg, 1); crypto_hash_init(&desc); - __bio_for_each_segment(bvec, bio, i, 0) { + bio_for_each_segment(bvec, bio, i) { sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset); crypto_hash_update(&desc, &sg, sg.length); } @@ -728,7 +747,7 @@ int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } drbd_start_resync(mdev, C_SYNC_SOURCE); - clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags); + clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags); return 1; } @@ -1519,14 +1538,14 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) } drbd_state_lock(mdev); - + write_lock_irq(&global_state_lock); if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { + write_unlock_irq(&global_state_lock); drbd_state_unlock(mdev); return; } - write_lock_irq(&global_state_lock); - ns = mdev->state; + ns.i = mdev->state.i; ns.aftr_isp = !_drbd_may_sync_now(mdev); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index b0b00d70c166..cce7df367b79 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -551,7 +551,7 @@ static void floppy_ready(void); static void floppy_start(void); static void process_fd_request(void); static void recalibrate_floppy(void); -static void floppy_shutdown(unsigned long); +static void floppy_shutdown(struct work_struct *); static int floppy_request_regions(int); static void floppy_release_regions(int); @@ -588,6 +588,8 @@ static int buffer_max = -1; static struct floppy_fdc_state fdc_state[N_FDC]; static int fdc; /* current fdc */ +static struct workqueue_struct *floppy_wq; + static struct floppy_struct *_floppy = floppy_type; static unsigned char current_drive; static long current_count_sectors; @@ -629,16 +631,15 @@ static inline void set_debugt(void) { } static inline void debugt(const char *func, const char *msg) { } #endif /* DEBUGT */ -typedef void (*timeout_fn)(unsigned long); -static DEFINE_TIMER(fd_timeout, floppy_shutdown, 0, 0); +static DECLARE_DELAYED_WORK(fd_timeout, floppy_shutdown); static const char *timeout_message; static void is_alive(const char *func, const char *message) { /* this routine checks whether the floppy driver is "alive" */ if (test_bit(0, &fdc_busy) && command_status < 2 && - !timer_pending(&fd_timeout)) { + !delayed_work_pending(&fd_timeout)) { DPRINT("%s: timeout handler died. %s\n", func, message); } } @@ -666,15 +667,18 @@ static int output_log_pos; static void __reschedule_timeout(int drive, const char *message) { + unsigned long delay; + if (drive == current_reqD) drive = current_drive; - del_timer(&fd_timeout); + if (drive < 0 || drive >= N_DRIVE) { - fd_timeout.expires = jiffies + 20UL * HZ; + delay = 20UL * HZ; drive = 0; } else - fd_timeout.expires = jiffies + UDP->timeout; - add_timer(&fd_timeout); + delay = UDP->timeout; + + queue_delayed_work(floppy_wq, &fd_timeout, delay); if (UDP->flags & FD_DEBUG) DPRINT("reschedule timeout %s\n", message); timeout_message = message; @@ -872,7 +876,7 @@ static int lock_fdc(int drive, bool interruptible) command_status = FD_COMMAND_NONE; - __reschedule_timeout(drive, "lock fdc"); + reschedule_timeout(drive, "lock fdc"); set_fdc(drive); return 0; } @@ -880,23 +884,15 @@ static int lock_fdc(int drive, bool interruptible) /* unlocks the driver */ static void unlock_fdc(void) { - unsigned long flags; - - raw_cmd = NULL; if (!test_bit(0, &fdc_busy)) DPRINT("FDC access conflict!\n"); - if (do_floppy) - DPRINT("device interrupt still active at FDC release: %pf!\n", - do_floppy); + raw_cmd = NULL; command_status = FD_COMMAND_NONE; - spin_lock_irqsave(&floppy_lock, flags); - del_timer(&fd_timeout); + __cancel_delayed_work(&fd_timeout); + do_floppy = NULL; cont = NULL; clear_bit(0, &fdc_busy); - if (current_req || set_next_request()) - do_fd_request(current_req->q); - spin_unlock_irqrestore(&floppy_lock, flags); wake_up(&fdc_wait); } @@ -968,26 +964,24 @@ static DECLARE_WORK(floppy_work, NULL); static void schedule_bh(void (*handler)(void)) { + WARN_ON(work_pending(&floppy_work)); + PREPARE_WORK(&floppy_work, (work_func_t)handler); - schedule_work(&floppy_work); + queue_work(floppy_wq, &floppy_work); } -static DEFINE_TIMER(fd_timer, NULL, 0, 0); +static DECLARE_DELAYED_WORK(fd_timer, NULL); static void cancel_activity(void) { - unsigned long flags; - - spin_lock_irqsave(&floppy_lock, flags); do_floppy = NULL; - PREPARE_WORK(&floppy_work, (work_func_t)empty); - del_timer(&fd_timer); - spin_unlock_irqrestore(&floppy_lock, flags); + cancel_delayed_work_sync(&fd_timer); + cancel_work_sync(&floppy_work); } /* this function makes sure that the disk stays in the drive during the * transfer */ -static void fd_watchdog(void) +static void fd_watchdog(struct work_struct *arg) { debug_dcl(DP->flags, "calling disk change from watchdog\n"); @@ -997,21 +991,20 @@ static void fd_watchdog(void) cont->done(0); reset_fdc(); } else { - del_timer(&fd_timer); - fd_timer.function = (timeout_fn)fd_watchdog; - fd_timer.expires = jiffies + HZ / 10; - add_timer(&fd_timer); + cancel_delayed_work(&fd_timer); + PREPARE_DELAYED_WORK(&fd_timer, fd_watchdog); + queue_delayed_work(floppy_wq, &fd_timer, HZ / 10); } } static void main_command_interrupt(void) { - del_timer(&fd_timer); + cancel_delayed_work(&fd_timer); cont->interrupt(); } /* waits for a delay (spinup or select) to pass */ -static int fd_wait_for_completion(unsigned long delay, timeout_fn function) +static int fd_wait_for_completion(unsigned long expires, work_func_t function) { if (FDCS->reset) { reset_fdc(); /* do the reset during sleep to win time @@ -1020,11 +1013,10 @@ static int fd_wait_for_completion(unsigned long delay, timeout_fn function) return 1; } - if (time_before(jiffies, delay)) { - del_timer(&fd_timer); - fd_timer.function = function; - fd_timer.expires = delay; - add_timer(&fd_timer); + if (time_before(jiffies, expires)) { + cancel_delayed_work(&fd_timer); + PREPARE_DELAYED_WORK(&fd_timer, function); + queue_delayed_work(floppy_wq, &fd_timer, expires - jiffies); return 1; } return 0; @@ -1342,7 +1334,7 @@ static int fdc_dtr(void) */ FDCS->dtr = raw_cmd->rate & 3; return fd_wait_for_completion(jiffies + 2UL * HZ / 100, - (timeout_fn)floppy_ready); + (work_func_t)floppy_ready); } /* fdc_dtr */ static void tell_sector(void) @@ -1447,7 +1439,7 @@ static void setup_rw_floppy(void) int flags; int dflags; unsigned long ready_date; - timeout_fn function; + work_func_t function; flags = raw_cmd->flags; if (flags & (FD_RAW_READ | FD_RAW_WRITE)) @@ -1461,9 +1453,9 @@ static void setup_rw_floppy(void) */ if (time_after(ready_date, jiffies + DP->select_delay)) { ready_date -= DP->select_delay; - function = (timeout_fn)floppy_start; + function = (work_func_t)floppy_start; } else - function = (timeout_fn)setup_rw_floppy; + function = (work_func_t)setup_rw_floppy; /* wait until the floppy is spinning fast enough */ if (fd_wait_for_completion(ready_date, function)) @@ -1493,7 +1485,7 @@ static void setup_rw_floppy(void) inr = result(); cont->interrupt(); } else if (flags & FD_RAW_NEED_DISK) - fd_watchdog(); + fd_watchdog(NULL); } static int blind_seek; @@ -1802,20 +1794,22 @@ static void show_floppy(void) pr_info("do_floppy=%pf\n", do_floppy); if (work_pending(&floppy_work)) pr_info("floppy_work.func=%pf\n", floppy_work.func); - if (timer_pending(&fd_timer)) - pr_info("fd_timer.function=%pf\n", fd_timer.function); - if (timer_pending(&fd_timeout)) { - pr_info("timer_function=%pf\n", fd_timeout.function); - pr_info("expires=%lu\n", fd_timeout.expires - jiffies); - pr_info("now=%lu\n", jiffies); - } + if (delayed_work_pending(&fd_timer)) + pr_info("delayed work.function=%p expires=%ld\n", + fd_timer.work.func, + fd_timer.timer.expires - jiffies); + if (delayed_work_pending(&fd_timeout)) + pr_info("timer_function=%p expires=%ld\n", + fd_timeout.work.func, + fd_timeout.timer.expires - jiffies); + pr_info("cont=%p\n", cont); pr_info("current_req=%p\n", current_req); pr_info("command_status=%d\n", command_status); pr_info("\n"); } -static void floppy_shutdown(unsigned long data) +static void floppy_shutdown(struct work_struct *arg) { unsigned long flags; @@ -1868,7 +1862,7 @@ static int start_motor(void (*function)(void)) /* wait_for_completion also schedules reset if needed. */ return fd_wait_for_completion(DRS->select_date + DP->select_delay, - (timeout_fn)function); + (work_func_t)function); } static void floppy_ready(void) @@ -2821,7 +2815,6 @@ do_request: spin_lock_irq(&floppy_lock); pending = set_next_request(); spin_unlock_irq(&floppy_lock); - if (!pending) { do_floppy = NULL; unlock_fdc(); @@ -2898,13 +2891,15 @@ static void do_fd_request(struct request_queue *q) current_req->cmd_flags)) return; - if (test_bit(0, &fdc_busy)) { + if (test_and_set_bit(0, &fdc_busy)) { /* fdc busy, this new request will be treated when the current one is done */ is_alive(__func__, "old request running"); return; } - lock_fdc(MAXTIMEOUT, false); + command_status = FD_COMMAND_NONE; + __reschedule_timeout(MAXTIMEOUT, "fd_request"); + set_fdc(0); process_fd_request(); is_alive(__func__, ""); } @@ -3612,9 +3607,7 @@ static int floppy_release(struct gendisk *disk, fmode_t mode) mutex_lock(&floppy_mutex); mutex_lock(&open_lock); - if (UDRS->fd_ref < 0) - UDRS->fd_ref = 0; - else if (!UDRS->fd_ref--) { + if (!UDRS->fd_ref--) { DPRINT("floppy_release with fd_ref == 0"); UDRS->fd_ref = 0; } @@ -3650,13 +3643,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) set_bit(FD_VERIFY_BIT, &UDRS->flags); } - if (UDRS->fd_ref == -1 || (UDRS->fd_ref && (mode & FMODE_EXCL))) - goto out2; - - if (mode & FMODE_EXCL) - UDRS->fd_ref = -1; - else - UDRS->fd_ref++; + UDRS->fd_ref++; opened_bdev[drive] = bdev; @@ -3719,10 +3706,8 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) mutex_unlock(&floppy_mutex); return 0; out: - if (UDRS->fd_ref < 0) - UDRS->fd_ref = 0; - else - UDRS->fd_ref--; + UDRS->fd_ref--; + if (!UDRS->fd_ref) opened_bdev[drive] = NULL; out2: @@ -4159,10 +4144,16 @@ static int __init floppy_init(void) goto out_put_disk; } + floppy_wq = alloc_ordered_workqueue("floppy", 0); + if (!floppy_wq) { + err = -ENOMEM; + goto out_put_disk; + } + disks[dr]->queue = blk_init_queue(do_fd_request, &floppy_lock); if (!disks[dr]->queue) { err = -ENOMEM; - goto out_put_disk; + goto out_destroy_workq; } blk_queue_max_hw_sectors(disks[dr]->queue, 64); @@ -4213,7 +4204,7 @@ static int __init floppy_init(void) use_virtual_dma = can_use_virtual_dma & 1; fdc_state[0].address = FDC1; if (fdc_state[0].address == -1) { - del_timer_sync(&fd_timeout); + cancel_delayed_work(&fd_timeout); err = -ENODEV; goto out_unreg_region; } @@ -4224,7 +4215,7 @@ static int __init floppy_init(void) fdc = 0; /* reset fdc in case of unexpected interrupt */ err = floppy_grab_irq_and_dma(); if (err) { - del_timer_sync(&fd_timeout); + cancel_delayed_work(&fd_timeout); err = -EBUSY; goto out_unreg_region; } @@ -4281,13 +4272,13 @@ static int __init floppy_init(void) user_reset_fdc(-1, FD_RESET_ALWAYS, false); } fdc = 0; - del_timer_sync(&fd_timeout); + cancel_delayed_work(&fd_timeout); current_drive = 0; initialized = true; if (have_no_fdc) { DPRINT("no floppy controllers found\n"); err = have_no_fdc; - goto out_flush_work; + goto out_release_dma; } for (drive = 0; drive < N_DRIVE; drive++) { @@ -4302,7 +4293,7 @@ static int __init floppy_init(void) err = platform_device_register(&floppy_device[drive]); if (err) - goto out_flush_work; + goto out_release_dma; err = device_create_file(&floppy_device[drive].dev, &dev_attr_cmos); @@ -4320,13 +4311,14 @@ static int __init floppy_init(void) out_unreg_platform_dev: platform_device_unregister(&floppy_device[drive]); -out_flush_work: - flush_work_sync(&floppy_work); +out_release_dma: if (atomic_read(&usage_count)) floppy_release_irq_and_dma(); out_unreg_region: blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); platform_driver_unregister(&floppy_driver); +out_destroy_workq: + destroy_workqueue(floppy_wq); out_unreg_blkdev: unregister_blkdev(FLOPPY_MAJOR, "fd"); out_put_disk: @@ -4397,7 +4389,7 @@ static int floppy_grab_irq_and_dma(void) * We might have scheduled a free_irq(), wait it to * drain first: */ - flush_work_sync(&floppy_work); + flush_workqueue(floppy_wq); if (fd_request_irq()) { DPRINT("Unable to grab IRQ%d for the floppy driver\n", @@ -4488,9 +4480,9 @@ static void floppy_release_irq_and_dma(void) pr_info("motor off timer %d still active\n", drive); #endif - if (timer_pending(&fd_timeout)) + if (delayed_work_pending(&fd_timeout)) pr_info("floppy timer still active:%s\n", timeout_message); - if (timer_pending(&fd_timer)) + if (delayed_work_pending(&fd_timer)) pr_info("auxiliary floppy timer still active\n"); if (work_pending(&floppy_work)) pr_info("work still pending\n"); @@ -4560,8 +4552,9 @@ static void __exit floppy_module_exit(void) put_disk(disks[drive]); } - del_timer_sync(&fd_timeout); - del_timer_sync(&fd_timer); + cancel_delayed_work_sync(&fd_timeout); + cancel_delayed_work_sync(&fd_timer); + destroy_workqueue(floppy_wq); if (atomic_read(&usage_count)) floppy_release_irq_and_dma(); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 4e86393a09cf..60eed4bdd2e4 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -526,6 +526,14 @@ static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) return 0; } +static char *encode_disk_name(char *ptr, unsigned int n) +{ + if (n >= 26) + ptr = encode_disk_name(ptr, n / 26 - 1); + *ptr = 'a' + n % 26; + return ptr + 1; +} + static int xlvbd_alloc_gendisk(blkif_sector_t capacity, struct blkfront_info *info, u16 vdisk_info, u16 sector_size) @@ -536,6 +544,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, unsigned int offset; int minor; int nr_parts; + char *ptr; BUG_ON(info->gd != NULL); BUG_ON(info->rq != NULL); @@ -560,7 +569,11 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, "emulated IDE disks,\n\t choose an xvd device name" "from xvde on\n", info->vdevice); } - err = -ENODEV; + if (minor >> MINORBITS) { + pr_warn("blkfront: %#x's minor (%#x) out of range; ignoring\n", + info->vdevice, minor); + return -ENODEV; + } if ((minor % nr_parts) == 0) nr_minors = nr_parts; @@ -574,23 +587,14 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, if (gd == NULL) goto release; - if (nr_minors > 1) { - if (offset < 26) - sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); - else - sprintf(gd->disk_name, "%s%c%c", DEV_NAME, - 'a' + ((offset / 26)-1), 'a' + (offset % 26)); - } else { - if (offset < 26) - sprintf(gd->disk_name, "%s%c%d", DEV_NAME, - 'a' + offset, - minor & (nr_parts - 1)); - else - sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME, - 'a' + ((offset / 26) - 1), - 'a' + (offset % 26), - minor & (nr_parts - 1)); - } + strcpy(gd->disk_name, DEV_NAME); + ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset); + BUG_ON(ptr >= gd->disk_name + DISK_NAME_LEN); + if (nr_minors > 1) + *ptr = 0; + else + snprintf(ptr, gd->disk_name + DISK_NAME_LEN - ptr, + "%d", minor & (nr_parts - 1)); gd->major = XENVBD_MAJOR; gd->first_minor = minor; @@ -1496,7 +1500,9 @@ module_init(xlblk_init); static void __exit xlblk_exit(void) { - return xenbus_unregister_driver(&blkfront_driver); + xenbus_unregister_driver(&blkfront_driver); + unregister_blkdev(XENVBD_MAJOR, DEV_NAME); + kfree(minors); } module_exit(xlblk_exit); diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 92cea9d77ec9..08a7aa722d6b 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -2116,7 +2116,7 @@ out: return ret; } -static int format_check(struct drm_mode_fb_cmd2 *r) +static int format_check(const struct drm_mode_fb_cmd2 *r) { uint32_t format = r->pixel_format & ~DRM_FORMAT_BIG_ENDIAN; @@ -2185,7 +2185,7 @@ static int format_check(struct drm_mode_fb_cmd2 *r) } } -static int framebuffer_check(struct drm_mode_fb_cmd2 *r) +static int framebuffer_check(const struct drm_mode_fb_cmd2 *r) { int ret, hsub, vsub, num_planes, i; @@ -3126,7 +3126,7 @@ int drm_mode_connector_update_edid_property(struct drm_connector *connector, EXPORT_SYMBOL(drm_mode_connector_update_edid_property); static bool drm_property_change_is_valid(struct drm_property *property, - __u64 value) + uint64_t value) { if (property->flags & DRM_MODE_PROP_IMMUTABLE) return false; @@ -3136,7 +3136,7 @@ static bool drm_property_change_is_valid(struct drm_property *property, return true; } else if (property->flags & DRM_MODE_PROP_BITMASK) { int i; - __u64 valid_mask = 0; + uint64_t valid_mask = 0; for (i = 0; i < property->num_values; i++) valid_mask |= (1ULL << property->values[i]); return !(value & ~valid_mask); diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 608bddfc7e35..c3b5139eba7f 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -66,6 +66,8 @@ #define EDID_QUIRK_FIRST_DETAILED_PREFERRED (1 << 5) /* use +hsync +vsync for detailed mode */ #define EDID_QUIRK_DETAILED_SYNC_PP (1 << 6) +/* Force reduced-blanking timings for detailed modes */ +#define EDID_QUIRK_FORCE_REDUCED_BLANKING (1 << 7) struct detailed_mode_closure { struct drm_connector *connector; @@ -120,6 +122,9 @@ static struct edid_quirk { /* Samsung SyncMaster 22[5-6]BW */ { "SAM", 596, EDID_QUIRK_PREFER_LARGE_60 }, { "SAM", 638, EDID_QUIRK_PREFER_LARGE_60 }, + + /* ViewSonic VA2026w */ + { "VSC", 5020, EDID_QUIRK_FORCE_REDUCED_BLANKING }, }; /*** DDC fetch and block validation ***/ @@ -885,12 +890,19 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev, "Wrong Hsync/Vsync pulse width\n"); return NULL; } + + if (quirks & EDID_QUIRK_FORCE_REDUCED_BLANKING) { + mode = drm_cvt_mode(dev, hactive, vactive, 60, true, false, false); + if (!mode) + return NULL; + + goto set_size; + } + mode = drm_mode_create(dev); if (!mode) return NULL; - mode->type = DRM_MODE_TYPE_DRIVER; - if (quirks & EDID_QUIRK_135_CLOCK_TOO_HIGH) timing->pixel_clock = cpu_to_le16(1088); @@ -914,8 +926,6 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev, drm_mode_do_interlace_quirk(mode, pt); - drm_mode_set_name(mode); - if (quirks & EDID_QUIRK_DETAILED_SYNC_PP) { pt->misc |= DRM_EDID_PT_HSYNC_POSITIVE | DRM_EDID_PT_VSYNC_POSITIVE; } @@ -925,6 +935,7 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev, mode->flags |= (pt->misc & DRM_EDID_PT_VSYNC_POSITIVE) ? DRM_MODE_FLAG_PVSYNC : DRM_MODE_FLAG_NVSYNC; +set_size: mode->width_mm = pt->width_mm_lo | (pt->width_height_mm_hi & 0xf0) << 4; mode->height_mm = pt->height_mm_lo | (pt->width_height_mm_hi & 0xf) << 8; @@ -938,6 +949,9 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev, mode->height_mm = edid->height_cm * 10; } + mode->type = DRM_MODE_TYPE_DRIVER; + drm_mode_set_name(mode); + return mode; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index eb2b3c25b9e1..5363e9c66c27 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2032,6 +2032,8 @@ void i915_debugfs_cleanup(struct drm_minor *minor) 1, minor); drm_debugfs_remove_files((struct drm_info_list *) &i915_ring_stop_fops, 1, minor); + drm_debugfs_remove_files((struct drm_info_list *) &i915_error_state_fops, + 1, minor); } #endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c1e5c66553df..288d7b8f49ae 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2063,10 +2063,8 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) if (obj->gtt_space == NULL) return 0; - if (obj->pin_count != 0) { - DRM_ERROR("Attempting to unbind pinned buffer\n"); - return -EINVAL; - } + if (obj->pin_count) + return -EBUSY; ret = i915_gem_object_finish_gpu(obj); if (ret) @@ -3293,6 +3291,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; struct address_space *mapping; + u32 mask; obj = kzalloc(sizeof(*obj), GFP_KERNEL); if (obj == NULL) @@ -3303,8 +3302,15 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, return NULL; } + mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; + if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { + /* 965gm cannot relocate objects above 4GiB. */ + mask &= ~__GFP_HIGHMEM; + mask |= __GFP_DMA32; + } + mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; - mapping_set_gfp_mask(mapping, GFP_HIGHUSER | __GFP_RECLAIMABLE); + mapping_set_gfp_mask(mapping, mask); i915_gem_info_add_obj(dev_priv, size); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index cc4a63307611..1417660a93ec 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -350,8 +350,8 @@ static void gen6_pm_rps_work(struct work_struct *work) { drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t, rps_work); - u8 new_delay = dev_priv->cur_delay; u32 pm_iir, pm_imr; + u8 new_delay; spin_lock_irq(&dev_priv->rps_lock); pm_iir = dev_priv->pm_iir; @@ -360,41 +360,18 @@ static void gen6_pm_rps_work(struct work_struct *work) I915_WRITE(GEN6_PMIMR, 0); spin_unlock_irq(&dev_priv->rps_lock); - if (!pm_iir) + if ((pm_iir & GEN6_PM_DEFERRED_EVENTS) == 0) return; mutex_lock(&dev_priv->dev->struct_mutex); - if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { - if (dev_priv->cur_delay != dev_priv->max_delay) - new_delay = dev_priv->cur_delay + 1; - if (new_delay > dev_priv->max_delay) - new_delay = dev_priv->max_delay; - } else if (pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT)) { - gen6_gt_force_wake_get(dev_priv); - if (dev_priv->cur_delay != dev_priv->min_delay) - new_delay = dev_priv->cur_delay - 1; - if (new_delay < dev_priv->min_delay) { - new_delay = dev_priv->min_delay; - I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, - I915_READ(GEN6_RP_INTERRUPT_LIMITS) | - ((new_delay << 16) & 0x3f0000)); - } else { - /* Make sure we continue to get down interrupts - * until we hit the minimum frequency */ - I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, - I915_READ(GEN6_RP_INTERRUPT_LIMITS) & ~0x3f0000); - } - gen6_gt_force_wake_put(dev_priv); - } + + if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) + new_delay = dev_priv->cur_delay + 1; + else + new_delay = dev_priv->cur_delay - 1; gen6_set_rps(dev_priv->dev, new_delay); - dev_priv->cur_delay = new_delay; - /* - * rps_lock not held here because clearing is non-destructive. There is - * an *extremely* unlikely race with gen6_rps_enable() that is prevented - * by holding struct_mutex for the duration of the write. - */ mutex_unlock(&dev_priv->dev->struct_mutex); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ee61ad1e642b..914789420906 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -910,9 +910,10 @@ static void assert_pll(struct drm_i915_private *dev_priv, /* For ILK+ */ static void assert_pch_pll(struct drm_i915_private *dev_priv, - struct intel_crtc *intel_crtc, bool state) + struct intel_pch_pll *pll, + struct intel_crtc *crtc, + bool state) { - int reg; u32 val; bool cur_state; @@ -921,30 +922,37 @@ static void assert_pch_pll(struct drm_i915_private *dev_priv, return; } - if (!intel_crtc->pch_pll) { - WARN(1, "asserting PCH PLL enabled with no PLL\n"); + if (WARN (!pll, + "asserting PCH PLL %s with no PLL\n", state_string(state))) return; - } - if (HAS_PCH_CPT(dev_priv->dev)) { + val = I915_READ(pll->pll_reg); + cur_state = !!(val & DPLL_VCO_ENABLE); + WARN(cur_state != state, + "PCH PLL state for reg %x assertion failure (expected %s, current %s), val=%08x\n", + pll->pll_reg, state_string(state), state_string(cur_state), val); + + /* Make sure the selected PLL is correctly attached to the transcoder */ + if (crtc && HAS_PCH_CPT(dev_priv->dev)) { u32 pch_dpll; pch_dpll = I915_READ(PCH_DPLL_SEL); - - /* Make sure the selected PLL is enabled to the transcoder */ - WARN(!((pch_dpll >> (4 * intel_crtc->pipe)) & 8), - "transcoder %d PLL not enabled\n", intel_crtc->pipe); + cur_state = pll->pll_reg == _PCH_DPLL_B; + if (!WARN(((pch_dpll >> (4 * crtc->pipe)) & 1) != cur_state, + "PLL[%d] not attached to this transcoder %d: %08x\n", + cur_state, crtc->pipe, pch_dpll)) { + cur_state = !!(val >> (4*crtc->pipe + 3)); + WARN(cur_state != state, + "PLL[%d] not %s on this transcoder %d: %08x\n", + pll->pll_reg == _PCH_DPLL_B, + state_string(state), + crtc->pipe, + val); + } } - - reg = intel_crtc->pch_pll->pll_reg; - val = I915_READ(reg); - cur_state = !!(val & DPLL_VCO_ENABLE); - WARN(cur_state != state, - "PCH PLL state assertion failure (expected %s, current %s)\n", - state_string(state), state_string(cur_state)); } -#define assert_pch_pll_enabled(d, p) assert_pch_pll(d, p, true) -#define assert_pch_pll_disabled(d, p) assert_pch_pll(d, p, false) +#define assert_pch_pll_enabled(d, p, c) assert_pch_pll(d, p, c, true) +#define assert_pch_pll_disabled(d, p, c) assert_pch_pll(d, p, c, false) static void assert_fdi_tx(struct drm_i915_private *dev_priv, enum pipe pipe, bool state) @@ -1424,7 +1432,7 @@ static void intel_enable_pch_pll(struct intel_crtc *intel_crtc) assert_pch_refclk_enabled(dev_priv); if (pll->active++ && pll->on) { - assert_pch_pll_enabled(dev_priv, intel_crtc); + assert_pch_pll_enabled(dev_priv, pll, NULL); return; } @@ -1460,12 +1468,12 @@ static void intel_disable_pch_pll(struct intel_crtc *intel_crtc) intel_crtc->base.base.id); if (WARN_ON(pll->active == 0)) { - assert_pch_pll_disabled(dev_priv, intel_crtc); + assert_pch_pll_disabled(dev_priv, pll, NULL); return; } if (--pll->active) { - assert_pch_pll_enabled(dev_priv, intel_crtc); + assert_pch_pll_enabled(dev_priv, pll, NULL); return; } @@ -1495,7 +1503,9 @@ static void intel_enable_transcoder(struct drm_i915_private *dev_priv, BUG_ON(dev_priv->info->gen < 5); /* Make sure PCH DPLL is enabled */ - assert_pch_pll_enabled(dev_priv, to_intel_crtc(crtc)); + assert_pch_pll_enabled(dev_priv, + to_intel_crtc(crtc)->pch_pll, + to_intel_crtc(crtc)); /* FDI must be feeding us bits for PCH ports */ assert_fdi_tx_enabled(dev_priv, pipe); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 71c7096e3869..296cfc201a81 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -266,6 +266,9 @@ intel_dp_mode_valid(struct drm_connector *connector, if (mode->clock < 10000) return MODE_CLOCK_LOW; + if (mode->flags & DRM_MODE_FLAG_DBLCLK) + return MODE_H_ILLEGAL; + return MODE_OK; } @@ -702,6 +705,9 @@ intel_dp_mode_fixup(struct drm_encoder *encoder, struct drm_display_mode *mode, mode->clock = intel_dp->panel_fixed_mode->clock; } + if (mode->flags & DRM_MODE_FLAG_DBLCLK) + return false; + DRM_DEBUG_KMS("DP link computation with max lane count %i " "max bw %02x pixel clock %iKHz\n", max_lane_count, bws[max_clock], mode->clock); @@ -1154,11 +1160,10 @@ static void ironlake_edp_panel_off(struct intel_dp *intel_dp) DRM_DEBUG_KMS("Turn eDP power off\n"); - WARN(intel_dp->want_panel_vdd, "Cannot turn power off while VDD is on\n"); - ironlake_panel_vdd_off_sync(intel_dp); /* finish any pending work */ + WARN(!intel_dp->want_panel_vdd, "Need VDD to turn off panel\n"); pp = ironlake_get_pp_control(dev_priv); - pp &= ~(POWER_TARGET_ON | EDP_FORCE_VDD | PANEL_POWER_RESET | EDP_BLC_ENABLE); + pp &= ~(POWER_TARGET_ON | PANEL_POWER_RESET | EDP_BLC_ENABLE); I915_WRITE(PCH_PP_CONTROL, pp); POSTING_READ(PCH_PP_CONTROL); @@ -1266,18 +1271,16 @@ static void intel_dp_prepare(struct drm_encoder *encoder) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + /* Make sure the panel is off before trying to change the mode. But also + * ensure that we have vdd while we switch off the panel. */ + ironlake_edp_panel_vdd_on(intel_dp); ironlake_edp_backlight_off(intel_dp); ironlake_edp_panel_off(intel_dp); - /* Wake up the sink first */ - ironlake_edp_panel_vdd_on(intel_dp); intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); intel_dp_link_down(intel_dp); ironlake_edp_panel_vdd_off(intel_dp, false); - - /* Make sure the panel is off before trying to - * change the mode - */ } static void intel_dp_commit(struct drm_encoder *encoder) @@ -1309,10 +1312,11 @@ intel_dp_dpms(struct drm_encoder *encoder, int mode) uint32_t dp_reg = I915_READ(intel_dp->output_reg); if (mode != DRM_MODE_DPMS_ON) { + /* Switching the panel off requires vdd. */ + ironlake_edp_panel_vdd_on(intel_dp); ironlake_edp_backlight_off(intel_dp); ironlake_edp_panel_off(intel_dp); - ironlake_edp_panel_vdd_on(intel_dp); intel_dp_sink_dpms(intel_dp, mode); intel_dp_link_down(intel_dp); ironlake_edp_panel_vdd_off(intel_dp, false); diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 4a9707dd0f9c..1991a4408cf9 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -396,11 +396,22 @@ clear_err: * Wait for bus to IDLE before clearing NAK. * If we clear the NAK while bus is still active, then it will stay * active and the next transaction may fail. + * + * If no ACK is received during the address phase of a transaction, the + * adapter must report -ENXIO. It is not clear what to return if no ACK + * is received at other times. But we have to be careful to not return + * spurious -ENXIO because that will prevent i2c and drm edid functions + * from retrying. So return -ENXIO only when gmbus properly quiescents - + * timing out seems to happen when there _is_ a ddc chip present, but + * it's slow responding and only answers on the 2nd retry. */ + ret = -ENXIO; if (wait_for((I915_READ(GMBUS2 + reg_offset) & GMBUS_ACTIVE) == 0, - 10)) + 10)) { DRM_DEBUG_KMS("GMBUS [%s] timed out after NAK\n", adapter->name); + ret = -ETIMEDOUT; + } /* Toggle the Software Clear Interrupt bit. This has the effect * of resetting the GMBUS controller and so clearing the @@ -414,14 +425,6 @@ clear_err: adapter->name, msgs[i].addr, (msgs[i].flags & I2C_M_RD) ? 'r' : 'w', msgs[i].len); - /* - * If no ACK is received during the address phase of a transaction, - * the adapter must report -ENXIO. - * It is not clear what to return if no ACK is received at other times. - * So, we always return -ENXIO in all NAK cases, to ensure we send - * it at least during the one case that is specified. - */ - ret = -ENXIO; goto out; timeout: diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 9dee82350def..08eb04c787e8 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -747,6 +747,14 @@ static const struct dmi_system_id intel_no_lvds[] = { }, { .callback = intel_no_lvds_dmi_callback, + .ident = "Hewlett-Packard HP t5740e Thin Client", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP t5740e Thin Client"), + }, + }, + { + .callback = intel_no_lvds_dmi_callback, .ident = "Hewlett-Packard t5745", .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 8e79ff67ec98..d0ce2a5b1d3f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2270,10 +2270,33 @@ void ironlake_disable_drps(struct drm_device *dev) void gen6_set_rps(struct drm_device *dev, u8 val) { struct drm_i915_private *dev_priv = dev->dev_private; - u32 swreq; + u32 limits; - swreq = (val & 0x3ff) << 25; - I915_WRITE(GEN6_RPNSWREQ, swreq); + limits = 0; + if (val >= dev_priv->max_delay) + val = dev_priv->max_delay; + else + limits |= dev_priv->max_delay << 24; + + if (val <= dev_priv->min_delay) + val = dev_priv->min_delay; + else + limits |= dev_priv->min_delay << 16; + + if (val == dev_priv->cur_delay) + return; + + I915_WRITE(GEN6_RPNSWREQ, + GEN6_FREQUENCY(val) | + GEN6_OFFSET(0) | + GEN6_AGGRESSIVE_TURBO); + + /* Make sure we continue to get interrupts + * until we hit the minimum or maximum frequencies. + */ + I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, limits); + + dev_priv->cur_delay = val; } void gen6_disable_rps(struct drm_device *dev) @@ -2327,11 +2350,10 @@ int intel_enable_rc6(const struct drm_device *dev) void gen6_enable_rps(struct drm_i915_private *dev_priv) { struct intel_ring_buffer *ring; - u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); - u32 gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS); + u32 rp_state_cap; + u32 gt_perf_status; u32 pcu_mbox, rc6_mask = 0; u32 gtfifodbg; - int cur_freq, min_freq, max_freq; int rc6_mode; int i; @@ -2352,6 +2374,14 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv) gen6_gt_force_wake_get(dev_priv); + rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); + gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS); + + /* In units of 100MHz */ + dev_priv->max_delay = rp_state_cap & 0xff; + dev_priv->min_delay = (rp_state_cap & 0xff0000) >> 16; + dev_priv->cur_delay = 0; + /* disable the counters and set deterministic thresholds */ I915_WRITE(GEN6_RC_CONTROL, 0); @@ -2399,8 +2429,8 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, - 18 << 24 | - 6 << 16); + dev_priv->max_delay << 24 | + dev_priv->min_delay << 16); I915_WRITE(GEN6_RP_UP_THRESHOLD, 10000); I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 1000000); I915_WRITE(GEN6_RP_UP_EI, 100000); @@ -2408,7 +2438,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); I915_WRITE(GEN6_RP_CONTROL, GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_MODE | + GEN6_RP_MEDIA_HW_NORMAL_MODE | GEN6_RP_MEDIA_IS_GFX | GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG | @@ -2426,10 +2456,6 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv) 500)) DRM_ERROR("timeout waiting for pcode mailbox to finish\n"); - min_freq = (rp_state_cap & 0xff0000) >> 16; - max_freq = rp_state_cap & 0xff; - cur_freq = (gt_perf_status & 0xff00) >> 8; - /* Check for overclock support */ if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, 500)) @@ -2440,14 +2466,11 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv) 500)) DRM_ERROR("timeout waiting for pcode mailbox to finish\n"); if (pcu_mbox & (1<<31)) { /* OC supported */ - max_freq = pcu_mbox & 0xff; + dev_priv->max_delay = pcu_mbox & 0xff; DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 50); } - /* In units of 100MHz */ - dev_priv->max_delay = max_freq; - dev_priv->min_delay = min_freq; - dev_priv->cur_delay = cur_freq; + gen6_set_rps(dev_priv->dev, (gt_perf_status & 0xff00) >> 8); /* requires MSI enabled */ I915_WRITE(GEN6_PMIER, @@ -3580,8 +3603,9 @@ static void gen6_sanitize_pm(struct drm_device *dev) limits |= (dev_priv->min_delay & 0x3f) << 16; if (old != limits) { - DRM_ERROR("Power management discrepancy: GEN6_RP_INTERRUPT_LIMITS expected %08x, was %08x\n", - limits, old); + /* Note that the known failure case is to read back 0. */ + DRM_DEBUG_DRIVER("Power management discrepancy: GEN6_RP_INTERRUPT_LIMITS " + "expected %08x, was %08x\n", limits, old); I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, limits); } diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index a949b73880c8..b6a9d45fc3c6 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -783,10 +783,12 @@ static void intel_sdvo_get_dtd_from_mode(struct intel_sdvo_dtd *dtd, ((v_sync_len & 0x30) >> 4); dtd->part2.dtd_flags = 0x18; + if (mode->flags & DRM_MODE_FLAG_INTERLACE) + dtd->part2.dtd_flags |= DTD_FLAG_INTERLACE; if (mode->flags & DRM_MODE_FLAG_PHSYNC) - dtd->part2.dtd_flags |= 0x2; + dtd->part2.dtd_flags |= DTD_FLAG_HSYNC_POSITIVE; if (mode->flags & DRM_MODE_FLAG_PVSYNC) - dtd->part2.dtd_flags |= 0x4; + dtd->part2.dtd_flags |= DTD_FLAG_VSYNC_POSITIVE; dtd->part2.sdvo_flags = 0; dtd->part2.v_sync_off_high = v_sync_offset & 0xc0; @@ -820,9 +822,11 @@ static void intel_sdvo_get_mode_from_dtd(struct drm_display_mode * mode, mode->clock = dtd->part1.clock * 10; mode->flags &= ~(DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC); - if (dtd->part2.dtd_flags & 0x2) + if (dtd->part2.dtd_flags & DTD_FLAG_INTERLACE) + mode->flags |= DRM_MODE_FLAG_INTERLACE; + if (dtd->part2.dtd_flags & DTD_FLAG_HSYNC_POSITIVE) mode->flags |= DRM_MODE_FLAG_PHSYNC; - if (dtd->part2.dtd_flags & 0x4) + if (dtd->part2.dtd_flags & DTD_FLAG_VSYNC_POSITIVE) mode->flags |= DRM_MODE_FLAG_PVSYNC; } diff --git a/drivers/gpu/drm/i915/intel_sdvo_regs.h b/drivers/gpu/drm/i915/intel_sdvo_regs.h index 6b7b22f4d63e..9d030142ee43 100644 --- a/drivers/gpu/drm/i915/intel_sdvo_regs.h +++ b/drivers/gpu/drm/i915/intel_sdvo_regs.h @@ -61,6 +61,11 @@ struct intel_sdvo_caps { u16 output_flags; } __attribute__((packed)); +/* Note: SDVO detailed timing flags match EDID misc flags. */ +#define DTD_FLAG_HSYNC_POSITIVE (1 << 1) +#define DTD_FLAG_VSYNC_POSITIVE (1 << 2) +#define DTD_FLAG_INTERLACE (1 << 7) + /** This matches the EDID DTD structure, more or less */ struct intel_sdvo_dtd { struct { diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 3346612d2953..a233a51fd7e6 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -674,6 +674,54 @@ static const struct tv_mode tv_modes[] = { .filter_table = filter_table, }, { + .name = "480p", + .clock = 107520, + .refresh = 59940, + .oversample = TV_OVERSAMPLE_4X, + .component_only = 1, + + .hsync_end = 64, .hblank_end = 122, + .hblank_start = 842, .htotal = 857, + + .progressive = true, .trilevel_sync = false, + + .vsync_start_f1 = 12, .vsync_start_f2 = 12, + .vsync_len = 12, + + .veq_ena = false, + + .vi_end_f1 = 44, .vi_end_f2 = 44, + .nbr_end = 479, + + .burst_ena = false, + + .filter_table = filter_table, + }, + { + .name = "576p", + .clock = 107520, + .refresh = 50000, + .oversample = TV_OVERSAMPLE_4X, + .component_only = 1, + + .hsync_end = 64, .hblank_end = 139, + .hblank_start = 859, .htotal = 863, + + .progressive = true, .trilevel_sync = false, + + .vsync_start_f1 = 10, .vsync_start_f2 = 10, + .vsync_len = 10, + + .veq_ena = false, + + .vi_end_f1 = 48, .vi_end_f2 = 48, + .nbr_end = 575, + + .burst_ena = false, + + .filter_table = filter_table, + }, + { .name = "720p@60Hz", .clock = 148800, .refresh = 60000, @@ -1194,6 +1242,11 @@ intel_tv_detect_type(struct intel_tv *intel_tv, I915_WRITE(TV_DAC, save_tv_dac & ~TVDAC_STATE_CHG_EN); I915_WRITE(TV_CTL, save_tv_ctl); + POSTING_READ(TV_CTL); + + /* For unknown reasons the hw barfs if we don't do this vblank wait. */ + intel_wait_for_vblank(intel_tv->base.base.dev, + to_intel_crtc(intel_tv->base.base.crtc)->pipe); /* Restore interrupt config */ if (connector->polled & DRM_CONNECTOR_POLL_HPD) { diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index b01c2dd627b0..ce4e7cc6c905 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -865,7 +865,7 @@ static void cayman_gpu_init(struct radeon_device *rdev) /* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */ if (rdev->flags & RADEON_IS_IGP) - rdev->config.evergreen.tile_config |= 1 << 4; + rdev->config.cayman.tile_config |= 1 << 4; else rdev->config.cayman.tile_config |= ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 1dc3a4aba020..492654f8ee74 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -848,7 +848,6 @@ struct radeon_cs_parser { s32 priority; }; -extern int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx); extern int radeon_cs_finish_pages(struct radeon_cs_parser *p); extern u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx); diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index f6e69b8c06c6..b1e3820df363 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -444,7 +444,9 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev, */ if ((dev->pdev->device == 0x9498) && (dev->pdev->subsystem_vendor == 0x1682) && - (dev->pdev->subsystem_device == 0x2452)) { + (dev->pdev->subsystem_device == 0x2452) && + (i2c_bus->valid == false) && + !(supported_device & (ATOM_DEVICE_TV_SUPPORT | ATOM_DEVICE_CV_SUPPORT))) { struct radeon_device *rdev = dev->dev_private; *i2c_bus = radeon_lookup_i2c_gpio(rdev, 0x93); } diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index c7d64a739033..0137689ed461 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -580,7 +580,7 @@ int radeon_cs_finish_pages(struct radeon_cs_parser *p) return 0; } -int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx) +static int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx) { int new_page; struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx]; @@ -623,3 +623,28 @@ int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx) return new_page; } + +u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx) +{ + struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx]; + u32 pg_idx, pg_offset; + u32 idx_value = 0; + int new_page; + + pg_idx = (idx * 4) / PAGE_SIZE; + pg_offset = (idx * 4) % PAGE_SIZE; + + if (ibc->kpage_idx[0] == pg_idx) + return ibc->kpage[0][pg_offset/4]; + if (ibc->kpage_idx[1] == pg_idx) + return ibc->kpage[1][pg_offset/4]; + + new_page = radeon_cs_update_pages(p, pg_idx); + if (new_page < 0) { + p->parser_error = new_page; + return 0; + } + + idx_value = ibc->kpage[new_page][pg_offset/4]; + return idx_value; +} diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 493a7be75306..983658c91358 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -39,31 +39,6 @@ */ int radeon_debugfs_sa_init(struct radeon_device *rdev); -u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx) -{ - struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx]; - u32 pg_idx, pg_offset; - u32 idx_value = 0; - int new_page; - - pg_idx = (idx * 4) / PAGE_SIZE; - pg_offset = (idx * 4) % PAGE_SIZE; - - if (ibc->kpage_idx[0] == pg_idx) - return ibc->kpage[0][pg_offset/4]; - if (ibc->kpage_idx[1] == pg_idx) - return ibc->kpage[1][pg_offset/4]; - - new_page = radeon_cs_update_pages(p, pg_idx); - if (new_page < 0) { - p->parser_error = new_page; - return 0; - } - - idx_value = ibc->kpage[new_page][pg_offset/4]; - return idx_value; -} - int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib *ib, unsigned size) { diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c index 40efd32f7dce..97acc9c6c95b 100644 --- a/drivers/gpu/drm/udl/udl_gem.c +++ b/drivers/gpu/drm/udl/udl_gem.c @@ -234,7 +234,7 @@ int udl_gem_mmap(struct drm_file *file, struct drm_device *dev, ret = udl_gem_get_pages(gobj, GFP_KERNEL); if (ret) - return ret; + goto out; if (!gobj->base.map_list.map) { ret = drm_gem_create_mmap_offset(obj); if (ret) @@ -257,8 +257,6 @@ static int udl_prime_create(struct drm_device *dev, { struct udl_gem_object *obj; int npages; - int i; - struct scatterlist *iter; npages = size / PAGE_SIZE; diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 7cd9bf42108b..6f1d167cb1ea 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1036,8 +1036,9 @@ config SENSORS_SCH56XX_COMMON config SENSORS_SCH5627 tristate "SMSC SCH5627" - depends on !PPC + depends on !PPC && WATCHDOG select SENSORS_SCH56XX_COMMON + select WATCHDOG_CORE help If you say yes here you get support for the hardware monitoring features of the SMSC SCH5627 Super-I/O chip including support for @@ -1048,8 +1049,9 @@ config SENSORS_SCH5627 config SENSORS_SCH5636 tristate "SMSC SCH5636" - depends on !PPC + depends on !PPC && WATCHDOG select SENSORS_SCH56XX_COMMON + select WATCHDOG_CORE help SMSC SCH5636 Super I/O chips include an embedded microcontroller for hardware monitoring solutions, allowing motherboard manufacturers to diff --git a/drivers/hwmon/sch5627.c b/drivers/hwmon/sch5627.c index 8ec6dfbccb64..8342275378b8 100644 --- a/drivers/hwmon/sch5627.c +++ b/drivers/hwmon/sch5627.c @@ -579,7 +579,7 @@ static int __devinit sch5627_probe(struct platform_device *pdev) } /* Note failing to register the watchdog is not a fatal error */ - data->watchdog = sch56xx_watchdog_register(data->addr, + data->watchdog = sch56xx_watchdog_register(&pdev->dev, data->addr, (build_code << 24) | (build_id << 8) | hwmon_rev, &data->update_lock, 1); diff --git a/drivers/hwmon/sch5636.c b/drivers/hwmon/sch5636.c index 906d4ed32d81..96a7e68718ca 100644 --- a/drivers/hwmon/sch5636.c +++ b/drivers/hwmon/sch5636.c @@ -510,7 +510,7 @@ static int __devinit sch5636_probe(struct platform_device *pdev) } /* Note failing to register the watchdog is not a fatal error */ - data->watchdog = sch56xx_watchdog_register(data->addr, + data->watchdog = sch56xx_watchdog_register(&pdev->dev, data->addr, (revision[0] << 8) | revision[1], &data->update_lock, 0); diff --git a/drivers/hwmon/sch56xx-common.c b/drivers/hwmon/sch56xx-common.c index ce52fc57d41d..4380f5d07be2 100644 --- a/drivers/hwmon/sch56xx-common.c +++ b/drivers/hwmon/sch56xx-common.c @@ -66,15 +66,10 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" struct sch56xx_watchdog_data { u16 addr; - u32 revision; struct mutex *io_lock; - struct mutex watchdog_lock; - struct list_head list; /* member of the watchdog_data_list */ struct kref kref; - struct miscdevice watchdog_miscdev; - unsigned long watchdog_is_open; - char watchdog_name[10]; /* must be unique to avoid sysfs conflict */ - char watchdog_expect_close; + struct watchdog_info wdinfo; + struct watchdog_device wddev; u8 watchdog_preset; u8 watchdog_control; u8 watchdog_output_enable; @@ -82,15 +77,6 @@ struct sch56xx_watchdog_data { static struct platform_device *sch56xx_pdev; -/* - * Somewhat ugly :( global data pointer list with all sch56xx devices, so that - * we can find our device data as when using misc_register there is no other - * method to get to ones device data from the open fop. - */ -static LIST_HEAD(watchdog_data_list); -/* Note this lock not only protect list access, but also data.kref access */ -static DEFINE_MUTEX(watchdog_data_mutex); - /* Super I/O functions */ static inline int superio_inb(int base, int reg) { @@ -272,22 +258,22 @@ EXPORT_SYMBOL(sch56xx_read_virtual_reg12); * Watchdog routines */ -/* - * Release our data struct when the platform device has been released *and* - * all references to our watchdog device are released. - */ -static void sch56xx_watchdog_release_resources(struct kref *r) +/* Release our data struct when we're unregistered *and* + all references to our watchdog device are released */ +static void watchdog_release_resources(struct kref *r) { struct sch56xx_watchdog_data *data = container_of(r, struct sch56xx_watchdog_data, kref); kfree(data); } -static int watchdog_set_timeout(struct sch56xx_watchdog_data *data, - int timeout) +static int watchdog_set_timeout(struct watchdog_device *wddev, + unsigned int timeout) { - int ret, resolution; + struct sch56xx_watchdog_data *data = watchdog_get_drvdata(wddev); + unsigned int resolution; u8 control; + int ret; /* 1 second or 60 second resolution? */ if (timeout <= 255) @@ -298,12 +284,6 @@ static int watchdog_set_timeout(struct sch56xx_watchdog_data *data, if (timeout < resolution || timeout > (resolution * 255)) return -EINVAL; - mutex_lock(&data->watchdog_lock); - if (!data->addr) { - ret = -ENODEV; - goto leave; - } - if (resolution == 1) control = data->watchdog_control | SCH56XX_WDOG_TIME_BASE_SEC; else @@ -316,7 +296,7 @@ static int watchdog_set_timeout(struct sch56xx_watchdog_data *data, control); mutex_unlock(data->io_lock); if (ret) - goto leave; + return ret; data->watchdog_control = control; } @@ -326,38 +306,17 @@ static int watchdog_set_timeout(struct sch56xx_watchdog_data *data, * the watchdog countdown. */ data->watchdog_preset = DIV_ROUND_UP(timeout, resolution); + wddev->timeout = data->watchdog_preset * resolution; - ret = data->watchdog_preset * resolution; -leave: - mutex_unlock(&data->watchdog_lock); - return ret; -} - -static int watchdog_get_timeout(struct sch56xx_watchdog_data *data) -{ - int timeout; - - mutex_lock(&data->watchdog_lock); - if (data->watchdog_control & SCH56XX_WDOG_TIME_BASE_SEC) - timeout = data->watchdog_preset; - else - timeout = data->watchdog_preset * 60; - mutex_unlock(&data->watchdog_lock); - - return timeout; + return 0; } -static int watchdog_start(struct sch56xx_watchdog_data *data) +static int watchdog_start(struct watchdog_device *wddev) { + struct sch56xx_watchdog_data *data = watchdog_get_drvdata(wddev); int ret; u8 val; - mutex_lock(&data->watchdog_lock); - if (!data->addr) { - ret = -ENODEV; - goto leave_unlock_watchdog; - } - /* * The sch56xx's watchdog cannot really be started / stopped * it is always running, but we can avoid the timer expiring @@ -385,18 +344,14 @@ static int watchdog_start(struct sch56xx_watchdog_data *data) if (ret) goto leave; - /* 2. Enable output (if not already enabled) */ - if (!(data->watchdog_output_enable & SCH56XX_WDOG_OUTPUT_ENABLE)) { - val = data->watchdog_output_enable | - SCH56XX_WDOG_OUTPUT_ENABLE; - ret = sch56xx_write_virtual_reg(data->addr, - SCH56XX_REG_WDOG_OUTPUT_ENABLE, - val); - if (ret) - goto leave; + /* 2. Enable output */ + val = data->watchdog_output_enable | SCH56XX_WDOG_OUTPUT_ENABLE; + ret = sch56xx_write_virtual_reg(data->addr, + SCH56XX_REG_WDOG_OUTPUT_ENABLE, val); + if (ret) + goto leave; - data->watchdog_output_enable = val; - } + data->watchdog_output_enable = val; /* 3. Clear the watchdog event bit if set */ val = inb(data->addr + 9); @@ -405,234 +360,70 @@ static int watchdog_start(struct sch56xx_watchdog_data *data) leave: mutex_unlock(data->io_lock); -leave_unlock_watchdog: - mutex_unlock(&data->watchdog_lock); return ret; } -static int watchdog_trigger(struct sch56xx_watchdog_data *data) +static int watchdog_trigger(struct watchdog_device *wddev) { + struct sch56xx_watchdog_data *data = watchdog_get_drvdata(wddev); int ret; - mutex_lock(&data->watchdog_lock); - if (!data->addr) { - ret = -ENODEV; - goto leave; - } - /* Reset the watchdog countdown counter */ mutex_lock(data->io_lock); ret = sch56xx_write_virtual_reg(data->addr, SCH56XX_REG_WDOG_PRESET, data->watchdog_preset); mutex_unlock(data->io_lock); -leave: - mutex_unlock(&data->watchdog_lock); + return ret; } -static int watchdog_stop_unlocked(struct sch56xx_watchdog_data *data) +static int watchdog_stop(struct watchdog_device *wddev) { + struct sch56xx_watchdog_data *data = watchdog_get_drvdata(wddev); int ret = 0; u8 val; - if (!data->addr) - return -ENODEV; - - if (data->watchdog_output_enable & SCH56XX_WDOG_OUTPUT_ENABLE) { - val = data->watchdog_output_enable & - ~SCH56XX_WDOG_OUTPUT_ENABLE; - mutex_lock(data->io_lock); - ret = sch56xx_write_virtual_reg(data->addr, - SCH56XX_REG_WDOG_OUTPUT_ENABLE, - val); - mutex_unlock(data->io_lock); - if (ret) - return ret; - - data->watchdog_output_enable = val; - } - - return ret; -} - -static int watchdog_stop(struct sch56xx_watchdog_data *data) -{ - int ret; - - mutex_lock(&data->watchdog_lock); - ret = watchdog_stop_unlocked(data); - mutex_unlock(&data->watchdog_lock); - - return ret; -} - -static int watchdog_release(struct inode *inode, struct file *filp) -{ - struct sch56xx_watchdog_data *data = filp->private_data; - - if (data->watchdog_expect_close) { - watchdog_stop(data); - data->watchdog_expect_close = 0; - } else { - watchdog_trigger(data); - pr_crit("unexpected close, not stopping watchdog!\n"); - } - - clear_bit(0, &data->watchdog_is_open); - - mutex_lock(&watchdog_data_mutex); - kref_put(&data->kref, sch56xx_watchdog_release_resources); - mutex_unlock(&watchdog_data_mutex); + val = data->watchdog_output_enable & ~SCH56XX_WDOG_OUTPUT_ENABLE; + mutex_lock(data->io_lock); + ret = sch56xx_write_virtual_reg(data->addr, + SCH56XX_REG_WDOG_OUTPUT_ENABLE, val); + mutex_unlock(data->io_lock); + if (ret) + return ret; + data->watchdog_output_enable = val; return 0; } -static int watchdog_open(struct inode *inode, struct file *filp) +static void watchdog_ref(struct watchdog_device *wddev) { - struct sch56xx_watchdog_data *pos, *data = NULL; - int ret, watchdog_is_open; - - /* - * We get called from drivers/char/misc.c with misc_mtx hold, and we - * call misc_register() from sch56xx_watchdog_probe() with - * watchdog_data_mutex hold, as misc_register() takes the misc_mtx - * lock, this is a possible deadlock, so we use mutex_trylock here. - */ - if (!mutex_trylock(&watchdog_data_mutex)) - return -ERESTARTSYS; - list_for_each_entry(pos, &watchdog_data_list, list) { - if (pos->watchdog_miscdev.minor == iminor(inode)) { - data = pos; - break; - } - } - /* Note we can never not have found data, so we don't check for this */ - watchdog_is_open = test_and_set_bit(0, &data->watchdog_is_open); - if (!watchdog_is_open) - kref_get(&data->kref); - mutex_unlock(&watchdog_data_mutex); - - if (watchdog_is_open) - return -EBUSY; - - filp->private_data = data; - - /* Start the watchdog */ - ret = watchdog_start(data); - if (ret) { - watchdog_release(inode, filp); - return ret; - } + struct sch56xx_watchdog_data *data = watchdog_get_drvdata(wddev); - return nonseekable_open(inode, filp); + kref_get(&data->kref); } -static ssize_t watchdog_write(struct file *filp, const char __user *buf, - size_t count, loff_t *offset) +static void watchdog_unref(struct watchdog_device *wddev) { - int ret; - struct sch56xx_watchdog_data *data = filp->private_data; - - if (count) { - if (!nowayout) { - size_t i; - - /* Clear it in case it was set with a previous write */ - data->watchdog_expect_close = 0; - - for (i = 0; i != count; i++) { - char c; - if (get_user(c, buf + i)) - return -EFAULT; - if (c == 'V') - data->watchdog_expect_close = 1; - } - } - ret = watchdog_trigger(data); - if (ret) - return ret; - } - return count; -} - -static long watchdog_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - struct watchdog_info ident = { - .options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT, - .identity = "sch56xx watchdog" - }; - int i, ret = 0; - struct sch56xx_watchdog_data *data = filp->private_data; - - switch (cmd) { - case WDIOC_GETSUPPORT: - ident.firmware_version = data->revision; - if (!nowayout) - ident.options |= WDIOF_MAGICCLOSE; - if (copy_to_user((void __user *)arg, &ident, sizeof(ident))) - ret = -EFAULT; - break; - - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - ret = put_user(0, (int __user *)arg); - break; - - case WDIOC_KEEPALIVE: - ret = watchdog_trigger(data); - break; + struct sch56xx_watchdog_data *data = watchdog_get_drvdata(wddev); - case WDIOC_GETTIMEOUT: - i = watchdog_get_timeout(data); - ret = put_user(i, (int __user *)arg); - break; - - case WDIOC_SETTIMEOUT: - if (get_user(i, (int __user *)arg)) { - ret = -EFAULT; - break; - } - ret = watchdog_set_timeout(data, i); - if (ret >= 0) - ret = put_user(ret, (int __user *)arg); - break; - - case WDIOC_SETOPTIONS: - if (get_user(i, (int __user *)arg)) { - ret = -EFAULT; - break; - } - - if (i & WDIOS_DISABLECARD) - ret = watchdog_stop(data); - else if (i & WDIOS_ENABLECARD) - ret = watchdog_trigger(data); - else - ret = -EINVAL; - break; - - default: - ret = -ENOTTY; - } - return ret; + kref_put(&data->kref, watchdog_release_resources); } -static const struct file_operations watchdog_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .open = watchdog_open, - .release = watchdog_release, - .write = watchdog_write, - .unlocked_ioctl = watchdog_ioctl, +static const struct watchdog_ops watchdog_ops = { + .owner = THIS_MODULE, + .start = watchdog_start, + .stop = watchdog_stop, + .ping = watchdog_trigger, + .set_timeout = watchdog_set_timeout, + .ref = watchdog_ref, + .unref = watchdog_unref, }; -struct sch56xx_watchdog_data *sch56xx_watchdog_register( +struct sch56xx_watchdog_data *sch56xx_watchdog_register(struct device *parent, u16 addr, u32 revision, struct mutex *io_lock, int check_enabled) { struct sch56xx_watchdog_data *data; - int i, err, control, output_enable; - const int watchdog_minors[] = { WATCHDOG_MINOR, 212, 213, 214, 215 }; + int err, control, output_enable; /* Cache the watchdog registers */ mutex_lock(io_lock); @@ -656,82 +447,55 @@ struct sch56xx_watchdog_data *sch56xx_watchdog_register( return NULL; data->addr = addr; - data->revision = revision; data->io_lock = io_lock; - data->watchdog_control = control; - data->watchdog_output_enable = output_enable; - mutex_init(&data->watchdog_lock); - INIT_LIST_HEAD(&data->list); kref_init(&data->kref); - err = watchdog_set_timeout(data, 60); - if (err < 0) - goto error; - - /* - * We take the data_mutex lock early so that watchdog_open() cannot - * run when misc_register() has completed, but we've not yet added - * our data to the watchdog_data_list. - */ - mutex_lock(&watchdog_data_mutex); - for (i = 0; i < ARRAY_SIZE(watchdog_minors); i++) { - /* Register our watchdog part */ - snprintf(data->watchdog_name, sizeof(data->watchdog_name), - "watchdog%c", (i == 0) ? '\0' : ('0' + i)); - data->watchdog_miscdev.name = data->watchdog_name; - data->watchdog_miscdev.fops = &watchdog_fops; - data->watchdog_miscdev.minor = watchdog_minors[i]; - err = misc_register(&data->watchdog_miscdev); - if (err == -EBUSY) - continue; - if (err) - break; + strlcpy(data->wdinfo.identity, "sch56xx watchdog", + sizeof(data->wdinfo.identity)); + data->wdinfo.firmware_version = revision; + data->wdinfo.options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT; + if (!nowayout) + data->wdinfo.options |= WDIOF_MAGICCLOSE; + + data->wddev.info = &data->wdinfo; + data->wddev.ops = &watchdog_ops; + data->wddev.parent = parent; + data->wddev.timeout = 60; + data->wddev.min_timeout = 1; + data->wddev.max_timeout = 255 * 60; + if (nowayout) + set_bit(WDOG_NO_WAY_OUT, &data->wddev.status); + if (output_enable & SCH56XX_WDOG_OUTPUT_ENABLE) + set_bit(WDOG_ACTIVE, &data->wddev.status); + + /* Since the watchdog uses a downcounter there is no register to read + the BIOS set timeout from (if any was set at all) -> + Choose a preset which will give us a 1 minute timeout */ + if (control & SCH56XX_WDOG_TIME_BASE_SEC) + data->watchdog_preset = 60; /* seconds */ + else + data->watchdog_preset = 1; /* minute */ - list_add(&data->list, &watchdog_data_list); - pr_info("Registered /dev/%s chardev major 10, minor: %d\n", - data->watchdog_name, watchdog_minors[i]); - break; - } - mutex_unlock(&watchdog_data_mutex); + data->watchdog_control = control; + data->watchdog_output_enable = output_enable; + watchdog_set_drvdata(&data->wddev, data); + err = watchdog_register_device(&data->wddev); if (err) { pr_err("Registering watchdog chardev: %d\n", err); - goto error; - } - if (i == ARRAY_SIZE(watchdog_minors)) { - pr_warn("Couldn't register watchdog (no free minor)\n"); - goto error; + kfree(data); + return NULL; } return data; - -error: - kfree(data); - return NULL; } EXPORT_SYMBOL(sch56xx_watchdog_register); void sch56xx_watchdog_unregister(struct sch56xx_watchdog_data *data) { - mutex_lock(&watchdog_data_mutex); - misc_deregister(&data->watchdog_miscdev); - list_del(&data->list); - mutex_unlock(&watchdog_data_mutex); - - mutex_lock(&data->watchdog_lock); - if (data->watchdog_is_open) { - pr_warn("platform device unregistered with watchdog " - "open! Stopping watchdog.\n"); - watchdog_stop_unlocked(data); - } - /* Tell the wdog start/stop/trigger functions our dev is gone */ - data->addr = 0; - data->io_lock = NULL; - mutex_unlock(&data->watchdog_lock); - - mutex_lock(&watchdog_data_mutex); - kref_put(&data->kref, sch56xx_watchdog_release_resources); - mutex_unlock(&watchdog_data_mutex); + watchdog_unregister_device(&data->wddev); + kref_put(&data->kref, watchdog_release_resources); + /* Don't touch data after this it may have been free-ed! */ } EXPORT_SYMBOL(sch56xx_watchdog_unregister); diff --git a/drivers/hwmon/sch56xx-common.h b/drivers/hwmon/sch56xx-common.h index 7475086eb978..704ea2c6d28a 100644 --- a/drivers/hwmon/sch56xx-common.h +++ b/drivers/hwmon/sch56xx-common.h @@ -27,6 +27,6 @@ int sch56xx_read_virtual_reg16(u16 addr, u16 reg); int sch56xx_read_virtual_reg12(u16 addr, u16 msb_reg, u16 lsn_reg, int high_nibble); -struct sch56xx_watchdog_data *sch56xx_watchdog_register( +struct sch56xx_watchdog_data *sch56xx_watchdog_register(struct device *parent, u16 addr, u32 revision, struct mutex *io_lock, int check_enabled); void sch56xx_watchdog_unregister(struct sch56xx_watchdog_data *data); diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index a5bee8e2dfce..d90a421e9cac 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -450,12 +450,27 @@ static void dump_command(unsigned long phys_addr) static void iommu_print_event(struct amd_iommu *iommu, void *__evt) { - u32 *event = __evt; - int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; - int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; - int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK; - int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; - u64 address = (u64)(((u64)event[3]) << 32) | event[2]; + int type, devid, domid, flags; + volatile u32 *event = __evt; + int count = 0; + u64 address; + +retry: + type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; + devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; + domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK; + flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; + address = (u64)(((u64)event[3]) << 32) | event[2]; + + if (type == 0) { + /* Did we hit the erratum? */ + if (++count == LOOP_TIMEOUT) { + pr_err("AMD-Vi: No event written to event log\n"); + return; + } + udelay(1); + goto retry; + } printk(KERN_ERR "AMD-Vi: Event logged ["); @@ -508,6 +523,8 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) default: printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type); } + + memset(__evt, 0, 4 * sizeof(u32)); } static void iommu_poll_events(struct amd_iommu *iommu) @@ -2035,20 +2052,20 @@ out_err: } /* FIXME: Move this to PCI code */ -#define PCI_PRI_TLP_OFF (1 << 2) +#define PCI_PRI_TLP_OFF (1 << 15) bool pci_pri_tlp_required(struct pci_dev *pdev) { - u16 control; + u16 status; int pos; pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); if (!pos) return false; - pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); + pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); - return (control & PCI_PRI_TLP_OFF) ? true : false; + return (status & PCI_PRI_TLP_OFF) ? true : false; } /* diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 2198b2dbbcd3..8b9ded88e6f5 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -119,6 +119,7 @@ EXPORT_SYMBOL_GPL(iommu_present); * iommu_set_fault_handler() - set a fault handler for an iommu domain * @domain: iommu domain * @handler: fault handler + * @token: user data, will be passed back to the fault handler * * This function should be used by IOMMU users which want to be notified * whenever an IOMMU fault happens. @@ -127,11 +128,13 @@ EXPORT_SYMBOL_GPL(iommu_present); * error code otherwise. */ void iommu_set_fault_handler(struct iommu_domain *domain, - iommu_fault_handler_t handler) + iommu_fault_handler_t handler, + void *token) { BUG_ON(!domain); domain->handler = handler; + domain->handler_token = token; } EXPORT_SYMBOL_GPL(iommu_set_fault_handler); diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 6899dcd02dfa..e70ee2b59df9 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -41,11 +41,13 @@ * @pgtable: the page table * @iommu_dev: an omap iommu device attached to this domain. only a single * iommu device can be attached for now. + * @dev: Device using this domain. * @lock: domain lock, should be taken when attaching/detaching */ struct omap_iommu_domain { u32 *pgtable; struct omap_iommu *iommu_dev; + struct device *dev; spinlock_t lock; }; @@ -1081,6 +1083,7 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) } omap_domain->iommu_dev = arch_data->iommu_dev = oiommu; + omap_domain->dev = dev; oiommu->domain = domain; out: @@ -1088,19 +1091,16 @@ out: return ret; } -static void omap_iommu_detach_dev(struct iommu_domain *domain, - struct device *dev) +static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, + struct device *dev) { - struct omap_iommu_domain *omap_domain = domain->priv; - struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; struct omap_iommu *oiommu = dev_to_omap_iommu(dev); - - spin_lock(&omap_domain->lock); + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; /* only a single device is supported per domain for now */ if (omap_domain->iommu_dev != oiommu) { dev_err(dev, "invalid iommu device\n"); - goto out; + return; } iopgtable_clear_entry_all(oiommu); @@ -1108,8 +1108,16 @@ static void omap_iommu_detach_dev(struct iommu_domain *domain, omap_iommu_detach(oiommu); omap_domain->iommu_dev = arch_data->iommu_dev = NULL; + omap_domain->dev = NULL; +} -out: +static void omap_iommu_detach_dev(struct iommu_domain *domain, + struct device *dev) +{ + struct omap_iommu_domain *omap_domain = domain->priv; + + spin_lock(&omap_domain->lock); + _omap_iommu_detach_dev(omap_domain, dev); spin_unlock(&omap_domain->lock); } @@ -1148,13 +1156,19 @@ out: return -ENOMEM; } -/* assume device was already detached */ static void omap_iommu_domain_destroy(struct iommu_domain *domain) { struct omap_iommu_domain *omap_domain = domain->priv; domain->priv = NULL; + /* + * An iommu device is still attached + * (currently, only one device can be attached) ? + */ + if (omap_domain->iommu_dev) + _omap_iommu_detach_dev(omap_domain, omap_domain->dev); + kfree(omap_domain->pgtable); kfree(omap_domain); } diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 779306ee7b16..0c0a37792218 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -29,15 +29,17 @@ #include <linux/device.h> #include <linux/io.h> #include <linux/iommu.h> +#include <linux/of.h> #include <asm/cacheflush.h> /* bitmap of the page sizes currently supported */ #define GART_IOMMU_PGSIZES (SZ_4K) -#define GART_CONFIG 0x24 -#define GART_ENTRY_ADDR 0x28 -#define GART_ENTRY_DATA 0x2c +#define GART_REG_BASE 0x24 +#define GART_CONFIG (0x24 - GART_REG_BASE) +#define GART_ENTRY_ADDR (0x28 - GART_REG_BASE) +#define GART_ENTRY_DATA (0x2c - GART_REG_BASE) #define GART_ENTRY_PHYS_ADDR_VALID (1 << 31) #define GART_PAGE_SHIFT 12 @@ -158,7 +160,7 @@ static int gart_iommu_attach_dev(struct iommu_domain *domain, struct gart_client *client, *c; int err = 0; - gart = dev_get_drvdata(dev->parent); + gart = gart_handle; if (!gart) return -EINVAL; domain->priv = gart; @@ -422,6 +424,14 @@ const struct dev_pm_ops tegra_gart_pm_ops = { .resume = tegra_gart_resume, }; +#ifdef CONFIG_OF +static struct of_device_id tegra_gart_of_match[] __devinitdata = { + { .compatible = "nvidia,tegra20-gart", }, + { }, +}; +MODULE_DEVICE_TABLE(of, tegra_gart_of_match); +#endif + static struct platform_driver tegra_gart_driver = { .probe = tegra_gart_probe, .remove = tegra_gart_remove, @@ -429,6 +439,7 @@ static struct platform_driver tegra_gart_driver = { .owner = THIS_MODULE, .name = "tegra-gart", .pm = &tegra_gart_pm_ops, + .of_match_table = of_match_ptr(tegra_gart_of_match), }, }; @@ -448,4 +459,5 @@ module_exit(tegra_gart_exit); MODULE_DESCRIPTION("IOMMU API for GART in Tegra20"); MODULE_AUTHOR("Hiroshi DOYU <hdoyu@nvidia.com>"); +MODULE_ALIAS("platform:tegra-gart"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index eb93c821f592..ecd679043d77 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -733,7 +733,7 @@ static int smmu_iommu_attach_dev(struct iommu_domain *domain, pr_info("Reserve \"page zero\" for AVP vectors using a common dummy\n"); } - dev_dbg(smmu->dev, "%s is attached\n", dev_name(c->dev)); + dev_dbg(smmu->dev, "%s is attached\n", dev_name(dev)); return 0; err_client: diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index d6f8adaa26ef..8ea7bccc7100 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -78,7 +78,7 @@ typedef int (*rproc_handle_resource_t)(struct rproc *rproc, void *, int avail); * the recovery of the remote processor. */ static int rproc_iommu_fault(struct iommu_domain *domain, struct device *dev, - unsigned long iova, int flags) + unsigned long iova, int flags, void *token) { dev_err(dev, "iommu fault: da 0x%lx flags 0x%x\n", iova, flags); @@ -117,7 +117,7 @@ static int rproc_enable_iommu(struct rproc *rproc) return -ENOMEM; } - iommu_set_fault_handler(domain, rproc_iommu_fault); + iommu_set_fault_handler(domain, rproc_iommu_fault, rproc); ret = iommu_attach_device(domain, dev); if (ret) { diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index d92d7488be16..fe819b76de56 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -64,6 +64,18 @@ config SOFT_WATCHDOG To compile this driver as a module, choose M here: the module will be called softdog. +config DA9052_WATCHDOG + tristate "Dialog DA9052 Watchdog" + depends on PMIC_DA9052 + select WATCHDOG_CORE + help + Support for the watchdog in the DA9052 PMIC. Watchdog trigger + cause system reset. + + Say Y here to include support for the DA9052 watchdog. + Alternatively say M to compile the driver as a module, + which will be called da9052_wdt. + config WM831X_WATCHDOG tristate "WM831x watchdog" depends on MFD_WM831X @@ -87,6 +99,7 @@ config WM8350_WATCHDOG config ARM_SP805_WATCHDOG tristate "ARM SP805 Watchdog" depends on ARM_AMBA + select WATCHDOG_CORE help ARM Primecell SP805 Watchdog timer. This will reboot your system when the timeout is reached. diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 442bfbe0882a..572b39bed06a 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -163,6 +163,7 @@ obj-$(CONFIG_WATCHDOG_CP1XXX) += cpwd.o obj-$(CONFIG_XEN_WDT) += xen_wdt.o # Architecture Independent +obj-$(CONFIG_DA9052_WATCHDOG) += da9052_wdt.o obj-$(CONFIG_WM831X_WATCHDOG) += wm831x_wdt.o obj-$(CONFIG_WM8350_WATCHDOG) += wm8350_wdt.o obj-$(CONFIG_MAX63XX_WATCHDOG) += max63xx_wdt.o diff --git a/drivers/watchdog/da9052_wdt.c b/drivers/watchdog/da9052_wdt.c new file mode 100644 index 000000000000..3f75129eb0a9 --- /dev/null +++ b/drivers/watchdog/da9052_wdt.c @@ -0,0 +1,251 @@ +/* + * System monitoring driver for DA9052 PMICs. + * + * Copyright(c) 2012 Dialog Semiconductor Ltd. + * + * Author: Anthony Olech <Anthony.Olech@diasemi.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/uaccess.h> +#include <linux/platform_device.h> +#include <linux/time.h> +#include <linux/watchdog.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/jiffies.h> +#include <linux/delay.h> + +#include <linux/mfd/da9052/reg.h> +#include <linux/mfd/da9052/da9052.h> + +#define DA9052_DEF_TIMEOUT 4 +#define DA9052_TWDMIN 256 + +struct da9052_wdt_data { + struct watchdog_device wdt; + struct da9052 *da9052; + struct kref kref; + unsigned long jpast; +}; + +static const struct { + u8 reg_val; + int time; /* Seconds */ +} da9052_wdt_maps[] = { + { 1, 2 }, + { 2, 4 }, + { 3, 8 }, + { 4, 16 }, + { 5, 32 }, + { 5, 33 }, /* Actual time 32.768s so included both 32s and 33s */ + { 6, 65 }, + { 6, 66 }, /* Actual time 65.536s so include both, 65s and 66s */ + { 7, 131 }, +}; + + +static void da9052_wdt_release_resources(struct kref *r) +{ + struct da9052_wdt_data *driver_data = + container_of(r, struct da9052_wdt_data, kref); + + kfree(driver_data); +} + +static int da9052_wdt_set_timeout(struct watchdog_device *wdt_dev, + unsigned int timeout) +{ + struct da9052_wdt_data *driver_data = watchdog_get_drvdata(wdt_dev); + struct da9052 *da9052 = driver_data->da9052; + int ret, i; + + /* + * Disable the Watchdog timer before setting + * new time out. + */ + ret = da9052_reg_update(da9052, DA9052_CONTROL_D_REG, + DA9052_CONTROLD_TWDSCALE, 0); + if (ret < 0) { + dev_err(da9052->dev, "Failed to disable watchdog bit, %d\n", + ret); + return ret; + } + if (timeout) { + /* + * To change the timeout, da9052 needs to + * be disabled for at least 150 us. + */ + udelay(150); + + /* Set the desired timeout */ + for (i = 0; i < ARRAY_SIZE(da9052_wdt_maps); i++) + if (da9052_wdt_maps[i].time == timeout) + break; + + if (i == ARRAY_SIZE(da9052_wdt_maps)) + ret = -EINVAL; + else + ret = da9052_reg_update(da9052, DA9052_CONTROL_D_REG, + DA9052_CONTROLD_TWDSCALE, + da9052_wdt_maps[i].reg_val); + if (ret < 0) { + dev_err(da9052->dev, + "Failed to update timescale bit, %d\n", ret); + return ret; + } + + wdt_dev->timeout = timeout; + driver_data->jpast = jiffies; + } + + return 0; +} + +static void da9052_wdt_ref(struct watchdog_device *wdt_dev) +{ + struct da9052_wdt_data *driver_data = watchdog_get_drvdata(wdt_dev); + + kref_get(&driver_data->kref); +} + +static void da9052_wdt_unref(struct watchdog_device *wdt_dev) +{ + struct da9052_wdt_data *driver_data = watchdog_get_drvdata(wdt_dev); + + kref_put(&driver_data->kref, da9052_wdt_release_resources); +} + +static int da9052_wdt_start(struct watchdog_device *wdt_dev) +{ + return da9052_wdt_set_timeout(wdt_dev, wdt_dev->timeout); +} + +static int da9052_wdt_stop(struct watchdog_device *wdt_dev) +{ + return da9052_wdt_set_timeout(wdt_dev, 0); +} + +static int da9052_wdt_ping(struct watchdog_device *wdt_dev) +{ + struct da9052_wdt_data *driver_data = watchdog_get_drvdata(wdt_dev); + struct da9052 *da9052 = driver_data->da9052; + unsigned long msec, jnow = jiffies; + int ret; + + /* + * We have a minimum time for watchdog window called TWDMIN. A write + * to the watchdog before this elapsed time should cause an error. + */ + msec = (jnow - driver_data->jpast) * 1000/HZ; + if (msec < DA9052_TWDMIN) + mdelay(msec); + + /* Reset the watchdog timer */ + ret = da9052_reg_update(da9052, DA9052_CONTROL_D_REG, + DA9052_CONTROLD_WATCHDOG, 1 << 7); + if (ret < 0) + goto err_strobe; + + /* + * FIXME: Reset the watchdog core, in general PMIC + * is supposed to do this + */ + ret = da9052_reg_update(da9052, DA9052_CONTROL_D_REG, + DA9052_CONTROLD_WATCHDOG, 0 << 7); +err_strobe: + return ret; +} + +static struct watchdog_info da9052_wdt_info = { + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, + .identity = "DA9052 Watchdog", +}; + +static const struct watchdog_ops da9052_wdt_ops = { + .owner = THIS_MODULE, + .start = da9052_wdt_start, + .stop = da9052_wdt_stop, + .ping = da9052_wdt_ping, + .set_timeout = da9052_wdt_set_timeout, + .ref = da9052_wdt_ref, + .unref = da9052_wdt_unref, +}; + + +static int __devinit da9052_wdt_probe(struct platform_device *pdev) +{ + struct da9052 *da9052 = dev_get_drvdata(pdev->dev.parent); + struct da9052_wdt_data *driver_data; + struct watchdog_device *da9052_wdt; + int ret; + + driver_data = devm_kzalloc(&pdev->dev, sizeof(*driver_data), + GFP_KERNEL); + if (!driver_data) { + dev_err(da9052->dev, "Unable to alloacate watchdog device\n"); + ret = -ENOMEM; + goto err; + } + driver_data->da9052 = da9052; + + da9052_wdt = &driver_data->wdt; + + da9052_wdt->timeout = DA9052_DEF_TIMEOUT; + da9052_wdt->info = &da9052_wdt_info; + da9052_wdt->ops = &da9052_wdt_ops; + watchdog_set_drvdata(da9052_wdt, driver_data); + + kref_init(&driver_data->kref); + + ret = da9052_reg_update(da9052, DA9052_CONTROL_D_REG, + DA9052_CONTROLD_TWDSCALE, 0); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to disable watchdog bits, %d\n", + ret); + goto err; + } + + ret = watchdog_register_device(&driver_data->wdt); + if (ret != 0) { + dev_err(da9052->dev, "watchdog_register_device() failed: %d\n", + ret); + goto err; + } + + dev_set_drvdata(&pdev->dev, driver_data); +err: + return ret; +} + +static int __devexit da9052_wdt_remove(struct platform_device *pdev) +{ + struct da9052_wdt_data *driver_data = dev_get_drvdata(&pdev->dev); + + watchdog_unregister_device(&driver_data->wdt); + kref_put(&driver_data->kref, da9052_wdt_release_resources); + + return 0; +} + +static struct platform_driver da9052_wdt_driver = { + .probe = da9052_wdt_probe, + .remove = __devexit_p(da9052_wdt_remove), + .driver = { + .name = "da9052-watchdog", + }, +}; + +module_platform_driver(da9052_wdt_driver); + +MODULE_AUTHOR("Anthony Olech <Anthony.Olech@diasemi.com>"); +MODULE_DESCRIPTION("DA9052 SM Device Driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:da9052-watchdog"); diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index 741528b032e2..bc47e9012f37 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -575,7 +575,7 @@ static int __devinit iTCO_wdt_probe(struct platform_device *dev) if (!request_region(iTCO_wdt_private.smi_res->start, resource_size(iTCO_wdt_private.smi_res), dev->name)) { pr_err("I/O address 0x%04llx already in use, device disabled\n", - SMI_EN); + (u64)SMI_EN); ret = -EBUSY; goto unmap_gcs; } @@ -592,13 +592,13 @@ static int __devinit iTCO_wdt_probe(struct platform_device *dev) if (!request_region(iTCO_wdt_private.tco_res->start, resource_size(iTCO_wdt_private.tco_res), dev->name)) { pr_err("I/O address 0x%04llx already in use, device disabled\n", - TCOBASE); + (u64)TCOBASE); ret = -EBUSY; goto unreg_smi; } pr_info("Found a %s TCO device (Version=%d, TCOBASE=0x%04llx)\n", - ich_info->name, ich_info->iTCO_version, TCOBASE); + ich_info->name, ich_info->iTCO_version, (u64)TCOBASE); /* Clear out the (probably old) status */ outw(0x0008, TCO1_STS); /* Clear the Time Out Status bit */ diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c index bbb170e50055..afcd13676542 100644 --- a/drivers/watchdog/sp805_wdt.c +++ b/drivers/watchdog/sp805_wdt.c @@ -16,20 +16,17 @@ #include <linux/amba/bus.h> #include <linux/bitops.h> #include <linux/clk.h> -#include <linux/fs.h> #include <linux/init.h> #include <linux/io.h> #include <linux/ioport.h> #include <linux/kernel.h> #include <linux/math64.h> -#include <linux/miscdevice.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/pm.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/types.h> -#include <linux/uaccess.h> #include <linux/watchdog.h> /* default timeout in seconds */ @@ -56,6 +53,7 @@ /** * struct sp805_wdt: sp805 wdt device structure + * @wdd: instance of struct watchdog_device * @lock: spin lock protecting dev structure and io access * @base: base address of wdt * @clk: clock structure of wdt @@ -65,24 +63,24 @@ * @timeout: current programmed timeout */ struct sp805_wdt { + struct watchdog_device wdd; spinlock_t lock; void __iomem *base; struct clk *clk; struct amba_device *adev; - unsigned long status; - #define WDT_BUSY 0 - #define WDT_CAN_BE_CLOSED 1 unsigned int load_val; unsigned int timeout; }; -/* local variables */ -static struct sp805_wdt *wdt; static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, 0); +MODULE_PARM_DESC(nowayout, + "Set to 1 to keep watchdog running after device release"); /* This routine finds load value that will reset system in required timout */ -static void wdt_setload(unsigned int timeout) +static int wdt_setload(struct watchdog_device *wdd, unsigned int timeout) { + struct sp805_wdt *wdt = watchdog_get_drvdata(wdd); u64 load, rate; rate = clk_get_rate(wdt->clk); @@ -103,11 +101,14 @@ static void wdt_setload(unsigned int timeout) /* roundup timeout to closest positive integer value */ wdt->timeout = div_u64((load + 1) * 2 + (rate / 2), rate); spin_unlock(&wdt->lock); + + return 0; } /* returns number of seconds left for reset to occur */ -static u32 wdt_timeleft(void) +static unsigned int wdt_timeleft(struct watchdog_device *wdd) { + struct sp805_wdt *wdt = watchdog_get_drvdata(wdd); u64 load, rate; rate = clk_get_rate(wdt->clk); @@ -123,166 +124,96 @@ static u32 wdt_timeleft(void) return div_u64(load, rate); } -/* enables watchdog timers reset */ -static void wdt_enable(void) +static int wdt_config(struct watchdog_device *wdd, bool ping) { - spin_lock(&wdt->lock); + struct sp805_wdt *wdt = watchdog_get_drvdata(wdd); + int ret; - writel_relaxed(UNLOCK, wdt->base + WDTLOCK); - writel_relaxed(wdt->load_val, wdt->base + WDTLOAD); - writel_relaxed(INT_MASK, wdt->base + WDTINTCLR); - writel_relaxed(INT_ENABLE | RESET_ENABLE, wdt->base + WDTCONTROL); - writel_relaxed(LOCK, wdt->base + WDTLOCK); + if (!ping) { + ret = clk_prepare(wdt->clk); + if (ret) { + dev_err(&wdt->adev->dev, "clock prepare fail"); + return ret; + } - /* Flush posted writes. */ - readl_relaxed(wdt->base + WDTLOCK); - spin_unlock(&wdt->lock); -} + ret = clk_enable(wdt->clk); + if (ret) { + dev_err(&wdt->adev->dev, "clock enable fail"); + clk_unprepare(wdt->clk); + return ret; + } + } -/* disables watchdog timers reset */ -static void wdt_disable(void) -{ spin_lock(&wdt->lock); writel_relaxed(UNLOCK, wdt->base + WDTLOCK); - writel_relaxed(0, wdt->base + WDTCONTROL); + writel_relaxed(wdt->load_val, wdt->base + WDTLOAD); + + if (!ping) { + writel_relaxed(INT_MASK, wdt->base + WDTINTCLR); + writel_relaxed(INT_ENABLE | RESET_ENABLE, wdt->base + + WDTCONTROL); + } + writel_relaxed(LOCK, wdt->base + WDTLOCK); /* Flush posted writes. */ readl_relaxed(wdt->base + WDTLOCK); spin_unlock(&wdt->lock); + + return 0; } -static ssize_t sp805_wdt_write(struct file *file, const char *data, - size_t len, loff_t *ppos) +static int wdt_ping(struct watchdog_device *wdd) { - if (len) { - if (!nowayout) { - size_t i; - - clear_bit(WDT_CAN_BE_CLOSED, &wdt->status); - - for (i = 0; i != len; i++) { - char c; - - if (get_user(c, data + i)) - return -EFAULT; - /* Check for Magic Close character */ - if (c == 'V') { - set_bit(WDT_CAN_BE_CLOSED, - &wdt->status); - break; - } - } - } - wdt_enable(); - } - return len; + return wdt_config(wdd, true); } -static const struct watchdog_info ident = { - .options = WDIOF_MAGICCLOSE | WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, - .identity = MODULE_NAME, -}; - -static long sp805_wdt_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) +/* enables watchdog timers reset */ +static int wdt_enable(struct watchdog_device *wdd) { - int ret = -ENOTTY; - unsigned int timeout; - - switch (cmd) { - case WDIOC_GETSUPPORT: - ret = copy_to_user((struct watchdog_info *)arg, &ident, - sizeof(ident)) ? -EFAULT : 0; - break; - - case WDIOC_GETSTATUS: - ret = put_user(0, (int *)arg); - break; - - case WDIOC_KEEPALIVE: - wdt_enable(); - ret = 0; - break; - - case WDIOC_SETTIMEOUT: - ret = get_user(timeout, (unsigned int *)arg); - if (ret) - break; - - wdt_setload(timeout); - - wdt_enable(); - /* Fall through */ - - case WDIOC_GETTIMEOUT: - ret = put_user(wdt->timeout, (unsigned int *)arg); - break; - case WDIOC_GETTIMELEFT: - ret = put_user(wdt_timeleft(), (unsigned int *)arg); - break; - } - return ret; + return wdt_config(wdd, false); } -static int sp805_wdt_open(struct inode *inode, struct file *file) +/* disables watchdog timers reset */ +static int wdt_disable(struct watchdog_device *wdd) { - int ret = 0; - - if (test_and_set_bit(WDT_BUSY, &wdt->status)) - return -EBUSY; - - ret = clk_enable(wdt->clk); - if (ret) { - dev_err(&wdt->adev->dev, "clock enable fail"); - goto err; - } - - wdt_enable(); + struct sp805_wdt *wdt = watchdog_get_drvdata(wdd); - /* can not be closed, once enabled */ - clear_bit(WDT_CAN_BE_CLOSED, &wdt->status); - return nonseekable_open(inode, file); + spin_lock(&wdt->lock); -err: - clear_bit(WDT_BUSY, &wdt->status); - return ret; -} + writel_relaxed(UNLOCK, wdt->base + WDTLOCK); + writel_relaxed(0, wdt->base + WDTCONTROL); + writel_relaxed(LOCK, wdt->base + WDTLOCK); -static int sp805_wdt_release(struct inode *inode, struct file *file) -{ - if (!test_bit(WDT_CAN_BE_CLOSED, &wdt->status)) { - clear_bit(WDT_BUSY, &wdt->status); - dev_warn(&wdt->adev->dev, "Device closed unexpectedly\n"); - return 0; - } + /* Flush posted writes. */ + readl_relaxed(wdt->base + WDTLOCK); + spin_unlock(&wdt->lock); - wdt_disable(); clk_disable(wdt->clk); - clear_bit(WDT_BUSY, &wdt->status); + clk_unprepare(wdt->clk); return 0; } -static const struct file_operations sp805_wdt_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .write = sp805_wdt_write, - .unlocked_ioctl = sp805_wdt_ioctl, - .open = sp805_wdt_open, - .release = sp805_wdt_release, +static const struct watchdog_info wdt_info = { + .options = WDIOF_MAGICCLOSE | WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, + .identity = MODULE_NAME, }; -static struct miscdevice sp805_wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &sp805_wdt_fops, +static const struct watchdog_ops wdt_ops = { + .owner = THIS_MODULE, + .start = wdt_enable, + .stop = wdt_disable, + .ping = wdt_ping, + .set_timeout = wdt_setload, + .get_timeleft = wdt_timeleft, }; static int __devinit sp805_wdt_probe(struct amba_device *adev, const struct amba_id *id) { + struct sp805_wdt *wdt; int ret = 0; if (!devm_request_mem_region(&adev->dev, adev->res.start, @@ -315,19 +246,26 @@ sp805_wdt_probe(struct amba_device *adev, const struct amba_id *id) } wdt->adev = adev; + wdt->wdd.info = &wdt_info; + wdt->wdd.ops = &wdt_ops; + spin_lock_init(&wdt->lock); - wdt_setload(DEFAULT_TIMEOUT); + watchdog_set_nowayout(&wdt->wdd, nowayout); + watchdog_set_drvdata(&wdt->wdd, wdt); + wdt_setload(&wdt->wdd, DEFAULT_TIMEOUT); - ret = misc_register(&sp805_wdt_miscdev); - if (ret < 0) { - dev_warn(&adev->dev, "cannot register misc device\n"); - goto err_misc_register; + ret = watchdog_register_device(&wdt->wdd); + if (ret) { + dev_err(&adev->dev, "watchdog_register_device() failed: %d\n", + ret); + goto err_register; } + amba_set_drvdata(adev, wdt); dev_info(&adev->dev, "registration successful\n"); return 0; -err_misc_register: +err_register: clk_put(wdt->clk); err: dev_err(&adev->dev, "Probe Failed!!!\n"); @@ -336,7 +274,11 @@ err: static int __devexit sp805_wdt_remove(struct amba_device *adev) { - misc_deregister(&sp805_wdt_miscdev); + struct sp805_wdt *wdt = amba_get_drvdata(adev); + + watchdog_unregister_device(&wdt->wdd); + amba_set_drvdata(adev, NULL); + watchdog_set_drvdata(&wdt->wdd, NULL); clk_put(wdt->clk); return 0; @@ -345,28 +287,22 @@ static int __devexit sp805_wdt_remove(struct amba_device *adev) #ifdef CONFIG_PM static int sp805_wdt_suspend(struct device *dev) { - if (test_bit(WDT_BUSY, &wdt->status)) { - wdt_disable(); - clk_disable(wdt->clk); - } + struct sp805_wdt *wdt = dev_get_drvdata(dev); + + if (watchdog_active(&wdt->wdd)) + return wdt_disable(&wdt->wdd); return 0; } static int sp805_wdt_resume(struct device *dev) { - int ret = 0; + struct sp805_wdt *wdt = dev_get_drvdata(dev); - if (test_bit(WDT_BUSY, &wdt->status)) { - ret = clk_enable(wdt->clk); - if (ret) { - dev_err(dev, "clock enable fail"); - return ret; - } - wdt_enable(); - } + if (watchdog_active(&wdt->wdd)) + return wdt_enable(&wdt->wdd); - return ret; + return 0; } #endif /* CONFIG_PM */ @@ -395,11 +331,6 @@ static struct amba_driver sp805_wdt_driver = { module_amba_driver(sp805_wdt_driver); -module_param(nowayout, bool, 0); -MODULE_PARM_DESC(nowayout, - "Set to 1 to keep watchdog running after device release"); - MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>"); MODULE_DESCRIPTION("ARM SP805 Watchdog Driver"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/watchdog/via_wdt.c b/drivers/watchdog/via_wdt.c index 5603e31afdab..aa50da3ccfe3 100644 --- a/drivers/watchdog/via_wdt.c +++ b/drivers/watchdog/via_wdt.c @@ -91,7 +91,7 @@ static inline void wdt_reset(void) static void wdt_timer_tick(unsigned long data) { if (time_before(jiffies, next_heartbeat) || - (!test_bit(WDOG_ACTIVE, &wdt_dev.status))) { + (!watchdog_active(&wdt_dev))) { wdt_reset(); mod_timer(&timer, jiffies + WDT_HEARTBEAT); } else diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c index 14d768bfa267..6aa46a90ff02 100644 --- a/drivers/watchdog/watchdog_core.c +++ b/drivers/watchdog/watchdog_core.c @@ -34,8 +34,13 @@ #include <linux/kernel.h> /* For printk/panic/... */ #include <linux/watchdog.h> /* For watchdog specific items */ #include <linux/init.h> /* For __init/__exit/... */ +#include <linux/idr.h> /* For ida_* macros */ +#include <linux/err.h> /* For IS_ERR macros */ -#include "watchdog_dev.h" /* For watchdog_dev_register/... */ +#include "watchdog_core.h" /* For watchdog_dev_register/... */ + +static DEFINE_IDA(watchdog_ida); +static struct class *watchdog_class; /** * watchdog_register_device() - register a watchdog device @@ -49,7 +54,7 @@ */ int watchdog_register_device(struct watchdog_device *wdd) { - int ret; + int ret, id, devno; if (wdd == NULL || wdd->info == NULL || wdd->ops == NULL) return -EINVAL; @@ -74,10 +79,38 @@ int watchdog_register_device(struct watchdog_device *wdd) * corrupted in a later stage then we expect a kernel panic! */ - /* We only support 1 watchdog device via the /dev/watchdog interface */ + mutex_init(&wdd->lock); + id = ida_simple_get(&watchdog_ida, 0, MAX_DOGS, GFP_KERNEL); + if (id < 0) + return id; + wdd->id = id; + ret = watchdog_dev_register(wdd); if (ret) { - pr_err("error registering /dev/watchdog (err=%d)\n", ret); + ida_simple_remove(&watchdog_ida, id); + if (!(id == 0 && ret == -EBUSY)) + return ret; + + /* Retry in case a legacy watchdog module exists */ + id = ida_simple_get(&watchdog_ida, 1, MAX_DOGS, GFP_KERNEL); + if (id < 0) + return id; + wdd->id = id; + + ret = watchdog_dev_register(wdd); + if (ret) { + ida_simple_remove(&watchdog_ida, id); + return ret; + } + } + + devno = wdd->cdev.dev; + wdd->dev = device_create(watchdog_class, wdd->parent, devno, + NULL, "watchdog%d", wdd->id); + if (IS_ERR(wdd->dev)) { + watchdog_dev_unregister(wdd); + ida_simple_remove(&watchdog_ida, id); + ret = PTR_ERR(wdd->dev); return ret; } @@ -95,6 +128,7 @@ EXPORT_SYMBOL_GPL(watchdog_register_device); void watchdog_unregister_device(struct watchdog_device *wdd) { int ret; + int devno = wdd->cdev.dev; if (wdd == NULL) return; @@ -102,9 +136,41 @@ void watchdog_unregister_device(struct watchdog_device *wdd) ret = watchdog_dev_unregister(wdd); if (ret) pr_err("error unregistering /dev/watchdog (err=%d)\n", ret); + device_destroy(watchdog_class, devno); + ida_simple_remove(&watchdog_ida, wdd->id); + wdd->dev = NULL; } EXPORT_SYMBOL_GPL(watchdog_unregister_device); +static int __init watchdog_init(void) +{ + int err; + + watchdog_class = class_create(THIS_MODULE, "watchdog"); + if (IS_ERR(watchdog_class)) { + pr_err("couldn't create class\n"); + return PTR_ERR(watchdog_class); + } + + err = watchdog_dev_init(); + if (err < 0) { + class_destroy(watchdog_class); + return err; + } + + return 0; +} + +static void __exit watchdog_exit(void) +{ + watchdog_dev_exit(); + class_destroy(watchdog_class); + ida_destroy(&watchdog_ida); +} + +subsys_initcall(watchdog_init); +module_exit(watchdog_exit); + MODULE_AUTHOR("Alan Cox <alan@lxorguk.ukuu.org.uk>"); MODULE_AUTHOR("Wim Van Sebroeck <wim@iguana.be>"); MODULE_DESCRIPTION("WatchDog Timer Driver Core"); diff --git a/drivers/watchdog/watchdog_dev.h b/drivers/watchdog/watchdog_core.h index bc7612be25ce..6c951418fca7 100644 --- a/drivers/watchdog/watchdog_dev.h +++ b/drivers/watchdog/watchdog_core.h @@ -26,8 +26,12 @@ * This material is provided "AS-IS" and at no charge. */ +#define MAX_DOGS 32 /* Maximum number of watchdog devices */ + /* * Functions/procedures to be called by the core */ -int watchdog_dev_register(struct watchdog_device *); -int watchdog_dev_unregister(struct watchdog_device *); +extern int watchdog_dev_register(struct watchdog_device *); +extern int watchdog_dev_unregister(struct watchdog_device *); +extern int __init watchdog_dev_init(void); +extern void __exit watchdog_dev_exit(void); diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index 8558da912c42..672d169bf1da 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -42,10 +42,12 @@ #include <linux/init.h> /* For __init/__exit/... */ #include <linux/uaccess.h> /* For copy_to_user/put_user/... */ -/* make sure we only register one /dev/watchdog device */ -static unsigned long watchdog_dev_busy; +#include "watchdog_core.h" + +/* the dev_t structure to store the dynamically allocated watchdog devices */ +static dev_t watchdog_devt; /* the watchdog device behind /dev/watchdog */ -static struct watchdog_device *wdd; +static struct watchdog_device *old_wdd; /* * watchdog_ping: ping the watchdog. @@ -59,13 +61,26 @@ static struct watchdog_device *wdd; static int watchdog_ping(struct watchdog_device *wddev) { - if (test_bit(WDOG_ACTIVE, &wddev->status)) { - if (wddev->ops->ping) - return wddev->ops->ping(wddev); /* ping the watchdog */ - else - return wddev->ops->start(wddev); /* restart watchdog */ + int err = 0; + + mutex_lock(&wddev->lock); + + if (test_bit(WDOG_UNREGISTERED, &wddev->status)) { + err = -ENODEV; + goto out_ping; } - return 0; + + if (!watchdog_active(wddev)) + goto out_ping; + + if (wddev->ops->ping) + err = wddev->ops->ping(wddev); /* ping the watchdog */ + else + err = wddev->ops->start(wddev); /* restart watchdog */ + +out_ping: + mutex_unlock(&wddev->lock); + return err; } /* @@ -79,16 +94,25 @@ static int watchdog_ping(struct watchdog_device *wddev) static int watchdog_start(struct watchdog_device *wddev) { - int err; + int err = 0; - if (!test_bit(WDOG_ACTIVE, &wddev->status)) { - err = wddev->ops->start(wddev); - if (err < 0) - return err; + mutex_lock(&wddev->lock); - set_bit(WDOG_ACTIVE, &wddev->status); + if (test_bit(WDOG_UNREGISTERED, &wddev->status)) { + err = -ENODEV; + goto out_start; } - return 0; + + if (watchdog_active(wddev)) + goto out_start; + + err = wddev->ops->start(wddev); + if (err == 0) + set_bit(WDOG_ACTIVE, &wddev->status); + +out_start: + mutex_unlock(&wddev->lock); + return err; } /* @@ -103,22 +127,155 @@ static int watchdog_start(struct watchdog_device *wddev) static int watchdog_stop(struct watchdog_device *wddev) { - int err = -EBUSY; + int err = 0; - if (test_bit(WDOG_NO_WAY_OUT, &wddev->status)) { - pr_info("%s: nowayout prevents watchdog to be stopped!\n", - wddev->info->identity); - return err; + mutex_lock(&wddev->lock); + + if (test_bit(WDOG_UNREGISTERED, &wddev->status)) { + err = -ENODEV; + goto out_stop; } - if (test_bit(WDOG_ACTIVE, &wddev->status)) { - err = wddev->ops->stop(wddev); - if (err < 0) - return err; + if (!watchdog_active(wddev)) + goto out_stop; + if (test_bit(WDOG_NO_WAY_OUT, &wddev->status)) { + dev_info(wddev->dev, "nowayout prevents watchdog being stopped!\n"); + err = -EBUSY; + goto out_stop; + } + + err = wddev->ops->stop(wddev); + if (err == 0) clear_bit(WDOG_ACTIVE, &wddev->status); + +out_stop: + mutex_unlock(&wddev->lock); + return err; +} + +/* + * watchdog_get_status: wrapper to get the watchdog status + * @wddev: the watchdog device to get the status from + * @status: the status of the watchdog device + * + * Get the watchdog's status flags. + */ + +static int watchdog_get_status(struct watchdog_device *wddev, + unsigned int *status) +{ + int err = 0; + + *status = 0; + if (!wddev->ops->status) + return -EOPNOTSUPP; + + mutex_lock(&wddev->lock); + + if (test_bit(WDOG_UNREGISTERED, &wddev->status)) { + err = -ENODEV; + goto out_status; } - return 0; + + *status = wddev->ops->status(wddev); + +out_status: + mutex_unlock(&wddev->lock); + return err; +} + +/* + * watchdog_set_timeout: set the watchdog timer timeout + * @wddev: the watchdog device to set the timeout for + * @timeout: timeout to set in seconds + */ + +static int watchdog_set_timeout(struct watchdog_device *wddev, + unsigned int timeout) +{ + int err; + + if ((wddev->ops->set_timeout == NULL) || + !(wddev->info->options & WDIOF_SETTIMEOUT)) + return -EOPNOTSUPP; + + if ((wddev->max_timeout != 0) && + (timeout < wddev->min_timeout || timeout > wddev->max_timeout)) + return -EINVAL; + + mutex_lock(&wddev->lock); + + if (test_bit(WDOG_UNREGISTERED, &wddev->status)) { + err = -ENODEV; + goto out_timeout; + } + + err = wddev->ops->set_timeout(wddev, timeout); + +out_timeout: + mutex_unlock(&wddev->lock); + return err; +} + +/* + * watchdog_get_timeleft: wrapper to get the time left before a reboot + * @wddev: the watchdog device to get the remaining time from + * @timeleft: the time that's left + * + * Get the time before a watchdog will reboot (if not pinged). + */ + +static int watchdog_get_timeleft(struct watchdog_device *wddev, + unsigned int *timeleft) +{ + int err = 0; + + *timeleft = 0; + if (!wddev->ops->get_timeleft) + return -EOPNOTSUPP; + + mutex_lock(&wddev->lock); + + if (test_bit(WDOG_UNREGISTERED, &wddev->status)) { + err = -ENODEV; + goto out_timeleft; + } + + *timeleft = wddev->ops->get_timeleft(wddev); + +out_timeleft: + mutex_unlock(&wddev->lock); + return err; +} + +/* + * watchdog_ioctl_op: call the watchdog drivers ioctl op if defined + * @wddev: the watchdog device to do the ioctl on + * @cmd: watchdog command + * @arg: argument pointer + */ + +static int watchdog_ioctl_op(struct watchdog_device *wddev, unsigned int cmd, + unsigned long arg) +{ + int err; + + if (!wddev->ops->ioctl) + return -ENOIOCTLCMD; + + mutex_lock(&wddev->lock); + + if (test_bit(WDOG_UNREGISTERED, &wddev->status)) { + err = -ENODEV; + goto out_ioctl; + } + + err = wddev->ops->ioctl(wddev, cmd, arg); + +out_ioctl: + mutex_unlock(&wddev->lock); + return err; } /* @@ -136,6 +293,7 @@ static int watchdog_stop(struct watchdog_device *wddev) static ssize_t watchdog_write(struct file *file, const char __user *data, size_t len, loff_t *ppos) { + struct watchdog_device *wdd = file->private_data; size_t i; char c; @@ -175,23 +333,24 @@ static ssize_t watchdog_write(struct file *file, const char __user *data, static long watchdog_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + struct watchdog_device *wdd = file->private_data; void __user *argp = (void __user *)arg; int __user *p = argp; unsigned int val; int err; - if (wdd->ops->ioctl) { - err = wdd->ops->ioctl(wdd, cmd, arg); - if (err != -ENOIOCTLCMD) - return err; - } + err = watchdog_ioctl_op(wdd, cmd, arg); + if (err != -ENOIOCTLCMD) + return err; switch (cmd) { case WDIOC_GETSUPPORT: return copy_to_user(argp, wdd->info, sizeof(struct watchdog_info)) ? -EFAULT : 0; case WDIOC_GETSTATUS: - val = wdd->ops->status ? wdd->ops->status(wdd) : 0; + err = watchdog_get_status(wdd, &val); + if (err) + return err; return put_user(val, p); case WDIOC_GETBOOTSTATUS: return put_user(wdd->bootstatus, p); @@ -215,15 +374,9 @@ static long watchdog_ioctl(struct file *file, unsigned int cmd, watchdog_ping(wdd); return 0; case WDIOC_SETTIMEOUT: - if ((wdd->ops->set_timeout == NULL) || - !(wdd->info->options & WDIOF_SETTIMEOUT)) - return -EOPNOTSUPP; if (get_user(val, p)) return -EFAULT; - if ((wdd->max_timeout != 0) && - (val < wdd->min_timeout || val > wdd->max_timeout)) - return -EINVAL; - err = wdd->ops->set_timeout(wdd, val); + err = watchdog_set_timeout(wdd, val); if (err < 0) return err; /* If the watchdog is active then we send a keepalive ping @@ -237,21 +390,21 @@ static long watchdog_ioctl(struct file *file, unsigned int cmd, return -EOPNOTSUPP; return put_user(wdd->timeout, p); case WDIOC_GETTIMELEFT: - if (!wdd->ops->get_timeleft) - return -EOPNOTSUPP; - - return put_user(wdd->ops->get_timeleft(wdd), p); + err = watchdog_get_timeleft(wdd, &val); + if (err) + return err; + return put_user(val, p); default: return -ENOTTY; } } /* - * watchdog_open: open the /dev/watchdog device. + * watchdog_open: open the /dev/watchdog* devices. * @inode: inode of device * @file: file handle to device * - * When the /dev/watchdog device gets opened, we start the watchdog. + * When the /dev/watchdog* device gets opened, we start the watchdog. * Watch out: the /dev/watchdog device is single open, so we make sure * it can only be opened once. */ @@ -259,6 +412,13 @@ static long watchdog_ioctl(struct file *file, unsigned int cmd, static int watchdog_open(struct inode *inode, struct file *file) { int err = -EBUSY; + struct watchdog_device *wdd; + + /* Get the corresponding watchdog device */ + if (imajor(inode) == MISC_MAJOR) + wdd = old_wdd; + else + wdd = container_of(inode->i_cdev, struct watchdog_device, cdev); /* the watchdog is single open! */ if (test_and_set_bit(WDOG_DEV_OPEN, &wdd->status)) @@ -275,6 +435,11 @@ static int watchdog_open(struct inode *inode, struct file *file) if (err < 0) goto out_mod; + file->private_data = wdd; + + if (wdd->ops->ref) + wdd->ops->ref(wdd); + /* dev/watchdog is a virtual (and thus non-seekable) filesystem */ return nonseekable_open(inode, file); @@ -286,9 +451,9 @@ out: } /* - * watchdog_release: release the /dev/watchdog device. - * @inode: inode of device - * @file: file handle to device + * watchdog_release: release the watchdog device. + * @inode: inode of device + * @file: file handle to device * * This is the code for when /dev/watchdog gets closed. We will only * stop the watchdog when we have received the magic char (and nowayout @@ -297,6 +462,7 @@ out: static int watchdog_release(struct inode *inode, struct file *file) { + struct watchdog_device *wdd = file->private_data; int err = -EBUSY; /* @@ -310,7 +476,10 @@ static int watchdog_release(struct inode *inode, struct file *file) /* If the watchdog was not stopped, send a keepalive ping */ if (err < 0) { - pr_crit("%s: watchdog did not stop!\n", wdd->info->identity); + mutex_lock(&wdd->lock); + if (!test_bit(WDOG_UNREGISTERED, &wdd->status)) + dev_crit(wdd->dev, "watchdog did not stop!\n"); + mutex_unlock(&wdd->lock); watchdog_ping(wdd); } @@ -320,6 +489,10 @@ static int watchdog_release(struct inode *inode, struct file *file) /* make sure that /dev/watchdog can be re-opened */ clear_bit(WDOG_DEV_OPEN, &wdd->status); + /* Note wdd may be gone after this, do not use after this! */ + if (wdd->ops->unref) + wdd->ops->unref(wdd); + return 0; } @@ -338,62 +511,92 @@ static struct miscdevice watchdog_miscdev = { }; /* - * watchdog_dev_register: + * watchdog_dev_register: register a watchdog device * @watchdog: watchdog device * - * Register a watchdog device as /dev/watchdog. /dev/watchdog - * is actually a miscdevice and thus we set it up like that. + * Register a watchdog device including handling the legacy + * /dev/watchdog node. /dev/watchdog is actually a miscdevice and + * thus we set it up like that. */ int watchdog_dev_register(struct watchdog_device *watchdog) { - int err; - - /* Only one device can register for /dev/watchdog */ - if (test_and_set_bit(0, &watchdog_dev_busy)) { - pr_err("only one watchdog can use /dev/watchdog\n"); - return -EBUSY; + int err, devno; + + if (watchdog->id == 0) { + watchdog_miscdev.parent = watchdog->parent; + err = misc_register(&watchdog_miscdev); + if (err != 0) { + pr_err("%s: cannot register miscdev on minor=%d (err=%d).\n", + watchdog->info->identity, WATCHDOG_MINOR, err); + if (err == -EBUSY) + pr_err("%s: a legacy watchdog module is probably present.\n", + watchdog->info->identity); + return err; + } + old_wdd = watchdog; } - wdd = watchdog; - - err = misc_register(&watchdog_miscdev); - if (err != 0) { - pr_err("%s: cannot register miscdev on minor=%d (err=%d)\n", - watchdog->info->identity, WATCHDOG_MINOR, err); - goto out; + /* Fill in the data structures */ + devno = MKDEV(MAJOR(watchdog_devt), watchdog->id); + cdev_init(&watchdog->cdev, &watchdog_fops); + watchdog->cdev.owner = watchdog->ops->owner; + + /* Add the device */ + err = cdev_add(&watchdog->cdev, devno, 1); + if (err) { + pr_err("watchdog%d unable to add device %d:%d\n", + watchdog->id, MAJOR(watchdog_devt), watchdog->id); + if (watchdog->id == 0) { + misc_deregister(&watchdog_miscdev); + old_wdd = NULL; + } } - - return 0; - -out: - wdd = NULL; - clear_bit(0, &watchdog_dev_busy); return err; } /* - * watchdog_dev_unregister: + * watchdog_dev_unregister: unregister a watchdog device * @watchdog: watchdog device * - * Deregister the /dev/watchdog device. + * Unregister the watchdog and if needed the legacy /dev/watchdog device. */ int watchdog_dev_unregister(struct watchdog_device *watchdog) { - /* Check that a watchdog device was registered in the past */ - if (!test_bit(0, &watchdog_dev_busy) || !wdd) - return -ENODEV; - - /* We can only unregister the watchdog device that was registered */ - if (watchdog != wdd) { - pr_err("%s: watchdog was not registered as /dev/watchdog\n", - watchdog->info->identity); - return -ENODEV; + mutex_lock(&watchdog->lock); + set_bit(WDOG_UNREGISTERED, &watchdog->status); + mutex_unlock(&watchdog->lock); + + cdev_del(&watchdog->cdev); + if (watchdog->id == 0) { + misc_deregister(&watchdog_miscdev); + old_wdd = NULL; } - - misc_deregister(&watchdog_miscdev); - wdd = NULL; - clear_bit(0, &watchdog_dev_busy); return 0; } + +/* + * watchdog_dev_init: init dev part of watchdog core + * + * Allocate a range of chardev nodes to use for watchdog devices + */ + +int __init watchdog_dev_init(void) +{ + int err = alloc_chrdev_region(&watchdog_devt, 0, MAX_DOGS, "watchdog"); + if (err < 0) + pr_err("watchdog: unable to allocate char dev region\n"); + return err; +} + +/* + * watchdog_dev_exit: exit dev part of watchdog core + * + * Release the range of chardev nodes used for watchdog devices + */ + +void __exit watchdog_dev_exit(void) +{ + unregister_chrdev_region(watchdog_devt, MAX_DOGS); +} |