From 28c455ceb2d9fc276839ebeae68115f850b25c6d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 9 Jan 2011 17:52:09 +0100 Subject: [PATCH 0001/5831] drbd: Get rid of req_validator_fn typedef Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 43beaca53179..d499aa6b7ac0 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4270,11 +4270,9 @@ static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, return NULL; } -typedef struct drbd_request *(req_validator_fn) - (struct drbd_conf *mdev, u64 id, sector_t sector); - static int validate_req_change_req_state(struct drbd_conf *mdev, - u64 id, sector_t sector, req_validator_fn validator, + u64 id, sector_t sector, + struct drbd_request *(*validator)(struct drbd_conf *, u64, sector_t), const char *func, enum drbd_req_event what) { struct drbd_request *req; From 3980485361f5f71e559c6b8868bb5a1c41171407 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 11 Jan 2011 12:25:21 +0100 Subject: [PATCH 0002/5831] drbd: Remove superfluous declaration Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ef2ceed3be4b..88b247eac342 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -689,7 +689,6 @@ struct drbd_work { drbd_work_cb cb; }; -struct drbd_tl_epoch; struct drbd_request { struct drbd_work w; struct drbd_conf *mdev; From 9a8e77530fa7059044114bcf1a897a470ec21bc9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 11 Jan 2011 14:04:09 +0100 Subject: [PATCH 0003/5831] drbd: Consistently use block_id == ID_SYNCER for checksum based resync and online verify DRBD_MAGIC has nothing to do with block ids and the funny values computed were not actually used, anyway. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_receiver.c | 4 ++++ drivers/block/drbd/drbd_worker.c | 4 +--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 0358e55356c8..0c16620ecec5 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2474,7 +2474,7 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, struct p_block_req p; p.sector = cpu_to_be64(sector); - p.block_id = BE_DRBD_MAGIC + 0xbeef; + p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); p.head.magic = BE_DRBD_MAGIC; @@ -2497,7 +2497,7 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) struct p_block_req p; p.sector = cpu_to_be64(sector); - p.block_id = BE_DRBD_MAGIC + 0xbabe; + p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST, diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d499aa6b7ac0..5ed96198538b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -341,6 +341,10 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, e->size = data_size; e->flags = 0; e->sector = sector; + /* + * The block_id is opaque to the receiver. 
It is not endianness + * converted, and sent back to the sender unchanged. + */ e->block_id = id; return e; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 4d3e6f6213ba..10438c41f559 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -305,8 +305,6 @@ int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) void *digest; int ok = 1; - D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef); - if (unlikely(cancel)) goto out; @@ -359,7 +357,7 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) /* GFP_TRY, because if there is no memory available right now, this may * be rescheduled for later. It is "only" background resync, after all. */ - e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY); + e = drbd_alloc_ee(mdev, ID_SYNCER /* unused */, sector, size, GFP_TRY); if (!e) goto defer; From ca9bc12b90fbc4e2b1f81360f63842c9da54bb3c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 11 Jan 2011 13:47:24 +0100 Subject: [PATCH 0004/5831] drbd: Get rid of BE_DRBD_MAGIC and BE_DRBD_MAGIC_BIG Converting the constants happens at compile time. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 14 +++++++------- drivers/block/drbd/drbd_receiver.c | 8 ++++---- include/linux/drbd.h | 2 -- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 0c16620ecec5..2cd132a91b83 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1841,7 +1841,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, ERR_IF(!h) return false; ERR_IF(!size) return false; - h->magic = BE_DRBD_MAGIC; + h->magic = cpu_to_be32(DRBD_MAGIC); h->command = cpu_to_be16(cmd); h->length = cpu_to_be16(size-sizeof(struct p_header80)); @@ -1889,7 +1889,7 @@ int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, struct p_header80 h; int ok; - h.magic = BE_DRBD_MAGIC; + h.magic = cpu_to_be32(DRBD_MAGIC); h.command = cpu_to_be16(cmd); h.length = cpu_to_be16(size); @@ -2477,7 +2477,7 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); - p.head.magic = BE_DRBD_MAGIC; + p.head.magic = cpu_to_be32(DRBD_MAGIC); p.head.command = cpu_to_be16(cmd); p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size); @@ -2682,12 +2682,12 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; if (req->size <= DRBD_MAX_SIZE_H80_PACKET) { - p.head.h80.magic = BE_DRBD_MAGIC; + p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); p.head.h80.command = cpu_to_be16(P_DATA); p.head.h80.length = cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->size); } else { - p.head.h95.magic = BE_DRBD_MAGIC_BIG; + p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); p.head.h95.command = cpu_to_be16(P_DATA); p.head.h95.length = cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->size); @@ -2767,12 +2767,12 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; if (e->size <= DRBD_MAX_SIZE_H80_PACKET) { - p.head.h80.magic = BE_DRBD_MAGIC; + p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); p.head.h80.command = cpu_to_be16(cmd); p.head.h80.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->size); } else { - p.head.h95.magic = 
BE_DRBD_MAGIC_BIG; + p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); p.head.h95.command = cpu_to_be16(cmd); p.head.h95.length = cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->size); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 5ed96198538b..69eec6980c29 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -712,7 +712,7 @@ static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *soc rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0); - if (rr == sizeof(*h) && h->magic == BE_DRBD_MAGIC) + if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC)) return be16_to_cpu(h->command); return 0xffff; @@ -935,10 +935,10 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi return false; } - if (likely(h->h80.magic == BE_DRBD_MAGIC)) { + if (likely(h->h80.magic == cpu_to_be32(DRBD_MAGIC))) { *cmd = be16_to_cpu(h->h80.command); *packet_size = be16_to_cpu(h->h80.length); - } else if (h->h95.magic == BE_DRBD_MAGIC_BIG) { + } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) { *cmd = be16_to_cpu(h->h95.command); *packet_size = be32_to_cpu(h->h95.length); } else { @@ -4623,7 +4623,7 @@ int drbd_asender(struct drbd_thread *thi) } if (received == expect && cmd == NULL) { - if (unlikely(h->magic != BE_DRBD_MAGIC)) { + if (unlikely(h->magic != cpu_to_be32(DRBD_MAGIC))) { dev_err(DEV, "magic?? on meta m: 0x%08x c: %d l: %d\n", be32_to_cpu(h->magic), be16_to_cpu(h->command), diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 9e5f5607eba3..d28202811672 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -334,9 +334,7 @@ enum drbd_timeout_flag { #define UUID_JUST_CREATED ((__u64)4) #define DRBD_MAGIC 0x83740267 -#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC) #define DRBD_MAGIC_BIG 0x835a -#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG) /* these are of type "int" */ #define DRBD_MD_INDEX_INTERNAL -1 From e7fad8af750c5780143e4b6876f80042ec0c21f5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 11 Jan 2011 13:54:02 +0100 Subject: [PATCH 0005/5831] drbd: Endianness convert the constants instead of the variables Converting the constants happens at compile time. 
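For illustration only (not part of the patches): the names EXAMPLE_MAGIC and magic_ok() below are made up; cpu_to_be32() and be32_to_cpu() are the regular kernel byte-order helpers. The point of converting the constant rather than the variable is that cpu_to_be32() applied to a compile-time constant folds into a constant, so the comparison needs no byte swap at run time, whereas be32_to_cpu() on the variable swaps on every call on little-endian hosts.

#include <linux/types.h>
#include <asm/byteorder.h>

#define EXAMPLE_MAGIC 0x83740267	/* same value as DRBD_MAGIC */

static bool magic_ok(__be32 wire_magic)
{
	/* cpu_to_be32(EXAMPLE_MAGIC) is folded at compile time, so this
	 * is a plain 32-bit compare with no runtime conversion. */
	return wire_magic == cpu_to_be32(EXAMPLE_MAGIC);

	/* Equivalent, but byte-swaps wire_magic at run time on
	 * little-endian machines:
	 *
	 *	return be32_to_cpu(wire_magic) == EXAMPLE_MAGIC;
	 */
}
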
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 2 +- drivers/block/drbd/drbd_main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index cf0e63dd97da..0eb17d3adf21 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -407,7 +407,7 @@ static int drbd_al_read_tr(struct drbd_conf *mdev, if (!drbd_md_sync_page_io(mdev, bdev, sector, READ)) return -1; - rv = (be32_to_cpu(b->magic) == DRBD_MAGIC); + rv = (b->magic == cpu_to_be32(DRBD_MAGIC)); for (i = 0; i < AL_EXTENTS_PT + 1; i++) xor_sum ^= be32_to_cpu(b->updates[i].extent); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2cd132a91b83..f65b8c532248 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3729,7 +3729,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) goto err; } - if (be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) { + if (buffer->magic != cpu_to_be32(DRBD_MD_MAGIC)) { dev_err(DEV, "Error while reading metadata, magic not found.\n"); rv = ERR_MD_INVALID; goto err; From 579b57ed730819970a3542b4bbcc2d4176f25c72 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 13 Jan 2011 18:40:57 +0100 Subject: [PATCH 0006/5831] drbd: Magic reserved block_id value cleanup The ID_VACANT definition has become entirely irrelevant by now. The is_syncer_block_id() macro does not improve the code, so eliminated it. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 9 +-------- drivers/block/drbd/drbd_receiver.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 14 ++++---------- 3 files changed, 7 insertions(+), 20 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 88b247eac342..c1d175514aa8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -87,17 +87,10 @@ extern char usermode_helper[]; */ #define DRBD_SIGKILL SIGHUP -/* All EEs on the free list should have ID_VACANT (== 0) - * freshly allocated EEs get !ID_VACANT (== 1) - * so if it says "cannot dereference null pointer at address 0x00000001", - * it is most likely one of these :( */ - #define ID_IN_SYNC (4711ULL) #define ID_OUT_OF_SYNC (4712ULL) - #define ID_SYNCER (-1ULL) -#define ID_VACANT 0 -#define is_syncer_block_id(id) ((id) == ID_SYNCER) + #define UUID_NEW_BM_OFFSET ((u64)0x0001000000000000ULL) struct drbd_conf; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 69eec6980c29..efe141eb521b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4308,7 +4308,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) update_peer_seq(mdev, be32_to_cpu(p->seq_num)); - if (is_syncer_block_id(p->block_id)) { + if (p->block_id == ID_SYNCER) { drbd_set_in_sync(mdev, sector, blksize); dec_rs_pending(mdev); return true; @@ -4349,7 +4349,7 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) update_peer_seq(mdev, be32_to_cpu(p->seq_num)); - if (is_syncer_block_id(p->block_id)) { + if (p->block_id == ID_SYNCER) { dec_rs_pending(mdev); drbd_rs_failed_io(mdev, sector, size); return true; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 10438c41f559..43a9fefd29b8 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -85,8 +85,6 @@ void 
drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local) unsigned long flags = 0; struct drbd_conf *mdev = e->mdev; - D_ASSERT(e->block_id != ID_VACANT); - spin_lock_irqsave(&mdev->req_lock, flags); mdev->read_cnt += e->size >> 9; list_del(&e->w.list); @@ -108,18 +106,16 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo struct drbd_conf *mdev = e->mdev; sector_t e_sector; int do_wake; - int is_syncer_req; + u64 block_id; int do_al_complete_io; - D_ASSERT(e->block_id != ID_VACANT); - /* after we moved e to done_ee, * we may no longer access it, * it may be freed/reused already! * (as soon as we release the req_lock) */ e_sector = e->sector; do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO; - is_syncer_req = is_syncer_block_id(e->block_id); + block_id = e->block_id; spin_lock_irqsave(&mdev->req_lock, flags); mdev->writ_cnt += e->size >> 9; @@ -131,15 +127,13 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo * done from "drbd_process_done_ee" within the appropriate w.cb * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */ - do_wake = is_syncer_req - ? list_empty(&mdev->sync_ee) - : list_empty(&mdev->active_ee); + do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee); if (test_bit(__EE_WAS_ERROR, &e->flags)) __drbd_chk_io_error(mdev, false); spin_unlock_irqrestore(&mdev->req_lock, flags); - if (is_syncer_req) + if (block_id == ID_SYNCER) drbd_rs_complete_io(mdev, e_sector); if (do_wake) From d628769b3c6b4ddafea358944ef1f106fccfaaff Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 13 Jan 2011 23:05:39 +0100 Subject: [PATCH 0007/5831] drbd: Move drbd_free_tl_hash() to drbd_main() This is the only place where this function is used. Make it static. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 - drivers/block/drbd/drbd_main.c | 30 ++++++++++++++++++++++++++++++ drivers/block/drbd/drbd_receiver.c | 30 ------------------------------ 3 files changed, 30 insertions(+), 31 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c1d175514aa8..c6d8200b4b5d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1592,7 +1592,6 @@ extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); extern void drbd_flush_workqueue(struct drbd_conf *mdev); -extern void drbd_free_tl_hash(struct drbd_conf *mdev); /* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to * mess with get_fs/set_fs, we know we are KERNEL_DS always. 
*/ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f65b8c532248..eecbfc8f8978 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -228,6 +228,36 @@ static void tl_cleanup(struct drbd_conf *mdev) mdev->tl_hash_s = 0; } +static void drbd_free_tl_hash(struct drbd_conf *mdev) +{ + struct hlist_head *h; + + spin_lock_irq(&mdev->req_lock); + + if (!mdev->tl_hash || mdev->state.conn != C_STANDALONE) { + spin_unlock_irq(&mdev->req_lock); + return; + } + /* paranoia code */ + for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++) + if (h->first) + dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n", + (int)(h - mdev->ee_hash), h->first); + kfree(mdev->ee_hash); + mdev->ee_hash = NULL; + mdev->ee_hash_s = 0; + + /* paranoia code */ + for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) + if (h->first) + dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n", + (int)(h - mdev->tl_hash), h->first); + kfree(mdev->tl_hash); + mdev->tl_hash = NULL; + mdev->tl_hash_s = 0; + spin_unlock_irq(&mdev->req_lock); +} + /** * _tl_add_barrier() - Adds a barrier to the transfer log * @mdev: DRBD device. diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index efe141eb521b..bafc233ef3fa 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3740,36 +3740,6 @@ void drbd_flush_workqueue(struct drbd_conf *mdev) wait_for_completion(&barr.done); } -void drbd_free_tl_hash(struct drbd_conf *mdev) -{ - struct hlist_head *h; - - spin_lock_irq(&mdev->req_lock); - - if (!mdev->tl_hash || mdev->state.conn != C_STANDALONE) { - spin_unlock_irq(&mdev->req_lock); - return; - } - /* paranoia code */ - for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++) - if (h->first) - dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n", - (int)(h - mdev->ee_hash), h->first); - kfree(mdev->ee_hash); - mdev->ee_hash = NULL; - mdev->ee_hash_s = 0; - - /* paranoia code */ - for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) - if (h->first) - dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n", - (int)(h - mdev->tl_hash), h->first); - kfree(mdev->tl_hash); - mdev->tl_hash = NULL; - mdev->tl_hash_s = 0; - spin_unlock_irq(&mdev->req_lock); -} - static void drbd_disconnect(struct drbd_conf *mdev) { enum drbd_fencing_p fp; From 9c50842a35420f9c8fde9da626a9c0cad456becc Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 14 Jan 2011 21:19:36 +0100 Subject: [PATCH 0008/5831] drbd: Update outdated comment Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index bafc233ef3fa..26810ce5d1e1 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1512,7 +1512,7 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packets cmd, un if (get_ldev(mdev)) { /* data is submitted to disk within recv_resync_read. * corresponding put_ldev done below on error, - * or in drbd_endio_write_sec. */ + * or in drbd_endio_sec. */ ok = recv_resync_read(mdev, sector, data_size); } else { if (__ratelimit(&drbd_ratelimit_state)) @@ -1673,7 +1673,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned /* get_ldev(mdev) successful. 
* Corresponding put_ldev done either below (on various errors), - * or in drbd_endio_write_sec, if we successfully submit the data at + * or in drbd_endio_sec, if we successfully submit the data at * the end of this function. */ sector = be64_to_cpu(p->sector); From 516245856456db591c5336a90584077545e772b1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 5 Jan 2011 23:27:02 +0100 Subject: [PATCH 0009/5831] drbd: Request lookup code cleanup (1) Move _ar_id_to_req() to drbd_receiver.c and mark it non-inline. Remove the leading underscores from _ar_id_to_req() and _ack_id_to_req(). Mark ar_hash_slot() inline. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 32 +++++++++++++++++++++++------- drivers/block/drbd/drbd_req.h | 21 ++------------------ 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 26810ce5d1e1..1684c4809a91 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1469,6 +1469,24 @@ fail: return false; } +/* when we receive the answer for a read request, + * verify that we actually know about it */ +static struct drbd_request *ar_id_to_req(struct drbd_conf *mdev, u64 id, + sector_t sector) +{ + struct hlist_head *slot = ar_hash_slot(mdev, sector); + struct hlist_node *n; + struct drbd_request *req; + + hlist_for_each_entry(req, n, slot, collision) { + if ((unsigned long)req == (unsigned long)id) { + D_ASSERT(req->sector == sector); + return req; + } + } + return NULL; +} + static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { struct drbd_request *req; @@ -1479,7 +1497,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi sector = be64_to_cpu(p->sector); spin_lock_irq(&mdev->req_lock); - req = _ar_id_to_req(mdev, p->block_id, sector); + req = ar_id_to_req(mdev, p->block_id, sector); spin_unlock_irq(&mdev->req_lock); if (unlikely(!req)) { dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n"); @@ -4222,8 +4240,8 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) /* when we receive the ACK for a write request, * verify that we actually know about it */ -static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, - u64 id, sector_t sector) +static struct drbd_request *ack_id_to_req(struct drbd_conf *mdev, u64 id, + sector_t sector) { struct hlist_head *slot = tl_hash_slot(mdev, sector); struct hlist_node *n; @@ -4232,7 +4250,7 @@ static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, hlist_for_each_entry(req, n, slot, collision) { if ((unsigned long)req == (unsigned long)id) { if (req->sector != sector) { - dev_err(DEV, "_ack_id_to_req: found req %p but it has " + dev_err(DEV, "ack_id_to_req: found req %p but it has " "wrong sector (%llus versus %llus)\n", req, (unsigned long long)req->sector, (unsigned long long)sector); @@ -4306,7 +4324,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) } return validate_req_change_req_state(mdev, p->block_id, sector, - _ack_id_to_req, __func__ , what); + ack_id_to_req, __func__, what); } static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) @@ -4326,7 +4344,7 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) } spin_lock_irq(&mdev->req_lock); - req = _ack_id_to_req(mdev, p->block_id, sector); + req = ack_id_to_req(mdev, p->block_id, sector); if (!req) { 
spin_unlock_irq(&mdev->req_lock); if (mdev->net_conf->wire_protocol == DRBD_PROT_A || @@ -4363,7 +4381,7 @@ static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) (unsigned long long)sector, be32_to_cpu(p->blksize)); return validate_req_change_req_state(mdev, p->block_id, sector, - _ar_id_to_req, __func__ , neg_acked); + ar_id_to_req, __func__ , neg_acked); } static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 68a234a5fdc5..a773636cca9d 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -241,30 +241,13 @@ struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector) } /* application reads (drbd_request objects) */ -static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector) +static inline +struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector) { return mdev->app_reads_hash + ((unsigned int)(sector) % APP_R_HSIZE); } -/* when we receive the answer for a read request, - * verify that we actually know about it */ -static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev, - u64 id, sector_t sector) -{ - struct hlist_head *slot = ar_hash_slot(mdev, sector); - struct hlist_node *n; - struct drbd_request *req; - - hlist_for_each_entry(req, n, slot, collision) { - if ((unsigned long)req == (unsigned long)id) { - D_ASSERT(req->sector == sector); - return req; - } - } - return NULL; -} - static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src) { struct bio *bio; From 668eebc6a10ba146db6b8257b9938121d1f3a06a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 20 Jan 2011 17:14:26 +0100 Subject: [PATCH 0010/5831] drbd: Request lookup code cleanup (2) Unify the ar_id_to_req() and ack_id_to_req() functions: make both fail if the consistency check fails. Move the request lookup code now duplicated in both functions into its own function. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 55 +++++++++++++++--------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1684c4809a91..ae32aed441ae 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1469,22 +1469,37 @@ fail: return false; } +static struct drbd_request * +find_request(struct drbd_conf *mdev, + struct hlist_head *(*hash_slot)(struct drbd_conf *, sector_t), + u64 id, sector_t sector, const char *func) +{ + struct hlist_head *slot = hash_slot(mdev, sector); + struct hlist_node *n; + struct drbd_request *req; + + hlist_for_each_entry(req, n, slot, collision) { + if ((unsigned long)req != (unsigned long)id) + continue; + if (req->sector != sector) { + dev_err(DEV, "%s: found request %lu but it has " + "wrong sector (%llus versus %llus)\n", + func, (unsigned long)req, + (unsigned long long)req->sector, + (unsigned long long)sector); + break; + } + return req; + } + return NULL; +} + /* when we receive the answer for a read request, * verify that we actually know about it */ static struct drbd_request *ar_id_to_req(struct drbd_conf *mdev, u64 id, sector_t sector) { - struct hlist_head *slot = ar_hash_slot(mdev, sector); - struct hlist_node *n; - struct drbd_request *req; - - hlist_for_each_entry(req, n, slot, collision) { - if ((unsigned long)req == (unsigned long)id) { - D_ASSERT(req->sector == sector); - return req; - } - } - return NULL; + return find_request(mdev, ar_hash_slot, id, sector, __func__); } static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) @@ -4243,23 +4258,7 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) static struct drbd_request *ack_id_to_req(struct drbd_conf *mdev, u64 id, sector_t sector) { - struct hlist_head *slot = tl_hash_slot(mdev, sector); - struct hlist_node *n; - struct drbd_request *req; - - hlist_for_each_entry(req, n, slot, collision) { - if ((unsigned long)req == (unsigned long)id) { - if (req->sector != sector) { - dev_err(DEV, "ack_id_to_req: found req %p but it has " - "wrong sector (%llus versus %llus)\n", req, - (unsigned long long)req->sector, - (unsigned long long)sector); - break; - } - return req; - } - } - return NULL; + return find_request(mdev, tl_hash_slot, id, sector, __func__); } static int validate_req_change_req_state(struct drbd_conf *mdev, From ae3388daaec96cc53d6d02cae0d8b744a6b9ca5c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 20 Jan 2011 17:23:59 +0100 Subject: [PATCH 0011/5831] drbd: Request lookup code cleanup (3) Get rid of the ar_id_to_req() and ack_id_to_req() wrappers. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index ae32aed441ae..84c8d94a9d09 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1494,14 +1494,6 @@ find_request(struct drbd_conf *mdev, return NULL; } -/* when we receive the answer for a read request, - * verify that we actually know about it */ -static struct drbd_request *ar_id_to_req(struct drbd_conf *mdev, u64 id, - sector_t sector) -{ - return find_request(mdev, ar_hash_slot, id, sector, __func__); -} - static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { struct drbd_request *req; @@ -1512,7 +1504,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi sector = be64_to_cpu(p->sector); spin_lock_irq(&mdev->req_lock); - req = ar_id_to_req(mdev, p->block_id, sector); + req = find_request(mdev, ar_hash_slot, p->block_id, sector, __func__); spin_unlock_irq(&mdev->req_lock); if (unlikely(!req)) { dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n"); @@ -4253,24 +4245,16 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) return true; } -/* when we receive the ACK for a write request, - * verify that we actually know about it */ -static struct drbd_request *ack_id_to_req(struct drbd_conf *mdev, u64 id, - sector_t sector) -{ - return find_request(mdev, tl_hash_slot, id, sector, __func__); -} - static int validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, - struct drbd_request *(*validator)(struct drbd_conf *, u64, sector_t), + struct hlist_head *(*hash_slot)(struct drbd_conf *, sector_t), const char *func, enum drbd_req_event what) { struct drbd_request *req; struct bio_and_error m; spin_lock_irq(&mdev->req_lock); - req = validator(mdev, id, sector); + req = find_request(mdev, hash_slot, id, sector, func); if (unlikely(!req)) { spin_unlock_irq(&mdev->req_lock); @@ -4323,7 +4307,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) } return validate_req_change_req_state(mdev, p->block_id, sector, - ack_id_to_req, __func__, what); + tl_hash_slot, __func__, what); } static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) @@ -4343,7 +4327,7 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) } spin_lock_irq(&mdev->req_lock); - req = ack_id_to_req(mdev, p->block_id, sector); + req = find_request(mdev, tl_hash_slot, p->block_id, sector, __func__); if (!req) { spin_unlock_irq(&mdev->req_lock); if (mdev->net_conf->wire_protocol == DRBD_PROT_A || @@ -4380,7 +4364,7 @@ static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) (unsigned long long)sector, be32_to_cpu(p->blksize)); return validate_req_change_req_state(mdev, p->block_id, sector, - ar_id_to_req, __func__ , neg_acked); + ar_hash_slot, __func__, neg_acked); } static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) From c3afd8f568999e974382f7b5b05267c018056016 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 20 Jan 2011 22:25:40 +0100 Subject: [PATCH 0012/5831] drbd: Request lookup code cleanup (4) Factor out duplicate code in got_NegAck(). 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 66 +++++++++++++----------------- 1 file changed, 28 insertions(+), 38 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 84c8d94a9d09..8e7875e72609 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1472,7 +1472,7 @@ fail: static struct drbd_request * find_request(struct drbd_conf *mdev, struct hlist_head *(*hash_slot)(struct drbd_conf *, sector_t), - u64 id, sector_t sector, const char *func) + u64 id, sector_t sector, bool missing_ok, const char *func) { struct hlist_head *slot = hash_slot(mdev, sector); struct hlist_node *n; @@ -1487,10 +1487,14 @@ find_request(struct drbd_conf *mdev, func, (unsigned long)req, (unsigned long long)req->sector, (unsigned long long)sector); - break; + return NULL; } return req; } + if (!missing_ok) { + dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func, + (unsigned long)id, (unsigned long long)sector); + } return NULL; } @@ -1504,12 +1508,10 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi sector = be64_to_cpu(p->sector); spin_lock_irq(&mdev->req_lock); - req = find_request(mdev, ar_hash_slot, p->block_id, sector, __func__); + req = find_request(mdev, ar_hash_slot, p->block_id, sector, false, __func__); spin_unlock_irq(&mdev->req_lock); - if (unlikely(!req)) { - dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n"); + if (unlikely(!req)) return false; - } /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid * special casing it there for the various failure cases. @@ -4248,18 +4250,15 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) static int validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, struct hlist_head *(*hash_slot)(struct drbd_conf *, sector_t), - const char *func, enum drbd_req_event what) + const char *func, enum drbd_req_event what, bool missing_ok) { struct drbd_request *req; struct bio_and_error m; spin_lock_irq(&mdev->req_lock); - req = find_request(mdev, hash_slot, id, sector, func); + req = find_request(mdev, hash_slot, id, sector, missing_ok, func); if (unlikely(!req)) { spin_unlock_irq(&mdev->req_lock); - - dev_err(DEV, "%s: failed to find req %p, sector %llus\n", func, - (void *)(unsigned long)id, (unsigned long long)sector); return false; } __req_mod(req, what, &m); @@ -4307,7 +4306,8 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) } return validate_req_change_req_state(mdev, p->block_id, sector, - tl_hash_slot, __func__, what); + tl_hash_slot, __func__, what, + false); } static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) @@ -4315,8 +4315,9 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); int size = be32_to_cpu(p->blksize); - struct drbd_request *req; - struct bio_and_error m; + bool missing_ok = mdev->net_conf->wire_protocol == DRBD_PROT_A || + mdev->net_conf->wire_protocol == DRBD_PROT_B; + bool found; update_peer_seq(mdev, be32_to_cpu(p->seq_num)); @@ -4326,31 +4327,19 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) return true; } - spin_lock_irq(&mdev->req_lock); - req = find_request(mdev, tl_hash_slot, p->block_id, sector, __func__); - if (!req) { - spin_unlock_irq(&mdev->req_lock); - if (mdev->net_conf->wire_protocol == DRBD_PROT_A || - 
mdev->net_conf->wire_protocol == DRBD_PROT_B) { - /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. - The master bio might already be completed, therefore the - request is no longer in the collision hash. - => Do not try to validate block_id as request. */ - /* In Protocol B we might already have got a P_RECV_ACK - but then get a P_NEG_ACK after wards. */ - drbd_set_out_of_sync(mdev, sector, size); - return true; - } else { - dev_err(DEV, "%s: failed to find req %p, sector %llus\n", __func__, - (void *)(unsigned long)p->block_id, (unsigned long long)sector); + found = validate_req_change_req_state(mdev, p->block_id, sector, + tl_hash_slot, __func__, + neg_acked, missing_ok); + if (!found) { + /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. + The master bio might already be completed, therefore the + request is no longer in the collision hash. */ + /* In Protocol B we might already have got a P_RECV_ACK + but then get a P_NEG_ACK afterwards. */ + if (!missing_ok) return false; - } + drbd_set_out_of_sync(mdev, sector, size); } - __req_mod(req, neg_acked, &m); - spin_unlock_irq(&mdev->req_lock); - - if (m.bio) - complete_master_bio(mdev, &m); return true; } @@ -4364,7 +4353,8 @@ static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) (unsigned long long)sector, be32_to_cpu(p->blksize)); return validate_req_change_req_state(mdev, p->block_id, sector, - ar_hash_slot, __func__, neg_acked); + ar_hash_slot, __func__, neg_acked, + false); } static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) From 0939b0e5cdeeafa0adf0150edd350092e47acc49 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 3 Jan 2011 17:42:00 +0100 Subject: [PATCH 0013/5831] drbd: Add interval tree data structure Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/Makefile | 1 + drivers/block/drbd/drbd_interval.c | 156 +++++++++++++++++++++++++++++ drivers/block/drbd/drbd_interval.h | 31 ++++++ 3 files changed, 188 insertions(+) create mode 100644 drivers/block/drbd/drbd_interval.c create mode 100644 drivers/block/drbd/drbd_interval.h diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile index 0d3f337ff5ff..cacbb04f285d 100644 --- a/drivers/block/drbd/Makefile +++ b/drivers/block/drbd/Makefile @@ -1,5 +1,6 @@ drbd-y := drbd_bitmap.o drbd_proc.o drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o drbd-y += drbd_main.o drbd_strings.o drbd_nl.o +drbd-y += drbd_interval.o obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c new file mode 100644 index 000000000000..2511dd9993f3 --- /dev/null +++ b/drivers/block/drbd/drbd_interval.c @@ -0,0 +1,156 @@ +#include "drbd_interval.h" + +/** + * interval_end - return end of @node + */ +static inline +sector_t interval_end(struct rb_node *node) +{ + struct drbd_interval *this = rb_entry(node, struct drbd_interval, rb); + return this->end; +} + +/** + * update_interval_end - recompute end of @node + * + * The end of an interval is the highest (start + (size >> 9)) value of this + * node and of its children. Called for @node and its parents whenever the end + * may have changed. 
+ */ +static void +update_interval_end(struct rb_node *node, void *__unused) +{ + struct drbd_interval *this = rb_entry(node, struct drbd_interval, rb); + sector_t end; + + end = this->sector + (this->size >> 9); + if (node->rb_left) { + sector_t left = interval_end(node->rb_left); + if (left > end) + end = left; + } + if (node->rb_right) { + sector_t right = interval_end(node->rb_right); + if (right > end) + end = right; + } + this->end = end; +} + +/** + * drbd_insert_interval - insert a new interval into a tree + */ +bool +drbd_insert_interval(struct rb_root *root, struct drbd_interval *this) +{ + struct rb_node **new = &root->rb_node, *parent = NULL; + + BUG_ON(!IS_ALIGNED(this->size, 512)); + + while (*new) { + struct drbd_interval *here = + rb_entry(*new, struct drbd_interval, rb); + + parent = *new; + if (this->sector < here->sector) + new = &(*new)->rb_left; + else if (this->sector > here->sector) + new = &(*new)->rb_right; + else if (this < here) + new = &(*new)->rb_left; + else if (this->sector > here->sector) + new = &(*new)->rb_right; + return false; + } + + rb_link_node(&this->rb, parent, new); + rb_insert_color(&this->rb, root); + rb_augment_insert(&this->rb, update_interval_end, NULL); + return true; +} + +/** + * drbd_contains_interval - check if a tree contains a given interval + * @sector: start sector of @interval + * @interval: may not be a valid pointer + * + * Returns if the tree contains the node @interval with start sector @start. + * Does not dereference @interval until @interval is known to be a valid object + * in @tree. Returns %false if @interval is in the tree but with a different + * sector number. + */ +bool +drbd_contains_interval(struct rb_root *root, sector_t sector, + struct drbd_interval *interval) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct drbd_interval *here = + rb_entry(node, struct drbd_interval, rb); + + if (sector < here->sector) + node = node->rb_left; + else if (sector > here->sector) + node = node->rb_right; + else if (interval < here) + node = node->rb_left; + else if (interval > here) + node = node->rb_right; + else + return interval->sector == sector; + } + return false; +} + +/** + * drbd_remove_interval - remove an interval from a tree + */ +void +drbd_remove_interval(struct rb_root *root, struct drbd_interval *this) +{ + struct rb_node *deepest; + + deepest = rb_augment_erase_begin(&this->rb); + rb_erase(&this->rb, root); + rb_augment_erase_end(deepest, update_interval_end, NULL); +} + +/** + * drbd_find_overlap - search for an interval overlapping with [sector, sector + size) + * @sector: start sector + * @size: size, aligned to 512 bytes + * + * Returns the interval overlapping with [sector, sector + size), or NULL. + * When there is more than one overlapping interval in the tree, the interval + * with the lowest start sector is returned. 
+ */ +struct drbd_interval * +drbd_find_overlap(struct rb_root *root, sector_t sector, unsigned int size) +{ + struct rb_node *node = root->rb_node; + struct drbd_interval *overlap = NULL; + sector_t end = sector + (size >> 9); + + BUG_ON(!IS_ALIGNED(size, 512)); + + while (node) { + struct drbd_interval *here = + rb_entry(node, struct drbd_interval, rb); + + if (node->rb_left && + sector < interval_end(node->rb_left)) { + /* Overlap if any must be on left side */ + node = node->rb_left; + } else if (here->sector < end && + sector < here->sector + (here->size >> 9)) { + overlap = here; + break; + } else if (sector >= here->sector) { + /* Overlap if any must be on right side */ + node = node->rb_right; + } else + break; + } + return overlap; +} diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h new file mode 100644 index 000000000000..bf8dcf7bab09 --- /dev/null +++ b/drivers/block/drbd/drbd_interval.h @@ -0,0 +1,31 @@ +#ifndef __DRBD_INTERVAL_H +#define __DRBD_INTERVAL_H + +#include +#include + +struct drbd_interval { + struct rb_node rb; + sector_t sector; /* start sector of the interval */ + unsigned int size; /* size in bytes */ + sector_t end; /* highest interval end in subtree */ +}; + +static inline void drbd_clear_interval(struct drbd_interval *i) +{ + RB_CLEAR_NODE(&i->rb); +} + +static inline bool drbd_interval_empty(struct drbd_interval *i) +{ + return RB_EMPTY_NODE(&i->rb); +} + +bool drbd_insert_interval(struct rb_root *, struct drbd_interval *); +struct drbd_interval *drbd_find_interval(struct rb_root *, sector_t, + struct drbd_interval *); +void drbd_remove_interval(struct rb_root *, struct drbd_interval *); +struct drbd_interval *drbd_find_overlap(struct rb_root *, sector_t, + unsigned int); + +#endif /* __DRBD_INTERVAL_H */ From ace652acf2d7e564dac48c615d9184e7ed575f9c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 3 Jan 2011 17:09:58 +0100 Subject: [PATCH 0014/5831] drbd: Put sector and size in struct drbd_request into struct drbd_interval Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 5 ++-- drivers/block/drbd/drbd_main.c | 14 ++++----- drivers/block/drbd/drbd_receiver.c | 8 ++--- drivers/block/drbd/drbd_req.c | 48 +++++++++++++++--------------- drivers/block/drbd/drbd_req.h | 4 +-- drivers/block/drbd/drbd_worker.c | 4 +-- 6 files changed, 42 insertions(+), 41 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c6d8200b4b5d..d7678e85031b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -682,6 +682,8 @@ struct drbd_work { drbd_work_cb cb; }; +#include "drbd_interval.h" + struct drbd_request { struct drbd_work w; struct drbd_conf *mdev; @@ -693,8 +695,7 @@ struct drbd_request { struct bio *private_bio; struct hlist_node collision; - sector_t sector; - unsigned int size; + struct drbd_interval i; unsigned int epoch; /* barrier_nr */ /* barrier_nr: used to check on "completion" whether this req was in diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index eecbfc8f8978..a77b4bfd452a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2711,19 +2711,19 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? 
crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; - if (req->size <= DRBD_MAX_SIZE_H80_PACKET) { + if (req->i.size <= DRBD_MAX_SIZE_H80_PACKET) { p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); p.head.h80.command = cpu_to_be16(P_DATA); p.head.h80.length = - cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->size); + cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->i.size); } else { p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); p.head.h95.command = cpu_to_be16(P_DATA); p.head.h95.length = - cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->size); + cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->i.size); } - p.sector = cpu_to_be64(req->sector); + p.sector = cpu_to_be64(req->i.sector); p.block_id = (unsigned long)req; p.seq_num = cpu_to_be32(req->seq_num = atomic_add_return(1, &mdev->packet_seq)); @@ -2769,7 +2769,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) if (memcmp(mdev->int_dig_out, digest, dgs)) { dev_warn(DEV, "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n", - (unsigned long long)req->sector, req->size); + (unsigned long long)req->i.sector, req->i.size); } } /* else if (dgs > 64) { ... Be noisy about digest too large ... @@ -2837,8 +2837,8 @@ int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req) { struct p_block_desc p; - p.sector = cpu_to_be64(req->sector); - p.blksize = cpu_to_be32(req->size); + p.sector = cpu_to_be64(req->i.sector); + p.blksize = cpu_to_be32(req->i.size); return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p)); } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8e7875e72609..6bb1a2f2a38d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1481,11 +1481,11 @@ find_request(struct drbd_conf *mdev, hlist_for_each_entry(req, n, slot, collision) { if ((unsigned long)req != (unsigned long)id) continue; - if (req->sector != sector) { + if (req->i.sector != sector) { dev_err(DEV, "%s: found request %lu but it has " "wrong sector (%llus versus %llus)\n", func, (unsigned long)req, - (unsigned long long)req->sector, + (unsigned long long)req->i.sector, (unsigned long long)sector); return NULL; } @@ -1783,7 +1783,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned hlist_add_head(&e->collision, ee_hash_slot(mdev, sector)); -#define OVERLAPS overlaps(i->sector, i->size, sector, size) +#define OVERLAPS overlaps(i->i.sector, i->i.size, sector, size) slot = tl_hash_slot(mdev, sector); first = 1; for (;;) { @@ -1800,7 +1800,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned " new: %llus +%u; pending: %llus +%u\n", current->comm, current->pid, (unsigned long long)sector, size, - (unsigned long long)i->sector, i->size); + (unsigned long long)i->i.sector, i->i.size); if (i->rq_state & RQ_NET_PENDING) ++have_unacked; ++have_conflict; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 3424d675b769..1af11a198b58 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -77,10 +77,10 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const * Other places where we set out-of-sync: * READ with local io-error */ if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK)) - drbd_set_out_of_sync(mdev, req->sector, req->size); + drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS)) - 
drbd_set_in_sync(mdev, req->sector, req->size); + drbd_set_in_sync(mdev, req->i.sector, req->i.size); /* one might be tempted to move the drbd_al_complete_io * to the local io completion callback drbd_endio_pri. @@ -95,12 +95,12 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const if (s & RQ_LOCAL_MASK) { if (get_ldev_if_state(mdev, D_FAILED)) { if (s & RQ_IN_ACT_LOG) - drbd_al_complete_io(mdev, req->sector); + drbd_al_complete_io(mdev, req->i.sector); put_ldev(mdev); } else if (__ratelimit(&drbd_ratelimit_state)) { dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), " "but my Disk seems to have failed :(\n", - (unsigned long long) req->sector); + (unsigned long long) req->i.sector); } } } @@ -155,20 +155,20 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, * if we have the ee_hash (two_primaries) and * this has been on the network */ if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) { - const sector_t sector = req->sector; - const int size = req->size; + const sector_t sector = req->i.sector; + const int size = req->i.size; /* ASSERT: * there must be no conflicting requests, since * they must have been failed on the spot */ -#define OVERLAPS overlaps(sector, size, i->sector, i->size) +#define OVERLAPS overlaps(sector, size, i->i.sector, i->i.size) slot = tl_hash_slot(mdev, sector); hlist_for_each_entry(i, n, slot, collision) { if (OVERLAPS) { dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " "other: %p %llus +%u\n", req, (unsigned long long)sector, size, - i, (unsigned long long)i->sector, i->size); + i, (unsigned long long)i->i.sector, i->i.size); } } @@ -186,7 +186,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, * we just have to do a wake_up. */ #undef OVERLAPS #define OVERLAPS overlaps(sector, size, e->sector, e->size) - slot = ee_hash_slot(mdev, req->sector); + slot = ee_hash_slot(mdev, req->i.sector); hlist_for_each_entry(e, n, slot, collision) { if (OVERLAPS) { wake_up(&mdev->misc_wait); @@ -322,8 +322,8 @@ static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_e static int _req_conflicts(struct drbd_request *req) { struct drbd_conf *mdev = req->mdev; - const sector_t sector = req->sector; - const int size = req->size; + const sector_t sector = req->i.sector; + const int size = req->i.size; struct drbd_request *i; struct drbd_epoch_entry *e; struct hlist_node *n; @@ -339,7 +339,7 @@ static int _req_conflicts(struct drbd_request *req) goto out_no_conflict; BUG_ON(mdev->tl_hash == NULL); -#define OVERLAPS overlaps(i->sector, i->size, sector, size) +#define OVERLAPS overlaps(i->i.sector, i->i.size, sector, size) slot = tl_hash_slot(mdev, sector); hlist_for_each_entry(i, n, slot, collision) { if (OVERLAPS) { @@ -348,7 +348,7 @@ static int _req_conflicts(struct drbd_request *req) "pending: %llus +%u\n", current->comm, current->pid, (unsigned long long)sector, size, - (unsigned long long)i->sector, i->size); + (unsigned long long)i->i.sector, i->i.size); goto out_conflict; } } @@ -430,9 +430,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case completed_ok: if (bio_data_dir(req->master_bio) == WRITE) - mdev->writ_cnt += req->size>>9; + mdev->writ_cnt += req->i.size >> 9; else - mdev->read_cnt += req->size>>9; + mdev->read_cnt += req->i.size >> 9; req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK); req->rq_state &= ~RQ_LOCAL_PENDING; @@ -459,7 +459,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case read_completed_with_error: 
- drbd_set_out_of_sync(mdev, req->sector, req->size); + drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; @@ -491,7 +491,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* so we can verify the handle in the answer packet * corresponding hlist_del is in _req_may_be_done() */ - hlist_add_head(&req->collision, ar_hash_slot(mdev, req->sector)); + hlist_add_head(&req->collision, ar_hash_slot(mdev, req->i.sector)); set_bit(UNPLUG_REMOTE, &mdev->flags); @@ -507,7 +507,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* assert something? */ /* from drbd_make_request_common only */ - hlist_add_head(&req->collision, tl_hash_slot(mdev, req->sector)); + hlist_add_head(&req->collision, tl_hash_slot(mdev, req->i.sector)); /* corresponding hlist_del is in _req_may_be_done() */ /* NOTE @@ -572,7 +572,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case handed_over_to_network: /* assert something? */ if (bio_data_dir(req->master_bio) == WRITE) - atomic_add(req->size>>9, &mdev->ap_in_flight); + atomic_add(req->i.size >> 9, &mdev->ap_in_flight); if (bio_data_dir(req->master_bio) == WRITE && mdev->net_conf->wire_protocol == DRBD_PROT_A) { @@ -608,7 +608,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); req->rq_state |= RQ_NET_DONE; if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE) - atomic_sub(req->size>>9, &mdev->ap_in_flight); + atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); /* if it is still queued, we may not complete it here. * it will be canceled soon. */ @@ -625,7 +625,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, if (what == conflict_discarded_by_peer) dev_alert(DEV, "Got DiscardAck packet %llus +%u!" " DRBD is not a random data generator!\n", - (unsigned long long)req->sector, req->size); + (unsigned long long)req->i.sector, req->i.size); req->rq_state |= RQ_NET_DONE; /* fall through */ case write_acked_by_peer: @@ -647,7 +647,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_NET_OK; D_ASSERT(req->rq_state & RQ_NET_PENDING); dec_ap_pending(mdev); - atomic_sub(req->size>>9, &mdev->ap_in_flight); + atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); req->rq_state &= ~RQ_NET_PENDING; _req_may_be_done_not_susp(req, m); break; @@ -656,7 +656,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* assert something? 
*/ if (req->rq_state & RQ_NET_PENDING) { dec_ap_pending(mdev); - atomic_sub(req->size>>9, &mdev->ap_in_flight); + atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); } req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); @@ -715,7 +715,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, if ((req->rq_state & RQ_NET_MASK) != 0) { req->rq_state |= RQ_NET_DONE; if (mdev->net_conf->wire_protocol == DRBD_PROT_A) - atomic_sub(req->size>>9, &mdev->ap_in_flight); + atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); } _req_may_be_done(req, m); /* Allowed while state.susp */ break; diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index a773636cca9d..2520186c4c2b 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -272,8 +272,8 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, req->mdev = mdev; req->master_bio = bio_src; req->epoch = 0; - req->sector = bio_src->bi_sector; - req->size = bio_src->bi_size; + req->i.sector = bio_src->bi_sector; + req->i.size = bio_src->bi_size; INIT_HLIST_NODE(&req->collision); INIT_LIST_HEAD(&req->tl_requests); INIT_LIST_HEAD(&req->w.list); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 43a9fefd29b8..a1eff6e9c0e7 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1288,7 +1288,7 @@ int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return 1; } - ok = drbd_send_drequest(mdev, P_DATA_REQUEST, req->sector, req->size, + ok = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size, (unsigned long)req); if (!ok) { @@ -1307,7 +1307,7 @@ int w_restart_disk_io(struct drbd_conf *mdev, struct drbd_work *w, int cancel) struct drbd_request *req = container_of(w, struct drbd_request, w); if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) - drbd_al_begin_io(mdev, req->sector); + drbd_al_begin_io(mdev, req->i.sector); /* Calling drbd_al_begin_io() out of the worker might deadlocks theoretically. Practically it can not deadlock, since this is only used when unfreezing IOs. 
All the extents of the requests From de696716e8c40475d259fb49b3876ca0d9415970 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 20 Jan 2011 15:00:24 +0100 Subject: [PATCH 0015/5831] drbd: Use interval tree for overlapping write request detection Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 ++ drivers/block/drbd/drbd_main.c | 1 + drivers/block/drbd/drbd_receiver.c | 38 ++++++++++---------- drivers/block/drbd/drbd_req.c | 56 ++++++++++++++++-------------- drivers/block/drbd/drbd_req.h | 1 + 5 files changed, 52 insertions(+), 47 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d7678e85031b..058371318da4 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1019,6 +1019,9 @@ struct drbd_conf { struct hlist_head *tl_hash; unsigned int tl_hash_s; + /* Interval tree of pending local write requests */ + struct rb_root write_requests; + /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ unsigned long rs_total; /* number of resync blocks that failed in this run */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a77b4bfd452a..4d85838f53e3 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3473,6 +3473,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) /* no need to lock access, we are still initializing this minor device. */ if (!tl_init(mdev)) goto out_no_tl; + mdev->write_requests = RB_ROOT; mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL); if (!mdev->app_reads_hash) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6bb1a2f2a38d..6b0725842508 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1733,9 +1733,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned const int size = e->size; const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags); DEFINE_WAIT(wait); - struct drbd_request *i; - struct hlist_node *n; - struct hlist_head *slot; int first; D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); @@ -1783,30 +1780,31 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned hlist_add_head(&e->collision, ee_hash_slot(mdev, sector)); -#define OVERLAPS overlaps(i->i.sector, i->i.size, sector, size) - slot = tl_hash_slot(mdev, sector); first = 1; for (;;) { + struct drbd_interval *i; int have_unacked = 0; int have_conflict = 0; prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE); - hlist_for_each_entry(i, n, slot, collision) { - if (OVERLAPS) { - /* only ALERT on first iteration, - * we may be woken up early... */ - if (first) - dev_alert(DEV, "%s[%u] Concurrent local write detected!" - " new: %llus +%u; pending: %llus +%u\n", - current->comm, current->pid, - (unsigned long long)sector, size, - (unsigned long long)i->i.sector, i->i.size); - if (i->rq_state & RQ_NET_PENDING) - ++have_unacked; - ++have_conflict; - } + + i = drbd_find_overlap(&mdev->write_requests, sector, size); + if (i) { + struct drbd_request *req2 = + container_of(i, struct drbd_request, i); + + /* only ALERT on first iteration, + * we may be woken up early... */ + if (first) + dev_alert(DEV, "%s[%u] Concurrent local write detected!" 
+ " new: %llus +%u; pending: %llus +%u\n", + current->comm, current->pid, + (unsigned long long)sector, size, + (unsigned long long)req2->i.sector, req2->i.size); + if (req2->rq_state & RQ_NET_PENDING) + ++have_unacked; + ++have_conflict; } -#undef OVERLAPS if (!have_conflict) break; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 1af11a198b58..593576fcf64e 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -135,7 +135,6 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, struct drbd_request *req) { const unsigned long s = req->rq_state; - struct drbd_request *i; struct drbd_epoch_entry *e; struct hlist_node *n; struct hlist_head *slot; @@ -157,19 +156,21 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) { const sector_t sector = req->i.sector; const int size = req->i.size; + struct drbd_interval *i; /* ASSERT: * there must be no conflicting requests, since * they must have been failed on the spot */ -#define OVERLAPS overlaps(sector, size, i->i.sector, i->i.size) - slot = tl_hash_slot(mdev, sector); - hlist_for_each_entry(i, n, slot, collision) { - if (OVERLAPS) { - dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " - "other: %p %llus +%u\n", - req, (unsigned long long)sector, size, - i, (unsigned long long)i->i.sector, i->i.size); - } + + i = drbd_find_overlap(&mdev->write_requests, sector, size); + if (i) { + struct drbd_request *req2 = + container_of(i, struct drbd_request, i); + + dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " + "other: %p %llus +%u\n", + req, (unsigned long long)sector, size, + i, (unsigned long long)req2->i.sector, req2->i.size); } /* maybe "wake" those conflicting epoch entries @@ -184,7 +185,6 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, * * anyways, if we found one, * we just have to do a wake_up. */ -#undef OVERLAPS #define OVERLAPS overlaps(sector, size, e->sector, e->size) slot = ee_hash_slot(mdev, req->i.sector); hlist_for_each_entry(e, n, slot, collision) { @@ -260,9 +260,11 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) /* remove the request from the conflict detection * respective block_id verification hash */ - if (!hlist_unhashed(&req->collision)) + if (!hlist_unhashed(&req->collision)) { hlist_del(&req->collision); - else + if (!drbd_interval_empty(&req->i)) + drbd_remove_interval(&mdev->write_requests, &req->i); + } else D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); /* for writes we need to do some extra housekeeping */ @@ -324,7 +326,7 @@ static int _req_conflicts(struct drbd_request *req) struct drbd_conf *mdev = req->mdev; const sector_t sector = req->i.sector; const int size = req->i.size; - struct drbd_request *i; + struct drbd_interval *i; struct drbd_epoch_entry *e; struct hlist_node *n; struct hlist_head *slot; @@ -339,24 +341,23 @@ static int _req_conflicts(struct drbd_request *req) goto out_no_conflict; BUG_ON(mdev->tl_hash == NULL); -#define OVERLAPS overlaps(i->i.sector, i->i.size, sector, size) - slot = tl_hash_slot(mdev, sector); - hlist_for_each_entry(i, n, slot, collision) { - if (OVERLAPS) { - dev_alert(DEV, "%s[%u] Concurrent local write detected! 
" - "[DISCARD L] new: %llus +%u; " - "pending: %llus +%u\n", - current->comm, current->pid, - (unsigned long long)sector, size, - (unsigned long long)i->i.sector, i->i.size); - goto out_conflict; - } + i = drbd_find_overlap(&mdev->write_requests, sector, size); + if (i) { + struct drbd_request *req2 = + container_of(i, struct drbd_request, i); + + dev_alert(DEV, "%s[%u] Concurrent local write detected! " + "[DISCARD L] new: %llus +%u; " + "pending: %llus +%u\n", + current->comm, current->pid, + (unsigned long long)sector, size, + (unsigned long long)req2->i.sector, req2->i.size); + goto out_conflict; } if (mdev->ee_hash_s) { /* now, check for overlapping requests with remote origin */ BUG_ON(mdev->ee_hash == NULL); -#undef OVERLAPS #define OVERLAPS overlaps(e->sector, e->size, sector, size) slot = ee_hash_slot(mdev, sector); hlist_for_each_entry(e, n, slot, collision) { @@ -509,6 +510,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, hlist_add_head(&req->collision, tl_hash_slot(mdev, req->i.sector)); /* corresponding hlist_del is in _req_may_be_done() */ + drbd_insert_interval(&mdev->write_requests, &req->i); /* NOTE * In case the req ended up on the transfer log before being diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 2520186c4c2b..6f11624cce38 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -275,6 +275,7 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, req->i.sector = bio_src->bi_sector; req->i.size = bio_src->bi_size; INIT_HLIST_NODE(&req->collision); + drbd_clear_interval(&req->i); INIT_LIST_HEAD(&req->tl_requests); INIT_LIST_HEAD(&req->w.list); } From dac1389ccc273b5486f2931c64c8e1672f233727 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 21 Jan 2011 17:18:39 +0100 Subject: [PATCH 0016/5831] drbd: Add read_requests tree We do not do collision detection for read requests, but we still need to look up the request objects when we receive a package over the network. Using the same data structure for read and write requests results in simpler code once the tl_hash and app_reads_hash tables are removed. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 ++- drivers/block/drbd/drbd_main.c | 1 + drivers/block/drbd/drbd_req.c | 13 ++++++++++--- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 058371318da4..46a4332d3441 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1019,7 +1019,8 @@ struct drbd_conf { struct hlist_head *tl_hash; unsigned int tl_hash_s; - /* Interval tree of pending local write requests */ + /* Interval tree of pending local requests */ + struct rb_root read_requests; struct rb_root write_requests; /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4d85838f53e3..c0ea5baa9a1b 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3473,6 +3473,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) /* no need to lock access, we are still initializing this minor device. 
*/ if (!tl_init(mdev)) goto out_no_tl; + mdev->read_requests = RB_ROOT; mdev->write_requests = RB_ROOT; mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 593576fcf64e..d2a78c4ee919 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -260,10 +260,15 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) /* remove the request from the conflict detection * respective block_id verification hash */ - if (!hlist_unhashed(&req->collision)) { + if (!drbd_interval_empty(&req->i)) { + struct rb_root *root; + hlist_del(&req->collision); - if (!drbd_interval_empty(&req->i)) - drbd_remove_interval(&mdev->write_requests, &req->i); + if (rw == WRITE) + root = &mdev->write_requests; + else + root = &mdev->read_requests; + drbd_remove_interval(root, &req->i); } else D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); @@ -332,6 +337,7 @@ static int _req_conflicts(struct drbd_request *req) struct hlist_head *slot; D_ASSERT(hlist_unhashed(&req->collision)); + D_ASSERT(drbd_interval_empty(&req->i)); if (!get_net_conf(mdev)) return 0; @@ -493,6 +499,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* so we can verify the handle in the answer packet * corresponding hlist_del is in _req_may_be_done() */ hlist_add_head(&req->collision, ar_hash_slot(mdev, req->i.sector)); + drbd_insert_interval(&mdev->read_requests, &req->i); set_bit(UNPLUG_REMOTE, &mdev->flags); From bc9c5c41181a84ad243639c79a10f621a97af44b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 21 Jan 2011 18:00:55 +0100 Subject: [PATCH 0017/5831] drbd: Use the read and write request trees for request lookups Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_interval.h | 3 +- drivers/block/drbd/drbd_receiver.c | 44 +++++++++++------------------- 2 files changed, 17 insertions(+), 30 deletions(-) diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h index bf8dcf7bab09..a847b4a07b25 100644 --- a/drivers/block/drbd/drbd_interval.h +++ b/drivers/block/drbd/drbd_interval.h @@ -22,8 +22,7 @@ static inline bool drbd_interval_empty(struct drbd_interval *i) } bool drbd_insert_interval(struct rb_root *, struct drbd_interval *); -struct drbd_interval *drbd_find_interval(struct rb_root *, sector_t, - struct drbd_interval *); +bool drbd_contains_interval(struct rb_root *, sector_t, struct drbd_interval *); void drbd_remove_interval(struct rb_root *, struct drbd_interval *); struct drbd_interval *drbd_find_overlap(struct rb_root *, sector_t, unsigned int); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6b0725842508..b148398b5aa4 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1470,27 +1470,15 @@ fail: } static struct drbd_request * -find_request(struct drbd_conf *mdev, - struct hlist_head *(*hash_slot)(struct drbd_conf *, sector_t), - u64 id, sector_t sector, bool missing_ok, const char *func) +find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id, + sector_t sector, bool missing_ok, const char *func) { - struct hlist_head *slot = hash_slot(mdev, sector); - struct hlist_node *n; struct drbd_request *req; - hlist_for_each_entry(req, n, slot, collision) { - if ((unsigned long)req != (unsigned long)id) - continue; - if (req->i.sector != sector) { - dev_err(DEV, "%s: found request %lu but it has " - "wrong sector 
(%llus versus %llus)\n", - func, (unsigned long)req, - (unsigned long long)req->i.sector, - (unsigned long long)sector); - return NULL; - } + /* Request object according to our peer */ + req = (struct drbd_request *)(unsigned long)id; + if (drbd_contains_interval(root, sector, &req->i)) return req; - } if (!missing_ok) { dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func, (unsigned long)id, (unsigned long long)sector); @@ -1508,7 +1496,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi sector = be64_to_cpu(p->sector); spin_lock_irq(&mdev->req_lock); - req = find_request(mdev, ar_hash_slot, p->block_id, sector, false, __func__); + req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__); spin_unlock_irq(&mdev->req_lock); if (unlikely(!req)) return false; @@ -4245,16 +4233,16 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) return true; } -static int validate_req_change_req_state(struct drbd_conf *mdev, - u64 id, sector_t sector, - struct hlist_head *(*hash_slot)(struct drbd_conf *, sector_t), - const char *func, enum drbd_req_event what, bool missing_ok) +static int +validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, + struct rb_root *root, const char *func, + enum drbd_req_event what, bool missing_ok) { struct drbd_request *req; struct bio_and_error m; spin_lock_irq(&mdev->req_lock); - req = find_request(mdev, hash_slot, id, sector, missing_ok, func); + req = find_request(mdev, root, id, sector, missing_ok, func); if (unlikely(!req)) { spin_unlock_irq(&mdev->req_lock); return false; @@ -4304,8 +4292,8 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) } return validate_req_change_req_state(mdev, p->block_id, sector, - tl_hash_slot, __func__, what, - false); + &mdev->write_requests, __func__, + what, false); } static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) @@ -4326,7 +4314,7 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) } found = validate_req_change_req_state(mdev, p->block_id, sector, - tl_hash_slot, __func__, + &mdev->write_requests, __func__, neg_acked, missing_ok); if (!found) { /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. 
@@ -4351,8 +4339,8 @@ static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) (unsigned long long)sector, be32_to_cpu(p->blksize)); return validate_req_change_req_state(mdev, p->block_id, sector, - ar_hash_slot, __func__, neg_acked, - false); + &mdev->read_requests, __func__, + neg_acked, false); } static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) From 010f6e678ffddbf3134863038c5b2f6509f1eed3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 14 Jan 2011 20:59:35 +0100 Subject: [PATCH 0018/5831] drbd: Put sector and size in struct drbd_epoch_entry into struct drbd_interval Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 +-- drivers/block/drbd/drbd_main.c | 14 +++++----- drivers/block/drbd/drbd_nl.c | 6 ++--- drivers/block/drbd/drbd_receiver.c | 28 ++++++++++---------- drivers/block/drbd/drbd_req.c | 6 ++--- drivers/block/drbd/drbd_worker.c | 42 +++++++++++++++--------------- 6 files changed, 49 insertions(+), 50 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 46a4332d3441..fa722a986e07 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -764,10 +764,9 @@ struct drbd_epoch_entry { struct drbd_conf *mdev; struct page *pages; atomic_t pending_bios; - unsigned int size; + struct drbd_interval i; /* see comments on ee flag bits below */ unsigned long flags; - sector_t sector; union { u64 block_id; struct digest_info *digest; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c0ea5baa9a1b..003313711ef2 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2464,8 +2464,8 @@ int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, struct drbd_epoch_entry *e) { return _drbd_send_ack(mdev, cmd, - cpu_to_be64(e->sector), - cpu_to_be32(e->size), + cpu_to_be64(e->i.sector), + cpu_to_be32(e->i.size), e->block_id); } @@ -2671,7 +2671,7 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) { struct page *page = e->pages; - unsigned len = e->size; + unsigned len = e->i.size; /* hint all but last page with MSG_MORE */ page_chain_for_each(page) { unsigned l = min_t(unsigned, len, PAGE_SIZE); @@ -2796,19 +2796,19 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; - if (e->size <= DRBD_MAX_SIZE_H80_PACKET) { + if (e->i.size <= DRBD_MAX_SIZE_H80_PACKET) { p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); p.head.h80.command = cpu_to_be16(cmd); p.head.h80.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->size); + cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); } else { p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); p.head.h95.command = cpu_to_be16(cmd); p.head.h95.length = - cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->size); + cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); } - p.sector = cpu_to_be64(e->sector); + p.sector = cpu_to_be64(e->i.sector); p.block_id = e->block_id; /* p.seq_num = 0; No sequence numbers here.. 
*/ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 515bcd948a43..98c0e9b871e6 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2506,7 +2506,7 @@ void drbd_bcast_ee(struct drbd_conf *mdev, if (!cn_reply) { dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, sector %llu, size %u\n", - (unsigned long long)e->sector, e->size); + (unsigned long long)e->i.sector, e->i.size); return; } @@ -2516,11 +2516,11 @@ void drbd_bcast_ee(struct drbd_conf *mdev, tl = tl_add_str(tl, T_dump_ee_reason, reason); tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs); tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs); - tl = tl_add_int(tl, T_ee_sector, &e->sector); + tl = tl_add_int(tl, T_ee_sector, &e->i.sector); tl = tl_add_int(tl, T_ee_block_id, &e->block_id); /* dump the first 32k */ - len = min_t(unsigned, e->size, 32 << 10); + len = min_t(unsigned, e->i.size, 32 << 10); put_unaligned(T_ee_data, tl++); put_unaligned(len, tl++); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b148398b5aa4..42c0ffabad72 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -338,9 +338,9 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, e->mdev = mdev; e->pages = page; atomic_set(&e->pending_bios, 0); - e->size = data_size; + e->i.size = data_size; e->flags = 0; - e->sector = sector; + e->i.sector = sector; /* * The block_id is opaque to the receiver. It is not endianness * converted, and sent back to the sender unchanged. @@ -1091,8 +1091,8 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, struct bio *bios = NULL; struct bio *bio; struct page *page = e->pages; - sector_t sector = e->sector; - unsigned ds = e->size; + sector_t sector = e->i.sector; + unsigned ds = e->i.size; unsigned n_bios = 0; unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; int err = -ENOMEM; @@ -1107,7 +1107,7 @@ next_bio: dev_err(DEV, "submit_ee: Allocation of a bio failed\n"); goto fail; } - /* > e->sector, unless this is the first bio */ + /* > e->i.sector, unless this is the first bio */ bio->bi_sector = sector; bio->bi_bdev = mdev->ldev->backing_bdev; bio->bi_rw = rw; @@ -1414,17 +1414,17 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) { struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; - sector_t sector = e->sector; + sector_t sector = e->i.sector; int ok; D_ASSERT(hlist_unhashed(&e->collision)); if (likely((e->flags & EE_WAS_ERROR) == 0)) { - drbd_set_in_sync(mdev, sector, e->size); + drbd_set_in_sync(mdev, sector, e->i.size); ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, e); } else { /* Record failure to sync */ - drbd_rs_failed_io(mdev, sector, e->size); + drbd_rs_failed_io(mdev, sector, e->i.size); ok = drbd_send_ack(mdev, P_NEG_ACK, e); } @@ -1549,7 +1549,7 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packets cmd, un static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; - sector_t sector = e->sector; + sector_t sector = e->i.sector; int ok = 1, pcmd; if (mdev->net_conf->wire_protocol == DRBD_PROT_C) { @@ -1560,7 +1560,7 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) P_RS_WRITE_ACK : P_WRITE_ACK; ok &= drbd_send_ack(mdev, pcmd, e); if (pcmd == P_RS_WRITE_ACK) - drbd_set_in_sync(mdev, sector, 
e->size); + drbd_set_in_sync(mdev, sector, e->i.size); } else { ok = drbd_send_ack(mdev, P_NEG_ACK, e); /* we expect it to be marked out of sync anyways... @@ -1718,7 +1718,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } else { /* don't get the req_lock yet, * we may sleep in drbd_wait_peer_seq */ - const int size = e->size; + const int size = e->i.size; const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags); DEFINE_WAIT(wait); int first; @@ -1861,10 +1861,10 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if (mdev->state.pdsk < D_INCONSISTENT) { /* In case we have the only disk of the cluster, */ - drbd_set_out_of_sync(mdev, e->sector, e->size); + drbd_set_out_of_sync(mdev, e->i.sector, e->i.size); e->flags |= EE_CALL_AL_COMPLETE_IO; e->flags &= ~EE_MAY_SET_IN_SYNC; - drbd_al_begin_io(mdev, e->sector); + drbd_al_begin_io(mdev, e->i.sector); } if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0) @@ -1877,7 +1877,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned hlist_del_init(&e->collision); spin_unlock_irq(&mdev->req_lock); if (e->flags & EE_CALL_AL_COMPLETE_IO) - drbd_al_complete_io(mdev, e->sector); + drbd_al_complete_io(mdev, e->i.sector); out_interrupted: drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + EV_CLEANUP); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index d2a78c4ee919..5bf93a7c91b0 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -185,7 +185,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, * * anyways, if we found one, * we just have to do a wake_up. */ -#define OVERLAPS overlaps(sector, size, e->sector, e->size) +#define OVERLAPS overlaps(sector, size, e->i.sector, e->i.size) slot = ee_hash_slot(mdev, req->i.sector); hlist_for_each_entry(e, n, slot, collision) { if (OVERLAPS) { @@ -364,7 +364,7 @@ static int _req_conflicts(struct drbd_request *req) if (mdev->ee_hash_s) { /* now, check for overlapping requests with remote origin */ BUG_ON(mdev->ee_hash == NULL); -#define OVERLAPS overlaps(e->sector, e->size, sector, size) +#define OVERLAPS overlaps(e->i.sector, e->i.size, sector, size) slot = ee_hash_slot(mdev, sector); hlist_for_each_entry(e, n, slot, collision) { if (OVERLAPS) { @@ -373,7 +373,7 @@ static int _req_conflicts(struct drbd_request *req) "pending: %llus +%u\n", current->comm, current->pid, (unsigned long long)sector, size, - (unsigned long long)e->sector, e->size); + (unsigned long long)e->i.sector, e->i.size); goto out_conflict; } } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a1eff6e9c0e7..2b83aaf02c33 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -86,7 +86,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local) struct drbd_conf *mdev = e->mdev; spin_lock_irqsave(&mdev->req_lock, flags); - mdev->read_cnt += e->size >> 9; + mdev->read_cnt += e->i.size >> 9; list_del(&e->w.list); if (list_empty(&mdev->read_ee)) wake_up(&mdev->ee_wait); @@ -113,12 +113,12 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo * we may no longer access it, * it may be freed/reused already! 
* (as soon as we release the req_lock) */ - e_sector = e->sector; + e_sector = e->i.sector; do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO; block_id = e->block_id; spin_lock_irqsave(&mdev->req_lock, flags); - mdev->writ_cnt += e->size >> 9; + mdev->writ_cnt += e->i.size >> 9; list_del(&e->w.list); /* has been on active_ee or sync_ee */ list_add_tail(&e->w.list, &mdev->done_ee); @@ -159,12 +159,12 @@ void drbd_endio_sec(struct bio *bio, int error) if (error && __ratelimit(&drbd_ratelimit_state)) dev_warn(DEV, "%s: error=%d s=%llus\n", is_write ? "write" : "read", error, - (unsigned long long)e->sector); + (unsigned long long)e->i.sector); if (!error && !uptodate) { if (__ratelimit(&drbd_ratelimit_state)) dev_warn(DEV, "%s: setting error to -EIO s=%llus\n", is_write ? "write" : "read", - (unsigned long long)e->sector); + (unsigned long long)e->i.sector); /* strange behavior of some lower level drivers... * fail the request by clearing the uptodate flag, * but do not return any error?! */ @@ -265,7 +265,7 @@ void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_e page = tmp; } /* and now the last, possibly only partially used page */ - len = e->size & (PAGE_SIZE - 1); + len = e->i.size & (PAGE_SIZE - 1); sg_set_page(&sg, page, len ?: PAGE_SIZE, 0); crypto_hash_update(&desc, &sg, sg.length); crypto_hash_final(&desc, digest); @@ -308,8 +308,8 @@ int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) digest_size = crypto_hash_digestsize(mdev->csums_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { - sector_t sector = e->sector; - unsigned int size = e->size; + sector_t sector = e->i.sector; + unsigned int size = e->i.size; drbd_csum_ee(mdev, mdev->csums_tfm, e, digest); /* Free e and pages before send. * In case we block on congestion, we could otherwise run into @@ -901,7 +901,7 @@ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_ent { if (drbd_ee_has_active_page(e)) { /* This might happen if sendpage() has not finished */ - int i = (e->size + PAGE_SIZE -1) >> PAGE_SHIFT; + int i = (e->i.size + PAGE_SIZE -1) >> PAGE_SHIFT; atomic_add(i, &mdev->pp_in_use_by_net); atomic_sub(i, &mdev->pp_in_use); spin_lock_irq(&mdev->req_lock); @@ -934,7 +934,7 @@ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegDReply. sector=%llus.\n", - (unsigned long long)e->sector); + (unsigned long long)e->i.sector); ok = drbd_send_ack(mdev, P_NEG_DREPLY, e); } @@ -966,7 +966,7 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } if (get_ldev_if_state(mdev, D_FAILED)) { - drbd_rs_complete_io(mdev, e->sector); + drbd_rs_complete_io(mdev, e->i.sector); put_ldev(mdev); } @@ -985,12 +985,12 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegRSDReply. 
sector %llus.\n", - (unsigned long long)e->sector); + (unsigned long long)e->i.sector); ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); /* update resync data with failure */ - drbd_rs_failed_io(mdev, e->sector, e->size); + drbd_rs_failed_io(mdev, e->i.sector, e->i.size); } dec_unacked(mdev); @@ -1017,7 +1017,7 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } if (get_ldev(mdev)) { - drbd_rs_complete_io(mdev, e->sector); + drbd_rs_complete_io(mdev, e->i.sector); put_ldev(mdev); } @@ -1039,9 +1039,9 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } if (eq) { - drbd_set_in_sync(mdev, e->sector, e->size); + drbd_set_in_sync(mdev, e->i.sector, e->i.size); /* rs_same_csums unit is BM_BLOCK_SIZE */ - mdev->rs_same_csum += e->size >> BM_BLOCK_SHIFT; + mdev->rs_same_csum += e->i.size >> BM_BLOCK_SHIFT; ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e); } else { inc_rs_pending(mdev); @@ -1068,8 +1068,8 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); - sector_t sector = e->sector; - unsigned int size = e->size; + sector_t sector = e->i.sector; + unsigned int size = e->i.size; int digest_size; void *digest; int ok = 1; @@ -1127,8 +1127,8 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); struct digest_info *di; void *digest; - sector_t sector = e->sector; - unsigned int size = e->size; + sector_t sector = e->i.sector; + unsigned int size = e->i.size; int digest_size; int ok, eq = 0; @@ -1141,7 +1141,7 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all * the resync lru has been cleaned up already */ if (get_ldev(mdev)) { - drbd_rs_complete_io(mdev, e->sector); + drbd_rs_complete_io(mdev, e->i.sector); put_ldev(mdev); } From 8b946255f8467e30f98988be426d8c1604d63ffd Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 20 Jan 2011 15:23:07 +0100 Subject: [PATCH 0019/5831] drbd: Use interval tree for overlapping epoch entry detection Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 ++ drivers/block/drbd/drbd_main.c | 1 + drivers/block/drbd/drbd_receiver.c | 15 ++++++++++ drivers/block/drbd/drbd_req.c | 44 +++++++++++------------------- 4 files changed, 35 insertions(+), 28 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index fa722a986e07..751a4d4ff070 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1080,6 +1080,9 @@ struct drbd_conf { struct hlist_head *ee_hash; /* is proteced by req_lock! 
*/ unsigned int ee_hash_s; + /* Interval tree of pending remote write requests (struct drbd_epoch_entry) */ + struct rb_root epoch_entries; + /* this one is protected by ee_lock, single thread */ struct drbd_epoch_entry *last_write_w_barrier; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 003313711ef2..18f27afab81e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3475,6 +3475,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) goto out_no_tl; mdev->read_requests = RB_ROOT; mdev->write_requests = RB_ROOT; + mdev->epoch_entries = RB_ROOT; mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL); if (!mdev->app_reads_hash) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 42c0ffabad72..a0fbbfc77d85 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -334,6 +334,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, goto fail; INIT_HLIST_NODE(&e->collision); + drbd_clear_interval(&e->i); e->epoch = NULL; e->mdev = mdev; e->pages = page; @@ -361,6 +362,7 @@ void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int i drbd_pp_free(mdev, e->pages, is_net); D_ASSERT(atomic_read(&e->pending_bios) == 0); D_ASSERT(hlist_unhashed(&e->collision)); + D_ASSERT(drbd_interval_empty(&e->i)); mempool_free(e, drbd_ee_mempool); } @@ -1418,6 +1420,7 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u int ok; D_ASSERT(hlist_unhashed(&e->collision)); + D_ASSERT(drbd_interval_empty(&e->i)); if (likely((e->flags & EE_WAS_ERROR) == 0)) { drbd_set_in_sync(mdev, sector, e->i.size); @@ -1574,9 +1577,13 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) spin_lock_irq(&mdev->req_lock); D_ASSERT(!hlist_unhashed(&e->collision)); hlist_del_init(&e->collision); + D_ASSERT(!drbd_interval_empty(&e->i)); + drbd_remove_interval(&mdev->epoch_entries, &e->i); + drbd_clear_interval(&e->i); spin_unlock_irq(&mdev->req_lock); } else { D_ASSERT(hlist_unhashed(&e->collision)); + D_ASSERT(drbd_interval_empty(&e->i)); } drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? 
EV_CLEANUP : 0)); @@ -1595,6 +1602,9 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u spin_lock_irq(&mdev->req_lock); D_ASSERT(!hlist_unhashed(&e->collision)); hlist_del_init(&e->collision); + D_ASSERT(!drbd_interval_empty(&e->i)); + drbd_remove_interval(&mdev->epoch_entries, &e->i); + drbd_clear_interval(&e->i); spin_unlock_irq(&mdev->req_lock); dec_unacked(mdev); @@ -1767,6 +1777,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned spin_lock_irq(&mdev->req_lock); hlist_add_head(&e->collision, ee_hash_slot(mdev, sector)); + drbd_insert_interval(&mdev->epoch_entries, &e->i); first = 1; for (;;) { @@ -1817,6 +1828,8 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if (signal_pending(current)) { hlist_del_init(&e->collision); + drbd_remove_interval(&mdev->epoch_entries, &e->i); + drbd_clear_interval(&e->i); spin_unlock_irq(&mdev->req_lock); @@ -1875,6 +1888,8 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned spin_lock_irq(&mdev->req_lock); list_del(&e->w.list); hlist_del_init(&e->collision); + drbd_remove_interval(&mdev->epoch_entries, &e->i); + drbd_clear_interval(&e->i); spin_unlock_irq(&mdev->req_lock); if (e->flags & EE_CALL_AL_COMPLETE_IO) drbd_al_complete_io(mdev, e->i.sector); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 5bf93a7c91b0..b81ce82eb159 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -135,9 +135,6 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, struct drbd_request *req) { const unsigned long s = req->rq_state; - struct drbd_epoch_entry *e; - struct hlist_node *n; - struct hlist_head *slot; /* Before we can signal completion to the upper layers, * we may need to close the current epoch. @@ -185,16 +182,10 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, * * anyways, if we found one, * we just have to do a wake_up. */ -#define OVERLAPS overlaps(sector, size, e->i.sector, e->i.size) - slot = ee_hash_slot(mdev, req->i.sector); - hlist_for_each_entry(e, n, slot, collision) { - if (OVERLAPS) { - wake_up(&mdev->misc_wait); - break; - } - } + i = drbd_find_overlap(&mdev->epoch_entries, sector, size); + if (i) + wake_up(&mdev->misc_wait); } -#undef OVERLAPS } void complete_master_bio(struct drbd_conf *mdev, @@ -332,9 +323,6 @@ static int _req_conflicts(struct drbd_request *req) const sector_t sector = req->i.sector; const int size = req->i.size; struct drbd_interval *i; - struct drbd_epoch_entry *e; - struct hlist_node *n; - struct hlist_head *slot; D_ASSERT(hlist_unhashed(&req->collision)); D_ASSERT(drbd_interval_empty(&req->i)); @@ -364,21 +352,21 @@ static int _req_conflicts(struct drbd_request *req) if (mdev->ee_hash_s) { /* now, check for overlapping requests with remote origin */ BUG_ON(mdev->ee_hash == NULL); -#define OVERLAPS overlaps(e->i.sector, e->i.size, sector, size) - slot = ee_hash_slot(mdev, sector); - hlist_for_each_entry(e, n, slot, collision) { - if (OVERLAPS) { - dev_alert(DEV, "%s[%u] Concurrent remote write detected!" 
- " [DISCARD L] new: %llus +%u; " - "pending: %llus +%u\n", - current->comm, current->pid, - (unsigned long long)sector, size, - (unsigned long long)e->i.sector, e->i.size); - goto out_conflict; - } + + i = drbd_find_overlap(&mdev->epoch_entries, sector, size); + if (i) { + struct drbd_epoch_entry *e = + container_of(i, struct drbd_epoch_entry, i); + + dev_alert(DEV, "%s[%u] Concurrent remote write detected!" + " [DISCARD L] new: %llus +%u; " + "pending: %llus +%u\n", + current->comm, current->pid, + (unsigned long long)sector, size, + (unsigned long long)e->i.sector, e->i.size); + goto out_conflict; } } -#undef OVERLAPS out_no_conflict: /* this is like it should be, and what we expected. From bb3bfe96144a4535d47ccfea444bc1ef8e02f4e3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 21 Jan 2011 15:59:23 +0100 Subject: [PATCH 0020/5831] drbd: Remove the unused hash tables Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 13 ------- drivers/block/drbd/drbd_main.c | 57 ------------------------------ drivers/block/drbd/drbd_nl.c | 36 +------------------ drivers/block/drbd/drbd_receiver.c | 27 ++++---------- drivers/block/drbd/drbd_req.c | 26 ++++---------- drivers/block/drbd/drbd_req.h | 27 -------------- drivers/block/drbd/drbd_worker.c | 11 +++--- 7 files changed, 20 insertions(+), 177 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 751a4d4ff070..5874357b0f9c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -694,7 +694,6 @@ struct drbd_request { * see drbd_endio_pri(). */ struct bio *private_bio; - struct hlist_node collision; struct drbd_interval i; unsigned int epoch; /* barrier_nr */ @@ -759,7 +758,6 @@ struct digest_info { struct drbd_epoch_entry { struct drbd_work w; - struct hlist_node collision; struct drbd_epoch *epoch; /* for writes */ struct drbd_conf *mdev; struct page *pages; @@ -1015,8 +1013,6 @@ struct drbd_conf { struct drbd_tl_epoch *newest_tle; struct drbd_tl_epoch *oldest_tle; struct list_head out_of_sequence_requests; - struct hlist_head *tl_hash; - unsigned int tl_hash_s; /* Interval tree of pending local requests */ struct rb_root read_requests; @@ -1077,8 +1073,6 @@ struct drbd_conf { struct list_head done_ee; /* send ack */ struct list_head read_ee; /* IO in progress (any read) */ struct list_head net_ee; /* zero-copy network send in progress */ - struct hlist_head *ee_hash; /* is proteced by req_lock! */ - unsigned int ee_hash_s; /* Interval tree of pending remote write requests (struct drbd_epoch_entry) */ struct rb_root epoch_entries; @@ -1087,7 +1081,6 @@ struct drbd_conf { struct drbd_epoch_entry *last_write_w_barrier; int next_barrier_nr; - struct hlist_head *app_reads_hash; /* is proteced by req_lock */ struct list_head resync_reads; atomic_t pp_in_use; /* allocated from page pool */ atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */ @@ -1428,18 +1421,12 @@ struct bm_extent { #endif #endif -/* Sector shift value for the "hash" functions of tl_hash and ee_hash tables. - * With a value of 8 all IO in one 128K block make it to the same slot of the - * hash table. 
*/ #define HT_SHIFT 8 #define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT)) #define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */ #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */ -/* Number of elements in the app_reads_hash */ -#define APP_R_HSIZE 15 - extern int drbd_bm_init(struct drbd_conf *mdev); extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new_bits); extern void drbd_bm_cleanup(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 18f27afab81e..878f7d4fc885 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -209,9 +209,6 @@ static int tl_init(struct drbd_conf *mdev) mdev->newest_tle = b; INIT_LIST_HEAD(&mdev->out_of_sequence_requests); - mdev->tl_hash = NULL; - mdev->tl_hash_s = 0; - return 1; } @@ -223,39 +220,6 @@ static void tl_cleanup(struct drbd_conf *mdev) mdev->oldest_tle = NULL; kfree(mdev->unused_spare_tle); mdev->unused_spare_tle = NULL; - kfree(mdev->tl_hash); - mdev->tl_hash = NULL; - mdev->tl_hash_s = 0; -} - -static void drbd_free_tl_hash(struct drbd_conf *mdev) -{ - struct hlist_head *h; - - spin_lock_irq(&mdev->req_lock); - - if (!mdev->tl_hash || mdev->state.conn != C_STANDALONE) { - spin_unlock_irq(&mdev->req_lock); - return; - } - /* paranoia code */ - for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++) - if (h->first) - dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n", - (int)(h - mdev->ee_hash), h->first); - kfree(mdev->ee_hash); - mdev->ee_hash = NULL; - mdev->ee_hash_s = 0; - - /* paranoia code */ - for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) - if (h->first) - dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n", - (int)(h - mdev->tl_hash), h->first); - kfree(mdev->tl_hash); - mdev->tl_hash = NULL; - mdev->tl_hash_s = 0; - spin_unlock_irq(&mdev->req_lock); } /** @@ -475,8 +439,6 @@ void tl_clear(struct drbd_conf *mdev) /* ensure bit indicating barrier is required is clear */ clear_bit(CREATE_BARRIER, &mdev->flags); - memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *)); - spin_unlock_irq(&mdev->req_lock); } @@ -1633,10 +1595,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, put_ldev(mdev); } - /* free tl_hash if we Got thawed and are C_STANDALONE */ - if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash) - drbd_free_tl_hash(mdev); - /* Upon network connection, we need to start the receiver */ if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED) drbd_thread_start(&mdev->receiver); @@ -3317,13 +3275,6 @@ static void drbd_delete_device(unsigned int minor) drbd_release_ee_lists(mdev); - /* should be freed on disconnect? 
*/ - kfree(mdev->ee_hash); - /* - mdev->ee_hash_s = 0; - mdev->ee_hash = NULL; - */ - lc_destroy(mdev->act_log); lc_destroy(mdev->resync); @@ -3477,10 +3428,6 @@ struct drbd_conf *drbd_new_device(unsigned int minor) mdev->write_requests = RB_ROOT; mdev->epoch_entries = RB_ROOT; - mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL); - if (!mdev->app_reads_hash) - goto out_no_app_reads; - mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL); if (!mdev->current_epoch) goto out_no_epoch; @@ -3493,8 +3440,6 @@ struct drbd_conf *drbd_new_device(unsigned int minor) /* out_whatever_else: kfree(mdev->current_epoch); */ out_no_epoch: - kfree(mdev->app_reads_hash); -out_no_app_reads: tl_cleanup(mdev); out_no_tl: drbd_bm_cleanup(mdev); @@ -3516,7 +3461,6 @@ out_no_cpumask: void drbd_free_mdev(struct drbd_conf *mdev) { kfree(mdev->current_epoch); - kfree(mdev->app_reads_hash); tl_cleanup(mdev); if (mdev->bitmap) /* should no longer be there. */ drbd_bm_cleanup(mdev); @@ -3524,7 +3468,6 @@ void drbd_free_mdev(struct drbd_conf *mdev) put_disk(mdev->vdisk); blk_cleanup_queue(mdev->rq_queue); free_cpumask_var(mdev->cpu_mask); - drbd_free_tl_hash(mdev); kfree(mdev); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 98c0e9b871e6..5b8ebbef95de 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1353,14 +1353,12 @@ static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { - int i, ns; + int i; enum drbd_ret_code retcode; struct net_conf *new_conf = NULL; struct crypto_hash *tfm = NULL; struct crypto_hash *integrity_w_tfm = NULL; struct crypto_hash *integrity_r_tfm = NULL; - struct hlist_head *new_tl_hash = NULL; - struct hlist_head *new_ee_hash = NULL; struct drbd_conf *odev; char hmac_name[CRYPTO_MAX_ALG_NAME]; void *int_dig_out = NULL; @@ -1494,24 +1492,6 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } } - ns = new_conf->max_epoch_size/8; - if (mdev->tl_hash_s != ns) { - new_tl_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL); - if (!new_tl_hash) { - retcode = ERR_NOMEM; - goto fail; - } - } - - ns = new_conf->max_buffers/8; - if (new_conf->two_primaries && (mdev->ee_hash_s != ns)) { - new_ee_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL); - if (!new_ee_hash) { - retcode = ERR_NOMEM; - goto fail; - } - } - ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0; if (integrity_w_tfm) { @@ -1552,18 +1532,6 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, mdev->send_cnt = 0; mdev->recv_cnt = 0; - if (new_tl_hash) { - kfree(mdev->tl_hash); - mdev->tl_hash_s = mdev->net_conf->max_epoch_size/8; - mdev->tl_hash = new_tl_hash; - } - - if (new_ee_hash) { - kfree(mdev->ee_hash); - mdev->ee_hash_s = mdev->net_conf->max_buffers/8; - mdev->ee_hash = new_ee_hash; - } - crypto_free_hash(mdev->cram_hmac_tfm); mdev->cram_hmac_tfm = tfm; @@ -1594,8 +1562,6 @@ fail: crypto_free_hash(tfm); crypto_free_hash(integrity_w_tfm); crypto_free_hash(integrity_r_tfm); - kfree(new_tl_hash); - kfree(new_ee_hash); kfree(new_conf); reply->ret_code = retcode; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index a0fbbfc77d85..566317bb74e8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -333,7 +333,6 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, 
if (!page) goto fail; - INIT_HLIST_NODE(&e->collision); drbd_clear_interval(&e->i); e->epoch = NULL; e->mdev = mdev; @@ -361,7 +360,6 @@ void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int i kfree(e->digest); drbd_pp_free(mdev, e->pages, is_net); D_ASSERT(atomic_read(&e->pending_bios) == 0); - D_ASSERT(hlist_unhashed(&e->collision)); D_ASSERT(drbd_interval_empty(&e->i)); mempool_free(e, drbd_ee_mempool); } @@ -1419,7 +1417,6 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u sector_t sector = e->i.sector; int ok; - D_ASSERT(hlist_unhashed(&e->collision)); D_ASSERT(drbd_interval_empty(&e->i)); if (likely((e->flags & EE_WAS_ERROR) == 0)) { @@ -1575,16 +1572,12 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ if (mdev->net_conf->two_primaries) { spin_lock_irq(&mdev->req_lock); - D_ASSERT(!hlist_unhashed(&e->collision)); - hlist_del_init(&e->collision); D_ASSERT(!drbd_interval_empty(&e->i)); drbd_remove_interval(&mdev->epoch_entries, &e->i); drbd_clear_interval(&e->i); spin_unlock_irq(&mdev->req_lock); - } else { - D_ASSERT(hlist_unhashed(&e->collision)); + } else D_ASSERT(drbd_interval_empty(&e->i)); - } drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); @@ -1600,8 +1593,6 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); spin_lock_irq(&mdev->req_lock); - D_ASSERT(!hlist_unhashed(&e->collision)); - hlist_del_init(&e->collision); D_ASSERT(!drbd_interval_empty(&e->i)); drbd_remove_interval(&mdev->epoch_entries, &e->i); drbd_clear_interval(&e->i); @@ -1734,23 +1725,20 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int first; D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); - BUG_ON(mdev->ee_hash == NULL); - BUG_ON(mdev->tl_hash == NULL); /* conflict detection and handling: * 1. wait on the sequence number, * in case this data packet overtook ACK packets. - * 2. check our hash tables for conflicting requests. - * we only need to walk the tl_hash, since an ee can not - * have a conflict with an other ee: on the submitting - * node, the corresponding req had already been conflicting, - * and a conflicting req is never sent. + * 2. check our interval trees for conflicting requests: + * we only need to check the write_requests tree; the + * epoch_entries tree cannot contain any overlaps because + * they were already eliminated on the submitting node. * * Note: for two_primaries, we are protocol C, * so there cannot be any request that is DONE * but still on the transfer log. * - * unconditionally add to the ee_hash. + * unconditionally add to the epoch_entries tree. * * if no conflicting request is found: * submit. 
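/*
 * Illustration only (not part of the patch series): the hunks above replace
 * the tl_hash/ee_hash bucket walks with drbd_find_overlap() on an interval
 * tree.  The sketch below shows the general technique such a lookup relies
 * on -- an rbtree ordered by start sector in which every node caches the
 * largest end sector of its subtree, so the search can skip subtrees that
 * end before the queried range starts.  All names here (struct ival,
 * subtree_end, find_overlap) are invented for the sketch and are not the
 * driver's actual drbd_interval.c implementation.
 */
#include <linux/rbtree.h>
#include <linux/types.h>

struct ival {
	struct rb_node	rb;
	sector_t	sector;		/* first sector of the interval */
	unsigned int	size;		/* length in bytes */
	sector_t	subtree_end;	/* max end sector within this subtree,
					 * kept up to date on insert/erase */
};

static inline sector_t ival_end(const struct ival *v)
{
	return v->sector + (v->size >> 9);
}

/* Find any interval overlapping the half-open range [sector, sector + size). */
static struct ival *find_overlap(struct rb_root *root, sector_t sector,
				 unsigned int size)
{
	struct rb_node *node = root->rb_node;
	sector_t end = sector + (size >> 9);

	while (node) {
		struct ival *v = rb_entry(node, struct ival, rb);

		if (v->sector < end && sector < ival_end(v))
			return v;	/* overlaps the queried range */
		if (node->rb_left && rb_entry(node->rb_left, struct ival,
					      rb)->subtree_end > sector)
			node = node->rb_left;	/* left subtree may still overlap */
		else
			node = node->rb_right;	/* otherwise only the right side can */
	}
	return NULL;
}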
@@ -1776,7 +1764,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned spin_lock_irq(&mdev->req_lock); - hlist_add_head(&e->collision, ee_hash_slot(mdev, sector)); drbd_insert_interval(&mdev->epoch_entries, &e->i); first = 1; @@ -1827,7 +1814,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } if (signal_pending(current)) { - hlist_del_init(&e->collision); drbd_remove_interval(&mdev->epoch_entries, &e->i); drbd_clear_interval(&e->i); @@ -1887,7 +1873,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned dev_err(DEV, "submit failed, triggering re-connect\n"); spin_lock_irq(&mdev->req_lock); list_del(&e->w.list); - hlist_del_init(&e->collision); drbd_remove_interval(&mdev->epoch_entries, &e->i); drbd_clear_interval(&e->i); spin_unlock_irq(&mdev->req_lock); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index b81ce82eb159..8541b16de08b 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -148,9 +148,9 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, queue_barrier(mdev); /* we need to do the conflict detection stuff, - * if we have the ee_hash (two_primaries) and - * this has been on the network */ - if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) { + * if the epoch_entries tree is non-empty and + * this request has completed on the network */ + if ((s & RQ_NET_DONE) && !RB_EMPTY_ROOT(&mdev->epoch_entries)) { const sector_t sector = req->i.sector; const int size = req->i.size; struct drbd_interval *i; @@ -254,7 +254,6 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) if (!drbd_interval_empty(&req->i)) { struct rb_root *root; - hlist_del(&req->collision); if (rw == WRITE) root = &mdev->write_requests; else @@ -313,9 +312,7 @@ static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_e * conflicting requests with local origin, and why we have to do so regardless * of whether we allowed multiple primaries. * - * BTW, in case we only have one primary, the ee_hash is empty anyways, and the - * second hlist_for_each_entry becomes a noop. This is even simpler than to - * grab a reference on the net_conf, and check for the two_primaries flag... + * In case we only have one primary, the epoch_entries tree is empty. */ static int _req_conflicts(struct drbd_request *req) { @@ -324,17 +321,11 @@ static int _req_conflicts(struct drbd_request *req) const int size = req->i.size; struct drbd_interval *i; - D_ASSERT(hlist_unhashed(&req->collision)); D_ASSERT(drbd_interval_empty(&req->i)); if (!get_net_conf(mdev)) return 0; - /* BUG_ON */ - ERR_IF (mdev->tl_hash_s == 0) - goto out_no_conflict; - BUG_ON(mdev->tl_hash == NULL); - i = drbd_find_overlap(&mdev->write_requests, sector, size); if (i) { struct drbd_request *req2 = @@ -349,10 +340,8 @@ static int _req_conflicts(struct drbd_request *req) goto out_conflict; } - if (mdev->ee_hash_s) { - /* now, check for overlapping requests with remote origin */ - BUG_ON(mdev->ee_hash == NULL); - + if (!RB_EMPTY_ROOT(&mdev->epoch_entries)) { + /* check for overlapping requests with remote origin */ i = drbd_find_overlap(&mdev->epoch_entries, sector, size); if (i) { struct drbd_epoch_entry *e = @@ -368,7 +357,6 @@ static int _req_conflicts(struct drbd_request *req) } } -out_no_conflict: /* this is like it should be, and what we expected. * our users do behave after all... 
*/ put_net_conf(mdev); @@ -486,7 +474,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* so we can verify the handle in the answer packet * corresponding hlist_del is in _req_may_be_done() */ - hlist_add_head(&req->collision, ar_hash_slot(mdev, req->i.sector)); drbd_insert_interval(&mdev->read_requests, &req->i); set_bit(UNPLUG_REMOTE, &mdev->flags); @@ -503,7 +490,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* assert something? */ /* from drbd_make_request_common only */ - hlist_add_head(&req->collision, tl_hash_slot(mdev, req->i.sector)); /* corresponding hlist_del is in _req_may_be_done() */ drbd_insert_interval(&mdev->write_requests, &req->i); diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 6f11624cce38..ee591749c4d3 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -222,32 +222,6 @@ enum drbd_req_state_bits { #define MR_READ_SHIFT 1 #define MR_READ (1 << MR_READ_SHIFT) -/* epoch entries */ -static inline -struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector) -{ - BUG_ON(mdev->ee_hash_s == 0); - return mdev->ee_hash + - ((unsigned int)(sector>>HT_SHIFT) % mdev->ee_hash_s); -} - -/* transfer log (drbd_request objects) */ -static inline -struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector) -{ - BUG_ON(mdev->tl_hash_s == 0); - return mdev->tl_hash + - ((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s); -} - -/* application reads (drbd_request objects) */ -static inline -struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector) -{ - return mdev->app_reads_hash - + ((unsigned int)(sector) % APP_R_HSIZE); -} - static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src) { struct bio *bio; @@ -274,7 +248,6 @@ static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, req->epoch = 0; req->i.sector = bio_src->bi_sector; req->i.size = bio_src->bi_size; - INIT_HLIST_NODE(&req->collision); drbd_clear_interval(&req->i); INIT_LIST_HEAD(&req->tl_requests); INIT_LIST_HEAD(&req->w.list); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 2b83aaf02c33..1ddf6b61b20b 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -122,10 +122,13 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo list_del(&e->w.list); /* has been on active_ee or sync_ee */ list_add_tail(&e->w.list, &mdev->done_ee); - /* No hlist_del_init(&e->collision) here, we did not send the Ack yet, - * neither did we wake possibly waiting conflicting requests. - * done from "drbd_process_done_ee" within the appropriate w.cb - * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */ + /* + * Do not remove from the epoch_entries tree here: we did not send the + * Ack yet and did not wake possibly waiting conflicting requests. + * Removed from the tree from "drbd_process_done_ee" within the + * appropriate w.cb (e_end_block/e_end_resync_block) or from + * _drbd_clear_done_ee. + */ do_wake = list_empty(block_id == ID_SYNCER ? 
&mdev->sync_ee : &mdev->active_ee); From 8554df1c6d3bb7686b39ed775772f507fa857c19 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 25 Jan 2011 15:37:43 +0100 Subject: [PATCH 0021/5831] drbd: Convert all constants in enum drbd_req_event to upper case Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 10 ++-- drivers/block/drbd/drbd_main.c | 28 +++++----- drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_receiver.c | 18 +++---- drivers/block/drbd/drbd_req.c | 84 +++++++++++++++--------------- drivers/block/drbd/drbd_req.h | 58 ++++++++++----------- drivers/block/drbd/drbd_worker.c | 22 ++++---- 7 files changed, 111 insertions(+), 111 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 5874357b0f9c..6099c667b634 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2031,21 +2031,21 @@ static inline void drbd_thread_restart_nowait(struct drbd_thread *thi) * or implicit barrier packets as necessary. * increased: * w_send_barrier - * _req_mod(req, queue_for_net_write or queue_for_net_read); + * _req_mod(req, QUEUE_FOR_NET_WRITE or QUEUE_FOR_NET_READ); * it is much easier and equally valid to count what we queue for the * worker, even before it actually was queued or send. * (drbd_make_request_common; recovery path on read io-error) * decreased: * got_BarrierAck (respective tl_clear, tl_clear_barrier) - * _req_mod(req, data_received) + * _req_mod(req, DATA_RECEIVED) * [from receive_DataReply] - * _req_mod(req, write_acked_by_peer or recv_acked_by_peer or neg_acked) + * _req_mod(req, WRITE_ACKED_BY_PEER or RECV_ACKED_BY_PEER or NEG_ACKED) * [from got_BlockAck (P_WRITE_ACK, P_RECV_ACK)] * for some reason it is NOT decreased in got_NegAck, * but in the resulting cleanup code from report_params. * we should try to remember the reason for that... - * _req_mod(req, send_failed or send_canceled) - * _req_mod(req, connection_lost_while_pending) + * _req_mod(req, SEND_FAILED or SEND_CANCELED) + * _req_mod(req, CONNECTION_LOST_WHILE_PENDING) * [from tl_clear_barrier] */ static inline void inc_ap_pending(struct drbd_conf *mdev) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 878f7d4fc885..c5bb87143347 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -290,7 +290,7 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, /* Clean up list of requests processed during current epoch */ list_for_each_safe(le, tle, &b->requests) { r = list_entry(le, struct drbd_request, tl_requests); - _req_mod(r, barrier_acked); + _req_mod(r, BARRIER_ACKED); } /* There could be requests on the list waiting for completion of the write to the local disk. To avoid corruptions of @@ -300,10 +300,10 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, the write acks - which would be a bug and violating write ordering. To not deadlock in case we lose connection while such requests are still pending, we need some way to find them for the - _req_mode(connection_lost_while_pending). + _req_mode(CONNECTION_LOST_WHILE_PENDING). These have been list_move'd to the out_of_sequence_requests list in - _req_mod(, barrier_acked) above. + _req_mod(, BARRIER_ACKED) above. */ list_del_init(&b->requests); @@ -336,8 +336,8 @@ bail: * @mdev: DRBD device. 
* @what: The action/event to perform with all request objects * - * @what might be one of connection_lost_while_pending, resend, fail_frozen_disk_io, - * restart_frozen_disk_io. + * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO, + * RESTART_FROZEN_DISK_IO. */ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) { @@ -362,7 +362,7 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) tmp = b->next; if (n_writes) { - if (what == resend) { + if (what == RESEND) { b->n_writes = n_writes; if (b->w.cb == NULL) { b->w.cb = w_send_barrier; @@ -423,7 +423,7 @@ void tl_clear(struct drbd_conf *mdev) spin_lock_irq(&mdev->req_lock); - _tl_restart(mdev, connection_lost_while_pending); + _tl_restart(mdev, CONNECTION_LOST_WHILE_PENDING); /* we expect this list to be empty. */ D_ASSERT(list_empty(&mdev->out_of_sequence_requests)); @@ -433,7 +433,7 @@ void tl_clear(struct drbd_conf *mdev) r = list_entry(le, struct drbd_request, tl_requests); /* It would be nice to complete outside of spinlock. * But this is easier for now. */ - _req_mod(r, connection_lost_while_pending); + _req_mod(r, CONNECTION_LOST_WHILE_PENDING); } /* ensure bit indicating barrier is required is clear */ @@ -1321,7 +1321,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags) { enum drbd_fencing_p fp; - enum drbd_req_event what = nothing; + enum drbd_req_event what = NOTHING; union drbd_state nsm = (union drbd_state){ .i = -1 }; if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { @@ -1349,12 +1349,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, nsm.i = -1; if (ns.susp_nod) { if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) - what = resend; + what = RESEND; if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) - what = restart_frozen_disk_io; + what = RESTART_FROZEN_DISK_IO; - if (what != nothing) + if (what != NOTHING) nsm.susp_nod = 0; } @@ -1373,12 +1373,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* case2: The connection was established again: */ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { clear_bit(NEW_CUR_UUID, &mdev->flags); - what = resend; + what = RESEND; nsm.susp_fen = 0; } } - if (what != nothing) { + if (what != NOTHING) { spin_lock_irq(&mdev->req_lock); _tl_restart(mdev, what); nsm.i &= mdev->state.i; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 5b8ebbef95de..1840cbb8a10b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2022,7 +2022,7 @@ static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (mdev->state.conn < C_CONNECTED) tl_clear(mdev); if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED) - tl_restart(mdev, fail_frozen_disk_io); + tl_restart(mdev, FAIL_FROZEN_DISK_IO); } drbd_resume_io(mdev); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 566317bb74e8..1762ef0375e1 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -385,7 +385,7 @@ int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) /* * This function is called from _asender only_ - * but see also comments in _req_mod(,barrier_acked) + * but see also comments in _req_mod(,BARRIER_ACKED) * and receive_Barrier. * * Move entries from net_ee to done_ee, if ready. 
@@ -1507,7 +1507,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi ok = recv_dless_read(mdev, req, sector, data_size); if (ok) - req_mod(req, data_received); + req_mod(req, DATA_RECEIVED); /* else: nothing. handled from drbd_disconnect... * I don't think we may complete this just yet * in case we are "on-disconnect: freeze" */ @@ -3279,7 +3279,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD); if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED && test_bit(NEW_CUR_UUID, &mdev->flags)) { - /* Do not allow tl_restart(resend) for a rebooted peer. We can only allow this + /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this for temporal network outages! */ spin_unlock_irq(&mdev->req_lock); dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n"); @@ -4272,19 +4272,19 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) switch (be16_to_cpu(h->command)) { case P_RS_WRITE_ACK: D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); - what = write_acked_by_peer_and_sis; + what = WRITE_ACKED_BY_PEER_AND_SIS; break; case P_WRITE_ACK: D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); - what = write_acked_by_peer; + what = WRITE_ACKED_BY_PEER; break; case P_RECV_ACK: D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_B); - what = recv_acked_by_peer; + what = RECV_ACKED_BY_PEER; break; case P_DISCARD_ACK: D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); - what = conflict_discarded_by_peer; + what = CONFLICT_DISCARDED_BY_PEER; break; default: D_ASSERT(0); @@ -4315,7 +4315,7 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) found = validate_req_change_req_state(mdev, p->block_id, sector, &mdev->write_requests, __func__, - neg_acked, missing_ok); + NEG_ACKED, missing_ok); if (!found) { /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. The master bio might already be completed, therefore the @@ -4340,7 +4340,7 @@ static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) return validate_req_change_req_state(mdev, p->block_id, sector, &mdev->read_requests, __func__, - neg_acked, false); + NEG_ACKED, false); } static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 8541b16de08b..b3b1d4edbb03 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -225,10 +225,10 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) return; if (req->master_bio) { - /* this is data_received (remote read) + /* this is DATA_RECEIVED (remote read) * or protocol C P_WRITE_ACK * or protocol B P_RECV_ACK - * or protocol A "handed_over_to_network" (SendAck) + * or protocol A "HANDED_OVER_TO_NETWORK" (SendAck) * or canceled or failed, * or killed from the transfer log due to connection loss. */ @@ -393,11 +393,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* does not happen... 
* initialization done in drbd_req_new - case created: + case CREATED: break; */ - case to_be_send: /* via network */ + case TO_BE_SENT: /* via network */ /* reached via drbd_make_request_common * and from w_read_retry_remote */ D_ASSERT(!(req->rq_state & RQ_NET_MASK)); @@ -405,13 +405,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, inc_ap_pending(mdev); break; - case to_be_submitted: /* locally */ + case TO_BE_SUBMITTED: /* locally */ /* reached via drbd_make_request_common */ D_ASSERT(!(req->rq_state & RQ_LOCAL_MASK)); req->rq_state |= RQ_LOCAL_PENDING; break; - case completed_ok: + case COMPLETED_OK: if (bio_data_dir(req->master_bio) == WRITE) mdev->writ_cnt += req->i.size >> 9; else @@ -424,7 +424,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, put_ldev(mdev); break; - case write_completed_with_error: + case WRITE_COMPLETED_WITH_ERROR: req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; @@ -433,7 +433,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, put_ldev(mdev); break; - case read_ahead_completed_with_error: + case READ_AHEAD_COMPLETED_WITH_ERROR: /* it is legal to fail READA */ req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; @@ -441,7 +441,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, put_ldev(mdev); break; - case read_completed_with_error: + case READ_COMPLETED_WITH_ERROR: drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); req->rq_state |= RQ_LOCAL_COMPLETED; @@ -459,12 +459,12 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; } - /* _req_mod(req,to_be_send); oops, recursion... */ + /* _req_mod(req,TO_BE_SENT); oops, recursion... */ req->rq_state |= RQ_NET_PENDING; inc_ap_pending(mdev); - /* fall through: _req_mod(req,queue_for_net_read); */ + /* fall through: _req_mod(req,QUEUE_FOR_NET_READ); */ - case queue_for_net_read: + case QUEUE_FOR_NET_READ: /* READ or READA, and * no local disk, * or target area marked as invalid, @@ -486,7 +486,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, drbd_queue_work(&mdev->data.work, &req->w); break; - case queue_for_net_write: + case QUEUE_FOR_NET_WRITE: /* assert something? */ /* from drbd_make_request_common only */ @@ -533,17 +533,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; - case queue_for_send_oos: + case QUEUE_FOR_SEND_OOS: req->rq_state |= RQ_NET_QUEUED; req->w.cb = w_send_oos; drbd_queue_work(&mdev->data.work, &req->w); break; - case oos_handed_to_network: + case OOS_HANDED_TO_NETWORK: /* actually the same */ - case send_canceled: + case SEND_CANCELED: /* treat it the same */ - case send_failed: + case SEND_FAILED: /* real cleanup will be done from tl_clear. just update flags * so it is no longer marked as on the worker queue */ req->rq_state &= ~RQ_NET_QUEUED; @@ -552,7 +552,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, _req_may_be_done_not_susp(req, m); break; - case handed_over_to_network: + case HANDED_OVER_TO_NETWORK: /* assert something? 
*/ if (bio_data_dir(req->master_bio) == WRITE) atomic_add(req->i.size >> 9, &mdev->ap_in_flight); @@ -573,17 +573,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state &= ~RQ_NET_QUEUED; req->rq_state |= RQ_NET_SENT; /* because _drbd_send_zc_bio could sleep, and may want to - * dereference the bio even after the "write_acked_by_peer" and - * "completed_ok" events came in, once we return from + * dereference the bio even after the "WRITE_ACKED_BY_PEER" and + * "COMPLETED_OK" events came in, once we return from * _drbd_send_zc_bio (drbd_send_dblock), we have to check * whether it is done already, and end it. */ _req_may_be_done_not_susp(req, m); break; - case read_retry_remote_canceled: + case READ_RETRY_REMOTE_CANCELED: req->rq_state &= ~RQ_NET_QUEUED; /* fall through, in case we raced with drbd_disconnect */ - case connection_lost_while_pending: + case CONNECTION_LOST_WHILE_PENDING: /* transfer log cleanup after connection loss */ /* assert something? */ if (req->rq_state & RQ_NET_PENDING) @@ -599,19 +599,19 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, _req_may_be_done(req, m); /* Allowed while state.susp */ break; - case write_acked_by_peer_and_sis: + case WRITE_ACKED_BY_PEER_AND_SIS: req->rq_state |= RQ_NET_SIS; - case conflict_discarded_by_peer: + case CONFLICT_DISCARDED_BY_PEER: /* for discarded conflicting writes of multiple primaries, * there is no need to keep anything in the tl, potential * node crashes are covered by the activity log. */ - if (what == conflict_discarded_by_peer) + if (what == CONFLICT_DISCARDED_BY_PEER) dev_alert(DEV, "Got DiscardAck packet %llus +%u!" " DRBD is not a random data generator!\n", (unsigned long long)req->i.sector, req->i.size); req->rq_state |= RQ_NET_DONE; /* fall through */ - case write_acked_by_peer: + case WRITE_ACKED_BY_PEER: /* protocol C; successfully written on peer. * Nothing to do here. * We want to keep the tl in place for all protocols, to cater @@ -623,9 +623,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * P_BARRIER_ACK, but that is an unnecessary optimization. */ /* this makes it effectively the same as for: */ - case recv_acked_by_peer: + case RECV_ACKED_BY_PEER: /* protocol B; pretends to be successfully written on peer. - * see also notes above in handed_over_to_network about + * see also notes above in HANDED_OVER_TO_NETWORK about * protocol != C */ req->rq_state |= RQ_NET_OK; D_ASSERT(req->rq_state & RQ_NET_PENDING); @@ -635,7 +635,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, _req_may_be_done_not_susp(req, m); break; - case neg_acked: + case NEG_ACKED: /* assert something? 
*/ if (req->rq_state & RQ_NET_PENDING) { dec_ap_pending(mdev); @@ -645,17 +645,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_NET_DONE; _req_may_be_done_not_susp(req, m); - /* else: done by handed_over_to_network */ + /* else: done by HANDED_OVER_TO_NETWORK */ break; - case fail_frozen_disk_io: + case FAIL_FROZEN_DISK_IO: if (!(req->rq_state & RQ_LOCAL_COMPLETED)) break; _req_may_be_done(req, m); /* Allowed while state.susp */ break; - case restart_frozen_disk_io: + case RESTART_FROZEN_DISK_IO: if (!(req->rq_state & RQ_LOCAL_COMPLETED)) break; @@ -670,7 +670,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, drbd_queue_work(&mdev->data.work, &req->w); break; - case resend: + case RESEND: /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK before the connection loss (B&C only); only P_BARRIER_ACK was missing. Trowing them out of the TL here by pretending we got a BARRIER_ACK @@ -682,9 +682,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, } break; } - /* else, fall through to barrier_acked */ + /* else, fall through to BARRIER_ACKED */ - case barrier_acked: + case BARRIER_ACKED: if (!(req->rq_state & RQ_WRITE)) break; @@ -692,7 +692,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, /* barrier came in before all requests have been acked. * this is bad, because if the connection is lost now, * we won't be able to clean them up... */ - dev_err(DEV, "FIXME (barrier_acked but pending)\n"); + dev_err(DEV, "FIXME (BARRIER_ACKED but pending)\n"); list_move(&req->tl_requests, &mdev->out_of_sequence_requests); } if ((req->rq_state & RQ_NET_MASK) != 0) { @@ -703,7 +703,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, _req_may_be_done(req, m); /* Allowed while state.susp */ break; - case data_received: + case DATA_RECEIVED: D_ASSERT(req->rq_state & RQ_NET_PENDING); dec_ap_pending(mdev); req->rq_state &= ~RQ_NET_PENDING; @@ -924,9 +924,9 @@ allocate_barrier: /* mark them early for readability. * this just sets some state flags. */ if (remote) - _req_mod(req, to_be_send); + _req_mod(req, TO_BE_SENT); if (local) - _req_mod(req, to_be_submitted); + _req_mod(req, TO_BE_SUBMITTED); /* check this request on the collision detection hash tables. * if we have a conflict, just complete it here. @@ -944,11 +944,11 @@ allocate_barrier: * or READ, but not in sync. */ _req_mod(req, (rw == WRITE) - ? queue_for_net_write - : queue_for_net_read); + ? QUEUE_FOR_NET_WRITE + : QUEUE_FOR_NET_READ); } if (send_oos && drbd_set_out_of_sync(mdev, sector, size)) - _req_mod(req, queue_for_send_oos); + _req_mod(req, QUEUE_FOR_SEND_OOS); if (remote && mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) { diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index ee591749c4d3..6dbbe8906c8e 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -77,39 +77,39 @@ */ enum drbd_req_event { - created, - to_be_send, - to_be_submitted, + CREATED, + TO_BE_SENT, + TO_BE_SUBMITTED, /* XXX yes, now I am inconsistent... * these are not "events" but "actions" * oh, well... 
*/ - queue_for_net_write, - queue_for_net_read, - queue_for_send_oos, + QUEUE_FOR_NET_WRITE, + QUEUE_FOR_NET_READ, + QUEUE_FOR_SEND_OOS, - send_canceled, - send_failed, - handed_over_to_network, - oos_handed_to_network, - connection_lost_while_pending, - read_retry_remote_canceled, - recv_acked_by_peer, - write_acked_by_peer, - write_acked_by_peer_and_sis, /* and set_in_sync */ - conflict_discarded_by_peer, - neg_acked, - barrier_acked, /* in protocol A and B */ - data_received, /* (remote read) */ + SEND_CANCELED, + SEND_FAILED, + HANDED_OVER_TO_NETWORK, + OOS_HANDED_TO_NETWORK, + CONNECTION_LOST_WHILE_PENDING, + READ_RETRY_REMOTE_CANCELED, + RECV_ACKED_BY_PEER, + WRITE_ACKED_BY_PEER, + WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */ + CONFLICT_DISCARDED_BY_PEER, + NEG_ACKED, + BARRIER_ACKED, /* in protocol A and B */ + DATA_RECEIVED, /* (remote read) */ - read_completed_with_error, - read_ahead_completed_with_error, - write_completed_with_error, - completed_ok, - resend, - fail_frozen_disk_io, - restart_frozen_disk_io, - nothing, /* for tracing only */ + READ_COMPLETED_WITH_ERROR, + READ_AHEAD_COMPLETED_WITH_ERROR, + WRITE_COMPLETED_WITH_ERROR, + COMPLETED_OK, + RESEND, + FAIL_FROZEN_DISK_IO, + RESTART_FROZEN_DISK_IO, + NOTHING, }; /* encoding of request states for now. we don't actually need that many bits. @@ -138,8 +138,8 @@ enum drbd_req_state_bits { * recv_ack (B) or implicit "ack" (A), * still waiting for the barrier ack. * master_bio may already be completed and invalidated. - * 11100: write_acked (C), - * data_received (for remote read, any protocol) + * 11100: write acked (C), + * data received (for remote read, any protocol) * or finally the barrier ack has arrived (B,A)... * request can be freed * 01100: neg-acked (write, protocol C) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 1ddf6b61b20b..550617b1a39c 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -209,12 +209,12 @@ void drbd_endio_pri(struct bio *bio, int error) /* to avoid recursion in __req_mod */ if (unlikely(error)) { what = (bio_data_dir(bio) == WRITE) - ? write_completed_with_error + ? WRITE_COMPLETED_WITH_ERROR : (bio_rw(bio) == READ) - ? read_completed_with_error - : read_ahead_completed_with_error; + ? READ_COMPLETED_WITH_ERROR + : READ_AHEAD_COMPLETED_WITH_ERROR; } else - what = completed_ok; + what = COMPLETED_OK; bio_put(req->private_bio); req->private_bio = ERR_PTR(error); @@ -238,7 +238,7 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) spin_lock_irq(&mdev->req_lock); if (cancel || mdev->state.pdsk != D_UP_TO_DATE) { - _req_mod(req, read_retry_remote_canceled); + _req_mod(req, READ_RETRY_REMOTE_CANCELED); spin_unlock_irq(&mdev->req_lock); return 1; } @@ -1243,12 +1243,12 @@ int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int ok; if (unlikely(cancel)) { - req_mod(req, send_canceled); + req_mod(req, SEND_CANCELED); return 1; } ok = drbd_send_oos(mdev, req); - req_mod(req, oos_handed_to_network); + req_mod(req, OOS_HANDED_TO_NETWORK); return ok; } @@ -1265,12 +1265,12 @@ int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int ok; if (unlikely(cancel)) { - req_mod(req, send_canceled); + req_mod(req, SEND_CANCELED); return 1; } ok = drbd_send_dblock(mdev, req); - req_mod(req, ok ? handed_over_to_network : send_failed); + req_mod(req, ok ? 
HANDED_OVER_TO_NETWORK : SEND_FAILED); return ok; } @@ -1287,7 +1287,7 @@ int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int ok; if (unlikely(cancel)) { - req_mod(req, send_canceled); + req_mod(req, SEND_CANCELED); return 1; } @@ -1300,7 +1300,7 @@ int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if (mdev->state.conn >= C_CONNECTED) drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); } - req_mod(req, ok ? handed_over_to_network : send_failed); + req_mod(req, ok ? HANDED_OVER_TO_NETWORK : SEND_FAILED); return ok; } From e77a0a5cc1e6961f485b5623ef42f3b910969675 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 25 Jan 2011 15:43:39 +0100 Subject: [PATCH 0022/5831] drbd: Convert all constants in enum drbd_thread_state to upper case Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 +++---- drivers/block/drbd/drbd_main.c | 38 +++++++++++++++--------------- drivers/block/drbd/drbd_receiver.c | 6 ++--- drivers/block/drbd/drbd_worker.c | 8 +++---- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 6099c667b634..1cf9c0954906 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -649,10 +649,10 @@ union p_polymorph { /**********************************************************************/ enum drbd_thread_state { - None, - Running, - Exiting, - Restarting + NONE, + RUNNING, + EXITING, + RESTARTING }; struct drbd_thread { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c5bb87143347..19176a149ac7 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1627,25 +1627,25 @@ restart: spin_lock_irqsave(&thi->t_lock, flags); - /* if the receiver has been "Exiting", the last thing it did + /* if the receiver has been "EXITING", the last thing it did * was set the conn state to "StandAlone", * if now a re-connect request comes in, conn state goes C_UNCONNECTED, * and receiver thread will be "started". - * drbd_thread_start needs to set "Restarting" in that case. + * drbd_thread_start needs to set "RESTARTING" in that case. * t_state check and assignment needs to be within the same spinlock, - * so either thread_start sees Exiting, and can remap to Restarting, - * or thread_start see None, and can proceed as normal. + * so either thread_start sees EXITING, and can remap to RESTARTING, + * or thread_start see NONE, and can proceed as normal. 
*/ - if (thi->t_state == Restarting) { + if (thi->t_state == RESTARTING) { dev_info(DEV, "Restarting %s\n", current->comm); - thi->t_state = Running; + thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, flags); goto restart; } thi->task = NULL; - thi->t_state = None; + thi->t_state = NONE; smp_mb(); complete(&thi->stop); spin_unlock_irqrestore(&thi->t_lock, flags); @@ -1662,7 +1662,7 @@ static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi, { spin_lock_init(&thi->t_lock); thi->task = NULL; - thi->t_state = None; + thi->t_state = NONE; thi->function = func; thi->mdev = mdev; } @@ -1683,7 +1683,7 @@ int drbd_thread_start(struct drbd_thread *thi) spin_lock_irqsave(&thi->t_lock, flags); switch (thi->t_state) { - case None: + case NONE: dev_info(DEV, "Starting %s thread (from %s [%d])\n", me, current->comm, current->pid); @@ -1697,7 +1697,7 @@ int drbd_thread_start(struct drbd_thread *thi) init_completion(&thi->stop); D_ASSERT(thi->task == NULL); thi->reset_cpu_mask = 1; - thi->t_state = Running; + thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, flags); flush_signals(current); /* otherw. may get -ERESTARTNOINTR */ @@ -1712,17 +1712,17 @@ int drbd_thread_start(struct drbd_thread *thi) } spin_lock_irqsave(&thi->t_lock, flags); thi->task = nt; - thi->t_state = Running; + thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, flags); wake_up_process(nt); break; - case Exiting: - thi->t_state = Restarting; + case EXITING: + thi->t_state = RESTARTING; dev_info(DEV, "Restarting %s thread (from %s [%d])\n", me, current->comm, current->pid); /* fall through */ - case Running: - case Restarting: + case RUNNING: + case RESTARTING: default: spin_unlock_irqrestore(&thi->t_lock, flags); break; @@ -1736,12 +1736,12 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) { unsigned long flags; - enum drbd_thread_state ns = restart ? Restarting : Exiting; + enum drbd_thread_state ns = restart ? 
RESTARTING : EXITING; /* may be called from state engine, holding the req lock irqsave */ spin_lock_irqsave(&thi->t_lock, flags); - if (thi->t_state == None) { + if (thi->t_state == NONE) { spin_unlock_irqrestore(&thi->t_lock, flags); if (restart) drbd_thread_start(thi); @@ -2504,7 +2504,7 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * drop_it = mdev->meta.socket == sock || !mdev->asender.task - || get_t_state(&mdev->asender) != Running + || get_t_state(&mdev->asender) != RUNNING || mdev->state.conn < C_CONNECTED; if (drop_it) @@ -3046,7 +3046,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) void drbd_mdev_cleanup(struct drbd_conf *mdev) { int i; - if (mdev->receiver.t_state != None) + if (mdev->receiver.t_state != NONE) dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n", mdev->receiver.t_state); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1762ef0375e1..1cfcc44fd484 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -833,7 +833,7 @@ retry: if (signal_pending(current)) { flush_signals(current); smp_rmb(); - if (get_t_state(&mdev->receiver) == Exiting) + if (get_t_state(&mdev->receiver) == EXITING) goto out_release_sockets; } @@ -3700,7 +3700,7 @@ static void drbdd(struct drbd_conf *mdev) size_t shs; /* sub header size */ int rv; - while (get_t_state(&mdev->receiver) == Running) { + while (get_t_state(&mdev->receiver) == RUNNING) { drbd_thread_current_set_cpu(mdev); if (!drbd_recv_header(mdev, &cmd, &packet_size)) goto err_out; @@ -4490,7 +4490,7 @@ int drbd_asender(struct drbd_thread *thi) current->policy = SCHED_RR; /* Make this a realtime task! */ current->rt_priority = 2; /* more important than all other tasks */ - while (get_t_state(thi) == Running) { + while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(mdev); if (test_and_clear_bit(SEND_PING, &mdev->flags)) { ERR_IF(!drbd_send_ping(mdev)) goto reconnect; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 550617b1a39c..c2a9285afad6 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1617,7 +1617,7 @@ int drbd_worker(struct drbd_thread *thi) sprintf(current->comm, "drbd%d_worker", mdev_to_minor(mdev)); - while (get_t_state(thi) == Running) { + while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(mdev); if (down_trylock(&mdev->data.work.s)) { @@ -1637,12 +1637,12 @@ int drbd_worker(struct drbd_thread *thi) if (intr) { D_ASSERT(intr == -EINTR); flush_signals(current); - ERR_IF (get_t_state(thi) == Running) + ERR_IF (get_t_state(thi) == RUNNING) continue; break; } - if (get_t_state(thi) != Running) + if (get_t_state(thi) != RUNNING) break; /* With this break, we have done a down() but not consumed the entry from the list. The cleanup code takes care of @@ -1704,7 +1704,7 @@ int drbd_worker(struct drbd_thread *thi) D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); /* _drbd_set_state only uses stop_nowait. - * wait here for the Exiting receiver. */ + * wait here for the EXITING receiver. 
*/ drbd_thread_stop(&mdev->receiver); drbd_mdev_cleanup(mdev); From 841ce241fa355048f66172a47e356bb6e9159c9d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 15 Dec 2010 19:31:20 +0100 Subject: [PATCH 0023/5831] drbd: Replace the ERR_IF macro with an assert-like macro Remove the file name and line number from the syslog messages generated: we have no duplicate function names, and no function contains the same assertion more than once. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 21 ++++--- drivers/block/drbd/drbd_bitmap.c | 91 +++++++++++++++++++----------- drivers/block/drbd/drbd_int.h | 18 ++++-- drivers/block/drbd/drbd_main.c | 13 +++-- drivers/block/drbd/drbd_nl.c | 8 ++- drivers/block/drbd/drbd_receiver.c | 19 +++++-- drivers/block/drbd/drbd_worker.c | 7 ++- 7 files changed, 114 insertions(+), 63 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 0eb17d3adf21..9284b10e42bb 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -491,7 +491,8 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) unsigned int trn; rv = drbd_al_read_tr(mdev, bdev, buffer, i); - ERR_IF(rv == 0) goto cancel; + if (!expect(rv != 0)) + goto cancel; if (rv == -1) { mutex_unlock(&mdev->md_io_mutex); return 0; @@ -770,8 +771,10 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, nr_sectors = drbd_get_capacity(mdev->this_bdev); esector = sector + (size >> 9) - 1; - ERR_IF(sector >= nr_sectors) return; - ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1); + if (!expect(sector < nr_sectors)) + return; + if (!expect(esector < nr_sectors)) + esector = nr_sectors - 1; lbnr = BM_SECT_TO_BIT(nr_sectors-1); @@ -837,10 +840,10 @@ int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, nr_sectors = drbd_get_capacity(mdev->this_bdev); esector = sector + (size >> 9) - 1; - ERR_IF(sector >= nr_sectors) + if (!expect(sector < nr_sectors)) goto out; - ERR_IF(esector >= nr_sectors) - esector = (nr_sectors-1); + if (!expect(esector < nr_sectors)) + esector = nr_sectors - 1; lbnr = BM_SECT_TO_BIT(nr_sectors-1); @@ -1218,8 +1221,10 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) nr_sectors = drbd_get_capacity(mdev->this_bdev); esector = sector + (size >> 9) - 1; - ERR_IF(sector >= nr_sectors) return; - ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1); + if (!expect(sector < nr_sectors)) + return; + if (!expect(esector < nr_sectors)) + esector = nr_sectors - 1; lbnr = BM_SECT_TO_BIT(nr_sectors-1); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 7b976296b564..c756b4dbd135 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -440,7 +440,8 @@ int drbd_bm_init(struct drbd_conf *mdev) sector_t drbd_bm_capacity(struct drbd_conf *mdev) { - ERR_IF(!mdev->bitmap) return 0; + if (!expect(mdev->bitmap)) + return 0; return mdev->bitmap->bm_dev_capacity; } @@ -448,7 +449,8 @@ sector_t drbd_bm_capacity(struct drbd_conf *mdev) */ void drbd_bm_cleanup(struct drbd_conf *mdev) { - ERR_IF (!mdev->bitmap) return; + if (!expect(mdev->bitmap)) + return; bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages); bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags)); kfree(mdev->bitmap); @@ -611,7 +613,8 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) int 
err = 0, growing; int opages_vmalloced; - ERR_IF(!b) return -ENOMEM; + if (!expect(b)) + return -ENOMEM; drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK); @@ -733,8 +736,10 @@ unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev) unsigned long s; unsigned long flags; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; spin_lock_irqsave(&b->bm_lock, flags); s = b->bm_set; @@ -757,8 +762,10 @@ unsigned long drbd_bm_total_weight(struct drbd_conf *mdev) size_t drbd_bm_words(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; return b->bm_words; } @@ -766,7 +773,8 @@ size_t drbd_bm_words(struct drbd_conf *mdev) unsigned long drbd_bm_bits(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; - ERR_IF(!b) return 0; + if (!expect(b)) + return 0; return b->bm_bits; } @@ -787,8 +795,10 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, end = offset + number; - ERR_IF(!b) return; - ERR_IF(!b->bm_pages) return; + if (!expect(b)) + return; + if (!expect(b->bm_pages)) + return; if (number == 0) return; WARN_ON(offset >= b->bm_words); @@ -832,8 +842,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, end = offset + number; - ERR_IF(!b) return; - ERR_IF(!b->bm_pages) return; + if (!expect(b)) + return; + if (!expect(b->bm_pages)) + return; spin_lock_irq(&b->bm_lock); if ((offset >= b->bm_words) || @@ -861,8 +873,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, void drbd_bm_set_all(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; - ERR_IF(!b) return; - ERR_IF(!b->bm_pages) return; + if (!expect(b)) + return; + if (!expect(b->bm_pages)) + return; spin_lock_irq(&b->bm_lock); bm_memset(b, 0, 0xff, b->bm_words); @@ -875,8 +889,10 @@ void drbd_bm_set_all(struct drbd_conf *mdev) void drbd_bm_clear_all(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; - ERR_IF(!b) return; - ERR_IF(!b->bm_pages) return; + if (!expect(b)) + return; + if (!expect(b->bm_pages)) + return; spin_lock_irq(&b->bm_lock); bm_memset(b, 0, 0, b->bm_words); @@ -1209,8 +1225,10 @@ static unsigned long bm_find_next(struct drbd_conf *mdev, struct drbd_bitmap *b = mdev->bitmap; unsigned long i = DRBD_END_OF_BITMAP; - ERR_IF(!b) return i; - ERR_IF(!b->bm_pages) return i; + if (!expect(b)) + return i; + if (!expect(b->bm_pages)) + return i; spin_lock_irq(&b->bm_lock); if (BM_DONT_TEST & b->bm_flags) @@ -1311,8 +1329,10 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, struct drbd_bitmap *b = mdev->bitmap; int c = 0; - ERR_IF(!b) return 1; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 1; + if (!expect(b->bm_pages)) + return 0; spin_lock_irqsave(&b->bm_lock, flags); if ((val ? 
BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags) @@ -1437,8 +1457,10 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) unsigned long *p_addr; int i; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; spin_lock_irqsave(&b->bm_lock, flags); if (BM_DONT_TEST & b->bm_flags) @@ -1472,8 +1494,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi * robust in case we screwed up elsewhere, in that case pretend there * was one dirty bit in the requested area, so we won't try to do a * local read there (no bitmap probably implies no disk) */ - ERR_IF(!b) return 1; - ERR_IF(!b->bm_pages) return 1; + if (!expect(b)) + return 1; + if (!expect(b->bm_pages)) + return 1; spin_lock_irqsave(&b->bm_lock, flags); if (BM_DONT_TEST & b->bm_flags) @@ -1486,11 +1510,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi bm_unmap(p_addr); p_addr = bm_map_pidx(b, idx); } - ERR_IF (bitnr >= b->bm_bits) { - dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); - } else { + if (expect(bitnr < b->bm_bits)) c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); - } + else + dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); } if (p_addr) bm_unmap(p_addr); @@ -1520,8 +1543,10 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) unsigned long flags; unsigned long *p_addr, *bm; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; spin_lock_irqsave(&b->bm_lock, flags); if (BM_DONT_TEST & b->bm_flags) @@ -1553,8 +1578,10 @@ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) unsigned long weight; unsigned long s, e; int count, i, do_now; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; spin_lock_irq(&b->bm_lock); if (BM_DONT_SET & b->bm_flags) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 1cf9c0954906..03dd7a0b1bc1 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -102,12 +102,18 @@ struct drbd_conf; #define D_ASSERT(exp) if (!(exp)) \ dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__) -#define ERR_IF(exp) if (({ \ - int _b = (exp) != 0; \ - if (_b) dev_err(DEV, "ASSERT FAILED: %s: (%s) in %s:%d\n", \ - __func__, #exp, __FILE__, __LINE__); \ - _b; \ - })) +/** + * expect - Make an assertion + * + * Unlike the assert macro, this macro returns a boolean result. + */ +#define expect(exp) ({ \ + bool _bool = (exp); \ + if (!_bool) \ + dev_err(DEV, "ASSERTION %s FAILED in %s\n", \ + #exp, __func__); \ + _bool; \ + }) /* Defines to control fault insertion */ enum { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 19176a149ac7..46ba4aa03f3f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1810,7 +1810,7 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) p == mdev->receiver.task ? &mdev->receiver : p == mdev->worker.task ? 
&mdev->worker : NULL; - ERR_IF(thi == NULL) + if (!expect(thi != NULL)) return; if (!thi->reset_cpu_mask) return; @@ -1826,8 +1826,10 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, { int sent, ok; - ERR_IF(!h) return false; - ERR_IF(!size) return false; + if (!expect(h)) + return false; + if (!expect(size)) + return false; h->magic = cpu_to_be32(DRBD_MAGIC); h->command = cpu_to_be16(cmd); @@ -2300,7 +2302,8 @@ int _drbd_send_bitmap(struct drbd_conf *mdev) struct p_header80 *p; int err; - ERR_IF(!mdev->bitmap) return false; + if (!expect(mdev->bitmap)) + return false; /* maybe we should use some per thread scratch page, * and allocate that during initial device creation? */ @@ -3255,7 +3258,7 @@ static void drbd_delete_device(unsigned int minor) dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt, __FILE__ , __LINE__); - ERR_IF (!list_empty(&mdev->data.work.q)) { + if (!expect(list_empty(&mdev->data.work.q))) { struct list_head *lp; list_for_each(lp, &mdev->data.work.q) { dev_err(DEV, "lp = %p\n", lp); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 1840cbb8a10b..51da84940a33 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -751,7 +751,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev) unsigned int in_use; int i; - ERR_IF(mdev->sync_conf.al_extents < 7) + if (!expect(mdev->sync_conf.al_extents >= 7)) mdev->sync_conf.al_extents = 127; if (mdev->act_log && @@ -1804,8 +1804,10 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n } } - ERR_IF (sc.rate < 1) sc.rate = 1; - ERR_IF (sc.al_extents < 7) sc.al_extents = 127; /* arbitrary minimum */ + if (!expect(sc.rate >= 1)) + sc.rate = 1; + if (!expect(sc.al_extents >= 7)) + sc.al_extents = 127; /* arbitrary minimum */ #define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT) if (sc.al_extents > AL_MAX) { dev_err(DEV, "sc.al_extents > %d\n", AL_MAX); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1cfcc44fd484..a41b07820ddf 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1260,9 +1260,12 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ data_size -= dgs; - ERR_IF(data_size == 0) return NULL; - ERR_IF(data_size & 0x1ff) return NULL; - ERR_IF(data_size > DRBD_MAX_BIO_SIZE) return NULL; + if (!expect(data_size != 0)) + return NULL; + if (!expect(IS_ALIGNED(data_size, 512))) + return NULL; + if (!expect(data_size <= DRBD_MAX_BIO_SIZE)) + return NULL; /* even though we trust out peer, * we sometimes have to double check. 
*/ @@ -3615,7 +3618,8 @@ static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned while (size > 0) { want = min_t(int, size, sizeof(sink)); r = drbd_recv(mdev, sink, want); - ERR_IF(r <= 0) break; + if (!expect(r > 0)) + break; size -= r; } return size == 0; @@ -4493,7 +4497,10 @@ int drbd_asender(struct drbd_thread *thi) while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(mdev); if (test_and_clear_bit(SEND_PING, &mdev->flags)) { - ERR_IF(!drbd_send_ping(mdev)) goto reconnect; + if (!drbd_send_ping(mdev)) { + dev_err(DEV, "drbd_send_ping has failed\n"); + goto reconnect; + } mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_timeo*HZ/10; ping_timeout_active = 1; @@ -4587,7 +4594,7 @@ int drbd_asender(struct drbd_thread *thi) goto disconnect; } expect = cmd->pkt_size; - ERR_IF(len != expect-sizeof(struct p_header80)) + if (!expect(len == expect - sizeof(struct p_header80))) goto reconnect; } if (received == expect) { diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index c2a9285afad6..2e2c0659a3eb 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1331,7 +1331,8 @@ static int _drbd_may_sync_now(struct drbd_conf *mdev) if (odev->sync_conf.after == -1) return 1; odev = minor_to_mdev(odev->sync_conf.after); - ERR_IF(!odev) return 1; + if (!expect(odev)) + return 1; if ((odev->state.conn >= C_SYNC_SOURCE && odev->state.conn <= C_PAUSED_SYNC_T) || odev->state.aftr_isp || odev->state.peer_isp || @@ -1637,7 +1638,7 @@ int drbd_worker(struct drbd_thread *thi) if (intr) { D_ASSERT(intr == -EINTR); flush_signals(current); - ERR_IF (get_t_state(thi) == RUNNING) + if (!expect(get_t_state(thi) != RUNNING)) continue; break; } @@ -1650,7 +1651,7 @@ int drbd_worker(struct drbd_thread *thi) w = NULL; spin_lock_irq(&mdev->data.work.q_lock); - ERR_IF(list_empty(&mdev->data.work.q)) { + if (!expect(!list_empty(&mdev->data.work.q))) { /* something terribly wrong in our logic. * we were able to down() the semaphore, * but the list is empty... doh. From 70dc65e1b3453c5b78ab8ec6bfb604aee7038ae3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 21 Dec 2010 14:46:57 +0100 Subject: [PATCH 0024/5831] drbd: Remove some useless paranoia code The open_cnt check is an open-coded D_ASSERT() check. In case the data.work queue is not empty, it does not really help to know which drbd_work elements remained on that list: they will be freed immediately afterwards, anyway. 
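For illustration only (not part of the change itself): the removed open_cnt check and
its D_ASSERT() equivalent look roughly as follows, assuming the D_ASSERT() definition
from drbd_int.h; the hunk below makes exactly this substitution.

	/* open-coded assertion, as removed below (sketch) */
	if (mdev->open_cnt != 0)
		dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt,
			__FILE__, __LINE__);

	/* the same check expressed with the existing D_ASSERT() macro */
	D_ASSERT(mdev->open_cnt == 0);
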
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 46ba4aa03f3f..2902f6dd7bf1 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3254,16 +3254,8 @@ static void drbd_delete_device(unsigned int minor) return; /* paranoia asserts */ - if (mdev->open_cnt != 0) - dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt, - __FILE__ , __LINE__); - - if (!expect(list_empty(&mdev->data.work.q))) { - struct list_head *lp; - list_for_each(lp, &mdev->data.work.q) { - dev_err(DEV, "lp = %p\n", lp); - } - }; + D_ASSERT(mdev->open_cnt == 0); + D_ASSERT(list_empty(&mdev->data.work.q)); /* end paranoia asserts */ del_gendisk(mdev->vdisk); From e3cfa7b26a56cb9a3361034cc74f2aaad45d0987 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 25 Jan 2011 16:36:10 +0100 Subject: [PATCH 0025/5831] drbd: Inline function overlaps() is now unused Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 6dbbe8906c8e..9d75647cae8f 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -260,11 +260,6 @@ static inline void drbd_req_free(struct drbd_request *req) mempool_free(req, drbd_request_mempool); } -static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) -{ - return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9))); -} - /* Short lived temporary struct on the stack. * We could squirrel the error to be returned into * bio->bi_size, or similar. But that would be too ugly. 
*/ From 6618bf16384463c0b97a5f5f1f0ce5276f5865fd Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 26 Jan 2011 13:06:08 +0100 Subject: [PATCH 0026/5831] drbd: Interval tree bugfix Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_interval.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c index 2511dd9993f3..b77a9bda03d4 100644 --- a/drivers/block/drbd/drbd_interval.c +++ b/drivers/block/drbd/drbd_interval.c @@ -58,8 +58,9 @@ drbd_insert_interval(struct rb_root *root, struct drbd_interval *this) new = &(*new)->rb_right; else if (this < here) new = &(*new)->rb_left; - else if (this->sector > here->sector) + else if (this > here) new = &(*new)->rb_right; + else return false; } From 9749f30f1a387070e6e8351f35aeb829eacc3ab6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 20 Jul 2011 14:59:37 +0200 Subject: [PATCH 0027/5831] idr: idr_for_each_entry() macro Inspired by the list_for_each_entry() macro --- include/linux/idr.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/idr.h b/include/linux/idr.h index 255491cf522e..52a9da295296 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -152,4 +152,15 @@ void ida_simple_remove(struct ida *ida, unsigned int id); void __init idr_init_cache(void); +/** + * idr_for_each_entry - iterate over an idr's elements of a given type + * @idp: idr handle + * @entry: the type * to use as cursor + * @id: id entry's key + */ +#define idr_for_each_entry(idp, entry, id) \ + for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ + entry != NULL; \ + ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) + #endif /* __IDR_H__ */ From 2111438b30a509cfe8a1595d7fad304308ff2466 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 12:26:59 +0100 Subject: [PATCH 0028/5831] drbd: Minimal struct drbd_tconn Starting to dissolve the network connection from the actual block devices. 
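Illustrative sketch (not literal code from this patch): each drbd_conf gets a tconn
pointer, for now created per device with a dummy resource name, and connection-wide
settings are then meant to be reached through that pointer, as the following patch
does for net_conf.

	struct drbd_conf *mdev;
	...
	mdev->tconn = drbd_new_tconn("dummy");	/* one struct drbd_tconn per resource */
	if (!mdev->tconn)
		goto out_no_tconn;
	mdev->tconn->volume0 = mdev;		/* temporary back pointer, to be removed again */

	/* later patches move per-connection settings behind the pointer, e.g.: */
	if (get_net_conf(mdev)) {
		int prot = mdev->tconn->net_conf->wire_protocol;
		put_net_conf(mdev);
	}
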
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 15 ++++++++++++ drivers/block/drbd/drbd_main.c | 45 ++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 03dd7a0b1bc1..1f486f001dfd 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -94,6 +94,7 @@ extern char usermode_helper[]; #define UUID_NEW_BM_OFFSET ((u64)0x0001000000000000ULL) struct drbd_conf; +struct drbd_tconn; /* to shorten dev_warn(DEV, "msg"); and relatives statements */ @@ -960,7 +961,18 @@ struct fifo_buffer { unsigned int size; }; +struct drbd_tconn { /* is a resource from the config file */ + char *name; /* Resource name */ + struct list_head all_tconn; /* List of all drbd_tconn, prot by global_state_lock */ + struct drbd_conf *volume0; /* TODO: Remove me again */ + + struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ +}; + struct drbd_conf { + struct drbd_tconn *tconn; + int vnr; /* volume number within the connection */ + /* things that are stored as / read from meta data on disk */ unsigned long flags; @@ -1496,6 +1508,9 @@ extern rwlock_t global_state_lock; extern struct drbd_conf *drbd_new_device(unsigned int minor); extern void drbd_free_mdev(struct drbd_conf *mdev); +struct drbd_tconn *drbd_new_tconn(char *name); +extern void drbd_free_tconn(struct drbd_tconn *tconn); + extern int proc_details; /* drbd_req */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2902f6dd7bf1..a6ac0c81406b 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -132,6 +132,7 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0 * as member "struct gendisk *vdisk;" */ struct drbd_conf **minor_table; +struct list_head drbd_tconns; /* list of struct drbd_tconn */ struct kmem_cache *drbd_request_cache; struct kmem_cache *drbd_ee_cache; /* epoch entries */ @@ -3267,6 +3268,7 @@ static void drbd_delete_device(unsigned int minor) bdput(mdev->this_bdev); drbd_free_resources(mdev); + drbd_free_tconn(mdev->tconn); drbd_release_ee_lists(mdev); @@ -3358,6 +3360,41 @@ out: return r; } +struct drbd_tconn *drbd_new_tconn(char *name) +{ + struct drbd_tconn *tconn; + + tconn = kzalloc(sizeof(struct drbd_tconn), GFP_KERNEL); + if (!tconn) + return NULL; + + tconn->name = kstrdup(name, GFP_KERNEL); + if (!tconn->name) + goto fail; + + write_lock_irq(&global_state_lock); + list_add(&tconn->all_tconn, &drbd_tconns); + write_unlock_irq(&global_state_lock); + + return tconn; + +fail: + kfree(tconn->name); + kfree(tconn); + + return NULL; +} + +void drbd_free_tconn(struct drbd_tconn *tconn) +{ + write_lock_irq(&global_state_lock); + list_del(&tconn->all_tconn); + write_unlock_irq(&global_state_lock); + + kfree(tconn->name); + kfree(tconn); +} + struct drbd_conf *drbd_new_device(unsigned int minor) { struct drbd_conf *mdev; @@ -3368,9 +3405,14 @@ struct drbd_conf *drbd_new_device(unsigned int minor) mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL); if (!mdev) return NULL; + mdev->tconn = drbd_new_tconn("dummy"); + if (!mdev->tconn) + goto out_no_tconn; + if (!zalloc_cpumask_var(&mdev->cpu_mask, GFP_KERNEL)) goto out_no_cpumask; + mdev->tconn->volume0 = mdev; mdev->minor = minor; drbd_init_set_defaults(mdev); @@ -3447,6 +3489,8 @@ out_no_disk: out_no_q: free_cpumask_var(mdev->cpu_mask); out_no_cpumask: + drbd_free_tconn(mdev->tconn); +out_no_tconn: kfree(mdev); return 
NULL; } @@ -3526,6 +3570,7 @@ int __init drbd_init(void) } rwlock_init(&global_state_lock); + INIT_LIST_HEAD(&drbd_tconns); printk(KERN_INFO "drbd: initialized. " "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n", From 89e58e755e37137135c28a90c93be1b28faff485 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 13:12:45 +0100 Subject: [PATCH 0029/5831] drbd: moved net_conf from mdev to tconn Besides moving the struct member, everything else is generated by: sed -i -e 's/mdev->net_conf/mdev->tconn->net_conf/g' \ -e 's/odev->net_conf/odev->tconn->net_conf/g' \ *.[ch] Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 7 +- drivers/block/drbd/drbd_main.c | 28 +++---- drivers/block/drbd/drbd_nl.c | 28 +++---- drivers/block/drbd/drbd_proc.c | 4 +- drivers/block/drbd/drbd_receiver.c | 124 ++++++++++++++--------------- drivers/block/drbd/drbd_req.c | 22 ++--- drivers/block/drbd/drbd_worker.c | 8 +- 7 files changed, 110 insertions(+), 111 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 1f486f001dfd..4c4c276e0eb8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -977,7 +977,6 @@ struct drbd_conf { unsigned long flags; /* configured by drbdsetup */ - struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ struct syncer_conf sync_conf; struct drbd_backing_dev *ldev __protected_by(local); @@ -2134,10 +2133,10 @@ static inline void put_net_conf(struct drbd_conf *mdev) } /** - * get_net_conf() - Increase ref count on mdev->net_conf; Returns 0 if nothing there + * get_net_conf() - Increase ref count on mdev->tconn->net_conf; Returns 0 if nothing there * @mdev: DRBD device. * - * You have to call put_net_conf() when finished working with mdev->net_conf. + * You have to call put_net_conf() when finished working with mdev->tconn->net_conf. 
*/ static inline int get_net_conf(struct drbd_conf *mdev) { @@ -2253,7 +2252,7 @@ static inline int drbd_get_max_buffers(struct drbd_conf *mdev) { int mxb = 1000000; /* arbitrary limit on open requests */ if (get_net_conf(mdev)) { - mxb = mdev->net_conf->max_buffers; + mxb = mdev->tconn->net_conf->max_buffers; put_net_conf(mdev); } return mxb; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a6ac0c81406b..7e88a49d3448 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -693,7 +693,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) } if (get_net_conf(mdev)) { - if (!mdev->net_conf->two_primaries && + if (!mdev->tconn->net_conf->two_primaries && ns.role == R_PRIMARY && ns.peer == R_PRIMARY) rv = SS_TWO_PRIMARIES; put_net_conf(mdev); @@ -1952,7 +1952,7 @@ int drbd_send_protocol(struct drbd_conf *mdev) size = sizeof(struct p_protocol); if (mdev->agreed_pro_version >= 87) - size += strlen(mdev->net_conf->integrity_alg) + 1; + size += strlen(mdev->tconn->net_conf->integrity_alg) + 1; /* we must not recurse into our own queue, * as that is blocked during handshake */ @@ -1960,16 +1960,16 @@ int drbd_send_protocol(struct drbd_conf *mdev) if (p == NULL) return 0; - p->protocol = cpu_to_be32(mdev->net_conf->wire_protocol); - p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p); - p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p); - p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p); - p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries); + p->protocol = cpu_to_be32(mdev->tconn->net_conf->wire_protocol); + p->after_sb_0p = cpu_to_be32(mdev->tconn->net_conf->after_sb_0p); + p->after_sb_1p = cpu_to_be32(mdev->tconn->net_conf->after_sb_1p); + p->after_sb_2p = cpu_to_be32(mdev->tconn->net_conf->after_sb_2p); + p->two_primaries = cpu_to_be32(mdev->tconn->net_conf->two_primaries); cf = 0; - if (mdev->net_conf->want_lose) + if (mdev->tconn->net_conf->want_lose) cf |= CF_WANT_LOSE; - if (mdev->net_conf->dry_run) { + if (mdev->tconn->net_conf->dry_run) { if (mdev->agreed_pro_version >= 92) cf |= CF_DRY_RUN; else { @@ -1981,7 +1981,7 @@ int drbd_send_protocol(struct drbd_conf *mdev) p->conn_flags = cpu_to_be32(cf); if (mdev->agreed_pro_version >= 87) - strcpy(p->integrity_alg, mdev->net_conf->integrity_alg); + strcpy(p->integrity_alg, mdev->tconn->net_conf->integrity_alg); rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL, (struct p_header80 *)p, size); @@ -2002,7 +2002,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) mdev->comm_bm_set = drbd_bm_total_weight(mdev); p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set); - uuid_flags |= mdev->net_conf->want_lose ? 1 : 0; + uuid_flags |= mdev->tconn->net_conf->want_lose ? 1 : 0; uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0; uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0; p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags); @@ -2717,7 +2717,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) * out ok after sending on this side, but does not fit on the * receiving side, we sure have detected corruption elsewhere. 
*/ - if (mdev->net_conf->wire_protocol == DRBD_PROT_A || dgs) + if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A || dgs) ok = _drbd_send_bio(mdev, req->master_bio); else ok = _drbd_send_zc_bio(mdev, req->master_bio); @@ -2843,7 +2843,7 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, msg.msg_flags = msg_flags | MSG_NOSIGNAL; if (sock == mdev->data.socket) { - mdev->ko_count = mdev->net_conf->ko_count; + mdev->ko_count = mdev->tconn->net_conf->ko_count; drbd_update_congested(mdev); } do { @@ -3073,7 +3073,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) mdev->rs_mark_left[i] = 0; mdev->rs_mark_time[i] = 0; } - D_ASSERT(mdev->net_conf == NULL); + D_ASSERT(mdev->tconn->net_conf == NULL); drbd_set_my_capacity(mdev, 0); if (mdev->bitmap) { diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 51da84940a33..d816c61cd983 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -150,21 +150,21 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); if (get_net_conf(mdev)) { - switch (((struct sockaddr *)mdev->net_conf->peer_addr)->sa_family) { + switch (((struct sockaddr *)mdev->tconn->net_conf->peer_addr)->sa_family) { case AF_INET6: afs = "ipv6"; snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI6", - &((struct sockaddr_in6 *)mdev->net_conf->peer_addr)->sin6_addr); + &((struct sockaddr_in6 *)mdev->tconn->net_conf->peer_addr)->sin6_addr); break; case AF_INET: afs = "ipv4"; snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr); + &((struct sockaddr_in *)mdev->tconn->net_conf->peer_addr)->sin_addr); break; default: afs = "ssocks"; snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr); + &((struct sockaddr_in *)mdev->tconn->net_conf->peer_addr)->sin_addr); } snprintf(af, 20, "DRBD_PEER_AF=%s", afs); envp[3]=af; @@ -379,7 +379,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) if (rv == SS_TWO_PRIMARIES) { /* Maybe the peer is detected as dead very soon... retry at most once more in this case. 
*/ - schedule_timeout_interruptible((mdev->net_conf->ping_timeo+1)*HZ/10); + schedule_timeout_interruptible((mdev->tconn->net_conf->ping_timeo+1)*HZ/10); if (try < max_tries) try = max_tries - 1; continue; @@ -410,7 +410,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) } } else { if (get_net_conf(mdev)) { - mdev->net_conf->want_lose = 0; + mdev->tconn->net_conf->want_lose = 0; put_net_conf(mdev); } set_disk_ro(mdev->vdisk, false); @@ -972,7 +972,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } if (get_net_conf(mdev)) { - int prot = mdev->net_conf->wire_protocol; + int prot = mdev->tconn->net_conf->wire_protocol; put_net_conf(mdev); if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) { retcode = ERR_STONITH_AND_PROT_A; @@ -1439,13 +1439,13 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, if (!odev || odev == mdev) continue; if (get_net_conf(odev)) { - taken_addr = (struct sockaddr *)&odev->net_conf->my_addr; - if (new_conf->my_addr_len == odev->net_conf->my_addr_len && + taken_addr = (struct sockaddr *)&odev->tconn->net_conf->my_addr; + if (new_conf->my_addr_len == odev->tconn->net_conf->my_addr_len && !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len)) retcode = ERR_LOCAL_ADDR; - taken_addr = (struct sockaddr *)&odev->net_conf->peer_addr; - if (new_conf->peer_addr_len == odev->net_conf->peer_addr_len && + taken_addr = (struct sockaddr *)&odev->tconn->net_conf->peer_addr; + if (new_conf->peer_addr_len == odev->tconn->net_conf->peer_addr_len && !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len)) retcode = ERR_PEER_ADDR; @@ -1522,12 +1522,12 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, drbd_flush_workqueue(mdev); spin_lock_irq(&mdev->req_lock); - if (mdev->net_conf != NULL) { + if (mdev->tconn->net_conf != NULL) { retcode = ERR_NET_CONFIGURED; spin_unlock_irq(&mdev->req_lock); goto fail; } - mdev->net_conf = new_conf; + mdev->tconn->net_conf = new_conf; mdev->send_cnt = 0; mdev->recv_cnt = 0; @@ -2051,7 +2051,7 @@ static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl } if (get_net_conf(mdev)) { - tl = net_conf_to_tags(mdev, mdev->net_conf, tl); + tl = net_conf_to_tags(mdev, mdev->tconn->net_conf, tl); put_net_conf(mdev); } tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl); diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 2959cdfb77f5..4e53cb3d99e7 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -254,8 +254,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v) drbd_role_str(mdev->state.peer), drbd_disk_str(mdev->state.disk), drbd_disk_str(mdev->state.pdsk), - (mdev->net_conf == NULL ? ' ' : - (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')), + (mdev->tconn->net_conf == NULL ? ' ' : + (mdev->tconn->net_conf->wire_protocol - DRBD_PROT_A+'A')), is_susp(mdev->state) ? 's' : 'r', mdev->state.aftr_isp ? 'a' : '-', mdev->state.peer_isp ? 'p' : '-', diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index a41b07820ddf..e5e7dd1c6dd8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -237,7 +237,7 @@ static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool /* Yes, we may run up to @number over max_buffers. If we * follow it strictly, the admin will get it wrong anyways. 
*/ - if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) + if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) page = drbd_pp_first_pages_or_try_alloc(mdev, number); while (page == NULL) { @@ -245,7 +245,7 @@ static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool drbd_kick_lo_and_reclaim_net(mdev); - if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) { + if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) { page = drbd_pp_first_pages_or_try_alloc(mdev, number); if (page) break; @@ -582,7 +582,7 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) return NULL; what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family, + err = sock_create_kern(((struct sockaddr *)mdev->tconn->net_conf->my_addr)->sa_family, SOCK_STREAM, IPPROTO_TCP, &sock); if (err < 0) { sock = NULL; @@ -590,9 +590,9 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) } sock->sk->sk_rcvtimeo = - sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ; - drbd_setbufsize(sock, mdev->net_conf->sndbuf_size, - mdev->net_conf->rcvbuf_size); + sock->sk->sk_sndtimeo = mdev->tconn->net_conf->try_connect_int*HZ; + drbd_setbufsize(sock, mdev->tconn->net_conf->sndbuf_size, + mdev->tconn->net_conf->rcvbuf_size); /* explicitly bind to the configured IP as source IP * for the outgoing connections. @@ -601,9 +601,9 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) * Make sure to use 0 as port number, so linux selects * a free one dynamically. */ - memcpy(&src_in6, mdev->net_conf->my_addr, - min_t(int, mdev->net_conf->my_addr_len, sizeof(src_in6))); - if (((struct sockaddr *)mdev->net_conf->my_addr)->sa_family == AF_INET6) + memcpy(&src_in6, mdev->tconn->net_conf->my_addr, + min_t(int, mdev->tconn->net_conf->my_addr_len, sizeof(src_in6))); + if (((struct sockaddr *)mdev->tconn->net_conf->my_addr)->sa_family == AF_INET6) src_in6.sin6_port = 0; else ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ @@ -611,7 +611,7 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) what = "bind before connect"; err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, - mdev->net_conf->my_addr_len); + mdev->tconn->net_conf->my_addr_len); if (err < 0) goto out; @@ -620,8 +620,8 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) disconnect_on_error = 0; what = "connect"; err = sock->ops->connect(sock, - (struct sockaddr *)mdev->net_conf->peer_addr, - mdev->net_conf->peer_addr_len, 0); + (struct sockaddr *)mdev->tconn->net_conf->peer_addr, + mdev->tconn->net_conf->peer_addr_len, 0); out: if (err < 0) { @@ -658,26 +658,26 @@ static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev) return NULL; what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family, + err = sock_create_kern(((struct sockaddr *)mdev->tconn->net_conf->my_addr)->sa_family, SOCK_STREAM, IPPROTO_TCP, &s_listen); if (err) { s_listen = NULL; goto out; } - timeo = mdev->net_conf->try_connect_int * HZ; + timeo = mdev->tconn->net_conf->try_connect_int * HZ; timeo += (random32() & 1) ? 
timeo / 7 : -timeo / 7; /* 28.5% random jitter */ s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ s_listen->sk->sk_rcvtimeo = timeo; s_listen->sk->sk_sndtimeo = timeo; - drbd_setbufsize(s_listen, mdev->net_conf->sndbuf_size, - mdev->net_conf->rcvbuf_size); + drbd_setbufsize(s_listen, mdev->tconn->net_conf->sndbuf_size, + mdev->tconn->net_conf->rcvbuf_size); what = "bind before listen"; err = s_listen->ops->bind(s_listen, - (struct sockaddr *) mdev->net_conf->my_addr, - mdev->net_conf->my_addr_len); + (struct sockaddr *) mdev->tconn->net_conf->my_addr, + mdev->tconn->net_conf->my_addr_len); if (err < 0) goto out; @@ -791,7 +791,7 @@ static int drbd_connect(struct drbd_conf *mdev) } if (sock && msock) { - schedule_timeout_interruptible(mdev->net_conf->ping_timeo*HZ/10); + schedule_timeout_interruptible(mdev->tconn->net_conf->ping_timeo*HZ/10); ok = drbd_socket_okay(mdev, &sock); ok = drbd_socket_okay(mdev, &msock) && ok; if (ok) @@ -855,15 +855,15 @@ retry: msock->sk->sk_priority = TC_PRIO_INTERACTIVE; /* NOT YET ... - * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; + * sock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10; * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; * first set it to the P_HAND_SHAKE timeout, * which we set to 4x the configured ping_timeout. */ sock->sk->sk_sndtimeo = - sock->sk->sk_rcvtimeo = mdev->net_conf->ping_timeo*4*HZ/10; + sock->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_timeo*4*HZ/10; - msock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; - msock->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; + msock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10; + msock->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ; /* we don't want delays. * we use TCP_CORK where appropriate, though */ @@ -895,7 +895,7 @@ retry: if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS) return 0; - sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; + sock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10; sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; atomic_set(&mdev->packet_seq, 0); @@ -1555,7 +1555,7 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) sector_t sector = e->i.sector; int ok = 1, pcmd; - if (mdev->net_conf->wire_protocol == DRBD_PROT_C) { + if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) { if (likely((e->flags & EE_WAS_ERROR) == 0)) { pcmd = (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn <= C_PAUSED_SYNC_T && @@ -1573,7 +1573,7 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } /* we delete from the conflict detection hash _after_ we sent out the * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ - if (mdev->net_conf->two_primaries) { + if (mdev->tconn->net_conf->two_primaries) { spin_lock_irq(&mdev->req_lock); D_ASSERT(!drbd_interval_empty(&e->i)); drbd_remove_interval(&mdev->epoch_entries, &e->i); @@ -1592,7 +1592,7 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; int ok = 1; - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); spin_lock_irq(&mdev->req_lock); @@ -1717,7 +1717,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned spin_unlock(&mdev->epoch_lock); /* I'm the receiver, I do hold a net_cnt reference. 
*/ - if (!mdev->net_conf->two_primaries) { + if (!mdev->tconn->net_conf->two_primaries) { spin_lock_irq(&mdev->req_lock); } else { /* don't get the req_lock yet, @@ -1727,7 +1727,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned DEFINE_WAIT(wait); int first; - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); /* conflict detection and handling: * 1. wait on the sequence number, @@ -1845,7 +1845,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned list_add(&e->w.list, &mdev->active_ee); spin_unlock_irq(&mdev->req_lock); - switch (mdev->net_conf->wire_protocol) { + switch (mdev->tconn->net_conf->wire_protocol) { case DRBD_PROT_C: inc_unacked(mdev); /* corresponding dec_unacked() in e_end_block() @@ -2153,7 +2153,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) ch_peer = mdev->p_uuid[UI_SIZE]; ch_self = mdev->comm_bm_set; - switch (mdev->net_conf->after_sb_0p) { + switch (mdev->tconn->net_conf->after_sb_0p) { case ASB_CONSENSUS: case ASB_DISCARD_SECONDARY: case ASB_CALL_HELPER: @@ -2192,7 +2192,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) if (ch_peer == 0) { rv = 1; break; } if (ch_self == 0) { rv = -1; break; } } - if (mdev->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG) + if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG) break; case ASB_DISCARD_LEAST_CHG: if (ch_self < ch_peer) @@ -2218,7 +2218,7 @@ static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) { int hg, rv = -100; - switch (mdev->net_conf->after_sb_1p) { + switch (mdev->tconn->net_conf->after_sb_1p) { case ASB_DISCARD_YOUNGER_PRI: case ASB_DISCARD_OLDER_PRI: case ASB_DISCARD_LEAST_CHG: @@ -2267,7 +2267,7 @@ static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) { int hg, rv = -100; - switch (mdev->net_conf->after_sb_2p) { + switch (mdev->tconn->net_conf->after_sb_2p) { case ASB_DISCARD_YOUNGER_PRI: case ASB_DISCARD_OLDER_PRI: case ASB_DISCARD_LEAST_CHG: @@ -2558,7 +2558,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol if (abs(hg) == 100) drbd_khelper(mdev, "initial-split-brain"); - if (hg == 100 || (hg == -100 && mdev->net_conf->always_asbp)) { + if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) { int pcount = (mdev->state.role == R_PRIMARY) + (peer_role == R_PRIMARY); int forced = (hg == -100); @@ -2587,9 +2587,9 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } if (hg == -100) { - if (mdev->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1)) + if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1)) hg = -1; - if (!mdev->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1)) + if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1)) hg = 1; if (abs(hg) < 100) @@ -2615,7 +2615,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol if (hg < 0 && /* by intention we do not use mydisk here. 
*/ mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) { - switch (mdev->net_conf->rr_conflict) { + switch (mdev->tconn->net_conf->rr_conflict) { case ASB_CALL_HELPER: drbd_khelper(mdev, "pri-lost"); /* fall through */ @@ -2628,7 +2628,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } } - if (mdev->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) { + if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) { if (hg == 0) dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n"); else @@ -2701,38 +2701,38 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsig if (cf & CF_DRY_RUN) set_bit(CONN_DRY_RUN, &mdev->flags); - if (p_proto != mdev->net_conf->wire_protocol) { + if (p_proto != mdev->tconn->net_conf->wire_protocol) { dev_err(DEV, "incompatible communication protocols\n"); goto disconnect; } - if (cmp_after_sb(p_after_sb_0p, mdev->net_conf->after_sb_0p)) { + if (cmp_after_sb(p_after_sb_0p, mdev->tconn->net_conf->after_sb_0p)) { dev_err(DEV, "incompatible after-sb-0pri settings\n"); goto disconnect; } - if (cmp_after_sb(p_after_sb_1p, mdev->net_conf->after_sb_1p)) { + if (cmp_after_sb(p_after_sb_1p, mdev->tconn->net_conf->after_sb_1p)) { dev_err(DEV, "incompatible after-sb-1pri settings\n"); goto disconnect; } - if (cmp_after_sb(p_after_sb_2p, mdev->net_conf->after_sb_2p)) { + if (cmp_after_sb(p_after_sb_2p, mdev->tconn->net_conf->after_sb_2p)) { dev_err(DEV, "incompatible after-sb-2pri settings\n"); goto disconnect; } - if (p_want_lose && mdev->net_conf->want_lose) { + if (p_want_lose && mdev->tconn->net_conf->want_lose) { dev_err(DEV, "both sides have the 'want_lose' flag set\n"); goto disconnect; } - if (p_two_primaries != mdev->net_conf->two_primaries) { + if (p_two_primaries != mdev->tconn->net_conf->two_primaries) { dev_err(DEV, "incompatible setting of the two-primaries options\n"); goto disconnect; } if (mdev->agreed_pro_version >= 87) { - unsigned char *my_alg = mdev->net_conf->integrity_alg; + unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg; if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size) return false; @@ -3312,7 +3312,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } } - mdev->net_conf->want_lose = 0; + mdev->tconn->net_conf->want_lose = 0; drbd_md_sync(mdev); /* update connected indicator, la_size, ... 
*/ @@ -3844,8 +3844,8 @@ static void drbd_disconnect(struct drbd_conf *mdev) crypto_free_hash(mdev->cram_hmac_tfm); mdev->cram_hmac_tfm = NULL; - kfree(mdev->net_conf); - mdev->net_conf = NULL; + kfree(mdev->tconn->net_conf); + mdev->tconn->net_conf = NULL; drbd_request_state(mdev, NS(conn, C_STANDALONE)); } @@ -4005,7 +4005,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) char *response = NULL; char *right_response = NULL; char *peers_ch = NULL; - unsigned int key_len = strlen(mdev->net_conf->shared_secret); + unsigned int key_len = strlen(mdev->tconn->net_conf->shared_secret); unsigned int resp_size; struct hash_desc desc; enum drbd_packets cmd; @@ -4016,7 +4016,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) desc.flags = 0; rv = crypto_hash_setkey(mdev->cram_hmac_tfm, - (u8 *)mdev->net_conf->shared_secret, key_len); + (u8 *)mdev->tconn->net_conf->shared_secret, key_len); if (rv) { dev_err(DEV, "crypto_hash_setkey() failed with %d\n", rv); rv = -1; @@ -4130,7 +4130,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) if (rv) dev_info(DEV, "Peer authenticated using %d bytes of '%s' HMAC\n", - resp_size, mdev->net_conf->cram_hmac_alg); + resp_size, mdev->tconn->net_conf->cram_hmac_alg); else rv = -1; @@ -4207,7 +4207,7 @@ static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h) static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h) { /* restore idle timeout */ - mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; + mdev->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ; if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags)) wake_up(&mdev->misc_wait); @@ -4275,19 +4275,19 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) } switch (be16_to_cpu(h->command)) { case P_RS_WRITE_ACK: - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); what = WRITE_ACKED_BY_PEER_AND_SIS; break; case P_WRITE_ACK: - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); what = WRITE_ACKED_BY_PEER; break; case P_RECV_ACK: - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_B); + D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B); what = RECV_ACKED_BY_PEER; break; case P_DISCARD_ACK: - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); + D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); what = CONFLICT_DISCARDED_BY_PEER; break; default: @@ -4305,8 +4305,8 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); int size = be32_to_cpu(p->blksize); - bool missing_ok = mdev->net_conf->wire_protocol == DRBD_PROT_A || - mdev->net_conf->wire_protocol == DRBD_PROT_B; + bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A || + mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B; bool found; update_peer_seq(mdev, be32_to_cpu(p->seq_num)); @@ -4502,13 +4502,13 @@ int drbd_asender(struct drbd_thread *thi) goto reconnect; } mdev->meta.socket->sk->sk_rcvtimeo = - mdev->net_conf->ping_timeo*HZ/10; + mdev->tconn->net_conf->ping_timeo*HZ/10; ping_timeout_active = 1; } /* conditionally cork; * it may hurt latency if we cork without much to send */ - if (!mdev->net_conf->no_cork && + if (!mdev->tconn->net_conf->no_cork && 3 < atomic_read(&mdev->unacked_cnt)) drbd_tcp_cork(mdev->meta.socket); while (1) { @@ -4528,7 +4528,7 @@ int drbd_asender(struct drbd_thread *thi) 
break; } /* but unconditionally uncork unless disabled */ - if (!mdev->net_conf->no_cork) + if (!mdev->tconn->net_conf->no_cork) drbd_tcp_uncork(mdev->meta.socket); /* short circuit, recv_msg would return EINTR anyways. */ diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index b3b1d4edbb03..2b2662d4ab3c 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -528,7 +528,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, drbd_queue_work(&mdev->data.work, &req->w); /* close the epoch, in case it outgrew the limit */ - if (mdev->newest_tle->n_writes >= mdev->net_conf->max_epoch_size) + if (mdev->newest_tle->n_writes >= mdev->tconn->net_conf->max_epoch_size) queue_barrier(mdev); break; @@ -558,7 +558,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, atomic_add(req->i.size >> 9, &mdev->ap_in_flight); if (bio_data_dir(req->master_bio) == WRITE && - mdev->net_conf->wire_protocol == DRBD_PROT_A) { + mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A) { /* this is what is dangerous about protocol A: * pretend it was successfully written on the peer. */ if (req->rq_state & RQ_NET_PENDING) { @@ -697,8 +697,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, } if ((req->rq_state & RQ_NET_MASK) != 0) { req->rq_state |= RQ_NET_DONE; - if (mdev->net_conf->wire_protocol == DRBD_PROT_A) - atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); + if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A) + atomic_sub(req->i.size>>9, &mdev->ap_in_flight); } _req_may_be_done(req, m); /* Allowed while state.susp */ break; @@ -951,16 +951,16 @@ allocate_barrier: _req_mod(req, QUEUE_FOR_SEND_OOS); if (remote && - mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) { + mdev->tconn->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) { int congested = 0; - if (mdev->net_conf->cong_fill && - atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) { + if (mdev->tconn->net_conf->cong_fill && + atomic_read(&mdev->ap_in_flight) >= mdev->tconn->net_conf->cong_fill) { dev_info(DEV, "Congestion-fill threshold reached\n"); congested = 1; } - if (mdev->act_log->used >= mdev->net_conf->cong_extents) { + if (mdev->act_log->used >= mdev->tconn->net_conf->cong_extents) { dev_info(DEV, "Congestion-extents threshold reached\n"); congested = 1; } @@ -968,9 +968,9 @@ allocate_barrier: if (congested) { queue_barrier(mdev); /* last barrier, after mirrored writes */ - if (mdev->net_conf->on_congestion == OC_PULL_AHEAD) + if (mdev->tconn->net_conf->on_congestion == OC_PULL_AHEAD) _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); - else /*mdev->net_conf->on_congestion == OC_DISCONNECT */ + else /*mdev->tconn->net_conf->on_congestion == OC_DISCONNECT */ _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL); } } @@ -1182,7 +1182,7 @@ void request_timer_fn(unsigned long data) unsigned long et = 0; /* effective timeout = ko_count * timeout */ if (get_net_conf(mdev)) { - et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count; + et = mdev->tconn->net_conf->timeout*HZ/10 * mdev->tconn->net_conf->ko_count; put_net_conf(mdev); } if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 2e2c0659a3eb..d8c61816d103 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1590,8 +1590,8 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) * the race 
considerably, but does not solve it. */ if (side == C_SYNC_SOURCE) schedule_timeout_interruptible( - mdev->net_conf->ping_int * HZ + - mdev->net_conf->ping_timeo*HZ/9); + mdev->tconn->net_conf->ping_int * HZ + + mdev->tconn->net_conf->ping_timeo*HZ/9); drbd_resync_finished(mdev); } @@ -1623,14 +1623,14 @@ int drbd_worker(struct drbd_thread *thi) if (down_trylock(&mdev->data.work.s)) { mutex_lock(&mdev->data.mutex); - if (mdev->data.socket && !mdev->net_conf->no_cork) + if (mdev->data.socket && !mdev->tconn->net_conf->no_cork) drbd_tcp_uncork(mdev->data.socket); mutex_unlock(&mdev->data.mutex); intr = down_interruptible(&mdev->data.work.s); mutex_lock(&mdev->data.mutex); - if (mdev->data.socket && !mdev->net_conf->no_cork) + if (mdev->data.socket && !mdev->tconn->net_conf->no_cork) drbd_tcp_cork(mdev->data.socket); mutex_unlock(&mdev->data.mutex); } From b2fb6dbe52dafa3cd18e0665937a0ebcc0892b92 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 13:48:44 +0100 Subject: [PATCH 0030/5831] drbd: moved net_cont and net_cnt_wait from mdev to tconn Patch partly generated by: sed -i -e 's/get_net_conf(mdev)/get_net_conf(mdev->tconn)/g' \ -e 's/put_net_conf(mdev)/put_net_conf(mdev->tconn)/g' \ -e 's/get_net_conf(odev)/get_net_conf(odev->tconn)/g' \ -e 's/put_net_conf(odev)/put_net_conf(odev->tconn)/g' \ *.[ch] Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 23 ++++++++++++----------- drivers/block/drbd/drbd_main.c | 9 +++++---- drivers/block/drbd/drbd_nl.c | 20 ++++++++++---------- drivers/block/drbd/drbd_receiver.c | 14 +++++++------- drivers/block/drbd/drbd_req.c | 10 +++++----- 5 files changed, 39 insertions(+), 37 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4c4c276e0eb8..fd015502c62a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -967,6 +967,8 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_conf *volume0; /* TODO: Remove me again */ struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ + atomic_t net_cnt; /* Users of net_conf */ + wait_queue_head_t net_cnt_wait; }; struct drbd_conf { @@ -1012,7 +1014,6 @@ struct drbd_conf { union drbd_state state; wait_queue_head_t misc_wait; wait_queue_head_t state_wait; /* upon each state change. */ - wait_queue_head_t net_cnt_wait; unsigned int send_cnt; unsigned int recv_cnt; unsigned int read_cnt; @@ -1024,7 +1025,7 @@ struct drbd_conf { atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ atomic_t unacked_cnt; /* Need to send replys for */ atomic_t local_cnt; /* Waiting for local completion */ - atomic_t net_cnt; /* Users of net_conf */ + spinlock_t req_lock; struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */ struct drbd_tl_epoch *newest_tle; @@ -2126,10 +2127,10 @@ static inline void inc_unacked(struct drbd_conf *mdev) ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) -static inline void put_net_conf(struct drbd_conf *mdev) +static inline void put_net_conf(struct drbd_tconn *tconn) { - if (atomic_dec_and_test(&mdev->net_cnt)) - wake_up(&mdev->net_cnt_wait); + if (atomic_dec_and_test(&tconn->net_cnt)) + wake_up(&tconn->net_cnt_wait); } /** @@ -2138,14 +2139,14 @@ static inline void put_net_conf(struct drbd_conf *mdev) * * You have to call put_net_conf() when finished working with mdev->tconn->net_conf. 
*/ -static inline int get_net_conf(struct drbd_conf *mdev) +static inline int get_net_conf(struct drbd_tconn *tconn) { int have_net_conf; - atomic_inc(&mdev->net_cnt); - have_net_conf = mdev->state.conn >= C_UNCONNECTED; + atomic_inc(&tconn->net_cnt); + have_net_conf = tconn->volume0->state.conn >= C_UNCONNECTED; if (!have_net_conf) - put_net_conf(mdev); + put_net_conf(tconn); return have_net_conf; } @@ -2251,9 +2252,9 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, static inline int drbd_get_max_buffers(struct drbd_conf *mdev) { int mxb = 1000000; /* arbitrary limit on open requests */ - if (get_net_conf(mdev)) { + if (get_net_conf(mdev->tconn)) { mxb = mdev->tconn->net_conf->max_buffers; - put_net_conf(mdev); + put_net_conf(mdev->tconn); } return mxb; } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 7e88a49d3448..9a77a9b950d3 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -692,11 +692,11 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) put_ldev(mdev); } - if (get_net_conf(mdev)) { + if (get_net_conf(mdev->tconn)) { if (!mdev->tconn->net_conf->two_primaries && ns.role == R_PRIMARY && ns.peer == R_PRIMARY) rv = SS_TWO_PRIMARIES; - put_net_conf(mdev); + put_net_conf(mdev->tconn); } if (rv <= 0) @@ -2972,7 +2972,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->rs_pending_cnt, 0); atomic_set(&mdev->unacked_cnt, 0); atomic_set(&mdev->local_cnt, 0); - atomic_set(&mdev->net_cnt, 0); atomic_set(&mdev->packet_seq, 0); atomic_set(&mdev->pp_in_use, 0); atomic_set(&mdev->pp_in_use_by_net, 0); @@ -3031,7 +3030,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) init_waitqueue_head(&mdev->misc_wait); init_waitqueue_head(&mdev->state_wait); - init_waitqueue_head(&mdev->net_cnt_wait); init_waitqueue_head(&mdev->ee_wait); init_waitqueue_head(&mdev->al_wait); init_waitqueue_head(&mdev->seq_wait); @@ -3372,6 +3370,9 @@ struct drbd_tconn *drbd_new_tconn(char *name) if (!tconn->name) goto fail; + atomic_set(&tconn->net_cnt, 0); + init_waitqueue_head(&tconn->net_cnt_wait); + write_lock_irq(&global_state_lock); list_add(&tconn->all_tconn, &drbd_tconns); write_unlock_irq(&global_state_lock); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d816c61cd983..a936d61a90cd 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -149,7 +149,7 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); - if (get_net_conf(mdev)) { + if (get_net_conf(mdev->tconn)) { switch (((struct sockaddr *)mdev->tconn->net_conf->peer_addr)->sa_family) { case AF_INET6: afs = "ipv6"; @@ -169,7 +169,7 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd) snprintf(af, 20, "DRBD_PEER_AF=%s", afs); envp[3]=af; envp[4]=ad; - put_net_conf(mdev); + put_net_conf(mdev->tconn); } /* The helper may take some time. 
@@ -409,9 +409,9 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) put_ldev(mdev); } } else { - if (get_net_conf(mdev)) { + if (get_net_conf(mdev->tconn)) { mdev->tconn->net_conf->want_lose = 0; - put_net_conf(mdev); + put_net_conf(mdev->tconn); } set_disk_ro(mdev->vdisk, false); if (get_ldev(mdev)) { @@ -971,9 +971,9 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp goto fail; } - if (get_net_conf(mdev)) { + if (get_net_conf(mdev->tconn)) { int prot = mdev->tconn->net_conf->wire_protocol; - put_net_conf(mdev); + put_net_conf(mdev->tconn); if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) { retcode = ERR_STONITH_AND_PROT_A; goto fail; @@ -1438,7 +1438,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, odev = minor_to_mdev(i); if (!odev || odev == mdev) continue; - if (get_net_conf(odev)) { + if (get_net_conf(odev->tconn)) { taken_addr = (struct sockaddr *)&odev->tconn->net_conf->my_addr; if (new_conf->my_addr_len == odev->tconn->net_conf->my_addr_len && !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len)) @@ -1449,7 +1449,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len)) retcode = ERR_PEER_ADDR; - put_net_conf(odev); + put_net_conf(odev->tconn); if (retcode != NO_ERROR) goto fail; } @@ -2050,9 +2050,9 @@ static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl put_ldev(mdev); } - if (get_net_conf(mdev)) { + if (get_net_conf(mdev->tconn)) { tl = net_conf_to_tags(mdev, mdev->tconn->net_conf, tl); - put_net_conf(mdev); + put_net_conf(mdev->tconn); } tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e5e7dd1c6dd8..8a01f2787332 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -578,7 +578,7 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) int err; int disconnect_on_error = 1; - if (!get_net_conf(mdev)) + if (!get_net_conf(mdev->tconn)) return NULL; what = "sock_create_kern"; @@ -644,7 +644,7 @@ out: if (disconnect_on_error) drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); } - put_net_conf(mdev); + put_net_conf(mdev->tconn); return sock; } @@ -654,7 +654,7 @@ static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev) struct socket *s_estab = NULL, *s_listen; const char *what; - if (!get_net_conf(mdev)) + if (!get_net_conf(mdev->tconn)) return NULL; what = "sock_create_kern"; @@ -692,7 +692,7 @@ out: drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); } } - put_net_conf(mdev); + put_net_conf(mdev->tconn); return s_estab; } @@ -3839,7 +3839,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) spin_unlock_irq(&mdev->req_lock); if (os.conn == C_DISCONNECTING) { - wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0); + wait_event(mdev->tconn->net_cnt_wait, atomic_read(&mdev->tconn->net_cnt) == 0); crypto_free_hash(mdev->cram_hmac_tfm); mdev->cram_hmac_tfm = NULL; @@ -4166,9 +4166,9 @@ int drbdd_init(struct drbd_thread *thi) } while (h == 0); if (h > 0) { - if (get_net_conf(mdev)) { + if (get_net_conf(mdev->tconn)) { drbdd(mdev); - put_net_conf(mdev); + put_net_conf(mdev->tconn); } } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 2b2662d4ab3c..8f1e7db5e583 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -323,7 +323,7 @@ static int 
_req_conflicts(struct drbd_request *req) D_ASSERT(drbd_interval_empty(&req->i)); - if (!get_net_conf(mdev)) + if (!get_net_conf(mdev->tconn)) return 0; i = drbd_find_overlap(&mdev->write_requests, sector, size); @@ -359,11 +359,11 @@ static int _req_conflicts(struct drbd_request *req) /* this is like it should be, and what we expected. * our users do behave after all... */ - put_net_conf(mdev); + put_net_conf(mdev->tconn); return 0; out_conflict: - put_net_conf(mdev); + put_net_conf(mdev->tconn); return 1; } @@ -1181,9 +1181,9 @@ void request_timer_fn(unsigned long data) struct list_head *le; unsigned long et = 0; /* effective timeout = ko_count * timeout */ - if (get_net_conf(mdev)) { + if (get_net_conf(mdev->tconn)) { et = mdev->tconn->net_conf->timeout*HZ/10 * mdev->tconn->net_conf->ko_count; - put_net_conf(mdev); + put_net_conf(mdev->tconn); } if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) return; /* Recurring timer stopped */ From e42325a57606396539807ff55c24febda39f8d01 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 13:55:45 +0100 Subject: [PATCH 0031/5831] drbd: moved data and meta from mdev to tconn Patch mostly: sed -i -e 's/mdev->data/mdev->tconn->data/g' \ -e 's/mdev->meta/mdev->tconn->meta/g' \ *.[ch] Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 4 +- drivers/block/drbd/drbd_int.h | 17 ++-- drivers/block/drbd/drbd_main.c | 122 ++++++++++++++--------------- drivers/block/drbd/drbd_receiver.c | 82 +++++++++---------- drivers/block/drbd/drbd_req.c | 12 +-- drivers/block/drbd/drbd_worker.c | 66 ++++++++-------- 6 files changed, 152 insertions(+), 151 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 9284b10e42bb..794317778db5 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -228,7 +228,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) al_work.enr = enr; al_work.old_enr = al_ext->lc_number; al_work.w.cb = w_al_write_transaction; - drbd_queue_work_front(&mdev->data.work, &al_work.w); + drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); wait_for_completion(&al_work.event); mdev->al_writ_cnt++; @@ -717,7 +717,7 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, if (udw) { udw->enr = ext->lce.lc_number; udw->w.cb = w_update_odbm; - drbd_queue_work_front(&mdev->data.work, &udw->w); + drbd_queue_work_front(&mdev->tconn->data.work, &udw->w); } else { dev_warn(DEV, "Could not kmalloc an udw\n"); } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index fd015502c62a..8de17b5bd42a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -969,6 +969,9 @@ struct drbd_tconn { /* is a resource from the config file */ struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ atomic_t net_cnt; /* Users of net_conf */ wait_queue_head_t net_cnt_wait; + + struct drbd_socket data; /* data/barrier/cstate/parameter packets */ + struct drbd_socket meta; /* ping/ack (metadata) packets */ }; struct drbd_conf { @@ -987,8 +990,6 @@ struct drbd_conf { struct block_device *this_bdev; struct gendisk *vdisk; - struct drbd_socket data; /* data/barrier/cstate/parameter packets */ - struct drbd_socket meta; /* ping/ack (metadata) packets */ int agreed_pro_version; /* actually used protocol version */ unsigned long last_received; /* in jiffies, either socket */ unsigned int ko_count; @@ -1167,11 +1168,11 @@ static inline 
unsigned int mdev_to_minor(struct drbd_conf *mdev) */ static inline int drbd_get_data_sock(struct drbd_conf *mdev) { - mutex_lock(&mdev->data.mutex); + mutex_lock(&mdev->tconn->data.mutex); /* drbd_disconnect() could have called drbd_free_sock() * while we were waiting in down()... */ - if (unlikely(mdev->data.socket == NULL)) { - mutex_unlock(&mdev->data.mutex); + if (unlikely(mdev->tconn->data.socket == NULL)) { + mutex_unlock(&mdev->tconn->data.mutex); return 0; } return 1; @@ -1179,7 +1180,7 @@ static inline int drbd_get_data_sock(struct drbd_conf *mdev) static inline void drbd_put_data_sock(struct drbd_conf *mdev) { - mutex_unlock(&mdev->data.mutex); + mutex_unlock(&mdev->tconn->data.mutex); } /* @@ -2399,7 +2400,7 @@ static inline void dec_ap_bio(struct drbd_conf *mdev) wake_up(&mdev->misc_wait); if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) { if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) - drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); + drbd_queue_work(&mdev->tconn->data.work, &mdev->bm_io_work.w); } } @@ -2439,7 +2440,7 @@ static inline void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq) static inline void drbd_update_congested(struct drbd_conf *mdev) { - struct sock *sk = mdev->data.socket->sk; + struct sock *sk = mdev->tconn->data.socket->sk; if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) set_bit(NET_CONGESTED, &mdev->flags); } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 9a77a9b950d3..84e40fbfd3e9 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -371,7 +371,7 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) set_bit(CREATE_BARRIER, &mdev->flags); } - drbd_queue_work(&mdev->data.work, &b->w); + drbd_queue_work(&mdev->tconn->data.work, &b->w); } pn = &b->next; } else { @@ -1251,7 +1251,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, ascw->flags = flags; ascw->w.cb = w_after_state_ch; ascw->done = done; - drbd_queue_work(&mdev->data.work, &ascw->w); + drbd_queue_work(&mdev->tconn->data.work, &ascw->w); } else { dev_warn(DEV, "Could not kmalloc an ascw\n"); } @@ -1855,11 +1855,11 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, struct socket *sock; if (use_data_socket) { - mutex_lock(&mdev->data.mutex); - sock = mdev->data.socket; + mutex_lock(&mdev->tconn->data.mutex); + sock = mdev->tconn->data.socket; } else { - mutex_lock(&mdev->meta.mutex); - sock = mdev->meta.socket; + mutex_lock(&mdev->tconn->meta.mutex); + sock = mdev->tconn->meta.socket; } /* drbd_disconnect() could have called drbd_free_sock() @@ -1868,9 +1868,9 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, ok = _drbd_send_cmd(mdev, sock, cmd, h, size, 0); if (use_data_socket) - mutex_unlock(&mdev->data.mutex); + mutex_unlock(&mdev->tconn->data.mutex); else - mutex_unlock(&mdev->meta.mutex); + mutex_unlock(&mdev->tconn->meta.mutex); return ok; } @@ -1888,9 +1888,9 @@ int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, return 0; ok = (sizeof(h) == - drbd_send(mdev, mdev->data.socket, &h, sizeof(h), 0)); + drbd_send(mdev, mdev->tconn->data.socket, &h, sizeof(h), 0)); ok = ok && (size == - drbd_send(mdev, mdev->data.socket, data, size, 0)); + drbd_send(mdev, mdev->tconn->data.socket, data, size, 0)); drbd_put_data_sock(mdev); @@ -1913,13 +1913,13 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) /* used from admin command context and receiver/worker context. 
* to avoid kmalloc, grab the socket right here, * then use the pre-allocated sbuf there */ - mutex_lock(&mdev->data.mutex); - sock = mdev->data.socket; + mutex_lock(&mdev->tconn->data.mutex); + sock = mdev->tconn->data.socket; if (likely(sock != NULL)) { enum drbd_packets cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM; - p = &mdev->data.sbuf.rs_param_95; + p = &mdev->tconn->data.sbuf.rs_param_95; /* initialize verify_alg and csums_alg */ memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); @@ -1939,7 +1939,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) } else rv = 0; /* not ok */ - mutex_unlock(&mdev->data.mutex); + mutex_unlock(&mdev->tconn->data.mutex); return rv; } @@ -2106,17 +2106,17 @@ int drbd_send_state(struct drbd_conf *mdev) * of a cluster wide state change on another thread */ drbd_state_lock(mdev); - mutex_lock(&mdev->data.mutex); + mutex_lock(&mdev->tconn->data.mutex); p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */ - sock = mdev->data.socket; + sock = mdev->tconn->data.socket; if (likely(sock != NULL)) { ok = _drbd_send_cmd(mdev, sock, P_STATE, (struct p_header80 *)&p, sizeof(p), 0); } - mutex_unlock(&mdev->data.mutex); + mutex_unlock(&mdev->tconn->data.mutex); drbd_state_unlock(mdev); return ok; @@ -2260,7 +2260,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, if (len) { DCBP_set_code(p, RLE_VLI_Bits); - ok = _drbd_send_cmd(mdev, mdev->data.socket, P_COMPRESSED_BITMAP, h, + ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_COMPRESSED_BITMAP, h, sizeof(*p) + len, 0); c->packets[0]++; @@ -2275,7 +2275,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, len = num_words * sizeof(long); if (len) drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload); - ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP, + ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BITMAP, h, sizeof(struct p_header80) + len, 0); c->word_offset += num_words; c->bit_offset = c->word_offset * BITS_PER_LONG; @@ -2391,7 +2391,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, p.blksize = blksize; p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq)); - if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED) + if (!mdev->tconn->meta.socket || mdev->state.conn < C_CONNECTED) return false; ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, (struct p_header80 *)&p, sizeof(p)); @@ -2473,12 +2473,12 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, p.head.command = cpu_to_be16(cmd); p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size); - mutex_lock(&mdev->data.mutex); + mutex_lock(&mdev->tconn->data.mutex); - ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), 0)); - ok = ok && (digest_size == drbd_send(mdev, mdev->data.socket, digest, digest_size, 0)); + ok = (sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), 0)); + ok = ok && (digest_size == drbd_send(mdev, mdev->tconn->data.socket, digest, digest_size, 0)); - mutex_unlock(&mdev->data.mutex); + mutex_unlock(&mdev->tconn->data.mutex); return ok; } @@ -2506,7 +2506,7 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * int drop_it; /* long elapsed = (long)(jiffies - mdev->last_received); */ - drop_it = mdev->meta.socket == sock + drop_it = mdev->tconn->meta.socket == sock || !mdev->asender.task || get_t_state(&mdev->asender) != RUNNING || mdev->state.conn < C_CONNECTED; @@ -2548,7 +2548,7 @@ static int we_should_drop_the_connection(struct drbd_conf 
*mdev, struct socket * static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, int offset, size_t size, unsigned msg_flags) { - int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags); + int sent = drbd_send(mdev, mdev->tconn->data.socket, kmap(page) + offset, size, msg_flags); kunmap(page); if (sent == size) mdev->send_cnt += size>>9; @@ -2575,12 +2575,12 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, drbd_update_congested(mdev); set_fs(KERNEL_DS); do { - sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page, + sent = mdev->tconn->data.socket->ops->sendpage(mdev->tconn->data.socket, page, offset, len, msg_flags); if (sent == -EAGAIN) { if (we_should_drop_the_connection(mdev, - mdev->data.socket)) + mdev->tconn->data.socket)) break; else continue; @@ -2699,11 +2699,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) p.dp_flags = cpu_to_be32(dp_flags); set_bit(UNPLUG_REMOTE, &mdev->flags); ok = (sizeof(p) == - drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0)); + drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0)); if (ok && dgs) { dgb = mdev->int_dig_out; drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); - ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); + ok = dgs == drbd_send(mdev, mdev->tconn->data.socket, dgb, dgs, 0); } if (ok) { /* For protocol A, we have to memcpy the payload into @@ -2781,11 +2781,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, if (!drbd_get_data_sock(mdev)) return 0; - ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0); + ok = sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0); if (ok && dgs) { dgb = mdev->int_dig_out; drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); - ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); + ok = dgs == drbd_send(mdev, mdev->tconn->data.socket, dgb, dgs, 0); } if (ok) ok = _drbd_send_zc_ee(mdev, e); @@ -2842,7 +2842,7 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, msg.msg_controllen = 0; msg.msg_flags = msg_flags | MSG_NOSIGNAL; - if (sock == mdev->data.socket) { + if (sock == mdev->tconn->data.socket) { mdev->ko_count = mdev->tconn->net_conf->ko_count; drbd_update_congested(mdev); } @@ -2875,13 +2875,13 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, iov.iov_len -= rv; } while (sent < size); - if (sock == mdev->data.socket) + if (sock == mdev->tconn->data.socket) clear_bit(NET_CONGESTED, &mdev->flags); if (rv <= 0) { if (rv != -EAGAIN) { dev_err(DEV, "%s_sendmsg returned %d\n", - sock == mdev->meta.socket ? "msock" : "sock", + sock == mdev->tconn->meta.socket ? 
"msock" : "sock", rv); drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE)); } else @@ -2980,14 +2980,14 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->ap_in_flight, 0); mutex_init(&mdev->md_io_mutex); - mutex_init(&mdev->data.mutex); - mutex_init(&mdev->meta.mutex); - sema_init(&mdev->data.work.s, 0); - sema_init(&mdev->meta.work.s, 0); + mutex_init(&mdev->tconn->data.mutex); + mutex_init(&mdev->tconn->meta.mutex); + sema_init(&mdev->tconn->data.work.s, 0); + sema_init(&mdev->tconn->meta.work.s, 0); mutex_init(&mdev->state_mutex); - spin_lock_init(&mdev->data.work.q_lock); - spin_lock_init(&mdev->meta.work.q_lock); + spin_lock_init(&mdev->tconn->data.work.q_lock); + spin_lock_init(&mdev->tconn->meta.work.q_lock); spin_lock_init(&mdev->al_lock); spin_lock_init(&mdev->req_lock); @@ -3000,8 +3000,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) INIT_LIST_HEAD(&mdev->read_ee); INIT_LIST_HEAD(&mdev->net_ee); INIT_LIST_HEAD(&mdev->resync_reads); - INIT_LIST_HEAD(&mdev->data.work.q); - INIT_LIST_HEAD(&mdev->meta.work.q); + INIT_LIST_HEAD(&mdev->tconn->data.work.q); + INIT_LIST_HEAD(&mdev->tconn->meta.work.q); INIT_LIST_HEAD(&mdev->resync_work.list); INIT_LIST_HEAD(&mdev->unplug_work.list); INIT_LIST_HEAD(&mdev->go_diskless.list); @@ -3093,8 +3093,8 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) D_ASSERT(list_empty(&mdev->read_ee)); D_ASSERT(list_empty(&mdev->net_ee)); D_ASSERT(list_empty(&mdev->resync_reads)); - D_ASSERT(list_empty(&mdev->data.work.q)); - D_ASSERT(list_empty(&mdev->meta.work.q)); + D_ASSERT(list_empty(&mdev->tconn->data.work.q)); + D_ASSERT(list_empty(&mdev->tconn->meta.work.q)); D_ASSERT(list_empty(&mdev->resync_work.list)); D_ASSERT(list_empty(&mdev->unplug_work.list)); D_ASSERT(list_empty(&mdev->go_diskless.list)); @@ -3254,7 +3254,7 @@ static void drbd_delete_device(unsigned int minor) /* paranoia asserts */ D_ASSERT(mdev->open_cnt == 0); - D_ASSERT(list_empty(&mdev->data.work.q)); + D_ASSERT(list_empty(&mdev->tconn->data.work.q)); /* end paranoia asserts */ del_gendisk(mdev->vdisk); @@ -3606,19 +3606,19 @@ void drbd_free_bc(struct drbd_backing_dev *ldev) void drbd_free_sock(struct drbd_conf *mdev) { - if (mdev->data.socket) { - mutex_lock(&mdev->data.mutex); - kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR); - sock_release(mdev->data.socket); - mdev->data.socket = NULL; - mutex_unlock(&mdev->data.mutex); + if (mdev->tconn->data.socket) { + mutex_lock(&mdev->tconn->data.mutex); + kernel_sock_shutdown(mdev->tconn->data.socket, SHUT_RDWR); + sock_release(mdev->tconn->data.socket); + mdev->tconn->data.socket = NULL; + mutex_unlock(&mdev->tconn->data.mutex); } - if (mdev->meta.socket) { - mutex_lock(&mdev->meta.mutex); - kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR); - sock_release(mdev->meta.socket); - mdev->meta.socket = NULL; - mutex_unlock(&mdev->meta.mutex); + if (mdev->tconn->meta.socket) { + mutex_lock(&mdev->tconn->meta.mutex); + kernel_sock_shutdown(mdev->tconn->meta.socket, SHUT_RDWR); + sock_release(mdev->tconn->meta.socket); + mdev->tconn->meta.socket = NULL; + mutex_unlock(&mdev->tconn->meta.mutex); } } @@ -4012,7 +4012,7 @@ void drbd_go_diskless(struct drbd_conf *mdev) { D_ASSERT(mdev->state.disk == D_FAILED); if (!test_and_set_bit(GO_DISKLESS, &mdev->flags)) - drbd_queue_work(&mdev->data.work, &mdev->go_diskless); + drbd_queue_work(&mdev->tconn->data.work, &mdev->go_diskless); } /** @@ -4050,7 +4050,7 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, set_bit(BITMAP_IO, &mdev->flags); if (atomic_read(&mdev->ap_bio_cnt) == 
0) { if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) - drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); + drbd_queue_work(&mdev->tconn->data.work, &mdev->bm_io_work.w); } spin_unlock_irq(&mdev->req_lock); } @@ -4108,7 +4108,7 @@ static void md_sync_timer_fn(unsigned long data) { struct drbd_conf *mdev = (struct drbd_conf *) data; - drbd_queue_work_front(&mdev->data.work, &mdev->md_sync_work); + drbd_queue_work_front(&mdev->tconn->data.work, &mdev->md_sync_work); } static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8a01f2787332..2636bcc173a6 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -516,7 +516,7 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) set_fs(KERNEL_DS); for (;;) { - rv = sock_recvmsg(mdev->data.socket, &msg, size, msg.msg_flags); + rv = sock_recvmsg(mdev->tconn->data.socket, &msg, size, msg.msg_flags); if (rv == size) break; @@ -700,14 +700,14 @@ out: static int drbd_send_fp(struct drbd_conf *mdev, struct socket *sock, enum drbd_packets cmd) { - struct p_header80 *h = &mdev->data.sbuf.header.h80; + struct p_header80 *h = &mdev->tconn->data.sbuf.header.h80; return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); } static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) { - struct p_header80 *h = &mdev->data.rbuf.header.h80; + struct p_header80 *h = &mdev->tconn->data.rbuf.header.h80; int rr; rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0); @@ -755,7 +755,7 @@ static int drbd_connect(struct drbd_conf *mdev) struct socket *s, *sock, *msock; int try, h, ok; - D_ASSERT(!mdev->data.socket); + D_ASSERT(!mdev->tconn->data.socket); if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) return -2; @@ -870,8 +870,8 @@ retry: drbd_tcp_nodelay(sock); drbd_tcp_nodelay(msock); - mdev->data.socket = sock; - mdev->meta.socket = msock; + mdev->tconn->data.socket = sock; + mdev->tconn->meta.socket = msock; mdev->last_received = jiffies; D_ASSERT(mdev->asender.task == NULL); @@ -925,7 +925,7 @@ out_release_sockets: static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsigned int *packet_size) { - union p_header *h = &mdev->data.rbuf.header; + union p_header *h = &mdev->tconn->data.rbuf.header; int r; r = drbd_recv(mdev, h, sizeof(*h)); @@ -1163,7 +1163,7 @@ fail: static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { int rv; - struct p_barrier *p = &mdev->data.rbuf.barrier; + struct p_barrier *p = &mdev->tconn->data.rbuf.barrier; struct drbd_epoch *epoch; inc_unacked(mdev); @@ -1494,7 +1494,7 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi struct drbd_request *req; sector_t sector; int ok; - struct p_data *p = &mdev->data.rbuf.data; + struct p_data *p = &mdev->tconn->data.rbuf.data; sector = be64_to_cpu(p->sector); @@ -1522,7 +1522,7 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packets cmd, un { sector_t sector; int ok; - struct p_data *p = &mdev->data.rbuf.data; + struct p_data *p = &mdev->tconn->data.rbuf.data; sector = be64_to_cpu(p->sector); D_ASSERT(p->block_id == ID_SYNCER); @@ -1675,7 +1675,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned { sector_t sector; struct drbd_epoch_entry *e; - struct p_data *p = &mdev->data.rbuf.data; + struct p_data *p = &mdev->tconn->data.rbuf.data; int rw 
= WRITE; u32 dp_flags; @@ -1964,7 +1964,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un struct digest_info *di = NULL; int size, verb; unsigned int fault_type; - struct p_block_req *p = &mdev->data.rbuf.block_req; + struct p_block_req *p = &mdev->tconn->data.rbuf.block_req; sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); @@ -2683,7 +2683,7 @@ static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_protocol *p = &mdev->data.rbuf.protocol; + struct p_protocol *p = &mdev->tconn->data.rbuf.protocol; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; int p_want_lose, p_two_primaries, cf; char p_integrity_alg[SHARED_SECRET_MAX] = ""; @@ -2783,7 +2783,7 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size) { int ok = true; - struct p_rs_param_95 *p = &mdev->data.rbuf.rs_param_95; + struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95; unsigned int header_size, data_size, exp_max_sz; struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; @@ -2946,7 +2946,7 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev, static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_sizes *p = &mdev->data.rbuf.sizes; + struct p_sizes *p = &mdev->tconn->data.rbuf.sizes; enum determine_dev_size dd = unchanged; sector_t p_size, p_usize, my_usize; int ldsc = 0; /* local disk size changed */ @@ -3049,7 +3049,7 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_uuids *p = &mdev->data.rbuf.uuids; + struct p_uuids *p = &mdev->tconn->data.rbuf.uuids; u64 *p_uuid; int i, updated_uuids = 0; @@ -3143,7 +3143,7 @@ static union drbd_state convert_state(union drbd_state ps) static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_req_state *p = &mdev->data.rbuf.req_state; + struct p_req_state *p = &mdev->tconn->data.rbuf.req_state; union drbd_state mask, val; enum drbd_state_rv rv; @@ -3169,7 +3169,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_state *p = &mdev->data.rbuf.state; + struct p_state *p = &mdev->tconn->data.rbuf.state; union drbd_state os, ns, peer_state; enum drbd_disk_state real_peer_disk; enum chg_state_flags cs_flags; @@ -3321,7 +3321,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_rs_uuid *p = &mdev->data.rbuf.rs_uuid; + struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid; wait_event(mdev->misc_wait, mdev->state.conn == C_WF_SYNC_UUID || @@ -3520,7 +3520,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne void *buffer; int err; int ok = false; - struct p_header80 *h = &mdev->data.rbuf.header.h80; + struct p_header80 *h = &mdev->tconn->data.rbuf.header.h80; drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED); /* you are supposed to send additional out-of-sync 
information @@ -3629,14 +3629,14 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, u { /* Make sure we've acked all the TCP data associated * with the data requests being unplugged */ - drbd_tcp_quickack(mdev->data.socket); + drbd_tcp_quickack(mdev->tconn->data.socket); return true; } static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_block_desc *p = &mdev->data.rbuf.block_desc; + struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc; switch (mdev->state.conn) { case C_WF_SYNC_UUID: @@ -3690,15 +3690,15 @@ static struct data_cmd drbd_cmd_handler[] = { }; /* All handler functions that expect a sub-header get that sub-heder in - mdev->data.rbuf.header.head.payload. + mdev->tconn->data.rbuf.header.head.payload. - Usually in mdev->data.rbuf.header.head the callback can find the usual + Usually in mdev->tconn->data.rbuf.header.head the callback can find the usual p_header, but they may not rely on that. Since there is also p_header95 ! */ static void drbdd(struct drbd_conf *mdev) { - union p_header *header = &mdev->data.rbuf.header; + union p_header *header = &mdev->tconn->data.rbuf.header; unsigned int packet_size; enum drbd_packets cmd; size_t shs; /* sub header size */ @@ -3753,7 +3753,7 @@ void drbd_flush_workqueue(struct drbd_conf *mdev) barr.w.cb = w_prev_work_done; init_completion(&barr.done); - drbd_queue_work(&mdev->data.work, &barr.w); + drbd_queue_work(&mdev->tconn->data.work, &barr.w); wait_for_completion(&barr.done); } @@ -3892,25 +3892,25 @@ static void drbd_disconnect(struct drbd_conf *mdev) static int drbd_send_handshake(struct drbd_conf *mdev) { /* ASSERT current == mdev->receiver ... */ - struct p_handshake *p = &mdev->data.sbuf.handshake; + struct p_handshake *p = &mdev->tconn->data.sbuf.handshake; int ok; - if (mutex_lock_interruptible(&mdev->data.mutex)) { + if (mutex_lock_interruptible(&mdev->tconn->data.mutex)) { dev_err(DEV, "interrupted during initial handshake\n"); return 0; /* interrupted. not ok. */ } - if (mdev->data.socket == NULL) { - mutex_unlock(&mdev->data.mutex); + if (mdev->tconn->data.socket == NULL) { + mutex_unlock(&mdev->tconn->data.mutex); return 0; } memset(p, 0, sizeof(*p)); p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); - ok = _drbd_send_cmd( mdev, mdev->data.socket, P_HAND_SHAKE, + ok = _drbd_send_cmd( mdev, mdev->tconn->data.socket, P_HAND_SHAKE, (struct p_header80 *)p, sizeof(*p), 0 ); - mutex_unlock(&mdev->data.mutex); + mutex_unlock(&mdev->tconn->data.mutex); return ok; } @@ -3924,7 +3924,7 @@ static int drbd_send_handshake(struct drbd_conf *mdev) static int drbd_do_handshake(struct drbd_conf *mdev) { /* ASSERT current == mdev->receiver ... 
*/ - struct p_handshake *p = &mdev->data.rbuf.handshake; + struct p_handshake *p = &mdev->tconn->data.rbuf.handshake; const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80); unsigned int length; enum drbd_packets cmd; @@ -4207,7 +4207,7 @@ static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h) static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h) { /* restore idle timeout */ - mdev->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ; + mdev->tconn->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ; if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags)) wake_up(&mdev->misc_wait); @@ -4427,7 +4427,7 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) w = kmalloc(sizeof(*w), GFP_NOIO); if (w) { w->cb = w_ov_finished; - drbd_queue_work_front(&mdev->data.work, w); + drbd_queue_work_front(&mdev->tconn->data.work, w); } else { dev_err(DEV, "kmalloc(w) failed."); ov_oos_print(mdev); @@ -4479,7 +4479,7 @@ static struct asender_cmd *get_asender_cmd(int cmd) int drbd_asender(struct drbd_thread *thi) { struct drbd_conf *mdev = thi->mdev; - struct p_header80 *h = &mdev->meta.rbuf.header.h80; + struct p_header80 *h = &mdev->tconn->meta.rbuf.header.h80; struct asender_cmd *cmd = NULL; int rv, len; @@ -4501,7 +4501,7 @@ int drbd_asender(struct drbd_thread *thi) dev_err(DEV, "drbd_send_ping has failed\n"); goto reconnect; } - mdev->meta.socket->sk->sk_rcvtimeo = + mdev->tconn->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_timeo*HZ/10; ping_timeout_active = 1; } @@ -4510,7 +4510,7 @@ int drbd_asender(struct drbd_thread *thi) * it may hurt latency if we cork without much to send */ if (!mdev->tconn->net_conf->no_cork && 3 < atomic_read(&mdev->unacked_cnt)) - drbd_tcp_cork(mdev->meta.socket); + drbd_tcp_cork(mdev->tconn->meta.socket); while (1) { clear_bit(SIGNAL_ASENDER, &mdev->flags); flush_signals(current); @@ -4529,13 +4529,13 @@ int drbd_asender(struct drbd_thread *thi) } /* but unconditionally uncork unless disabled */ if (!mdev->tconn->net_conf->no_cork) - drbd_tcp_uncork(mdev->meta.socket); + drbd_tcp_uncork(mdev->tconn->meta.socket); /* short circuit, recv_msg would return EINTR anyways. */ if (signal_pending(current)) continue; - rv = drbd_recv_short(mdev, mdev->meta.socket, + rv = drbd_recv_short(mdev, mdev->tconn->meta.socket, buf, expect-received, 0); clear_bit(SIGNAL_ASENDER, &mdev->flags); @@ -4561,7 +4561,7 @@ int drbd_asender(struct drbd_thread *thi) /* If the data socket received something meanwhile, * that is good enough: peer is still alive. */ if (time_after(mdev->last_received, - jiffies - mdev->meta.socket->sk->sk_rcvtimeo)) + jiffies - mdev->tconn->meta.socket->sk->sk_rcvtimeo)) continue; if (ping_timeout_active) { dev_err(DEV, "PingAck did not arrive in time.\n"); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 8f1e7db5e583..ac43e440d660 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -127,7 +127,7 @@ static void queue_barrier(struct drbd_conf *mdev) * dec_ap_pending will be done in got_BarrierAck * or (on connection loss) in tl_clear. */ inc_ap_pending(mdev); - drbd_queue_work(&mdev->data.work, &b->w); + drbd_queue_work(&mdev->tconn->data.work, &b->w); set_bit(CREATE_BARRIER, &mdev->flags); } @@ -483,7 +483,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->w.cb = (req->rq_state & RQ_LOCAL_MASK) ? 
w_read_retry_remote : w_send_read_req; - drbd_queue_work(&mdev->data.work, &req->w); + drbd_queue_work(&mdev->tconn->data.work, &req->w); break; case QUEUE_FOR_NET_WRITE: @@ -525,7 +525,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, D_ASSERT(req->rq_state & RQ_NET_PENDING); req->rq_state |= RQ_NET_QUEUED; req->w.cb = w_send_dblock; - drbd_queue_work(&mdev->data.work, &req->w); + drbd_queue_work(&mdev->tconn->data.work, &req->w); /* close the epoch, in case it outgrew the limit */ if (mdev->newest_tle->n_writes >= mdev->tconn->net_conf->max_epoch_size) @@ -536,7 +536,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case QUEUE_FOR_SEND_OOS: req->rq_state |= RQ_NET_QUEUED; req->w.cb = w_send_oos; - drbd_queue_work(&mdev->data.work, &req->w); + drbd_queue_work(&mdev->tconn->data.work, &req->w); break; case OOS_HANDED_TO_NETWORK: @@ -667,7 +667,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, get_ldev(mdev); req->w.cb = w_restart_disk_io; - drbd_queue_work(&mdev->data.work, &req->w); + drbd_queue_work(&mdev->tconn->data.work, &req->w); break; case RESEND: @@ -677,7 +677,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, We ensure that the peer was not rebooted */ if (!(req->rq_state & RQ_NET_OK)) { if (req->w.cb) { - drbd_queue_work(&mdev->data.work, &req->w); + drbd_queue_work(&mdev->tconn->data.work, &req->w); rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ; } break; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d8c61816d103..9b1e2bad5fbd 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -94,7 +94,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local) __drbd_chk_io_error(mdev, false); spin_unlock_irqrestore(&mdev->req_lock, flags); - drbd_queue_work(&mdev->data.work, &e->w); + drbd_queue_work(&mdev->tconn->data.work, &e->w); put_ldev(mdev); } @@ -400,7 +400,7 @@ void resync_timer_fn(unsigned long data) struct drbd_conf *mdev = (struct drbd_conf *) data; if (list_empty(&mdev->resync_work.list)) - drbd_queue_work(&mdev->data.work, &mdev->resync_work); + drbd_queue_work(&mdev->tconn->data.work, &mdev->resync_work); } static void fifo_set(struct fifo_buffer *fb, int value) @@ -538,15 +538,15 @@ static int w_make_resync_request(struct drbd_conf *mdev, for (i = 0; i < number; i++) { /* Stop generating RS requests, when half of the send buffer is filled */ - mutex_lock(&mdev->data.mutex); - if (mdev->data.socket) { - queued = mdev->data.socket->sk->sk_wmem_queued; - sndbuf = mdev->data.socket->sk->sk_sndbuf; + mutex_lock(&mdev->tconn->data.mutex); + if (mdev->tconn->data.socket) { + queued = mdev->tconn->data.socket->sk->sk_wmem_queued; + sndbuf = mdev->tconn->data.socket->sk->sk_sndbuf; } else { queued = 1; sndbuf = 0; } - mutex_unlock(&mdev->data.mutex); + mutex_unlock(&mdev->tconn->data.mutex); if (queued > sndbuf / 2) goto requeue; @@ -710,7 +710,7 @@ void start_resync_timer_fn(unsigned long data) { struct drbd_conf *mdev = (struct drbd_conf *) data; - drbd_queue_work(&mdev->data.work, &mdev->start_resync_work); + drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work); } int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel) @@ -775,7 +775,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC); if (w) { w->cb = w_resync_finished; - drbd_queue_work(&mdev->data.work, w); + drbd_queue_work(&mdev->tconn->data.work, w); return 
1; } dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n"); @@ -1202,7 +1202,7 @@ int w_prev_work_done(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w); - struct p_barrier *p = &mdev->data.sbuf.barrier; + struct p_barrier *p = &mdev->tconn->data.sbuf.barrier; int ok = 1; /* really avoid racing with tl_clear. w.cb may have been referenced @@ -1223,7 +1223,7 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) /* inc_ap_pending was done where this was queued. * dec_ap_pending will be done in got_BarrierAck * or (on connection loss) in w_clear_epoch. */ - ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER, + ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BARRIER, (struct p_header80 *)p, sizeof(*p), 0); drbd_put_data_sock(mdev); @@ -1621,18 +1621,18 @@ int drbd_worker(struct drbd_thread *thi) while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(mdev); - if (down_trylock(&mdev->data.work.s)) { - mutex_lock(&mdev->data.mutex); - if (mdev->data.socket && !mdev->tconn->net_conf->no_cork) - drbd_tcp_uncork(mdev->data.socket); - mutex_unlock(&mdev->data.mutex); + if (down_trylock(&mdev->tconn->data.work.s)) { + mutex_lock(&mdev->tconn->data.mutex); + if (mdev->tconn->data.socket && !mdev->tconn->net_conf->no_cork) + drbd_tcp_uncork(mdev->tconn->data.socket); + mutex_unlock(&mdev->tconn->data.mutex); - intr = down_interruptible(&mdev->data.work.s); + intr = down_interruptible(&mdev->tconn->data.work.s); - mutex_lock(&mdev->data.mutex); - if (mdev->data.socket && !mdev->tconn->net_conf->no_cork) - drbd_tcp_cork(mdev->data.socket); - mutex_unlock(&mdev->data.mutex); + mutex_lock(&mdev->tconn->data.mutex); + if (mdev->tconn->data.socket && !mdev->tconn->net_conf->no_cork) + drbd_tcp_cork(mdev->tconn->data.socket); + mutex_unlock(&mdev->tconn->data.mutex); } if (intr) { @@ -1650,8 +1650,8 @@ int drbd_worker(struct drbd_thread *thi) this... */ w = NULL; - spin_lock_irq(&mdev->data.work.q_lock); - if (!expect(!list_empty(&mdev->data.work.q))) { + spin_lock_irq(&mdev->tconn->data.work.q_lock); + if (!expect(!list_empty(&mdev->tconn->data.work.q))) { /* something terribly wrong in our logic. * we were able to down() the semaphore, * but the list is empty... doh. @@ -1663,12 +1663,12 @@ int drbd_worker(struct drbd_thread *thi) * * I'll try to get away just starting over this loop. */ - spin_unlock_irq(&mdev->data.work.q_lock); + spin_unlock_irq(&mdev->tconn->data.work.q_lock); continue; } - w = list_entry(mdev->data.work.q.next, struct drbd_work, list); + w = list_entry(mdev->tconn->data.work.q.next, struct drbd_work, list); list_del_init(&w->list); - spin_unlock_irq(&mdev->data.work.q_lock); + spin_unlock_irq(&mdev->tconn->data.work.q_lock); if (!w->cb(mdev, w, mdev->state.conn < C_CONNECTED)) { /* dev_warn(DEV, "worker: a callback failed! 
\n"); */ @@ -1680,11 +1680,11 @@ int drbd_worker(struct drbd_thread *thi) D_ASSERT(test_bit(DEVICE_DYING, &mdev->flags)); D_ASSERT(test_bit(CONFIG_PENDING, &mdev->flags)); - spin_lock_irq(&mdev->data.work.q_lock); + spin_lock_irq(&mdev->tconn->data.work.q_lock); i = 0; - while (!list_empty(&mdev->data.work.q)) { - list_splice_init(&mdev->data.work.q, &work_list); - spin_unlock_irq(&mdev->data.work.q_lock); + while (!list_empty(&mdev->tconn->data.work.q)) { + list_splice_init(&mdev->tconn->data.work.q, &work_list); + spin_unlock_irq(&mdev->tconn->data.work.q_lock); while (!list_empty(&work_list)) { w = list_entry(work_list.next, struct drbd_work, list); @@ -1693,15 +1693,15 @@ int drbd_worker(struct drbd_thread *thi) i++; /* dead debugging code */ } - spin_lock_irq(&mdev->data.work.q_lock); + spin_lock_irq(&mdev->tconn->data.work.q_lock); } - sema_init(&mdev->data.work.s, 0); + sema_init(&mdev->tconn->data.work.s, 0); /* DANGEROUS race: if someone did queue his work within the spinlock, * but up() ed outside the spinlock, we could get an up() on the * semaphore without corresponding list entry. * So don't do that. */ - spin_unlock_irq(&mdev->data.work.q_lock); + spin_unlock_irq(&mdev->tconn->data.work.q_lock); D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); /* _drbd_set_state only uses stop_nowait. From e6b3ea83bc72e126247b241c1164794a644d6fdc Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 14:02:01 +0100 Subject: [PATCH 0032/5831] drbd: moved receiver, worker and asender from mdev to tconn Patch mostly: sed -i -e 's/mdev->receiver/mdev->tconn->receiver/g' \ -e 's/mdev->worker/mdev->tconn->worker/g' \ -e 's/mdev->asender/mdev->tconn->asender/g' \ *.[ch] Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 24 ++++++++-------- drivers/block/drbd/drbd_int.h | 9 +++--- drivers/block/drbd/drbd_main.c | 44 +++++++++++++++--------------- drivers/block/drbd/drbd_nl.c | 10 +++---- drivers/block/drbd/drbd_receiver.c | 14 +++++----- drivers/block/drbd/drbd_worker.c | 4 +-- 6 files changed, 53 insertions(+), 52 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index c756b4dbd135..4da4c322fa56 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -119,13 +119,13 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) if (!__ratelimit(&drbd_ratelimit_state)) return; dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n", - current == mdev->receiver.task ? "receiver" : - current == mdev->asender.task ? "asender" : - current == mdev->worker.task ? "worker" : current->comm, + current == mdev->tconn->receiver.task ? "receiver" : + current == mdev->tconn->asender.task ? "asender" : + current == mdev->tconn->worker.task ? "worker" : current->comm, func, b->bm_why ?: "?", - b->bm_task == mdev->receiver.task ? "receiver" : - b->bm_task == mdev->asender.task ? "asender" : - b->bm_task == mdev->worker.task ? "worker" : "?"); + b->bm_task == mdev->tconn->receiver.task ? "receiver" : + b->bm_task == mdev->tconn->asender.task ? "asender" : + b->bm_task == mdev->tconn->worker.task ? "worker" : "?"); } void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) @@ -142,13 +142,13 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) if (trylock_failed) { dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", - current == mdev->receiver.task ? 
"receiver" : - current == mdev->asender.task ? "asender" : - current == mdev->worker.task ? "worker" : current->comm, + current == mdev->tconn->receiver.task ? "receiver" : + current == mdev->tconn->asender.task ? "asender" : + current == mdev->tconn->worker.task ? "worker" : current->comm, why, b->bm_why ?: "?", - b->bm_task == mdev->receiver.task ? "receiver" : - b->bm_task == mdev->asender.task ? "asender" : - b->bm_task == mdev->worker.task ? "worker" : "?"); + b->bm_task == mdev->tconn->receiver.task ? "receiver" : + b->bm_task == mdev->tconn->asender.task ? "asender" : + b->bm_task == mdev->tconn->worker.task ? "worker" : "?"); mutex_lock(&b->bm_change); } if (BM_LOCKED_MASK & b->bm_flags) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8de17b5bd42a..c5b1167aab50 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -972,6 +972,10 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_socket data; /* data/barrier/cstate/parameter packets */ struct drbd_socket meta; /* ping/ack (metadata) packets */ + + struct drbd_thread receiver; + struct drbd_thread worker; + struct drbd_thread asender; }; struct drbd_conf { @@ -1068,9 +1072,6 @@ struct drbd_conf { struct crypto_hash *csums_tfm; struct crypto_hash *verify_tfm; - struct drbd_thread receiver; - struct drbd_thread worker; - struct drbd_thread asender; struct drbd_bitmap *bitmap; unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */ @@ -2005,7 +2006,7 @@ drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) static inline void wake_asender(struct drbd_conf *mdev) { if (test_bit(SIGNAL_ASENDER, &mdev->flags)) - force_sig(DRBD_SIG, mdev->asender.task); + force_sig(DRBD_SIG, mdev->tconn->asender.task); } static inline void request_ping(struct drbd_conf *mdev) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 84e40fbfd3e9..5d8a6e94a4a5 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -613,7 +613,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, spin_unlock_irqrestore(&mdev->req_lock, flags); if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) { - D_ASSERT(current != mdev->worker.task); + D_ASSERT(current != mdev->tconn->worker.task); wait_for_completion(&done); } @@ -1229,16 +1229,16 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, /* Receiver should clean up itself */ if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) - drbd_thread_stop_nowait(&mdev->receiver); + drbd_thread_stop_nowait(&mdev->tconn->receiver); /* Now the receiver finished cleaning up itself, it should die */ if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) - drbd_thread_stop_nowait(&mdev->receiver); + drbd_thread_stop_nowait(&mdev->tconn->receiver); /* Upon network failure, we need to restart the receiver. 
*/ if (os.conn > C_TEAR_DOWN && ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) - drbd_thread_restart_nowait(&mdev->receiver); + drbd_thread_restart_nowait(&mdev->tconn->receiver); /* Resume AL writing if we get a connection */ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) @@ -1297,7 +1297,7 @@ int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, { int rv; - D_ASSERT(current == mdev->worker.task); + D_ASSERT(current == mdev->tconn->worker.task); /* open coded non-blocking drbd_suspend_io(mdev); */ set_bit(SUSPEND_IO, &mdev->flags); @@ -1598,7 +1598,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Upon network connection, we need to start the receiver */ if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED) - drbd_thread_start(&mdev->receiver); + drbd_thread_start(&mdev->tconn->receiver); /* Terminate worker thread if we are unconfigured - it will be restarted as needed... */ @@ -1609,7 +1609,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, resume_next_sg(mdev); /* set in __drbd_set_state, unless CONFIG_PENDING was set */ if (test_bit(DEVICE_DYING, &mdev->flags)) - drbd_thread_stop_nowait(&mdev->worker); + drbd_thread_stop_nowait(&mdev->tconn->worker); } drbd_md_sync(mdev); @@ -1675,9 +1675,9 @@ int drbd_thread_start(struct drbd_thread *thi) unsigned long flags; const char *me = - thi == &mdev->receiver ? "receiver" : - thi == &mdev->asender ? "asender" : - thi == &mdev->worker ? "worker" : "NONSENSE"; + thi == &mdev->tconn->receiver ? "receiver" : + thi == &mdev->tconn->asender ? "asender" : + thi == &mdev->tconn->worker ? "worker" : "NONSENSE"; /* is used from state engine doing drbd_thread_stop_nowait, * while holding the req lock irqsave */ @@ -1807,9 +1807,9 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) { struct task_struct *p = current; struct drbd_thread *thi = - p == mdev->asender.task ? &mdev->asender : - p == mdev->receiver.task ? &mdev->receiver : - p == mdev->worker.task ? &mdev->worker : + p == mdev->tconn->asender.task ? &mdev->tconn->asender : + p == mdev->tconn->receiver.task ? &mdev->tconn->receiver : + p == mdev->tconn->worker.task ? 
&mdev->tconn->worker : NULL; if (!expect(thi != NULL)) return; @@ -2507,8 +2507,8 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * /* long elapsed = (long)(jiffies - mdev->last_received); */ drop_it = mdev->tconn->meta.socket == sock - || !mdev->asender.task - || get_t_state(&mdev->asender) != RUNNING + || !mdev->tconn->asender.task + || get_t_state(&mdev->tconn->asender) != RUNNING || mdev->state.conn < C_CONNECTED; if (drop_it) @@ -3034,9 +3034,9 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) init_waitqueue_head(&mdev->al_wait); init_waitqueue_head(&mdev->seq_wait); - drbd_thread_init(mdev, &mdev->receiver, drbdd_init); - drbd_thread_init(mdev, &mdev->worker, drbd_worker); - drbd_thread_init(mdev, &mdev->asender, drbd_asender); + drbd_thread_init(mdev, &mdev->tconn->receiver, drbdd_init); + drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker); + drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender); mdev->agreed_pro_version = PRO_VERSION_MAX; mdev->write_ordering = WO_bdev_flush; @@ -3048,9 +3048,9 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) void drbd_mdev_cleanup(struct drbd_conf *mdev) { int i; - if (mdev->receiver.t_state != NONE) + if (mdev->tconn->receiver.t_state != NONE) dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n", - mdev->receiver.t_state); + mdev->tconn->receiver.t_state); /* no need to lock it, I'm the only thread alive */ if (atomic_read(&mdev->current_epoch->epoch_size) != 0) @@ -4032,7 +4032,7 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, void (*done)(struct drbd_conf *, int), char *why, enum bm_flag flags) { - D_ASSERT(current == mdev->worker.task); + D_ASSERT(current == mdev->tconn->worker.task); D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags)); D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags)); @@ -4069,7 +4069,7 @@ int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), { int rv; - D_ASSERT(current != mdev->worker.task); + D_ASSERT(current != mdev->tconn->worker.task); if ((flags & BM_LOCKED_SET_ALLOWED) == 0) drbd_suspend_io(mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a936d61a90cd..59bb58c9b22f 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -875,7 +875,7 @@ static void drbd_reconfig_start(struct drbd_conf *mdev) { wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags)); wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags)); - drbd_thread_start(&mdev->worker); + drbd_thread_start(&mdev->tconn->worker); drbd_flush_workqueue(mdev); } @@ -889,7 +889,7 @@ static void drbd_reconfig_done(struct drbd_conf *mdev) mdev->state.conn == C_STANDALONE && mdev->state.role == R_SECONDARY) { set_bit(DEVICE_DYING, &mdev->flags); - drbd_thread_stop_nowait(&mdev->worker); + drbd_thread_stop_nowait(&mdev->tconn->worker); } else clear_bit(CONFIG_PENDING, &mdev->flags); spin_unlock_irq(&mdev->req_lock); @@ -1887,9 +1887,9 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (!cpumask_equal(mdev->cpu_mask, new_cpu_mask)) { cpumask_copy(mdev->cpu_mask, new_cpu_mask); drbd_calc_cpu_mask(mdev); - mdev->receiver.reset_cpu_mask = 1; - mdev->asender.reset_cpu_mask = 1; - mdev->worker.reset_cpu_mask = 1; + mdev->tconn->receiver.reset_cpu_mask = 1; + mdev->tconn->asender.reset_cpu_mask = 1; + mdev->tconn->worker.reset_cpu_mask = 1; } kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c index 2636bcc173a6..e9f670cd5542 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -833,7 +833,7 @@ retry: if (signal_pending(current)) { flush_signals(current); smp_rmb(); - if (get_t_state(&mdev->receiver) == EXITING) + if (get_t_state(&mdev->tconn->receiver) == EXITING) goto out_release_sockets; } @@ -874,7 +874,7 @@ retry: mdev->tconn->meta.socket = msock; mdev->last_received = jiffies; - D_ASSERT(mdev->asender.task == NULL); + D_ASSERT(mdev->tconn->asender.task == NULL); h = drbd_do_handshake(mdev); if (h <= 0) @@ -901,7 +901,7 @@ retry: atomic_set(&mdev->packet_seq, 0); mdev->peer_seq = 0; - drbd_thread_start(&mdev->asender); + drbd_thread_start(&mdev->tconn->asender); if (drbd_send_protocol(mdev) == -1) return -1; @@ -3704,7 +3704,7 @@ static void drbdd(struct drbd_conf *mdev) size_t shs; /* sub header size */ int rv; - while (get_t_state(&mdev->receiver) == RUNNING) { + while (get_t_state(&mdev->tconn->receiver) == RUNNING) { drbd_thread_current_set_cpu(mdev); if (!drbd_recv_header(mdev, &cmd, &packet_size)) goto err_out; @@ -3768,7 +3768,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) return; /* asender does not clean up anything. it must not interfere, either */ - drbd_thread_stop(&mdev->asender); + drbd_thread_stop(&mdev->tconn->asender); drbd_free_sock(mdev); /* wait for current activity to cease. */ @@ -3891,7 +3891,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) */ static int drbd_send_handshake(struct drbd_conf *mdev) { - /* ASSERT current == mdev->receiver ... */ + /* ASSERT current == mdev->tconn->receiver ... */ struct p_handshake *p = &mdev->tconn->data.sbuf.handshake; int ok; @@ -3923,7 +3923,7 @@ static int drbd_send_handshake(struct drbd_conf *mdev) */ static int drbd_do_handshake(struct drbd_conf *mdev) { - /* ASSERT current == mdev->receiver ... */ + /* ASSERT current == mdev->tconn->receiver ... */ struct p_handshake *p = &mdev->tconn->data.rbuf.handshake; const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80); unsigned int length; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 9b1e2bad5fbd..1ca7856f8136 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1705,8 +1705,8 @@ int drbd_worker(struct drbd_thread *thi) D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); /* _drbd_set_state only uses stop_nowait. - * wait here for the EXITING receiver. */ - drbd_thread_stop(&mdev->receiver); + * wait here for the exiting receiver. 
*/ + drbd_thread_stop(&mdev->tconn->receiver); drbd_mdev_cleanup(mdev); dev_info(DEV, "worker terminated\n"); From 31890f4ab299c4116cf0a104ca9ce4f9ca2c5da0 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 14:12:51 +0100 Subject: [PATCH 0033/5831] drbd: moved agreed_pro_version, last_received and ko_count to tconn sed -i \ -e 's/mdev->agreed_pro_version/mdev->tconn->agreed_pro_version/g' \ -e 's/mdev->last_received/mdev->tconn->last_received/g' \ -e 's/mdev->ko_count/mdev->tconn->ko_count/g' \ *.[ch] Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 +++--- drivers/block/drbd/drbd_main.c | 32 ++++++++++++------------ drivers/block/drbd/drbd_nl.c | 8 +++--- drivers/block/drbd/drbd_receiver.c | 40 +++++++++++++++--------------- drivers/block/drbd/drbd_req.c | 2 +- drivers/block/drbd/drbd_worker.c | 6 ++--- 6 files changed, 48 insertions(+), 48 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c5b1167aab50..9efe499b1122 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -972,6 +972,9 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_socket data; /* data/barrier/cstate/parameter packets */ struct drbd_socket meta; /* ping/ack (metadata) packets */ + int agreed_pro_version; /* actually used protocol version */ + unsigned long last_received; /* in jiffies, either socket */ + unsigned int ko_count; struct drbd_thread receiver; struct drbd_thread worker; @@ -994,9 +997,6 @@ struct drbd_conf { struct block_device *this_bdev; struct gendisk *vdisk; - int agreed_pro_version; /* actually used protocol version */ - unsigned long last_received; /* in jiffies, either socket */ - unsigned int ko_count; struct drbd_work resync_work, unplug_work, go_diskless, @@ -2297,7 +2297,7 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) /* Allow IO in BM exchange states with new protocols */ case C_WF_BITMAP_S: - if (mdev->agreed_pro_version < 96) + if (mdev->tconn->agreed_pro_version < 96) return 0; break; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5d8a6e94a4a5..e06ca4a0d906 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -735,7 +735,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) rv = SS_NO_VERIFY_ALG; else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - mdev->agreed_pro_version < 88) + mdev->tconn->agreed_pro_version < 88) rv = SS_NOT_SUPPORTED; else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) @@ -993,7 +993,7 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state /* helper for __drbd_set_state */ static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) { - if (mdev->agreed_pro_version < 90) + if (mdev->tconn->agreed_pro_version < 90) mdev->ov_start_sector = 0; mdev->rs_total = drbd_bm_bits(mdev); mdev->ov_position = 0; @@ -1393,7 +1393,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, * which is unexpected. 
*/ if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) && (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) && - mdev->agreed_pro_version >= 96 && get_ldev(mdev)) { + mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) { drbd_gen_and_send_sync_uuid(mdev); put_ldev(mdev); } @@ -1902,7 +1902,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) struct p_rs_param_95 *p; struct socket *sock; int size, rv; - const int apv = mdev->agreed_pro_version; + const int apv = mdev->tconn->agreed_pro_version; size = apv <= 87 ? sizeof(struct p_rs_param) : apv == 88 ? sizeof(struct p_rs_param) @@ -1951,7 +1951,7 @@ int drbd_send_protocol(struct drbd_conf *mdev) size = sizeof(struct p_protocol); - if (mdev->agreed_pro_version >= 87) + if (mdev->tconn->agreed_pro_version >= 87) size += strlen(mdev->tconn->net_conf->integrity_alg) + 1; /* we must not recurse into our own queue, @@ -1970,7 +1970,7 @@ int drbd_send_protocol(struct drbd_conf *mdev) if (mdev->tconn->net_conf->want_lose) cf |= CF_WANT_LOSE; if (mdev->tconn->net_conf->dry_run) { - if (mdev->agreed_pro_version >= 92) + if (mdev->tconn->agreed_pro_version >= 92) cf |= CF_DRY_RUN; else { dev_err(DEV, "--dry-run is not supported by peer"); @@ -1980,7 +1980,7 @@ int drbd_send_protocol(struct drbd_conf *mdev) } p->conn_flags = cpu_to_be32(cf); - if (mdev->agreed_pro_version >= 87) + if (mdev->tconn->agreed_pro_version >= 87) strcpy(p->integrity_alg, mdev->tconn->net_conf->integrity_alg); rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL, @@ -2158,7 +2158,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, /* may we use this feature? */ if ((mdev->sync_conf.use_rle == 0) || - (mdev->agreed_pro_version < 90)) + (mdev->tconn->agreed_pro_version < 90)) return 0; if (c->bit_offset >= c->bm_bits) @@ -2404,7 +2404,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, struct p_data *dp, int data_size) { - data_size -= (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? + data_size -= (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), dp->block_id); @@ -2514,10 +2514,10 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * if (drop_it) return true; - drop_it = !--mdev->ko_count; + drop_it = !--mdev->tconn->ko_count; if (!drop_it) { dev_err(DEV, "[%s/%d] sock_sendmsg time expired, ko = %u\n", - current->comm, current->pid, mdev->ko_count); + current->comm, current->pid, mdev->tconn->ko_count); request_ping(mdev); } @@ -2647,7 +2647,7 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw) { - if (mdev->agreed_pro_version >= 95) + if (mdev->tconn->agreed_pro_version >= 95) return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) | (bi_rw & REQ_FUA ? DP_FUA : 0) | (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) | @@ -2670,7 +2670,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) if (!drbd_get_data_sock(mdev)) return 0; - dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? 
crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; if (req->i.size <= DRBD_MAX_SIZE_H80_PACKET) { @@ -2755,7 +2755,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, void *dgb; int dgs; - dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; if (e->i.size <= DRBD_MAX_SIZE_H80_PACKET) { @@ -2843,7 +2843,7 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, msg.msg_flags = msg_flags | MSG_NOSIGNAL; if (sock == mdev->tconn->data.socket) { - mdev->ko_count = mdev->tconn->net_conf->ko_count; + mdev->tconn->ko_count = mdev->tconn->net_conf->ko_count; drbd_update_congested(mdev); } do { @@ -3038,7 +3038,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker); drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender); - mdev->agreed_pro_version = PRO_VERSION_MAX; + mdev->tconn->agreed_pro_version = PRO_VERSION_MAX; mdev->write_ordering = WO_bdev_flush; mdev->resync_wenr = LC_FREE; mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 59bb58c9b22f..a9ede8fc8880 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -845,9 +845,9 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) Because new from 8.3.8 onwards the peer can use multiple BIOs for a single peer_request */ if (mdev->state.conn >= C_CONNECTED) { - if (mdev->agreed_pro_version < 94) + if (mdev->tconn->agreed_pro_version < 94) peer = mdev->peer_max_bio_size; - else if (mdev->agreed_pro_version == 94) + else if (mdev->tconn->agreed_pro_version == 94) peer = DRBD_MAX_SIZE_H80_PACKET; else /* drbd 8.3.8 onwards */ peer = DRBD_MAX_BIO_SIZE; @@ -1675,7 +1675,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, goto fail; } - if (rs.no_resync && mdev->agreed_pro_version < 93) { + if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) { retcode = ERR_NEED_APV_93; goto fail; } @@ -2170,7 +2170,7 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl } /* this is "skip initial sync", assume to be clean */ - if (mdev->state.conn == C_CONNECTED && mdev->agreed_pro_version >= 90 && + if (mdev->state.conn == C_CONNECTED && mdev->tconn->agreed_pro_version >= 90 && mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) { dev_info(DEV, "Preparing to skip initial sync\n"); skip_initial_sync = 1; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e9f670cd5542..27a8363510dd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -872,7 +872,7 @@ retry: mdev->tconn->data.socket = sock; mdev->tconn->meta.socket = msock; - mdev->last_received = jiffies; + mdev->tconn->last_received = jiffies; D_ASSERT(mdev->tconn->asender.task == NULL); @@ -948,7 +948,7 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi be16_to_cpu(h->h80.length)); return false; } - mdev->last_received = jiffies; + mdev->tconn->last_received = jiffies; return true; } @@ -1244,7 +1244,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ void *dig_vv = mdev->int_dig_vv; unsigned long *data; - dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? 
crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; if (dgs) { @@ -1361,7 +1361,7 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, void *dig_in = mdev->int_dig_in; void *dig_vv = mdev->int_dig_vv; - dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; if (dgs) { @@ -2048,7 +2048,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un goto out_free_e; if (cmd == P_CSUM_RS_REQUEST) { - D_ASSERT(mdev->agreed_pro_version >= 89); + D_ASSERT(mdev->tconn->agreed_pro_version >= 89); e->w.cb = w_e_end_csum_rs_req; /* used in the sector offset progress display */ mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); @@ -2065,7 +2065,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un case P_OV_REQUEST: if (mdev->ov_start_sector == ~(sector_t)0 && - mdev->agreed_pro_version >= 90) { + mdev->tconn->agreed_pro_version >= 90) { unsigned long now = jiffies; int i; mdev->ov_start_sector = sector; @@ -2360,7 +2360,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) { - if (mdev->agreed_pro_version < 91) + if (mdev->tconn->agreed_pro_version < 91) return -1091; if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) && @@ -2381,7 +2381,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) { - if (mdev->agreed_pro_version < 91) + if (mdev->tconn->agreed_pro_version < 91) return -1091; if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) && @@ -2427,14 +2427,14 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l *rule_nr = 51; peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1); if (self == peer) { - if (mdev->agreed_pro_version < 96 ? + if (mdev->tconn->agreed_pro_version < 96 ? (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) : peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) { /* The last P_SYNC_UUID did not get though. Undo the last start of resync as sync source modifications of the peer's UUIDs. */ - if (mdev->agreed_pro_version < 91) + if (mdev->tconn->agreed_pro_version < 91) return -1091; mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START]; @@ -2464,14 +2464,14 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l *rule_nr = 71; self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); if (self == peer) { - if (mdev->agreed_pro_version < 96 ? + if (mdev->tconn->agreed_pro_version < 96 ? (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) : self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) { /* The last P_SYNC_UUID did not get though. Undo the last start of resync as sync source modifications of our UUIDs. 
*/ - if (mdev->agreed_pro_version < 91) + if (mdev->tconn->agreed_pro_version < 91) return -1091; _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]); @@ -2731,7 +2731,7 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsig goto disconnect; } - if (mdev->agreed_pro_version >= 87) { + if (mdev->tconn->agreed_pro_version >= 87) { unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg; if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size) @@ -2787,7 +2787,7 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi unsigned int header_size, data_size, exp_max_sz; struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; - const int apv = mdev->agreed_pro_version; + const int apv = mdev->tconn->agreed_pro_version; int *rs_plan_s = NULL; int fifo_size = 0; @@ -3074,7 +3074,7 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if (get_ldev(mdev)) { int skip_initial_sync = mdev->state.conn == C_CONNECTED && - mdev->agreed_pro_version >= 90 && + mdev->tconn->agreed_pro_version >= 90 && mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && (p_uuid[UI_FLAGS] & 8); if (skip_initial_sync) { @@ -3967,10 +3967,10 @@ static int drbd_do_handshake(struct drbd_conf *mdev) PRO_VERSION_MIN > p->protocol_max) goto incompat; - mdev->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); + mdev->tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); dev_info(DEV, "Handshake successful: " - "Agreed network protocol version %d\n", mdev->agreed_pro_version); + "Agreed network protocol version %d\n", mdev->tconn->agreed_pro_version); return 1; @@ -4220,7 +4220,7 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); - D_ASSERT(mdev->agreed_pro_version >= 89); + D_ASSERT(mdev->tconn->agreed_pro_version >= 89); update_peer_seq(mdev, be32_to_cpu(p->seq_num)); @@ -4560,7 +4560,7 @@ int drbd_asender(struct drbd_thread *thi) } else if (rv == -EAGAIN) { /* If the data socket received something meanwhile, * that is good enough: peer is still alive. 
*/ - if (time_after(mdev->last_received, + if (time_after(mdev->tconn->last_received, jiffies - mdev->tconn->meta.socket->sk->sk_rcvtimeo)) continue; if (ping_timeout_active) { @@ -4598,7 +4598,7 @@ int drbd_asender(struct drbd_thread *thi) goto reconnect; } if (received == expect) { - mdev->last_received = jiffies; + mdev->tconn->last_received = jiffies; D_ASSERT(cmd != NULL); if (!cmd->process(mdev, h)) goto reconnect; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index ac43e440d660..c871ef2414fa 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -951,7 +951,7 @@ allocate_barrier: _req_mod(req, QUEUE_FOR_SEND_OOS); if (remote && - mdev->tconn->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) { + mdev->tconn->net_conf->on_congestion != OC_BLOCK && mdev->tconn->agreed_pro_version >= 96) { int congested = 0; if (mdev->tconn->net_conf->cong_fill && diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 1ca7856f8136..ec26df378845 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -616,7 +616,7 @@ next_sector: /* adjust very last sectors, in case we are oddly sized */ if (sector + (size>>9) > capacity) size = (capacity-sector)<<9; - if (mdev->agreed_pro_version >= 89 && mdev->csums_tfm) { + if (mdev->tconn->agreed_pro_version >= 89 && mdev->csums_tfm) { switch (read_for_csum(mdev, sector, size)) { case -EIO: /* Disk failure */ put_ldev(mdev); @@ -1574,10 +1574,10 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) * drbd_resync_finished from here in that case. * We drbd_gen_and_send_sync_uuid here for protocol < 96, * and from after_state_ch otherwise. */ - if (side == C_SYNC_SOURCE && mdev->agreed_pro_version < 96) + if (side == C_SYNC_SOURCE && mdev->tconn->agreed_pro_version < 96) drbd_gen_and_send_sync_uuid(mdev); - if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) { + if (mdev->tconn->agreed_pro_version < 95 && mdev->rs_total == 0) { /* This still has a race (about when exactly the peers * detect connection loss) that can lead to a full sync * on next handshake. 
In 8.3.9 we fixed this with explicit From 87eeee41f8740451b61a1e7d37a494333a906861 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 14:16:30 +0100 Subject: [PATCH 0034/5831] drbd: moved req_lock and transfer log from mdev to tconn sed -i \ -e 's/mdev->req_lock/mdev->tconn->req_lock/g' \ -e 's/mdev->unused_spare_tle/mdev->tconn->unused_spare_tle/g' \ -e 's/mdev->newest_tle/mdev->tconn->newest_tle/g' \ -e 's/mdev->oldest_tle/mdev->tconn->oldest_tle/g' \ -e 's/mdev->out_of_sequence_requests/mdev->tconn->out_of_sequence_requests/g' \ *.[ch] Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 20 +++--- drivers/block/drbd/drbd_main.c | 100 ++++++++++++++--------------- drivers/block/drbd/drbd_nl.c | 34 +++++----- drivers/block/drbd/drbd_receiver.c | 96 +++++++++++++-------------- drivers/block/drbd/drbd_req.c | 48 +++++++------- drivers/block/drbd/drbd_req.h | 4 +- drivers/block/drbd/drbd_worker.c | 38 +++++------ 7 files changed, 170 insertions(+), 170 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9efe499b1122..b440ffd14989 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -976,6 +976,12 @@ struct drbd_tconn { /* is a resource from the config file */ unsigned long last_received; /* in jiffies, either socket */ unsigned int ko_count; + spinlock_t req_lock; + struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */ + struct drbd_tl_epoch *newest_tle; + struct drbd_tl_epoch *oldest_tle; + struct list_head out_of_sequence_requests; + struct drbd_thread receiver; struct drbd_thread worker; struct drbd_thread asender; @@ -1031,12 +1037,6 @@ struct drbd_conf { atomic_t unacked_cnt; /* Need to send replys for */ atomic_t local_cnt; /* Waiting for local completion */ - spinlock_t req_lock; - struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */ - struct drbd_tl_epoch *newest_tle; - struct drbd_tl_epoch *oldest_tle; - struct list_head out_of_sequence_requests; - /* Interval tree of pending local requests */ struct rb_root read_requests; struct rb_root write_requests; @@ -1868,9 +1868,9 @@ static inline void drbd_chk_io_error_(struct drbd_conf *mdev, { if (error) { unsigned long flags; - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); __drbd_chk_io_error_(mdev, forcedetach, where); - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); } } @@ -2366,11 +2366,11 @@ static inline bool inc_ap_bio_cond(struct drbd_conf *mdev, int count) { bool rv = false; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); rv = may_inc_ap_bio(mdev); if (rv) atomic_add(count, &mdev->ap_bio_cnt); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); return rv; } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index e06ca4a0d906..c063cd513223 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -185,7 +185,7 @@ int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) * DOC: The transfer log * * The transfer log is a single linked list of &struct drbd_tl_epoch objects. - * mdev->newest_tle points to the head, mdev->oldest_tle points to the tail + * mdev->tconn->newest_tle points to the head, mdev->tconn->oldest_tle points to the tail * of the list. There is always at least one &struct drbd_tl_epoch object. 
* * Each &struct drbd_tl_epoch has a circular double linked list of requests @@ -206,21 +206,21 @@ static int tl_init(struct drbd_conf *mdev) b->n_writes = 0; b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ - mdev->oldest_tle = b; - mdev->newest_tle = b; - INIT_LIST_HEAD(&mdev->out_of_sequence_requests); + mdev->tconn->oldest_tle = b; + mdev->tconn->newest_tle = b; + INIT_LIST_HEAD(&mdev->tconn->out_of_sequence_requests); return 1; } static void tl_cleanup(struct drbd_conf *mdev) { - D_ASSERT(mdev->oldest_tle == mdev->newest_tle); - D_ASSERT(list_empty(&mdev->out_of_sequence_requests)); - kfree(mdev->oldest_tle); - mdev->oldest_tle = NULL; - kfree(mdev->unused_spare_tle); - mdev->unused_spare_tle = NULL; + D_ASSERT(mdev->tconn->oldest_tle == mdev->tconn->newest_tle); + D_ASSERT(list_empty(&mdev->tconn->out_of_sequence_requests)); + kfree(mdev->tconn->oldest_tle); + mdev->tconn->oldest_tle = NULL; + kfree(mdev->tconn->unused_spare_tle); + mdev->tconn->unused_spare_tle = NULL; } /** @@ -240,13 +240,13 @@ void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new) new->next = NULL; new->n_writes = 0; - newest_before = mdev->newest_tle; + newest_before = mdev->tconn->newest_tle; /* never send a barrier number == 0, because that is special-cased * when using TCQ for our write ordering code */ new->br_number = (newest_before->br_number+1) ?: 1; - if (mdev->newest_tle != new) { - mdev->newest_tle->next = new; - mdev->newest_tle = new; + if (mdev->tconn->newest_tle != new) { + mdev->tconn->newest_tle->next = new; + mdev->tconn->newest_tle = new; } } @@ -267,9 +267,9 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, struct list_head *le, *tle; struct drbd_request *r; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); - b = mdev->oldest_tle; + b = mdev->tconn->oldest_tle; /* first some paranoia code */ if (b == NULL) { @@ -312,22 +312,22 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { _tl_add_barrier(mdev, b); if (nob) - mdev->oldest_tle = nob; + mdev->tconn->oldest_tle = nob; /* if nob == NULL b was the only barrier, and becomes the new - barrier. Therefore mdev->oldest_tle points already to b */ + barrier. Therefore mdev->tconn->oldest_tle points already to b */ } else { D_ASSERT(nob != NULL); - mdev->oldest_tle = nob; + mdev->tconn->oldest_tle = nob; kfree(b); } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); dec_ap_pending(mdev); return; bail: - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); } @@ -347,8 +347,8 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) struct drbd_request *req; int rv, n_writes, n_reads; - b = mdev->oldest_tle; - pn = &mdev->oldest_tle; + b = mdev->tconn->oldest_tle; + pn = &mdev->tconn->oldest_tle; while (b) { n_writes = 0; n_reads = 0; @@ -387,7 +387,7 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) if (b->w.cb != NULL) dec_ap_pending(mdev); - if (b == mdev->newest_tle) { + if (b == mdev->tconn->newest_tle) { /* recycle, but reinit! 
*/ D_ASSERT(tmp == NULL); INIT_LIST_HEAD(&b->requests); @@ -422,15 +422,15 @@ void tl_clear(struct drbd_conf *mdev) struct list_head *le, *tle; struct drbd_request *r; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); _tl_restart(mdev, CONNECTION_LOST_WHILE_PENDING); /* we expect this list to be empty. */ - D_ASSERT(list_empty(&mdev->out_of_sequence_requests)); + D_ASSERT(list_empty(&mdev->tconn->out_of_sequence_requests)); /* but just in case, clean it up anyways! */ - list_for_each_safe(le, tle, &mdev->out_of_sequence_requests) { + list_for_each_safe(le, tle, &mdev->tconn->out_of_sequence_requests) { r = list_entry(le, struct drbd_request, tl_requests); /* It would be nice to complete outside of spinlock. * But this is easier for now. */ @@ -440,14 +440,14 @@ void tl_clear(struct drbd_conf *mdev) /* ensure bit indicating barrier is required is clear */ clear_bit(CREATE_BARRIER, &mdev->flags); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); _tl_restart(mdev, what); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } /** @@ -476,12 +476,12 @@ drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, union drbd_state os, ns; enum drbd_state_rv rv; - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; rv = _drbd_set_state(mdev, ns, f, NULL); ns = mdev->state; - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); return rv; } @@ -522,7 +522,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, return SS_CW_FAILED_BY_PEER; rv = 0; - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; ns = sanitize_state(mdev, os, ns, NULL); @@ -537,7 +537,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. 
*/ } } - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); return rv; } @@ -566,7 +566,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, if (f & CS_SERIALIZE) mutex_lock(&mdev->state_mutex); - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; ns = sanitize_state(mdev, os, ns, NULL); @@ -575,7 +575,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, rv = is_valid_state(mdev, ns); if (rv == SS_SUCCESS) rv = is_valid_state_transition(mdev, ns, os); - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (rv < SS_SUCCESS) { if (f & CS_VERBOSE) @@ -601,7 +601,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, print_st_err(mdev, os, ns, rv); goto abort; } - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; rv = _drbd_set_state(mdev, ns, f, &done); @@ -610,7 +610,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, rv = _drbd_set_state(mdev, ns, f, &done); } - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) { D_ASSERT(current != mdev->tconn->worker.task); @@ -1367,9 +1367,9 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, drbd_uuid_new_current(mdev); clear_bit(NEW_CUR_UUID, &mdev->flags); } - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } /* case2: The connection was established again: */ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { @@ -1380,11 +1380,11 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, } if (what != NOTHING) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); _tl_restart(mdev, what); nsm.i &= mdev->state.i; _drbd_set_state(mdev, nsm, CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } /* Became sync source. 
With protocol >= 96, we still need to send out @@ -2898,7 +2898,7 @@ static int drbd_open(struct block_device *bdev, fmode_t mode) int rv = 0; mutex_lock(&drbd_main_mutex); - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); /* to have a stable mdev->state.role * and no race with updating open_cnt */ @@ -2911,7 +2911,7 @@ static int drbd_open(struct block_device *bdev, fmode_t mode) if (!rv) mdev->open_cnt++; - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); mutex_unlock(&drbd_main_mutex); return rv; @@ -2990,7 +2990,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) spin_lock_init(&mdev->tconn->meta.work.q_lock); spin_lock_init(&mdev->al_lock); - spin_lock_init(&mdev->req_lock); + spin_lock_init(&mdev->tconn->req_lock); spin_lock_init(&mdev->peer_seq_lock); spin_lock_init(&mdev->epoch_lock); @@ -3451,7 +3451,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); blk_queue_merge_bvec(q, drbd_merge_bvec); - q->queue_lock = &mdev->req_lock; + q->queue_lock = &mdev->tconn->req_lock; /* needed since we use */ mdev->md_io_page = alloc_page(GFP_KERNEL); if (!mdev->md_io_page) @@ -3784,14 +3784,14 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents); bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (mdev->state.conn < C_CONNECTED) { int peer; peer = be32_to_cpu(buffer->la_peer_max_bio_size); peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE); mdev->peer_max_bio_size = peer; } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (mdev->sync_conf.al_extents < 7) mdev->sync_conf.al_extents = 127; @@ -4046,13 +4046,13 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, mdev->bm_io_work.why = why; mdev->bm_io_work.flags = flags; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); set_bit(BITMAP_IO, &mdev->flags); if (atomic_read(&mdev->ap_bio_cnt) == 0) { if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) drbd_queue_work(&mdev->tconn->data.work, &mdev->bm_io_work.w); } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } /** diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a9ede8fc8880..4eaf81a463b5 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -287,13 +287,13 @@ static int _try_outdate_peer_async(void *data) pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid, therefore we have to have the pre state change check here. 
*/ - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); ns = mdev->state; if (ns.conn < C_WF_REPORT_PARAMS) { ns.pdsk = nps; _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); return 0; } @@ -884,7 +884,7 @@ static void drbd_reconfig_start(struct drbd_conf *mdev) * wakes potential waiters */ static void drbd_reconfig_done(struct drbd_conf *mdev) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE && mdev->state.role == R_SECONDARY) { @@ -892,7 +892,7 @@ static void drbd_reconfig_done(struct drbd_conf *mdev) drbd_thread_stop_nowait(&mdev->tconn->worker); } else clear_bit(CONFIG_PENDING, &mdev->flags); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); wake_up(&mdev->state_wait); } @@ -909,11 +909,11 @@ static void drbd_suspend_al(struct drbd_conf *mdev) return; } - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (mdev->state.conn < C_CONNECTED) s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (s) dev_info(DEV, "Suspended AL updates\n"); @@ -1240,7 +1240,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev)) drbd_suspend_al(mdev); /* IO is still suspended here... */ - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); os = mdev->state; ns.i = os.i; /* If MDF_CONSISTENT is not set go into inconsistent state, @@ -1285,7 +1285,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); ns = mdev->state; - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (rv < SS_SUCCESS) goto force_diskless_dec; @@ -1521,10 +1521,10 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } drbd_flush_workqueue(mdev); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (mdev->tconn->net_conf != NULL) { retcode = ERR_NET_CONFIGURED; - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); goto fail; } mdev->tconn->net_conf = new_conf; @@ -1548,7 +1548,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, mdev->int_dig_in=int_dig_in; mdev->int_dig_vv=int_dig_vv; retcode = _drbd_set_state(_NS(mdev, conn, C_UNCONNECTED), CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); reply->ret_code = retcode; @@ -1582,10 +1582,10 @@ static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl } if (dc.force) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (mdev->state.conn >= C_WF_CONNECTION) _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_HARD, NULL); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); goto done; } @@ -1917,10 +1917,10 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); while (retcode == SS_NEED_CONNECTION) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (mdev->state.conn < C_CONNECTED) retcode = _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), 
CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (retcode != SS_NEED_CONNECTION) break; @@ -2193,10 +2193,10 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl drbd_send_uuids_skip_initial_sync(mdev); _drbd_uuid_set(mdev, UI_BITMAP, 0); drbd_print_uuids(mdev, "cleared bitmap UUID"); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 27a8363510dd..af968a0bae07 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -210,9 +210,9 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) LIST_HEAD(reclaimed); struct drbd_epoch_entry *e, *t; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); reclaim_net_ee(mdev, &reclaimed); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); list_for_each_entry_safe(e, t, &reclaimed, w.list) drbd_free_net_ee(mdev, e); @@ -269,7 +269,7 @@ static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool } /* Must not be used from irq, as that may deadlock: see drbd_pp_alloc. - * Is also used from inside an other spin_lock_irq(&mdev->req_lock); + * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock); * Either links the page chain back to the global pool, * or returns all pages to the system. */ static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net) @@ -371,9 +371,9 @@ int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) int count = 0; int is_net = list == &mdev->net_ee; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_splice_init(list, &work_list); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); list_for_each_entry_safe(e, t, &work_list, w.list) { drbd_free_some_ee(mdev, e, is_net); @@ -399,10 +399,10 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) struct drbd_epoch_entry *e, *t; int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); reclaim_net_ee(mdev, &reclaimed); list_splice_init(&mdev->done_ee, &work_list); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); list_for_each_entry_safe(e, t, &reclaimed, w.list) drbd_free_net_ee(mdev, e); @@ -429,18 +429,18 @@ void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) * and calling prepare_to_wait in the fast path */ while (!list_empty(head)) { prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); io_schedule(); finish_wait(&mdev->ee_wait, &wait); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); } } void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); _drbd_wait_ee_list_empty(mdev, head); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } /* see also kernel_accept; which is only present since 2.6.18. 
@@ -1452,9 +1452,9 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si e->w.cb = e_end_resync_block; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_add(&e->w.list, &mdev->sync_ee); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); atomic_add(data_size >> 9, &mdev->rs_sect_ev); if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) @@ -1462,9 +1462,9 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si /* don't care for the reason here */ dev_err(DEV, "submit failed, triggering re-connect\n"); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_del(&e->w.list); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); drbd_free_ee(mdev, e); fail: @@ -1498,9 +1498,9 @@ static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsi sector = be64_to_cpu(p->sector); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (unlikely(!req)) return false; @@ -1574,11 +1574,11 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) /* we delete from the conflict detection hash _after_ we sent out the * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ if (mdev->tconn->net_conf->two_primaries) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); D_ASSERT(!drbd_interval_empty(&e->i)); drbd_remove_interval(&mdev->epoch_entries, &e->i); drbd_clear_interval(&e->i); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } else D_ASSERT(drbd_interval_empty(&e->i)); @@ -1595,11 +1595,11 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); D_ASSERT(!drbd_interval_empty(&e->i)); drbd_remove_interval(&mdev->epoch_entries, &e->i); drbd_clear_interval(&e->i); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); dec_unacked(mdev); @@ -1718,7 +1718,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned /* I'm the receiver, I do hold a net_cnt reference. 
*/ if (!mdev->tconn->net_conf->two_primaries) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); } else { /* don't get the req_lock yet, * we may sleep in drbd_wait_peer_seq */ @@ -1765,7 +1765,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num))) goto out_interrupted; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); drbd_insert_interval(&mdev->epoch_entries, &e->i); @@ -1805,7 +1805,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned e->w.cb = e_send_discard_ack; list_add_tail(&e->w.list, &mdev->done_ee); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); /* we could probably send that P_DISCARD_ACK ourselves, * but I don't like the receiver using the msock */ @@ -1820,13 +1820,13 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned drbd_remove_interval(&mdev->epoch_entries, &e->i); drbd_clear_interval(&e->i); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); finish_wait(&mdev->misc_wait, &wait); goto out_interrupted; } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (first) { first = 0; dev_alert(DEV, "Concurrent write! [W AFTERWARDS] " @@ -1837,13 +1837,13 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned D_ASSERT(have_unacked == 0); } schedule(); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); } finish_wait(&mdev->misc_wait, &wait); } list_add(&e->w.list, &mdev->active_ee); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); switch (mdev->tconn->net_conf->wire_protocol) { case DRBD_PROT_C: @@ -1874,11 +1874,11 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned /* don't care for the reason here */ dev_err(DEV, "submit failed, triggering re-connect\n"); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_del(&e->w.list); drbd_remove_interval(&mdev->epoch_entries, &e->i); drbd_clear_interval(&e->i); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (e->flags & EE_CALL_AL_COMPLETE_IO) drbd_al_complete_io(mdev, e->i.sector); @@ -2122,18 +2122,18 @@ submit_for_resync: submit: inc_unacked(mdev); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_add_tail(&e->w.list, &mdev->read_ee); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) return true; /* don't care for the reason here */ dev_err(DEV, "submit failed, triggering re-connect\n"); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_del(&e->w.list); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); /* no drbd_rs_complete_io(), we are dropping the connection anyways */ out_free_e: @@ -3183,10 +3183,10 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk)); } - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); retry: os = ns = mdev->state; - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); /* peer says his disk is uptodate, while we think it is inconsistent, * and this happens while we think we have a sync going on. 
*/ @@ -3270,7 +3270,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } } - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (mdev->state.i != os.i) goto retry; clear_bit(CONSIDER_RESYNC, &mdev->flags); @@ -3284,7 +3284,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned test_bit(NEW_CUR_UUID, &mdev->flags)) { /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this for temporal network outages! */ - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n"); tl_clear(mdev); drbd_uuid_new_current(mdev); @@ -3294,7 +3294,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } rv = _drbd_set_state(mdev, ns, cs_flags, NULL); ns = mdev->state; - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (rv < SS_SUCCESS) { drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); @@ -3772,11 +3772,11 @@ static void drbd_disconnect(struct drbd_conf *mdev) drbd_free_sock(mdev); /* wait for current activity to cease. */ - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); _drbd_wait_ee_list_empty(mdev, &mdev->active_ee); _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee); _drbd_wait_ee_list_empty(mdev, &mdev->read_ee); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); /* We do not have data structures that would allow us to * get the rs_pending_cnt down to 0 again. @@ -3828,7 +3828,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN) drbd_try_outdate_peer_async(mdev); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); os = mdev->state; if (os.conn >= C_UNCONNECTED) { /* Do not restart in case we are C_DISCONNECTING */ @@ -3836,7 +3836,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) ns.conn = C_UNCONNECTED; rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (os.conn == C_DISCONNECTING) { wait_event(mdev->tconn->net_cnt_wait, atomic_read(&mdev->tconn->net_cnt) == 0); @@ -4245,14 +4245,14 @@ validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, struct drbd_request *req; struct bio_and_error m; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); req = find_request(mdev, root, id, sector, missing_ok, func); if (unlikely(!req)) { - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); return false; } __req_mod(req, what, &m); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (m.bio) complete_master_bio(mdev, &m); @@ -4518,9 +4518,9 @@ int drbd_asender(struct drbd_thread *thi) goto reconnect; /* to avoid race with newly queued ACKs */ set_bit(SIGNAL_ASENDER, &mdev->flags); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); empty = list_empty(&mdev->done_ee); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); /* new ack may have been queued right here, * but then there is also a signal pending, * and we start over... 
*/ diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index c871ef2414fa..74179f7986e1 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -120,7 +120,7 @@ static void queue_barrier(struct drbd_conf *mdev) if (test_bit(CREATE_BARRIER, &mdev->flags)) return; - b = mdev->newest_tle; + b = mdev->tconn->newest_tle; b->w.cb = w_send_barrier; /* inc_ap_pending done here, so we won't * get imbalanced on connection loss. @@ -144,7 +144,7 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, */ if (mdev->state.conn >= C_CONNECTED && (s & RQ_NET_SENT) != 0 && - req->epoch == mdev->newest_tle->br_number) + req->epoch == mdev->tconn->newest_tle->br_number) queue_barrier(mdev); /* we need to do the conflict detection stuff, @@ -516,10 +516,10 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * just after it grabs the req_lock */ D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0); - req->epoch = mdev->newest_tle->br_number; + req->epoch = mdev->tconn->newest_tle->br_number; /* increment size of current epoch */ - mdev->newest_tle->n_writes++; + mdev->tconn->newest_tle->n_writes++; /* queue work item to send data */ D_ASSERT(req->rq_state & RQ_NET_PENDING); @@ -528,7 +528,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, drbd_queue_work(&mdev->tconn->data.work, &req->w); /* close the epoch, in case it outgrew the limit */ - if (mdev->newest_tle->n_writes >= mdev->tconn->net_conf->max_epoch_size) + if (mdev->tconn->newest_tle->n_writes >= mdev->tconn->net_conf->max_epoch_size) queue_barrier(mdev); break; @@ -693,7 +693,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * this is bad, because if the connection is lost now, * we won't be able to clean them up... */ dev_err(DEV, "FIXME (BARRIER_ACKED but pending)\n"); - list_move(&req->tl_requests, &mdev->out_of_sequence_requests); + list_move(&req->tl_requests, &mdev->tconn->out_of_sequence_requests); } if ((req->rq_state & RQ_NET_MASK) != 0) { req->rq_state |= RQ_NET_DONE; @@ -834,7 +834,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns * spinlock, and grabbing the spinlock. * if we lost that race, we retry. */ if (rw == WRITE && (remote || send_oos) && - mdev->unused_spare_tle == NULL && + mdev->tconn->unused_spare_tle == NULL && test_bit(CREATE_BARRIER, &mdev->flags)) { allocate_barrier: b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_NOIO); @@ -846,7 +846,7 @@ allocate_barrier: } /* GOOD, everything prepared, grab the spin_lock */ - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (is_susp(mdev->state)) { /* If we got suspended, use the retry mechanism of @@ -854,7 +854,7 @@ allocate_barrier: bio. 
In the next call to drbd_make_request we sleep in inc_ap_bio() */ ret = 1; - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); goto fail_free_complete; } @@ -867,21 +867,21 @@ allocate_barrier: dev_warn(DEV, "lost connection while grabbing the req_lock!\n"); if (!(local || remote)) { dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); goto fail_free_complete; } } - if (b && mdev->unused_spare_tle == NULL) { - mdev->unused_spare_tle = b; + if (b && mdev->tconn->unused_spare_tle == NULL) { + mdev->tconn->unused_spare_tle = b; b = NULL; } if (rw == WRITE && (remote || send_oos) && - mdev->unused_spare_tle == NULL && + mdev->tconn->unused_spare_tle == NULL && test_bit(CREATE_BARRIER, &mdev->flags)) { /* someone closed the current epoch * while we were grabbing the spinlock */ - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); goto allocate_barrier; } @@ -899,10 +899,10 @@ allocate_barrier: * barrier packet. To get the write ordering right, we only have to * make sure that, if this is a write request and it triggered a * barrier packet, this request is queued within the same spinlock. */ - if ((remote || send_oos) && mdev->unused_spare_tle && + if ((remote || send_oos) && mdev->tconn->unused_spare_tle && test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { - _tl_add_barrier(mdev, mdev->unused_spare_tle); - mdev->unused_spare_tle = NULL; + _tl_add_barrier(mdev, mdev->tconn->unused_spare_tle); + mdev->tconn->unused_spare_tle = NULL; } else { D_ASSERT(!(remote && rw == WRITE && test_bit(CREATE_BARRIER, &mdev->flags))); @@ -934,7 +934,7 @@ allocate_barrier: if (rw == WRITE && _req_conflicts(req)) goto fail_conflicting; - list_add_tail(&req->tl_requests, &mdev->newest_tle->requests); + list_add_tail(&req->tl_requests, &mdev->tconn->newest_tle->requests); /* NOTE remote first: to get the concurrent write detection right, * we must register the request before start of local IO. */ @@ -975,7 +975,7 @@ allocate_barrier: } } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); kfree(b); /* if someone else has beaten us to it... */ if (local) { @@ -1008,7 +1008,7 @@ fail_conflicting: * pretend that it was successfully served right now. */ _drbd_end_io_acct(mdev, req); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (remote) dec_ap_pending(mdev); /* THINK: do we want to fail it (-EIO), or pretend success? 
@@ -1188,10 +1188,10 @@ void request_timer_fn(unsigned long data) if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) return; /* Recurring timer stopped */ - spin_lock_irq(&mdev->req_lock); - le = &mdev->oldest_tle->requests; + spin_lock_irq(&mdev->tconn->req_lock); + le = &mdev->tconn->oldest_tle->requests; if (list_empty(le)) { - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); mod_timer(&mdev->request_timer, jiffies + et); return; } @@ -1210,5 +1210,5 @@ void request_timer_fn(unsigned long data) mod_timer(&mdev->request_timer, req->start_time + et); } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 9d75647cae8f..4b0858bf2866 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -305,9 +305,9 @@ static inline int req_mod(struct drbd_request *req, struct bio_and_error m; int rv; - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); rv = __req_mod(req, what, &m); - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (m.bio) complete_master_bio(mdev, &m); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index ec26df378845..671251af6bcf 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -85,14 +85,14 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local) unsigned long flags = 0; struct drbd_conf *mdev = e->mdev; - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); mdev->read_cnt += e->i.size >> 9; list_del(&e->w.list); if (list_empty(&mdev->read_ee)) wake_up(&mdev->ee_wait); if (test_bit(__EE_WAS_ERROR, &e->flags)) __drbd_chk_io_error(mdev, false); - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); drbd_queue_work(&mdev->tconn->data.work, &e->w); put_ldev(mdev); @@ -117,7 +117,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO; block_id = e->block_id; - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); mdev->writ_cnt += e->i.size >> 9; list_del(&e->w.list); /* has been on active_ee or sync_ee */ list_add_tail(&e->w.list, &mdev->done_ee); @@ -134,7 +134,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo if (test_bit(__EE_WAS_ERROR, &e->flags)) __drbd_chk_io_error(mdev, false); - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (block_id == ID_SYNCER) drbd_rs_complete_io(mdev, e_sector); @@ -220,9 +220,9 @@ void drbd_endio_pri(struct bio *bio, int error) req->private_bio = ERR_PTR(error); /* not req_mod(), we need irqsave here! 
*/ - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); __req_mod(req, what, &m); - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (m.bio) complete_master_bio(mdev, &m); @@ -236,13 +236,13 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * but try to WRITE the P_DATA_REPLY to the failed location, * to give the disk the chance to relocate that block */ - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (cancel || mdev->state.pdsk != D_UP_TO_DATE) { _req_mod(req, READ_RETRY_REMOTE_CANCELED); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); return 1; } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); return w_send_read_req(mdev, w, 0); } @@ -359,9 +359,9 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) goto defer; e->w.cb = w_e_send_csum; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_add(&e->w.list, &mdev->read_ee); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); atomic_add(size >> 9, &mdev->rs_sect_ev); if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0) @@ -371,9 +371,9 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) * because bio_add_page failed (probably broken lower level driver), * retry may or may not help. * If it does not, you may need to force disconnect. */ - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_del(&e->w.list); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); drbd_free_ee(mdev, e); defer: @@ -793,7 +793,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) ping_peer(mdev); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); os = mdev->state; verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T); @@ -882,7 +882,7 @@ int drbd_resync_finished(struct drbd_conf *mdev) _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); out_unlock: - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); put_ldev(mdev); out: mdev->rs_total = 0; @@ -907,9 +907,9 @@ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_ent int i = (e->i.size + PAGE_SIZE -1) >> PAGE_SHIFT; atomic_add(i, &mdev->pp_in_use_by_net); atomic_sub(i, &mdev->pp_in_use); - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_add_tail(&e->w.list, &mdev->net_ee); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); wake_up(&drbd_pp_wait); } else drbd_free_ee(mdev, e); @@ -1210,10 +1210,10 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * actually, this race was harmless, since we only try to send the * barrier packet here, and otherwise do nothing with the object. 
* but compare with the head of w_clear_epoch */ - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED) cancel = 1; - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (cancel) return 1; From a0638456c6ef502506db6ea6cfd0265dfbcf6b51 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 14:31:32 +0100 Subject: [PATCH 0035/5831] drbd: moved crypto transformations and friends from mdev to tconn sed -i \ -e 's/mdev->cram_hmac_tfm/mdev->tconn->cram_hmac_tfm/g' \ -e 's/mdev->integrity_w_tfm/mdev->tconn->integrity_w_tfm/g' \ -e 's/mdev->integrity_r_tfm/mdev->tconn->integrity_r_tfm/g' \ -e 's/mdev->int_dig_out/mdev->tconn->int_dig_out/g' \ -e 's/mdev->int_dig_in/mdev->tconn->int_dig_in/g' \ -e 's/mdev->int_dig_vv/mdev->tconn->int_dig_vv/g' \ *.[ch] Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 13 ++++----- drivers/block/drbd/drbd_main.c | 42 +++++++++++++++--------------- drivers/block/drbd/drbd_nl.c | 24 ++++++++--------- drivers/block/drbd/drbd_receiver.c | 32 +++++++++++------------ 4 files changed, 56 insertions(+), 55 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b440ffd14989..af1a2b795084 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -982,6 +982,13 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_tl_epoch *oldest_tle; struct list_head out_of_sequence_requests; + struct crypto_hash *cram_hmac_tfm; + struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */ + struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */ + void *int_dig_out; + void *int_dig_in; + void *int_dig_vv; + struct drbd_thread receiver; struct drbd_thread worker; struct drbd_thread asender; @@ -1114,12 +1121,6 @@ struct drbd_conf { unsigned int al_tr_number; int al_tr_cycle; int al_tr_pos; /* position of the next transaction in the journal */ - struct crypto_hash *cram_hmac_tfm; - struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */ - struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */ - void *int_dig_out; - void *int_dig_in; - void *int_dig_vv; wait_queue_head_t seq_wait; atomic_t packet_seq; unsigned int peer_seq; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index c063cd513223..699f63929c1c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2404,8 +2404,8 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, struct p_data *dp, int data_size) { - data_size -= (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? - crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; + data_size -= (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ? + crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), dp->block_id); } @@ -2670,8 +2670,8 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) if (!drbd_get_data_sock(mdev)) return 0; - dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? - crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? 
+ crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; if (req->i.size <= DRBD_MAX_SIZE_H80_PACKET) { p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); @@ -2701,8 +2701,8 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) ok = (sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0)); if (ok && dgs) { - dgb = mdev->int_dig_out; - drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); + dgb = mdev->tconn->int_dig_out; + drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, dgb); ok = dgs == drbd_send(mdev, mdev->tconn->data.socket, dgb, dgs, 0); } if (ok) { @@ -2727,8 +2727,8 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) /* 64 byte, 512 bit, is the largest digest size * currently supported in kernel crypto. */ unsigned char digest[64]; - drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest); - if (memcmp(mdev->int_dig_out, digest, dgs)) { + drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, digest); + if (memcmp(mdev->tconn->int_dig_out, digest, dgs)) { dev_warn(DEV, "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n", (unsigned long long)req->i.sector, req->i.size); @@ -2755,8 +2755,8 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, void *dgb; int dgs; - dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? - crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? + crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; if (e->i.size <= DRBD_MAX_SIZE_H80_PACKET) { p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); @@ -2783,8 +2783,8 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, ok = sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), dgs ? 
MSG_MORE : 0); if (ok && dgs) { - dgb = mdev->int_dig_out; - drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); + dgb = mdev->tconn->int_dig_out; + drbd_csum_ee(mdev, mdev->tconn->integrity_w_tfm, e, dgb); ok = dgs == drbd_send(mdev, mdev->tconn->data.socket, dgb, dgs, 0); } if (ok) @@ -3276,9 +3276,9 @@ static void drbd_delete_device(unsigned int minor) kfree(mdev->p_uuid); /* mdev->p_uuid = NULL; */ - kfree(mdev->int_dig_out); - kfree(mdev->int_dig_in); - kfree(mdev->int_dig_vv); + kfree(mdev->tconn->int_dig_out); + kfree(mdev->tconn->int_dig_in); + kfree(mdev->tconn->int_dig_vv); /* cleanup the rest that has been * allocated from drbd_new_device @@ -3629,12 +3629,12 @@ void drbd_free_resources(struct drbd_conf *mdev) mdev->csums_tfm = NULL; crypto_free_hash(mdev->verify_tfm); mdev->verify_tfm = NULL; - crypto_free_hash(mdev->cram_hmac_tfm); - mdev->cram_hmac_tfm = NULL; - crypto_free_hash(mdev->integrity_w_tfm); - mdev->integrity_w_tfm = NULL; - crypto_free_hash(mdev->integrity_r_tfm); - mdev->integrity_r_tfm = NULL; + crypto_free_hash(mdev->tconn->cram_hmac_tfm); + mdev->tconn->cram_hmac_tfm = NULL; + crypto_free_hash(mdev->tconn->integrity_w_tfm); + mdev->tconn->integrity_w_tfm = NULL; + crypto_free_hash(mdev->tconn->integrity_r_tfm); + mdev->tconn->integrity_r_tfm = NULL; drbd_free_sock(mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 4eaf81a463b5..083680873324 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1532,21 +1532,21 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, mdev->send_cnt = 0; mdev->recv_cnt = 0; - crypto_free_hash(mdev->cram_hmac_tfm); - mdev->cram_hmac_tfm = tfm; + crypto_free_hash(mdev->tconn->cram_hmac_tfm); + mdev->tconn->cram_hmac_tfm = tfm; - crypto_free_hash(mdev->integrity_w_tfm); - mdev->integrity_w_tfm = integrity_w_tfm; + crypto_free_hash(mdev->tconn->integrity_w_tfm); + mdev->tconn->integrity_w_tfm = integrity_w_tfm; - crypto_free_hash(mdev->integrity_r_tfm); - mdev->integrity_r_tfm = integrity_r_tfm; + crypto_free_hash(mdev->tconn->integrity_r_tfm); + mdev->tconn->integrity_r_tfm = integrity_r_tfm; - kfree(mdev->int_dig_out); - kfree(mdev->int_dig_in); - kfree(mdev->int_dig_vv); - mdev->int_dig_out=int_dig_out; - mdev->int_dig_in=int_dig_in; - mdev->int_dig_vv=int_dig_vv; + kfree(mdev->tconn->int_dig_out); + kfree(mdev->tconn->int_dig_in); + kfree(mdev->tconn->int_dig_vv); + mdev->tconn->int_dig_out=int_dig_out; + mdev->tconn->int_dig_in=int_dig_in; + mdev->tconn->int_dig_vv=int_dig_vv; retcode = _drbd_set_state(_NS(mdev, conn, C_UNCONNECTED), CS_VERBOSE, NULL); spin_unlock_irq(&mdev->tconn->req_lock); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index af968a0bae07..4b37010cf461 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -880,7 +880,7 @@ retry: if (h <= 0) return h; - if (mdev->cram_hmac_tfm) { + if (mdev->tconn->cram_hmac_tfm) { /* drbd_request_state(mdev, NS(conn, WFAuth)); */ switch (drbd_do_auth(mdev)) { case -1: @@ -1240,12 +1240,12 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ struct drbd_epoch_entry *e; struct page *page; int dgs, ds, rr; - void *dig_in = mdev->int_dig_in; - void *dig_vv = mdev->int_dig_vv; + void *dig_in = mdev->tconn->int_dig_in; + void *dig_vv = mdev->tconn->int_dig_vv; unsigned long *data; - dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? 
- crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ? + crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; if (dgs) { rr = drbd_recv(mdev, dig_in, dgs); @@ -1306,7 +1306,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __ } if (dgs) { - drbd_csum_ee(mdev, mdev->integrity_r_tfm, e, dig_vv); + drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, e, dig_vv); if (memcmp(dig_in, dig_vv, dgs)) { dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n", (unsigned long long)sector, data_size); @@ -1358,11 +1358,11 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, struct bio_vec *bvec; struct bio *bio; int dgs, rr, i, expect; - void *dig_in = mdev->int_dig_in; - void *dig_vv = mdev->int_dig_vv; + void *dig_in = mdev->tconn->int_dig_in; + void *dig_vv = mdev->tconn->int_dig_vv; - dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? - crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; + dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ? + crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; if (dgs) { rr = drbd_recv(mdev, dig_in, dgs); @@ -1401,7 +1401,7 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, } if (dgs) { - drbd_csum_bio(mdev, mdev->integrity_r_tfm, bio, dig_vv); + drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv); if (memcmp(dig_in, dig_vv, dgs)) { dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n"); return 0; @@ -3841,8 +3841,8 @@ static void drbd_disconnect(struct drbd_conf *mdev) if (os.conn == C_DISCONNECTING) { wait_event(mdev->tconn->net_cnt_wait, atomic_read(&mdev->tconn->net_cnt) == 0); - crypto_free_hash(mdev->cram_hmac_tfm); - mdev->cram_hmac_tfm = NULL; + crypto_free_hash(mdev->tconn->cram_hmac_tfm); + mdev->tconn->cram_hmac_tfm = NULL; kfree(mdev->tconn->net_conf); mdev->tconn->net_conf = NULL; @@ -4012,10 +4012,10 @@ static int drbd_do_auth(struct drbd_conf *mdev) unsigned int length; int rv; - desc.tfm = mdev->cram_hmac_tfm; + desc.tfm = mdev->tconn->cram_hmac_tfm; desc.flags = 0; - rv = crypto_hash_setkey(mdev->cram_hmac_tfm, + rv = crypto_hash_setkey(mdev->tconn->cram_hmac_tfm, (u8 *)mdev->tconn->net_conf->shared_secret, key_len); if (rv) { dev_err(DEV, "crypto_hash_setkey() failed with %d\n", rv); @@ -4062,7 +4062,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) goto fail; } - resp_size = crypto_hash_digestsize(mdev->cram_hmac_tfm); + resp_size = crypto_hash_digestsize(mdev->tconn->cram_hmac_tfm); response = kmalloc(resp_size, GFP_NOIO); if (response == NULL) { dev_err(DEV, "kmalloc of response failed\n"); From 191d3cc8d9291bbfea66f3debf19d6c2f85b4752 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 14:53:22 +0100 Subject: [PATCH 0036/5831] drbd: Made drbd_flush_workqueue() to take a tconn instead of an mdev Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_nl.c | 6 +++--- drivers/block/drbd/drbd_receiver.c | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index af1a2b795084..526928c368c9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1609,7 +1609,7 @@ extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head); extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int 
tcq_enabled); extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); -extern void drbd_flush_workqueue(struct drbd_conf *mdev); +extern void drbd_flush_workqueue(struct drbd_tconn *tconn); /* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to * mess with get_fs/set_fs, we know we are KERNEL_DS always. */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 083680873324..8b8894e10e61 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -876,7 +876,7 @@ static void drbd_reconfig_start(struct drbd_conf *mdev) wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags)); wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags)); drbd_thread_start(&mdev->tconn->worker); - drbd_flush_workqueue(mdev); + drbd_flush_workqueue(mdev->tconn); } /* if still unconfigured, stops worker again. @@ -1076,7 +1076,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp /* also wait for the last barrier ack. */ wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state)); /* and for any other previously queued work */ - drbd_flush_workqueue(mdev); + drbd_flush_workqueue(mdev->tconn); rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); retcode = rv; /* FIXME: Type mismatch. */ @@ -1520,7 +1520,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } } - drbd_flush_workqueue(mdev); + drbd_flush_workqueue(mdev->tconn); spin_lock_irq(&mdev->tconn->req_lock); if (mdev->tconn->net_conf != NULL) { retcode = ERR_NET_CONFIGURED; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4b37010cf461..fbf93826ef0e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3747,13 +3747,13 @@ static void drbdd(struct drbd_conf *mdev) drbd_md_sync(mdev); } -void drbd_flush_workqueue(struct drbd_conf *mdev) +void drbd_flush_workqueue(struct drbd_tconn *tconn) { struct drbd_wq_barrier barr; barr.w.cb = w_prev_work_done; init_completion(&barr.done); - drbd_queue_work(&mdev->tconn->data.work, &barr.w); + drbd_queue_work(&tconn->data.work, &barr.w); wait_for_completion(&barr.done); } @@ -3803,7 +3803,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier, * w_make_resync_request etc. which may still be on the worker queue * to be "canceled" */ - drbd_flush_workqueue(mdev); + drbd_flush_workqueue(mdev->tconn); /* This also does reclaim_net_ee(). 
If we do this too early, we might * miss some resync ee and pages.*/ From c6d25cfe52a32232e4de0bbe6ddf8219f054f55c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 16:13:06 +0100 Subject: [PATCH 0037/5831] drbd: Preparing to use p_header96 for all packets recv_bm_rle_bits() should not make any assumptions about the layout of the packet header Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index fbf93826ef0e..12fdd737cb69 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3393,7 +3393,8 @@ receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size, static int recv_bm_rle_bits(struct drbd_conf *mdev, struct p_compressed_bm *p, - struct bm_xfer_ctx *c) + struct bm_xfer_ctx *c, + unsigned int len) { struct bitstream bs; u64 look_ahead; @@ -3401,7 +3402,6 @@ recv_bm_rle_bits(struct drbd_conf *mdev, u64 tmp; unsigned long s = c->bit_offset; unsigned long e; - int len = be16_to_cpu(p->head.length) - (sizeof(*p) - sizeof(p->head)); int toggle = DCBP_get_start(p); int have; int bits; @@ -3458,10 +3458,11 @@ recv_bm_rle_bits(struct drbd_conf *mdev, static int decode_bitmap_c(struct drbd_conf *mdev, struct p_compressed_bm *p, - struct bm_xfer_ctx *c) + struct bm_xfer_ctx *c, + unsigned int len) { if (DCBP_get_code(p) == RLE_VLI_Bits) - return recv_bm_rle_bits(mdev, p, c); + return recv_bm_rle_bits(mdev, p, c, len); /* other variants had been implemented for evaluation, * but have been dropped as this one turned out to be "best" @@ -3560,7 +3561,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size); goto out; } - err = decode_bitmap_c(mdev, p, &c); + err = decode_bitmap_c(mdev, p, &c, data_size); } else { dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd); goto out; From c012949a4084a9f91654121d28f199ef408cb9d7 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 16:58:16 +0100 Subject: [PATCH 0038/5831] drbd: Replaced all p_header80 with a generic p_header Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 64 +++++++++++++++--------------- drivers/block/drbd/drbd_main.c | 54 ++++++++++--------------- drivers/block/drbd/drbd_receiver.c | 16 ++++---- drivers/block/drbd/drbd_worker.c | 2 +- 4 files changed, 64 insertions(+), 72 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 526928c368c9..dc669dfe5b0d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -338,7 +338,6 @@ struct p_header80 { u32 magic; u16 command; u16 length; /* bytes of data after this header */ - u8 payload[0]; } __packed; /* Header for big packets, Used for data packets exceeding 64kB */ @@ -349,9 +348,12 @@ struct p_header95 { u8 payload[0]; } __packed; -union p_header { - struct p_header80 h80; - struct p_header95 h95; +struct p_header { + union { + struct p_header80 h80; + struct p_header95 h95; + }; + u8 payload[0]; }; /* @@ -380,7 +382,7 @@ union p_header { #define DP_DISCARD 64 /* equals REQ_DISCARD */ struct p_data { - union p_header head; + struct p_header head; u64 sector; /* 64 bits sector number */ u64 block_id; /* to identify the request in protocol B&C */ u32 seq_num; @@ -396,7
+398,7 @@ struct p_data { * P_DATA_REQUEST, P_RS_DATA_REQUEST */ struct p_block_ack { - struct p_header80 head; + struct p_header head; u64 sector; u64 block_id; u32 blksize; @@ -405,7 +407,7 @@ struct p_block_ack { struct p_block_req { - struct p_header80 head; + struct p_header head; u64 sector; u64 block_id; u32 blksize; @@ -422,7 +424,7 @@ struct p_block_req { */ struct p_handshake { - struct p_header80 head; /* 8 bytes */ + struct p_header head; /* Note: You must always use a h80 here */ u32 protocol_min; u32 feature_flags; u32 protocol_max; @@ -437,19 +439,19 @@ struct p_handshake { /* 80 bytes, FIXED for the next century */ struct p_barrier { - struct p_header80 head; + struct p_header head; u32 barrier; /* barrier number _handle_ only */ u32 pad; /* to multiple of 8 Byte */ } __packed; struct p_barrier_ack { - struct p_header80 head; + struct p_header head; u32 barrier; u32 set_size; } __packed; struct p_rs_param { - struct p_header80 head; + struct p_header head; u32 rate; /* Since protocol version 88 and higher. */ @@ -457,7 +459,7 @@ struct p_rs_param { } __packed; struct p_rs_param_89 { - struct p_header80 head; + struct p_header head; u32 rate; /* protocol version 89: */ char verify_alg[SHARED_SECRET_MAX]; @@ -465,7 +467,7 @@ struct p_rs_param_89 { } __packed; struct p_rs_param_95 { - struct p_header80 head; + struct p_header head; u32 rate; char verify_alg[SHARED_SECRET_MAX]; char csums_alg[SHARED_SECRET_MAX]; @@ -481,7 +483,7 @@ enum drbd_conn_flags { }; struct p_protocol { - struct p_header80 head; + struct p_header head; u32 protocol; u32 after_sb_0p; u32 after_sb_1p; @@ -495,17 +497,17 @@ struct p_protocol { } __packed; struct p_uuids { - struct p_header80 head; + struct p_header head; u64 uuid[UI_EXTENDED_SIZE]; } __packed; struct p_rs_uuid { - struct p_header80 head; + struct p_header head; u64 uuid; } __packed; struct p_sizes { - struct p_header80 head; + struct p_header head; u64 d_size; /* size of disk */ u64 u_size; /* user requested size */ u64 c_size; /* current exported size */ @@ -515,18 +517,18 @@ struct p_sizes { } __packed; struct p_state { - struct p_header80 head; + struct p_header head; u32 state; } __packed; struct p_req_state { - struct p_header80 head; + struct p_header head; u32 mask; u32 val; } __packed; struct p_req_state_reply { - struct p_header80 head; + struct p_header head; u32 retcode; } __packed; @@ -541,14 +543,14 @@ struct p_drbd06_param { } __packed; struct p_discard { - struct p_header80 head; + struct p_header head; u64 block_id; u32 seq_num; u32 pad; } __packed; struct p_block_desc { - struct p_header80 head; + struct p_header head; u64 sector; u32 blksize; u32 pad; /* to multiple of 8 Byte */ @@ -564,7 +566,7 @@ enum drbd_bitmap_code { }; struct p_compressed_bm { - struct p_header80 head; + struct p_header head; /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code * (encoding & 0x80): polarity (set/unset) of first runlength * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits @@ -576,7 +578,7 @@ struct p_compressed_bm { } __packed; struct p_delay_probe93 { - struct p_header80 head; + struct p_header head; u32 seq_num; /* sequence number to match the two probe packets */ u32 offset; /* usecs the probe got sent after the reference time point */ } __packed; @@ -625,7 +627,7 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n) * so we need to use the fixed size 4KiB page size * most architectures have used for a long time. 
*/ -#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header80)) +#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header)) #define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long)) #define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm)) #if (PAGE_SIZE < 4096) @@ -634,7 +636,7 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n) #endif union p_polymorph { - union p_header header; + struct p_header header; struct p_handshake handshake; struct p_data data; struct p_block_ack block_ack; @@ -1245,12 +1247,12 @@ extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_f extern int _drbd_send_state(struct drbd_conf *mdev); extern int drbd_send_state(struct drbd_conf *mdev); extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header80 *h, + enum drbd_packets cmd, struct p_header *h, size_t size, unsigned msg_flags); #define USE_DATA_SOCKET 1 #define USE_META_SOCKET 0 extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum drbd_packets cmd, struct p_header80 *h, + enum drbd_packets cmd, struct p_header *h, size_t size); extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, size_t size); @@ -2019,19 +2021,19 @@ static inline void request_ping(struct drbd_conf *mdev) static inline int drbd_send_short_cmd(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct p_header80 h; + struct p_header h; return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h)); } static inline int drbd_send_ping(struct drbd_conf *mdev) { - struct p_header80 h; + struct p_header h; return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h)); } static inline int drbd_send_ping_ack(struct drbd_conf *mdev) { - struct p_header80 h; + struct p_header h; return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 699f63929c1c..55ce48e24b8e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1822,9 +1822,10 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) /* the appropriate socket mutex must be held already */ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header80 *h, + enum drbd_packets cmd, struct p_header *hg, size_t size, unsigned msg_flags) { + struct p_header80 *h = (struct p_header80 *)hg; int sent, ok; if (!expect(h)) @@ -1849,7 +1850,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, * when we hold the appropriate socket mutex. 
*/ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum drbd_packets cmd, struct p_header80 *h, size_t size) + enum drbd_packets cmd, struct p_header *h, size_t size) { int ok = 0; struct socket *sock; @@ -1983,8 +1984,7 @@ int drbd_send_protocol(struct drbd_conf *mdev) if (mdev->tconn->agreed_pro_version >= 87) strcpy(p->integrity_alg, mdev->tconn->net_conf->integrity_alg); - rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL, - (struct p_header80 *)p, size); + rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL, &p->head, size); kfree(p); return rv; } @@ -2009,8 +2009,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) put_ldev(mdev); - return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS, - (struct p_header80 *)&p, sizeof(p)); + return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS, &p.head, sizeof(p)); } int drbd_send_uuids(struct drbd_conf *mdev) @@ -2054,8 +2053,7 @@ int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) drbd_md_sync(mdev); p.uuid = cpu_to_be64(uuid); - return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID, - (struct p_header80 *)&p, sizeof(p)); + return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID, &p.head, sizeof(p)); } int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags) @@ -2087,8 +2085,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl p.queue_order_type = cpu_to_be16(q_order_type); p.dds_flags = cpu_to_be16(flags); - ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, - (struct p_header80 *)&p, sizeof(p)); + ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, &p.head, sizeof(p)); return ok; } @@ -2112,8 +2109,7 @@ int drbd_send_state(struct drbd_conf *mdev) sock = mdev->tconn->data.socket; if (likely(sock != NULL)) { - ok = _drbd_send_cmd(mdev, sock, P_STATE, - (struct p_header80 *)&p, sizeof(p), 0); + ok = _drbd_send_cmd(mdev, sock, P_STATE, &p.head, sizeof(p), 0); } mutex_unlock(&mdev->tconn->data.mutex); @@ -2130,8 +2126,7 @@ int drbd_send_state_req(struct drbd_conf *mdev, p.mask = cpu_to_be32(mask.i); p.val = cpu_to_be32(val.i); - return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ, - (struct p_header80 *)&p, sizeof(p)); + return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ, &p.head, sizeof(p)); } int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) @@ -2140,8 +2135,7 @@ int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) p.retcode = cpu_to_be32(retcode); - return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY, - (struct p_header80 *)&p, sizeof(p)); + return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY, &p.head, sizeof(p)); } int fill_bitmap_rle_bits(struct drbd_conf *mdev, @@ -2246,7 +2240,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, */ static int send_bitmap_rle_or_plain(struct drbd_conf *mdev, - struct p_header80 *h, struct bm_xfer_ctx *c) + struct p_header *h, struct bm_xfer_ctx *c) { struct p_compressed_bm *p = (void*)h; unsigned long num_words; @@ -2300,7 +2294,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, int _drbd_send_bitmap(struct drbd_conf *mdev) { struct bm_xfer_ctx c; - struct p_header80 *p; + struct p_header *p; int err; if (!expect(mdev->bitmap)) @@ -2308,7 +2302,7 @@ int _drbd_send_bitmap(struct drbd_conf *mdev) /* maybe we should use some per thread scratch page, * and allocate that during initial device creation? 
*/ - p = (struct p_header80 *) __get_free_page(GFP_NOIO); + p = (struct p_header *) __get_free_page(GFP_NOIO); if (!p) { dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); return false; @@ -2365,8 +2359,7 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) if (mdev->state.conn < C_CONNECTED) return false; - ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, - (struct p_header80 *)&p, sizeof(p)); + ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, &p.head, sizeof(p)); return ok; } @@ -2393,8 +2386,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, if (!mdev->tconn->meta.socket || mdev->state.conn < C_CONNECTED) return false; - ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, - (struct p_header80 *)&p, sizeof(p)); + ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, &p.head, sizeof(p)); return ok; } @@ -2452,8 +2444,7 @@ int drbd_send_drequest(struct drbd_conf *mdev, int cmd, p.block_id = block_id; p.blksize = cpu_to_be32(size); - ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, - (struct p_header80 *)&p, sizeof(p)); + ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &p.head, sizeof(p)); return ok; } @@ -2469,9 +2460,9 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); - p.head.magic = cpu_to_be32(DRBD_MAGIC); - p.head.command = cpu_to_be16(cmd); - p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size); + p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); + p.head.h80.command = cpu_to_be16(cmd); + p.head.h80.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size); mutex_lock(&mdev->tconn->data.mutex); @@ -2492,8 +2483,7 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); - ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST, - (struct p_header80 *)&p, sizeof(p)); + ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST, &p.head, sizeof(p)); return ok; } @@ -2677,12 +2667,12 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); p.head.h80.command = cpu_to_be16(P_DATA); p.head.h80.length = - cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->i.size); + cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); } else { p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); p.head.h95.command = cpu_to_be16(P_DATA); p.head.h95.length = - cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->i.size); + cpu_to_be32(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); } p.sector = cpu_to_be64(req->i.sector); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 12fdd737cb69..9393fe482efc 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -700,7 +700,7 @@ out: static int drbd_send_fp(struct drbd_conf *mdev, struct socket *sock, enum drbd_packets cmd) { - struct p_header80 *h = &mdev->tconn->data.sbuf.header.h80; + struct p_header *h = &mdev->tconn->data.sbuf.header; return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); } @@ -925,7 +925,7 @@ out_release_sockets: static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsigned int *packet_size) { - union p_header *h = &mdev->tconn->data.rbuf.header; + struct p_header *h = &mdev->tconn->data.rbuf.header; int r; r = drbd_recv(mdev, h, sizeof(*h)); @@ -3477,7 +3477,7 @@ void 
INFO_bm_xfer_stats(struct drbd_conf *mdev, const char *direction, struct bm_xfer_ctx *c) { /* what would it take to transfer it "plaintext" */ - unsigned plain = sizeof(struct p_header80) * + unsigned plain = sizeof(struct p_header) * ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1) + c->bm_words * sizeof(long); unsigned total = c->bytes[0] + c->bytes[1]; @@ -3699,7 +3699,7 @@ static struct data_cmd drbd_cmd_handler[] = { static void drbdd(struct drbd_conf *mdev) { - union p_header *header = &mdev->tconn->data.rbuf.header; + struct p_header *header = &mdev->tconn->data.rbuf.header; unsigned int packet_size; enum drbd_packets cmd; size_t shs; /* sub header size */ @@ -3715,14 +3715,14 @@ static void drbdd(struct drbd_conf *mdev) goto err_out; } - shs = drbd_cmd_handler[cmd].pkt_size - sizeof(union p_header); + shs = drbd_cmd_handler[cmd].pkt_size - sizeof(struct p_header); if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) { dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size); goto err_out; } if (shs) { - rv = drbd_recv(mdev, &header->h80.payload, shs); + rv = drbd_recv(mdev, &header->payload, shs); if (unlikely(rv != shs)) { if (!signal_pending(current)) dev_warn(DEV, "short read while reading sub header: rv=%d\n", rv); @@ -3909,8 +3909,8 @@ static int drbd_send_handshake(struct drbd_conf *mdev) memset(p, 0, sizeof(*p)); p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); - ok = _drbd_send_cmd( mdev, mdev->tconn->data.socket, P_HAND_SHAKE, - (struct p_header80 *)p, sizeof(*p), 0 ); + ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_HAND_SHAKE, + &p->head, sizeof(*p), 0 ); mutex_unlock(&mdev->tconn->data.mutex); return ok; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 671251af6bcf..afad8ea4d888 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1224,7 +1224,7 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * dec_ap_pending will be done in got_BarrierAck * or (on connection loss) in w_clear_epoch. */ ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BARRIER, - (struct p_header80 *)p, sizeof(*p), 0); + &p->head, sizeof(*p), 0); drbd_put_data_sock(mdev); return ok; From fd340c12c98b57ec0751ebb317057eee41be0c3d Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 16:57:39 +0100 Subject: [PATCH 0039/5831] drbd: Use new header layout The new header layout will only be used if the peer supports it of course. For the first packet and the handshake packet the old (h80) layout is used for compatibility reasons. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 - drivers/block/drbd/drbd_main.c | 82 +++++++++++++----------------- drivers/block/drbd/drbd_receiver.c | 7 ++- include/linux/drbd.h | 2 +- 4 files changed, 42 insertions(+), 50 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index dc669dfe5b0d..4de43481bcb9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -345,7 +345,6 @@ struct p_header95 { u16 magic; /* use DRBD_MAGIC_BIG here */ u16 command; u32 length; /* Use only 24 bits of that. Ignore the highest 8 bit. 
*/ - u8 payload[0]; } __packed; struct p_header { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 55ce48e24b8e..f8cb15c84ed8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1820,12 +1820,36 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) } #endif +static void prepare_header80(struct drbd_conf *mdev, struct p_header80 *h, + enum drbd_packets cmd, int size) +{ + h->magic = cpu_to_be32(DRBD_MAGIC); + h->command = cpu_to_be16(cmd); + h->length = cpu_to_be16(size); +} + +static void prepare_header95(struct drbd_conf *mdev, struct p_header95 *h, + enum drbd_packets cmd, int size) +{ + h->magic = cpu_to_be16(DRBD_MAGIC_BIG); + h->command = cpu_to_be16(cmd); + h->length = cpu_to_be32(size); +} + +static void prepare_header(struct drbd_conf *mdev, struct p_header *h, + enum drbd_packets cmd, int size) +{ + if (mdev->tconn->agreed_pro_version >= 100 || size > DRBD_MAX_SIZE_H80_PACKET) + prepare_header95(mdev, &h->h95, cmd, size); + else + prepare_header80(mdev, &h->h80, cmd, size); +} + /* the appropriate socket mutex must be held already */ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header *hg, + enum drbd_packets cmd, struct p_header *h, size_t size, unsigned msg_flags) { - struct p_header80 *h = (struct p_header80 *)hg; int sent, ok; if (!expect(h)) @@ -1833,9 +1857,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, if (!expect(size)) return false; - h->magic = cpu_to_be32(DRBD_MAGIC); - h->command = cpu_to_be16(cmd); - h->length = cpu_to_be16(size-sizeof(struct p_header80)); + prepare_header(mdev, h, cmd, size - sizeof(struct p_header)); sent = drbd_send(mdev, sock, h, size, msg_flags); @@ -1878,12 +1900,10 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, size_t size) { - struct p_header80 h; + struct p_header h; int ok; - h.magic = cpu_to_be32(DRBD_MAGIC); - h.command = cpu_to_be16(cmd); - h.length = cpu_to_be16(size); + prepare_header(mdev, &h, cmd, size); if (!drbd_get_data_sock(mdev)) return 0; @@ -2456,14 +2476,11 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, int ok; struct p_block_req p; + prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header) + digest_size); p.sector = cpu_to_be64(sector); p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); - p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); - p.head.h80.command = cpu_to_be16(cmd); - p.head.h80.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size); - mutex_lock(&mdev->tconn->data.mutex); ok = (sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), 0)); @@ -2663,22 +2680,10 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? 
crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; - if (req->i.size <= DRBD_MAX_SIZE_H80_PACKET) { - p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); - p.head.h80.command = cpu_to_be16(P_DATA); - p.head.h80.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); - } else { - p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); - p.head.h95.command = cpu_to_be16(P_DATA); - p.head.h95.length = - cpu_to_be32(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); - } - + prepare_header(mdev, &p.head, P_DATA, sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); p.sector = cpu_to_be64(req->i.sector); p.block_id = (unsigned long)req; - p.seq_num = cpu_to_be32(req->seq_num = - atomic_add_return(1, &mdev->packet_seq)); + p.seq_num = cpu_to_be32(req->seq_num = atomic_add_return(1, &mdev->packet_seq)); dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw); @@ -2748,18 +2753,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; - if (e->i.size <= DRBD_MAX_SIZE_H80_PACKET) { - p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); - p.head.h80.command = cpu_to_be16(cmd); - p.head.h80.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); - } else { - p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); - p.head.h95.command = cpu_to_be16(cmd); - p.head.h95.length = - cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); - } - + prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); p.sector = cpu_to_be64(e->i.sector); p.block_id = e->block_id; /* p.seq_num = 0; No sequence numbers here.. */ @@ -3028,7 +3022,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker); drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender); - mdev->tconn->agreed_pro_version = PRO_VERSION_MAX; + /* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */ mdev->write_ordering = WO_bdev_flush; mdev->resync_wenr = LC_FREE; mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; @@ -3506,12 +3500,8 @@ int __init drbd_init(void) { int err; - if (sizeof(struct p_handshake) != 80) { - printk(KERN_ERR - "drbd: never change the size or layout " - "of the HandShake packet.\n"); - return -EINVAL; - } + BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95)); + BUILD_BUG_ON(sizeof(struct p_handshake) != 80); if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { printk(KERN_ERR diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9393fe482efc..8f5a241fe20a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -761,6 +761,9 @@ static int drbd_connect(struct drbd_conf *mdev) return -2; clear_bit(DISCARD_CONCURRENT, &mdev->flags); + mdev->tconn->agreed_pro_version = 99; + /* agreed_pro_version must be smaller than 100 so we send the old + header (h80) in the first packet and in the handshake packet. 
*/ sock = NULL; msock = NULL; @@ -935,12 +938,12 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi return false; } - if (likely(h->h80.magic == cpu_to_be32(DRBD_MAGIC))) { + if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) { *cmd = be16_to_cpu(h->h80.command); *packet_size = be16_to_cpu(h->h80.length); } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) { *cmd = be16_to_cpu(h->h95.command); - *packet_size = be32_to_cpu(h->h95.length); + *packet_size = be32_to_cpu(h->h95.length) & 0x00ffffff; } else { dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n", be32_to_cpu(h->h80.magic), diff --git a/include/linux/drbd.h b/include/linux/drbd.h index d28202811672..35fc08a0a552 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void); #define REL_VERSION "8.3.11" #define API_VERSION 88 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 96 +#define PRO_VERSION_MAX 100 enum drbd_io_error_p { From 257d0af689df9aaf6ebecfc8d66b15415006c257 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 26 Jan 2011 12:15:29 +0100 Subject: [PATCH 0040/5831] drbd: Implemented receiving of new style packets on meta socket Now drbd communication with protocol 100 actually works. Replaced the remaining p_header80 with p_header where we no longer know which header it is. In the places where p_header80 is still in use, it is on purpose, because we know that it is an old style header there. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 133 +++++++++++++++-------------- 1 file changed, 68 insertions(+), 65 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8f5a241fe20a..c0435c4f5d84 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -926,18 +926,9 @@ out_release_sockets: return -1; } -static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsigned int *packet_size) +static bool decode_header(struct drbd_conf *mdev, struct p_header *h, enum drbd_packets *cmd, + unsigned int *packet_size) { - struct p_header *h = &mdev->tconn->data.rbuf.header; - int r; - - r = drbd_recv(mdev, h, sizeof(*h)); - if (unlikely(r != sizeof(*h))) { - if (!signal_pending(current)) - dev_warn(DEV, "short read expecting header on sock: r=%d\n", r); - return false; - } - if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) { *cmd = be16_to_cpu(h->h80.command); *packet_size = be16_to_cpu(h->h80.length); @@ -951,9 +942,25 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi be16_to_cpu(h->h80.length)); return false; } + return true; +} + +static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsigned int *packet_size) +{ + struct p_header *h = &mdev->tconn->data.rbuf.header; + int r; + + r = drbd_recv(mdev, h, sizeof(*h)); + if (unlikely(r != sizeof(*h))) { + if (!signal_pending(current)) + dev_warn(DEV, "short read expecting header on sock: r=%d\n", r); + return false; + } + + r = decode_header(mdev, h, cmd, packet_size); mdev->tconn->last_received = jiffies; - return true; + return r; } static void drbd_flush(struct drbd_conf *mdev) @@ -2807,14 +2814,14 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi } if (apv <= 88) { - header_size = sizeof(struct p_rs_param) - sizeof(struct p_header80); + header_size = sizeof(struct p_rs_param) - sizeof(struct p_header); data_size = packet_size - header_size; } else if 
(apv <= 94) { - header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header80); + header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header); data_size = packet_size - header_size; D_ASSERT(data_size == 0); } else { - header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header80); + header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header); data_size = packet_size - header_size; D_ASSERT(data_size == 0); } @@ -3524,7 +3531,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne void *buffer; int err; int ok = false; - struct p_header80 *h = &mdev->tconn->data.rbuf.header.h80; + struct p_header *h = &mdev->tconn->data.rbuf.header; drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED); /* you are supposed to send additional out-of-sync information @@ -3571,7 +3578,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne } c.packets[cmd == P_BITMAP]++; - c.bytes[cmd == P_BITMAP] += sizeof(struct p_header80) + data_size; + c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size; if (err <= 0) { if (err < 0) @@ -3670,13 +3677,13 @@ static struct data_cmd drbd_cmd_handler[] = { [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply }, [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } , [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } , - [P_BITMAP] = { 1, sizeof(struct p_header80), receive_bitmap } , - [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header80), receive_bitmap } , - [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header80), receive_UnplugRemote }, + [P_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } , + [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } , + [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), receive_UnplugRemote }, [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, - [P_SYNC_PARAM] = { 1, sizeof(struct p_header80), receive_SyncParam }, - [P_SYNC_PARAM89] = { 1, sizeof(struct p_header80), receive_SyncParam }, + [P_SYNC_PARAM] = { 1, sizeof(struct p_header), receive_SyncParam }, + [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), receive_SyncParam }, [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol }, [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids }, [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes }, @@ -4184,9 +4191,9 @@ int drbdd_init(struct drbd_thread *thi) /* ********* acknowledge sender ******** */ -static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h) +static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct p_req_state_reply *p = (struct p_req_state_reply *)h; + struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply; int retcode = be32_to_cpu(p->retcode); @@ -4202,13 +4209,13 @@ static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h) return true; } -static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h) +static int got_Ping(struct drbd_conf *mdev, enum drbd_packets cmd) { return drbd_send_ping_ack(mdev); } -static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h) +static int got_PingAck(struct drbd_conf *mdev, enum drbd_packets cmd) { /* restore idle timeout */ mdev->tconn->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ; @@ -4218,9 +4225,9 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h) return true; } -static int 
got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) +static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct p_block_ack *p = (struct p_block_ack *)h; + struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); @@ -4263,9 +4270,9 @@ validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, return true; } -static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) +static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct p_block_ack *p = (struct p_block_ack *)h; + struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); enum drbd_req_event what; @@ -4277,7 +4284,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) dec_rs_pending(mdev); return true; } - switch (be16_to_cpu(h->command)) { + switch (cmd) { case P_RS_WRITE_ACK: D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); what = WRITE_ACKED_BY_PEER_AND_SIS; @@ -4304,9 +4311,9 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) what, false); } -static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) +static int got_NegAck(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct p_block_ack *p = (struct p_block_ack *)h; + struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector_t sector = be64_to_cpu(p->sector); int size = be32_to_cpu(p->blksize); bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A || @@ -4337,9 +4344,9 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) return true; } -static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) +static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct p_block_ack *p = (struct p_block_ack *)h; + struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector_t sector = be64_to_cpu(p->sector); update_peer_seq(mdev, be32_to_cpu(p->seq_num)); @@ -4351,11 +4358,11 @@ static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) NEG_ACKED, false); } -static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) +static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packets cmd) { sector_t sector; int size; - struct p_block_ack *p = (struct p_block_ack *)h; + struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); @@ -4366,7 +4373,7 @@ static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) if (get_ldev_if_state(mdev, D_FAILED)) { drbd_rs_complete_io(mdev, sector); - switch (be16_to_cpu(h->command)) { + switch (cmd) { case P_NEG_RS_DREPLY: drbd_rs_failed_io(mdev, sector, size); case P_RS_CANCEL: @@ -4382,9 +4389,9 @@ static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) return true; } -static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) +static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct p_barrier_ack *p = (struct p_barrier_ack *)h; + struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack; tl_release(mdev, p->barrier, be32_to_cpu(p->set_size)); @@ -4398,9 +4405,9 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) return true; } -static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) +static int got_OVResult(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct 
p_block_ack *p = (struct p_block_ack *)h; + struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; struct drbd_work *w; sector_t sector; int size; @@ -4442,14 +4449,14 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) return true; } -static int got_skip(struct drbd_conf *mdev, struct p_header80 *h) +static int got_skip(struct drbd_conf *mdev, enum drbd_packets cmd) { return true; } struct asender_cmd { size_t pkt_size; - int (*process)(struct drbd_conf *mdev, struct p_header80 *h); + int (*process)(struct drbd_conf *mdev, enum drbd_packets cmd); }; static struct asender_cmd *get_asender_cmd(int cmd) @@ -4458,8 +4465,8 @@ static struct asender_cmd *get_asender_cmd(int cmd) /* anything missing from this table is in * the drbd_cmd_handler (drbd_default_handler) table, * see the beginning of drbdd() */ - [P_PING] = { sizeof(struct p_header80), got_Ping }, - [P_PING_ACK] = { sizeof(struct p_header80), got_PingAck }, + [P_PING] = { sizeof(struct p_header), got_Ping }, + [P_PING_ACK] = { sizeof(struct p_header), got_PingAck }, [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, @@ -4483,15 +4490,16 @@ static struct asender_cmd *get_asender_cmd(int cmd) int drbd_asender(struct drbd_thread *thi) { struct drbd_conf *mdev = thi->mdev; - struct p_header80 *h = &mdev->tconn->meta.rbuf.header.h80; + struct p_header *h = &mdev->tconn->meta.rbuf.header; struct asender_cmd *cmd = NULL; - int rv, len; + int rv; void *buf = h; int received = 0; - int expect = sizeof(struct p_header80); - int empty; + int expect = sizeof(struct p_header); int ping_timeout_active = 0; + int empty, pkt_size; + enum drbd_packets cmd_nr; sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); @@ -4581,30 +4589,25 @@ int drbd_asender(struct drbd_thread *thi) } if (received == expect && cmd == NULL) { - if (unlikely(h->magic != cpu_to_be32(DRBD_MAGIC))) { - dev_err(DEV, "magic?? on meta m: 0x%08x c: %d l: %d\n", - be32_to_cpu(h->magic), - be16_to_cpu(h->command), - be16_to_cpu(h->length)); + if (!decode_header(mdev, h, &cmd_nr, &pkt_size)) goto reconnect; - } - cmd = get_asender_cmd(be16_to_cpu(h->command)); - len = be16_to_cpu(h->length); + cmd = get_asender_cmd(cmd_nr); if (unlikely(cmd == NULL)) { - dev_err(DEV, "unknown command?? 
on meta m: 0x%08x c: %d l: %d\n", - be32_to_cpu(h->magic), - be16_to_cpu(h->command), - be16_to_cpu(h->length)); + dev_err(DEV, "unknown command %d on meta (l: %d)\n", + cmd_nr, pkt_size); goto disconnect; } expect = cmd->pkt_size; - if (!expect(len == expect - sizeof(struct p_header80))) + if (pkt_size != expect - sizeof(struct p_header)) { + dev_err(DEV, "Wrong packet size on meta (c: %d, l: %d)\n", + cmd_nr, pkt_size); goto reconnect; + } } if (received == expect) { mdev->tconn->last_received = jiffies; D_ASSERT(cmd != NULL); - if (!cmd->process(mdev, h)) + if (!cmd->process(mdev, cmd_nr)) goto reconnect; /* the idle_timeout (ping-int) @@ -4614,7 +4617,7 @@ int drbd_asender(struct drbd_thread *thi) buf = h; received = 0; - expect = sizeof(struct p_header80); + expect = sizeof(struct p_header); cmd = NULL; } } From b42a70ad32539019c15457fce172194b0f8353d5 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 27 Jan 2011 10:55:20 +0100 Subject: [PATCH 0041/5831] drbd: Do not access tconn after it was freed Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f8cb15c84ed8..8349d42fa131 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3260,10 +3260,6 @@ static void drbd_delete_device(unsigned int minor) kfree(mdev->p_uuid); /* mdev->p_uuid = NULL; */ - kfree(mdev->tconn->int_dig_out); - kfree(mdev->tconn->int_dig_in); - kfree(mdev->tconn->int_dig_vv); - /* cleanup the rest that has been * allocated from drbd_new_device * and actually free the mdev itself */ @@ -3377,6 +3373,9 @@ void drbd_free_tconn(struct drbd_tconn *tconn) write_unlock_irq(&global_state_lock); kfree(tconn->name); + kfree(tconn->int_dig_out); + kfree(tconn->int_dig_in); + kfree(tconn->int_dig_vv); kfree(tconn); } From f2ad90637978e9cff3bdd32d414c9e851e47868c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 26 Jan 2011 17:13:25 +0100 Subject: [PATCH 0042/5831] drbd: Move cmdname() out of drbd_int.h There is no good reason for cmdname() to be an inline function. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 59 +--------------------------------- drivers/block/drbd/drbd_main.c | 59 ++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 58 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4de43481bcb9..e8a1fa556956 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -221,64 +221,7 @@ enum drbd_packets { P_HAND_SHAKE = 0xfffe /* FIXED for the next century! 
*/ }; -static inline const char *cmdname(enum drbd_packets cmd) -{ - /* THINK may need to become several global tables - * when we want to support more than - * one PRO_VERSION */ - static const char *cmdnames[] = { - [P_DATA] = "Data", - [P_DATA_REPLY] = "DataReply", - [P_RS_DATA_REPLY] = "RSDataReply", - [P_BARRIER] = "Barrier", - [P_BITMAP] = "ReportBitMap", - [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget", - [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource", - [P_UNPLUG_REMOTE] = "UnplugRemote", - [P_DATA_REQUEST] = "DataRequest", - [P_RS_DATA_REQUEST] = "RSDataRequest", - [P_SYNC_PARAM] = "SyncParam", - [P_SYNC_PARAM89] = "SyncParam89", - [P_PROTOCOL] = "ReportProtocol", - [P_UUIDS] = "ReportUUIDs", - [P_SIZES] = "ReportSizes", - [P_STATE] = "ReportState", - [P_SYNC_UUID] = "ReportSyncUUID", - [P_AUTH_CHALLENGE] = "AuthChallenge", - [P_AUTH_RESPONSE] = "AuthResponse", - [P_PING] = "Ping", - [P_PING_ACK] = "PingAck", - [P_RECV_ACK] = "RecvAck", - [P_WRITE_ACK] = "WriteAck", - [P_RS_WRITE_ACK] = "RSWriteAck", - [P_DISCARD_ACK] = "DiscardAck", - [P_NEG_ACK] = "NegAck", - [P_NEG_DREPLY] = "NegDReply", - [P_NEG_RS_DREPLY] = "NegRSDReply", - [P_BARRIER_ACK] = "BarrierAck", - [P_STATE_CHG_REQ] = "StateChgRequest", - [P_STATE_CHG_REPLY] = "StateChgReply", - [P_OV_REQUEST] = "OVRequest", - [P_OV_REPLY] = "OVReply", - [P_OV_RESULT] = "OVResult", - [P_CSUM_RS_REQUEST] = "CsumRSRequest", - [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", - [P_COMPRESSED_BITMAP] = "CBitmap", - [P_DELAY_PROBE] = "DelayProbe", - [P_OUT_OF_SYNC] = "OutOfSync", - [P_MAX_CMD] = NULL, - }; - - if (cmd == P_HAND_SHAKE_M) - return "HandShakeM"; - if (cmd == P_HAND_SHAKE_S) - return "HandShakeS"; - if (cmd == P_HAND_SHAKE) - return "HandShake"; - if (cmd >= P_MAX_CMD) - return "Unknown"; - return cmdnames[cmd]; -} +extern const char *cmdname(enum drbd_packets cmd); /* for sending/receiving the bitmap, * possibly in some encoding scheme */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8349d42fa131..6090276ad9f4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -4101,6 +4101,65 @@ static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused) return 1; } +const char *cmdname(enum drbd_packets cmd) +{ + /* THINK may need to become several global tables + * when we want to support more than + * one PRO_VERSION */ + static const char *cmdnames[] = { + [P_DATA] = "Data", + [P_DATA_REPLY] = "DataReply", + [P_RS_DATA_REPLY] = "RSDataReply", + [P_BARRIER] = "Barrier", + [P_BITMAP] = "ReportBitMap", + [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget", + [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource", + [P_UNPLUG_REMOTE] = "UnplugRemote", + [P_DATA_REQUEST] = "DataRequest", + [P_RS_DATA_REQUEST] = "RSDataRequest", + [P_SYNC_PARAM] = "SyncParam", + [P_SYNC_PARAM89] = "SyncParam89", + [P_PROTOCOL] = "ReportProtocol", + [P_UUIDS] = "ReportUUIDs", + [P_SIZES] = "ReportSizes", + [P_STATE] = "ReportState", + [P_SYNC_UUID] = "ReportSyncUUID", + [P_AUTH_CHALLENGE] = "AuthChallenge", + [P_AUTH_RESPONSE] = "AuthResponse", + [P_PING] = "Ping", + [P_PING_ACK] = "PingAck", + [P_RECV_ACK] = "RecvAck", + [P_WRITE_ACK] = "WriteAck", + [P_RS_WRITE_ACK] = "RSWriteAck", + [P_DISCARD_ACK] = "DiscardAck", + [P_NEG_ACK] = "NegAck", + [P_NEG_DREPLY] = "NegDReply", + [P_NEG_RS_DREPLY] = "NegRSDReply", + [P_BARRIER_ACK] = "BarrierAck", + [P_STATE_CHG_REQ] = "StateChgRequest", + [P_STATE_CHG_REPLY] = "StateChgReply", + [P_OV_REQUEST] = "OVRequest", + [P_OV_REPLY] = "OVReply", + [P_OV_RESULT] = 
"OVResult", + [P_CSUM_RS_REQUEST] = "CsumRSRequest", + [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", + [P_COMPRESSED_BITMAP] = "CBitmap", + [P_DELAY_PROBE] = "DelayProbe", + [P_OUT_OF_SYNC] = "OutOfSync", + [P_MAX_CMD] = NULL, + }; + + if (cmd == P_HAND_SHAKE_M) + return "HandShakeM"; + if (cmd == P_HAND_SHAKE_S) + return "HandShakeS"; + if (cmd == P_HAND_SHAKE) + return "HandShake"; + if (cmd >= P_MAX_CMD) + return "Unknown"; + return cmdnames[cmd]; +} + #ifdef CONFIG_DRBD_FAULT_INJECTION /* Fault insertion support including random number generator shamelessly * stolen from kernel/rcutorture.c */ From d87630230616ba2c13141184258906d34c727b4b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 26 Jan 2011 17:39:41 +0100 Subject: [PATCH 0043/5831] drbd: Rename "enum drbd_packets" to "enum drbd_packet" Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 40 ++++++------ drivers/block/drbd/drbd_main.c | 43 ++++++------- drivers/block/drbd/drbd_receiver.c | 97 ++++++++++++++++++------------ 3 files changed, 97 insertions(+), 83 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e8a1fa556956..9f5c13513d63 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -159,7 +159,7 @@ extern struct drbd_conf **minor_table; extern struct ratelimit_state drbd_ratelimit_state; /* on the wire */ -enum drbd_packets { +enum drbd_packet { /* receiver (data socket) */ P_DATA = 0x00, P_DATA_REPLY = 0x01, /* Response to P_DATA_REQUEST */ @@ -221,7 +221,7 @@ enum drbd_packets { P_HAND_SHAKE = 0xfffe /* FIXED for the next century! */ }; -extern const char *cmdname(enum drbd_packets cmd); +extern const char *cmdname(enum drbd_packet cmd); /* for sending/receiving the bitmap, * possibly in some encoding scheme */ @@ -1189,36 +1189,34 @@ extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_f extern int _drbd_send_state(struct drbd_conf *mdev); extern int drbd_send_state(struct drbd_conf *mdev); extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header *h, - size_t size, unsigned msg_flags); + enum drbd_packet cmd, struct p_header *h, + size_t size, unsigned msg_flags); #define USE_DATA_SOCKET 1 #define USE_META_SOCKET 0 extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum drbd_packets cmd, struct p_header *h, - size_t size); -extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, - char *data, size_t size); + enum drbd_packet cmd, struct p_header *h, size_t size); +extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packet cmd, + char *data, size_t size); extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size); -extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, - struct drbd_epoch_entry *e); -extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd, - struct p_block_req *rp); -extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, - struct p_data *dp, int data_size); -extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, +extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, + struct drbd_epoch_entry *e); +extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, + struct p_block_req *rp); +extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet 
cmd, + struct p_data *dp, int data_size); +extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd, sector_t sector, int blksize, u64 block_id); extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req); -extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, +extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, struct drbd_epoch_entry *e); extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd, sector_t sector, int size, u64 block_id); -extern int drbd_send_drequest_csum(struct drbd_conf *mdev, - sector_t sector,int size, - void *digest, int digest_size, - enum drbd_packets cmd); +extern int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, + int size, void *digest, int digest_size, + enum drbd_packet cmd); extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size); extern int drbd_send_bitmap(struct drbd_conf *mdev); @@ -1961,7 +1959,7 @@ static inline void request_ping(struct drbd_conf *mdev) } static inline int drbd_send_short_cmd(struct drbd_conf *mdev, - enum drbd_packets cmd) + enum drbd_packet cmd) { struct p_header h; return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h)); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 6090276ad9f4..81bd1f3b1353 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1821,7 +1821,7 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) #endif static void prepare_header80(struct drbd_conf *mdev, struct p_header80 *h, - enum drbd_packets cmd, int size) + enum drbd_packet cmd, int size) { h->magic = cpu_to_be32(DRBD_MAGIC); h->command = cpu_to_be16(cmd); @@ -1829,7 +1829,7 @@ static void prepare_header80(struct drbd_conf *mdev, struct p_header80 *h, } static void prepare_header95(struct drbd_conf *mdev, struct p_header95 *h, - enum drbd_packets cmd, int size) + enum drbd_packet cmd, int size) { h->magic = cpu_to_be16(DRBD_MAGIC_BIG); h->command = cpu_to_be16(cmd); @@ -1837,7 +1837,7 @@ static void prepare_header95(struct drbd_conf *mdev, struct p_header95 *h, } static void prepare_header(struct drbd_conf *mdev, struct p_header *h, - enum drbd_packets cmd, int size) + enum drbd_packet cmd, int size) { if (mdev->tconn->agreed_pro_version >= 100 || size > DRBD_MAX_SIZE_H80_PACKET) prepare_header95(mdev, &h->h95, cmd, size); @@ -1847,8 +1847,8 @@ static void prepare_header(struct drbd_conf *mdev, struct p_header *h, /* the appropriate socket mutex must be held already */ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header *h, - size_t size, unsigned msg_flags) + enum drbd_packet cmd, struct p_header *h, size_t size, + unsigned msg_flags) { int sent, ok; @@ -1872,7 +1872,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, * when we hold the appropriate socket mutex. 
*/ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum drbd_packets cmd, struct p_header *h, size_t size) + enum drbd_packet cmd, struct p_header *h, size_t size) { int ok = 0; struct socket *sock; @@ -1897,7 +1897,7 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, return ok; } -int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, +int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packet cmd, char *data, size_t size) { struct p_header h; @@ -1938,7 +1938,8 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) sock = mdev->tconn->data.socket; if (likely(sock != NULL)) { - enum drbd_packets cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM; + enum drbd_packet cmd = + apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM; p = &mdev->tconn->data.sbuf.rs_param_95; @@ -2391,10 +2392,8 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) * @blksize: size in byte, needs to be in big endian byte order * @block_id: Id, big endian byte order */ -static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, - u64 sector, - u32 blksize, - u64 block_id) +static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, + u64 sector, u32 blksize, u64 block_id) { int ok; struct p_block_ack p; @@ -2413,7 +2412,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, /* dp->sector and dp->block_id already/still in network byte order, * data_size is payload size according to dp->head, * and may need to be corrected for digest size. */ -int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, +int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, struct p_data *dp, int data_size) { data_size -= (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ? @@ -2422,7 +2421,7 @@ int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, dp->block_id); } -int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd, +int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, struct p_block_req *rp) { return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id); @@ -2434,8 +2433,8 @@ int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd, * @cmd: Packet command code. * @e: Epoch entry. */ -int drbd_send_ack(struct drbd_conf *mdev, - enum drbd_packets cmd, struct drbd_epoch_entry *e) +int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, + struct drbd_epoch_entry *e) { return _drbd_send_ack(mdev, cmd, cpu_to_be64(e->i.sector), @@ -2445,7 +2444,7 @@ int drbd_send_ack(struct drbd_conf *mdev, /* This function misuses the block_id field to signal if the blocks * are is sync or not. 
*/ -int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, +int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd, sector_t sector, int blksize, u64 block_id) { return _drbd_send_ack(mdev, cmd, @@ -2468,10 +2467,8 @@ int drbd_send_drequest(struct drbd_conf *mdev, int cmd, return ok; } -int drbd_send_drequest_csum(struct drbd_conf *mdev, - sector_t sector, int size, - void *digest, int digest_size, - enum drbd_packets cmd) +int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size, + void *digest, int digest_size, enum drbd_packet cmd) { int ok; struct p_block_req p; @@ -2742,7 +2739,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY) * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY) */ -int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, +int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, struct drbd_epoch_entry *e) { int ok; @@ -4101,7 +4098,7 @@ static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused) return 1; } -const char *cmdname(enum drbd_packets cmd) +const char *cmdname(enum drbd_packet cmd) { /* THINK may need to become several global tables * when we want to support more than diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c0435c4f5d84..31f6875ceba5 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -697,15 +697,16 @@ out: return s_estab; } -static int drbd_send_fp(struct drbd_conf *mdev, - struct socket *sock, enum drbd_packets cmd) +static int drbd_send_fp(struct drbd_conf *mdev, struct socket *sock, + enum drbd_packet cmd) { struct p_header *h = &mdev->tconn->data.sbuf.header; return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); } -static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) +static enum drbd_packet drbd_recv_fp(struct drbd_conf *mdev, + struct socket *sock) { struct p_header80 *h = &mdev->tconn->data.rbuf.header.h80; int rr; @@ -926,8 +927,8 @@ out_release_sockets: return -1; } -static bool decode_header(struct drbd_conf *mdev, struct p_header *h, enum drbd_packets *cmd, - unsigned int *packet_size) +static bool decode_header(struct drbd_conf *mdev, struct p_header *h, + enum drbd_packet *cmd, unsigned int *packet_size) { if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) { *cmd = be16_to_cpu(h->h80.command); @@ -945,7 +946,8 @@ static bool decode_header(struct drbd_conf *mdev, struct p_header *h, enum drbd_ return true; } -static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsigned int *packet_size) +static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packet *cmd, + unsigned int *packet_size) { struct p_header *h = &mdev->tconn->data.rbuf.header; int r; @@ -1170,7 +1172,8 @@ fail: return err; } -static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { int rv; struct p_barrier *p = &mdev->tconn->data.rbuf.barrier; @@ -1499,7 +1502,8 @@ find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id, return NULL; } -static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct drbd_request *req; sector_t sector; @@ -1528,7 +1532,8 @@ static int receive_DataReply(struct 
drbd_conf *mdev, enum drbd_packets cmd, unsi return ok; } -static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { sector_t sector; int ok; @@ -1681,7 +1686,8 @@ static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf) } /* mirrored write */ -static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { sector_t sector; struct drbd_epoch_entry *e; @@ -1966,7 +1972,8 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) } -static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int digest_size) +static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int digest_size) { sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); @@ -2691,7 +2698,8 @@ static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) return 1; } -static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct p_protocol *p = &mdev->tconn->data.rbuf.protocol; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; @@ -2790,7 +2798,8 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, return tfm; } -static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size) +static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int packet_size) { int ok = true; struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95; @@ -2954,7 +2963,8 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev, (unsigned long long)a, (unsigned long long)b); } -static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct p_sizes *p = &mdev->tconn->data.rbuf.sizes; enum determine_dev_size dd = unchanged; @@ -3057,7 +3067,8 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned return true; } -static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct p_uuids *p = &mdev->tconn->data.rbuf.uuids; u64 *p_uuid; @@ -3151,7 +3162,8 @@ static union drbd_state convert_state(union drbd_state ps) return ms; } -static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct p_req_state *p = &mdev->tconn->data.rbuf.req_state; union drbd_state mask, val; @@ -3177,7 +3189,8 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi return true; } -static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct p_state *p = &mdev->tconn->data.rbuf.state; union drbd_state os, ns, peer_state; @@ -3329,7 +3342,8 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, 
unsigned return true; } -static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid; @@ -3525,7 +3539,8 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, in order to be agnostic to the 32 vs 64 bits issue. returns 0 on failure, 1 if we successfully received it. */ -static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct bm_xfer_ctx c; void *buffer; @@ -3616,7 +3631,8 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne return ok; } -static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { /* TODO zero copy sink :) */ static char sink[128]; @@ -3636,7 +3652,8 @@ static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned return size == 0; } -static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { /* Make sure we've acked all the TCP data associated * with the data requests being unplugged */ @@ -3645,7 +3662,8 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, u return true; } -static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) +static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd, + unsigned int data_size) { struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc; @@ -3664,7 +3682,8 @@ static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, un return true; } -typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive); +typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packet cmd, + unsigned int to_receive); struct data_cmd { int expect_payload; @@ -3711,7 +3730,7 @@ static void drbdd(struct drbd_conf *mdev) { struct p_header *header = &mdev->tconn->data.rbuf.header; unsigned int packet_size; - enum drbd_packets cmd; + enum drbd_packet cmd; size_t shs; /* sub header size */ int rv; @@ -3938,7 +3957,7 @@ static int drbd_do_handshake(struct drbd_conf *mdev) struct p_handshake *p = &mdev->tconn->data.rbuf.handshake; const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80); unsigned int length; - enum drbd_packets cmd; + enum drbd_packet cmd; int rv; rv = drbd_send_handshake(mdev); @@ -4019,7 +4038,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) unsigned int key_len = strlen(mdev->tconn->net_conf->shared_secret); unsigned int resp_size; struct hash_desc desc; - enum drbd_packets cmd; + enum drbd_packet cmd; unsigned int length; int rv; @@ -4191,7 +4210,7 @@ int drbdd_init(struct drbd_thread *thi) /* ********* acknowledge sender ******** */ -static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply; @@ -4209,13 +4228,13 @@ static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packets cmd) return true; } -static int got_Ping(struct drbd_conf *mdev, enum 
drbd_packets cmd) +static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd) { return drbd_send_ping_ack(mdev); } -static int got_PingAck(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd) { /* restore idle timeout */ mdev->tconn->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ; @@ -4225,7 +4244,7 @@ static int got_PingAck(struct drbd_conf *mdev, enum drbd_packets cmd) return true; } -static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector_t sector = be64_to_cpu(p->sector); @@ -4270,7 +4289,7 @@ validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, return true; } -static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector_t sector = be64_to_cpu(p->sector); @@ -4311,7 +4330,7 @@ static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packets cmd) what, false); } -static int got_NegAck(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector_t sector = be64_to_cpu(p->sector); @@ -4344,7 +4363,7 @@ static int got_NegAck(struct drbd_conf *mdev, enum drbd_packets cmd) return true; } -static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; sector_t sector = be64_to_cpu(p->sector); @@ -4358,7 +4377,7 @@ static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packets cmd) NEG_ACKED, false); } -static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd) { sector_t sector; int size; @@ -4389,7 +4408,7 @@ static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packets cmd) return true; } -static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack; @@ -4405,7 +4424,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packets cmd) return true; } -static int got_OVResult(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack; struct drbd_work *w; @@ -4449,14 +4468,14 @@ static int got_OVResult(struct drbd_conf *mdev, enum drbd_packets cmd) return true; } -static int got_skip(struct drbd_conf *mdev, enum drbd_packets cmd) +static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd) { return true; } struct asender_cmd { size_t pkt_size; - int (*process)(struct drbd_conf *mdev, enum drbd_packets cmd); + int (*process)(struct drbd_conf *mdev, enum drbd_packet cmd); }; static struct asender_cmd *get_asender_cmd(int cmd) @@ -4499,7 +4518,7 @@ int drbd_asender(struct drbd_thread *thi) int expect = sizeof(struct p_header); int ping_timeout_active = 0; int empty, pkt_size; - enum drbd_packets cmd_nr; + enum drbd_packet cmd_nr; sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); From bdc7adb006c6213190eedb8567962ff3d41d226d Mon Sep 17 
00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 26 Jan 2011 17:49:00 +0100 Subject: [PATCH 0044/5831] drbd: Remove redundant initialization packet_seq is initialized by both sides of a connection in drbd_connect(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 81bd1f3b1353..b86ef59b5219 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2953,7 +2953,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->rs_pending_cnt, 0); atomic_set(&mdev->unacked_cnt, 0); atomic_set(&mdev->local_cnt, 0); - atomic_set(&mdev->packet_seq, 0); atomic_set(&mdev->pp_in_use, 0); atomic_set(&mdev->pp_in_use_by_net, 0); atomic_set(&mdev->rs_sect_in, 0); From cc378270e4abb9273ce1641d30bf6e84248f7b2e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 26 Jan 2011 18:01:50 +0100 Subject: [PATCH 0045/5831] drbd: Initialize the sequence number sent over the network even when not used Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b86ef59b5219..701f231cf4bf 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2753,7 +2753,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); p.sector = cpu_to_be64(e->i.sector); p.block_id = e->block_id; - /* p.seq_num = 0; No sequence numbers here.. */ + p.seq_num = 0; /* unused */ /* Only called by our kernel thread. * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL From 3e394da184ab32d2c345fd459e1eeb7b9586bb4e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 26 Jan 2011 18:36:55 +0100 Subject: [PATCH 0046/5831] drbd: Move sequence number logic into drbd_receiver.c and simplify it These things are only used there. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 27 --------------------------- drivers/block/drbd/drbd_receiver.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9f5c13513d63..cb45ca10d4b1 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2355,33 +2355,6 @@ static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) return changed; } -static inline int seq_cmp(u32 a, u32 b) -{ - /* we assume wrap around at 32bit. - * for wrap around at 24bit (old atomic_t), - * we'd have to - * a <<= 8; b <<= 8; - */ - return (s32)(a) - (s32)(b); -} -#define seq_lt(a, b) (seq_cmp((a), (b)) < 0) -#define seq_gt(a, b) (seq_cmp((a), (b)) > 0) -#define seq_ge(a, b) (seq_cmp((a), (b)) >= 0) -#define seq_le(a, b) (seq_cmp((a), (b)) <= 0) -/* CAUTION: please no side effects in arguments! */ -#define seq_max(a, b) ((u32)(seq_gt((a), (b)) ? 
(a) : (b))) - -static inline void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq) -{ - unsigned int m; - spin_lock(&mdev->peer_seq_lock); - m = seq_max(mdev->peer_seq, new_seq); - mdev->peer_seq = m; - spin_unlock(&mdev->peer_seq_lock); - if (m == new_seq) - wake_up(&mdev->seq_wait); -} - static inline void drbd_update_congested(struct drbd_conf *mdev) { struct sock *sk = mdev->tconn->data.socket->sk; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 31f6875ceba5..b4e1dab62dc1 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1621,6 +1621,33 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u return ok; } +static bool seq_greater(u32 a, u32 b) +{ + /* + * We assume 32-bit wrap-around here. + * For 24-bit wrap-around, we would have to shift: + * a <<= 8; b <<= 8; + */ + return (s32)a - (s32)b > 0; +} + +static u32 seq_max(u32 a, u32 b) +{ + return seq_greater(a, b) ? a : b; +} + +static void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq) +{ + unsigned int m; + + spin_lock(&mdev->peer_seq_lock); + m = seq_max(mdev->peer_seq, new_seq); + mdev->peer_seq = m; + spin_unlock(&mdev->peer_seq_lock); + if (m == new_seq) + wake_up(&mdev->seq_wait); +} + /* Called from receive_Data. * Synchronize packets on sock with packets on msock. * @@ -1651,7 +1678,7 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) spin_lock(&mdev->peer_seq_lock); for (;;) { prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE); - if (seq_le(packet_seq, mdev->peer_seq+1)) + if (!seq_greater(packet_seq, mdev->peer_seq + 1)) break; if (signal_pending(current)) { ret = -ERESTARTSYS; From 9e204cddaf76d19ce0e84f025b0946110694dbfb Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 26 Jan 2011 18:45:11 +0100 Subject: [PATCH 0047/5831] drbd: Move some functions to where they are used Move drbd_update_congested() to drbd_main.c, and drbd_req_new() and drbd_req_free() to drbd_req.c: those functions are not used anywhere else. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 7 ------- drivers/block/drbd/drbd_main.c | 7 +++++++ drivers/block/drbd/drbd_req.c | 29 +++++++++++++++++++++++++++++ drivers/block/drbd/drbd_req.h | 26 -------------------------- 4 files changed, 36 insertions(+), 33 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index cb45ca10d4b1..7922fa0403d0 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2355,13 +2355,6 @@ static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) return changed; } -static inline void drbd_update_congested(struct drbd_conf *mdev) -{ - struct sock *sk = mdev->tconn->data.socket->sk; - if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) - set_bit(NET_CONGESTED, &mdev->flags); -} - static inline int drbd_queue_order_type(struct drbd_conf *mdev) { /* sorry, we currently have no working implementation diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 701f231cf4bf..5da1df023a49 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2528,6 +2528,13 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * return drop_it; /* && (mdev->state == R_PRIMARY) */; } +static void drbd_update_congested(struct drbd_conf *mdev) +{ + struct sock *sk = mdev->tconn->data.socket->sk; + if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) + set_bit(NET_CONGESTED, &mdev->flags); +} + /* The idea of sendpage seems to be to put some kind of reference * to the page into the skb, and to hand it over to the NIC. In * this process get_page() gets called. diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 74179f7986e1..25fa87c95a10 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -56,6 +56,35 @@ static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req) part_stat_unlock(); } +static struct drbd_request *drbd_req_new(struct drbd_conf *mdev, + struct bio *bio_src) +{ + struct drbd_request *req; + + req = mempool_alloc(drbd_request_mempool, GFP_NOIO); + if (!req) + return NULL; + + drbd_req_make_private_bio(req, bio_src); + req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0; + req->mdev = mdev; + req->master_bio = bio_src; + req->epoch = 0; + drbd_clear_interval(&req->i); + req->i.sector = bio_src->bi_sector; + req->i.size = bio_src->bi_size; + INIT_LIST_HEAD(&req->tl_requests); + INIT_LIST_HEAD(&req->w.list); + + return req; +} + +static void drbd_req_free(struct drbd_request *req) +{ + mempool_free(req, drbd_request_mempool); +} + +/* rw is bio_data_dir(), only READ or WRITE */ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw) { const unsigned long s = req->rq_state; diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 4b0858bf2866..431e3f962c3a 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -234,32 +234,6 @@ static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bi bio->bi_next = NULL; } -static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, - struct bio *bio_src) -{ - struct drbd_request *req = - mempool_alloc(drbd_request_mempool, GFP_NOIO); - if (likely(req)) { - drbd_req_make_private_bio(req, bio_src); - - req->rq_state = bio_data_dir(bio_src) == WRITE ? 
RQ_WRITE : 0; - req->mdev = mdev; - req->master_bio = bio_src; - req->epoch = 0; - req->i.sector = bio_src->bi_sector; - req->i.size = bio_src->bi_size; - drbd_clear_interval(&req->i); - INIT_LIST_HEAD(&req->tl_requests); - INIT_LIST_HEAD(&req->w.list); - } - return req; -} - -static inline void drbd_req_free(struct drbd_request *req) -{ - mempool_free(req, drbd_request_mempool); -} - /* Short lived temporary struct on the stack. * We could squirrel the error to be returned into * bio->bi_size, or similar. But that would be too ugly. */ From a500c2efbbb3a57f83e18382e927b18513aca4cd Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 27 Jan 2011 14:12:23 +0100 Subject: [PATCH 0048/5831] drbd: struct drbd_request: Introduce a new collision flag This flag is set when a processes puts itself to sleep to wait for a conflicting request to complete. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 7 +++-- drivers/block/drbd/drbd_req.c | 42 +++--------------------------- drivers/block/drbd/drbd_req.h | 7 +++++ 3 files changed, 15 insertions(+), 41 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b4e1dab62dc1..d9f3f7fd9bb2 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1815,6 +1815,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, first = 1; for (;;) { struct drbd_interval *i; + struct drbd_request *req2; int have_unacked = 0; int have_conflict = 0; prepare_to_wait(&mdev->misc_wait, &wait, @@ -1822,8 +1823,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, i = drbd_find_overlap(&mdev->write_requests, sector, size); if (i) { - struct drbd_request *req2 = - container_of(i, struct drbd_request, i); + req2 = container_of(i, struct drbd_request, i); /* only ALERT on first iteration, * we may be woken up early... */ @@ -1869,6 +1869,9 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, goto out_interrupted; } + /* Indicate to wake up mdev->misc_wait upon completion. */ + req2->rq_state |= RQ_COLLISION; + spin_unlock_irq(&mdev->tconn->req_lock); if (first) { first = 0; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 25fa87c95a10..8b4ba94538bd 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -176,45 +176,9 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, req->epoch == mdev->tconn->newest_tle->br_number) queue_barrier(mdev); - /* we need to do the conflict detection stuff, - * if the epoch_entries tree is non-empty and - * this request has completed on the network */ - if ((s & RQ_NET_DONE) && !RB_EMPTY_ROOT(&mdev->epoch_entries)) { - const sector_t sector = req->i.sector; - const int size = req->i.size; - struct drbd_interval *i; - - /* ASSERT: - * there must be no conflicting requests, since - * they must have been failed on the spot */ - - i = drbd_find_overlap(&mdev->write_requests, sector, size); - if (i) { - struct drbd_request *req2 = - container_of(i, struct drbd_request, i); - - dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " - "other: %p %llus +%u\n", - req, (unsigned long long)sector, size, - i, (unsigned long long)req2->i.sector, req2->i.size); - } - - /* maybe "wake" those conflicting epoch entries - * that wait for this request to finish. 
- * - * currently, there can be only _one_ such ee - * (well, or some more, which would be pending - * P_DISCARD_ACK not yet sent by the asender...), - * since we block the receiver thread upon the - * first conflict detection, which will wait on - * misc_wait. maybe we want to assert that? - * - * anyways, if we found one, - * we just have to do a wake_up. */ - i = drbd_find_overlap(&mdev->epoch_entries, sector, size); - if (i) - wake_up(&mdev->misc_wait); - } + /* Wake up any processes waiting for this request to complete. */ + if ((s & RQ_NET_DONE) && (s & RQ_COLLISION)) + wake_up(&mdev->misc_wait); } void complete_master_bio(struct drbd_conf *mdev, diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 431e3f962c3a..7a7464a2b3a6 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -194,6 +194,12 @@ enum drbd_req_state_bits { /* Should call drbd_al_complete_io() for this request... */ __RQ_IN_ACT_LOG, + + /* + * Set when a processes puts itself to sleep to wait for this request + * to complete. + */ + __RQ_COLLISION, }; #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) @@ -214,6 +220,7 @@ enum drbd_req_state_bits { #define RQ_WRITE (1UL << __RQ_WRITE) #define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG) +#define RQ_COLLISION (1UL << __RQ_COLLISION) /* For waking up the frozen transfer log mod_req() has to return if the request should be counted in the epoch object*/ From 3e05146f0a9f28ef5959403eabf3239869476315 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 27 Jan 2011 16:20:57 +0100 Subject: [PATCH 0049/5831] drbd: Remove redundant check from drbd_contains_interval() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_interval.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c index b77a9bda03d4..0d17eaa89a69 100644 --- a/drivers/block/drbd/drbd_interval.c +++ b/drivers/block/drbd/drbd_interval.c @@ -99,7 +99,7 @@ drbd_contains_interval(struct rb_root *root, sector_t sector, else if (interval > here) node = node->rb_right; else - return interval->sector == sector; + return true; } return false; } From 53840641bb1feff8c08acdba9de4c0f8b8674df5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 28 Jan 2011 10:31:04 +0100 Subject: [PATCH 0050/5831] drbd: Allow to wait for the completion of an epoch entry as well Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_interval.h | 1 + drivers/block/drbd/drbd_receiver.c | 35 +++++++++++++++++++----------- drivers/block/drbd/drbd_req.c | 23 +++++++++++++++----- drivers/block/drbd/drbd_req.h | 7 ------ 4 files changed, 41 insertions(+), 25 deletions(-) diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h index a847b4a07b25..9d1e5eb2d7eb 100644 --- a/drivers/block/drbd/drbd_interval.h +++ b/drivers/block/drbd/drbd_interval.h @@ -9,6 +9,7 @@ struct drbd_interval { sector_t sector; /* start sector of the interval */ unsigned int size; /* size in bytes */ sector_t end; /* highest interval end in subtree */ + int waiting:1; }; static inline void drbd_clear_interval(struct drbd_interval *i) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d9f3f7fd9bb2..b84a9c9fd3f8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -334,13 +334,15 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, goto 
fail; drbd_clear_interval(&e->i); + e->i.size = data_size; + e->i.sector = sector; + e->i.waiting = false; + e->epoch = NULL; e->mdev = mdev; e->pages = page; atomic_set(&e->pending_bios, 0); - e->i.size = data_size; e->flags = 0; - e->i.sector = sector; /* * The block_id is opaque to the receiver. It is not endianness * converted, and sent back to the sender unchanged. @@ -1172,6 +1174,19 @@ fail: return err; } +static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev, + struct drbd_epoch_entry *e) +{ + struct drbd_interval *i = &e->i; + + drbd_remove_interval(&mdev->write_requests, i); + drbd_clear_interval(i); + + /* Wake up any processes waiting for this epoch entry to complete. */ + if (i->waiting) + wake_up(&mdev->misc_wait); +} + static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) { @@ -1591,8 +1606,7 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if (mdev->tconn->net_conf->two_primaries) { spin_lock_irq(&mdev->tconn->req_lock); D_ASSERT(!drbd_interval_empty(&e->i)); - drbd_remove_interval(&mdev->epoch_entries, &e->i); - drbd_clear_interval(&e->i); + drbd_remove_epoch_entry_interval(mdev, e); spin_unlock_irq(&mdev->tconn->req_lock); } else D_ASSERT(drbd_interval_empty(&e->i)); @@ -1612,8 +1626,7 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u spin_lock_irq(&mdev->tconn->req_lock); D_ASSERT(!drbd_interval_empty(&e->i)); - drbd_remove_interval(&mdev->epoch_entries, &e->i); - drbd_clear_interval(&e->i); + drbd_remove_epoch_entry_interval(mdev, e); spin_unlock_irq(&mdev->tconn->req_lock); dec_unacked(mdev); @@ -1860,17 +1873,14 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, } if (signal_pending(current)) { - drbd_remove_interval(&mdev->epoch_entries, &e->i); - drbd_clear_interval(&e->i); - + drbd_remove_epoch_entry_interval(mdev, e); spin_unlock_irq(&mdev->tconn->req_lock); - finish_wait(&mdev->misc_wait, &wait); goto out_interrupted; } /* Indicate to wake up mdev->misc_wait upon completion. */ - req2->rq_state |= RQ_COLLISION; + i->waiting = true; spin_unlock_irq(&mdev->tconn->req_lock); if (first) { @@ -1922,8 +1932,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, dev_err(DEV, "submit failed, triggering re-connect\n"); spin_lock_irq(&mdev->tconn->req_lock); list_del(&e->w.list); - drbd_remove_interval(&mdev->epoch_entries, &e->i); - drbd_clear_interval(&e->i); + drbd_remove_epoch_entry_interval(mdev, e); spin_unlock_irq(&mdev->tconn->req_lock); if (e->flags & EE_CALL_AL_COMPLETE_IO) drbd_al_complete_io(mdev, e->i.sector); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 8b4ba94538bd..078f77ba68fb 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -70,9 +70,12 @@ static struct drbd_request *drbd_req_new(struct drbd_conf *mdev, req->mdev = mdev; req->master_bio = bio_src; req->epoch = 0; + drbd_clear_interval(&req->i); req->i.sector = bio_src->bi_sector; req->i.size = bio_src->bi_size; + req->i.waiting = false; + INIT_LIST_HEAD(&req->tl_requests); INIT_LIST_HEAD(&req->w.list); @@ -175,10 +178,6 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev, (s & RQ_NET_SENT) != 0 && req->epoch == mdev->tconn->newest_tle->br_number) queue_barrier(mdev); - - /* Wake up any processes waiting for this request to complete. 
*/ - if ((s & RQ_NET_DONE) && (s & RQ_COLLISION)) - wake_up(&mdev->misc_wait); } void complete_master_bio(struct drbd_conf *mdev, @@ -188,6 +187,20 @@ void complete_master_bio(struct drbd_conf *mdev, dec_ap_bio(mdev); } + +static void drbd_remove_request_interval(struct rb_root *root, + struct drbd_request *req) +{ + struct drbd_conf *mdev = req->mdev; + struct drbd_interval *i = &req->i; + + drbd_remove_interval(root, i); + + /* Wake up any processes waiting for this request to complete. */ + if (i->waiting) + wake_up(&mdev->misc_wait); +} + /* Helper for __req_mod(). * Set m->bio to the master bio, if it is fit to be completed, * or leave it alone (it is initialized to NULL in __req_mod), @@ -251,7 +264,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) root = &mdev->write_requests; else root = &mdev->read_requests; - drbd_remove_interval(root, &req->i); + drbd_remove_request_interval(root, req); } else D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 7a7464a2b3a6..431e3f962c3a 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -194,12 +194,6 @@ enum drbd_req_state_bits { /* Should call drbd_al_complete_io() for this request... */ __RQ_IN_ACT_LOG, - - /* - * Set when a processes puts itself to sleep to wait for this request - * to complete. - */ - __RQ_COLLISION, }; #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) @@ -220,7 +214,6 @@ enum drbd_req_state_bits { #define RQ_WRITE (1UL << __RQ_WRITE) #define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG) -#define RQ_COLLISION (1UL << __RQ_COLLISION) /* For waking up the frozen transfer log mod_req() has to return if the request should be counted in the epoch object*/ From 5e4722645afb27ee749ea65988544450f08f78ba Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 27 Jan 2011 14:42:51 +0100 Subject: [PATCH 0051/5831] drbd: _req_conflicts(): Get rid of the epoch_entries tree Instead of keeping a separate tree for local and remote write requests for finding requests and for conflict detection, use the same tree for both purposes. Introduce a flag to allow distinguishing the two possible types of entries in this tree. 
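
Illustrative aside, not code from the DRBD tree: taken together, this and the surrounding interval-tree patches amount to a simple scheme. Every request, whether local or from the peer, embeds a struct drbd_interval in the shared write_requests tree; a "local" bit records which container type owns the interval, and a "waiting" bit lets removal wake mdev->misc_wait only when somebody actually sleeps on it. The self-contained userspace model below illustrates that scheme under stated simplifications: find_overlap, report_conflict, remove_interval, local_request and peer_request are made-up stand-ins, a singly linked list replaces the rbtree-based tree, and printf replaces dev_alert()/wake_up().

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

/* Models struct drbd_interval after these patches: position, plus one bit
 * saying which container type owns it and one bit saying whether someone
 * sleeps until it is removed from the tree. */
struct interval {
        unsigned long long sector;      /* start, in 512-byte sectors */
        unsigned int size;              /* size in bytes */
        bool local;                     /* local request or peer request? */
        bool waiting;                   /* is somebody sleeping on removal? */
        struct interval *next;          /* list link standing in for the rbtree */
};

struct local_request {                  /* stands in for struct drbd_request */
        struct interval i;
};

struct peer_request {                   /* stands in for struct drbd_peer_request */
        struct interval i;
};

/* Return the first entry overlapping [sector, sector + size), or NULL. */
static struct interval *find_overlap(struct interval *tree,
                                     unsigned long long sector, unsigned int size)
{
        struct interval *i;

        for (i = tree; i; i = i->next)
                if (sector < i->sector + (i->size >> 9) &&
                    i->sector < sector + (size >> 9))
                        return i;
        return NULL;
}

/* Conflict path: recover the owning object only where it matters,
 * then mark the interval so that its removal will wake us. */
static void report_conflict(struct interval *i)
{
        if (i->local) {
                struct local_request *req = container_of(i, struct local_request, i);
                printf("conflict with local write %llus +%u (req %p)\n",
                       i->sector, i->size, (void *)req);
        } else {
                struct peer_request *peer_req = container_of(i, struct peer_request, i);
                printf("conflict with remote write %llus +%u (peer_req %p)\n",
                       i->sector, i->size, (void *)peer_req);
        }
        i->waiting = true;      /* in DRBD: the caller then sleeps on misc_wait */
}

/* Models the drbd_remove_*_interval() helpers: unlink, wake only if needed. */
static void remove_interval(struct interval **tree, struct interval *victim)
{
        struct interval **p;

        for (p = tree; *p; p = &(*p)->next) {
                if (*p == victim) {
                        *p = victim->next;
                        break;
                }
        }
        if (victim->waiting)
                printf("wake_up(misc_wait) for %llus +%u\n",
                       victim->sector, victim->size);
}

int main(void)
{
        struct local_request req = {
                .i = { .sector = 8, .size = 4096, .local = true },
        };
        struct peer_request peer_req = {
                .i = { .sector = 32, .size = 4096, .local = false },
        };
        struct interval *tree = &req.i;
        struct interval *i;

        req.i.next = &peer_req.i;

        i = find_overlap(tree, 34, 4096);       /* overlaps the remote write */
        if (i)
                report_conflict(i);

        remove_interval(&tree, &peer_req.i);    /* wakes the (pretend) waiter */
        return 0;
}

Keeping the distinguishing bit inside the embedded interval means the tree code never needs to know which container it is dealing with; container_of() recovers the owner only at the points that care, as the rewritten conflict loop in receive_Data() does when it checks RQ_NET_PENDING.
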
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 --- drivers/block/drbd/drbd_interval.h | 1 + drivers/block/drbd/drbd_main.c | 1 - drivers/block/drbd/drbd_receiver.c | 33 +++++++++++++++--------------- drivers/block/drbd/drbd_req.c | 28 ++++--------------------- drivers/block/drbd/drbd_worker.c | 2 +- 6 files changed, 22 insertions(+), 46 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 7922fa0403d0..7fcda713714f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1045,9 +1045,6 @@ struct drbd_conf { struct list_head read_ee; /* IO in progress (any read) */ struct list_head net_ee; /* zero-copy network send in progress */ - /* Interval tree of pending remote write requests (struct drbd_epoch_entry) */ - struct rb_root epoch_entries; - /* this one is protected by ee_lock, single thread */ struct drbd_epoch_entry *last_write_w_barrier; diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h index 9d1e5eb2d7eb..4010ad923948 100644 --- a/drivers/block/drbd/drbd_interval.h +++ b/drivers/block/drbd/drbd_interval.h @@ -9,6 +9,7 @@ struct drbd_interval { sector_t sector; /* start sector of the interval */ unsigned int size; /* size in bytes */ sector_t end; /* highest interval end in subtree */ + int local:1 /* local or remote request? */; int waiting:1; }; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5da1df023a49..8c77476825e4 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3450,7 +3450,6 @@ struct drbd_conf *drbd_new_device(unsigned int minor) goto out_no_tl; mdev->read_requests = RB_ROOT; mdev->write_requests = RB_ROOT; - mdev->epoch_entries = RB_ROOT; mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL); if (!mdev->current_epoch) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b84a9c9fd3f8..b063ca234462 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -336,6 +336,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, drbd_clear_interval(&e->i); e->i.size = data_size; e->i.sector = sector; + e->i.local = false; e->i.waiting = false; e->epoch = NULL; @@ -1508,7 +1509,7 @@ find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id, /* Request object according to our peer */ req = (struct drbd_request *)(unsigned long)id; - if (drbd_contains_interval(root, sector, &req->i)) + if (drbd_contains_interval(root, sector, &req->i) && req->i.local) return req; if (!missing_ok) { dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func, @@ -1788,17 +1789,12 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, /* conflict detection and handling: * 1. wait on the sequence number, * in case this data packet overtook ACK packets. - * 2. check our interval trees for conflicting requests: - * we only need to check the write_requests tree; the - * epoch_entries tree cannot contain any overlaps because - * they were already eliminated on the submitting node. + * 2. check for conflicting write requests. * * Note: for two_primaries, we are protocol C, * so there cannot be any request that is DONE * but still on the transfer log. * - * unconditionally add to the epoch_entries tree. - * * if no conflicting request is found: * submit. 
* @@ -1823,12 +1819,9 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, spin_lock_irq(&mdev->tconn->req_lock); - drbd_insert_interval(&mdev->epoch_entries, &e->i); - first = 1; for (;;) { struct drbd_interval *i; - struct drbd_request *req2; int have_unacked = 0; int have_conflict = 0; prepare_to_wait(&mdev->misc_wait, &wait, @@ -1836,18 +1829,23 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, i = drbd_find_overlap(&mdev->write_requests, sector, size); if (i) { - req2 = container_of(i, struct drbd_request, i); - /* only ALERT on first iteration, * we may be woken up early... */ if (first) - dev_alert(DEV, "%s[%u] Concurrent local write detected!" + dev_alert(DEV, "%s[%u] Concurrent %s write detected!" " new: %llus +%u; pending: %llus +%u\n", current->comm, current->pid, + i->local ? "local" : "remote", (unsigned long long)sector, size, - (unsigned long long)req2->i.sector, req2->i.size); - if (req2->rq_state & RQ_NET_PENDING) - ++have_unacked; + (unsigned long long)i->sector, i->size); + + if (i->local) { + struct drbd_request *req2; + + req2 = container_of(i, struct drbd_request, i); + if (req2->rq_state & RQ_NET_PENDING) + ++have_unacked; + } ++have_conflict; } if (!have_conflict) @@ -1873,7 +1871,6 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, } if (signal_pending(current)) { - drbd_remove_epoch_entry_interval(mdev, e); spin_unlock_irq(&mdev->tconn->req_lock); finish_wait(&mdev->misc_wait, &wait); goto out_interrupted; @@ -1896,6 +1893,8 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, spin_lock_irq(&mdev->tconn->req_lock); } finish_wait(&mdev->misc_wait, &wait); + + drbd_insert_interval(&mdev->write_requests, &e->i); } list_add(&e->w.list, &mdev->active_ee); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 078f77ba68fb..df5f10627327 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -74,6 +74,7 @@ static struct drbd_request *drbd_req_new(struct drbd_conf *mdev, drbd_clear_interval(&req->i); req->i.sector = bio_src->bi_sector; req->i.size = bio_src->bi_size; + req->i.local = true; req->i.waiting = false; INIT_LIST_HEAD(&req->tl_requests); @@ -317,8 +318,6 @@ static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_e * to happen, but this is the rationale why we also have to check for * conflicting requests with local origin, and why we have to do so regardless * of whether we allowed multiple primaries. - * - * In case we only have one primary, the epoch_entries tree is empty. */ static int _req_conflicts(struct drbd_request *req) { @@ -334,35 +333,16 @@ static int _req_conflicts(struct drbd_request *req) i = drbd_find_overlap(&mdev->write_requests, sector, size); if (i) { - struct drbd_request *req2 = - container_of(i, struct drbd_request, i); - - dev_alert(DEV, "%s[%u] Concurrent local write detected! " + dev_alert(DEV, "%s[%u] Concurrent %s write detected! " "[DISCARD L] new: %llus +%u; " "pending: %llus +%u\n", current->comm, current->pid, + i->local ? 
"local" : "remote", (unsigned long long)sector, size, - (unsigned long long)req2->i.sector, req2->i.size); + (unsigned long long)i->sector, i->size); goto out_conflict; } - if (!RB_EMPTY_ROOT(&mdev->epoch_entries)) { - /* check for overlapping requests with remote origin */ - i = drbd_find_overlap(&mdev->epoch_entries, sector, size); - if (i) { - struct drbd_epoch_entry *e = - container_of(i, struct drbd_epoch_entry, i); - - dev_alert(DEV, "%s[%u] Concurrent remote write detected!" - " [DISCARD L] new: %llus +%u; " - "pending: %llus +%u\n", - current->comm, current->pid, - (unsigned long long)sector, size, - (unsigned long long)e->i.sector, e->i.size); - goto out_conflict; - } - } - /* this is like it should be, and what we expected. * our users do behave after all... */ put_net_conf(mdev->tconn); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index afad8ea4d888..0359600f5635 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -123,7 +123,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo list_add_tail(&e->w.list, &mdev->done_ee); /* - * Do not remove from the epoch_entries tree here: we did not send the + * Do not remove from the write_requests tree here: we did not send the * Ack yet and did not wake possibly waiting conflicting requests. * Removed from the tree from "drbd_process_done_ee" within the * appropriate w.cb (e_end_block/e_end_resync_block) or from From ddd8877d3169ebda7272667fc3dc9768204a157f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 28 Jan 2011 14:24:05 +0100 Subject: [PATCH 0052/5831] drbd: Remove unnecessary reference counting left-over Nothing in this function accesses mdev->tconn->net_conf, so there is no need for get_net_conf() / put_net_conf() anymore. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index df5f10627327..e11ea475a4a3 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -328,9 +328,6 @@ static int _req_conflicts(struct drbd_request *req) D_ASSERT(drbd_interval_empty(&req->i)); - if (!get_net_conf(mdev->tconn)) - return 0; - i = drbd_find_overlap(&mdev->write_requests, sector, size); if (i) { dev_alert(DEV, "%s[%u] Concurrent %s write detected! " @@ -340,17 +337,9 @@ static int _req_conflicts(struct drbd_request *req) i->local ? "local" : "remote", (unsigned long long)sector, size, (unsigned long long)i->sector, i->size); - goto out_conflict; + return 1; } - - /* this is like it should be, and what we expected. - * our users do behave after all... */ - put_net_conf(mdev->tconn); return 0; - -out_conflict: - put_net_conf(mdev->tconn); - return 1; } /* obviously this could be coded as many single functions From 6024fece739518c4c101c767d527fd624b096a34 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 28 Jan 2011 15:53:51 +0100 Subject: [PATCH 0053/5831] drbd: Defer new writes when detecting conflicting writes Before submitting a new local write request, wait for any conflicting local or remote requests to complete. We could assume that the new request occurred first and that the conflicting requests overwrote it (and therefore discard the new reques), but we know for sure that the new request occurred after the conflicting requests and so this behavior would we weird. 
We would also end up with the wrong result if the new request is not fully contained within the conflicting requests. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 103 +++++++++++++--------------------- 1 file changed, 39 insertions(+), 64 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index e11ea475a4a3..6bcf4171a76f 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -300,48 +300,6 @@ static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_e _req_may_be_done(req, m); } -/* - * checks whether there was an overlapping request - * or ee already registered. - * - * if so, return 1, in which case this request is completed on the spot, - * without ever being submitted or send. - * - * return 0 if it is ok to submit this request. - * - * NOTE: - * paranoia: assume something above us is broken, and issues different write - * requests for the same block simultaneously... - * - * To ensure these won't be reordered differently on both nodes, resulting in - * diverging data sets, we discard the later one(s). Not that this is supposed - * to happen, but this is the rationale why we also have to check for - * conflicting requests with local origin, and why we have to do so regardless - * of whether we allowed multiple primaries. - */ -static int _req_conflicts(struct drbd_request *req) -{ - struct drbd_conf *mdev = req->mdev; - const sector_t sector = req->i.sector; - const int size = req->i.size; - struct drbd_interval *i; - - D_ASSERT(drbd_interval_empty(&req->i)); - - i = drbd_find_overlap(&mdev->write_requests, sector, size); - if (i) { - dev_alert(DEV, "%s[%u] Concurrent %s write detected! " - "[DISCARD L] new: %llus +%u; " - "pending: %llus +%u\n", - current->comm, current->pid, - i->local ? "local" : "remote", - (unsigned long long)sector, size, - (unsigned long long)i->sector, i->size); - return 1; - } - return 0; -} - /* obviously this could be coded as many single functions * instead of one huge switch, * or by putting the code directly in the respective locations @@ -721,6 +679,34 @@ static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr); } +/* + * complete_conflicting_writes - wait for any conflicting write requests + * + * The write_requests tree contains all active write requests which we + * currently know about. Wait for any requests to complete which conflict with + * the new one. 
+ */ +static int complete_conflicting_writes(struct drbd_conf *mdev, + sector_t sector, int size) +{ + for(;;) { + DEFINE_WAIT(wait); + struct drbd_interval *i; + + i = drbd_find_overlap(&mdev->write_requests, sector, size); + if (!i) + return 0; + i->waiting = true; + prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE); + spin_unlock_irq(&mdev->tconn->req_lock); + schedule(); + finish_wait(&mdev->misc_wait, &wait); + spin_lock_irq(&mdev->tconn->req_lock); + if (signal_pending(current)) + return -ERESTARTSYS; + } +} + static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) { const int rw = bio_rw(bio); @@ -729,7 +715,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns struct drbd_tl_epoch *b = NULL; struct drbd_request *req; int local, remote, send_oos = 0; - int err = -EIO; + int err; int ret = 0; /* allocate outside of all locks; */ @@ -799,6 +785,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns if (!(local || remote) && !is_susp(mdev->state)) { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); + err = -EIO; goto fail_free_complete; } @@ -823,6 +810,14 @@ allocate_barrier: /* GOOD, everything prepared, grab the spin_lock */ spin_lock_irq(&mdev->tconn->req_lock); + if (rw == WRITE) { + err = complete_conflicting_writes(mdev, sector, size); + if (err) { + spin_unlock_irq(&mdev->tconn->req_lock); + goto fail_free_complete; + } + } + if (is_susp(mdev->state)) { /* If we got suspended, use the retry mechanism of generic_make_request() to restart processing of this @@ -843,6 +838,7 @@ allocate_barrier: if (!(local || remote)) { dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); spin_unlock_irq(&mdev->tconn->req_lock); + err = -EIO; goto fail_free_complete; } } @@ -903,12 +899,6 @@ allocate_barrier: if (local) _req_mod(req, TO_BE_SUBMITTED); - /* check this request on the collision detection hash tables. - * if we have a conflict, just complete it here. - * THINK do we want to check reads, too? (I don't think so...) */ - if (rw == WRITE && _req_conflicts(req)) - goto fail_conflicting; - list_add_tail(&req->tl_requests, &mdev->tconn->newest_tle->requests); /* NOTE remote first: to get the concurrent write detection right, @@ -975,21 +965,6 @@ allocate_barrier: return 0; -fail_conflicting: - /* this is a conflicting request. - * even though it may have been only _partially_ - * overlapping with one of the currently pending requests, - * without even submitting or sending it, we will - * pretend that it was successfully served right now. - */ - _drbd_end_io_acct(mdev, req); - spin_unlock_irq(&mdev->tconn->req_lock); - if (remote) - dec_ap_pending(mdev); - /* THINK: do we want to fail it (-EIO), or pretend success? - * this pretends success. */ - err = 0; - fail_free_complete: if (req->rq_state & RQ_IN_ACT_LOG) drbd_al_complete_io(mdev, sector); From 43ae077d0a1e98dd13112646fe967565febf4fe7 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 3 Feb 2011 18:42:08 +0100 Subject: [PATCH 0054/5831] drbd: Make the peer_seq updating code more obvious Make it more clear that update_peer_seq() is supposed to wake up the seq_wait queue whenever the sequence number changes. 
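
Illustrative aside, not code from the DRBD tree: the update rule is easy to model in isolation. In the compilable userspace snippet below, seq_max() matches the helper visible as context in the hunk, seq_greater() is an assumed signed-difference implementation of the wrap-around-safe comparison (its body is not shown in this patch), and stored_peer_seq together with a printf stands in for mdev->peer_seq, the peer_seq_lock spinlock and the seq_wait queue.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Wrap-around safe "a is newer than b" for 32-bit sequence numbers
 * (assumption about what DRBD's seq_greater() helper does): the signed
 * difference stays positive as long as the two values are less than
 * 2^31 apart, even across the 0xffffffff -> 0 wrap. */
static bool seq_greater(uint32_t a, uint32_t b)
{
        return (int32_t)(a - b) > 0;
}

static uint32_t seq_max(uint32_t a, uint32_t b)
{
        return seq_greater(a, b) ? a : b;
}

static uint32_t stored_peer_seq;        /* stands in for mdev->peer_seq */

/* Mirrors the rewritten update_peer_seq(): remember the old value,
 * advance to the maximum, and wake waiters whenever the packet carried
 * a sequence number different from what was stored. */
static void update_peer_seq(uint32_t peer_seq)
{
        uint32_t old_peer_seq = stored_peer_seq;

        stored_peer_seq = seq_max(stored_peer_seq, peer_seq);
        if (old_peer_seq != peer_seq)
                printf("wake_up(seq_wait), peer_seq now %u\n", stored_peer_seq);
}

int main(void)
{
        printf("%d\n", seq_greater(5, 3));              /* 1 */
        printf("%d\n", seq_greater(2, 0xfffffffe));     /* 1: wrapped around */

        update_peer_seq(10);    /* advances from 0 to 10, wakes */
        update_peer_seq(10);    /* same value again: no wake */
        update_peer_seq(7);     /* older: stored stays 10, waiters still woken */
        return 0;
}

As in the patch, a wake-up also happens when the incoming number is older than the stored one; only a packet repeating the value that is already stored skips the wake_up().
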
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b063ca234462..bc5351df8073 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1650,15 +1650,15 @@ static u32 seq_max(u32 a, u32 b) return seq_greater(a, b) ? a : b; } -static void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq) +static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq) { - unsigned int m; + unsigned int old_peer_seq; spin_lock(&mdev->peer_seq_lock); - m = seq_max(mdev->peer_seq, new_seq); - mdev->peer_seq = m; + old_peer_seq = mdev->peer_seq; + mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq); spin_unlock(&mdev->peer_seq_lock); - if (m == new_seq) + if (old_peer_seq != peer_seq) wake_up(&mdev->seq_wait); } From 70b1987663851f4431a2f43d8ccefb7b6ac73331 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 4 Feb 2011 12:11:05 +0100 Subject: [PATCH 0055/5831] drbd: Improve the drbd_find_overlap() documentation Describe how to reach any further overlapping intervals from the first overlap found. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_interval.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c index 0d17eaa89a69..14dbe2dd2d33 100644 --- a/drivers/block/drbd/drbd_interval.c +++ b/drivers/block/drbd/drbd_interval.c @@ -122,9 +122,11 @@ drbd_remove_interval(struct rb_root *root, struct drbd_interval *this) * @sector: start sector * @size: size, aligned to 512 bytes * - * Returns the interval overlapping with [sector, sector + size), or NULL. - * When there is more than one overlapping interval in the tree, the interval - * with the lowest start sector is returned. + * Returns an interval overlapping with [sector, sector + size), or NULL if + * there is none. When there is more than one overlapping interval in the + * tree, the interval with the lowest start sector is returned, and all other + * overlapping intervals will be on the right side of the tree, reachable with + * rb_next(). 
*/ struct drbd_interval * drbd_find_overlap(struct rb_root *root, sector_t sector, unsigned int size) From c6f7df42c9ceddc5ef582f6044b15e50e6eeb053 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 4 Feb 2011 15:10:57 +0100 Subject: [PATCH 0056/5831] drbd: Remove unused variable in struct drbd_conf Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 7fcda713714f..768656a1e82c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1045,9 +1045,6 @@ struct drbd_conf { struct list_head read_ee; /* IO in progress (any read) */ struct list_head net_ee; /* zero-copy network send in progress */ - /* this one is protected by ee_lock, single thread */ - struct drbd_epoch_entry *last_write_w_barrier; - int next_barrier_nr; struct list_head resync_reads; atomic_t pp_in_use; /* allocated from page pool */ From f6ffca9f42902556bcf72426d2d0714bdbfdbe09 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 4 Feb 2011 15:30:34 +0100 Subject: [PATCH 0057/5831] drbd: Rename struct drbd_epoch_entry to struct drbd_peer_request Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 37 +++++++++++------------ drivers/block/drbd/drbd_main.c | 8 ++--- drivers/block/drbd/drbd_nl.c | 7 ++--- drivers/block/drbd/drbd_receiver.c | 48 +++++++++++++++--------------- drivers/block/drbd/drbd_worker.c | 25 ++++++++-------- 5 files changed, 61 insertions(+), 64 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 768656a1e82c..696ff3cdb2f9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -707,7 +707,7 @@ struct digest_info { void *digest; }; -struct drbd_epoch_entry { +struct drbd_peer_request { struct drbd_work w; struct drbd_epoch *epoch; /* for writes */ struct drbd_conf *mdev; @@ -1194,8 +1194,8 @@ extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packet cmd, extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size); -extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, - struct drbd_epoch_entry *e); +extern int drbd_send_ack(struct drbd_conf *, enum drbd_packet, + struct drbd_peer_request *); extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, struct p_block_req *rp); extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, @@ -1203,8 +1203,8 @@ extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd, sector_t sector, int blksize, u64 block_id); extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req); -extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, - struct drbd_epoch_entry *e); +extern int drbd_send_block(struct drbd_conf *, enum drbd_packet, + struct drbd_peer_request *); extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd, sector_t sector, int size, u64 block_id); @@ -1500,7 +1500,8 @@ static inline void ov_oos_print(struct drbd_conf *mdev) extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio *, void *); -extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, struct 
drbd_epoch_entry *, void *); +extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, + struct drbd_peer_request *, void *); /* worker callbacks */ extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int); extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int); @@ -1527,16 +1528,14 @@ extern void start_resync_timer_fn(unsigned long data); /* drbd_receiver.c */ extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector); -extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, - const unsigned rw, const int fault_type); +extern int drbd_submit_ee(struct drbd_conf *, struct drbd_peer_request *, + const unsigned, const int); extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); -extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, - u64 id, - sector_t sector, - unsigned int data_size, - gfp_t gfp_mask) __must_hold(local); -extern void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, - int is_net); +extern struct drbd_peer_request *drbd_alloc_ee(struct drbd_conf *, + u64, sector_t, unsigned int, + gfp_t) __must_hold(local); +extern void drbd_free_some_ee(struct drbd_conf *, struct drbd_peer_request *, + int); #define drbd_free_ee(m,e) drbd_free_some_ee(m, e, 0) #define drbd_free_net_ee(m,e) drbd_free_some_ee(m, e, 1) extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev, @@ -1627,10 +1626,8 @@ void drbd_nl_cleanup(void); int __init drbd_nl_init(void); void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state); void drbd_bcast_sync_progress(struct drbd_conf *mdev); -void drbd_bcast_ee(struct drbd_conf *mdev, - const char *reason, const int dgs, - const char* seen_hash, const char* calc_hash, - const struct drbd_epoch_entry* e); +void drbd_bcast_ee(struct drbd_conf *, const char *, const int, const char *, + const char *, const struct drbd_peer_request *); /** @@ -1713,7 +1710,7 @@ static inline int drbd_bio_has_active_page(struct bio *bio) return 0; } -static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e) +static inline int drbd_ee_has_active_page(struct drbd_peer_request *e) { struct page *page = e->pages; page_chain_for_each(page) { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8c77476825e4..62f20dfc9b54 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2434,7 +2434,7 @@ int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, * @e: Epoch entry. 
*/ int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, - struct drbd_epoch_entry *e) + struct drbd_peer_request *e) { return _drbd_send_ack(mdev, cmd, cpu_to_be64(e->i.sector), @@ -2641,7 +2641,7 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) return 1; } -static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) +static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_peer_request *e) { struct page *page = e->pages; unsigned len = e->i.size; @@ -2747,7 +2747,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY) */ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, - struct drbd_epoch_entry *e) + struct drbd_peer_request *e) { int ok; struct p_data p; @@ -3147,7 +3147,7 @@ static int drbd_create_mempools(void) goto Enomem; drbd_ee_cache = kmem_cache_create( - "drbd_ee", sizeof(struct drbd_epoch_entry), 0, 0, NULL); + "drbd_ee", sizeof(struct drbd_peer_request), 0, 0, NULL); if (drbd_ee_cache == NULL) goto Enomem; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 8b8894e10e61..ee00ffa04653 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2443,10 +2443,9 @@ void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); } -void drbd_bcast_ee(struct drbd_conf *mdev, - const char *reason, const int dgs, - const char* seen_hash, const char* calc_hash, - const struct drbd_epoch_entry* e) +void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs, + const char *seen_hash, const char *calc_hash, + const struct drbd_peer_request *e) { struct cn_msg *cn_reply; struct drbd_nl_cfg_reply *reply; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index bc5351df8073..e061aca2d93a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -189,7 +189,7 @@ static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed) { - struct drbd_epoch_entry *e; + struct drbd_peer_request *e; struct list_head *le, *tle; /* The EEs are always appended to the end of the list. Since @@ -198,7 +198,7 @@ static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed stop to examine the list... 
*/ list_for_each_safe(le, tle, &mdev->net_ee) { - e = list_entry(le, struct drbd_epoch_entry, w.list); + e = list_entry(le, struct drbd_peer_request, w.list); if (drbd_ee_has_active_page(e)) break; list_move(le, to_be_freed); @@ -208,7 +208,7 @@ static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) { LIST_HEAD(reclaimed); - struct drbd_epoch_entry *e, *t; + struct drbd_peer_request *e, *t; spin_lock_irq(&mdev->tconn->req_lock); reclaim_net_ee(mdev, &reclaimed); @@ -309,13 +309,11 @@ You must not have the req_lock: drbd_wait_ee_list_empty() */ -struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, - u64 id, - sector_t sector, - unsigned int data_size, - gfp_t gfp_mask) __must_hold(local) +struct drbd_peer_request * +drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector, + unsigned int data_size, gfp_t gfp_mask) __must_hold(local) { - struct drbd_epoch_entry *e; + struct drbd_peer_request *e; struct page *page; unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; @@ -357,7 +355,8 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, return NULL; } -void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int is_net) +void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *e, + int is_net) { if (e->flags & EE_HAS_DIGEST) kfree(e->digest); @@ -370,7 +369,7 @@ void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int i int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) { LIST_HEAD(work_list); - struct drbd_epoch_entry *e, *t; + struct drbd_peer_request *e, *t; int count = 0; int is_net = list == &mdev->net_ee; @@ -399,7 +398,7 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) { LIST_HEAD(work_list); LIST_HEAD(reclaimed); - struct drbd_epoch_entry *e, *t; + struct drbd_peer_request *e, *t; int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS); spin_lock_irq(&mdev->tconn->req_lock); @@ -1100,8 +1099,8 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) * on certain Xen deployments. */ /* TODO allocate from our own bio_set. 
*/ -int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, - const unsigned rw, const int fault_type) +int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_peer_request *e, + const unsigned rw, const int fault_type) { struct bio *bios = NULL; struct bio *bio; @@ -1176,7 +1175,7 @@ fail: } static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev, - struct drbd_epoch_entry *e) + struct drbd_peer_request *e) { struct drbd_interval *i = &e->i; @@ -1262,11 +1261,12 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd, /* used from receive_RSDataReply (recv_resync_read) * and from receive_Data */ -static struct drbd_epoch_entry * -read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __must_hold(local) +static struct drbd_peer_request * +read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, + int data_size) __must_hold(local) { const sector_t capacity = drbd_get_capacity(mdev->this_bdev); - struct drbd_epoch_entry *e; + struct drbd_peer_request *e; struct page *page; int dgs, ds, rr; void *dig_in = mdev->tconn->int_dig_in; @@ -1445,7 +1445,7 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, * drbd_process_done_ee() by asender only */ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) { - struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; + struct drbd_peer_request *e = (struct drbd_peer_request *)w; sector_t sector = e->i.sector; int ok; @@ -1467,7 +1467,7 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local) { - struct drbd_epoch_entry *e; + struct drbd_peer_request *e; e = read_in_block(mdev, ID_SYNCER, sector, data_size); if (!e) @@ -1582,7 +1582,7 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, */ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; + struct drbd_peer_request *e = (struct drbd_peer_request *)w; sector_t sector = e->i.sector; int ok = 1, pcmd; @@ -1619,7 +1619,7 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused) { - struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; + struct drbd_peer_request *e = (struct drbd_peer_request *)w; int ok = 1; D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); @@ -1731,7 +1731,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) { sector_t sector; - struct drbd_epoch_entry *e; + struct drbd_peer_request *e; struct p_data *p = &mdev->tconn->data.rbuf.data; int rw = WRITE; u32 dp_flags; @@ -2015,7 +2015,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, { sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); - struct drbd_epoch_entry *e; + struct drbd_peer_request *e; struct digest_info *di = NULL; int size, verb; unsigned int fault_type; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 0359600f5635..06628d1504b8 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -80,7 +80,7 @@ void drbd_md_io_complete(struct bio *bio, int error) /* reads on behalf of the partner, * "submitted" by the receiver */ -void drbd_endio_read_sec_final(struct 
drbd_epoch_entry *e) __releases(local) +void drbd_endio_read_sec_final(struct drbd_peer_request *e) __releases(local) { unsigned long flags = 0; struct drbd_conf *mdev = e->mdev; @@ -100,7 +100,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local) /* writes on behalf of the partner, or resync writes, * "submitted" by the receiver, final stage. */ -static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(local) +static void drbd_endio_write_sec_final(struct drbd_peer_request *e) __releases(local) { unsigned long flags = 0; struct drbd_conf *mdev = e->mdev; @@ -154,7 +154,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo */ void drbd_endio_sec(struct bio *bio, int error) { - struct drbd_epoch_entry *e = bio->bi_private; + struct drbd_peer_request *e = bio->bi_private; struct drbd_conf *mdev = e->mdev; int uptodate = bio_flagged(bio, BIO_UPTODATE); int is_write = bio_data_dir(bio) == WRITE; @@ -247,7 +247,8 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return w_send_read_req(mdev, w, 0); } -void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_epoch_entry *e, void *digest) +void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, + struct drbd_peer_request *e, void *digest) { struct hash_desc desc; struct scatterlist sg; @@ -297,7 +298,7 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * /* TODO merge common code with w_e_end_ov_req */ int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); int digest_size; void *digest; int ok = 1; @@ -344,7 +345,7 @@ out: static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) { - struct drbd_epoch_entry *e; + struct drbd_peer_request *e; if (!get_ldev(mdev)) return -EIO; @@ -900,7 +901,7 @@ out: } /* helper */ -static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_entry *e) +static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *e) { if (drbd_ee_has_active_page(e)) { /* This might happen if sendpage() has not finished */ @@ -923,7 +924,7 @@ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_ent */ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); int ok; if (unlikely(cancel)) { @@ -959,7 +960,7 @@ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) */ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); int ok; if (unlikely(cancel)) { @@ -1007,7 +1008,7 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); struct digest_info *di; int digest_size; void *digest = NULL; @@ -1070,7 +1071,7 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work 
*w, int cancel) /* TODO merge common code with w_e_send_csum */ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); sector_t sector = e->i.sector; unsigned int size = e->i.size; int digest_size; @@ -1127,7 +1128,7 @@ void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); struct digest_info *di; void *digest; sector_t sector = e->i.sector; From 18b75d756bdd6e87e5c4a46d6d1f279077425dae Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 4 Feb 2011 15:36:22 +0100 Subject: [PATCH 0058/5831] drbd: Clean up some left-overs Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 696ff3cdb2f9..e6cc6301db02 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -668,15 +668,6 @@ struct drbd_tl_epoch { int n_writes; /* number of requests attached before this barrier */ }; -struct drbd_request; - -/* These Tl_epoch_entries may be in one of 6 lists: - active_ee .. data packet being written - sync_ee .. syncer block being written - done_ee .. block written, need to send P_WRITE_ACK - read_ee .. [RS]P_DATA_REQUEST being read -*/ - struct drbd_epoch { struct list_head list; unsigned int barrier_nr; @@ -1041,8 +1032,8 @@ struct drbd_conf { enum write_ordering_e write_ordering; struct list_head active_ee; /* IO in progress (P_DATA gets written to disk) */ struct list_head sync_ee; /* IO in progress (P_RS_DATA_REPLY gets written to disk) */ - struct list_head done_ee; /* send ack */ - struct list_head read_ee; /* IO in progress (any read) */ + struct list_head done_ee; /* need to send P_WRITE_ACK */ + struct list_head read_ee; /* [RS]P_DATA_REQUEST being read */ struct list_head net_ee; /* zero-copy network send in progress */ int next_barrier_nr; From 6c852beca185b18e89ad7783ab15793c0911f86b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 4 Feb 2011 15:38:52 +0100 Subject: [PATCH 0059/5831] drbd: Update some comments Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_receiver.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e6cc6301db02..c7504579c46e 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -727,7 +727,7 @@ enum { * we need to resubmit without the barrier flag. */ __EE_RESUBMITTED, - /* we may have several bios per epoch entry. + /* we may have several bios per peer request. 
* if any of those fail, we set this flag atomically * from the endio callback */ __EE_WAS_ERROR, @@ -1422,7 +1422,7 @@ extern void drbd_bm_unlock(struct drbd_conf *mdev); /* drbd_main.c */ extern struct kmem_cache *drbd_request_cache; -extern struct kmem_cache *drbd_ee_cache; /* epoch entries */ +extern struct kmem_cache *drbd_ee_cache; /* peer requests */ extern struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ extern mempool_t *drbd_request_mempool; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 62f20dfc9b54..3bc900f48f96 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -135,7 +135,7 @@ struct drbd_conf **minor_table; struct list_head drbd_tconns; /* list of struct drbd_tconn */ struct kmem_cache *drbd_request_cache; -struct kmem_cache *drbd_ee_cache; /* epoch entries */ +struct kmem_cache *drbd_ee_cache; /* peer requests */ struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ mempool_t *drbd_request_mempool; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e061aca2d93a..6ba94febfab1 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1085,7 +1085,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) /** * drbd_submit_ee() * @mdev: DRBD device. - * @e: epoch entry + * @e: peer request * @rw: flag field, see bio->bi_rw * * May spread the pages to multiple bios, @@ -1182,7 +1182,7 @@ static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev, drbd_remove_interval(&mdev->write_requests, i); drbd_clear_interval(i); - /* Wake up any processes waiting for this epoch entry to complete. */ + /* Wake up any processes waiting for this peer request to complete. */ if (i->waiting) wake_up(&mdev->misc_wait); } From db830c464b69e26ea4d371e38bb2320c99c82f41 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 4 Feb 2011 15:57:48 +0100 Subject: [PATCH 0060/5831] drbd: Local variable renames: e -> peer_req Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +- drivers/block/drbd/drbd_main.c | 36 ++-- drivers/block/drbd/drbd_nl.c | 18 +- drivers/block/drbd/drbd_receiver.c | 254 ++++++++++++++--------------- drivers/block/drbd/drbd_worker.c | 197 +++++++++++----------- 5 files changed, 258 insertions(+), 251 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c7504579c46e..302ccc6d9432 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1701,9 +1701,9 @@ static inline int drbd_bio_has_active_page(struct bio *bio) return 0; } -static inline int drbd_ee_has_active_page(struct drbd_peer_request *e) +static inline int drbd_ee_has_active_page(struct drbd_peer_request *peer_req) { - struct page *page = e->pages; + struct page *page = peer_req->pages; page_chain_for_each(page) { if (page_count(page) > 1) return 1; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 3bc900f48f96..7728d1613406 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2429,17 +2429,17 @@ int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, /** * drbd_send_ack() - Sends an ack packet - * @mdev: DRBD device. - * @cmd: Packet command code. - * @e: Epoch entry. 
+ * @mdev: DRBD device + * @cmd: packet command code + * @peer_req: peer request */ int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, - struct drbd_peer_request *e) + struct drbd_peer_request *peer_req) { return _drbd_send_ack(mdev, cmd, - cpu_to_be64(e->i.sector), - cpu_to_be32(e->i.size), - e->block_id); + cpu_to_be64(peer_req->i.sector), + cpu_to_be32(peer_req->i.size), + peer_req->block_id); } /* This function misuses the block_id field to signal if the blocks @@ -2641,10 +2641,12 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) return 1; } -static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_peer_request *e) +static int _drbd_send_zc_ee(struct drbd_conf *mdev, + struct drbd_peer_request *peer_req) { - struct page *page = e->pages; - unsigned len = e->i.size; + struct page *page = peer_req->pages; + unsigned len = peer_req->i.size; + /* hint all but last page with MSG_MORE */ page_chain_for_each(page) { unsigned l = min_t(unsigned, len, PAGE_SIZE); @@ -2747,7 +2749,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY) */ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, - struct drbd_peer_request *e) + struct drbd_peer_request *peer_req) { int ok; struct p_data p; @@ -2757,9 +2759,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; - prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); - p.sector = cpu_to_be64(e->i.sector); - p.block_id = e->block_id; + prepare_header(mdev, &p.head, cmd, sizeof(p) - + sizeof(struct p_header80) + + dgs + peer_req->i.size); + p.sector = cpu_to_be64(peer_req->i.sector); + p.block_id = peer_req->block_id; p.seq_num = 0; /* unused */ /* Only called by our kernel thread. @@ -2772,11 +2776,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, ok = sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), dgs ? 
MSG_MORE : 0); if (ok && dgs) { dgb = mdev->tconn->int_dig_out; - drbd_csum_ee(mdev, mdev->tconn->integrity_w_tfm, e, dgb); + drbd_csum_ee(mdev, mdev->tconn->integrity_w_tfm, peer_req, dgb); ok = dgs == drbd_send(mdev, mdev->tconn->data.socket, dgb, dgs, 0); } if (ok) - ok = _drbd_send_zc_ee(mdev, e); + ok = _drbd_send_zc_ee(mdev, peer_req); drbd_put_data_sock(mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ee00ffa04653..e30d52ba3fcf 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2445,7 +2445,7 @@ void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs, const char *seen_hash, const char *calc_hash, - const struct drbd_peer_request *e) + const struct drbd_peer_request *peer_req) { struct cn_msg *cn_reply; struct drbd_nl_cfg_reply *reply; @@ -2453,7 +2453,7 @@ void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs, struct page *page; unsigned len; - if (!e) + if (!peer_req) return; if (!reason || !reason[0]) return; @@ -2472,8 +2472,10 @@ void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs, GFP_NOIO); if (!cn_reply) { - dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, sector %llu, size %u\n", - (unsigned long long)e->i.sector, e->i.size); + dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, " + "sector %llu, size %u\n", + (unsigned long long)peer_req->i.sector, + peer_req->i.size); return; } @@ -2483,15 +2485,15 @@ void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs, tl = tl_add_str(tl, T_dump_ee_reason, reason); tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs); tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs); - tl = tl_add_int(tl, T_ee_sector, &e->i.sector); - tl = tl_add_int(tl, T_ee_block_id, &e->block_id); + tl = tl_add_int(tl, T_ee_sector, &peer_req->i.sector); + tl = tl_add_int(tl, T_ee_block_id, &peer_req->block_id); /* dump the first 32k */ - len = min_t(unsigned, e->i.size, 32 << 10); + len = min_t(unsigned, peer_req->i.size, 32 << 10); put_unaligned(T_ee_data, tl++); put_unaligned(len, tl++); - page = e->pages; + page = peer_req->pages; page_chain_for_each(page) { void *d = kmap_atomic(page, KM_USER0); unsigned l = min_t(unsigned, len, PAGE_SIZE); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6ba94febfab1..3a9cd31e094b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -189,7 +189,7 @@ static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed) { - struct drbd_peer_request *e; + struct drbd_peer_request *peer_req; struct list_head *le, *tle; /* The EEs are always appended to the end of the list. Since @@ -198,8 +198,8 @@ static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed stop to examine the list... 
*/ list_for_each_safe(le, tle, &mdev->net_ee) { - e = list_entry(le, struct drbd_peer_request, w.list); - if (drbd_ee_has_active_page(e)) + peer_req = list_entry(le, struct drbd_peer_request, w.list); + if (drbd_ee_has_active_page(peer_req)) break; list_move(le, to_be_freed); } @@ -208,14 +208,14 @@ static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) { LIST_HEAD(reclaimed); - struct drbd_peer_request *e, *t; + struct drbd_peer_request *peer_req, *t; spin_lock_irq(&mdev->tconn->req_lock); reclaim_net_ee(mdev, &reclaimed); spin_unlock_irq(&mdev->tconn->req_lock); - list_for_each_entry_safe(e, t, &reclaimed, w.list) - drbd_free_net_ee(mdev, e); + list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) + drbd_free_net_ee(mdev, peer_req); } /** @@ -313,15 +313,15 @@ struct drbd_peer_request * drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector, unsigned int data_size, gfp_t gfp_mask) __must_hold(local) { - struct drbd_peer_request *e; + struct drbd_peer_request *peer_req; struct page *page; unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE)) return NULL; - e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); - if (!e) { + peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); + if (!peer_req) { if (!(gfp_mask & __GFP_NOWARN)) dev_err(DEV, "alloc_ee: Allocation of an EE failed\n"); return NULL; @@ -331,45 +331,45 @@ drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector, if (!page) goto fail; - drbd_clear_interval(&e->i); - e->i.size = data_size; - e->i.sector = sector; - e->i.local = false; - e->i.waiting = false; + drbd_clear_interval(&peer_req->i); + peer_req->i.size = data_size; + peer_req->i.sector = sector; + peer_req->i.local = false; + peer_req->i.waiting = false; - e->epoch = NULL; - e->mdev = mdev; - e->pages = page; - atomic_set(&e->pending_bios, 0); - e->flags = 0; + peer_req->epoch = NULL; + peer_req->mdev = mdev; + peer_req->pages = page; + atomic_set(&peer_req->pending_bios, 0); + peer_req->flags = 0; /* * The block_id is opaque to the receiver. It is not endianness * converted, and sent back to the sender unchanged. 
*/ - e->block_id = id; + peer_req->block_id = id; - return e; + return peer_req; fail: - mempool_free(e, drbd_ee_mempool); + mempool_free(peer_req, drbd_ee_mempool); return NULL; } -void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *e, +void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req, int is_net) { - if (e->flags & EE_HAS_DIGEST) - kfree(e->digest); - drbd_pp_free(mdev, e->pages, is_net); - D_ASSERT(atomic_read(&e->pending_bios) == 0); - D_ASSERT(drbd_interval_empty(&e->i)); - mempool_free(e, drbd_ee_mempool); + if (peer_req->flags & EE_HAS_DIGEST) + kfree(peer_req->digest); + drbd_pp_free(mdev, peer_req->pages, is_net); + D_ASSERT(atomic_read(&peer_req->pending_bios) == 0); + D_ASSERT(drbd_interval_empty(&peer_req->i)); + mempool_free(peer_req, drbd_ee_mempool); } int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) { LIST_HEAD(work_list); - struct drbd_peer_request *e, *t; + struct drbd_peer_request *peer_req, *t; int count = 0; int is_net = list == &mdev->net_ee; @@ -377,8 +377,8 @@ int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) list_splice_init(list, &work_list); spin_unlock_irq(&mdev->tconn->req_lock); - list_for_each_entry_safe(e, t, &work_list, w.list) { - drbd_free_some_ee(mdev, e, is_net); + list_for_each_entry_safe(peer_req, t, &work_list, w.list) { + drbd_free_some_ee(mdev, peer_req, is_net); count++; } return count; @@ -398,7 +398,7 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) { LIST_HEAD(work_list); LIST_HEAD(reclaimed); - struct drbd_peer_request *e, *t; + struct drbd_peer_request *peer_req, *t; int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS); spin_lock_irq(&mdev->tconn->req_lock); @@ -406,17 +406,17 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) list_splice_init(&mdev->done_ee, &work_list); spin_unlock_irq(&mdev->tconn->req_lock); - list_for_each_entry_safe(e, t, &reclaimed, w.list) - drbd_free_net_ee(mdev, e); + list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) + drbd_free_net_ee(mdev, peer_req); /* possible callbacks here: * e_end_block, and e_end_resync_block, e_send_discard_ack. * all ignore the last argument. */ - list_for_each_entry_safe(e, t, &work_list, w.list) { + list_for_each_entry_safe(peer_req, t, &work_list, w.list) { /* list_del not necessary, next/prev members not touched */ - ok = e->w.cb(mdev, &e->w, !ok) && ok; - drbd_free_ee(mdev, e); + ok = peer_req->w.cb(mdev, &peer_req->w, !ok) && ok; + drbd_free_ee(mdev, peer_req); } wake_up(&mdev->ee_wait); @@ -1085,7 +1085,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) /** * drbd_submit_ee() * @mdev: DRBD device. - * @e: peer request + * @peer_req: peer request * @rw: flag field, see bio->bi_rw * * May spread the pages to multiple bios, @@ -1099,14 +1099,14 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) * on certain Xen deployments. */ /* TODO allocate from our own bio_set. 
*/ -int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_peer_request *e, +int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req, const unsigned rw, const int fault_type) { struct bio *bios = NULL; struct bio *bio; - struct page *page = e->pages; - sector_t sector = e->i.sector; - unsigned ds = e->i.size; + struct page *page = peer_req->pages; + sector_t sector = peer_req->i.sector; + unsigned ds = peer_req->i.size; unsigned n_bios = 0; unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; int err = -ENOMEM; @@ -1121,11 +1121,11 @@ next_bio: dev_err(DEV, "submit_ee: Allocation of a bio failed\n"); goto fail; } - /* > e->i.sector, unless this is the first bio */ + /* > peer_req->i.sector, unless this is the first bio */ bio->bi_sector = sector; bio->bi_bdev = mdev->ldev->backing_bdev; bio->bi_rw = rw; - bio->bi_private = e; + bio->bi_private = peer_req; bio->bi_end_io = drbd_endio_sec; bio->bi_next = bios; @@ -1155,7 +1155,7 @@ next_bio: D_ASSERT(page == NULL); D_ASSERT(ds == 0); - atomic_set(&e->pending_bios, n_bios); + atomic_set(&peer_req->pending_bios, n_bios); do { bio = bios; bios = bios->bi_next; @@ -1175,9 +1175,9 @@ fail: } static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev, - struct drbd_peer_request *e) + struct drbd_peer_request *peer_req) { - struct drbd_interval *i = &e->i; + struct drbd_interval *i = &peer_req->i; drbd_remove_interval(&mdev->write_requests, i); drbd_clear_interval(i); @@ -1266,7 +1266,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __must_hold(local) { const sector_t capacity = drbd_get_capacity(mdev->this_bdev); - struct drbd_peer_request *e; + struct drbd_peer_request *peer_req; struct page *page; int dgs, ds, rr; void *dig_in = mdev->tconn->int_dig_in; @@ -1309,12 +1309,12 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD * "criss-cross" setup, that might cause write-out on some other DRBD, * which in turn might block on the other node at this very place. 
*/ - e = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO); - if (!e) + peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO); + if (!peer_req) return NULL; ds = data_size; - page = e->pages; + page = peer_req->pages; page_chain_for_each(page) { unsigned len = min_t(int, ds, PAGE_SIZE); data = kmap(page); @@ -1325,7 +1325,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, } kunmap(page); if (rr != len) { - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); if (!signal_pending(current)) dev_warn(DEV, "short read receiving data: read %d expected %d\n", rr, len); @@ -1335,18 +1335,18 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, } if (dgs) { - drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, e, dig_vv); + drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv); if (memcmp(dig_in, dig_vv, dgs)) { dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n", (unsigned long long)sector, data_size); drbd_bcast_ee(mdev, "digest failed", - dgs, dig_in, dig_vv, e); - drbd_free_ee(mdev, e); + dgs, dig_in, dig_vv, peer_req); + drbd_free_ee(mdev, peer_req); return NULL; } } mdev->recv_cnt += data_size>>9; - return e; + return peer_req; } /* drbd_drain_block() just takes a data block @@ -1445,20 +1445,20 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, * drbd_process_done_ee() by asender only */ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) { - struct drbd_peer_request *e = (struct drbd_peer_request *)w; - sector_t sector = e->i.sector; + struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; + sector_t sector = peer_req->i.sector; int ok; - D_ASSERT(drbd_interval_empty(&e->i)); + D_ASSERT(drbd_interval_empty(&peer_req->i)); - if (likely((e->flags & EE_WAS_ERROR) == 0)) { - drbd_set_in_sync(mdev, sector, e->i.size); - ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, e); + if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { + drbd_set_in_sync(mdev, sector, peer_req->i.size); + ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req); } else { /* Record failure to sync */ - drbd_rs_failed_io(mdev, sector, e->i.size); + drbd_rs_failed_io(mdev, sector, peer_req->i.size); - ok = drbd_send_ack(mdev, P_NEG_ACK, e); + ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req); } dec_unacked(mdev); @@ -1467,10 +1467,10 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int u static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local) { - struct drbd_peer_request *e; + struct drbd_peer_request *peer_req; - e = read_in_block(mdev, ID_SYNCER, sector, data_size); - if (!e) + peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size); + if (!peer_req) goto fail; dec_rs_pending(mdev); @@ -1479,23 +1479,23 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si /* corresponding dec_unacked() in e_end_resync_block() * respective _drbd_clear_done_ee */ - e->w.cb = e_end_resync_block; + peer_req->w.cb = e_end_resync_block; spin_lock_irq(&mdev->tconn->req_lock); - list_add(&e->w.list, &mdev->sync_ee); + list_add(&peer_req->w.list, &mdev->sync_ee); spin_unlock_irq(&mdev->tconn->req_lock); atomic_add(data_size >> 9, &mdev->rs_sect_ev); - if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) + if (drbd_submit_ee(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0) return true; /* don't care for the reason here */ dev_err(DEV, "submit failed, triggering re-connect\n"); spin_lock_irq(&mdev->tconn->req_lock); - 
list_del(&e->w.list); + list_del(&peer_req->w.list); spin_unlock_irq(&mdev->tconn->req_lock); - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); fail: put_ldev(mdev); return false; @@ -1582,21 +1582,21 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, */ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_peer_request *e = (struct drbd_peer_request *)w; - sector_t sector = e->i.sector; + struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; + sector_t sector = peer_req->i.sector; int ok = 1, pcmd; if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) { - if (likely((e->flags & EE_WAS_ERROR) == 0)) { + if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { pcmd = (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn <= C_PAUSED_SYNC_T && - e->flags & EE_MAY_SET_IN_SYNC) ? + peer_req->flags & EE_MAY_SET_IN_SYNC) ? P_RS_WRITE_ACK : P_WRITE_ACK; - ok &= drbd_send_ack(mdev, pcmd, e); + ok &= drbd_send_ack(mdev, pcmd, peer_req); if (pcmd == P_RS_WRITE_ACK) - drbd_set_in_sync(mdev, sector, e->i.size); + drbd_set_in_sync(mdev, sector, peer_req->i.size); } else { - ok = drbd_send_ack(mdev, P_NEG_ACK, e); + ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req); /* we expect it to be marked out of sync anyways... * maybe assert this? */ } @@ -1606,28 +1606,28 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ if (mdev->tconn->net_conf->two_primaries) { spin_lock_irq(&mdev->tconn->req_lock); - D_ASSERT(!drbd_interval_empty(&e->i)); - drbd_remove_epoch_entry_interval(mdev, e); + D_ASSERT(!drbd_interval_empty(&peer_req->i)); + drbd_remove_epoch_entry_interval(mdev, peer_req); spin_unlock_irq(&mdev->tconn->req_lock); } else - D_ASSERT(drbd_interval_empty(&e->i)); + D_ASSERT(drbd_interval_empty(&peer_req->i)); - drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); + drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); return ok; } static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused) { - struct drbd_peer_request *e = (struct drbd_peer_request *)w; + struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; int ok = 1; D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); - ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); + ok = drbd_send_ack(mdev, P_DISCARD_ACK, peer_req); spin_lock_irq(&mdev->tconn->req_lock); - D_ASSERT(!drbd_interval_empty(&e->i)); - drbd_remove_epoch_entry_interval(mdev, e); + D_ASSERT(!drbd_interval_empty(&peer_req->i)); + drbd_remove_epoch_entry_interval(mdev, peer_req); spin_unlock_irq(&mdev->tconn->req_lock); dec_unacked(mdev); @@ -1731,7 +1731,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) { sector_t sector; - struct drbd_peer_request *e; + struct drbd_peer_request *peer_req; struct p_data *p = &mdev->tconn->data.rbuf.data; int rw = WRITE; u32 dp_flags; @@ -1753,24 +1753,24 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, * the end of this function. 
*/ sector = be64_to_cpu(p->sector); - e = read_in_block(mdev, p->block_id, sector, data_size); - if (!e) { + peer_req = read_in_block(mdev, p->block_id, sector, data_size); + if (!peer_req) { put_ldev(mdev); return false; } - e->w.cb = e_end_block; + peer_req->w.cb = e_end_block; dp_flags = be32_to_cpu(p->dp_flags); rw |= wire_flags_to_bio(mdev, dp_flags); if (dp_flags & DP_MAY_SET_IN_SYNC) - e->flags |= EE_MAY_SET_IN_SYNC; + peer_req->flags |= EE_MAY_SET_IN_SYNC; spin_lock(&mdev->epoch_lock); - e->epoch = mdev->current_epoch; - atomic_inc(&e->epoch->epoch_size); - atomic_inc(&e->epoch->active); + peer_req->epoch = mdev->current_epoch; + atomic_inc(&peer_req->epoch->epoch_size); + atomic_inc(&peer_req->epoch->active); spin_unlock(&mdev->epoch_lock); /* I'm the receiver, I do hold a net_cnt reference. */ @@ -1779,7 +1779,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, } else { /* don't get the req_lock yet, * we may sleep in drbd_wait_peer_seq */ - const int size = e->i.size; + const int size = peer_req->i.size; const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags); DEFINE_WAIT(wait); int first; @@ -1856,8 +1856,8 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n", (unsigned long long)sector); inc_unacked(mdev); - e->w.cb = e_send_discard_ack; - list_add_tail(&e->w.list, &mdev->done_ee); + peer_req->w.cb = e_send_discard_ack; + list_add_tail(&peer_req->w.list, &mdev->done_ee); spin_unlock_irq(&mdev->tconn->req_lock); @@ -1894,10 +1894,10 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, } finish_wait(&mdev->misc_wait, &wait); - drbd_insert_interval(&mdev->write_requests, &e->i); + drbd_insert_interval(&mdev->write_requests, &peer_req->i); } - list_add(&e->w.list, &mdev->active_ee); + list_add(&peer_req->w.list, &mdev->active_ee); spin_unlock_irq(&mdev->tconn->req_lock); switch (mdev->tconn->net_conf->wire_protocol) { @@ -1909,7 +1909,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, case DRBD_PROT_B: /* I really don't like it that the receiver thread * sends on the msock, but anyways */ - drbd_send_ack(mdev, P_RECV_ACK, e); + drbd_send_ack(mdev, P_RECV_ACK, peer_req); break; case DRBD_PROT_A: /* nothing to do */ @@ -1918,28 +1918,28 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, if (mdev->state.pdsk < D_INCONSISTENT) { /* In case we have the only disk of the cluster, */ - drbd_set_out_of_sync(mdev, e->i.sector, e->i.size); - e->flags |= EE_CALL_AL_COMPLETE_IO; - e->flags &= ~EE_MAY_SET_IN_SYNC; - drbd_al_begin_io(mdev, e->i.sector); + drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size); + peer_req->flags |= EE_CALL_AL_COMPLETE_IO; + peer_req->flags &= ~EE_MAY_SET_IN_SYNC; + drbd_al_begin_io(mdev, peer_req->i.sector); } - if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0) + if (drbd_submit_ee(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0) return true; /* don't care for the reason here */ dev_err(DEV, "submit failed, triggering re-connect\n"); spin_lock_irq(&mdev->tconn->req_lock); - list_del(&e->w.list); - drbd_remove_epoch_entry_interval(mdev, e); + list_del(&peer_req->w.list); + drbd_remove_epoch_entry_interval(mdev, peer_req); spin_unlock_irq(&mdev->tconn->req_lock); - if (e->flags & EE_CALL_AL_COMPLETE_IO) - drbd_al_complete_io(mdev, e->i.sector); + if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) + drbd_al_complete_io(mdev, peer_req->i.sector); out_interrupted: - 
drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + EV_CLEANUP); + drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP); put_ldev(mdev); - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); return false; } @@ -2015,7 +2015,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, { sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); - struct drbd_peer_request *e; + struct drbd_peer_request *peer_req; struct digest_info *di = NULL; int size, verb; unsigned int fault_type; @@ -2066,21 +2066,21 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD * "criss-cross" setup, that might cause write-out on some other DRBD, * which in turn might block on the other node at this very place. */ - e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO); - if (!e) { + peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO); + if (!peer_req) { put_ldev(mdev); return false; } switch (cmd) { case P_DATA_REQUEST: - e->w.cb = w_e_end_data_req; + peer_req->w.cb = w_e_end_data_req; fault_type = DRBD_FAULT_DT_RD; /* application IO, don't drbd_rs_begin_io */ goto submit; case P_RS_DATA_REQUEST: - e->w.cb = w_e_end_rsdata_req; + peer_req->w.cb = w_e_end_rsdata_req; fault_type = DRBD_FAULT_RS_RD; /* used in the sector offset progress display */ mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); @@ -2096,21 +2096,21 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, di->digest_size = digest_size; di->digest = (((char *)di)+sizeof(struct digest_info)); - e->digest = di; - e->flags |= EE_HAS_DIGEST; + peer_req->digest = di; + peer_req->flags |= EE_HAS_DIGEST; if (drbd_recv(mdev, di->digest, digest_size) != digest_size) goto out_free_e; if (cmd == P_CSUM_RS_REQUEST) { D_ASSERT(mdev->tconn->agreed_pro_version >= 89); - e->w.cb = w_e_end_csum_rs_req; + peer_req->w.cb = w_e_end_csum_rs_req; /* used in the sector offset progress display */ mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); } else if (cmd == P_OV_REPLY) { /* track progress, we may need to throttle */ atomic_add(size >> 9, &mdev->rs_sect_in); - e->w.cb = w_e_end_ov_reply; + peer_req->w.cb = w_e_end_ov_reply; dec_rs_pending(mdev); /* drbd_rs_begin_io done when we sent this request, * but accounting still needs to be done. 
*/ @@ -2134,7 +2134,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, dev_info(DEV, "Online Verify start sector: %llu\n", (unsigned long long)sector); } - e->w.cb = w_e_end_ov_req; + peer_req->w.cb = w_e_end_ov_req; fault_type = DRBD_FAULT_RS_RD; break; @@ -2178,22 +2178,22 @@ submit_for_resync: submit: inc_unacked(mdev); spin_lock_irq(&mdev->tconn->req_lock); - list_add_tail(&e->w.list, &mdev->read_ee); + list_add_tail(&peer_req->w.list, &mdev->read_ee); spin_unlock_irq(&mdev->tconn->req_lock); - if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) + if (drbd_submit_ee(mdev, peer_req, READ, fault_type) == 0) return true; /* don't care for the reason here */ dev_err(DEV, "submit failed, triggering re-connect\n"); spin_lock_irq(&mdev->tconn->req_lock); - list_del(&e->w.list); + list_del(&peer_req->w.list); spin_unlock_irq(&mdev->tconn->req_lock); /* no drbd_rs_complete_io(), we are dropping the connection anyways */ out_free_e: put_ldev(mdev); - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); return false; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 06628d1504b8..f13d56c2bf05 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -80,47 +80,47 @@ void drbd_md_io_complete(struct bio *bio, int error) /* reads on behalf of the partner, * "submitted" by the receiver */ -void drbd_endio_read_sec_final(struct drbd_peer_request *e) __releases(local) +void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local) { unsigned long flags = 0; - struct drbd_conf *mdev = e->mdev; + struct drbd_conf *mdev = peer_req->mdev; spin_lock_irqsave(&mdev->tconn->req_lock, flags); - mdev->read_cnt += e->i.size >> 9; - list_del(&e->w.list); + mdev->read_cnt += peer_req->i.size >> 9; + list_del(&peer_req->w.list); if (list_empty(&mdev->read_ee)) wake_up(&mdev->ee_wait); - if (test_bit(__EE_WAS_ERROR, &e->flags)) + if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) __drbd_chk_io_error(mdev, false); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); - drbd_queue_work(&mdev->tconn->data.work, &e->w); + drbd_queue_work(&mdev->tconn->data.work, &peer_req->w); put_ldev(mdev); } /* writes on behalf of the partner, or resync writes, * "submitted" by the receiver, final stage. */ -static void drbd_endio_write_sec_final(struct drbd_peer_request *e) __releases(local) +static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local) { unsigned long flags = 0; - struct drbd_conf *mdev = e->mdev; + struct drbd_conf *mdev = peer_req->mdev; sector_t e_sector; int do_wake; u64 block_id; int do_al_complete_io; - /* after we moved e to done_ee, + /* after we moved peer_req to done_ee, * we may no longer access it, * it may be freed/reused already! 
* (as soon as we release the req_lock) */ - e_sector = e->i.sector; - do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO; - block_id = e->block_id; + e_sector = peer_req->i.sector; + do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO; + block_id = peer_req->block_id; spin_lock_irqsave(&mdev->tconn->req_lock, flags); - mdev->writ_cnt += e->i.size >> 9; - list_del(&e->w.list); /* has been on active_ee or sync_ee */ - list_add_tail(&e->w.list, &mdev->done_ee); + mdev->writ_cnt += peer_req->i.size >> 9; + list_del(&peer_req->w.list); /* has been on active_ee or sync_ee */ + list_add_tail(&peer_req->w.list, &mdev->done_ee); /* * Do not remove from the write_requests tree here: we did not send the @@ -132,7 +132,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *e) __releases(l do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee); - if (test_bit(__EE_WAS_ERROR, &e->flags)) + if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) __drbd_chk_io_error(mdev, false); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); @@ -154,20 +154,20 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *e) __releases(l */ void drbd_endio_sec(struct bio *bio, int error) { - struct drbd_peer_request *e = bio->bi_private; - struct drbd_conf *mdev = e->mdev; + struct drbd_peer_request *peer_req = bio->bi_private; + struct drbd_conf *mdev = peer_req->mdev; int uptodate = bio_flagged(bio, BIO_UPTODATE); int is_write = bio_data_dir(bio) == WRITE; if (error && __ratelimit(&drbd_ratelimit_state)) dev_warn(DEV, "%s: error=%d s=%llus\n", is_write ? "write" : "read", error, - (unsigned long long)e->i.sector); + (unsigned long long)peer_req->i.sector); if (!error && !uptodate) { if (__ratelimit(&drbd_ratelimit_state)) dev_warn(DEV, "%s: setting error to -EIO s=%llus\n", is_write ? "write" : "read", - (unsigned long long)e->i.sector); + (unsigned long long)peer_req->i.sector); /* strange behavior of some lower level drivers... * fail the request by clearing the uptodate flag, * but do not return any error?! 
*/ @@ -175,14 +175,14 @@ void drbd_endio_sec(struct bio *bio, int error) } if (error) - set_bit(__EE_WAS_ERROR, &e->flags); + set_bit(__EE_WAS_ERROR, &peer_req->flags); bio_put(bio); /* no need for the bio anymore */ - if (atomic_dec_and_test(&e->pending_bios)) { + if (atomic_dec_and_test(&peer_req->pending_bios)) { if (is_write) - drbd_endio_write_sec_final(e); + drbd_endio_write_sec_final(peer_req); else - drbd_endio_read_sec_final(e); + drbd_endio_read_sec_final(peer_req); } } @@ -248,11 +248,11 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, - struct drbd_peer_request *e, void *digest) + struct drbd_peer_request *peer_req, void *digest) { struct hash_desc desc; struct scatterlist sg; - struct page *page = e->pages; + struct page *page = peer_req->pages; struct page *tmp; unsigned len; @@ -269,7 +269,7 @@ void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, page = tmp; } /* and now the last, possibly only partially used page */ - len = e->i.size & (PAGE_SIZE - 1); + len = peer_req->i.size & (PAGE_SIZE - 1); sg_set_page(&sg, page, len ?: PAGE_SIZE, 0); crypto_hash_update(&desc, &sg, sg.length); crypto_hash_final(&desc, digest); @@ -298,7 +298,8 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * /* TODO merge common code with w_e_end_ov_req */ int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); + struct drbd_peer_request *peer_req = + container_of(w, struct drbd_peer_request, w); int digest_size; void *digest; int ok = 1; @@ -306,22 +307,22 @@ int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if (unlikely(cancel)) goto out; - if (likely((e->flags & EE_WAS_ERROR) != 0)) + if (likely((peer_req->flags & EE_WAS_ERROR) != 0)) goto out; digest_size = crypto_hash_digestsize(mdev->csums_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { - sector_t sector = e->i.sector; - unsigned int size = e->i.size; - drbd_csum_ee(mdev, mdev->csums_tfm, e, digest); + sector_t sector = peer_req->i.sector; + unsigned int size = peer_req->i.size; + drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest); /* Free e and pages before send. * In case we block on congestion, we could otherwise run into * some distributed deadlock, if the other side blocks on * congestion as well, because our receiver blocks in * drbd_pp_alloc due to pp_in_use > max_buffers. */ - drbd_free_ee(mdev, e); - e = NULL; + drbd_free_ee(mdev, peer_req); + peer_req = NULL; inc_rs_pending(mdev); ok = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, @@ -333,8 +334,8 @@ int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } out: - if (e) - drbd_free_ee(mdev, e); + if (peer_req) + drbd_free_ee(mdev, peer_req); if (unlikely(!ok)) dev_err(DEV, "drbd_send_drequest(..., csum) failed\n"); @@ -345,7 +346,7 @@ out: static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) { - struct drbd_peer_request *e; + struct drbd_peer_request *peer_req; if (!get_ldev(mdev)) return -EIO; @@ -355,17 +356,17 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) /* GFP_TRY, because if there is no memory available right now, this may * be rescheduled for later. It is "only" background resync, after all. 
*/ - e = drbd_alloc_ee(mdev, ID_SYNCER /* unused */, sector, size, GFP_TRY); - if (!e) + peer_req = drbd_alloc_ee(mdev, ID_SYNCER /* unused */, sector, size, GFP_TRY); + if (!peer_req) goto defer; - e->w.cb = w_e_send_csum; + peer_req->w.cb = w_e_send_csum; spin_lock_irq(&mdev->tconn->req_lock); - list_add(&e->w.list, &mdev->read_ee); + list_add(&peer_req->w.list, &mdev->read_ee); spin_unlock_irq(&mdev->tconn->req_lock); atomic_add(size >> 9, &mdev->rs_sect_ev); - if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0) + if (drbd_submit_ee(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0) return 0; /* If it failed because of ENOMEM, retry should help. If it failed @@ -373,10 +374,10 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) * retry may or may not help. * If it does not, you may need to force disconnect. */ spin_lock_irq(&mdev->tconn->req_lock); - list_del(&e->w.list); + list_del(&peer_req->w.list); spin_unlock_irq(&mdev->tconn->req_lock); - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); defer: put_ldev(mdev); return -EAGAIN; @@ -901,19 +902,19 @@ out: } /* helper */ -static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *e) +static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *peer_req) { - if (drbd_ee_has_active_page(e)) { + if (drbd_ee_has_active_page(peer_req)) { /* This might happen if sendpage() has not finished */ - int i = (e->i.size + PAGE_SIZE -1) >> PAGE_SHIFT; + int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT; atomic_add(i, &mdev->pp_in_use_by_net); atomic_sub(i, &mdev->pp_in_use); spin_lock_irq(&mdev->tconn->req_lock); - list_add_tail(&e->w.list, &mdev->net_ee); + list_add_tail(&peer_req->w.list, &mdev->net_ee); spin_unlock_irq(&mdev->tconn->req_lock); wake_up(&drbd_pp_wait); } else - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); } /** @@ -924,28 +925,28 @@ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_requ */ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); + struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); int ok; if (unlikely(cancel)) { - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); dec_unacked(mdev); return 1; } - if (likely((e->flags & EE_WAS_ERROR) == 0)) { - ok = drbd_send_block(mdev, P_DATA_REPLY, e); + if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { + ok = drbd_send_block(mdev, P_DATA_REPLY, peer_req); } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegDReply. 
sector=%llus.\n", - (unsigned long long)e->i.sector); + (unsigned long long)peer_req->i.sector); - ok = drbd_send_ack(mdev, P_NEG_DREPLY, e); + ok = drbd_send_ack(mdev, P_NEG_DREPLY, peer_req); } dec_unacked(mdev); - move_to_net_ee_or_free(mdev, e); + move_to_net_ee_or_free(mdev, peer_req); if (unlikely(!ok)) dev_err(DEV, "drbd_send_block() failed\n"); @@ -960,26 +961,26 @@ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) */ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); + struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); int ok; if (unlikely(cancel)) { - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); dec_unacked(mdev); return 1; } if (get_ldev_if_state(mdev, D_FAILED)) { - drbd_rs_complete_io(mdev, e->i.sector); + drbd_rs_complete_io(mdev, peer_req->i.sector); put_ldev(mdev); } if (mdev->state.conn == C_AHEAD) { - ok = drbd_send_ack(mdev, P_RS_CANCEL, e); - } else if (likely((e->flags & EE_WAS_ERROR) == 0)) { + ok = drbd_send_ack(mdev, P_RS_CANCEL, peer_req); + } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { if (likely(mdev->state.pdsk >= D_INCONSISTENT)) { inc_rs_pending(mdev); - ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); + ok = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req); } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Not sending RSDataReply, " @@ -989,17 +990,17 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } else { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegRSDReply. sector %llus.\n", - (unsigned long long)e->i.sector); + (unsigned long long)peer_req->i.sector); - ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); + ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req); /* update resync data with failure */ - drbd_rs_failed_io(mdev, e->i.sector, e->i.size); + drbd_rs_failed_io(mdev, peer_req->i.sector, peer_req->i.size); } dec_unacked(mdev); - move_to_net_ee_or_free(mdev, e); + move_to_net_ee_or_free(mdev, peer_req); if (unlikely(!ok)) dev_err(DEV, "drbd_send_block() failed\n"); @@ -1008,26 +1009,26 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); + struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct digest_info *di; int digest_size; void *digest = NULL; int ok, eq = 0; if (unlikely(cancel)) { - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); dec_unacked(mdev); return 1; } if (get_ldev(mdev)) { - drbd_rs_complete_io(mdev, e->i.sector); + drbd_rs_complete_io(mdev, peer_req->i.sector); put_ldev(mdev); } - di = e->digest; + di = peer_req->digest; - if (likely((e->flags & EE_WAS_ERROR) == 0)) { + if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { /* quick hack to try to avoid a race against reconfiguration. 
* a real fix would be much more involved, * introducing more locking mechanisms */ @@ -1037,31 +1038,31 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) digest = kmalloc(digest_size, GFP_NOIO); } if (digest) { - drbd_csum_ee(mdev, mdev->csums_tfm, e, digest); + drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest); eq = !memcmp(digest, di->digest, digest_size); kfree(digest); } if (eq) { - drbd_set_in_sync(mdev, e->i.sector, e->i.size); + drbd_set_in_sync(mdev, peer_req->i.sector, peer_req->i.size); /* rs_same_csums unit is BM_BLOCK_SIZE */ - mdev->rs_same_csum += e->i.size >> BM_BLOCK_SHIFT; - ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e); + mdev->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT; + ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req); } else { inc_rs_pending(mdev); - e->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */ - e->flags &= ~EE_HAS_DIGEST; /* This e no longer has a digest pointer */ + peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */ + peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */ kfree(di); - ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); + ok = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req); } } else { - ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); + ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req); if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n"); } dec_unacked(mdev); - move_to_net_ee_or_free(mdev, e); + move_to_net_ee_or_free(mdev, peer_req); if (unlikely(!ok)) dev_err(DEV, "drbd_send_block/ack() failed\n"); @@ -1071,9 +1072,9 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) /* TODO merge common code with w_e_send_csum */ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); - sector_t sector = e->i.sector; - unsigned int size = e->i.size; + struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + sector_t sector = peer_req->i.sector; + unsigned int size = peer_req->i.size; int digest_size; void *digest; int ok = 1; @@ -1088,8 +1089,8 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) goto out; } - if (likely(!(e->flags & EE_WAS_ERROR))) - drbd_csum_ee(mdev, mdev->verify_tfm, e, digest); + if (likely(!(peer_req->flags & EE_WAS_ERROR))) + drbd_csum_ee(mdev, mdev->verify_tfm, peer_req, digest); else memset(digest, 0, digest_size); @@ -1098,8 +1099,8 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * some distributed deadlock, if the other side blocks on * congestion as well, because our receiver blocks in * drbd_pp_alloc due to pp_in_use > max_buffers. 
*/ - drbd_free_ee(mdev, e); - e = NULL; + drbd_free_ee(mdev, peer_req); + peer_req = NULL; inc_rs_pending(mdev); ok = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, @@ -1109,8 +1110,8 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) kfree(digest); out: - if (e) - drbd_free_ee(mdev, e); + if (peer_req) + drbd_free_ee(mdev, peer_req); dec_unacked(mdev); return ok; } @@ -1128,16 +1129,16 @@ void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { - struct drbd_peer_request *e = container_of(w, struct drbd_peer_request, w); + struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct digest_info *di; void *digest; - sector_t sector = e->i.sector; - unsigned int size = e->i.size; + sector_t sector = peer_req->i.sector; + unsigned int size = peer_req->i.size; int digest_size; int ok, eq = 0; if (unlikely(cancel)) { - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); dec_unacked(mdev); return 1; } @@ -1145,17 +1146,17 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all * the resync lru has been cleaned up already */ if (get_ldev(mdev)) { - drbd_rs_complete_io(mdev, e->i.sector); + drbd_rs_complete_io(mdev, peer_req->i.sector); put_ldev(mdev); } - di = e->digest; + di = peer_req->digest; - if (likely((e->flags & EE_WAS_ERROR) == 0)) { + if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { digest_size = crypto_hash_digestsize(mdev->verify_tfm); digest = kmalloc(digest_size, GFP_NOIO); if (digest) { - drbd_csum_ee(mdev, mdev->verify_tfm, e, digest); + drbd_csum_ee(mdev, mdev->verify_tfm, peer_req, digest); D_ASSERT(digest_size == di->digest_size); eq = !memcmp(digest, di->digest, digest_size); @@ -1168,7 +1169,7 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * some distributed deadlock, if the other side blocks on * congestion as well, because our receiver blocks in * drbd_pp_alloc due to pp_in_use > max_buffers. 
*/ - drbd_free_ee(mdev, e); + drbd_free_ee(mdev, peer_req); if (!eq) drbd_ov_oos_found(mdev, sector, size); else From b8907339534b8d17f6aad9e9cc98d490aa0c6137 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 27 Jan 2011 14:07:51 +0100 Subject: [PATCH 0061/5831] drbd: Moved the state functions into its own source file Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/Makefile | 2 +- drivers/block/drbd/drbd_int.h | 46 +- drivers/block/drbd/drbd_main.c | 1179 +----------------------------- drivers/block/drbd/drbd_state.c | 1217 +++++++++++++++++++++++++++++++ drivers/block/drbd/drbd_state.h | 101 +++ 5 files changed, 1326 insertions(+), 1219 deletions(-) create mode 100644 drivers/block/drbd/drbd_state.c create mode 100644 drivers/block/drbd/drbd_state.h diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile index cacbb04f285d..06fb4453734c 100644 --- a/drivers/block/drbd/Makefile +++ b/drivers/block/drbd/Makefile @@ -1,6 +1,6 @@ drbd-y := drbd_bitmap.o drbd_proc.o drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o drbd-y += drbd_main.o drbd_strings.o drbd_nl.o -drbd-y += drbd_interval.o +drbd-y += drbd_interval.o drbd_state.o obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 302ccc6d9432..98addab2c928 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -43,6 +43,8 @@ #include #include #include +#include +#include "drbd_state.h" #ifdef __CHECKER__ # define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr"))) @@ -1120,35 +1122,12 @@ static inline void drbd_put_data_sock(struct drbd_conf *mdev) /* drbd_main.c */ -enum chg_state_flags { - CS_HARD = 1, - CS_VERBOSE = 2, - CS_WAIT_COMPLETE = 4, - CS_SERIALIZE = 8, - CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, -}; - enum dds_flags { DDSF_FORCED = 1, DDSF_NO_RESYNC = 2, /* Do not run a resync for the new space */ }; extern void drbd_init_set_defaults(struct drbd_conf *mdev); -extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, - enum chg_state_flags f, - union drbd_state mask, - union drbd_state val); -extern void drbd_force_state(struct drbd_conf *, union drbd_state, - union drbd_state); -extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *, - union drbd_state, - union drbd_state, - enum chg_state_flags); -extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state, - enum chg_state_flags, - struct completion *done); -extern void print_st_err(struct drbd_conf *, union drbd_state, - union drbd_state, int); extern int drbd_thread_start(struct drbd_thread *thi); extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); #ifdef CONFIG_SMP @@ -1712,6 +1691,10 @@ static inline int drbd_ee_has_active_page(struct drbd_peer_request *peer_req) } + + + + static inline void drbd_state_lock(struct drbd_conf *mdev) { wait_event(mdev->misc_wait, @@ -1737,23 +1720,6 @@ _drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, return rv; } -/** - * drbd_request_state() - Reqest a state change - * @mdev: DRBD device. - * @mask: mask of state bits to change. - * @val: value of new state bits. - * - * This is the most graceful way of requesting a state change. It is verbose - * quite verbose in case the state change is not possible, and all those - * state changes are globally serialized. 
- */ -static inline int drbd_request_state(struct drbd_conf *mdev, - union drbd_state mask, - union drbd_state val) -{ - return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); -} - #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where) { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 7728d1613406..4b39b3d0dd55 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -56,14 +56,6 @@ #include "drbd_vli.h" -struct after_state_chg_work { - struct drbd_work w; - union drbd_state os; - union drbd_state ns; - enum chg_state_flags flags; - struct completion *done; -}; - static DEFINE_MUTEX(drbd_main_mutex); int drbdd_init(struct drbd_thread *); int drbd_worker(struct drbd_thread *); @@ -72,9 +64,6 @@ int drbd_asender(struct drbd_thread *); int drbd_init(void); static int drbd_open(struct block_device *bdev, fmode_t mode); static int drbd_release(struct gendisk *gd, fmode_t mode); -static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused); -static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, enum chg_state_flags flags); static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); static void md_sync_timer_fn(unsigned long data); static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); @@ -340,7 +329,7 @@ bail: * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO, * RESTART_FROZEN_DISK_IO. */ -static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) +void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) { struct drbd_tl_epoch *b, *tmp, **pn; struct list_head *le, *tle, carry_reads; @@ -450,1172 +439,6 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) spin_unlock_irq(&mdev->tconn->req_lock); } -/** - * cl_wide_st_chg() - true if the state change is a cluster wide one - * @mdev: DRBD device. - * @os: old (current) state. - * @ns: new (wanted) state. - */ -static int cl_wide_st_chg(struct drbd_conf *mdev, - union drbd_state os, union drbd_state ns) -{ - return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED && - ((os.role != R_PRIMARY && ns.role == R_PRIMARY) || - (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || - (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || - (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) || - (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || - (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); -} - -enum drbd_state_rv -drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, - union drbd_state mask, union drbd_state val) -{ - unsigned long flags; - union drbd_state os, ns; - enum drbd_state_rv rv; - - spin_lock_irqsave(&mdev->tconn->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - rv = _drbd_set_state(mdev, ns, f, NULL); - ns = mdev->state; - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); - - return rv; -} - -/** - * drbd_force_state() - Impose a change which happens outside our control on our state - * @mdev: DRBD device. - * @mask: mask of state bits to change. - * @val: value of new state bits. 
- */ -void drbd_force_state(struct drbd_conf *mdev, - union drbd_state mask, union drbd_state val) -{ - drbd_change_state(mdev, CS_HARD, mask, val); -} - -static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); -static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *, - union drbd_state, - union drbd_state); -static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, const char **warn_sync_abort); -int drbd_send_state_req(struct drbd_conf *, - union drbd_state, union drbd_state); - -static enum drbd_state_rv -_req_st_cond(struct drbd_conf *mdev, union drbd_state mask, - union drbd_state val) -{ - union drbd_state os, ns; - unsigned long flags; - enum drbd_state_rv rv; - - if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) - return SS_CW_SUCCESS; - - if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags)) - return SS_CW_FAILED_BY_PEER; - - rv = 0; - spin_lock_irqsave(&mdev->tconn->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, os, ns, NULL); - - if (!cl_wide_st_chg(mdev, os, ns)) - rv = SS_CW_NO_NEED; - if (!rv) { - rv = is_valid_state(mdev, ns); - if (rv == SS_SUCCESS) { - rv = is_valid_state_transition(mdev, ns, os); - if (rv == SS_SUCCESS) - rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ - } - } - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); - - return rv; -} - -/** - * drbd_req_state() - Perform an eventually cluster wide state change - * @mdev: DRBD device. - * @mask: mask of state bits to change. - * @val: value of new state bits. - * @f: flags - * - * Should not be called directly, use drbd_request_state() or - * _drbd_request_state(). - */ -static enum drbd_state_rv -drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, - union drbd_state val, enum chg_state_flags f) -{ - struct completion done; - unsigned long flags; - union drbd_state os, ns; - enum drbd_state_rv rv; - - init_completion(&done); - - if (f & CS_SERIALIZE) - mutex_lock(&mdev->state_mutex); - - spin_lock_irqsave(&mdev->tconn->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, os, ns, NULL); - - if (cl_wide_st_chg(mdev, os, ns)) { - rv = is_valid_state(mdev, ns); - if (rv == SS_SUCCESS) - rv = is_valid_state_transition(mdev, ns, os); - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); - - if (rv < SS_SUCCESS) { - if (f & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); - goto abort; - } - - drbd_state_lock(mdev); - if (!drbd_send_state_req(mdev, mask, val)) { - drbd_state_unlock(mdev); - rv = SS_CW_FAILED_BY_PEER; - if (f & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); - goto abort; - } - - wait_event(mdev->state_wait, - (rv = _req_st_cond(mdev, mask, val))); - - if (rv < SS_SUCCESS) { - drbd_state_unlock(mdev); - if (f & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); - goto abort; - } - spin_lock_irqsave(&mdev->tconn->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - rv = _drbd_set_state(mdev, ns, f, &done); - drbd_state_unlock(mdev); - } else { - rv = _drbd_set_state(mdev, ns, f, &done); - } - - spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); - - if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) { - D_ASSERT(current != mdev->tconn->worker.task); - wait_for_completion(&done); - } - -abort: - if (f & CS_SERIALIZE) - mutex_unlock(&mdev->state_mutex); - - return rv; -} - -/** - * _drbd_request_state() - Request a state change (with flags) - * @mdev: DRBD device. 
- * @mask: mask of state bits to change. - * @val: value of new state bits. - * @f: flags - * - * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE - * flag, or when logging of failed state change requests is not desired. - */ -enum drbd_state_rv -_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, - union drbd_state val, enum chg_state_flags f) -{ - enum drbd_state_rv rv; - - wait_event(mdev->state_wait, - (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE); - - return rv; -} - -static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) -{ - dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n", - name, - drbd_conn_str(ns.conn), - drbd_role_str(ns.role), - drbd_role_str(ns.peer), - drbd_disk_str(ns.disk), - drbd_disk_str(ns.pdsk), - is_susp(ns) ? 's' : 'r', - ns.aftr_isp ? 'a' : '-', - ns.peer_isp ? 'p' : '-', - ns.user_isp ? 'u' : '-' - ); -} - -void print_st_err(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, enum drbd_state_rv err) -{ - if (err == SS_IN_TRANSIENT_STATE) - return; - dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err)); - print_st(mdev, " state", os); - print_st(mdev, "wanted", ns); -} - - -/** - * is_valid_state() - Returns an SS_ error code if ns is not valid - * @mdev: DRBD device. - * @ns: State to consider. - */ -static enum drbd_state_rv -is_valid_state(struct drbd_conf *mdev, union drbd_state ns) -{ - /* See drbd_state_sw_errors in drbd_strings.c */ - - enum drbd_fencing_p fp; - enum drbd_state_rv rv = SS_SUCCESS; - - fp = FP_DONT_CARE; - if (get_ldev(mdev)) { - fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - } - - if (get_net_conf(mdev->tconn)) { - if (!mdev->tconn->net_conf->two_primaries && - ns.role == R_PRIMARY && ns.peer == R_PRIMARY) - rv = SS_TWO_PRIMARIES; - put_net_conf(mdev->tconn); - } - - if (rv <= 0) - /* already found a reason to abort */; - else if (ns.role == R_SECONDARY && mdev->open_cnt) - rv = SS_DEVICE_IN_USE; - - else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE) - rv = SS_NO_UP_TO_DATE_DISK; - - else if (fp >= FP_RESOURCE && - ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN) - rv = SS_PRIMARY_NOP; - - else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT) - rv = SS_NO_UP_TO_DATE_DISK; - - else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT) - rv = SS_NO_LOCAL_DISK; - - else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT) - rv = SS_NO_REMOTE_DISK; - - else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) - rv = SS_NO_UP_TO_DATE_DISK; - - else if ((ns.conn == C_CONNECTED || - ns.conn == C_WF_BITMAP_S || - ns.conn == C_SYNC_SOURCE || - ns.conn == C_PAUSED_SYNC_S) && - ns.disk == D_OUTDATED) - rv = SS_CONNECTED_OUTDATES; - - else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - (mdev->sync_conf.verify_alg[0] == 0)) - rv = SS_NO_VERIFY_ALG; - - else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - mdev->tconn->agreed_pro_version < 88) - rv = SS_NOT_SUPPORTED; - - else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) - rv = SS_CONNECTED_OUTDATES; - - return rv; -} - -/** - * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible - * @mdev: DRBD device. - * @ns: new state. - * @os: old state. 
- */ -static enum drbd_state_rv -is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, - union drbd_state os) -{ - enum drbd_state_rv rv = SS_SUCCESS; - - if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) && - os.conn > C_CONNECTED) - rv = SS_RESYNC_RUNNING; - - if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE) - rv = SS_ALREADY_STANDALONE; - - if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS) - rv = SS_IS_DISKLESS; - - if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED) - rv = SS_NO_NET_CONFIG; - - if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING) - rv = SS_LOWER_THAN_OUTDATED; - - if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED) - rv = SS_IN_TRANSIENT_STATE; - - if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) - rv = SS_IN_TRANSIENT_STATE; - - if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) - rv = SS_NEED_CONNECTION; - - if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - ns.conn != os.conn && os.conn > C_CONNECTED) - rv = SS_RESYNC_RUNNING; - - if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) && - os.conn < C_CONNECTED) - rv = SS_NEED_CONNECTION; - - if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE) - && os.conn < C_WF_REPORT_PARAMS) - rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */ - - return rv; -} - -/** - * sanitize_state() - Resolves implicitly necessary additional changes to a state transition - * @mdev: DRBD device. - * @os: old state. - * @ns: new state. - * @warn_sync_abort: - * - * When we loose connection, we have to set the state of the peers disk (pdsk) - * to D_UNKNOWN. This rule and many more along those lines are in this function. - */ -static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, const char **warn_sync_abort) -{ - enum drbd_fencing_p fp; - enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; - - fp = FP_DONT_CARE; - if (get_ldev(mdev)) { - fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - } - - /* Disallow Network errors to configure a device's network part */ - if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) && - os.conn <= C_DISCONNECTING) - ns.conn = os.conn; - - /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow. - * If you try to go into some Sync* state, that shall fail (elsewhere). */ - if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && - ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN) - ns.conn = os.conn; - - /* we cannot fail (again) if we already detached */ - if (ns.disk == D_FAILED && os.disk == D_DISKLESS) - ns.disk = D_DISKLESS; - - /* if we are only D_ATTACHING yet, - * we can (and should) go directly to D_DISKLESS. 
*/ - if (ns.disk == D_FAILED && os.disk == D_ATTACHING) - ns.disk = D_DISKLESS; - - /* After C_DISCONNECTING only C_STANDALONE may follow */ - if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) - ns.conn = os.conn; - - if (ns.conn < C_CONNECTED) { - ns.peer_isp = 0; - ns.peer = R_UNKNOWN; - if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT) - ns.pdsk = D_UNKNOWN; - } - - /* Clear the aftr_isp when becoming unconfigured */ - if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY) - ns.aftr_isp = 0; - - /* Abort resync if a disk fails/detaches */ - if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED && - (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { - if (warn_sync_abort) - *warn_sync_abort = - os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ? - "Online-verify" : "Resync"; - ns.conn = C_CONNECTED; - } - - /* Connection breaks down before we finished "Negotiating" */ - if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && - get_ldev_if_state(mdev, D_NEGOTIATING)) { - if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) { - ns.disk = mdev->new_state_tmp.disk; - ns.pdsk = mdev->new_state_tmp.pdsk; - } else { - dev_alert(DEV, "Connection lost while negotiating, no data!\n"); - ns.disk = D_DISKLESS; - ns.pdsk = D_UNKNOWN; - } - put_ldev(mdev); - } - - /* D_CONSISTENT and D_OUTDATED vanish when we get connected */ - if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) { - if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) - ns.disk = D_UP_TO_DATE; - if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED) - ns.pdsk = D_UP_TO_DATE; - } - - /* Implications of the connection stat on the disk states */ - disk_min = D_DISKLESS; - disk_max = D_UP_TO_DATE; - pdsk_min = D_INCONSISTENT; - pdsk_max = D_UNKNOWN; - switch ((enum drbd_conns)ns.conn) { - case C_WF_BITMAP_T: - case C_PAUSED_SYNC_T: - case C_STARTING_SYNC_T: - case C_WF_SYNC_UUID: - case C_BEHIND: - disk_min = D_INCONSISTENT; - disk_max = D_OUTDATED; - pdsk_min = D_UP_TO_DATE; - pdsk_max = D_UP_TO_DATE; - break; - case C_VERIFY_S: - case C_VERIFY_T: - disk_min = D_UP_TO_DATE; - disk_max = D_UP_TO_DATE; - pdsk_min = D_UP_TO_DATE; - pdsk_max = D_UP_TO_DATE; - break; - case C_CONNECTED: - disk_min = D_DISKLESS; - disk_max = D_UP_TO_DATE; - pdsk_min = D_DISKLESS; - pdsk_max = D_UP_TO_DATE; - break; - case C_WF_BITMAP_S: - case C_PAUSED_SYNC_S: - case C_STARTING_SYNC_S: - case C_AHEAD: - disk_min = D_UP_TO_DATE; - disk_max = D_UP_TO_DATE; - pdsk_min = D_INCONSISTENT; - pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. 
But explicit outdate necessary*/ - break; - case C_SYNC_TARGET: - disk_min = D_INCONSISTENT; - disk_max = D_INCONSISTENT; - pdsk_min = D_UP_TO_DATE; - pdsk_max = D_UP_TO_DATE; - break; - case C_SYNC_SOURCE: - disk_min = D_UP_TO_DATE; - disk_max = D_UP_TO_DATE; - pdsk_min = D_INCONSISTENT; - pdsk_max = D_INCONSISTENT; - break; - case C_STANDALONE: - case C_DISCONNECTING: - case C_UNCONNECTED: - case C_TIMEOUT: - case C_BROKEN_PIPE: - case C_NETWORK_FAILURE: - case C_PROTOCOL_ERROR: - case C_TEAR_DOWN: - case C_WF_CONNECTION: - case C_WF_REPORT_PARAMS: - case C_MASK: - break; - } - if (ns.disk > disk_max) - ns.disk = disk_max; - - if (ns.disk < disk_min) { - dev_warn(DEV, "Implicitly set disk from %s to %s\n", - drbd_disk_str(ns.disk), drbd_disk_str(disk_min)); - ns.disk = disk_min; - } - if (ns.pdsk > pdsk_max) - ns.pdsk = pdsk_max; - - if (ns.pdsk < pdsk_min) { - dev_warn(DEV, "Implicitly set pdsk from %s to %s\n", - drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min)); - ns.pdsk = pdsk_min; - } - - if (fp == FP_STONITH && - (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) && - !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)) - ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ - - if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO && - (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) && - !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE)) - ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ - - if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { - if (ns.conn == C_SYNC_SOURCE) - ns.conn = C_PAUSED_SYNC_S; - if (ns.conn == C_SYNC_TARGET) - ns.conn = C_PAUSED_SYNC_T; - } else { - if (ns.conn == C_PAUSED_SYNC_S) - ns.conn = C_SYNC_SOURCE; - if (ns.conn == C_PAUSED_SYNC_T) - ns.conn = C_SYNC_TARGET; - } - - return ns; -} - -/* helper for __drbd_set_state */ -static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) -{ - if (mdev->tconn->agreed_pro_version < 90) - mdev->ov_start_sector = 0; - mdev->rs_total = drbd_bm_bits(mdev); - mdev->ov_position = 0; - if (cs == C_VERIFY_T) { - /* starting online verify from an arbitrary position - * does not fit well into the existing protocol. - * on C_VERIFY_T, we initialize ov_left and friends - * implicitly in receive_DataRequest once the - * first P_OV_REQUEST is received */ - mdev->ov_start_sector = ~(sector_t)0; - } else { - unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector); - if (bit >= mdev->rs_total) { - mdev->ov_start_sector = - BM_BIT_TO_SECT(mdev->rs_total - 1); - mdev->rs_total = 1; - } else - mdev->rs_total -= bit; - mdev->ov_position = mdev->ov_start_sector; - } - mdev->ov_left = mdev->rs_total; -} - -static void drbd_resume_al(struct drbd_conf *mdev) -{ - if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags)) - dev_info(DEV, "Resumed AL updates\n"); -} - -/** - * __drbd_set_state() - Set a new DRBD state - * @mdev: DRBD device. - * @ns: new state. - * @flags: Flags - * @done: Optional completion, that will get completed after the after_state_ch() finished - * - * Caller needs to hold req_lock, and global_state_lock. Do not call directly. 
- */ -enum drbd_state_rv -__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, - enum chg_state_flags flags, struct completion *done) -{ - union drbd_state os; - enum drbd_state_rv rv = SS_SUCCESS; - const char *warn_sync_abort = NULL; - struct after_state_chg_work *ascw; - - os = mdev->state; - - ns = sanitize_state(mdev, os, ns, &warn_sync_abort); - - if (ns.i == os.i) - return SS_NOTHING_TO_DO; - - if (!(flags & CS_HARD)) { - /* pre-state-change checks ; only look at ns */ - /* See drbd_state_sw_errors in drbd_strings.c */ - - rv = is_valid_state(mdev, ns); - if (rv < SS_SUCCESS) { - /* If the old state was illegal as well, then let - this happen...*/ - - if (is_valid_state(mdev, os) == rv) - rv = is_valid_state_transition(mdev, ns, os); - } else - rv = is_valid_state_transition(mdev, ns, os); - } - - if (rv < SS_SUCCESS) { - if (flags & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); - return rv; - } - - if (warn_sync_abort) - dev_warn(DEV, "%s aborted.\n", warn_sync_abort); - - { - char *pbp, pb[300]; - pbp = pb; - *pbp = 0; - if (ns.role != os.role) - pbp += sprintf(pbp, "role( %s -> %s ) ", - drbd_role_str(os.role), - drbd_role_str(ns.role)); - if (ns.peer != os.peer) - pbp += sprintf(pbp, "peer( %s -> %s ) ", - drbd_role_str(os.peer), - drbd_role_str(ns.peer)); - if (ns.conn != os.conn) - pbp += sprintf(pbp, "conn( %s -> %s ) ", - drbd_conn_str(os.conn), - drbd_conn_str(ns.conn)); - if (ns.disk != os.disk) - pbp += sprintf(pbp, "disk( %s -> %s ) ", - drbd_disk_str(os.disk), - drbd_disk_str(ns.disk)); - if (ns.pdsk != os.pdsk) - pbp += sprintf(pbp, "pdsk( %s -> %s ) ", - drbd_disk_str(os.pdsk), - drbd_disk_str(ns.pdsk)); - if (is_susp(ns) != is_susp(os)) - pbp += sprintf(pbp, "susp( %d -> %d ) ", - is_susp(os), - is_susp(ns)); - if (ns.aftr_isp != os.aftr_isp) - pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ", - os.aftr_isp, - ns.aftr_isp); - if (ns.peer_isp != os.peer_isp) - pbp += sprintf(pbp, "peer_isp( %d -> %d ) ", - os.peer_isp, - ns.peer_isp); - if (ns.user_isp != os.user_isp) - pbp += sprintf(pbp, "user_isp( %d -> %d ) ", - os.user_isp, - ns.user_isp); - dev_info(DEV, "%s\n", pb); - } - - /* solve the race between becoming unconfigured, - * worker doing the cleanup, and - * admin reconfiguring us: - * on (re)configure, first set CONFIG_PENDING, - * then wait for a potentially exiting worker, - * start the worker, and schedule one no_op. - * then proceed with configuration. - */ - if (ns.disk == D_DISKLESS && - ns.conn == C_STANDALONE && - ns.role == R_SECONDARY && - !test_and_set_bit(CONFIG_PENDING, &mdev->flags)) - set_bit(DEVICE_DYING, &mdev->flags); - - /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference - * on the ldev here, to be sure the transition -> D_DISKLESS resp. - * drbd_ldev_destroy() won't happen before our corresponding - * after_state_ch works run, where we put_ldev again. */ - if ((os.disk != D_FAILED && ns.disk == D_FAILED) || - (os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) - atomic_inc(&mdev->local_cnt); - - mdev->state = ns; - - if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) - drbd_print_uuids(mdev, "attached to UUIDs"); - - wake_up(&mdev->misc_wait); - wake_up(&mdev->state_wait); - - /* aborted verify run. 
log the last position */ - if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && - ns.conn < C_CONNECTED) { - mdev->ov_start_sector = - BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left); - dev_info(DEV, "Online Verify reached sector %llu\n", - (unsigned long long)mdev->ov_start_sector); - } - - if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && - (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) { - dev_info(DEV, "Syncer continues.\n"); - mdev->rs_paused += (long)jiffies - -(long)mdev->rs_mark_time[mdev->rs_last_mark]; - if (ns.conn == C_SYNC_TARGET) - mod_timer(&mdev->resync_timer, jiffies); - } - - if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) && - (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) { - dev_info(DEV, "Resync suspended\n"); - mdev->rs_mark_time[mdev->rs_last_mark] = jiffies; - } - - if (os.conn == C_CONNECTED && - (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) { - unsigned long now = jiffies; - int i; - - set_ov_position(mdev, ns.conn); - mdev->rs_start = now; - mdev->rs_last_events = 0; - mdev->rs_last_sect_ev = 0; - mdev->ov_last_oos_size = 0; - mdev->ov_last_oos_start = 0; - - for (i = 0; i < DRBD_SYNC_MARKS; i++) { - mdev->rs_mark_left[i] = mdev->ov_left; - mdev->rs_mark_time[i] = now; - } - - drbd_rs_controller_reset(mdev); - - if (ns.conn == C_VERIFY_S) { - dev_info(DEV, "Starting Online Verify from sector %llu\n", - (unsigned long long)mdev->ov_position); - mod_timer(&mdev->resync_timer, jiffies); - } - } - - if (get_ldev(mdev)) { - u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND| - MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE| - MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY); - - if (test_bit(CRASHED_PRIMARY, &mdev->flags)) - mdf |= MDF_CRASHED_PRIMARY; - if (mdev->state.role == R_PRIMARY || - (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY)) - mdf |= MDF_PRIMARY_IND; - if (mdev->state.conn > C_WF_REPORT_PARAMS) - mdf |= MDF_CONNECTED_IND; - if (mdev->state.disk > D_INCONSISTENT) - mdf |= MDF_CONSISTENT; - if (mdev->state.disk > D_OUTDATED) - mdf |= MDF_WAS_UP_TO_DATE; - if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT) - mdf |= MDF_PEER_OUT_DATED; - if (mdf != mdev->ldev->md.flags) { - mdev->ldev->md.flags = mdf; - drbd_md_mark_dirty(mdev); - } - if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT) - drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]); - put_ldev(mdev); - } - - /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */ - if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT && - os.peer == R_SECONDARY && ns.peer == R_PRIMARY) - set_bit(CONSIDER_RESYNC, &mdev->flags); - - /* Receiver should clean up itself */ - if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) - drbd_thread_stop_nowait(&mdev->tconn->receiver); - - /* Now the receiver finished cleaning up itself, it should die */ - if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) - drbd_thread_stop_nowait(&mdev->tconn->receiver); - - /* Upon network failure, we need to restart the receiver. 
*/ - if (os.conn > C_TEAR_DOWN && - ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) - drbd_thread_restart_nowait(&mdev->tconn->receiver); - - /* Resume AL writing if we get a connection */ - if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) - drbd_resume_al(mdev); - - ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); - if (ascw) { - ascw->os = os; - ascw->ns = ns; - ascw->flags = flags; - ascw->w.cb = w_after_state_ch; - ascw->done = done; - drbd_queue_work(&mdev->tconn->data.work, &ascw->w); - } else { - dev_warn(DEV, "Could not kmalloc an ascw\n"); - } - - return rv; -} - -static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused) -{ - struct after_state_chg_work *ascw = - container_of(w, struct after_state_chg_work, w); - after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags); - if (ascw->flags & CS_WAIT_COMPLETE) { - D_ASSERT(ascw->done != NULL); - complete(ascw->done); - } - kfree(ascw); - - return 1; -} - -static void abw_start_sync(struct drbd_conf *mdev, int rv) -{ - if (rv) { - dev_err(DEV, "Writing the bitmap failed not starting resync.\n"); - _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE); - return; - } - - switch (mdev->state.conn) { - case C_STARTING_SYNC_T: - _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); - break; - case C_STARTING_SYNC_S: - drbd_start_resync(mdev, C_SYNC_SOURCE); - break; - } -} - -int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, - int (*io_fn)(struct drbd_conf *), - char *why, enum bm_flag flags) -{ - int rv; - - D_ASSERT(current == mdev->tconn->worker.task); - - /* open coded non-blocking drbd_suspend_io(mdev); */ - set_bit(SUSPEND_IO, &mdev->flags); - - drbd_bm_lock(mdev, why, flags); - rv = io_fn(mdev); - drbd_bm_unlock(mdev); - - drbd_resume_io(mdev); - - return rv; -} - -/** - * after_state_ch() - Perform after state change actions that may sleep - * @mdev: DRBD device. - * @os: old state. - * @ns: new state. - * @flags: Flags - */ -static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, enum chg_state_flags flags) -{ - enum drbd_fencing_p fp; - enum drbd_req_event what = NOTHING; - union drbd_state nsm = (union drbd_state){ .i = -1 }; - - if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { - clear_bit(CRASHED_PRIMARY, &mdev->flags); - if (mdev->p_uuid) - mdev->p_uuid[UI_FLAGS] &= ~((u64)2); - } - - fp = FP_DONT_CARE; - if (get_ldev(mdev)) { - fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - } - - /* Inform userspace about the change... */ - drbd_bcast_state(mdev, ns); - - if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) && - (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) - drbd_khelper(mdev, "pri-on-incon-degr"); - - /* Here we have the actions that are performed after a - state change. 
This function might sleep */ - - nsm.i = -1; - if (ns.susp_nod) { - if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) - what = RESEND; - - if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) - what = RESTART_FROZEN_DISK_IO; - - if (what != NOTHING) - nsm.susp_nod = 0; - } - - if (ns.susp_fen) { - /* case1: The outdate peer handler is successful: */ - if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) { - tl_clear(mdev); - if (test_bit(NEW_CUR_UUID, &mdev->flags)) { - drbd_uuid_new_current(mdev); - clear_bit(NEW_CUR_UUID, &mdev->flags); - } - spin_lock_irq(&mdev->tconn->req_lock); - _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->tconn->req_lock); - } - /* case2: The connection was established again: */ - if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { - clear_bit(NEW_CUR_UUID, &mdev->flags); - what = RESEND; - nsm.susp_fen = 0; - } - } - - if (what != NOTHING) { - spin_lock_irq(&mdev->tconn->req_lock); - _tl_restart(mdev, what); - nsm.i &= mdev->state.i; - _drbd_set_state(mdev, nsm, CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->tconn->req_lock); - } - - /* Became sync source. With protocol >= 96, we still need to send out - * the sync uuid now. Need to do that before any drbd_send_state, or - * the other side may go "paused sync" before receiving the sync uuids, - * which is unexpected. */ - if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) && - (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) && - mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) { - drbd_gen_and_send_sync_uuid(mdev); - put_ldev(mdev); - } - - /* Do not change the order of the if above and the two below... */ - if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ - drbd_send_uuids(mdev); - drbd_send_state(mdev); - } - /* No point in queuing send_bitmap if we don't have a connection - * anymore, so check also the _current_ state, not only the new state - * at the time this work was queued. */ - if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S && - mdev->state.conn == C_WF_BITMAP_S) - drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, - "send_bitmap (WFBitMapS)", - BM_LOCKED_TEST_ALLOWED); - - /* Lost contact to peer's copy of the data */ - if ((os.pdsk >= D_INCONSISTENT && - os.pdsk != D_UNKNOWN && - os.pdsk != D_OUTDATED) - && (ns.pdsk < D_INCONSISTENT || - ns.pdsk == D_UNKNOWN || - ns.pdsk == D_OUTDATED)) { - if (get_ldev(mdev)) { - if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && - mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { - if (is_susp(mdev->state)) { - set_bit(NEW_CUR_UUID, &mdev->flags); - } else { - drbd_uuid_new_current(mdev); - drbd_send_uuids(mdev); - } - } - put_ldev(mdev); - } - } - - if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { - if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) { - drbd_uuid_new_current(mdev); - drbd_send_uuids(mdev); - } - - /* D_DISKLESS Peer becomes secondary */ - if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) - /* We may still be Primary ourselves. - * No harm done if the bitmap still changes, - * redirtied pages will follow later. */ - drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, - "demote diskless peer", BM_LOCKED_SET_ALLOWED); - put_ldev(mdev); - } - - /* Write out all changed bits on demote. 
- * Though, no need to da that just yet - * if there is a resync going on still */ - if (os.role == R_PRIMARY && ns.role == R_SECONDARY && - mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) { - /* No changes to the bitmap expected this time, so assert that, - * even though no harm was done if it did change. */ - drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, - "demote", BM_LOCKED_TEST_ALLOWED); - put_ldev(mdev); - } - - /* Last part of the attaching process ... */ - if (ns.conn >= C_CONNECTED && - os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { - drbd_send_sizes(mdev, 0, 0); /* to start sync... */ - drbd_send_uuids(mdev); - drbd_send_state(mdev); - } - - /* We want to pause/continue resync, tell peer. */ - if (ns.conn >= C_CONNECTED && - ((os.aftr_isp != ns.aftr_isp) || - (os.user_isp != ns.user_isp))) - drbd_send_state(mdev); - - /* In case one of the isp bits got set, suspend other devices. */ - if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && - (ns.aftr_isp || ns.peer_isp || ns.user_isp)) - suspend_other_sg(mdev); - - /* Make sure the peer gets informed about eventual state - changes (ISP bits) while we were in WFReportParams. */ - if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) - drbd_send_state(mdev); - - if (os.conn != C_AHEAD && ns.conn == C_AHEAD) - drbd_send_state(mdev); - - /* We are in the progress to start a full sync... */ - if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || - (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S)) - /* no other bitmap changes expected during this phase */ - drbd_queue_bitmap_io(mdev, - &drbd_bmio_set_n_write, &abw_start_sync, - "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED); - - /* We are invalidating our self... */ - if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED && - os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) - /* other bitmap operation expected during this phase */ - drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, - "set_n_write from invalidate", BM_LOCKED_MASK); - - /* first half of local IO error, failure to attach, - * or administrative detach */ - if (os.disk != D_FAILED && ns.disk == D_FAILED) { - enum drbd_io_error_p eh; - int was_io_error; - /* corresponding get_ldev was in __drbd_set_state, to serialize - * our cleanup here with the transition to D_DISKLESS, - * so it is safe to dreference ldev here. */ - eh = mdev->ldev->dc.on_io_error; - was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); - - /* current state still has to be D_FAILED, - * there is only one way out: to D_DISKLESS, - * and that may only happen after our put_ldev below. */ - if (mdev->state.disk != D_FAILED) - dev_err(DEV, - "ASSERT FAILED: disk is %s during detach\n", - drbd_disk_str(mdev->state.disk)); - - if (drbd_send_state(mdev)) - dev_warn(DEV, "Notified peer that I am detaching my disk\n"); - else - dev_err(DEV, "Sending state for detaching disk failed\n"); - - drbd_rs_cancel_all(mdev); - - /* In case we want to get something to stable storage still, - * this may be the last chance. - * Following put_ldev may transition to D_DISKLESS. */ - drbd_md_sync(mdev); - put_ldev(mdev); - - if (was_io_error && eh == EP_CALL_HELPER) - drbd_khelper(mdev, "local-io-error"); - } - - /* second half of local IO error, failure to attach, - * or administrative detach, - * after local_cnt references have reached zero again */ - if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) { - /* We must still be diskless, - * re-attach has to be serialized with this! 
*/ - if (mdev->state.disk != D_DISKLESS) - dev_err(DEV, - "ASSERT FAILED: disk is %s while going diskless\n", - drbd_disk_str(mdev->state.disk)); - - mdev->rs_total = 0; - mdev->rs_failed = 0; - atomic_set(&mdev->rs_pending_cnt, 0); - - if (drbd_send_state(mdev)) - dev_warn(DEV, "Notified peer that I'm now diskless.\n"); - /* corresponding get_ldev in __drbd_set_state - * this may finally trigger drbd_ldev_destroy. */ - put_ldev(mdev); - } - - /* Notify peer that I had a local IO error, and did not detached.. */ - if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT) - drbd_send_state(mdev); - - /* Disks got bigger while they were detached */ - if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && - test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) { - if (ns.conn == C_CONNECTED) - resync_after_online_grow(mdev); - } - - /* A resync finished or aborted, wake paused devices... */ - if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) || - (os.peer_isp && !ns.peer_isp) || - (os.user_isp && !ns.user_isp)) - resume_next_sg(mdev); - - /* sync target done with resync. Explicitly notify peer, even though - * it should (at least for non-empty resyncs) already know itself. */ - if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) - drbd_send_state(mdev); - - /* This triggers bitmap writeout of potentially still unwritten pages - * if the resync finished cleanly, or aborted because of peer disk - * failure, or because of connection loss. - * For resync aborted because of local disk failure, we cannot do - * any bitmap writeout anymore. - * No harm done if some bits change during this phase. - */ - if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { - drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, - "write from resync_finished", BM_LOCKED_SET_ALLOWED); - put_ldev(mdev); - } - - /* Upon network connection, we need to start the receiver */ - if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED) - drbd_thread_start(&mdev->tconn->receiver); - - /* Terminate worker thread if we are unconfigured - it will be - restarted as needed... */ - if (ns.disk == D_DISKLESS && - ns.conn == C_STANDALONE && - ns.role == R_SECONDARY) { - if (os.aftr_isp != ns.aftr_isp) - resume_next_sg(mdev); - /* set in __drbd_set_state, unless CONFIG_PENDING was set */ - if (test_bit(DEVICE_DYING, &mdev->flags)) - drbd_thread_stop_nowait(&mdev->tconn->worker); - } - - drbd_md_sync(mdev); -} - - static int drbd_thread_setup(void *arg) { struct drbd_thread *thi = (struct drbd_thread *) arg; diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c new file mode 100644 index 000000000000..38d330b7b662 --- /dev/null +++ b/drivers/block/drbd/drbd_state.c @@ -0,0 +1,1217 @@ +/* + drbd_state.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev + from Logicworks, Inc. for making SDP replication support possible. + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include "drbd_int.h" +#include "drbd_req.h" + +struct after_state_chg_work { + struct drbd_work w; + union drbd_state os; + union drbd_state ns; + enum chg_state_flags flags; + struct completion *done; +}; + + +extern void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); +int drbd_send_state_req(struct drbd_conf *, union drbd_state, union drbd_state); +static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused); +static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, enum chg_state_flags flags); +static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, + union drbd_state ns, enum chg_state_flags flags); +static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); +static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); +static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, const char **warn_sync_abort); + +/** + * cl_wide_st_chg() - true if the state change is a cluster wide one + * @mdev: DRBD device. + * @os: old (current) state. + * @ns: new (wanted) state. + */ +static int cl_wide_st_chg(struct drbd_conf *mdev, + union drbd_state os, union drbd_state ns) +{ + return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED && + ((os.role != R_PRIMARY && ns.role == R_PRIMARY) || + (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || + (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || + (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) || + (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || + (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); +} + +enum drbd_state_rv +drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, + union drbd_state mask, union drbd_state val) +{ + unsigned long flags; + union drbd_state os, ns; + enum drbd_state_rv rv; + + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + os = mdev->state; + ns.i = (os.i & ~mask.i) | val.i; + rv = _drbd_set_state(mdev, ns, f, NULL); + ns = mdev->state; + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + + return rv; +} + +/** + * drbd_force_state() - Impose a change which happens outside our control on our state + * @mdev: DRBD device. + * @mask: mask of state bits to change. + * @val: value of new state bits. 
+ */ +void drbd_force_state(struct drbd_conf *mdev, + union drbd_state mask, union drbd_state val) +{ + drbd_change_state(mdev, CS_HARD, mask, val); +} + +static enum drbd_state_rv +_req_st_cond(struct drbd_conf *mdev, union drbd_state mask, + union drbd_state val) +{ + union drbd_state os, ns; + unsigned long flags; + enum drbd_state_rv rv; + + if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) + return SS_CW_SUCCESS; + + if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags)) + return SS_CW_FAILED_BY_PEER; + + rv = 0; + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + os = mdev->state; + ns.i = (os.i & ~mask.i) | val.i; + ns = sanitize_state(mdev, os, ns, NULL); + + if (!cl_wide_st_chg(mdev, os, ns)) + rv = SS_CW_NO_NEED; + if (!rv) { + rv = is_valid_state(mdev, ns); + if (rv == SS_SUCCESS) { + rv = is_valid_soft_transition(os, ns); + if (rv == SS_SUCCESS) + rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ + } + } + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + + return rv; +} + +/** + * drbd_req_state() - Perform an eventually cluster wide state change + * @mdev: DRBD device. + * @mask: mask of state bits to change. + * @val: value of new state bits. + * @f: flags + * + * Should not be called directly, use drbd_request_state() or + * _drbd_request_state(). + */ +static enum drbd_state_rv +drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, + union drbd_state val, enum chg_state_flags f) +{ + struct completion done; + unsigned long flags; + union drbd_state os, ns; + enum drbd_state_rv rv; + + init_completion(&done); + + if (f & CS_SERIALIZE) + mutex_lock(&mdev->state_mutex); + + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + os = mdev->state; + ns.i = (os.i & ~mask.i) | val.i; + + ns = sanitize_state(mdev, os, ns, NULL); + + if (cl_wide_st_chg(mdev, os, ns)) { + rv = is_valid_state(mdev, ns); + if (rv == SS_SUCCESS) + rv = is_valid_soft_transition(os, ns); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + + if (rv < SS_SUCCESS) { + if (f & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + goto abort; + } + + drbd_state_lock(mdev); + if (!drbd_send_state_req(mdev, mask, val)) { + drbd_state_unlock(mdev); + rv = SS_CW_FAILED_BY_PEER; + if (f & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + goto abort; + } + + wait_event(mdev->state_wait, + (rv = _req_st_cond(mdev, mask, val))); + + if (rv < SS_SUCCESS) { + drbd_state_unlock(mdev); + if (f & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + goto abort; + } + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + os = mdev->state; + ns.i = (os.i & ~mask.i) | val.i; + rv = _drbd_set_state(mdev, ns, f, &done); + drbd_state_unlock(mdev); + } else { + rv = _drbd_set_state(mdev, ns, f, &done); + } + + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + + if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) { + D_ASSERT(current != mdev->tconn->worker.task); + wait_for_completion(&done); + } + +abort: + if (f & CS_SERIALIZE) + mutex_unlock(&mdev->state_mutex); + + return rv; +} + +/** + * _drbd_request_state() - Request a state change (with flags) + * @mdev: DRBD device. + * @mask: mask of state bits to change. + * @val: value of new state bits. + * @f: flags + * + * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE + * flag, or when logging of failed state change requests is not desired. 
+ */ +enum drbd_state_rv +_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, + union drbd_state val, enum chg_state_flags f) +{ + enum drbd_state_rv rv; + + wait_event(mdev->state_wait, + (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE); + + return rv; +} + +static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) +{ + dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n", + name, + drbd_conn_str(ns.conn), + drbd_role_str(ns.role), + drbd_role_str(ns.peer), + drbd_disk_str(ns.disk), + drbd_disk_str(ns.pdsk), + is_susp(ns) ? 's' : 'r', + ns.aftr_isp ? 'a' : '-', + ns.peer_isp ? 'p' : '-', + ns.user_isp ? 'u' : '-', + ns.susp_fen ? 'F' : '-', + ns.susp_nod ? 'N' : '-' + ); +} + +void print_st_err(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, enum drbd_state_rv err) +{ + if (err == SS_IN_TRANSIENT_STATE) + return; + dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err)); + print_st(mdev, " state", os); + print_st(mdev, "wanted", ns); +} + + +/** + * is_valid_state() - Returns an SS_ error code if ns is not valid + * @mdev: DRBD device. + * @ns: State to consider. + */ +static enum drbd_state_rv +is_valid_state(struct drbd_conf *mdev, union drbd_state ns) +{ + /* See drbd_state_sw_errors in drbd_strings.c */ + + enum drbd_fencing_p fp; + enum drbd_state_rv rv = SS_SUCCESS; + + fp = FP_DONT_CARE; + if (get_ldev(mdev)) { + fp = mdev->ldev->dc.fencing; + put_ldev(mdev); + } + + if (get_net_conf(mdev->tconn)) { + if (!mdev->tconn->net_conf->two_primaries && + ns.role == R_PRIMARY && ns.peer == R_PRIMARY) + rv = SS_TWO_PRIMARIES; + put_net_conf(mdev->tconn); + } + + if (rv <= 0) + /* already found a reason to abort */; + else if (ns.role == R_SECONDARY && mdev->open_cnt) + rv = SS_DEVICE_IN_USE; + + else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE) + rv = SS_NO_UP_TO_DATE_DISK; + + else if (fp >= FP_RESOURCE && + ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN) + rv = SS_PRIMARY_NOP; + + else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT) + rv = SS_NO_UP_TO_DATE_DISK; + + else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT) + rv = SS_NO_LOCAL_DISK; + + else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT) + rv = SS_NO_REMOTE_DISK; + + else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) + rv = SS_NO_UP_TO_DATE_DISK; + + else if ((ns.conn == C_CONNECTED || + ns.conn == C_WF_BITMAP_S || + ns.conn == C_SYNC_SOURCE || + ns.conn == C_PAUSED_SYNC_S) && + ns.disk == D_OUTDATED) + rv = SS_CONNECTED_OUTDATES; + + else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && + (mdev->sync_conf.verify_alg[0] == 0)) + rv = SS_NO_VERIFY_ALG; + + else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && + mdev->tconn->agreed_pro_version < 88) + rv = SS_NOT_SUPPORTED; + + else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) + rv = SS_CONNECTED_OUTDATES; + + return rv; +} + +/** + * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible + * @mdev: DRBD device. + * @ns: new state. + * @os: old state. 
+ */ +static enum drbd_state_rv +is_valid_soft_transition(union drbd_state os, union drbd_state ns) +{ + enum drbd_state_rv rv = SS_SUCCESS; + + if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) && + os.conn > C_CONNECTED) + rv = SS_RESYNC_RUNNING; + + if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE) + rv = SS_ALREADY_STANDALONE; + + if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS) + rv = SS_IS_DISKLESS; + + if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED) + rv = SS_NO_NET_CONFIG; + + if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING) + rv = SS_LOWER_THAN_OUTDATED; + + if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED) + rv = SS_IN_TRANSIENT_STATE; + + if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) + rv = SS_IN_TRANSIENT_STATE; + + if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) + rv = SS_NEED_CONNECTION; + + if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && + ns.conn != os.conn && os.conn > C_CONNECTED) + rv = SS_RESYNC_RUNNING; + + if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) && + os.conn < C_CONNECTED) + rv = SS_NEED_CONNECTION; + + if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE) + && os.conn < C_WF_REPORT_PARAMS) + rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */ + + return rv; +} + +/** + * sanitize_state() - Resolves implicitly necessary additional changes to a state transition + * @mdev: DRBD device. + * @os: old state. + * @ns: new state. + * @warn_sync_abort: + * + * When we loose connection, we have to set the state of the peers disk (pdsk) + * to D_UNKNOWN. This rule and many more along those lines are in this function. + */ +static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, const char **warn_sync_abort) +{ + enum drbd_fencing_p fp; + enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; + + fp = FP_DONT_CARE; + if (get_ldev(mdev)) { + fp = mdev->ldev->dc.fencing; + put_ldev(mdev); + } + + /* Disallow Network errors to configure a device's network part */ + if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) && + os.conn <= C_DISCONNECTING) + ns.conn = os.conn; + + /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow. + * If you try to go into some Sync* state, that shall fail (elsewhere). */ + if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && + ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN) + ns.conn = os.conn; + + /* we cannot fail (again) if we already detached */ + if (ns.disk == D_FAILED && os.disk == D_DISKLESS) + ns.disk = D_DISKLESS; + + /* if we are only D_ATTACHING yet, + * we can (and should) go directly to D_DISKLESS. 
*/ + if (ns.disk == D_FAILED && os.disk == D_ATTACHING) + ns.disk = D_DISKLESS; + + /* After C_DISCONNECTING only C_STANDALONE may follow */ + if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) + ns.conn = os.conn; + + if (ns.conn < C_CONNECTED) { + ns.peer_isp = 0; + ns.peer = R_UNKNOWN; + if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT) + ns.pdsk = D_UNKNOWN; + } + + /* Clear the aftr_isp when becoming unconfigured */ + if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY) + ns.aftr_isp = 0; + + /* Abort resync if a disk fails/detaches */ + if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED && + (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { + if (warn_sync_abort) + *warn_sync_abort = + os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ? + "Online-verify" : "Resync"; + ns.conn = C_CONNECTED; + } + + /* Connection breaks down before we finished "Negotiating" */ + if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && + get_ldev_if_state(mdev, D_NEGOTIATING)) { + if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) { + ns.disk = mdev->new_state_tmp.disk; + ns.pdsk = mdev->new_state_tmp.pdsk; + } else { + dev_alert(DEV, "Connection lost while negotiating, no data!\n"); + ns.disk = D_DISKLESS; + ns.pdsk = D_UNKNOWN; + } + put_ldev(mdev); + } + + /* D_CONSISTENT and D_OUTDATED vanish when we get connected */ + if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) { + if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) + ns.disk = D_UP_TO_DATE; + if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED) + ns.pdsk = D_UP_TO_DATE; + } + + /* Implications of the connection stat on the disk states */ + disk_min = D_DISKLESS; + disk_max = D_UP_TO_DATE; + pdsk_min = D_INCONSISTENT; + pdsk_max = D_UNKNOWN; + switch ((enum drbd_conns)ns.conn) { + case C_WF_BITMAP_T: + case C_PAUSED_SYNC_T: + case C_STARTING_SYNC_T: + case C_WF_SYNC_UUID: + case C_BEHIND: + disk_min = D_INCONSISTENT; + disk_max = D_OUTDATED; + pdsk_min = D_UP_TO_DATE; + pdsk_max = D_UP_TO_DATE; + break; + case C_VERIFY_S: + case C_VERIFY_T: + disk_min = D_UP_TO_DATE; + disk_max = D_UP_TO_DATE; + pdsk_min = D_UP_TO_DATE; + pdsk_max = D_UP_TO_DATE; + break; + case C_CONNECTED: + disk_min = D_DISKLESS; + disk_max = D_UP_TO_DATE; + pdsk_min = D_DISKLESS; + pdsk_max = D_UP_TO_DATE; + break; + case C_WF_BITMAP_S: + case C_PAUSED_SYNC_S: + case C_STARTING_SYNC_S: + case C_AHEAD: + disk_min = D_UP_TO_DATE; + disk_max = D_UP_TO_DATE; + pdsk_min = D_INCONSISTENT; + pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. 
But explicit outdate necessary*/ + break; + case C_SYNC_TARGET: + disk_min = D_INCONSISTENT; + disk_max = D_INCONSISTENT; + pdsk_min = D_UP_TO_DATE; + pdsk_max = D_UP_TO_DATE; + break; + case C_SYNC_SOURCE: + disk_min = D_UP_TO_DATE; + disk_max = D_UP_TO_DATE; + pdsk_min = D_INCONSISTENT; + pdsk_max = D_INCONSISTENT; + break; + case C_STANDALONE: + case C_DISCONNECTING: + case C_UNCONNECTED: + case C_TIMEOUT: + case C_BROKEN_PIPE: + case C_NETWORK_FAILURE: + case C_PROTOCOL_ERROR: + case C_TEAR_DOWN: + case C_WF_CONNECTION: + case C_WF_REPORT_PARAMS: + case C_MASK: + break; + } + if (ns.disk > disk_max) + ns.disk = disk_max; + + if (ns.disk < disk_min) { + dev_warn(DEV, "Implicitly set disk from %s to %s\n", + drbd_disk_str(ns.disk), drbd_disk_str(disk_min)); + ns.disk = disk_min; + } + if (ns.pdsk > pdsk_max) + ns.pdsk = pdsk_max; + + if (ns.pdsk < pdsk_min) { + dev_warn(DEV, "Implicitly set pdsk from %s to %s\n", + drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min)); + ns.pdsk = pdsk_min; + } + + if (fp == FP_STONITH && + (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) && + !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)) + ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ + + if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO && + (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) && + !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE)) + ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ + + if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { + if (ns.conn == C_SYNC_SOURCE) + ns.conn = C_PAUSED_SYNC_S; + if (ns.conn == C_SYNC_TARGET) + ns.conn = C_PAUSED_SYNC_T; + } else { + if (ns.conn == C_PAUSED_SYNC_S) + ns.conn = C_SYNC_SOURCE; + if (ns.conn == C_PAUSED_SYNC_T) + ns.conn = C_SYNC_TARGET; + } + + return ns; +} + +void drbd_resume_al(struct drbd_conf *mdev) +{ + if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags)) + dev_info(DEV, "Resumed AL updates\n"); +} + +/* helper for __drbd_set_state */ +static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) +{ + if (mdev->tconn->agreed_pro_version < 90) + mdev->ov_start_sector = 0; + mdev->rs_total = drbd_bm_bits(mdev); + mdev->ov_position = 0; + if (cs == C_VERIFY_T) { + /* starting online verify from an arbitrary position + * does not fit well into the existing protocol. + * on C_VERIFY_T, we initialize ov_left and friends + * implicitly in receive_DataRequest once the + * first P_OV_REQUEST is received */ + mdev->ov_start_sector = ~(sector_t)0; + } else { + unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector); + if (bit >= mdev->rs_total) { + mdev->ov_start_sector = + BM_BIT_TO_SECT(mdev->rs_total - 1); + mdev->rs_total = 1; + } else + mdev->rs_total -= bit; + mdev->ov_position = mdev->ov_start_sector; + } + mdev->ov_left = mdev->rs_total; +} + +/** + * __drbd_set_state() - Set a new DRBD state + * @mdev: DRBD device. + * @ns: new state. + * @flags: Flags + * @done: Optional completion, that will get completed after the after_state_ch() finished + * + * Caller needs to hold req_lock, and global_state_lock. Do not call directly. 
+ */ +enum drbd_state_rv +__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, + enum chg_state_flags flags, struct completion *done) +{ + union drbd_state os; + enum drbd_state_rv rv = SS_SUCCESS; + const char *warn_sync_abort = NULL; + struct after_state_chg_work *ascw; + + os = mdev->state; + + ns = sanitize_state(mdev, os, ns, &warn_sync_abort); + + if (ns.i == os.i) + return SS_NOTHING_TO_DO; + + if (!(flags & CS_HARD)) { + /* pre-state-change checks ; only look at ns */ + /* See drbd_state_sw_errors in drbd_strings.c */ + + rv = is_valid_state(mdev, ns); + if (rv < SS_SUCCESS) { + /* If the old state was illegal as well, then let + this happen...*/ + + if (is_valid_state(mdev, os) == rv) + rv = is_valid_soft_transition(os, ns); + } else + rv = is_valid_soft_transition(os, ns); + } + + if (rv < SS_SUCCESS) { + if (flags & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + return rv; + } + + if (warn_sync_abort) + dev_warn(DEV, "%s aborted.\n", warn_sync_abort); + + { + char *pbp, pb[300]; + pbp = pb; + *pbp = 0; + if (ns.role != os.role) + pbp += sprintf(pbp, "role( %s -> %s ) ", + drbd_role_str(os.role), + drbd_role_str(ns.role)); + if (ns.peer != os.peer) + pbp += sprintf(pbp, "peer( %s -> %s ) ", + drbd_role_str(os.peer), + drbd_role_str(ns.peer)); + if (ns.conn != os.conn) + pbp += sprintf(pbp, "conn( %s -> %s ) ", + drbd_conn_str(os.conn), + drbd_conn_str(ns.conn)); + if (ns.disk != os.disk) + pbp += sprintf(pbp, "disk( %s -> %s ) ", + drbd_disk_str(os.disk), + drbd_disk_str(ns.disk)); + if (ns.pdsk != os.pdsk) + pbp += sprintf(pbp, "pdsk( %s -> %s ) ", + drbd_disk_str(os.pdsk), + drbd_disk_str(ns.pdsk)); + if (is_susp(ns) != is_susp(os)) + pbp += sprintf(pbp, "susp( %d -> %d ) ", + is_susp(os), + is_susp(ns)); + if (ns.aftr_isp != os.aftr_isp) + pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ", + os.aftr_isp, + ns.aftr_isp); + if (ns.peer_isp != os.peer_isp) + pbp += sprintf(pbp, "peer_isp( %d -> %d ) ", + os.peer_isp, + ns.peer_isp); + if (ns.user_isp != os.user_isp) + pbp += sprintf(pbp, "user_isp( %d -> %d ) ", + os.user_isp, + ns.user_isp); + dev_info(DEV, "%s\n", pb); + } + + /* solve the race between becoming unconfigured, + * worker doing the cleanup, and + * admin reconfiguring us: + * on (re)configure, first set CONFIG_PENDING, + * then wait for a potentially exiting worker, + * start the worker, and schedule one no_op. + * then proceed with configuration. + */ + if (ns.disk == D_DISKLESS && + ns.conn == C_STANDALONE && + ns.role == R_SECONDARY && + !test_and_set_bit(CONFIG_PENDING, &mdev->flags)) + set_bit(DEVICE_DYING, &mdev->flags); + + /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference + * on the ldev here, to be sure the transition -> D_DISKLESS resp. + * drbd_ldev_destroy() won't happen before our corresponding + * after_state_ch works run, where we put_ldev again. */ + if ((os.disk != D_FAILED && ns.disk == D_FAILED) || + (os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) + atomic_inc(&mdev->local_cnt); + + mdev->state = ns; + + if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) + drbd_print_uuids(mdev, "attached to UUIDs"); + + wake_up(&mdev->misc_wait); + wake_up(&mdev->state_wait); + + /* aborted verify run. 
log the last position */ + if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && + ns.conn < C_CONNECTED) { + mdev->ov_start_sector = + BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left); + dev_info(DEV, "Online Verify reached sector %llu\n", + (unsigned long long)mdev->ov_start_sector); + } + + if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && + (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) { + dev_info(DEV, "Syncer continues.\n"); + mdev->rs_paused += (long)jiffies + -(long)mdev->rs_mark_time[mdev->rs_last_mark]; + if (ns.conn == C_SYNC_TARGET) + mod_timer(&mdev->resync_timer, jiffies); + } + + if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) && + (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) { + dev_info(DEV, "Resync suspended\n"); + mdev->rs_mark_time[mdev->rs_last_mark] = jiffies; + } + + if (os.conn == C_CONNECTED && + (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) { + unsigned long now = jiffies; + int i; + + set_ov_position(mdev, ns.conn); + mdev->rs_start = now; + mdev->rs_last_events = 0; + mdev->rs_last_sect_ev = 0; + mdev->ov_last_oos_size = 0; + mdev->ov_last_oos_start = 0; + + for (i = 0; i < DRBD_SYNC_MARKS; i++) { + mdev->rs_mark_left[i] = mdev->ov_left; + mdev->rs_mark_time[i] = now; + } + + drbd_rs_controller_reset(mdev); + + if (ns.conn == C_VERIFY_S) { + dev_info(DEV, "Starting Online Verify from sector %llu\n", + (unsigned long long)mdev->ov_position); + mod_timer(&mdev->resync_timer, jiffies); + } + } + + if (get_ldev(mdev)) { + u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND| + MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE| + MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY); + + if (test_bit(CRASHED_PRIMARY, &mdev->flags)) + mdf |= MDF_CRASHED_PRIMARY; + if (mdev->state.role == R_PRIMARY || + (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY)) + mdf |= MDF_PRIMARY_IND; + if (mdev->state.conn > C_WF_REPORT_PARAMS) + mdf |= MDF_CONNECTED_IND; + if (mdev->state.disk > D_INCONSISTENT) + mdf |= MDF_CONSISTENT; + if (mdev->state.disk > D_OUTDATED) + mdf |= MDF_WAS_UP_TO_DATE; + if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT) + mdf |= MDF_PEER_OUT_DATED; + if (mdf != mdev->ldev->md.flags) { + mdev->ldev->md.flags = mdf; + drbd_md_mark_dirty(mdev); + } + if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT) + drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]); + put_ldev(mdev); + } + + /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */ + if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT && + os.peer == R_SECONDARY && ns.peer == R_PRIMARY) + set_bit(CONSIDER_RESYNC, &mdev->flags); + + /* Receiver should clean up itself */ + if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) + drbd_thread_stop_nowait(&mdev->tconn->receiver); + + /* Now the receiver finished cleaning up itself, it should die */ + if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) + drbd_thread_stop_nowait(&mdev->tconn->receiver); + + /* Upon network failure, we need to restart the receiver. 
*/ + if (os.conn > C_TEAR_DOWN && + ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) + drbd_thread_restart_nowait(&mdev->tconn->receiver); + + /* Resume AL writing if we get a connection */ + if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) + drbd_resume_al(mdev); + + ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); + if (ascw) { + ascw->os = os; + ascw->ns = ns; + ascw->flags = flags; + ascw->w.cb = w_after_state_ch; + ascw->done = done; + drbd_queue_work(&mdev->tconn->data.work, &ascw->w); + } else { + dev_warn(DEV, "Could not kmalloc an ascw\n"); + } + + return rv; +} + +static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused) +{ + struct after_state_chg_work *ascw = + container_of(w, struct after_state_chg_work, w); + + after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags); + if (ascw->flags & CS_WAIT_COMPLETE) { + D_ASSERT(ascw->done != NULL); + complete(ascw->done); + } + kfree(ascw); + + return 1; +} + +static void abw_start_sync(struct drbd_conf *mdev, int rv) +{ + if (rv) { + dev_err(DEV, "Writing the bitmap failed not starting resync.\n"); + _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE); + return; + } + + switch (mdev->state.conn) { + case C_STARTING_SYNC_T: + _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); + break; + case C_STARTING_SYNC_S: + drbd_start_resync(mdev, C_SYNC_SOURCE); + break; + } +} + +int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, + int (*io_fn)(struct drbd_conf *), + char *why, enum bm_flag flags) +{ + int rv; + + D_ASSERT(current == mdev->tconn->worker.task); + + /* open coded non-blocking drbd_suspend_io(mdev); */ + set_bit(SUSPEND_IO, &mdev->flags); + + drbd_bm_lock(mdev, why, flags); + rv = io_fn(mdev); + drbd_bm_unlock(mdev); + + drbd_resume_io(mdev); + + return rv; +} + +/** + * after_state_ch() - Perform after state change actions that may sleep + * @mdev: DRBD device. + * @os: old state. + * @ns: new state. + * @flags: Flags + */ +static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, enum chg_state_flags flags) +{ + enum drbd_fencing_p fp; + enum drbd_req_event what = NOTHING; + union drbd_state nsm = (union drbd_state){ .i = -1 }; + + if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { + clear_bit(CRASHED_PRIMARY, &mdev->flags); + if (mdev->p_uuid) + mdev->p_uuid[UI_FLAGS] &= ~((u64)2); + } + + fp = FP_DONT_CARE; + if (get_ldev(mdev)) { + fp = mdev->ldev->dc.fencing; + put_ldev(mdev); + } + + /* Inform userspace about the change... */ + drbd_bcast_state(mdev, ns); + + if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) && + (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) + drbd_khelper(mdev, "pri-on-incon-degr"); + + /* Here we have the actions that are performed after a + state change. 
This function might sleep */ + + nsm.i = -1; + if (ns.susp_nod) { + if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) + what = RESEND; + + if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) + what = RESTART_FROZEN_DISK_IO; + + if (what != NOTHING) + nsm.susp_nod = 0; + } + + if (ns.susp_fen) { + /* case1: The outdate peer handler is successful: */ + if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) { + tl_clear(mdev); + if (test_bit(NEW_CUR_UUID, &mdev->flags)) { + drbd_uuid_new_current(mdev); + clear_bit(NEW_CUR_UUID, &mdev->flags); + } + spin_lock_irq(&mdev->tconn->req_lock); + _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL); + spin_unlock_irq(&mdev->tconn->req_lock); + } + /* case2: The connection was established again: */ + if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { + clear_bit(NEW_CUR_UUID, &mdev->flags); + what = RESEND; + nsm.susp_fen = 0; + } + } + + if (what != NOTHING) { + spin_lock_irq(&mdev->tconn->req_lock); + _tl_restart(mdev, what); + nsm.i &= mdev->state.i; + _drbd_set_state(mdev, nsm, CS_VERBOSE, NULL); + spin_unlock_irq(&mdev->tconn->req_lock); + } + + /* Became sync source. With protocol >= 96, we still need to send out + * the sync uuid now. Need to do that before any drbd_send_state, or + * the other side may go "paused sync" before receiving the sync uuids, + * which is unexpected. */ + if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) && + (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) && + mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) { + drbd_gen_and_send_sync_uuid(mdev); + put_ldev(mdev); + } + + /* Do not change the order of the if above and the two below... */ + if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ + drbd_send_uuids(mdev); + drbd_send_state(mdev); + } + /* No point in queuing send_bitmap if we don't have a connection + * anymore, so check also the _current_ state, not only the new state + * at the time this work was queued. */ + if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S && + mdev->state.conn == C_WF_BITMAP_S) + drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, + "send_bitmap (WFBitMapS)", + BM_LOCKED_TEST_ALLOWED); + + /* Lost contact to peer's copy of the data */ + if ((os.pdsk >= D_INCONSISTENT && + os.pdsk != D_UNKNOWN && + os.pdsk != D_OUTDATED) + && (ns.pdsk < D_INCONSISTENT || + ns.pdsk == D_UNKNOWN || + ns.pdsk == D_OUTDATED)) { + if (get_ldev(mdev)) { + if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && + mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { + if (is_susp(mdev->state)) { + set_bit(NEW_CUR_UUID, &mdev->flags); + } else { + drbd_uuid_new_current(mdev); + drbd_send_uuids(mdev); + } + } + put_ldev(mdev); + } + } + + if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { + if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) { + drbd_uuid_new_current(mdev); + drbd_send_uuids(mdev); + } + + /* D_DISKLESS Peer becomes secondary */ + if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) + /* We may still be Primary ourselves. + * No harm done if the bitmap still changes, + * redirtied pages will follow later. */ + drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, + "demote diskless peer", BM_LOCKED_SET_ALLOWED); + put_ldev(mdev); + } + + /* Write out all changed bits on demote. 
+ * Though, no need to da that just yet + * if there is a resync going on still */ + if (os.role == R_PRIMARY && ns.role == R_SECONDARY && + mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) { + /* No changes to the bitmap expected this time, so assert that, + * even though no harm was done if it did change. */ + drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, + "demote", BM_LOCKED_TEST_ALLOWED); + put_ldev(mdev); + } + + /* Last part of the attaching process ... */ + if (ns.conn >= C_CONNECTED && + os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { + drbd_send_sizes(mdev, 0, 0); /* to start sync... */ + drbd_send_uuids(mdev); + drbd_send_state(mdev); + } + + /* We want to pause/continue resync, tell peer. */ + if (ns.conn >= C_CONNECTED && + ((os.aftr_isp != ns.aftr_isp) || + (os.user_isp != ns.user_isp))) + drbd_send_state(mdev); + + /* In case one of the isp bits got set, suspend other devices. */ + if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && + (ns.aftr_isp || ns.peer_isp || ns.user_isp)) + suspend_other_sg(mdev); + + /* Make sure the peer gets informed about eventual state + changes (ISP bits) while we were in WFReportParams. */ + if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) + drbd_send_state(mdev); + + if (os.conn != C_AHEAD && ns.conn == C_AHEAD) + drbd_send_state(mdev); + + /* We are in the progress to start a full sync... */ + if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || + (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S)) + /* no other bitmap changes expected during this phase */ + drbd_queue_bitmap_io(mdev, + &drbd_bmio_set_n_write, &abw_start_sync, + "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED); + + /* We are invalidating our self... */ + if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED && + os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) + /* other bitmap operation expected during this phase */ + drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, + "set_n_write from invalidate", BM_LOCKED_MASK); + + /* first half of local IO error, failure to attach, + * or administrative detach */ + if (os.disk != D_FAILED && ns.disk == D_FAILED) { + enum drbd_io_error_p eh; + int was_io_error; + /* corresponding get_ldev was in __drbd_set_state, to serialize + * our cleanup here with the transition to D_DISKLESS, + * so it is safe to dreference ldev here. */ + eh = mdev->ldev->dc.on_io_error; + was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); + + /* current state still has to be D_FAILED, + * there is only one way out: to D_DISKLESS, + * and that may only happen after our put_ldev below. */ + if (mdev->state.disk != D_FAILED) + dev_err(DEV, + "ASSERT FAILED: disk is %s during detach\n", + drbd_disk_str(mdev->state.disk)); + + if (drbd_send_state(mdev)) + dev_warn(DEV, "Notified peer that I am detaching my disk\n"); + else + dev_err(DEV, "Sending state for detaching disk failed\n"); + + drbd_rs_cancel_all(mdev); + + /* In case we want to get something to stable storage still, + * this may be the last chance. + * Following put_ldev may transition to D_DISKLESS. */ + drbd_md_sync(mdev); + put_ldev(mdev); + + if (was_io_error && eh == EP_CALL_HELPER) + drbd_khelper(mdev, "local-io-error"); + } + + /* second half of local IO error, failure to attach, + * or administrative detach, + * after local_cnt references have reached zero again */ + if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) { + /* We must still be diskless, + * re-attach has to be serialized with this! 
*/ + if (mdev->state.disk != D_DISKLESS) + dev_err(DEV, + "ASSERT FAILED: disk is %s while going diskless\n", + drbd_disk_str(mdev->state.disk)); + + mdev->rs_total = 0; + mdev->rs_failed = 0; + atomic_set(&mdev->rs_pending_cnt, 0); + + if (drbd_send_state(mdev)) + dev_warn(DEV, "Notified peer that I'm now diskless.\n"); + /* corresponding get_ldev in __drbd_set_state + * this may finally trigger drbd_ldev_destroy. */ + put_ldev(mdev); + } + + /* Notify peer that I had a local IO error, and did not detached.. */ + if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT) + drbd_send_state(mdev); + + /* Disks got bigger while they were detached */ + if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && + test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) { + if (ns.conn == C_CONNECTED) + resync_after_online_grow(mdev); + } + + /* A resync finished or aborted, wake paused devices... */ + if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) || + (os.peer_isp && !ns.peer_isp) || + (os.user_isp && !ns.user_isp)) + resume_next_sg(mdev); + + /* sync target done with resync. Explicitly notify peer, even though + * it should (at least for non-empty resyncs) already know itself. */ + if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) + drbd_send_state(mdev); + + /* This triggers bitmap writeout of potentially still unwritten pages + * if the resync finished cleanly, or aborted because of peer disk + * failure, or because of connection loss. + * For resync aborted because of local disk failure, we cannot do + * any bitmap writeout anymore. + * No harm done if some bits change during this phase. + */ + if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { + drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, + "write from resync_finished", BM_LOCKED_SET_ALLOWED); + put_ldev(mdev); + } + + if (ns.disk == D_DISKLESS && + ns.conn == C_STANDALONE && + ns.role == R_SECONDARY) { + if (os.aftr_isp != ns.aftr_isp) + resume_next_sg(mdev); + } + + after_conn_state_ch(mdev->tconn, os, ns, flags); + drbd_md_sync(mdev); +} + +static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, + union drbd_state ns, enum chg_state_flags flags) +{ + /* Upon network configuration, we need to start the receiver */ + if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED) + drbd_thread_start(&tconn->receiver); + + if (ns.disk == D_DISKLESS && + ns.conn == C_STANDALONE && + ns.role == R_SECONDARY) { + /* if (test_bit(DEVICE_DYING, &mdev->flags)) TODO: DEVICE_DYING functionality */ + drbd_thread_stop_nowait(&tconn->worker); + } +} diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h new file mode 100644 index 000000000000..3ec26e2c4c40 --- /dev/null +++ b/drivers/block/drbd/drbd_state.h @@ -0,0 +1,101 @@ +#ifndef DRBD_STATE_H +#define DRBD_STATE_H + +struct drbd_conf; + +/** + * DOC: DRBD State macros + * + * These macros are used to express state changes in easily readable form. + * + * The NS macros expand to a mask and a value, that can be bit ored onto the + * current state as soon as the spinlock (req_lock) was taken. + * + * The _NS macros are used for state functions that get called with the + * spinlock. These macros expand directly to the new state value. + * + * Besides the basic forms NS() and _NS() additional _?NS[23] are defined + * to express state changes that affect more than one aspect of the state. + * + * E.g. 
NS2(conn, C_CONNECTED, peer, R_SECONDARY) + * Means that the network connection was established and that the peer + * is in secondary role. + */ +#define role_MASK R_MASK +#define peer_MASK R_MASK +#define disk_MASK D_MASK +#define pdsk_MASK D_MASK +#define conn_MASK C_MASK +#define susp_MASK 1 +#define user_isp_MASK 1 +#define aftr_isp_MASK 1 +#define susp_nod_MASK 1 +#define susp_fen_MASK 1 + +#define NS(T, S) \ + ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T = (S); val; }) +#define NS2(T1, S1, T2, S2) \ + ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ + mask.T2 = T2##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ + val.T2 = (S2); val; }) +#define NS3(T1, S1, T2, S2, T3, S3) \ + ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ + mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ + val.T2 = (S2); val.T3 = (S3); val; }) + +#define _NS(D, T, S) \ + D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T = (S); __ns; }) +#define _NS2(D, T1, S1, T2, S2) \ + D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ + __ns.T2 = (S2); __ns; }) +#define _NS3(D, T1, S1, T2, S2, T3, S3) \ + D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ + __ns.T2 = (S2); __ns.T3 = (S3); __ns; }) + +enum chg_state_flags { + CS_HARD = 1, + CS_VERBOSE = 2, + CS_WAIT_COMPLETE = 4, + CS_SERIALIZE = 8, + CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, +}; + +extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, + enum chg_state_flags f, + union drbd_state mask, + union drbd_state val); +extern void drbd_force_state(struct drbd_conf *, union drbd_state, + union drbd_state); +extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *, + union drbd_state, + union drbd_state, + enum chg_state_flags); +extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state, + enum chg_state_flags, + struct completion *done); +extern void print_st_err(struct drbd_conf *, union drbd_state, + union drbd_state, int); + +extern void drbd_resume_al(struct drbd_conf *mdev); + +/** + * drbd_request_state() - Reqest a state change + * @mdev: DRBD device. + * @mask: mask of state bits to change. + * @val: value of new state bits. + * + * This is the most graceful way of requesting a state change. It is verbose + * quite verbose in case the state change is not possible, and all those + * state changes are globally serialized. 
+ */ +static inline int drbd_request_state(struct drbd_conf *mdev, + union drbd_state mask, + union drbd_state val) +{ + return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); +} + +#endif From bed879ae905190028a90d53493c4f75dcd78f44d Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 4 Feb 2011 14:00:37 +0100 Subject: [PATCH 0062/5831] drbd: Moved the thread name into the data structure Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 20 +++++--------- drivers/block/drbd/drbd_int.h | 2 ++ drivers/block/drbd/drbd_main.c | 46 +++++++++++++++++++------------- 3 files changed, 36 insertions(+), 32 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 4da4c322fa56..e85221f22adc 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -119,13 +119,9 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) if (!__ratelimit(&drbd_ratelimit_state)) return; dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n", - current == mdev->tconn->receiver.task ? "receiver" : - current == mdev->tconn->asender.task ? "asender" : - current == mdev->tconn->worker.task ? "worker" : current->comm, - func, b->bm_why ?: "?", - b->bm_task == mdev->tconn->receiver.task ? "receiver" : - b->bm_task == mdev->tconn->asender.task ? "asender" : - b->bm_task == mdev->tconn->worker.task ? "worker" : "?"); + drbd_task_to_thread_name(mdev, current), + func, b->bm_why ?: "?", + drbd_task_to_thread_name(mdev, b->bm_task)); } void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) @@ -142,13 +138,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) if (trylock_failed) { dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", - current == mdev->tconn->receiver.task ? "receiver" : - current == mdev->tconn->asender.task ? "asender" : - current == mdev->tconn->worker.task ? "worker" : current->comm, - why, b->bm_why ?: "?", - b->bm_task == mdev->tconn->receiver.task ? "receiver" : - b->bm_task == mdev->tconn->asender.task ? "asender" : - b->bm_task == mdev->tconn->worker.task ? 
"worker" : "?"); + drbd_task_to_thread_name(mdev, current), + why, b->bm_why ?: "?", + drbd_task_to_thread_name(mdev, b->bm_task)); mutex_lock(&b->bm_change); } if (BM_LOCKED_MASK & b->bm_flags) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 98addab2c928..7beb374451b3 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -616,6 +616,7 @@ struct drbd_thread { int (*function) (struct drbd_thread *); struct drbd_conf *mdev; int reset_cpu_mask; + char name[9]; }; static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi) @@ -1130,6 +1131,7 @@ enum dds_flags { extern void drbd_init_set_defaults(struct drbd_conf *mdev); extern int drbd_thread_start(struct drbd_thread *thi); extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); +extern char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task); #ifdef CONFIG_SMP extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev); extern void drbd_calc_cpu_mask(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4b39b3d0dd55..852a3e3fbb76 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -462,7 +462,7 @@ restart: */ if (thi->t_state == RESTARTING) { - dev_info(DEV, "Restarting %s\n", current->comm); + dev_info(DEV, "Restarting %s thread\n", thi->name); thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, flags); goto restart; @@ -482,13 +482,14 @@ restart: } static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi, - int (*func) (struct drbd_thread *)) + int (*func) (struct drbd_thread *), char *name) { spin_lock_init(&thi->t_lock); thi->task = NULL; thi->t_state = NONE; thi->function = func; thi->mdev = mdev; + strncpy(thi->name, name, ARRAY_SIZE(thi->name)); } int drbd_thread_start(struct drbd_thread *thi) @@ -497,11 +498,6 @@ int drbd_thread_start(struct drbd_thread *thi) struct task_struct *nt; unsigned long flags; - const char *me = - thi == &mdev->tconn->receiver ? "receiver" : - thi == &mdev->tconn->asender ? "asender" : - thi == &mdev->tconn->worker ? "worker" : "NONSENSE"; - /* is used from state engine doing drbd_thread_stop_nowait, * while holding the req lock irqsave */ spin_lock_irqsave(&thi->t_lock, flags); @@ -509,7 +505,7 @@ int drbd_thread_start(struct drbd_thread *thi) switch (thi->t_state) { case NONE: dev_info(DEV, "Starting %s thread (from %s [%d])\n", - me, current->comm, current->pid); + thi->name, current->comm, current->pid); /* Get ref on module for thread - this is released when thread exits */ if (!try_module_get(THIS_MODULE)) { @@ -526,7 +522,7 @@ int drbd_thread_start(struct drbd_thread *thi) flush_signals(current); /* otherw. 
may get -ERESTARTNOINTR */ nt = kthread_create(drbd_thread_setup, (void *) thi, - "drbd%d_%s", mdev_to_minor(mdev), me); + "drbd%d_%s", mdev_to_minor(mdev), thi->name); if (IS_ERR(nt)) { dev_err(DEV, "Couldn't start thread\n"); @@ -543,7 +539,7 @@ int drbd_thread_start(struct drbd_thread *thi) case EXITING: thi->t_state = RESTARTING; dev_info(DEV, "Restarting %s thread (from %s [%d])\n", - me, current->comm, current->pid); + thi->name, current->comm, current->pid); /* fall through */ case RUNNING: case RESTARTING: @@ -592,6 +588,23 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) wait_for_completion(&thi->stop); } +static struct drbd_thread *drbd_task_to_thread(struct drbd_conf *mdev, struct task_struct *task) +{ + struct drbd_tconn *tconn = mdev->tconn; + struct drbd_thread *thi = + task == tconn->receiver.task ? &tconn->receiver : + task == tconn->asender.task ? &tconn->asender : + task == tconn->worker.task ? &tconn->worker : NULL; + + return thi; +} + +char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task) +{ + struct drbd_thread *thi = drbd_task_to_thread(mdev, task); + return thi ? thi->name : task->comm; +} + #ifdef CONFIG_SMP /** * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs @@ -629,11 +642,8 @@ void drbd_calc_cpu_mask(struct drbd_conf *mdev) void drbd_thread_current_set_cpu(struct drbd_conf *mdev) { struct task_struct *p = current; - struct drbd_thread *thi = - p == mdev->tconn->asender.task ? &mdev->tconn->asender : - p == mdev->tconn->receiver.task ? &mdev->tconn->receiver : - p == mdev->tconn->worker.task ? &mdev->tconn->worker : - NULL; + struct drbd_thread *thi = drbd_task_to_thread(mdev, p); + if (!expect(thi != NULL)) return; if (!thi->reset_cpu_mask) @@ -1848,9 +1858,9 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) init_waitqueue_head(&mdev->al_wait); init_waitqueue_head(&mdev->seq_wait); - drbd_thread_init(mdev, &mdev->tconn->receiver, drbdd_init); - drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker); - drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender); + drbd_thread_init(mdev, &mdev->tconn->receiver, drbdd_init, "receiver"); + drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker, "worker"); + drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender, "asender"); /* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */ mdev->write_ordering = WO_bdev_flush; From bc31fe3352f9cd76195ce6eb638dfc2dac17dc2e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 11:14:38 +0100 Subject: [PATCH 0063/5831] drbd: Eliminated the user of drbd_task_to_thread() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_main.c | 6 ++---- drivers/block/drbd/drbd_receiver.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 2 +- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 7beb374451b3..9a351a2cab7c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1133,10 +1133,10 @@ extern int drbd_thread_start(struct drbd_thread *thi); extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); extern char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task); #ifdef CONFIG_SMP -extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev); +extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev, struct drbd_thread *thi); extern void 
drbd_calc_cpu_mask(struct drbd_conf *mdev); #else -#define drbd_thread_current_set_cpu(A) ({}) +#define drbd_thread_current_set_cpu(A, B) ({}) #define drbd_calc_cpu_mask(A) ({}) #endif extern void drbd_free_resources(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 852a3e3fbb76..ae995ed0e6f9 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -635,17 +635,15 @@ void drbd_calc_cpu_mask(struct drbd_conf *mdev) /** * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread * @mdev: DRBD device. + * @thi: drbd_thread object * * call in the "main loop" of _all_ threads, no need for any mutex, current won't die * prematurely. */ -void drbd_thread_current_set_cpu(struct drbd_conf *mdev) +void drbd_thread_current_set_cpu(struct drbd_conf *mdev, struct drbd_thread *thi) { struct task_struct *p = current; - struct drbd_thread *thi = drbd_task_to_thread(mdev, p); - if (!expect(thi != NULL)) - return; if (!thi->reset_cpu_mask) return; thi->reset_cpu_mask = 0; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 3a9cd31e094b..dfb59671ff1c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3773,7 +3773,7 @@ static void drbdd(struct drbd_conf *mdev) int rv; while (get_t_state(&mdev->tconn->receiver) == RUNNING) { - drbd_thread_current_set_cpu(mdev); + drbd_thread_current_set_cpu(mdev, &mdev->tconn->receiver); if (!drbd_recv_header(mdev, &cmd, &packet_size)) goto err_out; @@ -4564,7 +4564,7 @@ int drbd_asender(struct drbd_thread *thi) current->rt_priority = 2; /* more important than all other tasks */ while (get_t_state(thi) == RUNNING) { - drbd_thread_current_set_cpu(mdev); + drbd_thread_current_set_cpu(mdev, thi); if (test_and_clear_bit(SEND_PING, &mdev->flags)) { if (!drbd_send_ping(mdev)) { dev_err(DEV, "drbd_send_ping has failed\n"); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index f13d56c2bf05..0dbd20ca6306 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1621,7 +1621,7 @@ int drbd_worker(struct drbd_thread *thi) sprintf(current->comm, "drbd%d_worker", mdev_to_minor(mdev)); while (get_t_state(thi) == RUNNING) { - drbd_thread_current_set_cpu(mdev); + drbd_thread_current_set_cpu(mdev, thi); if (down_trylock(&mdev->tconn->data.work.s)) { mutex_lock(&mdev->tconn->data.mutex); From 1f04af33fe7db542d75a487b8381b5a3402b7896 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 11:33:59 +0100 Subject: [PATCH 0064/5831] drbd: Moved code Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_worker.c | 43 ++++++++++++++++---------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 0dbd20ca6306..28925d3d1a2f 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -707,28 +707,6 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca return 1; } - -void start_resync_timer_fn(unsigned long data) -{ - struct drbd_conf *mdev = (struct drbd_conf *) data; - - drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work); -} - -int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel) -{ - if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) { - dev_warn(DEV, "w_start_resync later...\n"); - 
mdev->start_resync_timer.expires = jiffies + HZ/10; - add_timer(&mdev->start_resync_timer); - return 1; - } - - drbd_start_resync(mdev, C_SYNC_SOURCE); - clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags); - return 1; -} - int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { kfree(w); @@ -1462,6 +1440,27 @@ void drbd_rs_controller_reset(struct drbd_conf *mdev) spin_unlock(&mdev->peer_seq_lock); } +void start_resync_timer_fn(unsigned long data) +{ + struct drbd_conf *mdev = (struct drbd_conf *) data; + + drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work); +} + +int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +{ + if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) { + dev_warn(DEV, "w_start_resync later...\n"); + mdev->start_resync_timer.expires = jiffies + HZ/10; + add_timer(&mdev->start_resync_timer); + return 1; + } + + drbd_start_resync(mdev, C_SYNC_SOURCE); + clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags); + return 1; +} + /** * drbd_start_resync() - Start the resync process * @mdev: DRBD device. From e64a32945902a178c9de9b38e0ea3290981605bc Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Sat, 5 Feb 2011 17:34:11 +0100 Subject: [PATCH 0065/5831] drbd: Do no sleep long in drbd_start_resync Work items that sleep too long can cause requests to take as long as the longest sleeping work item. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_worker.c | 58 ++++++++++++++++++++------------ 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 9a351a2cab7c..eec36af56744 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -787,6 +787,7 @@ enum { NEW_CUR_UUID, /* Create new current UUID when thawing IO */ AL_SUSPENDED, /* Activity logging is currently suspended. */ AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ + B_RS_H_DONE, /* Before resync handler done (already executed) */ }; struct drbd_bitmap; /* opaque for drbd_conf */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 28925d3d1a2f..a705979c71f8 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1487,35 +1487,49 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) Ahead/Behind and SyncSource/SyncTarget */ } - if (side == C_SYNC_TARGET) { - /* Since application IO was locked out during C_WF_BITMAP_T and - C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET - we check that we might make the data inconsistent. */ - r = drbd_khelper(mdev, "before-resync-target"); - r = (r >> 8) & 0xff; - if (r > 0) { - dev_info(DEV, "before-resync-target handler returned %d, " - "dropping connection.\n", r); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return; - } - } else /* C_SYNC_SOURCE */ { - r = drbd_khelper(mdev, "before-resync-source"); - r = (r >> 8) & 0xff; - if (r > 0) { - if (r == 3) { - dev_info(DEV, "before-resync-source handler returned %d, " - "ignoring. Old userland tools?", r); - } else { - dev_info(DEV, "before-resync-source handler returned %d, " + if (!test_bit(B_RS_H_DONE, &mdev->flags)) { + if (side == C_SYNC_TARGET) { + /* Since application IO was locked out during C_WF_BITMAP_T and + C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET + we check that we might make the data inconsistent. 
*/ + r = drbd_khelper(mdev, "before-resync-target"); + r = (r >> 8) & 0xff; + if (r > 0) { + dev_info(DEV, "before-resync-target handler returned %d, " "dropping connection.\n", r); drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); return; } + } else /* C_SYNC_SOURCE */ { + r = drbd_khelper(mdev, "before-resync-source"); + r = (r >> 8) & 0xff; + if (r > 0) { + if (r == 3) { + dev_info(DEV, "before-resync-source handler returned %d, " + "ignoring. Old userland tools?", r); + } else { + dev_info(DEV, "before-resync-source handler returned %d, " + "dropping connection.\n", r); + drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + return; + } + } } } - drbd_state_lock(mdev); + if (current == mdev->tconn->worker.task) { + /* The worker should not sleep waiting for drbd_state_lock(), + that can take long */ + if (test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { + set_bit(B_RS_H_DONE, &mdev->flags); + mdev->start_resync_timer.expires = jiffies + HZ/5; + add_timer(&mdev->start_resync_timer); + return; + } + } else { + drbd_state_lock(mdev); + } + clear_bit(B_RS_H_DONE, &mdev->flags); if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { drbd_state_unlock(mdev); From 8ea62f546487bc3f4e9a343ec82e5e03d9a3fe06 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Sat, 5 Feb 2011 17:56:05 +0100 Subject: [PATCH 0066/5831] drbd: Revert "Make sure we dont send state if a cluster wide state change is in progress" This reverts commit 6e9fdc92b77915d5c7ab8fea751f48378f8b0080. 1) This did not fixed the issue 2) Long sleeping work items can cause IO requests to take as long as the longest work item Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index ae995ed0e6f9..f33ca43659e2 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -951,10 +951,6 @@ int drbd_send_state(struct drbd_conf *mdev) struct p_state p; int ok = 0; - /* Grab state lock so we wont send state if we're in the middle - * of a cluster wide state change on another thread */ - drbd_state_lock(mdev); - mutex_lock(&mdev->tconn->data.mutex); p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */ @@ -966,7 +962,6 @@ int drbd_send_state(struct drbd_conf *mdev) mutex_unlock(&mdev->tconn->data.mutex); - drbd_state_unlock(mdev); return ok; } From b53339fce2a692bf5f7cb583be2685444d52ded9 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 27 Jan 2011 14:37:23 +0100 Subject: [PATCH 0067/5831] drbd: Moving state related macros to drbd_state.h Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 52 ----------------------------------- 1 file changed, 52 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index eec36af56744..0afc83abc6f1 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1603,58 +1603,6 @@ void drbd_bcast_ee(struct drbd_conf *, const char *, const int, const char *, const char *, const struct drbd_peer_request *); -/** - * DOC: DRBD State macros - * - * These macros are used to express state changes in easily readable form. - * - * The NS macros expand to a mask and a value, that can be bit ored onto the - * current state as soon as the spinlock (req_lock) was taken. - * - * The _NS macros are used for state functions that get called with the - * spinlock. These macros expand directly to the new state value. 
- * - * Besides the basic forms NS() and _NS() additional _?NS[23] are defined - * to express state changes that affect more than one aspect of the state. - * - * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY) - * Means that the network connection was established and that the peer - * is in secondary role. - */ -#define role_MASK R_MASK -#define peer_MASK R_MASK -#define disk_MASK D_MASK -#define pdsk_MASK D_MASK -#define conn_MASK C_MASK -#define susp_MASK 1 -#define user_isp_MASK 1 -#define aftr_isp_MASK 1 -#define susp_nod_MASK 1 -#define susp_fen_MASK 1 - -#define NS(T, S) \ - ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \ - ({ union drbd_state val; val.i = 0; val.T = (S); val; }) -#define NS2(T1, S1, T2, S2) \ - ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ - mask.T2 = T2##_MASK; mask; }), \ - ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ - val.T2 = (S2); val; }) -#define NS3(T1, S1, T2, S2, T3, S3) \ - ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ - mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \ - ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ - val.T2 = (S2); val.T3 = (S3); val; }) - -#define _NS(D, T, S) \ - D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T = (S); __ns; }) -#define _NS2(D, T1, S1, T2, S2) \ - D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ - __ns.T2 = (S2); __ns; }) -#define _NS3(D, T1, S1, T2, S2, T3, S3) \ - D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \ - __ns.T2 = (S2); __ns.T3 = (S3); __ns; }) - /* * inline helper functions *************************/ From 60ae496626ca62e82b23977ace2e96c4e152edd1 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:01:51 +0100 Subject: [PATCH 0068/5831] drbd: conn_printk() a dev_printk() alike for drbd's connections Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 10 ++++++++++ drivers/block/drbd/drbd_main.c | 4 +++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 0afc83abc6f1..c8a9b5003ae8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -102,6 +102,16 @@ struct drbd_tconn; /* to shorten dev_warn(DEV, "msg"); and relatives statements */ #define DEV (disk_to_dev(mdev->vdisk)) +#define conn_printk(LEVEL, TCONN, FMT, ARGS...) \ + printk(LEVEL "d-con %s: " FMT, TCONN->name , ## ARGS) +#define conn_alert(TCONN, FMT, ARGS...) conn_printk(KERN_ALERT, TCONN, FMT, ## ARGS) +#define conn_crit(TCONN, FMT, ARGS...) conn_printk(KERN_CRIT, TCONN, FMT, ## ARGS) +#define conn_err(TCONN, FMT, ARGS...) conn_printk(KERN_ERR, TCONN, FMT, ## ARGS) +#define conn_warn(TCONN, FMT, ARGS...) conn_printk(KERN_WARNING, TCONN, FMT, ## ARGS) +#define conn_notice(TCONN, FMT, ARGS...) conn_printk(KERN_NOTICE, TCONN, FMT, ## ARGS) +#define conn_info(TCONN, FMT, ARGS...) conn_printk(KERN_INFO, TCONN, FMT, ## ARGS) +#define conn_dbg(TCONN, FMT, ARGS...) 
conn_printk(KERN_DEBUG, TCONN, FMT, ## ARGS) + #define D_ASSERT(exp) if (!(exp)) \ dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f33ca43659e2..541e35dbd6c9 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2217,12 +2217,14 @@ struct drbd_conf *drbd_new_device(unsigned int minor) struct drbd_conf *mdev; struct gendisk *disk; struct request_queue *q; + char conn_name[9]; /* drbd1234N */ /* GFP_KERNEL, we are outside of all write-out paths */ mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL); if (!mdev) return NULL; - mdev->tconn = drbd_new_tconn("dummy"); + sprintf(conn_name, "drbd%d", minor); + mdev->tconn = drbd_new_tconn(conn_name); if (!mdev->tconn) goto out_no_tconn; From eac3e990e40616da1dc52212bc0631f2d029b026 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:05:07 +0100 Subject: [PATCH 0069/5831] drbd: Converted drbd_try_connect() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 32 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index dfb59671ff1c..2c5ca8c3029a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -572,7 +572,7 @@ static void drbd_setbufsize(struct socket *sock, unsigned int snd, } } -static struct socket *drbd_try_connect(struct drbd_conf *mdev) +static struct socket *drbd_try_connect(struct drbd_tconn *tconn) { const char *what; struct socket *sock; @@ -580,11 +580,11 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) int err; int disconnect_on_error = 1; - if (!get_net_conf(mdev->tconn)) + if (!get_net_conf(tconn)) return NULL; what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)mdev->tconn->net_conf->my_addr)->sa_family, + err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family, SOCK_STREAM, IPPROTO_TCP, &sock); if (err < 0) { sock = NULL; @@ -592,9 +592,9 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) } sock->sk->sk_rcvtimeo = - sock->sk->sk_sndtimeo = mdev->tconn->net_conf->try_connect_int*HZ; - drbd_setbufsize(sock, mdev->tconn->net_conf->sndbuf_size, - mdev->tconn->net_conf->rcvbuf_size); + sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ; + drbd_setbufsize(sock, tconn->net_conf->sndbuf_size, + tconn->net_conf->rcvbuf_size); /* explicitly bind to the configured IP as source IP * for the outgoing connections. @@ -603,9 +603,9 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) * Make sure to use 0 as port number, so linux selects * a free one dynamically. 
*/ - memcpy(&src_in6, mdev->tconn->net_conf->my_addr, - min_t(int, mdev->tconn->net_conf->my_addr_len, sizeof(src_in6))); - if (((struct sockaddr *)mdev->tconn->net_conf->my_addr)->sa_family == AF_INET6) + memcpy(&src_in6, tconn->net_conf->my_addr, + min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6))); + if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6) src_in6.sin6_port = 0; else ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ @@ -613,7 +613,7 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) what = "bind before connect"; err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, - mdev->tconn->net_conf->my_addr_len); + tconn->net_conf->my_addr_len); if (err < 0) goto out; @@ -622,8 +622,8 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) disconnect_on_error = 0; what = "connect"; err = sock->ops->connect(sock, - (struct sockaddr *)mdev->tconn->net_conf->peer_addr, - mdev->tconn->net_conf->peer_addr_len, 0); + (struct sockaddr *)tconn->net_conf->peer_addr, + tconn->net_conf->peer_addr_len, 0); out: if (err < 0) { @@ -641,12 +641,12 @@ out: disconnect_on_error = 0; break; default: - dev_err(DEV, "%s failed, err = %d\n", what, err); + conn_err(tconn, "%s failed, err = %d\n", what, err); } if (disconnect_on_error) - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING)); } - put_net_conf(mdev->tconn); + put_net_conf(tconn); return sock; } @@ -774,7 +774,7 @@ static int drbd_connect(struct drbd_conf *mdev) do { for (try = 0;;) { /* 3 tries, this should take less than a second! */ - s = drbd_try_connect(mdev); + s = drbd_try_connect(mdev->tconn); if (s || ++try >= 3) break; /* give the other side time to call bind() & listen() */ From 7653620de341f45dc259d74d79c8d85df7e11326 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:09:54 +0100 Subject: [PATCH 0070/5831] drbd: Converted drbd_wait_for_connect() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2c5ca8c3029a..2e58d00742d9 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -447,8 +447,7 @@ void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) /* see also kernel_accept; which is only present since 2.6.18. 
* also we want to log which part of it failed, exactly */ -static int drbd_accept(struct drbd_conf *mdev, const char **what, - struct socket *sock, struct socket **newsock) +static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock) { struct sock *sk = sock->sk; int err = 0; @@ -650,51 +649,51 @@ out: return sock; } -static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev) +static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) { int timeo, err; struct socket *s_estab = NULL, *s_listen; const char *what; - if (!get_net_conf(mdev->tconn)) + if (!get_net_conf(tconn)) return NULL; what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)mdev->tconn->net_conf->my_addr)->sa_family, + err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family, SOCK_STREAM, IPPROTO_TCP, &s_listen); if (err) { s_listen = NULL; goto out; } - timeo = mdev->tconn->net_conf->try_connect_int * HZ; + timeo = tconn->net_conf->try_connect_int * HZ; timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */ s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ s_listen->sk->sk_rcvtimeo = timeo; s_listen->sk->sk_sndtimeo = timeo; - drbd_setbufsize(s_listen, mdev->tconn->net_conf->sndbuf_size, - mdev->tconn->net_conf->rcvbuf_size); + drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size, + tconn->net_conf->rcvbuf_size); what = "bind before listen"; err = s_listen->ops->bind(s_listen, - (struct sockaddr *) mdev->tconn->net_conf->my_addr, - mdev->tconn->net_conf->my_addr_len); + (struct sockaddr *) tconn->net_conf->my_addr, + tconn->net_conf->my_addr_len); if (err < 0) goto out; - err = drbd_accept(mdev, &what, s_listen, &s_estab); + err = drbd_accept(&what, s_listen, &s_estab); out: if (s_listen) sock_release(s_listen); if (err < 0) { if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { - dev_err(DEV, "%s failed, err = %d\n", what, err); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_err(tconn, "%s failed, err = %d\n", what, err); + drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING)); } } - put_net_conf(mdev->tconn); + put_net_conf(tconn); return s_estab; } @@ -805,7 +804,7 @@ static int drbd_connect(struct drbd_conf *mdev) } retry: - s = drbd_wait_for_connect(mdev); + s = drbd_wait_for_connect(mdev->tconn); if (s) { try = drbd_recv_fp(mdev, s); drbd_socket_okay(mdev, &sock); From 01a311a589709d83a1f2b7d2587e171d97f12017 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:30:33 +0100 Subject: [PATCH 0071/5831] drbd: Started to separated connection flags (tconn) from block device flags (mdev) Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 10 +++++++--- drivers/block/drbd/drbd_main.c | 8 ++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index c8a9b5003ae8..f46571acd4d0 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -754,7 +754,7 @@ enum { #define EE_WAS_ERROR (1<<__EE_WAS_ERROR) #define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST) -/* global flag bits */ +/* flag bits per mdev */ enum { CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */ SIGNAL_ASENDER, /* whether asender wants to be interrupted */ @@ -782,8 +782,6 @@ enum { GO_DISKLESS, /* Disk is being detached, on io-error or admin request. 
*/ WAS_IO_ERROR, /* Local disk failed returned IO error */ RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ - NET_CONGESTED, /* The data socket is congested */ - CONFIG_PENDING, /* serialization of (re)configuration requests. * if set, also prevents the device from dying */ DEVICE_DYING, /* device became unconfigured, @@ -910,10 +908,16 @@ struct fifo_buffer { unsigned int size; }; +/* flag bits per tconn */ +enum { + NET_CONGESTED, /* The data socket is congested */ +}; + struct drbd_tconn { /* is a resource from the config file */ char *name; /* Resource name */ struct list_head all_tconn; /* List of all drbd_tconn, prot by global_state_lock */ struct drbd_conf *volume0; /* TODO: Remove me again */ + unsigned long flags; struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ atomic_t net_cnt; /* Users of net_conf */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 541e35dbd6c9..8b443c8b13b3 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1358,7 +1358,7 @@ static void drbd_update_congested(struct drbd_conf *mdev) { struct sock *sk = mdev->tconn->data.socket->sk; if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) - set_bit(NET_CONGESTED, &mdev->flags); + set_bit(NET_CONGESTED, &mdev->tconn->flags); } /* The idea of sendpage seems to be to put some kind of reference @@ -1431,7 +1431,7 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, offset += sent; } while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/); set_fs(oldfs); - clear_bit(NET_CONGESTED, &mdev->flags); + clear_bit(NET_CONGESTED, &mdev->tconn->flags); ok = (len == 0); if (likely(ok)) @@ -1694,7 +1694,7 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, } while (sent < size); if (sock == mdev->tconn->data.socket) - clear_bit(NET_CONGESTED, &mdev->flags); + clear_bit(NET_CONGESTED, &mdev->tconn->flags); if (rv <= 0) { if (rv != -EAGAIN) { @@ -2161,7 +2161,7 @@ static int drbd_congested(void *congested_data, int bdi_bits) reason = 'b'; } - if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->flags)) { + if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->tconn->flags)) { r |= (1 << BDI_async_congested); reason = reason == 'b' ? 'a' : 'n'; } From 25703f832000393721641772a827469d46b1105b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:35:25 +0100 Subject: [PATCH 0072/5831] drbd: Moved DISCARD_CONCURRENT to the per connection (tconn) flags Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_receiver.c | 14 +++++++------- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index f46571acd4d0..2b8566362b78 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -763,7 +763,6 @@ enum { UNPLUG_QUEUED, /* only relevant with kernel 2.4 */ UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ MD_DIRTY, /* current uuids and flags not yet on disk */ - DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */ USE_DEGR_WFC_T, /* degr-wfc-timeout instead of wfc-timeout. */ CLUSTER_ST_CHANGE, /* Cluster wide state change going on... 
*/ CL_ST_CHG_SUCCESS, @@ -911,6 +910,7 @@ struct fifo_buffer { /* flag bits per tconn */ enum { NET_CONGESTED, /* The data socket is congested */ + DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */ }; struct drbd_tconn { /* is a resource from the config file */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index e30d52ba3fcf..fda399ace8dd 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1637,7 +1637,7 @@ void resync_after_online_grow(struct drbd_conf *mdev) if (mdev->state.role != mdev->state.peer) iass = (mdev->state.role == R_PRIMARY); else - iass = test_bit(DISCARD_CONCURRENT, &mdev->flags); + iass = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); if (iass) drbd_start_resync(mdev, C_SYNC_SOURCE); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2e58d00742d9..e2eed149bb9c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -762,7 +762,7 @@ static int drbd_connect(struct drbd_conf *mdev) if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) return -2; - clear_bit(DISCARD_CONCURRENT, &mdev->flags); + clear_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); mdev->tconn->agreed_pro_version = 99; /* agreed_pro_version must be smaller than 100 so we send the old header (h80) in the first packet and in the handshake packet. */ @@ -823,7 +823,7 @@ retry: sock_release(msock); } msock = s; - set_bit(DISCARD_CONCURRENT, &mdev->flags); + set_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); break; default: dev_warn(DEV, "Error receiving initial packet\n"); @@ -1779,7 +1779,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, /* don't get the req_lock yet, * we may sleep in drbd_wait_peer_seq */ const int size = peer_req->i.size; - const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags); + const int discard = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); DEFINE_WAIT(wait); int first; @@ -2239,7 +2239,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) "Using discard-least-changes instead\n"); case ASB_DISCARD_ZERO_CHG: if (ch_peer == 0 && ch_self == 0) { - rv = test_bit(DISCARD_CONCURRENT, &mdev->flags) + rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) ? -1 : 1; break; } else { @@ -2255,7 +2255,7 @@ static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) rv = 1; else /* ( ch_self == ch_peer ) */ /* Well, then use something else. */ - rv = test_bit(DISCARD_CONCURRENT, &mdev->flags) + rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) ? -1 : 1; break; case ASB_DISCARD_LOCAL: @@ -2468,7 +2468,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l case 1: /* self_pri && !peer_pri */ return 1; case 2: /* !self_pri && peer_pri */ return -1; case 3: /* self_pri && peer_pri */ - dc = test_bit(DISCARD_CONCURRENT, &mdev->flags); + dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); return dc ? 
-1 : 1; } } @@ -3209,7 +3209,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd, mask.i = be32_to_cpu(p->mask); val.i = be32_to_cpu(p->val); - if (test_bit(DISCARD_CONCURRENT, &mdev->flags) && + if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) && test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); return true; From e43ef195f8bc828cac931a58d8c308228c51b7cf Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:40:40 +0100 Subject: [PATCH 0073/5831] drbd: Moved SEND_PING to the per connection (tconn) flags Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_receiver.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 2b8566362b78..227c0956e802 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -758,7 +758,6 @@ enum { enum { CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */ SIGNAL_ASENDER, /* whether asender wants to be interrupted */ - SEND_PING, /* whether asender should send a ping asap */ UNPLUG_QUEUED, /* only relevant with kernel 2.4 */ UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ @@ -911,6 +910,7 @@ struct fifo_buffer { enum { NET_CONGESTED, /* The data socket is congested */ DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */ + SEND_PING, /* whether asender should send a ping asap */ }; struct drbd_tconn { /* is a resource from the config file */ @@ -1867,7 +1867,7 @@ static inline void wake_asender(struct drbd_conf *mdev) static inline void request_ping(struct drbd_conf *mdev) { - set_bit(SEND_PING, &mdev->flags); + set_bit(SEND_PING, &mdev->tconn->flags); wake_asender(mdev); } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e2eed149bb9c..54bf7a5c2252 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4564,7 +4564,7 @@ int drbd_asender(struct drbd_thread *thi) while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(mdev, thi); - if (test_and_clear_bit(SEND_PING, &mdev->flags)) { + if (test_and_clear_bit(SEND_PING, &mdev->tconn->flags)) { if (!drbd_send_ping(mdev)) { dev_err(DEV, "drbd_send_ping has failed\n"); goto reconnect; @@ -4635,7 +4635,7 @@ int drbd_asender(struct drbd_thread *thi) dev_err(DEV, "PingAck did not arrive in time.\n"); goto reconnect; } - set_bit(SEND_PING, &mdev->flags); + set_bit(SEND_PING, &mdev->tconn->flags); continue; } else if (rv == -EINTR) { continue; From 808e37b803958e09494e0c7de492386845060057 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:44:14 +0100 Subject: [PATCH 0074/5831] drbd: Moved SIGNAL_ASENDER to the per connection (tconn) flags Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 5 ++--- drivers/block/drbd/drbd_receiver.c | 8 ++++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 227c0956e802..33882c82b1ac 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -757,8 +757,6 @@ enum { /* flag bits per mdev */ enum { CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */ - SIGNAL_ASENDER, /* whether asender wants to be interrupted */ - UNPLUG_QUEUED, /* only relevant with kernel 2.4 */ UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ MD_DIRTY, 
/* current uuids and flags not yet on disk */ @@ -911,6 +909,7 @@ enum { NET_CONGESTED, /* The data socket is congested */ DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */ SEND_PING, /* whether asender should send a ping asap */ + SIGNAL_ASENDER, /* whether asender wants to be interrupted */ }; struct drbd_tconn { /* is a resource from the config file */ @@ -1861,7 +1860,7 @@ drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) static inline void wake_asender(struct drbd_conf *mdev) { - if (test_bit(SIGNAL_ASENDER, &mdev->flags)) + if (test_bit(SIGNAL_ASENDER, &mdev->tconn->flags)) force_sig(DRBD_SIG, mdev->tconn->asender.task); } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 54bf7a5c2252..b4c357e4ad82 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4580,12 +4580,12 @@ int drbd_asender(struct drbd_thread *thi) 3 < atomic_read(&mdev->unacked_cnt)) drbd_tcp_cork(mdev->tconn->meta.socket); while (1) { - clear_bit(SIGNAL_ASENDER, &mdev->flags); + clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags); flush_signals(current); if (!drbd_process_done_ee(mdev)) goto reconnect; /* to avoid race with newly queued ACKs */ - set_bit(SIGNAL_ASENDER, &mdev->flags); + set_bit(SIGNAL_ASENDER, &mdev->tconn->flags); spin_lock_irq(&mdev->tconn->req_lock); empty = list_empty(&mdev->done_ee); spin_unlock_irq(&mdev->tconn->req_lock); @@ -4605,7 +4605,7 @@ int drbd_asender(struct drbd_thread *thi) rv = drbd_recv_short(mdev, mdev->tconn->meta.socket, buf, expect-received, 0); - clear_bit(SIGNAL_ASENDER, &mdev->flags); + clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags); flush_signals(current); @@ -4688,7 +4688,7 @@ disconnect: drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); drbd_md_sync(mdev); } - clear_bit(SIGNAL_ASENDER, &mdev->flags); + clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags); D_ASSERT(mdev->state.conn < C_CONNECTED); dev_info(DEV, "asender terminated\n"); From 0625ac190d222fd0855bad79e93f1556fc45dd20 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:49:19 +0100 Subject: [PATCH 0075/5831] drbd: Converted wake_asender() and request_ping() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 12 ++++++------ drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_worker.c | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 33882c82b1ac..0b2962c623a8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1858,16 +1858,16 @@ drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) spin_unlock_irqrestore(&q->q_lock, flags); } -static inline void wake_asender(struct drbd_conf *mdev) +static inline void wake_asender(struct drbd_tconn *tconn) { - if (test_bit(SIGNAL_ASENDER, &mdev->tconn->flags)) - force_sig(DRBD_SIG, mdev->tconn->asender.task); + if (test_bit(SIGNAL_ASENDER, &tconn->flags)) + force_sig(DRBD_SIG, tconn->asender.task); } -static inline void request_ping(struct drbd_conf *mdev) +static inline void request_ping(struct drbd_tconn *tconn) { - set_bit(SEND_PING, &mdev->tconn->flags); - wake_asender(mdev); + set_bit(SEND_PING, &tconn->flags); + wake_asender(tconn); } static inline int drbd_send_short_cmd(struct drbd_conf *mdev, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 
8b443c8b13b3..899bbb1c9865 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1348,7 +1348,7 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * if (!drop_it) { dev_err(DEV, "[%s/%d] sock_sendmsg time expired, ko = %u\n", current->comm, current->pid, mdev->tconn->ko_count); - request_ping(mdev); + request_ping(mdev->tconn); } return drop_it; /* && (mdev->state == R_PRIMARY) */; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index fda399ace8dd..df36a573cd47 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -318,7 +318,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) enum drbd_disk_state nps; if (new_role == R_PRIMARY) - request_ping(mdev); /* Detect a dead peer ASAP */ + request_ping(mdev->tconn); /* Detect a dead peer ASAP */ mutex_lock(&mdev->state_mutex); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b4c357e4ad82..a5234f99de02 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1864,7 +1864,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, * but I don't like the receiver using the msock */ put_ldev(mdev); - wake_asender(mdev); + wake_asender(mdev->tconn); finish_wait(&mdev->misc_wait, &wait); return true; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index a705979c71f8..5be179ba0c76 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -145,7 +145,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel if (do_al_complete_io) drbd_al_complete_io(mdev, e_sector); - wake_asender(mdev); + wake_asender(mdev->tconn); put_ldev(mdev); } @@ -728,7 +728,7 @@ static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int ca static void ping_peer(struct drbd_conf *mdev) { clear_bit(GOT_PING_ACK, &mdev->flags); - request_ping(mdev); + request_ping(mdev->tconn); wait_event(mdev->misc_wait, test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED); } From 1a7ba646e966500d60578aa7406c158c8cca51d4 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 14:56:02 +0100 Subject: [PATCH 0076/5831] drbd: Converted helper functions for drbd_send() to tconn * drbd_update_congested() * we_should_drop_the_connection() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 899bbb1c9865..be4cb1ac2e96 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1331,34 +1331,34 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) * returns false if we should retry, * true if we think connection is dead */ -static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock) +static int we_should_drop_the_connection(struct drbd_tconn *tconn, struct socket *sock) { int drop_it; /* long elapsed = (long)(jiffies - mdev->last_received); */ - drop_it = mdev->tconn->meta.socket == sock - || !mdev->tconn->asender.task - || get_t_state(&mdev->tconn->asender) != RUNNING - || mdev->state.conn < C_CONNECTED; + drop_it = tconn->meta.socket == sock + || !tconn->asender.task + || get_t_state(&tconn->asender) != RUNNING + || tconn->volume0->state.conn < 
C_CONNECTED; if (drop_it) return true; - drop_it = !--mdev->tconn->ko_count; + drop_it = !--tconn->ko_count; if (!drop_it) { - dev_err(DEV, "[%s/%d] sock_sendmsg time expired, ko = %u\n", - current->comm, current->pid, mdev->tconn->ko_count); - request_ping(mdev->tconn); + conn_err(tconn, "[%s/%d] sock_sendmsg time expired, ko = %u\n", + current->comm, current->pid, tconn->ko_count); + request_ping(tconn); } return drop_it; /* && (mdev->state == R_PRIMARY) */; } -static void drbd_update_congested(struct drbd_conf *mdev) +static void drbd_update_congested(struct drbd_tconn *tconn) { - struct sock *sk = mdev->tconn->data.socket->sk; + struct sock *sk = tconn->data.socket->sk; if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) - set_bit(NET_CONGESTED, &mdev->tconn->flags); + set_bit(NET_CONGESTED, &tconn->flags); } /* The idea of sendpage seems to be to put some kind of reference @@ -1409,14 +1409,14 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, return _drbd_no_send_page(mdev, page, offset, size, msg_flags); msg_flags |= MSG_NOSIGNAL; - drbd_update_congested(mdev); + drbd_update_congested(mdev->tconn); set_fs(KERNEL_DS); do { sent = mdev->tconn->data.socket->ops->sendpage(mdev->tconn->data.socket, page, offset, len, msg_flags); if (sent == -EAGAIN) { - if (we_should_drop_the_connection(mdev, + if (we_should_drop_the_connection(mdev->tconn, mdev->tconn->data.socket)) break; else @@ -1662,7 +1662,7 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, if (sock == mdev->tconn->data.socket) { mdev->tconn->ko_count = mdev->tconn->net_conf->ko_count; - drbd_update_congested(mdev); + drbd_update_congested(mdev->tconn); } do { /* STRANGE @@ -1676,7 +1676,7 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, */ rv = kernel_sendmsg(sock, &msg, &iov, 1, size); if (rv == -EAGAIN) { - if (we_should_drop_the_connection(mdev, sock)) + if (we_should_drop_the_connection(mdev->tconn, sock)) break; else continue; From bedbd2a53a0bcb5715b4d1f59ec8af045092a167 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 15:08:48 +0100 Subject: [PATCH 0077/5831] drbd: Converted drbd_send() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +-- drivers/block/drbd/drbd_main.c | 45 +++++++++++++++++----------------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 0b2962c623a8..ccbb0320a2c8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1159,8 +1159,8 @@ extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, extern void tl_clear(struct drbd_conf *mdev); extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); extern void drbd_free_sock(struct drbd_conf *mdev); -extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, - void *buf, size_t size, unsigned msg_flags); +extern int drbd_send(struct drbd_tconn *tconn, struct socket *sock, + void *buf, size_t size, unsigned msg_flags); extern int drbd_send_protocol(struct drbd_conf *mdev); extern int drbd_send_uuids(struct drbd_conf *mdev); extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index be4cb1ac2e96..d1bfbfcf8f24 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -690,7 +690,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, prepare_header(mdev, h, 
cmd, size - sizeof(struct p_header)); - sent = drbd_send(mdev, sock, h, size, msg_flags); + sent = drbd_send(mdev->tconn, sock, h, size, msg_flags); ok = (sent == size); if (!ok && !signal_pending(current)) @@ -740,9 +740,9 @@ int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packet cmd, char *data, return 0; ok = (sizeof(h) == - drbd_send(mdev, mdev->tconn->data.socket, &h, sizeof(h), 0)); + drbd_send(mdev->tconn, mdev->tconn->data.socket, &h, sizeof(h), 0)); ok = ok && (size == - drbd_send(mdev, mdev->tconn->data.socket, data, size, 0)); + drbd_send(mdev->tconn, mdev->tconn->data.socket, data, size, 0)); drbd_put_data_sock(mdev); @@ -1306,8 +1306,8 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size, mutex_lock(&mdev->tconn->data.mutex); - ok = (sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), 0)); - ok = ok && (digest_size == drbd_send(mdev, mdev->tconn->data.socket, digest, digest_size, 0)); + ok = (sizeof(p) == drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), 0)); + ok = ok && (digest_size == drbd_send(mdev->tconn, mdev->tconn->data.socket, digest, digest_size, 0)); mutex_unlock(&mdev->tconn->data.mutex); @@ -1385,7 +1385,7 @@ static void drbd_update_congested(struct drbd_tconn *tconn) static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, int offset, size_t size, unsigned msg_flags) { - int sent = drbd_send(mdev, mdev->tconn->data.socket, kmap(page) + offset, size, msg_flags); + int sent = drbd_send(mdev->tconn, mdev->tconn->data.socket, kmap(page) + offset, size, msg_flags); kunmap(page); if (sent == size) mdev->send_cnt += size>>9; @@ -1526,11 +1526,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) p.dp_flags = cpu_to_be32(dp_flags); set_bit(UNPLUG_REMOTE, &mdev->flags); ok = (sizeof(p) == - drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0)); + drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0)); if (ok && dgs) { dgb = mdev->tconn->int_dig_out; drbd_csum_bio(mdev, mdev->tconn->integrity_w_tfm, req->master_bio, dgb); - ok = dgs == drbd_send(mdev, mdev->tconn->data.socket, dgb, dgs, 0); + ok = dgs == drbd_send(mdev->tconn, mdev->tconn->data.socket, dgb, dgs, 0); } if (ok) { /* For protocol A, we have to memcpy the payload into @@ -1599,11 +1599,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, if (!drbd_get_data_sock(mdev)) return 0; - ok = sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0); + ok = sizeof(p) == drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0); if (ok && dgs) { dgb = mdev->tconn->int_dig_out; drbd_csum_ee(mdev, mdev->tconn->integrity_w_tfm, peer_req, dgb); - ok = dgs == drbd_send(mdev, mdev->tconn->data.socket, dgb, dgs, 0); + ok = dgs == drbd_send(mdev->tconn, mdev->tconn->data.socket, dgb, dgs, 0); } if (ok) ok = _drbd_send_zc_ee(mdev, peer_req); @@ -1639,7 +1639,7 @@ int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req) /* * you must have down()ed the appropriate [m]sock_mutex elsewhere! 
*/ -int drbd_send(struct drbd_conf *mdev, struct socket *sock, +int drbd_send(struct drbd_tconn *tconn, struct socket *sock, void *buf, size_t size, unsigned msg_flags) { struct kvec iov; @@ -1660,9 +1660,9 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, msg.msg_controllen = 0; msg.msg_flags = msg_flags | MSG_NOSIGNAL; - if (sock == mdev->tconn->data.socket) { - mdev->tconn->ko_count = mdev->tconn->net_conf->ko_count; - drbd_update_congested(mdev->tconn); + if (sock == tconn->data.socket) { + tconn->ko_count = tconn->net_conf->ko_count; + drbd_update_congested(tconn); } do { /* STRANGE @@ -1676,12 +1676,11 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, */ rv = kernel_sendmsg(sock, &msg, &iov, 1, size); if (rv == -EAGAIN) { - if (we_should_drop_the_connection(mdev->tconn, sock)) + if (we_should_drop_the_connection(tconn, sock)) break; else continue; } - D_ASSERT(rv != 0); if (rv == -EINTR) { flush_signals(current); rv = 0; @@ -1693,17 +1692,17 @@ int drbd_send(struct drbd_conf *mdev, struct socket *sock, iov.iov_len -= rv; } while (sent < size); - if (sock == mdev->tconn->data.socket) - clear_bit(NET_CONGESTED, &mdev->tconn->flags); + if (sock == tconn->data.socket) + clear_bit(NET_CONGESTED, &tconn->flags); if (rv <= 0) { if (rv != -EAGAIN) { - dev_err(DEV, "%s_sendmsg returned %d\n", - sock == mdev->tconn->meta.socket ? "msock" : "sock", - rv); - drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE)); + conn_err(tconn, "%s_sendmsg returned %d\n", + sock == tconn->meta.socket ? "msock" : "sock", + rv); + drbd_force_state(tconn->volume0, NS(conn, C_BROKEN_PIPE)); } else - drbd_force_state(mdev, NS(conn, C_TIMEOUT)); + drbd_force_state(tconn->volume0, NS(conn, C_TIMEOUT)); } return sent; From d38e787eccb7afd069e33b2f4a32e02e0ad8decb Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 15:32:04 +0100 Subject: [PATCH 0078/5831] drbd: Converted drbd_send_fp() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 13 ++++++++--- drivers/block/drbd/drbd_main.c | 35 +++++++++++++++--------------- drivers/block/drbd/drbd_receiver.c | 14 +++++------- 3 files changed, 33 insertions(+), 29 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ccbb0320a2c8..e640ffdad9cd 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1168,9 +1168,9 @@ extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); extern int _drbd_send_state(struct drbd_conf *mdev); extern int drbd_send_state(struct drbd_conf *mdev); -extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packet cmd, struct p_header *h, - size_t size, unsigned msg_flags); +extern int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, + enum drbd_packet cmd, struct p_header *h, size_t size, + unsigned msg_flags); #define USE_DATA_SOCKET 1 #define USE_META_SOCKET 0 extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, @@ -1870,6 +1870,13 @@ static inline void request_ping(struct drbd_tconn *tconn) wake_asender(tconn); } +static inline int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, + enum drbd_packet cmd, struct p_header *h, size_t size, + unsigned msg_flags) +{ + return _conn_send_cmd(mdev->tconn, mdev->vnr, sock, cmd, h, size, msg_flags); +} + static inline int drbd_send_short_cmd(struct drbd_conf *mdev, enum 
drbd_packet cmd) { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index d1bfbfcf8f24..2a67e272b16c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -651,51 +651,50 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev, struct drbd_thread *thi } #endif -static void prepare_header80(struct drbd_conf *mdev, struct p_header80 *h, - enum drbd_packet cmd, int size) +static void prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size) { h->magic = cpu_to_be32(DRBD_MAGIC); h->command = cpu_to_be16(cmd); h->length = cpu_to_be16(size); } -static void prepare_header95(struct drbd_conf *mdev, struct p_header95 *h, - enum drbd_packet cmd, int size) +static void prepare_header95(struct p_header95 *h, enum drbd_packet cmd, int size) { h->magic = cpu_to_be16(DRBD_MAGIC_BIG); h->command = cpu_to_be16(cmd); h->length = cpu_to_be32(size); } +static void _prepare_header(struct drbd_tconn *tconn, int vnr, struct p_header *h, + enum drbd_packet cmd, int size) +{ + if (tconn->agreed_pro_version >= 100 || size > DRBD_MAX_SIZE_H80_PACKET) + prepare_header95(&h->h95, cmd, size); + else + prepare_header80(&h->h80, cmd, size); +} + static void prepare_header(struct drbd_conf *mdev, struct p_header *h, enum drbd_packet cmd, int size) { - if (mdev->tconn->agreed_pro_version >= 100 || size > DRBD_MAX_SIZE_H80_PACKET) - prepare_header95(mdev, &h->h95, cmd, size); - else - prepare_header80(mdev, &h->h80, cmd, size); + _prepare_header(mdev->tconn, mdev->vnr, h, cmd, size); } /* the appropriate socket mutex must be held already */ -int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, +int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, enum drbd_packet cmd, struct p_header *h, size_t size, unsigned msg_flags) { int sent, ok; - if (!expect(h)) - return false; - if (!expect(size)) - return false; + _prepare_header(tconn, vnr, h, cmd, size - sizeof(struct p_header)); - prepare_header(mdev, h, cmd, size - sizeof(struct p_header)); - - sent = drbd_send(mdev->tconn, sock, h, size, msg_flags); + sent = drbd_send(tconn, sock, h, size, msg_flags); ok = (sent == size); if (!ok && !signal_pending(current)) - dev_warn(DEV, "short sent %s size=%d sent=%d\n", - cmdname(cmd), (int)size, sent); + conn_warn(tconn, "short sent %s size=%d sent=%d\n", + cmdname(cmd), (int)size, sent); return ok; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index a5234f99de02..96622d7eadfd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -698,16 +698,14 @@ out: return s_estab; } -static int drbd_send_fp(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packet cmd) +static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd) { - struct p_header *h = &mdev->tconn->data.sbuf.header; + struct p_header *h = &tconn->data.sbuf.header; - return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); + return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0); } -static enum drbd_packet drbd_recv_fp(struct drbd_conf *mdev, - struct socket *sock) +static enum drbd_packet drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) { struct p_header80 *h = &mdev->tconn->data.rbuf.header.h80; int rr; @@ -782,11 +780,11 @@ static int drbd_connect(struct drbd_conf *mdev) if (s) { if (!sock) { - drbd_send_fp(mdev, s, P_HAND_SHAKE_S); + drbd_send_fp(mdev->tconn, s, P_HAND_SHAKE_S); sock = s; s = NULL; } else if (!msock) { - drbd_send_fp(mdev, 
s, P_HAND_SHAKE_M); + drbd_send_fp(mdev->tconn, s, P_HAND_SHAKE_M); msock = s; s = NULL; } else { From dbd9eea094964e31c718ad8ade7c296d9e9da758 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 15:34:16 +0100 Subject: [PATCH 0079/5831] drbd: Removed unused mdev argument from drbd_recv_short() and drbd_socket_okay() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 96622d7eadfd..8d048df04a30 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -476,8 +476,7 @@ out: return err; } -static int drbd_recv_short(struct drbd_conf *mdev, struct socket *sock, - void *buf, size_t size, int flags) +static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags) { mm_segment_t oldfs; struct kvec iov = { @@ -710,7 +709,7 @@ static enum drbd_packet drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock struct p_header80 *h = &mdev->tconn->data.rbuf.header.h80; int rr; - rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0); + rr = drbd_recv_short(sock, h, sizeof(*h), 0); if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC)) return be16_to_cpu(h->command); @@ -720,10 +719,9 @@ static enum drbd_packet drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock /** * drbd_socket_okay() - Free the socket if its connection is not okay - * @mdev: DRBD device. * @sock: pointer to the pointer to the socket. */ -static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock) +static int drbd_socket_okay(struct socket **sock) { int rr; char tb[4]; @@ -731,7 +729,7 @@ static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock) if (!*sock) return false; - rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); + rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); if (rr > 0 || rr == -EAGAIN) { return true; @@ -795,8 +793,8 @@ static int drbd_connect(struct drbd_conf *mdev) if (sock && msock) { schedule_timeout_interruptible(mdev->tconn->net_conf->ping_timeo*HZ/10); - ok = drbd_socket_okay(mdev, &sock); - ok = drbd_socket_okay(mdev, &msock) && ok; + ok = drbd_socket_okay(&sock); + ok = drbd_socket_okay(&msock) && ok; if (ok) break; } @@ -805,8 +803,8 @@ retry: s = drbd_wait_for_connect(mdev->tconn); if (s) { try = drbd_recv_fp(mdev, s); - drbd_socket_okay(mdev, &sock); - drbd_socket_okay(mdev, &msock); + drbd_socket_okay(&sock); + drbd_socket_okay(&msock); switch (try) { case P_HAND_SHAKE_S: if (sock) { @@ -841,8 +839,8 @@ retry: } if (sock && msock) { - ok = drbd_socket_okay(mdev, &sock); - ok = drbd_socket_okay(mdev, &msock) && ok; + ok = drbd_socket_okay(&sock); + ok = drbd_socket_okay(&msock) && ok; if (ok) break; } @@ -4601,8 +4599,7 @@ int drbd_asender(struct drbd_thread *thi) if (signal_pending(current)) continue; - rv = drbd_recv_short(mdev, mdev->tconn->meta.socket, - buf, expect-received, 0); + rv = drbd_recv_short(mdev->tconn->meta.socket, buf, expect-received, 0); clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags); flush_signals(current); From a25b63f1e75df7dbc27666b627e6277d7fea92b7 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 15:43:45 +0100 Subject: [PATCH 0080/5831] drbd: Converted drbd_recv_fp() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8d048df04a30..60a4f651a084 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -704,9 +704,9 @@ static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0); } -static enum drbd_packet drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) +static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock) { - struct p_header80 *h = &mdev->tconn->data.rbuf.header.h80; + struct p_header80 *h = &tconn->data.rbuf.header.h80; int rr; rr = drbd_recv_short(sock, h, sizeof(*h), 0); @@ -802,7 +802,7 @@ static int drbd_connect(struct drbd_conf *mdev) retry: s = drbd_wait_for_connect(mdev->tconn); if (s) { - try = drbd_recv_fp(mdev, s); + try = drbd_recv_fp(mdev->tconn, s); drbd_socket_okay(&sock); drbd_socket_okay(&msock); switch (try) { From 8a22cccc2068b35124f340fcc3f38b730007deff Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 16:47:12 +0100 Subject: [PATCH 0081/5831] drbd: Converted drbd_send_handshake() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_receiver.c | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e640ffdad9cd..845ff34d2060 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -378,7 +378,7 @@ struct p_block_req { */ struct p_handshake { - struct p_header head; /* Note: You must always use a h80 here */ + struct p_header head; /* Note: vnr will be ignored */ u32 protocol_min; u32 feature_flags; u32 protocol_max; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 60a4f651a084..565f2ea47ab8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3952,28 +3952,28 @@ static void drbd_disconnect(struct drbd_conf *mdev) * * for now, they are expected to be zero, but ignored. */ -static int drbd_send_handshake(struct drbd_conf *mdev) +static int drbd_send_handshake(struct drbd_tconn *tconn) { /* ASSERT current == mdev->tconn->receiver ... */ - struct p_handshake *p = &mdev->tconn->data.sbuf.handshake; + struct p_handshake *p = &tconn->data.sbuf.handshake; int ok; - if (mutex_lock_interruptible(&mdev->tconn->data.mutex)) { - dev_err(DEV, "interrupted during initial handshake\n"); + if (mutex_lock_interruptible(&tconn->data.mutex)) { + conn_err(tconn, "interrupted during initial handshake\n"); return 0; /* interrupted. not ok. 
*/ } - if (mdev->tconn->data.socket == NULL) { - mutex_unlock(&mdev->tconn->data.mutex); + if (tconn->data.socket == NULL) { + mutex_unlock(&tconn->data.mutex); return 0; } memset(p, 0, sizeof(*p)); p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); - ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_HAND_SHAKE, - &p->head, sizeof(*p), 0 ); - mutex_unlock(&mdev->tconn->data.mutex); + ok = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE, + &p->head, sizeof(*p), 0); + mutex_unlock(&tconn->data.mutex); return ok; } @@ -3993,7 +3993,7 @@ static int drbd_do_handshake(struct drbd_conf *mdev) enum drbd_packet cmd; int rv; - rv = drbd_send_handshake(mdev); + rv = drbd_send_handshake(mdev->tconn); if (!rv) return 0; From de0ff338d61645f39e0687c9c3560d8b64bed4a3 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 16:56:20 +0100 Subject: [PATCH 0082/5831] drbd: Converted drbd_recv() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 46 +++++++++++++++--------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 565f2ea47ab8..1368fc3518df 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -498,7 +498,7 @@ static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flag return rv; } -static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) +static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size) { mm_segment_t oldfs; struct kvec iov = { @@ -516,7 +516,7 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) set_fs(KERNEL_DS); for (;;) { - rv = sock_recvmsg(mdev->tconn->data.socket, &msg, size, msg.msg_flags); + rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags); if (rv == size) break; @@ -527,12 +527,12 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) if (rv < 0) { if (rv == -ECONNRESET) - dev_info(DEV, "sock was reset by peer\n"); + conn_info(tconn, "sock was reset by peer\n"); else if (rv != -ERESTARTSYS) - dev_err(DEV, "sock_recvmsg returned %d\n", rv); + conn_err(tconn, "sock_recvmsg returned %d\n", rv); break; } else if (rv == 0) { - dev_info(DEV, "sock was shut down by peer\n"); + conn_info(tconn, "sock was shut down by peer\n"); break; } else { /* signal came in, or peer/link went down, @@ -546,7 +546,7 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) set_fs(oldfs); if (rv != size) - drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE)); + drbd_force_state(tconn->volume0, NS(conn, C_BROKEN_PIPE)); return rv; } @@ -949,7 +949,7 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packet *cmd, struct p_header *h = &mdev->tconn->data.rbuf.header; int r; - r = drbd_recv(mdev, h, sizeof(*h)); + r = drbd_recv(mdev->tconn, h, sizeof(*h)); if (unlikely(r != sizeof(*h))) { if (!signal_pending(current)) dev_warn(DEV, "short read expecting header on sock: r=%d\n", r); @@ -1272,7 +1272,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; if (dgs) { - rr = drbd_recv(mdev, dig_in, dgs); + rr = drbd_recv(mdev->tconn, dig_in, dgs); if (rr != dgs) { if (!signal_pending(current)) dev_warn(DEV, @@ -1313,7 +1313,7 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, page_chain_for_each(page) { unsigned len = min_t(int, ds, 
PAGE_SIZE); data = kmap(page); - rr = drbd_recv(mdev, data, len); + rr = drbd_recv(mdev->tconn, data, len); if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) { dev_err(DEV, "Fault injection: Corrupting data on receive\n"); data[0] = data[0] ^ (unsigned long)-1; @@ -1360,7 +1360,7 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size) data = kmap(page); while (data_size) { - rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE)); + rr = drbd_recv(mdev->tconn, data, min_t(int, data_size, PAGE_SIZE)); if (rr != min_t(int, data_size, PAGE_SIZE)) { rv = 0; if (!signal_pending(current)) @@ -1389,7 +1389,7 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0; if (dgs) { - rr = drbd_recv(mdev, dig_in, dgs); + rr = drbd_recv(mdev->tconn, dig_in, dgs); if (rr != dgs) { if (!signal_pending(current)) dev_warn(DEV, @@ -1410,7 +1410,7 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, bio_for_each_segment(bvec, bio, i) { expect = min_t(int, data_size, bvec->bv_len); - rr = drbd_recv(mdev, + rr = drbd_recv(mdev->tconn, kmap(bvec->bv_page)+bvec->bv_offset, expect); kunmap(bvec->bv_page); @@ -2094,7 +2094,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, peer_req->digest = di; peer_req->flags |= EE_HAS_DIGEST; - if (drbd_recv(mdev, di->digest, digest_size) != digest_size) + if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size) goto out_free_e; if (cmd == P_CSUM_RS_REQUEST) { @@ -2785,7 +2785,7 @@ static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd, if (mdev->tconn->agreed_pro_version >= 87) { unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg; - if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size) + if (drbd_recv(mdev->tconn, p_integrity_alg, data_size) != data_size) return false; p_integrity_alg[SHARED_SECRET_MAX-1] = 0; @@ -2871,7 +2871,7 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, /* initialize verify_alg and csums_alg */ memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); - if (drbd_recv(mdev, &p->head.payload, header_size) != header_size) + if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size) return false; mdev->sync_conf.rate = be32_to_cpu(p->rate); @@ -2885,7 +2885,7 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd, return false; } - if (drbd_recv(mdev, p->verify_alg, data_size) != data_size) + if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size) return false; /* we expect NUL terminated string */ @@ -3424,7 +3424,7 @@ receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size, } if (want == 0) return 0; - err = drbd_recv(mdev, buffer, want); + err = drbd_recv(mdev->tconn, buffer, want); if (err != want) { if (err >= 0) err = -EIO; @@ -3613,7 +3613,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, /* use the page buff */ p = buffer; memcpy(p, h, sizeof(*h)); - if (drbd_recv(mdev, p->head.payload, data_size) != data_size) + if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size) goto out; if (data_size <= (sizeof(*p) - sizeof(p->head))) { dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size); @@ -3677,7 +3677,7 @@ static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd, size = data_size; while (size > 0) { want = min_t(int, size, sizeof(sink)); - r = drbd_recv(mdev, sink, want); + r = drbd_recv(mdev->tconn, sink, want); 
if (!expect(r > 0)) break; size -= r; @@ -3784,7 +3784,7 @@ static void drbdd(struct drbd_conf *mdev) } if (shs) { - rv = drbd_recv(mdev, &header->payload, shs); + rv = drbd_recv(mdev->tconn, &header->payload, shs); if (unlikely(rv != shs)) { if (!signal_pending(current)) dev_warn(DEV, "short read while reading sub header: rv=%d\n", rv); @@ -4013,7 +4013,7 @@ static int drbd_do_handshake(struct drbd_conf *mdev) return -1; } - rv = drbd_recv(mdev, &p->head.payload, expect); + rv = drbd_recv(mdev->tconn, &p->head.payload, expect); if (rv != expect) { if (!signal_pending(current)) @@ -4116,7 +4116,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) goto fail; } - rv = drbd_recv(mdev, peers_ch, length); + rv = drbd_recv(mdev->tconn, peers_ch, length); if (rv != length) { if (!signal_pending(current)) @@ -4164,7 +4164,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) goto fail; } - rv = drbd_recv(mdev, response , resp_size); + rv = drbd_recv(mdev->tconn, response , resp_size); if (rv != resp_size) { if (!signal_pending(current)) From 77351055b5244a3131bd8564dccc8bd95a995317 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 17:24:26 +0100 Subject: [PATCH 0083/5831] drbd: struct packet_info to hold information of decoded packets Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 103 +++++++++++++++-------------- 1 file changed, 54 insertions(+), 49 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1368fc3518df..380d24e8434b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -48,6 +48,12 @@ #include "drbd_vli.h" +struct packet_info { + enum drbd_packet cmd; + int size; + int vnr; +}; + enum finish_epoch { FE_STILL_LIVE, FE_DESTROYED, @@ -924,15 +930,15 @@ out_release_sockets: return -1; } -static bool decode_header(struct drbd_conf *mdev, struct p_header *h, - enum drbd_packet *cmd, unsigned int *packet_size) +static bool decode_header(struct drbd_conf *mdev, struct p_header *h, struct packet_info *pi) { if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) { - *cmd = be16_to_cpu(h->h80.command); - *packet_size = be16_to_cpu(h->h80.length); + pi->cmd = be16_to_cpu(h->h80.command); + pi->size = be16_to_cpu(h->h80.length); } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) { - *cmd = be16_to_cpu(h->h95.command); - *packet_size = be32_to_cpu(h->h95.length) & 0x00ffffff; + pi->cmd = be16_to_cpu(h->h95.command); + pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff; + pi->vnr = 0; } else { dev_err(DEV, "magic?? 
on data m: 0x%08x c: %d l: %d\n", be32_to_cpu(h->h80.magic), @@ -943,8 +949,7 @@ static bool decode_header(struct drbd_conf *mdev, struct p_header *h, return true; } -static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packet *cmd, - unsigned int *packet_size) +static int drbd_recv_header(struct drbd_conf *mdev, struct packet_info *pi) { struct p_header *h = &mdev->tconn->data.rbuf.header; int r; @@ -956,7 +961,7 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packet *cmd, return false; } - r = decode_header(mdev, h, cmd, packet_size); + r = decode_header(mdev, h, pi); mdev->tconn->last_received = jiffies; return r; @@ -3580,6 +3585,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, int err; int ok = false; struct p_header *h = &mdev->tconn->data.rbuf.header; + struct packet_info pi; drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED); /* you are supposed to send additional out-of-sync information @@ -3633,8 +3639,10 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, goto out; break; } - if (!drbd_recv_header(mdev, &cmd, &data_size)) + if (!drbd_recv_header(mdev, &pi)) goto out; + cmd = pi.cmd; + data_size = pi.size; } INFO_bm_xfer_stats(mdev, "receive", &c); @@ -3762,24 +3770,23 @@ static struct data_cmd drbd_cmd_handler[] = { static void drbdd(struct drbd_conf *mdev) { struct p_header *header = &mdev->tconn->data.rbuf.header; - unsigned int packet_size; - enum drbd_packet cmd; + struct packet_info pi; size_t shs; /* sub header size */ int rv; while (get_t_state(&mdev->tconn->receiver) == RUNNING) { drbd_thread_current_set_cpu(mdev, &mdev->tconn->receiver); - if (!drbd_recv_header(mdev, &cmd, &packet_size)) + if (!drbd_recv_header(mdev, &pi)) goto err_out; - if (unlikely(cmd >= P_MAX_CMD || !drbd_cmd_handler[cmd].function)) { - dev_err(DEV, "unknown packet type %d, l: %d!\n", cmd, packet_size); + if (unlikely(pi.cmd >= P_MAX_CMD || !drbd_cmd_handler[pi.cmd].function)) { + dev_err(DEV, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size); goto err_out; } - shs = drbd_cmd_handler[cmd].pkt_size - sizeof(struct p_header); - if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) { - dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size); + shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header); + if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) { + dev_err(DEV, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size); goto err_out; } @@ -3792,11 +3799,11 @@ static void drbdd(struct drbd_conf *mdev) } } - rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs); + rv = drbd_cmd_handler[pi.cmd].function(mdev, pi.cmd, pi.size - shs); if (unlikely(!rv)) { dev_err(DEV, "error receiving %s, l: %d!\n", - cmdname(cmd), packet_size); + cmdname(pi.cmd), pi.size); goto err_out; } } @@ -3989,27 +3996,26 @@ static int drbd_do_handshake(struct drbd_conf *mdev) /* ASSERT current == mdev->tconn->receiver ... 
*/ struct p_handshake *p = &mdev->tconn->data.rbuf.handshake; const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80); - unsigned int length; - enum drbd_packet cmd; + struct packet_info pi; int rv; rv = drbd_send_handshake(mdev->tconn); if (!rv) return 0; - rv = drbd_recv_header(mdev, &cmd, &length); + rv = drbd_recv_header(mdev, &pi); if (!rv) return 0; - if (cmd != P_HAND_SHAKE) { + if (pi.cmd != P_HAND_SHAKE) { dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n", - cmdname(cmd), cmd); + cmdname(pi.cmd), pi.cmd); return -1; } - if (length != expect) { + if (pi.size != expect) { dev_err(DEV, "expected HandShake length: %u, received: %u\n", - expect, length); + expect, pi.size); return -1; } @@ -4071,8 +4077,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) unsigned int key_len = strlen(mdev->tconn->net_conf->shared_secret); unsigned int resp_size; struct hash_desc desc; - enum drbd_packet cmd; - unsigned int length; + struct packet_info pi; int rv; desc.tfm = mdev->tconn->cram_hmac_tfm; @@ -4092,33 +4097,33 @@ static int drbd_do_auth(struct drbd_conf *mdev) if (!rv) goto fail; - rv = drbd_recv_header(mdev, &cmd, &length); + rv = drbd_recv_header(mdev, &pi); if (!rv) goto fail; - if (cmd != P_AUTH_CHALLENGE) { + if (pi.cmd != P_AUTH_CHALLENGE) { dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n", - cmdname(cmd), cmd); + cmdname(pi.cmd), pi.cmd); rv = 0; goto fail; } - if (length > CHALLENGE_LEN * 2) { + if (pi.size > CHALLENGE_LEN * 2) { dev_err(DEV, "expected AuthChallenge payload too big.\n"); rv = -1; goto fail; } - peers_ch = kmalloc(length, GFP_NOIO); + peers_ch = kmalloc(pi.size, GFP_NOIO); if (peers_ch == NULL) { dev_err(DEV, "kmalloc of peers_ch failed\n"); rv = -1; goto fail; } - rv = drbd_recv(mdev->tconn, peers_ch, length); + rv = drbd_recv(mdev->tconn, peers_ch, pi.size); - if (rv != length) { + if (rv != pi.size) { if (!signal_pending(current)) dev_warn(DEV, "short read AuthChallenge: l=%u\n", rv); rv = 0; @@ -4134,7 +4139,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) } sg_init_table(&sg, 1); - sg_set_buf(&sg, peers_ch, length); + sg_set_buf(&sg, peers_ch, pi.size); rv = crypto_hash_digest(&desc, &sg, sg.length, response); if (rv) { @@ -4147,18 +4152,18 @@ static int drbd_do_auth(struct drbd_conf *mdev) if (!rv) goto fail; - rv = drbd_recv_header(mdev, &cmd, &length); + rv = drbd_recv_header(mdev, &pi); if (!rv) goto fail; - if (cmd != P_AUTH_RESPONSE) { + if (pi.cmd != P_AUTH_RESPONSE) { dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n", - cmdname(cmd), cmd); + cmdname(pi.cmd), pi.cmd); rv = 0; goto fail; } - if (length != resp_size) { + if (pi.size != resp_size) { dev_err(DEV, "expected AuthResponse payload of wrong size\n"); rv = 0; goto fail; @@ -4544,14 +4549,14 @@ int drbd_asender(struct drbd_thread *thi) struct drbd_conf *mdev = thi->mdev; struct p_header *h = &mdev->tconn->meta.rbuf.header; struct asender_cmd *cmd = NULL; + struct packet_info pi; int rv; void *buf = h; int received = 0; int expect = sizeof(struct p_header); int ping_timeout_active = 0; - int empty, pkt_size; - enum drbd_packet cmd_nr; + int empty; sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); @@ -4640,25 +4645,25 @@ int drbd_asender(struct drbd_thread *thi) } if (received == expect && cmd == NULL) { - if (!decode_header(mdev, h, &cmd_nr, &pkt_size)) + if (!decode_header(mdev, h, &pi)) goto reconnect; - cmd = get_asender_cmd(cmd_nr); + cmd = get_asender_cmd(pi.cmd); if (unlikely(cmd == NULL)) { dev_err(DEV, 
"unknown command %d on meta (l: %d)\n", - cmd_nr, pkt_size); + pi.cmd, pi.size); goto disconnect; } expect = cmd->pkt_size; - if (pkt_size != expect - sizeof(struct p_header)) { + if (pi.size != expect - sizeof(struct p_header)) { dev_err(DEV, "Wrong packet size on meta (c: %d, l: %d)\n", - cmd_nr, pkt_size); + pi.cmd, pi.size); goto reconnect; } } if (received == expect) { mdev->tconn->last_received = jiffies; D_ASSERT(cmd != NULL); - if (!cmd->process(mdev, cmd_nr)) + if (!cmd->process(mdev, pi.cmd)) goto reconnect; /* the idle_timeout (ping-int) From ce24385342d21bd22c95d2f7162f71df313d0dea Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 17:27:47 +0100 Subject: [PATCH 0084/5831] drbd: Converted decode_header() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 380d24e8434b..7d210548a98e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -930,7 +930,7 @@ out_release_sockets: return -1; } -static bool decode_header(struct drbd_conf *mdev, struct p_header *h, struct packet_info *pi) +static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi) { if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) { pi->cmd = be16_to_cpu(h->h80.command); @@ -940,7 +940,7 @@ static bool decode_header(struct drbd_conf *mdev, struct p_header *h, struct pac pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff; pi->vnr = 0; } else { - dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n", + conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n", be32_to_cpu(h->h80.magic), be16_to_cpu(h->h80.command), be16_to_cpu(h->h80.length)); @@ -961,7 +961,7 @@ static int drbd_recv_header(struct drbd_conf *mdev, struct packet_info *pi) return false; } - r = decode_header(mdev, h, pi); + r = decode_header(mdev->tconn, h, pi); mdev->tconn->last_received = jiffies; return r; @@ -4645,7 +4645,7 @@ int drbd_asender(struct drbd_thread *thi) } if (received == expect && cmd == NULL) { - if (!decode_header(mdev, h, &pi)) + if (!decode_header(mdev->tconn, h, &pi)) goto reconnect; cmd = get_asender_cmd(pi.cmd); if (unlikely(cmd == NULL)) { From 9ba7aa00ae574714c4decf8f3e0dcdb679a3239e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 17:32:41 +0100 Subject: [PATCH 0085/5831] drbd: Converted drbd_recv_header() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7d210548a98e..ebd8320d123d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -949,20 +949,20 @@ static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct p return true; } -static int drbd_recv_header(struct drbd_conf *mdev, struct packet_info *pi) +static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_header *h = &mdev->tconn->data.rbuf.header; + struct p_header *h = &tconn->data.rbuf.header; int r; - r = drbd_recv(mdev->tconn, h, sizeof(*h)); + r = drbd_recv(tconn, h, sizeof(*h)); if (unlikely(r != sizeof(*h))) { if (!signal_pending(current)) - dev_warn(DEV, "short read expecting header on sock: r=%d\n", r); + 
conn_warn(tconn, "short read expecting header on sock: r=%d\n", r); return false; } - r = decode_header(mdev->tconn, h, pi); - mdev->tconn->last_received = jiffies; + r = decode_header(tconn, h, pi); + tconn->last_received = jiffies; return r; } @@ -3639,7 +3639,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd, goto out; break; } - if (!drbd_recv_header(mdev, &pi)) + if (!drbd_recv_header(mdev->tconn, &pi)) goto out; cmd = pi.cmd; data_size = pi.size; @@ -3776,7 +3776,7 @@ static void drbdd(struct drbd_conf *mdev) while (get_t_state(&mdev->tconn->receiver) == RUNNING) { drbd_thread_current_set_cpu(mdev, &mdev->tconn->receiver); - if (!drbd_recv_header(mdev, &pi)) + if (!drbd_recv_header(mdev->tconn, &pi)) goto err_out; if (unlikely(pi.cmd >= P_MAX_CMD || !drbd_cmd_handler[pi.cmd].function)) { @@ -4003,7 +4003,7 @@ static int drbd_do_handshake(struct drbd_conf *mdev) if (!rv) return 0; - rv = drbd_recv_header(mdev, &pi); + rv = drbd_recv_header(mdev->tconn, &pi); if (!rv) return 0; @@ -4097,7 +4097,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) if (!rv) goto fail; - rv = drbd_recv_header(mdev, &pi); + rv = drbd_recv_header(mdev->tconn, &pi); if (!rv) goto fail; @@ -4152,7 +4152,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) if (!rv) goto fail; - rv = drbd_recv_header(mdev, &pi); + rv = drbd_recv_header(mdev->tconn, &pi); if (!rv) goto fail; From 65d11ed6f2430498bf3735d40a9e243409780fb1 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 7 Feb 2011 17:35:59 +0100 Subject: [PATCH 0086/5831] drbd: Converted drbd_do_handshake() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index ebd8320d123d..0a4d15c913e7 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -60,7 +60,7 @@ enum finish_epoch { FE_RECYCLED, }; -static int drbd_do_handshake(struct drbd_conf *mdev); +static int drbd_do_handshake(struct drbd_tconn *tconn); static int drbd_do_auth(struct drbd_conf *mdev); static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); @@ -883,7 +883,7 @@ retry: D_ASSERT(mdev->tconn->asender.task == NULL); - h = drbd_do_handshake(mdev); + h = drbd_do_handshake(mdev->tconn); if (h <= 0) return h; @@ -3991,39 +3991,39 @@ static int drbd_send_handshake(struct drbd_tconn *tconn) * -1 peer talks different language, * no point in trying again, please go standalone. */ -static int drbd_do_handshake(struct drbd_conf *mdev) +static int drbd_do_handshake(struct drbd_tconn *tconn) { - /* ASSERT current == mdev->tconn->receiver ... */ - struct p_handshake *p = &mdev->tconn->data.rbuf.handshake; + /* ASSERT current == tconn->receiver ... 
*/ + struct p_handshake *p = &tconn->data.rbuf.handshake; const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80); struct packet_info pi; int rv; - rv = drbd_send_handshake(mdev->tconn); + rv = drbd_send_handshake(tconn); if (!rv) return 0; - rv = drbd_recv_header(mdev->tconn, &pi); + rv = drbd_recv_header(tconn, &pi); if (!rv) return 0; if (pi.cmd != P_HAND_SHAKE) { - dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n", + conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n", cmdname(pi.cmd), pi.cmd); return -1; } if (pi.size != expect) { - dev_err(DEV, "expected HandShake length: %u, received: %u\n", + conn_err(tconn, "expected HandShake length: %u, received: %u\n", expect, pi.size); return -1; } - rv = drbd_recv(mdev->tconn, &p->head.payload, expect); + rv = drbd_recv(tconn, &p->head.payload, expect); if (rv != expect) { if (!signal_pending(current)) - dev_warn(DEV, "short read receiving handshake packet: l=%u\n", rv); + conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv); return 0; } @@ -4036,15 +4036,15 @@ static int drbd_do_handshake(struct drbd_conf *mdev) PRO_VERSION_MIN > p->protocol_max) goto incompat; - mdev->tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); + tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); - dev_info(DEV, "Handshake successful: " - "Agreed network protocol version %d\n", mdev->tconn->agreed_pro_version); + conn_info(tconn, "Handshake successful: " + "Agreed network protocol version %d\n", tconn->agreed_pro_version); return 1; incompat: - dev_err(DEV, "incompatible DRBD dialects: " + conn_err(tconn, "incompatible DRBD dialects: " "I support %d-%d, peer supports %d-%d\n", PRO_VERSION_MIN, PRO_VERSION_MAX, p->protocol_min, p->protocol_max); From 611208706f28c502c8c01791ac4f0b14cde395b2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 09:50:54 +0100 Subject: [PATCH 0087/5831] drbd: Converted drbd_(get|put)_data_sock() and drbd_send_cmd2() to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 20 ++++++++++---------- drivers/block/drbd/drbd_main.c | 26 +++++++++++++------------- drivers/block/drbd/drbd_receiver.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 4 ++-- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 845ff34d2060..f48fe76f0151 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1109,26 +1109,26 @@ static inline unsigned int mdev_to_minor(struct drbd_conf *mdev) /* returns 1 if it was successful, * returns 0 if there was no data socket. * so wherever you are going to use the data.socket, e.g. do - * if (!drbd_get_data_sock(mdev)) + * if (!drbd_get_data_sock(mdev->tconn)) * return 0; * CODE(); - * drbd_put_data_sock(mdev); + * drbd_get_data_sock(mdev->tconn); */ -static inline int drbd_get_data_sock(struct drbd_conf *mdev) +static inline int drbd_get_data_sock(struct drbd_tconn *tconn) { - mutex_lock(&mdev->tconn->data.mutex); + mutex_lock(&tconn->data.mutex); /* drbd_disconnect() could have called drbd_free_sock() * while we were waiting in down()... 
*/ - if (unlikely(mdev->tconn->data.socket == NULL)) { - mutex_unlock(&mdev->tconn->data.mutex); + if (unlikely(tconn->data.socket == NULL)) { + mutex_unlock(&tconn->data.mutex); return 0; } return 1; } -static inline void drbd_put_data_sock(struct drbd_conf *mdev) +static inline void drbd_put_data_sock(struct drbd_tconn *tconn) { - mutex_unlock(&mdev->tconn->data.mutex); + mutex_unlock(&tconn->data.mutex); } /* @@ -1171,12 +1171,12 @@ extern int drbd_send_state(struct drbd_conf *mdev); extern int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, enum drbd_packet cmd, struct p_header *h, size_t size, unsigned msg_flags); +extern int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, + char *data, size_t size); #define USE_DATA_SOCKET 1 #define USE_META_SOCKET 0 extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, enum drbd_packet cmd, struct p_header *h, size_t size); -extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packet cmd, - char *data, size_t size); extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2a67e272b16c..2703504c7c18 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -727,23 +727,23 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, return ok; } -int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packet cmd, char *data, +int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, size_t size) { - struct p_header h; + struct p_header80 h; int ok; - prepare_header(mdev, &h, cmd, size); + prepare_header80(&h, cmd, size); - if (!drbd_get_data_sock(mdev)) + if (!drbd_get_data_sock(tconn)) return 0; ok = (sizeof(h) == - drbd_send(mdev->tconn, mdev->tconn->data.socket, &h, sizeof(h), 0)); + drbd_send(tconn, tconn->data.socket, &h, sizeof(h), 0)); ok = ok && (size == - drbd_send(mdev->tconn, mdev->tconn->data.socket, data, size, 0)); + drbd_send(tconn, tconn->data.socket, data, size, 0)); - drbd_put_data_sock(mdev); + drbd_put_data_sock(tconn); return ok; } @@ -1188,10 +1188,10 @@ int drbd_send_bitmap(struct drbd_conf *mdev) { int err; - if (!drbd_get_data_sock(mdev)) + if (!drbd_get_data_sock(mdev->tconn)) return -1; err = !_drbd_send_bitmap(mdev); - drbd_put_data_sock(mdev); + drbd_put_data_sock(mdev->tconn); return err; } @@ -1505,7 +1505,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) void *dgb; int dgs; - if (!drbd_get_data_sock(mdev)) + if (!drbd_get_data_sock(mdev->tconn)) return 0; dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? @@ -1564,7 +1564,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) } */ } - drbd_put_data_sock(mdev); + drbd_put_data_sock(mdev->tconn); return ok; } @@ -1595,7 +1595,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL * in response to admin command or module unload. */ - if (!drbd_get_data_sock(mdev)) + if (!drbd_get_data_sock(mdev->tconn)) return 0; ok = sizeof(p) == drbd_send(mdev->tconn, mdev->tconn->data.socket, &p, sizeof(p), dgs ? 
MSG_MORE : 0); @@ -1607,7 +1607,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, if (ok) ok = _drbd_send_zc_ee(mdev, peer_req); - drbd_put_data_sock(mdev); + drbd_put_data_sock(mdev->tconn); return ok; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 0a4d15c913e7..b95f81e3278b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4093,7 +4093,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) get_random_bytes(my_challenge, CHALLENGE_LEN); - rv = drbd_send_cmd2(mdev, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN); + rv = conn_send_cmd2(mdev->tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN); if (!rv) goto fail; @@ -4148,7 +4148,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) goto fail; } - rv = drbd_send_cmd2(mdev, P_AUTH_RESPONSE, response, resp_size); + rv = conn_send_cmd2(mdev->tconn, P_AUTH_RESPONSE, response, resp_size); if (!rv) goto fail; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 5be179ba0c76..f5c27bbd814f 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1197,7 +1197,7 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) if (cancel) return 1; - if (!drbd_get_data_sock(mdev)) + if (!drbd_get_data_sock(mdev->tconn)) return 0; p->barrier = b->br_number; /* inc_ap_pending was done where this was queued. @@ -1205,7 +1205,7 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * or (on connection loss) in w_clear_epoch. */ ok = _drbd_send_cmd(mdev, mdev->tconn->data.socket, P_BARRIER, &p->head, sizeof(*p), 0); - drbd_put_data_sock(mdev); + drbd_put_data_sock(mdev->tconn); return ok; } From 13e6037dc991b0664ebb89226d4b68aa820b1fcd Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 09:54:40 +0100 Subject: [PATCH 0088/5831] drbd: Converted drbd_do_auth() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 58 +++++++++++++++--------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b95f81e3278b..2a3a35be9fc6 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -61,7 +61,7 @@ enum finish_epoch { }; static int drbd_do_handshake(struct drbd_tconn *tconn); -static int drbd_do_auth(struct drbd_conf *mdev); +static int drbd_do_auth(struct drbd_tconn *tconn); static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); static int e_end_block(struct drbd_conf *, struct drbd_work *, int); @@ -889,7 +889,7 @@ retry: if (mdev->tconn->cram_hmac_tfm) { /* drbd_request_state(mdev, NS(conn, WFAuth)); */ - switch (drbd_do_auth(mdev)) { + switch (drbd_do_auth(mdev->tconn)) { case -1: dev_err(DEV, "Authentication of peer failed\n"); return -1; @@ -4052,7 +4052,7 @@ static int drbd_do_handshake(struct drbd_tconn *tconn) } #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE) -static int drbd_do_auth(struct drbd_conf *mdev) +static int drbd_do_auth(struct drbd_tconn *tconn) { dev_err(DEV, "This kernel was build without CONFIG_CRYPTO_HMAC.\n"); dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n"); @@ -4067,73 +4067,73 @@ static int drbd_do_auth(struct drbd_conf *mdev) -1 - auth failed, don't try again. 
*/ -static int drbd_do_auth(struct drbd_conf *mdev) +static int drbd_do_auth(struct drbd_tconn *tconn) { char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */ struct scatterlist sg; char *response = NULL; char *right_response = NULL; char *peers_ch = NULL; - unsigned int key_len = strlen(mdev->tconn->net_conf->shared_secret); + unsigned int key_len = strlen(tconn->net_conf->shared_secret); unsigned int resp_size; struct hash_desc desc; struct packet_info pi; int rv; - desc.tfm = mdev->tconn->cram_hmac_tfm; + desc.tfm = tconn->cram_hmac_tfm; desc.flags = 0; - rv = crypto_hash_setkey(mdev->tconn->cram_hmac_tfm, - (u8 *)mdev->tconn->net_conf->shared_secret, key_len); + rv = crypto_hash_setkey(tconn->cram_hmac_tfm, + (u8 *)tconn->net_conf->shared_secret, key_len); if (rv) { - dev_err(DEV, "crypto_hash_setkey() failed with %d\n", rv); + conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv); rv = -1; goto fail; } get_random_bytes(my_challenge, CHALLENGE_LEN); - rv = conn_send_cmd2(mdev->tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN); + rv = conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN); if (!rv) goto fail; - rv = drbd_recv_header(mdev->tconn, &pi); + rv = drbd_recv_header(tconn, &pi); if (!rv) goto fail; if (pi.cmd != P_AUTH_CHALLENGE) { - dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n", + conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n", cmdname(pi.cmd), pi.cmd); rv = 0; goto fail; } if (pi.size > CHALLENGE_LEN * 2) { - dev_err(DEV, "expected AuthChallenge payload too big.\n"); + conn_err(tconn, "expected AuthChallenge payload too big.\n"); rv = -1; goto fail; } peers_ch = kmalloc(pi.size, GFP_NOIO); if (peers_ch == NULL) { - dev_err(DEV, "kmalloc of peers_ch failed\n"); + conn_err(tconn, "kmalloc of peers_ch failed\n"); rv = -1; goto fail; } - rv = drbd_recv(mdev->tconn, peers_ch, pi.size); + rv = drbd_recv(tconn, peers_ch, pi.size); if (rv != pi.size) { if (!signal_pending(current)) - dev_warn(DEV, "short read AuthChallenge: l=%u\n", rv); + conn_warn(tconn, "short read AuthChallenge: l=%u\n", rv); rv = 0; goto fail; } - resp_size = crypto_hash_digestsize(mdev->tconn->cram_hmac_tfm); + resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm); response = kmalloc(resp_size, GFP_NOIO); if (response == NULL) { - dev_err(DEV, "kmalloc of response failed\n"); + conn_err(tconn, "kmalloc of response failed\n"); rv = -1; goto fail; } @@ -4143,44 +4143,44 @@ static int drbd_do_auth(struct drbd_conf *mdev) rv = crypto_hash_digest(&desc, &sg, sg.length, response); if (rv) { - dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv); + conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv); rv = -1; goto fail; } - rv = conn_send_cmd2(mdev->tconn, P_AUTH_RESPONSE, response, resp_size); + rv = conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size); if (!rv) goto fail; - rv = drbd_recv_header(mdev->tconn, &pi); + rv = drbd_recv_header(tconn, &pi); if (!rv) goto fail; if (pi.cmd != P_AUTH_RESPONSE) { - dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n", + conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n", cmdname(pi.cmd), pi.cmd); rv = 0; goto fail; } if (pi.size != resp_size) { - dev_err(DEV, "expected AuthResponse payload of wrong size\n"); + conn_err(tconn, "expected AuthResponse payload of wrong size\n"); rv = 0; goto fail; } - rv = drbd_recv(mdev->tconn, response , resp_size); + rv = drbd_recv(tconn, response , resp_size); if (rv != resp_size) { if (!signal_pending(current)) 
- dev_warn(DEV, "short read receiving AuthResponse: l=%u\n", rv); + conn_warn(tconn, "short read receiving AuthResponse: l=%u\n", rv); rv = 0; goto fail; } right_response = kmalloc(resp_size, GFP_NOIO); if (right_response == NULL) { - dev_err(DEV, "kmalloc of right_response failed\n"); + conn_err(tconn, "kmalloc of right_response failed\n"); rv = -1; goto fail; } @@ -4189,7 +4189,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) rv = crypto_hash_digest(&desc, &sg, sg.length, right_response); if (rv) { - dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv); + conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv); rv = -1; goto fail; } @@ -4197,8 +4197,8 @@ static int drbd_do_auth(struct drbd_conf *mdev) rv = !memcmp(response, right_response, resp_size); if (rv) - dev_info(DEV, "Peer authenticated using %d bytes of '%s' HMAC\n", - resp_size, mdev->tconn->net_conf->cram_hmac_alg); + conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n", + resp_size, tconn->net_conf->cram_hmac_alg); else rv = -1; From dc8228d107475bdf5458383f0d1fca202d82a184 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 10:13:15 +0100 Subject: [PATCH 0089/5831] drbd: Converted drbd_send_protocol() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 30 +++++++++++++++--------------- drivers/block/drbd/drbd_receiver.c | 2 +- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index f48fe76f0151..ddd2ed7dec1c 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1161,7 +1161,7 @@ extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); extern void drbd_free_sock(struct drbd_conf *mdev); extern int drbd_send(struct drbd_tconn *tconn, struct socket *sock, void *buf, size_t size, unsigned msg_flags); -extern int drbd_send_protocol(struct drbd_conf *mdev); +extern int drbd_send_protocol(struct drbd_tconn *tconn); extern int drbd_send_uuids(struct drbd_conf *mdev); extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2703504c7c18..01749e9731dd 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -796,15 +796,15 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) return rv; } -int drbd_send_protocol(struct drbd_conf *mdev) +int drbd_send_protocol(struct drbd_tconn *tconn) { struct p_protocol *p; int size, cf, rv; size = sizeof(struct p_protocol); - if (mdev->tconn->agreed_pro_version >= 87) - size += strlen(mdev->tconn->net_conf->integrity_alg) + 1; + if (tconn->agreed_pro_version >= 87) + size += strlen(tconn->net_conf->integrity_alg) + 1; /* we must not recurse into our own queue, * as that is blocked during handshake */ @@ -812,30 +812,30 @@ int drbd_send_protocol(struct drbd_conf *mdev) if (p == NULL) return 0; - p->protocol = cpu_to_be32(mdev->tconn->net_conf->wire_protocol); - p->after_sb_0p = cpu_to_be32(mdev->tconn->net_conf->after_sb_0p); - p->after_sb_1p = cpu_to_be32(mdev->tconn->net_conf->after_sb_1p); - p->after_sb_2p = cpu_to_be32(mdev->tconn->net_conf->after_sb_2p); - p->two_primaries = cpu_to_be32(mdev->tconn->net_conf->two_primaries); + p->protocol = cpu_to_be32(tconn->net_conf->wire_protocol); + p->after_sb_0p = 
cpu_to_be32(tconn->net_conf->after_sb_0p); + p->after_sb_1p = cpu_to_be32(tconn->net_conf->after_sb_1p); + p->after_sb_2p = cpu_to_be32(tconn->net_conf->after_sb_2p); + p->two_primaries = cpu_to_be32(tconn->net_conf->two_primaries); cf = 0; - if (mdev->tconn->net_conf->want_lose) + if (tconn->net_conf->want_lose) cf |= CF_WANT_LOSE; - if (mdev->tconn->net_conf->dry_run) { - if (mdev->tconn->agreed_pro_version >= 92) + if (tconn->net_conf->dry_run) { + if (tconn->agreed_pro_version >= 92) cf |= CF_DRY_RUN; else { - dev_err(DEV, "--dry-run is not supported by peer"); + conn_err(tconn, "--dry-run is not supported by peer"); kfree(p); return -1; } } p->conn_flags = cpu_to_be32(cf); - if (mdev->tconn->agreed_pro_version >= 87) - strcpy(p->integrity_alg, mdev->tconn->net_conf->integrity_alg); + if (tconn->agreed_pro_version >= 87) + strcpy(p->integrity_alg, tconn->net_conf->integrity_alg); - rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL, &p->head, size); + rv = conn_send_cmd2(tconn, P_PROTOCOL, p->head.payload, size - sizeof(struct p_header)); kfree(p); return rv; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2a3a35be9fc6..05d6499da63b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -910,7 +910,7 @@ retry: drbd_thread_start(&mdev->tconn->asender); - if (drbd_send_protocol(mdev) == -1) + if (drbd_send_protocol(mdev->tconn) == -1) return -1; drbd_send_sync_param(mdev, &mdev->sync_conf); drbd_send_sizes(mdev, 0, 0); From 062e879c8b473d2dba270f8244a211b0c4dafe28 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 11:09:18 +0100 Subject: [PATCH 0090/5831] drbd: Use and idr data structure to map volume numbers to mdev pointers Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +++- drivers/block/drbd/drbd_main.c | 12 +++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ddd2ed7dec1c..8d32f9dc18ed 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -916,8 +917,9 @@ struct drbd_tconn { /* is a resource from the config file */ char *name; /* Resource name */ struct list_head all_tconn; /* List of all drbd_tconn, prot by global_state_lock */ struct drbd_conf *volume0; /* TODO: Remove me again */ - unsigned long flags; + struct idr volumes; /* to mdev mapping */ + unsigned long flags; struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ atomic_t net_cnt; /* Users of net_conf */ wait_queue_head_t net_cnt_wait; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 01749e9731dd..254e5c141376 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2183,6 +2183,7 @@ struct drbd_tconn *drbd_new_tconn(char *name) atomic_set(&tconn->net_cnt, 0); init_waitqueue_head(&tconn->net_cnt_wait); + idr_init(&tconn->volumes); write_lock_irq(&global_state_lock); list_add(&tconn->all_tconn, &drbd_tconns); @@ -2202,6 +2203,7 @@ void drbd_free_tconn(struct drbd_tconn *tconn) write_lock_irq(&global_state_lock); list_del(&tconn->all_tconn); write_unlock_irq(&global_state_lock); + idr_destroy(&tconn->volumes); kfree(tconn->name); kfree(tconn->int_dig_out); @@ -2216,6 +2218,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) struct gendisk *disk; struct request_queue *q; char 
conn_name[9]; /* drbd1234N */ + int vnr; /* GFP_KERNEL, we are outside of all write-out paths */ mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL); @@ -2225,7 +2228,14 @@ struct drbd_conf *drbd_new_device(unsigned int minor) mdev->tconn = drbd_new_tconn(conn_name); if (!mdev->tconn) goto out_no_tconn; - + if (!idr_pre_get(&mdev->tconn->volumes, GFP_KERNEL)) + goto out_no_cpumask; + if (idr_get_new(&mdev->tconn->volumes, mdev, &vnr)) + goto out_no_cpumask; + if (vnr != 0) { + dev_err(DEV, "vnr = %d\n", vnr); + goto out_no_cpumask; + } if (!zalloc_cpumask_var(&mdev->cpu_mask, GFP_KERNEL)) goto out_no_cpumask; From 907599e0446f03b66257cf79720cc0fc1f37b7e3 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 11:25:37 +0100 Subject: [PATCH 0091/5831] drbd: Converted drbd_connect() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 102 +++++++++++++++-------------- 1 file changed, 53 insertions(+), 49 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 05d6499da63b..28df7cd55b3f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -746,6 +746,24 @@ static int drbd_socket_okay(struct socket **sock) } } +static int drbd_connected(int vnr, void *p, void *data) +{ + struct drbd_conf *mdev = (struct drbd_conf *)p; + int ok = 1; + + atomic_set(&mdev->packet_seq, 0); + mdev->peer_seq = 0; + + ok &= drbd_send_sync_param(mdev, &mdev->sync_conf); + ok &= drbd_send_sizes(mdev, 0, 0); + ok &= drbd_send_uuids(mdev); + ok &= drbd_send_state(mdev); + clear_bit(USE_DEGR_WFC_T, &mdev->flags); + clear_bit(RESIZE_PENDING, &mdev->flags); + + return !ok; +} + /* * return values: * 1 yes, we have a valid connection @@ -754,18 +772,16 @@ static int drbd_socket_okay(struct socket **sock) * no point in trying again, please go standalone. * -2 We do not have a network config... */ -static int drbd_connect(struct drbd_conf *mdev) +static int drbd_connect(struct drbd_tconn *tconn) { struct socket *s, *sock, *msock; int try, h, ok; - D_ASSERT(!mdev->tconn->data.socket); - - if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) + if (drbd_request_state(tconn->volume0, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) return -2; - clear_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); - mdev->tconn->agreed_pro_version = 99; + clear_bit(DISCARD_CONCURRENT, &tconn->flags); + tconn->agreed_pro_version = 99; /* agreed_pro_version must be smaller than 100 so we send the old header (h80) in the first packet and in the handshake packet. */ @@ -775,7 +791,7 @@ static int drbd_connect(struct drbd_conf *mdev) do { for (try = 0;;) { /* 3 tries, this should take less than a second! 
*/ - s = drbd_try_connect(mdev->tconn); + s = drbd_try_connect(tconn); if (s || ++try >= 3) break; /* give the other side time to call bind() & listen() */ @@ -784,21 +800,21 @@ static int drbd_connect(struct drbd_conf *mdev) if (s) { if (!sock) { - drbd_send_fp(mdev->tconn, s, P_HAND_SHAKE_S); + drbd_send_fp(tconn, s, P_HAND_SHAKE_S); sock = s; s = NULL; } else if (!msock) { - drbd_send_fp(mdev->tconn, s, P_HAND_SHAKE_M); + drbd_send_fp(tconn, s, P_HAND_SHAKE_M); msock = s; s = NULL; } else { - dev_err(DEV, "Logic error in drbd_connect()\n"); + conn_err(tconn, "Logic error in drbd_connect()\n"); goto out_release_sockets; } } if (sock && msock) { - schedule_timeout_interruptible(mdev->tconn->net_conf->ping_timeo*HZ/10); + schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10); ok = drbd_socket_okay(&sock); ok = drbd_socket_okay(&msock) && ok; if (ok) @@ -806,41 +822,41 @@ static int drbd_connect(struct drbd_conf *mdev) } retry: - s = drbd_wait_for_connect(mdev->tconn); + s = drbd_wait_for_connect(tconn); if (s) { - try = drbd_recv_fp(mdev->tconn, s); + try = drbd_recv_fp(tconn, s); drbd_socket_okay(&sock); drbd_socket_okay(&msock); switch (try) { case P_HAND_SHAKE_S: if (sock) { - dev_warn(DEV, "initial packet S crossed\n"); + conn_warn(tconn, "initial packet S crossed\n"); sock_release(sock); } sock = s; break; case P_HAND_SHAKE_M: if (msock) { - dev_warn(DEV, "initial packet M crossed\n"); + conn_warn(tconn, "initial packet M crossed\n"); sock_release(msock); } msock = s; - set_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); + set_bit(DISCARD_CONCURRENT, &tconn->flags); break; default: - dev_warn(DEV, "Error receiving initial packet\n"); + conn_warn(tconn, "Error receiving initial packet\n"); sock_release(s); if (random32() & 1) goto retry; } } - if (mdev->state.conn <= C_DISCONNECTING) + if (tconn->volume0->state.conn <= C_DISCONNECTING) goto out_release_sockets; if (signal_pending(current)) { flush_signals(current); smp_rmb(); - if (get_t_state(&mdev->tconn->receiver) == EXITING) + if (get_t_state(&tconn->receiver) == EXITING) goto out_release_sockets; } @@ -862,65 +878,53 @@ retry: msock->sk->sk_priority = TC_PRIO_INTERACTIVE; /* NOT YET ... - * sock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10; + * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10; * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; * first set it to the P_HAND_SHAKE timeout, * which we set to 4x the configured ping_timeout. */ sock->sk->sk_sndtimeo = - sock->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_timeo*4*HZ/10; + sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10; - msock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10; - msock->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ; + msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10; + msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ; /* we don't want delays. 
* we use TCP_CORK where appropriate, though */ drbd_tcp_nodelay(sock); drbd_tcp_nodelay(msock); - mdev->tconn->data.socket = sock; - mdev->tconn->meta.socket = msock; - mdev->tconn->last_received = jiffies; + tconn->data.socket = sock; + tconn->meta.socket = msock; + tconn->last_received = jiffies; - D_ASSERT(mdev->tconn->asender.task == NULL); - - h = drbd_do_handshake(mdev->tconn); + h = drbd_do_handshake(tconn); if (h <= 0) return h; - if (mdev->tconn->cram_hmac_tfm) { + if (tconn->cram_hmac_tfm) { /* drbd_request_state(mdev, NS(conn, WFAuth)); */ - switch (drbd_do_auth(mdev->tconn)) { + switch (drbd_do_auth(tconn)) { case -1: - dev_err(DEV, "Authentication of peer failed\n"); + conn_err(tconn, "Authentication of peer failed\n"); return -1; case 0: - dev_err(DEV, "Authentication of peer failed, trying again.\n"); + conn_err(tconn, "Authentication of peer failed, trying again.\n"); return 0; } } - if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS) + if (drbd_request_state(tconn->volume0, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS) return 0; - sock->sk->sk_sndtimeo = mdev->tconn->net_conf->timeout*HZ/10; + sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10; sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; - atomic_set(&mdev->packet_seq, 0); - mdev->peer_seq = 0; + drbd_thread_start(&tconn->asender); - drbd_thread_start(&mdev->tconn->asender); - - if (drbd_send_protocol(mdev->tconn) == -1) + if (drbd_send_protocol(tconn) == -1) return -1; - drbd_send_sync_param(mdev, &mdev->sync_conf); - drbd_send_sizes(mdev, 0, 0); - drbd_send_uuids(mdev); - drbd_send_state(mdev); - clear_bit(USE_DEGR_WFC_T, &mdev->flags); - clear_bit(RESIZE_PENDING, &mdev->flags); - mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */ - return 1; + return !idr_for_each(&tconn->volumes, drbd_connected, tconn); out_release_sockets: if (sock) @@ -4222,7 +4226,7 @@ int drbdd_init(struct drbd_thread *thi) dev_info(DEV, "receiver (re)started\n"); do { - h = drbd_connect(mdev); + h = drbd_connect(mdev->tconn); if (h == 0) { drbd_disconnect(mdev); schedule_timeout_interruptible(HZ); From 808222845d62e551630699a1381bbf8a1fd4a286 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 12:46:30 +0100 Subject: [PATCH 0092/5831] drbd: Converted drbd_calc_cpu_mask() and drbd_thread_current_set_cpu() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 ++++---- drivers/block/drbd/drbd_main.c | 25 +++++++++++++++---------- drivers/block/drbd/drbd_nl.c | 6 +++--- drivers/block/drbd/drbd_receiver.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 2 +- 5 files changed, 25 insertions(+), 20 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8d32f9dc18ed..1cb513e92b8a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -946,6 +946,7 @@ struct drbd_tconn { /* is a resource from the config file */ struct drbd_thread receiver; struct drbd_thread worker; struct drbd_thread asender; + cpumask_var_t cpu_mask; }; struct drbd_conf { @@ -1075,7 +1076,6 @@ struct drbd_conf { spinlock_t peer_seq_lock; unsigned int minor; unsigned long comm_bm_set; /* communicated number of set bits. 
*/ - cpumask_var_t cpu_mask; struct bm_io_work bm_io_work; u64 ed_uuid; /* UUID of the exposed data */ struct mutex state_mutex; @@ -1149,10 +1149,10 @@ extern int drbd_thread_start(struct drbd_thread *thi); extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); extern char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task); #ifdef CONFIG_SMP -extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev, struct drbd_thread *thi); -extern void drbd_calc_cpu_mask(struct drbd_conf *mdev); +extern void drbd_thread_current_set_cpu(struct drbd_thread *thi); +extern void drbd_calc_cpu_mask(struct drbd_tconn *tconn); #else -#define drbd_thread_current_set_cpu(A, B) ({}) +#define drbd_thread_current_set_cpu(A) ({}) #define drbd_calc_cpu_mask(A) ({}) #endif extern void drbd_free_resources(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 254e5c141376..3bb412c82729 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -606,6 +606,12 @@ char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task) } #ifdef CONFIG_SMP +static int conn_lowest_minor(struct drbd_tconn *tconn) +{ + int minor = 0; + idr_get_next(&tconn->volumes, &minor); + return minor; +} /** * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs * @mdev: DRBD device. @@ -613,23 +619,23 @@ char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task) * Forces all threads of a device onto the same CPU. This is beneficial for * DRBD's performance. May be overwritten by user's configuration. */ -void drbd_calc_cpu_mask(struct drbd_conf *mdev) +void drbd_calc_cpu_mask(struct drbd_tconn *tconn) { int ord, cpu; /* user override. */ - if (cpumask_weight(mdev->cpu_mask)) + if (cpumask_weight(tconn->cpu_mask)) return; - ord = mdev_to_minor(mdev) % cpumask_weight(cpu_online_mask); + ord = conn_lowest_minor(tconn) % cpumask_weight(cpu_online_mask); for_each_online_cpu(cpu) { if (ord-- == 0) { - cpumask_set_cpu(cpu, mdev->cpu_mask); + cpumask_set_cpu(cpu, tconn->cpu_mask); return; } } /* should not be reached */ - cpumask_setall(mdev->cpu_mask); + cpumask_setall(tconn->cpu_mask); } /** @@ -640,14 +646,14 @@ void drbd_calc_cpu_mask(struct drbd_conf *mdev) * call in the "main loop" of _all_ threads, no need for any mutex, current won't die * prematurely. 
*/ -void drbd_thread_current_set_cpu(struct drbd_conf *mdev, struct drbd_thread *thi) +void drbd_thread_current_set_cpu(struct drbd_thread *thi) { struct task_struct *p = current; if (!thi->reset_cpu_mask) return; thi->reset_cpu_mask = 0; - set_cpus_allowed_ptr(p, mdev->cpu_mask); + set_cpus_allowed_ptr(p, thi->mdev->tconn->cpu_mask); } #endif @@ -2236,7 +2242,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) dev_err(DEV, "vnr = %d\n", vnr); goto out_no_cpumask; } - if (!zalloc_cpumask_var(&mdev->cpu_mask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&mdev->tconn->cpu_mask, GFP_KERNEL)) goto out_no_cpumask; mdev->tconn->volume0 = mdev; @@ -2313,7 +2319,7 @@ out_no_io_page: out_no_disk: blk_cleanup_queue(q); out_no_q: - free_cpumask_var(mdev->cpu_mask); + free_cpumask_var(mdev->tconn->cpu_mask); out_no_cpumask: drbd_free_tconn(mdev->tconn); out_no_tconn: @@ -2332,7 +2338,6 @@ void drbd_free_mdev(struct drbd_conf *mdev) __free_page(mdev->md_io_page); put_disk(mdev->vdisk); blk_cleanup_queue(mdev->rq_queue); - free_cpumask_var(mdev->cpu_mask); kfree(mdev); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index df36a573cd47..331495fec673 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1884,9 +1884,9 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (mdev->state.conn >= C_CONNECTED) drbd_send_sync_param(mdev, &sc); - if (!cpumask_equal(mdev->cpu_mask, new_cpu_mask)) { - cpumask_copy(mdev->cpu_mask, new_cpu_mask); - drbd_calc_cpu_mask(mdev); + if (!cpumask_equal(mdev->tconn->cpu_mask, new_cpu_mask)) { + cpumask_copy(mdev->tconn->cpu_mask, new_cpu_mask); + drbd_calc_cpu_mask(mdev->tconn); mdev->tconn->receiver.reset_cpu_mask = 1; mdev->tconn->asender.reset_cpu_mask = 1; mdev->tconn->worker.reset_cpu_mask = 1; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 28df7cd55b3f..c8d173c11390 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3779,7 +3779,7 @@ static void drbdd(struct drbd_conf *mdev) int rv; while (get_t_state(&mdev->tconn->receiver) == RUNNING) { - drbd_thread_current_set_cpu(mdev, &mdev->tconn->receiver); + drbd_thread_current_set_cpu(&mdev->tconn->receiver); if (!drbd_recv_header(mdev->tconn, &pi)) goto err_out; @@ -4568,7 +4568,7 @@ int drbd_asender(struct drbd_thread *thi) current->rt_priority = 2; /* more important than all other tasks */ while (get_t_state(thi) == RUNNING) { - drbd_thread_current_set_cpu(mdev, thi); + drbd_thread_current_set_cpu(thi); if (test_and_clear_bit(SEND_PING, &mdev->tconn->flags)) { if (!drbd_send_ping(mdev)) { dev_err(DEV, "drbd_send_ping has failed\n"); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index f5c27bbd814f..16db1f47c609 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1634,7 +1634,7 @@ int drbd_worker(struct drbd_thread *thi) sprintf(current->comm, "drbd%d_worker", mdev_to_minor(mdev)); while (get_t_state(thi) == RUNNING) { - drbd_thread_current_set_cpu(mdev, thi); + drbd_thread_current_set_cpu(thi); if (down_trylock(&mdev->tconn->data.work.s)) { mutex_lock(&mdev->tconn->data.mutex); From eefc2f7de2e4a35247c932a2c09f1890864a8381 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 12:55:24 +0100 Subject: [PATCH 0093/5831] drbd: Converted drbdd() from mdev to tconn The drbd_md_sync(mdev) happens in the after state change anyways... 
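For illustration only (not part of this patch): once the packet header carries a volume number, the connection-level receive loop resolves the target device and dispatches to the per-device handler roughly as sketched below. The names are taken from the diff that follows (vnr_to_mdev(), struct packet_info, drbd_cmd_handler[]); the helper name dispatch_one_packet() is made up for the example, and sub-header bookkeeping plus error handling are omitted.

/* sketch: route one decoded packet to the device of its volume */
static int dispatch_one_packet(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct drbd_conf *mdev = vnr_to_mdev(tconn, pi->vnr);

	if (!mdev || pi->cmd >= P_MAX_CMD || !drbd_cmd_handler[pi->cmd].function)
		return 0;	/* unknown volume or unknown packet type */

	return drbd_cmd_handler[pi->cmd].function(mdev, pi->cmd, pi->size);
}
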
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 5 +++++ drivers/block/drbd/drbd_receiver.c | 30 ++++++++++++++---------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 1cb513e92b8a..a51d0a46146e 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1108,6 +1108,11 @@ static inline unsigned int mdev_to_minor(struct drbd_conf *mdev) return mdev->minor; } +static inline struct drbd_conf *vnr_to_mdev(struct drbd_tconn *tconn, int vnr) +{ + return (struct drbd_conf *)idr_find(&tconn->volumes, vnr); +} + /* returns 1 if it was successful, * returns 0 if there was no data socket. * so wherever you are going to use the data.socket, e.g. do diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c8d173c11390..4c61802c3421 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -939,6 +939,7 @@ static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct p if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) { pi->cmd = be16_to_cpu(h->h80.command); pi->size = be16_to_cpu(h->h80.length); + pi->vnr = 0; } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) { pi->cmd = be16_to_cpu(h->h95.command); pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff; @@ -3771,42 +3772,42 @@ static struct data_cmd drbd_cmd_handler[] = { p_header, but they may not rely on that. Since there is also p_header95 ! */ -static void drbdd(struct drbd_conf *mdev) +static void drbdd(struct drbd_tconn *tconn) { - struct p_header *header = &mdev->tconn->data.rbuf.header; + struct p_header *header = &tconn->data.rbuf.header; struct packet_info pi; size_t shs; /* sub header size */ int rv; - while (get_t_state(&mdev->tconn->receiver) == RUNNING) { - drbd_thread_current_set_cpu(&mdev->tconn->receiver); - if (!drbd_recv_header(mdev->tconn, &pi)) + while (get_t_state(&tconn->receiver) == RUNNING) { + drbd_thread_current_set_cpu(&tconn->receiver); + if (!drbd_recv_header(tconn, &pi)) goto err_out; if (unlikely(pi.cmd >= P_MAX_CMD || !drbd_cmd_handler[pi.cmd].function)) { - dev_err(DEV, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size); + conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size); goto err_out; } shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header); if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) { - dev_err(DEV, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size); + conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size); goto err_out; } if (shs) { - rv = drbd_recv(mdev->tconn, &header->payload, shs); + rv = drbd_recv(tconn, &header->payload, shs); if (unlikely(rv != shs)) { if (!signal_pending(current)) - dev_warn(DEV, "short read while reading sub header: rv=%d\n", rv); + conn_warn(tconn, "short read while reading sub header: rv=%d\n", rv); goto err_out; } } - rv = drbd_cmd_handler[pi.cmd].function(mdev, pi.cmd, pi.size - shs); + rv = drbd_cmd_handler[pi.cmd].function(vnr_to_mdev(tconn, pi.vnr), pi.cmd, pi.size - shs); if (unlikely(!rv)) { - dev_err(DEV, "error receiving %s, l: %d!\n", + conn_err(tconn, "error receiving %s, l: %d!\n", cmdname(pi.cmd), pi.size); goto err_out; } @@ -3814,11 +3815,8 @@ static void drbdd(struct drbd_conf *mdev) if (0) { err_out: - drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); + drbd_force_state(tconn->volume0, NS(conn, C_PROTOCOL_ERROR)); } - /* If we leave here, we 
probably want to update at least the - * "Connected" indicator on stable storage. Do so explicitly here. */ - drbd_md_sync(mdev); } void drbd_flush_workqueue(struct drbd_tconn *tconn) @@ -4239,7 +4237,7 @@ int drbdd_init(struct drbd_thread *thi) if (h > 0) { if (get_net_conf(mdev->tconn)) { - drbdd(mdev); + drbdd(mdev->tconn); put_net_conf(mdev->tconn); } } From 360cc7405295d1f604d5689e8d6c206968d47886 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 14:29:53 +0100 Subject: [PATCH 0094/5831] drbd: Converted drbd_free_sock() and drbd_disconnect() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_main.c | 28 ++++++------ drivers/block/drbd/drbd_receiver.c | 72 +++++++++++++++++------------- 3 files changed, 56 insertions(+), 46 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a51d0a46146e..a70365452d2f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1165,7 +1165,7 @@ extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, unsigned int set_size); extern void tl_clear(struct drbd_conf *mdev); extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); -extern void drbd_free_sock(struct drbd_conf *mdev); +extern void drbd_free_sock(struct drbd_tconn *tconn); extern int drbd_send(struct drbd_tconn *tconn, struct socket *sock, void *buf, size_t size, unsigned msg_flags); extern int drbd_send_protocol(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 3bb412c82729..a26ec93a9d7f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2430,21 +2430,21 @@ void drbd_free_bc(struct drbd_backing_dev *ldev) kfree(ldev); } -void drbd_free_sock(struct drbd_conf *mdev) +void drbd_free_sock(struct drbd_tconn *tconn) { - if (mdev->tconn->data.socket) { - mutex_lock(&mdev->tconn->data.mutex); - kernel_sock_shutdown(mdev->tconn->data.socket, SHUT_RDWR); - sock_release(mdev->tconn->data.socket); - mdev->tconn->data.socket = NULL; - mutex_unlock(&mdev->tconn->data.mutex); + if (tconn->data.socket) { + mutex_lock(&tconn->data.mutex); + kernel_sock_shutdown(tconn->data.socket, SHUT_RDWR); + sock_release(tconn->data.socket); + tconn->data.socket = NULL; + mutex_unlock(&tconn->data.mutex); } - if (mdev->tconn->meta.socket) { - mutex_lock(&mdev->tconn->meta.mutex); - kernel_sock_shutdown(mdev->tconn->meta.socket, SHUT_RDWR); - sock_release(mdev->tconn->meta.socket); - mdev->tconn->meta.socket = NULL; - mutex_unlock(&mdev->tconn->meta.mutex); + if (tconn->meta.socket) { + mutex_lock(&tconn->meta.mutex); + kernel_sock_shutdown(tconn->meta.socket, SHUT_RDWR); + sock_release(tconn->meta.socket); + tconn->meta.socket = NULL; + mutex_unlock(&tconn->meta.mutex); } } @@ -2462,7 +2462,7 @@ void drbd_free_resources(struct drbd_conf *mdev) crypto_free_hash(mdev->tconn->integrity_r_tfm); mdev->tconn->integrity_r_tfm = NULL; - drbd_free_sock(mdev); + drbd_free_sock(mdev->tconn); __no_warn(local, drbd_free_bc(mdev->ldev); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4c61802c3421..2e5318f9422e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -62,6 +62,7 @@ enum finish_epoch { static int drbd_do_handshake(struct drbd_tconn *tconn); static int drbd_do_auth(struct drbd_tconn *tconn); +static int drbd_disconnected(int vnr, void *p, void 
*data); static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); static int e_end_block(struct drbd_conf *, struct drbd_work *, int); @@ -3829,19 +3830,49 @@ void drbd_flush_workqueue(struct drbd_tconn *tconn) wait_for_completion(&barr.done); } -static void drbd_disconnect(struct drbd_conf *mdev) +static void drbd_disconnect(struct drbd_tconn *tconn) { - enum drbd_fencing_p fp; union drbd_state os, ns; int rv = SS_UNKNOWN_ERROR; - unsigned int i; - if (mdev->state.conn == C_STANDALONE) + if (tconn->volume0->state.conn == C_STANDALONE) return; /* asender does not clean up anything. it must not interfere, either */ - drbd_thread_stop(&mdev->tconn->asender); - drbd_free_sock(mdev); + drbd_thread_stop(&tconn->asender); + drbd_free_sock(tconn); + + idr_for_each(&tconn->volumes, drbd_disconnected, tconn); + + conn_info(tconn, "Connection closed\n"); + + spin_lock_irq(&tconn->req_lock); + os = tconn->volume0->state; + if (os.conn >= C_UNCONNECTED) { + /* Do not restart in case we are C_DISCONNECTING */ + ns.i = os.i; + ns.conn = C_UNCONNECTED; + rv = _drbd_set_state(tconn->volume0, ns, CS_VERBOSE, NULL); + } + spin_unlock_irq(&tconn->req_lock); + + if (os.conn == C_DISCONNECTING) { + wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0); + + crypto_free_hash(tconn->cram_hmac_tfm); + tconn->cram_hmac_tfm = NULL; + + kfree(tconn->net_conf); + tconn->net_conf = NULL; + drbd_request_state(tconn->volume0, NS(conn, C_STANDALONE)); + } +} + +static int drbd_disconnected(int vnr, void *p, void *data) +{ + struct drbd_conf *mdev = (struct drbd_conf *)p; + enum drbd_fencing_p fp; + unsigned int i; /* wait for current activity to cease. */ spin_lock_irq(&mdev->tconn->req_lock); @@ -3887,8 +3918,6 @@ static void drbd_disconnect(struct drbd_conf *mdev) if (!is_susp(mdev->state)) tl_clear(mdev); - dev_info(DEV, "Connection closed\n"); - drbd_md_sync(mdev); fp = FP_DONT_CARE; @@ -3900,27 +3929,6 @@ static void drbd_disconnect(struct drbd_conf *mdev) if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN) drbd_try_outdate_peer_async(mdev); - spin_lock_irq(&mdev->tconn->req_lock); - os = mdev->state; - if (os.conn >= C_UNCONNECTED) { - /* Do not restart in case we are C_DISCONNECTING */ - ns = os; - ns.conn = C_UNCONNECTED; - rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); - } - spin_unlock_irq(&mdev->tconn->req_lock); - - if (os.conn == C_DISCONNECTING) { - wait_event(mdev->tconn->net_cnt_wait, atomic_read(&mdev->tconn->net_cnt) == 0); - - crypto_free_hash(mdev->tconn->cram_hmac_tfm); - mdev->tconn->cram_hmac_tfm = NULL; - - kfree(mdev->tconn->net_conf); - mdev->tconn->net_conf = NULL; - drbd_request_state(mdev, NS(conn, C_STANDALONE)); - } - /* serialize with bitmap writeout triggered by the state change, * if any. 
*/ wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); @@ -3950,6 +3958,8 @@ static void drbd_disconnect(struct drbd_conf *mdev) /* ok, no more ee's on the fly, it is safe to reset the epoch_size */ atomic_set(&mdev->current_epoch->epoch_size, 0); D_ASSERT(list_empty(&mdev->current_epoch->list)); + + return 0; } /* @@ -4226,7 +4236,7 @@ int drbdd_init(struct drbd_thread *thi) do { h = drbd_connect(mdev->tconn); if (h == 0) { - drbd_disconnect(mdev); + drbd_disconnect(mdev->tconn); schedule_timeout_interruptible(HZ); } if (h == -1) { @@ -4242,7 +4252,7 @@ int drbdd_init(struct drbd_thread *thi) } } - drbd_disconnect(mdev); + drbd_disconnect(mdev->tconn); dev_info(DEV, "receiver terminated\n"); return 0; From a21e9298275a0145e43c2413725549112d99ba01 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 15:08:49 +0100 Subject: [PATCH 0095/5831] drbd: Moved the mdev member into drbd_work (from drbd_request and drbd_peer_request) Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 2 ++ drivers/block/drbd/drbd_int.h | 5 ++--- drivers/block/drbd/drbd_main.c | 8 ++++++++ drivers/block/drbd/drbd_nl.c | 6 +++--- drivers/block/drbd/drbd_receiver.c | 10 ++++++---- drivers/block/drbd/drbd_req.c | 11 ++++++----- drivers/block/drbd/drbd_req.h | 4 ++-- drivers/block/drbd/drbd_state.c | 1 + drivers/block/drbd/drbd_worker.c | 8 ++++---- 9 files changed, 34 insertions(+), 21 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 794317778db5..637a9378567a 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -228,6 +228,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) al_work.enr = enr; al_work.old_enr = al_ext->lc_number; al_work.w.cb = w_al_write_transaction; + al_work.w.mdev = mdev; drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); wait_for_completion(&al_work.event); @@ -717,6 +718,7 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, if (udw) { udw->enr = ext->lce.lc_number; udw->w.cb = w_update_odbm; + udw->w.mdev = mdev; drbd_queue_work_front(&mdev->tconn->data.work, &udw->w); } else { dev_warn(DEV, "Could not kmalloc an udw\n"); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a70365452d2f..be067bfbace2 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -645,13 +645,13 @@ typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel); struct drbd_work { struct list_head list; drbd_work_cb cb; + struct drbd_conf *mdev; }; #include "drbd_interval.h" struct drbd_request { struct drbd_work w; - struct drbd_conf *mdev; /* if local IO is not allowed, will be NULL. * if local IO _is_ allowed, holds the locally submitted bio clone, @@ -715,7 +715,6 @@ struct digest_info { struct drbd_peer_request { struct drbd_work w; struct drbd_epoch *epoch; /* for writes */ - struct drbd_conf *mdev; struct page *pages; atomic_t pending_bios; struct drbd_interval i; @@ -1537,7 +1536,7 @@ extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head); extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); -extern void drbd_flush_workqueue(struct drbd_tconn *tconn); +extern void drbd_flush_workqueue(struct drbd_conf *mdev); /* yes, there is kernel_setsockopt, but only since 2.6.18. 
we don't need to * mess with get_fs/set_fs, we know we are KERNEL_DS always. */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a26ec93a9d7f..e89ec80395d3 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1836,6 +1836,14 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) mdev->md_sync_work.cb = w_md_sync; mdev->bm_io_work.w.cb = w_bitmap_io; mdev->start_resync_work.cb = w_start_resync; + + mdev->resync_work.mdev = mdev; + mdev->unplug_work.mdev = mdev; + mdev->go_diskless.mdev = mdev; + mdev->md_sync_work.mdev = mdev; + mdev->bm_io_work.w.mdev = mdev; + mdev->start_resync_work.mdev = mdev; + init_timer(&mdev->resync_timer); init_timer(&mdev->md_sync_timer); init_timer(&mdev->start_resync_timer); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 331495fec673..0debe589b674 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -876,7 +876,7 @@ static void drbd_reconfig_start(struct drbd_conf *mdev) wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags)); wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags)); drbd_thread_start(&mdev->tconn->worker); - drbd_flush_workqueue(mdev->tconn); + drbd_flush_workqueue(mdev); } /* if still unconfigured, stops worker again. @@ -1076,7 +1076,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp /* also wait for the last barrier ack. */ wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state)); /* and for any other previously queued work */ - drbd_flush_workqueue(mdev->tconn); + drbd_flush_workqueue(mdev); rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); retcode = rv; /* FIXME: Type mismatch. */ @@ -1520,7 +1520,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } } - drbd_flush_workqueue(mdev->tconn); + drbd_flush_workqueue(mdev); spin_lock_irq(&mdev->tconn->req_lock); if (mdev->tconn->net_conf != NULL) { retcode = ERR_NET_CONFIGURED; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2e5318f9422e..4aa75bad16cd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -345,7 +345,7 @@ drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector, peer_req->i.waiting = false; peer_req->epoch = NULL; - peer_req->mdev = mdev; + peer_req->w.mdev = mdev; peer_req->pages = page; atomic_set(&peer_req->pending_bios, 0); peer_req->flags = 0; @@ -3820,13 +3820,14 @@ static void drbdd(struct drbd_tconn *tconn) } } -void drbd_flush_workqueue(struct drbd_tconn *tconn) +void drbd_flush_workqueue(struct drbd_conf *mdev) { struct drbd_wq_barrier barr; barr.w.cb = w_prev_work_done; + barr.w.mdev = mdev; init_completion(&barr.done); - drbd_queue_work(&tconn->data.work, &barr.w); + drbd_queue_work(&mdev->tconn->data.work, &barr.w); wait_for_completion(&barr.done); } @@ -3906,7 +3907,7 @@ static int drbd_disconnected(int vnr, void *p, void *data) /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier, * w_make_resync_request etc. which may still be on the worker queue * to be "canceled" */ - drbd_flush_workqueue(mdev->tconn); + drbd_flush_workqueue(mdev); /* This also does reclaim_net_ee(). 
If we do this too early, we might * miss some resync ee and pages.*/ @@ -4507,6 +4508,7 @@ static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd) w = kmalloc(sizeof(*w), GFP_NOIO); if (w) { w->cb = w_ov_finished; + w->mdev = mdev; drbd_queue_work_front(&mdev->tconn->data.work, w); } else { dev_err(DEV, "kmalloc(w) failed."); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 6bcf4171a76f..45a543e5c6a9 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -67,7 +67,7 @@ static struct drbd_request *drbd_req_new(struct drbd_conf *mdev, drbd_req_make_private_bio(req, bio_src); req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0; - req->mdev = mdev; + req->w.mdev = mdev; req->master_bio = bio_src; req->epoch = 0; @@ -155,6 +155,7 @@ static void queue_barrier(struct drbd_conf *mdev) b = mdev->tconn->newest_tle; b->w.cb = w_send_barrier; + b->w.mdev = mdev; /* inc_ap_pending done here, so we won't * get imbalanced on connection loss. * dec_ap_pending will be done in got_BarrierAck @@ -192,7 +193,7 @@ void complete_master_bio(struct drbd_conf *mdev, static void drbd_remove_request_interval(struct rb_root *root, struct drbd_request *req) { - struct drbd_conf *mdev = req->mdev; + struct drbd_conf *mdev = req->w.mdev; struct drbd_interval *i = &req->i; drbd_remove_interval(root, i); @@ -211,7 +212,7 @@ static void drbd_remove_request_interval(struct rb_root *root, void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) { const unsigned long s = req->rq_state; - struct drbd_conf *mdev = req->mdev; + struct drbd_conf *mdev = req->w.mdev; /* only WRITES may end up here without a master bio (on barrier ack) */ int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE; @@ -294,7 +295,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_error *m) { - struct drbd_conf *mdev = req->mdev; + struct drbd_conf *mdev = req->w.mdev; if (!is_susp(mdev->state)) _req_may_be_done(req, m); @@ -315,7 +316,7 @@ static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_e int __req_mod(struct drbd_request *req, enum drbd_req_event what, struct bio_and_error *m) { - struct drbd_conf *mdev = req->mdev; + struct drbd_conf *mdev = req->w.mdev; int rv = 0; m->bio = NULL; diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 431e3f962c3a..e6232ce5a1ca 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -255,7 +255,7 @@ extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); * outside the spinlock, e.g. when walking some list on cleanup. 
*/ static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what) { - struct drbd_conf *mdev = req->mdev; + struct drbd_conf *mdev = req->w.mdev; struct bio_and_error m; int rv; @@ -275,7 +275,7 @@ static inline int req_mod(struct drbd_request *req, enum drbd_req_event what) { unsigned long flags; - struct drbd_conf *mdev = req->mdev; + struct drbd_conf *mdev = req->w.mdev; struct bio_and_error m; int rv; diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 38d330b7b662..36679841af68 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -843,6 +843,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, ascw->ns = ns; ascw->flags = flags; ascw->w.cb = w_after_state_ch; + ascw->w.mdev = mdev; ascw->done = done; drbd_queue_work(&mdev->tconn->data.work, &ascw->w); } else { diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 16db1f47c609..cac65f67c144 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -83,7 +83,7 @@ void drbd_md_io_complete(struct bio *bio, int error) void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local) { unsigned long flags = 0; - struct drbd_conf *mdev = peer_req->mdev; + struct drbd_conf *mdev = peer_req->w.mdev; spin_lock_irqsave(&mdev->tconn->req_lock, flags); mdev->read_cnt += peer_req->i.size >> 9; @@ -103,7 +103,7 @@ void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(lo static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local) { unsigned long flags = 0; - struct drbd_conf *mdev = peer_req->mdev; + struct drbd_conf *mdev = peer_req->w.mdev; sector_t e_sector; int do_wake; u64 block_id; @@ -155,7 +155,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel void drbd_endio_sec(struct bio *bio, int error) { struct drbd_peer_request *peer_req = bio->bi_private; - struct drbd_conf *mdev = peer_req->mdev; + struct drbd_conf *mdev = peer_req->w.mdev; int uptodate = bio_flagged(bio, BIO_UPTODATE); int is_write = bio_data_dir(bio) == WRITE; @@ -192,7 +192,7 @@ void drbd_endio_pri(struct bio *bio, int error) { unsigned long flags; struct drbd_request *req = bio->bi_private; - struct drbd_conf *mdev = req->mdev; + struct drbd_conf *mdev = req->w.mdev; struct bio_and_error m; enum drbd_req_event what; int uptodate = bio_flagged(bio, BIO_UPTODATE); From f1b3a6ec7d2b3033b18c6ad125f5694c85599c4a Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 15:35:58 +0100 Subject: [PATCH 0096/5831] drbd: Consolidated the setup of the thread name into the framework Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 3 +++ drivers/block/drbd/drbd_receiver.c | 5 ----- drivers/block/drbd/drbd_worker.c | 2 -- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index e89ec80395d3..0861746a7475 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -446,6 +446,9 @@ static int drbd_thread_setup(void *arg) unsigned long flags; int retval; + snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s", + thi->name[0], thi->mdev->tconn->name); + restart: retval = thi->function(thi); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4aa75bad16cd..ab9b505c0f0b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ 
b/drivers/block/drbd/drbd_receiver.c @@ -4227,11 +4227,8 @@ static int drbd_do_auth(struct drbd_tconn *tconn) int drbdd_init(struct drbd_thread *thi) { struct drbd_conf *mdev = thi->mdev; - unsigned int minor = mdev_to_minor(mdev); int h; - sprintf(current->comm, "drbd%d_receiver", minor); - dev_info(DEV, "receiver (re)started\n"); do { @@ -4572,8 +4569,6 @@ int drbd_asender(struct drbd_thread *thi) int ping_timeout_active = 0; int empty; - sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); - current->policy = SCHED_RR; /* Make this a realtime task! */ current->rt_priority = 2; /* more important than all other tasks */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index cac65f67c144..6f709621ae26 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1631,8 +1631,6 @@ int drbd_worker(struct drbd_thread *thi) LIST_HEAD(work_list); int intr = 0, i; - sprintf(current->comm, "drbd%d_worker", mdev_to_minor(mdev)); - while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); From 4d641dd7b027dd494c9ae72b0723f612aca621bd Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 15:40:24 +0100 Subject: [PATCH 0097/5831] drbd: Converted drbdd_init() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index ab9b505c0f0b..16d33315e922 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4226,33 +4226,33 @@ static int drbd_do_auth(struct drbd_tconn *tconn) int drbdd_init(struct drbd_thread *thi) { - struct drbd_conf *mdev = thi->mdev; + struct drbd_tconn *tconn = thi->mdev->tconn; int h; - dev_info(DEV, "receiver (re)started\n"); + conn_info(tconn, "receiver (re)started\n"); do { - h = drbd_connect(mdev->tconn); + h = drbd_connect(tconn); if (h == 0) { - drbd_disconnect(mdev->tconn); + drbd_disconnect(tconn); schedule_timeout_interruptible(HZ); } if (h == -1) { - dev_warn(DEV, "Discarding network configuration.\n"); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_warn(tconn, "Discarding network configuration.\n"); + drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING)); } } while (h == 0); if (h > 0) { - if (get_net_conf(mdev->tconn)) { - drbdd(mdev->tconn); - put_net_conf(mdev->tconn); + if (get_net_conf(tconn)) { + drbdd(tconn); + put_net_conf(tconn); } } - drbd_disconnect(mdev->tconn); + drbd_disconnect(tconn); - dev_info(DEV, "receiver terminated\n"); + conn_info(tconn, "receiver terminated\n"); return 0; } From 32862ec705d4b8ecf37e41cc65aa1bd84f990c75 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 8 Feb 2011 16:41:01 +0100 Subject: [PATCH 0098/5831] drbd: Converted drbd_asender() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 125 ++++++++++++++++------------- 1 file changed, 69 insertions(+), 56 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 16d33315e922..f0cd3819fffa 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -392,9 +392,7 @@ int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) } -/* - * This function is called from _asender only_ - * but see also comments in _req_mod(,BARRIER_ACKED) +/* See also comments in 
_req_mod(,BARRIER_ACKED) * and receive_Barrier. * * Move entries from net_ee to done_ee, if ready. @@ -4555,66 +4553,85 @@ static struct asender_cmd *get_asender_cmd(int cmd) return &asender_tbl[cmd]; } +static int _drbd_process_done_ee(int vnr, void *p, void *data) +{ + struct drbd_conf *mdev = (struct drbd_conf *)p; + return !drbd_process_done_ee(mdev); +} + +static int _check_ee_empty(int vnr, void *p, void *data) +{ + struct drbd_conf *mdev = (struct drbd_conf *)p; + struct drbd_tconn *tconn = mdev->tconn; + int not_empty; + + spin_lock_irq(&tconn->req_lock); + not_empty = !list_empty(&mdev->done_ee); + spin_unlock_irq(&tconn->req_lock); + + return not_empty; +} + +static int tconn_process_done_ee(struct drbd_tconn *tconn) +{ + int not_empty, err; + + do { + clear_bit(SIGNAL_ASENDER, &tconn->flags); + flush_signals(current); + err = idr_for_each(&tconn->volumes, _drbd_process_done_ee, NULL); + if (err) + return err; + set_bit(SIGNAL_ASENDER, &tconn->flags); + not_empty = idr_for_each(&tconn->volumes, _check_ee_empty, NULL); + } while (not_empty); + + return 0; +} + int drbd_asender(struct drbd_thread *thi) { - struct drbd_conf *mdev = thi->mdev; - struct p_header *h = &mdev->tconn->meta.rbuf.header; + struct drbd_tconn *tconn = thi->mdev->tconn; + struct p_header *h = &tconn->meta.rbuf.header; struct asender_cmd *cmd = NULL; struct packet_info pi; - int rv; void *buf = h; int received = 0; int expect = sizeof(struct p_header); int ping_timeout_active = 0; - int empty; current->policy = SCHED_RR; /* Make this a realtime task! */ current->rt_priority = 2; /* more important than all other tasks */ while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); - if (test_and_clear_bit(SEND_PING, &mdev->tconn->flags)) { - if (!drbd_send_ping(mdev)) { - dev_err(DEV, "drbd_send_ping has failed\n"); + if (test_and_clear_bit(SEND_PING, &tconn->flags)) { + if (!drbd_send_ping(tconn->volume0)) { + conn_err(tconn, "drbd_send_ping has failed\n"); goto reconnect; } - mdev->tconn->meta.socket->sk->sk_rcvtimeo = - mdev->tconn->net_conf->ping_timeo*HZ/10; + tconn->meta.socket->sk->sk_rcvtimeo = + tconn->net_conf->ping_timeo*HZ/10; ping_timeout_active = 1; } - /* conditionally cork; - * it may hurt latency if we cork without much to send */ - if (!mdev->tconn->net_conf->no_cork && - 3 < atomic_read(&mdev->unacked_cnt)) - drbd_tcp_cork(mdev->tconn->meta.socket); - while (1) { - clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags); - flush_signals(current); - if (!drbd_process_done_ee(mdev)) - goto reconnect; - /* to avoid race with newly queued ACKs */ - set_bit(SIGNAL_ASENDER, &mdev->tconn->flags); - spin_lock_irq(&mdev->tconn->req_lock); - empty = list_empty(&mdev->done_ee); - spin_unlock_irq(&mdev->tconn->req_lock); - /* new ack may have been queued right here, - * but then there is also a signal pending, - * and we start over... */ - if (empty) - break; - } + /* TODO: conditionally cork; it may hurt latency if we cork without + much to send */ + if (!tconn->net_conf->no_cork) + drbd_tcp_cork(tconn->meta.socket); + if (tconn_process_done_ee(tconn)) + goto reconnect; /* but unconditionally uncork unless disabled */ - if (!mdev->tconn->net_conf->no_cork) - drbd_tcp_uncork(mdev->tconn->meta.socket); + if (!tconn->net_conf->no_cork) + drbd_tcp_uncork(tconn->meta.socket); /* short circuit, recv_msg would return EINTR anyways. 
*/ if (signal_pending(current)) continue; - rv = drbd_recv_short(mdev->tconn->meta.socket, buf, expect-received, 0); - clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags); + rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0); + clear_bit(SIGNAL_ASENDER, &tconn->flags); flush_signals(current); @@ -4632,47 +4649,46 @@ int drbd_asender(struct drbd_thread *thi) received += rv; buf += rv; } else if (rv == 0) { - dev_err(DEV, "meta connection shut down by peer.\n"); + conn_err(tconn, "meta connection shut down by peer.\n"); goto reconnect; } else if (rv == -EAGAIN) { /* If the data socket received something meanwhile, * that is good enough: peer is still alive. */ - if (time_after(mdev->tconn->last_received, - jiffies - mdev->tconn->meta.socket->sk->sk_rcvtimeo)) + if (time_after(tconn->last_received, + jiffies - tconn->meta.socket->sk->sk_rcvtimeo)) continue; if (ping_timeout_active) { - dev_err(DEV, "PingAck did not arrive in time.\n"); + conn_err(tconn, "PingAck did not arrive in time.\n"); goto reconnect; } - set_bit(SEND_PING, &mdev->tconn->flags); + set_bit(SEND_PING, &tconn->flags); continue; } else if (rv == -EINTR) { continue; } else { - dev_err(DEV, "sock_recvmsg returned %d\n", rv); + conn_err(tconn, "sock_recvmsg returned %d\n", rv); goto reconnect; } if (received == expect && cmd == NULL) { - if (!decode_header(mdev->tconn, h, &pi)) + if (!decode_header(tconn, h, &pi)) goto reconnect; cmd = get_asender_cmd(pi.cmd); if (unlikely(cmd == NULL)) { - dev_err(DEV, "unknown command %d on meta (l: %d)\n", + conn_err(tconn, "unknown command %d on meta (l: %d)\n", pi.cmd, pi.size); goto disconnect; } expect = cmd->pkt_size; if (pi.size != expect - sizeof(struct p_header)) { - dev_err(DEV, "Wrong packet size on meta (c: %d, l: %d)\n", + conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n", pi.cmd, pi.size); goto reconnect; } } if (received == expect) { - mdev->tconn->last_received = jiffies; - D_ASSERT(cmd != NULL); - if (!cmd->process(mdev, pi.cmd)) + tconn->last_received = jiffies; + if (!cmd->process(vnr_to_mdev(tconn, pi.vnr), pi.cmd)) goto reconnect; /* the idle_timeout (ping-int) @@ -4689,18 +4705,15 @@ int drbd_asender(struct drbd_thread *thi) if (0) { reconnect: - drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); - drbd_md_sync(mdev); + drbd_force_state(tconn->volume0, NS(conn, C_NETWORK_FAILURE)); } if (0) { disconnect: - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - drbd_md_sync(mdev); + drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING)); } - clear_bit(SIGNAL_ASENDER, &mdev->tconn->flags); + clear_bit(SIGNAL_ASENDER, &tconn->flags); - D_ASSERT(mdev->state.conn < C_CONNECTED); - dev_info(DEV, "asender terminated\n"); + conn_info(tconn, "asender terminated\n"); return 0; } From 19393e105f9702a014d3ce08bce92b3ad9cf96b5 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Feb 2011 10:09:07 +0100 Subject: [PATCH 0099/5831] drbd: Converted drbd_worker() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_worker.c | 97 +++++++++++++++++--------------- 1 file changed, 51 insertions(+), 46 deletions(-) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 6f709621ae26..c9b10d6eb88a 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1624,35 +1624,53 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) drbd_state_unlock(mdev); } +static int _worker_dying(int vnr, void *p, void *data) +{ + struct drbd_conf 
*mdev = (struct drbd_conf *)p; + + D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); + /* _drbd_set_state only uses stop_nowait. + * wait here for the exiting receiver. */ + drbd_thread_stop(&mdev->tconn->receiver); + drbd_mdev_cleanup(mdev); + + clear_bit(DEVICE_DYING, &mdev->flags); + clear_bit(CONFIG_PENDING, &mdev->flags); + wake_up(&mdev->state_wait); + + return 0; +} + int drbd_worker(struct drbd_thread *thi) { - struct drbd_conf *mdev = thi->mdev; + struct drbd_tconn *tconn = thi->mdev->tconn; struct drbd_work *w = NULL; LIST_HEAD(work_list); - int intr = 0, i; + int intr = 0; while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); - if (down_trylock(&mdev->tconn->data.work.s)) { - mutex_lock(&mdev->tconn->data.mutex); - if (mdev->tconn->data.socket && !mdev->tconn->net_conf->no_cork) - drbd_tcp_uncork(mdev->tconn->data.socket); - mutex_unlock(&mdev->tconn->data.mutex); + if (down_trylock(&tconn->data.work.s)) { + mutex_lock(&tconn->data.mutex); + if (tconn->data.socket && !tconn->net_conf->no_cork) + drbd_tcp_uncork(tconn->data.socket); + mutex_unlock(&tconn->data.mutex); - intr = down_interruptible(&mdev->tconn->data.work.s); + intr = down_interruptible(&tconn->data.work.s); - mutex_lock(&mdev->tconn->data.mutex); - if (mdev->tconn->data.socket && !mdev->tconn->net_conf->no_cork) - drbd_tcp_cork(mdev->tconn->data.socket); - mutex_unlock(&mdev->tconn->data.mutex); + mutex_lock(&tconn->data.mutex); + if (tconn->data.socket && !tconn->net_conf->no_cork) + drbd_tcp_cork(tconn->data.socket); + mutex_unlock(&tconn->data.mutex); } if (intr) { - D_ASSERT(intr == -EINTR); flush_signals(current); - if (!expect(get_t_state(thi) != RUNNING)) + if (get_t_state(thi) == RUNNING) { + conn_warn(tconn, "Worker got an unexpected signal\n"); continue; + } break; } @@ -1663,8 +1681,8 @@ int drbd_worker(struct drbd_thread *thi) this... */ w = NULL; - spin_lock_irq(&mdev->tconn->data.work.q_lock); - if (!expect(!list_empty(&mdev->tconn->data.work.q))) { + spin_lock_irq(&tconn->data.work.q_lock); + if (list_empty(&tconn->data.work.q)) { /* something terribly wrong in our logic. * we were able to down() the semaphore, * but the list is empty... doh. @@ -1676,57 +1694,44 @@ int drbd_worker(struct drbd_thread *thi) * * I'll try to get away just starting over this loop. */ - spin_unlock_irq(&mdev->tconn->data.work.q_lock); + conn_warn(tconn, "Work list unexpectedly empty\n"); + spin_unlock_irq(&tconn->data.work.q_lock); continue; } - w = list_entry(mdev->tconn->data.work.q.next, struct drbd_work, list); + w = list_entry(tconn->data.work.q.next, struct drbd_work, list); list_del_init(&w->list); - spin_unlock_irq(&mdev->tconn->data.work.q_lock); + spin_unlock_irq(&tconn->data.work.q_lock); - if (!w->cb(mdev, w, mdev->state.conn < C_CONNECTED)) { + if (!w->cb(w->mdev, w, tconn->volume0->state.conn < C_CONNECTED)) { /* dev_warn(DEV, "worker: a callback failed! 
\n"); */ - if (mdev->state.conn >= C_CONNECTED) - drbd_force_state(mdev, - NS(conn, C_NETWORK_FAILURE)); + if (tconn->volume0->state.conn >= C_CONNECTED) + drbd_force_state(tconn->volume0, + NS(conn, C_NETWORK_FAILURE)); } } - D_ASSERT(test_bit(DEVICE_DYING, &mdev->flags)); - D_ASSERT(test_bit(CONFIG_PENDING, &mdev->flags)); - spin_lock_irq(&mdev->tconn->data.work.q_lock); - i = 0; - while (!list_empty(&mdev->tconn->data.work.q)) { - list_splice_init(&mdev->tconn->data.work.q, &work_list); - spin_unlock_irq(&mdev->tconn->data.work.q_lock); + spin_lock_irq(&tconn->data.work.q_lock); + while (!list_empty(&tconn->data.work.q)) { + list_splice_init(&tconn->data.work.q, &work_list); + spin_unlock_irq(&tconn->data.work.q_lock); while (!list_empty(&work_list)) { w = list_entry(work_list.next, struct drbd_work, list); list_del_init(&w->list); - w->cb(mdev, w, 1); - i++; /* dead debugging code */ + w->cb(w->mdev, w, 1); } - spin_lock_irq(&mdev->tconn->data.work.q_lock); + spin_lock_irq(&tconn->data.work.q_lock); } - sema_init(&mdev->tconn->data.work.s, 0); + sema_init(&tconn->data.work.s, 0); /* DANGEROUS race: if someone did queue his work within the spinlock, * but up() ed outside the spinlock, we could get an up() on the * semaphore without corresponding list entry. * So don't do that. */ - spin_unlock_irq(&mdev->tconn->data.work.q_lock); + spin_unlock_irq(&tconn->data.work.q_lock); - D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); - /* _drbd_set_state only uses stop_nowait. - * wait here for the exiting receiver. */ - drbd_thread_stop(&mdev->tconn->receiver); - drbd_mdev_cleanup(mdev); - - dev_info(DEV, "worker terminated\n"); - - clear_bit(DEVICE_DYING, &mdev->flags); - clear_bit(CONFIG_PENDING, &mdev->flags); - wake_up(&mdev->state_wait); + idr_for_each(&tconn->volumes, _worker_dying, NULL); return 0; } From 392c8801922f51466045ece2f1f2884b8c9cd9a2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Feb 2011 10:33:31 +0100 Subject: [PATCH 0100/5831] drbd: drbd_thread has now a pointer to a tconn instead of to a mdev Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 8 +++--- drivers/block/drbd/drbd_int.h | 4 +-- drivers/block/drbd/drbd_main.c | 43 ++++++++++++++---------------- drivers/block/drbd/drbd_receiver.c | 4 +-- drivers/block/drbd/drbd_worker.c | 2 +- 5 files changed, 29 insertions(+), 32 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e85221f22adc..e8d652f197c3 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -119,9 +119,9 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) if (!__ratelimit(&drbd_ratelimit_state)) return; dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n", - drbd_task_to_thread_name(mdev, current), + drbd_task_to_thread_name(mdev->tconn, current), func, b->bm_why ?: "?", - drbd_task_to_thread_name(mdev, b->bm_task)); + drbd_task_to_thread_name(mdev->tconn, b->bm_task)); } void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) @@ -138,9 +138,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) if (trylock_failed) { dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", - drbd_task_to_thread_name(mdev, current), + drbd_task_to_thread_name(mdev->tconn, current), why, b->bm_why ?: "?", - drbd_task_to_thread_name(mdev, b->bm_task)); + drbd_task_to_thread_name(mdev->tconn, b->bm_task)); 
mutex_lock(&b->bm_change); } if (BM_LOCKED_MASK & b->bm_flags) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index be067bfbace2..91054e4d0b2f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -625,7 +625,7 @@ struct drbd_thread { struct completion stop; enum drbd_thread_state t_state; int (*function) (struct drbd_thread *); - struct drbd_conf *mdev; + struct drbd_tconn *tconn; int reset_cpu_mask; char name[9]; }; @@ -1151,7 +1151,7 @@ enum dds_flags { extern void drbd_init_set_defaults(struct drbd_conf *mdev); extern int drbd_thread_start(struct drbd_thread *thi); extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait); -extern char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task); +extern char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task); #ifdef CONFIG_SMP extern void drbd_thread_current_set_cpu(struct drbd_thread *thi); extern void drbd_calc_cpu_mask(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 0861746a7475..2c44cc36deed 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -442,12 +442,12 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) static int drbd_thread_setup(void *arg) { struct drbd_thread *thi = (struct drbd_thread *) arg; - struct drbd_conf *mdev = thi->mdev; + struct drbd_tconn *tconn = thi->tconn; unsigned long flags; int retval; snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s", - thi->name[0], thi->mdev->tconn->name); + thi->name[0], thi->tconn->name); restart: retval = thi->function(thi); @@ -465,7 +465,7 @@ restart: */ if (thi->t_state == RESTARTING) { - dev_info(DEV, "Restarting %s thread\n", thi->name); + conn_info(tconn, "Restarting %s thread\n", thi->name); thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, flags); goto restart; @@ -477,27 +477,27 @@ restart: complete(&thi->stop); spin_unlock_irqrestore(&thi->t_lock, flags); - dev_info(DEV, "Terminating %s\n", current->comm); + conn_info(tconn, "Terminating %s\n", current->comm); /* Release mod reference taken when thread was started */ module_put(THIS_MODULE); return retval; } -static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi, +static void drbd_thread_init(struct drbd_tconn *tconn, struct drbd_thread *thi, int (*func) (struct drbd_thread *), char *name) { spin_lock_init(&thi->t_lock); thi->task = NULL; thi->t_state = NONE; thi->function = func; - thi->mdev = mdev; + thi->tconn = tconn; strncpy(thi->name, name, ARRAY_SIZE(thi->name)); } int drbd_thread_start(struct drbd_thread *thi) { - struct drbd_conf *mdev = thi->mdev; + struct drbd_tconn *tconn = thi->tconn; struct task_struct *nt; unsigned long flags; @@ -507,28 +507,27 @@ int drbd_thread_start(struct drbd_thread *thi) switch (thi->t_state) { case NONE: - dev_info(DEV, "Starting %s thread (from %s [%d])\n", + conn_info(tconn, "Starting %s thread (from %s [%d])\n", thi->name, current->comm, current->pid); /* Get ref on module for thread - this is released when thread exits */ if (!try_module_get(THIS_MODULE)) { - dev_err(DEV, "Failed to get module reference in drbd_thread_start\n"); + conn_err(tconn, "Failed to get module reference in drbd_thread_start\n"); spin_unlock_irqrestore(&thi->t_lock, flags); return false; } init_completion(&thi->stop); - D_ASSERT(thi->task == NULL); thi->reset_cpu_mask = 1; thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, 
flags); flush_signals(current); /* otherw. may get -ERESTARTNOINTR */ nt = kthread_create(drbd_thread_setup, (void *) thi, - "drbd%d_%s", mdev_to_minor(mdev), thi->name); + "drbd_%c_%s", thi->name[0], thi->tconn->name); if (IS_ERR(nt)) { - dev_err(DEV, "Couldn't start thread\n"); + conn_err(tconn, "Couldn't start thread\n"); module_put(THIS_MODULE); return false; @@ -541,7 +540,7 @@ int drbd_thread_start(struct drbd_thread *thi) break; case EXITING: thi->t_state = RESTARTING; - dev_info(DEV, "Restarting %s thread (from %s [%d])\n", + conn_info(tconn, "Restarting %s thread (from %s [%d])\n", thi->name, current->comm, current->pid); /* fall through */ case RUNNING: @@ -582,7 +581,6 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) init_completion(&thi->stop); if (thi->task != current) force_sig(DRBD_SIGKILL, thi->task); - } spin_unlock_irqrestore(&thi->t_lock, flags); @@ -591,9 +589,8 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) wait_for_completion(&thi->stop); } -static struct drbd_thread *drbd_task_to_thread(struct drbd_conf *mdev, struct task_struct *task) +static struct drbd_thread *drbd_task_to_thread(struct drbd_tconn *tconn, struct task_struct *task) { - struct drbd_tconn *tconn = mdev->tconn; struct drbd_thread *thi = task == tconn->receiver.task ? &tconn->receiver : task == tconn->asender.task ? &tconn->asender : @@ -602,9 +599,9 @@ static struct drbd_thread *drbd_task_to_thread(struct drbd_conf *mdev, struct ta return thi; } -char *drbd_task_to_thread_name(struct drbd_conf *mdev, struct task_struct *task) +char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task) { - struct drbd_thread *thi = drbd_task_to_thread(mdev, task); + struct drbd_thread *thi = drbd_task_to_thread(tconn, task); return thi ? 
thi->name : task->comm; } @@ -656,7 +653,7 @@ void drbd_thread_current_set_cpu(struct drbd_thread *thi) if (!thi->reset_cpu_mask) return; thi->reset_cpu_mask = 0; - set_cpus_allowed_ptr(p, thi->mdev->tconn->cpu_mask); + set_cpus_allowed_ptr(p, thi->tconn->cpu_mask); } #endif @@ -1866,10 +1863,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) init_waitqueue_head(&mdev->al_wait); init_waitqueue_head(&mdev->seq_wait); - drbd_thread_init(mdev, &mdev->tconn->receiver, drbdd_init, "receiver"); - drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker, "worker"); - drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender, "asender"); - /* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */ mdev->write_ordering = WO_bdev_flush; mdev->resync_wenr = LC_FREE; @@ -2202,6 +2195,10 @@ struct drbd_tconn *drbd_new_tconn(char *name) init_waitqueue_head(&tconn->net_cnt_wait); idr_init(&tconn->volumes); + drbd_thread_init(tconn, &tconn->receiver, drbdd_init, "receiver"); + drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker"); + drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender"); + write_lock_irq(&global_state_lock); list_add(&tconn->all_tconn, &drbd_tconns); write_unlock_irq(&global_state_lock); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index f0cd3819fffa..f21b0efff6d8 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4224,7 +4224,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn) int drbdd_init(struct drbd_thread *thi) { - struct drbd_tconn *tconn = thi->mdev->tconn; + struct drbd_tconn *tconn = thi->tconn; int h; conn_info(tconn, "receiver (re)started\n"); @@ -4591,7 +4591,7 @@ static int tconn_process_done_ee(struct drbd_tconn *tconn) int drbd_asender(struct drbd_thread *thi) { - struct drbd_tconn *tconn = thi->mdev->tconn; + struct drbd_tconn *tconn = thi->tconn; struct p_header *h = &tconn->meta.rbuf.header; struct asender_cmd *cmd = NULL; struct packet_info pi; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index c9b10d6eb88a..3f0f84a56ee6 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1643,7 +1643,7 @@ static int _worker_dying(int vnr, void *p, void *data) int drbd_worker(struct drbd_thread *thi) { - struct drbd_tconn *tconn = thi->mdev->tconn; + struct drbd_tconn *tconn = thi->tconn; struct drbd_work *w = NULL; LIST_HEAD(work_list); int intr = 0; From 6699b6553374e85785fada94ac1e8dfc5629b02e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Feb 2011 11:10:24 +0100 Subject: [PATCH 0101/5831] drbd: Moved some initializing code into drbd_new_tconn() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2c44cc36deed..b3f5a02cdb87 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1801,17 +1801,9 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->ap_in_flight, 0); mutex_init(&mdev->md_io_mutex); - mutex_init(&mdev->tconn->data.mutex); - mutex_init(&mdev->tconn->meta.mutex); - sema_init(&mdev->tconn->data.work.s, 0); - sema_init(&mdev->tconn->meta.work.s, 0); mutex_init(&mdev->state_mutex); - spin_lock_init(&mdev->tconn->data.work.q_lock); - spin_lock_init(&mdev->tconn->meta.work.q_lock); - spin_lock_init(&mdev->al_lock); - 
spin_lock_init(&mdev->tconn->req_lock); spin_lock_init(&mdev->peer_seq_lock); spin_lock_init(&mdev->epoch_lock); @@ -1821,8 +1813,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) INIT_LIST_HEAD(&mdev->read_ee); INIT_LIST_HEAD(&mdev->net_ee); INIT_LIST_HEAD(&mdev->resync_reads); - INIT_LIST_HEAD(&mdev->tconn->data.work.q); - INIT_LIST_HEAD(&mdev->tconn->meta.work.q); INIT_LIST_HEAD(&mdev->resync_work.list); INIT_LIST_HEAD(&mdev->unplug_work.list); INIT_LIST_HEAD(&mdev->go_diskless.list); @@ -2179,6 +2169,13 @@ out: return r; } +static void drbd_init_workqueue(struct drbd_work_queue* wq) +{ + sema_init(&wq->s, 0); + spin_lock_init(&wq->q_lock); + INIT_LIST_HEAD(&wq->q); +} + struct drbd_tconn *drbd_new_tconn(char *name) { struct drbd_tconn *tconn; @@ -2191,10 +2188,17 @@ struct drbd_tconn *drbd_new_tconn(char *name) if (!tconn->name) goto fail; + spin_lock_init(&tconn->req_lock); atomic_set(&tconn->net_cnt, 0); init_waitqueue_head(&tconn->net_cnt_wait); idr_init(&tconn->volumes); + drbd_init_workqueue(&tconn->data.work); + mutex_init(&tconn->data.mutex); + + drbd_init_workqueue(&tconn->meta.work); + mutex_init(&tconn->meta.mutex); + drbd_thread_init(tconn, &tconn->receiver, drbdd_init, "receiver"); drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker"); drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender"); From 00d56944ff086f895e9ad184a7785ca1eece4a3b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Feb 2011 18:09:48 +0100 Subject: [PATCH 0102/5831] drbd: Generalized the work callbacks No longer work callbacks must operate on a mdev. From now on they can also operate on a tconn. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 8 +-- drivers/block/drbd/drbd_int.h | 45 ++++++++-------- drivers/block/drbd/drbd_main.c | 17 ++++--- drivers/block/drbd/drbd_receiver.c | 13 +++-- drivers/block/drbd/drbd_state.c | 5 +- drivers/block/drbd/drbd_worker.c | 82 ++++++++++++++++++------------ 6 files changed, 100 insertions(+), 70 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 637a9378567a..0748871d6b17 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -63,7 +63,7 @@ struct drbd_atodb_wait { }; -int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int); +int w_al_write_transaction(struct drbd_work *, int); static int _drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, @@ -291,9 +291,10 @@ static unsigned int rs_extent_to_bm_page(unsigned int rs_enr) } int -w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) +w_al_write_transaction(struct drbd_work *w, int unused) { struct update_al_work *aw = container_of(w, struct update_al_work, w); + struct drbd_conf *mdev = w->mdev; struct lc_element *updated = aw->al_ext; const unsigned int new_enr = aw->enr; const unsigned int evicted = aw->old_enr; @@ -612,9 +613,10 @@ void drbd_al_shrink(struct drbd_conf *mdev) wake_up(&mdev->al_wait); } -static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int w_update_odbm(struct drbd_work *w, int unused) { struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w); + struct drbd_conf *mdev = w->mdev; if (!get_ldev(mdev)) { if (__ratelimit(&drbd_ratelimit_state)) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 91054e4d0b2f..8f9cc9d1bf93 100644 --- a/drivers/block/drbd/drbd_int.h +++ 
b/drivers/block/drbd/drbd_int.h @@ -641,11 +641,14 @@ static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi) } struct drbd_work; -typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel); +typedef int (*drbd_work_cb)(struct drbd_work *, int cancel); struct drbd_work { struct list_head list; drbd_work_cb cb; - struct drbd_conf *mdev; + union { + struct drbd_conf *mdev; + struct drbd_tconn *tconn; + }; }; #include "drbd_interval.h" @@ -1495,25 +1498,25 @@ extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio * extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, struct drbd_peer_request *, void *); /* worker callbacks */ -extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int); -extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_end_data_req(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_end_rsdata_req(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_end_csum_rs_req(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int); -extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); -extern int w_resync_timer(struct drbd_conf *, struct drbd_work *, int); -extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); -extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); -extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); -extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int); -extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int); -extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int); -extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int); -extern int w_send_oos(struct drbd_conf *, struct drbd_work *, int); -extern int w_start_resync(struct drbd_conf *, struct drbd_work *, int); +extern int w_req_cancel_conflict(struct drbd_work *, int); +extern int w_read_retry_remote(struct drbd_work *, int); +extern int w_e_end_data_req(struct drbd_work *, int); +extern int w_e_end_rsdata_req(struct drbd_work *, int); +extern int w_e_end_csum_rs_req(struct drbd_work *, int); +extern int w_e_end_ov_reply(struct drbd_work *, int); +extern int w_e_end_ov_req(struct drbd_work *, int); +extern int w_ov_finished(struct drbd_work *, int); +extern int w_resync_timer(struct drbd_work *, int); +extern int w_resume_next_sg(struct drbd_work *, int); +extern int w_send_write_hint(struct drbd_work *, int); +extern int w_send_dblock(struct drbd_work *, int); +extern int w_send_barrier(struct drbd_work *, int); +extern int w_send_read_req(struct drbd_work *, int); +extern int w_prev_work_done(struct drbd_work *, int); +extern int w_e_reissue(struct drbd_work *, int); +extern int w_restart_disk_io(struct drbd_work *, int); +extern int w_send_oos(struct drbd_work *, int); +extern int w_start_resync(struct drbd_work *, int); extern void resync_timer_fn(unsigned long data); extern void start_resync_timer_fn(unsigned long data); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b3f5a02cdb87..d418bca2bb14 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -64,10 +64,10 @@ int drbd_asender(struct drbd_thread *); int drbd_init(void); static int 
drbd_open(struct block_device *bdev, fmode_t mode); static int drbd_release(struct gendisk *gd, fmode_t mode); -static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); +static int w_md_sync(struct drbd_work *w, int unused); static void md_sync_timer_fn(unsigned long data); -static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); -static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused); +static int w_bitmap_io(struct drbd_work *w, int unused); +static int w_go_diskless(struct drbd_work *w, int unused); MODULE_AUTHOR("Philipp Reisner , " "Lars Ellenberg "); @@ -2790,9 +2790,10 @@ int drbd_bmio_clear_n_write(struct drbd_conf *mdev) return rv; } -static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int w_bitmap_io(struct drbd_work *w, int unused) { struct bm_io_work *work = container_of(w, struct bm_io_work, w); + struct drbd_conf *mdev = w->mdev; int rv = -EIO; D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0); @@ -2835,8 +2836,10 @@ void drbd_ldev_destroy(struct drbd_conf *mdev) clear_bit(GO_DISKLESS, &mdev->flags); } -static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int w_go_diskless(struct drbd_work *w, int unused) { + struct drbd_conf *mdev = w->mdev; + D_ASSERT(mdev->state.disk == D_FAILED); /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will * inc/dec it frequently. Once we are D_DISKLESS, no one will touch @@ -2949,8 +2952,10 @@ static void md_sync_timer_fn(unsigned long data) drbd_queue_work_front(&mdev->tconn->data.work, &mdev->md_sync_work); } -static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int w_md_sync(struct drbd_work *w, int unused) { + struct drbd_conf *mdev = w->mdev; + dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n"); #ifdef DEBUG dev_warn(DEV, "last md_mark_dirty: %s:%u\n", diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index f21b0efff6d8..02fa1b25dce5 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -65,7 +65,7 @@ static int drbd_do_auth(struct drbd_tconn *tconn); static int drbd_disconnected(int vnr, void *p, void *data); static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); -static int e_end_block(struct drbd_conf *, struct drbd_work *, int); +static int e_end_block(struct drbd_work *, int); #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) @@ -420,7 +420,7 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) */ list_for_each_entry_safe(peer_req, t, &work_list, w.list) { /* list_del not necessary, next/prev members not touched */ - ok = peer_req->w.cb(mdev, &peer_req->w, !ok) && ok; + ok = peer_req->w.cb(&peer_req->w, !ok) && ok; drbd_free_ee(mdev, peer_req); } wake_up(&mdev->ee_wait); @@ -1447,9 +1447,10 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, /* e_end_resync_block() is called via * drbd_process_done_ee() by asender only */ -static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int e_end_resync_block(struct drbd_work *w, int unused) { struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; + struct drbd_conf *mdev = w->mdev; sector_t sector = peer_req->i.sector; int ok; @@ -1584,9 +1585,10 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, /* e_end_block() is called via drbd_process_done_ee(). 
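The calling convention introduced by this patch passes only the work item to the callback; the enclosing request is recovered with container_of() and the device through w->mdev. A minimal standalone C sketch of that idiom follows; the names (struct work, struct device, struct peer_request) are invented for illustration and are not the DRBD types.

#include <stddef.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's container_of() macro. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct device;				/* invented stand-in for struct drbd_conf */

struct work {				/* invented stand-in for struct drbd_work */
	int (*cb)(struct work *w, int cancel);
	struct device *dev;		/* filled in when the work is queued */
};

struct device {
	const char *name;
};

struct peer_request {			/* invented stand-in for struct drbd_peer_request */
	unsigned long sector;
	struct work w;			/* work item embedded by value */
};

/* New-style callback: only the work item is passed; the enclosing request
 * and the device are both recovered from it. */
static int end_block(struct work *w, int cancel)
{
	struct peer_request *req = container_of(w, struct peer_request, w);
	struct device *dev = w->dev;

	if (cancel)
		return 1;
	printf("%s: completed peer request at sector %lu\n", dev->name, req->sector);
	return 1;
}

int main(void)
{
	struct device dev = { .name = "drbd0" };
	struct peer_request req = { .sector = 2048, .w = { .cb = end_block, .dev = &dev } };

	return req.w.cb(&req.w, 0) ? 0 : 1;
}

The offsetof() arithmetic is what the kernel's container_of() does as well; the only requirement is that the work item is embedded by value in the larger object.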
* this means this function only runs in the asender thread */ -static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +static int e_end_block(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; + struct drbd_conf *mdev = w->mdev; sector_t sector = peer_req->i.sector; int ok = 1, pcmd; @@ -1621,9 +1623,10 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return ok; } -static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int e_send_discard_ack(struct drbd_work *w, int unused) { struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; + struct drbd_conf *mdev = w->mdev; int ok = 1; D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 36679841af68..30a3a1de07cb 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -40,7 +40,7 @@ struct after_state_chg_work { extern void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); int drbd_send_state_req(struct drbd_conf *, union drbd_state, union drbd_state); -static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused); +static int w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, @@ -853,10 +853,11 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, return rv; } -static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int w_after_state_ch(struct drbd_work *w, int unused) { struct after_state_chg_work *ascw = container_of(w, struct after_state_chg_work, w); + struct drbd_conf *mdev = w->mdev; after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags); if (ascw->flags & CS_WAIT_COMPLETE) { diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 3f0f84a56ee6..418f44ad9a86 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -38,9 +38,8 @@ #include "drbd_int.h" #include "drbd_req.h" -static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel); -static int w_make_resync_request(struct drbd_conf *mdev, - struct drbd_work *w, int cancel); +static int w_make_ov_request(struct drbd_work *w, int cancel); +static int w_make_resync_request(struct drbd_work *w, int cancel); @@ -228,9 +227,10 @@ void drbd_endio_pri(struct bio *bio, int error) complete_master_bio(mdev, &m); } -int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_read_retry_remote(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); + struct drbd_conf *mdev = w->mdev; /* We should not detach for read io-error, * but try to WRITE the P_DATA_REPLY to the failed location, @@ -244,7 +244,7 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) } spin_unlock_irq(&mdev->tconn->req_lock); - return w_send_read_req(mdev, w, 0); + return w_send_read_req(w, 0); } void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, @@ -295,11 +295,10 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * crypto_hash_final(&desc, digest); } -/* TODO merge common code with w_e_end_ov_req */ -int w_e_send_csum(struct 
drbd_conf *mdev, struct drbd_work *w, int cancel) +static int w_e_send_csum(struct drbd_work *w, int cancel) { - struct drbd_peer_request *peer_req = - container_of(w, struct drbd_peer_request, w); + struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; int digest_size; void *digest; int ok = 1; @@ -383,14 +382,15 @@ defer: return -EAGAIN; } -int w_resync_timer(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_resync_timer(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; switch (mdev->state.conn) { case C_VERIFY_S: - w_make_ov_request(mdev, w, cancel); + w_make_ov_request(w, cancel); break; case C_SYNC_TARGET: - w_make_resync_request(mdev, w, cancel); + w_make_resync_request(w, cancel); break; } @@ -504,9 +504,9 @@ static int drbd_rs_number_requests(struct drbd_conf *mdev) return number; } -static int w_make_resync_request(struct drbd_conf *mdev, - struct drbd_work *w, int cancel) +static int w_make_resync_request(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; unsigned long bit; sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); @@ -664,8 +664,9 @@ next_sector: return 1; } -static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +static int w_make_ov_request(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; int number, i, size; sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); @@ -707,8 +708,9 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca return 1; } -int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_ov_finished(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; kfree(w); ov_oos_print(mdev); drbd_resync_finished(mdev); @@ -716,8 +718,9 @@ int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return 1; } -static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +static int w_resync_finished(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; kfree(w); drbd_resync_finished(mdev); @@ -901,9 +904,10 @@ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_requ * @w: work object. * @cancel: The connection will be closed anyways */ -int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_e_end_data_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; int ok; if (unlikely(cancel)) { @@ -937,9 +941,10 @@ int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * @w: work object. 
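The drbd_int.h hunk of this patch makes the object pointer part of struct drbd_work itself, as an anonymous union of the device and connection pointers, so a single queue can carry both kinds of work. A compile-checked userspace model of that layout, again with invented names and assuming a C11 compiler for the anonymous union:

#include <stdio.h>

struct device     { const char *minor_name; };	/* per-volume, like struct drbd_conf  */
struct connection { const char *peer_name;  };	/* per-peer,   like struct drbd_tconn */

struct work {
	int (*cb)(struct work *w, int cancel);
	union {					/* anonymous union, as in the patch */
		struct device *dev;
		struct connection *conn;
	};
};

/* A device-scoped callback reaches its object through w->dev ... */
static int resync_timer_work(struct work *w, int cancel)
{
	if (!cancel)
		printf("resync tick on %s\n", w->dev->minor_name);
	return 1;
}

/* ... while a connection-scoped callback uses w->conn instead. */
static int send_ping_work(struct work *w, int cancel)
{
	if (!cancel)
		printf("ping peer %s\n", w->conn->peer_name);
	return 1;
}

int main(void)
{
	struct device dev = { "drbd0" };
	struct connection conn = { "peer-node" };
	struct work w1 = { .cb = resync_timer_work, .dev  = &dev  };
	struct work w2 = { .cb = send_ping_work,    .conn = &conn };

	w1.cb(&w1, 0);
	w2.cb(&w2, 0);
	return 0;
}

Each callback only dereferences the union member that matches its own scope, which is what lets device work and connection work share one drbd_work type.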
* @cancel: The connection will be closed anyways */ -int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_e_end_rsdata_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; int ok; if (unlikely(cancel)) { @@ -985,9 +990,10 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return ok; } -int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; struct digest_info *di; int digest_size; void *digest = NULL; @@ -1047,10 +1053,10 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return ok; } -/* TODO merge common code with w_e_send_csum */ -int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_e_end_ov_req(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; sector_t sector = peer_req->i.sector; unsigned int size = peer_req->i.size; int digest_size; @@ -1105,9 +1111,10 @@ void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) drbd_set_out_of_sync(mdev, sector, size); } -int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_e_end_ov_reply(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; struct digest_info *di; void *digest; sector_t sector = peer_req->i.sector; @@ -1172,16 +1179,18 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return ok; } -int w_prev_work_done(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_prev_work_done(struct drbd_work *w, int cancel) { struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w); + complete(&b->done); return 1; } -int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_send_barrier(struct drbd_work *w, int cancel) { struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w); + struct drbd_conf *mdev = w->mdev; struct p_barrier *p = &mdev->tconn->data.sbuf.barrier; int ok = 1; @@ -1210,16 +1219,18 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return ok; } -int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_send_write_hint(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; if (cancel) return 1; return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); } -int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_send_oos(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); + struct drbd_conf *mdev = w->mdev; int ok; if (unlikely(cancel)) { @@ -1239,9 +1250,10 @@ int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * @w: work object. 
* @cancel: The connection will be closed anyways */ -int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_send_dblock(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); + struct drbd_conf *mdev = w->mdev; int ok; if (unlikely(cancel)) { @@ -1261,9 +1273,10 @@ int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * @w: work object. * @cancel: The connection will be closed anyways */ -int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_send_read_req(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); + struct drbd_conf *mdev = w->mdev; int ok; if (unlikely(cancel)) { @@ -1285,9 +1298,10 @@ int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) return ok; } -int w_restart_disk_io(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_restart_disk_io(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); + struct drbd_conf *mdev = w->mdev; if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) drbd_al_begin_io(mdev, req->i.sector); @@ -1447,8 +1461,10 @@ void start_resync_timer_fn(unsigned long data) drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work); } -int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_start_resync(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; + if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) { dev_warn(DEV, "w_start_resync later...\n"); mdev->start_resync_timer.expires = jiffies + HZ/10; @@ -1702,7 +1718,7 @@ int drbd_worker(struct drbd_thread *thi) list_del_init(&w->list); spin_unlock_irq(&tconn->data.work.q_lock); - if (!w->cb(w->mdev, w, tconn->volume0->state.conn < C_CONNECTED)) { + if (!w->cb(w, tconn->volume0->state.conn < C_CONNECTED)) { /* dev_warn(DEV, "worker: a callback failed! \n"); */ if (tconn->volume0->state.conn >= C_CONNECTED) drbd_force_state(tconn->volume0, @@ -1718,7 +1734,7 @@ int drbd_worker(struct drbd_thread *thi) while (!list_empty(&work_list)) { w = list_entry(work_list.next, struct drbd_work, list); list_del_init(&w->list); - w->cb(w->mdev, w, 1); + w->cb(w, 1); } spin_lock_irq(&tconn->data.work.q_lock); From 2a67d8b93b3363d4a5608d16d510a4bf6b3863fb Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Feb 2011 14:10:32 +0100 Subject: [PATCH 0103/5831] drbd: Converted drbd_send_ping() and related functions from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 21 ++++++++++++++------- drivers/block/drbd/drbd_main.c | 17 +++++++++-------- drivers/block/drbd/drbd_receiver.c | 11 ++++++----- drivers/block/drbd/drbd_state.c | 1 + drivers/block/drbd/drbd_worker.c | 10 ++++++---- 5 files changed, 36 insertions(+), 24 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8f9cc9d1bf93..e2b59f58a0aa 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -790,7 +790,6 @@ enum { RESIZE_PENDING, /* Size change detected locally, waiting for the response from * the peer, if it changed there as well. */ CONN_DRY_RUN, /* Expect disconnect after resync handshake. 
*/ - GOT_PING_ACK, /* set when we receive a ping_ack packet, misc wait gets woken */ NEW_CUR_UUID, /* Create new current UUID when thawing IO */ AL_SUSPENDED, /* Activity logging is currently suspended. */ AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ @@ -913,6 +912,7 @@ enum { DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */ SEND_PING, /* whether asender should send a ping asap */ SIGNAL_ASENDER, /* whether asender wants to be interrupted */ + GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */ }; struct drbd_tconn { /* is a resource from the config file */ @@ -925,6 +925,7 @@ struct drbd_tconn { /* is a resource from the config file */ struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ atomic_t net_cnt; /* Users of net_conf */ wait_queue_head_t net_cnt_wait; + wait_queue_head_t ping_wait; /* Woken upon reception of a ping, and a state change */ struct drbd_socket data; /* data/barrier/cstate/parameter packets */ struct drbd_socket meta; /* ping/ack (metadata) packets */ @@ -1180,12 +1181,12 @@ extern int drbd_send_state(struct drbd_conf *mdev); extern int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, enum drbd_packet cmd, struct p_header *h, size_t size, unsigned msg_flags); +extern int conn_send_cmd(struct drbd_tconn *tconn, int vnr, int use_data_socket, + enum drbd_packet cmd, struct p_header *h, size_t size); extern int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data, size_t size); #define USE_DATA_SOCKET 1 #define USE_META_SOCKET 0 -extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum drbd_packet cmd, struct p_header *h, size_t size); extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size); @@ -1886,6 +1887,12 @@ static inline int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, return _conn_send_cmd(mdev->tconn, mdev->vnr, sock, cmd, h, size, msg_flags); } +static inline int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, + enum drbd_packet cmd, struct p_header *h, size_t size) +{ + return conn_send_cmd(mdev->tconn, mdev->vnr, use_data_socket, cmd, h, size); +} + static inline int drbd_send_short_cmd(struct drbd_conf *mdev, enum drbd_packet cmd) { @@ -1893,16 +1900,16 @@ static inline int drbd_send_short_cmd(struct drbd_conf *mdev, return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h)); } -static inline int drbd_send_ping(struct drbd_conf *mdev) +static inline int drbd_send_ping(struct drbd_tconn *tconn) { struct p_header h; - return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h)); + return conn_send_cmd(tconn, 0, USE_META_SOCKET, P_PING, &h, sizeof(h)); } -static inline int drbd_send_ping_ack(struct drbd_conf *mdev) +static inline int drbd_send_ping_ack(struct drbd_tconn *tconn) { struct p_header h; - return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); + return conn_send_cmd(tconn, 0, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); } static inline void drbd_thread_stop(struct drbd_thread *thi) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index d418bca2bb14..b43ad87a536a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -707,29 +707,29 @@ int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, /* don't pass the socket. 
we may only look at it * when we hold the appropriate socket mutex. */ -int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, +int conn_send_cmd(struct drbd_tconn *tconn, int vnr, int use_data_socket, enum drbd_packet cmd, struct p_header *h, size_t size) { int ok = 0; struct socket *sock; if (use_data_socket) { - mutex_lock(&mdev->tconn->data.mutex); - sock = mdev->tconn->data.socket; + mutex_lock(&tconn->data.mutex); + sock = tconn->data.socket; } else { - mutex_lock(&mdev->tconn->meta.mutex); - sock = mdev->tconn->meta.socket; + mutex_lock(&tconn->meta.mutex); + sock = tconn->meta.socket; } /* drbd_disconnect() could have called drbd_free_sock() * while we were waiting in down()... */ if (likely(sock != NULL)) - ok = _drbd_send_cmd(mdev, sock, cmd, h, size, 0); + ok = _conn_send_cmd(tconn, vnr, sock, cmd, h, size, 0); if (use_data_socket) - mutex_unlock(&mdev->tconn->data.mutex); + mutex_unlock(&tconn->data.mutex); else - mutex_unlock(&mdev->tconn->meta.mutex); + mutex_unlock(&tconn->meta.mutex); return ok; } @@ -2191,6 +2191,7 @@ struct drbd_tconn *drbd_new_tconn(char *name) spin_lock_init(&tconn->req_lock); atomic_set(&tconn->net_cnt, 0); init_waitqueue_head(&tconn->net_cnt_wait); + init_waitqueue_head(&tconn->ping_wait); idr_init(&tconn->volumes); drbd_init_workqueue(&tconn->data.work); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 02fa1b25dce5..2b69a15a55dd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4279,16 +4279,17 @@ static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd) static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd) { - return drbd_send_ping_ack(mdev); + return drbd_send_ping_ack(mdev->tconn); } static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd) { + struct drbd_tconn *tconn = mdev->tconn; /* restore idle timeout */ - mdev->tconn->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ; - if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags)) - wake_up(&mdev->misc_wait); + tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ; + if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags)) + wake_up(&tconn->ping_wait); return true; } @@ -4610,7 +4611,7 @@ int drbd_asender(struct drbd_thread *thi) while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); if (test_and_clear_bit(SEND_PING, &tconn->flags)) { - if (!drbd_send_ping(tconn->volume0)) { + if (!drbd_send_ping(tconn)) { conn_err(tconn, "drbd_send_ping has failed\n"); goto reconnect; } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 30a3a1de07cb..d5777159a2b1 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -737,6 +737,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, wake_up(&mdev->misc_wait); wake_up(&mdev->state_wait); + wake_up(&mdev->tconn->ping_wait); /* aborted verify run. 
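got_PingAck() above now sets a connection-level flag bit and wakes the new ping_wait queue, with test_and_set_bit() ensuring the wake-up fires only once per ack. The bit-operation half of that idiom can be modelled with C11 atomics; this is a simplified, single-threaded illustration, not the kernel implementation:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define GOT_PING_ACK  0		/* bit number inside the per-connection flags word */

/* Userspace model of test_and_set_bit(): set the bit, report its old value. */
static bool test_and_set_bit(int nr, atomic_ulong *flags)
{
	unsigned long mask = 1UL << nr;

	return (atomic_fetch_or(flags, mask) & mask) != 0;
}

static bool test_bit(int nr, atomic_ulong *flags)
{
	return (atomic_load(flags) & (1UL << nr)) != 0;
}

int main(void)
{
	atomic_ulong conn_flags = 0;

	/* First ack: the bit was clear, so the waiter gets exactly one wake-up. */
	if (!test_and_set_bit(GOT_PING_ACK, &conn_flags))
		printf("first ping_ack: wake ping_wait\n");

	/* A duplicate ack finds the bit already set and wakes nobody. */
	if (!test_and_set_bit(GOT_PING_ACK, &conn_flags))
		printf("unexpected second wake-up\n");

	printf("GOT_PING_ACK is %s\n",
	       test_bit(GOT_PING_ACK, &conn_flags) ? "set" : "clear");
	return 0;
}

The waiter side is ping_peer() in drbd_worker.c (later in this patch): it clears the bit, requests a ping, and sleeps on ping_wait until the bit is set or the connection drops below C_CONNECTED.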
log the last position */ if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 418f44ad9a86..8539df25bc22 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -730,10 +730,12 @@ static int w_resync_finished(struct drbd_work *w, int cancel) static void ping_peer(struct drbd_conf *mdev) { - clear_bit(GOT_PING_ACK, &mdev->flags); - request_ping(mdev->tconn); - wait_event(mdev->misc_wait, - test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED); + struct drbd_tconn *tconn = mdev->tconn; + + clear_bit(GOT_PING_ACK, &tconn->flags); + request_ping(tconn); + wait_event(tconn->ping_wait, + test_bit(GOT_PING_ACK, &tconn->flags) || mdev->state.conn < C_CONNECTED); } int drbd_resync_finished(struct drbd_conf *mdev) From d50eee21c45769252f0b54a5804e8b2db735d288 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Feb 2011 14:38:52 +0100 Subject: [PATCH 0104/5831] drbd: Extracted after_conn_state_ch() out of after_state_ch() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index d5777159a2b1..18feba6a27ed 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -45,10 +45,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); -static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); -static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); -static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, const char **warn_sync_abort); /** * cl_wide_st_chg() - true if the state change is a cluster wide one @@ -98,6 +94,15 @@ void drbd_force_state(struct drbd_conf *mdev, drbd_change_state(mdev, CS_HARD, mask, val); } +static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); +static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *, + union drbd_state, + union drbd_state); +static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, const char **warn_sync_abort); +int drbd_send_state_req(struct drbd_conf *, + union drbd_state, union drbd_state); + static enum drbd_state_rv _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val) @@ -123,7 +128,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, if (!rv) { rv = is_valid_state(mdev, ns); if (rv == SS_SUCCESS) { - rv = is_valid_soft_transition(os, ns); + rv = is_valid_state_transition(mdev, ns, os); if (rv == SS_SUCCESS) rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. 
*/ } @@ -166,7 +171,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, if (cl_wide_st_chg(mdev, os, ns)) { rv = is_valid_state(mdev, ns); if (rv == SS_SUCCESS) - rv = is_valid_soft_transition(os, ns); + rv = is_valid_state_transition(mdev, ns, os); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (rv < SS_SUCCESS) { @@ -339,13 +344,14 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) } /** - * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible + * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible * @mdev: DRBD device. * @ns: new state. * @os: old state. */ static enum drbd_state_rv -is_valid_soft_transition(union drbd_state os, union drbd_state ns) +is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, + union drbd_state os) { enum drbd_state_rv rv = SS_SUCCESS; @@ -651,9 +657,9 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, this happen...*/ if (is_valid_state(mdev, os) == rv) - rv = is_valid_soft_transition(os, ns); + rv = is_valid_state_transition(mdev, ns, os); } else - rv = is_valid_soft_transition(os, ns); + rv = is_valid_state_transition(mdev, ns, os); } if (rv < SS_SUCCESS) { From a75f34ad0cb37f6fc03ffb109a9a702668b67fe2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Feb 2011 15:10:33 +0100 Subject: [PATCH 0105/5831] drbd: Renamed is_valid_state_transition() to is_valid_soft_transition() And removed the unused mdev parameter, and made the order of the state parameters: os, ns Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 18feba6a27ed..d5777159a2b1 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -45,6 +45,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); +static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); +static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); +static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, const char **warn_sync_abort); /** * cl_wide_st_chg() - true if the state change is a cluster wide one @@ -94,15 +98,6 @@ void drbd_force_state(struct drbd_conf *mdev, drbd_change_state(mdev, CS_HARD, mask, val); } -static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); -static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *, - union drbd_state, - union drbd_state); -static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, const char **warn_sync_abort); -int drbd_send_state_req(struct drbd_conf *, - union drbd_state, union drbd_state); - static enum drbd_state_rv _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val) @@ -128,7 +123,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, if (!rv) { rv = is_valid_state(mdev, ns); if (rv == SS_SUCCESS) { - rv = is_valid_state_transition(mdev, ns, os); + rv = is_valid_soft_transition(os, ns); if (rv == SS_SUCCESS) rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise 
fail. */ } @@ -171,7 +166,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, if (cl_wide_st_chg(mdev, os, ns)) { rv = is_valid_state(mdev, ns); if (rv == SS_SUCCESS) - rv = is_valid_state_transition(mdev, ns, os); + rv = is_valid_soft_transition(os, ns); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (rv < SS_SUCCESS) { @@ -344,14 +339,13 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) } /** - * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible + * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible * @mdev: DRBD device. * @ns: new state. * @os: old state. */ static enum drbd_state_rv -is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, - union drbd_state os) +is_valid_soft_transition(union drbd_state os, union drbd_state ns) { enum drbd_state_rv rv = SS_SUCCESS; @@ -657,9 +651,9 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, this happen...*/ if (is_valid_state(mdev, os) == rv) - rv = is_valid_state_transition(mdev, ns, os); + rv = is_valid_soft_transition(os, ns); } else - rv = is_valid_state_transition(mdev, ns, os); + rv = is_valid_soft_transition(os, ns); } if (rv < SS_SUCCESS) { From 3509502dc88ce8226b29bea5e25edf066eca9a8a Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 9 Feb 2011 16:29:33 +0100 Subject: [PATCH 0106/5831] drbd: Extracted is_valid_transition() out of sanitize_state() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 71 +++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index d5777159a2b1..3199bf92e46e 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -47,6 +47,7 @@ static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); +static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, const char **warn_sync_abort); @@ -112,15 +113,17 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags)) return SS_CW_FAILED_BY_PEER; - rv = 0; spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; ns = sanitize_state(mdev, os, ns, NULL); + rv = is_valid_transition(os, ns); + if (rv == SS_SUCCESS) + rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. 
*/ if (!cl_wide_st_chg(mdev, os, ns)) rv = SS_CW_NO_NEED; - if (!rv) { + if (rv == SS_UNKNOWN_ERROR) { rv = is_valid_state(mdev, ns); if (rv == SS_SUCCESS) { rv = is_valid_soft_transition(os, ns); @@ -160,8 +163,10 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, os, ns, NULL); + rv = is_valid_transition(os, ns); + if (rv < SS_SUCCESS) + goto abort; if (cl_wide_st_chg(mdev, os, ns)) { rv = is_valid_state(mdev, ns); @@ -340,6 +345,8 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) /** * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible + * This function limits state transitions that may be declined by DRBD. I.e. + * user requests (aka soft transitions). * @mdev: DRBD device. * @ns: new state. * @os: old state. @@ -389,6 +396,40 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns) return rv; } +/** + * is_valid_transition() - Returns an SS_ error code if the state transition is not possible + * This limits hard state transitions. Hard state transitions are facts there are + * imposed on DRBD by the environment. E.g. disk broke or network broke down. + * But those hard state transitions are still not allowed to do everything. + * @ns: new state. + * @os: old state. + */ +static enum drbd_state_rv +is_valid_transition(union drbd_state os, union drbd_state ns) +{ + enum drbd_state_rv rv = SS_SUCCESS; + + /* Disallow Network errors to configure a device's network part */ + if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) && + os.conn <= C_DISCONNECTING) + rv = SS_NEED_CONNECTION; + + /* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. */ + if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && + ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING) + rv = SS_IN_TRANSIENT_STATE; + + /* After C_DISCONNECTING only C_STANDALONE may follow */ + if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) + rv = SS_IN_TRANSIENT_STATE; + + /* we cannot fail (again) if we already detached */ + if (ns.disk == D_FAILED && os.disk == D_DISKLESS) + rv = SS_IS_DISKLESS; + + return rv; +} + /** * sanitize_state() - Resolves implicitly necessary additional changes to a state transition * @mdev: DRBD device. @@ -411,30 +452,12 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state put_ldev(mdev); } - /* Disallow Network errors to configure a device's network part */ - if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) && - os.conn <= C_DISCONNECTING) - ns.conn = os.conn; - - /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow. - * If you try to go into some Sync* state, that shall fail (elsewhere). */ - if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && - ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN) - ns.conn = os.conn; - - /* we cannot fail (again) if we already detached */ - if (ns.disk == D_FAILED && os.disk == D_DISKLESS) - ns.disk = D_DISKLESS; - /* if we are only D_ATTACHING yet, * we can (and should) go directly to D_DISKLESS. 
*/ if (ns.disk == D_FAILED && os.disk == D_ATTACHING) ns.disk = D_DISKLESS; - /* After C_DISCONNECTING only C_STANDALONE may follow */ - if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) - ns.conn = os.conn; - + /* Implications from connection to peer and peer_isp */ if (ns.conn < C_CONNECTED) { ns.peer_isp = 0; ns.peer = R_UNKNOWN; @@ -641,6 +664,10 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, if (ns.i == os.i) return SS_NOTHING_TO_DO; + rv = is_valid_transition(os, ns); + if (rv < SS_SUCCESS) + return rv; + if (!(flags & CS_HARD)) { /* pre-state-change checks ; only look at ns */ /* See drbd_state_sw_errors in drbd_strings.c */ From fda74117dc7f07b844c398157f1ed398f3bc02da Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 10 Feb 2011 10:38:06 +0100 Subject: [PATCH 0107/5831] drbd: Extracted is_valid_conn_transition() out of is_valid_transition() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 37 ++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 3199bf92e46e..b381faade0a7 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -396,6 +396,27 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns) return rv; } +static enum drbd_state_rv +is_valid_conn_transition(enum drbd_conns oc, enum drbd_conns nc) +{ + enum drbd_state_rv rv = SS_SUCCESS; + + /* Disallow Network errors to configure a device's network part */ + if ((nc >= C_TIMEOUT && nc <= C_TEAR_DOWN) && oc <= C_DISCONNECTING) + rv = SS_NEED_CONNECTION; + + /* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. */ + if (oc >= C_TIMEOUT && oc <= C_TEAR_DOWN && nc != C_UNCONNECTED && nc != C_DISCONNECTING) + rv = SS_IN_TRANSIENT_STATE; + + /* After C_DISCONNECTING only C_STANDALONE may follow */ + if (oc == C_DISCONNECTING && nc != C_STANDALONE) + rv = SS_IN_TRANSIENT_STATE; + + return rv; +} + + /** * is_valid_transition() - Returns an SS_ error code if the state transition is not possible * This limits hard state transitions. Hard state transitions are facts there are @@ -407,21 +428,9 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns) static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns) { - enum drbd_state_rv rv = SS_SUCCESS; + enum drbd_state_rv rv; - /* Disallow Network errors to configure a device's network part */ - if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) && - os.conn <= C_DISCONNECTING) - rv = SS_NEED_CONNECTION; - - /* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. 
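The extracted is_valid_conn_transition() is a pure function of the old and new connection states, and because later checks overwrite rv, the last matching rule wins. A reduced standalone model, using an invented subset of enum drbd_conns that preserves only the ordering the comparisons rely on:

#include <stdio.h>

/* Invented subset of enum drbd_conns, for illustration only. */
enum conn {
	C_STANDALONE,
	C_DISCONNECTING,
	C_UNCONNECTED,
	C_TIMEOUT,		/* start of the "network error" block */
	C_NETWORK_FAILURE,
	C_TEAR_DOWN,		/* end of the "network error" block   */
	C_WF_CONNECTION,
	C_CONNECTED,
};

enum rv { SS_SUCCESS, SS_NEED_CONNECTION, SS_IN_TRANSIENT_STATE };

static enum rv is_valid_conn_transition(enum conn oc, enum conn nc)
{
	enum rv rv = SS_SUCCESS;

	/* Network error states only make sense while a connection exists. */
	if (nc >= C_TIMEOUT && nc <= C_TEAR_DOWN && oc <= C_DISCONNECTING)
		rv = SS_NEED_CONNECTION;

	/* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. */
	if (oc >= C_TIMEOUT && oc <= C_TEAR_DOWN &&
	    nc != C_UNCONNECTED && nc != C_DISCONNECTING)
		rv = SS_IN_TRANSIENT_STATE;

	/* After C_DISCONNECTING only C_STANDALONE may follow. */
	if (oc == C_DISCONNECTING && nc != C_STANDALONE)
		rv = SS_IN_TRANSIENT_STATE;

	return rv;
}

int main(void)
{
	printf("%d\n", is_valid_conn_transition(C_CONNECTED, C_TIMEOUT));       /* 0: allowed           */
	printf("%d\n", is_valid_conn_transition(C_STANDALONE, C_TIMEOUT));      /* 1: need a connection */
	printf("%d\n", is_valid_conn_transition(C_DISCONNECTING, C_CONNECTED)); /* 2: transient state   */
	return 0;
}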
*/ - if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && - ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING) - rv = SS_IN_TRANSIENT_STATE; - - /* After C_DISCONNECTING only C_STANDALONE may follow */ - if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) - rv = SS_IN_TRANSIENT_STATE; + rv = is_valid_conn_transition(os.conn, ns.conn); /* we cannot fail (again) if we already detached */ if (ns.disk == D_FAILED && os.disk == D_DISKLESS) From 4308a0a390deee88b6de6dbf8b05bcf0f506bbcf Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 10 Feb 2011 11:24:38 +0100 Subject: [PATCH 0108/5831] drbd: Removed the os parameter form sanitize_state() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 39 ++++++++++++++++----------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index b381faade0a7..02516ed9127c 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -48,8 +48,8 @@ static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); -static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, const char **warn_sync_abort); +static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, + const char **warn_sync_abort); /** * cl_wide_st_chg() - true if the state change is a cluster wide one @@ -116,7 +116,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, os, ns, NULL); + ns = sanitize_state(mdev, ns, NULL); rv = is_valid_transition(os, ns); if (rv == SS_SUCCESS) rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ @@ -163,7 +163,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, os, ns, NULL); + ns = sanitize_state(mdev, ns, NULL); rv = is_valid_transition(os, ns); if (rv < SS_SUCCESS) goto abort; @@ -436,6 +436,13 @@ is_valid_transition(union drbd_state os, union drbd_state ns) if (ns.disk == D_FAILED && os.disk == D_DISKLESS) rv = SS_IS_DISKLESS; + /* if we are only D_ATTACHING yet, + * we can (and should) go directly to D_DISKLESS. */ + if (ns.disk == D_FAILED && os.disk == D_ATTACHING) { + printk("TODO: FIX ME\n"); + rv = SS_IS_DISKLESS; + } + return rv; } @@ -449,8 +456,8 @@ is_valid_transition(union drbd_state os, union drbd_state ns) * When we loose connection, we have to set the state of the peers disk (pdsk) * to D_UNKNOWN. This rule and many more along those lines are in this function. 
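The sanitize_state() doc comment just quoted names one of its implication rules: once the connection is gone, what we believed about the peer degrades to unknown. A toy model of that shape, with a drastically reduced state structure and invented field names rather than the real union drbd_state:

#include <stdio.h>

enum conn { C_STANDALONE, C_CONNECTED };
enum role { R_UNKNOWN, R_SECONDARY, R_PRIMARY };
enum disk { D_DISKLESS, D_UNKNOWN, D_UP_TO_DATE };

struct state {
	enum conn conn;
	enum role peer;		/* role of the peer node          */
	enum disk pdsk;		/* what we believe about its disk */
};

/* One implication rule in the spirit of sanitize_state(): without a
 * connection we cannot know anything about the peer any more. */
static struct state sanitize_state(struct state ns)
{
	if (ns.conn < C_CONNECTED) {
		ns.peer = R_UNKNOWN;
		ns.pdsk = D_UNKNOWN;
	}
	return ns;
}

int main(void)
{
	struct state ns = { .conn = C_STANDALONE, .peer = R_PRIMARY, .pdsk = D_UP_TO_DATE };

	ns = sanitize_state(ns);
	printf("peer=%s pdsk=%s\n",
	       ns.peer == R_UNKNOWN ? "R_UNKNOWN" : "known",
	       ns.pdsk == D_UNKNOWN ? "D_UNKNOWN" : "known");
	return 0;
}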
*/ -static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, const char **warn_sync_abort) +static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, + const char **warn_sync_abort) { enum drbd_fencing_p fp; enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; @@ -461,11 +468,6 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state put_ldev(mdev); } - /* if we are only D_ATTACHING yet, - * we can (and should) go directly to D_DISKLESS. */ - if (ns.disk == D_FAILED && os.disk == D_ATTACHING) - ns.disk = D_DISKLESS; - /* Implications from connection to peer and peer_isp */ if (ns.conn < C_CONNECTED) { ns.peer_isp = 0; @@ -478,12 +480,12 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY) ns.aftr_isp = 0; + /* An implication of the disk states onto the connection state */ /* Abort resync if a disk fails/detaches */ - if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED && - (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { + if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { if (warn_sync_abort) *warn_sync_abort = - os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ? + ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ? "Online-verify" : "Resync"; ns.conn = C_CONNECTED; } @@ -591,13 +593,11 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state } if (fp == FP_STONITH && - (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) && - !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)) + (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED)) ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO && - (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) && - !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE)) + (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { @@ -668,8 +668,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, os = mdev->state; - ns = sanitize_state(mdev, os, ns, &warn_sync_abort); - + ns = sanitize_state(mdev, ns, &warn_sync_abort); if (ns.i == os.i) return SS_NOTHING_TO_DO; From 56707f9e873108c0173b4edf20ea452e1d2a89d2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 16 Feb 2011 14:57:50 +0100 Subject: [PATCH 0109/5831] drbd: Code de-duplication; new function apply_mask_val() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 02516ed9127c..0100aab1288d 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -69,17 +69,24 @@ static int cl_wide_st_chg(struct drbd_conf *mdev, (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); } +static union drbd_state +apply_mask_val(union drbd_state os, union drbd_state mask, union drbd_state val) +{ + union drbd_state ns; + ns.i = (os.i & ~mask.i) | val.i; + return ns; +} + enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, union drbd_state mask, union drbd_state 
val) { unsigned long flags; - union drbd_state os, ns; + union drbd_state ns; enum drbd_state_rv rv; spin_lock_irqsave(&mdev->tconn->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; + ns = apply_mask_val(mdev->state, mask, val); rv = _drbd_set_state(mdev, ns, f, NULL); ns = mdev->state; spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); @@ -115,8 +122,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, ns, NULL); + ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); rv = is_valid_transition(os, ns); if (rv == SS_SUCCESS) rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ @@ -162,8 +168,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, ns, NULL); + ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); rv = is_valid_transition(os, ns); if (rv < SS_SUCCESS) goto abort; @@ -199,8 +204,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, goto abort; } spin_lock_irqsave(&mdev->tconn->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; + ns = apply_mask_val(mdev->state, mask, val); rv = _drbd_set_state(mdev, ns, f, &done); drbd_state_unlock(mdev); } else { From bbeb641c3e4982d6bba21188545a7fd44ab0a715 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 10 Feb 2011 13:45:46 +0100 Subject: [PATCH 0110/5831] drbd: Killed volume0; last step of multi-volume-enablement Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +- drivers/block/drbd/drbd_main.c | 8 +- drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_receiver.c | 39 ++-- drivers/block/drbd/drbd_state.c | 294 +++++++++++++++++++++++------ drivers/block/drbd/drbd_state.h | 10 + drivers/block/drbd/drbd_worker.c | 7 +- 7 files changed, 277 insertions(+), 87 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e2b59f58a0aa..f718124c5c82 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -918,8 +918,8 @@ enum { struct drbd_tconn { /* is a resource from the config file */ char *name; /* Resource name */ struct list_head all_tconn; /* List of all drbd_tconn, prot by global_state_lock */ - struct drbd_conf *volume0; /* TODO: Remove me again */ struct idr volumes; /* to mdev mapping */ + enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */ unsigned long flags; struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ @@ -2024,7 +2024,7 @@ static inline int get_net_conf(struct drbd_tconn *tconn) int have_net_conf; atomic_inc(&tconn->net_cnt); - have_net_conf = tconn->volume0->state.conn >= C_UNCONNECTED; + have_net_conf = tconn->cstate >= C_UNCONNECTED; if (!have_net_conf) put_net_conf(tconn); return have_net_conf; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b43ad87a536a..b64b7388ee9d 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1344,7 +1344,7 @@ static int we_should_drop_the_connection(struct drbd_tconn *tconn, struct socket drop_it = tconn->meta.socket == sock || !tconn->asender.task || get_t_state(&tconn->asender) != RUNNING - || tconn->volume0->state.conn < C_CONNECTED; + || tconn->cstate < C_WF_REPORT_PARAMS; if (drop_it) 
return true; @@ -1705,9 +1705,9 @@ int drbd_send(struct drbd_tconn *tconn, struct socket *sock, conn_err(tconn, "%s_sendmsg returned %d\n", sock == tconn->meta.socket ? "msock" : "sock", rv); - drbd_force_state(tconn->volume0, NS(conn, C_BROKEN_PIPE)); + conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD); } else - drbd_force_state(tconn->volume0, NS(conn, C_TIMEOUT)); + conn_request_state(tconn, NS(conn, C_TIMEOUT), CS_HARD); } return sent; @@ -2188,6 +2188,7 @@ struct drbd_tconn *drbd_new_tconn(char *name) if (!tconn->name) goto fail; + tconn->cstate = C_STANDALONE; spin_lock_init(&tconn->req_lock); atomic_set(&tconn->net_cnt, 0); init_waitqueue_head(&tconn->net_cnt_wait); @@ -2258,7 +2259,6 @@ struct drbd_conf *drbd_new_device(unsigned int minor) if (!zalloc_cpumask_var(&mdev->tconn->cpu_mask, GFP_KERNEL)) goto out_no_cpumask; - mdev->tconn->volume0 = mdev; mdev->minor = minor; drbd_init_set_defaults(mdev); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 0debe589b674..eeb284aef3c8 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1547,7 +1547,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, mdev->tconn->int_dig_out=int_dig_out; mdev->tconn->int_dig_in=int_dig_in; mdev->tconn->int_dig_vv=int_dig_vv; - retcode = _drbd_set_state(_NS(mdev, conn, C_UNCONNECTED), CS_VERBOSE, NULL); + retcode = _conn_request_state(mdev->tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); spin_unlock_irq(&mdev->tconn->req_lock); kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2b69a15a55dd..27e1eb7ce540 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -551,7 +551,7 @@ static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size) set_fs(oldfs); if (rv != size) - drbd_force_state(tconn->volume0, NS(conn, C_BROKEN_PIPE)); + conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD); return rv; } @@ -647,7 +647,7 @@ out: conn_err(tconn, "%s failed, err = %d\n", what, err); } if (disconnect_on_error) - drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING)); + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); } put_net_conf(tconn); return sock; @@ -694,7 +694,7 @@ out: if (err < 0) { if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { conn_err(tconn, "%s failed, err = %d\n", what, err); - drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING)); + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); } } put_net_conf(tconn); @@ -776,7 +776,7 @@ static int drbd_connect(struct drbd_tconn *tconn) struct socket *s, *sock, *msock; int try, h, ok; - if (drbd_request_state(tconn->volume0, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) + if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) return -2; clear_bit(DISCARD_CONCURRENT, &tconn->flags); @@ -850,7 +850,7 @@ retry: } } - if (tconn->volume0->state.conn <= C_DISCONNECTING) + if (tconn->cstate <= C_DISCONNECTING) goto out_release_sockets; if (signal_pending(current)) { flush_signals(current); @@ -912,7 +912,7 @@ retry: } } - if (drbd_request_state(tconn->volume0, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS) + if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS) return 0; sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10; @@ -3817,7 +3817,7 @@ static void drbdd(struct drbd_tconn *tconn) if (0) { err_out: - 
drbd_force_state(tconn->volume0, NS(conn, C_PROTOCOL_ERROR)); + conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD); } } @@ -3834,10 +3834,10 @@ void drbd_flush_workqueue(struct drbd_conf *mdev) static void drbd_disconnect(struct drbd_tconn *tconn) { - union drbd_state os, ns; + enum drbd_conns oc; int rv = SS_UNKNOWN_ERROR; - if (tconn->volume0->state.conn == C_STANDALONE) + if (tconn->cstate == C_STANDALONE) return; /* asender does not clean up anything. it must not interfere, either */ @@ -3849,16 +3849,13 @@ static void drbd_disconnect(struct drbd_tconn *tconn) conn_info(tconn, "Connection closed\n"); spin_lock_irq(&tconn->req_lock); - os = tconn->volume0->state; - if (os.conn >= C_UNCONNECTED) { - /* Do not restart in case we are C_DISCONNECTING */ - ns.i = os.i; - ns.conn = C_UNCONNECTED; - rv = _drbd_set_state(tconn->volume0, ns, CS_VERBOSE, NULL); - } + oc = tconn->cstate; + if (oc >= C_UNCONNECTED) + rv = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); + spin_unlock_irq(&tconn->req_lock); - if (os.conn == C_DISCONNECTING) { + if (oc == C_DISCONNECTING) { wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0); crypto_free_hash(tconn->cram_hmac_tfm); @@ -3866,7 +3863,7 @@ static void drbd_disconnect(struct drbd_tconn *tconn) kfree(tconn->net_conf); tconn->net_conf = NULL; - drbd_request_state(tconn->volume0, NS(conn, C_STANDALONE)); + conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE); } } @@ -4240,7 +4237,7 @@ int drbdd_init(struct drbd_thread *thi) } if (h == -1) { conn_warn(tconn, "Discarding network configuration.\n"); - drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING)); + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); } } while (h == 0); @@ -4709,11 +4706,11 @@ int drbd_asender(struct drbd_thread *thi) if (0) { reconnect: - drbd_force_state(tconn->volume0, NS(conn, C_NETWORK_FAILURE)); + conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD); } if (0) { disconnect: - drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING)); + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); } clear_bit(SIGNAL_ASENDER, &tconn->flags); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 0100aab1288d..7376d9dc0bc7 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -43,8 +43,7 @@ int drbd_send_state_req(struct drbd_conf *, union drbd_state, union drbd_state); static int w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); -static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, - union drbd_state ns, enum chg_state_flags flags); +static void after_all_state_ch(struct drbd_tconn *tconn, union drbd_state ns); static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); @@ -275,6 +274,51 @@ void print_st_err(struct drbd_conf *mdev, union drbd_state os, print_st(mdev, "wanted", ns); } +static void print_state_change(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, + enum chg_state_flags flags) +{ + char *pbp, pb[300]; + pbp = pb; + *pbp = 0; + if (ns.role != os.role) + pbp += sprintf(pbp, "role( %s -> %s ) ", + drbd_role_str(os.role), + drbd_role_str(ns.role)); + if (ns.peer != 
os.peer) + pbp += sprintf(pbp, "peer( %s -> %s ) ", + drbd_role_str(os.peer), + drbd_role_str(ns.peer)); + if (ns.conn != os.conn && !(flags & CS_NO_CSTATE_CHG)) + pbp += sprintf(pbp, "conn( %s -> %s ) ", + drbd_conn_str(os.conn), + drbd_conn_str(ns.conn)); + if (ns.disk != os.disk) + pbp += sprintf(pbp, "disk( %s -> %s ) ", + drbd_disk_str(os.disk), + drbd_disk_str(ns.disk)); + if (ns.pdsk != os.pdsk) + pbp += sprintf(pbp, "pdsk( %s -> %s ) ", + drbd_disk_str(os.pdsk), + drbd_disk_str(ns.pdsk)); + if (is_susp(ns) != is_susp(os)) + pbp += sprintf(pbp, "susp( %d -> %d ) ", + is_susp(os), + is_susp(ns)); + if (ns.aftr_isp != os.aftr_isp) + pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ", + os.aftr_isp, + ns.aftr_isp); + if (ns.peer_isp != os.peer_isp) + pbp += sprintf(pbp, "peer_isp( %d -> %d ) ", + os.peer_isp, + ns.peer_isp); + if (ns.user_isp != os.user_isp) + pbp += sprintf(pbp, "user_isp( %d -> %d ) ", + os.user_isp, + ns.user_isp); + if (pbp != pb) + dev_info(DEV, "%s\n", pb); +} /** * is_valid_state() - Returns an SS_ error code if ns is not valid @@ -704,48 +748,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, if (warn_sync_abort) dev_warn(DEV, "%s aborted.\n", warn_sync_abort); - { - char *pbp, pb[300]; - pbp = pb; - *pbp = 0; - if (ns.role != os.role) - pbp += sprintf(pbp, "role( %s -> %s ) ", - drbd_role_str(os.role), - drbd_role_str(ns.role)); - if (ns.peer != os.peer) - pbp += sprintf(pbp, "peer( %s -> %s ) ", - drbd_role_str(os.peer), - drbd_role_str(ns.peer)); - if (ns.conn != os.conn) - pbp += sprintf(pbp, "conn( %s -> %s ) ", - drbd_conn_str(os.conn), - drbd_conn_str(ns.conn)); - if (ns.disk != os.disk) - pbp += sprintf(pbp, "disk( %s -> %s ) ", - drbd_disk_str(os.disk), - drbd_disk_str(ns.disk)); - if (ns.pdsk != os.pdsk) - pbp += sprintf(pbp, "pdsk( %s -> %s ) ", - drbd_disk_str(os.pdsk), - drbd_disk_str(ns.pdsk)); - if (is_susp(ns) != is_susp(os)) - pbp += sprintf(pbp, "susp( %d -> %d ) ", - is_susp(os), - is_susp(ns)); - if (ns.aftr_isp != os.aftr_isp) - pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ", - os.aftr_isp, - ns.aftr_isp); - if (ns.peer_isp != os.peer_isp) - pbp += sprintf(pbp, "peer_isp( %d -> %d ) ", - os.peer_isp, - ns.peer_isp); - if (ns.user_isp != os.user_isp) - pbp += sprintf(pbp, "user_isp( %d -> %d ) ", - os.user_isp, - ns.user_isp); - dev_info(DEV, "%s\n", pb); - } + print_state_change(mdev, os, ns, flags); /* solve the race between becoming unconfigured, * worker doing the cleanup, and @@ -887,7 +890,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, ascw->done = done; drbd_queue_work(&mdev->tconn->data.work, &ascw->w); } else { - dev_warn(DEV, "Could not kmalloc an ascw\n"); + dev_err(DEV, "Could not kmalloc an ascw\n"); } return rv; @@ -1239,21 +1242,202 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, resume_next_sg(mdev); } - after_conn_state_ch(mdev->tconn, os, ns, flags); + after_all_state_ch(mdev->tconn, ns); + drbd_md_sync(mdev); } -static void after_conn_state_ch(struct drbd_tconn *tconn, union drbd_state os, - union drbd_state ns, enum chg_state_flags flags) -{ - /* Upon network configuration, we need to start the receiver */ - if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED) - drbd_thread_start(&tconn->receiver); +struct after_conn_state_chg_work { + struct drbd_work w; + enum drbd_conns oc; + union drbd_state nms; /* new, max state, over all mdevs */ + enum chg_state_flags flags; +}; - if (ns.disk == D_DISKLESS && - ns.conn == C_STANDALONE && - ns.role == R_SECONDARY) { +static 
void after_all_state_ch(struct drbd_tconn *tconn, union drbd_state ns) +{ + if (ns.disk == D_DISKLESS && ns.conn == C_STANDALONE && ns.role == R_SECONDARY) { /* if (test_bit(DEVICE_DYING, &mdev->flags)) TODO: DEVICE_DYING functionality */ drbd_thread_stop_nowait(&tconn->worker); } } + +static int w_after_conn_state_ch(struct drbd_work *w, int unused) +{ + struct after_conn_state_chg_work *acscw = + container_of(w, struct after_conn_state_chg_work, w); + struct drbd_tconn *tconn = w->tconn; + enum drbd_conns oc = acscw->oc; + union drbd_state nms = acscw->nms; + + kfree(acscw); + + /* Upon network configuration, we need to start the receiver */ + if (oc == C_STANDALONE && nms.conn == C_UNCONNECTED) + drbd_thread_start(&tconn->receiver); + + //conn_err(tconn, STATE_FMT, STATE_ARGS("nms", nms)); + after_all_state_ch(tconn, nms); + + return 1; +} + +static void print_conn_state_change(struct drbd_tconn *tconn, enum drbd_conns oc, enum drbd_conns nc) +{ + char *pbp, pb[300]; + pbp = pb; + *pbp = 0; + if (nc != oc) + pbp += sprintf(pbp, "conn( %s -> %s ) ", + drbd_conn_str(oc), + drbd_conn_str(nc)); + + conn_info(tconn, "%s\n", pb); +} + +struct _is_valid_itr_params { + enum chg_state_flags flags; + union drbd_state mask, val; + union drbd_state ms; /* maximal state, over all mdevs */ + enum drbd_conns oc; + enum { + OC_UNINITIALIZED, + OC_CONSISTENT, + OC_INCONSISTENT, + } oc_state; +}; + +static int _is_valid_itr_fn(int vnr, void *p, void *data) +{ + struct drbd_conf *mdev = (struct drbd_conf *)p; + struct _is_valid_itr_params *params = (struct _is_valid_itr_params *)data; + enum chg_state_flags flags = params->flags; + union drbd_state ns, os; + enum drbd_state_rv rv; + + os = mdev->state; + ns = apply_mask_val(os, params->mask, params->val); + ns = sanitize_state(mdev, ns, NULL); + rv = is_valid_state(mdev, ns); + + if (rv < SS_SUCCESS) { + /* If the old state was illegal as well, then let this happen...*/ + + if (is_valid_state(mdev, os) == rv) + rv = is_valid_soft_transition(os, ns); + } else + rv = is_valid_soft_transition(os, ns); + + switch (params->oc_state) { + case OC_UNINITIALIZED: + params->oc = os.conn; + params->oc_state = OC_CONSISTENT; + break; + case OC_CONSISTENT: + if (params->oc != os.conn) + params->oc_state = OC_INCONSISTENT; + break; + case OC_INCONSISTENT: + break; + } + + if (rv < SS_SUCCESS) { + if (flags & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + return rv; + } else + return 0; +} + +static int _set_state_itr_fn(int vnr, void *p, void *data) +{ + struct drbd_conf *mdev = (struct drbd_conf *)p; + struct _is_valid_itr_params *params = (struct _is_valid_itr_params *)data; + enum chg_state_flags flags = params->flags; + union drbd_state os, ns, ms = params->ms; + enum drbd_state_rv rv; + + os = mdev->state; + ns = apply_mask_val(os, params->mask, params->val); + ns = sanitize_state(mdev, ns, NULL); + + rv = __drbd_set_state(mdev, ns, flags, NULL); + + ms.role = max_t(enum drbd_role, mdev->state.role, ms.role); + ms.peer = max_t(enum drbd_role, mdev->state.peer, ms.peer); + ms.disk = max_t(enum drbd_role, mdev->state.disk, ms.disk); + ms.pdsk = max_t(enum drbd_role, mdev->state.pdsk, ms.pdsk); + params->ms = ms; + + return 0; +} + +enum drbd_state_rv +_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags flags) +{ + enum drbd_state_rv rv = SS_SUCCESS; + struct _is_valid_itr_params params; + struct after_conn_state_chg_work *acscw; + enum drbd_conns oc = tconn->cstate; + + read_lock(&global_state_lock); + + 
rv = is_valid_conn_transition(oc, val.conn); + if (rv < SS_SUCCESS) + goto abort; + + params.flags = flags; + params.mask = mask; + params.val = val; + params.oc_state = OC_UNINITIALIZED; + + if (!(flags & CS_HARD)) + rv = idr_for_each(&tconn->volumes, _is_valid_itr_fn, &params); + + if (rv == 0) /* idr_for_each semantics */ + rv = SS_SUCCESS; + + if (rv < SS_SUCCESS) + goto abort; + + if (params.oc_state == OC_CONSISTENT) { + oc = params.oc; + print_conn_state_change(tconn, oc, val.conn); + params.flags |= CS_NO_CSTATE_CHG; + } + tconn->cstate = val.conn; + params.ms.i = 0; + params.ms.conn = val.conn; + idr_for_each(&tconn->volumes, _set_state_itr_fn, &params); + + acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC); + if (acscw) { + acscw->oc = oc; + acscw->nms = params.ms; + acscw->flags = flags; + acscw->w.cb = w_after_conn_state_ch; + acscw->w.tconn = tconn; + drbd_queue_work(&tconn->data.work, &acscw->w); + } else { + conn_err(tconn, "Could not kmalloc an acscw\n"); + } + +abort: + read_unlock(&global_state_lock); + + return rv; +} + +enum drbd_state_rv +conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags flags) +{ + enum drbd_state_rv rv; + + spin_lock_irq(&tconn->req_lock); + rv = _conn_request_state(tconn, mask, val, flags); + spin_unlock_irq(&tconn->req_lock); + + return rv; +} diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index 3ec26e2c4c40..d312d84b8410 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -2,6 +2,7 @@ #define DRBD_STATE_H struct drbd_conf; +struct drbd_tconn; /** * DOC: DRBD State macros */ @@ -61,6 +62,7 @@ enum chg_state_flags { CS_WAIT_COMPLETE = 4, CS_SERIALIZE = 8, CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, + CS_NO_CSTATE_CHG = 16, /* Do not display changes in cstate. Internal to drbd_state.c */ }; extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, @@ -79,6 +81,14 @@ extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state, extern void print_st_err(struct drbd_conf *, union drbd_state, union drbd_state, int); +enum drbd_state_rv +_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags flags); + +enum drbd_state_rv +conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags flags); + extern void drbd_resume_al(struct drbd_conf *mdev); /** diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 8539df25bc22..eee017dd6d7d 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1720,11 +1720,10 @@ int drbd_worker(struct drbd_thread *thi) list_del_init(&w->list); spin_unlock_irq(&tconn->data.work.q_lock); - if (!w->cb(w, tconn->volume0->state.conn < C_CONNECTED)) { + if (!w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS)) { /* dev_warn(DEV, "worker: a callback failed! \n"); */ - if (tconn->volume0->state.conn >= C_CONNECTED) - drbd_force_state(tconn->volume0, - NS(conn, C_NETWORK_FAILURE)); + if (tconn->cstate >= C_WF_REPORT_PARAMS) + conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD); } } From dad20554812e73a2bfbe45d1b161d5d3c249e597 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 11 Feb 2011 19:43:55 +0100 Subject: [PATCH 0111/5831] drbd: Removed drbd_state_lock() and drbd_state_unlock() The lock they constructed is only taken when the state_mutex was already taken. It is superfluous.
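A minimal sketch (not taken verbatim from this patch) of how the two idioms the CLUSTER_ST_CHANGE flag used to support map onto the mutex; the receiver hunks below use exactly these calls:

	/* wait until any ongoing cluster-wide state change has finished */
	mutex_lock(&mdev->state_mutex);
	mutex_unlock(&mdev->state_mutex);

	/* detect a concurrent state change without blocking */
	if (mutex_is_locked(&mdev->state_mutex))
		drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);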
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 18 ------------------ drivers/block/drbd/drbd_receiver.c | 5 +++-- drivers/block/drbd/drbd_state.c | 4 ---- drivers/block/drbd/drbd_worker.c | 10 +++++----- 4 files changed, 8 insertions(+), 29 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index f718124c5c82..2dbcd13ba2af 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -764,7 +764,6 @@ enum { UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ MD_DIRTY, /* current uuids and flags not yet on disk */ USE_DEGR_WFC_T, /* degr-wfc-timeout instead of wfc-timeout. */ - CLUSTER_ST_CHANGE, /* Cluster wide state change going on... */ CL_ST_CHG_SUCCESS, CL_ST_CHG_FAIL, CRASHED_PRIMARY, /* This node was a crashed primary. @@ -1664,23 +1663,6 @@ static inline int drbd_ee_has_active_page(struct drbd_peer_request *peer_req) return 0; } - - - - - -static inline void drbd_state_lock(struct drbd_conf *mdev) -{ - wait_event(mdev->misc_wait, - !test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags)); -} - -static inline void drbd_state_unlock(struct drbd_conf *mdev) -{ - clear_bit(CLUSTER_ST_CHANGE, &mdev->flags); - wake_up(&mdev->misc_wait); -} - static inline enum drbd_state_rv _drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, enum chg_state_flags flags, struct completion *done) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 27e1eb7ce540..423e4dd2d53e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3167,7 +3167,8 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd, ongoing cluster wide state change is finished. That is important if we are primary and are detaching from our disk. We need to see the new disk state... 
*/ - wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags)); + mutex_lock(&mdev->state_mutex); + mutex_unlock(&mdev->state_mutex); if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT) updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); @@ -3218,7 +3219,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd, val.i = be32_to_cpu(p->val); if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) && - test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { + mutex_is_locked(&mdev->state_mutex)) { drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); return true; } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 7376d9dc0bc7..91433168e1d4 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -184,9 +184,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, goto abort; } - drbd_state_lock(mdev); if (!drbd_send_state_req(mdev, mask, val)) { - drbd_state_unlock(mdev); rv = SS_CW_FAILED_BY_PEER; if (f & CS_VERBOSE) print_st_err(mdev, os, ns, rv); @@ -197,7 +195,6 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, (rv = _req_st_cond(mdev, mask, val))); if (rv < SS_SUCCESS) { - drbd_state_unlock(mdev); if (f & CS_VERBOSE) print_st_err(mdev, os, ns, rv); goto abort; @@ -205,7 +202,6 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, spin_lock_irqsave(&mdev->tconn->req_lock, flags); ns = apply_mask_val(mdev->state, mask, val); rv = _drbd_set_state(mdev, ns, f, &done); - drbd_state_unlock(mdev); } else { rv = _drbd_set_state(mdev, ns, f, &done); } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index eee017dd6d7d..e8448712b958 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1536,21 +1536,21 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) } if (current == mdev->tconn->worker.task) { - /* The worker should not sleep waiting for drbd_state_lock(), + /* The worker should not sleep waiting for state_mutex, that can take long */ - if (test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { + if (!mutex_trylock(&mdev->state_mutex)) { set_bit(B_RS_H_DONE, &mdev->flags); mdev->start_resync_timer.expires = jiffies + HZ/5; add_timer(&mdev->start_resync_timer); return; } } else { - drbd_state_lock(mdev); + mutex_lock(&mdev->state_mutex); } clear_bit(B_RS_H_DONE, &mdev->flags); if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { - drbd_state_unlock(mdev); + mutex_unlock(&mdev->state_mutex); return; } @@ -1639,7 +1639,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) drbd_md_sync(mdev); } put_ldev(mdev); - drbd_state_unlock(mdev); + mutex_unlock(&mdev->state_mutex); } static int _worker_dying(int vnr, void *p, void *data) From 8410da8f0e3ff5c97bce1b10627316be509ce476 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 11 Feb 2011 20:11:10 +0100 Subject: [PATCH 0112/5831] drbd: Introduced tconn->cstate_mutex In compatibility mode with old DRBDs, use that as the state_mutex as well. 
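A minimal sketch of the resulting selection, made once the protocol version has been agreed (the same assignment appears in drbd_connected() below):

	/* peers speaking a pre-100 protocol only know device-wide state
	 * changes, so serialize those on the connection-wide mutex;
	 * newer peers keep a mutex per volume */
	mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
		&mdev->tconn->cstate_mutex :
		&mdev->own_state_mutex;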
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 +++++--- drivers/block/drbd/drbd_main.c | 4 +++- drivers/block/drbd/drbd_nl.c | 8 ++++---- drivers/block/drbd/drbd_receiver.c | 11 ++++++++--- drivers/block/drbd/drbd_state.c | 4 ++-- drivers/block/drbd/drbd_worker.c | 8 ++++---- 6 files changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 2dbcd13ba2af..152d07bcfb9f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -917,8 +917,9 @@ enum { struct drbd_tconn { /* is a resource from the config file */ char *name; /* Resource name */ struct list_head all_tconn; /* List of all drbd_tconn, prot by global_state_lock */ - struct idr volumes; /* to mdev mapping */ - enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */ + struct idr volumes; /* to mdev mapping */ + enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */ + struct mutex cstate_mutex; /* Protects graceful disconnects */ unsigned long flags; struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ @@ -1080,7 +1081,8 @@ struct drbd_conf { unsigned long comm_bm_set; /* communicated number of set bits. */ struct bm_io_work bm_io_work; u64 ed_uuid; /* UUID of the exposed data */ - struct mutex state_mutex; + struct mutex own_state_mutex; + struct mutex *state_mutex; /* either own_state_mutex or mdev->tconn->cstate_mutex */ char congestion_reason; /* Why we where congested... */ atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */ atomic_t rs_sect_ev; /* for submitted resync data rate, both */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b64b7388ee9d..1781d0ad35e1 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1801,7 +1801,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->ap_in_flight, 0); mutex_init(&mdev->md_io_mutex); - mutex_init(&mdev->state_mutex); + mutex_init(&mdev->own_state_mutex); + mdev->state_mutex = &mdev->own_state_mutex; spin_lock_init(&mdev->al_lock); spin_lock_init(&mdev->peer_seq_lock); @@ -2189,6 +2190,7 @@ struct drbd_tconn *drbd_new_tconn(char *name) goto fail; tconn->cstate = C_STANDALONE; + mutex_init(&tconn->cstate_mutex); spin_lock_init(&tconn->req_lock); atomic_set(&tconn->net_cnt, 0); init_waitqueue_head(&tconn->net_cnt_wait); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index eeb284aef3c8..3d8e63190dcd 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -320,7 +320,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) if (new_role == R_PRIMARY) request_ping(mdev->tconn); /* Detect a dead peer ASAP */ - mutex_lock(&mdev->state_mutex); + mutex_lock(mdev->state_mutex); mask.i = 0; mask.role = R_MASK; val.i = 0; val.role = new_role; @@ -439,7 +439,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); fail: - mutex_unlock(&mdev->state_mutex); + mutex_unlock(mdev->state_mutex); return rv; } @@ -2162,7 +2162,7 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl return 0; } - mutex_lock(&mdev->state_mutex); /* Protects us against serialized state changes. */ + mutex_lock(mdev->state_mutex); /* Protects us against serialized state changes. 
*/ if (!get_ldev(mdev)) { retcode = ERR_NO_DISK; @@ -2204,7 +2204,7 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl out_dec: put_ldev(mdev); out: - mutex_unlock(&mdev->state_mutex); + mutex_unlock(mdev->state_mutex); reply->ret_code = retcode; return 0; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 423e4dd2d53e..94c050ad55b6 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -753,6 +753,10 @@ static int drbd_connected(int vnr, void *p, void *data) atomic_set(&mdev->packet_seq, 0); mdev->peer_seq = 0; + mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ? + &mdev->tconn->cstate_mutex : + &mdev->own_state_mutex; + ok &= drbd_send_sync_param(mdev, &mdev->sync_conf); ok &= drbd_send_sizes(mdev, 0, 0); ok &= drbd_send_uuids(mdev); @@ -760,6 +764,7 @@ static int drbd_connected(int vnr, void *p, void *data) clear_bit(USE_DEGR_WFC_T, &mdev->flags); clear_bit(RESIZE_PENDING, &mdev->flags); + return !ok; } @@ -3167,8 +3172,8 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd, ongoing cluster wide state change is finished. That is important if we are primary and are detaching from our disk. We need to see the new disk state... */ - mutex_lock(&mdev->state_mutex); - mutex_unlock(&mdev->state_mutex); + mutex_lock(mdev->state_mutex); + mutex_unlock(mdev->state_mutex); if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT) updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); @@ -3219,7 +3224,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd, val.i = be32_to_cpu(p->val); if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) && - mutex_is_locked(&mdev->state_mutex)) { + mutex_is_locked(mdev->state_mutex)) { drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); return true; } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 91433168e1d4..2cd4fcef554a 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -163,7 +163,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, init_completion(&done); if (f & CS_SERIALIZE) - mutex_lock(&mdev->state_mutex); + mutex_lock(mdev->state_mutex); spin_lock_irqsave(&mdev->tconn->req_lock, flags); os = mdev->state; @@ -215,7 +215,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, abort: if (f & CS_SERIALIZE) - mutex_unlock(&mdev->state_mutex); + mutex_unlock(mdev->state_mutex); return rv; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index e8448712b958..9a9a00eabe0b 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1538,19 +1538,19 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) if (current == mdev->tconn->worker.task) { /* The worker should not sleep waiting for state_mutex, that can take long */ - if (!mutex_trylock(&mdev->state_mutex)) { + if (!mutex_trylock(mdev->state_mutex)) { set_bit(B_RS_H_DONE, &mdev->flags); mdev->start_resync_timer.expires = jiffies + HZ/5; add_timer(&mdev->start_resync_timer); return; } } else { - mutex_lock(&mdev->state_mutex); + mutex_lock(mdev->state_mutex); } clear_bit(B_RS_H_DONE, &mdev->flags); if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { - mutex_unlock(&mdev->state_mutex); + mutex_unlock(mdev->state_mutex); return; } @@ -1639,7 +1639,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) drbd_md_sync(mdev); } put_ldev(mdev); - 
mutex_unlock(&mdev->state_mutex); + mutex_unlock(mdev->state_mutex); } static int _worker_dying(int vnr, void *p, void *data) From cf29c9d8c8eff69885ee4c8ddf5f9db4dcc5ab6e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 11 Feb 2011 15:11:24 +0100 Subject: [PATCH 0113/5831] drbd: Implemented conn_send_state_req() Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 19 ++++++++++++++++++- drivers/block/drbd/drbd_main.c | 6 +++--- drivers/block/drbd/drbd_state.c | 2 -- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 152d07bcfb9f..4e7454958b85 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -221,8 +221,10 @@ enum drbd_packet { P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */ P_OUT_OF_SYNC = 0x28, /* Mark as out of sync (Outrunning), data socket */ P_RS_CANCEL = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */ + P_CONN_ST_CHG_REQ = 0x2a, /* data sock: Connection wide state request */ + P_CONN_ST_CHG_REPLY = 0x2b, /* meta sock: Connection side state req reply */ - P_MAX_CMD = 0x2A, + P_MAX_CMD = 0x2c, P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ P_MAX_OPT_CMD = 0x101, @@ -1177,6 +1179,8 @@ extern int drbd_send_uuids(struct drbd_conf *mdev); extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); +extern int _conn_send_state_req(struct drbd_tconn *, int vnr, enum drbd_packet cmd, + union drbd_state, union drbd_state); extern int _drbd_send_state(struct drbd_conf *mdev); extern int drbd_send_state(struct drbd_conf *mdev); extern int _conn_send_cmd(struct drbd_tconn *tconn, int vnr, struct socket *sock, @@ -1896,6 +1900,19 @@ static inline int drbd_send_ping_ack(struct drbd_tconn *tconn) return conn_send_cmd(tconn, 0, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); } +static inline int drbd_send_state_req(struct drbd_conf *mdev, + union drbd_state mask, union drbd_state val) +{ + return _conn_send_state_req(mdev->tconn, mdev->vnr, P_STATE_CHG_REQ, mask, val); +} + +static inline int conn_send_state_req(struct drbd_tconn *tconn, + union drbd_state mask, union drbd_state val) +{ + enum drbd_packet cmd = tconn->agreed_pro_version < 100 ? 
P_STATE_CHG_REQ : P_CONN_ST_CHG_REQ; + return _conn_send_state_req(tconn, 0, cmd, mask, val); +} + static inline void drbd_thread_stop(struct drbd_thread *thi) { _drbd_thread_stop(thi, false, true); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 1781d0ad35e1..e0efc918a5e6 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -970,15 +970,15 @@ int drbd_send_state(struct drbd_conf *mdev) return ok; } -int drbd_send_state_req(struct drbd_conf *mdev, - union drbd_state mask, union drbd_state val) +int _conn_send_state_req(struct drbd_tconn *tconn, int vnr, enum drbd_packet cmd, + union drbd_state mask, union drbd_state val) { struct p_req_state p; p.mask = cpu_to_be32(mask.i); p.val = cpu_to_be32(val.i); - return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ, &p.head, sizeof(p)); + return conn_send_cmd(tconn, vnr, USE_DATA_SOCKET, cmd, &p.head, sizeof(p)); } int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 2cd4fcef554a..f34e7d4c8886 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -37,9 +37,7 @@ struct after_state_chg_work { struct completion *done; }; - extern void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); -int drbd_send_state_req(struct drbd_conf *, union drbd_state, union drbd_state); static int w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); From 5aabf467e3933ba3fc30fd06a70517ab8a27a9bb Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 10 Feb 2011 20:27:54 +0100 Subject: [PATCH 0114/5831] drbd: Global_state_lock not necessary here... 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_state.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index f34e7d4c8886..8c49ca8dea3c 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1375,8 +1375,6 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ struct after_conn_state_chg_work *acscw; enum drbd_conns oc = tconn->cstate; - read_lock(&global_state_lock); - rv = is_valid_conn_transition(oc, val.conn); if (rv < SS_SUCCESS) goto abort; @@ -1418,8 +1416,6 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ } abort: - read_unlock(&global_state_lock); - return rv; } From fc3b10a45ffd350e7638e50feae091a401c270bb Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 15 Feb 2011 11:07:59 +0100 Subject: [PATCH 0115/5831] drbd: Implemented receiving of P_CONN_ST_CHG_REPLY Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 ++ drivers/block/drbd/drbd_receiver.c | 28 ++++++++++++++++++++-------- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4e7454958b85..4363b393a128 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -914,6 +914,8 @@ enum { SEND_PING, /* whether asender should send a ping asap */ SIGNAL_ASENDER, /* whether asender wants to be interrupted */ GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */ + CONN_WD_ST_CHG_OKAY, + CONN_WD_ST_CHG_FAIL, }; struct drbd_tconn { /* is a resource from the config file */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 94c050ad55b6..2a1094aa35b0 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4265,18 +4265,29 @@ int drbdd_init(struct drbd_thread *thi) static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply; + struct drbd_tconn *tconn = mdev->tconn; int retcode = be32_to_cpu(p->retcode); - if (retcode >= SS_SUCCESS) { - set_bit(CL_ST_CHG_SUCCESS, &mdev->flags); - } else { - set_bit(CL_ST_CHG_FAIL, &mdev->flags); - dev_err(DEV, "Requested state change failed by peer: %s (%d)\n", - drbd_set_st_err_str(retcode), retcode); + if (cmd == P_STATE_CHG_REPLY) { + if (retcode >= SS_SUCCESS) { + set_bit(CL_ST_CHG_SUCCESS, &mdev->flags); + } else { + set_bit(CL_ST_CHG_FAIL, &mdev->flags); + dev_err(DEV, "Requested state change failed by peer: %s (%d)\n", + drbd_set_st_err_str(retcode), retcode); + } + wake_up(&mdev->state_wait); + } else /* conn == P_CONN_ST_CHG_REPLY */ { + if (retcode >= SS_SUCCESS) { + set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags); + } else { + set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags); + conn_err(tconn, "Requested state change failed by peer: %s (%d)\n", + drbd_set_st_err_str(retcode), retcode); + } + wake_up(&tconn->ping_wait); } - wake_up(&mdev->state_wait); - return true; } @@ -4553,6 +4564,7 @@ static struct asender_cmd *get_asender_cmd(int cmd) [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply}, + [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_RqSReply }, [P_MAX_CMD] = { 0, NULL }, }; if (cmd > P_MAX_CMD || 
asender_tbl[cmd].process == NULL) From 047cd4a682b09a7bc5dd5610262405bb085f8b19 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 15 Feb 2011 11:09:33 +0100 Subject: [PATCH 0116/5831] drbd: implemented receiving of P_CONN_ST_CHG_REQ Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 10 ++++++++++ drivers/block/drbd/drbd_receiver.c | 10 ++++++++-- drivers/block/drbd/drbd_state.h | 1 + 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4363b393a128..b287bad4767f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1219,6 +1219,7 @@ extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size) extern int drbd_send_bitmap(struct drbd_conf *mdev); extern int _drbd_send_bitmap(struct drbd_conf *mdev); extern int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode); +extern int conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode); extern void drbd_free_bc(struct drbd_backing_dev *ldev); extern void drbd_mdev_cleanup(struct drbd_conf *mdev); void drbd_print_uuids(struct drbd_conf *mdev, const char *text); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index e0efc918a5e6..592f0c949fd0 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -990,6 +990,16 @@ int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY, &p.head, sizeof(p)); } +int conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode) +{ + struct p_req_state_reply p; + enum drbd_packet cmd = tconn->agreed_pro_version < 100 ? 
P_STATE_CHG_REPLY : P_CONN_ST_CHG_REPLY; + + p.retcode = cpu_to_be32(retcode); + + return conn_send_cmd(tconn, 0, USE_META_SOCKET, cmd, &p.head, sizeof(p)); +} + int fill_bitmap_rle_bits(struct drbd_conf *mdev, struct p_compressed_bm *p, struct bm_xfer_ctx *c) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2a1094aa35b0..c85d290beed3 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3232,9 +3232,14 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd, mask = convert_state(mask); val = convert_state(val); - rv = drbd_change_state(mdev, CS_VERBOSE, mask, val); + if (cmd == P_CONN_ST_CHG_REQ) { + rv = conn_request_state(mdev->tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY); + conn_send_sr_reply(mdev->tconn, rv); + } else { + rv = drbd_change_state(mdev, CS_VERBOSE, mask, val); + drbd_send_sr_reply(mdev, rv); + } - drbd_send_sr_reply(mdev, rv); drbd_md_sync(mdev); return true; @@ -3768,6 +3773,7 @@ static struct data_cmd drbd_cmd_handler[] = { [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, + [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state }, /* anything missing from this table is in * the asender_tbl, see get_asender_cmd */ [P_MAX_CMD] = { 0, 0, NULL }, diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index d312d84b8410..5fdbdf0be707 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -63,6 +63,7 @@ enum chg_state_flags { CS_SERIALIZE = 8, CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, CS_NO_CSTATE_CHG = 16, /* Do not display changes in cstate. 
Internal to drbd_state.c */ + CS_LOCAL_ONLY = 32, /* Do not consider a device pair wide state change */ }; extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, From df24aa45f4df43e8881c0f80d6a4e2653df7af05 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 15 Feb 2011 11:14:44 +0100 Subject: [PATCH 0117/5831] drbd: Implemented connection wide state changes That is used for graceful disconnect only Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 28 +++++++-------- drivers/block/drbd/drbd_state.c | 62 +++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 14 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 3d8e63190dcd..d6832f8d49a5 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1572,6 +1572,7 @@ fail: static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { + struct drbd_tconn *tconn = mdev->tconn; int retcode; struct disconnect dc; @@ -1582,30 +1583,29 @@ static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl } if (dc.force) { - spin_lock_irq(&mdev->tconn->req_lock); - if (mdev->state.conn >= C_WF_CONNECTION) - _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_HARD, NULL); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&tconn->req_lock); + if (tconn->cstate >= C_WF_CONNECTION) + _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); + spin_unlock_irq(&tconn->req_lock); goto done; } - retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED); + retcode = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0); if (retcode == SS_NOTHING_TO_DO) goto done; else if (retcode == SS_ALREADY_STANDALONE) goto done; else if (retcode == SS_PRIMARY_NOP) { - /* Our statche checking code wants to see the peer outdated. */ - retcode = drbd_request_state(mdev, NS2(conn, C_DISCONNECTING, - pdsk, D_OUTDATED)); + /* Our state checking code wants to see the peer outdated. */ + retcode = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, + pdsk, D_OUTDATED), CS_VERBOSE); } else if (retcode == SS_CW_FAILED_BY_PEER) { /* The peer probably wants to see us outdated. */ - retcode = _drbd_request_state(mdev, NS2(conn, C_DISCONNECTING, - disk, D_OUTDATED), - CS_ORDERED); + retcode = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, + disk, D_OUTDATED), 0); if (retcode == SS_IS_DISKLESS || retcode == SS_LOWER_THAN_OUTDATED) { - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); retcode = SS_SUCCESS; } } @@ -1613,8 +1613,8 @@ static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl if (retcode < SS_SUCCESS) goto fail; - if (wait_event_interruptible(mdev->state_wait, - mdev->state.conn != C_DISCONNECTING)) { + if (wait_event_interruptible(tconn->ping_wait, + tconn->cstate != C_DISCONNECTING)) { /* Do not test for mdev->state.conn == C_STANDALONE, since someone else might connect us in the mean time! 
*/ retcode = ERR_INTR; diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 8c49ca8dea3c..d3bf8e39fa56 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1366,6 +1366,61 @@ static int _set_state_itr_fn(int vnr, void *p, void *data) return 0; } +static enum drbd_state_rv +_conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val) +{ + struct _is_valid_itr_params params; + enum drbd_state_rv rv; + + if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags)) + return SS_CW_SUCCESS; + + if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags)) + return SS_CW_FAILED_BY_PEER; + + params.flags = CS_NO_CSTATE_CHG; /* öö think */ + params.mask = mask; + params.val = val; + + spin_lock_irq(&tconn->req_lock); + rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR; + + if (rv == SS_UNKNOWN_ERROR) + rv = idr_for_each(&tconn->volumes, _is_valid_itr_fn, &params); + + if (rv == 0) /* idr_for_each semantics */ + rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ + + spin_unlock_irq(&tconn->req_lock); + + return rv; +} + +static enum drbd_state_rv +conn_cl_wide(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags f) +{ + enum drbd_state_rv rv; + + spin_unlock_irq(&tconn->req_lock); + mutex_lock(&tconn->cstate_mutex); + + if (!conn_send_state_req(tconn, mask, val)) { + rv = SS_CW_FAILED_BY_PEER; + /* if (f & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); */ + goto abort; + } + + wait_event(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val))); + +abort: + mutex_unlock(&tconn->cstate_mutex); + spin_lock_irq(&tconn->req_lock); + + return rv; +} + enum drbd_state_rv _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, enum chg_state_flags flags) @@ -1393,6 +1448,13 @@ _conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_ if (rv < SS_SUCCESS) goto abort; + if (oc == C_WF_REPORT_PARAMS && val.conn == C_DISCONNECTING && + !(flags & (CS_LOCAL_ONLY | CS_HARD))) { + rv = conn_cl_wide(tconn, mask, val, flags); + if (rv < SS_SUCCESS) + goto abort; + } + if (params.oc_state == OC_CONSISTENT) { oc = params.oc; print_conn_state_change(tconn, oc, val.conn); From fbe29dec98622369c106ba72279500fb2f5aba99 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Feb 2011 16:38:35 +0100 Subject: [PATCH 0118/5831] drbd: Rename drbd_submit_ee -> drbd_submit_peer_request Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 5 +++-- drivers/block/drbd/drbd_receiver.c | 13 +++++++------ drivers/block/drbd/drbd_worker.c | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b287bad4767f..93c4db3ac67e 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1532,8 +1532,9 @@ extern void start_resync_timer_fn(unsigned long data); /* drbd_receiver.c */ extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector); -extern int drbd_submit_ee(struct drbd_conf *, struct drbd_peer_request *, - const unsigned, const int); +extern int drbd_submit_peer_request(struct drbd_conf *, + struct drbd_peer_request *, const unsigned, + const int); extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); extern struct drbd_peer_request *drbd_alloc_ee(struct drbd_conf *, u64, sector_t, unsigned int, diff --git
a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c85d290beed3..6b00650d2805 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1092,7 +1092,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) } /** - * drbd_submit_ee() + * drbd_submit_peer_request() * @mdev: DRBD device. * @peer_req: peer request * @rw: flag field, see bio->bi_rw @@ -1108,8 +1108,9 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) * on certain Xen deployments. */ /* TODO allocate from our own bio_set. */ -int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req, - const unsigned rw, const int fault_type) +int drbd_submit_peer_request(struct drbd_conf *mdev, + struct drbd_peer_request *peer_req, + const unsigned rw, const int fault_type) { struct bio *bios = NULL; struct bio *bio; @@ -1496,7 +1497,7 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si spin_unlock_irq(&mdev->tconn->req_lock); atomic_add(data_size >> 9, &mdev->rs_sect_ev); - if (drbd_submit_ee(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0) + if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0) return true; /* don't care for the reason here */ @@ -1936,7 +1937,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, drbd_al_begin_io(mdev, peer_req->i.sector); } - if (drbd_submit_ee(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0) + if (drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0) return true; /* don't care for the reason here */ @@ -2193,7 +2194,7 @@ submit: list_add_tail(&peer_req->w.list, &mdev->read_ee); spin_unlock_irq(&mdev->tconn->req_lock); - if (drbd_submit_ee(mdev, peer_req, READ, fault_type) == 0) + if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0) return true; /* don't care for the reason here */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 9a9a00eabe0b..2da2d23344f9 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -365,7 +365,7 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) spin_unlock_irq(&mdev->tconn->req_lock); atomic_add(size >> 9, &mdev->rs_sect_ev); - if (drbd_submit_ee(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0) + if (drbd_submit_peer_request(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0) return 0; /* If it failed because of ENOMEM, retry should help. If it failed From fcefa62e4c26e70c70b9e8252a4bc9b9031a4182 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Feb 2011 16:46:59 +0100 Subject: [PATCH 0119/5831] drbd: Rename drbd_endio_{pri,sec} -> drbd_{,peer_}request_endio Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 +- drivers/block/drbd/drbd_receiver.c | 13 +++++++------ drivers/block/drbd/drbd_req.c | 4 ++-- drivers/block/drbd/drbd_req.h | 2 +- drivers/block/drbd/drbd_worker.c | 8 ++++---- drivers/block/drbd/drbd_wrappers.h | 4 ++-- 6 files changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 93c4db3ac67e..93eb3a7ac711 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -661,7 +661,7 @@ struct drbd_request { /* if local IO is not allowed, will be NULL. * if local IO _is_ allowed, holds the locally submitted bio clone, * or, after local IO completion, the ERR_PTR(error). - * see drbd_endio_pri(). 
*/ + * see drbd_request_endio(). */ struct bio *private_bio; struct drbd_interval i; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6b00650d2805..1547c5106ab7 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1136,7 +1136,7 @@ next_bio: bio->bi_bdev = mdev->ldev->backing_bdev; bio->bi_rw = rw; bio->bi_private = peer_req; - bio->bi_end_io = drbd_endio_sec; + bio->bi_end_io = drbd_peer_request_endio; bio->bi_next = bios; bios = bio; @@ -1572,7 +1572,7 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, if (get_ldev(mdev)) { /* data is submitted to disk within recv_resync_read. * corresponding put_ldev done below on error, - * or in drbd_endio_sec. */ + * or in drbd_peer_request_endio. */ ok = recv_resync_read(mdev, sector, data_size); } else { if (__ratelimit(&drbd_ratelimit_state)) @@ -1760,10 +1760,11 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, return drbd_drain_block(mdev, data_size); } - /* get_ldev(mdev) successful. - * Corresponding put_ldev done either below (on various errors), - * or in drbd_endio_sec, if we successfully submit the data at - * the end of this function. */ + /* + * Corresponding put_ldev done either below (on various errors), or in + * drbd_peer_request_endio, if we successfully submit the data at the + * end of this function. + */ sector = be64_to_cpu(p->sector); peer_req = read_in_block(mdev, p->block_id, sector, data_size); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 45a543e5c6a9..18eb3d17f174 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -116,7 +116,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const drbd_set_in_sync(mdev, req->i.sector, req->i.size); /* one might be tempted to move the drbd_al_complete_io - * to the local io completion callback drbd_endio_pri. + * to the local io completion callback drbd_request_endio. * but, if this was a mirror write, we may only * drbd_al_complete_io after this is RQ_NET_DONE, * otherwise the extent could be dropped from the al @@ -252,7 +252,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) * what we need to do here is just: complete the master_bio. * * local completion error, if any, has been stored as ERR_PTR - * in private_bio within drbd_endio_pri. + * in private_bio within drbd_request_endio. 
*/ int ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK); int error = PTR_ERR(req->private_bio); diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index e6232ce5a1ca..e6f2361d6b19 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -230,7 +230,7 @@ static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bi req->private_bio = bio; bio->bi_private = req; - bio->bi_end_io = drbd_endio_pri; + bio->bi_end_io = drbd_request_endio; bio->bi_next = NULL; } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 2da2d23344f9..01ab0bc0cd9e 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -45,8 +45,8 @@ static int w_make_resync_request(struct drbd_work *w, int cancel); /* endio handlers: * drbd_md_io_complete (defined here) - * drbd_endio_pri (defined here) - * drbd_endio_sec (defined here) + * drbd_request_endio (defined here) + * drbd_peer_request_endio (defined here) * bm_async_io_complete (defined in drbd_bitmap.c) * * For all these callbacks, note the following: @@ -151,7 +151,7 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel /* writes on behalf of the partner, or resync writes, * "submitted" by the receiver. */ -void drbd_endio_sec(struct bio *bio, int error) +void drbd_peer_request_endio(struct bio *bio, int error) { struct drbd_peer_request *peer_req = bio->bi_private; struct drbd_conf *mdev = peer_req->w.mdev; @@ -187,7 +187,7 @@ void drbd_endio_sec(struct bio *bio, int error) /* read, readA or write requests on R_PRIMARY coming from drbd_make_request */ -void drbd_endio_pri(struct bio *bio, int error) +void drbd_request_endio(struct bio *bio, int error) { unsigned long flags; struct drbd_request *req = bio->bi_private; diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h index 151f1a37478f..decf9b282e8b 100644 --- a/drivers/block/drbd/drbd_wrappers.h +++ b/drivers/block/drbd/drbd_wrappers.h @@ -20,8 +20,8 @@ static inline void drbd_set_my_capacity(struct drbd_conf *mdev, /* bi_end_io handlers */ extern void drbd_md_io_complete(struct bio *bio, int error); -extern void drbd_endio_sec(struct bio *bio, int error); -extern void drbd_endio_pri(struct bio *bio, int error); +extern void drbd_peer_request_endio(struct bio *bio, int error); +extern void drbd_request_endio(struct bio *bio, int error); /* * used to submit our private bio From d0e22a260c5142171c730436664febb045b9f0f0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Feb 2011 18:11:24 +0100 Subject: [PATCH 0120/5831] drbd: Iterate over all overlapping intervals in a tree Add a macro and helper function for doing that. 
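A hedged usage sketch of the new iterator; the tree and the loop body here are illustrative only and not part of this patch:

	struct drbd_interval *i;

	/* visit every interval that overlaps [sector, sector + (size >> 9)) */
	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
		/* handle the conflicting request behind i */
	}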
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_interval.c | 18 ++++++++++++++++++ drivers/block/drbd/drbd_interval.h | 16 ++++++++++++---- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c index 14dbe2dd2d33..0e53f102e68a 100644 --- a/drivers/block/drbd/drbd_interval.c +++ b/drivers/block/drbd/drbd_interval.c @@ -157,3 +157,21 @@ drbd_find_overlap(struct rb_root *root, sector_t sector, unsigned int size) } return overlap; } + +struct drbd_interval * +drbd_next_overlap(struct drbd_interval *i, sector_t sector, unsigned int size) +{ + sector_t end = sector + (size >> 9); + struct rb_node *node; + + for (;;) { + node = rb_next(&i->rb); + if (!node) + return NULL; + i = rb_entry(node, struct drbd_interval, rb); + if (i->sector >= end) + return NULL; + if (sector < i->sector + (i->size >> 9)) + return i; + } +} diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h index 4010ad923948..f38fcb00c10d 100644 --- a/drivers/block/drbd/drbd_interval.h +++ b/drivers/block/drbd/drbd_interval.h @@ -23,10 +23,18 @@ static inline bool drbd_interval_empty(struct drbd_interval *i) return RB_EMPTY_NODE(&i->rb); } -bool drbd_insert_interval(struct rb_root *, struct drbd_interval *); -bool drbd_contains_interval(struct rb_root *, sector_t, struct drbd_interval *); -void drbd_remove_interval(struct rb_root *, struct drbd_interval *); -struct drbd_interval *drbd_find_overlap(struct rb_root *, sector_t, +extern bool drbd_insert_interval(struct rb_root *, struct drbd_interval *); +extern bool drbd_contains_interval(struct rb_root *, sector_t, + struct drbd_interval *); +extern void drbd_remove_interval(struct rb_root *, struct drbd_interval *); +extern struct drbd_interval *drbd_find_overlap(struct rb_root *, sector_t, + unsigned int); +extern struct drbd_interval *drbd_next_overlap(struct drbd_interval *, sector_t, unsigned int); +#define drbd_for_each_overlap(i, root, sector, size) \ + for (i = drbd_find_overlap(root, sector, size); \ + i; \ + i = drbd_next_overlap(i, sector, size)) + #endif /* __DRBD_INTERVAL_H */ From 8ca9844f105acf6981751e39c1ac1a240afe5a2b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 21 Feb 2011 12:34:58 +0100 Subject: [PATCH 0121/5831] drbd: Remove obsolete comment Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 18eb3d17f174..157c73743408 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -667,10 +667,9 @@ static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s return 0; if (mdev->state.disk < D_INCONSISTENT) return 0; - /* state.disk == D_INCONSISTENT We will have a look at the BitMap */ - nr_sectors = drbd_get_capacity(mdev->this_bdev); esector = sector + (size >> 9) - 1; + nr_sectors = drbd_get_capacity(mdev->this_bdev); D_ASSERT(sector < nr_sectors); D_ASSERT(esector < nr_sectors); From c670a398676499913ce72c26a66d204bcbdbc2e9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 21 Feb 2011 12:41:39 +0100 Subject: [PATCH 0122/5831] drbd: Use the IS_ALIGNED() macro in some more places Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 6 +++--- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_req.c | 2 +- 3 
files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 0748871d6b17..ad618637172b 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -767,7 +767,7 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, int wake_up = 0; unsigned long flags; - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { + if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", (unsigned long long)sector, size); return; @@ -832,7 +832,7 @@ int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, unsigned int enr, count = 0; struct lc_element *e; - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { + if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "sector: %llus, size: %d\n", (unsigned long long)sector, size); return 0; @@ -1217,7 +1217,7 @@ void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size) sector_t esector, nr_sectors; int wake_up = 0; - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { + if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", (unsigned long long)sector, size); return; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 1547c5106ab7..7540b3428329 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2038,7 +2038,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd, sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) { + if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, (unsigned long long)sector, size); return false; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 157c73743408..48d313dcae7d 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1023,7 +1023,7 @@ int drbd_make_request(struct request_queue *q, struct bio *bio) * what we "blindly" assume: */ D_ASSERT(bio->bi_size > 0); - D_ASSERT((bio->bi_size & 0x1ff) == 0); + D_ASSERT(IS_ALIGNED(bio->bi_size, 512)); D_ASSERT(bio->bi_idx == 0); /* to make some things easier, force alignment of requests within the From 8c387def58351f571cfcad93a3b57dff415b40c0 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 18 Feb 2011 14:13:07 +0100 Subject: [PATCH 0123/5831] drbd: simplify condition in drbd_may_do_local_read() fold if (x >= (N+1)) return 0; if (x < N) return 0; into if (x != N) return 0; Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 48d313dcae7d..733219884ab7 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -663,9 +663,7 @@ static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s if (mdev->state.disk == D_UP_TO_DATE) return 1; - if (mdev->state.disk >= D_OUTDATED) - return 0; - if (mdev->state.disk < D_INCONSISTENT) + if (mdev->state.disk != D_INCONSISTENT) return 0; esector = sector + (size >> 9) - 1; From 867f57483b1759f8cd76ec31ff1f37abde5ad577 
Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:20:53 +0100 Subject: [PATCH 0124/5831] drbd: fix typo in comment Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index ad618637172b..1ce3de6eed1b 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -546,7 +546,7 @@ cancel: } /** - * drbd_al_apply_to_bm() - Sets the bitmap to diry(1) where covered ba active AL extents + * drbd_al_apply_to_bm() - Sets the bitmap to dirty(1) where covered by active AL extents * @mdev: DRBD device. */ void drbd_al_apply_to_bm(struct drbd_conf *mdev) From 61610420f764acb835af4a450251dbab2ab6d621 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:20:54 +0100 Subject: [PATCH 0125/5831] drbd: in drbd_suspend_al, set AL_SUSPENDED before unlocking the activity log As using an empty activity log is the whole point of the exercise, make sure it is still empty when setting AL_SUSPENDED. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index d6832f8d49a5..ae8f42e38e4f 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -901,19 +901,17 @@ static void drbd_suspend_al(struct drbd_conf *mdev) { int s = 0; - if (lc_try_lock(mdev->act_log)) { - drbd_al_shrink(mdev); - lc_unlock(mdev->act_log); - } else { + if (!lc_try_lock(mdev->act_log)) { dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n"); return; } + drbd_al_shrink(mdev); spin_lock_irq(&mdev->tconn->req_lock); if (mdev->state.conn < C_CONNECTED) s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags); - spin_unlock_irq(&mdev->tconn->req_lock); + lc_unlock(mdev->act_log); if (s) dev_info(DEV, "Suspended AL updates\n"); From 4738fa16907a933d72bbcae1b8922dc9330fde92 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:20:55 +0100 Subject: [PATCH 0126/5831] drbd: use clear_bit_unlock() where appropriate Some open-coded clear_bit(); smp_mb__after_clear_bit(); should in fact have been smp_mb__before_clear_bit(); clear_bit(); Instead, use clear_bit_unlock() to annotate the intention, and have it do the right thing.
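For illustration, a generic before/after of the pattern being replaced (mirroring the bitmap page lock hunk below):

	/* open-coded: the barrier ends up on the wrong side of the clear */
	clear_bit(BM_PAGE_IO_LOCK, addr);
	smp_mb__after_clear_bit();

	/* release semantics: prior stores are ordered before the bit is cleared */
	clear_bit_unlock(BM_PAGE_IO_LOCK, addr);
	wake_up(&mdev->bitmap->bm_io_wait);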
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 3 +-- drivers/block/drbd/drbd_main.c | 3 +-- include/linux/lru_cache.h | 3 +-- lib/lru_cache.c | 10 ++++------ 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e8d652f197c3..4be737055718 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -219,8 +219,7 @@ static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr) { struct drbd_bitmap *b = mdev->bitmap; void *addr = &page_private(b->bm_pages[page_nr]); - clear_bit(BM_PAGE_IO_LOCK, addr); - smp_mb__after_clear_bit(); + clear_bit_unlock(BM_PAGE_IO_LOCK, addr); wake_up(&mdev->bitmap->bm_io_wait); } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 592f0c949fd0..c77e51a40926 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2818,8 +2818,7 @@ static int w_bitmap_io(struct drbd_work *w, int unused) put_ldev(mdev); } - clear_bit(BITMAP_IO, &mdev->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(BITMAP_IO, &mdev->flags); wake_up(&mdev->misc_wait); if (work->done) diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 7a71ffad037c..4cceafb0732d 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -275,8 +275,7 @@ static inline int lc_try_lock(struct lru_cache *lc) */ static inline void lc_unlock(struct lru_cache *lc) { - clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_DIRTY, &lc->flags); } static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) diff --git a/lib/lru_cache.c b/lib/lru_cache.c index a07e7268d7ed..9f353f7f41ca 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -44,8 +44,8 @@ MODULE_LICENSE("GPL"); } while (0) #define RETURN(x...) do { \ - clear_bit(__LC_PARANOIA, &lc->flags); \ - smp_mb__after_clear_bit(); return x ; } while (0) + clear_bit_unlock(__LC_PARANOIA, &lc->flags); \ + return x ; } while (0) /* BUG() if e is not one of the elements tracked by lc */ #define PARANOIA_LC_ELEMENT(lc, e) do { \ @@ -438,8 +438,7 @@ void lc_changed(struct lru_cache *lc, struct lc_element *e) hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); lc->changing_element = NULL; lc->new_number = LC_FREE; - clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_DIRTY, &lc->flags); RETURN(); } @@ -463,8 +462,7 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) /* move it to the front of LRU. */ list_move(&e->list, &lc->lru); lc->used--; - clear_bit(__LC_STARVING, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_STARVING, &lc->flags); } RETURN(e->refcnt); } From 0097f0405d365eff66235f887d47fa0b62b28599 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:20:57 +0100 Subject: [PATCH 0127/5831] lru_cache.h: fix comments referring to ts_ instead of lc_ For some time we contemplated calling the "struct lru_cache" a "struct tracked_set", and some comments kept the ts_ prefix. Fix those to match the member field names. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- lib/lru_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 9f353f7f41ca..4f638b86674f 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -378,7 +378,7 @@ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) /* it was not present in the active set. * we are going to recycle an unused (or even "free") element. * user may need to commit a transaction to record that change. - * we serialize on flags & TF_DIRTY */ + * we serialize on flags & LC_DIRTY */ if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { ++lc->dirty; RETURN(NULL); From a9efc748d679efb39fe7a8a536dde94cee691604 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:20:58 +0100 Subject: [PATCH 0128/5831] lru_cache: consolidate lc_get and lc_try_get Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- lib/lru_cache.c | 136 ++++++++++++++++++++++++------------------------ 1 file changed, 69 insertions(+), 67 deletions(-) diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 4f638b86674f..17621684758a 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -308,6 +308,58 @@ static int lc_unused_element_available(struct lru_cache *lc) return 0; } +static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool may_change) +{ + struct lc_element *e; + + PARANOIA_ENTRY(); + if (lc->flags & LC_STARVING) { + ++lc->starving; + RETURN(NULL); + } + + e = lc_find(lc, enr); + if (e) { + ++lc->hits; + if (e->refcnt++ == 0) + lc->used++; + list_move(&e->list, &lc->in_use); /* Not evictable... */ + RETURN(e); + } + + ++lc->misses; + if (!may_change) + RETURN(NULL); + + /* In case there is nothing available and we can not kick out + * the LRU element, we have to wait ... + */ + if (!lc_unused_element_available(lc)) { + __set_bit(__LC_STARVING, &lc->flags); + RETURN(NULL); + } + + /* it was not present in the active set. + * we are going to recycle an unused (or even "free") element. + * user may need to commit a transaction to record that change. + * we serialize on flags & LC_DIRTY */ + if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { + ++lc->dirty; + RETURN(NULL); + } + + e = lc_get_unused_element(lc); + BUG_ON(!e); + + clear_bit(__LC_STARVING, &lc->flags); + BUG_ON(++e->refcnt != 1); + lc->used++; + + lc->changing_element = e; + lc->new_number = enr; + + RETURN(e); +} /** * lc_get - get element by label, maybe change the active set @@ -348,78 +400,28 @@ static int lc_unused_element_available(struct lru_cache *lc) */ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) { - struct lc_element *e; - - PARANOIA_ENTRY(); - if (lc->flags & LC_STARVING) { - ++lc->starving; - RETURN(NULL); - } - - e = lc_find(lc, enr); - if (e) { - ++lc->hits; - if (e->refcnt++ == 0) - lc->used++; - list_move(&e->list, &lc->in_use); /* Not evictable... */ - RETURN(e); - } - - ++lc->misses; - - /* In case there is nothing available and we can not kick out - * the LRU element, we have to wait ... - */ - if (!lc_unused_element_available(lc)) { - __set_bit(__LC_STARVING, &lc->flags); - RETURN(NULL); - } - - /* it was not present in the active set. - * we are going to recycle an unused (or even "free") element. - * user may need to commit a transaction to record that change. 
- * we serialize on flags & LC_DIRTY */ - if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { - ++lc->dirty; - RETURN(NULL); - } - - e = lc_get_unused_element(lc); - BUG_ON(!e); - - clear_bit(__LC_STARVING, &lc->flags); - BUG_ON(++e->refcnt != 1); - lc->used++; - - lc->changing_element = e; - lc->new_number = enr; - - RETURN(e); + return __lc_get(lc, enr, 1); } -/* similar to lc_get, - * but only gets a new reference on an existing element. - * you either get the requested element, or NULL. - * will be consolidated into one function. +/** + * lc_try_get - get element by label, if present; do not change the active set + * @lc: the lru cache to operate on + * @enr: the label to look up + * + * Finds an element in the cache, increases its usage count, + * "touches" and returns it. + * + * Return values: + * NULL + * The cache was marked %LC_STARVING, + * or the requested label was not in the active set + * + * pointer to the element with the REQUESTED element number. + * In this case, it can be used right away */ struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) { - struct lc_element *e; - - PARANOIA_ENTRY(); - if (lc->flags & LC_STARVING) { - ++lc->starving; - RETURN(NULL); - } - - e = lc_find(lc, enr); - if (e) { - ++lc->hits; - if (e->refcnt++ == 0) - lc->used++; - list_move(&e->list, &lc->in_use); /* Not evictable... */ - } - RETURN(e); + return __lc_get(lc, enr, 0); } /** From 45dfffebd08c1445493bfa8f0ec05b38714b9b2d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:21:00 +0100 Subject: [PATCH 0129/5831] drbd: allow to select specific bitmap pages for writeout We are about to allow several changes to the active set in one activity log transaction. We have to write out the corresponding bitmap pages as well, if changed. Introduce drbd_bm_mark_for_writeout(), then re-use the existing bitmap writeout path to submit all marked pages in one go. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 49 ++++++++++++++++++++++++++++---- drivers/block/drbd/drbd_int.h | 13 ++++++--- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 4be737055718..bc89c4a30cbc 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -188,6 +188,9 @@ void drbd_bm_unlock(struct drbd_conf *mdev) /* to mark for lazy writeout once syncer cleared all clearable bits, * we if bits have been cleared since last IO. */ #define BM_PAGE_LAZY_WRITEOUT 28 +/* pages marked with this "HINT" will be considered for writeout + * on activity log transactions */ +#define BM_PAGE_HINT_WRITEOUT 27 /* store_page_idx uses non-atomic assignment. It is only used directly after * allocating the page. All other bm_set_page_* and bm_clear_page_* need to @@ -237,6 +240,27 @@ static void bm_set_page_need_writeout(struct page *page) set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)); } +/** + * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout + * @mdev: DRBD device. + * @page_nr: the bitmap page to mark with the "hint" flag + * + * From within an activity log transaction, we mark a few pages with these + * hints, then call drbd_bm_write_hinted(), which will only write out changed + * pages which are flagged with this mark. 
+ */ +void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr) +{ + struct page *page; + if (page_nr >= mdev->bitmap->bm_number_of_pages) { + dev_warn(DEV, "BAD: page_nr: %u, number_of_pages: %u\n", + page_nr, (int)mdev->bitmap->bm_number_of_pages); + return; + } + page = mdev->bitmap->bm_pages[page_nr]; + set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page)); +} + static int bm_test_page_unchanged(struct page *page) { volatile const unsigned long *addr = &page_private(page); @@ -897,6 +921,7 @@ struct bm_aio_ctx { struct completion done; unsigned flags; #define BM_AIO_COPY_PAGES 1 +#define BM_AIO_WRITE_HINTED 2 int error; }; @@ -1007,13 +1032,13 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must /* * bm_rw: read/write the whole bitmap from/to its on disk location. */ -static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local) +static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local) { struct bm_aio_ctx ctx = { .mdev = mdev, .in_flight = ATOMIC_INIT(1), .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), - .flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0, + .flags = flags, }; struct drbd_bitmap *b = mdev->bitmap; int num_pages, i, count = 0; @@ -1042,6 +1067,10 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) break; if (rw & WRITE) { + if ((flags & BM_AIO_WRITE_HINTED) && + !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT, + &page_private(b->bm_pages[i]))) + continue; if (bm_test_page_unchanged(b->bm_pages[i])) { dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i); continue; @@ -1099,7 +1128,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id */ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) { - return bm_rw(mdev, READ, 0); + return bm_rw(mdev, READ, 0, 0); } /** @@ -1110,7 +1139,7 @@ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) */ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) { - return bm_rw(mdev, WRITE, 0); + return bm_rw(mdev, WRITE, 0, 0); } /** @@ -1120,12 +1149,20 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) */ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local) { - return bm_rw(mdev, WRITE, upper_idx); + return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, upper_idx); } +/** + * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed. + * @mdev: DRBD device. + */ +int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local) +{ + return bm_rw(mdev, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0); +} /** - * drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap + * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap * @mdev: DRBD device. * @idx: bitmap page index * diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 93eb3a7ac711..edfdeb62c18f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1310,11 +1310,14 @@ struct bm_extent { #define SLEEP_TIME (HZ/10) -#define BM_BLOCK_SHIFT 12 /* 4k per bit */ +/* We do bitmap IO in units of 4k blocks. + * We also still have a hardcoded 4k per bit relation. 
*/ +#define BM_BLOCK_SHIFT 12 /* 4k per bit */ #define BM_BLOCK_SIZE (1< Date: Mon, 21 Feb 2011 13:21:01 +0100 Subject: [PATCH 0130/5831] lru_cache: allow multiple changes per transaction Allow multiple changes to the active set of elements in lru_cache. The only current user of lru_cache, drbd, is driving this generalisation. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 50 ++----- drivers/block/drbd/drbd_nl.c | 4 +- include/linux/lru_cache.h | 70 +++++---- lib/lru_cache.c | 243 +++++++++++++++++++++---------- 4 files changed, 226 insertions(+), 141 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 1ce3de6eed1b..44097c87fed7 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -175,7 +175,6 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) { struct lc_element *al_ext; struct lc_element *tmp; - unsigned long al_flags = 0; int wake; spin_lock_irq(&mdev->al_lock); @@ -190,19 +189,8 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) return NULL; } } - al_ext = lc_get(mdev->act_log, enr); - al_flags = mdev->act_log->flags; + al_ext = lc_get(mdev->act_log, enr); spin_unlock_irq(&mdev->al_lock); - - /* - if (!al_ext) { - if (al_flags & LC_STARVING) - dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n"); - if (al_flags & LC_DIRTY) - dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n"); - } - */ - return al_ext; } @@ -235,7 +223,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) mdev->al_writ_cnt++; spin_lock_irq(&mdev->al_lock); - lc_changed(mdev->act_log, al_ext); + lc_committed(mdev->act_log); spin_unlock_irq(&mdev->al_lock); wake_up(&mdev->al_wait); } @@ -601,7 +589,7 @@ void drbd_al_shrink(struct drbd_conf *mdev) struct lc_element *al_ext; int i; - D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags)); + D_ASSERT(test_bit(__LC_LOCKED, &mdev->act_log->flags)); for (i = 0; i < mdev->act_log->nr_elements; i++) { al_ext = lc_element_by_index(mdev->act_log, i); @@ -708,7 +696,9 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, } ext->rs_left = rs_left; ext->rs_failed = success ? 0 : count; - lc_changed(mdev->resync, &ext->lce); + /* we don't keep a persistent log of the resync lru, + * we can commit any change right away. */ + lc_committed(mdev->resync); } lc_put(mdev->resync, &ext->lce); /* no race, we are within the al_lock! 
*/ @@ -892,7 +882,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) if (bm_ext->lce.lc_number != enr) { bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); bm_ext->rs_failed = 0; - lc_changed(mdev->resync, &bm_ext->lce); + lc_committed(mdev->resync); wakeup = 1; } if (bm_ext->lce.refcnt == 1) @@ -908,7 +898,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) if (rs_flags & LC_STARVING) dev_warn(DEV, "Have to wait for element" " (resync LRU too small?)\n"); - BUG_ON(rs_flags & LC_DIRTY); + BUG_ON(rs_flags & LC_LOCKED); } return bm_ext; @@ -916,26 +906,12 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) static int _is_in_al(struct drbd_conf *mdev, unsigned int enr) { - struct lc_element *al_ext; - int rv = 0; + int rv; spin_lock_irq(&mdev->al_lock); - if (unlikely(enr == mdev->act_log->new_number)) - rv = 1; - else { - al_ext = lc_find(mdev->act_log, enr); - if (al_ext) { - if (al_ext->refcnt) - rv = 1; - } - } + rv = lc_is_used(mdev->act_log, enr); spin_unlock_irq(&mdev->al_lock); - /* - if (unlikely(rv)) { - dev_info(DEV, "Delaying sync read until app's write is done\n"); - } - */ return rv; } @@ -1065,13 +1041,13 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) if (rs_flags & LC_STARVING) dev_warn(DEV, "Have to wait for element" " (resync LRU too small?)\n"); - BUG_ON(rs_flags & LC_DIRTY); + BUG_ON(rs_flags & LC_LOCKED); goto try_again; } if (bm_ext->lce.lc_number != enr) { bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); bm_ext->rs_failed = 0; - lc_changed(mdev->resync, &bm_ext->lce); + lc_committed(mdev->resync); wake_up(&mdev->al_wait); D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0); } @@ -1082,8 +1058,6 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) } check_al: for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { - if (unlikely(al_enr+i == mdev->act_log->new_number)) - goto try_again; if (lc_is_used(mdev->act_log, al_enr+i)) goto try_again; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ae8f42e38e4f..0a92f5226c2a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -760,7 +760,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev) in_use = 0; t = mdev->act_log; - n = lc_create("act_log", drbd_al_ext_cache, + n = lc_create("act_log", drbd_al_ext_cache, 1, mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); if (n == NULL) { @@ -1016,7 +1016,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } resync_lru = lc_create("resync", drbd_bm_ext_cache, - 61, sizeof(struct bm_extent), + 1, 61, sizeof(struct bm_extent), offsetof(struct bm_extent, lce)); if (!resync_lru) { retcode = ERR_NOMEM; diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 4cceafb0732d..cbafae40c649 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -166,9 +166,11 @@ struct lc_element { /* if we want to track a larger set of objects, * it needs to become arch independend u64 */ unsigned lc_number; - /* special label when on free list */ #define LC_FREE (~0U) + + /* for pending changes */ + unsigned lc_new_number; }; struct lru_cache { @@ -176,6 +178,7 @@ struct lru_cache { struct list_head lru; struct list_head free; struct list_head in_use; + struct list_head to_be_changed; /* the pre-created kmem cache to allocate the objects from */ struct kmem_cache *lc_cache; @@ -186,7 +189,7 @@ struct lru_cache { size_t element_off; /* number of elements (indices) */ - unsigned int 
nr_elements; + unsigned int nr_elements; /* Arbitrary limit on maximum tracked objects. Practical limit is much * lower due to allocation failures, probably. For typical use cases, * nr_elements should be a few thousand at most. @@ -194,18 +197,19 @@ struct lru_cache { * 8 high bits of .lc_index to be overloaded with flags in the future. */ #define LC_MAX_ACTIVE (1<<24) + /* allow to accumulate a few (index:label) changes, + * but no more than max_pending_changes */ + unsigned int max_pending_changes; + /* number of elements currently on to_be_changed list */ + unsigned int pending_changes; + /* statistics */ - unsigned used; /* number of lelements currently on in_use list */ - unsigned long hits, misses, starving, dirty, changed; + unsigned used; /* number of elements currently on in_use list */ + unsigned long hits, misses, starving, locked, changed; /* see below: flag-bits for lru_cache */ unsigned long flags; - /* when changing the label of an index element */ - unsigned int new_number; - - /* for paranoia when changing the label of an index element */ - struct lc_element *changing_element; void *lc_private; const char *name; @@ -221,10 +225,15 @@ enum { /* debugging aid, to catch concurrent access early. * user needs to guarantee exclusive access by proper locking! */ __LC_PARANOIA, - /* if we need to change the set, but currently there is a changing - * transaction pending, we are "dirty", and must deferr further - * changing requests */ + + /* annotate that the set is "dirty", possibly accumulating further + * changes, until a transaction is finally triggered */ __LC_DIRTY, + + /* Locked, no further changes allowed. + * Also used to serialize changing transactions. */ + __LC_LOCKED, + /* if we need to change the set, but currently there is no free nor * unused element available, we are "starving", and must not give out * further references, to guarantee that eventually some refcnt will @@ -236,9 +245,11 @@ enum { }; #define LC_PARANOIA (1<<__LC_PARANOIA) #define LC_DIRTY (1<<__LC_DIRTY) +#define LC_LOCKED (1<<__LC_LOCKED) #define LC_STARVING (1<<__LC_STARVING) extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned max_pending_changes, unsigned e_count, size_t e_size, size_t e_off); extern void lc_reset(struct lru_cache *lc); extern void lc_destroy(struct lru_cache *lc); @@ -249,7 +260,7 @@ extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr); extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e); -extern void lc_changed(struct lru_cache *lc, struct lc_element *e); +extern void lc_committed(struct lru_cache *lc); struct seq_file; extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc); @@ -258,16 +269,28 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char void (*detail) (struct seq_file *, struct lc_element *)); /** - * lc_try_lock - can be used to stop lc_get() from changing the tracked set + * lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set + * @lc: the lru cache to operate on + * + * Allows (expects) the set to be "dirty". Note that the reference counts and + * order on the active and lru lists may still change. Used to serialize + * changing transactions. Returns true if we aquired the lock. 
+ */ +static inline int lc_try_lock_for_transaction(struct lru_cache *lc) +{ + return !test_and_set_bit(__LC_LOCKED, &lc->flags); +} + +/** + * lc_try_lock - variant to stop lc_get() from changing the tracked set * @lc: the lru cache to operate on * * Note that the reference counts and order on the active and lru lists may - * still change. Returns true if we acquired the lock. + * still change. Only works on a "clean" set. Returns true if we aquired the + * lock, which means there are no pending changes, and any further attempt to + * change the set will not succeed until the next lc_unlock(). */ -static inline int lc_try_lock(struct lru_cache *lc) -{ - return !test_and_set_bit(__LC_DIRTY, &lc->flags); -} +extern int lc_try_lock(struct lru_cache *lc); /** * lc_unlock - unlock @lc, allow lc_get() to change the set again @@ -275,14 +298,11 @@ static inline int lc_try_lock(struct lru_cache *lc) */ static inline void lc_unlock(struct lru_cache *lc) { - clear_bit_unlock(__LC_DIRTY, &lc->flags); + clear_bit(__LC_DIRTY, &lc->flags); + clear_bit_unlock(__LC_LOCKED, &lc->flags); } -static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) -{ - struct lc_element *e = lc_find(lc, enr); - return e && e->refcnt; -} +extern bool lc_is_used(struct lru_cache *lc, unsigned int enr); #define lc_entry(ptr, type, member) \ container_of(ptr, type, member) diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 17621684758a..d71d89498943 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -55,9 +55,40 @@ MODULE_LICENSE("GPL"); BUG_ON(i >= lc_->nr_elements); \ BUG_ON(lc_->lc_element[i] != e_); } while (0) + +/* We need to atomically + * - try to grab the lock (set LC_LOCKED) + * - only if there is no pending transaction + * (neither LC_DIRTY nor LC_STARVING is set) + * Because of PARANOIA_ENTRY() above abusing lc->flags as well, + * it is not sufficient to just say + * return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED); + */ +int lc_try_lock(struct lru_cache *lc) +{ + unsigned long val; + do { + val = cmpxchg(&lc->flags, 0, LC_LOCKED); + } while (unlikely (val == LC_PARANOIA)); + /* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */ + return 0 == val; +#if 0 + /* Alternative approach, spin in case someone enters or leaves a + * PARANOIA_ENTRY()/RETURN() section. */ + unsigned long old, new, val; + do { + old = lc->flags & LC_PARANOIA; + new = old | LC_LOCKED; + val = cmpxchg(&lc->flags, old, new); + } while (unlikely (val == (old ^ LC_PARANOIA))); + return old == val; +#endif +} + /** * lc_create - prepares to track objects in an active set * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details + * @max_pending_changes: maximum changes to accumulate until a transaction is required * @e_count: number of elements allowed to be active simultaneously * @e_size: size of the tracked objects * @e_off: offset to the &struct lc_element member in a tracked object @@ -66,6 +97,7 @@ MODULE_LICENSE("GPL"); * or NULL on (allocation) failure. 
*/ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned max_pending_changes, unsigned e_count, size_t e_size, size_t e_off) { struct hlist_head *slot = NULL; @@ -98,12 +130,13 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, INIT_LIST_HEAD(&lc->in_use); INIT_LIST_HEAD(&lc->lru); INIT_LIST_HEAD(&lc->free); + INIT_LIST_HEAD(&lc->to_be_changed); lc->name = name; lc->element_size = e_size; lc->element_off = e_off; lc->nr_elements = e_count; - lc->new_number = LC_FREE; + lc->max_pending_changes = max_pending_changes; lc->lc_cache = cache; lc->lc_element = element; lc->lc_slot = slot; @@ -117,6 +150,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, e = p + e_off; e->lc_index = i; e->lc_number = LC_FREE; + e->lc_new_number = LC_FREE; list_add(&e->list, &lc->free); element[i] = e; } @@ -175,15 +209,15 @@ void lc_reset(struct lru_cache *lc) INIT_LIST_HEAD(&lc->in_use); INIT_LIST_HEAD(&lc->lru); INIT_LIST_HEAD(&lc->free); + INIT_LIST_HEAD(&lc->to_be_changed); lc->used = 0; lc->hits = 0; lc->misses = 0; lc->starving = 0; - lc->dirty = 0; + lc->locked = 0; lc->changed = 0; + lc->pending_changes = 0; lc->flags = 0; - lc->changing_element = NULL; - lc->new_number = LC_FREE; memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); for (i = 0; i < lc->nr_elements; i++) { @@ -194,6 +228,7 @@ void lc_reset(struct lru_cache *lc) /* re-init it */ e->lc_index = i; e->lc_number = LC_FREE; + e->lc_new_number = LC_FREE; list_add(&e->list, &lc->free); } } @@ -208,14 +243,14 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) /* NOTE: * total calls to lc_get are * (starving + hits + misses) - * misses include "dirty" count (update from an other thread in + * misses include "locked" count (update from an other thread in * progress) and "changed", when this in fact lead to an successful * update of the cache. */ return seq_printf(seq, "\t%s: used:%u/%u " - "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", + "hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", lc->name, lc->used, lc->nr_elements, - lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); + lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); } static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) @@ -224,6 +259,27 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) } +static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr, + bool include_changing) +{ + struct hlist_node *n; + struct lc_element *e; + + BUG_ON(!lc); + BUG_ON(!lc->nr_elements); + hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { + /* "about to be changed" elements, pending transaction commit, + * are hashed by their "new number". "Normal" elements have + * lc_number == lc_new_number. */ + if (e->lc_new_number != enr) + continue; + if (e->lc_new_number == e->lc_number || include_changing) + return e; + break; + } + return NULL; +} + /** * lc_find - find element by label, if present in the hash table * @lc: The lru_cache object @@ -232,38 +288,28 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) * Returns the pointer to an element, if the element with the requested * "label" or element number is present in the hash table, * or NULL if not found. Does not change the refcnt. + * Ignores elements that are "about to be used", i.e. not yet in the active + * set, but still pending transaction commit. 
*/ struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) { - struct hlist_node *n; - struct lc_element *e; - - BUG_ON(!lc); - BUG_ON(!lc->nr_elements); - hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { - if (e->lc_number == enr) - return e; - } - return NULL; + return __lc_find(lc, enr, 0); } -/* returned element will be "recycled" immediately */ -static struct lc_element *lc_evict(struct lru_cache *lc) +/** + * lc_is_used - find element by label + * @lc: The lru_cache object + * @enr: element number + * + * Returns true, if the element with the requested "label" or element number is + * present in the hash table, and is used (refcnt > 0). + * Also finds elements that are not _currently_ used but only "about to be + * used", i.e. on the "to_be_changed" list, pending transaction commit. + */ +bool lc_is_used(struct lru_cache *lc, unsigned int enr) { - struct list_head *n; - struct lc_element *e; - - if (list_empty(&lc->lru)) - return NULL; - - n = lc->lru.prev; - e = list_entry(n, struct lc_element, list); - - PARANOIA_LC_ELEMENT(lc, e); - - list_del(&e->list); - hlist_del(&e->colision); - return e; + struct lc_element *e = __lc_find(lc, enr, 1); + return e && e->refcnt; } /** @@ -280,22 +326,34 @@ void lc_del(struct lru_cache *lc, struct lc_element *e) PARANOIA_LC_ELEMENT(lc, e); BUG_ON(e->refcnt); - e->lc_number = LC_FREE; + e->lc_number = e->lc_new_number = LC_FREE; hlist_del_init(&e->colision); list_move(&e->list, &lc->free); RETURN(); } -static struct lc_element *lc_get_unused_element(struct lru_cache *lc) +static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number) { struct list_head *n; + struct lc_element *e; - if (list_empty(&lc->free)) - return lc_evict(lc); + if (!list_empty(&lc->free)) + n = lc->free.next; + else if (!list_empty(&lc->lru)) + n = lc->lru.prev; + else + return NULL; - n = lc->free.next; - list_del(n); - return list_entry(n, struct lc_element, list); + e = list_entry(n, struct lc_element, list); + PARANOIA_LC_ELEMENT(lc, e); + + e->lc_new_number = new_number; + if (!hlist_unhashed(&e->colision)) + __hlist_del(&e->colision); + hlist_add_head(&e->colision, lc_hash_slot(lc, new_number)); + list_move(&e->list, &lc->to_be_changed); + + return e; } static int lc_unused_element_available(struct lru_cache *lc) @@ -318,8 +376,12 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool RETURN(NULL); } - e = lc_find(lc, enr); - if (e) { + e = __lc_find(lc, enr, 1); + /* if lc_new_number != lc_number, + * this enr is currently being pulled in already, + * and will be available once the pending transaction + * has been committed. */ + if (e && e->lc_new_number == e->lc_number) { ++lc->hits; if (e->refcnt++ == 0) lc->used++; @@ -331,6 +393,24 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool if (!may_change) RETURN(NULL); + /* It has been found above, but on the "to_be_changed" list, not yet + * committed. Don't pull it in twice, wait for the transaction, then + * try again */ + if (e) + RETURN(NULL); + + /* To avoid races with lc_try_lock(), first, mark us dirty + * (using test_and_set_bit, as it implies memory barriers), ... */ + test_and_set_bit(__LC_DIRTY, &lc->flags); + + /* ... only then check if it is locked anyways. If lc_unlock clears + * the dirty bit again, that's not a problem, we will come here again. 
+ */ + if (test_bit(__LC_LOCKED, &lc->flags)) { + ++lc->locked; + RETURN(NULL); + } + /* In case there is nothing available and we can not kick out * the LRU element, we have to wait ... */ @@ -339,24 +419,19 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool RETURN(NULL); } - /* it was not present in the active set. - * we are going to recycle an unused (or even "free") element. - * user may need to commit a transaction to record that change. - * we serialize on flags & LC_DIRTY */ - if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { - ++lc->dirty; + /* It was not present in the active set. We are going to recycle an + * unused (or even "free") element, but we won't accumulate more than + * max_pending_changes changes. */ + if (lc->pending_changes >= lc->max_pending_changes) RETURN(NULL); - } - e = lc_get_unused_element(lc); + e = lc_prepare_for_change(lc, enr); BUG_ON(!e); clear_bit(__LC_STARVING, &lc->flags); BUG_ON(++e->refcnt != 1); lc->used++; - - lc->changing_element = e; - lc->new_number = enr; + lc->pending_changes++; RETURN(e); } @@ -388,12 +463,15 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool * pointer to an UNUSED element with some different element number, * where that different number may also be %LC_FREE. * - * In this case, the cache is marked %LC_DIRTY (blocking further changes), - * and the returned element pointer is removed from the lru list and - * hash collision chains. The user now should do whatever housekeeping - * is necessary. - * Then he must call lc_changed(lc,element_pointer), to finish - * the change. + * In this case, the cache is marked %LC_DIRTY, + * so lc_try_lock() will no longer succeed. + * The returned element pointer is moved to the "to_be_changed" list, + * and registered with the new element number on the hash collision chains, + * so it is possible to pick it up from lc_is_used(). + * Up to "max_pending_changes" (see lc_create()) can be accumulated. + * The user now should do whatever housekeeping is necessary, + * typically serialize on lc_try_lock_for_transaction(), then call + * lc_committed(lc) and lc_unlock(), to finish the change. * * NOTE: The user needs to check the lc_number on EACH use, so he recognizes * any cache set change. @@ -425,22 +503,25 @@ struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) } /** - * lc_changed - tell @lc that the change has been recorded + * lc_committed - tell @lc that pending changes have been recorded * @lc: the lru cache to operate on - * @e: the element pending label change + * + * User is expected to serialize on explicit lc_try_lock_for_transaction() + * before the transaction is started, and later needs to lc_unlock() explicitly + * as well. 
*/ -void lc_changed(struct lru_cache *lc, struct lc_element *e) +void lc_committed(struct lru_cache *lc) { + struct lc_element *e, *tmp; + PARANOIA_ENTRY(); - BUG_ON(e != lc->changing_element); - PARANOIA_LC_ELEMENT(lc, e); - ++lc->changed; - e->lc_number = lc->new_number; - list_add(&e->list, &lc->in_use); - hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); - lc->changing_element = NULL; - lc->new_number = LC_FREE; - clear_bit_unlock(__LC_DIRTY, &lc->flags); + list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) { + /* count number of changes, not number of transactions */ + ++lc->changed; + e->lc_number = e->lc_new_number; + list_move(&e->list, &lc->in_use); + } + lc->pending_changes = 0; RETURN(); } @@ -459,7 +540,7 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) PARANOIA_ENTRY(); PARANOIA_LC_ELEMENT(lc, e); BUG_ON(e->refcnt == 0); - BUG_ON(e == lc->changing_element); + BUG_ON(e->lc_number != e->lc_new_number); if (--e->refcnt == 0) { /* move it to the front of LRU. */ list_move(&e->list, &lc->lru); @@ -504,16 +585,24 @@ unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) void lc_set(struct lru_cache *lc, unsigned int enr, int index) { struct lc_element *e; + struct list_head *lh; if (index < 0 || index >= lc->nr_elements) return; e = lc_element_by_index(lc, index); - e->lc_number = enr; + BUG_ON(e->lc_number != e->lc_new_number); + BUG_ON(e->refcnt != 0); + e->lc_number = e->lc_new_number = enr; hlist_del_init(&e->colision); - hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); - list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru); + if (enr == LC_FREE) + lh = &lc->free; + else { + hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); + lh = &lc->lru; + } + list_move(&e->list, lh); } /** @@ -553,8 +642,10 @@ EXPORT_SYMBOL(lc_try_get); EXPORT_SYMBOL(lc_find); EXPORT_SYMBOL(lc_get); EXPORT_SYMBOL(lc_put); -EXPORT_SYMBOL(lc_changed); +EXPORT_SYMBOL(lc_committed); EXPORT_SYMBOL(lc_element_by_index); EXPORT_SYMBOL(lc_index_of); EXPORT_SYMBOL(lc_seq_printf_stats); EXPORT_SYMBOL(lc_seq_dump_details); +EXPORT_SYMBOL(lc_try_lock); +EXPORT_SYMBOL(lc_is_used); From 7ad651b52218eea3f9280dbb353dfe0c42742d85 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:21:03 +0100 Subject: [PATCH 0131/5831] drbd: new on-disk activity log transaction format Use a new on-disk transaction format for the activity log, which allows for multiple changes to the active set per transaction. Using 4k transaction blocks, we can now get rid of the work-around code to deal with devices not supporting 512 byte logical block size. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 409 +++++++++++++++++++------------ drivers/block/drbd/drbd_int.h | 40 ++- drivers/block/drbd/drbd_main.c | 4 - drivers/block/drbd/drbd_nl.c | 42 +--- include/linux/drbd.h | 4 + include/linux/drbd_limits.h | 8 +- 6 files changed, 300 insertions(+), 207 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 44097c87fed7..ea3895de4e6d 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -24,21 +24,67 @@ */ #include +#include #include +#include +#include #include "drbd_int.h" #include "drbd_wrappers.h" -/* We maintain a trivial checksum in our on disk activity log. - * With that we can ensure correct operation even when the storage - * device might do a partial (last) sector write while losing power. 
- */ -struct __packed al_transaction { - u32 magic; - u32 tr_number; - struct __packed { - u32 pos; - u32 extent; } updates[1 + AL_EXTENTS_PT]; - u32 xor_sum; +/* all fields on disc in big endian */ +struct __packed al_transaction_on_disk { + /* don't we all like magic */ + __be32 magic; + + /* to identify the most recent transaction block + * in the on disk ring buffer */ + __be32 tr_number; + + /* checksum on the full 4k block, with this field set to 0. */ + __be32 crc32c; + + /* type of transaction, special transaction types like: + * purge-all, set-all-idle, set-all-active, ... to-be-defined */ + __be16 transaction_type; + + /* we currently allow only a few thousand extents, + * so 16bit will be enough for the slot number. */ + + /* how many updates in this transaction */ + __be16 n_updates; + + /* maximum slot number, "al-extents" in drbd.conf speak. + * Having this in each transaction should make reconfiguration + * of that parameter easier. */ + __be16 context_size; + + /* slot number the context starts with */ + __be16 context_start_slot_nr; + + /* Some reserved bytes. Expected usage is a 64bit counter of + * sectors-written since device creation, and other data generation tag + * supporting usage */ + __be32 __reserved[4]; + + /* --- 36 byte used --- */ + + /* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes + * in one transaction, then use the remaining byte in the 4k block for + * context information. "Flexible" number of updates per transaction + * does not help, as we have to account for the case when all update + * slots are used anyways, so it would only complicate code without + * additional benefit. + */ + __be16 update_slot_nr[AL_UPDATES_PER_TRANSACTION]; + + /* but the extent number is 32bit, which at an extent size of 4 MiB + * allows to cover device sizes of up to 2**54 Byte (16 PiB) */ + __be32 update_extent_nr[AL_UPDATES_PER_TRANSACTION]; + + /* --- 420 bytes used (36 + 64*6) --- */ + + /* 4096 - 420 = 3676 = 919 * 4 */ + __be32 context[AL_CONTEXT_PER_TRANSACTION]; }; struct update_odbm_work { @@ -48,11 +94,8 @@ struct update_odbm_work { struct update_al_work { struct drbd_work w; - struct lc_element *al_ext; struct completion event; - unsigned int enr; - /* if old_enr != LC_FREE, write corresponding bitmap sector, too */ - unsigned int old_enr; + int err; }; struct drbd_atodb_wait { @@ -107,67 +150,30 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw) { - int logical_block_size, mask, ok; - int offset = 0; + int ok; struct page *iop = mdev->md_io_page; D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); BUG_ON(!bdev->md_bdev); - logical_block_size = bdev_logical_block_size(bdev->md_bdev); - if (logical_block_size == 0) - logical_block_size = MD_SECTOR_SIZE; - - /* in case logical_block_size != 512 [ s390 only? 
] */ - if (logical_block_size != MD_SECTOR_SIZE) { - mask = (logical_block_size / MD_SECTOR_SIZE) - 1; - D_ASSERT(mask == 1 || mask == 3 || mask == 7); - D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE); - offset = sector & mask; - sector = sector & ~mask; - iop = mdev->md_io_tmpp; - - if (rw & WRITE) { - /* these are GFP_KERNEL pages, pre-allocated - * on device initialization */ - void *p = page_address(mdev->md_io_page); - void *hp = page_address(mdev->md_io_tmpp); - - ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, - READ, logical_block_size); - - if (unlikely(!ok)) { - dev_err(DEV, "drbd_md_sync_page_io(,%llus," - "READ [logical_block_size!=512]) failed!\n", - (unsigned long long)sector); - return 0; - } - - memcpy(hp + offset*MD_SECTOR_SIZE, p, MD_SECTOR_SIZE); - } - } + dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n", + current->comm, current->pid, __func__, + (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); if (sector < drbd_md_first_sector(bdev) || - sector > drbd_md_last_sector(bdev)) + sector + 7 > drbd_md_last_sector(bdev)) dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n", current->comm, current->pid, __func__, (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); - ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size); + ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE); if (unlikely(!ok)) { dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n", (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); return 0; } - if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) { - void *p = page_address(mdev->md_io_page); - void *hp = page_address(mdev->md_io_tmpp); - - memcpy(p, hp + offset*MD_SECTOR_SIZE, MD_SECTOR_SIZE); - } - return ok; } @@ -211,20 +217,34 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) * current->bio_tail list now. * we have to delegate updates to the activity log * to the worker thread. */ - init_completion(&al_work.event); - al_work.al_ext = al_ext; - al_work.enr = enr; - al_work.old_enr = al_ext->lc_number; - al_work.w.cb = w_al_write_transaction; - al_work.w.mdev = mdev; - drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); - wait_for_completion(&al_work.event); - mdev->al_writ_cnt++; + /* Serialize multiple transactions. + * This uses test_and_set_bit, memory barrier is implicit. + * Optimization potential: + * first check for transaction number > old transaction number, + * so not all waiters have to lock/unlock. */ + wait_event(mdev->al_wait, lc_try_lock_for_transaction(mdev->act_log)); - spin_lock_irq(&mdev->al_lock); - lc_committed(mdev->act_log); - spin_unlock_irq(&mdev->al_lock); + /* Double check: it may have been committed by someone else, + * while we have been waiting for the lock. 
*/ + if (al_ext->lc_number != enr) { + init_completion(&al_work.event); + al_work.w.cb = w_al_write_transaction; + al_work.w.mdev = mdev; + drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); + wait_for_completion(&al_work.event); + + mdev->al_writ_cnt++; + + spin_lock_irq(&mdev->al_lock); + /* FIXME + if (al_work.err) + we need an "lc_cancel" here; + */ + lc_committed(mdev->act_log); + spin_unlock_irq(&mdev->al_lock); + } + lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); } } @@ -283,95 +303,118 @@ w_al_write_transaction(struct drbd_work *w, int unused) { struct update_al_work *aw = container_of(w, struct update_al_work, w); struct drbd_conf *mdev = w->mdev; - struct lc_element *updated = aw->al_ext; - const unsigned int new_enr = aw->enr; - const unsigned int evicted = aw->old_enr; - struct al_transaction *buffer; + struct al_transaction_on_disk *buffer; + struct lc_element *e; sector_t sector; - int i, n, mx; - unsigned int extent_nr; - u32 xor_sum = 0; + int i, mx; + unsigned extent_nr; + unsigned crc = 0; if (!get_ldev(mdev)) { - dev_err(DEV, - "disk is %s, cannot start al transaction (-%d +%d)\n", - drbd_disk_str(mdev->state.disk), evicted, new_enr); + dev_err(DEV, "disk is %s, cannot start al transaction\n", + drbd_disk_str(mdev->state.disk)); + aw->err = -EIO; complete(&((struct update_al_work *)w)->event); return 1; } - /* do we have to do a bitmap write, first? - * TODO reduce maximum latency: - * submit both bios, then wait for both, - * instead of doing two synchronous sector writes. - * For now, we must not write the transaction, - * if we cannot write out the bitmap of the evicted extent. */ - if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) - drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted)); /* The bitmap write may have failed, causing a state change. */ if (mdev->state.disk < D_INCONSISTENT) { dev_err(DEV, - "disk is %s, cannot write al transaction (-%d +%d)\n", - drbd_disk_str(mdev->state.disk), evicted, new_enr); + "disk is %s, cannot write al transaction\n", + drbd_disk_str(mdev->state.disk)); + aw->err = -EIO; complete(&((struct update_al_work *)w)->event); put_ldev(mdev); return 1; } mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ - buffer = (struct al_transaction *)page_address(mdev->md_io_page); + buffer = page_address(mdev->md_io_page); - buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); + memset(buffer, 0, sizeof(*buffer)); + buffer->magic = cpu_to_be32(DRBD_AL_MAGIC); buffer->tr_number = cpu_to_be32(mdev->al_tr_number); - n = lc_index_of(mdev->act_log, updated); + i = 0; - buffer->updates[0].pos = cpu_to_be32(n); - buffer->updates[0].extent = cpu_to_be32(new_enr); + /* Even though no one can start to change this list + * once we set the LC_LOCKED -- from drbd_al_begin_io(), + * lc_try_lock_for_transaction() --, someone may still + * be in the process of changing it. 
*/ + spin_lock_irq(&mdev->al_lock); + list_for_each_entry(e, &mdev->act_log->to_be_changed, list) { + if (i == AL_UPDATES_PER_TRANSACTION) { + i++; + break; + } + buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index); + buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number); + if (e->lc_number != LC_FREE) + drbd_bm_mark_for_writeout(mdev, + al_extent_to_bm_page(e->lc_number)); + i++; + } + spin_unlock_irq(&mdev->al_lock); + BUG_ON(i > AL_UPDATES_PER_TRANSACTION); - xor_sum ^= new_enr; + buffer->n_updates = cpu_to_be16(i); + for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) { + buffer->update_slot_nr[i] = cpu_to_be16(-1); + buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE); + } - mx = min_t(int, AL_EXTENTS_PT, + buffer->context_size = cpu_to_be16(mdev->act_log->nr_elements); + buffer->context_start_slot_nr = cpu_to_be16(mdev->al_tr_cycle); + + mx = min_t(int, AL_CONTEXT_PER_TRANSACTION, mdev->act_log->nr_elements - mdev->al_tr_cycle); for (i = 0; i < mx; i++) { unsigned idx = mdev->al_tr_cycle + i; extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number; - buffer->updates[i+1].pos = cpu_to_be32(idx); - buffer->updates[i+1].extent = cpu_to_be32(extent_nr); - xor_sum ^= extent_nr; + buffer->context[i] = cpu_to_be32(extent_nr); } - for (; i < AL_EXTENTS_PT; i++) { - buffer->updates[i+1].pos = __constant_cpu_to_be32(-1); - buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE); - xor_sum ^= LC_FREE; - } - mdev->al_tr_cycle += AL_EXTENTS_PT; + for (; i < AL_CONTEXT_PER_TRANSACTION; i++) + buffer->context[i] = cpu_to_be32(LC_FREE); + + mdev->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION; if (mdev->al_tr_cycle >= mdev->act_log->nr_elements) mdev->al_tr_cycle = 0; - buffer->xor_sum = cpu_to_be32(xor_sum); - sector = mdev->ldev->md.md_offset - + mdev->ldev->md.al_offset + mdev->al_tr_pos; + + mdev->ldev->md.al_offset + + mdev->al_tr_pos * (MD_BLOCK_SIZE>>9); - if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) + crc = crc32c(0, buffer, 4096); + buffer->crc32c = cpu_to_be32(crc); + + if (drbd_bm_write_hinted(mdev)) + aw->err = -EIO; + /* drbd_chk_io_error done already */ + else if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { + aw->err = -EIO; drbd_chk_io_error(mdev, 1, true); - - if (++mdev->al_tr_pos > - div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) - mdev->al_tr_pos = 0; - - D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE); - mdev->al_tr_number++; + } else { + /* advance ringbuffer position and transaction counter */ + mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); + mdev->al_tr_number++; + } mutex_unlock(&mdev->md_io_mutex); - complete(&((struct update_al_work *)w)->event); put_ldev(mdev); return 1; } +/* FIXME + * reading of the activity log, + * and potentially dirtying of the affected bitmap regions, + * should be done from userland only. + * DRBD would simply always attach with an empty activity log, + * and refuse to attach to something that looks like a crashed primary. + */ + /** * drbd_al_read_tr() - Read a single transaction from the on disk activity log * @mdev: DRBD device. 
@@ -383,27 +426,39 @@ w_al_write_transaction(struct drbd_work *w, int unused) */ static int drbd_al_read_tr(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, - struct al_transaction *b, int index) { + struct al_transaction_on_disk *b = page_address(mdev->md_io_page); sector_t sector; - int rv, i; - u32 xor_sum = 0; + u32 crc; - sector = bdev->md.md_offset + bdev->md.al_offset + index; + sector = bdev->md.md_offset + + bdev->md.al_offset + + index * (MD_BLOCK_SIZE>>9); /* Dont process error normally, * as this is done before disk is attached! */ if (!drbd_md_sync_page_io(mdev, bdev, sector, READ)) return -1; - rv = (b->magic == cpu_to_be32(DRBD_MAGIC)); + if (!expect(b->magic == cpu_to_be32(DRBD_AL_MAGIC))) + return 0; - for (i = 0; i < AL_EXTENTS_PT + 1; i++) - xor_sum ^= be32_to_cpu(b->updates[i].extent); - rv &= (xor_sum == be32_to_cpu(b->xor_sum)); + if (!expect(be16_to_cpu(b->n_updates) <= AL_UPDATES_PER_TRANSACTION)) + return 0; - return rv; + if (!expect(be16_to_cpu(b->context_size) <= DRBD_AL_EXTENTS_MAX)) + return 0; + + if (!expect(be16_to_cpu(b->context_start_slot_nr) < DRBD_AL_EXTENTS_MAX)) + return 0; + + crc = be32_to_cpu(b->crc32c); + b->crc32c = 0; + if (!expect(crc == crc32c(0, b, 4096))) + return 0; + + return 1; } /** @@ -415,7 +470,7 @@ static int drbd_al_read_tr(struct drbd_conf *mdev, */ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { - struct al_transaction *buffer; + struct al_transaction_on_disk *b; int i; int rv; int mx; @@ -428,25 +483,36 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) u32 to_tnr = 0; u32 cnr; - mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT); + /* Note that this is expected to be called with a newly created, + * clean and all unused activity log of the "expected size". + */ /* lock out all other meta data io for now, * and make sure the page is mapped. */ mutex_lock(&mdev->md_io_mutex); - buffer = page_address(mdev->md_io_page); + b = page_address(mdev->md_io_page); + + /* Always use the full ringbuffer space for now. + * possible optimization: read in all of it, + * then scan the in-memory pages. 
*/ + + mx = (MD_AL_SECTORS*512/MD_BLOCK_SIZE); /* Find the valid transaction in the log */ - for (i = 0; i <= mx; i++) { - rv = drbd_al_read_tr(mdev, bdev, buffer, i); + for (i = 0; i < mx; i++) { + rv = drbd_al_read_tr(mdev, bdev, i); + /* invalid data in that block */ if (rv == 0) continue; + + /* IO error */ if (rv == -1) { mutex_unlock(&mdev->md_io_mutex); return 0; } - cnr = be32_to_cpu(buffer->tr_number); + cnr = be32_to_cpu(b->tr_number); if (++found_valid == 1) { from = i; to = i; @@ -454,8 +520,11 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) to_tnr = cnr; continue; } + + D_ASSERT(cnr != to_tnr); + D_ASSERT(cnr != from_tnr); if ((int)cnr - (int)from_tnr < 0) { - D_ASSERT(from_tnr - cnr + i - from == mx+1); + D_ASSERT(from_tnr - cnr + i - from == mx); from = i; from_tnr = cnr; } @@ -476,11 +545,10 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) * dev_info(DEV, "Reading from %d to %d.\n",from,to); */ i = from; while (1) { - int j, pos; - unsigned int extent_nr; - unsigned int trn; + struct lc_element *e; + unsigned j, n, slot, extent_nr; - rv = drbd_al_read_tr(mdev, bdev, buffer, i); + rv = drbd_al_read_tr(mdev, bdev, i); if (!expect(rv != 0)) goto cancel; if (rv == -1) { @@ -488,23 +556,55 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) return 0; } - trn = be32_to_cpu(buffer->tr_number); + /* deal with different transaction types. + * not yet implemented */ + if (!expect(b->transaction_type == 0)) + goto cancel; + /* on the fly re-create/resize activity log? + * will be a special transaction type flag. */ + if (!expect(be16_to_cpu(b->context_size) == mdev->act_log->nr_elements)) + goto cancel; + if (!expect(be16_to_cpu(b->context_start_slot_nr) < mdev->act_log->nr_elements)) + goto cancel; + + /* We are the only user of the activity log right now, + * don't actually need to take that lock. */ spin_lock_irq(&mdev->al_lock); - /* This loop runs backwards because in the cyclic - elements there might be an old version of the - updated element (in slot 0). So the element in slot 0 - can overwrite old versions. */ - for (j = AL_EXTENTS_PT; j >= 0; j--) { - pos = be32_to_cpu(buffer->updates[j].pos); - extent_nr = be32_to_cpu(buffer->updates[j].extent); + /* first, apply the context, ... */ + for (j = 0, slot = be16_to_cpu(b->context_start_slot_nr); + j < AL_CONTEXT_PER_TRANSACTION && + slot < mdev->act_log->nr_elements; j++, slot++) { + extent_nr = be32_to_cpu(b->context[j]); + e = lc_element_by_index(mdev->act_log, slot); + if (e->lc_number != extent_nr) { + if (extent_nr != LC_FREE) + active_extents++; + else + active_extents--; + } + lc_set(mdev->act_log, extent_nr, slot); + } - if (extent_nr == LC_FREE) - continue; - - lc_set(mdev->act_log, extent_nr, pos); - active_extents++; + /* ... then apply the updates, + * which override the context information. 
+ * drbd_al_read_tr already did the rangecheck + * on n <= AL_UPDATES_PER_TRANSACTION */ + n = be16_to_cpu(b->n_updates); + for (j = 0; j < n; j++) { + slot = be16_to_cpu(b->update_slot_nr[j]); + extent_nr = be32_to_cpu(b->update_extent_nr[j]); + if (!expect(slot < mdev->act_log->nr_elements)) + break; + e = lc_element_by_index(mdev->act_log, slot); + if (e->lc_number != extent_nr) { + if (extent_nr != LC_FREE) + active_extents++; + else + active_extents--; + } + lc_set(mdev->act_log, extent_nr, slot); } spin_unlock_irq(&mdev->al_lock); @@ -514,15 +614,12 @@ cancel: if (i == to) break; i++; - if (i > mx) + if (i >= mx) i = 0; } mdev->al_tr_number = to_tnr+1; - mdev->al_tr_pos = to; - if (++mdev->al_tr_pos > - div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) - mdev->al_tr_pos = 0; + mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); /* ok, we are done with it */ mutex_unlock(&mdev->md_io_mutex); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index edfdeb62c18f..3213808a898a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1069,7 +1069,6 @@ struct drbd_conf { atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */ wait_queue_head_t ee_wait; struct page *md_io_page; /* one page buffer for md_io */ - struct page *md_io_tmpp; /* for logical_block_size != 512 */ struct mutex md_io_mutex; /* protects the md_io_buffer */ spinlock_t al_lock; wait_queue_head_t al_wait; @@ -1259,22 +1258,39 @@ extern void drbd_ldev_destroy(struct drbd_conf *mdev); * either at the end of the backing device * or on a separate meta data device. */ -#define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */ /* The following numbers are sectors */ -#define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */ -#define MD_AL_MAX_SIZE 64 /* = 32 kb LOG ~ 3776 extents ~ 14 GB Storage */ -/* Allows up to about 3.8TB */ -#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE) +/* Allows up to about 3.8TB, so if you want more, + * you need to use the "flexible" meta data format. 
*/ +#define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */ +#define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */ +#define MD_AL_SECTORS 64 /* = 32 kB on disk activity log ring buffer */ +#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_SECTORS) -/* Since the smalles IO unit is usually 512 byte */ -#define MD_SECTOR_SHIFT 9 -#define MD_SECTOR_SIZE (1<ldev); mdev->ldev = NULL;); - if (mdev->md_io_tmpp) { - __free_page(mdev->md_io_tmpp); - mdev->md_io_tmpp = NULL; - } clear_bit(GO_DISKLESS, &mdev->flags); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 0a92f5226c2a..90d731723205 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -527,7 +527,7 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, case DRBD_MD_INDEX_FLEX_INT: bdev->md.md_offset = drbd_md_ss__(mdev, bdev); /* al size is still fixed */ - bdev->md.al_offset = -MD_AL_MAX_SIZE; + bdev->md.al_offset = -MD_AL_SECTORS; /* we need (slightly less than) ~ this much bitmap sectors: */ md_size_sect = drbd_get_capacity(bdev->backing_bdev); md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT); @@ -751,8 +751,8 @@ static int drbd_check_al_size(struct drbd_conf *mdev) unsigned int in_use; int i; - if (!expect(mdev->sync_conf.al_extents >= 7)) - mdev->sync_conf.al_extents = 127; + if (!expect(mdev->sync_conf.al_extents >= DRBD_AL_EXTENTS_MIN)) + mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_MIN; if (mdev->act_log && mdev->act_log->nr_elements == mdev->sync_conf.al_extents) @@ -760,7 +760,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev) in_use = 0; t = mdev->act_log; - n = lc_create("act_log", drbd_al_ext_cache, 1, + n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION, mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); if (n == NULL) { @@ -932,7 +932,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp union drbd_state ns, os; enum drbd_state_rv rv; int cp_discovered = 0; - int logical_block_size; drbd_reconfig_start(mdev); @@ -1087,25 +1086,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp drbd_md_set_sector_offsets(mdev, nbc); - /* allocate a second IO page if logical_block_size != 512 */ - logical_block_size = bdev_logical_block_size(nbc->md_bdev); - if (logical_block_size == 0) - logical_block_size = MD_SECTOR_SIZE; - - if (logical_block_size != MD_SECTOR_SIZE) { - if (!mdev->md_io_tmpp) { - struct page *page = alloc_page(GFP_NOIO); - if (!page) - goto force_diskless_dec; - - dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n", - logical_block_size, MD_SECTOR_SIZE); - dev_warn(DEV, "Workaround engaged (has performance impact).\n"); - - mdev->md_io_tmpp = page; - } - } - if (!mdev->bitmap) { if (drbd_bm_init(mdev)) { retcode = ERR_NOMEM; @@ -1804,14 +1784,12 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (!expect(sc.rate >= 1)) sc.rate = 1; - if (!expect(sc.al_extents >= 7)) - sc.al_extents = 127; /* arbitrary minimum */ -#define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT) - if (sc.al_extents > AL_MAX) { - dev_err(DEV, "sc.al_extents > %d\n", AL_MAX); - sc.al_extents = AL_MAX; - } -#undef AL_MAX + + /* clip to allowed range */ + if (!expect(sc.al_extents >= DRBD_AL_EXTENTS_MIN)) + sc.al_extents = DRBD_AL_EXTENTS_MIN; + if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX)) + sc.al_extents = DRBD_AL_EXTENTS_MAX; /* to avoid spurious errors when configuring minors before configuring * the minors they depend 
on: if necessary, first create the minor we diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 35fc08a0a552..70a688b92c1b 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -336,6 +336,10 @@ enum drbd_timeout_flag { #define DRBD_MAGIC 0x83740267 #define DRBD_MAGIC_BIG 0x835a +/* how I came up with this magic? + * base64 decode "actlog==" ;) */ +#define DRBD_AL_MAGIC 0x69cb65a2 + /* these are of type "int" */ #define DRBD_MD_INDEX_INTERNAL -1 #define DRBD_MD_INDEX_FLEX_EXT -2 diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 447c36752385..75f05af33725 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -102,10 +102,12 @@ #define DRBD_RATE_DEF 250 /* kb/second */ /* less than 7 would hit performance unnecessarily. - * 3833 is the largest prime that still does fit - * into 64 sectors of activity log */ + * 919 slots context information per transaction, + * 32k activity log, 4k transaction size, + * one transaction in flight: + * 919 * 7 = 6433 */ #define DRBD_AL_EXTENTS_MIN 7 -#define DRBD_AL_EXTENTS_MAX 3833 +#define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 127 #define DRBD_AFTER_MIN -1 From c9d963a46de87bdbc14565ed692ca3114ddbf11b Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 15:10:23 +0100 Subject: [PATCH 0132/5831] drbd: silence some log messages on bitmap IO Summary log messages meant for global bitmap IO should not be printed for bitmap IO caused by activity log transactions. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index bc89c4a30cbc..3791082979e1 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1096,9 +1096,12 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w */ if (!atomic_dec_and_test(&ctx.in_flight)) wait_for_completion(&ctx.done); - dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", - rw == WRITE ? "WRITE" : "READ", - count, jiffies - now); + + /* summary for global bitmap IO */ + if (flags == 0) + dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", + rw == WRITE ? 
"WRITE" : "READ", + count, jiffies - now); if (ctx.error) { dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); @@ -1116,8 +1119,9 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w } now = b->bm_set; - dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", - ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); + if (flags == 0) + dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", + ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); return err; } From 9676c760979371701ea5a6f8adb7ce8125c22c7d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 22 Feb 2011 14:02:31 +0100 Subject: [PATCH 0133/5831] drbd: fix a wrong likely(), updated comments Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_worker.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 01ab0bc0cd9e..8ee5c4f3d1c5 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -295,6 +295,7 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * crypto_hash_final(&desc, digest); } +/* MAYBE merge common code with w_e_end_ov_req */ static int w_e_send_csum(struct drbd_work *w, int cancel) { struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); @@ -306,7 +307,7 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) if (unlikely(cancel)) goto out; - if (likely((peer_req->flags & EE_WAS_ERROR) != 0)) + if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0)) goto out; digest_size = crypto_hash_digestsize(mdev->csums_tfm); @@ -315,7 +316,7 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) sector_t sector = peer_req->i.sector; unsigned int size = peer_req->i.size; drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest); - /* Free e and pages before send. + /* Free peer_req and pages before send. * In case we block on congestion, we could otherwise run into * some distributed deadlock, if the other side blocks on * congestion as well, because our receiver blocks in @@ -1151,11 +1152,11 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) } } - /* Free e and pages before send. - * In case we block on congestion, we could otherwise run into - * some distributed deadlock, if the other side blocks on - * congestion as well, because our receiver blocks in - * drbd_pp_alloc due to pp_in_use > max_buffers. */ + /* Free peer_req and pages before send. + * In case we block on congestion, we could otherwise run into + * some distributed deadlock, if the other side blocks on + * congestion as well, because our receiver blocks in + * drbd_pp_alloc due to pp_in_use > max_buffers. 
*/ drbd_free_ee(mdev, peer_req); if (!eq) drbd_ov_oos_found(mdev, sector, size); From 8050e6d00521795d153ea20d81712321e5b46d80 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 18 Feb 2011 16:12:48 +0100 Subject: [PATCH 0134/5831] drbd: Use container_of() instead of casting Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 7540b3428329..c08a99d57c5e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1455,7 +1455,8 @@ static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, * drbd_process_done_ee() by asender only */ static int e_end_resync_block(struct drbd_work *w, int unused) { - struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; + struct drbd_peer_request *peer_req = + container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; sector_t sector = peer_req->i.sector; int ok; @@ -1593,7 +1594,8 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, */ static int e_end_block(struct drbd_work *w, int cancel) { - struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; + struct drbd_peer_request *peer_req = + container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; sector_t sector = peer_req->i.sector; int ok = 1, pcmd; @@ -1631,7 +1633,8 @@ static int e_end_block(struct drbd_work *w, int cancel) static int e_send_discard_ack(struct drbd_work *w, int unused) { - struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w; + struct drbd_peer_request *peer_req = + container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; int ok = 1; From 206d3589411fbdead67a358ce9aaa20d771724df Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 26 Feb 2011 23:19:15 +0100 Subject: [PATCH 0135/5831] drbd: Concurrent write detection fix Commit 9b1e63e changed the concurrent write detection algorithm to only insert peer requests into write_requests tree after determining that there is no conflict. With this change, new conflicting local requests could be added while the algorithm runs, but this case was not handled correctly. Instead of making the algorithm deal with this case, switch back to adding peer requests to the write_requests tree immediately: this improves fairness. 
When a peer request is discarded, remove that request from the write_requests Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 41 +++++++++++++++--------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c08a99d57c5e..c61bf121bd0f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1636,16 +1636,10 @@ static int e_send_discard_ack(struct drbd_work *w, int unused) struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); struct drbd_conf *mdev = w->mdev; - int ok = 1; + int ok; D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); ok = drbd_send_ack(mdev, P_DISCARD_ACK, peer_req); - - spin_lock_irq(&mdev->tconn->req_lock); - D_ASSERT(!drbd_interval_empty(&peer_req->i)); - drbd_remove_epoch_entry_interval(mdev, peer_req); - spin_unlock_irq(&mdev->tconn->req_lock); - dec_unacked(mdev); return ok; @@ -1836,6 +1830,12 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, spin_lock_irq(&mdev->tconn->req_lock); + /* + * Inserting the peer request into the write_requests tree will + * prevent new conflicting local requests from being added. + */ + drbd_insert_interval(&mdev->write_requests, &peer_req->i); + first = 1; for (;;) { struct drbd_interval *i; @@ -1844,26 +1844,26 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE); - i = drbd_find_overlap(&mdev->write_requests, sector, size); - if (i) { + drbd_for_each_overlap(i, &mdev->write_requests, sector, size) { + struct drbd_request *req2; + + if (i == &peer_req->i || !i->local) + continue; + /* only ALERT on first iteration, * we may be woken up early... */ if (first) - dev_alert(DEV, "%s[%u] Concurrent %s write detected!" + dev_alert(DEV, "%s[%u] Concurrent local write detected!" " new: %llus +%u; pending: %llus +%u\n", current->comm, current->pid, - i->local ? "local" : "remote", (unsigned long long)sector, size, (unsigned long long)i->sector, i->size); - if (i->local) { - struct drbd_request *req2; - - req2 = container_of(i, struct drbd_request, i); - if (req2->rq_state & RQ_NET_PENDING) - ++have_unacked; - } + req2 = container_of(i, struct drbd_request, i); + if (req2->rq_state & RQ_NET_PENDING) + ++have_unacked; ++have_conflict; + break; } if (!have_conflict) break; @@ -1872,6 +1872,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, if (first && discard && have_unacked) { dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n", (unsigned long long)sector); + drbd_remove_epoch_entry_interval(mdev, peer_req); inc_unacked(mdev); peer_req->w.cb = e_send_discard_ack; list_add_tail(&peer_req->w.list, &mdev->done_ee); @@ -1888,6 +1889,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, } if (signal_pending(current)) { + drbd_remove_epoch_entry_interval(mdev, peer_req); spin_unlock_irq(&mdev->tconn->req_lock); finish_wait(&mdev->misc_wait, &wait); goto out_interrupted; @@ -1906,12 +1908,11 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, * there must be none now. */ D_ASSERT(have_unacked == 0); } + /* FIXME: Introduce a timeout here after which we disconnect. 
*/ schedule(); spin_lock_irq(&mdev->tconn->req_lock); } finish_wait(&mdev->misc_wait, &wait); - - drbd_insert_interval(&mdev->write_requests, &peer_req->i); } list_add(&peer_req->w.list, &mdev->active_ee); From 8ccf218e9f19ecae4d115eeff686c9f1a1e5bc9e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 24 Feb 2011 11:35:43 +0100 Subject: [PATCH 0136/5831] drbd: Replace atomic_add_return with atomic_inc_return Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/drbd/drbd_nl.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index efedfbc06198..4718aa4e5272 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1242,7 +1242,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, p.sector = sector; p.block_id = block_id; p.blksize = blksize; - p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq)); + p.seq_num = cpu_to_be32(atomic_inc_return(&mdev->packet_seq)); if (!mdev->tconn->meta.socket || mdev->state.conn < C_CONNECTED) return false; @@ -1530,7 +1530,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) prepare_header(mdev, &p.head, P_DATA, sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); p.sector = cpu_to_be64(req->i.sector); p.block_id = (unsigned long)req; - p.seq_num = cpu_to_be32(req->seq_num = atomic_add_return(1, &mdev->packet_seq)); + p.seq_num = cpu_to_be32(req->seq_num = atomic_inc_return(&mdev->packet_seq)); dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 90d731723205..016858741cfd 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2374,7 +2374,7 @@ void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state) cn_reply->id.idx = CN_IDX_DRBD; cn_reply->id.val = CN_VAL_DRBD; - cn_reply->seq = atomic_add_return(1, &drbd_nl_seq); + cn_reply->seq = atomic_inc_return(&drbd_nl_seq); cn_reply->ack = 0; /* not used here. */ cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + (int)((char *)tl - (char *)reply->tag_list); @@ -2406,7 +2406,7 @@ void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) cn_reply->id.idx = CN_IDX_DRBD; cn_reply->id.val = CN_VAL_DRBD; - cn_reply->seq = atomic_add_return(1, &drbd_nl_seq); + cn_reply->seq = atomic_inc_return(&drbd_nl_seq); cn_reply->ack = 0; /* not used here. */ cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + (int)((char *)tl - (char *)reply->tag_list); @@ -2485,7 +2485,7 @@ void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs, cn_reply->id.idx = CN_IDX_DRBD; cn_reply->id.val = CN_VAL_DRBD; - cn_reply->seq = atomic_add_return(1,&drbd_nl_seq); + cn_reply->seq = atomic_inc_return(&drbd_nl_seq); cn_reply->ack = 0; // not used here. cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + (int)((char*)tl - (char*)reply->tag_list); @@ -2524,7 +2524,7 @@ void drbd_bcast_sync_progress(struct drbd_conf *mdev) cn_reply->id.idx = CN_IDX_DRBD; cn_reply->id.val = CN_VAL_DRBD; - cn_reply->seq = atomic_add_return(1, &drbd_nl_seq); + cn_reply->seq = atomic_inc_return(&drbd_nl_seq); cn_reply->ack = 0; /* not used here. 
*/ cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + (int)((char *)tl - (char *)reply->tag_list); From 71b1c1eb9c544141e743c4d14b3c576fd4c31a5a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 1 Mar 2011 15:40:43 +0100 Subject: [PATCH 0137/5831] drbd: Use ping-timeout when waiting for missing ack packets When the node with the discard flag resolves write conflicts in dual-primary mode, it may determine that its peer has sent ack packets on the metadata socket which did not arrive, yet. Wait for the next ack with ping-timeout instead of a hard-coded 30 seconds. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c61bf121bd0f..112098bc4c8c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1710,11 +1710,12 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) } p_seq = mdev->peer_seq; spin_unlock(&mdev->peer_seq_lock); - timeout = schedule_timeout(30*HZ); + timeout = mdev->tconn->net_conf->ping_timeo*HZ/10; + timeout = schedule_timeout(timeout); spin_lock(&mdev->peer_seq_lock); if (timeout == 0 && p_seq == mdev->peer_seq) { ret = -ETIMEDOUT; - dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n"); + dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n"); break; } } From 7be8da0798f08fb9564d4f64fe4a7d6fb4fab20b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 22 Feb 2011 02:15:32 +0100 Subject: [PATCH 0138/5831] drbd: Improve how conflicting writes are handled The previous algorithm for dealing with overlapping concurrent writes was generating unnecessary warnings for scenarios which could be legitimate, and did not always handle partially overlapping requests correctly. Improve it algorithm as follows: * While local or remote write requests are in progress, conflicting new local write requests will be delayed (commit 82172f7). * When a conflict between a local and remote write request is detected, the node with the discard flag decides how to resolve the conflict: It will ask its peer to discard conflicting requests which are fully contained in the local request and retry requests which overlap only partially. This involves a protocol change. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 15 +- drivers/block/drbd/drbd_main.c | 35 ++- drivers/block/drbd/drbd_receiver.c | 410 ++++++++++++++++++----------- drivers/block/drbd/drbd_req.c | 75 +++--- drivers/block/drbd/drbd_req.h | 7 +- 5 files changed, 351 insertions(+), 191 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 3213808a898a..17e905d0582d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -200,7 +200,7 @@ enum drbd_packet { P_RECV_ACK = 0x15, /* Used in protocol B */ P_WRITE_ACK = 0x16, /* Used in protocol C */ P_RS_WRITE_ACK = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */ - P_DISCARD_ACK = 0x18, /* Used in proto C, two-primaries conflict detection */ + P_DISCARD_WRITE = 0x18, /* Used in proto C, two-primaries conflict detection */ P_NEG_ACK = 0x19, /* Sent if local disk is unusable */ P_NEG_DREPLY = 0x1a, /* Local disk is broken... */ P_NEG_RS_DREPLY = 0x1b, /* Local disk is broken... 
*/ @@ -223,8 +223,9 @@ enum drbd_packet { P_RS_CANCEL = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */ P_CONN_ST_CHG_REQ = 0x2a, /* data sock: Connection wide state request */ P_CONN_ST_CHG_REPLY = 0x2b, /* meta sock: Connection side state req reply */ + P_RETRY_WRITE = 0x2c, /* Protocol C: retry conflicting write request */ - P_MAX_CMD = 0x2c, + P_MAX_CMD = 0x2d, P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ P_MAX_OPT_CMD = 0x101, @@ -350,7 +351,7 @@ struct p_data { * commands which share a struct: * p_block_ack: * P_RECV_ACK (proto B), P_WRITE_ACK (proto C), - * P_DISCARD_ACK (proto C, two-primaries conflict detection) + * P_DISCARD_WRITE (proto C, two-primaries conflict detection) * p_block_req: * P_DATA_REQUEST, P_RS_DATA_REQUEST */ @@ -362,7 +363,6 @@ struct p_block_ack { u32 seq_num; } __packed; - struct p_block_req { struct p_header head; u64 sector; @@ -655,6 +655,8 @@ struct drbd_work { #include "drbd_interval.h" +extern int drbd_wait_misc(struct drbd_conf *, struct drbd_interval *); + struct drbd_request { struct drbd_work w; @@ -752,12 +754,16 @@ enum { /* This ee has a pointer to a digest instead of a block id */ __EE_HAS_DIGEST, + + /* Conflicting local requests need to be restarted after this request */ + __EE_RESTART_REQUESTS, }; #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) #define EE_RESUBMITTED (1<<__EE_RESUBMITTED) #define EE_WAS_ERROR (1<<__EE_WAS_ERROR) #define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST) +#define EE_RESTART_REQUESTS (1<<__EE_RESTART_REQUESTS) /* flag bits per mdev */ enum { @@ -1478,6 +1484,7 @@ extern void drbd_free_tconn(struct drbd_tconn *tconn); extern int proc_details; /* drbd_req */ +extern int __drbd_make_request(struct drbd_conf *, struct bio *, unsigned long); extern int drbd_make_request(struct request_queue *q, struct bio *bio); extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req); extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4718aa4e5272..b93c5eccd73d 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3003,7 +3003,7 @@ const char *cmdname(enum drbd_packet cmd) [P_RECV_ACK] = "RecvAck", [P_WRITE_ACK] = "WriteAck", [P_RS_WRITE_ACK] = "RSWriteAck", - [P_DISCARD_ACK] = "DiscardAck", + [P_DISCARD_WRITE] = "DiscardWrite", [P_NEG_ACK] = "NegAck", [P_NEG_DREPLY] = "NegDReply", [P_NEG_RS_DREPLY] = "NegRSDReply", @@ -3018,6 +3018,7 @@ const char *cmdname(enum drbd_packet cmd) [P_COMPRESSED_BITMAP] = "CBitmap", [P_DELAY_PROBE] = "DelayProbe", [P_OUT_OF_SYNC] = "OutOfSync", + [P_RETRY_WRITE] = "RetryWrite", [P_MAX_CMD] = NULL, }; @@ -3032,6 +3033,38 @@ const char *cmdname(enum drbd_packet cmd) return cmdnames[cmd]; } +/** + * drbd_wait_misc - wait for a request to make progress + * @mdev: device associated with the request + * @i: the struct drbd_interval embedded in struct drbd_request or + * struct drbd_peer_request + */ +int drbd_wait_misc(struct drbd_conf *mdev, struct drbd_interval *i) +{ + struct net_conf *net_conf = mdev->tconn->net_conf; + DEFINE_WAIT(wait); + long timeout; + + if (!net_conf) + return -ETIMEDOUT; + timeout = MAX_SCHEDULE_TIMEOUT; + if (net_conf->ko_count) + timeout = net_conf->timeout * HZ / 10 * net_conf->ko_count; + + /* Indicate to wake up mdev->misc_wait on progress. 
*/ + i->waiting = true; + prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE); + spin_unlock_irq(&mdev->tconn->req_lock); + timeout = schedule_timeout(timeout); + finish_wait(&mdev->misc_wait, &wait); + spin_lock_irq(&mdev->tconn->req_lock); + if (!timeout || mdev->state.conn < C_CONNECTED) + return -ETIMEDOUT; + if (signal_pending(current)) + return -ERESTARTSYS; + return 0; +} + #ifdef CONFIG_DRBD_FAULT_INJECTION /* Fault insertion support including random number generator shamelessly * stolen from kernel/rcutorture.c */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 112098bc4c8c..8c82d8945cf6 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -415,7 +415,7 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) drbd_free_net_ee(mdev, peer_req); /* possible callbacks here: - * e_end_block, and e_end_resync_block, e_send_discard_ack. + * e_end_block, and e_end_resync_block, e_send_discard_write. * all ignore the last argument. */ list_for_each_entry_safe(peer_req, t, &work_list, w.list) { @@ -1589,6 +1589,51 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd, return ok; } +static int w_restart_write(struct drbd_work *w, int cancel) +{ + struct drbd_request *req = container_of(w, struct drbd_request, w); + struct drbd_conf *mdev = w->mdev; + struct bio *bio; + unsigned long start_time; + unsigned long flags; + + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + if (!expect(req->rq_state & RQ_POSTPONED)) { + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + return 0; + } + bio = req->master_bio; + start_time = req->start_time; + /* Postponed requests will not have their master_bio completed! */ + __req_mod(req, DISCARD_WRITE, NULL); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + + while (__drbd_make_request(mdev, bio, start_time)) + /* retry */ ; + return 1; +} + +static void restart_conflicting_writes(struct drbd_conf *mdev, + sector_t sector, int size) +{ + struct drbd_interval *i; + struct drbd_request *req; + + drbd_for_each_overlap(i, &mdev->write_requests, sector, size) { + if (!i->local) + continue; + req = container_of(i, struct drbd_request, i); + if (req->rq_state & RQ_LOCAL_PENDING || + !(req->rq_state & RQ_POSTPONED)) + continue; + if (expect(list_empty(&req->w.list))) { + req->w.mdev = mdev; + req->w.cb = w_restart_write; + drbd_queue_work(&mdev->tconn->data.work, &req->w); + } + } +} + /* e_end_block() is called via drbd_process_done_ee(). 
* this means this function only runs in the asender thread */ @@ -1622,6 +1667,8 @@ static int e_end_block(struct drbd_work *w, int cancel) spin_lock_irq(&mdev->tconn->req_lock); D_ASSERT(!drbd_interval_empty(&peer_req->i)); drbd_remove_epoch_entry_interval(mdev, peer_req); + if (peer_req->flags & EE_RESTART_REQUESTS) + restart_conflicting_writes(mdev, sector, peer_req->i.size); spin_unlock_irq(&mdev->tconn->req_lock); } else D_ASSERT(drbd_interval_empty(&peer_req->i)); @@ -1631,20 +1678,32 @@ static int e_end_block(struct drbd_work *w, int cancel) return ok; } -static int e_send_discard_ack(struct drbd_work *w, int unused) +static int e_send_ack(struct drbd_work *w, enum drbd_packet ack) { + struct drbd_conf *mdev = w->mdev; struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); - struct drbd_conf *mdev = w->mdev; int ok; - D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); - ok = drbd_send_ack(mdev, P_DISCARD_ACK, peer_req); + ok = drbd_send_ack(mdev, ack, peer_req); dec_unacked(mdev); return ok; } +static int e_send_discard_write(struct drbd_work *w, int unused) +{ + return e_send_ack(w, P_DISCARD_WRITE); +} + +static int e_send_retry_write(struct drbd_work *w, int unused) +{ + struct drbd_tconn *tconn = w->mdev->tconn; + + return e_send_ack(w, tconn->agreed_pro_version >= 100 ? + P_RETRY_WRITE : P_DISCARD_WRITE); +} + static bool seq_greater(u32 a, u32 b) { /* @@ -1660,16 +1719,31 @@ static u32 seq_max(u32 a, u32 b) return seq_greater(a, b) ? a : b; } +static bool need_peer_seq(struct drbd_conf *mdev) +{ + struct drbd_tconn *tconn = mdev->tconn; + + /* + * We only need to keep track of the last packet_seq number of our peer + * if we are in dual-primary mode and we have the discard flag set; see + * handle_write_conflicts(). + */ + return tconn->net_conf->two_primaries && + test_bit(DISCARD_CONCURRENT, &tconn->flags); +} + static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq) { unsigned int old_peer_seq; - spin_lock(&mdev->peer_seq_lock); - old_peer_seq = mdev->peer_seq; - mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq); - spin_unlock(&mdev->peer_seq_lock); - if (old_peer_seq != peer_seq) - wake_up(&mdev->seq_wait); + if (need_peer_seq(mdev)) { + spin_lock(&mdev->peer_seq_lock); + old_peer_seq = mdev->peer_seq; + mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq); + spin_unlock(&mdev->peer_seq_lock); + if (old_peer_seq != peer_seq) + wake_up(&mdev->seq_wait); + } } /* Called from receive_Data. @@ -1693,36 +1767,39 @@ static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq) * * returns 0 if we may process the packet, * -ERESTARTSYS if we were interrupted (by disconnect signal). 
*/ -static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) +static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq) { DEFINE_WAIT(wait); - unsigned int p_seq; long timeout; - int ret = 0; + int ret; + + if (!need_peer_seq(mdev)) + return 0; + spin_lock(&mdev->peer_seq_lock); for (;;) { - prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE); - if (!seq_greater(packet_seq, mdev->peer_seq + 1)) + if (!seq_greater(peer_seq - 1, mdev->peer_seq)) { + mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq); + ret = 0; break; + } if (signal_pending(current)) { ret = -ERESTARTSYS; break; } - p_seq = mdev->peer_seq; + prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE); spin_unlock(&mdev->peer_seq_lock); timeout = mdev->tconn->net_conf->ping_timeo*HZ/10; timeout = schedule_timeout(timeout); spin_lock(&mdev->peer_seq_lock); - if (timeout == 0 && p_seq == mdev->peer_seq) { + if (!timeout) { ret = -ETIMEDOUT; dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n"); break; } } - finish_wait(&mdev->seq_wait, &wait); - if (mdev->peer_seq+1 == packet_seq) - mdev->peer_seq++; spin_unlock(&mdev->peer_seq_lock); + finish_wait(&mdev->seq_wait, &wait); return ret; } @@ -1737,6 +1814,139 @@ static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf) (dpf & DP_DISCARD ? REQ_DISCARD : 0); } +static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector, + unsigned int size) +{ + struct drbd_interval *i; + + repeat: + drbd_for_each_overlap(i, &mdev->write_requests, sector, size) { + struct drbd_request *req; + struct bio_and_error m; + + if (!i->local) + continue; + req = container_of(i, struct drbd_request, i); + if (!(req->rq_state & RQ_POSTPONED)) + continue; + req->rq_state &= ~RQ_POSTPONED; + __req_mod(req, NEG_ACKED, &m); + spin_unlock_irq(&mdev->tconn->req_lock); + if (m.bio) + complete_master_bio(mdev, &m); + spin_lock_irq(&mdev->tconn->req_lock); + goto repeat; + } +} + +static int handle_write_conflicts(struct drbd_conf *mdev, + struct drbd_peer_request *peer_req) +{ + struct drbd_tconn *tconn = mdev->tconn; + bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags); + sector_t sector = peer_req->i.sector; + const unsigned int size = peer_req->i.size; + struct drbd_interval *i; + bool equal; + int err; + + /* + * Inserting the peer request into the write_requests tree will prevent + * new conflicting local requests from being added. + */ + drbd_insert_interval(&mdev->write_requests, &peer_req->i); + + repeat: + drbd_for_each_overlap(i, &mdev->write_requests, sector, size) { + if (i == &peer_req->i) + continue; + + if (!i->local) { + /* + * Our peer has sent a conflicting remote request; this + * should not happen in a two-node setup. Wait for the + * earlier peer request to complete. + */ + err = drbd_wait_misc(mdev, i); + if (err) + goto out; + goto repeat; + } + + equal = i->sector == sector && i->size == size; + if (resolve_conflicts) { + /* + * If the peer request is fully contained within the + * overlapping request, it can be discarded; otherwise, + * it will be retried once all overlapping requests + * have completed. + */ + bool discard = i->sector <= sector && i->sector + + (i->size >> 9) >= sector + (size >> 9); + + if (!equal) + dev_alert(DEV, "Concurrent writes detected: " + "local=%llus +%u, remote=%llus +%u, " + "assuming %s came first\n", + (unsigned long long)i->sector, i->size, + (unsigned long long)sector, size, + discard ? 
"local" : "remote"); + + inc_unacked(mdev); + peer_req->w.cb = discard ? e_send_discard_write : + e_send_retry_write; + list_add_tail(&peer_req->w.list, &mdev->done_ee); + wake_asender(mdev->tconn); + + err = -ENOENT; + goto out; + } else { + struct drbd_request *req = + container_of(i, struct drbd_request, i); + + if (!equal) + dev_alert(DEV, "Concurrent writes detected: " + "local=%llus +%u, remote=%llus +%u\n", + (unsigned long long)i->sector, i->size, + (unsigned long long)sector, size); + + if (req->rq_state & RQ_LOCAL_PENDING || + !(req->rq_state & RQ_POSTPONED)) { + /* + * Wait for the node with the discard flag to + * decide if this request will be discarded or + * retried. Requests that are discarded will + * disappear from the write_requests tree. + * + * In addition, wait for the conflicting + * request to finish locally before submitting + * the conflicting peer request. + */ + err = drbd_wait_misc(mdev, &req->i); + if (err) { + _conn_request_state(mdev->tconn, + NS(conn, C_TIMEOUT), + CS_HARD); + fail_postponed_requests(mdev, sector, size); + goto out; + } + goto repeat; + } + /* + * Remember to restart the conflicting requests after + * the new peer request has completed. + */ + peer_req->flags |= EE_RESTART_REQUESTS; + } + } + err = 0; + + out: + if (err) + drbd_remove_epoch_entry_interval(mdev, peer_req); + return err; +} + /* mirrored write */ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, unsigned int data_size) @@ -1744,18 +1954,17 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, sector_t sector; struct drbd_peer_request *peer_req; struct p_data *p = &mdev->tconn->data.rbuf.data; + u32 peer_seq = be32_to_cpu(p->seq_num); int rw = WRITE; u32 dp_flags; + int err; + if (!get_ldev(mdev)) { - spin_lock(&mdev->peer_seq_lock); - if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num)) - mdev->peer_seq++; - spin_unlock(&mdev->peer_seq_lock); - + err = wait_for_and_update_peer_seq(mdev, peer_seq); drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size); atomic_inc(&mdev->current_epoch->epoch_size); - return drbd_drain_block(mdev, data_size); + return drbd_drain_block(mdev, data_size) && err == 0; } /* @@ -1785,137 +1994,22 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd, atomic_inc(&peer_req->epoch->active); spin_unlock(&mdev->epoch_lock); - /* I'm the receiver, I do hold a net_cnt reference. */ - if (!mdev->tconn->net_conf->two_primaries) { - spin_lock_irq(&mdev->tconn->req_lock); - } else { - /* don't get the req_lock yet, - * we may sleep in drbd_wait_peer_seq */ - const int size = peer_req->i.size; - const int discard = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); - DEFINE_WAIT(wait); - int first; - - D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); - - /* conflict detection and handling: - * 1. wait on the sequence number, - * in case this data packet overtook ACK packets. - * 2. check for conflicting write requests. - * - * Note: for two_primaries, we are protocol C, - * so there cannot be any request that is DONE - * but still on the transfer log. - * - * if no conflicting request is found: - * submit. - * - * if any conflicting request is found - * that has not yet been acked, - * AND I have the "discard concurrent writes" flag: - * queue (via done_ee) the P_DISCARD_ACK; OUT. - * - * if any conflicting request is found: - * block the receiver, waiting on misc_wait - * until no more conflicting requests are there, - * or we get interrupted (disconnect). 
- * - * we do not just write after local io completion of those - * requests, but only after req is done completely, i.e. - * we wait for the P_DISCARD_ACK to arrive! - * - * then proceed normally, i.e. submit. - */ - if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num))) + if (mdev->tconn->net_conf->two_primaries) { + err = wait_for_and_update_peer_seq(mdev, peer_seq); + if (err) goto out_interrupted; - spin_lock_irq(&mdev->tconn->req_lock); - - /* - * Inserting the peer request into the write_requests tree will - * prevent new conflicting local requests from being added. - */ - drbd_insert_interval(&mdev->write_requests, &peer_req->i); - - first = 1; - for (;;) { - struct drbd_interval *i; - int have_unacked = 0; - int have_conflict = 0; - prepare_to_wait(&mdev->misc_wait, &wait, - TASK_INTERRUPTIBLE); - - drbd_for_each_overlap(i, &mdev->write_requests, sector, size) { - struct drbd_request *req2; - - if (i == &peer_req->i || !i->local) - continue; - - /* only ALERT on first iteration, - * we may be woken up early... */ - if (first) - dev_alert(DEV, "%s[%u] Concurrent local write detected!" - " new: %llus +%u; pending: %llus +%u\n", - current->comm, current->pid, - (unsigned long long)sector, size, - (unsigned long long)i->sector, i->size); - - req2 = container_of(i, struct drbd_request, i); - if (req2->rq_state & RQ_NET_PENDING) - ++have_unacked; - ++have_conflict; - break; - } - if (!have_conflict) - break; - - /* Discard Ack only for the _first_ iteration */ - if (first && discard && have_unacked) { - dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n", - (unsigned long long)sector); - drbd_remove_epoch_entry_interval(mdev, peer_req); - inc_unacked(mdev); - peer_req->w.cb = e_send_discard_ack; - list_add_tail(&peer_req->w.list, &mdev->done_ee); - - spin_unlock_irq(&mdev->tconn->req_lock); - - /* we could probably send that P_DISCARD_ACK ourselves, - * but I don't like the receiver using the msock */ - + err = handle_write_conflicts(mdev, peer_req); + if (err) { + spin_unlock_irq(&mdev->tconn->req_lock); + if (err == -ENOENT) { put_ldev(mdev); - wake_asender(mdev->tconn); - finish_wait(&mdev->misc_wait, &wait); return true; } - - if (signal_pending(current)) { - drbd_remove_epoch_entry_interval(mdev, peer_req); - spin_unlock_irq(&mdev->tconn->req_lock); - finish_wait(&mdev->misc_wait, &wait); - goto out_interrupted; - } - - /* Indicate to wake up mdev->misc_wait upon completion. */ - i->waiting = true; - - spin_unlock_irq(&mdev->tconn->req_lock); - if (first) { - first = 0; - dev_alert(DEV, "Concurrent write! [W AFTERWARDS] " - "sec=%llus\n", (unsigned long long)sector); - } else if (discard) { - /* we had none on the first iteration. - * there must be none now. */ - D_ASSERT(have_unacked == 0); - } - /* FIXME: Introduce a timeout here after which we disconnect. 
*/ - schedule(); - spin_lock_irq(&mdev->tconn->req_lock); + goto out_interrupted; } - finish_wait(&mdev->misc_wait, &wait); - } - + } else + spin_lock_irq(&mdev->tconn->req_lock); list_add(&peer_req->w.list, &mdev->active_ee); spin_unlock_irq(&mdev->tconn->req_lock); @@ -4393,9 +4487,13 @@ static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd) D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B); what = RECV_ACKED_BY_PEER; break; - case P_DISCARD_ACK: + case P_DISCARD_WRITE: D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); - what = CONFLICT_DISCARDED_BY_PEER; + what = DISCARD_WRITE; + break; + case P_RETRY_WRITE: + D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C); + what = POSTPONE_WRITE; break; default: D_ASSERT(0); @@ -4446,6 +4544,7 @@ static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd) sector_t sector = be64_to_cpu(p->sector); update_peer_seq(mdev, be32_to_cpu(p->seq_num)); + dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n", (unsigned long long)sector, be32_to_cpu(p->blksize)); @@ -4567,7 +4666,7 @@ static struct asender_cmd *get_asender_cmd(int cmd) [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, - [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck }, [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply}, @@ -4578,6 +4677,7 @@ static struct asender_cmd *get_asender_cmd(int cmd) [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply}, [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_RqSReply }, + [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, [P_MAX_CMD] = { 0, NULL }, }; if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 733219884ab7..aab1acdd4fa4 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -225,12 +225,16 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) * the receiver, * the bio_endio completion callbacks. */ + if (s & RQ_LOCAL_PENDING) + return; + if (req->i.waiting) { + /* Retry all conflicting peer requests. */ + wake_up(&mdev->misc_wait); + } if (s & RQ_NET_QUEUED) return; if (s & RQ_NET_PENDING) return; - if (s & RQ_LOCAL_PENDING) - return; if (req->master_bio) { /* this is DATA_RECEIVED (remote read) @@ -267,7 +271,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) else root = &mdev->read_requests; drbd_remove_request_interval(root, req); - } else + } else if (!(s & RQ_POSTPONED)) D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); /* for writes we need to do some extra housekeeping */ @@ -277,8 +281,10 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) /* Update disk stats */ _drbd_end_io_acct(mdev, req); - m->error = ok ? 0 : (error ?: -EIO); - m->bio = req->master_bio; + if (!(s & RQ_POSTPONED)) { + m->error = ok ? 
0 : (error ?: -EIO); + m->bio = req->master_bio; + } req->master_bio = NULL; } @@ -318,7 +324,9 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, { struct drbd_conf *mdev = req->w.mdev; int rv = 0; - m->bio = NULL; + + if (m) + m->bio = NULL; switch (what) { default: @@ -332,7 +340,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, */ case TO_BE_SENT: /* via network */ - /* reached via drbd_make_request_common + /* reached via __drbd_make_request * and from w_read_retry_remote */ D_ASSERT(!(req->rq_state & RQ_NET_MASK)); req->rq_state |= RQ_NET_PENDING; @@ -340,7 +348,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case TO_BE_SUBMITTED: /* locally */ - /* reached via drbd_make_request_common */ + /* reached via __drbd_make_request */ D_ASSERT(!(req->rq_state & RQ_LOCAL_MASK)); req->rq_state |= RQ_LOCAL_PENDING; break; @@ -403,7 +411,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * no local disk, * or target area marked as invalid, * or just got an io-error. */ - /* from drbd_make_request_common + /* from __drbd_make_request * or from bio_endio during read io-error recovery */ /* so we can verify the handle in the answer packet @@ -422,7 +430,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case QUEUE_FOR_NET_WRITE: /* assert something? */ - /* from drbd_make_request_common only */ + /* from __drbd_make_request only */ /* corresponding hlist_del is in _req_may_be_done() */ drbd_insert_interval(&mdev->write_requests, &req->i); @@ -436,7 +444,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * * _req_add_to_epoch(req); this has to be after the * _maybe_start_new_epoch(req); which happened in - * drbd_make_request_common, because we now may set the bit + * __drbd_make_request, because we now may set the bit * again ourselves to close the current epoch. * * Add req to the (now) current epoch (barrier). */ @@ -446,7 +454,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * hurting performance. */ set_bit(UNPLUG_REMOTE, &mdev->flags); - /* see drbd_make_request_common, + /* see __drbd_make_request, * just after it grabs the req_lock */ D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0); @@ -535,14 +543,10 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case WRITE_ACKED_BY_PEER_AND_SIS: req->rq_state |= RQ_NET_SIS; - case CONFLICT_DISCARDED_BY_PEER: + case DISCARD_WRITE: /* for discarded conflicting writes of multiple primaries, * there is no need to keep anything in the tl, potential * node crashes are covered by the activity log. */ - if (what == CONFLICT_DISCARDED_BY_PEER) - dev_alert(DEV, "Got DiscardAck packet %llus +%u!" - " DRBD is not a random data generator!\n", - (unsigned long long)req->i.sector, req->i.size); req->rq_state |= RQ_NET_DONE; /* fall through */ case WRITE_ACKED_BY_PEER: @@ -569,6 +573,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, _req_may_be_done_not_susp(req, m); break; + case POSTPONE_WRITE: + /* + * If this node has already detected the write conflict, the + * worker will be waiting on misc_wait. Wake it up once this + * request has completed locally. + */ + D_ASSERT(req->rq_state & RQ_NET_PENDING); + req->rq_state |= RQ_POSTPONED; + _req_may_be_done_not_susp(req, m); + break; + case NEG_ACKED: /* assert something? 
*/ if (req->rq_state & RQ_NET_PENDING) { @@ -688,24 +703,19 @@ static int complete_conflicting_writes(struct drbd_conf *mdev, sector_t sector, int size) { for(;;) { - DEFINE_WAIT(wait); struct drbd_interval *i; + int err; i = drbd_find_overlap(&mdev->write_requests, sector, size); if (!i) return 0; - i->waiting = true; - prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE); - spin_unlock_irq(&mdev->tconn->req_lock); - schedule(); - finish_wait(&mdev->misc_wait, &wait); - spin_lock_irq(&mdev->tconn->req_lock); - if (signal_pending(current)) - return -ERESTARTSYS; + err = drbd_wait_misc(mdev, i); + if (err) + return err; } } -static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) +int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) { const int rw = bio_rw(bio); const int size = bio->bi_size; @@ -811,7 +821,12 @@ allocate_barrier: if (rw == WRITE) { err = complete_conflicting_writes(mdev, sector, size); if (err) { + if (err != -ERESTARTSYS) + _conn_request_state(mdev->tconn, + NS(conn, C_TIMEOUT), + CS_HARD); spin_unlock_irq(&mdev->tconn->req_lock); + err = -EIO; goto fail_free_complete; } } @@ -1031,7 +1046,7 @@ int drbd_make_request(struct request_queue *q, struct bio *bio) if (likely(s_enr == e_enr)) { inc_ap_bio(mdev, 1); - return drbd_make_request_common(mdev, bio, start_time); + return __drbd_make_request(mdev, bio, start_time); } /* can this bio be split generically? @@ -1069,10 +1084,10 @@ int drbd_make_request(struct request_queue *q, struct bio *bio) D_ASSERT(e_enr == s_enr + 1); - while (drbd_make_request_common(mdev, &bp->bio1, start_time)) + while (__drbd_make_request(mdev, &bp->bio1, start_time)) inc_ap_bio(mdev, 1); - while (drbd_make_request_common(mdev, &bp->bio2, start_time)) + while (__drbd_make_request(mdev, &bp->bio2, start_time)) inc_ap_bio(mdev, 1); dec_ap_bio(mdev); diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index e6f2361d6b19..0b3cd412d52d 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -97,7 +97,8 @@ enum drbd_req_event { RECV_ACKED_BY_PEER, WRITE_ACKED_BY_PEER, WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */ - CONFLICT_DISCARDED_BY_PEER, + DISCARD_WRITE, + POSTPONE_WRITE, NEG_ACKED, BARRIER_ACKED, /* in protocol A and B */ DATA_RECEIVED, /* (remote read) */ @@ -194,6 +195,9 @@ enum drbd_req_state_bits { /* Should call drbd_al_complete_io() for this request... */ __RQ_IN_ACT_LOG, + + /* The peer has sent a retry ACK */ + __RQ_POSTPONED, }; #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) @@ -214,6 +218,7 @@ enum drbd_req_state_bits { #define RQ_WRITE (1UL << __RQ_WRITE) #define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG) +#define RQ_POSTPONED (1UL << __RQ_POSTPONED) /* For waking up the frozen transfer log mod_req() has to return if the request should be counted in the epoch object*/ From 1b3bb47d526c63a845e3374d6272a67fbe20a468 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 28 Jan 2011 13:28:51 +0100 Subject: [PATCH 0139/5831] drbd: Remove redundant check Opening a device only succeeds on a primary node, or when explicitly setting the allow_oos module parameter to allow opening the device read-only on a secondary node. There is no other way that a request can get into drbd_make_request(), so this code cannot trigger. 
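For context, the per-request role check removed here duplicates a policy that is already enforced when the device is opened. Below is a minimal sketch of such an open-time gate, assuming hypothetical names: the function name and error code are illustrative assumptions and not the driver's actual open path.

/*
 * Illustrative sketch only: reject opens that, per the reasoning above,
 * could otherwise lead to requests reaching drbd_make_request().
 * Writes require the Primary role; read-only opens on a Secondary are
 * only allowed when the allow_oos module parameter is set.
 */
static int example_drbd_open_gate(struct drbd_conf *mdev, fmode_t mode)
{
	if (mdev->state.role != R_PRIMARY) {
		if ((mode & FMODE_WRITE) || !allow_oos)
			return -EMEDIUMTYPE; /* assumed error code for illustration */
	}
	return 0;
}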
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index aab1acdd4fa4..cfa5fba5303c 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -997,39 +997,12 @@ fail_and_free_req: return ret; } -/* helper function for drbd_make_request - * if we can determine just by the mdev (state) that this request will fail, - * return 1 - * otherwise return 0 - */ -static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write) -{ - if (mdev->state.role != R_PRIMARY && - (!allow_oos || is_write)) { - if (__ratelimit(&drbd_ratelimit_state)) { - dev_err(DEV, "Process %s[%u] tried to %s; " - "since we are not in Primary state, " - "we cannot allow this\n", - current->comm, current->pid, - is_write ? "WRITE" : "READ"); - } - return 1; - } - - return 0; -} - int drbd_make_request(struct request_queue *q, struct bio *bio) { unsigned int s_enr, e_enr; struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; unsigned long start_time; - if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) { - bio_endio(bio, -EPERM); - return 0; - } - start_time = jiffies; /* From 6e849ce88cd63efc6650a1826d18ed742eb31999 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 14 Mar 2011 17:27:45 +0100 Subject: [PATCH 0140/5831] drbd: Get rid of P_MAX_CMD Instead of artificially enlarging the command decoding arrays to P_MAX_CMD entries, check if an index is within the valid range using the ARRAY_SIZE() macro. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 - drivers/block/drbd/drbd_main.c | 3 +-- drivers/block/drbd/drbd_receiver.c | 10 ++++------ 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 17e905d0582d..be52b58a97d7 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -225,7 +225,6 @@ enum drbd_packet { P_CONN_ST_CHG_REPLY = 0x2b, /* meta sock: Connection side state req reply */ P_RETRY_WRITE = 0x2c, /* Protocol C: retry conflicting write request */ - P_MAX_CMD = 0x2d, P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... 
*/ P_MAX_OPT_CMD = 0x101, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index b93c5eccd73d..f43752fb5b52 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3019,7 +3019,6 @@ const char *cmdname(enum drbd_packet cmd) [P_DELAY_PROBE] = "DelayProbe", [P_OUT_OF_SYNC] = "OutOfSync", [P_RETRY_WRITE] = "RetryWrite", - [P_MAX_CMD] = NULL, }; if (cmd == P_HAND_SHAKE_M) @@ -3028,7 +3027,7 @@ const char *cmdname(enum drbd_packet cmd) return "HandShakeS"; if (cmd == P_HAND_SHAKE) return "HandShake"; - if (cmd >= P_MAX_CMD) + if (cmd >= ARRAY_SIZE(cmdnames)) return "Unknown"; return cmdnames[cmd]; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8c82d8945cf6..262e5d97991c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3875,9 +3875,6 @@ static struct data_cmd drbd_cmd_handler[] = { [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state }, - /* anything missing from this table is in - * the asender_tbl, see get_asender_cmd */ - [P_MAX_CMD] = { 0, 0, NULL }, }; /* All handler functions that expect a sub-header get that sub-heder in @@ -3899,7 +3896,8 @@ static void drbdd(struct drbd_tconn *tconn) if (!drbd_recv_header(tconn, &pi)) goto err_out; - if (unlikely(pi.cmd >= P_MAX_CMD || !drbd_cmd_handler[pi.cmd].function)) { + if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || + !drbd_cmd_handler[pi.cmd].function)) { conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size); goto err_out; } @@ -4678,9 +4676,9 @@ static struct asender_cmd *get_asender_cmd(int cmd) [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply}, [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_RqSReply }, [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, - [P_MAX_CMD] = { 0, NULL }, }; - if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) + + if (cmd >= ARRAY_SIZE(asender_tbl) || !asender_tbl[cmd].process) return NULL; return &asender_tbl[cmd]; } From 7201b972ded6c3a4d180887b187ca5897dfe49bf Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 14 Mar 2011 18:23:00 +0100 Subject: [PATCH 0141/5831] drbd: Replace get_asender_cmd() with its implementation Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 66 +++++++++++++----------------- 1 file changed, 28 insertions(+), 38 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 262e5d97991c..540fcbf1d1e4 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4648,41 +4648,6 @@ static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd) return true; } -struct asender_cmd { - size_t pkt_size; - int (*process)(struct drbd_conf *mdev, enum drbd_packet cmd); -}; - -static struct asender_cmd *get_asender_cmd(int cmd) -{ - static struct asender_cmd asender_tbl[] = { - /* anything missing from this table is in - * the drbd_cmd_handler (drbd_default_handler) table, - * see the beginning of drbdd() */ - [P_PING] = { sizeof(struct p_header), got_Ping }, - [P_PING_ACK] = { sizeof(struct p_header), got_PingAck }, - [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, - [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, - [P_RS_WRITE_ACK] = { sizeof(struct 
p_block_ack), got_BlockAck }, - [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, - [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck }, - [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, - [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply}, - [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult }, - [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, - [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, - [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, - [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, - [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply}, - [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_RqSReply }, - [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, - }; - - if (cmd >= ARRAY_SIZE(asender_tbl) || !asender_tbl[cmd].process) - return NULL; - return &asender_tbl[cmd]; -} - static int _drbd_process_done_ee(int vnr, void *p, void *data) { struct drbd_conf *mdev = (struct drbd_conf *)p; @@ -4719,6 +4684,31 @@ static int tconn_process_done_ee(struct drbd_tconn *tconn) return 0; } +struct asender_cmd { + size_t pkt_size; + int (*process)(struct drbd_conf *, enum drbd_packet); +}; + +static struct asender_cmd asender_tbl[] = { + [P_PING] = { sizeof(struct p_header), got_Ping }, + [P_PING_ACK] = { sizeof(struct p_header), got_PingAck }, + [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck }, + [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, + [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply}, + [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult }, + [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, + [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, + [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, + [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, + [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply}, + [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_RqSReply }, + [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, +}; + int drbd_asender(struct drbd_thread *thi) { struct drbd_tconn *tconn = thi->tconn; @@ -4803,8 +4793,8 @@ int drbd_asender(struct drbd_thread *thi) if (received == expect && cmd == NULL) { if (!decode_header(tconn, h, &pi)) goto reconnect; - cmd = get_asender_cmd(pi.cmd); - if (unlikely(cmd == NULL)) { + cmd = &asender_tbl[pi.cmd]; + if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd) { conn_err(tconn, "unknown command %d on meta (l: %d)\n", pi.cmd, pi.size); goto disconnect; @@ -4823,7 +4813,7 @@ int drbd_asender(struct drbd_thread *thi) /* the idle_timeout (ping-int) * has been restored in got_PingAck() */ - if (cmd == get_asender_cmd(P_PING_ACK)) + if (cmd == &asender_tbl[P_PING_ACK]) ping_timeout_active = 0; buf = h; From c66342d9493804ba92a3c9f48ba225a936c9736f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Mar 2011 14:23:53 +0100 Subject: [PATCH 0142/5831] drbd: Remove left-over function prototypes Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 -- drivers/block/drbd/drbd_receiver.c | 1 - 2 files changed, 3 deletions(-) 
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index be52b58a97d7..a346eb87d0fb 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1534,7 +1534,6 @@ extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio * extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, struct drbd_peer_request *, void *); /* worker callbacks */ -extern int w_req_cancel_conflict(struct drbd_work *, int); extern int w_read_retry_remote(struct drbd_work *, int); extern int w_e_end_data_req(struct drbd_work *, int); extern int w_e_end_rsdata_req(struct drbd_work *, int); @@ -1543,7 +1542,6 @@ extern int w_e_end_ov_reply(struct drbd_work *, int); extern int w_e_end_ov_req(struct drbd_work *, int); extern int w_ov_finished(struct drbd_work *, int); extern int w_resync_timer(struct drbd_work *, int); -extern int w_resume_next_sg(struct drbd_work *, int); extern int w_send_write_hint(struct drbd_work *, int); extern int w_send_dblock(struct drbd_work *, int); extern int w_send_barrier(struct drbd_work *, int); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 540fcbf1d1e4..0c3a094f6911 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4009,7 +4009,6 @@ static int drbd_disconnected(int vnr, void *p, void *data) del_timer(&mdev->request_timer); - /* make sure syncer is stopped and w_resume_next_sg queued */ del_timer_sync(&mdev->resync_timer); resync_timer_fn((unsigned long)mdev); From 0e29d163f7ec8369b3f1fb70900d29b1c4a1dc8b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 18 Feb 2011 14:23:11 +0100 Subject: [PATCH 0143/5831] drbd: Reworked the unconfiguring and thread stopping code * Moved CONFIG_PENDING and DEVICE_DYING from mdev to tconn. * Renamed drbd_reconfig_start() and drbd_reconfig_done() to conn_reconfig_start() and conn_reconfig_done(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 18 ++++++---- drivers/block/drbd/drbd_nl.c | 40 +++++++++++----------- drivers/block/drbd/drbd_receiver.c | 6 ++-- drivers/block/drbd/drbd_state.c | 54 +++++++++++++++++++----------- drivers/block/drbd/drbd_state.h | 1 + drivers/block/drbd/drbd_worker.c | 31 +++++++---------- 6 files changed, 80 insertions(+), 70 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a346eb87d0fb..145ae57b3113 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -787,12 +787,6 @@ enum { GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */ WAS_IO_ERROR, /* Local disk failed returned IO error */ RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ - CONFIG_PENDING, /* serialization of (re)configuration requests. - * if set, also prevents the device from dying */ - DEVICE_DYING, /* device became unconfigured, - * but worker thread is still handling the cleanup. - * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed, - * while this is set. */ RESIZE_PENDING, /* Size change detected locally, waiting for the response from * the peer, if it changed there as well. */ CONN_DRY_RUN, /* Expect disconnect after resync handshake. */ @@ -921,6 +915,12 @@ enum { GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */ CONN_WD_ST_CHG_OKAY, CONN_WD_ST_CHG_FAIL, + CONFIG_PENDING, /* serialization of (re)configuration requests. 
+ * if set, also prevents the device from dying */ + OBJECT_DYING, /* device became unconfigured, + * but worker thread is still handling the cleanup. + * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed, + * while this is set. */ }; struct drbd_tconn { /* is a resource from the config file */ @@ -1574,7 +1574,11 @@ extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head); extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); -extern void drbd_flush_workqueue(struct drbd_conf *mdev); +extern void conn_flush_workqueue(struct drbd_tconn *tconn); +static inline void drbd_flush_workqueue(struct drbd_conf *mdev) +{ + conn_flush_workqueue(mdev->tconn); +} /* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to * mess with get_fs/set_fs, we know we are KERNEL_DS always. */ diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 016858741cfd..8cdfb46243e2 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -871,29 +871,27 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) * or start a new one. Flush any pending work, there may still be an * after_state_change queued. */ -static void drbd_reconfig_start(struct drbd_conf *mdev) +static void conn_reconfig_start(struct drbd_tconn *tconn) { - wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags)); - wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags)); - drbd_thread_start(&mdev->tconn->worker); - drbd_flush_workqueue(mdev); + wait_event(tconn->ping_wait, !test_and_set_bit(CONFIG_PENDING, &tconn->flags)); + wait_event(tconn->ping_wait, !test_bit(OBJECT_DYING, &tconn->flags)); + drbd_thread_start(&tconn->worker); + conn_flush_workqueue(tconn); } /* if still unconfigured, stops worker again. * if configured now, clears CONFIG_PENDING. * wakes potential waiters */ -static void drbd_reconfig_done(struct drbd_conf *mdev) +static void conn_reconfig_done(struct drbd_tconn *tconn) { - spin_lock_irq(&mdev->tconn->req_lock); - if (mdev->state.disk == D_DISKLESS && - mdev->state.conn == C_STANDALONE && - mdev->state.role == R_SECONDARY) { - set_bit(DEVICE_DYING, &mdev->flags); - drbd_thread_stop_nowait(&mdev->tconn->worker); + spin_lock_irq(&tconn->req_lock); + if (conn_all_vols_unconf(tconn)) { + set_bit(OBJECT_DYING, &tconn->flags); + drbd_thread_stop_nowait(&tconn->worker); } else - clear_bit(CONFIG_PENDING, &mdev->flags); - spin_unlock_irq(&mdev->tconn->req_lock); - wake_up(&mdev->state_wait); + clear_bit(CONFIG_PENDING, &tconn->flags); + spin_unlock_irq(&tconn->req_lock); + wake_up(&tconn->ping_wait); } /* Make sure IO is suspended before calling this function(). 
*/ @@ -933,7 +931,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp enum drbd_state_rv rv; int cp_discovered = 0; - drbd_reconfig_start(mdev); + conn_reconfig_start(mdev->tconn); /* if you want to reconfigure, please tear down first */ if (mdev->state.disk > D_DISKLESS) { @@ -1279,7 +1277,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); put_ldev(mdev); reply->ret_code = retcode; - drbd_reconfig_done(mdev); + conn_reconfig_done(mdev->tconn); return 0; force_diskless_dec: @@ -1300,7 +1298,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp lc_destroy(resync_lru); reply->ret_code = retcode; - drbd_reconfig_done(mdev); + conn_reconfig_done(mdev->tconn); return 0; } @@ -1344,7 +1342,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, void *int_dig_vv = NULL; struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr; - drbd_reconfig_start(mdev); + conn_reconfig_start(mdev->tconn); if (mdev->state.conn > C_STANDALONE) { retcode = ERR_NET_CONFIGURED; @@ -1530,7 +1528,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); reply->ret_code = retcode; - drbd_reconfig_done(mdev); + conn_reconfig_done(mdev->tconn); return 0; fail: @@ -1543,7 +1541,7 @@ fail: kfree(new_conf); reply->ret_code = retcode; - drbd_reconfig_done(mdev); + conn_reconfig_done(mdev->tconn); return 0; } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 0c3a094f6911..66080e204086 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3932,14 +3932,14 @@ static void drbdd(struct drbd_tconn *tconn) } } -void drbd_flush_workqueue(struct drbd_conf *mdev) +void conn_flush_workqueue(struct drbd_tconn *tconn) { struct drbd_wq_barrier barr; barr.w.cb = w_prev_work_done; - barr.w.mdev = mdev; + barr.w.tconn = tconn; init_completion(&barr.done); - drbd_queue_work(&mdev->tconn->data.work, &barr.w); + drbd_queue_work(&tconn->data.work, &barr.w); wait_for_completion(&barr.done); } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index d3bf8e39fa56..338e1f5c7cd0 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -41,13 +41,29 @@ extern void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); static int w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); -static void after_all_state_ch(struct drbd_tconn *tconn, union drbd_state ns); +static void after_all_state_ch(struct drbd_tconn *tconn); static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, const char **warn_sync_abort); +int conn_all_vols_unconf(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int minor, uncfg = 1; + + idr_for_each_entry(&tconn->volumes, mdev, minor) { + uncfg &= (mdev->state.disk == D_DISKLESS && + mdev->state.conn == C_STANDALONE && + mdev->state.role == R_SECONDARY); + if (!uncfg) + break; + } + + return uncfg; +} + /** * 
cl_wide_st_chg() - true if the state change is a cluster wide one * @mdev: DRBD device. @@ -744,20 +760,6 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, print_state_change(mdev, os, ns, flags); - /* solve the race between becoming unconfigured, - * worker doing the cleanup, and - * admin reconfiguring us: - * on (re)configure, first set CONFIG_PENDING, - * then wait for a potentially exiting worker, - * start the worker, and schedule one no_op. - * then proceed with configuration. - */ - if (ns.disk == D_DISKLESS && - ns.conn == C_STANDALONE && - ns.role == R_SECONDARY && - !test_and_set_bit(CONFIG_PENDING, &mdev->flags)) - set_bit(DEVICE_DYING, &mdev->flags); - /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference * on the ldev here, to be sure the transition -> D_DISKLESS resp. * drbd_ldev_destroy() won't happen before our corresponding @@ -768,6 +770,18 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, mdev->state = ns; + /* solve the race between becoming unconfigured, + * worker doing the cleanup, and + * admin reconfiguring us: + * on (re)configure, first set CONFIG_PENDING, + * then wait for a potentially exiting worker, + * start the worker, and schedule one no_op. + * then proceed with configuration. + */ + if(conn_all_vols_unconf(mdev->tconn) && + !test_and_set_bit(CONFIG_PENDING, &mdev->tconn->flags)) + set_bit(OBJECT_DYING, &mdev->tconn->flags); + if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) drbd_print_uuids(mdev, "attached to UUIDs"); @@ -1236,7 +1250,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, resume_next_sg(mdev); } - after_all_state_ch(mdev->tconn, ns); + after_all_state_ch(mdev->tconn); drbd_md_sync(mdev); } @@ -1248,10 +1262,10 @@ struct after_conn_state_chg_work { enum chg_state_flags flags; }; -static void after_all_state_ch(struct drbd_tconn *tconn, union drbd_state ns) +static void after_all_state_ch(struct drbd_tconn *tconn) { - if (ns.disk == D_DISKLESS && ns.conn == C_STANDALONE && ns.role == R_SECONDARY) { - /* if (test_bit(DEVICE_DYING, &mdev->flags)) TODO: DEVICE_DYING functionality */ + if (conn_all_vols_unconf(tconn) && + test_bit(OBJECT_DYING, &tconn->flags)) { drbd_thread_stop_nowait(&tconn->worker); } } @@ -1271,7 +1285,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) drbd_thread_start(&tconn->receiver); //conn_err(tconn, STATE_FMT, STATE_ARGS("nms", nms)); - after_all_state_ch(tconn, nms); + after_all_state_ch(tconn); return 1; } diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index 5fdbdf0be707..d9536cd798e5 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -91,6 +91,7 @@ conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_s enum chg_state_flags flags); extern void drbd_resume_al(struct drbd_conf *mdev); +extern int conn_all_vols_unconf(struct drbd_tconn *tconn); /** * drbd_request_state() - Reqest a state change diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 8ee5c4f3d1c5..5cb5ffce097c 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1643,29 +1643,13 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) mutex_unlock(mdev->state_mutex); } -static int _worker_dying(int vnr, void *p, void *data) -{ - struct drbd_conf *mdev = (struct drbd_conf *)p; - - D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); - /* _drbd_set_state only 
uses stop_nowait. - * wait here for the exiting receiver. */ - drbd_thread_stop(&mdev->tconn->receiver); - drbd_mdev_cleanup(mdev); - - clear_bit(DEVICE_DYING, &mdev->flags); - clear_bit(CONFIG_PENDING, &mdev->flags); - wake_up(&mdev->state_wait); - - return 0; -} - int drbd_worker(struct drbd_thread *thi) { struct drbd_tconn *tconn = thi->tconn; struct drbd_work *w = NULL; + struct drbd_conf *mdev; LIST_HEAD(work_list); - int intr = 0; + int minor, intr = 0; while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); @@ -1749,7 +1733,16 @@ int drbd_worker(struct drbd_thread *thi) */ spin_unlock_irq(&tconn->data.work.q_lock); - idr_for_each(&tconn->volumes, _worker_dying, NULL); + drbd_thread_stop(&tconn->receiver); + idr_for_each_entry(&tconn->volumes, mdev, minor) { + D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); + /* _drbd_set_state only uses stop_nowait. + * wait here for the exiting receiver. */ + drbd_mdev_cleanup(mdev); + } + clear_bit(OBJECT_DYING, &tconn->flags); + clear_bit(CONFIG_PENDING, &tconn->flags); + wake_up(&tconn->ping_wait); return 0; } From 3f9cbe937ec41fca8842594e0529537f3019c775 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 17 Feb 2011 22:50:23 +0100 Subject: [PATCH 0144/5831] drbd: Removed the mdev parameter from the ..to_tags() and ...from_tags() functions Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 38 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 8cdfb46243e2..33159e47e6e0 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -50,9 +50,9 @@ static char *drbd_m_holder = "Hands off! 
this is DRBD's meta data device."; /* Generate the tag_list to struct functions */ #define NL_PACKET(name, number, fields) \ -static int name ## _from_tags(struct drbd_conf *mdev, \ +static int name ## _from_tags( \ unsigned short *tags, struct name *arg) __attribute__ ((unused)); \ -static int name ## _from_tags(struct drbd_conf *mdev, \ +static int name ## _from_tags( \ unsigned short *tags, struct name *arg) \ { \ int tag; \ @@ -64,7 +64,7 @@ static int name ## _from_tags(struct drbd_conf *mdev, \ fields \ default: \ if (tag & T_MANDATORY) { \ - dev_err(DEV, "Unknown tag: %d\n", tag_number(tag)); \ + printk(KERN_ERR "drbd: Unknown tag: %d\n", tag_number(tag)); \ return 0; \ } \ } \ @@ -87,7 +87,7 @@ static int name ## _from_tags(struct drbd_conf *mdev, \ #define NL_STRING(pn, pr, member, len) \ case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \ if (dlen > len) { \ - dev_err(DEV, "arg too long: %s (%u wanted, max len: %u bytes)\n", \ + printk(KERN_ERR "drbd: arg too long: %s (%u wanted, max len: %u bytes)\n", \ #member, dlen, (unsigned int)len); \ return 0; \ } \ @@ -99,10 +99,10 @@ static int name ## _from_tags(struct drbd_conf *mdev, \ /* Generate the struct to tag_list functions */ #define NL_PACKET(name, number, fields) \ static unsigned short* \ -name ## _to_tags(struct drbd_conf *mdev, \ +name ## _to_tags( \ struct name *arg, unsigned short *tags) __attribute__ ((unused)); \ static unsigned short* \ -name ## _to_tags(struct drbd_conf *mdev, \ +name ## _to_tags( \ struct name *arg, unsigned short *tags) \ { \ fields \ @@ -483,7 +483,7 @@ static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct primary primary_args; memset(&primary_args, 0, sizeof(struct primary)); - if (!primary_from_tags(mdev, nlp->tag_list, &primary_args)) { + if (!primary_from_tags(nlp->tag_list, &primary_args)) { reply->ret_code = ERR_MANDATORY_TAG; return 0; } @@ -956,7 +956,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp nbc->dc.fencing = DRBD_FENCING_DEF; nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF; - if (!disk_conf_from_tags(mdev, nlp->tag_list, &nbc->dc)) { + if (!disk_conf_from_tags(nlp->tag_list, &nbc->dc)) { retcode = ERR_MANDATORY_TAG; goto fail; } @@ -1376,7 +1376,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, new_conf->on_congestion = DRBD_ON_CONGESTION_DEF; new_conf->cong_extents = DRBD_CONG_EXTENTS_DEF; - if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) { + if (!net_conf_from_tags(nlp->tag_list, new_conf)) { retcode = ERR_MANDATORY_TAG; goto fail; } @@ -1553,7 +1553,7 @@ static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl struct disconnect dc; memset(&dc, 0, sizeof(struct disconnect)); - if (!disconnect_from_tags(mdev, nlp->tag_list, &dc)) { + if (!disconnect_from_tags(nlp->tag_list, &dc)) { retcode = ERR_MANDATORY_TAG; goto fail; } @@ -1630,7 +1630,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, enum dds_flags ddsf; memset(&rs, 0, sizeof(struct resize)); - if (!resize_from_tags(mdev, nlp->tag_list, &rs)) { + if (!resize_from_tags(nlp->tag_list, &rs)) { retcode = ERR_MANDATORY_TAG; goto fail; } @@ -1715,7 +1715,7 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n } else memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); - if (!syncer_conf_from_tags(mdev, nlp->tag_list, &sc)) { + if (!syncer_conf_from_tags(nlp->tag_list, &sc)) { retcode = ERR_MANDATORY_TAG; goto fail; } @@ 
-2020,15 +2020,15 @@ static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl tl = reply->tag_list; if (get_ldev(mdev)) { - tl = disk_conf_to_tags(mdev, &mdev->ldev->dc, tl); + tl = disk_conf_to_tags(&mdev->ldev->dc, tl); put_ldev(mdev); } if (get_net_conf(mdev->tconn)) { - tl = net_conf_to_tags(mdev, mdev->tconn->net_conf, tl); + tl = net_conf_to_tags(mdev->tconn->net_conf, tl); put_net_conf(mdev->tconn); } - tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl); + tl = syncer_conf_to_tags(&mdev->sync_conf, tl); put_unaligned(TT_END, tl++); /* Close the tag list */ @@ -2043,7 +2043,7 @@ static int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp unsigned long rs_left; unsigned int res; - tl = get_state_to_tags(mdev, (struct get_state *)&s, tl); + tl = get_state_to_tags((struct get_state *)&s, tl); /* no local ref, no bitmap, no syncer progress. */ if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) { @@ -2105,7 +2105,7 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct start_ov args = { .start_sector = mdev->ov_start_sector }; - if (!start_ov_from_tags(mdev, nlp->tag_list, &args)) { + if (!start_ov_from_tags(nlp->tag_list, &args)) { reply->ret_code = ERR_MANDATORY_TAG; return 0; } @@ -2131,7 +2131,7 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl struct new_c_uuid args; memset(&args, 0, sizeof(struct new_c_uuid)); - if (!new_c_uuid_from_tags(mdev, nlp->tag_list, &args)) { + if (!new_c_uuid_from_tags(nlp->tag_list, &args)) { reply->ret_code = ERR_MANDATORY_TAG; return 0; } @@ -2365,7 +2365,7 @@ void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state) /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */ - tl = get_state_to_tags(mdev, (struct get_state *)&state, tl); + tl = get_state_to_tags((struct get_state *)&state, tl); put_unaligned(TT_END, tl++); /* Close the tag list */ From 49559d87fdfe3ab33c684506c394681da6a746c9 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 21 Feb 2011 14:19:44 +0100 Subject: [PATCH 0145/5831] drbd: Improved the dec_*() macros Now those can be used with a struct drbd_conf * that has an other name than 'mdev'. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 51 ++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 145ae57b3113..103b61748c2d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1988,17 +1988,19 @@ static inline void inc_ap_pending(struct drbd_conf *mdev) atomic_inc(&mdev->ap_pending_cnt); } -#define ERR_IF_CNT_IS_NEGATIVE(which) \ - if (atomic_read(&mdev->which) < 0) \ +#define ERR_IF_CNT_IS_NEGATIVE(which, func, line) \ + if (atomic_read(&mdev->which) < 0) \ dev_err(DEV, "in %s:%d: " #which " = %d < 0 !\n", \ - __func__ , __LINE__ , \ - atomic_read(&mdev->which)) + func, line, \ + atomic_read(&mdev->which)) -#define dec_ap_pending(mdev) do { \ - typecheck(struct drbd_conf *, mdev); \ - if (atomic_dec_and_test(&mdev->ap_pending_cnt)) \ - wake_up(&mdev->misc_wait); \ - ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt); } while (0) +#define dec_ap_pending(mdev) _dec_ap_pending(mdev, __FUNCTION__, __LINE__) +static inline void _dec_ap_pending(struct drbd_conf *mdev, const char *func, int line) +{ + if (atomic_dec_and_test(&mdev->ap_pending_cnt)) + wake_up(&mdev->misc_wait); + ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt, func, line); +} /* counts how many resync-related answers we still expect from the peer * increase decrease @@ -2011,10 +2013,12 @@ static inline void inc_rs_pending(struct drbd_conf *mdev) atomic_inc(&mdev->rs_pending_cnt); } -#define dec_rs_pending(mdev) do { \ - typecheck(struct drbd_conf *, mdev); \ - atomic_dec(&mdev->rs_pending_cnt); \ - ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt); } while (0) +#define dec_rs_pending(mdev) _dec_rs_pending(mdev, __FUNCTION__, __LINE__) +static inline void _dec_rs_pending(struct drbd_conf *mdev, const char *func, int line) +{ + atomic_dec(&mdev->rs_pending_cnt); + ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt, func, line); +} /* counts how many answers we still need to send to the peer. 
* increased on @@ -2030,16 +2034,19 @@ static inline void inc_unacked(struct drbd_conf *mdev) atomic_inc(&mdev->unacked_cnt); } -#define dec_unacked(mdev) do { \ - typecheck(struct drbd_conf *, mdev); \ - atomic_dec(&mdev->unacked_cnt); \ - ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) - -#define sub_unacked(mdev, n) do { \ - typecheck(struct drbd_conf *, mdev); \ - atomic_sub(n, &mdev->unacked_cnt); \ - ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) +#define dec_unacked(mdev) _dec_unacked(mdev, __FUNCTION__, __LINE__) +static inline void _dec_unacked(struct drbd_conf *mdev, const char *func, int line) +{ + atomic_dec(&mdev->unacked_cnt); + ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); +} +#define sub_unacked(mdev, n) _sub_unacked(mdev, n, __FUNCTION__, __LINE__) +static inline void _sub_unacked(struct drbd_conf *mdev, int n, const char *func, int line) +{ + atomic_sub(n, &mdev->unacked_cnt); + ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); +} static inline void put_net_conf(struct drbd_tconn *tconn) { From 2f5cdd0b2cf7a4099faa7e53ba0a29ddf0ddf950 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 21 Feb 2011 14:29:27 +0100 Subject: [PATCH 0146/5831] drbd: Converted the transfer log from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 6 +- drivers/block/drbd/drbd_main.c | 125 +++++++++++++++-------------- drivers/block/drbd/drbd_nl.c | 4 +- drivers/block/drbd/drbd_receiver.c | 6 +- drivers/block/drbd/drbd_req.c | 2 +- drivers/block/drbd/drbd_req.h | 3 +- drivers/block/drbd/drbd_state.c | 5 +- 7 files changed, 79 insertions(+), 72 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 103b61748c2d..48367e53a7a5 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1173,10 +1173,10 @@ extern void drbd_calc_cpu_mask(struct drbd_tconn *tconn); #define drbd_calc_cpu_mask(A) ({}) #endif extern void drbd_free_resources(struct drbd_conf *mdev); -extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, +extern void tl_release(struct drbd_tconn *, unsigned int barrier_nr, unsigned int set_size); -extern void tl_clear(struct drbd_conf *mdev); -extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); +extern void tl_clear(struct drbd_tconn *); +extern void _tl_add_barrier(struct drbd_tconn *, struct drbd_tl_epoch *); extern void drbd_free_sock(struct drbd_tconn *tconn); extern int drbd_send(struct drbd_tconn *tconn, struct socket *sock, void *buf, size_t size, unsigned msg_flags); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f43752fb5b52..cbec5ff2cc74 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -180,7 +180,7 @@ int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) * Each &struct drbd_tl_epoch has a circular double linked list of requests * attached. 
*/ -static int tl_init(struct drbd_conf *mdev) +static int tl_init(struct drbd_tconn *tconn) { struct drbd_tl_epoch *b; @@ -195,21 +195,23 @@ static int tl_init(struct drbd_conf *mdev) b->n_writes = 0; b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ - mdev->tconn->oldest_tle = b; - mdev->tconn->newest_tle = b; - INIT_LIST_HEAD(&mdev->tconn->out_of_sequence_requests); + tconn->oldest_tle = b; + tconn->newest_tle = b; + INIT_LIST_HEAD(&tconn->out_of_sequence_requests); return 1; } -static void tl_cleanup(struct drbd_conf *mdev) +static void tl_cleanup(struct drbd_tconn *tconn) { - D_ASSERT(mdev->tconn->oldest_tle == mdev->tconn->newest_tle); - D_ASSERT(list_empty(&mdev->tconn->out_of_sequence_requests)); - kfree(mdev->tconn->oldest_tle); - mdev->tconn->oldest_tle = NULL; - kfree(mdev->tconn->unused_spare_tle); - mdev->tconn->unused_spare_tle = NULL; + if (tconn->oldest_tle != tconn->newest_tle) + conn_err(tconn, "ASSERT FAILED: oldest_tle == newest_tle\n"); + if (!list_empty(&tconn->out_of_sequence_requests)) + conn_err(tconn, "ASSERT FAILED: list_empty(out_of_sequence_requests)\n"); + kfree(tconn->oldest_tle); + tconn->oldest_tle = NULL; + kfree(tconn->unused_spare_tle); + tconn->unused_spare_tle = NULL; } /** @@ -219,7 +221,7 @@ static void tl_cleanup(struct drbd_conf *mdev) * * The caller must hold the req_lock. */ -void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new) +void _tl_add_barrier(struct drbd_tconn *tconn, struct drbd_tl_epoch *new) { struct drbd_tl_epoch *newest_before; @@ -229,13 +231,13 @@ void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new) new->next = NULL; new->n_writes = 0; - newest_before = mdev->tconn->newest_tle; + newest_before = tconn->newest_tle; /* never send a barrier number == 0, because that is special-cased * when using TCQ for our write ordering code */ new->br_number = (newest_before->br_number+1) ?: 1; - if (mdev->tconn->newest_tle != new) { - mdev->tconn->newest_tle->next = new; - mdev->tconn->newest_tle = new; + if (tconn->newest_tle != new) { + tconn->newest_tle->next = new; + tconn->newest_tle = new; } } @@ -249,31 +251,32 @@ void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new) * &struct drbd_tl_epoch objects this function will cause a termination * of the connection. */ -void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, - unsigned int set_size) +void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr, + unsigned int set_size) { + struct drbd_conf *mdev; struct drbd_tl_epoch *b, *nob; /* next old barrier */ struct list_head *le, *tle; struct drbd_request *r; - spin_lock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&tconn->req_lock); - b = mdev->tconn->oldest_tle; + b = tconn->oldest_tle; /* first some paranoia code */ if (b == NULL) { - dev_err(DEV, "BAD! BarrierAck #%u received, but no epoch in tl!?\n", - barrier_nr); + conn_err(tconn, "BAD! BarrierAck #%u received, but no epoch in tl!?\n", + barrier_nr); goto bail; } if (b->br_number != barrier_nr) { - dev_err(DEV, "BAD! BarrierAck #%u received, expected #%u!\n", - barrier_nr, b->br_number); + conn_err(tconn, "BAD! BarrierAck #%u received, expected #%u!\n", + barrier_nr, b->br_number); goto bail; } if (b->n_writes != set_size) { - dev_err(DEV, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n", - barrier_nr, set_size, b->n_writes); + conn_err(tconn, "BAD! 
BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n", + barrier_nr, set_size, b->n_writes); goto bail; } @@ -296,28 +299,29 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, _req_mod(, BARRIER_ACKED) above. */ list_del_init(&b->requests); + mdev = b->w.mdev; nob = b->next; if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { - _tl_add_barrier(mdev, b); + _tl_add_barrier(tconn, b); if (nob) - mdev->tconn->oldest_tle = nob; + tconn->oldest_tle = nob; /* if nob == NULL b was the only barrier, and becomes the new - barrier. Therefore mdev->tconn->oldest_tle points already to b */ + barrier. Therefore tconn->oldest_tle points already to b */ } else { D_ASSERT(nob != NULL); - mdev->tconn->oldest_tle = nob; + tconn->oldest_tle = nob; kfree(b); } - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&tconn->req_lock); dec_ap_pending(mdev); return; bail: - spin_unlock_irq(&mdev->tconn->req_lock); - drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); + spin_unlock_irq(&tconn->req_lock); + conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD); } @@ -329,15 +333,15 @@ bail: * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO, * RESTART_FROZEN_DISK_IO. */ -void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) +void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) { struct drbd_tl_epoch *b, *tmp, **pn; struct list_head *le, *tle, carry_reads; struct drbd_request *req; int rv, n_writes, n_reads; - b = mdev->tconn->oldest_tle; - pn = &mdev->tconn->oldest_tle; + b = tconn->oldest_tle; + pn = &tconn->oldest_tle; while (b) { n_writes = 0; n_reads = 0; @@ -356,11 +360,11 @@ void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) b->n_writes = n_writes; if (b->w.cb == NULL) { b->w.cb = w_send_barrier; - inc_ap_pending(mdev); - set_bit(CREATE_BARRIER, &mdev->flags); + inc_ap_pending(b->w.mdev); + set_bit(CREATE_BARRIER, &b->w.mdev->flags); } - drbd_queue_work(&mdev->tconn->data.work, &b->w); + drbd_queue_work(&tconn->data.work, &b->w); } pn = &b->next; } else { @@ -374,11 +378,12 @@ void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) * the newest barrier may not have been queued yet, * in which case w.cb is still NULL. */ if (b->w.cb != NULL) - dec_ap_pending(mdev); + dec_ap_pending(b->w.mdev); - if (b == mdev->tconn->newest_tle) { + if (b == tconn->newest_tle) { /* recycle, but reinit! */ - D_ASSERT(tmp == NULL); + if (tmp != NULL) + conn_err(tconn, "ASSERT FAILED tmp == NULL"); INIT_LIST_HEAD(&b->requests); list_splice(&carry_reads, &b->requests); INIT_LIST_HEAD(&b->w.list); @@ -406,20 +411,23 @@ void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) * by the requests on the transfer gets marked as our of sync. Called from the * receiver thread and the worker thread. */ -void tl_clear(struct drbd_conf *mdev) +void tl_clear(struct drbd_tconn *tconn) { + struct drbd_conf *mdev; struct list_head *le, *tle; struct drbd_request *r; + int minor; - spin_lock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&tconn->req_lock); - _tl_restart(mdev, CONNECTION_LOST_WHILE_PENDING); + _tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING); /* we expect this list to be empty. */ - D_ASSERT(list_empty(&mdev->tconn->out_of_sequence_requests)); + if (!list_empty(&tconn->out_of_sequence_requests)) + conn_err(tconn, "ASSERT FAILED list_empty(&out_of_sequence_requests)\n"); /* but just in case, clean it up anyways! 
*/ - list_for_each_safe(le, tle, &mdev->tconn->out_of_sequence_requests) { + list_for_each_safe(le, tle, &tconn->out_of_sequence_requests) { r = list_entry(le, struct drbd_request, tl_requests); /* It would be nice to complete outside of spinlock. * But this is easier for now. */ @@ -427,16 +435,17 @@ void tl_clear(struct drbd_conf *mdev) } /* ensure bit indicating barrier is required is clear */ - clear_bit(CREATE_BARRIER, &mdev->flags); + idr_for_each_entry(&tconn->volumes, mdev, minor) + clear_bit(CREATE_BARRIER, &mdev->flags); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&tconn->req_lock); } -void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) +void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) { - spin_lock_irq(&mdev->tconn->req_lock); - _tl_restart(mdev, what); - spin_unlock_irq(&mdev->tconn->req_lock); + spin_lock_irq(&tconn->req_lock); + _tl_restart(tconn, what); + spin_unlock_irq(&tconn->req_lock); } static int drbd_thread_setup(void *arg) @@ -2199,6 +2208,9 @@ struct drbd_tconn *drbd_new_tconn(char *name) if (!tconn->name) goto fail; + if (!tl_init(tconn)) + goto fail; + tconn->cstate = C_STANDALONE; mutex_init(&tconn->cstate_mutex); spin_lock_init(&tconn->req_lock); @@ -2224,6 +2236,7 @@ struct drbd_tconn *drbd_new_tconn(char *name) return tconn; fail: + tl_cleanup(tconn); kfree(tconn->name); kfree(tconn); @@ -2316,9 +2329,6 @@ struct drbd_conf *drbd_new_device(unsigned int minor) if (drbd_bm_init(mdev)) goto out_no_bitmap; - /* no need to lock access, we are still initializing this minor device. */ - if (!tl_init(mdev)) - goto out_no_tl; mdev->read_requests = RB_ROOT; mdev->write_requests = RB_ROOT; @@ -2334,8 +2344,6 @@ struct drbd_conf *drbd_new_device(unsigned int minor) /* out_whatever_else: kfree(mdev->current_epoch); */ out_no_epoch: - tl_cleanup(mdev); -out_no_tl: drbd_bm_cleanup(mdev); out_no_bitmap: __free_page(mdev->md_io_page); @@ -2357,7 +2365,6 @@ out_no_tconn: void drbd_free_mdev(struct drbd_conf *mdev) { kfree(mdev->current_epoch); - tl_cleanup(mdev); if (mdev->bitmap) /* should no longer be there. */ drbd_bm_cleanup(mdev); __free_page(mdev->md_io_page); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 33159e47e6e0..b141f891f643 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1996,9 +1996,9 @@ static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); if (reply->ret_code == SS_SUCCESS) { if (mdev->state.conn < C_CONNECTED) - tl_clear(mdev); + tl_clear(mdev->tconn); if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED) - tl_restart(mdev, FAIL_FROZEN_DISK_IO); + tl_restart(mdev->tconn, FAIL_FROZEN_DISK_IO); } drbd_resume_io(mdev); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 66080e204086..fcdc2c1cc503 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3466,7 +3466,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd, for temporal network outages! 
*/ spin_unlock_irq(&mdev->tconn->req_lock); dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n"); - tl_clear(mdev); + tl_clear(mdev->tconn); drbd_uuid_new_current(mdev); clear_bit(NEW_CUR_UUID, &mdev->flags); drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0)); @@ -4025,7 +4025,7 @@ static int drbd_disconnected(int vnr, void *p, void *data) mdev->p_uuid = NULL; if (!is_susp(mdev->state)) - tl_clear(mdev); + tl_clear(mdev->tconn); drbd_md_sync(mdev); @@ -4585,7 +4585,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd) { struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack; - tl_release(mdev, p->barrier, be32_to_cpu(p->set_size)); + tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size)); if (mdev->state.conn == C_AHEAD && atomic_read(&mdev->ap_in_flight) == 0 && diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index cfa5fba5303c..fa799e372bab 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -885,7 +885,7 @@ allocate_barrier: * barrier packet, this request is queued within the same spinlock. */ if ((remote || send_oos) && mdev->tconn->unused_spare_tle && test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { - _tl_add_barrier(mdev, mdev->tconn->unused_spare_tle); + _tl_add_barrier(mdev->tconn, mdev->tconn->unused_spare_tle); mdev->tconn->unused_spare_tle = NULL; } else { D_ASSERT(!(remote && rw == WRITE && diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 0b3cd412d52d..8c8c2588c4b9 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -254,7 +254,8 @@ extern int __req_mod(struct drbd_request *req, enum drbd_req_event what, extern void complete_master_bio(struct drbd_conf *mdev, struct bio_and_error *m); extern void request_timer_fn(unsigned long data); -extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); +extern void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what); +extern void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what); /* use this if you don't want to deal with calling complete_master_bio() * outside the spinlock, e.g. when walking some list on cleanup. 
*/ diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 338e1f5c7cd0..ffee90d6d374 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -37,7 +37,6 @@ struct after_state_chg_work { struct completion *done; }; -extern void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what); static int w_after_state_ch(struct drbd_work *w, int unused); static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, enum chg_state_flags flags); @@ -1009,7 +1008,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (ns.susp_fen) { /* case1: The outdate peer handler is successful: */ if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) { - tl_clear(mdev); + tl_clear(mdev->tconn); if (test_bit(NEW_CUR_UUID, &mdev->flags)) { drbd_uuid_new_current(mdev); clear_bit(NEW_CUR_UUID, &mdev->flags); @@ -1028,7 +1027,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (what != NOTHING) { spin_lock_irq(&mdev->tconn->req_lock); - _tl_restart(mdev, what); + _tl_restart(mdev->tconn, what); nsm.i &= mdev->state.i; _drbd_set_state(mdev, nsm, CS_VERBOSE, NULL); spin_unlock_irq(&mdev->tconn->req_lock); From 1aba4d7fcfabe999e0c99683b394aa76d5c42842 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 21 Feb 2011 15:38:08 +0100 Subject: [PATCH 0147/5831] drbd: Preparing the connector interface to operator on connections Up to now it only operated on minor numbers. Now it can work also on named connections. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 15 ++++++ drivers/block/drbd/drbd_nl.c | 96 ++++++++++++++++++++++------------ include/linux/drbd.h | 19 +++++-- 4 files changed, 94 insertions(+), 37 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 48367e53a7a5..033af1995867 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1479,6 +1479,7 @@ extern void drbd_free_mdev(struct drbd_conf *mdev); struct drbd_tconn *drbd_new_tconn(char *name); extern void drbd_free_tconn(struct drbd_tconn *tconn); +struct drbd_tconn *conn_by_name(const char *name); extern int proc_details; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index cbec5ff2cc74..4761426f9ad7 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2196,6 +2196,21 @@ static void drbd_init_workqueue(struct drbd_work_queue* wq) INIT_LIST_HEAD(&wq->q); } +struct drbd_tconn *conn_by_name(const char *name) +{ + struct drbd_tconn *tconn; + + write_lock_irq(&global_state_lock); + list_for_each_entry(tconn, &drbd_tconns, all_tconn) { + if (!strcmp(tconn->name, name)) + goto found; + } + tconn = NULL; +found: + write_unlock_irq(&global_state_lock); + return tconn; +} + struct drbd_tconn *drbd_new_tconn(char *name) { struct drbd_tconn *tconn; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index b141f891f643..27a43d138f6b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2184,42 +2184,57 @@ out: return 0; } +enum cn_handler_type { + CHT_MINOR, + CHT_CONN, + CHT_CTOR, + /* CHT_RES, later */ +}; + struct cn_handler_struct { - int (*function)(struct drbd_conf *, - struct drbd_nl_cfg_req *, - struct drbd_nl_cfg_reply *); + enum cn_handler_type type; + union { + int (*minor_based)(struct drbd_conf *, + struct drbd_nl_cfg_req *, + struct drbd_nl_cfg_reply *); + 
int (*conn_based)(struct drbd_tconn *, + struct drbd_nl_cfg_req *, + struct drbd_nl_cfg_reply *); + int (*constructor)(struct drbd_nl_cfg_req *, + struct drbd_nl_cfg_reply *); + }; int reply_body_size; }; static struct cn_handler_struct cnd_table[] = { - [ P_primary ] = { &drbd_nl_primary, 0 }, - [ P_secondary ] = { &drbd_nl_secondary, 0 }, - [ P_disk_conf ] = { &drbd_nl_disk_conf, 0 }, - [ P_detach ] = { &drbd_nl_detach, 0 }, - [ P_net_conf ] = { &drbd_nl_net_conf, 0 }, - [ P_disconnect ] = { &drbd_nl_disconnect, 0 }, - [ P_resize ] = { &drbd_nl_resize, 0 }, - [ P_syncer_conf ] = { &drbd_nl_syncer_conf, 0 }, - [ P_invalidate ] = { &drbd_nl_invalidate, 0 }, - [ P_invalidate_peer ] = { &drbd_nl_invalidate_peer, 0 }, - [ P_pause_sync ] = { &drbd_nl_pause_sync, 0 }, - [ P_resume_sync ] = { &drbd_nl_resume_sync, 0 }, - [ P_suspend_io ] = { &drbd_nl_suspend_io, 0 }, - [ P_resume_io ] = { &drbd_nl_resume_io, 0 }, - [ P_outdate ] = { &drbd_nl_outdate, 0 }, - [ P_get_config ] = { &drbd_nl_get_config, + [ P_primary ] = { CHT_MINOR, { &drbd_nl_primary }, 0 }, + [ P_secondary ] = { CHT_MINOR, { &drbd_nl_secondary }, 0 }, + [ P_disk_conf ] = { CHT_MINOR, { &drbd_nl_disk_conf }, 0 }, + [ P_detach ] = { CHT_MINOR, { &drbd_nl_detach }, 0 }, + [ P_net_conf ] = { CHT_MINOR, { &drbd_nl_net_conf }, 0 }, + [ P_disconnect ] = { CHT_MINOR, { &drbd_nl_disconnect }, 0 }, + [ P_resize ] = { CHT_MINOR, { &drbd_nl_resize }, 0 }, + [ P_syncer_conf ] = { CHT_MINOR, { &drbd_nl_syncer_conf },0 }, + [ P_invalidate ] = { CHT_MINOR, { &drbd_nl_invalidate }, 0 }, + [ P_invalidate_peer ] = { CHT_MINOR, { &drbd_nl_invalidate_peer },0 }, + [ P_pause_sync ] = { CHT_MINOR, { &drbd_nl_pause_sync }, 0 }, + [ P_resume_sync ] = { CHT_MINOR, { &drbd_nl_resume_sync },0 }, + [ P_suspend_io ] = { CHT_MINOR, { &drbd_nl_suspend_io }, 0 }, + [ P_resume_io ] = { CHT_MINOR, { &drbd_nl_resume_io }, 0 }, + [ P_outdate ] = { CHT_MINOR, { &drbd_nl_outdate }, 0 }, + [ P_get_config ] = { CHT_MINOR, { &drbd_nl_get_config }, sizeof(struct syncer_conf_tag_len_struct) + sizeof(struct disk_conf_tag_len_struct) + sizeof(struct net_conf_tag_len_struct) }, - [ P_get_state ] = { &drbd_nl_get_state, + [ P_get_state ] = { CHT_MINOR, { &drbd_nl_get_state }, sizeof(struct get_state_tag_len_struct) + sizeof(struct sync_progress_tag_len_struct) }, - [ P_get_uuids ] = { &drbd_nl_get_uuids, + [ P_get_uuids ] = { CHT_MINOR, { &drbd_nl_get_uuids }, sizeof(struct get_uuids_tag_len_struct) }, - [ P_get_timeout_flag ] = { &drbd_nl_get_timeout_flag, + [ P_get_timeout_flag ] = { CHT_MINOR, { &drbd_nl_get_timeout_flag }, sizeof(struct get_timeout_flag_tag_len_struct)}, - [ P_start_ov ] = { &drbd_nl_start_ov, 0 }, - [ P_new_c_uuid ] = { &drbd_nl_new_c_uuid, 0 }, + [ P_start_ov ] = { CHT_MINOR, { &drbd_nl_start_ov }, 0 }, + [ P_new_c_uuid ] = { CHT_MINOR, { &drbd_nl_new_c_uuid }, 0 }, }; static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp) @@ -2229,6 +2244,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms struct cn_msg *cn_reply; struct drbd_nl_cfg_reply *reply; struct drbd_conf *mdev; + struct drbd_tconn *tconn; int retcode, rr; int reply_size = sizeof(struct cn_msg) + sizeof(struct drbd_nl_cfg_reply) @@ -2244,13 +2260,6 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms goto fail; } - mdev = ensure_mdev(nlp->drbd_minor, - (nlp->flags & DRBD_NL_CREATE_DEVICE)); - if (!mdev) { - retcode = ERR_MINOR_INVALID; - goto fail; - } - if (nlp->packet_type >= P_nl_after_last_packet 
|| nlp->packet_type == P_return_code_only) { retcode = ERR_PACKET_NR; @@ -2260,7 +2269,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms cm = cnd_table + nlp->packet_type; /* This may happen if packet number is 0: */ - if (cm->function == NULL) { + if (cm->minor_based == NULL) { retcode = ERR_PACKET_NR; goto fail; } @@ -2281,7 +2290,28 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */ /* reply->tag_list; might be modified by cm->function. */ - rr = cm->function(mdev, nlp, reply); + retcode = ERR_MINOR_INVALID; + rr = 0; + switch (cm->type) { + case CHT_MINOR: + mdev = minor_to_mdev(nlp->drbd_minor); + if (!mdev) + goto fail; + rr = cm->minor_based(mdev, nlp, reply); + break; + case CHT_CONN: + tconn = conn_by_name(nlp->obj_name); + if (!tconn) { + retcode = ERR_CONN_NOT_KNOWN; + goto fail; + } + rr = cm->conn_based(tconn, nlp, reply); + break; + case CHT_CTOR: + rr = cm->constructor(nlp, reply); + break; + /* case CHT_RES: */ + } cn_reply->id = req->id; cn_reply->seq = req->seq; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 70a688b92c1b..7683b4ab6583 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -155,6 +155,7 @@ enum drbd_ret_code { ERR_CONG_NOT_PROTO_A = 155, ERR_PIC_AFTER_DEP = 156, ERR_PIC_PEER_DEP = 157, + ERR_CONN_NOT_KNOWN = 158, /* insert new ones above this line */ AFTER_LAST_ERR_CODE @@ -347,8 +348,11 @@ enum drbd_timeout_flag { /* Start of the new netlink/connector stuff */ -#define DRBD_NL_CREATE_DEVICE 0x01 -#define DRBD_NL_SET_DEFAULTS 0x02 +enum drbd_ncr_flags { + DRBD_NL_CREATE_DEVICE = 0x01, + DRBD_NL_SET_DEFAULTS = 0x02, +}; +#define DRBD_NL_OBJ_NAME_LEN 32 /* For searching a vacant cn_idx value */ @@ -356,8 +360,15 @@ enum drbd_timeout_flag { struct drbd_nl_cfg_req { int packet_type; - unsigned int drbd_minor; - int flags; + union { + struct { + unsigned int drbd_minor; + enum drbd_ncr_flags flags; + }; + struct { + char obj_name[DRBD_NL_OBJ_NAME_LEN]; + }; + }; unsigned short tag_list[]; }; From 80883197da071239ed9e76bd3b9d8c9c5e19e4e6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 18 Feb 2011 14:56:45 +0100 Subject: [PATCH 0148/5831] drbd: Converted drbd_nl_(net_conf|disconnect)() from mdev to tconn Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 2 + drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_nl.c | 120 ++++++++++++++++----------------- 3 files changed, 63 insertions(+), 61 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 033af1995867..a27e2a4e038d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -170,6 +170,7 @@ drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { extern struct drbd_conf **minor_table; extern struct ratelimit_state drbd_ratelimit_state; +extern struct list_head drbd_tconns; /* on the wire */ enum drbd_packet { @@ -1474,6 +1475,7 @@ extern wait_queue_head_t drbd_pp_wait; extern rwlock_t global_state_lock; +extern int conn_lowest_minor(struct drbd_tconn *tconn); extern struct drbd_conf *drbd_new_device(unsigned int minor); extern void drbd_free_mdev(struct drbd_conf *mdev); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4761426f9ad7..2bfd63058f40 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -615,7 +615,7 @@ char *drbd_task_to_thread_name(struct 
drbd_tconn *tconn, struct task_struct *tas } #ifdef CONFIG_SMP -static int conn_lowest_minor(struct drbd_tconn *tconn) +int conn_lowest_minor(struct drbd_tconn *tconn) { int minor = 0; idr_get_next(&tconn->volumes, &minor); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 27a43d138f6b..455a51dd364d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1326,7 +1326,7 @@ static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, return 0; } -static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_net_conf(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { int i; @@ -1335,16 +1335,17 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct crypto_hash *tfm = NULL; struct crypto_hash *integrity_w_tfm = NULL; struct crypto_hash *integrity_r_tfm = NULL; - struct drbd_conf *odev; + struct drbd_conf *mdev; char hmac_name[CRYPTO_MAX_ALG_NAME]; void *int_dig_out = NULL; void *int_dig_in = NULL; void *int_dig_vv = NULL; + struct drbd_tconn *oconn; struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr; - conn_reconfig_start(mdev->tconn); + conn_reconfig_start(tconn); - if (mdev->state.conn > C_STANDALONE) { + if (tconn->cstate > C_STANDALONE) { retcode = ERR_NET_CONFIGURED; goto fail; } @@ -1387,13 +1388,25 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, goto fail; } - if (get_ldev(mdev)) { - enum drbd_fencing_p fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) { - retcode = ERR_STONITH_AND_PROT_A; + idr_for_each_entry(&tconn->volumes, mdev, i) { + if (get_ldev(mdev)) { + enum drbd_fencing_p fp = mdev->ldev->dc.fencing; + put_ldev(mdev); + if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) { + retcode = ERR_STONITH_AND_PROT_A; + goto fail; + } + } + if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { + retcode = ERR_DISCARD; goto fail; } + if (!mdev->bitmap) { + if(drbd_bm_init(mdev)) { + retcode = ERR_NOMEM; + goto fail; + } + } } if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) { @@ -1401,31 +1414,25 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, goto fail; } - if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { - retcode = ERR_DISCARD; - goto fail; - } - retcode = NO_ERROR; new_my_addr = (struct sockaddr *)&new_conf->my_addr; new_peer_addr = (struct sockaddr *)&new_conf->peer_addr; - for (i = 0; i < minor_count; i++) { - odev = minor_to_mdev(i); - if (!odev || odev == mdev) + list_for_each_entry(oconn, &drbd_tconns, all_tconn) { + if (oconn == tconn) continue; - if (get_net_conf(odev->tconn)) { - taken_addr = (struct sockaddr *)&odev->tconn->net_conf->my_addr; - if (new_conf->my_addr_len == odev->tconn->net_conf->my_addr_len && + if (get_net_conf(oconn)) { + taken_addr = (struct sockaddr *)&oconn->net_conf->my_addr; + if (new_conf->my_addr_len == oconn->net_conf->my_addr_len && !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len)) retcode = ERR_LOCAL_ADDR; - taken_addr = (struct sockaddr *)&odev->tconn->net_conf->peer_addr; - if (new_conf->peer_addr_len == odev->tconn->net_conf->peer_addr_len && + taken_addr = (struct sockaddr *)&oconn->net_conf->peer_addr; + if (new_conf->peer_addr_len == oconn->net_conf->peer_addr_len && !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len)) retcode = 
ERR_PEER_ADDR; - put_net_conf(odev->tconn); + put_net_conf(oconn); if (retcode != NO_ERROR) goto fail; } @@ -1470,6 +1477,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0; + /* allocation not in the IO path, cqueue thread context */ if (integrity_w_tfm) { i = crypto_hash_digestsize(integrity_w_tfm); int_dig_out = kmalloc(i, GFP_KERNEL); @@ -1489,46 +1497,40 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } } - if (!mdev->bitmap) { - if(drbd_bm_init(mdev)) { - retcode = ERR_NOMEM; - goto fail; - } - } - - drbd_flush_workqueue(mdev); - spin_lock_irq(&mdev->tconn->req_lock); - if (mdev->tconn->net_conf != NULL) { + conn_flush_workqueue(tconn); + spin_lock_irq(&tconn->req_lock); + if (tconn->net_conf != NULL) { retcode = ERR_NET_CONFIGURED; - spin_unlock_irq(&mdev->tconn->req_lock); + spin_unlock_irq(&tconn->req_lock); goto fail; } - mdev->tconn->net_conf = new_conf; + tconn->net_conf = new_conf; - mdev->send_cnt = 0; - mdev->recv_cnt = 0; + crypto_free_hash(tconn->cram_hmac_tfm); + tconn->cram_hmac_tfm = tfm; - crypto_free_hash(mdev->tconn->cram_hmac_tfm); - mdev->tconn->cram_hmac_tfm = tfm; + crypto_free_hash(tconn->integrity_w_tfm); + tconn->integrity_w_tfm = integrity_w_tfm; - crypto_free_hash(mdev->tconn->integrity_w_tfm); - mdev->tconn->integrity_w_tfm = integrity_w_tfm; + crypto_free_hash(tconn->integrity_r_tfm); + tconn->integrity_r_tfm = integrity_r_tfm; - crypto_free_hash(mdev->tconn->integrity_r_tfm); - mdev->tconn->integrity_r_tfm = integrity_r_tfm; + kfree(tconn->int_dig_out); + kfree(tconn->int_dig_in); + kfree(tconn->int_dig_vv); + tconn->int_dig_out=int_dig_out; + tconn->int_dig_in=int_dig_in; + tconn->int_dig_vv=int_dig_vv; + retcode = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); + spin_unlock_irq(&tconn->req_lock); - kfree(mdev->tconn->int_dig_out); - kfree(mdev->tconn->int_dig_in); - kfree(mdev->tconn->int_dig_vv); - mdev->tconn->int_dig_out=int_dig_out; - mdev->tconn->int_dig_in=int_dig_in; - mdev->tconn->int_dig_vv=int_dig_vv; - retcode = _conn_request_state(mdev->tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); - spin_unlock_irq(&mdev->tconn->req_lock); - - kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); + idr_for_each_entry(&tconn->volumes, mdev, i) { + mdev->send_cnt = 0; + mdev->recv_cnt = 0; + kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); + } reply->ret_code = retcode; - conn_reconfig_done(mdev->tconn); + conn_reconfig_done(tconn); return 0; fail: @@ -1541,14 +1543,13 @@ fail: kfree(new_conf); reply->ret_code = retcode; - conn_reconfig_done(mdev->tconn); + conn_reconfig_done(tconn); return 0; } -static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, +static int drbd_nl_disconnect(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { - struct drbd_tconn *tconn = mdev->tconn; int retcode; struct disconnect dc; @@ -1600,7 +1601,6 @@ static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl done: retcode = NO_ERROR; fail: - drbd_md_sync(mdev); reply->ret_code = retcode; return 0; } @@ -2211,8 +2211,8 @@ static struct cn_handler_struct cnd_table[] = { [ P_secondary ] = { CHT_MINOR, { &drbd_nl_secondary }, 0 }, [ P_disk_conf ] = { CHT_MINOR, { &drbd_nl_disk_conf }, 0 }, [ P_detach ] = { CHT_MINOR, { &drbd_nl_detach }, 0 }, - [ P_net_conf ] = { CHT_MINOR, { &drbd_nl_net_conf }, 0 }, - [ P_disconnect ] = { 
CHT_MINOR, { &drbd_nl_disconnect }, 0 }, + [ P_net_conf ] = { CHT_CONN, { .conn_based = &drbd_nl_net_conf }, 0 }, + [ P_disconnect ] = { CHT_CONN, { .conn_based = &drbd_nl_disconnect }, 0 }, [ P_resize ] = { CHT_MINOR, { &drbd_nl_resize }, 0 }, [ P_syncer_conf ] = { CHT_MINOR, { &drbd_nl_syncer_conf },0 }, [ P_invalidate ] = { CHT_MINOR, { &drbd_nl_invalidate }, 0 }, From 774b305518a68a50df4f479bcf79da2add724e6e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 22 Feb 2011 02:07:03 -0500 Subject: [PATCH 0149/5831] drbd: Implemented new commands to create/delete connections/minors Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +- drivers/block/drbd/drbd_main.c | 66 +++++++++++--------- drivers/block/drbd/drbd_nl.c | 106 ++++++++++++++++++++------------- include/linux/drbd.h | 3 + include/linux/drbd_nl.h | 12 ++++ 5 files changed, 119 insertions(+), 72 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a27e2a4e038d..535d503886d8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1258,7 +1258,6 @@ extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); extern void drbd_go_diskless(struct drbd_conf *mdev); extern void drbd_ldev_destroy(struct drbd_conf *mdev); - /* Meta data layout We reserve a 128MB Block (4k aligned) * either at the end of the backing device @@ -1476,8 +1475,9 @@ extern wait_queue_head_t drbd_pp_wait; extern rwlock_t global_state_lock; extern int conn_lowest_minor(struct drbd_tconn *tconn); -extern struct drbd_conf *drbd_new_device(unsigned int minor); +enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr); extern void drbd_free_mdev(struct drbd_conf *mdev); +extern void drbd_delete_device(unsigned int minor); struct drbd_tconn *drbd_new_tconn(char *name); extern void drbd_free_tconn(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2bfd63058f40..ec7d0d98657c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -614,13 +614,16 @@ char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *tas return thi ? thi->name : task->comm; } -#ifdef CONFIG_SMP int conn_lowest_minor(struct drbd_tconn *tconn) { int minor = 0; - idr_get_next(&tconn->volumes, &minor); + + if (!idr_get_next(&tconn->volumes, &minor)) + return -1; return minor; } + +#ifdef CONFIG_SMP /** * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs * @mdev: DRBD device. @@ -2078,15 +2081,16 @@ static void drbd_release_ee_lists(struct drbd_conf *mdev) dev_err(DEV, "%d EEs in net list found!\n", rr); } -/* caution. no locking. - * currently only used from module cleanup code. */ -static void drbd_delete_device(unsigned int minor) +/* caution. no locking. 
*/ +void drbd_delete_device(unsigned int minor) { struct drbd_conf *mdev = minor_to_mdev(minor); if (!mdev) return; + idr_remove(&mdev->tconn->volumes, minor); + /* paranoia asserts */ D_ASSERT(mdev->open_cnt == 0); D_ASSERT(list_empty(&mdev->tconn->data.work.q)); @@ -2101,7 +2105,6 @@ static void drbd_delete_device(unsigned int minor) bdput(mdev->this_bdev); drbd_free_resources(mdev); - drbd_free_tconn(mdev->tconn); drbd_release_ee_lists(mdev); @@ -2223,6 +2226,9 @@ struct drbd_tconn *drbd_new_tconn(char *name) if (!tconn->name) goto fail; + if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL)) + goto fail; + if (!tl_init(tconn)) goto fail; @@ -2252,6 +2258,7 @@ struct drbd_tconn *drbd_new_tconn(char *name) fail: tl_cleanup(tconn); + free_cpumask_var(tconn->cpu_mask); kfree(tconn->name); kfree(tconn); @@ -2265,6 +2272,7 @@ void drbd_free_tconn(struct drbd_tconn *tconn) write_unlock_irq(&global_state_lock); idr_destroy(&tconn->volumes); + free_cpumask_var(tconn->cpu_mask); kfree(tconn->name); kfree(tconn->int_dig_out); kfree(tconn->int_dig_in); @@ -2272,32 +2280,31 @@ void drbd_free_tconn(struct drbd_tconn *tconn) kfree(tconn); } -struct drbd_conf *drbd_new_device(unsigned int minor) +enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr) { struct drbd_conf *mdev; struct gendisk *disk; struct request_queue *q; - char conn_name[9]; /* drbd1234N */ - int vnr; + int vnr_got = vnr; + + mdev = minor_to_mdev(minor); + if (mdev) + return ERR_MINOR_EXISTS; /* GFP_KERNEL, we are outside of all write-out paths */ mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL); if (!mdev) - return NULL; - sprintf(conn_name, "drbd%d", minor); - mdev->tconn = drbd_new_tconn(conn_name); - if (!mdev->tconn) - goto out_no_tconn; - if (!idr_pre_get(&mdev->tconn->volumes, GFP_KERNEL)) - goto out_no_cpumask; - if (idr_get_new(&mdev->tconn->volumes, mdev, &vnr)) - goto out_no_cpumask; - if (vnr != 0) { - dev_err(DEV, "vnr = %d\n", vnr); - goto out_no_cpumask; + return ERR_NOMEM; + + mdev->tconn = tconn; + if (!idr_pre_get(&tconn->volumes, GFP_KERNEL)) + goto out_no_idr; + if (idr_get_new(&tconn->volumes, mdev, &vnr_got)) + goto out_no_idr; + if (vnr_got != vnr) { + dev_err(DEV, "vnr_got (%d) != vnr (%d)\n", vnr_got, vnr); + goto out_no_q; } - if (!zalloc_cpumask_var(&mdev->tconn->cpu_mask, GFP_KERNEL)) - goto out_no_cpumask; mdev->minor = minor; @@ -2354,7 +2361,10 @@ struct drbd_conf *drbd_new_device(unsigned int minor) INIT_LIST_HEAD(&mdev->current_epoch->list); mdev->epochs = 1; - return mdev; + minor_table[minor] = mdev; + add_disk(disk); + + return NO_ERROR; /* out_whatever_else: kfree(mdev->current_epoch); */ @@ -2367,12 +2377,10 @@ out_no_io_page: out_no_disk: blk_cleanup_queue(q); out_no_q: - free_cpumask_var(mdev->tconn->cpu_mask); -out_no_cpumask: - drbd_free_tconn(mdev->tconn); -out_no_tconn: + idr_remove(&tconn->volumes, vnr_got); +out_no_idr: kfree(mdev); - return NULL; + return ERR_NOMEM; } /* counterpart of drbd_new_device. 
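As background for the idr handling in conn_new_minor() and drbd_delete_device() above, here is a minimal, self-contained sketch of the old two-step reserve/insert idr pattern. The helper, the example idr and the -EBUSY return are made up for illustration; idr_pre_get(), idr_get_new() and idr_remove() are the actual kernel calls the patch relies on.

#include <linux/idr.h>
#include <linux/errno.h>
#include <linux/gfp.h>

static DEFINE_IDR(example_idr);	/* hypothetical, stands in for tconn->volumes */

/* Reserve memory first (idr_pre_get() may sleep and preallocates idr layers),
 * then insert the pointer and check that we were handed the id we asked for,
 * mirroring the vnr_got != vnr check in conn_new_minor(). */
static int example_idr_insert(void *ptr, int want_id)
{
	int got_id, err;

	if (!idr_pre_get(&example_idr, GFP_KERNEL))
		return -ENOMEM;
	err = idr_get_new(&example_idr, ptr, &got_id);
	if (err)
		return err;	/* -EAGAIN would normally mean: redo idr_pre_get() and retry */
	if (got_id != want_id) {
		idr_remove(&example_idr, got_id);	/* undo, the slot we wanted was taken */
		return -EBUSY;
	}
	return 0;
}

Later kernels replaced this two-step dance with idr_alloc(), which accepts the requested id range directly.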
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 455a51dd364d..f2739fd188a0 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -443,40 +443,6 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) return rv; } -static struct drbd_conf *ensure_mdev(int minor, int create) -{ - struct drbd_conf *mdev; - - if (minor >= minor_count) - return NULL; - - mdev = minor_to_mdev(minor); - - if (!mdev && create) { - struct gendisk *disk = NULL; - mdev = drbd_new_device(minor); - - spin_lock_irq(&drbd_pp_lock); - if (minor_table[minor] == NULL) { - minor_table[minor] = mdev; - disk = mdev->vdisk; - mdev = NULL; - } /* else: we lost the race */ - spin_unlock_irq(&drbd_pp_lock); - - if (disk) /* we won the race above */ - /* in case we ever add a drbd_delete_device(), - * don't forget the del_gendisk! */ - add_disk(disk); - else /* we lost the race above */ - drbd_free_mdev(mdev); - - mdev = minor_to_mdev(minor); - } - - return mdev; -} - static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { @@ -1789,12 +1755,6 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX)) sc.al_extents = DRBD_AL_EXTENTS_MAX; - /* to avoid spurious errors when configuring minors before configuring - * the minors they depend on: if necessary, first create the minor we - * depend on */ - if (sc.after >= 0) - ensure_mdev(sc.after, 1); - /* most sanity checks done, try to assign the new sync-after * dependency. need to hold the global lock in there, * to avoid a race in the dependency loop check. */ @@ -2184,13 +2144,73 @@ out: return 0; } +static int drbd_nl_new_conn(struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +{ + struct new_connection args; + + if (!new_connection_from_tags(nlp->tag_list, &args)) { + reply->ret_code = ERR_MANDATORY_TAG; + return 0; + } + + reply->ret_code = NO_ERROR; + if (!drbd_new_tconn(args.name)) + reply->ret_code = ERR_NOMEM; + + return 0; +} + +static int drbd_nl_new_minor(struct drbd_tconn *tconn, + struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +{ + struct new_minor args; + + args.vol_nr = 0; + args.minor = 0; + + if (!new_minor_from_tags(nlp->tag_list, &args)) { + reply->ret_code = ERR_MANDATORY_TAG; + return 0; + } + + reply->ret_code = conn_new_minor(tconn, args.minor, args.vol_nr); + + return 0; +} + +static int drbd_nl_del_minor(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + if (mdev->state.disk == D_DISKLESS && + mdev->state.conn == C_STANDALONE && + mdev->state.role == R_SECONDARY) { + drbd_delete_device(mdev_to_minor(mdev)); + reply->ret_code = NO_ERROR; + } else { + reply->ret_code = ERR_MINOR_CONFIGURED; + } + return 0; +} + +static int drbd_nl_del_conn(struct drbd_tconn *tconn, + struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +{ + if (conn_lowest_minor(tconn) < 0) { + drbd_free_tconn(tconn); + reply->ret_code = NO_ERROR; + } else { + reply->ret_code = ERR_CONN_IN_USE; + } + + return 0; +} + enum cn_handler_type { CHT_MINOR, CHT_CONN, CHT_CTOR, /* CHT_RES, later */ }; - struct cn_handler_struct { enum cn_handler_type type; union { @@ -2235,6 +2255,10 @@ static struct cn_handler_struct cnd_table[] = { sizeof(struct get_timeout_flag_tag_len_struct)}, [ P_start_ov ] = { CHT_MINOR, { &drbd_nl_start_ov }, 0 }, [ P_new_c_uuid ] = { CHT_MINOR, { &drbd_nl_new_c_uuid }, 0 }, + [ P_new_connection 
] = { CHT_CTOR, { .constructor = &drbd_nl_new_conn }, 0 }, + [ P_new_minor ] = { CHT_CONN, { .conn_based = &drbd_nl_new_minor }, 0 }, + [ P_del_minor ] = { CHT_MINOR, { &drbd_nl_del_minor }, 0 }, + [ P_del_connection ] = { CHT_CONN, { .conn_based = &drbd_nl_del_conn }, 0 }, }; static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 7683b4ab6583..e192167e6145 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -156,6 +156,9 @@ enum drbd_ret_code { ERR_PIC_AFTER_DEP = 156, ERR_PIC_PEER_DEP = 157, ERR_CONN_NOT_KNOWN = 158, + ERR_CONN_IN_USE = 159, + ERR_MINOR_CONFIGURED = 160, + ERR_MINOR_EXISTS = 161, /* insert new ones above this line */ AFTER_LAST_ERR_CODE diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index ab6159e4fcf0..1216c7a432c5 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -152,6 +152,18 @@ NL_PACKET(new_c_uuid, 26, NL_RESPONSE(return_code_only, 27) #endif +NL_PACKET(new_connection, 28, /* CHT_CTOR */ + NL_STRING( 85, T_MANDATORY, name, DRBD_NL_OBJ_NAME_LEN) +) + +NL_PACKET(new_minor, 29, /* CHT_CONN */ + NL_INTEGER( 86, T_MANDATORY, minor) + NL_INTEGER( 87, T_MANDATORY, vol_nr) +) + +NL_PACKET(del_minor, 30, ) /* CHT_MINOR */ +NL_PACKET(del_connection, 31, ) /* CHT_CONN */ + #undef NL_PACKET #undef NL_INTEGER #undef NL_INT64 From 81a5d60ecfe1d94627abb54810445f0fd5892f42 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 22 Feb 2011 19:53:16 -0500 Subject: [PATCH 0150/5831] drbd: Replaced the minor_table array by an idr Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 ++---- drivers/block/drbd/drbd_main.c | 42 ++++++++++++++++-------------- drivers/block/drbd/drbd_proc.c | 14 +++------- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/drbd/drbd_worker.c | 10 ++----- 5 files changed, 31 insertions(+), 45 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 535d503886d8..783526ab7b20 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -168,8 +168,8 @@ drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { /* 4th incarnation of the disk layout. */ #define DRBD_MD_MAGIC (DRBD_MAGIC+4) -extern struct drbd_conf **minor_table; extern struct ratelimit_state drbd_ratelimit_state; +extern struct idr minors; extern struct list_head drbd_tconns; /* on the wire */ @@ -1109,11 +1109,7 @@ struct drbd_conf { static inline struct drbd_conf *minor_to_mdev(unsigned int minor) { - struct drbd_conf *mdev; - - mdev = minor < minor_count ? 
minor_table[minor] : NULL; - - return mdev; + return (struct drbd_conf *)idr_find(&minors, minor); } static inline unsigned int mdev_to_minor(struct drbd_conf *mdev) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index ec7d0d98657c..6e190c0c9f6c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -74,7 +74,7 @@ MODULE_AUTHOR("Philipp Reisner , " MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION); MODULE_VERSION(REL_VERSION); MODULE_LICENSE("GPL"); -MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (" +MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices (" __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")"); MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR); @@ -120,7 +120,7 @@ module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0 /* in 2.6.x, our device mapping and config info contains our virtual gendisks * as member "struct gendisk *vdisk;" */ -struct drbd_conf **minor_table; +struct idr minors; struct list_head drbd_tconns; /* list of struct drbd_tconn */ struct kmem_cache *drbd_request_cache; @@ -2118,11 +2118,13 @@ void drbd_delete_device(unsigned int minor) * allocated from drbd_new_device * and actually free the mdev itself */ drbd_free_mdev(mdev); + idr_remove(&minors, minor); } static void drbd_cleanup(void) { unsigned int i; + struct drbd_conf *mdev; unregister_reboot_notifier(&drbd_notifier); @@ -2139,17 +2141,13 @@ static void drbd_cleanup(void) drbd_nl_cleanup(); - if (minor_table) { - i = minor_count; - while (i--) - drbd_delete_device(i); - drbd_destroy_mempools(); - } - - kfree(minor_table); - + idr_for_each_entry(&minors, mdev, i) + drbd_delete_device(i); + drbd_destroy_mempools(); unregister_blkdev(DRBD_MAJOR, "drbd"); + idr_destroy(&minors); + printk(KERN_INFO "drbd: module cleanup done.\n"); } @@ -2286,6 +2284,7 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, struct gendisk *disk; struct request_queue *q; int vnr_got = vnr; + int minor_got = minor; mdev = minor_to_mdev(minor); if (mdev) @@ -2361,13 +2360,20 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, INIT_LIST_HEAD(&mdev->current_epoch->list); mdev->epochs = 1; - minor_table[minor] = mdev; + if (!idr_pre_get(&minors, GFP_KERNEL)) + goto out_no_minor_idr; + if (idr_get_new(&minors, mdev, &minor_got)) + goto out_no_minor_idr; + if (minor_got != minor) { + idr_remove(&minors, minor_got); + goto out_no_minor_idr; + } add_disk(disk); return NO_ERROR; -/* out_whatever_else: - kfree(mdev->current_epoch); */ +out_no_minor_idr: + kfree(mdev->current_epoch); out_no_epoch: drbd_bm_cleanup(mdev); out_no_bitmap: @@ -2406,7 +2412,7 @@ int __init drbd_init(void) if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { printk(KERN_ERR - "drbd: invalid minor_count (%d)\n", minor_count); + "drbd: invalid minor_count (%d)\n", minor_count); #ifdef MODULE return -EINVAL; #else @@ -2436,10 +2442,7 @@ int __init drbd_init(void) init_waitqueue_head(&drbd_pp_wait); drbd_proc = NULL; /* play safe for drbd_cleanup */ - minor_table = kzalloc(sizeof(struct drbd_conf *)*minor_count, - GFP_KERNEL); - if (!minor_table) - goto Enomem; + idr_init(&minors); err = drbd_create_mempools(); if (err) @@ -2460,7 +2463,6 @@ int __init drbd_init(void) printk(KERN_INFO "drbd: %s\n", drbd_buildtag()); printk(KERN_INFO "drbd: registered as block device major %d\n", DRBD_MAJOR); - printk(KERN_INFO "drbd: minor_table 
@ 0x%p\n", minor_table); return 0; /* Success! */ diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 4e53cb3d99e7..36c9a6cecdc6 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -194,7 +194,7 @@ static void resync_dump_detail(struct seq_file *seq, struct lc_element *e) static int drbd_seq_show(struct seq_file *seq, void *v) { - int i, hole = 0; + int i, prev_i = -1; const char *sn; struct drbd_conf *mdev; @@ -227,16 +227,10 @@ static int drbd_seq_show(struct seq_file *seq, void *v) oos .. known out-of-sync kB */ - for (i = 0; i < minor_count; i++) { - mdev = minor_to_mdev(i); - if (!mdev) { - hole = 1; - continue; - } - if (hole) { - hole = 0; + idr_for_each_entry(&minors, mdev, i) { + if (prev_i != i - 1) seq_printf(seq, "\n"); - } + prev_i = i; sn = drbd_conn_str(mdev->state.conn); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index fcdc2c1cc503..e44bf3c25718 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -284,7 +284,7 @@ static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net) atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use; int i; - if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count) + if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count) i = page_chain_free(page); else { struct page *tmp; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 5cb5ffce097c..e459cb2076bd 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1349,10 +1349,7 @@ static int _drbd_pause_after(struct drbd_conf *mdev) struct drbd_conf *odev; int i, rv = 0; - for (i = 0; i < minor_count; i++) { - odev = minor_to_mdev(i); - if (!odev) - continue; + idr_for_each_entry(&minors, odev, i) { if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) continue; if (!_drbd_may_sync_now(odev)) @@ -1374,10 +1371,7 @@ static int _drbd_resume_next(struct drbd_conf *mdev) struct drbd_conf *odev; int i, rv = 0; - for (i = 0; i < minor_count; i++) { - odev = minor_to_mdev(i); - if (!odev) - continue; + idr_for_each_entry(&minors, odev, i) { if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) continue; if (odev->state.aftr_isp) { From 2c4a48d097e511e325b63c9caca3a9b94fe03be4 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 17:18:24 +0100 Subject: [PATCH 0151/5831] drbd: remove unused define Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 6e190c0c9f6c..12c9a704ea01 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -149,8 +149,6 @@ static const struct block_device_operations drbd_ops = { .release = drbd_release, }; -#define ARRY_SIZE(A) (sizeof(A)/sizeof(A[0])) - #ifdef __CHECKER__ /* When checking with sparse, and this is an inline function, sparse will give tons of false positives. When this is a real functions sparse works. From a5df0e199cf6b31400fa86f6c3f73fa6e127e9ed Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 12:51:43 +0100 Subject: [PATCH 0152/5831] drbd: default to detach on-io-error Old default behaviour was "pass-on", which is not useful in production at all. 
Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 75f05af33725..22920a8af4e2 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -125,7 +125,7 @@ #define DRBD_DISK_SIZE_SECT_MAX (1 * (2LLU << 40)) #define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */ -#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON +#define DRBD_ON_IO_ERROR_DEF EP_DETACH #define DRBD_FENCING_DEF FP_DONT_CARE #define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT From 3c13b680ce210313c6f7ad163435b62979958c09 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 16:10:01 +0100 Subject: [PATCH 0153/5831] drbd: only wakeup if something changed in update_peer_seq This commit got it wrong: drbd: Make the peer_seq updating code more obvious Make it more clear that update_peer_seq() is supposed to wake up the seq_wait queue whenever the sequence number changes. We don't need to wake up everytime we receive a sequence number that is _different_ from our currently stored "newest" sequence number, but only if we receive a sequence number _newer_ than what we already have, when we actually change mdev->peer_seq. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e44bf3c25718..cd78ebfefe52 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1734,14 +1734,15 @@ static bool need_peer_seq(struct drbd_conf *mdev) static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq) { - unsigned int old_peer_seq; + unsigned int newest_peer_seq; if (need_peer_seq(mdev)) { spin_lock(&mdev->peer_seq_lock); - old_peer_seq = mdev->peer_seq; - mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq); + newest_peer_seq = seq_max(mdev->peer_seq, peer_seq); + mdev->peer_seq = newest_peer_seq; spin_unlock(&mdev->peer_seq_lock); - if (old_peer_seq != peer_seq) + /* wake up only if we actually changed mdev->peer_seq */ + if (peer_seq == newest_peer_seq) wake_up(&mdev->seq_wait); } } From 35abf5942427f5062e4aae90dab9edb9dda8d200 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 12:39:46 +0100 Subject: [PATCH 0154/5831] drbd: add page pool to be used for meta data IO Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 23 ++++++++++++++++++++++- drivers/block/drbd/drbd_main.c | 9 +++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 783526ab7b20..2444a1683475 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1463,11 +1463,32 @@ extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ extern mempool_t *drbd_request_mempool; extern mempool_t *drbd_ee_mempool; -extern struct page *drbd_pp_pool; /* drbd's page pool */ +/* drbd's page pool, used to buffer data received from the peer, + * or data requested by the peer. + * + * This does not have an emergency reserve. + * + * When allocating from this pool, it first takes pages from the pool. + * Only if the pool is depleted will try to allocate from the system. 
+ * + * The assumption is that pages taken from this pool will be processed, + * and given back, "quickly", and then can be recycled, so we can avoid + * frequent calls to alloc_page(), and still will be able to make progress even + * under memory pressure. + */ +extern struct page *drbd_pp_pool; extern spinlock_t drbd_pp_lock; extern int drbd_pp_vacant; extern wait_queue_head_t drbd_pp_wait; +/* We also need a standard (emergency-reserve backed) page pool + * for meta data IO (activity log, bitmap). + * We can keep it global, as long as it is used as "N pages at a time". + * 128 should be plenty, currently we probably can get away with as few as 1. + */ +#define DRBD_MIN_POOL_PAGES 128 +extern mempool_t *drbd_md_io_page_pool; + extern rwlock_t global_state_lock; extern int conn_lowest_minor(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 12c9a704ea01..5f4c95905d5e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -129,6 +129,7 @@ struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ mempool_t *drbd_request_mempool; mempool_t *drbd_ee_mempool; +mempool_t *drbd_md_io_page_pool; /* I do not use a standard mempool, because: 1) I want to hand out the pre-allocated objects first. @@ -1952,6 +1953,8 @@ static void drbd_destroy_mempools(void) /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */ + if (drbd_md_io_page_pool) + mempool_destroy(drbd_md_io_page_pool); if (drbd_ee_mempool) mempool_destroy(drbd_ee_mempool); if (drbd_request_mempool) @@ -1965,6 +1968,7 @@ static void drbd_destroy_mempools(void) if (drbd_al_ext_cache) kmem_cache_destroy(drbd_al_ext_cache); + drbd_md_io_page_pool = NULL; drbd_ee_mempool = NULL; drbd_request_mempool = NULL; drbd_ee_cache = NULL; @@ -1988,6 +1992,7 @@ static int drbd_create_mempools(void) drbd_bm_ext_cache = NULL; drbd_al_ext_cache = NULL; drbd_pp_pool = NULL; + drbd_md_io_page_pool = NULL; /* caches */ drbd_request_cache = kmem_cache_create( @@ -2011,6 +2016,10 @@ static int drbd_create_mempools(void) goto Enomem; /* mempools */ + drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0); + if (drbd_md_io_page_pool == NULL) + goto Enomem; + drbd_request_mempool = mempool_create(number, mempool_alloc_slab, mempool_free_slab, drbd_request_cache); if (drbd_request_mempool == NULL) From 9db4e77f8cbbeeb32a4d2aea022c80333c445984 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 15:38:47 +0100 Subject: [PATCH 0155/5831] drbd: use the newly introduced page pool for bitmap IO Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 3791082979e1..0009e40744ab 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -963,9 +963,8 @@ static void bm_async_io_complete(struct bio *bio, int error) bm_page_unlock_io(mdev, idx); - /* FIXME give back to page pool */ if (ctx->flags & BM_AIO_COPY_PAGES) - put_page(bio->bi_io_vec[0].bv_page); + mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool); bio_put(bio); @@ -999,10 +998,8 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must bm_set_page_unchanged(b->bm_pages[page_nr]); if (ctx->flags & BM_AIO_COPY_PAGES) { - /* FIXME alloc_page is good enough for now, but actually needs - * to 
use pre-allocated page pool */ void *src, *dest; - page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT); + page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT); dest = kmap_atomic(page, KM_USER0); src = kmap_atomic(b->bm_pages[page_nr], KM_USER1); memcpy(dest, src, PAGE_SIZE); @@ -1014,6 +1011,8 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must bio->bi_bdev = mdev->ldev->md_bdev; bio->bi_sector = on_disk_sector; + /* bio_add_page of a single page to an empty bio will always succeed, + * according to api. Do we want to assert that? */ bio_add_page(bio, page, len, 0); bio->bi_private = ctx; bio->bi_end_io = bm_async_io_complete; From da4a75d2ef064501f6756986af6ea330ba0585d7 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 17:02:01 +0100 Subject: [PATCH 0156/5831] drbd: introduce a bio_set to allocate housekeeping bios from Don't rely on availability of bios from the global fs_bio_set, we should use our own bio_set for meta data IO. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 2 +- drivers/block/drbd/drbd_bitmap.c | 3 +-- drivers/block/drbd/drbd_int.h | 6 ++++++ drivers/block/drbd/drbd_main.c | 28 ++++++++++++++++++++++++++++ drivers/block/drbd/drbd_receiver.c | 6 +++++- 5 files changed, 41 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index ea3895de4e6d..7cd78617669b 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -125,7 +125,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, rw |= REQ_FUA | REQ_FLUSH; rw |= REQ_SYNC; - bio = bio_alloc(GFP_NOIO, 1); + bio = bio_alloc_drbd(GFP_NOIO); bio->bi_bdev = bdev->md_bdev; bio->bi_sector = sector; ok = (bio_add_page(bio, page, size, 0) == size); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 0009e40744ab..52c48143b22a 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -974,8 +974,7 @@ static void bm_async_io_complete(struct bio *bio, int error) static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) { - /* we are process context. 
we always get a bio */ - struct bio *bio = bio_alloc(GFP_KERNEL, 1); + struct bio *bio = bio_alloc_drbd(GFP_KERNEL); struct drbd_conf *mdev = ctx->mdev; struct drbd_bitmap *b = mdev->bitmap; struct page *page; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 2444a1683475..e68758344647 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1489,6 +1489,12 @@ extern wait_queue_head_t drbd_pp_wait; #define DRBD_MIN_POOL_PAGES 128 extern mempool_t *drbd_md_io_page_pool; +/* We also need to make sure we get a bio + * when we need it for housekeeping purposes */ +extern struct bio_set *drbd_md_io_bio_set; +/* to allocate from that set */ +extern struct bio *bio_alloc_drbd(gfp_t gfp_mask); + extern rwlock_t global_state_lock; extern int conn_lowest_minor(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 5f4c95905d5e..997b2e214670 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -130,6 +130,7 @@ struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ mempool_t *drbd_request_mempool; mempool_t *drbd_ee_mempool; mempool_t *drbd_md_io_page_pool; +struct bio_set *drbd_md_io_bio_set; /* I do not use a standard mempool, because: 1) I want to hand out the pre-allocated objects first. @@ -150,6 +151,25 @@ static const struct block_device_operations drbd_ops = { .release = drbd_release, }; +static void bio_destructor_drbd(struct bio *bio) +{ + bio_free(bio, drbd_md_io_bio_set); +} + +struct bio *bio_alloc_drbd(gfp_t gfp_mask) +{ + struct bio *bio; + + if (!drbd_md_io_bio_set) + return bio_alloc(gfp_mask, 1); + + bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set); + if (!bio) + return NULL; + bio->bi_destructor = bio_destructor_drbd; + return bio; +} + #ifdef __CHECKER__ /* When checking with sparse, and this is an inline function, sparse will give tons of false positives. When this is a real functions sparse works. @@ -1953,6 +1973,8 @@ static void drbd_destroy_mempools(void) /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */ + if (drbd_md_io_bio_set) + bioset_free(drbd_md_io_bio_set); if (drbd_md_io_page_pool) mempool_destroy(drbd_md_io_page_pool); if (drbd_ee_mempool) @@ -1968,6 +1990,7 @@ static void drbd_destroy_mempools(void) if (drbd_al_ext_cache) kmem_cache_destroy(drbd_al_ext_cache); + drbd_md_io_bio_set = NULL; drbd_md_io_page_pool = NULL; drbd_ee_mempool = NULL; drbd_request_mempool = NULL; @@ -1993,6 +2016,7 @@ static int drbd_create_mempools(void) drbd_al_ext_cache = NULL; drbd_pp_pool = NULL; drbd_md_io_page_pool = NULL; + drbd_md_io_bio_set = NULL; /* caches */ drbd_request_cache = kmem_cache_create( @@ -2016,6 +2040,10 @@ static int drbd_create_mempools(void) goto Enomem; /* mempools */ + drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0); + if (drbd_md_io_bio_set == NULL) + goto Enomem; + drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0); if (drbd_md_io_page_pool == NULL) goto Enomem; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index cd78ebfefe52..6dcf65484c26 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1124,7 +1124,11 @@ int drbd_submit_peer_request(struct drbd_conf *mdev, /* In most cases, we will only need one bio. But in case the lower * level restrictions happen to be different at this offset on this * side than those of the sending peer, we may need to submit the - * request in more than one bio. 
*/ + * request in more than one bio. + * + * Plain bio_alloc is good enough here, this is no DRBD internally + * generated bio, but a bio allocated on behalf of the peer. + */ next_bio: bio = bio_alloc(GFP_NOIO, nr_pages); if (!bio) { From 569083c08dc16c043b4bdd473d41ff85a2b2df9e Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 Mar 2011 09:49:02 +0100 Subject: [PATCH 0157/5831] drbd: fix drbd_delete_device: remove vnr from volumes; idr_remove(); synchronize_rcu(); before cleanup Still missing: rcu_readlock() on the various call sites that access/iterate over those idrs. We don't need a specific write lock, as we only modify from configuration context, which is already strictly serialized. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 42 +++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 997b2e214670..9f6db5947c65 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2124,7 +2124,9 @@ void drbd_delete_device(unsigned int minor) if (!mdev) return; - idr_remove(&mdev->tconn->volumes, minor); + idr_remove(&mdev->tconn->volumes, mdev->vnr); + idr_remove(&minors, minor); + synchronize_rcu(); /* paranoia asserts */ D_ASSERT(mdev->open_cnt == 0); @@ -2153,7 +2155,6 @@ void drbd_delete_device(unsigned int minor) * allocated from drbd_new_device * and actually free the mdev itself */ drbd_free_mdev(mdev); - idr_remove(&minors, minor); } static void drbd_cleanup(void) @@ -2331,15 +2332,6 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, return ERR_NOMEM; mdev->tconn = tconn; - if (!idr_pre_get(&tconn->volumes, GFP_KERNEL)) - goto out_no_idr; - if (idr_get_new(&tconn->volumes, mdev, &vnr_got)) - goto out_no_idr; - if (vnr_got != vnr) { - dev_err(DEV, "vnr_got (%d) != vnr (%d)\n", vnr_got, vnr); - goto out_no_q; - } - mdev->minor = minor; drbd_init_set_defaults(mdev); @@ -2395,19 +2387,35 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, INIT_LIST_HEAD(&mdev->current_epoch->list); mdev->epochs = 1; + if (!idr_pre_get(&tconn->volumes, GFP_KERNEL)) + goto out_no_vol_idr; + if (idr_get_new(&tconn->volumes, mdev, &vnr_got)) + goto out_no_vol_idr; + if (vnr_got != vnr) { + dev_err(DEV, "vnr_got (%d) != vnr (%d)\n", vnr_got, vnr); + goto out_idr_remove_vol; + } + if (!idr_pre_get(&minors, GFP_KERNEL)) - goto out_no_minor_idr; + goto out_idr_remove_vol; if (idr_get_new(&minors, mdev, &minor_got)) - goto out_no_minor_idr; + goto out_idr_remove_vol; if (minor_got != minor) { - idr_remove(&minors, minor_got); - goto out_no_minor_idr; + /* minor exists, or other idr strangeness? 
*/ + dev_err(DEV, "available minor (%d) != requested minor (%d)\n", + minor_got, minor); + goto out_idr_remove_minor; } add_disk(disk); return NO_ERROR; -out_no_minor_idr: +out_idr_remove_minor: + idr_remove(&minors, minor_got); +out_idr_remove_vol: + idr_remove(&tconn->volumes, vnr_got); + synchronize_rcu(); +out_no_vol_idr: kfree(mdev->current_epoch); out_no_epoch: drbd_bm_cleanup(mdev); @@ -2418,8 +2426,6 @@ out_no_io_page: out_no_disk: blk_cleanup_queue(q); out_no_q: - idr_remove(&tconn->volumes, vnr_got); -out_no_idr: kfree(mdev); return ERR_NOMEM; } From 3cb7a2a90fe35eb3059e8860d0c6917eb414f791 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 Mar 2011 10:00:58 +0100 Subject: [PATCH 0158/5831] drbd: get rid of drbd_bcast_ee, it is of no use anymore This function was used to broadcast the (leading part of the) bio payload in case we see a data integrity error. It could be received from userland with the drbdsetup events subcommand, to have a peek into the payload that caused the checksum mismatch, and guess from there what may have caused the mismatch, mainly to guess wether it was modification of in-flight data, or data corruption by broken hardware or software bugs. Meanwhile we support bios that are larger than the maximum payload a netlink datagram can carry. And we have means to reliably detect modification of in-flight data by calculating, and comparing, the checksum before and after sendmsg. There is no need to carry this around anymore. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6dcf65484c26..1aace37c5160 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1353,8 +1353,6 @@ read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, if (memcmp(dig_in, dig_vv, dgs)) { dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n", (unsigned long long)sector, data_size); - drbd_bcast_ee(mdev, "digest failed", - dgs, dig_in, dig_vv, peer_req); drbd_free_ee(mdev, peer_req); return NULL; } From ec2c35ac1ea288f5c931e32452ecea50068e8450 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 Mar 2011 10:20:08 +0100 Subject: [PATCH 0159/5831] drbd: prepare the transition from connector to genetlink This adds the new API header and helper files. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_genl.h | 349 +++++++++++++++++++++++++ include/linux/drbd_genl_api.h | 55 ++++ include/linux/genl_magic_func.h | 417 ++++++++++++++++++++++++++++++ include/linux/genl_magic_struct.h | 260 +++++++++++++++++++ 4 files changed, 1081 insertions(+) create mode 100644 include/linux/drbd_genl.h create mode 100644 include/linux/drbd_genl_api.h create mode 100644 include/linux/genl_magic_func.h create mode 100644 include/linux/genl_magic_struct.h diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h new file mode 100644 index 000000000000..84e16848f7a1 --- /dev/null +++ b/include/linux/drbd_genl.h @@ -0,0 +1,349 @@ +/* + * General overview: + * full generic netlink message: + * |nlmsghdr|genlmsghdr| + * + * payload: + * |optional fixed size family header| + * + * sequence of netlink attributes: + * I chose to have all "top level" attributes NLA_NESTED, + * corresponding to some real struct. 
+ * So we have a sequence of |tla, len| + * + * nested nla sequence: + * may be empty, or contain a sequence of netlink attributes + * representing the struct fields. + * + * The tag number of any field (regardless of containing struct) + * will be available as T_ ## field_name, + * so you cannot have the same field name in two differnt structs. + * + * The tag numbers themselves are per struct, though, + * so should always begin at 1 (not 0, that is the special "NLA_UNSPEC" type, + * which we won't use here). + * The tag numbers are used as index in the respective nla_policy array. + * + * GENL_struct(tag_name, tag_number, struct name, struct fields) - struct and policy + * genl_magic_struct.h + * generates the struct declaration, + * generates an entry in the tla enum, + * genl_magic_func.h + * generates an entry in the static tla policy + * with .type = NLA_NESTED + * generates the static _nl_policy definition, + * and static conversion functions + * + * genl_magic_func.h + * + * GENL_mc_group(group) + * genl_magic_struct.h + * does nothing + * genl_magic_func.h + * defines and registers the mcast group, + * and provides a send helper + * + * GENL_notification(op_name, op_num, mcast_group, tla list) + * These are notifications to userspace. + * + * genl_magic_struct.h + * generates an entry in the genl_ops enum, + * genl_magic_func.h + * does nothing + * + * mcast group: the name of the mcast group this notification should be + * expected on + * tla list: the list of expected top level attributes, + * for documentation and sanity checking. + * + * GENL_op(op_name, op_num, flags and handler, tla list) - "genl operations" + * These are requests from userspace. + * + * _op and _notification share the same "number space", + * op_nr will be assigned to "genlmsghdr->cmd" + * + * genl_magic_struct.h + * generates an entry in the genl_ops enum, + * genl_magic_func.h + * generates an entry in the static genl_ops array, + * and static register/unregister functions to + * genl_register_family_with_ops(). + * + * flags and handler: + * GENL_op_init( .doit = x, .dumpit = y, .flags = something) + * GENL_doit(x) => .dumpit = NULL, .flags = GENL_ADMIN_PERM + * tla list: the list of expected top level attributes, + * for documentation and sanity checking. + */ + +/* + * STRUCTS + */ + +/* this is sent kernel -> userland on various error conditions, and contains + * informational textual info, which is supposedly human readable. + * The computer relevant return code is in the drbd_genlmsghdr. + */ +GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, + /* "arbitrary" size strings, nla_policy.len = 0 */ + __str_field(1, GENLA_F_MANDATORY, info_text, 0) +) + +/* Configuration requests typically need a context to operate on. + * Possible keys are device minor (fits in the drbd_genlmsghdr), + * the replication link (aka connection) name, + * and/or the replication group (aka resource) name, + * and the volume id within the resource. 
*/ +GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, + /* currently only 256 volumes per group, + * but maybe we still change that */ + __u32_field(1, GENLA_F_MANDATORY, ctx_volume) + __str_field(2, GENLA_F_MANDATORY, ctx_conn_name, 128) +) + +GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, + __u64_field(1, GENLA_F_MANDATORY, disk_size) + __str_field(2, GENLA_F_REQUIRED, backing_dev, 128) + __str_field(3, GENLA_F_REQUIRED, meta_dev, 128) + __u32_field(4, GENLA_F_REQUIRED, meta_dev_idx) + __u32_field(5, GENLA_F_MANDATORY, max_bio_bvecs) + __u32_field(6, GENLA_F_MANDATORY, on_io_error) + __u32_field(7, GENLA_F_MANDATORY, fencing) + __flg_field(8, GENLA_F_MANDATORY, no_disk_barrier) + __flg_field(9, GENLA_F_MANDATORY, no_disk_flush) + __flg_field(10, GENLA_F_MANDATORY, no_disk_drain) + __flg_field(11, GENLA_F_MANDATORY, no_md_flush) + __flg_field(12, GENLA_F_MANDATORY, use_bmbv) +) + +GENL_struct(DRBD_NLA_SYNCER_CONF, 4, syncer_conf, + __u32_field(1, GENLA_F_MANDATORY, rate) + __u32_field(2, GENLA_F_MANDATORY, after) + __u32_field(3, GENLA_F_MANDATORY, al_extents) + __str_field(4, GENLA_F_MANDATORY, cpu_mask, 32) + __str_field(5, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field(6, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __flg_field(7, GENLA_F_MANDATORY, use_rle) + __u32_field(8, GENLA_F_MANDATORY, on_no_data) + __u32_field(9, GENLA_F_MANDATORY, c_plan_ahead) + __u32_field(10, GENLA_F_MANDATORY, c_delay_target) + __u32_field(11, GENLA_F_MANDATORY, c_fill_target) + __u32_field(12, GENLA_F_MANDATORY, c_max_rate) + __u32_field(13, GENLA_F_MANDATORY, c_min_rate) +) + +GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, + __str_field(1, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, + shared_secret, SHARED_SECRET_MAX) + __str_field(2, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field(3, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field(4, GENLA_F_REQUIRED, my_addr, 128) + __str_field(5, GENLA_F_REQUIRED, peer_addr, 128) + __u32_field(6, GENLA_F_REQUIRED, wire_protocol) + __u32_field(7, GENLA_F_MANDATORY, try_connect_int) + __u32_field(8, GENLA_F_MANDATORY, timeout) + __u32_field(9, GENLA_F_MANDATORY, ping_int) + __u32_field(10, GENLA_F_MANDATORY, ping_timeo) + __u32_field(11, GENLA_F_MANDATORY, sndbuf_size) + __u32_field(12, GENLA_F_MANDATORY, rcvbuf_size) + __u32_field(13, GENLA_F_MANDATORY, ko_count) + __u32_field(14, GENLA_F_MANDATORY, max_buffers) + __u32_field(15, GENLA_F_MANDATORY, max_epoch_size) + __u32_field(16, GENLA_F_MANDATORY, unplug_watermark) + __u32_field(17, GENLA_F_MANDATORY, after_sb_0p) + __u32_field(18, GENLA_F_MANDATORY, after_sb_1p) + __u32_field(19, GENLA_F_MANDATORY, after_sb_2p) + __u32_field(20, GENLA_F_MANDATORY, rr_conflict) + __u32_field(21, GENLA_F_MANDATORY, on_congestion) + __u32_field(22, GENLA_F_MANDATORY, cong_fill) + __u32_field(23, GENLA_F_MANDATORY, cong_extents) + __flg_field(24, GENLA_F_MANDATORY, two_primaries) + __flg_field(25, GENLA_F_MANDATORY, want_lose) + __flg_field(26, GENLA_F_MANDATORY, no_cork) + __flg_field(27, GENLA_F_MANDATORY, always_asbp) + __flg_field(28, GENLA_F_MANDATORY, dry_run) +) + +GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, + __flg_field(1, GENLA_F_MANDATORY, assume_uptodate) +) + +GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms, + __u64_field(1, GENLA_F_MANDATORY, resize_size) + __flg_field(2, GENLA_F_MANDATORY, resize_force) + __flg_field(3, GENLA_F_MANDATORY, no_resync) +) + +GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, + /* the reason of the broadcast, + * if this 
is an event triggered broadcast. */ + __u32_field(1, GENLA_F_MANDATORY, sib_reason) + __u32_field(2, GENLA_F_REQUIRED, current_state) + __u64_field(3, GENLA_F_MANDATORY, capacity) + __u64_field(4, GENLA_F_MANDATORY, ed_uuid) + + /* These are for broadcast from after state change work. + * prev_state and new_state are from the moment the state change took + * place, new_state is not neccessarily the same as current_state, + * there may have been more state changes since. Which will be + * broadcasted soon, in their respective after state change work. */ + __u32_field(5, GENLA_F_MANDATORY, prev_state) + __u32_field(6, GENLA_F_MANDATORY, new_state) + + /* if we have a local disk: */ + __bin_field(7, GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64))) + __u32_field(8, GENLA_F_MANDATORY, disk_flags) + __u64_field(9, GENLA_F_MANDATORY, bits_total) + __u64_field(10, GENLA_F_MANDATORY, bits_oos) + /* and in case resync or online verify is active */ + __u64_field(11, GENLA_F_MANDATORY, bits_rs_total) + __u64_field(12, GENLA_F_MANDATORY, bits_rs_failed) + + /* for pre and post notifications of helper execution */ + __str_field(13, GENLA_F_MANDATORY, helper, 32) + __u32_field(14, GENLA_F_MANDATORY, helper_exit_code) +) + +GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, + __u64_field(1, GENLA_F_MANDATORY, ov_start_sector) +) + +GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms, + __flg_field(1, GENLA_F_MANDATORY, clear_bm) +) + +GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms, + __u32_field(1, GENLA_F_REQUIRED, timeout_type) +) + +GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms, + __flg_field(1, GENLA_F_MANDATORY, force_disconnect) +) + +/* + * Notifications and commands (genlmsghdr->cmd) + */ +GENL_mc_group(events) + + /* kernel -> userspace announcement of changes */ +GENL_notification( + DRBD_EVENT, 1, events, + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_STATE_INFO, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY) +) + + /* query kernel for specific or all info */ +GENL_op( + DRBD_ADM_GET_STATUS, 2, + GENL_op_init( + .doit = drbd_adm_get_status, + .dumpit = drbd_adm_get_status_all, + /* anyone may ask for the status, + * it is broadcasted anyways */ + ), + /* To select the object .doit. + * Or a subset of objects in .dumpit. 
*/ + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_MANDATORY) +) + +#if 0 + /* TO BE DONE */ + /* create or destroy resources, aka replication groups */ +GENL_op(DRBD_ADM_CREATE_RESOURCE, 3, GENL_doit(drbd_adm_create_resource), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DELETE_RESOURCE, 4, GENL_doit(drbd_adm_delete_resource), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +#endif + + /* add DRBD minor devices as volumes to resources */ +GENL_op(DRBD_ADM_ADD_MINOR, 5, GENL_doit(drbd_adm_add_minor), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + + /* add or delete replication links to resources */ +GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + + /* operates on replication links */ +GENL_op(DRBD_ADM_SYNCER, 9, + GENL_doit(drbd_adm_syncer), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY) +) + +GENL_op( + DRBD_ADM_CONNECT, 10, + GENL_doit(drbd_adm_connect), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) +) + +GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + + /* operates on minors */ +GENL_op(DRBD_ADM_ATTACH, 12, + GENL_doit(drbd_adm_attach), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_RESIZE, 13, + GENL_doit(drbd_adm_resize), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY) +) + + /* operates on all volumes within a resource */ +GENL_op( + DRBD_ADM_PRIMARY, 14, + GENL_doit(drbd_adm_set_role), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_SECONDARY, 15, + GENL_doit(drbd_adm_set_role), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_NEW_C_UUID, 16, + GENL_doit(drbd_adm_new_c_uuid), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, GENLA_F_MANDATORY) +) + +GENL_op( + DRBD_ADM_START_OV, 17, + GENL_doit(drbd_adm_start_ov), + GENL_tla_expected(DRBD_NLA_START_OV_PARMS, GENLA_F_MANDATORY) +) + +GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_INVAL_PEER, 20, GENL_doit(drbd_adm_invalidate_peer), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_PAUSE_SYNC, 21, GENL_doit(drbd_adm_pause_sync), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_RESUME_SYNC, 22, GENL_doit(drbd_adm_resume_sync), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_SUSPEND_IO, 23, GENL_doit(drbd_adm_suspend_io), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) 
+GENL_op(DRBD_ADM_RESUME_IO, 24, GENL_doit(drbd_adm_resume_io), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_OUTDATE, 25, GENL_doit(drbd_adm_outdate), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) diff --git a/include/linux/drbd_genl_api.h b/include/linux/drbd_genl_api.h new file mode 100644 index 000000000000..9ef50d51e34e --- /dev/null +++ b/include/linux/drbd_genl_api.h @@ -0,0 +1,55 @@ +#ifndef DRBD_GENL_STRUCT_H +#define DRBD_GENL_STRUCT_H + +/** + * struct drbd_genlmsghdr - DRBD specific header used in NETLINK_GENERIC requests + * @minor: + * For admin requests (user -> kernel): which minor device to operate on. + * For (unicast) replies or informational (broadcast) messages + * (kernel -> user): which minor device the information is about. + * If we do not operate on minors, but on connections or resources, + * the minor value shall be (~0), and the attribute DRBD_NLA_CFG_CONTEXT + * is used instead. + * @flags: possible operation modifiers (relevant only for user->kernel): + * DRBD_GENL_F_SET_DEFAULTS + * @volume: + * When creating a new minor (adding it to a resource), the resource needs + * to know which volume number within the resource this is supposed to be. + * The volume number corresponds to the same volume number on the remote side, + * whereas the minor number on the remote side may be different + * (union with flags). + * @ret_code: kernel->userland unicast cfg reply return code (union with flags); + */ +struct drbd_genlmsghdr { + __u32 minor; + union { + __u32 flags; + __s32 ret_code; + }; +}; + +/* To be used in drbd_genlmsghdr.flags */ +enum { + DRBD_GENL_F_SET_DEFAULTS = 1, +}; + +enum drbd_state_info_bcast_reason { + SIB_GET_STATUS_REPLY = 1, + SIB_STATE_CHANGE = 2, + SIB_HELPER_PRE = 3, + SIB_HELPER_POST = 4, + SIB_SYNC_PROGRESS = 5, +}; + +/* hack around predefined gcc/cpp "linux=1", + * we cannot possibly include <1/drbd_genl.h> */ +#undef linux + +#include +#define GENL_MAGIC_VERSION API_VERSION +#define GENL_MAGIC_FAMILY drbd +#define GENL_MAGIC_FAMILY_HDRSZ sizeof(struct drbd_genlmsghdr) +#define GENL_MAGIC_INCLUDE_FILE +#include + +#endif diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h new file mode 100644 index 000000000000..8a86f659d363 --- /dev/null +++ b/include/linux/genl_magic_func.h @@ -0,0 +1,417 @@ +#ifndef GENL_MAGIC_FUNC_H +#define GENL_MAGIC_FUNC_H + +#include + +/* + * Extension of genl attribute validation policies {{{1 + * {{{2 + */ + +/** + * nla_is_required - return true if this attribute is required + * @nla: netlink attribute + */ +static inline int nla_is_required(const struct nlattr *nla) +{ + return nla->nla_type & GENLA_F_REQUIRED; +} + +/** + * nla_is_mandatory - return true if understanding this attribute is mandatory + * @nla: netlink attribute + * Note: REQUIRED attributes are implicitly MANDATORY as well + */ +static inline int nla_is_mandatory(const struct nlattr *nla) +{ + return nla->nla_type & (GENLA_F_MANDATORY | GENLA_F_REQUIRED); +} + +/* Functionality to be integrated into nla_parse(), and validate_nla(), + * respectively. + * + * Enforcing the "mandatory" bit is done here, + * by rejecting unknown mandatory attributes. 
+ * + * Part of enforcing the "required" flag would mean to embed it into + * nla_policy.type, and extending validate_nla(), which currently does + * BUG_ON(pt->type > NLA_TYPE_MAX); we have to work on existing kernels, + * so we cannot do that. Thats why enforcing "required" is done in the + * generated assignment functions below. */ +static int nla_check_unknown(int maxtype, struct nlattr *head, int len) +{ + struct nlattr *nla; + int rem; + nla_for_each_attr(nla, head, len, rem) { + __u16 type = nla_type(nla); + if (type > maxtype && nla_is_mandatory(nla)) + return -EOPNOTSUPP; + } + return 0; +} + +/* + * Magic: declare tla policy {{{1 + * Magic: declare nested policies + * {{{2 + */ +#undef GENL_mc_group +#define GENL_mc_group(group) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + [tag_name] = { .type = NLA_NESTED }, + +static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static struct nla_policy s_name ## _nl_policy[] __read_mostly = \ +{ s_fields }; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, __put) \ + [__nla_type(attr_nr)] = { .type = nla_type }, + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen, \ + __get, __put) \ + [__nla_type(attr_nr)] = { .type = nla_type, \ + .len = maxlen - (nla_type == NLA_NUL_STRING) }, + +#include GENL_MAGIC_INCLUDE_FILE + +#ifndef __KERNEL__ +#ifndef pr_info +#define pr_info(args...) fprintf(stderr, args); +#endif +#endif + +#if 1 +static void dprint_field(const char *dir, int nla_type, + const char *name, void *valp) +{ + __u64 val = valp ? *(__u32 *)valp : 1; + switch (nla_type) { + case NLA_U8: val = (__u8)val; + case NLA_U16: val = (__u16)val; + case NLA_U32: val = (__u32)val; + pr_info("%s attr %s: %d 0x%08x\n", dir, + name, (int)val, (unsigned)val); + break; + case NLA_U64: + val = *(__u64*)valp; + pr_info("%s attr %s: %lld 0x%08llx\n", dir, + name, (long long)val, (unsigned long long)val); + break; + case NLA_FLAG: + if (val) + pr_info("%s attr %s: set\n", dir, name); + break; + } +} + +static void dprint_array(const char *dir, int nla_type, + const char *name, const char *val, unsigned len) +{ + switch (nla_type) { + case NLA_NUL_STRING: + if (len && val[len-1] == '\0') + len--; + pr_info("%s attr %s: [len:%u] '%s'\n", dir, name, len, val); + break; + default: + /* we can always show 4 byte, + * thats what nlattr are aligned to. */ + pr_info("%s attr %s: [len:%u] %02x%02x%02x%02x ...\n", + dir, name, len, val[0], val[1], val[2], val[3]); + } +} + +#define DPRINT_TLA(a, op, b) pr_info("%s %s %s\n", a, op, b); + +/* Name is a member field name of the struct s. + * If s is NULL (only parsing, no copy requested in *_from_attrs()), + * nla is supposed to point to the attribute containing the information + * corresponding to that struct member. */ +#define DPRINT_FIELD(dir, nla_type, name, s, nla) \ + do { \ + if (s) \ + dprint_field(dir, nla_type, #name, &s->name); \ + else if (nla) \ + dprint_field(dir, nla_type, #name, \ + (nla_type == NLA_FLAG) ? 
NULL \ + : nla_data(nla)); \ + } while (0) + +#define DPRINT_ARRAY(dir, nla_type, name, s, nla) \ + do { \ + if (s) \ + dprint_array(dir, nla_type, #name, \ + s->name, s->name ## _len); \ + else if (nla) \ + dprint_array(dir, nla_type, #name, \ + nla_data(nla), nla_len(nla)); \ + } while (0) +#else +#define DPRINT_TLA(a, op, b) do {} while (0) +#define DPRINT_FIELD(dir, nla_type, name, s, nla) do {} while (0) +#define DPRINT_ARRAY(dir, nla_type, name, s, nla) do {} while (0) +#endif + +/* + * Magic: provide conversion functions {{{1 + * populate struct from attribute table: + * {{{2 + */ + +/* processing of generic netlink messages is serialized. + * use one static buffer for parsing of nested attributes */ +static struct nlattr *nested_attr_tb[128]; + +#ifndef BUILD_BUG_ON +/* Force a compilation error if condition is true */ +#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition)) +/* Force a compilation error if condition is true, but also produce a + result (of value 0 and type size_t), so the expression can be used + e.g. in a structure initializer (or where-ever else comma expressions + aren't permitted). */ +#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) +#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) +#endif + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + /* static, potentially unused */ \ +int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[]) \ +{ \ + const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1; \ + struct nlattr *tla = tb[tag_number]; \ + struct nlattr **ntb = nested_attr_tb; \ + struct nlattr *nla; \ + int err; \ + BUILD_BUG_ON(ARRAY_SIZE(s_name ## _nl_policy) > ARRAY_SIZE(nested_attr_tb)); \ + if (!tla) \ + return -ENOMSG; \ + DPRINT_TLA(#s_name, "<=-", #tag_name); \ + err = nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \ + if (err) \ + return err; \ + err = nla_check_unknown(maxtype, nla_data(tla), nla_len(tla)); \ + if (err) \ + return err; \ + \ + s_fields \ + return 0; \ +} + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + nla = ntb[__nla_type(attr_nr)]; \ + if (nla) { \ + if (s) \ + s->name = __get(nla); \ + DPRINT_FIELD("<<", nla_type, name, s, nla); \ + } else if ((attr_flag) & GENLA_F_REQUIRED) { \ + pr_info("<< missing attr: %s\n", #name); \ + return -ENOMSG; \ + } + +/* validate_nla() already checked nla_len <= maxlen appropriately. 
*/ +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + nla = ntb[__nla_type(attr_nr)]; \ + if (nla) { \ + if (s) \ + s->name ## _len = \ + __get(s->name, nla, maxlen); \ + DPRINT_ARRAY("<<", nla_type, name, s, nla); \ + } else if ((attr_flag) & GENLA_F_REQUIRED) { \ + pr_info("<< missing attr: %s\n", #name); \ + return -ENOMSG; \ + } \ + +#include GENL_MAGIC_INCLUDE_FILE + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +/* + * Magic: define op number to op name mapping {{{1 + * {{{2 + */ +const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) +{ + switch (cmd) { +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ + case op_num: return #op_name; +#include GENL_MAGIC_INCLUDE_FILE + default: + return "unknown"; + } +} + +#ifdef __KERNEL__ +#include +/* + * Magic: define genl_ops {{{1 + * {{{2 + */ + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ +{ \ + handler \ + .cmd = op_name, \ + .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), \ +}, + +#define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops) +static struct genl_ops ZZZ_genl_ops[] __read_mostly = { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +/* + * Define the genl_family, multicast groups, {{{1 + * and provide register/unregister functions. + * {{{2 + */ +#define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family) +static struct genl_family ZZZ_genl_family __read_mostly = { + .id = GENL_ID_GENERATE, + .name = __stringify(GENL_MAGIC_FAMILY), + .version = GENL_MAGIC_VERSION, +#ifdef GENL_MAGIC_FAMILY_HDRSZ + .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), +#endif + .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, +}; + +/* + * Magic: define multicast groups + * Magic: define multicast group registration helper + */ +#undef GENL_mc_group +#define GENL_mc_group(group) \ +static struct genl_multicast_group \ +CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group) __read_mostly = { \ + .name = #group, \ +}; \ +static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ + struct sk_buff *skb, gfp_t flags) \ +{ \ + unsigned int group_id = \ + CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id; \ + if (!group_id) \ + return -EINVAL; \ + return genlmsg_multicast(skb, 0, group_id, flags); \ +} + +#include GENL_MAGIC_INCLUDE_FILE + +int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void) +{ + int err = genl_register_family_with_ops(&ZZZ_genl_family, + ZZZ_genl_ops, ARRAY_SIZE(ZZZ_genl_ops)); + if (err) + return err; +#undef GENL_mc_group +#define GENL_mc_group(group) \ + err = genl_register_mc_group(&ZZZ_genl_family, \ + &CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group)); \ + if (err) \ + goto fail; \ + else \ + pr_info("%s: mcg %s: %u\n", #group, \ + __stringify(GENL_MAGIC_FAMILY), \ + CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id); + +#include GENL_MAGIC_INCLUDE_FILE + +#undef GENL_mc_group +#define GENL_mc_group(group) + return 0; +fail: + genl_unregister_family(&ZZZ_genl_family); + return err; +} + +void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void) +{ + genl_unregister_family(&ZZZ_genl_family); +} + +/* + * Magic: provide conversion functions {{{1 + * populate skb from struct. 
+ * {{{2 + */ + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static int s_name ## _to_skb(struct sk_buff *skb, struct s_name *s, \ + const bool exclude_sensitive) \ +{ \ + struct nlattr *tla = nla_nest_start(skb, tag_number); \ + if (!tla) \ + goto nla_put_failure; \ + DPRINT_TLA(#s_name, "-=>", #tag_name); \ + s_fields \ + nla_nest_end(skb, tla); \ + return 0; \ + \ +nla_put_failure: \ + if (tla) \ + nla_nest_cancel(skb, tla); \ + return -EMSGSIZE; \ +} \ +static inline int s_name ## _to_priv_skb(struct sk_buff *skb, \ + struct s_name *s) \ +{ \ + return s_name ## _to_skb(skb, s, 0); \ +} \ +static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ + struct s_name *s) \ +{ \ + return s_name ## _to_skb(skb, s, 1); \ +} + + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ + DPRINT_FIELD(">>", nla_type, name, s, NULL); \ + __put(skb, attr_nr, s->name); \ + } + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ + DPRINT_ARRAY(">>",nla_type, name, s, NULL); \ + __put(skb, attr_nr, min_t(int, maxlen, \ + s->name ## _len + (nla_type == NLA_NUL_STRING)),\ + s->name); \ + } + +#include GENL_MAGIC_INCLUDE_FILE + +#endif /* __KERNEL__ */ + +/* }}}1 */ +#endif /* GENL_MAGIC_FUNC_H */ +/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */ diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h new file mode 100644 index 000000000000..745ebfd6c7e5 --- /dev/null +++ b/include/linux/genl_magic_struct.h @@ -0,0 +1,260 @@ +#ifndef GENL_MAGIC_STRUCT_H +#define GENL_MAGIC_STRUCT_H + +#ifndef GENL_MAGIC_FAMILY +# error "you need to define GENL_MAGIC_FAMILY before inclusion" +#endif + +#ifndef GENL_MAGIC_VERSION +# error "you need to define GENL_MAGIC_VERSION before inclusion" +#endif + +#ifndef GENL_MAGIC_INCLUDE_FILE +# error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion" +#endif + +#include +#include + +#define CONCAT__(a,b) a ## b +#define CONCAT_(a,b) CONCAT__(a,b) + +extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void); +extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); + +/* + * Extension of genl attribute validation policies {{{2 + */ + +/** + * GENLA_F_FLAGS - policy type flags to ease compatible ABI evolvement + * + * @GENLA_F_REQUIRED: attribute has to be present, or message is considered invalid. + * Adding new REQUIRED attributes breaks ABI compatibility, so don't do that. + * + * @GENLA_F_MANDATORY: if present, receiver _must_ understand it. + * Without this, unknown attributes (> maxtype) are _silently_ ignored + * by validate_nla(). + * + * To be used for API extensions, so older kernel can reject requests for not + * yet implemented features, if newer userland tries to use them even though + * the genl_family version clearly indicates they are not available. + * + * @GENLA_F_MAY_IGNORE: To clearly document the fact, for good measure. + * To be used for API extensions for things that have sane defaults, + * so newer userland can still talk to older kernel, knowing it will + * silently ignore these attributes if not yet known. 
+ * + * NOTE: These flags overload + * NLA_F_NESTED (1 << 15) + * NLA_F_NET_BYTEORDER (1 << 14) + * from linux/netlink.h, which are not useful for validate_nla(): + * NET_BYTEORDER is not used anywhere, and NESTED would be specified by setting + * .type = NLA_NESTED in the appropriate policy. + * + * See also: nla_type() + */ +enum { + GENLA_F_MAY_IGNORE = 0, + GENLA_F_MANDATORY = 1 << 14, + GENLA_F_REQUIRED = 1 << 15, + + /* This will not be present in the __u16 .nla_type, but can be + * triggered on in _to_skb, to exclude "sensitive" + * information from broadcasts, or on unpriviledged get requests. + * This is useful because genetlink multicast groups can be listened in + * on by anyone. */ + GENLA_F_SENSITIVE = 1 << 16, +}; + +#define __nla_type(x) ((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK)) + +/* }}}1 + * MAGIC + * multi-include macro expansion magic starts here + */ + +/* MAGIC helpers {{{2 */ + +/* possible field types */ +#define __flg_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_FLAG, char, \ + nla_get_flag, __nla_put_flag) +#define __u8_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \ + nla_get_u8, NLA_PUT_U8) +#define __u16_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U16, __u16, \ + nla_get_u16, NLA_PUT_U16) +#define __u32_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U32, __u32, \ + nla_get_u32, NLA_PUT_U32) +#define __u64_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U64, __u64, \ + nla_get_u64, NLA_PUT_U64) +#define __str_field(attr_nr, attr_flag, name, maxlen) \ + __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ + nla_strlcpy, NLA_PUT) +#define __bin_field(attr_nr, attr_flag, name, maxlen) \ + __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ + nla_memcpy, NLA_PUT) + +#define __nla_put_flag(skb, attrtype, value) \ + do { \ + if (value) \ + NLA_PUT_FLAG(skb, attrtype); \ + } while (0) + +#define GENL_op_init(args...) 
args +#define GENL_doit(handler) \ + .doit = handler, \ + .flags = GENL_ADMIN_PERM, +#define GENL_dumpit(handler) \ + .dumpit = handler, \ + .flags = GENL_ADMIN_PERM, + +/* }}}1 + * Magic: define the enum symbols for genl_ops + * Magic: define the enum symbols for top level attributes + * Magic: define the enum symbols for nested attributes + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +#undef GENL_mc_group +#define GENL_mc_group(group) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) \ + op_name = op_num, + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ + op_name = op_num, + +enum { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + tag_name = tag_number, + +enum { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +enum { \ + s_fields \ +}; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + T_ ## name = (__u16)(attr_nr | attr_flag), + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + T_ ## name = (__u16)(attr_nr | attr_flag), + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 + * Magic: compile time assert unique numbers for operations + * Magic: -"- unique numbers for top level attributes + * Magic: -"- unique numbers for nested attributes + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) \ + case op_name: + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) \ + case op_name: + +static inline void ct_assert_unique_operations(void) +{ + switch (0) { +#include GENL_MAGIC_INCLUDE_FILE + ; + } +} + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + case tag_number: + +static inline void ct_assert_unique_top_level_attributes(void) +{ + switch (0) { +#include GENL_MAGIC_INCLUDE_FILE + ; + } +} + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ +{ \ + switch (0) { \ + s_fields \ + ; \ + } \ +} + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + case attr_nr: + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + case attr_nr: + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 + * Magic: declare structs + * struct { + * fields + * }; + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +struct s_name { s_fields }; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + type name; + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + type name[maxlen]; \ + __u32 name ## _len; + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 */ +#endif /* GENL_MAGIC_STRUCT_H */ +/* vim: set foldmethod=marker nofoldenable : 
*/ From d4f65b5d2497b2fd9c45f06b71deb4ab084a5b66 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 13 Sep 2012 13:06:29 +0100 Subject: [PATCH 0160/5831] KEYS: Add payload preparsing opportunity prior to key instantiate or update Give the key type the opportunity to preparse the payload prior to the instantiation and update routines being called. This is done with the provision of two new key type operations: int (*preparse)(struct key_preparsed_payload *prep); void (*free_preparse)(struct key_preparsed_payload *prep); If the first operation is present, then it is called before key creation (in the add/update case) or before the key semaphore is taken (in the update and instantiate cases). The second operation is called to clean up if the first was called. preparse() is given the opportunity to fill in the following structure: struct key_preparsed_payload { char *description; void *type_data[2]; void *payload; const void *data; size_t datalen; size_t quotalen; }; Before the preparser is called, the first three fields will have been cleared, the payload pointer and size will be stored in data and datalen and the default quota size from the key_type struct will be stored into quotalen. The preparser may parse the payload in any way it likes and may store data in the type_data[] and payload fields for use by the instantiate() and update() ops. The preparser may also propose a description for the key by attaching it as a string to the description field. This can be used by passing a NULL or "" description to the add_key() system call or the key_create_or_update() function. This cannot work with request_key() as that required the description to tell the upcall about the key to be created. This, for example permits keys that store PGP public keys to generate their own name from the user ID and public key fingerprint in the key. The instantiate() and update() operations are then modified to look like this: int (*instantiate)(struct key *key, struct key_preparsed_payload *prep); int (*update)(struct key *key, struct key_preparsed_payload *prep); and the new payload data is passed in *prep, whether or not it was preparsed. Signed-off-by: David Howells --- Documentation/security/keys.txt | 50 +++++++++- fs/cifs/cifs_spnego.c | 6 +- fs/cifs/cifsacl.c | 8 +- include/keys/user-type.h | 6 +- include/linux/key-type.h | 35 ++++++- net/ceph/crypto.c | 9 +- net/dns_resolver/dns_key.c | 6 +- net/rxrpc/ar-key.c | 40 ++++---- security/keys/encrypted-keys/encrypted.c | 16 ++-- security/keys/key.c | 114 ++++++++++++++++------- security/keys/keyctl.c | 18 +++- security/keys/keyring.c | 6 +- security/keys/request_key_auth.c | 8 +- security/keys/trusted.c | 16 ++-- security/keys/user_defined.c | 14 +-- 15 files changed, 250 insertions(+), 102 deletions(-) diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index aa0dbd74b71b..7d9ca92022d8 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -412,6 +412,10 @@ The main syscalls are: to the keyring. In this case, an error will be generated if the process does not have permission to write to the keyring. + If the key type supports it, if the description is NULL or an empty + string, the key type will try and generate a description from the content + of the payload. + The payload is optional, and the pointer can be NULL if not required by the type. The payload is plen in size, and plen can be zero for an empty payload. 
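For orientation only, a minimal key type wiring up the new hooks might look like the sketch below. The foo_* names and the derived description are invented for illustration and are not part of this patch; the flow simply follows the key_preparsed_payload semantics described above: preparse() proposes a payload and description, free_preparse() is called afterwards whenever preparse() succeeded, and instantiate() takes ownership of whatever preparse() left behind.

	static int foo_preparse(struct key_preparsed_payload *prep)
	{
		void *copy;

		if (prep->datalen == 0 || prep->datalen > 32767)
			return -EINVAL;

		copy = kmemdup(prep->data, prep->datalen, GFP_KERNEL);
		if (!copy)
			return -ENOMEM;
		prep->payload = copy;

		/* Propose a description so add_key() may be called with "".
		 * (Format is made up for the example.) */
		prep->description = kasprintf(GFP_KERNEL, "foo:%zu", prep->datalen);
		if (!prep->description) {
			/* preparse() failed, so free_preparse() will not run;
			 * clean up here. */
			kfree(copy);
			prep->payload = NULL;
			return -ENOMEM;
		}
		return 0;
	}

	static void foo_free_preparse(struct key_preparsed_payload *prep)
	{
		kfree(prep->description);
		kfree(prep->payload);
	}

	static int foo_instantiate(struct key *key,
				   struct key_preparsed_payload *prep)
	{
		int ret = key_payload_reserve(key, prep->datalen);
		if (ret < 0)
			return ret;
		/* Take ownership of the prepared buffer; free_preparse()
		 * still runs, so clear the pointer to avoid a double free. */
		key->payload.data = prep->payload;
		prep->payload = NULL;
		return 0;
	}

	static struct key_type key_type_foo = {
		.name		= "foo",
		.preparse	= foo_preparse,
		.free_preparse	= foo_free_preparse,
		.instantiate	= foo_instantiate,
		/* .match, .destroy etc. omitted from this sketch */
	};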
@@ -1114,12 +1118,53 @@ The structure has a number of fields, some of which are mandatory: it should return 0. - (*) int (*instantiate)(struct key *key, const void *data, size_t datalen); + (*) int (*preparse)(struct key_preparsed_payload *prep); + + This optional method permits the key type to attempt to parse payload + before a key is created (add key) or the key semaphore is taken (update or + instantiate key). The structure pointed to by prep looks like: + + struct key_preparsed_payload { + char *description; + void *type_data[2]; + void *payload; + const void *data; + size_t datalen; + size_t quotalen; + }; + + Before calling the method, the caller will fill in data and datalen with + the payload blob parameters; quotalen will be filled in with the default + quota size from the key type and the rest will be cleared. + + If a description can be proposed from the payload contents, that should be + attached as a string to the description field. This will be used for the + key description if the caller of add_key() passes NULL or "". + + The method can attach anything it likes to type_data[] and payload. These + are merely passed along to the instantiate() or update() operations. + + The method should return 0 if success ful or a negative error code + otherwise. + + + (*) void (*free_preparse)(struct key_preparsed_payload *prep); + + This method is only required if the preparse() method is provided, + otherwise it is unused. It cleans up anything attached to the + description, type_data and payload fields of the key_preparsed_payload + struct as filled in by the preparse() method. + + + (*) int (*instantiate)(struct key *key, struct key_preparsed_payload *prep); This method is called to attach a payload to a key during construction. The payload attached need not bear any relation to the data passed to this function. + The prep->data and prep->datalen fields will define the original payload + blob. If preparse() was supplied then other fields may be filled in also. + If the amount of data attached to the key differs from the size in keytype->def_datalen, then key_payload_reserve() should be called. @@ -1135,6 +1180,9 @@ The structure has a number of fields, some of which are mandatory: If this type of key can be updated, then this method should be provided. It is called to update a key's payload from the blob of data provided. + The prep->data and prep->datalen fields will define the original payload + blob. If preparse() was supplied then other fields may be filled in also. + key_payload_reserve() should be called if the data length might change before any changes are actually made. 
Note that if this succeeds, the type is committed to changing the key because it's already been altered, so all diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index e622863b292f..086f381d6489 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -31,18 +31,18 @@ /* create a new cifs key */ static int -cifs_spnego_key_instantiate(struct key *key, const void *data, size_t datalen) +cifs_spnego_key_instantiate(struct key *key, struct key_preparsed_payload *prep) { char *payload; int ret; ret = -ENOMEM; - payload = kmalloc(datalen, GFP_KERNEL); + payload = kmalloc(prep->datalen, GFP_KERNEL); if (!payload) goto error; /* attach the data */ - memcpy(payload, data, datalen); + memcpy(payload, prep->data, prep->datalen); key->payload.data = payload; ret = 0; diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 05f4dc263a23..f3c60e264ca8 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -167,17 +167,17 @@ static struct shrinker cifs_shrinker = { }; static int -cifs_idmap_key_instantiate(struct key *key, const void *data, size_t datalen) +cifs_idmap_key_instantiate(struct key *key, struct key_preparsed_payload *prep) { char *payload; - payload = kmalloc(datalen, GFP_KERNEL); + payload = kmalloc(prep->datalen, GFP_KERNEL); if (!payload) return -ENOMEM; - memcpy(payload, data, datalen); + memcpy(payload, prep->data, prep->datalen); key->payload.data = payload; - key->datalen = datalen; + key->datalen = prep->datalen; return 0; } diff --git a/include/keys/user-type.h b/include/keys/user-type.h index bc9ec1d7698c..5e452c84f1e6 100644 --- a/include/keys/user-type.h +++ b/include/keys/user-type.h @@ -35,8 +35,10 @@ struct user_key_payload { extern struct key_type key_type_user; extern struct key_type key_type_logon; -extern int user_instantiate(struct key *key, const void *data, size_t datalen); -extern int user_update(struct key *key, const void *data, size_t datalen); +struct key_preparsed_payload; + +extern int user_instantiate(struct key *key, struct key_preparsed_payload *prep); +extern int user_update(struct key *key, struct key_preparsed_payload *prep); extern int user_match(const struct key *key, const void *criterion); extern void user_revoke(struct key *key); extern void user_destroy(struct key *key); diff --git a/include/linux/key-type.h b/include/linux/key-type.h index f0c651cda7b0..518a53afb9ea 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -26,6 +26,27 @@ struct key_construction { struct key *authkey;/* authorisation for key being constructed */ }; +/* + * Pre-parsed payload, used by key add, update and instantiate. + * + * This struct will be cleared and data and datalen will be set with the data + * and length parameters from the caller and quotalen will be set from + * def_datalen from the key type. Then if the preparse() op is provided by the + * key type, that will be called. Then the struct will be passed to the + * instantiate() or the update() op. + * + * If the preparse() op is given, the free_preparse() op will be called to + * clear the contents. 
+ */ +struct key_preparsed_payload { + char *description; /* Proposed key description (or NULL) */ + void *type_data[2]; /* Private key-type data */ + void *payload; /* Proposed payload */ + const void *data; /* Raw data */ + size_t datalen; /* Raw datalen */ + size_t quotalen; /* Quota length for proposed payload */ +}; + typedef int (*request_key_actor_t)(struct key_construction *key, const char *op, void *aux); @@ -45,18 +66,28 @@ struct key_type { /* vet a description */ int (*vet_description)(const char *description); + /* Preparse the data blob from userspace that is to be the payload, + * generating a proposed description and payload that will be handed to + * the instantiate() and update() ops. + */ + int (*preparse)(struct key_preparsed_payload *prep); + + /* Free a preparse data structure. + */ + void (*free_preparse)(struct key_preparsed_payload *prep); + /* instantiate a key of this type * - this method should call key_payload_reserve() to determine if the * user's quota will hold the payload */ - int (*instantiate)(struct key *key, const void *data, size_t datalen); + int (*instantiate)(struct key *key, struct key_preparsed_payload *prep); /* update a key of this type (optional) * - this method should call key_payload_reserve() to recalculate the * quota consumption * - the key must be locked against read when modifying */ - int (*update)(struct key *key, const void *data, size_t datalen); + int (*update)(struct key *key, struct key_preparsed_payload *prep); /* match a key against a description */ int (*match)(const struct key *key, const void *desc); diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 9da7fdd3cd8a..af14cb425164 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -423,14 +423,15 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, } } -int ceph_key_instantiate(struct key *key, const void *data, size_t datalen) +int ceph_key_instantiate(struct key *key, struct key_preparsed_payload *prep) { struct ceph_crypto_key *ckey; + size_t datalen = prep->datalen; int ret; void *p; ret = -EINVAL; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) goto err; ret = key_payload_reserve(key, datalen); @@ -443,8 +444,8 @@ int ceph_key_instantiate(struct key *key, const void *data, size_t datalen) goto err; /* TODO ceph_crypto_key_decode should really take const input */ - p = (void *)data; - ret = ceph_crypto_key_decode(ckey, &p, (char*)data+datalen); + p = (void *)prep->data; + ret = ceph_crypto_key_decode(ckey, &p, (char*)prep->data+datalen); if (ret < 0) goto err_ckey; diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index d9507dd05818..859ab8b6ec34 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -59,13 +59,13 @@ const struct cred *dns_resolver_cache; * "ip1,ip2,...#foo=bar" */ static int -dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen) +dns_resolver_instantiate(struct key *key, struct key_preparsed_payload *prep) { struct user_key_payload *upayload; unsigned long derrno; int ret; - size_t result_len = 0; - const char *data = _data, *end, *opt; + size_t datalen = prep->datalen, result_len = 0; + const char *data = prep->data, *end, *opt; kenter("%%%d,%s,'%*.*s',%zu", key->serial, key->description, diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 8b1f9f49960f..106c5a6b1ab2 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -26,8 +26,8 @@ #include "ar-internal.h" static int 
rxrpc_vet_description_s(const char *); -static int rxrpc_instantiate(struct key *, const void *, size_t); -static int rxrpc_instantiate_s(struct key *, const void *, size_t); +static int rxrpc_instantiate(struct key *, struct key_preparsed_payload *); +static int rxrpc_instantiate_s(struct key *, struct key_preparsed_payload *); static void rxrpc_destroy(struct key *); static void rxrpc_destroy_s(struct key *); static void rxrpc_describe(const struct key *, struct seq_file *); @@ -678,7 +678,7 @@ error: * * if no data is provided, then a no-security key is made */ -static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen) +static int rxrpc_instantiate(struct key *key, struct key_preparsed_payload *prep) { const struct rxrpc_key_data_v1 *v1; struct rxrpc_key_token *token, **pp; @@ -686,26 +686,26 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen) u32 kver; int ret; - _enter("{%x},,%zu", key_serial(key), datalen); + _enter("{%x},,%zu", key_serial(key), prep->datalen); /* handle a no-security key */ - if (!data && datalen == 0) + if (!prep->data && prep->datalen == 0) return 0; /* determine if the XDR payload format is being used */ - if (datalen > 7 * 4) { - ret = rxrpc_instantiate_xdr(key, data, datalen); + if (prep->datalen > 7 * 4) { + ret = rxrpc_instantiate_xdr(key, prep->data, prep->datalen); if (ret != -EPROTO) return ret; } /* get the key interface version number */ ret = -EINVAL; - if (datalen <= 4 || !data) + if (prep->datalen <= 4 || !prep->data) goto error; - memcpy(&kver, data, sizeof(kver)); - data += sizeof(kver); - datalen -= sizeof(kver); + memcpy(&kver, prep->data, sizeof(kver)); + prep->data += sizeof(kver); + prep->datalen -= sizeof(kver); _debug("KEY I/F VERSION: %u", kver); @@ -715,11 +715,11 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen) /* deal with a version 1 key */ ret = -EINVAL; - if (datalen < sizeof(*v1)) + if (prep->datalen < sizeof(*v1)) goto error; - v1 = data; - if (datalen != sizeof(*v1) + v1->ticket_length) + v1 = prep->data; + if (prep->datalen != sizeof(*v1) + v1->ticket_length) goto error; _debug("SCIX: %u", v1->security_index); @@ -784,17 +784,17 @@ error: * instantiate a server secret key * data should be a pointer to the 8-byte secret key */ -static int rxrpc_instantiate_s(struct key *key, const void *data, - size_t datalen) +static int rxrpc_instantiate_s(struct key *key, + struct key_preparsed_payload *prep) { struct crypto_blkcipher *ci; - _enter("{%x},,%zu", key_serial(key), datalen); + _enter("{%x},,%zu", key_serial(key), prep->datalen); - if (datalen != 8) + if (prep->datalen != 8) return -EINVAL; - memcpy(&key->type_data, data, 8); + memcpy(&key->type_data, prep->data, 8); ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(ci)) { @@ -802,7 +802,7 @@ static int rxrpc_instantiate_s(struct key *key, const void *data, return PTR_ERR(ci); } - if (crypto_blkcipher_setkey(ci, data, 8) < 0) + if (crypto_blkcipher_setkey(ci, prep->data, 8) < 0) BUG(); key->payload.data = ci; diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 2d1bb8af7696..9e1e005c7596 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -773,8 +773,8 @@ static int encrypted_init(struct encrypted_key_payload *epayload, * * On success, return 0. Otherwise return errno. 
*/ -static int encrypted_instantiate(struct key *key, const void *data, - size_t datalen) +static int encrypted_instantiate(struct key *key, + struct key_preparsed_payload *prep) { struct encrypted_key_payload *epayload = NULL; char *datablob = NULL; @@ -782,16 +782,17 @@ static int encrypted_instantiate(struct key *key, const void *data, char *master_desc = NULL; char *decrypted_datalen = NULL; char *hex_encoded_iv = NULL; + size_t datalen = prep->datalen; int ret; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; datablob = kmalloc(datalen + 1, GFP_KERNEL); if (!datablob) return -ENOMEM; datablob[datalen] = 0; - memcpy(datablob, data, datalen); + memcpy(datablob, prep->data, datalen); ret = datablob_parse(datablob, &format, &master_desc, &decrypted_datalen, &hex_encoded_iv); if (ret < 0) @@ -834,16 +835,17 @@ static void encrypted_rcu_free(struct rcu_head *rcu) * * On success, return 0. Otherwise return errno. */ -static int encrypted_update(struct key *key, const void *data, size_t datalen) +static int encrypted_update(struct key *key, struct key_preparsed_payload *prep) { struct encrypted_key_payload *epayload = key->payload.data; struct encrypted_key_payload *new_epayload; char *buf; char *new_master_desc = NULL; const char *format = NULL; + size_t datalen = prep->datalen; int ret = 0; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; buf = kmalloc(datalen + 1, GFP_KERNEL); @@ -851,7 +853,7 @@ static int encrypted_update(struct key *key, const void *data, size_t datalen) return -ENOMEM; buf[datalen] = 0; - memcpy(buf, data, datalen); + memcpy(buf, prep->data, datalen); ret = datablob_parse(buf, &format, &new_master_desc, NULL, NULL); if (ret < 0) goto out; diff --git a/security/keys/key.c b/security/keys/key.c index 50d96d4e06f2..1d039af99f50 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -412,8 +412,7 @@ EXPORT_SYMBOL(key_payload_reserve); * key_construction_mutex. */ static int __key_instantiate_and_link(struct key *key, - const void *data, - size_t datalen, + struct key_preparsed_payload *prep, struct key *keyring, struct key *authkey, unsigned long *_prealloc) @@ -431,7 +430,7 @@ static int __key_instantiate_and_link(struct key *key, /* can't instantiate twice */ if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { /* instantiate the key */ - ret = key->type->instantiate(key, data, datalen); + ret = key->type->instantiate(key, prep); if (ret == 0) { /* mark the key as being instantiated */ @@ -482,22 +481,37 @@ int key_instantiate_and_link(struct key *key, struct key *keyring, struct key *authkey) { + struct key_preparsed_payload prep; unsigned long prealloc; int ret; + memset(&prep, 0, sizeof(prep)); + prep.data = data; + prep.datalen = datalen; + prep.quotalen = key->type->def_datalen; + if (key->type->preparse) { + ret = key->type->preparse(&prep); + if (ret < 0) + goto error; + } + if (keyring) { ret = __key_link_begin(keyring, key->type, key->description, &prealloc); if (ret < 0) - return ret; + goto error_free_preparse; } - ret = __key_instantiate_and_link(key, data, datalen, keyring, authkey, + ret = __key_instantiate_and_link(key, &prep, keyring, authkey, &prealloc); if (keyring) __key_link_end(keyring, key->type, prealloc); +error_free_preparse: + if (key->type->preparse) + key->type->free_preparse(&prep); +error: return ret; } @@ -706,7 +720,7 @@ void key_type_put(struct key_type *ktype) * if we get an error. 
*/ static inline key_ref_t __key_update(key_ref_t key_ref, - const void *payload, size_t plen) + struct key_preparsed_payload *prep) { struct key *key = key_ref_to_ptr(key_ref); int ret; @@ -722,7 +736,7 @@ static inline key_ref_t __key_update(key_ref_t key_ref, down_write(&key->sem); - ret = key->type->update(key, payload, plen); + ret = key->type->update(key, prep); if (ret == 0) /* updating a negative key instantiates it */ clear_bit(KEY_FLAG_NEGATIVE, &key->flags); @@ -774,6 +788,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, unsigned long flags) { unsigned long prealloc; + struct key_preparsed_payload prep; const struct cred *cred = current_cred(); struct key_type *ktype; struct key *keyring, *key = NULL; @@ -789,8 +804,9 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, } key_ref = ERR_PTR(-EINVAL); - if (!ktype->match || !ktype->instantiate) - goto error_2; + if (!ktype->match || !ktype->instantiate || + (!description && !ktype->preparse)) + goto error_put_type; keyring = key_ref_to_ptr(keyring_ref); @@ -798,18 +814,37 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, key_ref = ERR_PTR(-ENOTDIR); if (keyring->type != &key_type_keyring) - goto error_2; + goto error_put_type; + + memset(&prep, 0, sizeof(prep)); + prep.data = payload; + prep.datalen = plen; + prep.quotalen = ktype->def_datalen; + if (ktype->preparse) { + ret = ktype->preparse(&prep); + if (ret < 0) { + key_ref = ERR_PTR(ret); + goto error_put_type; + } + if (!description) + description = prep.description; + key_ref = ERR_PTR(-EINVAL); + if (!description) + goto error_free_prep; + } ret = __key_link_begin(keyring, ktype, description, &prealloc); - if (ret < 0) - goto error_2; + if (ret < 0) { + key_ref = ERR_PTR(ret); + goto error_free_prep; + } /* if we're going to allocate a new key, we're going to have * to modify the keyring */ ret = key_permission(keyring_ref, KEY_WRITE); if (ret < 0) { key_ref = ERR_PTR(ret); - goto error_3; + goto error_link_end; } /* if it's possible to update this type of key, search for an existing @@ -840,25 +875,27 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, perm, flags); if (IS_ERR(key)) { key_ref = ERR_CAST(key); - goto error_3; + goto error_link_end; } /* instantiate it and link it into the target keyring */ - ret = __key_instantiate_and_link(key, payload, plen, keyring, NULL, - &prealloc); + ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &prealloc); if (ret < 0) { key_put(key); key_ref = ERR_PTR(ret); - goto error_3; + goto error_link_end; } key_ref = make_key_ref(key, is_key_possessed(keyring_ref)); - error_3: +error_link_end: __key_link_end(keyring, ktype, prealloc); - error_2: +error_free_prep: + if (ktype->preparse) + ktype->free_preparse(&prep); +error_put_type: key_type_put(ktype); - error: +error: return key_ref; found_matching_key: @@ -866,10 +903,9 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, * - we can drop the locks first as we have the key pinned */ __key_link_end(keyring, ktype, prealloc); - key_type_put(ktype); - key_ref = __key_update(key_ref, payload, plen); - goto error; + key_ref = __key_update(key_ref, &prep); + goto error_free_prep; } EXPORT_SYMBOL(key_create_or_update); @@ -888,6 +924,7 @@ EXPORT_SYMBOL(key_create_or_update); */ int key_update(key_ref_t key_ref, const void *payload, size_t plen) { + struct key_preparsed_payload prep; struct key *key = key_ref_to_ptr(key_ref); int ret; @@ -900,18 +937,31 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen) /* attempt to update it if 
supported */ ret = -EOPNOTSUPP; - if (key->type->update) { - down_write(&key->sem); + if (!key->type->update) + goto error; - ret = key->type->update(key, payload, plen); - if (ret == 0) - /* updating a negative key instantiates it */ - clear_bit(KEY_FLAG_NEGATIVE, &key->flags); - - up_write(&key->sem); + memset(&prep, 0, sizeof(prep)); + prep.data = payload; + prep.datalen = plen; + prep.quotalen = key->type->def_datalen; + if (key->type->preparse) { + ret = key->type->preparse(&prep); + if (ret < 0) + goto error; } - error: + down_write(&key->sem); + + ret = key->type->update(key, &prep); + if (ret == 0) + /* updating a negative key instantiates it */ + clear_bit(KEY_FLAG_NEGATIVE, &key->flags); + + up_write(&key->sem); + + if (key->type->preparse) + key->type->free_preparse(&prep); +error: return ret; } EXPORT_SYMBOL(key_update); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 3364fbf46807..505d40be196c 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -46,6 +46,9 @@ static int key_get_type_from_user(char *type, * Extract the description of a new key from userspace and either add it as a * new key to the specified keyring or update a matching key in that keyring. * + * If the description is NULL or an empty string, the key type is asked to + * generate one from the payload. + * * The keyring must be writable so that we can attach the key to it. * * If successful, the new key's serial number is returned, otherwise an error @@ -72,10 +75,17 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type, if (ret < 0) goto error; - description = strndup_user(_description, PAGE_SIZE); - if (IS_ERR(description)) { - ret = PTR_ERR(description); - goto error; + description = NULL; + if (_description) { + description = strndup_user(_description, PAGE_SIZE); + if (IS_ERR(description)) { + ret = PTR_ERR(description); + goto error; + } + if (!*description) { + kfree(description); + description = NULL; + } } /* pull the payload in if one was supplied */ diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 81e7852d281d..f04d8cf81f3c 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -66,7 +66,7 @@ static inline unsigned keyring_hash(const char *desc) * operations. */ static int keyring_instantiate(struct key *keyring, - const void *data, size_t datalen); + struct key_preparsed_payload *prep); static int keyring_match(const struct key *keyring, const void *criterion); static void keyring_revoke(struct key *keyring); static void keyring_destroy(struct key *keyring); @@ -121,12 +121,12 @@ static void keyring_publish_name(struct key *keyring) * Returns 0 on success, -EINVAL if given any data. 
*/ static int keyring_instantiate(struct key *keyring, - const void *data, size_t datalen) + struct key_preparsed_payload *prep) { int ret; ret = -EINVAL; - if (datalen == 0) { + if (prep->datalen == 0) { /* make the keyring available by name if it has one */ keyring_publish_name(keyring); ret = 0; diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 60d4e3f5e4bb..85730d5a5a59 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -19,7 +19,8 @@ #include #include "internal.h" -static int request_key_auth_instantiate(struct key *, const void *, size_t); +static int request_key_auth_instantiate(struct key *, + struct key_preparsed_payload *); static void request_key_auth_describe(const struct key *, struct seq_file *); static void request_key_auth_revoke(struct key *); static void request_key_auth_destroy(struct key *); @@ -42,10 +43,9 @@ struct key_type key_type_request_key_auth = { * Instantiate a request-key authorisation key. */ static int request_key_auth_instantiate(struct key *key, - const void *data, - size_t datalen) + struct key_preparsed_payload *prep) { - key->payload.data = (struct request_key_auth *) data; + key->payload.data = (struct request_key_auth *)prep->data; return 0; } diff --git a/security/keys/trusted.c b/security/keys/trusted.c index 2d5d041f2049..42036c7a0856 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -927,22 +927,23 @@ static struct trusted_key_payload *trusted_payload_alloc(struct key *key) * * On success, return 0. Otherwise return errno. */ -static int trusted_instantiate(struct key *key, const void *data, - size_t datalen) +static int trusted_instantiate(struct key *key, + struct key_preparsed_payload *prep) { struct trusted_key_payload *payload = NULL; struct trusted_key_options *options = NULL; + size_t datalen = prep->datalen; char *datablob; int ret = 0; int key_cmd; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; datablob = kmalloc(datalen + 1, GFP_KERNEL); if (!datablob) return -ENOMEM; - memcpy(datablob, data, datalen); + memcpy(datablob, prep->data, datalen); datablob[datalen] = '\0'; options = trusted_options_alloc(); @@ -1011,17 +1012,18 @@ static void trusted_rcu_free(struct rcu_head *rcu) /* * trusted_update - reseal an existing key with new PCR values */ -static int trusted_update(struct key *key, const void *data, size_t datalen) +static int trusted_update(struct key *key, struct key_preparsed_payload *prep) { struct trusted_key_payload *p = key->payload.data; struct trusted_key_payload *new_p; struct trusted_key_options *new_o; + size_t datalen = prep->datalen; char *datablob; int ret = 0; if (!p->migratable) return -EPERM; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; datablob = kmalloc(datalen + 1, GFP_KERNEL); @@ -1038,7 +1040,7 @@ static int trusted_update(struct key *key, const void *data, size_t datalen) goto out; } - memcpy(datablob, data, datalen); + memcpy(datablob, prep->data, datalen); datablob[datalen] = '\0'; ret = datablob_parse(datablob, new_p, new_o); if (ret != Opt_update) { diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index c7660a25a3e4..55dc88939185 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -58,13 +58,14 @@ EXPORT_SYMBOL_GPL(key_type_logon); /* * instantiate a user defined key */ -int user_instantiate(struct key *key, const void 
*data, size_t datalen) +int user_instantiate(struct key *key, struct key_preparsed_payload *prep) { struct user_key_payload *upayload; + size_t datalen = prep->datalen; int ret; ret = -EINVAL; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) goto error; ret = key_payload_reserve(key, datalen); @@ -78,7 +79,7 @@ int user_instantiate(struct key *key, const void *data, size_t datalen) /* attach the data */ upayload->datalen = datalen; - memcpy(upayload->data, data, datalen); + memcpy(upayload->data, prep->data, datalen); rcu_assign_keypointer(key, upayload); ret = 0; @@ -92,13 +93,14 @@ EXPORT_SYMBOL_GPL(user_instantiate); * update a user defined key * - the key's semaphore is write-locked */ -int user_update(struct key *key, const void *data, size_t datalen) +int user_update(struct key *key, struct key_preparsed_payload *prep) { struct user_key_payload *upayload, *zap; + size_t datalen = prep->datalen; int ret; ret = -EINVAL; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) goto error; /* construct a replacement payload */ @@ -108,7 +110,7 @@ int user_update(struct key *key, const void *data, size_t datalen) goto error; upayload->datalen = datalen; - memcpy(upayload->data, data, datalen); + memcpy(upayload->data, prep->data, datalen); /* check the quota and attach the new data */ zap = upayload; From cc769b6257884e26476e1643d7203b4e875b2387 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 20 Sep 2012 18:36:03 -0300 Subject: [PATCH 0161/5831] drm/i915: don't recheck for invalid pipe bpp As noticed by Daniel Vetter, intel_pipe_choose_bpp_dither should already check for invalid bpp values and set a valid value, so remove the recheck inside ironlake_crtc_mode_set and also replace a "default" switch case inside ironlake_set_pipeconf with a BUG(). Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6f5aafa1b633..1458563913f3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4626,8 +4626,8 @@ static void ironlake_set_pipeconf(struct drm_crtc *crtc, val |= PIPE_12BPC; break; default: - val |= PIPE_8BPC; - break; + /* Case prevented by intel_choose_pipe_bpp_dither. 
*/ + BUG(); } val &= ~(PIPECONF_DITHER_EN | PIPECONF_DITHER_TYPE_MASK); @@ -4728,7 +4728,6 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, struct fdi_m_n m_n = {0}; u32 temp; int target_clock, pixel_multiplier, lane, link_bw, factor; - unsigned int pipe_bpp; bool dither; bool is_cpu_edp = false, is_pch_edp = false; @@ -4802,18 +4801,10 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, target_clock = adjusted_mode->clock; /* determine panel color depth */ - dither = intel_choose_pipe_bpp_dither(crtc, fb, &pipe_bpp, mode); + dither = intel_choose_pipe_bpp_dither(crtc, fb, &intel_crtc->bpp, mode); if (is_lvds && dev_priv->lvds_dither) dither = true; - if (pipe_bpp != 18 && pipe_bpp != 24 && pipe_bpp != 30 && - pipe_bpp != 36) { - WARN(1, "intel_choose_pipe_bpp returned invalid value %d\n", - pipe_bpp); - pipe_bpp = 24; - } - intel_crtc->bpp = pipe_bpp; - if (!lane) { /* * Account for spread spectrum to avoid From f48d8f235a9e3d2331b25743807340fc65b86587 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 20 Sep 2012 18:36:04 -0300 Subject: [PATCH 0162/5831] drm/i915: extract set_m_n from ironlake_crtc_mode_set The set_m_n code was spread all over the mode_set function. Version 2: Don't set the DP M/N registers on ironlake_set_m_n. Daniel Vetter has plans to add some encoder-specific callbacks. Also, on this version we don't change the order we're writing the registers, making the code change safer. Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 131 ++++++++++++++++----------- 1 file changed, 79 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1458563913f3..e92c6d329137 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4707,6 +4707,82 @@ static bool ironlake_compute_clocks(struct drm_crtc *crtc, return true; } +static void ironlake_set_m_n(struct drm_crtc *crtc, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + enum pipe pipe = intel_crtc->pipe; + struct intel_encoder *intel_encoder, *edp_encoder = NULL; + struct fdi_m_n m_n = {0}; + int target_clock, pixel_multiplier, lane, link_bw; + bool is_dp = false, is_cpu_edp = false; + + for_each_encoder_on_crtc(dev, crtc, intel_encoder) { + switch (intel_encoder->type) { + case INTEL_OUTPUT_DISPLAYPORT: + is_dp = true; + break; + case INTEL_OUTPUT_EDP: + is_dp = true; + if (!intel_encoder_is_pch_edp(&intel_encoder->base)) + is_cpu_edp = true; + edp_encoder = intel_encoder; + break; + } + } + + /* FDI link */ + pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode); + lane = 0; + /* CPU eDP doesn't require FDI link, so just set DP M/N + according to current link config */ + if (is_cpu_edp) { + intel_edp_link_config(edp_encoder, &lane, &link_bw); + } else { + /* FDI is a binary signal running at ~2.7GHz, encoding + * each output octet as 10 bits. The actual frequency + * is stored as a divider into a 100MHz clock, and the + * mode pixel clock is stored in units of 1KHz. + * Hence the bw of each lane in terms of the mode signal + * is: + */ + link_bw = intel_fdi_link_freq(dev) * MHz(100)/KHz(1)/10; + } + + /* [e]DP over FDI requires target mode clock instead of link clock. 
*/ + if (edp_encoder) + target_clock = intel_edp_target_clock(edp_encoder, mode); + else if (is_dp) + target_clock = mode->clock; + else + target_clock = adjusted_mode->clock; + + if (!lane) { + /* + * Account for spread spectrum to avoid + * oversubscribing the link. Max center spread + * is 2.5%; use 5% for safety's sake. + */ + u32 bps = target_clock * intel_crtc->bpp * 21 / 20; + lane = bps / (link_bw * 8) + 1; + } + + intel_crtc->fdi_lanes = lane; + + if (pixel_multiplier > 1) + link_bw *= pixel_multiplier; + ironlake_compute_m_n(intel_crtc->bpp, lane, target_clock, link_bw, + &m_n); + + I915_WRITE(PIPE_DATA_M1(pipe), TU_SIZE(m_n.tu) | m_n.gmch_m); + I915_WRITE(PIPE_DATA_N1(pipe), m_n.gmch_n); + I915_WRITE(PIPE_LINK_M1(pipe), m_n.link_m); + I915_WRITE(PIPE_LINK_N1(pipe), m_n.link_n); +} + static int ironlake_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, @@ -4723,11 +4799,9 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, u32 dpll, fp = 0, fp2 = 0; bool ok, has_reduced_clock = false, is_sdvo = false; bool is_crt = false, is_lvds = false, is_tv = false, is_dp = false; - struct intel_encoder *encoder, *edp_encoder = NULL; - int ret; - struct fdi_m_n m_n = {0}; + struct intel_encoder *encoder; u32 temp; - int target_clock, pixel_multiplier, lane, link_bw, factor; + int ret, factor; bool dither; bool is_cpu_edp = false, is_pch_edp = false; @@ -4757,7 +4831,6 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, is_pch_edp = true; else is_cpu_edp = true; - edp_encoder = encoder; break; } @@ -4774,54 +4847,11 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, /* Ensure that the cursor is valid for the new mode before changing... */ intel_crtc_update_cursor(crtc, true); - /* FDI link */ - pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode); - lane = 0; - /* CPU eDP doesn't require FDI link, so just set DP M/N - according to current link config */ - if (is_cpu_edp) { - intel_edp_link_config(edp_encoder, &lane, &link_bw); - } else { - /* FDI is a binary signal running at ~2.7GHz, encoding - * each output octet as 10 bits. The actual frequency - * is stored as a divider into a 100MHz clock, and the - * mode pixel clock is stored in units of 1KHz. - * Hence the bw of each lane in terms of the mode signal - * is: - */ - link_bw = intel_fdi_link_freq(dev) * MHz(100)/KHz(1)/10; - } - - /* [e]DP over FDI requires target mode clock instead of link clock. */ - if (edp_encoder) - target_clock = intel_edp_target_clock(edp_encoder, mode); - else if (is_dp) - target_clock = mode->clock; - else - target_clock = adjusted_mode->clock; - /* determine panel color depth */ dither = intel_choose_pipe_bpp_dither(crtc, fb, &intel_crtc->bpp, mode); if (is_lvds && dev_priv->lvds_dither) dither = true; - if (!lane) { - /* - * Account for spread spectrum to avoid - * oversubscribing the link. Max center spread - * is 2.5%; use 5% for safety's sake. 
- */ - u32 bps = target_clock * intel_crtc->bpp * 21 / 20; - lane = bps / (link_bw * 8) + 1; - } - - intel_crtc->fdi_lanes = lane; - - if (pixel_multiplier > 1) - link_bw *= pixel_multiplier; - ironlake_compute_m_n(intel_crtc->bpp, lane, target_clock, link_bw, - &m_n); - fp = clock.n << 16 | clock.m1 << 8 | clock.m2; if (has_reduced_clock) fp2 = reduced_clock.n << 16 | reduced_clock.m1 << 8 | @@ -5018,10 +5048,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, I915_WRITE(PIPESRC(pipe), ((mode->hdisplay - 1) << 16) | (mode->vdisplay - 1)); - I915_WRITE(PIPE_DATA_M1(pipe), TU_SIZE(m_n.tu) | m_n.gmch_m); - I915_WRITE(PIPE_DATA_N1(pipe), m_n.gmch_n); - I915_WRITE(PIPE_LINK_M1(pipe), m_n.link_m); - I915_WRITE(PIPE_LINK_N1(pipe), m_n.link_n); + ironlake_set_m_n(crtc, mode, adjusted_mode); if (is_cpu_edp) ironlake_set_pll_edp(crtc, adjusted_mode->clock); From de13a2e3f88a4da8e85063b6de37096795079e41 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 20 Sep 2012 18:36:05 -0300 Subject: [PATCH 0163/5831] drm/i915: extract compute_dpll from ironlake_crtc_mode_set Too many lines just to compute the value of a single variable, so move this to its own function. Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 165 +++++++++++++++++---------- 1 file changed, 105 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e92c6d329137..c7fa2a82c84a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4783,6 +4783,109 @@ static void ironlake_set_m_n(struct drm_crtc *crtc, I915_WRITE(PIPE_LINK_N1(pipe), m_n.link_n); } +static uint32_t ironlake_compute_dpll(struct intel_crtc *intel_crtc, + struct drm_display_mode *adjusted_mode, + intel_clock_t *clock, u32 fp) +{ + struct drm_crtc *crtc = &intel_crtc->base; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_encoder *intel_encoder; + uint32_t dpll; + int factor, pixel_multiplier, num_connectors = 0; + bool is_lvds = false, is_sdvo = false, is_tv = false; + bool is_dp = false, is_cpu_edp = false; + + for_each_encoder_on_crtc(dev, crtc, intel_encoder) { + switch (intel_encoder->type) { + case INTEL_OUTPUT_LVDS: + is_lvds = true; + break; + case INTEL_OUTPUT_SDVO: + case INTEL_OUTPUT_HDMI: + is_sdvo = true; + if (intel_encoder->needs_tv_clock) + is_tv = true; + break; + case INTEL_OUTPUT_TVOUT: + is_tv = true; + break; + case INTEL_OUTPUT_DISPLAYPORT: + is_dp = true; + break; + case INTEL_OUTPUT_EDP: + is_dp = true; + if (!intel_encoder_is_pch_edp(&intel_encoder->base)) + is_cpu_edp = true; + break; + } + + num_connectors++; + } + + /* Enable autotuning of the PLL clock (if permissible) */ + factor = 21; + if (is_lvds) { + if ((intel_panel_use_ssc(dev_priv) && + dev_priv->lvds_ssc_freq == 100) || + (I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP) + factor = 25; + } else if (is_sdvo && is_tv) + factor = 20; + + if (clock->m < factor * clock->n) + fp |= FP_CB_TUNE; + + dpll = 0; + + if (is_lvds) + dpll |= DPLLB_MODE_LVDS; + else + dpll |= DPLLB_MODE_DAC_SERIAL; + if (is_sdvo) { + pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode); + if (pixel_multiplier > 1) { + dpll |= (pixel_multiplier - 1) << PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT; + } + dpll |= DPLL_DVO_HIGH_SPEED; + } + if (is_dp && !is_cpu_edp) + dpll |= DPLL_DVO_HIGH_SPEED; + + /* compute bitmask from p1 value */ + dpll |= 
(1 << (clock->p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT; + /* also FPA1 */ + dpll |= (1 << (clock->p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT; + + switch (clock->p2) { + case 5: + dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_5; + break; + case 7: + dpll |= DPLLB_LVDS_P2_CLOCK_DIV_7; + break; + case 10: + dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_10; + break; + case 14: + dpll |= DPLLB_LVDS_P2_CLOCK_DIV_14; + break; + } + + if (is_sdvo && is_tv) + dpll |= PLL_REF_INPUT_TVCLKINBC; + else if (is_tv) + /* XXX: just matching BIOS for now */ + /* dpll |= PLL_REF_INPUT_TVCLKINBC; */ + dpll |= 3; + else if (is_lvds && intel_panel_use_ssc(dev_priv) && num_connectors < 2) + dpll |= PLLB_REF_INPUT_SPREADSPECTRUMIN; + else + dpll |= PLL_REF_INPUT_DREFCLK; + + return dpll; +} + static int ironlake_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, @@ -4801,7 +4904,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, bool is_crt = false, is_lvds = false, is_tv = false, is_dp = false; struct intel_encoder *encoder; u32 temp; - int ret, factor; + int ret; bool dither; bool is_cpu_edp = false, is_pch_edp = false; @@ -4857,65 +4960,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, fp2 = reduced_clock.n << 16 | reduced_clock.m1 << 8 | reduced_clock.m2; - /* Enable autotuning of the PLL clock (if permissible) */ - factor = 21; - if (is_lvds) { - if ((intel_panel_use_ssc(dev_priv) && - dev_priv->lvds_ssc_freq == 100) || - (I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP) - factor = 25; - } else if (is_sdvo && is_tv) - factor = 20; - - if (clock.m < factor * clock.n) - fp |= FP_CB_TUNE; - - dpll = 0; - - if (is_lvds) - dpll |= DPLLB_MODE_LVDS; - else - dpll |= DPLLB_MODE_DAC_SERIAL; - if (is_sdvo) { - int pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode); - if (pixel_multiplier > 1) { - dpll |= (pixel_multiplier - 1) << PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT; - } - dpll |= DPLL_DVO_HIGH_SPEED; - } - if (is_dp && !is_cpu_edp) - dpll |= DPLL_DVO_HIGH_SPEED; - - /* compute bitmask from p1 value */ - dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT; - /* also FPA1 */ - dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT; - - switch (clock.p2) { - case 5: - dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_5; - break; - case 7: - dpll |= DPLLB_LVDS_P2_CLOCK_DIV_7; - break; - case 10: - dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_10; - break; - case 14: - dpll |= DPLLB_LVDS_P2_CLOCK_DIV_14; - break; - } - - if (is_sdvo && is_tv) - dpll |= PLL_REF_INPUT_TVCLKINBC; - else if (is_tv) - /* XXX: just matching BIOS for now */ - /* dpll |= PLL_REF_INPUT_TVCLKINBC; */ - dpll |= 3; - else if (is_lvds && intel_panel_use_ssc(dev_priv) && num_connectors < 2) - dpll |= PLLB_REF_INPUT_SPREADSPECTRUMIN; - else - dpll |= PLL_REF_INPUT_DREFCLK; + dpll = ironlake_compute_dpll(intel_crtc, adjusted_mode, &clock, fp); DRM_DEBUG_KMS("Mode for pipe %d:\n", pipe); drm_mode_debug_printmodeline(mode); From e2f12b070d715315f0ed549fe8d722b681518c1b Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 20 Sep 2012 18:36:06 -0300 Subject: [PATCH 0164/5831] drm/i915: remove unused variables from ironlake_crtc_mode_set The last patches moved a lot of code from ironlake_crtc_mode_set to sub-functions, so these variables became useless. You could get warnings by enabling -Wunused-but-set-variable. 
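As a side note, that warning is normally suppressed by the top-level kernel Makefile, so to reproduce it locally one could re-enable it just for this file, or for a one-off build. This is a hypothetical kbuild snippet, not part of the patch:

	# drivers/gpu/drm/i915/Makefile, local experiment only
	CFLAGS_intel_display.o += -Wunused-but-set-variable

	# or, without touching the Makefile:
	make drivers/gpu/drm/i915/intel_display.o KCFLAGS=-Wunused-but-set-variable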
Signed-off-by: Paulo Zanoni Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c7fa2a82c84a..08c3f69bfc75 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4900,39 +4900,24 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, int num_connectors = 0; intel_clock_t clock, reduced_clock; u32 dpll, fp = 0, fp2 = 0; - bool ok, has_reduced_clock = false, is_sdvo = false; - bool is_crt = false, is_lvds = false, is_tv = false, is_dp = false; + bool ok, has_reduced_clock = false; + bool is_lvds = false, is_dp = false, is_cpu_edp = false; struct intel_encoder *encoder; u32 temp; int ret; bool dither; - bool is_cpu_edp = false, is_pch_edp = false; for_each_encoder_on_crtc(dev, crtc, encoder) { switch (encoder->type) { case INTEL_OUTPUT_LVDS: is_lvds = true; break; - case INTEL_OUTPUT_SDVO: - case INTEL_OUTPUT_HDMI: - is_sdvo = true; - if (encoder->needs_tv_clock) - is_tv = true; - break; - case INTEL_OUTPUT_TVOUT: - is_tv = true; - break; - case INTEL_OUTPUT_ANALOG: - is_crt = true; - break; case INTEL_OUTPUT_DISPLAYPORT: is_dp = true; break; case INTEL_OUTPUT_EDP: is_dp = true; - if (intel_encoder_is_pch_edp(&encoder->base)) - is_pch_edp = true; - else + if (!intel_encoder_is_pch_edp(&encoder->base)) is_cpu_edp = true; break; } From 015b85a067bd9949756e49b01b70c820d31316d7 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Tue, 18 Sep 2012 10:58:47 -0400 Subject: [PATCH 0165/5831] drm: Export drm_probe_ddc() Tested-by: Takashi Iwai Signed-off-by: Adam Jackson Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_edid.c | 3 ++- include/drm/drm_crtc.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 289c9b18c0d3..d738a38e2329 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -395,13 +395,14 @@ out: * \param adapter : i2c device adaptor * \return 1 on success */ -static bool +bool drm_probe_ddc(struct i2c_adapter *adapter) { unsigned char out; return (drm_do_probe_ddc_edid(adapter, &out, 0, 1) == 0); } +EXPORT_SYMBOL(drm_probe_ddc); /** * drm_get_edid - get EDID data, if available diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index eb91d520ce0b..07e2956d9644 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -861,6 +861,7 @@ extern char *drm_get_tv_subconnector_name(int val); extern char *drm_get_tv_select_name(int val); extern void drm_fb_release(struct drm_file *file_priv); extern int drm_mode_group_init_legacy_group(struct drm_device *dev, struct drm_mode_group *group); +extern bool drm_probe_ddc(struct i2c_adapter *adapter); extern struct edid *drm_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter); extern int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid); From e89861dfa3ed0d8e9d184becf25aebea14e98372 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Tue, 18 Sep 2012 10:58:48 -0400 Subject: [PATCH 0166/5831] drm/dp: Update DPCD defines Sources: DP, eDP, and DP interop specs, and a VESA slideshow about DP 1.2 for the MST bits. 
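[Annotation, not part of the patch: the new defines are consumed by later patches in this series. As a rough sketch (hypothetical caller, reusing the aux read helper that already exists in intel_dp.c), probing the new MST capability bit could look like:

	uint8_t mstm_cap;

	if (intel_dp_aux_native_read_retry(intel_dp, DP_MSTM_CAP, &mstm_cap, 1) &&
	    (mstm_cap & DP_MST_CAP))
		DRM_DEBUG_KMS("sink supports multi-stream transport\n");
]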
Tested-by: Takashi Iwai Signed-off-by: Adam Jackson Acked-by: Dave Airlie Reviewed-by: Paulo Zanoni Reviewed-by: Jani Nikula Signed-off-by: Daniel Vetter --- include/drm/drm_dp_helper.h | 60 ++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 1744b18c06b3..f9888c3cb955 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -69,16 +69,30 @@ /* 10b = TMDS or HDMI */ /* 11b = Other */ # define DP_FORMAT_CONVERSION (1 << 3) +# define DP_DETAILED_CAP_INFO_AVAILABLE (1 << 4) #define DP_MAIN_LINK_CHANNEL_CODING 0x006 #define DP_DOWN_STREAM_PORT_COUNT 0x007 -#define DP_PORT_COUNT_MASK 0x0f -#define DP_OUI_SUPPORT (1 << 7) +# define DP_PORT_COUNT_MASK 0x0f +# define DP_MSA_TIMING_PAR_IGNORED (1 << 6) +# define DP_OUI_SUPPORT (1 << 7) + +#define DP_I2C_SPEED_CAP 0x00c +# define DP_I2C_SPEED_1K 0x01 +# define DP_I2C_SPEED_5K 0x02 +# define DP_I2C_SPEED_10K 0x04 +# define DP_I2C_SPEED_100K 0x08 +# define DP_I2C_SPEED_400K 0x10 +# define DP_I2C_SPEED_1M 0x20 #define DP_EDP_CONFIGURATION_CAP 0x00d #define DP_TRAINING_AUX_RD_INTERVAL 0x00e +/* Multiple stream transport */ +#define DP_MSTM_CAP 0x021 +# define DP_MST_CAP (1 << 0) + #define DP_PSR_SUPPORT 0x070 # define DP_PSR_IS_SUPPORTED 1 #define DP_PSR_CAPS 0x071 @@ -93,6 +107,31 @@ # define DP_PSR_SETUP_TIME_MASK (7 << 1) # define DP_PSR_SETUP_TIME_SHIFT 1 +/* + * 0x80-0x8f describe downstream port capabilities, but there are two layouts + * based on whether DP_DETAILED_CAP_INFO_AVAILABLE was set. If it was not, + * each port's descriptor is one byte wide. If it was set, each port's is + * four bytes wide, starting with the one byte from the base info. As of + * DP interop v1.1a only VGA defines additional detail. 
+ */ + +/* offset 0 */ +#define DP_DOWNSTREAM_PORT_0 0x80 +# define DP_DS_PORT_TYPE_MASK (7 << 0) +# define DP_DS_PORT_TYPE_DP 0 +# define DP_DS_PORT_TYPE_VGA 1 +# define DP_DS_PORT_TYPE_DVI 2 +# define DP_DS_PORT_TYPE_HDMI 3 +# define DP_DS_PORT_TYPE_NON_EDID 4 +# define DP_DS_PORT_HPD (1 << 3) +/* offset 1 for VGA is maximum megapixels per second / 8 */ +/* offset 2 */ +# define DP_DS_VGA_MAX_BPC_MASK (3 << 0) +# define DP_DS_VGA_8BPC 0 +# define DP_DS_VGA_10BPC 1 +# define DP_DS_VGA_12BPC 2 +# define DP_DS_VGA_16BPC 3 + /* link configuration */ #define DP_LINK_BW_SET 0x100 # define DP_LINK_BW_1_62 0x06 @@ -148,24 +187,37 @@ #define DP_DOWNSPREAD_CTRL 0x107 # define DP_SPREAD_AMP_0_5 (1 << 4) +# define DP_MSA_TIMING_PAR_IGNORE_EN (1 << 7) #define DP_MAIN_LINK_CHANNEL_CODING_SET 0x108 # define DP_SET_ANSI_8B10B (1 << 0) +#define DP_I2C_SPEED_CONTROL_STATUS 0x109 +/* bitmask as for DP_I2C_SPEED_CAP */ + +#define DP_EDP_CONFIGURATION_SET 0x10a + +#define DP_MSTM_CTRL 0x111 +# define DP_MST_EN (1 << 0) +# define DP_UP_REQ_EN (1 << 1) +# define DP_UPSTREAM_IS_SRC (1 << 2) + #define DP_PSR_EN_CFG 0x170 # define DP_PSR_ENABLE (1 << 0) # define DP_PSR_MAIN_LINK_ACTIVE (1 << 1) # define DP_PSR_CRC_VERIFICATION (1 << 2) # define DP_PSR_FRAME_CAPTURE (1 << 3) +#define DP_SINK_COUNT 0x200 +# define DP_SINK_COUNT_MASK (31 << 0) +# define DP_SINK_CP_READY (1 << 6) + #define DP_DEVICE_SERVICE_IRQ_VECTOR 0x201 # define DP_REMOTE_CONTROL_COMMAND_PENDING (1 << 0) # define DP_AUTOMATED_TEST_REQUEST (1 << 1) # define DP_CP_IRQ (1 << 2) # define DP_SINK_SPECIFIC_IRQ (1 << 6) -#define DP_EDP_CONFIGURATION_SET 0x10a - #define DP_LANE0_1_STATUS 0x202 #define DP_LANE2_3_STATUS 0x203 # define DP_LANE_CR_DONE (1 << 0) From b091cd928dfa81e8c28b9707899201358b4e50b5 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Tue, 18 Sep 2012 10:58:49 -0400 Subject: [PATCH 0167/5831] drm/i915/dp: Fetch downstream port info if needed during DPCD fetch v2: Fix parenthesis mismatch, spotted by Jani Nikula Tested-by: Takashi Iwai Signed-off-by: Adam Jackson Reviewed-by: Jani Nikula [danvet: Fixup merge conflict and MAX_DOWNSTREAM #define as spotted by Jani.] 
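[Annotation, not part of the patch: once intel_dp->downstream_ports has been filled in by the hunk below, the first descriptor can be decoded with the masks added in the earlier drm_dp_helper.h patch. A hypothetical sketch:

	uint8_t port0 = intel_dp->downstream_ports[0];
	bool hpd_aware = port0 & DP_DS_PORT_HPD;
	uint8_t type = port0 & DP_DS_PORT_TYPE_MASK;	/* DP_DS_PORT_TYPE_VGA, _HDMI, ... */

The next patch in the series uses exactly this information to decide how to sense a connection behind a branch device.]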
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp.c | 24 +++++++++++++++++++----- drivers/gpu/drm/i915/intel_drv.h | 2 ++ 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 1474f84fdbd0..42bdca47c5d6 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -37,6 +37,7 @@ #include "i915_drm.h" #include "i915_drv.h" +#define DP_RECEIVER_CAP_SIZE 0xf #define DP_LINK_STATUS_SIZE 6 #define DP_LINK_CHECK_TIMEOUT (10 * 1000) @@ -1964,12 +1965,25 @@ static bool intel_dp_get_dpcd(struct intel_dp *intel_dp) { if (intel_dp_aux_native_read_retry(intel_dp, 0x000, intel_dp->dpcd, - sizeof(intel_dp->dpcd)) && - (intel_dp->dpcd[DP_DPCD_REV] != 0)) { - return true; - } + sizeof(intel_dp->dpcd)) == 0) + return false; /* aux transfer failed */ - return false; + if (intel_dp->dpcd[DP_DPCD_REV] == 0) + return false; /* DPCD not present */ + + if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] & + DP_DWN_STRM_PORT_PRESENT)) + return true; /* native DP sink */ + + if (intel_dp->dpcd[DP_DPCD_REV] == 0x10) + return true; /* no per-port downstream info */ + + if (intel_dp_aux_native_read_retry(intel_dp, DP_DOWNSTREAM_PORT_0, + intel_dp->downstream_ports, + DP_MAX_DOWNSTREAM_PORTS) == 0) + return false; /* downstream port status fetch failed */ + + return true; } static void diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 351fd7179cd7..79f8ed66574e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -332,6 +332,7 @@ struct intel_hdmi { }; #define DP_RECEIVER_CAP_SIZE 0xf +#define DP_MAX_DOWNSTREAM_PORTS 0x10 #define DP_LINK_CONFIGURATION_SIZE 9 struct intel_dp { @@ -346,6 +347,7 @@ struct intel_dp { uint8_t link_bw; uint8_t lane_count; uint8_t dpcd[DP_RECEIVER_CAP_SIZE]; + uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS]; struct i2c_adapter adapter; struct i2c_algo_dp_aux_data algo; bool is_pch_edp; From 07d3dc18396790d9d394f4f0717a91b3a845c758 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Tue, 18 Sep 2012 10:58:50 -0400 Subject: [PATCH 0168/5831] drm/i915/dp: Be smarter about connection sense for branch devices If there's no downstream device, DPCD success is good enough. If there's a hotplug-capable downstream device, count the number of connected sinks in DP_SINK_STATUS and return success if it's non-zero. Otherwise, probe DDC and report appropriately. v2: Check DP_SINK_STATUS instead of something unrelated to sink status. 
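[Annotation, not part of the patch: the resulting detect logic in intel_dp_detect_dpcd() boils down to:

	DPCD read fails                        -> disconnected
	no downstream port                     -> connected
	branch device with HPD                 -> connected iff DP_SINK_COUNT != 0
	branch device without HPD, DDC answers -> connected
	VGA / non-EDID port, DDC silent        -> unknown
	anything else                          -> log "Broken DP branch device", disconnected
]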
Tested-by: Takashi Iwai Signed-off-by: Adam Jackson Reviewed-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp.c | 35 ++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 42bdca47c5d6..4f2a38181491 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2083,11 +2083,44 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) } } +/* XXX this is probably wrong for multiple downstream ports */ static enum drm_connector_status intel_dp_detect_dpcd(struct intel_dp *intel_dp) { - if (intel_dp_get_dpcd(intel_dp)) + uint8_t *dpcd = intel_dp->dpcd; + bool hpd; + uint8_t type; + + if (!intel_dp_get_dpcd(intel_dp)) + return connector_status_disconnected; + + /* if there's no downstream port, we're done */ + if (!(dpcd[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_PRESENT)) return connector_status_connected; + + /* If we're HPD-aware, SINK_COUNT changes dynamically */ + hpd = !!(intel_dp->downstream_ports[0] & DP_DS_PORT_HPD); + if (hpd) { + uint8_t sink_count; + if (!intel_dp_aux_native_read_retry(intel_dp, DP_SINK_COUNT, + &sink_count, 1)) + return connector_status_unknown; + sink_count &= DP_SINK_COUNT_MASK; + return sink_count ? connector_status_connected + : connector_status_disconnected; + } + + /* If no HPD, poke DDC gently */ + if (drm_probe_ddc(&intel_dp->adapter)) + return connector_status_connected; + + /* Well we tried, say unknown for unreliable port types */ + type = intel_dp->downstream_ports[0] & DP_DS_PORT_TYPE_MASK; + if (type == DP_DS_PORT_TYPE_VGA || type == DP_DS_PORT_TYPE_NON_EDID) + return connector_status_unknown; + + /* Anything else is out of spec, warn and ignore */ + DRM_DEBUG_KMS("Broken DP branch device, ignoring\n"); return connector_status_disconnected; } From a477f4fcbdd48837610974b0d025af1d46051a57 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Thu, 20 Sep 2012 16:42:44 -0400 Subject: [PATCH 0169/5831] drm/dp: Document DP spec versions for various DPCD registers Note with a comment anything newer than DP 1.1a. Obviously this needs some work still... Signed-off-by: Adam Jackson Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- include/drm/drm_dp_helper.h | 52 +++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index f9888c3cb955..38ffcb4332aa 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -26,7 +26,19 @@ #include #include -/* From the VESA DisplayPort spec */ +/* + * Unless otherwise noted, all values are from the DP 1.1a spec. Note that + * DP and DPCD versions are independent. Differences from 1.0 are not noted, + * 1.0 devices basically don't exist in the wild. + * + * Abbreviations, in chronological order: + * + * eDP: Embedded DisplayPort version 1 + * DPI: DisplayPort Interoperability Guideline v1.1a + * 1.2: DisplayPort 1.2 + * + * 1.2 formally includes both eDP and DPI definitions. 
+ */ #define AUX_NATIVE_WRITE 0x8 #define AUX_NATIVE_READ 0x9 @@ -53,7 +65,7 @@ #define DP_MAX_LANE_COUNT 0x002 # define DP_MAX_LANE_COUNT_MASK 0x1f -# define DP_TPS3_SUPPORTED (1 << 6) +# define DP_TPS3_SUPPORTED (1 << 6) /* 1.2 */ # define DP_ENHANCED_FRAME_CAP (1 << 7) #define DP_MAX_DOWNSPREAD 0x003 @@ -69,16 +81,16 @@ /* 10b = TMDS or HDMI */ /* 11b = Other */ # define DP_FORMAT_CONVERSION (1 << 3) -# define DP_DETAILED_CAP_INFO_AVAILABLE (1 << 4) +# define DP_DETAILED_CAP_INFO_AVAILABLE (1 << 4) /* DPI */ #define DP_MAIN_LINK_CHANNEL_CODING 0x006 #define DP_DOWN_STREAM_PORT_COUNT 0x007 # define DP_PORT_COUNT_MASK 0x0f -# define DP_MSA_TIMING_PAR_IGNORED (1 << 6) +# define DP_MSA_TIMING_PAR_IGNORED (1 << 6) /* eDP */ # define DP_OUI_SUPPORT (1 << 7) -#define DP_I2C_SPEED_CAP 0x00c +#define DP_I2C_SPEED_CAP 0x00c /* DPI */ # define DP_I2C_SPEED_1K 0x01 # define DP_I2C_SPEED_5K 0x02 # define DP_I2C_SPEED_10K 0x04 @@ -86,16 +98,16 @@ # define DP_I2C_SPEED_400K 0x10 # define DP_I2C_SPEED_1M 0x20 -#define DP_EDP_CONFIGURATION_CAP 0x00d -#define DP_TRAINING_AUX_RD_INTERVAL 0x00e +#define DP_EDP_CONFIGURATION_CAP 0x00d /* XXX 1.2? */ +#define DP_TRAINING_AUX_RD_INTERVAL 0x00e /* XXX 1.2? */ /* Multiple stream transport */ -#define DP_MSTM_CAP 0x021 +#define DP_MSTM_CAP 0x021 /* 1.2 */ # define DP_MST_CAP (1 << 0) -#define DP_PSR_SUPPORT 0x070 +#define DP_PSR_SUPPORT 0x070 /* XXX 1.2? */ # define DP_PSR_IS_SUPPORTED 1 -#define DP_PSR_CAPS 0x071 +#define DP_PSR_CAPS 0x071 /* XXX 1.2? */ # define DP_PSR_NO_TRAIN_ON_EXIT 1 # define DP_PSR_SETUP_TIME_330 (0 << 1) # define DP_PSR_SETUP_TIME_275 (1 << 1) @@ -136,7 +148,7 @@ #define DP_LINK_BW_SET 0x100 # define DP_LINK_BW_1_62 0x06 # define DP_LINK_BW_2_7 0x0a -# define DP_LINK_BW_5_4 0x14 +# define DP_LINK_BW_5_4 0x14 /* 1.2 */ #define DP_LANE_COUNT_SET 0x101 # define DP_LANE_COUNT_MASK 0x0f @@ -146,7 +158,7 @@ # define DP_TRAINING_PATTERN_DISABLE 0 # define DP_TRAINING_PATTERN_1 1 # define DP_TRAINING_PATTERN_2 2 -# define DP_TRAINING_PATTERN_3 3 +# define DP_TRAINING_PATTERN_3 3 /* 1.2 */ # define DP_TRAINING_PATTERN_MASK 0x3 # define DP_LINK_QUAL_PATTERN_DISABLE (0 << 2) @@ -187,22 +199,22 @@ #define DP_DOWNSPREAD_CTRL 0x107 # define DP_SPREAD_AMP_0_5 (1 << 4) -# define DP_MSA_TIMING_PAR_IGNORE_EN (1 << 7) +# define DP_MSA_TIMING_PAR_IGNORE_EN (1 << 7) /* eDP */ #define DP_MAIN_LINK_CHANNEL_CODING_SET 0x108 # define DP_SET_ANSI_8B10B (1 << 0) -#define DP_I2C_SPEED_CONTROL_STATUS 0x109 +#define DP_I2C_SPEED_CONTROL_STATUS 0x109 /* DPI */ /* bitmask as for DP_I2C_SPEED_CAP */ -#define DP_EDP_CONFIGURATION_SET 0x10a +#define DP_EDP_CONFIGURATION_SET 0x10a /* XXX 1.2? */ -#define DP_MSTM_CTRL 0x111 +#define DP_MSTM_CTRL 0x111 /* 1.2 */ # define DP_MST_EN (1 << 0) # define DP_UP_REQ_EN (1 << 1) # define DP_UPSTREAM_IS_SRC (1 << 2) -#define DP_PSR_EN_CFG 0x170 +#define DP_PSR_EN_CFG 0x170 /* XXX 1.2? */ # define DP_PSR_ENABLE (1 << 0) # define DP_PSR_MAIN_LINK_ACTIVE (1 << 1) # define DP_PSR_CRC_VERIFICATION (1 << 2) @@ -277,14 +289,14 @@ # define DP_SET_POWER_D0 0x1 # define DP_SET_POWER_D3 0x2 -#define DP_PSR_ERROR_STATUS 0x2006 +#define DP_PSR_ERROR_STATUS 0x2006 /* XXX 1.2? */ # define DP_PSR_LINK_CRC_ERROR (1 << 0) # define DP_PSR_RFB_STORAGE_ERROR (1 << 1) -#define DP_PSR_ESI 0x2007 +#define DP_PSR_ESI 0x2007 /* XXX 1.2? */ # define DP_PSR_CAPS_CHANGE (1 << 0) -#define DP_PSR_STATUS 0x2008 +#define DP_PSR_STATUS 0x2008 /* XXX 1.2? 
*/ # define DP_PSR_SINK_INACTIVE 0 # define DP_PSR_SINK_ACTIVE_SRC_SYNCED 1 # define DP_PSR_SINK_ACTIVE_RFB 2 From da131a46268bf2a67e7b7fa137a90a1279866367 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Thu, 20 Sep 2012 16:42:45 -0400 Subject: [PATCH 0170/5831] drm/dp: Make sink count DP 1.2 aware Signed-off-by: Adam Jackson Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp.c | 9 ++++----- include/drm/drm_dp_helper.h | 3 ++- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 4f2a38181491..c63f54e84847 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2101,13 +2101,12 @@ intel_dp_detect_dpcd(struct intel_dp *intel_dp) /* If we're HPD-aware, SINK_COUNT changes dynamically */ hpd = !!(intel_dp->downstream_ports[0] & DP_DS_PORT_HPD); if (hpd) { - uint8_t sink_count; + uint8_t reg; if (!intel_dp_aux_native_read_retry(intel_dp, DP_SINK_COUNT, - &sink_count, 1)) + ®, 1)) return connector_status_unknown; - sink_count &= DP_SINK_COUNT_MASK; - return sink_count ? connector_status_connected - : connector_status_disconnected; + return DP_GET_SINK_COUNT(reg) ? connector_status_connected + : connector_status_disconnected; } /* If no HPD, poke DDC gently */ diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 38ffcb4332aa..fe061489f91f 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -221,7 +221,8 @@ # define DP_PSR_FRAME_CAPTURE (1 << 3) #define DP_SINK_COUNT 0x200 -# define DP_SINK_COUNT_MASK (31 << 0) +/* prior to 1.2 bit 7 was reserved mbz */ +# define DP_GET_SINK_COUNT(x) ((((x) & 0x80) >> 1) | ((x) & 0x3f)) # define DP_SINK_CP_READY (1 << 6) #define DP_DEVICE_SERVICE_IRQ_VECTOR 0x201 From 3bcedbe5f2a3da65326d99803cac71c1e89bc93f Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 19 Sep 2012 13:29:01 -0700 Subject: [PATCH 0171/5831] drm/i915: limit VLV IRQ enables to those we use To match IVB. 
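[Annotation on DP_GET_SINK_COUNT() from the previous patch, not part of either patch: DP 1.2 turned the formerly reserved bit 7 of SINK_COUNT into the high bit of the count, while bit 6 remains CP_READY. The macro therefore folds bit 7 down into bit position 6; for example a raw value of 0x81 decodes as ((0x80 >> 1) | 0x01) = 0x41, i.e. 65 sinks.]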
Signed-off-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d7f0066538a9..b160bb09efae 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1957,6 +1957,7 @@ static int valleyview_irq_postinstall(struct drm_device *dev) u32 enable_mask; u32 hotplug_en = I915_READ(PORT_HOTPLUG_EN); u32 pipestat_enable = PLANE_FLIP_DONE_INT_EN_VLV; + u32 render_irqs; u16 msid; enable_mask = I915_DISPLAY_PORT_INTERRUPT; @@ -1996,21 +1997,12 @@ static int valleyview_irq_postinstall(struct drm_device *dev) I915_WRITE(VLV_IIR, 0xffffffff); I915_WRITE(VLV_IIR, 0xffffffff); - dev_priv->gt_irq_mask = ~0; - - I915_WRITE(GTIIR, I915_READ(GTIIR)); I915_WRITE(GTIIR, I915_READ(GTIIR)); I915_WRITE(GTIMR, dev_priv->gt_irq_mask); - I915_WRITE(GTIER, GT_GEN6_BLT_FLUSHDW_NOTIFY_INTERRUPT | - GT_GEN6_BLT_CS_ERROR_INTERRUPT | - GT_GEN6_BLT_USER_INTERRUPT | - GT_GEN6_BSD_USER_INTERRUPT | - GT_GEN6_BSD_CS_ERROR_INTERRUPT | - GT_GEN7_L3_PARITY_ERROR_INTERRUPT | - GT_PIPE_NOTIFY | - GT_RENDER_CS_ERROR_INTERRUPT | - GT_SYNC_STATUS | - GT_USER_INTERRUPT); + + render_irqs = GT_USER_INTERRUPT | GEN6_BSD_USER_INTERRUPT | + GEN6_BLITTER_USER_INTERRUPT; + I915_WRITE(GTIER, render_irqs); POSTING_READ(GTIER); /* ack & enable invalid PTE error interrupts */ From 631527703d1aa2f0c5dc2af0d998f4da95c83f0e Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 28 Sep 2012 12:20:02 +0100 Subject: [PATCH 0172/5831] keys: Fix unreachable code We set ret to NULL then test it. Remove the bogus test Signed-off-by: Alan Cox Signed-off-by: David Howells --- security/keys/process_keys.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 54339cfd6734..178b8c3b130a 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -357,8 +357,6 @@ key_ref_t search_my_process_keyrings(struct key_type *type, switch (PTR_ERR(key_ref)) { case -EAGAIN: /* no key */ - if (ret) - break; case -ENOKEY: /* negative key */ ret = key_ref; break; From a84a921978b7d56e0e4b87ffaca6367429b4d8ff Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 28 Sep 2012 12:20:02 +0100 Subject: [PATCH 0173/5831] key: Fix resource leak On an error iov may still have been reallocated and need freeing Signed-off-by: Alan Cox Signed-off-by: David Howells --- security/keys/keyctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 3364fbf46807..a0d373f76815 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1112,12 +1112,12 @@ long keyctl_instantiate_key_iov(key_serial_t id, ret = rw_copy_check_uvector(WRITE, _payload_iov, ioc, ARRAY_SIZE(iovstack), iovstack, &iov); if (ret < 0) - return ret; + goto err; if (ret == 0) goto no_payload_free; ret = keyctl_instantiate_key_common(id, iov, ioc, ret, ringid); - +err: if (iov != iovstack) kfree(iov); return ret; From 9473c8f485e1e3740d5aebf1de4838b615f9dedc Mon Sep 17 00:00:00 2001 From: Vijay Purushothaman Date: Thu, 27 Sep 2012 19:13:01 +0530 Subject: [PATCH 0174/5831] drm/i915: Set aux clk to 100MHz for Valleyview Set hrawclk to 200 MHz and aux divider clock to 100 MHz for Valleyview. This enables the aux transactions in Valleyview. 
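[Annotation, not part of the patch: the divider values in intel_dp_aux_ch() are simply the source clock divided down to the roughly 2 MHz clock the AUX block wants, which is why the existing cases read 400 MHz / 2 = 200 and 450 MHz / 2 = 225; the new Valleyview case follows the same pattern with 200 MHz / 2 = 100.]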
Signed-off-by: Vijay Purushothaman Signed-off-by: Ben Widawsky Acked-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index c63f54e84847..94945ce0048f 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -286,6 +286,10 @@ intel_hrawclk(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; uint32_t clkcfg; + /* There is no CLKCFG reg in Valleyview. VLV hrawclk is 200 MHz */ + if (IS_VALLEYVIEW(dev)) + return 200; + clkcfg = I915_READ(CLKCFG); switch (clkcfg & CLKCFG_FSB_MASK) { case CLKCFG_FSB_400: @@ -366,7 +370,9 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, * clock divider. */ if (is_cpu_edp(intel_dp)) { - if (IS_GEN6(dev) || IS_GEN7(dev)) + if (IS_VALLEYVIEW(dev)) + aux_clock_divider = 100; + else if (IS_GEN6(dev) || IS_GEN7(dev)) aux_clock_divider = 200; /* SNB & IVB eDP input clock at 400Mhz */ else aux_clock_divider = 225; /* eDP input clock at 450Mhz */ From ae33cdcfc6aba86505f7a8ec288e3e1f7277de62 Mon Sep 17 00:00:00 2001 From: Vijay Purushothaman Date: Thu, 27 Sep 2012 19:13:02 +0530 Subject: [PATCH 0175/5831] drm/i915: Fix SDVO IER and status bits for Valleyview Fixed SDVOB and SDVOC bit definitions for Valleyview. Signed-off-by: Vijay Purushothaman Signed-off-by: Ben Widawsky Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b160bb09efae..f7ec794480c8 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2012,7 +2012,6 @@ static int valleyview_irq_postinstall(struct drm_device *dev) #endif I915_WRITE(VLV_MASTER_IER, MASTER_INTERRUPT_ENABLE); -#if 0 /* FIXME: check register definitions; some have moved */ /* Note HDMI and DP share bits */ if (dev_priv->hotplug_supported_mask & HDMIB_HOTPLUG_INT_STATUS) hotplug_en |= HDMIB_HOTPLUG_INT_EN; @@ -2020,15 +2019,14 @@ static int valleyview_irq_postinstall(struct drm_device *dev) hotplug_en |= HDMIC_HOTPLUG_INT_EN; if (dev_priv->hotplug_supported_mask & HDMID_HOTPLUG_INT_STATUS) hotplug_en |= HDMID_HOTPLUG_INT_EN; - if (dev_priv->hotplug_supported_mask & SDVOC_HOTPLUG_INT_STATUS) + if (dev_priv->hotplug_supported_mask & SDVOC_HOTPLUG_INT_STATUS_I915) hotplug_en |= SDVOC_HOTPLUG_INT_EN; - if (dev_priv->hotplug_supported_mask & SDVOB_HOTPLUG_INT_STATUS) + if (dev_priv->hotplug_supported_mask & SDVOB_HOTPLUG_INT_STATUS_I915) hotplug_en |= SDVOB_HOTPLUG_INT_EN; if (dev_priv->hotplug_supported_mask & CRT_HOTPLUG_INT_STATUS) { hotplug_en |= CRT_HOTPLUG_INT_EN; hotplug_en |= CRT_HOTPLUG_VOLTAGE_COMPARE_50; } -#endif I915_WRITE(PORT_HOTPLUG_EN, hotplug_en); From b56747aace48a269fefa7d337963cbae6e95b0a0 Mon Sep 17 00:00:00 2001 From: Vijay Purushothaman Date: Thu, 27 Sep 2012 19:13:03 +0530 Subject: [PATCH 0176/5831] drm/i915: Add Valleyview lane control definitions Added DPIO data lane register definitions for Valleyview Signed-off-by: Vijay Purushothaman Signed-off-by: Ben Widawsky Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a828e90602b9..3f75ee6b5b21 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h 
@@ -369,6 +369,7 @@ #define DPIO_PLL_MODESEL_SHIFT 24 /* 3 bits */ #define DPIO_BIAS_CURRENT_CTL_SHIFT 21 /* 3 bits, always 0x7 */ #define DPIO_PLL_REFCLK_SEL_SHIFT 16 /* 2 bits */ +#define DPIO_PLL_REFCLK_SEL_MASK 3 #define DPIO_DRIVER_CTL_SHIFT 12 /* always set to 0x8 */ #define DPIO_CLK_BIAS_CTL_SHIFT 8 /* always set to 0x5 */ #define _DPIO_REFSFR_B 0x8034 @@ -384,6 +385,13 @@ #define DPIO_FASTCLK_DISABLE 0x8100 +#define _DPIO_DATA_LANE0 0x0220 +#define _DPIO_DATA_LANE1 0x0420 +#define _DPIO_DATA_LANE2 0x2620 +#define _DPIO_DATA_LANE3 0x2820 +#define DPIO_DATA_LANE_A(pipe) _PIPE(pipe, _DPIO_DATA_LANE0, _DPIO_DATA_LANE2) +#define DPIO_DATA_LANE_B(pipe) _PIPE(pipe, _DPIO_DATA_LANE1, _DPIO_DATA_LANE3) + /* * Fence registers */ From 74a4dd2e4594804ffeb04b3e60ff4cfbf6b8ce10 Mon Sep 17 00:00:00 2001 From: Vijay Purushothaman Date: Thu, 27 Sep 2012 19:13:04 +0530 Subject: [PATCH 0177/5831] drm/i915: Program correct m n tu register for Valleyview m n tu register offset has changed in Valleyview. Also fixed DP limit frequencies. Signed-off-by: Vijay Purushothaman Signed-off-by: Ben Widawsky Acked-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 6 +++--- drivers/gpu/drm/i915/intel_dp.c | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 08c3f69bfc75..647898196693 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -393,10 +393,10 @@ static const intel_limit_t intel_limits_vlv_hdmi = { }; static const intel_limit_t intel_limits_vlv_dp = { - .dot = { .min = 162000, .max = 270000 }, - .vco = { .min = 5994000, .max = 4000000 }, + .dot = { .min = 25000, .max = 270000 }, + .vco = { .min = 4000000, .max = 6000000 }, .n = { .min = 1, .max = 7 }, - .m = { .min = 60, .max = 300 }, /* guess */ + .m = { .min = 22, .max = 450 }, .m1 = { .min = 2, .max = 3 }, .m2 = { .min = 11, .max = 156 }, .p = { .min = 10, .max = 30 }, diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 94945ce0048f..7fe9b9c72aab 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -805,6 +805,11 @@ intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode, I915_WRITE(TRANSDATA_N1(pipe), m_n.gmch_n); I915_WRITE(TRANSDPLINK_M1(pipe), m_n.link_m); I915_WRITE(TRANSDPLINK_N1(pipe), m_n.link_n); + } else if (IS_VALLEYVIEW(dev)) { + I915_WRITE(PIPE_DATA_M1(pipe), TU_SIZE(m_n.tu) | m_n.gmch_m); + I915_WRITE(PIPE_DATA_N1(pipe), m_n.gmch_n); + I915_WRITE(PIPE_LINK_M1(pipe), m_n.link_m); + I915_WRITE(PIPE_LINK_N1(pipe), m_n.link_n); } else { I915_WRITE(PIPE_GMCH_DATA_M(pipe), ((m_n.tu - 1) << PIPE_GMCH_DATA_M_TU_SIZE_SHIFT) | From 2a8f64ca23447248efaf87c5c7c2cb0c5c3f27e8 Mon Sep 17 00:00:00 2001 From: Vijay Purushothaman Date: Thu, 27 Sep 2012 19:13:06 +0530 Subject: [PATCH 0178/5831] drm/i915: Enable DisplayPort in Valleyview In valleyview voltageswing, pre-emphasis and lane control registers can be programmed only through the h/w side band fabric. Cleaned up DPLL calculations for Valleyview to support multi display configurations. v2: Based on Daniel's feedbacak, moved crt hotplug detect work around as separate patch. Also moved i9xx_update_pll_dividers to i8xx_update_pll and i9xx_update_pll. Signed-off-by: Vijay Purushothaman Signed-off-by: Gajanan Bhat Reviewed-by: Jesse Barnes [danvet: drop spurious whitespace changes.] 
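[Annotation, not part of the patch: "side band fabric" here means the DPIO interface; the PHY/PLL registers touched below are not reachable as plain MMIO, so the hunks go through the intel_dpio_write() helper rather than I915_WRITE(). Both lines below are taken from the diff that follows:

	I915_WRITE(DPLL(pipe), dpll);				/* MMIO register, written directly */
	intel_dpio_write(dev_priv, DPIO_DATA_CHANNEL1, temp);	/* lane control, behind the sideband */
]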
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 8 +-- drivers/gpu/drm/i915/intel_display.c | 88 ++++++++++++++++++++-------- 2 files changed, 65 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3f75ee6b5b21..0fe4aad8e424 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -385,12 +385,8 @@ #define DPIO_FASTCLK_DISABLE 0x8100 -#define _DPIO_DATA_LANE0 0x0220 -#define _DPIO_DATA_LANE1 0x0420 -#define _DPIO_DATA_LANE2 0x2620 -#define _DPIO_DATA_LANE3 0x2820 -#define DPIO_DATA_LANE_A(pipe) _PIPE(pipe, _DPIO_DATA_LANE0, _DPIO_DATA_LANE2) -#define DPIO_DATA_LANE_B(pipe) _PIPE(pipe, _DPIO_DATA_LANE1, _DPIO_DATA_LANE3) +#define DPIO_DATA_CHANNEL1 0x8220 +#define DPIO_DATA_CHANNEL2 0x8420 /* * Fence registers diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 647898196693..6458f95263d5 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4019,7 +4019,7 @@ static void vlv_update_pll(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, intel_clock_t *clock, intel_clock_t *reduced_clock, - int refclk, int num_connectors) + int num_connectors) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -4027,9 +4027,19 @@ static void vlv_update_pll(struct drm_crtc *crtc, int pipe = intel_crtc->pipe; u32 dpll, mdiv, pdiv; u32 bestn, bestm1, bestm2, bestp1, bestp2; - bool is_hdmi; + bool is_sdvo; + u32 temp; - is_hdmi = intel_pipe_has_type(crtc, INTEL_OUTPUT_HDMI); + is_sdvo = intel_pipe_has_type(crtc, INTEL_OUTPUT_SDVO) || + intel_pipe_has_type(crtc, INTEL_OUTPUT_HDMI); + + dpll = DPLL_VGA_MODE_DIS; + dpll |= DPLL_EXT_BUFFER_ENABLE_VLV; + dpll |= DPLL_REFA_CLK_ENABLE_VLV; + dpll |= DPLL_INTEGRATED_CLOCK_VLV; + + I915_WRITE(DPLL(pipe), dpll); + POSTING_READ(DPLL(pipe)); bestn = clock->n; bestm1 = clock->m1; @@ -4037,12 +4047,10 @@ static void vlv_update_pll(struct drm_crtc *crtc, bestp1 = clock->p1; bestp2 = clock->p2; - /* Enable DPIO clock input */ - dpll = DPLL_EXT_BUFFER_ENABLE_VLV | DPLL_REFA_CLK_ENABLE_VLV | - DPLL_VGA_MODE_DIS | DPLL_INTEGRATED_CLOCK_VLV; - I915_WRITE(DPLL(pipe), dpll); - POSTING_READ(DPLL(pipe)); - + /* + * In Valleyview PLL and program lane counter registers are exposed + * through DPIO interface + */ mdiv = ((bestm1 << DPIO_M1DIV_SHIFT) | (bestm2 & DPIO_M2DIV_MASK)); mdiv |= ((bestp1 << DPIO_P1_SHIFT) | (bestp2 << DPIO_P2_SHIFT)); mdiv |= ((bestn << DPIO_N_SHIFT)); @@ -4053,12 +4061,13 @@ static void vlv_update_pll(struct drm_crtc *crtc, intel_dpio_write(dev_priv, DPIO_CORE_CLK(pipe), 0x01000000); - pdiv = DPIO_REFSEL_OVERRIDE | (5 << DPIO_PLL_MODESEL_SHIFT) | + pdiv = (1 << DPIO_REFSEL_OVERRIDE) | (5 << DPIO_PLL_MODESEL_SHIFT) | (3 << DPIO_BIAS_CURRENT_CTL_SHIFT) | (1<<20) | - (8 << DPIO_DRIVER_CTL_SHIFT) | (5 << DPIO_CLK_BIAS_CTL_SHIFT); + (7 << DPIO_PLL_REFCLK_SEL_SHIFT) | (8 << DPIO_DRIVER_CTL_SHIFT) | + (5 << DPIO_CLK_BIAS_CTL_SHIFT); intel_dpio_write(dev_priv, DPIO_REFSFR(pipe), pdiv); - intel_dpio_write(dev_priv, DPIO_LFP_COEFF(pipe), 0x009f0051); + intel_dpio_write(dev_priv, DPIO_LFP_COEFF(pipe), 0x005f003b); dpll |= DPLL_VCO_ENABLE; I915_WRITE(DPLL(pipe), dpll); @@ -4066,19 +4075,44 @@ static void vlv_update_pll(struct drm_crtc *crtc, if (wait_for(((I915_READ(DPLL(pipe)) & DPLL_LOCK_VLV) == DPLL_LOCK_VLV), 1)) DRM_ERROR("DPLL %d failed to lock\n", pipe); - if (is_hdmi) { - u32 temp = 
intel_mode_get_pixel_multiplier(adjusted_mode); + intel_dpio_write(dev_priv, DPIO_FASTCLK_DISABLE, 0x620); + if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT)) + intel_dp_set_m_n(crtc, mode, adjusted_mode); + + I915_WRITE(DPLL(pipe), dpll); + + /* Wait for the clocks to stabilize. */ + POSTING_READ(DPLL(pipe)); + udelay(150); + + temp = 0; + if (is_sdvo) { + temp = intel_mode_get_pixel_multiplier(adjusted_mode); if (temp > 1) temp = (temp - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT; else temp = 0; - - I915_WRITE(DPLL_MD(pipe), temp); - POSTING_READ(DPLL_MD(pipe)); } + I915_WRITE(DPLL_MD(pipe), temp); + POSTING_READ(DPLL_MD(pipe)); - intel_dpio_write(dev_priv, DPIO_FASTCLK_DISABLE, 0x641); /* ??? */ + /* Now program lane control registers */ + if(intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT) + || intel_pipe_has_type(crtc, INTEL_OUTPUT_HDMI)) + { + temp = 0x1000C4; + if(pipe == 1) + temp |= (1 << 21); + intel_dpio_write(dev_priv, DPIO_DATA_CHANNEL1, temp); + } + if(intel_pipe_has_type(crtc,INTEL_OUTPUT_EDP)) + { + temp = 0x1000C4; + if(pipe == 1) + temp |= (1 << 21); + intel_dpio_write(dev_priv, DPIO_DATA_CHANNEL2, temp); + } } static void i9xx_update_pll(struct drm_crtc *crtc, @@ -4094,6 +4128,8 @@ static void i9xx_update_pll(struct drm_crtc *crtc, u32 dpll; bool is_sdvo; + i9xx_update_pll_dividers(crtc, clock, reduced_clock); + is_sdvo = intel_pipe_has_type(crtc, INTEL_OUTPUT_SDVO) || intel_pipe_has_type(crtc, INTEL_OUTPUT_HDMI); @@ -4194,7 +4230,7 @@ static void i9xx_update_pll(struct drm_crtc *crtc, static void i8xx_update_pll(struct drm_crtc *crtc, struct drm_display_mode *adjusted_mode, - intel_clock_t *clock, + intel_clock_t *clock, intel_clock_t *reduced_clock, int num_connectors) { struct drm_device *dev = crtc->dev; @@ -4203,6 +4239,8 @@ static void i8xx_update_pll(struct drm_crtc *crtc, int pipe = intel_crtc->pipe; u32 dpll; + i9xx_update_pll_dividers(crtc, clock, reduced_clock); + dpll = DPLL_VGA_MODE_DIS; if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { @@ -4329,14 +4367,14 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc, if (is_sdvo && is_tv) i9xx_adjust_sdvo_tv_clock(adjusted_mode, &clock); - i9xx_update_pll_dividers(crtc, &clock, has_reduced_clock ? - &reduced_clock : NULL); - if (IS_GEN2(dev)) - i8xx_update_pll(crtc, adjusted_mode, &clock, num_connectors); + i8xx_update_pll(crtc, adjusted_mode, &clock, + has_reduced_clock ? &reduced_clock : NULL, + num_connectors); else if (IS_VALLEYVIEW(dev)) - vlv_update_pll(crtc, mode,adjusted_mode, &clock, NULL, - refclk, num_connectors); + vlv_update_pll(crtc, mode, adjusted_mode, &clock, + has_reduced_clock ? &reduced_clock : NULL, + num_connectors); else i9xx_update_pll(crtc, mode, adjusted_mode, &clock, has_reduced_clock ? &reduced_clock : NULL, From 19c03924d4b72eedff517f80edc6b33c14f0fe53 Mon Sep 17 00:00:00 2001 From: Gajanan Bhat Date: Thu, 27 Sep 2012 19:13:07 +0530 Subject: [PATCH 0179/5831] drm/i915: Add eDP support for Valleyview Eventhough Valleyview display block is derived from Cantiga, VLV supports eDP. So, added eDP checks in i9xx_crtc_mode_set path. v2: use different DPIO_DIVISOR values for VGA, DP and eDP v3: fix DPIO value calculation to use same values for all display interfaces v4: removed unconditional enabling of 6bpc dithering based on comments from Daniel & Jani Nikula. Also changed the display enabling order to force eDP detection first. 
Signed-off-by: Gajanan Bhat Signed-off-by: Vijay Purushothaman Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 15 ++++++++++++--- drivers/gpu/drm/i915/intel_dp.c | 17 ++++++++++++----- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6458f95263d5..e9c1f3c00143 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4415,6 +4415,14 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc, } } + if (IS_VALLEYVIEW(dev) && intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP)) { + if (adjusted_mode->private_flags & INTEL_MODE_DP_FORCE_6BPC) { + pipeconf |= PIPECONF_BPP_6 | + PIPECONF_ENABLE | + I965_PIPECONF_ACTIVE; + } + } + DRM_DEBUG_KMS("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B'); drm_mode_debug_printmodeline(mode); @@ -7673,6 +7681,10 @@ static void intel_setup_outputs(struct drm_device *dev) } else if (IS_VALLEYVIEW(dev)) { int found; + /* Check for built-in panel first. Shares lanes with HDMI on SDVOC */ + if (I915_READ(DP_C) & DP_DETECTED) + intel_dp_init(dev, DP_C, PORT_C); + if (I915_READ(SDVOB) & PORT_DETECTED) { /* SDVOB multiplex with HDMIB */ found = intel_sdvo_init(dev, SDVOB, true); @@ -7685,9 +7697,6 @@ static void intel_setup_outputs(struct drm_device *dev) if (I915_READ(SDVOC) & PORT_DETECTED) intel_hdmi_init(dev, SDVOC, PORT_C); - /* Shares lanes with HDMI on SDVOC */ - if (I915_READ(DP_C) & DP_DETECTED) - intel_dp_init(dev, DP_C, PORT_C); } else if (SUPPORTS_DIGITAL_OUTPUTS(dev)) { bool found = false; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 7fe9b9c72aab..fcce39284e5d 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -886,7 +886,7 @@ intel_dp_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, /* Split out the IBX/CPU vs CPT settings */ - if (is_cpu_edp(intel_dp) && IS_GEN7(dev)) { + if (is_cpu_edp(intel_dp) && IS_GEN7(dev) && !IS_VALLEYVIEW(dev)) { if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC) intel_dp->DP |= DP_SYNC_HS_HIGH; if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC) @@ -1475,7 +1475,7 @@ intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, uint8_t voltage_swing) { struct drm_device *dev = intel_dp->base.base.dev; - if (IS_GEN7(dev) && is_cpu_edp(intel_dp)) { + if (IS_GEN7(dev) && is_cpu_edp(intel_dp) && !IS_VALLEYVIEW(dev)) { switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) { case DP_TRAIN_VOLTAGE_SWING_400: return DP_TRAIN_PRE_EMPHASIS_6; @@ -1774,7 +1774,7 @@ intel_dp_start_link_train(struct intel_dp *intel_dp) uint32_t signal_levels; - if (IS_GEN7(dev) && is_cpu_edp(intel_dp)) { + if (IS_GEN7(dev) && is_cpu_edp(intel_dp) && !IS_VALLEYVIEW(dev)) { signal_levels = intel_gen7_edp_signal_levels(intel_dp->train_set[0]); DP = (DP & ~EDP_LINK_TRAIN_VOL_EMP_MASK_IVB) | signal_levels; } else if (IS_GEN6(dev) && is_cpu_edp(intel_dp)) { @@ -1860,7 +1860,7 @@ intel_dp_complete_link_train(struct intel_dp *intel_dp) break; } - if (IS_GEN7(dev) && is_cpu_edp(intel_dp)) { + if (IS_GEN7(dev) && is_cpu_edp(intel_dp) && !IS_VALLEYVIEW(dev)) { signal_levels = intel_gen7_edp_signal_levels(intel_dp->train_set[0]); DP = (DP & ~EDP_LINK_TRAIN_VOL_EMP_MASK_IVB) | signal_levels; } else if (IS_GEN6(dev) && is_cpu_edp(intel_dp)) { @@ -2517,7 +2517,14 @@ intel_dp_init(struct drm_device *dev, int output_reg, enum port port) if (intel_dpd_is_edp(dev)) intel_dp->is_pch_edp = true; - if (output_reg == DP_A || 
is_pch_edp(intel_dp)) { + /* + * FIXME : We need to initialize built-in panels before external panels. + * For X0, DP_C is fixed as eDP. Revisit this as part of VLV eDP cleanup + */ + if (IS_VALLEYVIEW(dev) && output_reg == DP_C) { + type = DRM_MODE_CONNECTOR_eDP; + intel_encoder->type = INTEL_OUTPUT_EDP; + } else if (output_reg == DP_A || is_pch_edp(intel_dp)) { type = DRM_MODE_CONNECTOR_eDP; intel_encoder->type = INTEL_OUTPUT_EDP; } else { From 17dc92574b696ba07512f7e6cfb7941a6f873f45 Mon Sep 17 00:00:00 2001 From: Vijay Purushothaman Date: Thu, 27 Sep 2012 19:13:09 +0530 Subject: [PATCH 0180/5831] drm/i915: Fixup HDMI output on Valleyview Fixed correct min, max vco limits and dip ctl reg Signed-off-by: Vijay Purushothaman Signed-off-by: Gajanan Bhat Signed-off-by: Ben Widawsky Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 2 +- drivers/gpu/drm/i915/intel_display.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0fe4aad8e424..d17bef7f8040 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3683,7 +3683,7 @@ #define TVIDEO_DIP_DATA(pipe) _PIPE(pipe, _VIDEO_DIP_DATA_A, _VIDEO_DIP_DATA_B) #define TVIDEO_DIP_GCP(pipe) _PIPE(pipe, _VIDEO_DIP_GCP_A, _VIDEO_DIP_GCP_B) -#define VLV_VIDEO_DIP_CTL_A 0x60220 +#define VLV_VIDEO_DIP_CTL_A 0x60200 #define VLV_VIDEO_DIP_DATA_A 0x60208 #define VLV_VIDEO_DIP_GDCP_PAYLOAD_A 0x60210 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e9c1f3c00143..ed0dcea4a727 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -380,7 +380,7 @@ static const intel_limit_t intel_limits_vlv_dac = { static const intel_limit_t intel_limits_vlv_hdmi = { .dot = { .min = 20000, .max = 165000 }, - .vco = { .min = 5994000, .max = 4000000 }, + .vco = { .min = 4000000, .max = 5994000}, .n = { .min = 1, .max = 7 }, .m = { .min = 60, .max = 300 }, /* guess */ .m1 = { .min = 2, .max = 3 }, From e6459606b04e6385ccd3c2060fc10f78a92c7700 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 1 Oct 2012 00:23:52 +0200 Subject: [PATCH 0181/5831] lib: Add early cpio decoder Add a simple cpio decoder without library dependencies for the purpose of extracting components from the initramfs blob for early kernel uses. Intended consumers so far are microcode and ACPI override. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1349043837-22659-2-git-send-email-trenn@suse.de Cc: Len Brown Cc: Fenghua Yu Signed-off-by: Thomas Renninger Signed-off-by: H. 
Peter Anvin --- include/linux/earlycpio.h | 17 +++++ lib/Makefile | 2 +- lib/earlycpio.c | 145 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 include/linux/earlycpio.h create mode 100644 lib/earlycpio.c diff --git a/include/linux/earlycpio.h b/include/linux/earlycpio.h new file mode 100644 index 000000000000..111f46d83d00 --- /dev/null +++ b/include/linux/earlycpio.h @@ -0,0 +1,17 @@ +#ifndef _LINUX_EARLYCPIO_H +#define _LINUX_EARLYCPIO_H + +#include + +#define MAX_CPIO_FILE_NAME 18 + +struct cpio_data { + void *data; + size_t size; + char name[MAX_CPIO_FILE_NAME]; +}; + +struct cpio_data find_cpio_data(const char *path, void *data, size_t len, + long *offset); + +#endif /* _LINUX_EARLYCPIO_H */ diff --git a/lib/Makefile b/lib/Makefile index 42d283edc4d3..0924041b6959 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -12,7 +12,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ - is_single_threaded.o plist.o decompress.o + is_single_threaded.o plist.o decompress.o earlycpio.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/earlycpio.c b/lib/earlycpio.c new file mode 100644 index 000000000000..8078ef49cb79 --- /dev/null +++ b/lib/earlycpio.c @@ -0,0 +1,145 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2012 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available + * under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * ----------------------------------------------------------------------- */ + +/* + * earlycpio.c + * + * Find a specific cpio member; must precede any compressed content. + * This is used to locate data items in the initramfs used by the + * kernel itself during early boot (before the main initramfs is + * decompressed.) It is the responsibility of the initramfs creator + * to ensure that these items are uncompressed at the head of the + * blob. Depending on the boot loader or package tool that may be a + * separate file or part of the same file. + */ + +#include +#include +#include + +enum cpio_fields { + C_MAGIC, + C_INO, + C_MODE, + C_UID, + C_GID, + C_NLINK, + C_MTIME, + C_FILESIZE, + C_MAJ, + C_MIN, + C_RMAJ, + C_RMIN, + C_NAMESIZE, + C_CHKSUM, + C_NFIELDS +}; + +/** + * cpio_data find_cpio_data - Search for files in an uncompressed cpio + * @path: The directory to search for, including a slash at the end + * @data: Pointer to the the cpio archive or a header inside + * @len: Remaining length of the cpio based on data pointer + * @offset: When a matching file is found, this is the offset to the + * beginning of the cpio. It can be used to iterate through + * the cpio to find all files inside of a directory path + * + * @return: struct cpio_data containing the address, length and + * filename (with the directory path cut off) of the found file. 
+ * If you search for a filename and not for files in a directory, + * pass the absolute path of the filename in the cpio and make sure + * the match returned an empty filename string. + */ + +struct cpio_data __cpuinit find_cpio_data(const char *path, void *data, + size_t len, long *offset) +{ + const size_t cpio_header_len = 8*C_NFIELDS - 2; + struct cpio_data cd = { NULL, 0, "" }; + const char *p, *dptr, *nptr; + unsigned int ch[C_NFIELDS], *chp, v; + unsigned char c, x; + size_t mypathsize = strlen(path); + int i, j; + + p = data; + + while (len > cpio_header_len) { + if (!*p) { + /* All cpio headers need to be 4-byte aligned */ + p += 4; + len -= 4; + continue; + } + + j = 6; /* The magic field is only 6 characters */ + chp = ch; + for (i = C_NFIELDS; i; i--) { + v = 0; + while (j--) { + v <<= 4; + c = *p++; + + x = c - '0'; + if (x < 10) { + v += x; + continue; + } + + x = (c | 0x20) - 'a'; + if (x < 6) { + v += x + 10; + continue; + } + + goto quit; /* Invalid hexadecimal */ + } + *chp++ = v; + j = 8; /* All other fields are 8 characters */ + } + + if ((ch[C_MAGIC] - 0x070701) > 1) + goto quit; /* Invalid magic */ + + len -= cpio_header_len; + + dptr = PTR_ALIGN(p + ch[C_NAMESIZE], 4); + nptr = PTR_ALIGN(dptr + ch[C_FILESIZE], 4); + + if (nptr > p + len || dptr < p || nptr < dptr) + goto quit; /* Buffer overrun */ + + if ((ch[C_MODE] & 0170000) == 0100000 && + ch[C_NAMESIZE] >= mypathsize && + !memcmp(p, path, mypathsize)) { + *offset = (long)nptr - (long)data; + if (ch[C_NAMESIZE] - mypathsize >= MAX_CPIO_FILE_NAME) { + pr_warn( + "File %s exceeding MAX_CPIO_FILE_NAME [%d]\n", + p, MAX_CPIO_FILE_NAME); + } + strlcpy(cd.name, p + mypathsize, MAX_CPIO_FILE_NAME); + + cd.data = (void *)dptr; + cd.size = ch[C_FILESIZE]; + return cd; /* Found it! */ + } + len -= (nptr - p); + p = nptr; + } + +quit: + return cd; +} From 8e30524dcc0d0ac1a18a5cee482b9d9cde3cb332 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 1 Oct 2012 00:23:53 +0200 Subject: [PATCH 0182/5831] x86, acpi: Introduce x86 arch specific arch_reserve_mem_area() for e820 handling This is needed for ACPI table overriding via initrd. Beside reserving memblocks, X86 also requires to flag the memory area to E820_RESERVED or E820_ACPI in the e820 mappings to be able to io(re)map it later. Signed-off-by: Thomas Renninger Link: http://lkml.kernel.org/r/1349043837-22659-3-git-send-email-trenn@suse.de Cc: Len Brown Cc: Robert Moore Cc: Yinghai Lu Cc: Eric Piel Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/acpi/boot.c | 6 ++++++ include/linux/acpi.h | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b2297e58c6ed..6b75777c0a8d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1700,3 +1700,9 @@ int __acpi_release_global_lock(unsigned int *lock) } while (unlikely (val != old)); return old & 0x1; } + +void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) +{ + e820_add_region(addr, size, E820_ACPI); + update_e820(); +} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4f2a76224509..946fd1ea79ff 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -426,6 +426,14 @@ void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state, acpi_status acpi_os_prepare_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control); +#if CONFIG_X86 +void arch_reserve_mem_area(acpi_physical_address addr, size_t size); +#else +static inline void arch_reserve_mem_area(acpi_physical_address addr, + size_t size) +{ +} +#endif /* CONFIG_X86 */ #else #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0) #endif From 53aac44c904abbad9f474f652f099de13b5c3563 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 1 Oct 2012 00:23:54 +0200 Subject: [PATCH 0183/5831] ACPI: Store valid ACPI tables passed via early initrd in reserved memblock areas A later patch will compare them with ACPI tables that get loaded at boot or runtime and if criteria match, a stored one is loaded. Signed-off-by: Thomas Renninger Link: http://lkml.kernel.org/r/1349043837-22659-4-git-send-email-trenn@suse.de Cc: Len Brown Cc: Robert Moore Cc: Yinghai Lu Cc: Eric Piel Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 2 + drivers/acpi/Kconfig | 9 +++ drivers/acpi/osl.c | 122 ++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 8 +++ 4 files changed, 141 insertions(+) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f4b9b80e1b95..764e543b2297 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -941,6 +941,8 @@ void __init setup_arch(char **cmdline_p) reserve_initrd(); + acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start); + reserve_crashkernel(); vsmp_init(); diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 80998958cf45..8cf719547b51 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -261,6 +261,15 @@ config ACPI_CUSTOM_DSDT bool default ACPI_CUSTOM_DSDT_FILE != "" +config ACPI_INITRD_TABLE_OVERRIDE + bool "ACPI tables can be passed via uncompressed cpio in initrd" + default n + help + This option provides functionality to override arbitrary ACPI tables + via initrd. No functional change if no ACPI tables are passed via + initrd, therefore it's safe to say Y. 
+ See Documentation/acpi/initrd_table_override.txt for details + config ACPI_BLACKLIST_YEAR int "Disable ACPI for systems before Jan 1st this year" if X86_32 default 0 diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 9eaf708f5885..b20b07903218 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -534,6 +534,128 @@ acpi_os_predefined_override(const struct acpi_predefined_names *init_val, return AE_OK; } +#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE +#include +#include + +static u64 acpi_tables_addr; +static int all_tables_size; + +/* Copied from acpica/tbutils.c:acpi_tb_checksum() */ +u8 __init acpi_table_checksum(u8 *buffer, u32 length) +{ + u8 sum = 0; + u8 *end = buffer + length; + + while (buffer < end) + sum = (u8) (sum + *(buffer++)); + return sum; +} + +/* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */ +static const char * const table_sigs[] = { + ACPI_SIG_BERT, ACPI_SIG_CPEP, ACPI_SIG_ECDT, ACPI_SIG_EINJ, + ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT, ACPI_SIG_MSCT, + ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT, ACPI_SIG_ASF, + ACPI_SIG_BOOT, ACPI_SIG_DBGP, ACPI_SIG_DMAR, ACPI_SIG_HPET, + ACPI_SIG_IBFT, ACPI_SIG_IVRS, ACPI_SIG_MCFG, ACPI_SIG_MCHI, + ACPI_SIG_SLIC, ACPI_SIG_SPCR, ACPI_SIG_SPMI, ACPI_SIG_TCPA, + ACPI_SIG_UEFI, ACPI_SIG_WAET, ACPI_SIG_WDAT, ACPI_SIG_WDDT, + ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_PSDT, + ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, NULL }; + +/* Non-fatal errors: Affected tables/files are ignored */ +#define INVALID_TABLE(x, path, name) \ + { pr_err("ACPI OVERRIDE: " x " [%s%s]\n", path, name); continue; } + +#define ACPI_HEADER_SIZE sizeof(struct acpi_table_header) + +/* Must not increase 10 or needs code modification below */ +#define ACPI_OVERRIDE_TABLES 10 + +void __init acpi_initrd_override(void *data, size_t size) +{ + int sig, no, table_nr = 0, total_offset = 0; + long offset = 0; + struct acpi_table_header *table; + char cpio_path[32] = "kernel/firmware/acpi/"; + struct cpio_data file; + struct cpio_data early_initrd_files[ACPI_OVERRIDE_TABLES]; + char *p; + + if (data == NULL || size == 0) + return; + + for (no = 0; no < ACPI_OVERRIDE_TABLES; no++) { + file = find_cpio_data(cpio_path, data, size, &offset); + if (!file.data) + break; + + data += offset; + size -= offset; + + if (file.size < sizeof(struct acpi_table_header)) + INVALID_TABLE("Table smaller than ACPI header", + cpio_path, file.name); + + table = file.data; + + for (sig = 0; table_sigs[sig]; sig++) + if (!memcmp(table->signature, table_sigs[sig], 4)) + break; + + if (!table_sigs[sig]) + INVALID_TABLE("Unknown signature", + cpio_path, file.name); + if (file.size != table->length) + INVALID_TABLE("File length does not match table length", + cpio_path, file.name); + if (acpi_table_checksum(file.data, table->length)) + INVALID_TABLE("Bad table checksum", + cpio_path, file.name); + + pr_info("%4.4s ACPI table found in initrd [%s%s][0x%x]\n", + table->signature, cpio_path, file.name, table->length); + + all_tables_size += table->length; + early_initrd_files[table_nr].data = file.data; + early_initrd_files[table_nr].size = file.size; + table_nr++; + } + if (table_nr == 0) + return; + + acpi_tables_addr = + memblock_find_in_range(0, max_low_pfn_mapped << PAGE_SHIFT, + all_tables_size, PAGE_SIZE); + if (!acpi_tables_addr) { + WARN_ON(1); + return; + } + /* + * Only calling e820_add_reserve does not work and the + * tables are invalid (memory got used) later. + * memblock_reserve works as expected and the tables won't get modified. 
+ * But it's not enough on X86 because ioremap will + * complain later (used by acpi_os_map_memory) that the pages + * that should get mapped are not marked "reserved". + * Both memblock_reserve and e820_add_region (via arch_reserve_mem_area) + * works fine. + */ + memblock_reserve(acpi_tables_addr, acpi_tables_addr + all_tables_size); + arch_reserve_mem_area(acpi_tables_addr, all_tables_size); + + p = early_ioremap(acpi_tables_addr, all_tables_size); + + for (no = 0; no < table_nr; no++) { + memcpy(p + total_offset, early_initrd_files[no].data, + early_initrd_files[no].size); + total_offset += early_initrd_files[no].size; + } + early_iounmap(p, all_tables_size); +} +#endif /* CONFIG_ACPI_INITRD_TABLE_OVERRIDE */ + acpi_status acpi_os_table_override(struct acpi_table_header * existing_table, struct acpi_table_header ** new_table) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 946fd1ea79ff..d14081c10c17 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -76,6 +76,14 @@ typedef int (*acpi_table_handler) (struct acpi_table_header *table); typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end); +#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE +void acpi_initrd_override(void *data, size_t size); +#else +static inline void acpi_initrd_override(void *data, size_t size) +{ +} +#endif + char * __acpi_map_table (unsigned long phys_addr, unsigned long size); void __acpi_unmap_table(char *map, unsigned long size); int early_acpi_boot_init(void); From b2a35003dfbcc7b7a5e5c6e524e7d49ba66e0bb5 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 1 Oct 2012 00:23:55 +0200 Subject: [PATCH 0184/5831] ACPI: Implement physical address table override Previous patches stored ACPI tables provided via initrd in a memblock reserved area. If a table is loaded and the table type of an initrd provided one matches, the one from initrd is prefered. In case of a SSDT table, the OEM table id also has to match. ACPI tables can be loaded at boot time (static table pointers in XSDT), but also dynamically any time later via ASL commands load() or loadTable(). The override mechanism always works. Signed-off-by: Thomas Renninger Link: http://lkml.kernel.org/r/1349043837-22659-5-git-send-email-trenn@suse.de Cc: Len Brown Cc: Robert Moore Cc: Yinghai Lu Cc: Eric Piel Signed-off-by: H. 
Peter Anvin --- drivers/acpi/osl.c | 60 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index b20b07903218..007224bd4db7 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -681,12 +681,64 @@ acpi_os_table_override(struct acpi_table_header * existing_table, acpi_status acpi_os_physical_table_override(struct acpi_table_header *existing_table, - acpi_physical_address * new_address, - u32 *new_table_length) + acpi_physical_address *address, + u32 *table_length) { - return AE_SUPPORT; -} +#ifndef CONFIG_ACPI_INITRD_TABLE_OVERRIDE + *table_length = 0; + *address = 0; + return AE_OK; +#else + int table_offset = 0; + struct acpi_table_header *table; + *table_length = 0; + *address = 0; + + if (!acpi_tables_addr) + return AE_OK; + + do { + if (table_offset + ACPI_HEADER_SIZE > all_tables_size) { + WARN_ON(1); + return AE_OK; + } + + table = acpi_os_map_memory(acpi_tables_addr + table_offset, + ACPI_HEADER_SIZE); + + if (table_offset + table->length > all_tables_size) { + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); + WARN_ON(1); + return AE_OK; + } + + table_offset += table->length; + + if (memcmp(existing_table->signature, table->signature, 4)) { + acpi_os_unmap_memory(table, + ACPI_HEADER_SIZE); + continue; + } + + /* Only override tables with matching oem id */ + if (memcmp(table->oem_table_id, existing_table->oem_table_id, + ACPI_OEM_TABLE_ID_SIZE)) { + acpi_os_unmap_memory(table, + ACPI_HEADER_SIZE); + continue; + } + + table_offset -= table->length; + *table_length = table->length; + acpi_os_unmap_memory(table, ACPI_HEADER_SIZE); + *address = acpi_tables_addr + table_offset; + break; + } while (table_offset + ACPI_HEADER_SIZE < all_tables_size); + + return AE_OK; +#endif +} static irqreturn_t acpi_irq(int irq, void *dev_id) { From 325a8d36035f0623950e38e9cf7a47a48e72df11 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 1 Oct 2012 00:23:56 +0200 Subject: [PATCH 0185/5831] ACPI: Create acpi_table_taint() function to avoid code duplication There are two ways of overriding ACPI tables now, both need to taint the the kernel. Signed-off-by: Thomas Renninger Link: http://lkml.kernel.org/r/1349043837-22659-6-git-send-email-trenn@suse.de Cc: Len Brown Cc: Robert Moore Cc: Yinghai Lu Cc: Eric Piel Signed-off-by: H. 
Peter Anvin --- drivers/acpi/osl.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 007224bd4db7..a2845fff3997 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -656,6 +656,15 @@ void __init acpi_initrd_override(void *data, size_t size) } #endif /* CONFIG_ACPI_INITRD_TABLE_OVERRIDE */ +static void acpi_table_taint(struct acpi_table_header *table) +{ + pr_warn(PREFIX + "Override [%4.4s-%8.8s], this is unsafe: tainting kernel\n", + table->signature, table->oem_table_id); + add_taint(TAINT_OVERRIDDEN_ACPI_TABLE); +} + + acpi_status acpi_os_table_override(struct acpi_table_header * existing_table, struct acpi_table_header ** new_table) @@ -669,13 +678,8 @@ acpi_os_table_override(struct acpi_table_header * existing_table, if (strncmp(existing_table->signature, "DSDT", 4) == 0) *new_table = (struct acpi_table_header *)AmlCode; #endif - if (*new_table != NULL) { - printk(KERN_WARNING PREFIX "Override [%4.4s-%8.8s], " - "this is unsafe: tainting kernel\n", - existing_table->signature, - existing_table->oem_table_id); - add_taint(TAINT_OVERRIDDEN_ACPI_TABLE); - } + if (*new_table != NULL) + acpi_table_taint(existing_table); return AE_OK; } @@ -736,6 +740,8 @@ acpi_os_physical_table_override(struct acpi_table_header *existing_table, break; } while (table_offset + ACPI_HEADER_SIZE < all_tables_size); + if (*address != 0) + acpi_table_taint(existing_table); return AE_OK; #endif } From 8347bbecf3518aa1518f6157e661812a35775130 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 1 Oct 2012 00:23:57 +0200 Subject: [PATCH 0186/5831] ACPI: Document ACPI table overriding via initrd Signed-off-by: Thomas Renninger Link: http://lkml.kernel.org/r/1349043837-22659-7-git-send-email-trenn@suse.de Cc: Len Brown Cc: Robert Moore Cc: Yinghai Lu Cc: Eric Piel Signed-off-by: H. Peter Anvin --- Documentation/acpi/initrd_table_override.txt | 94 ++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 Documentation/acpi/initrd_table_override.txt diff --git a/Documentation/acpi/initrd_table_override.txt b/Documentation/acpi/initrd_table_override.txt new file mode 100644 index 000000000000..35c3f5415476 --- /dev/null +++ b/Documentation/acpi/initrd_table_override.txt @@ -0,0 +1,94 @@ +Overriding ACPI tables via initrd +================================= + +1) Introduction (What is this about) +2) What is this for +3) How does it work +4) References (Where to retrieve userspace tools) + +1) What is this about +--------------------- + +If the ACPI_INITRD_TABLE_OVERRIDE compile option is true, it is possible to +override nearly any ACPI table provided by the BIOS with an instrumented, +modified one. + +For a full list of ACPI tables that can be overridden, take a look at +the char *table_sigs[MAX_ACPI_SIGNATURE]; definition in drivers/acpi/osl.c +All ACPI tables iasl (Intel's ACPI compiler and disassembler) knows should +be overridable, except: + - ACPI_SIG_RSDP (has a signature of 6 bytes) + - ACPI_SIG_FACS (does not have an ordinary ACPI table header) +Both could get implemented as well. + + +2) What is this for +------------------- + +Please keep in mind that this is a debug option. +ACPI tables should not get overridden for productive use. +If BIOS ACPI tables are overridden the kernel will get tainted with the +TAINT_OVERRIDDEN_ACPI_TABLE flag. +Complain to your platform/BIOS vendor if you find a bug which is so sever +that a workaround is not accepted in the Linux kernel. 
+ +Still, it can and should be enabled in any kernel, because: + - There is no functional change with not instrumented initrds + - It provides a powerful feature to easily debug and test ACPI BIOS table + compatibility with the Linux kernel. + + +3) How does it work +------------------- + +# Extract the machine's ACPI tables: +cd /tmp +acpidump >acpidump +acpixtract -a acpidump +# Disassemble, modify and recompile them: +iasl -d *.dat +# For example add this statement into a _PRT (PCI Routing Table) function +# of the DSDT: +Store("HELLO WORLD", debug) +iasl -sa dsdt.dsl +# Add the raw ACPI tables to an uncompressed cpio archive. +# They must be put into a /kernel/firmware/acpi directory inside the +# cpio archive. +# The uncompressed cpio archive must be the first. +# Other, typically compressed cpio archives, must be +# concatenated on top of the uncompressed one. +mkdir -p kernel/firmware/acpi +cp dsdt.aml kernel/firmware/acpi +# A maximum of: #define ACPI_OVERRIDE_TABLES 10 +# tables are currently allowed (see osl.c): +iasl -sa facp.dsl +iasl -sa ssdt1.dsl +cp facp.aml kernel/firmware/acpi +cp ssdt1.aml kernel/firmware/acpi +# Create the uncompressed cpio archive and concatenate the original initrd +# on top: +find kernel | cpio -H newc --create > /boot/instrumented_initrd +cat /boot/initrd >>/boot/instrumented_initrd +# reboot with increased acpi debug level, e.g. boot params: +acpi.debug_level=0x2 acpi.debug_layer=0xFFFFFFFF +# and check your syslog: +[ 1.268089] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0._PRT] +[ 1.272091] [ACPI Debug] String [0x0B] "HELLO WORLD" + +iasl is able to disassemble and recompile quite a lot different, +also static ACPI tables. + + +4) Where to retrieve userspace tools +------------------------------------ + +iasl and acpixtract are part of Intel's ACPICA project: +http://acpica.org/ +and should be packaged by distributions (for example in the acpica package +on SUSE). + +acpidump can be found in Len Browns pmtools: +ftp://kernel.org/pub/linux/kernel/people/lenb/acpi/utils/pmtools/acpidump +This tool is also part of the acpica package on SUSE. +Alternatively, used ACPI tables can be retrieved via sysfs in latest kernels: +/sys/firmware/acpi/tables From 749052fba650a644b29ed2ae35cd985f736401b3 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 1 Oct 2012 14:25:30 +0300 Subject: [PATCH 0187/5831] drm/i915: add debug logging to ASLE backlight set requests Make it easier to track backlight set requests coming through ASLE instead of the driver's own backlight sysfs interface. We've had enough of backlight issues to warrant some extra debug logs in the area. 
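(Worth noting for anyone following such a report: DRM_DEBUG_DRIVER output is compiled in but only printed when the DRM "driver" debug category is enabled, so to actually see the new message the drm.debug module parameter has to include that category; on kernels of this vintage that is typically drm.debug=0x02 on the command line, or the same value written to /sys/module/drm/parameters/debug at runtime.)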
Signed-off-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_opregion.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index e27c17012628..0f2db1648f04 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -154,6 +154,8 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp) struct opregion_asle __iomem *asle = dev_priv->opregion.asle; u32 max; + DRM_DEBUG_DRIVER("bclp = 0x%08x\n", bclp); + if (!(bclp & ASLE_BCLP_VALID)) return ASLE_BACKLIGHT_FAILED; From 3dfd8235002727dbd759bb0f80f8ac862f392071 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 1 Oct 2012 20:38:47 -0700 Subject: [PATCH 0188/5831] ACPI: Fix build when disabled "ACPI: Store valid ACPI tables passed via early initrd in reserved memblock areas" breaks the build if either CONFIG_ACPI or CONFIG_BLK_DEV_INITRD is disabled: arch/x86/kernel/setup.c: In function 'setup_arch': arch/x86/kernel/setup.c:944: error: implicit declaration of function 'acpi_initrd_override' or arch/x86/built-in.o: In function `setup_arch': (.init.text+0x1397): undefined reference to `initrd_start' arch/x86/built-in.o: In function `setup_arch': (.init.text+0x139e): undefined reference to `initrd_end' The dummy acpi_initrd_override() function in acpi.h isn't defined without CONFIG_ACPI and initrd_{start,end} are declared but not defined without CONFIG_BLK_DEV_INITRD. [ hpa: applying this as a fix, but this really should be done cleaner ] Signed-off-by: David Rientjes Link: http://lkml.kernel.org/r/alpine.DEB.2.00.1210012032470.31644@chino.kir.corp.google.com Signed-off-by: H. Peter Anvin Cc: Thomas Renninger Cc: Len Brown --- arch/x86/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 764e543b2297..bf82c1e05464 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -941,7 +941,9 @@ void __init setup_arch(char **cmdline_p) reserve_initrd(); +#if defined(CONFIG_ACPI) && defined(CONFIG_BLK_DEV_INITRD) acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start); +#endif reserve_crashkernel(); From 87b526d349b04c31d7b3a40b434eb3f825d22305 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 1 Oct 2012 11:40:45 -0700 Subject: [PATCH 0189/5831] seccomp: Make syscall skipping and nr changes more consistent This fixes two issues that could cause incompatibility between kernel versions: - If a tracer uses SECCOMP_RET_TRACE to select a syscall number higher than the largest known syscall, emulate the unknown vsyscall by returning -ENOSYS. (This is unlikely to make a noticeable difference on x86-64 due to the way the system call entry works.) - On x86-64 with vsyscall=emulate, skipped vsyscalls were buggy. This updates the documentation accordingly. 
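For illustration, a minimal tracer-side sketch of the skip mechanism described here could look like the following. It assumes x86-64, a tracee that is already attached and stopped at a PTRACE_EVENT_SECCOMP event, and omits all error handling; the helper name and the -EPERM result are invented for the example.

#include <errno.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>

/* Hypothetical helper, called at a PTRACE_EVENT_SECCOMP stop for "pid",
 * i.e. a seccomp filter returned SECCOMP_RET_TRACE and we are the tracer.
 */
static void skip_this_syscall(pid_t pid)
{
	struct user_regs_struct regs;

	ptrace(PTRACE_GETREGS, pid, 0, &regs);

	/* A syscall number of -1 asks the kernel to skip the call; whatever
	 * is left in the return-value register is what the tracee will see
	 * as the result of the "skipped" syscall.
	 */
	regs.orig_rax = -1;
	regs.rax = -EPERM;
	ptrace(PTRACE_SETREGS, pid, 0, &regs);

	ptrace(PTRACE_CONT, pid, 0, 0);
}

With the change below, the case where no tracer is attached now reports -ENOSYS for the skipped call, so both paths behave consistently.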
Signed-off-by: Andy Lutomirski Acked-by: Will Drewry Signed-off-by: James Morris --- Documentation/prctl/seccomp_filter.txt | 74 ++++++++++++++-- arch/x86/kernel/vsyscall_64.c | 112 +++++++++++++------------ kernel/seccomp.c | 13 ++- 3 files changed, 138 insertions(+), 61 deletions(-) diff --git a/Documentation/prctl/seccomp_filter.txt b/Documentation/prctl/seccomp_filter.txt index 597c3c581375..1e469ef75778 100644 --- a/Documentation/prctl/seccomp_filter.txt +++ b/Documentation/prctl/seccomp_filter.txt @@ -95,12 +95,15 @@ SECCOMP_RET_KILL: SECCOMP_RET_TRAP: Results in the kernel sending a SIGSYS signal to the triggering - task without executing the system call. The kernel will - rollback the register state to just before the system call - entry such that a signal handler in the task will be able to - inspect the ucontext_t->uc_mcontext registers and emulate - system call success or failure upon return from the signal - handler. + task without executing the system call. siginfo->si_call_addr + will show the address of the system call instruction, and + siginfo->si_syscall and siginfo->si_arch will indicate which + syscall was attempted. The program counter will be as though + the syscall happened (i.e. it will not point to the syscall + instruction). The return value register will contain an arch- + dependent value -- if resuming execution, set it to something + sensible. (The architecture dependency is because replacing + it with -ENOSYS could overwrite some useful information.) The SECCOMP_RET_DATA portion of the return value will be passed as si_errno. @@ -123,6 +126,18 @@ SECCOMP_RET_TRACE: the BPF program return value will be available to the tracer via PTRACE_GETEVENTMSG. + The tracer can skip the system call by changing the syscall number + to -1. Alternatively, the tracer can change the system call + requested by changing the system call to a valid syscall number. If + the tracer asks to skip the system call, then the system call will + appear to return the value that the tracer puts in the return value + register. + + The seccomp check will not be run again after the tracer is + notified. (This means that seccomp-based sandboxes MUST NOT + allow use of ptrace, even of other sandboxed processes, without + extreme care; ptracers can use this mechanism to escape.) + SECCOMP_RET_ALLOW: Results in the system call being executed. @@ -161,3 +176,50 @@ architecture supports both ptrace_event and seccomp, it will be able to support seccomp filter with minor fixup: SIGSYS support and seccomp return value checking. Then it must just add CONFIG_HAVE_ARCH_SECCOMP_FILTER to its arch-specific Kconfig. + + + +Caveats +------- + +The vDSO can cause some system calls to run entirely in userspace, +leading to surprises when you run programs on different machines that +fall back to real syscalls. To minimize these surprises on x86, make +sure you test with +/sys/devices/system/clocksource/clocksource0/current_clocksource set to +something like acpi_pm. + +On x86-64, vsyscall emulation is enabled by default. (vsyscalls are +legacy variants on vDSO calls.) Currently, emulated vsyscalls will honor seccomp, with a few oddities: + +- A return value of SECCOMP_RET_TRAP will set a si_call_addr pointing to + the vsyscall entry for the given call and not the address after the + 'syscall' instruction. 
Any code which wants to restart the call + should be aware that (a) a ret instruction has been emulated and (b) + trying to resume the syscall will again trigger the standard vsyscall + emulation security checks, making resuming the syscall mostly + pointless. + +- A return value of SECCOMP_RET_TRACE will signal the tracer as usual, + but the syscall may not be changed to another system call using the + orig_rax register. It may only be changed to -1 order to skip the + currently emulated call. Any other change MAY terminate the process. + The rip value seen by the tracer will be the syscall entry address; + this is different from normal behavior. The tracer MUST NOT modify + rip or rsp. (Do not rely on other changes terminating the process. + They might work. For example, on some kernels, choosing a syscall + that only exists in future kernels will be correctly emulated (by + returning -ENOSYS). + +To detect this quirky behavior, check for addr & ~0x0C00 == +0xFFFFFFFFFF600000. (For SECCOMP_RET_TRACE, use rip. For +SECCOMP_RET_TRAP, use siginfo->si_call_addr.) Do not check any other +condition: future kernels may improve vsyscall emulation and current +kernels in vsyscall=native mode will behave differently, but the +instructions at 0xF...F600{0,4,8,C}00 will not be system calls in these +cases. + +Note that modern systems are unlikely to use vsyscalls at all -- they +are a legacy feature and they are considerably slower than standard +syscalls. New code will use the vDSO, and vDSO-issued system calls +are indistinguishable from normal system calls. diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 8d141b309046..b2e58a248b3b 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -136,19 +136,6 @@ static int addr_to_vsyscall_nr(unsigned long addr) return nr; } -#ifdef CONFIG_SECCOMP -static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr) -{ - if (!seccomp_mode(&tsk->seccomp)) - return 0; - task_pt_regs(tsk)->orig_ax = syscall_nr; - task_pt_regs(tsk)->ax = syscall_nr; - return __secure_computing(syscall_nr); -} -#else -#define vsyscall_seccomp(_tsk, _nr) 0 -#endif - static bool write_ok_or_segv(unsigned long ptr, size_t size) { /* @@ -181,10 +168,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) { struct task_struct *tsk; unsigned long caller; - int vsyscall_nr; + int vsyscall_nr, syscall_nr, tmp; int prev_sig_on_uaccess_error; long ret; - int skip; /* * No point in checking CS -- the only way to get here is a user mode @@ -216,6 +202,64 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) } tsk = current; + + /* + * Check for access_ok violations and find the syscall nr. + * + * NULL is a valid user pointer (in the access_ok sense) on 32-bit and + * 64-bit, so we don't need to special-case it here. For all the + * vsyscalls, NULL means "don't write anything" not "write it at + * address 0". 
+ */ + switch (vsyscall_nr) { + case 0: + if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || + !write_ok_or_segv(regs->si, sizeof(struct timezone))) { + ret = -EFAULT; + goto check_fault; + } + + syscall_nr = __NR_gettimeofday; + break; + + case 1: + if (!write_ok_or_segv(regs->di, sizeof(time_t))) { + ret = -EFAULT; + goto check_fault; + } + + syscall_nr = __NR_time; + break; + + case 2: + if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || + !write_ok_or_segv(regs->si, sizeof(unsigned))) { + ret = -EFAULT; + goto check_fault; + } + + syscall_nr = __NR_getcpu; + break; + } + + /* + * Handle seccomp. regs->ip must be the original value. + * See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt. + * + * We could optimize the seccomp disabled case, but performance + * here doesn't matter. + */ + regs->orig_ax = syscall_nr; + regs->ax = -ENOSYS; + tmp = secure_computing(syscall_nr); + if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { + warn_bad_vsyscall(KERN_DEBUG, regs, + "seccomp tried to change syscall nr or ip"); + do_exit(SIGSYS); + } + if (tmp) + goto do_ret; /* skip requested */ + /* * With a real vsyscall, page faults cause SIGSEGV. We want to * preserve that behavior to make writing exploits harder. @@ -223,49 +267,19 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; current_thread_info()->sig_on_uaccess_error = 1; - /* - * NULL is a valid user pointer (in the access_ok sense) on 32-bit and - * 64-bit, so we don't need to special-case it here. For all the - * vsyscalls, NULL means "don't write anything" not "write it at - * address 0". - */ ret = -EFAULT; - skip = 0; switch (vsyscall_nr) { case 0: - skip = vsyscall_seccomp(tsk, __NR_gettimeofday); - if (skip) - break; - - if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || - !write_ok_or_segv(regs->si, sizeof(struct timezone))) - break; - ret = sys_gettimeofday( (struct timeval __user *)regs->di, (struct timezone __user *)regs->si); break; case 1: - skip = vsyscall_seccomp(tsk, __NR_time); - if (skip) - break; - - if (!write_ok_or_segv(regs->di, sizeof(time_t))) - break; - ret = sys_time((time_t __user *)regs->di); break; case 2: - skip = vsyscall_seccomp(tsk, __NR_getcpu); - if (skip) - break; - - if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || - !write_ok_or_segv(regs->si, sizeof(unsigned))) - break; - ret = sys_getcpu((unsigned __user *)regs->di, (unsigned __user *)regs->si, NULL); @@ -274,12 +288,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; - if (skip) { - if ((long)regs->ax <= 0L) /* seccomp errno emulation */ - goto do_ret; - goto done; /* seccomp trace/trap */ - } - +check_fault: if (ret == -EFAULT) { /* Bad news -- userspace fed a bad pointer to a vsyscall. */ warn_bad_vsyscall(KERN_INFO, regs, @@ -302,7 +311,6 @@ do_ret: /* Emulate a ret instruction. 
*/ regs->ip = caller; regs->sp += 8; -done: return true; sigsegv: diff --git a/kernel/seccomp.c b/kernel/seccomp.c index ee376beedaf9..5af44b593770 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -396,25 +396,29 @@ int __secure_computing(int this_syscall) #ifdef CONFIG_SECCOMP_FILTER case SECCOMP_MODE_FILTER: { int data; + struct pt_regs *regs = task_pt_regs(current); ret = seccomp_run_filters(this_syscall); data = ret & SECCOMP_RET_DATA; ret &= SECCOMP_RET_ACTION; switch (ret) { case SECCOMP_RET_ERRNO: /* Set the low-order 16-bits as a errno. */ - syscall_set_return_value(current, task_pt_regs(current), + syscall_set_return_value(current, regs, -data, 0); goto skip; case SECCOMP_RET_TRAP: /* Show the handler the original registers. */ - syscall_rollback(current, task_pt_regs(current)); + syscall_rollback(current, regs); /* Let the filter pass back 16 bits of data. */ seccomp_send_sigsys(this_syscall, data); goto skip; case SECCOMP_RET_TRACE: /* Skip these calls if there is no tracer. */ - if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) + if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { + syscall_set_return_value(current, regs, + -ENOSYS, 0); goto skip; + } /* Allow the BPF to provide the event message */ ptrace_event(PTRACE_EVENT_SECCOMP, data); /* @@ -425,6 +429,9 @@ int __secure_computing(int this_syscall) */ if (fatal_signal_pending(current)) break; + if (syscall_get_nr(current, regs) < 0) + goto skip; /* Explicit request to skip. */ + return 0; case SECCOMP_RET_ALLOW: return 0; From b0e77b9c6b2fc21ec2e3d8b54edf8757a7c6a8dd Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Mon, 1 Oct 2012 18:10:53 -0300 Subject: [PATCH 0190/5831] drm/i915: extract intel_set_pipe_timings from crtc_mode_set Version 2: call intel_set_pipe_timings from both i9xx_crtc_mode_set and ironlake_crtc_mode_set, instead of just ironlake, as requested by Daniel Vetter. The problem caused by calling this function from i9xx_crtc_mode_set too is that now on i9xx we write to PIPESRC before writing to DSPSIZE and DSPPOS. I could not find any evidence in our documentation that this won't work, and the docs actually say the pipe registers should be set before the plane registers. Version 3: don't remove pipeconf bits on i9xx_crtc_mode_set. 
Signed-off-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 124 ++++++++++++--------------- 1 file changed, 54 insertions(+), 70 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ed0dcea4a727..6cf0d003d715 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4290,6 +4290,55 @@ static void i8xx_update_pll(struct drm_crtc *crtc, I915_WRITE(DPLL(pipe), dpll); } +static void intel_set_pipe_timings(struct intel_crtc *intel_crtc, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + struct drm_device *dev = intel_crtc->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + enum pipe pipe = intel_crtc->pipe; + uint32_t vsyncshift; + + if (!IS_GEN2(dev) && adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) { + /* the chip adds 2 halflines automatically */ + adjusted_mode->crtc_vtotal -= 1; + adjusted_mode->crtc_vblank_end -= 1; + vsyncshift = adjusted_mode->crtc_hsync_start + - adjusted_mode->crtc_htotal / 2; + } else { + vsyncshift = 0; + } + + if (INTEL_INFO(dev)->gen > 3) + I915_WRITE(VSYNCSHIFT(pipe), vsyncshift); + + I915_WRITE(HTOTAL(pipe), + (adjusted_mode->crtc_hdisplay - 1) | + ((adjusted_mode->crtc_htotal - 1) << 16)); + I915_WRITE(HBLANK(pipe), + (adjusted_mode->crtc_hblank_start - 1) | + ((adjusted_mode->crtc_hblank_end - 1) << 16)); + I915_WRITE(HSYNC(pipe), + (adjusted_mode->crtc_hsync_start - 1) | + ((adjusted_mode->crtc_hsync_end - 1) << 16)); + + I915_WRITE(VTOTAL(pipe), + (adjusted_mode->crtc_vdisplay - 1) | + ((adjusted_mode->crtc_vtotal - 1) << 16)); + I915_WRITE(VBLANK(pipe), + (adjusted_mode->crtc_vblank_start - 1) | + ((adjusted_mode->crtc_vblank_end - 1) << 16)); + I915_WRITE(VSYNC(pipe), + (adjusted_mode->crtc_vsync_start - 1) | + ((adjusted_mode->crtc_vsync_end - 1) << 16)); + + /* pipesrc controls the size that is scaled from, which should + * always be the user's requested size. 
+ */ + I915_WRITE(PIPESRC(pipe), + ((mode->hdisplay - 1) << 16) | (mode->vdisplay - 1)); +} + static int i9xx_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, @@ -4303,7 +4352,7 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc, int plane = intel_crtc->plane; int refclk, num_connectors = 0; intel_clock_t clock, reduced_clock; - u32 dspcntr, pipeconf, vsyncshift; + u32 dspcntr, pipeconf; bool ok, has_reduced_clock = false, is_sdvo = false; bool is_lvds = false, is_tv = false, is_dp = false; struct intel_encoder *encoder; @@ -4438,40 +4487,12 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc, pipeconf &= ~PIPECONF_INTERLACE_MASK; if (!IS_GEN2(dev) && - adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) { + adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) pipeconf |= PIPECONF_INTERLACE_W_FIELD_INDICATION; - /* the chip adds 2 halflines automatically */ - adjusted_mode->crtc_vtotal -= 1; - adjusted_mode->crtc_vblank_end -= 1; - vsyncshift = adjusted_mode->crtc_hsync_start - - adjusted_mode->crtc_htotal/2; - } else { + else pipeconf |= PIPECONF_PROGRESSIVE; - vsyncshift = 0; - } - if (!IS_GEN3(dev)) - I915_WRITE(VSYNCSHIFT(pipe), vsyncshift); - - I915_WRITE(HTOTAL(pipe), - (adjusted_mode->crtc_hdisplay - 1) | - ((adjusted_mode->crtc_htotal - 1) << 16)); - I915_WRITE(HBLANK(pipe), - (adjusted_mode->crtc_hblank_start - 1) | - ((adjusted_mode->crtc_hblank_end - 1) << 16)); - I915_WRITE(HSYNC(pipe), - (adjusted_mode->crtc_hsync_start - 1) | - ((adjusted_mode->crtc_hsync_end - 1) << 16)); - - I915_WRITE(VTOTAL(pipe), - (adjusted_mode->crtc_vdisplay - 1) | - ((adjusted_mode->crtc_vtotal - 1) << 16)); - I915_WRITE(VBLANK(pipe), - (adjusted_mode->crtc_vblank_start - 1) | - ((adjusted_mode->crtc_vblank_end - 1) << 16)); - I915_WRITE(VSYNC(pipe), - (adjusted_mode->crtc_vsync_start - 1) | - ((adjusted_mode->crtc_vsync_end - 1) << 16)); + intel_set_pipe_timings(intel_crtc, mode, adjusted_mode); /* pipesrc and dspsize control the size that is scaled from, * which should always be the user's requested size. 
@@ -4480,8 +4501,6 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc, ((mode->vdisplay - 1) << 16) | (mode->hdisplay - 1)); I915_WRITE(DSPPOS(plane), 0); - I915_WRITE(PIPESRC(pipe), - ((mode->hdisplay - 1) << 16) | (mode->vdisplay - 1)); I915_WRITE(PIPECONF(pipe), pipeconf); POSTING_READ(PIPECONF(pipe)); @@ -5087,42 +5106,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, } } - if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) { - /* the chip adds 2 halflines automatically */ - adjusted_mode->crtc_vtotal -= 1; - adjusted_mode->crtc_vblank_end -= 1; - I915_WRITE(VSYNCSHIFT(pipe), - adjusted_mode->crtc_hsync_start - - adjusted_mode->crtc_htotal/2); - } else { - I915_WRITE(VSYNCSHIFT(pipe), 0); - } - - I915_WRITE(HTOTAL(pipe), - (adjusted_mode->crtc_hdisplay - 1) | - ((adjusted_mode->crtc_htotal - 1) << 16)); - I915_WRITE(HBLANK(pipe), - (adjusted_mode->crtc_hblank_start - 1) | - ((adjusted_mode->crtc_hblank_end - 1) << 16)); - I915_WRITE(HSYNC(pipe), - (adjusted_mode->crtc_hsync_start - 1) | - ((adjusted_mode->crtc_hsync_end - 1) << 16)); - - I915_WRITE(VTOTAL(pipe), - (adjusted_mode->crtc_vdisplay - 1) | - ((adjusted_mode->crtc_vtotal - 1) << 16)); - I915_WRITE(VBLANK(pipe), - (adjusted_mode->crtc_vblank_start - 1) | - ((adjusted_mode->crtc_vblank_end - 1) << 16)); - I915_WRITE(VSYNC(pipe), - (adjusted_mode->crtc_vsync_start - 1) | - ((adjusted_mode->crtc_vsync_end - 1) << 16)); - - /* pipesrc controls the size that is scaled from, which should - * always be the user's requested size. - */ - I915_WRITE(PIPESRC(pipe), - ((mode->hdisplay - 1) << 16) | (mode->vdisplay - 1)); + intel_set_pipe_timings(intel_crtc, mode, adjusted_mode); ironlake_set_m_n(crtc, mode, adjusted_mode); From ff1f525ef4da9d5abdb40893044ef5b041da52c0 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 2 Oct 2012 15:10:55 +0200 Subject: [PATCH 0191/5831] drm/i915: s/DRM_IRQ_ARGS/int irq, void *arg I'm official fed up with the yelling and useless indirection. Let it burn! 
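(For context: with the macro gone the handlers simply spell out the kernel's own irq handler signature, roughly typedef irqreturn_t (*irq_handler_t)(int, void *); from <linux/interrupt.h>, so there is no indirection left between what request_irq() expects and what the driver defines.)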
Reviewed-by: Jani Nikula Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f7ec794480c8..337c5cdf5598 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -521,7 +521,7 @@ static void gen6_queue_rps_work(struct drm_i915_private *dev_priv, queue_work(dev_priv->wq, &dev_priv->rps.work); } -static irqreturn_t valleyview_irq_handler(DRM_IRQ_ARGS) +static irqreturn_t valleyview_irq_handler(int irq, void *arg) { struct drm_device *dev = (struct drm_device *) arg; drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; @@ -671,7 +671,7 @@ static void cpt_irq_handler(struct drm_device *dev, u32 pch_iir) I915_READ(FDI_RX_IIR(pipe))); } -static irqreturn_t ivybridge_irq_handler(DRM_IRQ_ARGS) +static irqreturn_t ivybridge_irq_handler(int irq, void *arg) { struct drm_device *dev = (struct drm_device *) arg; drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; @@ -746,7 +746,7 @@ static void ilk_gt_irq_handler(struct drm_device *dev, notify_ring(dev, &dev_priv->ring[VCS]); } -static irqreturn_t ironlake_irq_handler(DRM_IRQ_ARGS) +static irqreturn_t ironlake_irq_handler(int irq, void *arg) { struct drm_device *dev = (struct drm_device *) arg; drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; @@ -2120,7 +2120,7 @@ static int i8xx_irq_postinstall(struct drm_device *dev) return 0; } -static irqreturn_t i8xx_irq_handler(DRM_IRQ_ARGS) +static irqreturn_t i8xx_irq_handler(int irq, void *arg) { struct drm_device *dev = (struct drm_device *) arg; drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; @@ -2298,7 +2298,7 @@ static int i915_irq_postinstall(struct drm_device *dev) return 0; } -static irqreturn_t i915_irq_handler(DRM_IRQ_ARGS) +static irqreturn_t i915_irq_handler(int irq, void *arg) { struct drm_device *dev = (struct drm_device *) arg; drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; @@ -2536,7 +2536,7 @@ static int i965_irq_postinstall(struct drm_device *dev) return 0; } -static irqreturn_t i965_irq_handler(DRM_IRQ_ARGS) +static irqreturn_t i965_irq_handler(int irq, void *arg) { struct drm_device *dev = (struct drm_device *) arg; drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; From 3a50597de8635cd05133bd12c95681c82fe7b878 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Oct 2012 19:24:29 +0100 Subject: [PATCH 0192/5831] KEYS: Make the session and process keyrings per-thread Make the session keyring per-thread rather than per-process, but still inherited from the parent thread to solve a problem with PAM and gdm. The problem is that join_session_keyring() will reject attempts to change the session keyring of a multithreaded program but gdm is now multithreaded before it gets to the point of starting PAM and running pam_keyinit to create the session keyring. See: https://bugs.freedesktop.org/show_bug.cgi?id=49211 The reason that join_session_keyring() will only change the session keyring under a single-threaded environment is that it's hard to alter the other thread's credentials to effect the change in a multi-threaded program. The problems are such as: (1) How to prevent two threads both running join_session_keyring() from racing. (2) Another thread's credentials may not be modified directly by this process. 
(3) The number of threads is uncertain whilst we're not holding the appropriate spinlock, making preallocation slightly tricky. (4) We could use TIF_NOTIFY_RESUME and key_replace_session_keyring() to get another thread to replace its keyring, but that means preallocating for each thread. A reasonable way around this is to make the session keyring per-thread rather than per-process and just document that if you want a common session keyring, you must get it before you spawn any threads - which is the current situation anyway. Whilst we're at it, we can the process keyring behave in the same way. This means we can clean up some of the ickyness in the creds code. Basically, after this patch, the session, process and thread keyrings are about inheritance rules only and not about sharing changes of keyring. Reported-by: Mantas M. Signed-off-by: David Howells Tested-by: Ray Strode --- include/linux/cred.h | 17 +---- kernel/cred.c | 127 +++++------------------------------ security/keys/keyctl.c | 11 +-- security/keys/process_keys.c | 64 ++++++------------ security/keys/request_key.c | 10 +-- 5 files changed, 49 insertions(+), 180 deletions(-) diff --git a/include/linux/cred.h b/include/linux/cred.h index ebbed2ce6637..0142aacb70b7 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -76,21 +76,6 @@ extern int groups_search(const struct group_info *, kgid_t); extern int in_group_p(kgid_t); extern int in_egroup_p(kgid_t); -/* - * The common credentials for a thread group - * - shared by CLONE_THREAD - */ -#ifdef CONFIG_KEYS -struct thread_group_cred { - atomic_t usage; - pid_t tgid; /* thread group process ID */ - spinlock_t lock; - struct key __rcu *session_keyring; /* keyring inherited over fork */ - struct key *process_keyring; /* keyring private to this process */ - struct rcu_head rcu; /* RCU deletion hook */ -}; -#endif - /* * The security context of a task * @@ -139,6 +124,8 @@ struct cred { #ifdef CONFIG_KEYS unsigned char jit_keyring; /* default keyring to attach requested * keys to */ + struct key __rcu *session_keyring; /* keyring inherited over fork */ + struct key *process_keyring; /* keyring private to this process */ struct key *thread_keyring; /* keyring private to this thread */ struct key *request_key_auth; /* assumed request_key authority */ struct thread_group_cred *tgcred; /* thread-group shared credentials */ diff --git a/kernel/cred.c b/kernel/cred.c index de728ac50d82..3f7ad1ec2ae4 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -29,17 +29,6 @@ static struct kmem_cache *cred_jar; -/* - * The common credentials for the initial task's thread group - */ -#ifdef CONFIG_KEYS -static struct thread_group_cred init_tgcred = { - .usage = ATOMIC_INIT(2), - .tgid = 0, - .lock = __SPIN_LOCK_UNLOCKED(init_cred.tgcred.lock), -}; -#endif - /* * The initial credentials for the initial task */ @@ -65,9 +54,6 @@ struct cred init_cred = { .user = INIT_USER, .user_ns = &init_user_ns, .group_info = &init_groups, -#ifdef CONFIG_KEYS - .tgcred = &init_tgcred, -#endif }; static inline void set_cred_subscribers(struct cred *cred, int n) @@ -95,36 +81,6 @@ static inline void alter_cred_subscribers(const struct cred *_cred, int n) #endif } -/* - * Dispose of the shared task group credentials - */ -#ifdef CONFIG_KEYS -static void release_tgcred_rcu(struct rcu_head *rcu) -{ - struct thread_group_cred *tgcred = - container_of(rcu, struct thread_group_cred, rcu); - - BUG_ON(atomic_read(&tgcred->usage) != 0); - - key_put(tgcred->session_keyring); - key_put(tgcred->process_keyring); - 
kfree(tgcred); -} -#endif - -/* - * Release a set of thread group credentials. - */ -static void release_tgcred(struct cred *cred) -{ -#ifdef CONFIG_KEYS - struct thread_group_cred *tgcred = cred->tgcred; - - if (atomic_dec_and_test(&tgcred->usage)) - call_rcu(&tgcred->rcu, release_tgcred_rcu); -#endif -} - /* * The RCU callback to actually dispose of a set of credentials */ @@ -150,9 +106,10 @@ static void put_cred_rcu(struct rcu_head *rcu) #endif security_cred_free(cred); + key_put(cred->session_keyring); + key_put(cred->process_keyring); key_put(cred->thread_keyring); key_put(cred->request_key_auth); - release_tgcred(cred); if (cred->group_info) put_group_info(cred->group_info); free_uid(cred->user); @@ -246,15 +203,6 @@ struct cred *cred_alloc_blank(void) if (!new) return NULL; -#ifdef CONFIG_KEYS - new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL); - if (!new->tgcred) { - kmem_cache_free(cred_jar, new); - return NULL; - } - atomic_set(&new->tgcred->usage, 1); -#endif - atomic_set(&new->usage, 1); #ifdef CONFIG_DEBUG_CREDENTIALS new->magic = CRED_MAGIC; @@ -308,9 +256,10 @@ struct cred *prepare_creds(void) get_user_ns(new->user_ns); #ifdef CONFIG_KEYS + key_get(new->session_keyring); + key_get(new->process_keyring); key_get(new->thread_keyring); key_get(new->request_key_auth); - atomic_inc(&new->tgcred->usage); #endif #ifdef CONFIG_SECURITY @@ -334,39 +283,20 @@ EXPORT_SYMBOL(prepare_creds); */ struct cred *prepare_exec_creds(void) { - struct thread_group_cred *tgcred = NULL; struct cred *new; -#ifdef CONFIG_KEYS - tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); - if (!tgcred) - return NULL; -#endif - new = prepare_creds(); - if (!new) { - kfree(tgcred); + if (!new) return new; - } #ifdef CONFIG_KEYS /* newly exec'd tasks don't get a thread keyring */ key_put(new->thread_keyring); new->thread_keyring = NULL; - /* create a new per-thread-group creds for all this set of threads to - * share */ - memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred)); - - atomic_set(&tgcred->usage, 1); - spin_lock_init(&tgcred->lock); - /* inherit the session keyring; new process keyring */ - key_get(tgcred->session_keyring); - tgcred->process_keyring = NULL; - - release_tgcred(new); - new->tgcred = tgcred; + key_put(new->process_keyring); + new->process_keyring = NULL; #endif return new; @@ -383,9 +313,6 @@ struct cred *prepare_exec_creds(void) */ int copy_creds(struct task_struct *p, unsigned long clone_flags) { -#ifdef CONFIG_KEYS - struct thread_group_cred *tgcred; -#endif struct cred *new; int ret; @@ -425,22 +352,12 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) install_thread_keyring_to_cred(new); } - /* we share the process and session keyrings between all the threads in - * a process - this is slightly icky as we violate COW credentials a - * bit */ + /* The process keyring is only shared between the threads in a process; + * anything outside of those threads doesn't inherit. 
+ */ if (!(clone_flags & CLONE_THREAD)) { - tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); - if (!tgcred) { - ret = -ENOMEM; - goto error_put; - } - atomic_set(&tgcred->usage, 1); - spin_lock_init(&tgcred->lock); - tgcred->process_keyring = NULL; - tgcred->session_keyring = key_get(new->tgcred->session_keyring); - - release_tgcred(new); - new->tgcred = tgcred; + key_put(new->process_keyring); + new->process_keyring = NULL; } #endif @@ -643,9 +560,6 @@ void __init cred_init(void) */ struct cred *prepare_kernel_cred(struct task_struct *daemon) { -#ifdef CONFIG_KEYS - struct thread_group_cred *tgcred; -#endif const struct cred *old; struct cred *new; @@ -653,14 +567,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) if (!new) return NULL; -#ifdef CONFIG_KEYS - tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); - if (!tgcred) { - kmem_cache_free(cred_jar, new); - return NULL; - } -#endif - kdebug("prepare_kernel_cred() alloc %p", new); if (daemon) @@ -678,13 +584,10 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) get_group_info(new->group_info); #ifdef CONFIG_KEYS - atomic_set(&tgcred->usage, 1); - spin_lock_init(&tgcred->lock); - tgcred->process_keyring = NULL; - tgcred->session_keyring = NULL; - new->tgcred = tgcred; - new->request_key_auth = NULL; + new->session_keyring = NULL; + new->process_keyring = NULL; new->thread_keyring = NULL; + new->request_key_auth = NULL; new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; #endif diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index a0d373f76815..65b38417c211 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1475,7 +1475,8 @@ long keyctl_session_to_parent(void) goto error_keyring; newwork = &cred->rcu; - cred->tgcred->session_keyring = key_ref_to_ptr(keyring_r); + cred->session_keyring = key_ref_to_ptr(keyring_r); + keyring_r = NULL; init_task_work(newwork, key_change_session_keyring); me = current; @@ -1500,7 +1501,7 @@ long keyctl_session_to_parent(void) mycred = current_cred(); pcred = __task_cred(parent); if (mycred == pcred || - mycred->tgcred->session_keyring == pcred->tgcred->session_keyring) { + mycred->session_keyring == pcred->session_keyring) { ret = 0; goto unlock; } @@ -1516,9 +1517,9 @@ long keyctl_session_to_parent(void) goto unlock; /* the keyrings must have the same UID */ - if ((pcred->tgcred->session_keyring && - pcred->tgcred->session_keyring->uid != mycred->euid) || - mycred->tgcred->session_keyring->uid != mycred->euid) + if ((pcred->session_keyring && + pcred->session_keyring->uid != mycred->euid) || + mycred->session_keyring->uid != mycred->euid) goto unlock; /* cancel an already pending keyring replacement */ diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 178b8c3b130a..9de5dc598276 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -169,9 +169,8 @@ static int install_thread_keyring(void) int install_process_keyring_to_cred(struct cred *new) { struct key *keyring; - int ret; - if (new->tgcred->process_keyring) + if (new->process_keyring) return -EEXIST; keyring = keyring_alloc("_pid", new->uid, new->gid, @@ -179,17 +178,8 @@ int install_process_keyring_to_cred(struct cred *new) if (IS_ERR(keyring)) return PTR_ERR(keyring); - spin_lock_irq(&new->tgcred->lock); - if (!new->tgcred->process_keyring) { - new->tgcred->process_keyring = keyring; - keyring = NULL; - ret = 0; - } else { - ret = -EEXIST; - } - spin_unlock_irq(&new->tgcred->lock); - key_put(keyring); - return ret; + new->process_keyring = keyring; + 
return 0; } /* @@ -230,7 +220,7 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) /* create an empty session keyring */ if (!keyring) { flags = KEY_ALLOC_QUOTA_OVERRUN; - if (cred->tgcred->session_keyring) + if (cred->session_keyring) flags = KEY_ALLOC_IN_QUOTA; keyring = keyring_alloc("_ses", cred->uid, cred->gid, @@ -242,17 +232,11 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) } /* install the keyring */ - spin_lock_irq(&cred->tgcred->lock); - old = cred->tgcred->session_keyring; - rcu_assign_pointer(cred->tgcred->session_keyring, keyring); - spin_unlock_irq(&cred->tgcred->lock); + old = cred->session_keyring; + rcu_assign_pointer(cred->session_keyring, keyring); - /* we're using RCU on the pointer, but there's no point synchronising - * on it if it didn't previously point to anything */ - if (old) { - synchronize_rcu(); + if (old) key_put(old); - } return 0; } @@ -367,9 +351,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type, } /* search the process keyring second */ - if (cred->tgcred->process_keyring) { + if (cred->process_keyring) { key_ref = keyring_search_aux( - make_key_ref(cred->tgcred->process_keyring, 1), + make_key_ref(cred->process_keyring, 1), cred, type, description, match, no_state_check); if (!IS_ERR(key_ref)) goto found; @@ -388,12 +372,10 @@ key_ref_t search_my_process_keyrings(struct key_type *type, } /* search the session keyring */ - if (cred->tgcred->session_keyring) { + if (cred->session_keyring) { rcu_read_lock(); key_ref = keyring_search_aux( - make_key_ref(rcu_dereference( - cred->tgcred->session_keyring), - 1), + make_key_ref(rcu_dereference(cred->session_keyring), 1), cred, type, description, match, no_state_check); rcu_read_unlock(); @@ -563,7 +545,7 @@ try_again: break; case KEY_SPEC_PROCESS_KEYRING: - if (!cred->tgcred->process_keyring) { + if (!cred->process_keyring) { if (!(lflags & KEY_LOOKUP_CREATE)) goto error; @@ -575,13 +557,13 @@ try_again: goto reget_creds; } - key = cred->tgcred->process_keyring; + key = cred->process_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_SESSION_KEYRING: - if (!cred->tgcred->session_keyring) { + if (!cred->session_keyring) { /* always install a session keyring upon access if one * doesn't exist yet */ ret = install_user_keyrings(); @@ -596,7 +578,7 @@ try_again: if (ret < 0) goto error; goto reget_creds; - } else if (cred->tgcred->session_keyring == + } else if (cred->session_keyring == cred->user->session_keyring && lflags & KEY_LOOKUP_CREATE) { ret = join_session_keyring(NULL); @@ -606,7 +588,7 @@ try_again: } rcu_read_lock(); - key = rcu_dereference(cred->tgcred->session_keyring); + key = rcu_dereference(cred->session_keyring); atomic_inc(&key->usage); rcu_read_unlock(); key_ref = make_key_ref(key, 1); @@ -766,12 +748,6 @@ long join_session_keyring(const char *name) struct key *keyring; long ret, serial; - /* only permit this if there's a single thread in the thread group - - * this avoids us having to adjust the creds on all threads and risking - * ENOMEM */ - if (!current_is_single_threaded()) - return -EMLINK; - new = prepare_creds(); if (!new) return -ENOMEM; @@ -783,7 +759,7 @@ long join_session_keyring(const char *name) if (ret < 0) goto error; - serial = new->tgcred->session_keyring->serial; + serial = new->session_keyring->serial; ret = commit_creds(new); if (ret == 0) ret = serial; @@ -806,6 +782,9 @@ long join_session_keyring(const char *name) } else if (IS_ERR(keyring)) { ret = 
PTR_ERR(keyring); goto error2; + } else if (keyring == new->session_keyring) { + ret = 0; + goto error2; } /* we've got a keyring - now to install it */ @@ -862,8 +841,7 @@ void key_change_session_keyring(struct callback_head *twork) new->jit_keyring = old->jit_keyring; new->thread_keyring = key_get(old->thread_keyring); - new->tgcred->tgid = old->tgcred->tgid; - new->tgcred->process_keyring = key_get(old->tgcred->process_keyring); + new->process_keyring = key_get(old->process_keyring); security_transfer_creds(new, old); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 000e75017520..275c4f9e4b8c 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -150,12 +150,12 @@ static int call_sbin_request_key(struct key_construction *cons, cred->thread_keyring ? cred->thread_keyring->serial : 0); prkey = 0; - if (cred->tgcred->process_keyring) - prkey = cred->tgcred->process_keyring->serial; + if (cred->process_keyring) + prkey = cred->process_keyring->serial; sprintf(keyring_str[1], "%d", prkey); rcu_read_lock(); - session = rcu_dereference(cred->tgcred->session_keyring); + session = rcu_dereference(cred->session_keyring); if (!session) session = cred->user->session_keyring; sskey = session->serial; @@ -297,14 +297,14 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) break; case KEY_REQKEY_DEFL_PROCESS_KEYRING: - dest_keyring = key_get(cred->tgcred->process_keyring); + dest_keyring = key_get(cred->process_keyring); if (dest_keyring) break; case KEY_REQKEY_DEFL_SESSION_KEYRING: rcu_read_lock(); dest_keyring = key_get( - rcu_dereference(cred->tgcred->session_keyring)); + rcu_dereference(cred->session_keyring)); rcu_read_unlock(); if (dest_keyring) From 96b5c8fea6c0861621051290d705ec2e971963f1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Oct 2012 19:24:56 +0100 Subject: [PATCH 0193/5831] KEYS: Reduce initial permissions on keys Reduce the initial permissions on new keys to grant the possessor everything, view permission only to the user (so the keys can be seen in /proc/keys) and nothing else. This gives the creator a chance to adjust the permissions mask before other processes can access the new key or create a link to it. To aid with this, keyring_alloc() now takes a permission argument rather than setting the permissions itself. The following permissions are now set: (1) The user and user-session keyrings grant the user that owns them full permissions and grant a possessor everything bar SETATTR. (2) The process and thread keyrings grant the possessor full permissions but only grant the user VIEW. This permits the user to see them in /proc/keys, but not to do anything with them. (3) Anonymous session keyrings grant the possessor full permissions, but only grant the user VIEW and READ. This means that the user can see them in /proc/keys and can list them, but nothing else. Possibly READ shouldn't be provided either. (4) Named session keyrings grant everything an anonymous session keyring does, plus they grant the user LINK permission. The whole point of named session keyrings is that others can also subscribe to them. Possibly this should be a separate permission to LINK. (5) The temporary session keyring created by call_sbin_request_key() gets the same permissions as an anonymous session keyring. (6) Keys created by add_key() get VIEW, SEARCH, LINK and SETATTR for the possessor, plus READ and/or WRITE if the key type supports them. The used only gets VIEW now. 
(7) Keys created by request_key() now get the same as those created by add_key(). Reported-by: Lennart Poettering Reported-by: Stef Walter Signed-off-by: David Howells --- include/linux/key.h | 1 + security/keys/key.c | 6 +++--- security/keys/keyring.c | 9 +++------ security/keys/process_keys.c | 26 +++++++++++++++++--------- security/keys/request_key.c | 11 ++++++++++- 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/include/linux/key.h b/include/linux/key.h index cef3b315ba7c..890699815212 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -264,6 +264,7 @@ extern int key_unlink(struct key *keyring, extern struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, const struct cred *cred, + key_perm_t perm, unsigned long flags, struct key *dest); diff --git a/security/keys/key.c b/security/keys/key.c index 50d96d4e06f2..bebeca3a78e4 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -826,13 +826,13 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, /* if the client doesn't provide, decide on the permissions we want */ if (perm == KEY_PERM_UNDEF) { perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; - perm |= KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_LINK | KEY_USR_SETATTR; + perm |= KEY_USR_VIEW; if (ktype->read) - perm |= KEY_POS_READ | KEY_USR_READ; + perm |= KEY_POS_READ; if (ktype == &key_type_keyring || ktype->update) - perm |= KEY_USR_WRITE; + perm |= KEY_POS_WRITE; } /* allocate a new key */ diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 81e7852d281d..cf704a92083f 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -257,17 +257,14 @@ error: * Allocate a keyring and link into the destination keyring. */ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, - const struct cred *cred, unsigned long flags, - struct key *dest) + const struct cred *cred, key_perm_t perm, + unsigned long flags, struct key *dest) { struct key *keyring; int ret; keyring = key_alloc(&key_type_keyring, description, - uid, gid, cred, - (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL, - flags); - + uid, gid, cred, perm, flags); if (!IS_ERR(keyring)) { ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL); if (ret < 0) { diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 9de5dc598276..b58d93892740 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -46,9 +46,11 @@ int install_user_keyrings(void) struct user_struct *user; const struct cred *cred; struct key *uid_keyring, *session_keyring; + key_perm_t user_keyring_perm; char buf[20]; int ret; + user_keyring_perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL; cred = current_cred(); user = cred->user; @@ -72,8 +74,8 @@ int install_user_keyrings(void) uid_keyring = find_keyring_by_name(buf, true); if (IS_ERR(uid_keyring)) { uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, - cred, KEY_ALLOC_IN_QUOTA, - NULL); + cred, user_keyring_perm, + KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(uid_keyring)) { ret = PTR_ERR(uid_keyring); goto error; @@ -88,7 +90,8 @@ int install_user_keyrings(void) if (IS_ERR(session_keyring)) { session_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, - cred, KEY_ALLOC_IN_QUOTA, NULL); + cred, user_keyring_perm, + KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(session_keyring)) { ret = PTR_ERR(session_keyring); goto error_release; @@ -129,6 +132,7 @@ int install_thread_keyring_to_cred(struct cred *new) struct key *keyring; keyring = keyring_alloc("_tid", 
new->uid, new->gid, new, + KEY_POS_ALL | KEY_USR_VIEW, KEY_ALLOC_QUOTA_OVERRUN, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); @@ -173,8 +177,9 @@ int install_process_keyring_to_cred(struct cred *new) if (new->process_keyring) return -EEXIST; - keyring = keyring_alloc("_pid", new->uid, new->gid, - new, KEY_ALLOC_QUOTA_OVERRUN, NULL); + keyring = keyring_alloc("_pid", new->uid, new->gid, new, + KEY_POS_ALL | KEY_USR_VIEW, + KEY_ALLOC_QUOTA_OVERRUN, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); @@ -223,8 +228,9 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) if (cred->session_keyring) flags = KEY_ALLOC_IN_QUOTA; - keyring = keyring_alloc("_ses", cred->uid, cred->gid, - cred, flags, NULL); + keyring = keyring_alloc("_ses", cred->uid, cred->gid, cred, + KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ, + flags, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); } else { @@ -773,8 +779,10 @@ long join_session_keyring(const char *name) keyring = find_keyring_by_name(name, false); if (PTR_ERR(keyring) == -ENOKEY) { /* not found - try and create a new one */ - keyring = keyring_alloc(name, old->uid, old->gid, old, - KEY_ALLOC_IN_QUOTA, NULL); + keyring = keyring_alloc( + name, old->uid, old->gid, old, + KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_LINK, + KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error2; diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 275c4f9e4b8c..0ae3a2202771 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -126,6 +126,7 @@ static int call_sbin_request_key(struct key_construction *cons, cred = get_current_cred(); keyring = keyring_alloc(desc, cred->fsuid, cred->fsgid, cred, + KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ, KEY_ALLOC_QUOTA_OVERRUN, NULL); put_cred(cred); if (IS_ERR(keyring)) { @@ -347,6 +348,7 @@ static int construct_alloc_key(struct key_type *type, const struct cred *cred = current_cred(); unsigned long prealloc; struct key *key; + key_perm_t perm; key_ref_t key_ref; int ret; @@ -355,8 +357,15 @@ static int construct_alloc_key(struct key_type *type, *_key = NULL; mutex_lock(&user->cons_lock); + perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; + perm |= KEY_USR_VIEW; + if (type->read) + perm |= KEY_POS_READ; + if (type == &key_type_keyring || type->update) + perm |= KEY_POS_WRITE; + key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred, - KEY_POS_ALL, flags); + perm, flags); if (IS_ERR(key)) goto alloc_failed; From f8aa23a55f813c9bddec2a6176e0e67274e6e7c1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Oct 2012 19:24:56 +0100 Subject: [PATCH 0194/5831] KEYS: Use keyring_alloc() to create special keyrings Use keyring_alloc() to create special keyrings now that it has a permissions parameter rather than using key_alloc() + key_instantiate_and_link(). Also document and export keyring_alloc() so that modules can use it too. Signed-off-by: David Howells --- Documentation/security/keys.txt | 17 +++++++++++++++++ fs/cifs/cifsacl.c | 12 ++++-------- fs/nfs/idmap.c | 12 ++++-------- net/dns_resolver/dns_key.c | 13 +++++-------- security/keys/keyring.c | 1 + 5 files changed, 31 insertions(+), 24 deletions(-) diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index aa0dbd74b71b..a4f9125c0333 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -990,6 +990,23 @@ payload contents" for more information. reference pointer if successful. 
+(*) A keyring can be created by: + + struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, + const struct cred *cred, + key_perm_t perm, + unsigned long flags, + struct key *dest); + + This creates a keyring with the given attributes and returns it. If dest + is not NULL, the new keyring will be linked into the keyring to which it + points. No permission checks are made upon the destination keyring. + + Error EDQUOT can be returned if the keyring would overload the quota (pass + KEY_ALLOC_NOT_IN_QUOTA in flags if the keyring shouldn't be accounted + towards the user's quota). Error ENOMEM can also be returned. + + (*) To check the validity of a key, this function can be called: int validate_key(struct key *key); diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 05f4dc263a23..a8a753c8fcd5 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -537,19 +537,15 @@ init_cifs_idmap(void) if (!cred) return -ENOMEM; - keyring = key_alloc(&key_type_keyring, ".cifs_idmap", 0, 0, cred, - (KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ, - KEY_ALLOC_NOT_IN_QUOTA); + keyring = keyring_alloc(".cifs_idmap", 0, 0, cred, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto failed_put_cred; } - ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL); - if (ret < 0) - goto failed_put_key; - ret = register_key_type(&cifs_idmap_key_type); if (ret < 0) goto failed_put_key; diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index a850079467d8..957134b4c0fd 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -192,19 +192,15 @@ static int nfs_idmap_init_keyring(void) if (!cred) return -ENOMEM; - keyring = key_alloc(&key_type_keyring, ".id_resolver", 0, 0, cred, - (KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ, - KEY_ALLOC_NOT_IN_QUOTA); + keyring = keyring_alloc(".id_resolver", 0, 0, cred, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto failed_put_cred; } - ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL); - if (ret < 0) - goto failed_put_key; - ret = register_key_type(&key_type_id_resolver); if (ret < 0) goto failed_put_key; diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index d9507dd05818..f2c379d835e3 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -259,19 +259,15 @@ static int __init init_dns_resolver(void) if (!cred) return -ENOMEM; - keyring = key_alloc(&key_type_keyring, ".dns_resolver", 0, 0, cred, - (KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ, - KEY_ALLOC_NOT_IN_QUOTA); + keyring = keyring_alloc(".dns_resolver", 0, 0, cred, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto failed_put_cred; } - ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL); - if (ret < 0) - goto failed_put_key; - ret = register_key_type(&key_type_dns_resolver); if (ret < 0) goto failed_put_key; @@ -303,3 +299,4 @@ static void __exit exit_dns_resolver(void) module_init(init_dns_resolver) module_exit(exit_dns_resolver) MODULE_LICENSE("GPL"); + diff --git a/security/keys/keyring.c b/security/keys/keyring.c index cf704a92083f..8c25558da14e 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -275,6 +275,7 @@ struct key *keyring_alloc(const char *description, uid_t 
uid, gid_t gid, return keyring; } +EXPORT_SYMBOL(keyring_alloc); /** * keyring_search_aux - Search a keyring tree for a key matching some criteria From 61939d977d66951b04cfd4fbe75705614b98ecad Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 2 Oct 2012 17:43:38 -0500 Subject: [PATCH 0195/5831] drm/i915: implement WaForceL3Serialization on VLV and IVB References: https://bugs.freedesktop.org/show_bug.cgi?id=50250 Signed-off-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 3 +++ drivers/gpu/drm/i915/intel_pm.c | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d17bef7f8040..ea67351368d9 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3448,6 +3448,9 @@ #define GEN7_L3_CHICKEN_MODE_REGISTER 0xB030 #define GEN7_WA_L3_CHICKEN_MODE 0x20000000 +#define GEN7_L3SQCREG4 0xb034 +#define L3SQ_URB_READ_CAM_MATCH_DISABLE (1<<27) + /* WaCatErrorRejectionIssue */ #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG 0x9030 #define GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB (1<<11) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a3e4f8bdd23b..f1f1fd005c78 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3547,6 +3547,10 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE); + /* WaForceL3Serialization */ + I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & + ~L3SQ_URB_READ_CAM_MATCH_DISABLE); + /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock * gating disable must be set. Failure to set it results in * flickering pixels due to Z write ordering failures after @@ -3617,6 +3621,10 @@ static void valleyview_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL); I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE); + /* WaForceL3Serialization */ + I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & + ~L3SQ_URB_READ_CAM_MATCH_DISABLE); + /* This is required by WaCatErrorRejectionIssue */ I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | From 87f8020ec9e3069597746040a4e8655189bc0c1a Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 2 Oct 2012 17:43:41 -0500 Subject: [PATCH 0196/5831] drm/i915: implement WaDisableEarlyCull for VLV and IVB Workaround for a culling optimization. Signed-off-by: Jesse Barnes Reviewed-by: Ben Widawsky [danvet: Also apply to haswell, spotted by Damien.] 
Reviewed-by: "Lespiau, Damien" Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ea67351368d9..f3a06b421be4 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -525,6 +525,7 @@ */ # define _3D_CHICKEN2_WM_READ_PIPELINED (1 << 14) #define _3D_CHICKEN3 0x02090 +#define _3D_CHICKEN_SF_DISABLE_OBJEND_CULL (1 << 10) #define _3D_CHICKEN_SF_DISABLE_FASTCLIP_CULL (1 << 5) #define MI_MODE 0x0209c diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f1f1fd005c78..62fe848d60f0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3475,6 +3475,10 @@ static void haswell_init_clock_gating(struct drm_device *dev) I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE); + /* WaDisableEarlyCull */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); + I915_WRITE(IVB_CHICKEN3, CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | CHICKEN3_DGMG_DONE_FIX_DISABLE); @@ -3533,6 +3537,10 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE); + /* WaDisableEarlyCull */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); + I915_WRITE(IVB_CHICKEN3, CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | CHICKEN3_DGMG_DONE_FIX_DISABLE); @@ -3609,6 +3617,10 @@ static void valleyview_init_clock_gating(struct drm_device *dev) I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE); + /* WaDisableEarlyCull */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); + I915_WRITE(IVB_CHICKEN3, CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | CHICKEN3_DGMG_DONE_FIX_DISABLE); From e13dcc1ab593452b99c39de2cb588c39f6d6a7e9 Mon Sep 17 00:00:00 2001 From: Stuart Yoder Date: Tue, 3 Jul 2012 05:48:49 +0000 Subject: [PATCH 0197/5831] PPC: epapr: create define for return code value of success Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/epapr_hcalls.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index bf2c06c33871..c0c7adcd21e3 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -88,7 +88,8 @@ #define _EV_HCALL_TOKEN(id, num) (((id) << 16) | (num)) #define EV_HCALL_TOKEN(hcall_num) _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num) -/* epapr error codes */ +/* epapr return codes */ +#define EV_SUCCESS 0 #define EV_EPERM 1 /* Operation not permitted */ #define EV_ENOENT 2 /* Entry Not Found */ #define EV_EIO 3 /* I/O error occured */ From fdcf8bd7e711d4c0fe3ef624cfb5e3808149ff7f Mon Sep 17 00:00:00 2001 From: Stuart Yoder Date: Tue, 3 Jul 2012 05:48:50 +0000 Subject: [PATCH 0198/5831] KVM: PPC: use definitions in epapr header for hcalls Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_para.h | 21 +++++++++++---------- arch/powerpc/kernel/kvm.c | 2 +- arch/powerpc/kvm/powerpc.c | 10 +++++----- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index c18916bff689..a168ce37d85c 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -75,9 +75,10 @@ struct kvm_vcpu_arch_shared { }; #define KVM_SC_MAGIC_R0 
0x4b564d21 /* "KVM!" */ -#define HC_VENDOR_KVM (42 << 16) -#define HC_EV_SUCCESS 0 -#define HC_EV_UNIMPLEMENTED 12 + +#define KVM_HCALL_TOKEN(num) _EV_HCALL_TOKEN(EV_KVM_VENDOR_ID, num) + +#include #define KVM_FEATURE_MAGIC_PAGE 1 @@ -121,7 +122,7 @@ static unsigned long kvm_hypercall(unsigned long *in, unsigned long *out, unsigned long nr) { - return HC_EV_UNIMPLEMENTED; + return EV_UNIMPLEMENTED; } #endif @@ -132,7 +133,7 @@ static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2) unsigned long out[8]; unsigned long r; - r = kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + r = kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); *r2 = out[0]; return r; @@ -143,7 +144,7 @@ static inline long kvm_hypercall0(unsigned int nr) unsigned long in[8]; unsigned long out[8]; - return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) @@ -152,7 +153,7 @@ static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) unsigned long out[8]; in[0] = p1; - return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, @@ -163,7 +164,7 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, in[0] = p1; in[1] = p2; - return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, @@ -175,7 +176,7 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, in[0] = p1; in[1] = p2; in[2] = p3; - return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, @@ -189,7 +190,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, in[1] = p2; in[2] = p3; in[3] = p4; - return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 867db1de8949..a61b133c4f99 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -419,7 +419,7 @@ static void kvm_map_magic_page(void *data) in[0] = KVM_MAGIC_PAGE; in[1] = KVM_MAGIC_PAGE; - kvm_hypercall(in, out, HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE); + kvm_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE)); *features = out[0]; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4d213b8b0fb5..0368a9391b21 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -67,18 +67,18 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) } switch (nr) { - case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE: + case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE): { vcpu->arch.magic_page_pa = param1; vcpu->arch.magic_page_ea = param2; r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7; - r = HC_EV_SUCCESS; + r = EV_SUCCESS; break; } - case HC_VENDOR_KVM | KVM_HC_FEATURES: - r = HC_EV_SUCCESS; + case KVM_HCALL_TOKEN(KVM_HC_FEATURES): + r = EV_SUCCESS; #if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2) /* XXX Missing magic page on 44x */ r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); @@ -87,7 +87,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) /* Second return value is in r4 */ break; default: - r = HC_EV_UNIMPLEMENTED; + r = EV_UNIMPLEMENTED; break; } From 784bafac79e7646e56f40998a6dde0e1ed5595f8 Mon Sep 17 00:00:00 2001 
From: Stuart Yoder Date: Tue, 3 Jul 2012 05:48:51 +0000 Subject: [PATCH 0199/5831] KVM: PPC: add pvinfo for hcall opcodes on e500mc/e5500 Signed-off-by: Liu Yu [stuart: factored this out from idle hcall support in host patch] Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/kvm/powerpc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0368a9391b21..a478e662b2bc 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -751,9 +751,16 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) { + u32 inst_nop = 0x60000000; +#ifdef CONFIG_KVM_BOOKE_HV + u32 inst_sc1 = 0x44000022; + pvinfo->hcall[0] = inst_sc1; + pvinfo->hcall[1] = inst_nop; + pvinfo->hcall[2] = inst_nop; + pvinfo->hcall[3] = inst_nop; +#else u32 inst_lis = 0x3c000000; u32 inst_ori = 0x60000000; - u32 inst_nop = 0x60000000; u32 inst_sc = 0x44000002; u32 inst_imm_mask = 0xffff; @@ -770,6 +777,7 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask); pvinfo->hcall[2] = inst_sc; pvinfo->hcall[3] = inst_nop; +#endif return 0; } From 9202e07636f0c4858ba6c30773a3f160b2b5659a Mon Sep 17 00:00:00 2001 From: Liu Yu-B13201 Date: Tue, 3 Jul 2012 05:48:52 +0000 Subject: [PATCH 0200/5831] KVM: PPC: Add support for ePAPR idle hcall in host kernel And add a new flag definition in kvm_ppc_pvinfo to indicate whether the host supports the EV_IDLE hcall. Signed-off-by: Liu Yu [stuart.yoder@freescale.com: cleanup,fixes for conditions allowing idle] Signed-off-by: Stuart Yoder [agraf: fix typo] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 7 +++++-- arch/powerpc/include/asm/Kbuild | 1 + arch/powerpc/kvm/powerpc.c | 10 ++++++++-- include/linux/kvm.h | 2 ++ 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index f6ec3a92e621..170afb1fbc2e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1194,12 +1194,15 @@ struct kvm_ppc_pvinfo { This ioctl fetches PV specific information that need to be passed to the guest using the device tree or other means from vm context. -For now the only implemented piece of information distributed here is an array -of 4 instructions that make up a hypercall. +The hcall array defines 4 instructions that make up a hypercall. If any additional field gets added to this structure later on, a bit for that additional piece of information will be set in the flags bitmap. 
+The flags bitmap is defined as: + + /* the host supports the ePAPR idle hcall + #define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) 4.48 KVM_ASSIGN_PCI_DEVICE diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 7e313f1ed183..13d6b7bf3b69 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -34,5 +34,6 @@ header-y += termios.h header-y += types.h header-y += ucontext.h header-y += unistd.h +header-y += epapr_hcalls.h generic-y += rwsem.h diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index a478e662b2bc..dbf56e173c25 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -38,8 +38,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return !(v->arch.shared->msr & MSR_WE) || - !!(v->arch.pending_exceptions) || + return !!(v->arch.pending_exceptions) || v->requests; } @@ -86,6 +85,11 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) /* Second return value is in r4 */ break; + case EV_HCALL_TOKEN(EV_IDLE): + r = EV_SUCCESS; + kvm_vcpu_block(vcpu); + clear_bit(KVM_REQ_UNHALT, &vcpu->requests); + break; default: r = EV_UNIMPLEMENTED; break; @@ -779,6 +783,8 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) pvinfo->hcall[3] = inst_nop; #endif + pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE; + return 0; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 0a6d6ba44c85..4cb3761bebae 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -477,6 +477,8 @@ struct kvm_ppc_smmu_info { struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; }; +#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) + #define KVMIO 0xAE /* machine type bits, to be used as argument to KVM_CREATE_VM */ From 2f979de8a716bdbdc9f4db532652fbca08ed710c Mon Sep 17 00:00:00 2001 From: Liu Yu-B13201 Date: Tue, 3 Jul 2012 05:48:53 +0000 Subject: [PATCH 0201/5831] KVM: PPC: ev_idle hcall support for e500 guests Signed-off-by: Liu Yu [varun: 64-bit changes] Signed-off-by: Varun Sethi Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/epapr_hcalls.h | 11 +++++----- arch/powerpc/kernel/epapr_hcalls.S | 28 +++++++++++++++++++++++++ arch/powerpc/kernel/epapr_paravirt.c | 11 +++++++++- 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index c0c7adcd21e3..833ce2c2d505 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -50,10 +50,6 @@ #ifndef _EPAPR_HCALLS_H #define _EPAPR_HCALLS_H -#include -#include -#include - #define EV_BYTE_CHANNEL_SEND 1 #define EV_BYTE_CHANNEL_RECEIVE 2 #define EV_BYTE_CHANNEL_POLL 3 @@ -109,6 +105,11 @@ #define EV_UNIMPLEMENTED 12 /* Unimplemented hypercall */ #define EV_BUFFER_OVERFLOW 13 /* Caller-supplied buffer too small */ +#ifndef __ASSEMBLY__ +#include +#include +#include + /* * Hypercall register clobber list * @@ -506,5 +507,5 @@ static inline unsigned int ev_idle(void) return r3; } - +#endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S index 697b390ebfd8..62c0dc237826 100644 --- a/arch/powerpc/kernel/epapr_hcalls.S +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -8,13 +8,41 @@ */ #include +#include #include #include #include #include #include +#include #include +/* epapr_ev_idle() was derived from e500_idle() */ +_GLOBAL(epapr_ev_idle) + CURRENT_THREAD_INFO(r3, r1) + PPC_LL r4, TI_LOCAL_FLAGS(r3) /* set napping bit */ + ori r4, 
r4,_TLF_NAPPING /* so when we take an exception */ + PPC_STL r4, TI_LOCAL_FLAGS(r3) /* it will return to our caller */ + + wrteei 1 + +idle_loop: + LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE)) + +.global epapr_ev_idle_start +epapr_ev_idle_start: + li r3, -1 + nop + nop + nop + + /* + * Guard against spurious wakeups from a hypervisor -- + * only interrupt will cause us to return to LR due to + * _TLF_NAPPING. + */ + b idle_loop + /* Hypercall entry point. Will be patched with device tree instructions. */ .global epapr_hypercall_start epapr_hypercall_start: diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index 028aeae370b6..f3eab8594d9f 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -21,6 +21,10 @@ #include #include #include +#include + +extern void epapr_ev_idle(void); +extern u32 epapr_ev_idle_start[]; bool epapr_paravirt_enabled; @@ -41,8 +45,13 @@ static int __init epapr_paravirt_init(void) if (len % 4 || len > (4 * 4)) return -ENODEV; - for (i = 0; i < (len / 4); i++) + for (i = 0; i < (len / 4); i++) { patch_instruction(epapr_hypercall_start + i, insts[i]); + patch_instruction(epapr_ev_idle_start + i, insts[i]); + } + + if (of_get_property(hyper_node, "has-idle", NULL)) + ppc_md.power_save = epapr_ev_idle; epapr_paravirt_enabled = true; From 40656397241860bb21f2802af17ac1de607fb7a9 Mon Sep 17 00:00:00 2001 From: Stuart Yoder Date: Tue, 3 Jul 2012 05:48:54 +0000 Subject: [PATCH 0202/5831] PPC: select EPAPR_PARAVIRT for all users of epapr hcalls Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/platforms/Kconfig | 1 + drivers/tty/Kconfig | 1 + drivers/virt/Kconfig | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index e7a896acd982..48a920d51489 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -90,6 +90,7 @@ config MPIC config PPC_EPAPR_HV_PIC bool default n + select EPAPR_PARAVIRT config MPIC_WEIRD bool diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index 830cd62d8492..aa99cd26ba8b 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -358,6 +358,7 @@ config TRACE_SINK config PPC_EPAPR_HV_BYTECHAN tristate "ePAPR hypervisor byte channel driver" depends on PPC + select EPAPR_PARAVIRT help This driver creates /dev entries for each ePAPR hypervisor byte channel, thereby allowing applications to communicate with byte diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig index 2dcdbc9364d8..99ebdde590f8 100644 --- a/drivers/virt/Kconfig +++ b/drivers/virt/Kconfig @@ -15,6 +15,7 @@ if VIRT_DRIVERS config FSL_HV_MANAGER tristate "Freescale hypervisor management driver" depends on FSL_SOC + select EPAPR_PARAVIRT help The Freescale hypervisor management driver provides several services to drivers and applications related to the Freescale hypervisor: From 305bcf26128e380bb1296d2802387659ab8b038e Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 3 Jul 2012 05:48:55 +0000 Subject: [PATCH 0203/5831] powerpc/fsl-soc: use CONFIG_EPAPR_PARAVIRT for hcalls Signed-off-by: Scott Wood Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/sysdev/fsl_msi.c | 9 +++++++-- arch/powerpc/sysdev/fsl_soc.c | 2 ++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 6e097de00e09..7e2b2f2e3ecd 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -236,7 
+236,6 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) u32 intr_index; u32 have_shift = 0; struct fsl_msi_cascade_data *cascade_data; - unsigned int ret; cascade_data = irq_get_handler_data(irq); msi_data = cascade_data->msi_data; @@ -268,7 +267,9 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) case FSL_PIC_IP_IPIC: msir_value = fsl_msi_read(msi_data->msi_regs, msir_index * 0x4); break; - case FSL_PIC_IP_VMPIC: +#ifdef CONFIG_EPAPR_PARAVIRT + case FSL_PIC_IP_VMPIC: { + unsigned int ret; ret = fh_vmpic_get_msir(virq_to_hw(irq), &msir_value); if (ret) { pr_err("fsl-msi: fh_vmpic_get_msir() failed for " @@ -277,6 +278,8 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) } break; } +#endif + } while (msir_value) { intr_index = ffs(msir_value) - 1; @@ -508,10 +511,12 @@ static const struct of_device_id fsl_of_msi_ids[] = { .compatible = "fsl,ipic-msi", .data = (void *)&ipic_msi_feature, }, +#ifdef CONFIG_EPAPR_PARAVIRT { .compatible = "fsl,vmpic-msi", .data = (void *)&vmpic_msi_feature, }, +#endif {} }; diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index c449dbd1c938..97118dc3d285 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -253,6 +253,7 @@ struct platform_diu_data_ops diu_ops; EXPORT_SYMBOL(diu_ops); #endif +#ifdef CONFIG_EPAPR_PARAVIRT /* * Restart the current partition * @@ -278,3 +279,4 @@ void fsl_hv_halt(void) pr_info("hv exit\n"); fh_partition_stop(-1); } +#endif From 8e525d59d024f54b88a038faac38f76b9094774e Mon Sep 17 00:00:00 2001 From: Liu Yu-B13201 Date: Tue, 3 Jul 2012 05:48:56 +0000 Subject: [PATCH 0204/5831] PPC: Don't use hardcoded opcode for ePAPR hcall invocation Signed-off-by: Liu Yu Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/epapr_hcalls.h | 22 +++++++-------- arch/powerpc/include/asm/fsl_hcalls.h | 36 ++++++++++++------------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index 833ce2c2d505..b8d94459a929 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -195,7 +195,7 @@ static inline unsigned int ev_int_set_config(unsigned int interrupt, r5 = priority; r6 = destination; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6) : : EV_HCALL_CLOBBERS4 ); @@ -224,7 +224,7 @@ static inline unsigned int ev_int_get_config(unsigned int interrupt, r11 = EV_HCALL_TOKEN(EV_INT_GET_CONFIG); r3 = interrupt; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5), "=r" (r6) : : EV_HCALL_CLOBBERS4 ); @@ -254,7 +254,7 @@ static inline unsigned int ev_int_set_mask(unsigned int interrupt, r3 = interrupt; r4 = mask; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -279,7 +279,7 @@ static inline unsigned int ev_int_get_mask(unsigned int interrupt, r11 = EV_HCALL_TOKEN(EV_INT_GET_MASK); r3 = interrupt; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -307,7 +307,7 @@ static inline unsigned int ev_int_eoi(unsigned int interrupt) r11 = EV_HCALL_TOKEN(EV_INT_EOI); r3 = interrupt; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), 
"+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -346,7 +346,7 @@ static inline unsigned int ev_byte_channel_send(unsigned int handle, r7 = be32_to_cpu(p[2]); r8 = be32_to_cpu(p[3]); - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8) : : EV_HCALL_CLOBBERS6 @@ -385,7 +385,7 @@ static inline unsigned int ev_byte_channel_receive(unsigned int handle, r3 = handle; r4 = *count; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "=r" (r5), "=r" (r6), "=r" (r7), "=r" (r8) : : EV_HCALL_CLOBBERS6 @@ -423,7 +423,7 @@ static inline unsigned int ev_byte_channel_poll(unsigned int handle, r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_POLL); r3 = handle; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5) : : EV_HCALL_CLOBBERS3 ); @@ -456,7 +456,7 @@ static inline unsigned int ev_int_iack(unsigned int handle, r11 = EV_HCALL_TOKEN(EV_INT_IACK); r3 = handle; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -480,7 +480,7 @@ static inline unsigned int ev_doorbell_send(unsigned int handle) r11 = EV_HCALL_TOKEN(EV_DOORBELL_SEND); r3 = handle; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -500,7 +500,7 @@ static inline unsigned int ev_idle(void) r11 = EV_HCALL_TOKEN(EV_IDLE); - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "=r" (r3) : : EV_HCALL_CLOBBERS1 ); diff --git a/arch/powerpc/include/asm/fsl_hcalls.h b/arch/powerpc/include/asm/fsl_hcalls.h index 922d9b5fe3d5..3abb58394da4 100644 --- a/arch/powerpc/include/asm/fsl_hcalls.h +++ b/arch/powerpc/include/asm/fsl_hcalls.h @@ -96,7 +96,7 @@ static inline unsigned int fh_send_nmi(unsigned int vcpu_mask) r11 = FH_HCALL_TOKEN(FH_SEND_NMI); r3 = vcpu_mask; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -151,7 +151,7 @@ static inline unsigned int fh_partition_get_dtprop(int handle, r9 = (uint32_t)propvalue_addr; r10 = *propvalue_len; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8), "+r" (r9), "+r" (r10) @@ -205,7 +205,7 @@ static inline unsigned int fh_partition_set_dtprop(int handle, r9 = (uint32_t)propvalue_addr; r10 = propvalue_len; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8), "+r" (r9), "+r" (r10) @@ -229,7 +229,7 @@ static inline unsigned int fh_partition_restart(unsigned int partition) r11 = FH_HCALL_TOKEN(FH_PARTITION_RESTART); r3 = partition; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -262,7 +262,7 @@ static inline unsigned int fh_partition_get_status(unsigned int partition, r11 = FH_HCALL_TOKEN(FH_PARTITION_GET_STATUS); r3 = partition; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -295,7 +295,7 @@ static inline unsigned int fh_partition_start(unsigned int partition, r4 = entry_point; r5 = load; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" 
(r4), "+r" (r5) : : EV_HCALL_CLOBBERS3 ); @@ -317,7 +317,7 @@ static inline unsigned int fh_partition_stop(unsigned int partition) r11 = FH_HCALL_TOKEN(FH_PARTITION_STOP); r3 = partition; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -376,7 +376,7 @@ static inline unsigned int fh_partition_memcpy(unsigned int source, #endif r7 = count; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7) : : EV_HCALL_CLOBBERS5 @@ -399,7 +399,7 @@ static inline unsigned int fh_dma_enable(unsigned int liodn) r11 = FH_HCALL_TOKEN(FH_DMA_ENABLE); r3 = liodn; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -421,7 +421,7 @@ static inline unsigned int fh_dma_disable(unsigned int liodn) r11 = FH_HCALL_TOKEN(FH_DMA_DISABLE); r3 = liodn; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -447,7 +447,7 @@ static inline unsigned int fh_vmpic_get_msir(unsigned int interrupt, r11 = FH_HCALL_TOKEN(FH_VMPIC_GET_MSIR); r3 = interrupt; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -469,7 +469,7 @@ static inline unsigned int fh_system_reset(void) r11 = FH_HCALL_TOKEN(FH_SYSTEM_RESET); - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "=r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -506,7 +506,7 @@ static inline unsigned int fh_err_get_info(int queue, uint32_t *bufsize, r6 = addr_lo; r7 = peek; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7) : : EV_HCALL_CLOBBERS5 @@ -542,7 +542,7 @@ static inline unsigned int fh_get_core_state(unsigned int handle, r3 = handle; r4 = vcpu; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -572,7 +572,7 @@ static inline unsigned int fh_enter_nap(unsigned int handle, unsigned int vcpu) r3 = handle; r4 = vcpu; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -597,7 +597,7 @@ static inline unsigned int fh_exit_nap(unsigned int handle, unsigned int vcpu) r3 = handle; r4 = vcpu; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -618,7 +618,7 @@ static inline unsigned int fh_claim_device(unsigned int handle) r11 = FH_HCALL_TOKEN(FH_CLAIM_DEVICE); r3 = handle; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -645,7 +645,7 @@ static inline unsigned int fh_partition_stop_dma(unsigned int handle) r11 = FH_HCALL_TOKEN(FH_PARTITION_STOP_DMA); r3 = handle; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); From 97c95059848358f1577f471ec47cf68690f996e4 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 2 Aug 2012 15:10:00 +0200 Subject: [PATCH 0205/5831] KVM: PPC: PR: Use generic tracepoint for guest exit We want to have tracing information on guest exits for booke as well as book3s. Since most information is identical, use a common trace point. 
Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 2 +- arch/powerpc/kvm/booke.c | 3 ++ arch/powerpc/kvm/trace.h | 79 +++++++++++++++++++++++------------- 3 files changed, 55 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 05c28f59f77f..7f0fe6f9e297 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -549,7 +549,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* We get here with MSR.EE=0, so enable it to be a nice citizen */ __hard_irq_enable(); - trace_kvm_book3s_exit(exit_nr, vcpu); + trace_kvm_exit(exit_nr, vcpu); preempt_enable(); kvm_resched(vcpu); switch (exit_nr) { diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index d25a097c852b..7ce2ed07831f 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -39,6 +39,7 @@ #include "timing.h" #include "booke.h" +#include "trace.h" unsigned long kvmppc_booke_handlers; @@ -677,6 +678,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, local_irq_enable(); + trace_kvm_exit(exit_nr, vcpu); + run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 877186b7b1c3..9fab6eddc7e4 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -31,6 +31,57 @@ TRACE_EVENT(kvm_ppc_instr, __entry->inst, __entry->pc, __entry->emulate) ); +TRACE_EVENT(kvm_exit, + TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), + TP_ARGS(exit_nr, vcpu), + + TP_STRUCT__entry( + __field( unsigned int, exit_nr ) + __field( unsigned long, pc ) + __field( unsigned long, msr ) + __field( unsigned long, dar ) +#ifdef CONFIG_KVM_BOOK3S_PR + __field( unsigned long, srr1 ) +#endif + __field( unsigned long, last_inst ) + ), + + TP_fast_assign( +#ifdef CONFIG_KVM_BOOK3S_PR + struct kvmppc_book3s_shadow_vcpu *svcpu; +#endif + __entry->exit_nr = exit_nr; + __entry->pc = kvmppc_get_pc(vcpu); + __entry->dar = kvmppc_get_fault_dar(vcpu); + __entry->msr = vcpu->arch.shared->msr; +#ifdef CONFIG_KVM_BOOK3S_PR + svcpu = svcpu_get(vcpu); + __entry->srr1 = svcpu->shadow_srr1; + svcpu_put(svcpu); +#endif + __entry->last_inst = vcpu->arch.last_inst; + ), + + TP_printk("exit=0x%x" + " | pc=0x%lx" + " | msr=0x%lx" + " | dar=0x%lx" +#ifdef CONFIG_KVM_BOOK3S_PR + " | srr1=0x%lx" +#endif + " | last_inst=0x%lx" + , + __entry->exit_nr, + __entry->pc, + __entry->msr, + __entry->dar, +#ifdef CONFIG_KVM_BOOK3S_PR + __entry->srr1, +#endif + __entry->last_inst + ) +); + TRACE_EVENT(kvm_stlb_inval, TP_PROTO(unsigned int stlb_index), TP_ARGS(stlb_index), @@ -105,34 +156,6 @@ TRACE_EVENT(kvm_gtlb_write, #ifdef CONFIG_KVM_BOOK3S_PR -TRACE_EVENT(kvm_book3s_exit, - TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), - TP_ARGS(exit_nr, vcpu), - - TP_STRUCT__entry( - __field( unsigned int, exit_nr ) - __field( unsigned long, pc ) - __field( unsigned long, msr ) - __field( unsigned long, dar ) - __field( unsigned long, srr1 ) - ), - - TP_fast_assign( - struct kvmppc_book3s_shadow_vcpu *svcpu; - __entry->exit_nr = exit_nr; - __entry->pc = kvmppc_get_pc(vcpu); - __entry->dar = kvmppc_get_fault_dar(vcpu); - __entry->msr = vcpu->arch.shared->msr; - svcpu = svcpu_get(vcpu); - __entry->srr1 = svcpu->shadow_srr1; - svcpu_put(svcpu); - ), - - TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx", - __entry->exit_nr, __entry->pc, __entry->msr, __entry->dar, - __entry->srr1) -); - TRACE_EVENT(kvm_book3s_reenter, TP_PROTO(int r, 
struct kvm_vcpu *vcpu), TP_ARGS(r, vcpu), From f4800b1f4d23156e9080a08d6114e5d8bb767964 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 7 Aug 2012 10:24:14 +0200 Subject: [PATCH 0206/5831] KVM: PPC: Expose SYNC cap based on mmu notifiers Semantically, the "SYNC" cap means that we have mmu notifiers available. Express this in our #ifdef'ery around the feature, so that we can be sure we don't miss out on ppc targets when they get their implementation. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/powerpc.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index dbf56e173c25..45fe433316ea 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -264,10 +264,16 @@ int kvm_dev_ioctl_check_extension(long ext) if (cpu_has_feature(CPU_FTR_ARCH_201)) r = 2; break; - case KVM_CAP_SYNC_MMU: - r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; - break; #endif + case KVM_CAP_SYNC_MMU: +#ifdef CONFIG_KVM_BOOK3S_64_HV + r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; +#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER) + r = 1; +#else + r = 0; +#endif + break; case KVM_CAP_NR_VCPUS: /* * Recommending a number of CPUs is somewhat arbitrary; we From cf1c5ca47319d9eb49166859921822fea354d4b3 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 1 Aug 2012 12:56:51 +0200 Subject: [PATCH 0207/5831] KVM: PPC: BookE: Expose remote TLB flushes in debugfs We're already counting remote TLB flushes in a variable, but don't export it to user space yet. Do so, so we know what's going on. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 7ce2ed07831f..1d4ce9a80f55 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -63,6 +63,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "doorbell", VCPU_STAT(dbell_exits) }, { "guest doorbell", VCPU_STAT(gdbell_exits) }, + { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, { NULL } }; From 2bb890f5ee79c85b9d3b7df37ecb639d8d4b961e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 2 Aug 2012 13:38:49 +0200 Subject: [PATCH 0208/5831] KVM: PPC: E500: Fix clear_tlb_refs Our mapping code assumes that TLB0 entries are always mapped. However, after calling clear_tlb_refs() this is no longer the case. Map them dynamically if we find an entry unmapped in TLB0. 
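Condensed, the change in kvmppc_mmu_map() amounts to checking the cached reference before trusting it; this is a restatement of the e500_tlb.c hunk below, not additional code:

    priv = &vcpu_e500->gtlb_priv[tlbsel][esel];

    if (unlikely(!(priv->ref.flags & E500_TLB_VALID)))
            /* ref was torn down by clear_tlb_refs(); rebuild the mapping */
            kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
    else
            /* fast path: the cached host mapping is still usable */
            kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
                                    &priv->ref, eaddr, &stlbe);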
Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index ff38b664195d..b56b6e14df6c 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1039,8 +1039,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, sesel = 0; /* unused */ priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; - kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, - &priv->ref, eaddr, &stlbe); + /* Only triggers after clear_tlb_refs */ + if (unlikely(!(priv->ref.flags & E500_TLB_VALID))) + kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); + else + kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, + &priv->ref, eaddr, &stlbe); break; case 1: { From 1340f3e8871b9f35b39c33d0140383c6c6c1f005 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 6 Aug 2012 00:04:14 +0000 Subject: [PATCH 0209/5831] KVM: PPC: Quieten message about allocating linear regions This is printed once for every RMA or HPT region that get preallocated. If one preallocates hundreds of such regions (in order to run hundreds of KVM guests), that gets rather painful, so make it a bit quieter. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv_builtin.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index fb4eac290fef..ec0a9e5de100 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -157,8 +157,8 @@ static void __init kvm_linear_init_one(ulong size, int count, int type) linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info)); for (i = 0; i < count; ++i) { linear = alloc_bootmem_align(size, size); - pr_info("Allocated KVM %s at %p (%ld MB)\n", typestr, linear, - size >> 20); + pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear, + size >> 20); linear_info[i].base_virt = linear; linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT; linear_info[i].npages = npages; From 8043e494da644ec174f7df0b67f88ccf8777a1ce Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 10 Aug 2012 12:21:21 +0000 Subject: [PATCH 0210/5831] powerpc/epapr: export epapr_hypercall_start This fixes breakage introduced by the following commit: commit 6d2d82627f4f1e96a33664ace494fa363e0495cb Author: Liu Yu-B13201 Date: Tue Jul 3 05:48:56 2012 +0000 PPC: Don't use hardcoded opcode for ePAPR hcall invocation when a driver that uses ePAPR hypercalls is built as a module. Reported-by: Geert Uytterhoeven Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kernel/ppc_ksyms.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 3e4031581c65..e597dde124e8 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef CONFIG_PPC32 extern void transfer_to_handler(void); @@ -192,3 +193,7 @@ EXPORT_SYMBOL(__arch_hweight64); #ifdef CONFIG_PPC_BOOK3S_64 EXPORT_SYMBOL_GPL(mmu_psize_defs); #endif + +#ifdef CONFIG_EPAPR_PARAVIRT +EXPORT_SYMBOL(epapr_hypercall_start); +#endif From 4ffc6356ec690f77f65b7b78e0047a3fe8316371 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 8 Aug 2012 20:31:13 +0200 Subject: [PATCH 0211/5831] KVM: PPC: BookE: Add check_requests helper function We need a central place to check for pending requests in. 
Add one that only does the timer check we already do in a different place. Later, this central function can be extended by more checks. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1d4ce9a80f55..bcf87fe89179 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -419,13 +419,6 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) unsigned long *pending = &vcpu->arch.pending_exceptions; unsigned int priority; - if (vcpu->requests) { - if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) { - smp_mb(); - update_timer_ints(vcpu); - } - } - priority = __ffs(*pending); while (priority < BOOKE_IRQPRIO_MAX) { if (kvmppc_booke_irqprio_deliver(vcpu, priority)) @@ -461,6 +454,14 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) return r; } +static void kvmppc_check_requests(struct kvm_vcpu *vcpu) +{ + if (vcpu->requests) { + if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) + update_timer_ints(vcpu); + } +} + /* * Common checks before entering the guest world. Call with interrupts * disabled. @@ -485,6 +486,15 @@ static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) break; } + smp_mb(); + if (vcpu->requests) { + /* Make sure we process requests preemptable */ + local_irq_enable(); + kvmppc_check_requests(vcpu); + local_irq_disable(); + continue; + } + if (kvmppc_core_prepare_to_enter(vcpu)) { /* interrupts got enabled in between, so we are back at square 1 */ From d69c6436443c05a64452054f51a79316297755f4 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 8 Aug 2012 20:44:20 +0200 Subject: [PATCH 0212/5831] KVM: PPC: BookE: Add support for vcpu->mode Generic KVM code might want to know whether we are inside guest context or outside. It also wants to be able to push us out of guest context. Add support to the BookE code for the generic vcpu->mode field that describes the above states. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index bcf87fe89179..70a86c0a9d85 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -501,6 +501,15 @@ static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) continue; } + if (vcpu->mode == EXITING_GUEST_MODE) { + r = 1; + break; + } + + /* Going into guest context! Yay! */ + vcpu->mode = IN_GUEST_MODE; + smp_wmb(); + break; } @@ -572,6 +581,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvm_guest_exit(); out: + vcpu->mode = OUTSIDE_GUEST_MODE; + smp_wmb(); local_irq_enable(); return ret; } From 862d31f788f9a249f7656d02d8d4006e306108ce Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 31 Jul 2012 00:19:50 +0200 Subject: [PATCH 0213/5831] KVM: PPC: E500: Implement MMU notifiers The e500 target has lived without mmu notifiers ever since it got introduced, but fails for the user space check on them with hugetlbfs. So in order to get that one working, implement mmu notifiers in a reasonably dumb fashion and be happy. On embedded hardware, we almost never end up with mmu notifier calls, since most people don't overcommit. 
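"Reasonably dumb" means every notifier callback funnels into one big hammer: flush all shadow TLB entries and let the fault path rebuild them. A condensed sketch of the shape this takes (the full hunks are below; kvm_flush_remote_tlbs() raises KVM_REQ_TLB_FLUSH on every vcpu, which then gets serviced by kvmppc_core_flush_tlb()):

    int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
    {
            /* Slow but safe: request that every shadow TLB entry be dropped. */
            kvm_flush_remote_tlbs(kvm);
            return 0;
    }

    int kvm_age_hva(struct kvm *kvm, unsigned long hva)
    {
            /* No access tracking yet; report nothing. */
            return 0;
    }

    void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu)
    {
            struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);

            /* Invalidate all cached host mappings; faults remap on demand. */
            clear_tlb_refs(vcpu_e500);
            clear_tlb1_bitmap(vcpu_e500);
    }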
Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 3 +- arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/Kconfig | 2 + arch/powerpc/kvm/booke.c | 6 +++ arch/powerpc/kvm/e500_tlb.c | 60 ++++++++++++++++++++++++++--- 5 files changed, 65 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 28e8f5e5c63e..cea9d3aab71c 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -46,7 +46,8 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif -#ifdef CONFIG_KVM_BOOK3S_64_HV +#if defined(CONFIG_KVM_BOOK3S_64_HV) || defined(CONFIG_KVM_E500V2) || \ + defined(CONFIG_KVM_E500MC) #include #define KVM_ARCH_WANT_MMU_NOTIFIER diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e006f0bdea95..88de3146838b 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -104,6 +104,7 @@ extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); +extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int op, int *advance); diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index f4dacb9c57fa..40cad8c8bd0e 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -123,6 +123,7 @@ config KVM_E500V2 depends on EXPERIMENTAL && E500 && !PPC_E500MC select KVM select KVM_MMIO + select MMU_NOTIFIER ---help--- Support running unmodified E500 guest kernels in virtual machines on E500v2 host processors. @@ -138,6 +139,7 @@ config KVM_E500MC select KVM select KVM_MMIO select KVM_BOOKE_HV + select MMU_NOTIFIER ---help--- Support running unmodified E500MC/E5500 (32-bit) guest kernels in virtual machines on E500MC/E5500 host processors. 
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 70a86c0a9d85..52f6cbb4923e 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -459,6 +459,10 @@ static void kvmppc_check_requests(struct kvm_vcpu *vcpu) if (vcpu->requests) { if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) update_timer_ints(vcpu); +#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) + kvmppc_core_flush_tlb(vcpu); +#endif } } @@ -579,6 +583,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #endif kvm_guest_exit(); + vcpu->mode = OUTSIDE_GUEST_MODE; + smp_wmb(); out: vcpu->mode = OUTSIDE_GUEST_MODE; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index b56b6e14df6c..de8ea29409f2 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -303,18 +303,15 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, ref->pfn = pfn; ref->flags = E500_TLB_VALID; - if (tlbe_is_writable(gtlbe)) + if (tlbe_is_writable(gtlbe)) { ref->flags |= E500_TLB_DIRTY; + kvm_set_pfn_dirty(pfn); + } } static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) { if (ref->flags & E500_TLB_VALID) { - if (ref->flags & E500_TLB_DIRTY) - kvm_release_pfn_dirty(ref->pfn); - else - kvm_release_pfn_clean(ref->pfn); - ref->flags = 0; } } @@ -357,6 +354,13 @@ static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) clear_tlb_privs(vcpu_e500); } +void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + clear_tlb_refs(vcpu_e500); + clear_tlb1_bitmap(vcpu_e500); +} + static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, unsigned int eaddr, int as) { @@ -541,6 +545,9 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, /* Clear i-cache for new pages */ kvmppc_mmu_flush_icache(pfn); + + /* Drop refcount on page, so that mmu notifiers can clear it */ + kvm_release_pfn_clean(pfn); } /* XXX only map the one-one case, for now use TLB0 */ @@ -1064,6 +1071,47 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); } +/************* MMU Notifiers *************/ + +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + /* + * Flush all shadow tlb entries everywhere. This is slow, but + * we are 100% sure that we catch the to be unmapped page + */ + kvm_flush_remote_tlbs(kvm); + + return 0; +} + +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +{ + /* kvm_unmap_hva flushes everything anyways */ + kvm_unmap_hva(kvm, start); + + return 0; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + /* XXX could be more clever ;) */ + return 0; +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + /* XXX could be more clever ;) */ + return 0; +} + +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + /* The page will get remapped properly on its next fault */ + kvm_unmap_hva(kvm, hva); +} + +/*****************************************/ + static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) { int i; From 6346046c3a69edc9149311473b940f3af7c93752 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 8 Aug 2012 00:44:52 +0200 Subject: [PATCH 0214/5831] KVM: PPC: BookE: Add some more trace points Without trace points, debugging what exactly is going on inside guest code can be very tricky. 
Add a few more trace points at places that hopefully tell us more when things go wrong. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 3 ++ arch/powerpc/kvm/e500_tlb.c | 3 ++ arch/powerpc/kvm/trace.h | 71 +++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 52f6cbb4923e..00bcc57428c7 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -143,6 +143,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int priority) { + trace_kvm_booke_queue_irqprio(vcpu, priority); set_bit(priority, &vcpu->arch.pending_exceptions); } @@ -457,6 +458,8 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) static void kvmppc_check_requests(struct kvm_vcpu *vcpu) { if (vcpu->requests) { + trace_kvm_check_requests(vcpu); + if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) update_timer_ints(vcpu); #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index de8ea29409f2..1af6fab58995 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -312,6 +312,7 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) { if (ref->flags & E500_TLB_VALID) { + trace_kvm_booke206_ref_release(ref->pfn, ref->flags); ref->flags = 0; } } @@ -1075,6 +1076,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) { + trace_kvm_unmap_hva(hva); + /* * Flush all shadow tlb entries everywhere. This is slow, but * we are 100% sure that we catch the to be unmapped page diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 9fab6eddc7e4..cb2780a42fd8 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -82,6 +82,21 @@ TRACE_EVENT(kvm_exit, ) ); +TRACE_EVENT(kvm_unmap_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("unmap hva 0x%lx\n", __entry->hva) +); + TRACE_EVENT(kvm_stlb_inval, TP_PROTO(unsigned int stlb_index), TP_ARGS(stlb_index), @@ -149,6 +164,24 @@ TRACE_EVENT(kvm_gtlb_write, __entry->word1, __entry->word2) ); +TRACE_EVENT(kvm_check_requests, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field( __u32, cpu_nr ) + __field( __u32, requests ) + ), + + TP_fast_assign( + __entry->cpu_nr = vcpu->vcpu_id; + __entry->requests = vcpu->requests; + ), + + TP_printk("vcpu=%x requests=%x", + __entry->cpu_nr, __entry->requests) +); + /************************************************************************* * Book3S trace points * @@ -418,6 +451,44 @@ TRACE_EVENT(kvm_booke206_gtlb_write, __entry->mas2, __entry->mas7_3) ); +TRACE_EVENT(kvm_booke206_ref_release, + TP_PROTO(__u64 pfn, __u32 flags), + TP_ARGS(pfn, flags), + + TP_STRUCT__entry( + __field( __u64, pfn ) + __field( __u32, flags ) + ), + + TP_fast_assign( + __entry->pfn = pfn; + __entry->flags = flags; + ), + + TP_printk("pfn=%llx flags=%x", + __entry->pfn, __entry->flags) +); + +TRACE_EVENT(kvm_booke_queue_irqprio, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority), + TP_ARGS(vcpu, priority), + + TP_STRUCT__entry( + __field( __u32, cpu_nr ) + __field( __u32, priority ) + __field( unsigned long, pending ) + ), + + TP_fast_assign( + __entry->cpu_nr = 
vcpu->vcpu_id; + __entry->priority = priority; + __entry->pending = vcpu->arch.pending_exceptions; + ), + + TP_printk("vcpu=%x prio=%x pending=%lx", + __entry->cpu_nr, __entry->priority, __entry->pending) +); + #endif #endif /* _TRACE_KVM_H */ From 2d8185d4ee22f425001d28d1817fc8d478e6fa02 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 10 Aug 2012 12:31:12 +0200 Subject: [PATCH 0215/5831] KVM: PPC: BookE: No duplicate request != 0 check We only call kvmppc_check_requests() when vcpu->requests != 0, so drop the redundant check in the function itself Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 00bcc57428c7..683cbd686d01 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -457,16 +457,14 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) static void kvmppc_check_requests(struct kvm_vcpu *vcpu) { - if (vcpu->requests) { - trace_kvm_check_requests(vcpu); + trace_kvm_check_requests(vcpu); - if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) - update_timer_ints(vcpu); + if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) + update_timer_ints(vcpu); #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) - if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) - kvmppc_core_flush_tlb(vcpu); + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) + kvmppc_core_flush_tlb(vcpu); #endif - } } /* From 03d25c5bd5c3125055bd36f4813ddb817def19dd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 10 Aug 2012 12:28:50 +0200 Subject: [PATCH 0216/5831] KVM: PPC: Use same kvmppc_prepare_to_enter code for booke and book3s_pr We need to do the same things when preparing to enter a guest for booke and book3s_pr cores. Fold the generic code into a generic function that both call. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 3 ++ arch/powerpc/kvm/book3s_pr.c | 22 ++++-------- arch/powerpc/kvm/booke.c | 58 +----------------------------- arch/powerpc/kvm/powerpc.c | 57 +++++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 73 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 88de3146838b..59b7c87e47f7 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -112,6 +112,7 @@ extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong val); extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *val); +extern void kvmppc_core_check_requests(struct kvm_vcpu *vcpu); extern int kvmppc_booke_init(void); extern void kvmppc_booke_exit(void); @@ -150,6 +151,8 @@ extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, extern int kvmppc_bookehv_init(void); extern void kvmppc_bookehv_exit(void); +extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. 
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 7f0fe6f9e297..cae2defd1462 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -88,6 +88,10 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_VSX); } +void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) +{ +} + static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) { ulong smsr = vcpu->arch.shared->msr; @@ -815,19 +819,9 @@ program_interrupt: * again due to a host external interrupt. */ __hard_irq_disable(); - if (signal_pending(current)) { - __hard_irq_enable(); -#ifdef EXIT_DEBUG - printk(KERN_EMERG "KVM: Going back to host\n"); -#endif - vcpu->stat.signal_exits++; + if (kvmppc_prepare_to_enter(vcpu)) { run->exit_reason = KVM_EXIT_INTR; r = -EINTR; - } else { - /* In case an interrupt came in that was triggered - * from userspace (like DEC), we need to check what - * to inject now! */ - kvmppc_core_prepare_to_enter(vcpu); } } @@ -1029,8 +1023,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) goto out; } - kvmppc_core_prepare_to_enter(vcpu); - /* * Interrupts could be timers for the guest which we have to inject * again, so let's postpone them until we're in the guest and if we @@ -1038,9 +1030,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) * a host external interrupt. */ __hard_irq_disable(); - - /* No need to go into the guest when all we do is going out */ - if (signal_pending(current)) { + if (kvmppc_prepare_to_enter(vcpu)) { __hard_irq_enable(); kvm_run->exit_reason = KVM_EXIT_INTR; ret = -EINTR; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 683cbd686d01..4652e0bfa781 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -455,10 +455,8 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) return r; } -static void kvmppc_check_requests(struct kvm_vcpu *vcpu) +void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) { - trace_kvm_check_requests(vcpu); - if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) update_timer_ints(vcpu); #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) @@ -467,60 +465,6 @@ static void kvmppc_check_requests(struct kvm_vcpu *vcpu) #endif } -/* - * Common checks before entering the guest world. Call with interrupts - * disabled. - * - * returns !0 if a signal is pending and check_signal is true - */ -static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) -{ - int r = 0; - - WARN_ON_ONCE(!irqs_disabled()); - while (true) { - if (need_resched()) { - local_irq_enable(); - cond_resched(); - local_irq_disable(); - continue; - } - - if (signal_pending(current)) { - r = 1; - break; - } - - smp_mb(); - if (vcpu->requests) { - /* Make sure we process requests preemptable */ - local_irq_enable(); - kvmppc_check_requests(vcpu); - local_irq_disable(); - continue; - } - - if (kvmppc_core_prepare_to_enter(vcpu)) { - /* interrupts got enabled in between, so we - are back at square 1 */ - continue; - } - - if (vcpu->mode == EXITING_GUEST_MODE) { - r = 1; - break; - } - - /* Going into guest context! Yay! 
*/ - vcpu->mode = IN_GUEST_MODE; - smp_wmb(); - - break; - } - - return r; -} - int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { int ret; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 45fe433316ea..153a26abc915 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -47,6 +47,63 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return 1; } +#ifndef CONFIG_KVM_BOOK3S_64_HV +/* + * Common checks before entering the guest world. Call with interrupts + * disabled. + * + * returns !0 if a signal is pending and check_signal is true + */ +int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) +{ + int r = 0; + + WARN_ON_ONCE(!irqs_disabled()); + while (true) { + if (need_resched()) { + local_irq_enable(); + cond_resched(); + local_irq_disable(); + continue; + } + + if (signal_pending(current)) { + r = 1; + break; + } + + smp_mb(); + if (vcpu->requests) { + /* Make sure we process requests preemptable */ + local_irq_enable(); + trace_kvm_check_requests(vcpu); + kvmppc_core_check_requests(vcpu); + local_irq_disable(); + continue; + } + + if (kvmppc_core_prepare_to_enter(vcpu)) { + /* interrupts got enabled in between, so we + are back at square 1 */ + continue; + } + + if (vcpu->mode == EXITING_GUEST_MODE) { + r = 1; + break; + } + + /* Going into guest context! Yay! */ + vcpu->mode = IN_GUEST_MODE; + smp_wmb(); + + break; + } + + return r; +} +#endif /* CONFIG_KVM_BOOK3S_64_HV */ + int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) { int nr = kvmppc_get_gpr(vcpu, 11); From 9b0cb3c808fef0d75d6f79ab9684246e6879f9c1 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 10 Aug 2012 13:23:55 +0200 Subject: [PATCH 0217/5831] KVM: PPC: Book3s: PR: Add (dumb) MMU Notifier support Now that we have very simple MMU Notifier support for e500 in place, also add the same simple support to book3s. It gets us one step closer to actual fast support. 
Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 3 +- arch/powerpc/kvm/Kconfig | 1 + arch/powerpc/kvm/book3s_32_mmu_host.c | 1 + arch/powerpc/kvm/book3s_64_mmu_host.c | 1 + arch/powerpc/kvm/book3s_mmu_hpte.c | 5 --- arch/powerpc/kvm/book3s_pr.c | 47 +++++++++++++++++++++++++++ 6 files changed, 51 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index cea9d3aab71c..4a5ec8f573c7 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -46,8 +46,7 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif -#if defined(CONFIG_KVM_BOOK3S_64_HV) || defined(CONFIG_KVM_E500V2) || \ - defined(CONFIG_KVM_E500MC) +#if !defined(CONFIG_KVM_440) #include #define KVM_ARCH_WANT_MMU_NOTIFIER diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 40cad8c8bd0e..71f0cd9edf33 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -36,6 +36,7 @@ config KVM_BOOK3S_64_HANDLER config KVM_BOOK3S_PR bool select KVM_MMIO + select MMU_NOTIFIER config KVM_BOOK3S_32 tristate "KVM support for PowerPC book3s_32 processors" diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 837f13e7b6bf..9fac0101ffb9 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -254,6 +254,7 @@ next_pteg: kvmppc_mmu_hpte_cache_map(vcpu, pte); + kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); out: return r; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 0688b6b39585..6b2c80e49681 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -168,6 +168,7 @@ map_again: kvmppc_mmu_hpte_cache_map(vcpu, pte); } + kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); out: return r; diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index 41cb0017e757..2c86b0d63714 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -114,11 +114,6 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) hlist_del_init_rcu(&pte->list_vpte); hlist_del_init_rcu(&pte->list_vpte_long); - if (pte->pte.may_write) - kvm_release_pfn_dirty(pte->pfn); - else - kvm_release_pfn_clean(pte->pfn); - spin_unlock(&vcpu3s->mmu_lock); vcpu3s->hpte_cache_count--; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index cae2defd1462..10f8217b8c38 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -90,8 +90,55 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) { + /* We misuse TLB_FLUSH to indicate that we want to clear + all shadow cache entries */ + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) + kvmppc_mmu_pte_flush(vcpu, 0, 0); } +/************* MMU Notifiers *************/ + +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + trace_kvm_unmap_hva(hva); + + /* + * Flush all shadow tlb entries everywhere. 
This is slow, but + * we are 100% sure that we catch the to be unmapped page + */ + kvm_flush_remote_tlbs(kvm); + + return 0; +} + +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +{ + /* kvm_unmap_hva flushes everything anyways */ + kvm_unmap_hva(kvm, start); + + return 0; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + /* XXX could be more clever ;) */ + return 0; +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + /* XXX could be more clever ;) */ + return 0; +} + +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + /* The page will get remapped properly on its next fault */ + kvm_unmap_hva(kvm, hva); +} + +/*****************************************/ + static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) { ulong smsr = vcpu->arch.shared->msr; From e85ad380c6bf6dcd4776d313c81d16a6293db136 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 12 Aug 2012 11:13:25 +0200 Subject: [PATCH 0218/5831] KVM: PPC: BookE: Drop redundant vcpu->mode set We only need to set vcpu->mode to outside once. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 4652e0bfa781..492c343f598e 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -528,8 +528,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #endif kvm_guest_exit(); - vcpu->mode = OUTSIDE_GUEST_MODE; - smp_wmb(); out: vcpu->mode = OUTSIDE_GUEST_MODE; From c63ddcb4540db95e5a4223cfa8cdbe6efbd5e386 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 12 Aug 2012 11:27:49 +0200 Subject: [PATCH 0219/5831] KVM: PPC: Book3S: PR: Only do resched check once per exit Now that we use our generic exit helper, we can safely drop our previous kvm_resched that we used to trigger at the beginning of the exit handler function. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 10f8217b8c38..2c268a15b20f 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -602,7 +602,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, trace_kvm_exit(exit_nr, vcpu); preempt_enable(); - kvm_resched(vcpu); + switch (exit_nr) { case BOOK3S_INTERRUPT_INST_STORAGE: { From 706fb730cb4f9db2e3de33391475dd0616c2c935 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 12 Aug 2012 11:29:09 +0200 Subject: [PATCH 0220/5831] KVM: PPC: Exit guest context while handling exit The x86 implementation of KVM accounts for host time while processing guest exits. Do the same for us. 
Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 2 ++ arch/powerpc/kvm/booke.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 2c268a15b20f..b4ae11ec068f 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -601,6 +601,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, __hard_irq_enable(); trace_kvm_exit(exit_nr, vcpu); + kvm_guest_exit(); preempt_enable(); switch (exit_nr) { @@ -872,6 +873,7 @@ program_interrupt: } } + kvm_guest_enter(); trace_kvm_book3s_reenter(r, vcpu); return r; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 492c343f598e..887c7cc02146 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -650,6 +650,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, local_irq_enable(); trace_kvm_exit(exit_nr, vcpu); + kvm_guest_exit(); run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; @@ -952,6 +953,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } } + kvm_guest_enter(); + return r; } From 0652eaaebea0995b3236e51dec727d62264f4248 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 12 Aug 2012 11:34:21 +0200 Subject: [PATCH 0221/5831] KVM: PPC: Book3S: PR: Indicate we're out of guest mode When going out of guest mode, indicate that we are in vcpu->mode. That way requests from other CPUs don't needlessly need to kick us to process them, because it'll just happen next time we enter the guest. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index b4ae11ec068f..9430a362e5a3 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1152,6 +1152,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #endif out: + vcpu->mode = OUTSIDE_GUEST_MODE; preempt_enable(); return ret; } From 24afa37b9c8f035d2fe2028e4824bc4e49bafe73 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 12 Aug 2012 12:42:30 +0200 Subject: [PATCH 0222/5831] KVM: PPC: Consistentify vcpu exit path When getting out of __vcpu_run, let's be consistent about the state we return in. We want to always * have IRQs enabled * have called kvm_guest_exit before Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 8 ++++++-- arch/powerpc/kvm/booke.c | 13 ++++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 9430a362e5a3..3dec346c4b93 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -868,12 +868,15 @@ program_interrupt: */ __hard_irq_disable(); if (kvmppc_prepare_to_enter(vcpu)) { + /* local_irq_enable(); */ run->exit_reason = KVM_EXIT_INTR; r = -EINTR; + } else { + /* Going back to guest */ + kvm_guest_enter(); } } - kvm_guest_enter(); trace_kvm_book3s_reenter(r, vcpu); return r; @@ -1123,7 +1126,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) ret = __kvmppc_vcpu_run(kvm_run, vcpu); - kvm_guest_exit(); + /* No need for kvm_guest_exit. It's done in handle_exit. + We also get here with interrupts enabled. 
*/ current->thread.regs->msr = ext_msr; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 887c7cc02146..aae535f6d9de 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -481,6 +481,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) local_irq_disable(); if (kvmppc_prepare_to_enter(vcpu)) { + local_irq_enable(); kvm_run->exit_reason = KVM_EXIT_INTR; ret = -EINTR; goto out; @@ -512,6 +513,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) ret = __kvmppc_vcpu_run(kvm_run, vcpu); + /* No need for kvm_guest_exit. It's done in handle_exit. + We also get here with interrupts enabled. */ + #ifdef CONFIG_PPC_FPU kvmppc_save_guest_fp(vcpu); @@ -527,12 +531,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) current->thread.fpexc_mode = fpexc_mode; #endif - kvm_guest_exit(); - out: vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - local_irq_enable(); return ret; } @@ -947,14 +948,16 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, if (!(r & RESUME_HOST)) { local_irq_disable(); if (kvmppc_prepare_to_enter(vcpu)) { + local_irq_enable(); run->exit_reason = KVM_EXIT_INTR; r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); kvmppc_account_exit(vcpu, SIGNAL_EXITS); + } else { + /* Going back to guest */ + kvm_guest_enter(); } } - kvm_guest_enter(); - return r; } From bd2be6836ee493d41fe42367a2b129aa771185c1 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 01:04:19 +0200 Subject: [PATCH 0223/5831] KVM: PPC: Book3S: PR: Rework irq disabling Today, we disable preemption while inside guest context, because we need to expose to the world that we are not in a preemptible context. However, during that time we already have interrupts disabled, which would indicate that we are in a non-preemptible context. The reason the checks for irqs_disabled() fail for us though is that we manually control hard IRQs and ignore all the lazy EE framework. Let's stop doing that. Instead, let's always use lazy EE to indicate when we want to disable IRQs, but do a special final switch that gets us into EE disabled, but soft enabled state. That way when we get back out of guest state, we are immediately ready to process interrupts. This simplifies the code drastically and reduces the time that we appear as preempt disabled. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 10 ++++++++++ arch/powerpc/kvm/book3s_pr.c | 21 +++++++-------------- arch/powerpc/kvm/book3s_rmhandlers.S | 15 ++++++++------- arch/powerpc/kvm/booke.c | 2 ++ arch/powerpc/kvm/powerpc.c | 14 ++++++++++++++ 5 files changed, 41 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 59b7c87e47f7..545936428bf6 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -234,5 +234,15 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn) } } +/* Please call after prepare_to_enter. This function puts the lazy ee state + back to normal mode, without actually enabling interrupts. 
*/ +static inline void kvmppc_lazy_ee_enable(void) +{ +#ifdef CONFIG_PPC64 + /* Only need to enable IRQs by hard enabling them after this */ + local_paca->irq_happened = 0; + local_paca->soft_enabled = 1; +#endif +} #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 3dec346c4b93..e737db8a5ca7 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -52,8 +52,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, #define MSR_USER32 MSR_USER #define MSR_USER64 MSR_USER #define HW_PAGE_SIZE PAGE_SIZE -#define __hard_irq_disable local_irq_disable -#define __hard_irq_enable local_irq_enable #endif void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -597,12 +595,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; - /* We get here with MSR.EE=0, so enable it to be a nice citizen */ - __hard_irq_enable(); + /* We get here with MSR.EE=1 */ trace_kvm_exit(exit_nr, vcpu); kvm_guest_exit(); - preempt_enable(); switch (exit_nr) { case BOOK3S_INTERRUPT_INST_STORAGE: @@ -854,7 +850,6 @@ program_interrupt: } } - preempt_disable(); if (!(r & RESUME_HOST)) { /* To avoid clobbering exit_reason, only check for signals if * we aren't already exiting to userspace for some other @@ -866,14 +861,15 @@ program_interrupt: * and if we really did time things so badly, then we just exit * again due to a host external interrupt. */ - __hard_irq_disable(); + local_irq_disable(); if (kvmppc_prepare_to_enter(vcpu)) { - /* local_irq_enable(); */ + local_irq_enable(); run->exit_reason = KVM_EXIT_INTR; r = -EINTR; } else { /* Going back to guest */ kvm_guest_enter(); + kvmppc_lazy_ee_enable(); } } @@ -1066,8 +1062,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #endif ulong ext_msr; - preempt_disable(); - /* Check if we can run the vcpu at all */ if (!vcpu->arch.sane) { kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -1081,9 +1075,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) * really did time things so badly, then we just exit again due to * a host external interrupt. 
*/ - __hard_irq_disable(); + local_irq_disable(); if (kvmppc_prepare_to_enter(vcpu)) { - __hard_irq_enable(); + local_irq_enable(); kvm_run->exit_reason = KVM_EXIT_INTR; ret = -EINTR; goto out; @@ -1122,7 +1116,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (vcpu->arch.shared->msr & MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); - kvm_guest_enter(); + kvmppc_lazy_ee_enable(); ret = __kvmppc_vcpu_run(kvm_run, vcpu); @@ -1157,7 +1151,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) out: vcpu->mode = OUTSIDE_GUEST_MODE; - preempt_enable(); return ret; } diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 9ecf6e35cd8d..b2f8258b545a 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -170,20 +170,21 @@ kvmppc_handler_skip_ins: * Call kvmppc_handler_trampoline_enter in real mode * * On entry, r4 contains the guest shadow MSR + * MSR.EE has to be 0 when calling this function */ _GLOBAL(kvmppc_entry_trampoline) mfmsr r5 LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter) toreal(r7) - li r9, MSR_RI - ori r9, r9, MSR_EE - andc r9, r5, r9 /* Clear EE and RI in MSR value */ li r6, MSR_IR | MSR_DR - ori r6, r6, MSR_EE - andc r6, r5, r6 /* Clear EE, DR and IR in MSR value */ - MTMSR_EERI(r9) /* Clear EE and RI in MSR */ - mtsrr0 r7 /* before we set srr0/1 */ + andc r6, r5, r6 /* Clear DR and IR in MSR value */ + /* + * Set EE in HOST_MSR so that it's enabled when we get into our + * C exit handler function + */ + ori r5, r5, MSR_EE + mtsrr0 r7 mtsrr1 r6 RFI diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index aae535f6d9de..2bd190c488ef 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -486,6 +486,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) ret = -EINTR; goto out; } + kvmppc_lazy_ee_enable(); kvm_guest_enter(); @@ -955,6 +956,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } else { /* Going back to guest */ kvm_guest_enter(); + kvmppc_lazy_ee_enable(); } } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 153a26abc915..266549979e9f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "timing.h" #include "../mm/mmu_decl.h" @@ -93,6 +94,19 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) break; } +#ifdef CONFIG_PPC64 + /* lazy EE magic */ + hard_irq_disable(); + if (lazy_irq_pending()) { + /* Got an interrupt in between, try again */ + local_irq_enable(); + local_irq_disable(); + continue; + } + + trace_hardirqs_on(); +#endif + /* Going into guest context! Yay! */ vcpu->mode = IN_GUEST_MODE; smp_wmb(); From 3766a4c693358cff33441310413e3776dbbf8ef0 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 01:24:01 +0200 Subject: [PATCH 0224/5831] KVM: PPC: Move kvm_guest_enter call into generic code We need to call kvm_guest_enter in booke and book3s, so move its call to generic code. 
Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 2 -- arch/powerpc/kvm/booke.c | 2 -- arch/powerpc/kvm/powerpc.c | 3 +++ 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e737db8a5ca7..1ff0d6ccc589 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -867,8 +867,6 @@ program_interrupt: run->exit_reason = KVM_EXIT_INTR; r = -EINTR; } else { - /* Going back to guest */ - kvm_guest_enter(); kvmppc_lazy_ee_enable(); } } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 2bd190c488ef..5e8dc1909130 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -954,8 +954,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); kvmppc_account_exit(vcpu, SIGNAL_EXITS); } else { - /* Going back to guest */ - kvm_guest_enter(); kvmppc_lazy_ee_enable(); } } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 266549979e9f..6646574bf930 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -101,12 +101,15 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) /* Got an interrupt in between, try again */ local_irq_enable(); local_irq_disable(); + kvm_guest_exit(); continue; } trace_hardirqs_on(); #endif + kvm_guest_enter(); + /* Going into guest context! Yay! */ vcpu->mode = IN_GUEST_MODE; smp_wmb(); From 206c2ed7f1ea55222bde2954ee3d65c2e9cfb750 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 12:43:33 +0200 Subject: [PATCH 0225/5831] KVM: PPC: Ignore EXITING_GUEST_MODE mode We don't need to do anything when mode is EXITING_GUEST_MODE, because we essentially are outside of guest mode and did everything it asked us to do by the time we check it. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/powerpc.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6646574bf930..dc86371b9953 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -89,11 +89,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) continue; } - if (vcpu->mode == EXITING_GUEST_MODE) { - r = 1; - break; - } - #ifdef CONFIG_PPC64 /* lazy EE magic */ hard_irq_disable(); From 7ee788556bf395a8ef413bea33494df29a3409e0 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 12:44:41 +0200 Subject: [PATCH 0226/5831] KVM: PPC: Add return value in prepare_to_enter Our prepare_to_enter helper wants to be able to return in more circumstances to the host than only when an interrupt is pending. Broaden the interface a bit and move even more generic code to the generic helper. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 12 ++++++------ arch/powerpc/kvm/booke.c | 16 ++++++++-------- arch/powerpc/kvm/powerpc.c | 11 ++++++++--- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 1ff0d6ccc589..71fa0f1873b3 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -589,6 +589,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int exit_nr) { int r = RESUME_HOST; + int s; vcpu->stat.sum_exits++; @@ -862,10 +863,10 @@ program_interrupt: * again due to a host external interrupt. 
*/ local_irq_disable(); - if (kvmppc_prepare_to_enter(vcpu)) { + s = kvmppc_prepare_to_enter(vcpu); + if (s <= 0) { local_irq_enable(); - run->exit_reason = KVM_EXIT_INTR; - r = -EINTR; + r = s; } else { kvmppc_lazy_ee_enable(); } @@ -1074,10 +1075,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) * a host external interrupt. */ local_irq_disable(); - if (kvmppc_prepare_to_enter(vcpu)) { + ret = kvmppc_prepare_to_enter(vcpu); + if (ret <= 0) { local_irq_enable(); - kvm_run->exit_reason = KVM_EXIT_INTR; - ret = -EINTR; goto out; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 5e8dc1909130..1917802463f5 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -467,7 +467,7 @@ void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { - int ret; + int ret, s; #ifdef CONFIG_PPC_FPU unsigned int fpscr; int fpexc_mode; @@ -480,10 +480,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) } local_irq_disable(); - if (kvmppc_prepare_to_enter(vcpu)) { + s = kvmppc_prepare_to_enter(vcpu); + if (s <= 0) { local_irq_enable(); - kvm_run->exit_reason = KVM_EXIT_INTR; - ret = -EINTR; + ret = s; goto out; } kvmppc_lazy_ee_enable(); @@ -642,6 +642,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int exit_nr) { int r = RESUME_HOST; + int s; /* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); @@ -948,11 +949,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, */ if (!(r & RESUME_HOST)) { local_irq_disable(); - if (kvmppc_prepare_to_enter(vcpu)) { + s = kvmppc_prepare_to_enter(vcpu); + if (s <= 0) { local_irq_enable(); - run->exit_reason = KVM_EXIT_INTR; - r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); - kvmppc_account_exit(vcpu, SIGNAL_EXITS); + r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); } else { kvmppc_lazy_ee_enable(); } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index dc86371b9953..0e2a98ab6a77 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -53,11 +53,14 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) * Common checks before entering the guest world. Call with interrupts * disabled. * - * returns !0 if a signal is pending and check_signal is true + * returns: + * + * == 1 if we're ready to go into guest state + * <= 0 if we need to go back to the host with return value */ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) { - int r = 0; + int r = 1; WARN_ON_ONCE(!irqs_disabled()); while (true) { @@ -69,7 +72,9 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) } if (signal_pending(current)) { - r = 1; + kvmppc_account_exit(vcpu, SIGNAL_EXITS); + vcpu->run->exit_reason = KVM_EXIT_INTR; + r = -EINTR; break; } From 7c973a2ebb8fb9c8ee2ae9647f9ad7b0ad58a3e6 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 12:50:35 +0200 Subject: [PATCH 0227/5831] KVM: PPC: Add return value to core_check_requests Requests may want to tell us that we need to go back into host state, so add a return value for the checks. 
Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kvm/book3s_pr.c | 6 +++++- arch/powerpc/kvm/booke.c | 6 +++++- arch/powerpc/kvm/powerpc.c | 6 ++++-- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 545936428bf6..3dfc437fb9d9 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -112,7 +112,7 @@ extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong val); extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *val); -extern void kvmppc_core_check_requests(struct kvm_vcpu *vcpu); +extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu); extern int kvmppc_booke_init(void); extern void kvmppc_booke_exit(void); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 71fa0f1873b3..b3c584f94cb3 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -86,12 +86,16 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_VSX); } -void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) +int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) { + int r = 1; /* Indicate we want to get back into the guest */ + /* We misuse TLB_FLUSH to indicate that we want to clear all shadow cache entries */ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) kvmppc_mmu_pte_flush(vcpu, 0, 0); + + return r; } /************* MMU Notifiers *************/ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1917802463f5..c36493087dbf 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -455,14 +455,18 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) return r; } -void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) +int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) { + int r = 1; /* Indicate we want to get back into the guest */ + if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) update_timer_ints(vcpu); #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) kvmppc_core_flush_tlb(vcpu); #endif + + return r; } int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0e2a98ab6a77..54b12af577d0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -83,9 +83,11 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) /* Make sure we process requests preemptable */ local_irq_enable(); trace_kvm_check_requests(vcpu); - kvmppc_core_check_requests(vcpu); + r = kvmppc_core_check_requests(vcpu); local_irq_disable(); - continue; + if (r > 0) + continue; + break; } if (kvmppc_core_prepare_to_enter(vcpu)) { From f61c94bb99ca4253ac5dd57750e1af209a4beb7a Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 8 Aug 2012 20:38:19 +0000 Subject: [PATCH 0228/5831] KVM: PPC: booke: Add watchdog emulation This patch adds the watchdog emulation in KVM. The watchdog emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_BOOKE_WATCHDOG) ioctl. The kernel timer are used for watchdog emulation and emulates h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how it is configured. 
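Usage from a VMM, roughly, looks like the sketch below. The enable_guest_watchdog() helper and the vcpu_fd argument are made up for illustration; only KVM_CAP_PPC_BOOKE_WATCHDOG, KVM_ENABLE_CAP and KVM_EXIT_WATCHDOG come from this patch.

    /* Hypothetical userspace sketch: turn on watchdog emulation for one vcpu.
     * vcpu_fd is assumed to be the fd returned by KVM_CREATE_VCPU. */
    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int enable_guest_watchdog(int vcpu_fd)
    {
            struct kvm_enable_cap cap = {
                    .cap = KVM_CAP_PPC_BOOKE_WATCHDOG,
            };

            /* After this succeeds, KVM_RUN may return with
             * run->exit_reason == KVM_EXIT_WATCHDOG once the guest's
             * watchdog hits final expiry with TCR[WRC] != 0. */
            return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
    }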
Signed-off-by: Liu Yu Signed-off-by: Scott Wood [bharat.bhushan@freescale.com: reworked patch] Signed-off-by: Bharat Bhushan [agraf: adjust to new request framework] Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 3 + arch/powerpc/include/asm/kvm_ppc.h | 2 + arch/powerpc/include/asm/reg_booke.h | 7 ++ arch/powerpc/kvm/book3s.c | 9 ++ arch/powerpc/kvm/booke.c | 155 +++++++++++++++++++++++++++ arch/powerpc/kvm/booke_emulate.c | 8 ++ arch/powerpc/kvm/powerpc.c | 14 ++- include/linux/kvm.h | 2 + include/linux/kvm_host.h | 1 + 9 files changed, 199 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 4a5ec8f573c7..51b0ccd56769 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -471,6 +471,8 @@ struct kvm_vcpu_arch { ulong fault_esr; ulong queued_dear; ulong queued_esr; + spinlock_t wdt_lock; + struct timer_list wdt_timer; u32 tlbcfg[4]; u32 mmucfg; u32 epr; @@ -486,6 +488,7 @@ struct kvm_vcpu_arch { u8 osi_needed; u8 osi_enabled; u8 papr_enabled; + u8 watchdog_enabled; u8 sane; u8 cpu_type; u8 hcall_needed; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 3dfc437fb9d9..c06a64b53362 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -68,6 +68,8 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); extern void kvmppc_decrementer_func(unsigned long data); extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); +extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu); +extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu); /* Core-specific hooks */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 2d916c4982c5..e07e6af5e1ff 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -539,6 +539,13 @@ #define TCR_FIE 0x00800000 /* FIT Interrupt Enable */ #define TCR_ARE 0x00400000 /* Auto Reload Enable */ +#ifdef CONFIG_E500 +#define TCR_GET_WP(tcr) ((((tcr) & 0xC0000000) >> 30) | \ + (((tcr) & 0x1E0000) >> 15)) +#else +#define TCR_GET_WP(tcr) (((tcr) & 0xC0000000) >> 30) +#endif + /* Bit definitions for the TSR. 
*/ #define TSR_ENW 0x80000000 /* Enable Next Watchdog */ #define TSR_WIS 0x40000000 /* WDT Interrupt Status */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 3f2a8360c857..e94666566fa9 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -411,6 +411,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) return 0; } +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) +{ + return 0; +} + +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ +} + int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index c36493087dbf..09e8bf33a8c9 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -209,6 +209,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); } +static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG); +} + +static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu) +{ + clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions); +} + static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) { #ifdef CONFIG_KVM_BOOKE_HV @@ -328,6 +338,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, msr_mask = MSR_CE | MSR_ME | MSR_DE; int_class = INT_CLASS_NONCRIT; break; + case BOOKE_IRQPRIO_WATCHDOG: case BOOKE_IRQPRIO_CRITICAL: case BOOKE_IRQPRIO_DBELL_CRIT: allowed = vcpu->arch.shared->msr & MSR_CE; @@ -407,12 +418,121 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, return allowed; } +/* + * Return the number of jiffies until the next timeout. If the timeout is + * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA + * because the larger value can break the timer APIs. + */ +static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu) +{ + u64 tb, wdt_tb, wdt_ticks = 0; + u64 nr_jiffies = 0; + u32 period = TCR_GET_WP(vcpu->arch.tcr); + + wdt_tb = 1ULL << (63 - period); + tb = get_tb(); + /* + * The watchdog timeout will hapeen when TB bit corresponding + * to watchdog will toggle from 0 to 1. + */ + if (tb & wdt_tb) + wdt_ticks = wdt_tb; + + wdt_ticks += wdt_tb - (tb & (wdt_tb - 1)); + + /* Convert timebase ticks to jiffies */ + nr_jiffies = wdt_ticks; + + if (do_div(nr_jiffies, tb_ticks_per_jiffy)) + nr_jiffies++; + + return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA); +} + +static void arm_next_watchdog(struct kvm_vcpu *vcpu) +{ + unsigned long nr_jiffies; + unsigned long flags; + + /* + * If TSR_ENW and TSR_WIS are not set then no need to exit to + * userspace, so clear the KVM_REQ_WATCHDOG request. + */ + if ((vcpu->arch.tsr & (TSR_ENW | TSR_WIS)) != (TSR_ENW | TSR_WIS)) + clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests); + + spin_lock_irqsave(&vcpu->arch.wdt_lock, flags); + nr_jiffies = watchdog_next_timeout(vcpu); + /* + * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA + * then do not run the watchdog timer as this can break timer APIs. 
+ */ + if (nr_jiffies < NEXT_TIMER_MAX_DELTA) + mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies); + else + del_timer(&vcpu->arch.wdt_timer); + spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags); +} + +void kvmppc_watchdog_func(unsigned long data) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + u32 tsr, new_tsr; + int final; + + do { + new_tsr = tsr = vcpu->arch.tsr; + final = 0; + + /* Time out event */ + if (tsr & TSR_ENW) { + if (tsr & TSR_WIS) + final = 1; + else + new_tsr = tsr | TSR_WIS; + } else { + new_tsr = tsr | TSR_ENW; + } + } while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr); + + if (new_tsr & TSR_WIS) { + smp_wmb(); + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); + kvm_vcpu_kick(vcpu); + } + + /* + * If this is final watchdog expiry and some action is required + * then exit to userspace. + */ + if (final && (vcpu->arch.tcr & TCR_WRC_MASK) && + vcpu->arch.watchdog_enabled) { + smp_wmb(); + kvm_make_request(KVM_REQ_WATCHDOG, vcpu); + kvm_vcpu_kick(vcpu); + } + + /* + * Stop running the watchdog timer after final expiration to + * prevent the host from being flooded with timers if the + * guest sets a short period. + * Timers will resume when TSR/TCR is updated next time. + */ + if (!final) + arm_next_watchdog(vcpu); +} + static void update_timer_ints(struct kvm_vcpu *vcpu) { if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) kvmppc_core_queue_dec(vcpu); else kvmppc_core_dequeue_dec(vcpu); + + if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS)) + kvmppc_core_queue_watchdog(vcpu); + else + kvmppc_core_dequeue_watchdog(vcpu); } static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) @@ -466,6 +586,11 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) kvmppc_core_flush_tlb(vcpu); #endif + if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) { + vcpu->run->exit_reason = KVM_EXIT_WATCHDOG; + r = 0; + } + return r; } @@ -995,6 +1120,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) return r; } +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) +{ + /* setup watchdog timer once */ + spin_lock_init(&vcpu->arch.wdt_lock); + setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func, + (unsigned long)vcpu); + + return 0; +} + +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + del_timer_sync(&vcpu->arch.wdt_timer); +} + int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; @@ -1090,7 +1230,13 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, } if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { + u32 old_tsr = vcpu->arch.tsr; + vcpu->arch.tsr = sregs->u.e.tsr; + + if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) + arm_next_watchdog(vcpu); + update_timer_ints(vcpu); } @@ -1251,6 +1397,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr) { vcpu->arch.tcr = new_tcr; + arm_next_watchdog(vcpu); update_timer_ints(vcpu); } @@ -1265,6 +1412,14 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) { clear_bits(tsr_bits, &vcpu->arch.tsr); + + /* + * We may have stopped the watchdog due to + * being stuck on final expiration. 
+ */ + if (tsr_bits & (TSR_ENW | TSR_WIS)) + arm_next_watchdog(vcpu); + update_timer_ints(vcpu); } diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 12834bb608ab..5a66ade7fd17 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -145,6 +145,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) kvmppc_clr_tsr_bits(vcpu, spr_val); break; case SPRN_TCR: + /* + * WRC is a 2-bit field that is supposed to preserve its + * value once written to non-zero. + */ + if (vcpu->arch.tcr & TCR_WRC_MASK) { + spr_val &= ~TCR_WRC_MASK; + spr_val |= vcpu->arch.tcr & TCR_WRC_MASK; + } kvmppc_set_tcr(vcpu, spr_val); break; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 54b12af577d0..0ffd7d17adc7 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -300,6 +300,7 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_SREGS: + case KVM_CAP_PPC_BOOKE_WATCHDOG: #else case KVM_CAP_PPC_SEGSTATE: case KVM_CAP_PPC_HIOR: @@ -476,6 +477,8 @@ enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { + int ret; + hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; @@ -484,13 +487,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) #ifdef CONFIG_KVM_EXIT_TIMING mutex_init(&vcpu->arch.exit_timing_lock); #endif - - return 0; + ret = kvmppc_subarch_vcpu_init(vcpu); + return ret; } void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { kvmppc_mmu_destroy(vcpu); + kvmppc_subarch_vcpu_uninit(vcpu); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -735,6 +739,12 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = 0; vcpu->arch.papr_enabled = true; break; +#ifdef CONFIG_BOOKE + case KVM_CAP_PPC_BOOKE_WATCHDOG: + r = 0; + vcpu->arch.watchdog_enabled = true; + break; +#endif #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) case KVM_CAP_SW_TLB: { struct kvm_config_tlb cfg; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 4cb3761bebae..1649d4b57e1f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -167,6 +167,7 @@ struct kvm_pit_config { #define KVM_EXIT_OSI 18 #define KVM_EXIT_PAPR_HCALL 19 #define KVM_EXIT_S390_UCONTROL 20 +#define KVM_EXIT_WATCHDOG 21 /* For KVM_EXIT_INTERNAL_ERROR */ #define KVM_INTERNAL_ERROR_EMULATION 1 @@ -628,6 +629,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_READONLY_MEM 81 #endif #define KVM_CAP_IRQFD_RESAMPLE 82 +#define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2850656e2e96..0ca3663206f8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -118,6 +118,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_IMMEDIATE_EXIT 15 #define KVM_REQ_PMU 16 #define KVM_REQ_PMI 17 +#define KVM_REQ_WATCHDOG 18 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 From 6df8d3fc58dde84fc82a9ec2581440e54dfd3d14 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 8 Aug 2012 21:17:55 +0000 Subject: [PATCH 0229/5831] booke: Added ONE_REG interface for IAC/DAC debug registers IAC/DAC are defined as 32 bit while they are 64 bit wide. So ONE_REG interface is added to set/get them. 
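A rough userspace sketch of the resulting interface is below. The set_iac1() helper and vcpu_fd are illustrative only; KVM_SET_ONE_REG, struct kvm_one_reg and KVM_REG_PPC_IAC1 are the identifiers this patch builds on.

    /* Hypothetical sketch: write IAC1 as a full 64-bit value via ONE_REG. */
    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int set_iac1(int vcpu_fd, __u64 val)
    {
            struct kvm_one_reg reg = {
                    .id   = KVM_REG_PPC_IAC1,
                    .addr = (__u64)(unsigned long)&val,
            };

            return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
    }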
Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm.h | 12 ++++++++ arch/powerpc/include/asm/kvm_host.h | 24 +++++++++++++-- arch/powerpc/kvm/booke.c | 48 +++++++++++++++++++++++++++-- arch/powerpc/kvm/booke_emulate.c | 8 ++--- 4 files changed, 84 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 1bea4d8ea6f4..3c14202a3c84 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -221,6 +221,12 @@ struct kvm_sregs { __u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */ __u32 dbcr[3]; + /* + * iac/dac registers are 64bit wide, while this API + * interface provides only lower 32 bits on 64 bit + * processors. ONE_REG interface is added for 64bit + * iac/dac registers. + */ __u32 iac[4]; __u32 dac[2]; __u32 dvc[2]; @@ -326,5 +332,11 @@ struct kvm_book3e_206_tlb_params { }; #define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) +#define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2) +#define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3) +#define KVM_REG_PPC_IAC3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x4) +#define KVM_REG_PPC_IAC4 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x5) +#define KVM_REG_PPC_DAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x6) +#define KVM_REG_PPC_DAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x7) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 51b0ccd56769..f20a5ef1c7e8 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -346,6 +346,27 @@ struct kvmppc_slb { bool class : 1; }; +# ifdef CONFIG_PPC_FSL_BOOK3E +#define KVMPPC_BOOKE_IAC_NUM 2 +#define KVMPPC_BOOKE_DAC_NUM 2 +# else +#define KVMPPC_BOOKE_IAC_NUM 4 +#define KVMPPC_BOOKE_DAC_NUM 2 +# endif +#define KVMPPC_BOOKE_MAX_IAC 4 +#define KVMPPC_BOOKE_MAX_DAC 2 + +struct kvmppc_booke_debug_reg { + u32 dbcr0; + u32 dbcr1; + u32 dbcr2; +#ifdef CONFIG_KVM_E500MC + u32 dbcr4; +#endif + u64 iac[KVMPPC_BOOKE_MAX_IAC]; + u64 dac[KVMPPC_BOOKE_MAX_DAC]; +}; + struct kvm_vcpu_arch { ulong host_stack; u32 host_pid; @@ -440,8 +461,6 @@ struct kvm_vcpu_arch { u32 ccr0; u32 ccr1; - u32 dbcr0; - u32 dbcr1; u32 dbsr; u64 mmcr[3]; @@ -476,6 +495,7 @@ struct kvm_vcpu_arch { u32 tlbcfg[4]; u32 mmucfg; u32 epr; + struct kvmppc_booke_debug_reg dbg_reg; #endif gpa_t paddr_accessed; gva_t vaddr_accessed; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 09e8bf33a8c9..959aae96469c 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1351,12 +1351,56 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) { - return -EINVAL; + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_IAC1: + case KVM_REG_PPC_IAC2: + case KVM_REG_PPC_IAC3: + case KVM_REG_PPC_IAC4: { + int iac = reg->id - KVM_REG_PPC_IAC1; + r = copy_to_user((u64 __user *)(long)reg->addr, + &vcpu->arch.dbg_reg.iac[iac], sizeof(u64)); + break; + } + case KVM_REG_PPC_DAC1: + case KVM_REG_PPC_DAC2: { + int dac = reg->id - KVM_REG_PPC_DAC1; + r = copy_to_user((u64 __user *)(long)reg->addr, + &vcpu->arch.dbg_reg.dac[dac], sizeof(u64)); + break; + } + default: + break; + } + return r; } int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) { - return -EINVAL; + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_IAC1: + case KVM_REG_PPC_IAC2: + case KVM_REG_PPC_IAC3: + case KVM_REG_PPC_IAC4: { + 
int iac = reg->id - KVM_REG_PPC_IAC1; + r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac], + (u64 __user *)(long)reg->addr, sizeof(u64)); + break; + } + case KVM_REG_PPC_DAC1: + case KVM_REG_PPC_DAC2: { + int dac = reg->id - KVM_REG_PPC_DAC1; + r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac], + (u64 __user *)(long)reg->addr, sizeof(u64)); + break; + } + default: + break; + } + return r; } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 5a66ade7fd17..cc99a0b3d202 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -133,10 +133,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) vcpu->arch.csrr1 = spr_val; break; case SPRN_DBCR0: - vcpu->arch.dbcr0 = spr_val; + vcpu->arch.dbg_reg.dbcr0 = spr_val; break; case SPRN_DBCR1: - vcpu->arch.dbcr1 = spr_val; + vcpu->arch.dbg_reg.dbcr1 = spr_val; break; case SPRN_DBSR: vcpu->arch.dbsr &= ~spr_val; @@ -266,10 +266,10 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) *spr_val = vcpu->arch.csrr1; break; case SPRN_DBCR0: - *spr_val = vcpu->arch.dbcr0; + *spr_val = vcpu->arch.dbg_reg.dbcr0; break; case SPRN_DBCR1: - *spr_val = vcpu->arch.dbcr1; + *spr_val = vcpu->arch.dbg_reg.dbcr1; break; case SPRN_DBSR: *spr_val = vcpu->arch.dbsr; From 491dd5b8a4926393308172da80c73faf242a4057 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 14:40:29 +0200 Subject: [PATCH 0230/5831] KVM: PPC: 44x: Initialize PVR We need to make sure that vcpu->arch.pvr is initialized to a sane value, so let's just take the host PVR. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/44x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 50e7dbc7356c..3d7fd21c65f9 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -83,6 +83,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) vcpu_44x->shadow_refs[i].gtlb_index = -1; vcpu->arch.cpu_type = KVM_CPU_440; + vcpu->arch.pvr = mfspr(SPRN_PVR); return 0; } From 50c871edf59b4585fd2c17acfe4e7cd3752418b7 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 14:50:54 +0200 Subject: [PATCH 0231/5831] KVM: PPC: BookE: Add MCSR SPR support Add support for the MCSR SPR. This only implements the SPR storage bits, not actual machine checks. 
Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke_emulate.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index cc99a0b3d202..514790f41aba 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -237,6 +237,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_IVOR15: vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val; break; + case SPRN_MCSR: + vcpu->arch.mcsr &= ~spr_val; + break; default: emulated = EMULATE_FAIL; @@ -329,6 +332,9 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_IVOR15: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; break; + case SPRN_MCSR: + *spr_val = vcpu->arch.mcsr; + break; default: emulated = EMULATE_FAIL; From 166a2b7000c388aee81168987ce2eddb6783f550 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 15 Aug 2012 01:38:43 +0200 Subject: [PATCH 0232/5831] KVM: PPC: Use symbols for exit trace Exit traces are a lot easier to read when you don't have to remember cryptic numbers for guest exit reasons. Symbolify them in our trace output. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/trace.h | 58 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index cb2780a42fd8..519aba8bb3d3 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -31,6 +31,60 @@ TRACE_EVENT(kvm_ppc_instr, __entry->inst, __entry->pc, __entry->emulate) ); +#ifdef CONFIG_PPC_BOOK3S +#define kvm_trace_symbol_exit \ + {0x100, "SYSTEM_RESET"}, \ + {0x200, "MACHINE_CHECK"}, \ + {0x300, "DATA_STORAGE"}, \ + {0x380, "DATA_SEGMENT"}, \ + {0x400, "INST_STORAGE"}, \ + {0x480, "INST_SEGMENT"}, \ + {0x500, "EXTERNAL"}, \ + {0x501, "EXTERNAL_LEVEL"}, \ + {0x502, "EXTERNAL_HV"}, \ + {0x600, "ALIGNMENT"}, \ + {0x700, "PROGRAM"}, \ + {0x800, "FP_UNAVAIL"}, \ + {0x900, "DECREMENTER"}, \ + {0x980, "HV_DECREMENTER"}, \ + {0xc00, "SYSCALL"}, \ + {0xd00, "TRACE"}, \ + {0xe00, "H_DATA_STORAGE"}, \ + {0xe20, "H_INST_STORAGE"}, \ + {0xe40, "H_EMUL_ASSIST"}, \ + {0xf00, "PERFMON"}, \ + {0xf20, "ALTIVEC"}, \ + {0xf40, "VSX"} +#else +#define kvm_trace_symbol_exit \ + {0, "CRITICAL"}, \ + {1, "MACHINE_CHECK"}, \ + {2, "DATA_STORAGE"}, \ + {3, "INST_STORAGE"}, \ + {4, "EXTERNAL"}, \ + {5, "ALIGNMENT"}, \ + {6, "PROGRAM"}, \ + {7, "FP_UNAVAIL"}, \ + {8, "SYSCALL"}, \ + {9, "AP_UNAVAIL"}, \ + {10, "DECREMENTER"}, \ + {11, "FIT"}, \ + {12, "WATCHDOG"}, \ + {13, "DTLB_MISS"}, \ + {14, "ITLB_MISS"}, \ + {15, "DEBUG"}, \ + {32, "SPE_UNAVAIL"}, \ + {33, "SPE_FP_DATA"}, \ + {34, "SPE_FP_ROUND"}, \ + {35, "PERFORMANCE_MONITOR"}, \ + {36, "DOORBELL"}, \ + {37, "DOORBELL_CRITICAL"}, \ + {38, "GUEST_DBELL"}, \ + {39, "GUEST_DBELL_CRIT"}, \ + {40, "HV_SYSCALL"}, \ + {41, "HV_PRIV"} +#endif + TRACE_EVENT(kvm_exit, TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), TP_ARGS(exit_nr, vcpu), @@ -62,7 +116,7 @@ TRACE_EVENT(kvm_exit, __entry->last_inst = vcpu->arch.last_inst; ), - TP_printk("exit=0x%x" + TP_printk("exit=%s" " | pc=0x%lx" " | msr=0x%lx" " | dar=0x%lx" @@ -71,7 +125,7 @@ TRACE_EVENT(kvm_exit, #endif " | last_inst=0x%lx" , - __entry->exit_nr, + __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit), __entry->pc, __entry->msr, __entry->dar, From 430c7ff52ffb902e1e08b255b93c28fcad8cb9ef Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 15 Aug 2012 11:42:07 +0200 Subject: [PATCH 0233/5831] KVM: PPC: 
E500: Remove E500_TLB_DIRTY flag Since we always mark pages as dirty immediately when mapping them read/write now, there's no need for the dirty flag in our cache. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500.h | 3 +-- arch/powerpc/kvm/e500_tlb.c | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index aa8b81428bf4..d1622864549e 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -27,8 +27,7 @@ #define E500_TLB_NUM 2 #define E500_TLB_VALID 1 -#define E500_TLB_DIRTY 2 -#define E500_TLB_BITMAP 4 +#define E500_TLB_BITMAP 2 struct tlbe_ref { pfn_t pfn; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 1af6fab58995..43489a8fa985 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -303,10 +303,8 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, ref->pfn = pfn; ref->flags = E500_TLB_VALID; - if (tlbe_is_writable(gtlbe)) { - ref->flags |= E500_TLB_DIRTY; + if (tlbe_is_writable(gtlbe)) kvm_set_pfn_dirty(pfn); - } } static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) From 2e2327023fe090d68aacad395178298645eaffea Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 15 Aug 2012 17:37:13 +0000 Subject: [PATCH 0234/5831] Document IACx/DACx registers access using ONE_REG API Patch to access the debug registers (IACx/DACx) using ONE_REG api was sent earlier. But that missed the respective documentation. Also corrected the index number referencing in section 4.69 Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 170afb1fbc2e..c84739e2c0f3 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1734,7 +1734,12 @@ registers, find a list below: Arch | Register | Width (bits) | | PPC | KVM_REG_PPC_HIOR | 64 - + PPC | KVM_REG_PPC_IAC1 | 64 + PPC | KVM_REG_PPC_IAC2 | 64 + PPC | KVM_REG_PPC_IAC3 | 64 + PPC | KVM_REG_PPC_IAC4 | 64 + PPC | KVM_REG_PPC_DAC1 | 64 + PPC | KVM_REG_PPC_DAC2 | 64 4.69 KVM_GET_ONE_REG @@ -1750,7 +1755,7 @@ kvm_one_reg struct passed in. On success, the register value can be found at the memory location pointed to by "addr". The list of registers accessible using this interface is identical to the -list in 4.64. +list in 4.68. 4.70 KVM_KVMCLOCK_CTRL From e4dcfe88fb30bcedda80c151018086fffb8280e6 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Aug 2012 00:28:09 +0200 Subject: [PATCH 0235/5831] KVM: PPC: 440: Implement mtdcrx We need mtdcrx to execute properly on 460 cores. 
Signed-off-by: Alexander Graf --- arch/powerpc/kvm/44x_emulate.c | 36 ++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index c8c61578fdfc..3843a75e3e98 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -28,11 +28,29 @@ #include "44x_tlb.h" #define XOP_MFDCR 323 +#define XOP_MTDCRX 387 #define XOP_MTDCR 451 #define XOP_TLBSX 914 #define XOP_ICCCI 966 #define XOP_TLBWE 978 +static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn) +{ + /* emulate some access in kernel */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs); + return EMULATE_DONE; + default: + vcpu->run->dcr.dcrn = dcrn; + vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs); + vcpu->run->dcr.is_write = 1; + vcpu->arch.dcr_needed = 1; + kvmppc_account_exit(vcpu, DCR_EXITS); + return EMULATE_DO_DCR; + } +} + int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { @@ -85,20 +103,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case XOP_MTDCR: - /* emulate some access in kernel */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs); - break; - default: - run->dcr.dcrn = dcrn; - run->dcr.data = kvmppc_get_gpr(vcpu, rs); - run->dcr.is_write = 1; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - emulated = EMULATE_DO_DCR; - } + emulated = emulate_mtdcr(vcpu, rs, dcrn); + break; + case XOP_MTDCRX: + emulated = emulate_mtdcr(vcpu, rs, + kvmppc_get_gpr(vcpu, ra)); break; case XOP_TLBWE: From ceb985f9d18cba2efdef08b8d31751c2c2b20d77 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Aug 2012 00:34:58 +0200 Subject: [PATCH 0236/5831] KVM: PPC: 440: Implement mfdcrx We need mfdcrx to execute properly on 460 cores. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/44x_emulate.c | 74 ++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 3843a75e3e98..1a793c4c4a67 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -27,6 +27,7 @@ #include "booke.h" #include "44x_tlb.h" +#define XOP_MFDCRX 259 #define XOP_MFDCR 323 #define XOP_MTDCRX 387 #define XOP_MTDCR 451 @@ -51,6 +52,43 @@ static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn) } } +static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn) +{ + /* The guest may access CPR0 registers to determine the timebase + * frequency, and it must know the real host frequency because it + * can directly access the timebase registers. + * + * It would be possible to emulate those accesses in userspace, + * but userspace can really only figure out the end frequency. + * We could decompose that into the factors that compute it, but + * that's tricky math, and it's easier to just report the real + * CPR0 values. 
+ */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr); + break; + case DCRN_CPR0_CONFIG_DATA: + local_irq_disable(); + mtdcr(DCRN_CPR0_CONFIG_ADDR, + vcpu->arch.cpr0_cfgaddr); + kvmppc_set_gpr(vcpu, rt, + mfdcr(DCRN_CPR0_CONFIG_DATA)); + local_irq_enable(); + break; + default: + vcpu->run->dcr.dcrn = dcrn; + vcpu->run->dcr.data = 0; + vcpu->run->dcr.is_write = 0; + vcpu->arch.io_gpr = rt; + vcpu->arch.dcr_needed = 1; + kvmppc_account_exit(vcpu, DCR_EXITS); + return EMULATE_DO_DCR; + } + + return EMULATE_DONE; +} + int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { @@ -68,38 +106,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (get_xop(inst)) { case XOP_MFDCR: - /* The guest may access CPR0 registers to determine the timebase - * frequency, and it must know the real host frequency because it - * can directly access the timebase registers. - * - * It would be possible to emulate those accesses in userspace, - * but userspace can really only figure out the end frequency. - * We could decompose that into the factors that compute it, but - * that's tricky math, and it's easier to just report the real - * CPR0 values. - */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr); - break; - case DCRN_CPR0_CONFIG_DATA: - local_irq_disable(); - mtdcr(DCRN_CPR0_CONFIG_ADDR, - vcpu->arch.cpr0_cfgaddr); - kvmppc_set_gpr(vcpu, rt, - mfdcr(DCRN_CPR0_CONFIG_DATA)); - local_irq_enable(); - break; - default: - run->dcr.dcrn = dcrn; - run->dcr.data = 0; - run->dcr.is_write = 0; - vcpu->arch.io_gpr = rt; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - emulated = EMULATE_DO_DCR; - } + emulated = emulate_mfdcr(vcpu, rt, dcrn); + break; + case XOP_MFDCRX: + emulated = emulate_mfdcr(vcpu, rt, + kvmppc_get_gpr(vcpu, ra)); break; case XOP_MTDCR: From 7a08c2740f07fb8c3769d1f137721835ead7652f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Aug 2012 13:10:16 +0200 Subject: [PATCH 0237/5831] KVM: PPC: BookE: Support FPU on non-hv systems When running on HV aware hosts, we can not trap when the guest sets the FP bit, so we just let it do so when it wants to, because it has full access to MSR. For non-HV aware hosts with an FPU (like 440), we need to also adjust the shadow MSR though. Otherwise the guest gets an FP unavailable trap even when it really enabled the FP bit in MSR. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 959aae96469c..5f0476a602d8 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -122,6 +122,16 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu) } #endif +static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) +{ +#if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV) + /* We always treat the FP bit as enabled from the host + perspective, so only need to adjust the shadow MSR */ + vcpu->arch.shadow_msr &= ~MSR_FP; + vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_FP; +#endif +} + /* * Helper function for "full" MSR writes. No need to call this if only * EE/CE/ME/DE/RI are changing. 
@@ -138,6 +148,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) kvmppc_mmu_msr_notify(vcpu, old_msr); kvmppc_vcpu_sync_spe(vcpu); + kvmppc_vcpu_sync_fpu(vcpu); } static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, From d61966fc08b84857b697ebae4489c652dd87e48a Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 12 Sep 2012 03:18:14 +0000 Subject: [PATCH 0238/5831] KVM: PPC: bookehv: Allow duplicate calls of DO_KVM macro The current form of DO_KVM macro restricts its use to one call per input parameter set. This is caused by kvmppc_resume_\intno\()_\srr1 symbol definition. Duplicate calls of DO_KVM are required by distinct implementations of exeption handlers which are delegated at runtime. Use a rare label number to avoid conflicts with the calling contexts. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_booke_hv_asm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h index 30a600fa1b6a..a37a12a9a7d7 100644 --- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h +++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h @@ -38,9 +38,9 @@ #ifdef CONFIG_KVM_BOOKE_HV BEGIN_FTR_SECTION mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */ - bf 3, kvmppc_resume_\intno\()_\srr1 + bf 3, 1975f b kvmppc_handler_\intno\()_\srr1 -kvmppc_resume_\intno\()_\srr1: +1975: END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) #endif .endm From 2c9097e4c1340208ef93371abd4b3bd7e989381b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 11 Sep 2012 13:27:01 +0000 Subject: [PATCH 0239/5831] KVM: PPC: Book3S HV: Take the SRCU read lock before looking up memslots The generic KVM code uses SRCU (sleeping RCU) to protect accesses to the memslots data structures against updates due to userspace adding, modifying or removing memory slots. We need to do that too, both to avoid accessing stale copies of the memslots and to avoid lockdep warnings. This therefore adds srcu_read_lock/unlock pairs around code that accesses and uses memslots. Since the real-mode handlers for H_ENTER, H_REMOVE and H_BULK_REMOVE need to access the memslots, and we don't want to call the SRCU code in real mode (since we have no assurance that it would only access the linear mapping), we hold the SRCU read lock for the VM while in the guest. This does mean that adding or removing memory slots while some vcpus are executing in the guest will block for up to two jiffies. This tradeoff is acceptable since adding/removing memory slots only happens rarely, while H_ENTER/H_REMOVE/H_BULK_REMOVE are performance-critical hot paths. 
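A minimal sketch of the read-side pattern this introduces (illustrative only; the actual call sites are in the hunks below, and "gfn" stands for whatever guest frame is being looked up):

    int idx;
    struct kvm_memory_slot *memslot;

    idx = srcu_read_lock(&kvm->srcu);
    memslot = gfn_to_memslot(kvm, gfn);
    if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) {
        /* memslot may be dereferenced while the SRCU read side is held */
    }
    srcu_read_unlock(&kvm->srcu, idx);

The index returned by srcu_read_lock() must be passed back to the matching srcu_read_unlock().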
Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 17 +++++++++++++---- arch/powerpc/kvm/book3s_hv.c | 27 +++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index d95d11322a15..0f031c07f7e5 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -1057,20 +1058,22 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, unsigned long hva, psize, offset; unsigned long pa; unsigned long *physp; + int srcu_idx; + srcu_idx = srcu_read_lock(&kvm->srcu); memslot = gfn_to_memslot(kvm, gfn); if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) - return NULL; + goto err; if (!kvm->arch.using_mmu_notifiers) { physp = kvm->arch.slot_phys[memslot->id]; if (!physp) - return NULL; + goto err; physp += gfn - memslot->base_gfn; pa = *physp; if (!pa) { if (kvmppc_get_guest_page(kvm, gfn, memslot, PAGE_SIZE) < 0) - return NULL; + goto err; pa = *physp; } page = pfn_to_page(pa >> PAGE_SHIFT); @@ -1079,9 +1082,11 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, hva = gfn_to_hva_memslot(memslot, gfn); npages = get_user_pages_fast(hva, 1, 1, pages); if (npages < 1) - return NULL; + goto err; page = pages[0]; } + srcu_read_unlock(&kvm->srcu, srcu_idx); + psize = PAGE_SIZE; if (PageHuge(page)) { page = compound_head(page); @@ -1091,6 +1096,10 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, if (nb_ret) *nb_ret = psize - offset; return page_address(page) + offset; + + err: + srcu_read_unlock(&kvm->srcu, srcu_idx); + return NULL; } void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 83e929e66f9d..48b0d4a73b9d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -366,13 +367,16 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) unsigned long req = kvmppc_get_gpr(vcpu, 3); unsigned long target, ret = H_SUCCESS; struct kvm_vcpu *tvcpu; + int idx; switch (req) { case H_ENTER: + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4), kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6), kvmppc_get_gpr(vcpu, 7)); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; case H_CEDE: break; @@ -411,6 +415,7 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { int r = RESUME_HOST; + int srcu_idx; vcpu->stat.sum_exits++; @@ -470,12 +475,16 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * have been handled already. */ case BOOK3S_INTERRUPT_H_DATA_STORAGE: + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvmppc_book3s_hv_page_fault(run, vcpu, vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); break; case BOOK3S_INTERRUPT_H_INST_STORAGE: + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvmppc_book3s_hv_page_fault(run, vcpu, kvmppc_get_pc(vcpu), 0); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); break; /* * This occurs if the guest executes an illegal instruction. 
@@ -820,6 +829,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) long ret; u64 now; int ptid, i, need_vpa_update; + int srcu_idx; /* don't start if any threads have a signal pending */ need_vpa_update = 0; @@ -898,6 +908,9 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) spin_unlock(&vc->lock); kvm_guest_enter(); + + srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu); + __kvmppc_vcore_entry(NULL, vcpu0); for (i = 0; i < threads_per_core; ++i) kvmppc_release_hwthread(vc->pcpu + i); @@ -913,6 +926,8 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) vc->vcore_state = VCORE_EXITING; spin_unlock(&vc->lock); + srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx); + /* make sure updates to secondary vcpu structs are visible now */ smp_mb(); kvm_guest_exit(); @@ -1362,6 +1377,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) unsigned long rmls; unsigned long *physp; unsigned long i, npages; + int srcu_idx; mutex_lock(&kvm->lock); if (kvm->arch.rma_setup_done) @@ -1377,12 +1393,13 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) } /* Look up the memslot for guest physical address 0 */ + srcu_idx = srcu_read_lock(&kvm->srcu); memslot = gfn_to_memslot(kvm, 0); /* We must have some memory at 0 by now */ err = -EINVAL; if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) - goto out; + goto out_srcu; /* Look up the VMA for the start of this memory slot */ hva = memslot->userspace_addr; @@ -1406,14 +1423,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) err = -EPERM; if (cpu_has_feature(CPU_FTR_ARCH_201)) { pr_err("KVM: CPU requires an RMO\n"); - goto out; + goto out_srcu; } /* We can handle 4k, 64k or 16M pages in the VRMA */ err = -EINVAL; if (!(psize == 0x1000 || psize == 0x10000 || psize == 0x1000000)) - goto out; + goto out_srcu; /* Update VRMASD field in the LPCR */ senc = slb_pgsize_encoding(psize); @@ -1436,7 +1453,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) err = -EINVAL; if (rmls < 0) { pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); - goto out; + goto out_srcu; } atomic_inc(&ri->use_count); kvm->arch.rma = ri; @@ -1476,6 +1493,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) smp_wmb(); kvm->arch.rma_setup_done = 1; err = 0; + out_srcu: + srcu_read_unlock(&kvm->srcu, srcu_idx); out: mutex_unlock(&kvm->lock); return err; From a66b48c3a39fa1c4223d4f847fdc7a04ed1618de Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 11 Sep 2012 13:27:46 +0000 Subject: [PATCH 0240/5831] KVM: PPC: Move kvm->arch.slot_phys into memslot.arch Now that we have an architecture-specific field in the kvm_memory_slot structure, we can use it to store the array of page physical addresses that we need for Book3S HV KVM on PPC970 processors. This reduces the size of struct kvm_arch for Book3S HV, and also reduces the size of struct kvm_arch_memory_slot for other PPC KVM variants since the fields in it are now only compiled in for Book3S HV. This necessitates making the kvm_arch_create_memslot and kvm_arch_free_memslot operations specific to each PPC KVM variant. That in turn means that we now don't allocate the rmap arrays on Book3S PR and Book E. Since we now unpin pages and free the slot_phys array in kvmppc_core_free_memslot, we no longer need to do it in kvmppc_core_destroy_vm, since the generic code takes care to free all the memslots when destroying a VM. We now need the new memslot to be passed in to kvmppc_core_prepare_memory_region, since we need to initialize its arch.slot_phys member on Book3S HV. 
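For reference, a sketch of the per-memslot arch state after this change (the comments here are illustrative; the authoritative definition is in the kvm_host.h hunk below):

    struct kvm_arch_memory_slot {
    #ifdef CONFIG_KVM_BOOK3S_64_HV
        unsigned long *rmap;       /* reverse-map array, also carries lock/present bits */
        unsigned long *slot_phys;  /* pinned page physical addresses (PPC970, no MMU notifiers) */
    #endif /* CONFIG_KVM_BOOK3S_64_HV */
    };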
Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 9 ++- arch/powerpc/include/asm/kvm_ppc.h | 5 ++ arch/powerpc/kvm/book3s_64_mmu_hv.c | 6 +- arch/powerpc/kvm/book3s_hv.c | 112 ++++++++++++++++------------ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- arch/powerpc/kvm/book3s_pr.c | 12 +++ arch/powerpc/kvm/booke.c | 12 +++ arch/powerpc/kvm/powerpc.c | 13 +--- 8 files changed, 106 insertions(+), 65 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index f20a5ef1c7e8..68f5a308737a 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -204,7 +204,7 @@ struct revmap_entry { }; /* - * We use the top bit of each memslot->rmap entry as a lock bit, + * We use the top bit of each memslot->arch.rmap entry as a lock bit, * and bit 32 as a present flag. The bottom 32 bits are the * index in the guest HPT of a HPTE that points to the page. */ @@ -215,14 +215,17 @@ struct revmap_entry { #define KVMPPC_RMAP_PRESENT 0x100000000ul #define KVMPPC_RMAP_INDEX 0xfffffffful -/* Low-order bits in kvm->arch.slot_phys[][] */ +/* Low-order bits in memslot->arch.slot_phys[] */ #define KVMPPC_PAGE_ORDER_MASK 0x1f #define KVMPPC_PAGE_NO_CACHE HPTE_R_I /* 0x20 */ #define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */ #define KVMPPC_GOT_PAGE 0x80 struct kvm_arch_memory_slot { +#ifdef CONFIG_KVM_BOOK3S_64_HV unsigned long *rmap; + unsigned long *slot_phys; +#endif /* CONFIG_KVM_BOOK3S_64_HV */ }; struct kvm_arch { @@ -246,8 +249,6 @@ struct kvm_arch { unsigned long hpt_npte; unsigned long hpt_mask; spinlock_t slot_phys_lock; - unsigned long *slot_phys[KVM_MEM_SLOTS_NUM]; - int slot_npages[KVM_MEM_SLOTS_NUM]; unsigned short last_vcpu[NR_CPUS]; struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; struct kvmppc_linear_info *hpt_li; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c06a64b53362..41a00eae68c7 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -143,7 +143,12 @@ extern struct kvmppc_linear_info *kvm_alloc_hpt(void); extern void kvm_release_hpt(struct kvmppc_linear_info *li); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); +extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont); +extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, + unsigned long npages); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 0f031c07f7e5..a389cc62b16c 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -261,7 +261,7 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) /* * This is called to get a reference to a guest page if there isn't - * one already in the kvm->arch.slot_phys[][] arrays. + * one already in the memslot->arch.slot_phys[] array. 
*/ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, struct kvm_memory_slot *memslot, @@ -276,7 +276,7 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, struct vm_area_struct *vma; unsigned long pfn, i, npages; - physp = kvm->arch.slot_phys[memslot->id]; + physp = memslot->arch.slot_phys; if (!physp) return -EINVAL; if (physp[gfn - memslot->base_gfn]) @@ -1065,7 +1065,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) goto err; if (!kvm->arch.using_mmu_notifiers) { - physp = kvm->arch.slot_phys[memslot->id]; + physp = memslot->arch.slot_phys; if (!physp) goto err; physp += gfn - memslot->base_gfn; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 48b0d4a73b9d..817837de7362 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1314,50 +1314,69 @@ static unsigned long slb_pgsize_encoding(unsigned long psize) return senc; } -int kvmppc_core_prepare_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) -{ - unsigned long npages; - unsigned long *phys; - - /* Allocate a slot_phys array */ - phys = kvm->arch.slot_phys[mem->slot]; - if (!kvm->arch.using_mmu_notifiers && !phys) { - npages = mem->memory_size >> PAGE_SHIFT; - phys = vzalloc(npages * sizeof(unsigned long)); - if (!phys) - return -ENOMEM; - kvm->arch.slot_phys[mem->slot] = phys; - kvm->arch.slot_npages[mem->slot] = npages; - } - - return 0; -} - -static void unpin_slot(struct kvm *kvm, int slot_id) +static void unpin_slot(struct kvm_memory_slot *memslot) { unsigned long *physp; unsigned long j, npages, pfn; struct page *page; - physp = kvm->arch.slot_phys[slot_id]; - npages = kvm->arch.slot_npages[slot_id]; - if (physp) { - spin_lock(&kvm->arch.slot_phys_lock); - for (j = 0; j < npages; j++) { - if (!(physp[j] & KVMPPC_GOT_PAGE)) - continue; - pfn = physp[j] >> PAGE_SHIFT; - page = pfn_to_page(pfn); - SetPageDirty(page); - put_page(page); - } - kvm->arch.slot_phys[slot_id] = NULL; - spin_unlock(&kvm->arch.slot_phys_lock); - vfree(physp); + physp = memslot->arch.slot_phys; + npages = memslot->npages; + if (!physp) + return; + for (j = 0; j < npages; j++) { + if (!(physp[j] & KVMPPC_GOT_PAGE)) + continue; + pfn = physp[j] >> PAGE_SHIFT; + page = pfn_to_page(pfn); + SetPageDirty(page); + put_page(page); } } +void kvmppc_core_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ + if (!dont || free->arch.rmap != dont->arch.rmap) { + vfree(free->arch.rmap); + free->arch.rmap = NULL; + } + if (!dont || free->arch.slot_phys != dont->arch.slot_phys) { + unpin_slot(free); + vfree(free->arch.slot_phys); + free->arch.slot_phys = NULL; + } +} + +int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, + unsigned long npages) +{ + slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); + if (!slot->arch.rmap) + return -ENOMEM; + slot->arch.slot_phys = NULL; + + return 0; +} + +int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_userspace_memory_region *mem) +{ + unsigned long *phys; + + /* Allocate a slot_phys array if needed */ + phys = memslot->arch.slot_phys; + if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) { + phys = vzalloc(memslot->npages * sizeof(unsigned long)); + if (!phys) + return -ENOMEM; + memslot->arch.slot_phys = phys; + } + + return 0; +} + void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) { @@ 
-1482,11 +1501,16 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) /* Initialize phys addrs of pages in RMO */ npages = ri->npages; porder = __ilog2(npages); - physp = kvm->arch.slot_phys[memslot->id]; - spin_lock(&kvm->arch.slot_phys_lock); - for (i = 0; i < npages; ++i) - physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder; - spin_unlock(&kvm->arch.slot_phys_lock); + physp = memslot->arch.slot_phys; + if (physp) { + if (npages > memslot->npages) + npages = memslot->npages; + spin_lock(&kvm->arch.slot_phys_lock); + for (i = 0; i < npages; ++i) + physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + + porder; + spin_unlock(&kvm->arch.slot_phys_lock); + } } /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ @@ -1547,12 +1571,6 @@ int kvmppc_core_init_vm(struct kvm *kvm) void kvmppc_core_destroy_vm(struct kvm *kvm) { - unsigned long i; - - if (!kvm->arch.using_mmu_notifiers) - for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) - unpin_slot(kvm, i); - if (kvm->arch.rma) { kvm_release_rma(kvm->arch.rma); kvm->arch.rma = NULL; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index fb0e821622d4..63eb94e63cc3 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -183,7 +183,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, rmap = &memslot->arch.rmap[slot_fn]; if (!kvm->arch.using_mmu_notifiers) { - physp = kvm->arch.slot_phys[memslot->id]; + physp = memslot->arch.slot_phys; if (!physp) return H_PARAMETER; physp += slot_fn; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index b3c584f94cb3..fdadc9e57da2 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1220,7 +1220,19 @@ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) } #endif /* CONFIG_PPC64 */ +void kvmppc_core_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, + unsigned long npages) +{ + return 0; +} + int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem) { return 0; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 5f0476a602d8..514405752988 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1438,7 +1438,19 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return -ENOTSUPP; } +void kvmppc_core_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, + unsigned long npages) +{ + return 0; +} + int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem) { return 0; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0ffd7d17adc7..33122dd89da9 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -389,19 +389,12 @@ long kvm_arch_dev_ioctl(struct file *filp, void kvm_arch_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { - if (!dont || free->arch.rmap != dont->arch.rmap) { - vfree(free->arch.rmap); - free->arch.rmap = NULL; - } + kvmppc_core_free_memslot(free, dont); } int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) { - slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); - if (!slot->arch.rmap) - return -ENOMEM; - - return 0; + return 
kvmppc_core_create_memslot(slot, npages); } int kvm_arch_prepare_memory_region(struct kvm *kvm, @@ -410,7 +403,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, int user_alloc) { - return kvmppc_core_prepare_memory_region(kvm, mem); + return kvmppc_core_prepare_memory_region(kvm, memslot, mem); } void kvm_arch_commit_memory_region(struct kvm *kvm, From dfe49dbd1fc7310a4e0e2f83ae737cd7d34fa0cd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 11 Sep 2012 13:28:18 +0000 Subject: [PATCH 0241/5831] KVM: PPC: Book3S HV: Handle memory slot deletion and modification correctly This adds an implementation of kvm_arch_flush_shadow_memslot for Book3S HV, and arranges for kvmppc_core_commit_memory_region to flush the dirty log when modifying an existing slot. With this, we can handle deletion and modification of memory slots. kvm_arch_flush_shadow_memslot calls kvmppc_core_flush_memslot, which on Book3S HV now traverses the reverse map chains to remove any HPT (hashed page table) entries referring to pages in the memslot. This gets called by generic code whenever deleting a memslot or changing the guest physical address for a memslot. We flush the dirty log in kvmppc_core_commit_memory_region for consistency with what x86 does. We only need to flush when an existing memslot is being modified, because for a new memslot the rmap array (which stores the dirty bits) is all zero, meaning that every page is considered clean already, and when deleting a memslot we obviously don't care about the dirty bits any more. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 2 +- arch/powerpc/include/asm/kvm_ppc.h | 5 +++- arch/powerpc/kvm/book3s_64_mmu_hv.c | 33 +++++++++++++++++++++++---- arch/powerpc/kvm/book3s_hv.c | 18 +++++++++++++-- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- arch/powerpc/kvm/book3s_pr.c | 7 +++++- arch/powerpc/kvm/booke.c | 7 +++++- arch/powerpc/kvm/powerpc.c | 3 ++- 8 files changed, 64 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index f0e0c6a66d97..ab738005d2ea 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -160,7 +160,7 @@ extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel); extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, - struct kvm_memory_slot *memslot); + struct kvm_memory_slot *memslot, unsigned long *map); extern void kvmppc_entry_trampoline(void); extern void kvmppc_hv_entry_trampoline(void); diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 41a00eae68c7..3fb980d293e5 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -151,9 +151,12 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem); + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old); extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info); +extern void kvmppc_core_flush_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot); extern int kvmppc_bookehv_init(void); extern void kvmppc_bookehv_exit(void); diff --git 
a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index a389cc62b16c..f598366e51c6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -851,7 +851,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, psize = hpte_page_size(hptep[0], ptel); if ((hptep[0] & HPTE_V_VALID) && hpte_rpn(ptel, psize) == gfn) { - hptep[0] |= HPTE_V_ABSENT; + if (kvm->arch.using_mmu_notifiers) + hptep[0] |= HPTE_V_ABSENT; kvmppc_invalidate_hpte(kvm, hptep, i); /* Harvest R and C */ rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); @@ -878,6 +879,28 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) return 0; } +void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) +{ + unsigned long *rmapp; + unsigned long gfn; + unsigned long n; + + rmapp = memslot->arch.rmap; + gfn = memslot->base_gfn; + for (n = memslot->npages; n; --n) { + /* + * Testing the present bit without locking is OK because + * the memslot has been marked invalid already, and hence + * no new HPTEs referencing this page can be created, + * thus the present bit can't go from 0 to 1. + */ + if (*rmapp & KVMPPC_RMAP_PRESENT) + kvm_unmap_rmapp(kvm, rmapp, gfn); + ++rmapp; + ++gfn; + } +} + static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, unsigned long gfn) { @@ -1031,16 +1054,16 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) return ret; } -long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) +long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long *map) { unsigned long i; - unsigned long *rmapp, *map; + unsigned long *rmapp; preempt_disable(); rmapp = memslot->arch.rmap; - map = memslot->dirty_bitmap; for (i = 0; i < memslot->npages; ++i) { - if (kvm_test_clear_dirty(kvm, rmapp)) + if (kvm_test_clear_dirty(kvm, rmapp) && map) __set_bit_le(i, map); ++rmapp; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 817837de7362..38c7f1bc3495 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1288,7 +1288,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) n = kvm_dirty_bitmap_bytes(memslot); memset(memslot->dirty_bitmap, 0, n); - r = kvmppc_hv_get_dirty_log(kvm, memslot); + r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap); if (r) goto out; @@ -1378,8 +1378,22 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, } void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old) { + unsigned long npages = mem->memory_size >> PAGE_SHIFT; + struct kvm_memory_slot *memslot; + + if (npages && old.npages) { + /* + * If modifying a memslot, reset all the rmap dirty bits. + * If this is a new memslot, we don't need to do anything + * since the rmap array starts out as all zeroes, + * i.e. no pages are dirty. 
+ */ + memslot = id_to_memslot(kvm->memslots, mem->slot); + kvmppc_hv_get_dirty_log(kvm, memslot, NULL); + } } static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 63eb94e63cc3..9955216477a4 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -81,7 +81,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index, ptel = rev->guest_rpte |= rcbits; gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); - if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) + if (!memslot) return; rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index fdadc9e57da2..4d0667a810a4 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1239,7 +1239,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, } void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old) +{ +} + +void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 514405752988..3a6490fc6fcd 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1457,7 +1457,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, } void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old) +{ +} + +void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 33122dd89da9..8443e23f3605 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -411,7 +411,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_memory_slot old, int user_alloc) { - kvmppc_core_commit_memory_region(kvm, mem); + kvmppc_core_commit_memory_region(kvm, mem, old); } void kvm_arch_flush_shadow_all(struct kvm *kvm) @@ -421,6 +421,7 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm) void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { + kvmppc_core_flush_memslot(kvm, slot); } struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) From ed7a8d7a3c5dd4adedb271112b6faba45c7037f9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 13 Sep 2012 21:44:30 +0000 Subject: [PATCH 0242/5831] KVM: Move some PPC ioctl definitions to the correct place This moves the definitions of KVM_CREATE_SPAPR_TCE and KVM_ALLOCATE_RMA in include/linux/kvm.h from the section listing the vcpu ioctls to the section listing VM ioctls, as these are both implemented and documented as VM ioctls. Fortunately there is no actual collision of ioctl numbers at this point. Moving these to the correct section will reduce the probability of a future collision. This does not change the user/kernel ABI at all. 
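A hedged userspace sketch (vm_fd and the field values are hypothetical; the real contract is in Documentation/virtual/kvm/api.txt): both requests are issued on the VM file descriptor rather than on a vcpu, which is why they belong with the VM ioctls:

    struct kvm_create_spapr_tce tce = {
        .liobn = 0x10000000,        /* hypothetical logical I/O bus number */
        .window_size = 1 << 24,
    };
    struct kvm_allocate_rma rma;

    int tce_fd = ioctl(vm_fd, KVM_CREATE_SPAPR_TCE, &tce);  /* VM ioctl, returns an fd */
    int rma_fd = ioctl(vm_fd, KVM_ALLOCATE_RMA, &rma);      /* VM ioctl, fills rma.rma_size */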
Signed-off-by: Paul Mackerras Acked-by: Alexander Graf Signed-off-by: Alexander Graf --- include/linux/kvm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 1649d4b57e1f..65ad5c624c70 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -852,6 +852,9 @@ struct kvm_s390_ucas_mapping { #define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info) /* Available with KVM_CAP_PPC_ALLOC_HTAB */ #define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32) +#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) +/* Available with KVM_CAP_RMA */ +#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) /* * ioctls for vcpu fds @@ -915,9 +918,6 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_XCRS */ #define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs) #define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs) -#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) -/* Available with KVM_CAP_RMA */ -#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) /* Available with KVM_CAP_SW_TLB */ #define KVM_DIRTY_TLB _IOW(KVMIO, 0xaa, struct kvm_dirty_tlb) /* Available with KVM_CAP_ONE_REG */ From a47d72f3613d5edfd8e752c9b804d7df35810649 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 20 Sep 2012 19:35:51 +0000 Subject: [PATCH 0243/5831] KVM: PPC: Book3S HV: Fix updates of vcpu->cpu This removes the powerpc "generic" updates of vcpu->cpu in load and put, and moves them to the various backends. The reason is that "HV" KVM does its own sauce with that field and the generic updates might corrupt it. The field contains the CPU# of the -first- HW CPU of the core always for all the VCPU threads of a core (the one that's online from a host Linux perspective). However, the preempt notifiers are going to be called on the threads VCPUs when they are running (due to them sleeping on our private waitqueue) causing unload to be called, potentially clobbering the value. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 3 ++- arch/powerpc/kvm/booke.c | 2 ++ arch/powerpc/kvm/powerpc.c | 2 -- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 4d0667a810a4..bf3ec5d66d8c 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -64,7 +64,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max; svcpu_put(svcpu); #endif - + vcpu->cpu = smp_processor_id(); #ifdef CONFIG_PPC_BOOK3S_32 current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; #endif @@ -84,6 +84,7 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_FP); kvmppc_giveup_ext(vcpu, MSR_VEC); kvmppc_giveup_ext(vcpu, MSR_VSX); + vcpu->cpu = -1; } int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 3a6490fc6fcd..69d047c22d20 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1509,12 +1509,14 @@ void kvmppc_decrementer_func(unsigned long data) void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + vcpu->cpu = smp_processor_id(); current->thread.kvm_vcpu = vcpu; } void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu) { current->thread.kvm_vcpu = NULL; + vcpu->cpu = -1; } int __init kvmppc_booke_init(void) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8443e23f3605..6002ea938a48 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -504,7 +504,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); #endif kvmppc_core_vcpu_load(vcpu, cpu); - vcpu->cpu = smp_processor_id(); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -513,7 +512,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) #ifdef CONFIG_BOOKE vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); #endif - vcpu->cpu = -1; } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, From 964ee98ccde0534548565a201827cf06d813180f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 20 Sep 2012 19:36:32 +0000 Subject: [PATCH 0244/5831] KVM: PPC: Book3S HV: Remove bogus update of physical thread IDs When making a vcpu non-runnable we incorrectly changed the thread IDs of all other threads on the core, just remove that code. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 38c7f1bc3495..c9ae3148c981 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -706,17 +706,11 @@ extern void xics_wake_cpu(int cpu); static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, struct kvm_vcpu *vcpu) { - struct kvm_vcpu *v; - if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) return; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; --vc->n_runnable; ++vc->n_busy; - /* decrement the physical thread id of each following vcpu */ - v = vcpu; - list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list) - --v->arch.ptid; list_del(&vcpu->arch.run_list); } From 70bddfefbdcdbfdebd81d8b59ff8a7fa5d450ccc Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 20 Sep 2012 19:39:21 +0000 Subject: [PATCH 0245/5831] KVM: PPC: Book3S HV: Fix calculation of guest phys address for MMIO emulation In the case where the host kernel is using a 64kB base page size and the guest uses a 4k HPTE (hashed page table entry) to map an emulated MMIO device, we were calculating the guest physical address wrongly. We were calculating a gfn as the guest physical address shifted right 16 bits (PAGE_SHIFT) but then only adding back in 12 bits from the effective address, since the HPTE had a 4k page size. Thus the gpa reported to userspace was missing 4 bits. Instead, we now compute the guest physical address from the HPTE without reference to the host page size, and then compute the gfn by shifting the gpa right PAGE_SHIFT bits. Reported-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index f598366e51c6..7a4aae99ac5b 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -571,7 +571,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, struct kvm *kvm = vcpu->kvm; unsigned long *hptep, hpte[3], r; unsigned long mmu_seq, psize, pte_size; - unsigned long gfn, hva, pfn; + unsigned long gpa, gfn, hva, pfn; struct kvm_memory_slot *memslot; unsigned long *rmap; struct revmap_entry *rev; @@ -609,15 +609,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Translate the logical address and get the page */ psize = hpte_page_size(hpte[0], r); - gfn = hpte_rpn(r, psize); + gpa = (r & HPTE_R_RPN & ~(psize - 1)) | (ea & (psize - 1)); + gfn = gpa >> PAGE_SHIFT; memslot = gfn_to_memslot(kvm, gfn); /* No memslot means it's an emulated MMIO region */ - if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { - unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1)); + if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, dsisr & DSISR_ISSTORE); - } if (!kvm->arch.using_mmu_notifiers) return -EFAULT; /* should never get here */ From e400e72f250d2567e89c9bafb47ab91e8d9a15a2 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 22 Aug 2012 15:04:23 +0000 Subject: [PATCH 0246/5831] KVM: PPC: e500: fix allocation size error on g2h_tlb1_map We were only allocating half the bytes we need, which was made more obvious by a recent fix to the memset in clear_tlb1_bitmap(). 
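Illustrative arithmetic only (assuming the usual 4-byte unsigned int and 8-byte u64): each g2h_tlb1_map entry is a u64 bitmap, so the old sizing covered only half of what clear_tlb1_bitmap() clears:

    size_t entries  = vcpu_e500->gtlb_params[1].entries;
    size_t old_size = entries * sizeof(unsigned int);  /* 4 bytes per entry */
    size_t new_size = entries * sizeof(u64);           /* 8 bytes per entry */
    /* old_size == new_size / 2, so the memset ran past the allocation */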
Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Cc: stable@vger.kernel.org --- arch/powerpc/kvm/e500_tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 43489a8fa985..a27d134eef36 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1385,7 +1385,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) if (!vcpu_e500->gtlb_priv[1]) goto err; - vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(unsigned int) * + vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) * vcpu_e500->gtlb_params[1].entries, GFP_KERNEL); if (!vcpu_e500->g2h_tlb1_map) From adbb48a854bf8dee556dc42b96dd61503351a82d Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 22 Aug 2012 15:04:24 +0000 Subject: [PATCH 0247/5831] KVM: PPC: e500: MMU API: fix leak of shared_tlb_pages This was found by kmemleak. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index a27d134eef36..641f97847b95 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1134,6 +1134,8 @@ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) } vcpu_e500->num_shared_tlb_pages = 0; + + kfree(vcpu_e500->shared_tlb_pages); vcpu_e500->shared_tlb_pages = NULL; } else { kfree(vcpu_e500->gtlb_arch); From 5bd1cf118533aba41b3fbd4834e6362a9237db71 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 22 Aug 2012 15:03:50 +0000 Subject: [PATCH 0248/5831] KVM: PPC: set IN_GUEST_MODE before checking requests Avoid a race as described in the code comment. Also remove a related smp_wmb() from booke's kvmppc_prepare_to_enter(). I can't see any reason for it, and the book3s_pr version doesn't have it. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 1 - arch/powerpc/kvm/powerpc.c | 14 +++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 69d047c22d20..3d1f35dc7862 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -674,7 +674,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) out: vcpu->mode = OUTSIDE_GUEST_MODE; - smp_wmb(); return ret; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6002ea938a48..deb0d596d815 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -78,7 +78,16 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) break; } + vcpu->mode = IN_GUEST_MODE; + + /* + * Reading vcpu->requests must happen after setting vcpu->mode, + * so we don't miss a request because the requester sees + * OUTSIDE_GUEST_MODE and assumes we'll be checking requests + * before next entering the guest (and thus doesn't IPI). + */ smp_mb(); + if (vcpu->requests) { /* Make sure we process requests preemptable */ local_irq_enable(); @@ -111,11 +120,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) #endif kvm_guest_enter(); - - /* Going into guest context! Yay! */ - vcpu->mode = IN_GUEST_MODE; - smp_wmb(); - break; } From a136a8bdc02fc14625ac45ee846cc646fc46597e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 25 Sep 2012 20:31:56 +0000 Subject: [PATCH 0249/5831] KVM: PPC: Book3S: Get/set guest SPRs using the GET/SET_ONE_REG interface This enables userspace to get and set various SPRs (special-purpose registers) using the KVM_[GS]ET_ONE_REG ioctls. 
With this, userspace can get and set all the SPRs that are part of the guest state, either through the KVM_[GS]ET_REGS ioctls, the KVM_[GS]ET_SREGS ioctls, or the KVM_[GS]ET_ONE_REG ioctls. The SPRs that are added here are: - DABR: Data address breakpoint register - DSCR: Data stream control register - PURR: Processor utilization of resources register - SPURR: Scaled PURR - DAR: Data address register - DSISR: Data storage interrupt status register - AMR: Authority mask register - UAMOR: User authority mask override register - MMCR0, MMCR1, MMCRA: Performance monitor unit control registers - PMC1..PMC8: Performance monitor unit counter registers In order to reduce code duplication between PR and HV KVM code, this moves the kvm_vcpu_ioctl_[gs]et_one_reg functions into book3s.c and centralizes the copying between user and kernel space there. The registers that are handled differently between PR and HV, and those that exist only in one flavor, are handled in kvmppc_[gs]et_one_reg() functions that are specific to each flavor. Signed-off-by: Paul Mackerras [agraf: minimal style fixes] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 19 ++++++++ arch/powerpc/include/asm/kvm.h | 21 +++++++++ arch/powerpc/include/asm/kvm_ppc.h | 32 +++++++++++++ arch/powerpc/kvm/book3s.c | 68 ++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 76 +++++++++++++++++++++++++----- arch/powerpc/kvm/book3s_pr.c | 23 +++++---- 6 files changed, 215 insertions(+), 24 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index c84739e2c0f3..af9b7a06bcae 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1740,6 +1740,25 @@ registers, find a list below: PPC | KVM_REG_PPC_IAC4 | 64 PPC | KVM_REG_PPC_DAC1 | 64 PPC | KVM_REG_PPC_DAC2 | 64 + PPC | KVM_REG_PPC_DABR | 64 + PPC | KVM_REG_PPC_DSCR | 64 + PPC | KVM_REG_PPC_PURR | 64 + PPC | KVM_REG_PPC_SPURR | 64 + PPC | KVM_REG_PPC_DAR | 64 + PPC | KVM_REG_PPC_DSISR | 32 + PPC | KVM_REG_PPC_AMR | 64 + PPC | KVM_REG_PPC_UAMOR | 64 + PPC | KVM_REG_PPC_MMCR0 | 64 + PPC | KVM_REG_PPC_MMCR1 | 64 + PPC | KVM_REG_PPC_MMCRA | 64 + PPC | KVM_REG_PPC_PMC1 | 32 + PPC | KVM_REG_PPC_PMC2 | 32 + PPC | KVM_REG_PPC_PMC3 | 32 + PPC | KVM_REG_PPC_PMC4 | 32 + PPC | KVM_REG_PPC_PMC5 | 32 + PPC | KVM_REG_PPC_PMC6 | 32 + PPC | KVM_REG_PPC_PMC7 | 32 + PPC | KVM_REG_PPC_PMC8 | 32 4.69 KVM_GET_ONE_REG diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 3c14202a3c84..9557576a5325 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -338,5 +338,26 @@ struct kvm_book3e_206_tlb_params { #define KVM_REG_PPC_IAC4 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x5) #define KVM_REG_PPC_DAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x6) #define KVM_REG_PPC_DAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x7) +#define KVM_REG_PPC_DABR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8) +#define KVM_REG_PPC_DSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9) +#define KVM_REG_PPC_PURR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa) +#define KVM_REG_PPC_SPURR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb) +#define KVM_REG_PPC_DAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc) +#define KVM_REG_PPC_DSISR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd) +#define KVM_REG_PPC_AMR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xe) +#define KVM_REG_PPC_UAMOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xf) + +#define KVM_REG_PPC_MMCR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x10) +#define KVM_REG_PPC_MMCR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x11) +#define KVM_REG_PPC_MMCRA 
(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x12) + +#define KVM_REG_PPC_PMC1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x18) +#define KVM_REG_PPC_PMC2 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x19) +#define KVM_REG_PPC_PMC3 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1a) +#define KVM_REG_PPC_PMC4 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1b) +#define KVM_REG_PPC_PMC5 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1c) +#define KVM_REG_PPC_PMC6 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1d) +#define KVM_REG_PPC_PMC7 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1e) +#define KVM_REG_PPC_PMC8 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1f) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 3fb980d293e5..709f0ddae1f1 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -28,6 +28,7 @@ #include #include #include +#include #ifdef CONFIG_PPC_BOOK3S #include #else @@ -196,6 +197,35 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) return r; } +union kvmppc_one_reg { + u32 wval; + u64 dval; +}; + +#define one_reg_size(id) \ + (1ul << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + +#define get_reg_val(id, reg) ({ \ + union kvmppc_one_reg __u; \ + switch (one_reg_size(id)) { \ + case 4: __u.wval = (reg); break; \ + case 8: __u.dval = (reg); break; \ + default: BUG(); \ + } \ + __u; \ +}) + + +#define set_reg_val(id, val) ({ \ + u64 __v; \ + switch (one_reg_size(id)) { \ + case 4: __v = (val).wval; break; \ + case 8: __v = (val).dval; break; \ + default: BUG(); \ + } \ + __v; \ +}) + void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); @@ -204,6 +234,8 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *); +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *); void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index e94666566fa9..a5af28fc3a8f 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -485,6 +485,74 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -ENOTSUPP; } +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r; + union kvmppc_one_reg val; + int size; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; + + r = kvmppc_get_one_reg(vcpu, reg->id, &val); + + if (r == -EINVAL) { + r = 0; + switch (reg->id) { + case KVM_REG_PPC_DAR: + val = get_reg_val(reg->id, vcpu->arch.shared->dar); + break; + case KVM_REG_PPC_DSISR: + val = get_reg_val(reg->id, vcpu->arch.shared->dsisr); + break; + default: + r = -EINVAL; + break; + } + } + if (r) + return r; + + if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) + r = -EFAULT; + + return r; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r; + union kvmppc_one_reg val; + int size; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; + + if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) + return -EFAULT; + + r = kvmppc_set_one_reg(vcpu, reg->id, &val); + + if (r == -EINVAL) { + r = 0; + switch (reg->id) { + case 
KVM_REG_PPC_DAR: + vcpu->arch.shared->dar = set_reg_val(reg->id, val); + break; + case KVM_REG_PPC_DSISR: + vcpu->arch.shared->dsisr = set_reg_val(reg->id, val); + break; + default: + r = -EINVAL; + break; + } + } + + return r; +} + int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c9ae3148c981..1cc6b77fa63d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -544,36 +544,88 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } -int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = -EINVAL; + int r = 0; + long int i; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_HIOR: - r = put_user(0, (u64 __user *)reg->addr); + *val = get_reg_val(id, 0); + break; + case KVM_REG_PPC_DABR: + *val = get_reg_val(id, vcpu->arch.dabr); + break; + case KVM_REG_PPC_DSCR: + *val = get_reg_val(id, vcpu->arch.dscr); + break; + case KVM_REG_PPC_PURR: + *val = get_reg_val(id, vcpu->arch.purr); + break; + case KVM_REG_PPC_SPURR: + *val = get_reg_val(id, vcpu->arch.spurr); + break; + case KVM_REG_PPC_AMR: + *val = get_reg_val(id, vcpu->arch.amr); + break; + case KVM_REG_PPC_UAMOR: + *val = get_reg_val(id, vcpu->arch.uamor); + break; + case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA: + i = id - KVM_REG_PPC_MMCR0; + *val = get_reg_val(id, vcpu->arch.mmcr[i]); + break; + case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: + i = id - KVM_REG_PPC_PMC1; + *val = get_reg_val(id, vcpu->arch.pmc[i]); break; default: + r = -EINVAL; break; } return r; } -int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = -EINVAL; + int r = 0; + long int i; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_HIOR: - { - u64 hior; /* Only allow this to be set to zero */ - r = get_user(hior, (u64 __user *)reg->addr); - if (!r && (hior != 0)) + if (set_reg_val(id, *val)) r = -EINVAL; break; - } + case KVM_REG_PPC_DABR: + vcpu->arch.dabr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_DSCR: + vcpu->arch.dscr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_PURR: + vcpu->arch.purr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_SPURR: + vcpu->arch.spurr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_AMR: + vcpu->arch.amr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_UAMOR: + vcpu->arch.uamor = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA: + i = id - KVM_REG_PPC_MMCR0; + vcpu->arch.mmcr[i] = set_reg_val(id, *val); + break; + case KVM_REG_PPC_PMC1 ... 
KVM_REG_PPC_PMC8: + i = id - KVM_REG_PPC_PMC1; + vcpu->arch.pmc[i] = set_reg_val(id, *val); + break; default: + r = -EINVAL; break; } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index bf3ec5d66d8c..c81109f3a376 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -945,34 +945,33 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } -int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = -EINVAL; + int r = 0; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_HIOR: - r = copy_to_user((u64 __user *)(long)reg->addr, - &to_book3s(vcpu)->hior, sizeof(u64)); + *val = get_reg_val(id, to_book3s(vcpu)->hior); break; default: + r = -EINVAL; break; } return r; } -int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = -EINVAL; + int r = 0; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_HIOR: - r = copy_from_user(&to_book3s(vcpu)->hior, - (u64 __user *)(long)reg->addr, sizeof(u64)); - if (!r) - to_book3s(vcpu)->hior_explicit = true; + to_book3s(vcpu)->hior = set_reg_val(id, *val); + to_book3s(vcpu)->hior_explicit = true; break; default: + r = -EINVAL; break; } From a8bd19ef4dd49f0eef86a4a8eb43d60f967236b8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 25 Sep 2012 20:32:30 +0000 Subject: [PATCH 0250/5831] KVM: PPC: Book3S: Get/set guest FP regs using the GET/SET_ONE_REG interface This enables userspace to get and set all the guest floating-point state using the KVM_[GS]ET_ONE_REG ioctls. The floating-point state includes all of the traditional floating-point registers and the FPSCR (floating point status/control register), all the VMX/Altivec vector registers and the VSCR (vector status/control register), and on POWER7, the vector-scalar registers (note that each FP register is the high-order half of the corresponding VSR). Most of these are implemented in common Book 3S code, except for VSX on POWER7. Because HV and PR differ in how they store the FP and VSX registers on POWER7, the code for these cases is not common. On POWER7, the FP registers are the upper halves of the VSX registers vsr0 - vsr31. PR KVM stores vsr0 - vsr31 in two halves, with the upper halves in the arch.fpr[] array and the lower halves in the arch.vsr[] array, whereas HV KVM on POWER7 stores the whole VSX register in arch.vsr[]. Signed-off-by: Paul Mackerras [agraf: fix whitespace, vsx compilation] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 11 +++++++ arch/powerpc/include/asm/kvm.h | 20 +++++++++++++ arch/powerpc/include/asm/kvm_ppc.h | 2 ++ arch/powerpc/kvm/book3s.c | 48 ++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 42 ++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_pr.c | 26 ++++++++++++++++ 6 files changed, 149 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index af9b7a06bcae..71dc7e2ec8eb 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1759,6 +1759,17 @@ registers, find a list below: PPC | KVM_REG_PPC_PMC6 | 32 PPC | KVM_REG_PPC_PMC7 | 32 PPC | KVM_REG_PPC_PMC8 | 32 + PPC | KVM_REG_PPC_FPR0 | 64 + ... + PPC | KVM_REG_PPC_FPR31 | 64 + PPC | KVM_REG_PPC_VR0 | 128 + ... + PPC | KVM_REG_PPC_VR31 | 128 + PPC | KVM_REG_PPC_VSR0 | 128 + ... 
+ PPC | KVM_REG_PPC_VSR31 | 128 + PPC | KVM_REG_PPC_FPSCR | 64 + PPC | KVM_REG_PPC_VSCR | 32 4.69 KVM_GET_ONE_REG diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 9557576a5325..1466975129c7 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -360,4 +360,24 @@ struct kvm_book3e_206_tlb_params { #define KVM_REG_PPC_PMC7 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1e) #define KVM_REG_PPC_PMC8 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1f) +/* 32 floating-point registers */ +#define KVM_REG_PPC_FPR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x20) +#define KVM_REG_PPC_FPR(n) (KVM_REG_PPC_FPR0 + (n)) +#define KVM_REG_PPC_FPR31 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3f) + +/* 32 VMX/Altivec vector registers */ +#define KVM_REG_PPC_VR0 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x40) +#define KVM_REG_PPC_VR(n) (KVM_REG_PPC_VR0 + (n)) +#define KVM_REG_PPC_VR31 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x5f) + +/* 32 double-width FP registers for VSX */ +/* High-order halves overlap with FP regs */ +#define KVM_REG_PPC_VSR0 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x60) +#define KVM_REG_PPC_VSR(n) (KVM_REG_PPC_VSR0 + (n)) +#define KVM_REG_PPC_VSR31 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x7f) + +/* FP and vector status/control registers */ +#define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80) +#define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81) + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 709f0ddae1f1..51604a16c8a5 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -200,6 +200,8 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) union kvmppc_one_reg { u32 wval; u64 dval; + vector128 vval; + u64 vsxval[2]; }; #define one_reg_size(id) \ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a5af28fc3a8f..a4b645285240 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -490,6 +490,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) int r; union kvmppc_one_reg val; int size; + long int i; size = one_reg_size(reg->id); if (size > sizeof(val)) @@ -506,6 +507,29 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_DSISR: val = get_reg_val(reg->id, vcpu->arch.shared->dsisr); break; + case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: + i = reg->id - KVM_REG_PPC_FPR0; + val = get_reg_val(reg->id, vcpu->arch.fpr[i]); + break; + case KVM_REG_PPC_FPSCR: + val = get_reg_val(reg->id, vcpu->arch.fpscr); + break; +#ifdef CONFIG_ALTIVEC + case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + val.vval = vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0]; + break; + case KVM_REG_PPC_VSCR: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); + break; +#endif /* CONFIG_ALTIVEC */ default: r = -EINVAL; break; @@ -525,6 +549,7 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) int r; union kvmppc_one_reg val; int size; + long int i; size = one_reg_size(reg->id); if (size > sizeof(val)) @@ -544,6 +569,29 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_DSISR: vcpu->arch.shared->dsisr = set_reg_val(reg->id, val); break; + case KVM_REG_PPC_FPR0 ... 
KVM_REG_PPC_FPR31: + i = reg->id - KVM_REG_PPC_FPR0; + vcpu->arch.fpr[i] = set_reg_val(reg->id, val); + break; + case KVM_REG_PPC_FPSCR: + vcpu->arch.fpscr = set_reg_val(reg->id, val); + break; +#ifdef CONFIG_ALTIVEC + case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; + break; + case KVM_REG_PPC_VSCR: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); + break; +#endif /* CONFIG_ALTIVEC */ default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1cc6b77fa63d..94ec0e30969d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -579,6 +579,27 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) i = id - KVM_REG_PPC_PMC1; *val = get_reg_val(id, vcpu->arch.pmc[i]); break; +#ifdef CONFIG_VSX + case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: + if (cpu_has_feature(CPU_FTR_VSX)) { + /* VSX => FP reg i is stored in arch.vsr[2*i] */ + long int i = id - KVM_REG_PPC_FPR0; + *val = get_reg_val(id, vcpu->arch.vsr[2 * i]); + } else { + /* let generic code handle it */ + r = -EINVAL; + } + break; + case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: + if (cpu_has_feature(CPU_FTR_VSX)) { + long int i = id - KVM_REG_PPC_VSR0; + val->vsxval[0] = vcpu->arch.vsr[2 * i]; + val->vsxval[1] = vcpu->arch.vsr[2 * i + 1]; + } else { + r = -ENXIO; + } + break; +#endif /* CONFIG_VSX */ default: r = -EINVAL; break; @@ -624,6 +645,27 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) i = id - KVM_REG_PPC_PMC1; vcpu->arch.pmc[i] = set_reg_val(id, *val); break; +#ifdef CONFIG_VSX + case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: + if (cpu_has_feature(CPU_FTR_VSX)) { + /* VSX => FP reg i is stored in arch.vsr[2*i] */ + long int i = id - KVM_REG_PPC_FPR0; + vcpu->arch.vsr[2 * i] = set_reg_val(id, *val); + } else { + /* let generic code handle it */ + r = -EINVAL; + } + break; + case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: + if (cpu_has_feature(CPU_FTR_VSX)) { + long int i = id - KVM_REG_PPC_VSR0; + vcpu->arch.vsr[2 * i] = val->vsxval[0]; + vcpu->arch.vsr[2 * i + 1] = val->vsxval[1]; + } else { + r = -ENXIO; + } + break; +#endif /* CONFIG_VSX */ default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index c81109f3a376..b853696b6d8e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -953,6 +953,19 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) case KVM_REG_PPC_HIOR: *val = get_reg_val(id, to_book3s(vcpu)->hior); break; +#ifdef CONFIG_VSX + case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: { + long int i = id - KVM_REG_PPC_VSR0; + + if (!cpu_has_feature(CPU_FTR_VSX)) { + r = -ENXIO; + break; + } + val->vsxval[0] = vcpu->arch.fpr[i]; + val->vsxval[1] = vcpu->arch.vsr[i]; + break; + } +#endif /* CONFIG_VSX */ default: r = -EINVAL; break; @@ -970,6 +983,19 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) to_book3s(vcpu)->hior = set_reg_val(id, *val); to_book3s(vcpu)->hior_explicit = true; break; +#ifdef CONFIG_VSX + case KVM_REG_PPC_VSR0 ... 
KVM_REG_PPC_VSR31: { + long int i = id - KVM_REG_PPC_VSR0; + + if (!cpu_has_feature(CPU_FTR_VSX)) { + r = -ENXIO; + break; + } + vcpu->arch.fpr[i] = val->vsxval[0]; + vcpu->arch.vsr[i] = val->vsxval[1]; + break; + } +#endif /* CONFIG_VSX */ default: r = -EINVAL; break; From 55b665b0263ae88a776071306ef1eee4b769016b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 25 Sep 2012 20:33:06 +0000 Subject: [PATCH 0251/5831] KVM: PPC: Book3S HV: Provide a way for userspace to get/set per-vCPU areas The PAPR paravirtualization interface lets guests register three different types of per-vCPU buffer areas in its memory for communication with the hypervisor. These are called virtual processor areas (VPAs). Currently the hypercalls to register and unregister VPAs are handled by KVM in the kernel, and userspace has no way to know about or save and restore these registrations across a migration. This adds "register" codes for these three areas that userspace can use with the KVM_GET/SET_ONE_REG ioctls to see what addresses have been registered, and to register or unregister them. This will be needed for guest hibernation and migration, and is also needed so that userspace can unregister them on reset (otherwise we corrupt guest memory after reboot by writing to the VPAs registered by the previous kernel). The "register" for the VPA is a 64-bit value containing the address, since the length of the VPA is fixed. The "registers" for the SLB shadow buffer and dispatch trace log (DTL) are 128 bits long, consisting of the guest physical address in the high (first) 64 bits and the length in the low 64 bits. This also fixes a bug where we were calling init_vpa unconditionally, leading to an oops when unregistering the VPA. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 3 ++ arch/powerpc/include/asm/kvm.h | 6 +++ arch/powerpc/include/asm/kvm_ppc.h | 4 ++ arch/powerpc/kvm/book3s_hv.c | 64 +++++++++++++++++++++++++++++- 4 files changed, 76 insertions(+), 1 deletion(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 71dc7e2ec8eb..e726d76eeebb 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1770,6 +1770,9 @@ registers, find a list below: PPC | KVM_REG_PPC_VSR31 | 128 PPC | KVM_REG_PPC_FPSCR | 64 PPC | KVM_REG_PPC_VSCR | 32 + PPC | KVM_REG_PPC_VPA_ADDR | 64 + PPC | KVM_REG_PPC_VPA_SLB | 128 + PPC | KVM_REG_PPC_VPA_DTL | 128 4.69 KVM_GET_ONE_REG diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 1466975129c7..b89ae4db45ce 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -380,4 +380,10 @@ struct kvm_book3e_206_tlb_params { #define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80) #define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81) +/* Virtual processor areas */ +/* For SLB & DTL, address in high (first) half, length in low half */ +#define KVM_REG_PPC_VPA_ADDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x82) +#define KVM_REG_PPC_VPA_SLB (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x83) +#define KVM_REG_PPC_VPA_DTL (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84) + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 51604a16c8a5..609cca3e9426 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -202,6 +202,10 @@ union kvmppc_one_reg { u64 dval; vector128 vval; u64 vsxval[2]; + struct { + u64 addr; + u64 
length; + } vpaval; }; #define one_reg_size(id) \ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 94ec0e30969d..9a15da76e56b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -143,6 +143,22 @@ static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) vpa->yield_count = 1; } +static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v, + unsigned long addr, unsigned long len) +{ + /* check address is cacheline aligned */ + if (addr & (L1_CACHE_BYTES - 1)) + return -EINVAL; + spin_lock(&vcpu->arch.vpa_update_lock); + if (v->next_gpa != addr || v->len != len) { + v->next_gpa = addr; + v->len = addr ? len : 0; + v->update_pending = 1; + } + spin_unlock(&vcpu->arch.vpa_update_lock); + return 0; +} + /* Length for a per-processor buffer is passed in at offset 4 in the buffer */ struct reg_vpa { u32 dummy; @@ -321,7 +337,8 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) spin_lock(&vcpu->arch.vpa_update_lock); if (vcpu->arch.vpa.update_pending) { kvmppc_update_vpa(vcpu, &vcpu->arch.vpa); - init_vpa(vcpu, vcpu->arch.vpa.pinned_addr); + if (vcpu->arch.vpa.pinned_addr) + init_vpa(vcpu, vcpu->arch.vpa.pinned_addr); } if (vcpu->arch.dtl.update_pending) { kvmppc_update_vpa(vcpu, &vcpu->arch.dtl); @@ -600,6 +617,23 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) } break; #endif /* CONFIG_VSX */ + case KVM_REG_PPC_VPA_ADDR: + spin_lock(&vcpu->arch.vpa_update_lock); + *val = get_reg_val(id, vcpu->arch.vpa.next_gpa); + spin_unlock(&vcpu->arch.vpa_update_lock); + break; + case KVM_REG_PPC_VPA_SLB: + spin_lock(&vcpu->arch.vpa_update_lock); + val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa; + val->vpaval.length = vcpu->arch.slb_shadow.len; + spin_unlock(&vcpu->arch.vpa_update_lock); + break; + case KVM_REG_PPC_VPA_DTL: + spin_lock(&vcpu->arch.vpa_update_lock); + val->vpaval.addr = vcpu->arch.dtl.next_gpa; + val->vpaval.length = vcpu->arch.dtl.len; + spin_unlock(&vcpu->arch.vpa_update_lock); + break; default: r = -EINVAL; break; @@ -612,6 +646,7 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { int r = 0; long int i; + unsigned long addr, len; switch (id) { case KVM_REG_PPC_HIOR: @@ -666,6 +701,33 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) } break; #endif /* CONFIG_VSX */ + case KVM_REG_PPC_VPA_ADDR: + addr = set_reg_val(id, *val); + r = -EINVAL; + if (!addr && (vcpu->arch.slb_shadow.next_gpa || + vcpu->arch.dtl.next_gpa)) + break; + r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca)); + break; + case KVM_REG_PPC_VPA_SLB: + addr = val->vpaval.addr; + len = val->vpaval.length; + r = -EINVAL; + if (addr && !vcpu->arch.vpa.next_gpa) + break; + r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len); + break; + case KVM_REG_PPC_VPA_DTL: + addr = val->vpaval.addr; + len = val->vpaval.length; + r = -EINVAL; + if (len < sizeof(struct dtl_entry)) + break; + if (addr && !vcpu->arch.vpa.next_gpa) + break; + len -= len % sizeof(struct dtl_entry); + r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len); + break; default: r = -EINVAL; break; From 12ecd9570d8941c15602a11725ec9b0ede48d6c2 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 4 Aug 2012 23:52:33 +0000 Subject: [PATCH 0252/5831] arch/powerpc/kvm/e500_tlb.c: fix error return code Convert a 0 error return code to a negative one, as returned elsewhere in the function. A new label is also added to avoid freeing things that are known to not yet be allocated. 
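For readers less familiar with the convention being restored here, the sketch below shows the kernel's usual error-unwinding shape in stripped-down form (hypothetical names and sizes, not the e500 TLB code itself): every failure path hands back a negative errno rather than 0, and each goto label unwinds only the allocations made before the failure, which is exactly why the extra label is needed.

#include <linux/slab.h>

/* Hypothetical context struct; the real function juggles pages, privs[] and a bitmap. */
struct demo_ctx {
        void *a;
        void *b;
};

static int demo_setup(struct demo_ctx *c)
{
        int ret;

        c->a = kzalloc(64, GFP_KERNEL);
        if (!c->a)
                return -ENOMEM;         /* nothing allocated yet, plain return */

        c->b = kzalloc(64, GFP_KERNEL);
        if (!c->b) {
                ret = -ENOMEM;          /* negative errno, never 0 */
                goto err_free_a;        /* unwind only what already exists */
        }

        return 0;

err_free_a:
        kfree(c->a);
        return ret;
}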
A simplified version of the semantic match that finds the first problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier ret; expression e,e1,e2,e3,e4,x; @@ ( if (\(ret != 0\|ret < 0\) || ...) { ... return ...; } | ret = 0 ) ... when != ret = e1 *x = \(kmalloc\|kzalloc\|kcalloc\|devm_kzalloc\|ioremap\|ioremap_nocache\|devm_ioremap\|devm_ioremap_nocache\)(...); ... when != x = e2 when != ret = e3 *if (x == NULL || ...) { ... when != ret = e4 * return ret; } // Signed-off-by: Julia Lawall Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 641f97847b95..c73389477d17 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1233,21 +1233,27 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, } virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL); - if (!virt) + if (!virt) { + ret = -ENOMEM; goto err_put_page; + } privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0], GFP_KERNEL); privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1], GFP_KERNEL); - if (!privs[0] || !privs[1]) - goto err_put_page; + if (!privs[0] || !privs[1]) { + ret = -ENOMEM; + goto err_privs; + } g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1], GFP_KERNEL); - if (!g2h_bitmap) - goto err_put_page; + if (!g2h_bitmap) { + ret = -ENOMEM; + goto err_privs; + } free_gtlb(vcpu_e500); @@ -1287,10 +1293,11 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, kvmppc_recalc_tlb1map_range(vcpu_e500); return 0; -err_put_page: +err_privs: kfree(privs[0]); kfree(privs[1]); +err_put_page: for (i = 0; i < num_pages; i++) put_page(pages[i]); From 385ddeac7ed99cf7dc62d76274d55fbd7cae1b5a Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Fri, 5 Oct 2012 15:05:34 -0700 Subject: [PATCH 0253/5831] X86 ACPI: Use #ifdef not #if for CONFIG_X86 check Fix a build warning on ia64: include/linux/acpi.h:437:5: warning: "CONFIG_X86" is not defined Signed-off-by: Tony Luck Link: http://lkml.kernel.org/r/506f59ae9600b36a4@agluck-desktop.sc.intel.com Cc: Len Brown Cc: Thomas Renninger Signed-off-by: H. Peter Anvin --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d14081c10c17..49fe586e3b1e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -434,7 +434,7 @@ void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state, acpi_status acpi_os_prepare_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control); -#if CONFIG_X86 +#ifdef CONFIG_X86 void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else static inline void arch_reserve_mem_area(acpi_physical_address addr, From b57a1e962eadb36b06f92e70ca45536754786b0f Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 5 Oct 2012 12:51:06 +0100 Subject: [PATCH 0254/5831] drm/i915: Remove the disabling of VHR unit clock gating for HSW There's is another register (a read only, so no harm done) at 0x42020 on Haswell GPUs. Let's just remove the write from the copy&paste that introduced haswell_init_clock_gating(). A note for the interested reader, it does seem we have a duplication of the 0x42020 register definition, hence the removal of 2 writes. That duplication could be the object of a later patch. 
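Concretely, the duplication referred to above is two macro names resolving to the same MMIO offset, roughly as sketched here (illustrative only; check i915_reg.h for the authoritative definitions):

/* Two names for one offset, which is why the hunk below drops two writes: */
#define PCH_DSPCLK_GATE_D       0x42020
#define ILK_DSPCLK_GATE         0x42020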
Signed-off-by: Damien Lespiau Cc: Paulo Zanoni Reviewed-by: Paulo Zanoni Tested-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 62fe848d60f0..b5b772af6b81 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3460,9 +3460,6 @@ static void haswell_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; int pipe; - uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE; - - I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate); I915_WRITE(WM3_LP_ILK, 0); I915_WRITE(WM2_LP_ILK, 0); @@ -3473,8 +3470,6 @@ static void haswell_init_clock_gating(struct drm_device *dev) */ I915_WRITE(GEN6_UCGCTL2, GEN6_RCZUNIT_CLOCK_GATE_DISABLE); - I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE); - /* WaDisableEarlyCull */ I915_WRITE(_3D_CHICKEN3, _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); From 1a4bd9eaf078a0f95b4c92a02fc6d16647212bb5 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 7 Oct 2012 21:26:35 +0800 Subject: [PATCH 0255/5831] drm/i915: remove duplicated include from intel_modes.c Remove duplicated include. dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_modes.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_modes.c b/drivers/gpu/drm/i915/intel_modes.c index 4bc1c0fc342a..1773fb871c2c 100644 --- a/drivers/gpu/drm/i915/intel_modes.c +++ b/drivers/gpu/drm/i915/intel_modes.c @@ -28,7 +28,6 @@ #include #include #include "drmP.h" -#include "drm_edid.h" #include "intel_drv.h" #include "i915_drv.h" From 62cb944fa24728d164497834e4181ba0d266b32b Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Thu, 4 Oct 2012 18:49:23 +0100 Subject: [PATCH 0256/5831] drm/i915: Document that we are implementing WaDisableBackToBackFlipFix For the next person that checks these kind of things, without having to dig up the register definition. Signed-off-by: Damien Lespiau Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b5b772af6b81..0dd2ca707d0a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3536,6 +3536,7 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) I915_WRITE(_3D_CHICKEN3, _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); + /* WaDisableBackToBackFlipFix */ I915_WRITE(IVB_CHICKEN3, CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | CHICKEN3_DGMG_DONE_FIX_DISABLE); @@ -3616,6 +3617,7 @@ static void valleyview_init_clock_gating(struct drm_device *dev) I915_WRITE(_3D_CHICKEN3, _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); + /* WaDisableBackToBackFlipFix */ I915_WRITE(IVB_CHICKEN3, CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | CHICKEN3_DGMG_DONE_FIX_DISABLE); From a9627b881680c4419c61e70f60e8f957d8ef225c Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Thu, 4 Oct 2012 18:49:24 +0100 Subject: [PATCH 0257/5831] drm/i915: Remove the WaDisableBackToBackFlipFix w/a for Haswell This workaround is only valid for IVB and VLV and the write triggers an error on HSW. 
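To see why removing the write is the right fix rather than guarding it at runtime, recall how these init paths are structured: each GPU generation gets its own clock-gating hook, so a workaround that is only valid on one platform should simply never appear in another platform's hook. A schematic sketch with made-up names (not the actual i915 hook table) follows.

struct demo_gpu;

struct demo_clock_gating_ops {
        void (*init_clock_gating)(struct demo_gpu *gpu);
};

static void demo_mmio_write(unsigned int reg, unsigned int val)
{
        /* platform MMIO write would go here */
}

#define DEMO_CHICKEN3           0x1000          /* made-up offset */
#define DEMO_FLIP_FIX_DISABLE   (1u << 0)

static void demo_ivb_init_clock_gating(struct demo_gpu *gpu)
{
        /* back-to-back flip workaround: valid on IVB/VLV only */
        demo_mmio_write(DEMO_CHICKEN3, DEMO_FLIP_FIX_DISABLE);
}

static void demo_hsw_init_clock_gating(struct demo_gpu *gpu)
{
        /* no CHICKEN3 write: that register write errors out on this platform */
}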
Signed-off-by: Damien Lespiau Cc: Paulo Zanoni Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0dd2ca707d0a..eb757e5f2d87 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3474,10 +3474,6 @@ static void haswell_init_clock_gating(struct drm_device *dev) I915_WRITE(_3D_CHICKEN3, _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); - I915_WRITE(IVB_CHICKEN3, - CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | - CHICKEN3_DGMG_DONE_FIX_DISABLE); - /* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */ I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); From cecc21fea98cd18419e26a46e4e8123f55fead91 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Oct 2012 17:02:56 +0100 Subject: [PATCH 0258/5831] drm/i915: Align the hangcheck wakeup to the nearest second round_jiffies() aligns the wakeup time to the nearest second in order to batch wakeups and reduce system load, which is useful for unimportant coarse timers like our hangcheck. v2: round_jiffies_relative() returns the relative jiffie value, whereas we need the absolute value for the timer. Suggested-by: Arjan van de Ven Signed-off-by: Chris Wilson Cc: Arjan van de Ven Reviewed-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 3 +-- drivers/gpu/drm/i915/i915_irq.c | 5 ++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index af5ceb46e7bd..7f05736c6d8a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -452,6 +452,7 @@ typedef struct drm_i915_private { /* For hangcheck timer */ #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ +#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) struct timer_list hangcheck_timer; int hangcheck_count; uint32_t last_acthd[I915_NUM_RINGS]; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2f76905ab2b0..ca3ab04c5373 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2015,8 +2015,7 @@ i915_add_request(struct intel_ring_buffer *ring, if (!dev_priv->mm.suspended) { if (i915_enable_hangcheck) { mod_timer(&dev_priv->hangcheck_timer, - jiffies + - msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); + round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES)); } if (was_empty) { queue_delayed_work(dev_priv->wq, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 337c5cdf5598..1406d79251ad 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -353,8 +353,7 @@ static void notify_ring(struct drm_device *dev, if (i915_enable_hangcheck) { dev_priv->hangcheck_count = 0; mod_timer(&dev_priv->hangcheck_timer, - jiffies + - msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); + round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES)); } } @@ -1752,7 +1751,7 @@ void i915_hangcheck_elapsed(unsigned long data) repeat: /* Reset timer case chip hangs without another request being added */ mod_timer(&dev_priv->hangcheck_timer, - jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); + round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES)); } /* drm_dma.h hooks From bcb450861653167c4fec10be3d916299c34bb1d8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 5 Oct 2012 
17:02:57 +0100 Subject: [PATCH 0259/5831] drm/i915: Align the retire_requests worker to the nearest second By using round_jiffies() we can align the wakeup of our worker to the nearest second in order to batch wakeups and reduce system load, which is useful for unimportant coarse tasks like our retire_requests. v2: round_jiffies_relative() already returns the relative timeout value, so no need to incorrectly perform the subtraction twice. The timer interface still leaves the possibility for the value of jiffies to change be we program the timer. Suggested-by: Arjan van de Ven Signed-off-by: Chris Wilson Cc: Arjan van de Ven Reviewed-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ca3ab04c5373..eb3316bb4c3c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2019,7 +2019,8 @@ i915_add_request(struct intel_ring_buffer *ring, } if (was_empty) { queue_delayed_work(dev_priv->wq, - &dev_priv->mm.retire_work, HZ); + &dev_priv->mm.retire_work, + round_jiffies_up_relative(HZ)); intel_mark_busy(dev_priv->dev); } } @@ -2208,7 +2209,8 @@ i915_gem_retire_work_handler(struct work_struct *work) /* Come back later if the device is busy... */ if (!mutex_trylock(&dev->struct_mutex)) { - queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); + queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, + round_jiffies_up_relative(HZ)); return; } @@ -2226,7 +2228,8 @@ i915_gem_retire_work_handler(struct work_struct *work) } if (!dev_priv->mm.suspended && !idle) - queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); + queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, + round_jiffies_up_relative(HZ)); if (idle) intel_mark_idle(dev); From 8b6e4547e0e4b6aac11df6d8d4e71ea2ab159b5e Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 20 Sep 2012 07:43:08 +0200 Subject: [PATCH 0260/5831] KVM: x86: Convert kvm_arch_vcpu_reset into private kvm_vcpu_reset There are no external callers of this function as there is no concept of resetting a vcpu from generic code. 
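The mechanics of such a demotion are worth spelling out once, since the same pattern recurs in the next patch as well: drop the prototype from the shared header, mark the definition static, and add a file-local forward declaration when the function is called above its definition. A minimal sketch with hypothetical names (not the KVM code itself):

struct widget {
        int state;
};

/* Previously in a shared header:  int arch_widget_reset(struct widget *w); */

static int widget_reset(struct widget *w);      /* file-local forward declaration */

static int widget_run(struct widget *w)
{
        return widget_reset(w);                 /* caller sits above the definition */
}

static int widget_reset(struct widget *w)
{
        w->state = 0;
        return 0;
}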
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 8 +++++--- include/linux/kvm_host.h | 1 - 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1eefebe5d727..dfed7ca2d27a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -160,6 +160,8 @@ u64 __read_mostly host_xcr0; int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); +static int kvm_vcpu_reset(struct kvm_vcpu *vcpu); + static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) { int i; @@ -5426,7 +5428,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) pr_debug("vcpu %d received sipi with vector # %x\n", vcpu->vcpu_id, vcpu->arch.sipi_vector); kvm_lapic_reset(vcpu); - r = kvm_arch_vcpu_reset(vcpu); + r = kvm_vcpu_reset(vcpu); if (r) return r; vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -6036,7 +6038,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) r = vcpu_load(vcpu); if (r) return r; - r = kvm_arch_vcpu_reset(vcpu); + r = kvm_vcpu_reset(vcpu); if (r == 0) r = kvm_mmu_setup(vcpu); vcpu_put(vcpu); @@ -6058,7 +6060,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_x86_ops->vcpu_free(vcpu); } -int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) +static int kvm_vcpu_reset(struct kvm_vcpu *vcpu) { atomic_set(&vcpu->arch.nmi_queued, 0); vcpu->arch.nmi_pending = 0; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 93bfc9f9815c..c35b1c08c004 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -582,7 +582,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id); int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); -int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu); int kvm_arch_hardware_enable(void *garbage); void kvm_arch_hardware_disable(void *garbage); int kvm_arch_hardware_setup(void); From b6785def8368f9a69505797fcbd9f280d91b673e Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 20 Sep 2012 07:43:17 +0200 Subject: [PATCH 0261/5831] KVM: x86: Make emulator_fix_hypercall static No users outside of kvm/x86.c. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dfed7ca2d27a..841d09b123db 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -158,7 +158,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { u64 __read_mostly host_xcr0; -int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); +static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); static int kvm_vcpu_reset(struct kvm_vcpu *vcpu); @@ -5068,7 +5068,7 @@ out: } EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); -int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) +static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); char instruction[3]; From 852e372532bed457eaeb3edd198b939caeb863c5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2012 09:46:41 +0100 Subject: [PATCH 0262/5831] UAPI: (Scripted) Disintegrate arch/cris/include/arch-v10/arch Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. 
McKenney Acked-by: Dave Jones --- arch/cris/include/arch-v10/arch/Kbuild | 4 ---- arch/cris/include/uapi/arch-v10/arch/Kbuild | 4 ++++ arch/cris/include/{ => uapi}/arch-v10/arch/sv_addr.agh | 0 arch/cris/include/{ => uapi}/arch-v10/arch/sv_addr_ag.h | 0 arch/cris/include/{ => uapi}/arch-v10/arch/svinto.h | 0 arch/cris/include/{ => uapi}/arch-v10/arch/user.h | 0 6 files changed, 4 insertions(+), 4 deletions(-) rename arch/cris/include/{ => uapi}/arch-v10/arch/sv_addr.agh (100%) rename arch/cris/include/{ => uapi}/arch-v10/arch/sv_addr_ag.h (100%) rename arch/cris/include/{ => uapi}/arch-v10/arch/svinto.h (100%) rename arch/cris/include/{ => uapi}/arch-v10/arch/user.h (100%) diff --git a/arch/cris/include/arch-v10/arch/Kbuild b/arch/cris/include/arch-v10/arch/Kbuild index 7a192e1290b1..e69de29bb2d1 100644 --- a/arch/cris/include/arch-v10/arch/Kbuild +++ b/arch/cris/include/arch-v10/arch/Kbuild @@ -1,4 +0,0 @@ -header-y += user.h -header-y += svinto.h -header-y += sv_addr_ag.h -header-y += sv_addr.agh diff --git a/arch/cris/include/uapi/arch-v10/arch/Kbuild b/arch/cris/include/uapi/arch-v10/arch/Kbuild index aafaa5aa54d4..9048c87a782b 100644 --- a/arch/cris/include/uapi/arch-v10/arch/Kbuild +++ b/arch/cris/include/uapi/arch-v10/arch/Kbuild @@ -1 +1,5 @@ # UAPI Header export list +header-y += sv_addr.agh +header-y += sv_addr_ag.h +header-y += svinto.h +header-y += user.h diff --git a/arch/cris/include/arch-v10/arch/sv_addr.agh b/arch/cris/include/uapi/arch-v10/arch/sv_addr.agh similarity index 100% rename from arch/cris/include/arch-v10/arch/sv_addr.agh rename to arch/cris/include/uapi/arch-v10/arch/sv_addr.agh diff --git a/arch/cris/include/arch-v10/arch/sv_addr_ag.h b/arch/cris/include/uapi/arch-v10/arch/sv_addr_ag.h similarity index 100% rename from arch/cris/include/arch-v10/arch/sv_addr_ag.h rename to arch/cris/include/uapi/arch-v10/arch/sv_addr_ag.h diff --git a/arch/cris/include/arch-v10/arch/svinto.h b/arch/cris/include/uapi/arch-v10/arch/svinto.h similarity index 100% rename from arch/cris/include/arch-v10/arch/svinto.h rename to arch/cris/include/uapi/arch-v10/arch/svinto.h diff --git a/arch/cris/include/arch-v10/arch/user.h b/arch/cris/include/uapi/arch-v10/arch/user.h similarity index 100% rename from arch/cris/include/arch-v10/arch/user.h rename to arch/cris/include/uapi/arch-v10/arch/user.h From 98d848ea4c017101f7e4a85917d0e0991f388cd8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2012 09:46:42 +0100 Subject: [PATCH 0263/5831] UAPI: (Scripted) Disintegrate arch/cris/include/arch-v32/arch Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. 
McKenney Acked-by: Dave Jones --- arch/cris/include/arch-v32/arch/Kbuild | 2 - arch/cris/include/arch-v32/arch/cryptocop.h | 116 +---------------- arch/cris/include/uapi/arch-v32/arch/Kbuild | 2 + .../include/uapi/arch-v32/arch/cryptocop.h | 122 ++++++++++++++++++ .../include/{ => uapi}/arch-v32/arch/user.h | 0 5 files changed, 125 insertions(+), 117 deletions(-) create mode 100644 arch/cris/include/uapi/arch-v32/arch/cryptocop.h rename arch/cris/include/{ => uapi}/arch-v32/arch/user.h (100%) diff --git a/arch/cris/include/arch-v32/arch/Kbuild b/arch/cris/include/arch-v32/arch/Kbuild index 35f2fc4f993e..e69de29bb2d1 100644 --- a/arch/cris/include/arch-v32/arch/Kbuild +++ b/arch/cris/include/arch-v32/arch/Kbuild @@ -1,2 +0,0 @@ -header-y += user.h -header-y += cryptocop.h diff --git a/arch/cris/include/arch-v32/arch/cryptocop.h b/arch/cris/include/arch-v32/arch/cryptocop.h index e1cd83dfabb5..716e434e9269 100644 --- a/arch/cris/include/arch-v32/arch/cryptocop.h +++ b/arch/cris/include/arch-v32/arch/cryptocop.h @@ -2,124 +2,12 @@ * The device /dev/cryptocop is accessible using this driver using * CRYPTOCOP_MAJOR (254) and minor number 0. */ - #ifndef CRYPTOCOP_H #define CRYPTOCOP_H -#include +#include -#define CRYPTOCOP_SESSION_ID_NONE (0) - -typedef unsigned long long int cryptocop_session_id; - -/* cryptocop ioctls */ -#define ETRAXCRYPTOCOP_IOCTYPE (250) - -#define CRYPTOCOP_IO_CREATE_SESSION _IOWR(ETRAXCRYPTOCOP_IOCTYPE, 1, struct strcop_session_op) -#define CRYPTOCOP_IO_CLOSE_SESSION _IOW(ETRAXCRYPTOCOP_IOCTYPE, 2, struct strcop_session_op) -#define CRYPTOCOP_IO_PROCESS_OP _IOWR(ETRAXCRYPTOCOP_IOCTYPE, 3, struct strcop_crypto_op) -#define CRYPTOCOP_IO_MAXNR (3) - -typedef enum { - cryptocop_cipher_des = 0, - cryptocop_cipher_3des = 1, - cryptocop_cipher_aes = 2, - cryptocop_cipher_m2m = 3, /* mem2mem is essentially a NULL cipher with blocklength=1 */ - cryptocop_cipher_none -} cryptocop_cipher_type; - -typedef enum { - cryptocop_digest_sha1 = 0, - cryptocop_digest_md5 = 1, - cryptocop_digest_none -} cryptocop_digest_type; - -typedef enum { - cryptocop_csum_le = 0, - cryptocop_csum_be = 1, - cryptocop_csum_none -} cryptocop_csum_type; - -typedef enum { - cryptocop_cipher_mode_ecb = 0, - cryptocop_cipher_mode_cbc, - cryptocop_cipher_mode_none -} cryptocop_cipher_mode; - -typedef enum { - cryptocop_3des_eee = 0, - cryptocop_3des_eed = 1, - cryptocop_3des_ede = 2, - cryptocop_3des_edd = 3, - cryptocop_3des_dee = 4, - cryptocop_3des_ded = 5, - cryptocop_3des_dde = 6, - cryptocop_3des_ddd = 7 -} cryptocop_3des_mode; - -/* Usermode accessible (ioctl) operations. */ -struct strcop_session_op{ - cryptocop_session_id ses_id; - - cryptocop_cipher_type cipher; /* AES, DES, 3DES, m2m, none */ - - cryptocop_cipher_mode cmode; /* ECB, CBC, none */ - cryptocop_3des_mode des3_mode; - - cryptocop_digest_type digest; /* MD5, SHA1, none */ - - cryptocop_csum_type csum; /* BE, LE, none */ - - unsigned char *key; - size_t keylen; -}; - -#define CRYPTOCOP_CSUM_LENGTH (2) -#define CRYPTOCOP_MAX_DIGEST_LENGTH (20) /* SHA-1 20, MD5 16 */ -#define CRYPTOCOP_MAX_IV_LENGTH (16) /* (3)DES==8, AES == 16 */ -#define CRYPTOCOP_MAX_KEY_LENGTH (32) - -struct strcop_crypto_op{ - cryptocop_session_id ses_id; - - /* Indata. */ - unsigned char *indata; - size_t inlen; /* Total indata length. */ - - /* Cipher configuration. 
*/ - unsigned char do_cipher:1; - unsigned char decrypt:1; /* 1 == decrypt, 0 == encrypt */ - unsigned char cipher_explicit:1; - size_t cipher_start; - size_t cipher_len; - /* cipher_iv is used if do_cipher and cipher_explicit and the cipher - mode is CBC. The length is controlled by the type of cipher, - e.g. DES/3DES 8 octets and AES 16 octets. */ - unsigned char cipher_iv[CRYPTOCOP_MAX_IV_LENGTH]; - /* Outdata. */ - unsigned char *cipher_outdata; - size_t cipher_outlen; - - /* digest configuration. */ - unsigned char do_digest:1; - size_t digest_start; - size_t digest_len; - /* Outdata. The actual length is determined by the type of the digest. */ - unsigned char digest[CRYPTOCOP_MAX_DIGEST_LENGTH]; - - /* Checksum configuration. */ - unsigned char do_csum:1; - size_t csum_start; - size_t csum_len; - /* Outdata. */ - unsigned char csum[CRYPTOCOP_CSUM_LENGTH]; -}; - - - -#ifdef __KERNEL__ - /********** The API to use from inside the kernel. ************/ #include @@ -267,6 +155,4 @@ int cryptocop_job_queue_insert_crypto(struct cryptocop_operation *operation); int cryptocop_job_queue_insert_user_job(struct cryptocop_operation *operation); -#endif /* __KERNEL__ */ - #endif /* CRYPTOCOP_H */ diff --git a/arch/cris/include/uapi/arch-v32/arch/Kbuild b/arch/cris/include/uapi/arch-v32/arch/Kbuild index aafaa5aa54d4..59efffd16b61 100644 --- a/arch/cris/include/uapi/arch-v32/arch/Kbuild +++ b/arch/cris/include/uapi/arch-v32/arch/Kbuild @@ -1 +1,3 @@ # UAPI Header export list +header-y += cryptocop.h +header-y += user.h diff --git a/arch/cris/include/uapi/arch-v32/arch/cryptocop.h b/arch/cris/include/uapi/arch-v32/arch/cryptocop.h new file mode 100644 index 000000000000..694fd13ce1cf --- /dev/null +++ b/arch/cris/include/uapi/arch-v32/arch/cryptocop.h @@ -0,0 +1,122 @@ +/* + * The device /dev/cryptocop is accessible using this driver using + * CRYPTOCOP_MAJOR (254) and minor number 0. + */ + +#ifndef _UAPICRYPTOCOP_H +#define _UAPICRYPTOCOP_H + +#include + + +#define CRYPTOCOP_SESSION_ID_NONE (0) + +typedef unsigned long long int cryptocop_session_id; + +/* cryptocop ioctls */ +#define ETRAXCRYPTOCOP_IOCTYPE (250) + +#define CRYPTOCOP_IO_CREATE_SESSION _IOWR(ETRAXCRYPTOCOP_IOCTYPE, 1, struct strcop_session_op) +#define CRYPTOCOP_IO_CLOSE_SESSION _IOW(ETRAXCRYPTOCOP_IOCTYPE, 2, struct strcop_session_op) +#define CRYPTOCOP_IO_PROCESS_OP _IOWR(ETRAXCRYPTOCOP_IOCTYPE, 3, struct strcop_crypto_op) +#define CRYPTOCOP_IO_MAXNR (3) + +typedef enum { + cryptocop_cipher_des = 0, + cryptocop_cipher_3des = 1, + cryptocop_cipher_aes = 2, + cryptocop_cipher_m2m = 3, /* mem2mem is essentially a NULL cipher with blocklength=1 */ + cryptocop_cipher_none +} cryptocop_cipher_type; + +typedef enum { + cryptocop_digest_sha1 = 0, + cryptocop_digest_md5 = 1, + cryptocop_digest_none +} cryptocop_digest_type; + +typedef enum { + cryptocop_csum_le = 0, + cryptocop_csum_be = 1, + cryptocop_csum_none +} cryptocop_csum_type; + +typedef enum { + cryptocop_cipher_mode_ecb = 0, + cryptocop_cipher_mode_cbc, + cryptocop_cipher_mode_none +} cryptocop_cipher_mode; + +typedef enum { + cryptocop_3des_eee = 0, + cryptocop_3des_eed = 1, + cryptocop_3des_ede = 2, + cryptocop_3des_edd = 3, + cryptocop_3des_dee = 4, + cryptocop_3des_ded = 5, + cryptocop_3des_dde = 6, + cryptocop_3des_ddd = 7 +} cryptocop_3des_mode; + +/* Usermode accessible (ioctl) operations. 
*/ +struct strcop_session_op{ + cryptocop_session_id ses_id; + + cryptocop_cipher_type cipher; /* AES, DES, 3DES, m2m, none */ + + cryptocop_cipher_mode cmode; /* ECB, CBC, none */ + cryptocop_3des_mode des3_mode; + + cryptocop_digest_type digest; /* MD5, SHA1, none */ + + cryptocop_csum_type csum; /* BE, LE, none */ + + unsigned char *key; + size_t keylen; +}; + +#define CRYPTOCOP_CSUM_LENGTH (2) +#define CRYPTOCOP_MAX_DIGEST_LENGTH (20) /* SHA-1 20, MD5 16 */ +#define CRYPTOCOP_MAX_IV_LENGTH (16) /* (3)DES==8, AES == 16 */ +#define CRYPTOCOP_MAX_KEY_LENGTH (32) + +struct strcop_crypto_op{ + cryptocop_session_id ses_id; + + /* Indata. */ + unsigned char *indata; + size_t inlen; /* Total indata length. */ + + /* Cipher configuration. */ + unsigned char do_cipher:1; + unsigned char decrypt:1; /* 1 == decrypt, 0 == encrypt */ + unsigned char cipher_explicit:1; + size_t cipher_start; + size_t cipher_len; + /* cipher_iv is used if do_cipher and cipher_explicit and the cipher + mode is CBC. The length is controlled by the type of cipher, + e.g. DES/3DES 8 octets and AES 16 octets. */ + unsigned char cipher_iv[CRYPTOCOP_MAX_IV_LENGTH]; + /* Outdata. */ + unsigned char *cipher_outdata; + size_t cipher_outlen; + + /* digest configuration. */ + unsigned char do_digest:1; + size_t digest_start; + size_t digest_len; + /* Outdata. The actual length is determined by the type of the digest. */ + unsigned char digest[CRYPTOCOP_MAX_DIGEST_LENGTH]; + + /* Checksum configuration. */ + unsigned char do_csum:1; + size_t csum_start; + size_t csum_len; + /* Outdata. */ + unsigned char csum[CRYPTOCOP_CSUM_LENGTH]; +}; + + + + +#endif /* _UAPICRYPTOCOP_H */ diff --git a/arch/cris/include/arch-v32/arch/user.h b/arch/cris/include/uapi/arch-v32/arch/user.h similarity index 100% rename from arch/cris/include/arch-v32/arch/user.h rename to arch/cris/include/uapi/arch-v32/arch/user.h From e717abac8a9f65eee6de3bb37e10c6916bced483 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2012 09:46:45 +0100 Subject: [PATCH 0264/5831] UAPI: (Scripted) Disintegrate arch/cris/include/asm Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. 
McKenney Acked-by: Dave Jones --- arch/cris/include/asm/Kbuild | 5 - arch/cris/include/asm/ptrace.h | 5 +- arch/cris/include/asm/signal.h | 122 +------ arch/cris/include/asm/swab.h | 3 +- arch/cris/include/asm/termios.h | 43 +-- arch/cris/include/asm/types.h | 5 +- arch/cris/include/asm/unistd.h | 342 +---------------- arch/cris/include/uapi/asm/Kbuild | 34 ++ arch/cris/include/{ => uapi}/asm/auxvec.h | 0 .../cris/include/{ => uapi}/asm/bitsperlong.h | 0 arch/cris/include/{ => uapi}/asm/byteorder.h | 0 arch/cris/include/{ => uapi}/asm/errno.h | 0 arch/cris/include/{ => uapi}/asm/ethernet.h | 0 arch/cris/include/{ => uapi}/asm/etraxgpio.h | 0 arch/cris/include/{ => uapi}/asm/fcntl.h | 0 arch/cris/include/{ => uapi}/asm/ioctl.h | 0 arch/cris/include/{ => uapi}/asm/ioctls.h | 0 arch/cris/include/{ => uapi}/asm/ipcbuf.h | 0 arch/cris/include/{ => uapi}/asm/mman.h | 0 arch/cris/include/{ => uapi}/asm/msgbuf.h | 0 arch/cris/include/{ => uapi}/asm/param.h | 0 arch/cris/include/{ => uapi}/asm/poll.h | 0 .../cris/include/{ => uapi}/asm/posix_types.h | 0 arch/cris/include/uapi/asm/ptrace.h | 1 + arch/cris/include/{ => uapi}/asm/resource.h | 0 arch/cris/include/{ => uapi}/asm/rs485.h | 0 arch/cris/include/{ => uapi}/asm/sembuf.h | 0 arch/cris/include/{ => uapi}/asm/setup.h | 0 arch/cris/include/{ => uapi}/asm/shmbuf.h | 0 arch/cris/include/{ => uapi}/asm/sigcontext.h | 0 arch/cris/include/{ => uapi}/asm/siginfo.h | 0 arch/cris/include/uapi/asm/signal.h | 122 +++++++ arch/cris/include/{ => uapi}/asm/socket.h | 0 arch/cris/include/{ => uapi}/asm/sockios.h | 0 arch/cris/include/{ => uapi}/asm/stat.h | 0 arch/cris/include/{ => uapi}/asm/statfs.h | 0 arch/cris/include/uapi/asm/swab.h | 0 .../cris/include/{ => uapi}/asm/sync_serial.h | 0 arch/cris/include/{ => uapi}/asm/termbits.h | 0 arch/cris/include/uapi/asm/termios.h | 45 +++ arch/cris/include/uapi/asm/types.h | 1 + arch/cris/include/uapi/asm/unistd.h | 344 ++++++++++++++++++ 42 files changed, 553 insertions(+), 519 deletions(-) rename arch/cris/include/{ => uapi}/asm/auxvec.h (100%) rename arch/cris/include/{ => uapi}/asm/bitsperlong.h (100%) rename arch/cris/include/{ => uapi}/asm/byteorder.h (100%) rename arch/cris/include/{ => uapi}/asm/errno.h (100%) rename arch/cris/include/{ => uapi}/asm/ethernet.h (100%) rename arch/cris/include/{ => uapi}/asm/etraxgpio.h (100%) rename arch/cris/include/{ => uapi}/asm/fcntl.h (100%) rename arch/cris/include/{ => uapi}/asm/ioctl.h (100%) rename arch/cris/include/{ => uapi}/asm/ioctls.h (100%) rename arch/cris/include/{ => uapi}/asm/ipcbuf.h (100%) rename arch/cris/include/{ => uapi}/asm/mman.h (100%) rename arch/cris/include/{ => uapi}/asm/msgbuf.h (100%) rename arch/cris/include/{ => uapi}/asm/param.h (100%) rename arch/cris/include/{ => uapi}/asm/poll.h (100%) rename arch/cris/include/{ => uapi}/asm/posix_types.h (100%) create mode 100644 arch/cris/include/uapi/asm/ptrace.h rename arch/cris/include/{ => uapi}/asm/resource.h (100%) rename arch/cris/include/{ => uapi}/asm/rs485.h (100%) rename arch/cris/include/{ => uapi}/asm/sembuf.h (100%) rename arch/cris/include/{ => uapi}/asm/setup.h (100%) rename arch/cris/include/{ => uapi}/asm/shmbuf.h (100%) rename arch/cris/include/{ => uapi}/asm/sigcontext.h (100%) rename arch/cris/include/{ => uapi}/asm/siginfo.h (100%) create mode 100644 arch/cris/include/uapi/asm/signal.h rename arch/cris/include/{ => uapi}/asm/socket.h (100%) rename arch/cris/include/{ => uapi}/asm/sockios.h (100%) rename arch/cris/include/{ => uapi}/asm/stat.h (100%) rename arch/cris/include/{ => 
uapi}/asm/statfs.h (100%) create mode 100644 arch/cris/include/uapi/asm/swab.h rename arch/cris/include/{ => uapi}/asm/sync_serial.h (100%) rename arch/cris/include/{ => uapi}/asm/termbits.h (100%) create mode 100644 arch/cris/include/uapi/asm/termios.h create mode 100644 arch/cris/include/uapi/asm/types.h create mode 100644 arch/cris/include/uapi/asm/unistd.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index a8eab26a1ec7..9fa0059df4c8 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -1,11 +1,6 @@ -include include/asm-generic/Kbuild.asm header-y += arch-v10/ header-y += arch-v32/ -header-y += ethernet.h -header-y += etraxgpio.h -header-y += rs485.h -header-y += sync_serial.h generic-y += clkdev.h diff --git a/arch/cris/include/asm/ptrace.h b/arch/cris/include/asm/ptrace.h index 6618893bfe8e..2de84d7061c7 100644 --- a/arch/cris/include/asm/ptrace.h +++ b/arch/cris/include/asm/ptrace.h @@ -1,9 +1,8 @@ #ifndef _CRIS_PTRACE_H #define _CRIS_PTRACE_H -#include +#include -#ifdef __KERNEL__ /* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */ #define PTRACE_GETREGS 12 @@ -11,6 +10,4 @@ #define profile_pc(regs) instruction_pointer(regs) -#endif /* __KERNEL__ */ - #endif /* _CRIS_PTRACE_H */ diff --git a/arch/cris/include/asm/signal.h b/arch/cris/include/asm/signal.h index ea6af9aad76c..b3c5ef05011f 100644 --- a/arch/cris/include/asm/signal.h +++ b/arch/cris/include/asm/signal.h @@ -1,12 +1,8 @@ #ifndef _ASM_CRIS_SIGNAL_H #define _ASM_CRIS_SIGNAL_H -#include +#include -/* Avoid too many header ordering problems. */ -struct siginfo; - -#ifdef __KERNEL__ /* Most things should be clean enough to redefine this at will, if care is taken to make libc match. */ @@ -20,95 +16,6 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; -#else -/* Here we must cater to libcs that poke about in kernel headers. */ - -#define NSIG 32 -typedef unsigned long sigset_t; - -#endif /* __KERNEL__ */ - -#define SIGHUP 1 -#define SIGINT 2 -#define SIGQUIT 3 -#define SIGILL 4 -#define SIGTRAP 5 -#define SIGABRT 6 -#define SIGIOT 6 -#define SIGBUS 7 -#define SIGFPE 8 -#define SIGKILL 9 -#define SIGUSR1 10 -#define SIGSEGV 11 -#define SIGUSR2 12 -#define SIGPIPE 13 -#define SIGALRM 14 -#define SIGTERM 15 -#define SIGSTKFLT 16 -#define SIGCHLD 17 -#define SIGCONT 18 -#define SIGSTOP 19 -#define SIGTSTP 20 -#define SIGTTIN 21 -#define SIGTTOU 22 -#define SIGURG 23 -#define SIGXCPU 24 -#define SIGXFSZ 25 -#define SIGVTALRM 26 -#define SIGPROF 27 -#define SIGWINCH 28 -#define SIGIO 29 -#define SIGPOLL SIGIO -/* -#define SIGLOST 29 -*/ -#define SIGPWR 30 -#define SIGSYS 31 -#define SIGUNUSED 31 - -/* These should not be considered constants from userland. */ -#define SIGRTMIN 32 -#define SIGRTMAX _NSIG - -/* - * SA_FLAGS values: - * - * SA_ONSTACK indicates that a registered stack_t will be used. - * SA_RESTART flag to get restarting signals (which were the default long ago) - * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. - * SA_RESETHAND clears the handler when the signal is delivered. - * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. - * SA_NODEFER prevents the current signal from being masked in the handler. - * - * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single - * Unix names RESETHAND and NODEFER respectively. 
- */ - -#define SA_NOCLDSTOP 0x00000001u -#define SA_NOCLDWAIT 0x00000002u -#define SA_SIGINFO 0x00000004u -#define SA_ONSTACK 0x08000000u -#define SA_RESTART 0x10000000u -#define SA_NODEFER 0x40000000u -#define SA_RESETHAND 0x80000000u - -#define SA_NOMASK SA_NODEFER -#define SA_ONESHOT SA_RESETHAND - -#define SA_RESTORER 0x04000000 - -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - -#define MINSIGSTKSZ 2048 -#define SIGSTKSZ 8192 - -#include - -#ifdef __KERNEL__ struct old_sigaction { __sighandler_t sa_handler; old_sigset_t sa_mask; @@ -126,31 +33,6 @@ struct sigaction { struct k_sigaction { struct sigaction sa; }; -#else -/* Here we must cater to libcs that poke about in kernel headers. */ - -struct sigaction { - union { - __sighandler_t _sa_handler; - void (*_sa_sigaction)(int, struct siginfo *, void *); - } _u; - sigset_t sa_mask; - unsigned long sa_flags; - void (*sa_restorer)(void); -}; - -#define sa_handler _u._sa_handler -#define sa_sigaction _u._sa_sigaction - -#endif /* __KERNEL__ */ - -typedef struct sigaltstack { - void *ss_sp; - int ss_flags; - size_t ss_size; -} stack_t; - -#ifdef __KERNEL__ #include /* here we could define asm-optimized sigaddset, sigdelset etc. operations. @@ -158,6 +40,4 @@ typedef struct sigaltstack { */ #define ptrace_signal_deliver(regs, cookie) do { } while (0) -#endif /* __KERNEL__ */ - #endif diff --git a/arch/cris/include/asm/swab.h b/arch/cris/include/asm/swab.h index 80668e88419c..991b6ace1ba9 100644 --- a/arch/cris/include/asm/swab.h +++ b/arch/cris/include/asm/swab.h @@ -1,8 +1,7 @@ #ifndef _CRIS_SWAB_H #define _CRIS_SWAB_H -#ifdef __KERNEL__ #include -#endif /* __KERNEL__ */ +#include #endif /* _CRIS_SWAB_H */ diff --git a/arch/cris/include/asm/termios.h b/arch/cris/include/asm/termios.h index 1265109f4ce3..1991cd9e4083 100644 --- a/arch/cris/include/asm/termios.h +++ b/arch/cris/include/asm/termios.h @@ -1,47 +1,8 @@ #ifndef _CRIS_TERMIOS_H #define _CRIS_TERMIOS_H -#include -#include -#include -#include +#include -struct winsize { - unsigned short ws_row; - unsigned short ws_col; - unsigned short ws_xpixel; - unsigned short ws_ypixel; -}; - -#define NCC 8 -struct termio { - unsigned short c_iflag; /* input mode flags */ - unsigned short c_oflag; /* output mode flags */ - unsigned short c_cflag; /* control mode flags */ - unsigned short c_lflag; /* local mode flags */ - unsigned char c_line; /* line discipline */ - unsigned char c_cc[NCC]; /* control characters */ -}; - -/* modem lines */ -#define TIOCM_LE 0x001 -#define TIOCM_DTR 0x002 -#define TIOCM_RTS 0x004 -#define TIOCM_ST 0x008 -#define TIOCM_SR 0x010 -#define TIOCM_CTS 0x020 -#define TIOCM_CAR 0x040 -#define TIOCM_RNG 0x080 -#define TIOCM_DSR 0x100 -#define TIOCM_CD TIOCM_CAR -#define TIOCM_RI TIOCM_RNG -#define TIOCM_OUT1 0x2000 -#define TIOCM_OUT2 0x4000 -#define TIOCM_LOOP 0x8000 - -/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ - -#ifdef __KERNEL__ /* intr=^C quit=^\ erase=del kill=^U eof=^D vtime=\0 vmin=\1 sxtc=\0 @@ -87,6 +48,4 @@ struct termio { #define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios)) #define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios)) -#endif /* __KERNEL__ */ - #endif /* _CRIS_TERMIOS_H */ diff --git a/arch/cris/include/asm/types.h b/arch/cris/include/asm/types.h index adaf82780bb4..a3cac7757c7f 100644 --- a/arch/cris/include/asm/types.h +++ b/arch/cris/include/asm/types.h @@ -1,15 +1,12 @@ #ifndef _ETRAX_TYPES_H #define _ETRAX_TYPES_H 
-#include +#include /* * These aren't exported outside the kernel to avoid name space clashes */ -#ifdef __KERNEL__ #define BITS_PER_LONG 32 -#endif /* __KERNEL__ */ - #endif diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h index 51873a446f87..4fb59e107024 100644 --- a/arch/cris/include/asm/unistd.h +++ b/arch/cris/include/asm/unistd.h @@ -1,347 +1,8 @@ #ifndef _ASM_CRIS_UNISTD_H_ #define _ASM_CRIS_UNISTD_H_ -/* - * This file contains the system call numbers, and stub macros for libc. - */ +#include -#define __NR_restart_syscall 0 -#define __NR_exit 1 -#define __NR_fork 2 -#define __NR_read 3 -#define __NR_write 4 -#define __NR_open 5 -#define __NR_close 6 -#define __NR_waitpid 7 -#define __NR_creat 8 -#define __NR_link 9 -#define __NR_unlink 10 -#define __NR_execve 11 -#define __NR_chdir 12 -#define __NR_time 13 -#define __NR_mknod 14 -#define __NR_chmod 15 -#define __NR_lchown 16 -#define __NR_break 17 -#define __NR_oldstat 18 -#define __NR_lseek 19 -#define __NR_getpid 20 -#define __NR_mount 21 -#define __NR_umount 22 -#define __NR_setuid 23 -#define __NR_getuid 24 -#define __NR_stime 25 -#define __NR_ptrace 26 -#define __NR_alarm 27 -#define __NR_oldfstat 28 -#define __NR_pause 29 -#define __NR_utime 30 -#define __NR_stty 31 -#define __NR_gtty 32 -#define __NR_access 33 -#define __NR_nice 34 -#define __NR_ftime 35 -#define __NR_sync 36 -#define __NR_kill 37 -#define __NR_rename 38 -#define __NR_mkdir 39 -#define __NR_rmdir 40 -#define __NR_dup 41 -#define __NR_pipe 42 -#define __NR_times 43 -#define __NR_prof 44 -#define __NR_brk 45 -#define __NR_setgid 46 -#define __NR_getgid 47 -#define __NR_signal 48 -#define __NR_geteuid 49 -#define __NR_getegid 50 -#define __NR_acct 51 -#define __NR_umount2 52 -#define __NR_lock 53 -#define __NR_ioctl 54 -#define __NR_fcntl 55 -#define __NR_mpx 56 -#define __NR_setpgid 57 -#define __NR_ulimit 58 -#define __NR_oldolduname 59 -#define __NR_umask 60 -#define __NR_chroot 61 -#define __NR_ustat 62 -#define __NR_dup2 63 -#define __NR_getppid 64 -#define __NR_getpgrp 65 -#define __NR_setsid 66 -#define __NR_sigaction 67 -#define __NR_sgetmask 68 -#define __NR_ssetmask 69 -#define __NR_setreuid 70 -#define __NR_setregid 71 -#define __NR_sigsuspend 72 -#define __NR_sigpending 73 -#define __NR_sethostname 74 -#define __NR_setrlimit 75 -#define __NR_getrlimit 76 -#define __NR_getrusage 77 -#define __NR_gettimeofday 78 -#define __NR_settimeofday 79 -#define __NR_getgroups 80 -#define __NR_setgroups 81 -#define __NR_select 82 -#define __NR_symlink 83 -#define __NR_oldlstat 84 -#define __NR_readlink 85 -#define __NR_uselib 86 -#define __NR_swapon 87 -#define __NR_reboot 88 -#define __NR_readdir 89 -#define __NR_mmap 90 -#define __NR_munmap 91 -#define __NR_truncate 92 -#define __NR_ftruncate 93 -#define __NR_fchmod 94 -#define __NR_fchown 95 -#define __NR_getpriority 96 -#define __NR_setpriority 97 -#define __NR_profil 98 -#define __NR_statfs 99 -#define __NR_fstatfs 100 -#define __NR_ioperm 101 -#define __NR_socketcall 102 -#define __NR_syslog 103 -#define __NR_setitimer 104 -#define __NR_getitimer 105 -#define __NR_stat 106 -#define __NR_lstat 107 -#define __NR_fstat 108 -#define __NR_olduname 109 -#define __NR_iopl 110 -#define __NR_vhangup 111 -#define __NR_idle 112 -#define __NR_vm86 113 -#define __NR_wait4 114 -#define __NR_swapoff 115 -#define __NR_sysinfo 116 -#define __NR_ipc 117 -#define __NR_fsync 118 -#define __NR_sigreturn 119 -#define __NR_clone 120 -#define __NR_setdomainname 121 -#define __NR_uname 122 -#define 
__NR_modify_ldt 123 -#define __NR_adjtimex 124 -#define __NR_mprotect 125 -#define __NR_sigprocmask 126 -#define __NR_create_module 127 -#define __NR_init_module 128 -#define __NR_delete_module 129 -#define __NR_get_kernel_syms 130 -#define __NR_quotactl 131 -#define __NR_getpgid 132 -#define __NR_fchdir 133 -#define __NR_bdflush 134 -#define __NR_sysfs 135 -#define __NR_personality 136 -#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ -#define __NR_setfsuid 138 -#define __NR_setfsgid 139 -#define __NR__llseek 140 -#define __NR_getdents 141 -#define __NR__newselect 142 -#define __NR_flock 143 -#define __NR_msync 144 -#define __NR_readv 145 -#define __NR_writev 146 -#define __NR_getsid 147 -#define __NR_fdatasync 148 -#define __NR__sysctl 149 -#define __NR_mlock 150 -#define __NR_munlock 151 -#define __NR_mlockall 152 -#define __NR_munlockall 153 -#define __NR_sched_setparam 154 -#define __NR_sched_getparam 155 -#define __NR_sched_setscheduler 156 -#define __NR_sched_getscheduler 157 -#define __NR_sched_yield 158 -#define __NR_sched_get_priority_max 159 -#define __NR_sched_get_priority_min 160 -#define __NR_sched_rr_get_interval 161 -#define __NR_nanosleep 162 -#define __NR_mremap 163 -#define __NR_setresuid 164 -#define __NR_getresuid 165 - -#define __NR_query_module 167 -#define __NR_poll 168 -#define __NR_nfsservctl 169 -#define __NR_setresgid 170 -#define __NR_getresgid 171 -#define __NR_prctl 172 -#define __NR_rt_sigreturn 173 -#define __NR_rt_sigaction 174 -#define __NR_rt_sigprocmask 175 -#define __NR_rt_sigpending 176 -#define __NR_rt_sigtimedwait 177 -#define __NR_rt_sigqueueinfo 178 -#define __NR_rt_sigsuspend 179 -#define __NR_pread64 180 -#define __NR_pwrite64 181 -#define __NR_chown 182 -#define __NR_getcwd 183 -#define __NR_capget 184 -#define __NR_capset 185 -#define __NR_sigaltstack 186 -#define __NR_sendfile 187 -#define __NR_getpmsg 188 /* some people actually want streams */ -#define __NR_putpmsg 189 /* some people actually want streams */ -#define __NR_vfork 190 -#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ -#define __NR_mmap2 192 -#define __NR_truncate64 193 -#define __NR_ftruncate64 194 -#define __NR_stat64 195 -#define __NR_lstat64 196 -#define __NR_fstat64 197 -#define __NR_lchown32 198 -#define __NR_getuid32 199 -#define __NR_getgid32 200 -#define __NR_geteuid32 201 -#define __NR_getegid32 202 -#define __NR_setreuid32 203 -#define __NR_setregid32 204 -#define __NR_getgroups32 205 -#define __NR_setgroups32 206 -#define __NR_fchown32 207 -#define __NR_setresuid32 208 -#define __NR_getresuid32 209 -#define __NR_setresgid32 210 -#define __NR_getresgid32 211 -#define __NR_chown32 212 -#define __NR_setuid32 213 -#define __NR_setgid32 214 -#define __NR_setfsuid32 215 -#define __NR_setfsgid32 216 -#define __NR_pivot_root 217 -#define __NR_mincore 218 -#define __NR_madvise 219 -#define __NR_getdents64 220 -#define __NR_fcntl64 221 -/* 223 is unused */ -#define __NR_gettid 224 -#define __NR_readahead 225 -#define __NR_setxattr 226 -#define __NR_lsetxattr 227 -#define __NR_fsetxattr 228 -#define __NR_getxattr 229 -#define __NR_lgetxattr 230 -#define __NR_fgetxattr 231 -#define __NR_listxattr 232 -#define __NR_llistxattr 233 -#define __NR_flistxattr 234 -#define __NR_removexattr 235 -#define __NR_lremovexattr 236 -#define __NR_fremovexattr 237 -#define __NR_tkill 238 -#define __NR_sendfile64 239 -#define __NR_futex 240 -#define __NR_sched_setaffinity 241 -#define __NR_sched_getaffinity 242 -#define __NR_set_thread_area 243 -#define 
__NR_get_thread_area 244 -#define __NR_io_setup 245 -#define __NR_io_destroy 246 -#define __NR_io_getevents 247 -#define __NR_io_submit 248 -#define __NR_io_cancel 249 -#define __NR_fadvise64 250 -/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */ -#define __NR_exit_group 252 -#define __NR_lookup_dcookie 253 -#define __NR_epoll_create 254 -#define __NR_epoll_ctl 255 -#define __NR_epoll_wait 256 -#define __NR_remap_file_pages 257 -#define __NR_set_tid_address 258 -#define __NR_timer_create 259 -#define __NR_timer_settime (__NR_timer_create+1) -#define __NR_timer_gettime (__NR_timer_create+2) -#define __NR_timer_getoverrun (__NR_timer_create+3) -#define __NR_timer_delete (__NR_timer_create+4) -#define __NR_clock_settime (__NR_timer_create+5) -#define __NR_clock_gettime (__NR_timer_create+6) -#define __NR_clock_getres (__NR_timer_create+7) -#define __NR_clock_nanosleep (__NR_timer_create+8) -#define __NR_statfs64 268 -#define __NR_fstatfs64 269 -#define __NR_tgkill 270 -#define __NR_utimes 271 -#define __NR_fadvise64_64 272 -#define __NR_vserver 273 -#define __NR_mbind 274 -#define __NR_get_mempolicy 275 -#define __NR_set_mempolicy 276 -#define __NR_mq_open 277 -#define __NR_mq_unlink (__NR_mq_open+1) -#define __NR_mq_timedsend (__NR_mq_open+2) -#define __NR_mq_timedreceive (__NR_mq_open+3) -#define __NR_mq_notify (__NR_mq_open+4) -#define __NR_mq_getsetattr (__NR_mq_open+5) -#define __NR_kexec_load 283 -#define __NR_waitid 284 -/* #define __NR_sys_setaltroot 285 */ -#define __NR_add_key 286 -#define __NR_request_key 287 -#define __NR_keyctl 288 -#define __NR_ioprio_set 289 -#define __NR_ioprio_get 290 -#define __NR_inotify_init 291 -#define __NR_inotify_add_watch 292 -#define __NR_inotify_rm_watch 293 -#define __NR_migrate_pages 294 -#define __NR_openat 295 -#define __NR_mkdirat 296 -#define __NR_mknodat 297 -#define __NR_fchownat 298 -#define __NR_futimesat 299 -#define __NR_fstatat64 300 -#define __NR_unlinkat 301 -#define __NR_renameat 302 -#define __NR_linkat 303 -#define __NR_symlinkat 304 -#define __NR_readlinkat 305 -#define __NR_fchmodat 306 -#define __NR_faccessat 307 -#define __NR_pselect6 308 -#define __NR_ppoll 309 -#define __NR_unshare 310 -#define __NR_set_robust_list 311 -#define __NR_get_robust_list 312 -#define __NR_splice 313 -#define __NR_sync_file_range 314 -#define __NR_tee 315 -#define __NR_vmsplice 316 -#define __NR_move_pages 317 -#define __NR_getcpu 318 -#define __NR_epoll_pwait 319 -#define __NR_utimensat 320 -#define __NR_signalfd 321 -#define __NR_timerfd_create 322 -#define __NR_eventfd 323 -#define __NR_fallocate 324 -#define __NR_timerfd_settime 325 -#define __NR_timerfd_gettime 326 -#define __NR_signalfd4 327 -#define __NR_eventfd2 328 -#define __NR_epoll_create1 329 -#define __NR_dup3 330 -#define __NR_pipe2 331 -#define __NR_inotify_init1 332 -#define __NR_preadv 333 -#define __NR_pwritev 334 -#define __NR_setns 335 - -#ifdef __KERNEL__ #define NR_syscalls 336 @@ -380,5 +41,4 @@ */ #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") -#endif /* __KERNEL__ */ #endif /* _ASM_CRIS_UNISTD_H_ */ diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild index f50236ae9ca3..7d47b366ad82 100644 --- a/arch/cris/include/uapi/asm/Kbuild +++ b/arch/cris/include/uapi/asm/Kbuild @@ -3,3 +3,37 @@ include include/uapi/asm-generic/Kbuild.asm header-y += arch-v10/ header-y += arch-v32/ +header-y += auxvec.h +header-y += bitsperlong.h +header-y += byteorder.h +header-y += errno.h +header-y += ethernet.h +header-y 
+= etraxgpio.h +header-y += fcntl.h +header-y += ioctl.h +header-y += ioctls.h +header-y += ipcbuf.h +header-y += mman.h +header-y += msgbuf.h +header-y += param.h +header-y += poll.h +header-y += posix_types.h +header-y += ptrace.h +header-y += resource.h +header-y += rs485.h +header-y += sembuf.h +header-y += setup.h +header-y += shmbuf.h +header-y += sigcontext.h +header-y += siginfo.h +header-y += signal.h +header-y += socket.h +header-y += sockios.h +header-y += stat.h +header-y += statfs.h +header-y += swab.h +header-y += sync_serial.h +header-y += termbits.h +header-y += termios.h +header-y += types.h +header-y += unistd.h diff --git a/arch/cris/include/asm/auxvec.h b/arch/cris/include/uapi/asm/auxvec.h similarity index 100% rename from arch/cris/include/asm/auxvec.h rename to arch/cris/include/uapi/asm/auxvec.h diff --git a/arch/cris/include/asm/bitsperlong.h b/arch/cris/include/uapi/asm/bitsperlong.h similarity index 100% rename from arch/cris/include/asm/bitsperlong.h rename to arch/cris/include/uapi/asm/bitsperlong.h diff --git a/arch/cris/include/asm/byteorder.h b/arch/cris/include/uapi/asm/byteorder.h similarity index 100% rename from arch/cris/include/asm/byteorder.h rename to arch/cris/include/uapi/asm/byteorder.h diff --git a/arch/cris/include/asm/errno.h b/arch/cris/include/uapi/asm/errno.h similarity index 100% rename from arch/cris/include/asm/errno.h rename to arch/cris/include/uapi/asm/errno.h diff --git a/arch/cris/include/asm/ethernet.h b/arch/cris/include/uapi/asm/ethernet.h similarity index 100% rename from arch/cris/include/asm/ethernet.h rename to arch/cris/include/uapi/asm/ethernet.h diff --git a/arch/cris/include/asm/etraxgpio.h b/arch/cris/include/uapi/asm/etraxgpio.h similarity index 100% rename from arch/cris/include/asm/etraxgpio.h rename to arch/cris/include/uapi/asm/etraxgpio.h diff --git a/arch/cris/include/asm/fcntl.h b/arch/cris/include/uapi/asm/fcntl.h similarity index 100% rename from arch/cris/include/asm/fcntl.h rename to arch/cris/include/uapi/asm/fcntl.h diff --git a/arch/cris/include/asm/ioctl.h b/arch/cris/include/uapi/asm/ioctl.h similarity index 100% rename from arch/cris/include/asm/ioctl.h rename to arch/cris/include/uapi/asm/ioctl.h diff --git a/arch/cris/include/asm/ioctls.h b/arch/cris/include/uapi/asm/ioctls.h similarity index 100% rename from arch/cris/include/asm/ioctls.h rename to arch/cris/include/uapi/asm/ioctls.h diff --git a/arch/cris/include/asm/ipcbuf.h b/arch/cris/include/uapi/asm/ipcbuf.h similarity index 100% rename from arch/cris/include/asm/ipcbuf.h rename to arch/cris/include/uapi/asm/ipcbuf.h diff --git a/arch/cris/include/asm/mman.h b/arch/cris/include/uapi/asm/mman.h similarity index 100% rename from arch/cris/include/asm/mman.h rename to arch/cris/include/uapi/asm/mman.h diff --git a/arch/cris/include/asm/msgbuf.h b/arch/cris/include/uapi/asm/msgbuf.h similarity index 100% rename from arch/cris/include/asm/msgbuf.h rename to arch/cris/include/uapi/asm/msgbuf.h diff --git a/arch/cris/include/asm/param.h b/arch/cris/include/uapi/asm/param.h similarity index 100% rename from arch/cris/include/asm/param.h rename to arch/cris/include/uapi/asm/param.h diff --git a/arch/cris/include/asm/poll.h b/arch/cris/include/uapi/asm/poll.h similarity index 100% rename from arch/cris/include/asm/poll.h rename to arch/cris/include/uapi/asm/poll.h diff --git a/arch/cris/include/asm/posix_types.h b/arch/cris/include/uapi/asm/posix_types.h similarity index 100% rename from arch/cris/include/asm/posix_types.h rename to 
arch/cris/include/uapi/asm/posix_types.h diff --git a/arch/cris/include/uapi/asm/ptrace.h b/arch/cris/include/uapi/asm/ptrace.h new file mode 100644 index 000000000000..c689c9bbbe50 --- /dev/null +++ b/arch/cris/include/uapi/asm/ptrace.h @@ -0,0 +1 @@ +#include diff --git a/arch/cris/include/asm/resource.h b/arch/cris/include/uapi/asm/resource.h similarity index 100% rename from arch/cris/include/asm/resource.h rename to arch/cris/include/uapi/asm/resource.h diff --git a/arch/cris/include/asm/rs485.h b/arch/cris/include/uapi/asm/rs485.h similarity index 100% rename from arch/cris/include/asm/rs485.h rename to arch/cris/include/uapi/asm/rs485.h diff --git a/arch/cris/include/asm/sembuf.h b/arch/cris/include/uapi/asm/sembuf.h similarity index 100% rename from arch/cris/include/asm/sembuf.h rename to arch/cris/include/uapi/asm/sembuf.h diff --git a/arch/cris/include/asm/setup.h b/arch/cris/include/uapi/asm/setup.h similarity index 100% rename from arch/cris/include/asm/setup.h rename to arch/cris/include/uapi/asm/setup.h diff --git a/arch/cris/include/asm/shmbuf.h b/arch/cris/include/uapi/asm/shmbuf.h similarity index 100% rename from arch/cris/include/asm/shmbuf.h rename to arch/cris/include/uapi/asm/shmbuf.h diff --git a/arch/cris/include/asm/sigcontext.h b/arch/cris/include/uapi/asm/sigcontext.h similarity index 100% rename from arch/cris/include/asm/sigcontext.h rename to arch/cris/include/uapi/asm/sigcontext.h diff --git a/arch/cris/include/asm/siginfo.h b/arch/cris/include/uapi/asm/siginfo.h similarity index 100% rename from arch/cris/include/asm/siginfo.h rename to arch/cris/include/uapi/asm/siginfo.h diff --git a/arch/cris/include/uapi/asm/signal.h b/arch/cris/include/uapi/asm/signal.h new file mode 100644 index 000000000000..21624948a96d --- /dev/null +++ b/arch/cris/include/uapi/asm/signal.h @@ -0,0 +1,122 @@ +#ifndef _UAPI_ASM_CRIS_SIGNAL_H +#define _UAPI_ASM_CRIS_SIGNAL_H + +#include + +/* Avoid too many header ordering problems. */ +struct siginfo; + +#ifndef __KERNEL__ +/* Here we must cater to libcs that poke about in kernel headers. */ + +#define NSIG 32 +typedef unsigned long sigset_t; + +#endif /* __KERNEL__ */ + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX _NSIG + +/* + * SA_FLAGS values: + * + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. 
+ */ + +#define SA_NOCLDSTOP 0x00000001u +#define SA_NOCLDWAIT 0x00000002u +#define SA_SIGINFO 0x00000004u +#define SA_ONSTACK 0x08000000u +#define SA_RESTART 0x10000000u +#define SA_NODEFER 0x40000000u +#define SA_RESETHAND 0x80000000u + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND + +#define SA_RESTORER 0x04000000 + +/* + * sigaltstack controls + */ +#define SS_ONSTACK 1 +#define SS_DISABLE 2 + +#define MINSIGSTKSZ 2048 +#define SIGSTKSZ 8192 + +#include + +#ifndef __KERNEL__ +/* Here we must cater to libcs that poke about in kernel headers. */ + +struct sigaction { + union { + __sighandler_t _sa_handler; + void (*_sa_sigaction)(int, struct siginfo *, void *); + } _u; + sigset_t sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +}; + +#define sa_handler _u._sa_handler +#define sa_sigaction _u._sa_sigaction + +#endif /* __KERNEL__ */ + +typedef struct sigaltstack { + void *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + + +#endif /* _UAPI_ASM_CRIS_SIGNAL_H */ diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/uapi/asm/socket.h similarity index 100% rename from arch/cris/include/asm/socket.h rename to arch/cris/include/uapi/asm/socket.h diff --git a/arch/cris/include/asm/sockios.h b/arch/cris/include/uapi/asm/sockios.h similarity index 100% rename from arch/cris/include/asm/sockios.h rename to arch/cris/include/uapi/asm/sockios.h diff --git a/arch/cris/include/asm/stat.h b/arch/cris/include/uapi/asm/stat.h similarity index 100% rename from arch/cris/include/asm/stat.h rename to arch/cris/include/uapi/asm/stat.h diff --git a/arch/cris/include/asm/statfs.h b/arch/cris/include/uapi/asm/statfs.h similarity index 100% rename from arch/cris/include/asm/statfs.h rename to arch/cris/include/uapi/asm/statfs.h diff --git a/arch/cris/include/uapi/asm/swab.h b/arch/cris/include/uapi/asm/swab.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/arch/cris/include/asm/sync_serial.h b/arch/cris/include/uapi/asm/sync_serial.h similarity index 100% rename from arch/cris/include/asm/sync_serial.h rename to arch/cris/include/uapi/asm/sync_serial.h diff --git a/arch/cris/include/asm/termbits.h b/arch/cris/include/uapi/asm/termbits.h similarity index 100% rename from arch/cris/include/asm/termbits.h rename to arch/cris/include/uapi/asm/termbits.h diff --git a/arch/cris/include/uapi/asm/termios.h b/arch/cris/include/uapi/asm/termios.h new file mode 100644 index 000000000000..0a0386a55027 --- /dev/null +++ b/arch/cris/include/uapi/asm/termios.h @@ -0,0 +1,45 @@ +#ifndef _UAPI_CRIS_TERMIOS_H +#define _UAPI_CRIS_TERMIOS_H + +#include +#include +#include +#include + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* modem lines */ +#define TIOCM_LE 0x001 +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_ST 0x008 +#define TIOCM_SR 0x010 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RNG 0x080 +#define TIOCM_DSR 0x100 +#define TIOCM_CD TIOCM_CAR +#define TIOCM_RI TIOCM_RNG +#define TIOCM_OUT1 0x2000 +#define TIOCM_OUT2 0x4000 +#define TIOCM_LOOP 0x8000 + +/* ioctl (fd, TIOCSERGETLSR, &result) where result may 
be as below */ + + +#endif /* _UAPI_CRIS_TERMIOS_H */ diff --git a/arch/cris/include/uapi/asm/types.h b/arch/cris/include/uapi/asm/types.h new file mode 100644 index 000000000000..9ec9d4c5ac4d --- /dev/null +++ b/arch/cris/include/uapi/asm/types.h @@ -0,0 +1 @@ +#include diff --git a/arch/cris/include/uapi/asm/unistd.h b/arch/cris/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..48842896f6c2 --- /dev/null +++ b/arch/cris/include/uapi/asm/unistd.h @@ -0,0 +1,344 @@ +#ifndef _UAPI_ASM_CRIS_UNISTD_H_ +#define _UAPI_ASM_CRIS_UNISTD_H_ + +/* + * This file contains the system call numbers, and stub macros for libc. + */ + +#define __NR_restart_syscall 0 +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_waitpid 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_time 13 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_lchown 16 +#define __NR_break 17 +#define __NR_oldstat 18 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_oldfstat 28 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_stty 31 +#define __NR_gtty 32 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_ftime 35 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_prof 44 +#define __NR_brk 45 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_signal 48 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_lock 53 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_mpx 56 +#define __NR_setpgid 57 +#define __NR_ulimit 58 +#define __NR_oldolduname 59 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sgetmask 68 +#define __NR_ssetmask 69 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrlimit 76 +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_select 82 +#define __NR_symlink 83 +#define __NR_oldlstat 84 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_fchown 95 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_profil 98 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_ioperm 101 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_olduname 109 +#define __NR_iopl 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_vm86 113 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define __NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define 
__NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_modify_ldt 123 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR_getdents 141 +#define __NR__newselect 142 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define __NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_setresuid 164 +#define __NR_getresuid 165 + +#define __NR_query_module 167 +#define __NR_poll 168 +#define __NR_nfsservctl 169 +#define __NR_setresgid 170 +#define __NR_getresgid 171 +#define __NR_prctl 172 +#define __NR_rt_sigreturn 173 +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigpending 176 +#define __NR_rt_sigtimedwait 177 +#define __NR_rt_sigqueueinfo 178 +#define __NR_rt_sigsuspend 179 +#define __NR_pread64 180 +#define __NR_pwrite64 181 +#define __NR_chown 182 +#define __NR_getcwd 183 +#define __NR_capget 184 +#define __NR_capset 185 +#define __NR_sigaltstack 186 +#define __NR_sendfile 187 +#define __NR_getpmsg 188 /* some people actually want streams */ +#define __NR_putpmsg 189 /* some people actually want streams */ +#define __NR_vfork 190 +#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#define __NR_lchown32 198 +#define __NR_getuid32 199 +#define __NR_getgid32 200 +#define __NR_geteuid32 201 +#define __NR_getegid32 202 +#define __NR_setreuid32 203 +#define __NR_setregid32 204 +#define __NR_getgroups32 205 +#define __NR_setgroups32 206 +#define __NR_fchown32 207 +#define __NR_setresuid32 208 +#define __NR_getresuid32 209 +#define __NR_setresgid32 210 +#define __NR_getresgid32 211 +#define __NR_chown32 212 +#define __NR_setuid32 213 +#define __NR_setgid32 214 +#define __NR_setfsuid32 215 +#define __NR_setfsgid32 216 +#define __NR_pivot_root 217 +#define __NR_mincore 218 +#define __NR_madvise 219 +#define __NR_getdents64 220 +#define __NR_fcntl64 221 +/* 223 is unused */ +#define __NR_gettid 224 +#define __NR_readahead 225 +#define __NR_setxattr 226 +#define __NR_lsetxattr 227 +#define __NR_fsetxattr 228 +#define __NR_getxattr 229 +#define __NR_lgetxattr 230 +#define __NR_fgetxattr 231 +#define __NR_listxattr 232 +#define __NR_llistxattr 233 +#define __NR_flistxattr 234 +#define __NR_removexattr 235 +#define __NR_lremovexattr 236 +#define __NR_fremovexattr 237 +#define __NR_tkill 238 +#define __NR_sendfile64 239 +#define __NR_futex 240 +#define 
__NR_sched_setaffinity 241 +#define __NR_sched_getaffinity 242 +#define __NR_set_thread_area 243 +#define __NR_get_thread_area 244 +#define __NR_io_setup 245 +#define __NR_io_destroy 246 +#define __NR_io_getevents 247 +#define __NR_io_submit 248 +#define __NR_io_cancel 249 +#define __NR_fadvise64 250 +/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */ +#define __NR_exit_group 252 +#define __NR_lookup_dcookie 253 +#define __NR_epoll_create 254 +#define __NR_epoll_ctl 255 +#define __NR_epoll_wait 256 +#define __NR_remap_file_pages 257 +#define __NR_set_tid_address 258 +#define __NR_timer_create 259 +#define __NR_timer_settime (__NR_timer_create+1) +#define __NR_timer_gettime (__NR_timer_create+2) +#define __NR_timer_getoverrun (__NR_timer_create+3) +#define __NR_timer_delete (__NR_timer_create+4) +#define __NR_clock_settime (__NR_timer_create+5) +#define __NR_clock_gettime (__NR_timer_create+6) +#define __NR_clock_getres (__NR_timer_create+7) +#define __NR_clock_nanosleep (__NR_timer_create+8) +#define __NR_statfs64 268 +#define __NR_fstatfs64 269 +#define __NR_tgkill 270 +#define __NR_utimes 271 +#define __NR_fadvise64_64 272 +#define __NR_vserver 273 +#define __NR_mbind 274 +#define __NR_get_mempolicy 275 +#define __NR_set_mempolicy 276 +#define __NR_mq_open 277 +#define __NR_mq_unlink (__NR_mq_open+1) +#define __NR_mq_timedsend (__NR_mq_open+2) +#define __NR_mq_timedreceive (__NR_mq_open+3) +#define __NR_mq_notify (__NR_mq_open+4) +#define __NR_mq_getsetattr (__NR_mq_open+5) +#define __NR_kexec_load 283 +#define __NR_waitid 284 +/* #define __NR_sys_setaltroot 285 */ +#define __NR_add_key 286 +#define __NR_request_key 287 +#define __NR_keyctl 288 +#define __NR_ioprio_set 289 +#define __NR_ioprio_get 290 +#define __NR_inotify_init 291 +#define __NR_inotify_add_watch 292 +#define __NR_inotify_rm_watch 293 +#define __NR_migrate_pages 294 +#define __NR_openat 295 +#define __NR_mkdirat 296 +#define __NR_mknodat 297 +#define __NR_fchownat 298 +#define __NR_futimesat 299 +#define __NR_fstatat64 300 +#define __NR_unlinkat 301 +#define __NR_renameat 302 +#define __NR_linkat 303 +#define __NR_symlinkat 304 +#define __NR_readlinkat 305 +#define __NR_fchmodat 306 +#define __NR_faccessat 307 +#define __NR_pselect6 308 +#define __NR_ppoll 309 +#define __NR_unshare 310 +#define __NR_set_robust_list 311 +#define __NR_get_robust_list 312 +#define __NR_splice 313 +#define __NR_sync_file_range 314 +#define __NR_tee 315 +#define __NR_vmsplice 316 +#define __NR_move_pages 317 +#define __NR_getcpu 318 +#define __NR_epoll_pwait 319 +#define __NR_utimensat 320 +#define __NR_signalfd 321 +#define __NR_timerfd_create 322 +#define __NR_eventfd 323 +#define __NR_fallocate 324 +#define __NR_timerfd_settime 325 +#define __NR_timerfd_gettime 326 +#define __NR_signalfd4 327 +#define __NR_eventfd2 328 +#define __NR_epoll_create1 329 +#define __NR_dup3 330 +#define __NR_pipe2 331 +#define __NR_inotify_init1 332 +#define __NR_preadv 333 +#define __NR_pwritev 334 +#define __NR_setns 335 + +#endif /* _UAPI_ASM_CRIS_UNISTD_H_ */ From 8618e30bc14b06bfafa0f164cca7b0e06451f88a Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 8 Oct 2012 20:37:30 -0700 Subject: [PATCH 0265/5831] rbd: let con_work() handle backoff Both ceph_fault() and con_work() include handling for imposing a delay before doing further processing on a faulted connection. The latter is used only if ceph_fault() is unable to. Instead, just let con_work() always be responsible for implementing the delay. 
After setting up the delay value, set the BACKOFF flag on the connection unconditionally and call queue_con() to ensure con_work() will get called to handle it. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- net/ceph/messenger.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index cad0d17ec45e..973c16c20c42 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2398,24 +2398,8 @@ static void ceph_fault(struct ceph_connection *con) con->delay = BASE_DELAY_INTERVAL; else if (con->delay < MAX_DELAY_INTERVAL) con->delay *= 2; - con->ops->get(con); - if (queue_delayed_work(ceph_msgr_wq, &con->work, - round_jiffies_relative(con->delay))) { - dout("fault queued %p delay %lu\n", con, con->delay); - } else { - con->ops->put(con); - dout("fault failed to queue %p delay %lu, backoff\n", - con, con->delay); - /* - * In many cases we see a socket state change - * while con_work is running and end up - * queuing (non-delayed) work, such that we - * can't backoff with a delay. Set a flag so - * that when con_work restarts we schedule the - * delay then. - */ - set_bit(CON_FLAG_BACKOFF, &con->flags); - } + set_bit(CON_FLAG_BACKOFF, &con->flags); + queue_con(con); } out_unlock: From 802c6d967fbdcd2cbc91b917425661bb8bbfaade Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 8 Oct 2012 20:37:30 -0700 Subject: [PATCH 0266/5831] rbd: define common queue_con_delay() This patch defines a single function, queue_con_delay() to call queue_delayed_work() for a connection. It basically generalizes what was previously queue_con() by adding the delay argument. queue_con() is now a simple helper that passes 0 for its delay. queue_con_delay() returns 0 if it queued work or an errno if it did not for some reason. If con_work() finds the BACKOFF flag set for a connection, it now calls queue_con_delay() to handle arranging to start again after a delay. Note about connection reference counts: con_work() only ever gets called as a work item function. At the time that work is scheduled, a reference to the connection is acquired, and the corresponding con_work() call is then responsible for dropping that reference before it returns. Previously, the backoff handling inside con_work() silently handed off its reference to delayed work it scheduled. Now that queue_con_delay() is used, a new reference is acquired for the newly-scheduled work, and the original reference is dropped by the con->ops->put() call at the end of the function. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- net/ceph/messenger.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 973c16c20c42..66f6f56bcb23 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2244,22 +2244,33 @@ bad_tag: /* - * Atomically queue work on a connection. Bump @con reference to - * avoid races with connection teardown. + * Atomically queue work on a connection after the specified delay. + * Bump @con reference to avoid races with connection teardown. + * Returns 0 if work was queued, or an error code otherwise. 
*/ -static void queue_con(struct ceph_connection *con) +static int queue_con_delay(struct ceph_connection *con, unsigned long delay) { if (!con->ops->get(con)) { - dout("queue_con %p ref count 0\n", con); - return; + dout("%s %p ref count 0\n", __func__, con); + + return -ENOENT; } - if (!queue_delayed_work(ceph_msgr_wq, &con->work, 0)) { - dout("queue_con %p - already queued\n", con); + if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) { + dout("%s %p - already queued\n", __func__, con); con->ops->put(con); - } else { - dout("queue_con %p\n", con); + + return -EBUSY; } + + dout("%s %p %lu\n", __func__, con, delay); + + return 0; +} + +static void queue_con(struct ceph_connection *con) +{ + (void) queue_con_delay(con, 0); } /* @@ -2294,14 +2305,11 @@ restart: if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) { dout("con_work %p backing off\n", con); - if (queue_delayed_work(ceph_msgr_wq, &con->work, - round_jiffies_relative(con->delay))) { - dout("con_work %p backoff %lu\n", con, con->delay); - mutex_unlock(&con->mutex); - return; - } else { + ret = queue_con_delay(con, round_jiffies_relative(con->delay)); + if (ret) { dout("con_work %p FAILED to back off %lu\n", con, con->delay); + BUG_ON(ret == -ENOENT); set_bit(CON_FLAG_BACKOFF, &con->flags); } goto done; From 79f689aa6b14e058d4effd734e9920ed9531401d Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 5 Oct 2012 12:05:52 -0300 Subject: [PATCH 0267/5831] drm/i915: rewrite the LCPLL code Right now, we're trying to enable LCPLL at every mode set, but we're never disabling it. Also, we really don't want to be disabling LCPLL since it requires a very complex disable/enable sequence. This register should really be set by the BIOS and we shouldn't be touching it. Still, let's try to check its value and print some errors in case we find something wrong. We're also adding intel_ddi_get_cdclk_freq which will be used later in other places. 
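For orientation, the CDCLK frequency decode done by the new helper boils down to the following condensed sketch (the register and bit names come from the hunks below; cdclk_mhz is only an illustrative local):

	if (I915_READ(HSW_FUSE_STRAP) & HSW_CDCLK_LIMIT)
		cdclk_mhz = 450;	/* fuse strap caps the display clock */
	else if ((I915_READ(LCPLL_CTL) & LCPLL_CLK_FREQ_MASK) == LCPLL_CLK_FREQ_450)
		cdclk_mhz = 450;	/* LCPLL reports the 450 MHz setting */
	else
		cdclk_mhz = 540;
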
Signed-off-by: Paulo Zanoni Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 6 +++++ drivers/gpu/drm/i915/intel_ddi.c | 37 +++++++++++++++++++++++----- drivers/gpu/drm/i915/intel_display.c | 7 ++++++ drivers/gpu/drm/i915/intel_drv.h | 1 + 4 files changed, 45 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f3a06b421be4..5107ceece3b0 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3456,6 +3456,9 @@ #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG 0x9030 #define GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB (1<<11) +#define HSW_FUSE_STRAP 0x42014 +#define HSW_CDCLK_LIMIT (1 << 24) + /* PCH */ /* south display engine interrupt: IBX */ @@ -4531,8 +4534,11 @@ #define LCPLL_CTL 0x130040 #define LCPLL_PLL_DISABLE (1<<31) #define LCPLL_PLL_LOCK (1<<30) +#define LCPLL_CLK_FREQ_MASK (3<<26) +#define LCPLL_CLK_FREQ_450 (0<<26) #define LCPLL_CD_CLOCK_DISABLE (1<<25) #define LCPLL_CD2X_CLOCK_DISABLE (1<<23) +#define LCPLL_CD_SOURCE_FCLK (1<<21) /* Pipe WM_LINETIME - watermark line time */ #define PIPE_WM_LINETIME_A 0x45270 diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index bfe375466a0e..187ea3bdc663 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -682,12 +682,6 @@ void intel_ddi_mode_set(struct drm_encoder *encoder, DRM_DEBUG_KMS("WR PLL: %dKHz refresh rate with p=%d, n2=%d r2=%d\n", crtc->mode.clock, p, n2, r2); - /* Enable LCPLL if disabled */ - temp = I915_READ(LCPLL_CTL); - if (temp & LCPLL_PLL_DISABLE) - I915_WRITE(LCPLL_CTL, - temp & ~LCPLL_PLL_DISABLE); - /* Configure WR PLL 1, program the correct divider values for * the desired frequency and wait for warmup */ I915_WRITE(WRPLL_CTL1, @@ -817,3 +811,34 @@ void intel_disable_ddi(struct intel_encoder *encoder) I915_WRITE(DDI_BUF_CTL(port), temp); } + +static int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv) +{ + if (I915_READ(HSW_FUSE_STRAP) & HSW_CDCLK_LIMIT) + return 450; + else if ((I915_READ(LCPLL_CTL) & LCPLL_CLK_FREQ_MASK) == + LCPLL_CLK_FREQ_450) + return 450; + else + return 540; +} + +void intel_ddi_pll_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t val = I915_READ(LCPLL_CTL); + + /* The LCPLL register should be turned on by the BIOS. For now let's + * just check its state and print errors in case something is wrong. + * Don't even try to turn it on. 
+ */ + + DRM_DEBUG_KMS("CDCLK running at %dMHz\n", + intel_ddi_get_cdclk_freq(dev_priv)); + + if (val & LCPLL_CD_SOURCE_FCLK) + DRM_ERROR("CDCLK source is not LCPLL\n"); + + if (val & LCPLL_PLL_DISABLE) + DRM_ERROR("LCPLL is disabled\n"); +} diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6cf0d003d715..40f98d179d80 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7477,6 +7477,12 @@ static const struct drm_crtc_funcs intel_crtc_funcs = { .page_flip = intel_crtc_page_flip, }; +static void intel_cpu_pll_init(struct drm_device *dev) +{ + if (IS_HASWELL(dev)) + intel_ddi_pll_init(dev); +} + static void intel_pch_pll_init(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; @@ -8085,6 +8091,7 @@ void intel_modeset_init(struct drm_device *dev) DRM_DEBUG_KMS("plane %d init failed: %d\n", i, ret); } + intel_cpu_pll_init(dev); intel_pch_pll_init(dev); /* Just disable it once at startup */ diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 79f8ed66574e..57566b713a7a 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -580,5 +580,6 @@ extern bool intel_ddi_get_hw_state(struct intel_encoder *encoder, extern void intel_ddi_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); +extern void intel_ddi_pll_init(struct drm_device *dev); #endif /* __INTEL_DRV_H__ */ From 8d9ddbcbd0e3fc9368a5972346dc726493d31c9c Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 5 Oct 2012 12:05:53 -0300 Subject: [PATCH 0268/5831] drm/i915: enable and disable DDI_FUNC_CTL at the right time And the right time is exactly after/before changing PIPE_CONF. See the documentation about the mode set sequence. This code is not inside any encoder-specific callback because DDI_FUNC_CTL is part of the pipe, so it is used by all encoders. 
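In terms of ordering, the intent is sketched below (condensed from the intel_display.c hunks in this patch; Haswell only):

	/* enable path, ironlake_crtc_enable() */
	intel_ddi_enable_pipe_func(crtc);		/* program DDI_FUNC_CTL first */
	intel_enable_pipe(dev_priv, pipe, is_pch_port);	/* then enable the pipe */

	/* disable path, ironlake_crtc_disable() */
	intel_disable_pipe(dev_priv, pipe);		/* disable the pipe first */
	intel_ddi_disable_pipe_func(dev_priv, pipe);	/* then clear DDI_FUNC_CTL */
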
Signed-off-by: Paulo Zanoni Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ddi.c | 85 +++++++++++++++++++++------- drivers/gpu/drm/i915/intel_display.c | 6 ++ drivers/gpu/drm/i915/intel_drv.h | 3 + 4 files changed, 75 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5107ceece3b0..d1b58d047e76 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4394,6 +4394,7 @@ /* Those bits are ignored by pipe EDP since it can only connect to DDI A */ #define PIPE_DDI_PORT_MASK (7<<28) #define PIPE_DDI_SELECT_PORT(x) ((x)<<28) +#define PIPE_DDI_PORT_NONE (0<<28) #define PIPE_DDI_MODE_SELECT_MASK (7<<24) #define PIPE_DDI_MODE_SELECT_HDMI (0<<24) #define PIPE_DDI_MODE_SELECT_DVI (1<<24) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 187ea3bdc663..fafa79dd5e5b 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -203,15 +203,6 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) DP_TP_CTL_ENHANCED_FRAME_ENABLE | DP_TP_CTL_ENABLE); - /* Enable PIPE_DDI_FUNC_CTL for the pipe to work in FDI mode */ - temp = I915_READ(DDI_FUNC_CTL(pipe)); - temp &= ~PIPE_DDI_PORT_MASK; - temp |= PIPE_DDI_SELECT_PORT(PORT_E) | - PIPE_DDI_MODE_SELECT_FDI | - PIPE_DDI_FUNC_ENABLE | - PIPE_DDI_PORT_WIDTH_X2; - I915_WRITE(DDI_FUNC_CTL(pipe), - temp); break; } else { DRM_ERROR("Error training BUF_CTL %d\n", i); @@ -657,7 +648,7 @@ void intel_ddi_mode_set(struct drm_encoder *encoder, int port = intel_hdmi->ddi_port; int pipe = intel_crtc->pipe; int p, n2, r2; - u32 temp, i; + u32 i; /* On Haswell, we need to enable the clocks and prepare DDI function to * work in HDMI mode for this pipe. 
@@ -715,8 +706,40 @@ void intel_ddi_mode_set(struct drm_encoder *encoder, intel_write_eld(encoder, adjusted_mode); } + intel_hdmi->set_infoframes(encoder, adjusted_mode); +} + +static struct intel_encoder * +intel_ddi_get_crtc_encoder(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_encoder *intel_encoder, *ret = NULL; + int num_encoders = 0; + + for_each_encoder_on_crtc(dev, crtc, intel_encoder) { + ret = intel_encoder; + num_encoders++; + } + + if (num_encoders != 1) + WARN(1, "%d encoders on crtc for pipe %d\n", num_encoders, + intel_crtc->pipe); + + BUG_ON(ret == NULL); + return ret; +} + +void intel_ddi_enable_pipe_func(struct drm_crtc *crtc) +{ + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); + struct drm_i915_private *dev_priv = crtc->dev->dev_private; + enum pipe pipe = intel_crtc->pipe; + uint32_t temp; + /* Enable PIPE_DDI_FUNC_CTL for the pipe to work in HDMI mode */ - temp = PIPE_DDI_FUNC_ENABLE | PIPE_DDI_SELECT_PORT(port); + temp = PIPE_DDI_FUNC_ENABLE; switch (intel_crtc->bpp) { case 18: @@ -736,19 +759,41 @@ void intel_ddi_mode_set(struct drm_encoder *encoder, intel_crtc->bpp); } - if (intel_hdmi->has_hdmi_sink) - temp |= PIPE_DDI_MODE_SELECT_HDMI; - else - temp |= PIPE_DDI_MODE_SELECT_DVI; - - if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC) + if (crtc->mode.flags & DRM_MODE_FLAG_PVSYNC) temp |= PIPE_DDI_PVSYNC; - if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC) + if (crtc->mode.flags & DRM_MODE_FLAG_PHSYNC) temp |= PIPE_DDI_PHSYNC; - I915_WRITE(DDI_FUNC_CTL(pipe), temp); + if (intel_encoder->type == INTEL_OUTPUT_HDMI) { + struct intel_hdmi *intel_hdmi = + enc_to_intel_hdmi(&intel_encoder->base); - intel_hdmi->set_infoframes(encoder, adjusted_mode); + if (intel_hdmi->has_hdmi_sink) + temp |= PIPE_DDI_MODE_SELECT_HDMI; + else + temp |= PIPE_DDI_MODE_SELECT_DVI; + + temp |= PIPE_DDI_SELECT_PORT(intel_hdmi->ddi_port); + } else if (intel_encoder->type == INTEL_OUTPUT_ANALOG) { + temp |= PIPE_DDI_MODE_SELECT_FDI; + temp |= PIPE_DDI_SELECT_PORT(PORT_E); + } else { + WARN(1, "Invalid encoder type %d for pipe %d\n", + intel_encoder->type, pipe); + } + + I915_WRITE(DDI_FUNC_CTL(pipe), temp); +} + +void intel_ddi_disable_pipe_func(struct drm_i915_private *dev_priv, + enum pipe pipe) +{ + uint32_t reg = DDI_FUNC_CTL(pipe); + uint32_t val = I915_READ(reg); + + val &= ~(PIPE_DDI_FUNC_ENABLE | PIPE_DDI_PORT_MASK); + val |= PIPE_DDI_PORT_NONE; + I915_WRITE(reg, val); } bool intel_ddi_get_hw_state(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 40f98d179d80..e4f07a275fe2 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3215,6 +3215,9 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) */ intel_crtc_load_lut(crtc); + if (IS_HASWELL(dev)) + intel_ddi_enable_pipe_func(crtc); + intel_enable_pipe(dev_priv, pipe, is_pch_port); intel_enable_plane(dev_priv, plane, pipe); @@ -3262,6 +3265,9 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc) intel_disable_pipe(dev_priv, pipe); + if (IS_HASWELL(dev)) + intel_ddi_disable_pipe_func(dev_priv, pipe); + /* Disable PF */ I915_WRITE(PF_CTL(pipe), 0); I915_WRITE(PF_WIN_SZ(pipe), 0); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 57566b713a7a..0253bb4b70fd 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ 
b/drivers/gpu/drm/i915/intel_drv.h @@ -581,5 +581,8 @@ extern void intel_ddi_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); extern void intel_ddi_pll_init(struct drm_device *dev); +extern void intel_ddi_enable_pipe_func(struct drm_crtc *crtc); +extern void intel_ddi_disable_pipe_func(struct drm_i915_private *dev_priv, + enum pipe pipe); #endif /* __INTEL_DRV_H__ */ From fc914639b12054c930fe75a2ff4221311699e9c2 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 5 Oct 2012 12:05:54 -0300 Subject: [PATCH 0269/5831] drm/i915: enable and disable PIPE_CLK_SEL at the right time Previously we were enabling it at mode_set but never disabling. Let's follow the mode set sequence. Signed-off-by: Paulo Zanoni Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 37 +++++++++++++++++++++++++--- drivers/gpu/drm/i915/intel_display.c | 6 +++++ drivers/gpu/drm/i915/intel_drv.h | 2 ++ 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index fafa79dd5e5b..8740a5cf1359 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -58,6 +58,22 @@ static const u32 hsw_ddi_translations_fdi[] = { 0x00FFFFFF, 0x00040006 /* HDMI parameters */ }; +static enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder) +{ + int type = intel_encoder->type; + + if (type == INTEL_OUTPUT_HDMI) { + struct intel_hdmi *intel_hdmi = + enc_to_intel_hdmi(&intel_encoder->base); + return intel_hdmi->ddi_port; + } else if (type == INTEL_OUTPUT_ANALOG) { + return PORT_E; + } else { + DRM_ERROR("Invalid DDI encoder type %d\n", type); + BUG(); + } +} + /* On Haswell, DDI port buffers must be programmed with correct values * in advance. The buffer values are different for FDI and DP modes, * but the HDMI/DVI fields are shared among those. 
So we program the DDI @@ -145,8 +161,6 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) /* Use SPLL to drive the output when in FDI mode */ I915_WRITE(PORT_CLK_SEL(PORT_E), PORT_CLK_SEL_SPLL); - I915_WRITE(PIPE_CLK_SEL(pipe), - PIPE_CLK_SEL_PORT(PORT_E)); udelay(20); @@ -689,8 +703,6 @@ void intel_ddi_mode_set(struct drm_encoder *encoder, */ I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_WRPLL1); - I915_WRITE(PIPE_CLK_SEL(pipe), - PIPE_CLK_SEL_PORT(port)); udelay(20); @@ -825,6 +837,23 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, return true; } +void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc) +{ + struct drm_crtc *crtc = &intel_crtc->base; + struct drm_i915_private *dev_priv = crtc->dev->dev_private; + struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); + enum port port = intel_ddi_get_encoder_port(intel_encoder); + + I915_WRITE(PIPE_CLK_SEL(intel_crtc->pipe), PIPE_CLK_SEL_PORT(port)); +} + +void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc) +{ + struct drm_i915_private *dev_priv = intel_crtc->base.dev->dev_private; + + I915_WRITE(PIPE_CLK_SEL(intel_crtc->pipe), PIPE_CLK_SEL_DISABLED); +} + void intel_enable_ddi(struct intel_encoder *encoder) { struct drm_device *dev = encoder->base.dev; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e4f07a275fe2..faa20131c65b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3197,6 +3197,9 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) if (encoder->pre_enable) encoder->pre_enable(encoder); + if (IS_HASWELL(dev)) + intel_ddi_enable_pipe_clock(intel_crtc); + /* Enable panel fitting for LVDS */ if (dev_priv->pch_pf_size && (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) || HAS_eDP)) { @@ -3272,6 +3275,9 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc) I915_WRITE(PF_CTL(pipe), 0); I915_WRITE(PF_WIN_SZ(pipe), 0); + if (IS_HASWELL(dev)) + intel_ddi_disable_pipe_clock(intel_crtc); + for_each_encoder_on_crtc(dev, crtc, encoder) if (encoder->post_disable) encoder->post_disable(encoder); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 0253bb4b70fd..5de365d70de9 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -584,5 +584,7 @@ extern void intel_ddi_pll_init(struct drm_device *dev); extern void intel_ddi_enable_pipe_func(struct drm_crtc *crtc); extern void intel_ddi_disable_pipe_func(struct drm_i915_private *dev_priv, enum pipe pipe); +extern void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc); +extern void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc); #endif /* __INTEL_DRV_H__ */ From 09b4ddf95d54bd622a1f1ddc14412a5b3001e023 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 5 Oct 2012 12:05:55 -0300 Subject: [PATCH 0270/5831] drm/i915: add haswell_crtc_mode_set It's just a copy of ironlake_crtc_mode_set. 
Signed-off-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 187 ++++++++++++++++++++++++++- 1 file changed, 186 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index faa20131c65b..ef8913246477 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5142,6 +5142,185 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, return ret; } +static int haswell_crtc_mode_set(struct drm_crtc *crtc, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode, + int x, int y, + struct drm_framebuffer *fb) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; + int plane = intel_crtc->plane; + int num_connectors = 0; + intel_clock_t clock, reduced_clock; + u32 dpll, fp = 0, fp2 = 0; + bool ok, has_reduced_clock = false; + bool is_lvds = false, is_dp = false, is_cpu_edp = false; + struct intel_encoder *encoder; + u32 temp; + int ret; + bool dither; + + for_each_encoder_on_crtc(dev, crtc, encoder) { + switch (encoder->type) { + case INTEL_OUTPUT_LVDS: + is_lvds = true; + break; + case INTEL_OUTPUT_DISPLAYPORT: + is_dp = true; + break; + case INTEL_OUTPUT_EDP: + is_dp = true; + if (!intel_encoder_is_pch_edp(&encoder->base)) + is_cpu_edp = true; + break; + } + + num_connectors++; + } + + ok = ironlake_compute_clocks(crtc, adjusted_mode, &clock, + &has_reduced_clock, &reduced_clock); + if (!ok) { + DRM_ERROR("Couldn't find PLL settings for mode!\n"); + return -EINVAL; + } + + /* Ensure that the cursor is valid for the new mode before changing... */ + intel_crtc_update_cursor(crtc, true); + + /* determine panel color depth */ + dither = intel_choose_pipe_bpp_dither(crtc, fb, &intel_crtc->bpp, mode); + if (is_lvds && dev_priv->lvds_dither) + dither = true; + + fp = clock.n << 16 | clock.m1 << 8 | clock.m2; + if (has_reduced_clock) + fp2 = reduced_clock.n << 16 | reduced_clock.m1 << 8 | + reduced_clock.m2; + + dpll = ironlake_compute_dpll(intel_crtc, adjusted_mode, &clock, fp); + + DRM_DEBUG_KMS("Mode for pipe %d:\n", pipe); + drm_mode_debug_printmodeline(mode); + + /* CPU eDP is the only output that doesn't need a PCH PLL of its own on + * pre-Haswell/LPT generation */ + if (HAS_PCH_LPT(dev)) { + DRM_DEBUG_KMS("LPT detected: no PLL for pipe %d necessary\n", + pipe); + } else if (!is_cpu_edp) { + struct intel_pch_pll *pll; + + pll = intel_get_pch_pll(intel_crtc, dpll, fp); + if (pll == NULL) { + DRM_DEBUG_DRIVER("failed to find PLL for pipe %d\n", + pipe); + return -EINVAL; + } + } else + intel_put_pch_pll(intel_crtc); + + /* The LVDS pin pair needs to be on before the DPLLs are enabled. + * This is an exception to the general rule that mode_set doesn't turn + * things on. + */ + if (is_lvds) { + temp = I915_READ(PCH_LVDS); + temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP; + if (HAS_PCH_CPT(dev)) { + temp &= ~PORT_TRANS_SEL_MASK; + temp |= PORT_TRANS_SEL_CPT(pipe); + } else { + if (pipe == 1) + temp |= LVDS_PIPEB_SELECT; + else + temp &= ~LVDS_PIPEB_SELECT; + } + + /* set the corresponsding LVDS_BORDER bit */ + temp |= dev_priv->lvds_border_bits; + /* Set the B0-B3 data pairs corresponding to whether we're going to + * set the DPLLs for dual-channel mode or not. 
+ */ + if (clock.p2 == 7) + temp |= LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP; + else + temp &= ~(LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP); + + /* It would be nice to set 24 vs 18-bit mode (LVDS_A3_POWER_UP) + * appropriately here, but we need to look more thoroughly into how + * panels behave in the two modes. + */ + temp &= ~(LVDS_HSYNC_POLARITY | LVDS_VSYNC_POLARITY); + if (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC) + temp |= LVDS_HSYNC_POLARITY; + if (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC) + temp |= LVDS_VSYNC_POLARITY; + I915_WRITE(PCH_LVDS, temp); + } + + if (is_dp && !is_cpu_edp) { + intel_dp_set_m_n(crtc, mode, adjusted_mode); + } else { + /* For non-DP output, clear any trans DP clock recovery setting.*/ + I915_WRITE(TRANSDATA_M1(pipe), 0); + I915_WRITE(TRANSDATA_N1(pipe), 0); + I915_WRITE(TRANSDPLINK_M1(pipe), 0); + I915_WRITE(TRANSDPLINK_N1(pipe), 0); + } + + if (intel_crtc->pch_pll) { + I915_WRITE(intel_crtc->pch_pll->pll_reg, dpll); + + /* Wait for the clocks to stabilize. */ + POSTING_READ(intel_crtc->pch_pll->pll_reg); + udelay(150); + + /* The pixel multiplier can only be updated once the + * DPLL is enabled and the clocks are stable. + * + * So write it again. + */ + I915_WRITE(intel_crtc->pch_pll->pll_reg, dpll); + } + + intel_crtc->lowfreq_avail = false; + if (intel_crtc->pch_pll) { + if (is_lvds && has_reduced_clock && i915_powersave) { + I915_WRITE(intel_crtc->pch_pll->fp1_reg, fp2); + intel_crtc->lowfreq_avail = true; + } else { + I915_WRITE(intel_crtc->pch_pll->fp1_reg, fp); + } + } + + intel_set_pipe_timings(intel_crtc, mode, adjusted_mode); + + ironlake_set_m_n(crtc, mode, adjusted_mode); + + if (is_cpu_edp) + ironlake_set_pll_edp(crtc, adjusted_mode->clock); + + ironlake_set_pipeconf(crtc, adjusted_mode, dither); + + intel_wait_for_vblank(dev, pipe); + + /* Set up the display plane register */ + I915_WRITE(DSPCNTR(plane), DISPPLANE_GAMMA_ENABLE); + POSTING_READ(DSPCNTR(plane)); + + ret = intel_pipe_set_base(crtc, x, y, fb); + + intel_update_watermarks(dev); + + intel_update_linetime_watermarks(dev, pipe, adjusted_mode); + + return ret; +} + static int intel_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, @@ -7852,7 +8031,13 @@ static void intel_init_display(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; /* We always want a DPMS function */ - if (HAS_PCH_SPLIT(dev)) { + if (IS_HASWELL(dev)) { + dev_priv->display.crtc_mode_set = haswell_crtc_mode_set; + dev_priv->display.crtc_enable = ironlake_crtc_enable; + dev_priv->display.crtc_disable = ironlake_crtc_disable; + dev_priv->display.off = ironlake_crtc_off; + dev_priv->display.update_plane = ironlake_update_plane; + } else if (HAS_PCH_SPLIT(dev)) { dev_priv->display.crtc_mode_set = ironlake_crtc_mode_set; dev_priv->display.crtc_enable = ironlake_crtc_enable; dev_priv->display.crtc_disable = ironlake_crtc_disable; From 5dc5298bb3e5d72af6bab5c1d43dad9a07052982 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 5 Oct 2012 12:05:56 -0300 Subject: [PATCH 0271/5831] drm/i915: add proper CPU/PCH checks to crtc_mode_set functions On ironlake_crtc_mode_set, WARN if not using IBX or CPT. On haswell_crtc_mode_set, only run IBX/CPT code on IBX/CPT. I am still not sure whether IBX/CPT will be possible with a Haswell CPU, so leave the code there for now and put a WARN in case we spot it. 
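Condensed, the checks being added are (names as in the hunks below):

	/* ironlake_crtc_mode_set(): only IBX/CPT PCHs are expected */
	WARN(!(HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)),
	     "Unexpected PCH type %d\n", INTEL_PCH_TYPE(dev));

	/* haswell_crtc_mode_set(): LPT is what we expect to see */
	WARN(!HAS_PCH_LPT(dev), "Unexpected PCH type %d\n", INTEL_PCH_TYPE(dev));
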
Signed-off-by: Paulo Zanoni Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 212 +++++++++++++++------------ 1 file changed, 115 insertions(+), 97 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ef8913246477..213831f57ef1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5002,6 +5002,9 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, num_connectors++; } + WARN(!(HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)), + "Unexpected PCH type %d\n", INTEL_PCH_TYPE(dev)); + ok = ironlake_compute_clocks(crtc, adjusted_mode, &clock, &has_reduced_clock, &reduced_clock); if (!ok) { @@ -5027,12 +5030,8 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, DRM_DEBUG_KMS("Mode for pipe %d:\n", pipe); drm_mode_debug_printmodeline(mode); - /* CPU eDP is the only output that doesn't need a PCH PLL of its own on - * pre-Haswell/LPT generation */ - if (HAS_PCH_LPT(dev)) { - DRM_DEBUG_KMS("LPT detected: no PLL for pipe %d necessary\n", - pipe); - } else if (!is_cpu_edp) { + /* CPU eDP is the only output that doesn't need a PCH PLL of its own. */ + if (!is_cpu_edp) { struct intel_pch_pll *pll; pll = intel_get_pch_pll(intel_crtc, dpll, fp); @@ -5155,7 +5154,7 @@ static int haswell_crtc_mode_set(struct drm_crtc *crtc, int plane = intel_crtc->plane; int num_connectors = 0; intel_clock_t clock, reduced_clock; - u32 dpll, fp = 0, fp2 = 0; + u32 dpll = 0, fp = 0, fp2 = 0; bool ok, has_reduced_clock = false; bool is_lvds = false, is_dp = false, is_cpu_edp = false; struct intel_encoder *encoder; @@ -5181,11 +5180,21 @@ static int haswell_crtc_mode_set(struct drm_crtc *crtc, num_connectors++; } - ok = ironlake_compute_clocks(crtc, adjusted_mode, &clock, - &has_reduced_clock, &reduced_clock); - if (!ok) { - DRM_ERROR("Couldn't find PLL settings for mode!\n"); - return -EINVAL; + /* We are not sure yet this won't happen. */ + WARN(!HAS_PCH_LPT(dev), "Unexpected PCH type %d\n", + INTEL_PCH_TYPE(dev)); + + WARN(num_connectors != 1, "%d connectors attached to pipe %c\n", + num_connectors, pipe_name(pipe)); + + if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) { + ok = ironlake_compute_clocks(crtc, adjusted_mode, &clock, + &has_reduced_clock, + &reduced_clock); + if (!ok) { + DRM_ERROR("Couldn't find PLL settings for mode!\n"); + return -EINVAL; + } } /* Ensure that the cursor is valid for the new mode before changing... 
*/ @@ -5196,104 +5205,112 @@ static int haswell_crtc_mode_set(struct drm_crtc *crtc, if (is_lvds && dev_priv->lvds_dither) dither = true; - fp = clock.n << 16 | clock.m1 << 8 | clock.m2; - if (has_reduced_clock) - fp2 = reduced_clock.n << 16 | reduced_clock.m1 << 8 | - reduced_clock.m2; - - dpll = ironlake_compute_dpll(intel_crtc, adjusted_mode, &clock, fp); - DRM_DEBUG_KMS("Mode for pipe %d:\n", pipe); drm_mode_debug_printmodeline(mode); - /* CPU eDP is the only output that doesn't need a PCH PLL of its own on - * pre-Haswell/LPT generation */ - if (HAS_PCH_LPT(dev)) { - DRM_DEBUG_KMS("LPT detected: no PLL for pipe %d necessary\n", - pipe); - } else if (!is_cpu_edp) { - struct intel_pch_pll *pll; + if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) { + fp = clock.n << 16 | clock.m1 << 8 | clock.m2; + if (has_reduced_clock) + fp2 = reduced_clock.n << 16 | reduced_clock.m1 << 8 | + reduced_clock.m2; - pll = intel_get_pch_pll(intel_crtc, dpll, fp); - if (pll == NULL) { - DRM_DEBUG_DRIVER("failed to find PLL for pipe %d\n", - pipe); - return -EINVAL; - } - } else - intel_put_pch_pll(intel_crtc); + dpll = ironlake_compute_dpll(intel_crtc, adjusted_mode, &clock, + fp); - /* The LVDS pin pair needs to be on before the DPLLs are enabled. - * This is an exception to the general rule that mode_set doesn't turn - * things on. - */ - if (is_lvds) { - temp = I915_READ(PCH_LVDS); - temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP; - if (HAS_PCH_CPT(dev)) { - temp &= ~PORT_TRANS_SEL_MASK; - temp |= PORT_TRANS_SEL_CPT(pipe); - } else { - if (pipe == 1) - temp |= LVDS_PIPEB_SELECT; + /* CPU eDP is the only output that doesn't need a PCH PLL of its + * own on pre-Haswell/LPT generation */ + if (!is_cpu_edp) { + struct intel_pch_pll *pll; + + pll = intel_get_pch_pll(intel_crtc, dpll, fp); + if (pll == NULL) { + DRM_DEBUG_DRIVER("failed to find PLL for pipe %d\n", + pipe); + return -EINVAL; + } + } else + intel_put_pch_pll(intel_crtc); + + /* The LVDS pin pair needs to be on before the DPLLs are + * enabled. This is an exception to the general rule that + * mode_set doesn't turn things on. + */ + if (is_lvds) { + temp = I915_READ(PCH_LVDS); + temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP; + if (HAS_PCH_CPT(dev)) { + temp &= ~PORT_TRANS_SEL_MASK; + temp |= PORT_TRANS_SEL_CPT(pipe); + } else { + if (pipe == 1) + temp |= LVDS_PIPEB_SELECT; + else + temp &= ~LVDS_PIPEB_SELECT; + } + + /* set the corresponsding LVDS_BORDER bit */ + temp |= dev_priv->lvds_border_bits; + /* Set the B0-B3 data pairs corresponding to whether + * we're going to set the DPLLs for dual-channel mode or + * not. + */ + if (clock.p2 == 7) + temp |= LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP; else - temp &= ~LVDS_PIPEB_SELECT; + temp &= ~(LVDS_B0B3_POWER_UP | + LVDS_CLKB_POWER_UP); + + /* It would be nice to set 24 vs 18-bit mode + * (LVDS_A3_POWER_UP) appropriately here, but we need to + * look more thoroughly into how panels behave in the + * two modes. + */ + temp &= ~(LVDS_HSYNC_POLARITY | LVDS_VSYNC_POLARITY); + if (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC) + temp |= LVDS_HSYNC_POLARITY; + if (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC) + temp |= LVDS_VSYNC_POLARITY; + I915_WRITE(PCH_LVDS, temp); } - - /* set the corresponsding LVDS_BORDER bit */ - temp |= dev_priv->lvds_border_bits; - /* Set the B0-B3 data pairs corresponding to whether we're going to - * set the DPLLs for dual-channel mode or not. 
- */ - if (clock.p2 == 7) - temp |= LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP; - else - temp &= ~(LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP); - - /* It would be nice to set 24 vs 18-bit mode (LVDS_A3_POWER_UP) - * appropriately here, but we need to look more thoroughly into how - * panels behave in the two modes. - */ - temp &= ~(LVDS_HSYNC_POLARITY | LVDS_VSYNC_POLARITY); - if (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC) - temp |= LVDS_HSYNC_POLARITY; - if (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC) - temp |= LVDS_VSYNC_POLARITY; - I915_WRITE(PCH_LVDS, temp); } if (is_dp && !is_cpu_edp) { intel_dp_set_m_n(crtc, mode, adjusted_mode); } else { - /* For non-DP output, clear any trans DP clock recovery setting.*/ - I915_WRITE(TRANSDATA_M1(pipe), 0); - I915_WRITE(TRANSDATA_N1(pipe), 0); - I915_WRITE(TRANSDPLINK_M1(pipe), 0); - I915_WRITE(TRANSDPLINK_N1(pipe), 0); - } - - if (intel_crtc->pch_pll) { - I915_WRITE(intel_crtc->pch_pll->pll_reg, dpll); - - /* Wait for the clocks to stabilize. */ - POSTING_READ(intel_crtc->pch_pll->pll_reg); - udelay(150); - - /* The pixel multiplier can only be updated once the - * DPLL is enabled and the clocks are stable. - * - * So write it again. - */ - I915_WRITE(intel_crtc->pch_pll->pll_reg, dpll); + if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) { + /* For non-DP output, clear any trans DP clock recovery + * setting.*/ + I915_WRITE(TRANSDATA_M1(pipe), 0); + I915_WRITE(TRANSDATA_N1(pipe), 0); + I915_WRITE(TRANSDPLINK_M1(pipe), 0); + I915_WRITE(TRANSDPLINK_N1(pipe), 0); + } } intel_crtc->lowfreq_avail = false; - if (intel_crtc->pch_pll) { - if (is_lvds && has_reduced_clock && i915_powersave) { - I915_WRITE(intel_crtc->pch_pll->fp1_reg, fp2); - intel_crtc->lowfreq_avail = true; - } else { - I915_WRITE(intel_crtc->pch_pll->fp1_reg, fp); + if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) { + if (intel_crtc->pch_pll) { + I915_WRITE(intel_crtc->pch_pll->pll_reg, dpll); + + /* Wait for the clocks to stabilize. */ + POSTING_READ(intel_crtc->pch_pll->pll_reg); + udelay(150); + + /* The pixel multiplier can only be updated once the + * DPLL is enabled and the clocks are stable. + * + * So write it again. + */ + I915_WRITE(intel_crtc->pch_pll->pll_reg, dpll); + } + + if (intel_crtc->pch_pll) { + if (is_lvds && has_reduced_clock && i915_powersave) { + I915_WRITE(intel_crtc->pch_pll->fp1_reg, fp2); + intel_crtc->lowfreq_avail = true; + } else { + I915_WRITE(intel_crtc->pch_pll->fp1_reg, fp); + } } } @@ -5301,8 +5318,9 @@ static int haswell_crtc_mode_set(struct drm_crtc *crtc, ironlake_set_m_n(crtc, mode, adjusted_mode); - if (is_cpu_edp) - ironlake_set_pll_edp(crtc, adjusted_mode->clock); + if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) + if (is_cpu_edp) + ironlake_set_pll_edp(crtc, adjusted_mode->clock); ironlake_set_pipeconf(crtc, adjusted_mode, dither); From ee2b0b382a7e6cbf3549559ec7dc86c63f5aa3d1 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 5 Oct 2012 12:05:57 -0300 Subject: [PATCH 0272/5831] drm/i915: add haswell_set_pipeconf It's a copy of ironlake_set_pipeconf with 2 differences: - There is no BPC field to set. - The interlaced mask is now 2 bits instead of 3. 
Signed-off-by: Paulo Zanoni Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_display.c | 27 ++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d1b58d047e76..fd9a319b86a7 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2639,6 +2639,7 @@ #define PIPECONF_GAMMA (1<<24) #define PIPECONF_FORCE_BORDER (1<<25) #define PIPECONF_INTERLACE_MASK (7 << 21) +#define PIPECONF_INTERLACE_MASK_HSW (3 << 21) /* Note that pre-gen3 does not support interlaced display directly. Panel * fitting must be disabled on pre-ilk for interlaced. */ #define PIPECONF_PROGRESSIVE (0 << 21) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 213831f57ef1..39d0753d6a37 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4721,6 +4721,31 @@ static void ironlake_set_pipeconf(struct drm_crtc *crtc, POSTING_READ(PIPECONF(pipe)); } +static void haswell_set_pipeconf(struct drm_crtc *crtc, + struct drm_display_mode *adjusted_mode, + bool dither) +{ + struct drm_i915_private *dev_priv = crtc->dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; + uint32_t val; + + val = I915_READ(PIPECONF(pipe)); + + val &= ~(PIPECONF_DITHER_EN | PIPECONF_DITHER_TYPE_MASK); + if (dither) + val |= (PIPECONF_DITHER_EN | PIPECONF_DITHER_TYPE_SP); + + val &= ~PIPECONF_INTERLACE_MASK_HSW; + if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) + val |= PIPECONF_INTERLACED_ILK; + else + val |= PIPECONF_PROGRESSIVE; + + I915_WRITE(PIPECONF(pipe), val); + POSTING_READ(PIPECONF(pipe)); +} + static bool ironlake_compute_clocks(struct drm_crtc *crtc, struct drm_display_mode *adjusted_mode, intel_clock_t *clock, @@ -5322,7 +5347,7 @@ static int haswell_crtc_mode_set(struct drm_crtc *crtc, if (is_cpu_edp) ironlake_set_pll_edp(crtc, adjusted_mode->clock); - ironlake_set_pipeconf(crtc, adjusted_mode, dither); + haswell_set_pipeconf(crtc, adjusted_mode, dither); intel_wait_for_vblank(dev, pipe); From 9478554ae5d21d65e948a3eff4ee2a8ad30d70e9 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 9 Oct 2012 13:50:17 -0700 Subject: [PATCH 0273/5831] rbd: define rbd_update_mapping_size() Encapsulate the code that handles updating the size of a mapping after an rbd image has been refreshed. This is done in anticipation of the next patch, which will make this common code for format 1 and 2 images. 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index bb3d9be3b1b4..b64125d1d7bd 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1716,6 +1716,19 @@ static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev) __rbd_remove_snap_dev(snap); } +static void rbd_update_mapping_size(struct rbd_device *rbd_dev) +{ + sector_t size; + + if (rbd_dev->mapping.snap_id != CEPH_NOSNAP) + return; + + size = (sector_t) rbd_dev->header.image_size / SECTOR_SIZE; + dout("setting size to %llu sectors", (unsigned long long) size); + rbd_dev->mapping.size = (u64) size; + set_capacity(rbd_dev->disk, size); +} + /* * only read the first part of the ondisk header, without the snaps info */ @@ -1730,17 +1743,9 @@ static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) down_write(&rbd_dev->header_rwsem); - /* resized? */ - if (rbd_dev->mapping.snap_id == CEPH_NOSNAP) { - sector_t size = (sector_t) h.image_size / SECTOR_SIZE; - - if (size != (sector_t) rbd_dev->mapping.size) { - dout("setting size to %llu sectors", - (unsigned long long) size); - rbd_dev->mapping.size = (u64) size; - set_capacity(rbd_dev->disk, size); - } - } + /* Update image size, and check for resize of mapped image */ + rbd_dev->header.image_size = h.image_size; + rbd_update_mapping_size(rbd_dev); /* rbd_dev->header.object_prefix shouldn't change */ kfree(rbd_dev->header.snap_sizes); From 117973fb4c91f3fd913127577e9f71b3aa6cb556 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 31 Aug 2012 17:29:55 -0500 Subject: [PATCH 0274/5831] rbd: define rbd_dev_v2_refresh() Define a new function rbd_dev_v2_refresh() to update/refresh the snapshot context for a format version 2 rbd image. This function will update anything that is not fixed for the life of an rbd image--at the moment this is mainly the snapshot context and (for a base mapping) the size. Update rbd_refresh_header() so it selects which function to use based on the image format. Rename __rbd_refresh_header() to be rbd_dev_v1_refresh() to be consistent with the naming of its version 2 counterpart. Similarly rename rbd_refresh_header() to be rbd_dev_refresh(). Unrelated--we use rbd_image_format_valid() here. Delete the other use of it, which was primarily put in place to ensure that function was referenced at the time it was defined. 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 55 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b64125d1d7bd..f11b839166ef 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -268,7 +268,8 @@ static void rbd_put_dev(struct rbd_device *rbd_dev) put_device(&rbd_dev->dev); } -static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver); +static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver); +static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver); static int rbd_open(struct block_device *bdev, fmode_t mode) { @@ -1304,7 +1305,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n", rbd_dev->header_name, (unsigned long long) notify_id, (unsigned int) opcode); - rc = rbd_refresh_header(rbd_dev, &hver); + rc = rbd_dev_refresh(rbd_dev, &hver); if (rc) pr_warning(RBD_DRV_NAME "%d got notification but failed to " " update snaps: %d\n", rbd_dev->major, rc); @@ -1732,7 +1733,7 @@ static void rbd_update_mapping_size(struct rbd_device *rbd_dev) /* * only read the first part of the ondisk header, without the snaps info */ -static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) +static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev, u64 *hver) { int ret; struct rbd_image_header h; @@ -1773,12 +1774,16 @@ static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) return ret; } -static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) +static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver) { int ret; + rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - ret = __rbd_refresh_header(rbd_dev, hver); + if (rbd_dev->image_format == 1) + ret = rbd_dev_v1_refresh(rbd_dev, hver); + else + ret = rbd_dev_v2_refresh(rbd_dev, hver); mutex_unlock(&ctl_mutex); return ret; @@ -1938,7 +1943,7 @@ static ssize_t rbd_image_refresh(struct device *dev, struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); int ret; - ret = rbd_refresh_header(rbd_dev, NULL); + ret = rbd_dev_refresh(rbd_dev, NULL); return ret < 0 ? ret : size; } @@ -2402,6 +2407,41 @@ static char *rbd_dev_snap_info(struct rbd_device *rbd_dev, u32 which, return ERR_PTR(-EINVAL); } +static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver) +{ + int ret; + __u8 obj_order; + + down_write(&rbd_dev->header_rwsem); + + /* Grab old order first, to see if it changes */ + + obj_order = rbd_dev->header.obj_order, + ret = rbd_dev_v2_image_size(rbd_dev); + if (ret) + goto out; + if (rbd_dev->header.obj_order != obj_order) { + ret = -EIO; + goto out; + } + rbd_update_mapping_size(rbd_dev); + + ret = rbd_dev_v2_snap_context(rbd_dev, hver); + dout("rbd_dev_v2_snap_context returned %d\n", ret); + if (ret) + goto out; + ret = rbd_dev_snaps_update(rbd_dev); + dout("rbd_dev_snaps_update returned %d\n", ret); + if (ret) + goto out; + ret = rbd_dev_snaps_register(rbd_dev); + dout("rbd_dev_snaps_register returned %d\n", ret); +out: + up_write(&rbd_dev->header_rwsem); + + return ret; +} + /* * Scan the rbd device's current snapshot list and compare it to the * newly-received snapshot context. 
Remove any existing snapshots @@ -2564,7 +2604,7 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev) do { ret = rbd_req_sync_watch(rbd_dev); if (ret == -ERANGE) { - rc = rbd_refresh_header(rbd_dev, NULL); + rc = rbd_dev_refresh(rbd_dev, NULL); if (rc < 0) return rc; } @@ -3045,7 +3085,6 @@ static ssize_t rbd_add(struct bus_type *bus, rc = rbd_dev_probe(rbd_dev); if (rc < 0) goto err_out_client; - rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); /* no need to lock here, as rbd_dev is not registered yet */ rc = rbd_dev_snaps_update(rbd_dev); From d889140c4a1c5edb6a7bd90392b9d878bfaccfb6 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 9 Oct 2012 13:50:17 -0700 Subject: [PATCH 0275/5831] rbd: implement feature checks Version 2 images have two sets of feature bit fields. The first indicates features possibly used by the image. The second indicates features that the client *must* support in order to use the image. When an image (or snapshot) is first examined, we need to make sure that the local implementation supports the image's required features. If not, fail the probe for the image. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index f11b839166ef..0f260a6e97c4 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -70,6 +70,14 @@ #define RBD_IMAGE_ID_LEN_MAX 64 #define RBD_OBJ_PREFIX_LEN_MAX 64 +/* Feature bits */ + +#define RBD_FEATURE_LAYERING 1 + +/* Features supported by this (client software) implementation. */ + +#define RBD_FEATURES_ALL (0) + /* * An RBD device name will be "rbd#", where the "rbd" comes from * RBD_DRV_NAME above, and # is a unique integer identifier. @@ -2226,6 +2234,7 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, __le64 features; __le64 incompat; } features_buf = { 0 }; + u64 incompat; int ret; ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, @@ -2236,6 +2245,11 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); if (ret < 0) return ret; + + incompat = le64_to_cpu(features_buf.incompat); + if (incompat & ~RBD_FEATURES_ALL) + return -ENOTSUPP; + *snap_features = le64_to_cpu(features_buf.features); dout(" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n", @@ -2977,7 +2991,7 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) if (ret < 0) goto out_err; - /* Get the features for the image */ + /* Get the and check features for the image */ ret = rbd_dev_v2_features(rbd_dev); if (ret < 0) From 35152979e6181b1fbb4b61c3ff641c14df53ad66 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 31 Aug 2012 17:29:55 -0500 Subject: [PATCH 0276/5831] rbd: activate v2 image support Now that v2 images support is fully implemented, have rbd_dev_v2_probe() return 0 to indicate a successful probe. (Note that an image that implements layering will fail the probe early because of the feature chekc.) 
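For illustration, here is a minimal user-space sketch of the masking logic that this feature check boils down to; RBD_FEATURE_LAYERING and RBD_FEATURES_ALL mirror the values the patch adds, while the helper name and the test in main() are made up for the example.

    #include <stdint.h>
    #include <stdio.h>

    #define RBD_FEATURE_LAYERING  1ULL
    #define RBD_FEATURES_ALL      0ULL   /* no optional features supported yet */

    /* Return 0 if every feature the image requires is supported locally. */
    static int check_incompat_features(uint64_t incompat)
    {
            if (incompat & ~RBD_FEATURES_ALL)
                    return -1;   /* the kernel code returns -ENOTSUPP here */
            return 0;
    }

    int main(void)
    {
            /* A layered image advertises LAYERING in its incompat mask, so a
             * client with RBD_FEATURES_ALL == 0 must refuse to map it. */
            printf("plain image:   %d\n", check_incompat_features(0));
            printf("layered image: %d\n",
                   check_incompat_features(RBD_FEATURE_LAYERING));
            return 0;
    }

With RBD_FEATURES_ALL still zero, any image whose incompatible-feature mask is non-zero, a layered image for example, is rejected, which is why such images fail the probe early.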
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 0f260a6e97c4..8f56d37637a7 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3014,7 +3014,7 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) dout("discovered version 2 image, header name is %s\n", rbd_dev->header_name); - return -ENOTSUPP; + return 0; out_err: kfree(rbd_dev->header_name); rbd_dev->header_name = NULL; From 6441ab5f8ffdf7e99eefe0fb747858e0c12b567e Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 5 Oct 2012 12:05:58 -0300 Subject: [PATCH 0277/5831] drm/i915: completely rewrite the Haswell PLL handling code Problems with the previous code: - HDMI just uses WRPLL1 for everything, so dual head cases might not work sometimes. - At encoder->mode_set we just write the PLL register without doing any kind of check (e.g., check if the PLL is already being used). - There is no way to fail and return error codes at encoder->mode_set. - We write to PORT_CLK_SEL at mode_set and we never disable it. - Machines hang due to wrong clock enable/disable sequence. So here we rewrite the code, making it a little more like the pre-Haswell PLL mode set code: - Check PLL availability at ironlake_crtc_mode_set. - Try to use both WRPLLs. - Check if PLLs are used before actually trying to use them, and properly fail with error messages. - Enable/disable PORT_CLK_SEL at the right place. - Add some WARNs to check for bugs. The next improvement will be to try to reuse PLLs if the timings match, but this is content for another patch and it's already documented with a TODO comment. Signed-off-by: Paulo Zanoni Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 7 + drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ddi.c | 263 +++++++++++++++++++++------ drivers/gpu/drm/i915/intel_display.c | 13 +- drivers/gpu/drm/i915/intel_drv.h | 6 + drivers/gpu/drm/i915/intel_hdmi.c | 2 + 6 files changed, 238 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7f05736c6d8a..9e446b621642 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -93,6 +93,12 @@ struct intel_pch_pll { }; #define I915_NUM_PLLS 2 +struct intel_ddi_plls { + int spll_refcount; + int wrpll1_refcount; + int wrpll2_refcount; +}; + /* Interface history: * * 1.1: Original. 
@@ -812,6 +818,7 @@ typedef struct drm_i915_private { wait_queue_head_t pending_flip_queue; struct intel_pch_pll pch_plls[I915_NUM_PLLS]; + struct intel_ddi_plls ddi_plls; /* Reclocking support */ bool render_reclock_avail; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index fd9a319b86a7..c8c8dd0ff7b4 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4523,6 +4523,7 @@ #define PORT_CLK_SEL_SPLL (3<<29) #define PORT_CLK_SEL_WRPLL1 (4<<29) #define PORT_CLK_SEL_WRPLL2 (5<<29) +#define PORT_CLK_SEL_NONE (7<<29) /* Pipe clock selection */ #define PIPE_CLK_SEL_A 0x46140 diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 8740a5cf1359..ab48083ffcec 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -152,18 +152,6 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) int pipe = intel_crtc->pipe; u32 reg, temp, i; - /* Configure CPU PLL, wait for warmup */ - I915_WRITE(SPLL_CTL, - SPLL_PLL_ENABLE | - SPLL_PLL_FREQ_1350MHz | - SPLL_PLL_SCC); - - /* Use SPLL to drive the output when in FDI mode */ - I915_WRITE(PORT_CLK_SEL(PORT_E), - PORT_CLK_SEL_SPLL); - - udelay(20); - /* Start the training iterating through available voltages and emphasis */ for (i=0; i < ARRAY_SIZE(hsw_ddi_buf_ctl_values); i++) { /* Configure DP_TP_CTL with auto-training */ @@ -654,58 +642,17 @@ void intel_ddi_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { - struct drm_device *dev = encoder->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct drm_crtc *crtc = encoder->crtc; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); int port = intel_hdmi->ddi_port; int pipe = intel_crtc->pipe; - int p, n2, r2; - u32 i; /* On Haswell, we need to enable the clocks and prepare DDI function to * work in HDMI mode for this pipe. */ DRM_DEBUG_KMS("Preparing HDMI DDI mode for Haswell on port %c, pipe %c\n", port_name(port), pipe_name(pipe)); - for (i = 0; i < ARRAY_SIZE(wrpll_tmds_clock_table); i++) - if (crtc->mode.clock <= wrpll_tmds_clock_table[i].clock) - break; - - if (i == ARRAY_SIZE(wrpll_tmds_clock_table)) - i--; - - p = wrpll_tmds_clock_table[i].p; - n2 = wrpll_tmds_clock_table[i].n2; - r2 = wrpll_tmds_clock_table[i].r2; - - if (wrpll_tmds_clock_table[i].clock != crtc->mode.clock) - DRM_INFO("WR PLL: using settings for %dKHz on %dKHz mode\n", - wrpll_tmds_clock_table[i].clock, crtc->mode.clock); - - DRM_DEBUG_KMS("WR PLL: %dKHz refresh rate with p=%d, n2=%d r2=%d\n", - crtc->mode.clock, p, n2, r2); - - /* Configure WR PLL 1, program the correct divider values for - * the desired frequency and wait for warmup */ - I915_WRITE(WRPLL_CTL1, - WRPLL_PLL_ENABLE | - WRPLL_PLL_SELECT_LCPLL_2700 | - WRPLL_DIVIDER_REFERENCE(r2) | - WRPLL_DIVIDER_FEEDBACK(n2) | - WRPLL_DIVIDER_POST(p)); - - udelay(20); - - /* Use WRPLL1 clock to drive the output to the port, and tell the pipe to use - * this port for connection. - */ - I915_WRITE(PORT_CLK_SEL(port), - PORT_CLK_SEL_WRPLL1); - - udelay(20); - if (intel_hdmi->has_audio) { /* Proper support for digital audio needs a new logic and a new set * of registers, so we leave it for future patch bombing. 
@@ -742,6 +689,144 @@ intel_ddi_get_crtc_encoder(struct drm_crtc *crtc) return ret; } +void intel_ddi_put_crtc_pll(struct drm_crtc *crtc) +{ + struct drm_i915_private *dev_priv = crtc->dev->dev_private; + struct intel_ddi_plls *plls = &dev_priv->ddi_plls; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + uint32_t val; + + switch (intel_crtc->ddi_pll_sel) { + case PORT_CLK_SEL_SPLL: + plls->spll_refcount--; + if (plls->spll_refcount == 0) { + DRM_DEBUG_KMS("Disabling SPLL\n"); + val = I915_READ(SPLL_CTL); + WARN_ON(!(val & SPLL_PLL_ENABLE)); + I915_WRITE(SPLL_CTL, val & ~SPLL_PLL_ENABLE); + POSTING_READ(SPLL_CTL); + } + break; + case PORT_CLK_SEL_WRPLL1: + plls->wrpll1_refcount--; + if (plls->wrpll1_refcount == 0) { + DRM_DEBUG_KMS("Disabling WRPLL 1\n"); + val = I915_READ(WRPLL_CTL1); + WARN_ON(!(val & WRPLL_PLL_ENABLE)); + I915_WRITE(WRPLL_CTL1, val & ~WRPLL_PLL_ENABLE); + POSTING_READ(WRPLL_CTL1); + } + break; + case PORT_CLK_SEL_WRPLL2: + plls->wrpll2_refcount--; + if (plls->wrpll2_refcount == 0) { + DRM_DEBUG_KMS("Disabling WRPLL 2\n"); + val = I915_READ(WRPLL_CTL2); + WARN_ON(!(val & WRPLL_PLL_ENABLE)); + I915_WRITE(WRPLL_CTL2, val & ~WRPLL_PLL_ENABLE); + POSTING_READ(WRPLL_CTL2); + } + break; + } + + WARN(plls->spll_refcount < 0, "Invalid SPLL refcount\n"); + WARN(plls->wrpll1_refcount < 0, "Invalid WRPLL1 refcount\n"); + WARN(plls->wrpll2_refcount < 0, "Invalid WRPLL2 refcount\n"); + + intel_crtc->ddi_pll_sel = PORT_CLK_SEL_NONE; +} + +static void intel_ddi_calculate_wrpll(int clock, int *p, int *n2, int *r2) +{ + u32 i; + + for (i = 0; i < ARRAY_SIZE(wrpll_tmds_clock_table); i++) + if (clock <= wrpll_tmds_clock_table[i].clock) + break; + + if (i == ARRAY_SIZE(wrpll_tmds_clock_table)) + i--; + + *p = wrpll_tmds_clock_table[i].p; + *n2 = wrpll_tmds_clock_table[i].n2; + *r2 = wrpll_tmds_clock_table[i].r2; + + if (wrpll_tmds_clock_table[i].clock != clock) + DRM_INFO("WRPLL: using settings for %dKHz on %dKHz mode\n", + wrpll_tmds_clock_table[i].clock, clock); + + DRM_DEBUG_KMS("WRPLL: %dKHz refresh rate with p=%d, n2=%d r2=%d\n", + clock, *p, *n2, *r2); +} + +bool intel_ddi_pll_mode_set(struct drm_crtc *crtc, int clock) +{ + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); + struct drm_i915_private *dev_priv = crtc->dev->dev_private; + struct intel_ddi_plls *plls = &dev_priv->ddi_plls; + int type = intel_encoder->type; + enum pipe pipe = intel_crtc->pipe; + uint32_t reg, val; + + /* TODO: reuse PLLs when possible (compare values) */ + + intel_ddi_put_crtc_pll(crtc); + + if (type == INTEL_OUTPUT_HDMI) { + int p, n2, r2; + + if (plls->wrpll1_refcount == 0) { + DRM_DEBUG_KMS("Using WRPLL 1 on pipe %c\n", + pipe_name(pipe)); + plls->wrpll1_refcount++; + reg = WRPLL_CTL1; + intel_crtc->ddi_pll_sel = PORT_CLK_SEL_WRPLL1; + } else if (plls->wrpll2_refcount == 0) { + DRM_DEBUG_KMS("Using WRPLL 2 on pipe %c\n", + pipe_name(pipe)); + plls->wrpll2_refcount++; + reg = WRPLL_CTL2; + intel_crtc->ddi_pll_sel = PORT_CLK_SEL_WRPLL2; + } else { + DRM_ERROR("No WRPLLs available!\n"); + return false; + } + + WARN(I915_READ(reg) & WRPLL_PLL_ENABLE, + "WRPLL already enabled\n"); + + intel_ddi_calculate_wrpll(clock, &p, &n2, &r2); + + val = WRPLL_PLL_ENABLE | WRPLL_PLL_SELECT_LCPLL_2700 | + WRPLL_DIVIDER_REFERENCE(r2) | WRPLL_DIVIDER_FEEDBACK(n2) | + WRPLL_DIVIDER_POST(p); + + } else if (type == INTEL_OUTPUT_ANALOG) { + if (plls->spll_refcount == 0) { + DRM_DEBUG_KMS("Using SPLL on pipe %c\n", + pipe_name(pipe)); + 
plls->spll_refcount++; + reg = SPLL_CTL; + intel_crtc->ddi_pll_sel = PORT_CLK_SEL_SPLL; + } + + WARN(I915_READ(reg) & SPLL_PLL_ENABLE, + "SPLL already enabled\n"); + + val = SPLL_PLL_ENABLE | SPLL_PLL_FREQ_1350MHz | SPLL_PLL_SCC; + + } else { + WARN(1, "Invalid DDI encoder type %d\n", type); + return false; + } + + I915_WRITE(reg, val); + udelay(20); + + return true; +} + void intel_ddi_enable_pipe_func(struct drm_crtc *crtc) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); @@ -837,6 +922,57 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, return true; } +static uint32_t intel_ddi_get_crtc_pll(struct drm_i915_private *dev_priv, + enum pipe pipe) +{ + uint32_t temp, ret; + enum port port; + int i; + + temp = I915_READ(DDI_FUNC_CTL(pipe)); + temp &= PIPE_DDI_PORT_MASK; + for (i = PORT_A; i <= PORT_E; i++) + if (temp == PIPE_DDI_SELECT_PORT(i)) + port = i; + + ret = I915_READ(PORT_CLK_SEL(port)); + + DRM_DEBUG_KMS("Pipe %c connected to port %c using clock 0x%08x\n", + pipe_name(pipe), port_name(port), ret); + + return ret; +} + +void intel_ddi_setup_hw_pll_state(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + enum pipe pipe; + struct intel_crtc *intel_crtc; + + for_each_pipe(pipe) { + intel_crtc = + to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); + + if (!intel_crtc->active) + continue; + + intel_crtc->ddi_pll_sel = intel_ddi_get_crtc_pll(dev_priv, + pipe); + + switch (intel_crtc->ddi_pll_sel) { + case PORT_CLK_SEL_SPLL: + dev_priv->ddi_plls.spll_refcount++; + break; + case PORT_CLK_SEL_WRPLL1: + dev_priv->ddi_plls.wrpll1_refcount++; + break; + case PORT_CLK_SEL_WRPLL2: + dev_priv->ddi_plls.wrpll2_refcount++; + break; + } + } +} + void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc) { struct drm_crtc *crtc = &intel_crtc->base; @@ -854,6 +990,27 @@ void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc) I915_WRITE(PIPE_CLK_SEL(intel_crtc->pipe), PIPE_CLK_SEL_DISABLED); } +void intel_ddi_pre_enable(struct intel_encoder *intel_encoder) +{ + struct drm_crtc *crtc = intel_encoder->base.crtc; + struct drm_i915_private *dev_priv = crtc->dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + enum port port = intel_ddi_get_encoder_port(intel_encoder); + + WARN_ON(intel_crtc->ddi_pll_sel == PORT_CLK_SEL_NONE); + + I915_WRITE(PORT_CLK_SEL(port), intel_crtc->ddi_pll_sel); +} + +void intel_ddi_post_disable(struct intel_encoder *intel_encoder) +{ + struct drm_encoder *encoder = &intel_encoder->base; + struct drm_i915_private *dev_priv = encoder->dev->dev_private; + enum port port = intel_ddi_get_encoder_port(intel_encoder); + + I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_NONE); +} + void intel_enable_ddi(struct intel_encoder *encoder) { struct drm_device *dev = encoder->base.dev; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 39d0753d6a37..709497dc807b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3332,6 +3332,11 @@ static void ironlake_crtc_off(struct drm_crtc *crtc) intel_put_pch_pll(intel_crtc); } +static void haswell_crtc_off(struct drm_crtc *crtc) +{ + intel_ddi_put_crtc_pll(crtc); +} + static void intel_crtc_dpms_overlay(struct intel_crtc *intel_crtc, bool enable) { if (!enable && intel_crtc->overlay) { @@ -5212,6 +5217,9 @@ static int haswell_crtc_mode_set(struct drm_crtc *crtc, WARN(num_connectors != 1, "%d connectors attached to pipe %c\n", num_connectors, pipe_name(pipe)); + if 
(!intel_ddi_pll_mode_set(crtc, adjusted_mode->clock)) + return -EINVAL; + if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) { ok = ironlake_compute_clocks(crtc, adjusted_mode, &clock, &has_reduced_clock, @@ -8078,7 +8086,7 @@ static void intel_init_display(struct drm_device *dev) dev_priv->display.crtc_mode_set = haswell_crtc_mode_set; dev_priv->display.crtc_enable = ironlake_crtc_enable; dev_priv->display.crtc_disable = ironlake_crtc_disable; - dev_priv->display.off = ironlake_crtc_off; + dev_priv->display.off = haswell_crtc_off; dev_priv->display.update_plane = ironlake_update_plane; } else if (HAS_PCH_SPLIT(dev)) { dev_priv->display.crtc_mode_set = ironlake_crtc_mode_set; @@ -8533,6 +8541,9 @@ void intel_modeset_setup_hw_state(struct drm_device *dev) crtc->active ? "enabled" : "disabled"); } + if (IS_HASWELL(dev)) + intel_ddi_setup_hw_pll_state(dev); + list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { pipe = 0; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 5de365d70de9..245319a45597 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -212,6 +212,7 @@ struct intel_crtc { /* We can share PLLs across outputs if the timings match */ struct intel_pch_pll *pch_pll; + uint32_t ddi_pll_sel; }; struct intel_plane { @@ -586,5 +587,10 @@ extern void intel_ddi_disable_pipe_func(struct drm_i915_private *dev_priv, enum pipe pipe); extern void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc); extern void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc); +extern void intel_ddi_setup_hw_pll_state(struct drm_device *dev); +extern bool intel_ddi_pll_mode_set(struct drm_crtc *crtc, int clock); +extern void intel_ddi_pre_enable(struct intel_encoder *intel_encoder); +extern void intel_ddi_post_disable(struct intel_encoder *intel_encoder); +extern void intel_ddi_put_crtc_pll(struct drm_crtc *crtc); #endif /* __INTEL_DRV_H__ */ diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index f9fb47cd1779..a6dd00d99dad 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1013,8 +1013,10 @@ void intel_hdmi_init(struct drm_device *dev, int sdvox_reg, enum port port) } if (IS_HASWELL(dev)) { + intel_encoder->pre_enable = intel_ddi_pre_enable; intel_encoder->enable = intel_enable_ddi; intel_encoder->disable = intel_disable_ddi; + intel_encoder->post_disable = intel_ddi_post_disable; intel_encoder->get_hw_state = intel_ddi_get_hw_state; drm_encoder_helper_add(&intel_encoder->base, &intel_hdmi_helper_funcs_hsw); From 1e6210f45d95f1db7831072b7be64f9562280df1 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 5 Oct 2012 12:05:59 -0300 Subject: [PATCH 0278/5831] drm/i915: don't rely on previous values set on DDI_BUF_CTL Just set the only bit we need, everything else is either ignored on HDMI or should be set to zero. 
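As a rough sketch of the design choice, compare a read-modify-write update with an explicit full-value write; the register and helper names below are stand-ins, not the i915 definitions.

    #include <stdint.h>

    #define BUF_CTL_ENABLE  (1u << 31)

    static uint32_t fake_reg;                    /* stands in for the MMIO register */
    static uint32_t read_reg(void)           { return fake_reg; }
    static void     write_reg(uint32_t v)    { fake_reg = v; }

    /* Read-modify-write inherits whatever bits the BIOS or a previous mode
     * set left behind -- exactly the dependency the patch removes. */
    static void enable_rmw(void)
    {
            write_reg(read_reg() | BUF_CTL_ENABLE);
    }

    /* Writing the full value makes the state explicit: the enable bit is set
     * and every other field is known to be zero. */
    static void enable_explicit(void)
    {
            write_reg(BUF_CTL_ENABLE);
    }

    int main(void)
    {
            enable_rmw();
            enable_explicit();
            return fake_reg == BUF_CTL_ENABLE ? 0 : 1;
    }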
Signed-off-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ab48083ffcec..9a95beafc507 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1017,16 +1017,12 @@ void intel_enable_ddi(struct intel_encoder *encoder) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); int port = intel_hdmi->ddi_port; - u32 temp; - - temp = I915_READ(DDI_BUF_CTL(port)); - temp |= DDI_BUF_CTL_ENABLE; /* Enable DDI_BUF_CTL. In HDMI/DVI mode, the port width, * and swing/emphasis values are ignored so nothing special needs * to be done besides enabling the port. */ - I915_WRITE(DDI_BUF_CTL(port), temp); + I915_WRITE(DDI_BUF_CTL(port), DDI_BUF_CTL_ENABLE); } void intel_disable_ddi(struct intel_encoder *encoder) From d151f63f20133aef9dbda209faddebcddbfccf31 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 8 Oct 2012 18:36:11 -0600 Subject: [PATCH 0279/5831] KVM: change kvm_assign_device() to print return value when iommu_attach_device() fails Change existing kernel error message to include return value from iommu_attach_device() when it fails. This will help debug device assignment failures more effectively. Signed-off-by: Shuah Khan Signed-off-by: Marcelo Tosatti --- virt/kvm/iommu.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 037cb6730e68..18e1e30019e3 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -168,11 +168,7 @@ int kvm_assign_device(struct kvm *kvm, r = iommu_attach_device(domain, &pdev->dev); if (r) { - printk(KERN_ERR "assign device %x:%x:%x.%x failed", - pci_domain_nr(pdev->bus), - pdev->bus->number, - PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn)); + dev_err(&pdev->dev, "kvm assign device failed ret %d", r); return r; } From 27c6f0a5897c06417e39f2d20a783f84a54cb0b3 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 10 Oct 2012 18:09:59 -0300 Subject: [PATCH 0280/5831] drm/i915: don't implement WaDisableEarlyCull for Haswell Introduced in commit 87f8020ec9e3069597746040a4e8655189bc0c1a: drm/i915: implement WaDisableEarlyCull for VLV and IVB Notice that the original patch sent to the mailing list did not include the Haswell chunk, it was added later. The bit set by the commit does not exist on Haswell machines (at least that's what the documentation says). Also, the commit gives me a GPU hang every time we're loading the driver. So let's revert the Haswell chunk, making the patch do only what its title actually says. Signed-off-by: Paulo Zanoni Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index eb757e5f2d87..07da990eb77d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3470,10 +3470,6 @@ static void haswell_init_clock_gating(struct drm_device *dev) */ I915_WRITE(GEN6_UCGCTL2, GEN6_RCZUNIT_CLOCK_GATE_DISABLE); - /* WaDisableEarlyCull */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); - /* Apply the WaDisableRHWOOptimizationForRenderHang workaround. 
*/ I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); From 82a1273713df37375a75d6a700deef70c0d21360 Mon Sep 17 00:00:00 2001 From: "Jason J. Herne" Date: Tue, 2 Oct 2012 16:25:36 +0200 Subject: [PATCH 0281/5831] s390/kvm: Interrupt injection bugfix EXTERNAL_CALL and EMERGENCY type interrupts need to preserve their interrupt code parameter when being injected from user space. Signed-off-by: Jason J. Herne Reviewed-by: Cornelia Huck Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti --- arch/s390/kvm/interrupt.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index ff1e2f8ef94a..c30615e605ac 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -629,11 +629,28 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, break; case KVM_S390_SIGP_STOP: case KVM_S390_RESTART: - case KVM_S390_INT_EXTERNAL_CALL: - case KVM_S390_INT_EMERGENCY: VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); inti->type = s390int->type; break; + case KVM_S390_INT_EXTERNAL_CALL: + if (s390int->parm & 0xffff0000) { + kfree(inti); + return -EINVAL; + } + VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u", + s390int->parm); + inti->type = s390int->type; + inti->extcall.code = s390int->parm; + break; + case KVM_S390_INT_EMERGENCY: + if (s390int->parm & 0xffff0000) { + kfree(inti); + return -EINVAL; + } + VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", s390int->parm); + inti->type = s390int->type; + inti->emerg.code = s390int->parm; + break; case KVM_S390_INT_VIRTIO: case KVM_S390_INT_SERVICE: default: From 416ad65f73f8a68783c98dd734f570e4829d996b Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Tue, 2 Oct 2012 16:25:37 +0200 Subject: [PATCH 0282/5831] s390/kvm: Add documentation for KVM_S390_INTERRUPT Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti --- Documentation/virtual/kvm/api.txt | 33 +++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index f6ec3a92e621..a46a416810fb 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1997,6 +1997,39 @@ return the hash table order in the parameter. (If the guest is using the virtualized real-mode area (VRMA) facility, the kernel will re-create the VMRA HPTEs on the next KVM_RUN of any vcpu.) +4.77 KVM_S390_INTERRUPT + +Capability: basic +Architectures: s390 +Type: vm ioctl, vcpu ioctl +Parameters: struct kvm_s390_interrupt (in) +Returns: 0 on success, -1 on error + +Allows to inject an interrupt to the guest. Interrupts can be floating +(vm ioctl) or per cpu (vcpu ioctl), depending on the interrupt type. 
+ +Interrupt parameters are passed via kvm_s390_interrupt: + +struct kvm_s390_interrupt { + __u32 type; + __u32 parm; + __u64 parm64; +}; + +type can be one of the following: + +KVM_S390_SIGP_STOP (vcpu) - sigp restart +KVM_S390_PROGRAM_INT (vcpu) - program check; code in parm +KVM_S390_SIGP_SET_PREFIX (vcpu) - sigp set prefix; prefix address in parm +KVM_S390_RESTART (vcpu) - restart +KVM_S390_INT_VIRTIO (vm) - virtio external interrupt; external interrupt + parameters in parm and parm64 +KVM_S390_INT_SERVICE (vm) - sclp external interrupt; sclp parameter in parm +KVM_S390_INT_EMERGENCY (vcpu) - sigp emergency; source cpu in parm +KVM_S390_INT_EXTERNAL_CALL (vcpu) - sigp external call; source cpu in parm + +Note that the vcpu ioctl is asynchronous to vcpu execution. + 5. The kvm_run structure ------------------------ From 87cac8f879a5ecd7109dbe688087e8810b3364eb Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 2 Oct 2012 16:25:38 +0200 Subject: [PATCH 0283/5831] s390/kvm: dont announce RRBM support Newer kernels (linux-next with the transparent huge page patches) use rrbm if the feature is announced via feature bit 66. RRBM will cause intercepts, so KVM does not handle it right now, causing an illegal instruction in the guest. The easy solution is to disable the feature bit for the guest. This fixes bugs like: Kernel BUG at 0000000000124c2a [verbose debug info unavailable] illegal operation: 0001 [#1] SMP Modules linked in: virtio_balloon virtio_net ipv6 autofs4 CPU: 0 Not tainted 3.5.4 #1 Process fmempig (pid: 659, task: 000000007b712fd0, ksp: 000000007bed3670) Krnl PSW : 0704d00180000000 0000000000124c2a (pmdp_clear_flush_young+0x5e/0x80) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 EA:3 00000000003cc000 0000000000000004 0000000000000000 0000000079800000 0000000000040000 0000000000000000 000000007bed3918 000000007cf40000 0000000000000001 000003fff7f00000 000003d281a94000 000000007bed383c 000000007bed3918 00000000005ecbf8 00000000002314a6 000000007bed36e0 Krnl Code:>0000000000124c2a: b9810025 ogr %r2,%r5 0000000000124c2e: 41343000 la %r3,0(%r4,%r3) 0000000000124c32: a716fffa brct %r1,124c26 0000000000124c36: b9010022 lngr %r2,%r2 0000000000124c3a: e3d0f0800004 lg %r13,128(%r15) 0000000000124c40: eb22003f000c srlg %r2,%r2,63 [ 2150.713198] Call Trace: [ 2150.713223] ([<00000000002312c4>] page_referenced_one+0x6c/0x27c) [ 2150.713749] [<0000000000233812>] page_referenced+0x32a/0x410 [...] CC: stable@vger.kernel.org CC: Alex Graf Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti --- arch/s390/kvm/kvm-s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ecced9d18986..38883f0bf27e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -997,7 +997,7 @@ static int __init kvm_s390_init(void) } memcpy(facilities, S390_lowcore.stfle_fac_list, 16); facilities[0] &= 0xff00fff3f47c0000ULL; - facilities[1] &= 0x201c000000000000ULL; + facilities[1] &= 0x001c000000000000ULL; return 0; } From 2886e93f5d28c559b3de9c4f75547af31e14504e Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 5 Oct 2012 12:06:00 -0300 Subject: [PATCH 0284/5831] drm/i915: disable DDI_BUF_CTL at the correct time And also properly wait for its idle bit. You may notice that DDI_BUF_CTL is enabled in .enable but disabled in .post_disable instead of .disable. 
Yes, the mode set sequence is not exactly symmetrical, but let's assume the spec is correct unless we can prove it's wrong. Signed-off-by: Paulo Zanoni Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 33 ++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 9a95beafc507..e79d0db4abf5 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1002,11 +1002,33 @@ void intel_ddi_pre_enable(struct intel_encoder *intel_encoder) I915_WRITE(PORT_CLK_SEL(port), intel_crtc->ddi_pll_sel); } +static void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, + enum port port) +{ + uint32_t reg = DDI_BUF_CTL(port); + int i; + + for (i = 0; i < 8; i++) { + udelay(1); + if (I915_READ(reg) & DDI_BUF_IS_IDLE) + return; + } + DRM_ERROR("Timeout waiting for DDI BUF %c idle bit\n", port_name(port)); +} + void intel_ddi_post_disable(struct intel_encoder *intel_encoder) { struct drm_encoder *encoder = &intel_encoder->base; struct drm_i915_private *dev_priv = encoder->dev->dev_private; enum port port = intel_ddi_get_encoder_port(intel_encoder); + uint32_t val; + + val = I915_READ(DDI_BUF_CTL(port)); + if (val & DDI_BUF_CTL_ENABLE) { + val &= ~DDI_BUF_CTL_ENABLE; + I915_WRITE(DDI_BUF_CTL(port), val); + intel_wait_ddi_buf_idle(dev_priv, port); + } I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_NONE); } @@ -1027,16 +1049,7 @@ void intel_enable_ddi(struct intel_encoder *encoder) void intel_disable_ddi(struct intel_encoder *encoder) { - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); - int port = intel_hdmi->ddi_port; - u32 temp; - - temp = I915_READ(DDI_BUF_CTL(port)); - temp &= ~DDI_BUF_CTL_ENABLE; - - I915_WRITE(DDI_BUF_CTL(port), temp); + /* This will be needed in the future, so leave it here for now */ } static int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv) From 1ce4292073695fd0fec74d1169bc94dadc339731 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 5 Oct 2012 12:06:01 -0300 Subject: [PATCH 0285/5831] drm/i915: pipe and planes should be disabled on haswell_crtc_mode_set So WARN in case they're not. It also does not make any sense to wait_for_vblank at this point. 
Signed-off-by: Paulo Zanoni Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 709497dc807b..705ed80e1e11 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5217,6 +5217,11 @@ static int haswell_crtc_mode_set(struct drm_crtc *crtc, WARN(num_connectors != 1, "%d connectors attached to pipe %c\n", num_connectors, pipe_name(pipe)); + WARN_ON(I915_READ(PIPECONF(pipe)) & + (PIPECONF_ENABLE | I965_PIPECONF_ACTIVE)); + + WARN_ON(I915_READ(DSPCNTR(plane)) & DISPLAY_PLANE_ENABLE); + if (!intel_ddi_pll_mode_set(crtc, adjusted_mode->clock)) return -EINVAL; @@ -5357,8 +5362,6 @@ static int haswell_crtc_mode_set(struct drm_crtc *crtc, haswell_set_pipeconf(crtc, adjusted_mode, dither); - intel_wait_for_vblank(dev, pipe); - /* Set up the display plane register */ I915_WRITE(DSPCNTR(plane), DISPPLANE_GAMMA_ENABLE); POSTING_READ(DSPCNTR(plane)); From 9493d974b0f1f34903adfb529a510d4d768493dc Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Wed, 10 Oct 2012 09:34:00 -0700 Subject: [PATCH 0286/5831] Input: cy8ctmg110_ts - use C99-style structure initializators Convert the struct i2c_msg initialization to C99 format. This makes maintaining and editing the code simpler. Also helps once other fields like transferred are added in future. Thanks to Julia Lawall for automating the conversion. Signed-off-by: Shubhrajyoti D Acked-by: Jean Delvare Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/cy8ctmg110_ts.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/cy8ctmg110_ts.c b/drivers/input/touchscreen/cy8ctmg110_ts.c index 464f1bf4b61d..ad6a6640f385 100644 --- a/drivers/input/touchscreen/cy8ctmg110_ts.c +++ b/drivers/input/touchscreen/cy8ctmg110_ts.c @@ -99,9 +99,18 @@ static int cy8ctmg110_read_regs(struct cy8ctmg110 *tsc, int ret; struct i2c_msg msg[2] = { /* first write slave position to i2c devices */ - { client->addr, 0, 1, &cmd }, + { + .addr = client->addr, + .len = 1, + .buf = &cmd + }, /* Second read data from position */ - { client->addr, I2C_M_RD, len, data } + { + .addr = client->addr, + .flags = I2C_M_RD, + .len = len, + .buf = data + } }; ret = i2c_transfer(client->adapter, msg, 2); From 24e491c21b4e214a980a5daf2a5bc80e8c410ce6 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti D Date: Wed, 10 Oct 2012 09:35:38 -0700 Subject: [PATCH 0287/5831] Input: as5011 - use C99-style structure initializators Convert the struct i2c_msg initialization to C99 format. This makes maintaining and editing the code simpler. Also helps once other fields like transferred are added in future. Thanks to Julia Lawall for automating the conversion. 
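A small, self-contained illustration of the two initialization styles; the struct below is a stand-in for the kernel's struct i2c_msg with the same field order, and the address and buffers are invented for the example.

    #include <stdint.h>

    /* Stand-in for the kernel's struct i2c_msg (see <uapi/linux/i2c.h>);
     * the positional form below silently depends on this field order. */
    struct i2c_msg {
            uint16_t addr;
            uint16_t flags;
            uint16_t len;
            uint8_t *buf;
    };

    #define I2C_M_RD 0x0001

    int main(void)
    {
            uint8_t cmd = 0x10, data[4];

            /* Old positional style: each value's meaning is implicit. */
            struct i2c_msg old_style[2] = {
                    { 0x48, 0, 1, &cmd },
                    { 0x48, I2C_M_RD, sizeof(data), data },
            };

            /* C99 designated initializers: fields are named, unspecified
             * members are zeroed, and adding or reordering struct members
             * cannot silently break the initialization. */
            struct i2c_msg new_style[2] = {
                    { .addr = 0x48, .len = 1, .buf = &cmd },
                    { .addr = 0x48, .flags = I2C_M_RD,
                      .len = sizeof(data), .buf = data },
            };

            (void)old_style;
            (void)new_style;
            return 0;
    }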
Signed-off-by: Shubhrajyoti D Acked-by: Jean Delvare Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/as5011.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/input/joystick/as5011.c b/drivers/input/joystick/as5011.c index c96653b58867..9d869e202b81 100644 --- a/drivers/input/joystick/as5011.c +++ b/drivers/input/joystick/as5011.c @@ -85,7 +85,10 @@ static int as5011_i2c_write(struct i2c_client *client, { uint8_t data[2] = { aregaddr, avalue }; struct i2c_msg msg = { - client->addr, I2C_M_IGNORE_NAK, 2, (uint8_t *)data + .addr = client->addr, + .flags = I2C_M_IGNORE_NAK, + .len = 2, + .buf = (uint8_t *)data }; int error; @@ -98,8 +101,18 @@ static int as5011_i2c_read(struct i2c_client *client, { uint8_t data[2] = { aregaddr }; struct i2c_msg msg_set[2] = { - { client->addr, I2C_M_REV_DIR_ADDR, 1, (uint8_t *)data }, - { client->addr, I2C_M_RD | I2C_M_NOSTART, 1, (uint8_t *)data } + { + .addr = client->addr, + .flags = I2C_M_REV_DIR_ADDR, + .len = 1, + .buf = (uint8_t *)data + }, + { + .addr = client->addr, + .flags = I2C_M_RD | I2C_M_NOSTART, + .len = 1, + .buf = (uint8_t *)data + } }; int error; From 2f7badb9742f88e7307d9e823f40c8621ceaa1c4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 11 Oct 2012 00:42:24 -0700 Subject: [PATCH 0288/5831] Input: wm831x-ts - remove unneeded clearing of driver data This is unneeded, only a bound driver can use driver data and a driver relying on the state prior to probe() is buggy. Signed-off-by: Mark Brown Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/wm831x-ts.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/input/touchscreen/wm831x-ts.c b/drivers/input/touchscreen/wm831x-ts.c index e83410721e38..ac667e469b35 100644 --- a/drivers/input/touchscreen/wm831x-ts.c +++ b/drivers/input/touchscreen/wm831x-ts.c @@ -390,7 +390,6 @@ static __devexit int wm831x_ts_remove(struct platform_device *pdev) input_unregister_device(wm831x_ts->input_dev); kfree(wm831x_ts); - platform_set_drvdata(pdev, NULL); return 0; } From ef8dee5cfe4df1091419e7d58b902e7e3d90b00e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 11 Oct 2012 00:42:34 -0700 Subject: [PATCH 0289/5831] Input: wm831x-ts - convert to devm_kzalloc() Saves a little code and eliminates the possibility of introducing some leaks. 
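A schematic example of the managed-allocation pattern, assuming a made-up platform driver (the "demo" names are illustrative, not taken from this patch):

    #include <linux/module.h>
    #include <linux/platform_device.h>
    #include <linux/slab.h>

    struct demo_priv {
            int irq;
    };

    /* devm_kzalloc() ties the allocation to the device's lifetime: it is
     * freed automatically when probe fails or the device is unbound, so
     * remove() needs no matching kfree() and probe() error paths cannot
     * leak it. */
    static int demo_probe(struct platform_device *pdev)
    {
            struct demo_priv *priv;

            priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
            if (!priv)
                    return -ENOMEM;

            platform_set_drvdata(pdev, priv);
            return 0;
    }

    static int demo_remove(struct platform_device *pdev)
    {
            /* nothing to free: the devm allocation is released by the core */
            return 0;
    }

    static struct platform_driver demo_driver = {
            .driver = { .name = "demo-dev" },
            .probe  = demo_probe,
            .remove = demo_remove,
    };
    module_platform_driver(demo_driver);
    MODULE_LICENSE("GPL");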
Signed-off-by: Mark Brown Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/wm831x-ts.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/input/touchscreen/wm831x-ts.c b/drivers/input/touchscreen/wm831x-ts.c index ac667e469b35..362f78d4507f 100644 --- a/drivers/input/touchscreen/wm831x-ts.c +++ b/drivers/input/touchscreen/wm831x-ts.c @@ -245,7 +245,8 @@ static __devinit int wm831x_ts_probe(struct platform_device *pdev) if (core_pdata) pdata = core_pdata->touch; - wm831x_ts = kzalloc(sizeof(struct wm831x_ts), GFP_KERNEL); + wm831x_ts = devm_kzalloc(&pdev->dev, sizeof(struct wm831x_ts), + GFP_KERNEL); input_dev = input_allocate_device(); if (!wm831x_ts || !input_dev) { error = -ENOMEM; @@ -376,7 +377,6 @@ err_data_irq: free_irq(wm831x_ts->data_irq, wm831x_ts); err_alloc: input_free_device(input_dev); - kfree(wm831x_ts); return error; } @@ -388,7 +388,6 @@ static __devexit int wm831x_ts_remove(struct platform_device *pdev) free_irq(wm831x_ts->pd_irq, wm831x_ts); free_irq(wm831x_ts->data_irq, wm831x_ts); input_unregister_device(wm831x_ts->input_dev); - kfree(wm831x_ts); return 0; } From dae6ba4ab797ed411fbde60ef5b5f6fbf13f0090 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 11 Oct 2012 00:42:34 -0700 Subject: [PATCH 0290/5831] Input: wm831x-on - convert to devm_kzalloc() Saves a small amount of code and reduces the potential for leaks. Signed-off-by: Mark Brown Signed-off-by: Dmitry Torokhov --- drivers/input/misc/wm831x-on.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/input/misc/wm831x-on.c b/drivers/input/misc/wm831x-on.c index 6790a812a1db..fa8b3900d981 100644 --- a/drivers/input/misc/wm831x-on.c +++ b/drivers/input/misc/wm831x-on.c @@ -76,7 +76,8 @@ static int __devinit wm831x_on_probe(struct platform_device *pdev) int irq = wm831x_irq(wm831x, platform_get_irq(pdev, 0)); int ret; - wm831x_on = kzalloc(sizeof(struct wm831x_on), GFP_KERNEL); + wm831x_on = devm_kzalloc(&pdev->dev, sizeof(struct wm831x_on), + GFP_KERNEL); if (!wm831x_on) { dev_err(&pdev->dev, "Can't allocate data\n"); return -ENOMEM; @@ -120,7 +121,6 @@ err_irq: err_input_dev: input_free_device(wm831x_on->dev); err: - kfree(wm831x_on); return ret; } @@ -132,7 +132,6 @@ static int __devexit wm831x_on_remove(struct platform_device *pdev) free_irq(irq, wm831x_on); cancel_delayed_work_sync(&wm831x_on->work); input_unregister_device(wm831x_on->dev); - kfree(wm831x_on); return 0; } From ad5396ee32afbdabb6188ffba67778080ea795b8 Mon Sep 17 00:00:00 2001 From: Tomasz Figa Date: Thu, 11 Oct 2012 01:03:50 -0700 Subject: [PATCH 0291/5831] Input: mms114 - add device tree bindings Add device tree bindings for mms114 touchscreen. 
[Dmitry Torokhov: added #ifdef CONFIG_OF guards] Signed-off-by: Tomasz Figa Signed-off-by: Dmitry Torokhov --- .../bindings/input/touchscreen/mms114.txt | 34 ++++++++++ drivers/input/touchscreen/mms114.c | 62 ++++++++++++++++++- 2 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 Documentation/devicetree/bindings/input/touchscreen/mms114.txt diff --git a/Documentation/devicetree/bindings/input/touchscreen/mms114.txt b/Documentation/devicetree/bindings/input/touchscreen/mms114.txt new file mode 100644 index 000000000000..89d4c56c5671 --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/mms114.txt @@ -0,0 +1,34 @@ +* MELFAS MMS114 touchscreen controller + +Required properties: +- compatible: must be "melfas,mms114" +- reg: I2C address of the chip +- interrupts: interrupt to which the chip is connected +- x-size: horizontal resolution of touchscreen +- y-size: vertical resolution of touchscreen + +Optional properties: +- contact-threshold: +- moving-threshold: +- x-invert: invert X axis +- y-invert: invert Y axis + +Example: + + i2c@00000000 { + /* ... */ + + touchscreen@48 { + compatible = "melfas,mms114"; + reg = <0x48>; + interrupts = <39 0>; + x-size = <720>; + y-size = <1280>; + contact-threshold = <10>; + moving-threshold = <10>; + x-invert; + y-invert; + }; + + /* ... */ + }; diff --git a/drivers/input/touchscreen/mms114.c b/drivers/input/touchscreen/mms114.c index 560cf09d1c5a..3426d2e11a36 100644 --- a/drivers/input/touchscreen/mms114.c +++ b/drivers/input/touchscreen/mms114.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -360,14 +361,63 @@ static void mms114_input_close(struct input_dev *dev) mms114_stop(data); } +#ifdef CONFIG_OF +static struct mms114_platform_data * __devinit mms114_parse_dt(struct device *dev) +{ + struct mms114_platform_data *pdata; + struct device_node *np = dev->of_node; + + if (!np) + return NULL; + + pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) { + dev_err(dev, "failed to allocate platform data\n"); + return NULL; + } + + if (of_property_read_u32(np, "x-size", &pdata->x_size)) { + dev_err(dev, "failed to get x-size property\n"); + return NULL; + }; + + if (of_property_read_u32(np, "y-size", &pdata->y_size)) { + dev_err(dev, "failed to get y-size property\n"); + return NULL; + }; + + of_property_read_u32(np, "contact-threshold", + &pdata->contact_threshold); + of_property_read_u32(np, "moving-threshold", + &pdata->moving_threshold); + + if (of_find_property(np, "x-invert", NULL)) + pdata->x_invert = true; + if (of_find_property(np, "y-invert", NULL)) + pdata->y_invert = true; + + return pdata; +} +#else +static inline struct mms114_platform_data *mms114_parse_dt(struct device *dev) +{ + return NULL; +} +#endif + static int __devinit mms114_probe(struct i2c_client *client, const struct i2c_device_id *id) { + const struct mms114_platform_data *pdata; struct mms114_data *data; struct input_dev *input_dev; int error; - if (!client->dev.platform_data) { + pdata = dev_get_platdata(&client->dev); + if (!pdata) + pdata = mms114_parse_dt(&client->dev); + + if (!pdata) { dev_err(&client->dev, "Need platform data\n"); return -EINVAL; } @@ -389,7 +439,7 @@ static int __devinit mms114_probe(struct i2c_client *client, data->client = client; data->input_dev = input_dev; - data->pdata = client->dev.platform_data; + data->pdata = pdata; input_dev->name = "MELPAS MMS114 Touchscreen"; input_dev->id.bustype = BUS_I2C; @@ -525,11 +575,19 @@ static const struct i2c_device_id 
mms114_id[] = { }; MODULE_DEVICE_TABLE(i2c, mms114_id); +#ifdef CONFIG_OF +static struct of_device_id __devinitdata mms114_dt_match[] = { + { .compatible = "melfas,mms114" }, + { } +}; +#endif + static struct i2c_driver mms114_driver = { .driver = { .name = "mms114", .owner = THIS_MODULE, .pm = &mms114_pm_ops, + .of_match_table = of_match_ptr(mms114_dt_match), }, .probe = mms114_probe, .remove = __devexit_p(mms114_remove), From 39bc66c9371fa6cdb1029e6c1768824f068be913 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Thu, 11 Oct 2012 15:24:04 +0100 Subject: [PATCH 0292/5831] drm/i915: Fix the SCC/SSC typo in the SPLL bits definition We're talking about Spread Spectrum Clocks here, thus SSC. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 6 +++--- drivers/gpu/drm/i915/intel_ddi.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c8c8dd0ff7b4..8200c317f1fd 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4496,8 +4496,8 @@ /* SPLL */ #define SPLL_CTL 0x46020 #define SPLL_PLL_ENABLE (1<<31) -#define SPLL_PLL_SCC (1<<28) -#define SPLL_PLL_NON_SCC (2<<28) +#define SPLL_PLL_SSC (1<<28) +#define SPLL_PLL_NON_SSC (2<<28) #define SPLL_PLL_FREQ_810MHz (0<<26) #define SPLL_PLL_FREQ_1350MHz (1<<26) @@ -4506,7 +4506,7 @@ #define WRPLL_CTL2 0x46060 #define WRPLL_PLL_ENABLE (1<<31) #define WRPLL_PLL_SELECT_SSC (0x01<<28) -#define WRPLL_PLL_SELECT_NON_SCC (0x02<<28) +#define WRPLL_PLL_SELECT_NON_SSC (0x02<<28) #define WRPLL_PLL_SELECT_LCPLL_2700 (0x03<<28) /* WRPLL divider programming */ #define WRPLL_DIVIDER_REFERENCE(x) ((x)<<0) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index e79d0db4abf5..a78860a04a56 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -814,7 +814,7 @@ bool intel_ddi_pll_mode_set(struct drm_crtc *crtc, int clock) WARN(I915_READ(reg) & SPLL_PLL_ENABLE, "SPLL already enabled\n"); - val = SPLL_PLL_ENABLE | SPLL_PLL_FREQ_1350MHz | SPLL_PLL_SCC; + val = SPLL_PLL_ENABLE | SPLL_PLL_FREQ_1350MHz | SPLL_PLL_SSC; } else { WARN(1, "Invalid DDI encoder type %d\n", type); From 7ad6623a4980caa053eb9eb3207e0c3ba7d4ed4d Mon Sep 17 00:00:00 2001 From: Chandrabhanu Mahapatra Date: Thu, 27 Sep 2012 16:52:17 +0530 Subject: [PATCH 0293/5831] OMAPDSS: Move definition of DEBUG flag to Makefile In OMAPDSS the DEBUG flag is set only after the OMAPDSS module is called, for which the debugging capabilities are available only after its proper initialization. As a result of which tracking of bugs prior to or during initial process becomes difficult. So, the definition of DEBUG is being moved to the corresponding Makefile. 
Signed-off-by: Chandrabhanu Mahapatra Reviewed-by: Sumit Semwal Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/dss/Makefile | 1 + drivers/video/omap2/dss/dss.h | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/video/omap2/dss/Makefile b/drivers/video/omap2/dss/Makefile index 4549869bfe1a..86493e3dd65d 100644 --- a/drivers/video/omap2/dss/Makefile +++ b/drivers/video/omap2/dss/Makefile @@ -8,3 +8,4 @@ omapdss-$(CONFIG_OMAP2_DSS_SDI) += sdi.o omapdss-$(CONFIG_OMAP2_DSS_DSI) += dsi.o omapdss-$(CONFIG_OMAP4_DSS_HDMI) += hdmi.o \ hdmi_panel.o ti_hdmi_4xxx_ip.o +ccflags-$(CONFIG_OMAP2_DSS_DEBUG_SUPPORT) += -DDEBUG diff --git a/drivers/video/omap2/dss/dss.h b/drivers/video/omap2/dss/dss.h index 6728892f9dad..ffbba7e994e9 100644 --- a/drivers/video/omap2/dss/dss.h +++ b/drivers/video/omap2/dss/dss.h @@ -23,10 +23,6 @@ #ifndef __OMAP2_DSS_H #define __OMAP2_DSS_H -#ifdef CONFIG_OMAP2_DSS_DEBUG_SUPPORT -#define DEBUG -#endif - #ifdef DEBUG extern bool dss_debug; #ifdef DSS_SUBSYS_NAME From 1b3bcb33fb9faeab29e5c734fa000f6c7746ea1c Mon Sep 17 00:00:00 2001 From: Chandrabhanu Mahapatra Date: Sat, 29 Sep 2012 11:25:42 +0530 Subject: [PATCH 0294/5831] OMAPDSS: Create new debug config options The config option CONFIG_OMAP2_DSS_DEBUG_SUPPORT has been removed and replaced with CONFIG_OMAP2_DSS_DEBUG and CONFIG_OMAP2_DSS_DEBUGFS. CONFIG_OMAP2_DSS_DEBUG enables DEBUG flag and CONFIG_OMAP2_DSS_DEBUGFS enables creation of debugfs for OMAPDSS. Both the config options are disabled by default and can be enabled independently of one another as per convenience. Signed-off-by: Chandrabhanu Mahapatra Reviewed-by: Sumit Semwal Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/dss/Kconfig | 22 ++++++++++++++++------ drivers/video/omap2/dss/Makefile | 2 +- drivers/video/omap2/dss/core.c | 6 +++--- drivers/video/omap2/dss/dss.c | 2 +- drivers/video/omap2/dss/dss.h | 2 +- 5 files changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/video/omap2/dss/Kconfig b/drivers/video/omap2/dss/Kconfig index 80f5390aa136..7052487495cd 100644 --- a/drivers/video/omap2/dss/Kconfig +++ b/drivers/video/omap2/dss/Kconfig @@ -18,16 +18,26 @@ config OMAP2_VRAM_SIZE You can also set this with "vram=" kernel argument, or in the board file. -config OMAP2_DSS_DEBUG_SUPPORT - bool "Debug support" - default y +config OMAP2_DSS_DEBUG + bool "Debug support" + default n help - This enables debug messages. You need to enable printing - with 'debug' module parameter. + This enables printing of debug messages. Alternatively, debug messages + can also be enabled by setting CONFIG_DYNAMIC_DEBUG and then setting + appropriate flags in /dynamic_debug/control. + +config OMAP2_DSS_DEBUGFS + bool "Debugfs filesystem support" + depends on DEBUG_FS + default n + help + This enables debugfs for OMAPDSS at /omapdss. This enables + querying about clock configuration and register configuration of dss, + dispc, dsi, hdmi and rfbi. config OMAP2_DSS_COLLECT_IRQ_STATS bool "Collect DSS IRQ statistics" - depends on OMAP2_DSS_DEBUG_SUPPORT + depends on OMAP2_DSS_DEBUGFS default n help Collect DSS IRQ statistics, printable via debugfs. 
diff --git a/drivers/video/omap2/dss/Makefile b/drivers/video/omap2/dss/Makefile index 86493e3dd65d..40701910f737 100644 --- a/drivers/video/omap2/dss/Makefile +++ b/drivers/video/omap2/dss/Makefile @@ -8,4 +8,4 @@ omapdss-$(CONFIG_OMAP2_DSS_SDI) += sdi.o omapdss-$(CONFIG_OMAP2_DSS_DSI) += dsi.o omapdss-$(CONFIG_OMAP4_DSS_HDMI) += hdmi.o \ hdmi_panel.o ti_hdmi_4xxx_ip.o -ccflags-$(CONFIG_OMAP2_DSS_DEBUG_SUPPORT) += -DDEBUG +ccflags-$(CONFIG_OMAP2_DSS_DEBUG) += -DDEBUG diff --git a/drivers/video/omap2/dss/core.c b/drivers/video/omap2/dss/core.c index b2af72dc20bd..826d64faeaa7 100644 --- a/drivers/video/omap2/dss/core.c +++ b/drivers/video/omap2/dss/core.c @@ -138,7 +138,7 @@ int dss_set_min_bus_tput(struct device *dev, unsigned long tput) return 0; } -#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_OMAP2_DSS_DEBUG_SUPPORT) +#if defined(CONFIG_OMAP2_DSS_DEBUGFS) static int dss_debug_show(struct seq_file *s, void *unused) { void (*func)(struct seq_file *) = s->private; @@ -193,7 +193,7 @@ int dss_debugfs_create_file(const char *name, void (*write)(struct seq_file *)) return 0; } -#else /* CONFIG_DEBUG_FS && CONFIG_OMAP2_DSS_DEBUG_SUPPORT */ +#else /* CONFIG_OMAP2_DSS_DEBUGFS */ static inline int dss_initialize_debugfs(void) { return 0; @@ -205,7 +205,7 @@ int dss_debugfs_create_file(const char *name, void (*write)(struct seq_file *)) { return 0; } -#endif /* CONFIG_DEBUG_FS && CONFIG_OMAP2_DSS_DEBUG_SUPPORT */ +#endif /* CONFIG_OMAP2_DSS_DEBUGFS */ /* PLATFORM DEVICE */ static int omap_dss_pm_notif(struct notifier_block *b, unsigned long v, void *d) diff --git a/drivers/video/omap2/dss/dss.c b/drivers/video/omap2/dss/dss.c index 2ab1c3e96553..3954742d46d3 100644 --- a/drivers/video/omap2/dss/dss.c +++ b/drivers/video/omap2/dss/dss.c @@ -746,7 +746,7 @@ static void dss_runtime_put(void) } /* DEBUGFS */ -#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_OMAP2_DSS_DEBUG_SUPPORT) +#if defined(CONFIG_OMAP2_DSS_DEBUGFS) void dss_debug_dump_clocks(struct seq_file *s) { dss_dump_clocks(s); diff --git a/drivers/video/omap2/dss/dss.h b/drivers/video/omap2/dss/dss.h index ffbba7e994e9..685275e0bed5 100644 --- a/drivers/video/omap2/dss/dss.h +++ b/drivers/video/omap2/dss/dss.h @@ -301,7 +301,7 @@ enum dss_hdmi_venc_clk_source_select dss_get_hdmi_venc_clk_source(void); const char *dss_get_generic_clk_source_name(enum omap_dss_clk_source clk_src); void dss_dump_clocks(struct seq_file *s); -#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_OMAP2_DSS_DEBUG_SUPPORT) +#if defined(CONFIG_OMAP2_DSS_DEBUGFS) void dss_debug_dump_clocks(struct seq_file *s); #endif From 702d267eb8ec66e1b2cde9448fc5960315ed8662 Mon Sep 17 00:00:00 2001 From: Chandrabhanu Mahapatra Date: Mon, 24 Sep 2012 17:12:58 +0530 Subject: [PATCH 0295/5831] OMAPDSS: Cleanup DSSDBG with dynamic pr_debug function The printk in DSSDBG function definition is replaced with dynamic debug enabled pr_debug(). The use of dynamic debugging provides more flexibility as each debug statement can be enabled or disabled dynamically on basis of source filename, line number, module name etc., by writing to a control file in debugfs filesystem. For better understanding please refer to Documentation/dynamic-debug-howto.txt. The DSSDBGF() differs from DSSDBG() by providing function name. However, function name, line number, module name and thread ID can be printed through dynamic debug by setting appropriate flags 'f','l','m' and 't' in the debugfs control file. So, DSSDBGF instances are replaced with DSSDBG. 
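As an example of the replacement mechanism, assuming debugfs is mounted at /sys/kernel/debug, the information DSSDBGF() used to add at compile time can now be requested at run time: writing "file dsi.c +pf" to /sys/kernel/debug/dynamic_debug/control enables every pr_debug() call site in dsi.c and prefixes each message with the function name, and adding 'l', 'm' or 't' to the flag list prints the line number, module name or thread ID as well.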
Signed-off-by: Chandrabhanu Mahapatra Reviewed-by: Sumit Semwal Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/dss/apply.c | 8 ++++---- drivers/video/omap2/dss/dsi.c | 12 ++++++------ drivers/video/omap2/dss/dss.h | 34 ++++++++------------------------- 3 files changed, 18 insertions(+), 36 deletions(-) diff --git a/drivers/video/omap2/dss/apply.c b/drivers/video/omap2/dss/apply.c index 19d66f471b4b..e923d9f90ee6 100644 --- a/drivers/video/omap2/dss/apply.c +++ b/drivers/video/omap2/dss/apply.c @@ -573,7 +573,7 @@ static void dss_ovl_write_regs(struct omap_overlay *ovl) struct mgr_priv_data *mp; int r; - DSSDBGF("%d", ovl->id); + DSSDBG("writing ovl %d regs", ovl->id); if (!op->enabled || !op->info_dirty) return; @@ -608,7 +608,7 @@ static void dss_ovl_write_regs_extra(struct omap_overlay *ovl) struct ovl_priv_data *op = get_ovl_priv(ovl); struct mgr_priv_data *mp; - DSSDBGF("%d", ovl->id); + DSSDBG("writing ovl %d regs extra", ovl->id); if (!op->extra_info_dirty) return; @@ -632,7 +632,7 @@ static void dss_mgr_write_regs(struct omap_overlay_manager *mgr) struct mgr_priv_data *mp = get_mgr_priv(mgr); struct omap_overlay *ovl; - DSSDBGF("%d", mgr->id); + DSSDBG("writing mgr %d regs", mgr->id); if (!mp->enabled) return; @@ -658,7 +658,7 @@ static void dss_mgr_write_regs_extra(struct omap_overlay_manager *mgr) { struct mgr_priv_data *mp = get_mgr_priv(mgr); - DSSDBGF("%d", mgr->id); + DSSDBG("writing mgr %d regs extra", mgr->id); if (!mp->extra_info_dirty) return; diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c index e37e6d868acd..b0345f3ac909 100644 --- a/drivers/video/omap2/dss/dsi.c +++ b/drivers/video/omap2/dss/dsi.c @@ -1612,7 +1612,7 @@ int dsi_pll_set_clock_div(struct platform_device *dsidev, u8 regn_start, regn_end, regm_start, regm_end; u8 regm_dispc_start, regm_dispc_end, regm_dsi_start, regm_dsi_end; - DSSDBGF(); + DSSDBG("DSI PLL clock config starts"); dsi->current_cinfo.clkin = cinfo->clkin; dsi->current_cinfo.fint = cinfo->fint; @@ -2431,7 +2431,7 @@ static int dsi_cio_init(struct platform_device *dsidev) int r; u32 l; - DSSDBGF(); + DSSDBG("DSI CIO init starts"); r = dss_dsi_enable_pads(dsi->module_id, dsi_get_lane_mask(dsidev)); if (r) @@ -2782,7 +2782,7 @@ static void dsi_vc_initial_config(struct platform_device *dsidev, int channel) { u32 r; - DSSDBGF("%d", channel); + DSSDBG("Initial config of virtual channel %d", channel); r = dsi_read_reg(dsidev, DSI_VC_CTRL(channel)); @@ -2814,7 +2814,7 @@ static int dsi_vc_config_source(struct platform_device *dsidev, int channel, if (dsi->vc[channel].source == source) return 0; - DSSDBGF("%d", channel); + DSSDBG("Source config of virtual channel %d", channel); dsi_sync_vc(dsidev, channel); @@ -3572,7 +3572,7 @@ static int dsi_enter_ulps(struct platform_device *dsidev) int r, i; unsigned mask; - DSSDBGF(); + DSSDBG("Entering ULPS"); WARN_ON(!dsi_bus_is_locked(dsidev)); @@ -4276,7 +4276,7 @@ int omapdss_dsi_set_clocks(struct omap_dss_device *dssdev, unsigned long pck; int r; - DSSDBGF("ddr_clk %lu, lp_clk %lu", ddr_clk, lp_clk); + DSSDBG("Setting DSI clocks: ddr_clk %lu, lp_clk %lu", ddr_clk, lp_clk); mutex_lock(&dsi->lock); diff --git a/drivers/video/omap2/dss/dss.h b/drivers/video/omap2/dss/dss.h index 685275e0bed5..d8aeee49825b 100644 --- a/drivers/video/omap2/dss/dss.h +++ b/drivers/video/omap2/dss/dss.h @@ -25,38 +25,20 @@ #ifdef DEBUG extern bool dss_debug; -#ifdef DSS_SUBSYS_NAME -#define DSSDBG(format, ...) 
\ - if (dss_debug) \ - printk(KERN_DEBUG "omapdss " DSS_SUBSYS_NAME ": " format, \ - ## __VA_ARGS__) -#else -#define DSSDBG(format, ...) \ - if (dss_debug) \ - printk(KERN_DEBUG "omapdss: " format, ## __VA_ARGS__) +#endif + +#ifdef pr_fmt +#undef pr_fmt #endif #ifdef DSS_SUBSYS_NAME -#define DSSDBGF(format, ...) \ - if (dss_debug) \ - printk(KERN_DEBUG "omapdss " DSS_SUBSYS_NAME \ - ": %s(" format ")\n", \ - __func__, \ - ## __VA_ARGS__) +#define pr_fmt(fmt) DSS_SUBSYS_NAME ": " fmt #else -#define DSSDBGF(format, ...) \ - if (dss_debug) \ - printk(KERN_DEBUG "omapdss: " \ - ": %s(" format ")\n", \ - __func__, \ - ## __VA_ARGS__) -#endif - -#else /* DEBUG */ -#define DSSDBG(format, ...) -#define DSSDBGF(format, ...) +#define pr_fmt(fmt) fmt #endif +#define DSSDBG(format, ...) \ + pr_debug(format, ## __VA_ARGS__) #ifdef DSS_SUBSYS_NAME #define DSSERR(format, ...) \ From f30be7d326671ec5691f83b6d473550ac002e008 Mon Sep 17 00:00:00 2001 From: Chandrabhanu Mahapatra Date: Sat, 29 Sep 2012 12:33:05 +0530 Subject: [PATCH 0296/5831] OMAPDSS: Replace multi part debug prints with pr_debug The various functions in dispc and dsi such as print_irq_status(), print_irq_status_vc(), print_irq_status_cio() and _dsi_print_reset_status() consist of a number of debug prints which need to be enabled all at once or none at all. So, these debug prints in corresponding functions are replaced with one dynamic debug enabled pr_debug() each. Signed-off-by: Chandrabhanu Mahapatra Reviewed-by: Sumit Semwal Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/dss/dispc.c | 30 +++--- drivers/video/omap2/dss/dsi.c | 166 +++++++++++++++----------------- 2 files changed, 88 insertions(+), 108 deletions(-) diff --git a/drivers/video/omap2/dss/dispc.c b/drivers/video/omap2/dss/dispc.c index a173a9481a23..67d9f3bdea39 100644 --- a/drivers/video/omap2/dss/dispc.c +++ b/drivers/video/omap2/dss/dispc.c @@ -3675,26 +3675,20 @@ static void print_irq_status(u32 status) if ((status & dispc.irq_error_mask) == 0) return; - printk(KERN_DEBUG "DISPC IRQ: 0x%x: ", status); +#define PIS(x) (status & DISPC_IRQ_##x) ? (#x " ") : "" -#define PIS(x) \ - if (status & DISPC_IRQ_##x) \ - printk(#x " "); - PIS(GFX_FIFO_UNDERFLOW); - PIS(OCP_ERR); - PIS(VID1_FIFO_UNDERFLOW); - PIS(VID2_FIFO_UNDERFLOW); - if (dss_feat_get_num_ovls() > 3) - PIS(VID3_FIFO_UNDERFLOW); - PIS(SYNC_LOST); - PIS(SYNC_LOST_DIGIT); - if (dss_has_feature(FEAT_MGR_LCD2)) - PIS(SYNC_LOST2); - if (dss_has_feature(FEAT_MGR_LCD3)) - PIS(SYNC_LOST3); + pr_debug("DISPC IRQ: 0x%x: %s%s%s%s%s%s%s%s%s\n", + status, + PIS(OCP_ERR), + PIS(GFX_FIFO_UNDERFLOW), + PIS(VID1_FIFO_UNDERFLOW), + PIS(VID2_FIFO_UNDERFLOW), + dss_feat_get_num_ovls() > 3 ? PIS(VID3_FIFO_UNDERFLOW) : "", + PIS(SYNC_LOST), + PIS(SYNC_LOST_DIGIT), + dss_has_feature(FEAT_MGR_LCD2) ? PIS(SYNC_LOST2) : "", + dss_has_feature(FEAT_MGR_LCD3) ? 
PIS(SYNC_LOST3) : ""); #undef PIS - - printk("\n"); } #endif diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c index b0345f3ac909..19daee9fd302 100644 --- a/drivers/video/omap2/dss/dsi.c +++ b/drivers/video/omap2/dss/dsi.c @@ -45,7 +45,6 @@ #include "dss.h" #include "dss_features.h" -/*#define VERBOSE_IRQ*/ #define DSI_CATCH_MISSING_TE struct dsi_reg { u16 idx; }; @@ -526,42 +525,38 @@ static inline void dsi_perf_show(struct platform_device *dsidev, } #endif +static int verbose_irq; + static void print_irq_status(u32 status) { if (status == 0) return; -#ifndef VERBOSE_IRQ - if ((status & ~DSI_IRQ_CHANNEL_MASK) == 0) + if (!verbose_irq && (status & ~DSI_IRQ_CHANNEL_MASK) == 0) return; -#endif - printk(KERN_DEBUG "DSI IRQ: 0x%x: ", status); -#define PIS(x) \ - if (status & DSI_IRQ_##x) \ - printk(#x " "); -#ifdef VERBOSE_IRQ - PIS(VC0); - PIS(VC1); - PIS(VC2); - PIS(VC3); -#endif - PIS(WAKEUP); - PIS(RESYNC); - PIS(PLL_LOCK); - PIS(PLL_UNLOCK); - PIS(PLL_RECALL); - PIS(COMPLEXIO_ERR); - PIS(HS_TX_TIMEOUT); - PIS(LP_RX_TIMEOUT); - PIS(TE_TRIGGER); - PIS(ACK_TRIGGER); - PIS(SYNC_LOST); - PIS(LDO_POWER_GOOD); - PIS(TA_TIMEOUT); +#define PIS(x) (status & DSI_IRQ_##x) ? (#x " ") : "" + + pr_debug("DSI IRQ: 0x%x: %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + status, + verbose_irq ? PIS(VC0) : "", + verbose_irq ? PIS(VC1) : "", + verbose_irq ? PIS(VC2) : "", + verbose_irq ? PIS(VC3) : "", + PIS(WAKEUP), + PIS(RESYNC), + PIS(PLL_LOCK), + PIS(PLL_UNLOCK), + PIS(PLL_RECALL), + PIS(COMPLEXIO_ERR), + PIS(HS_TX_TIMEOUT), + PIS(LP_RX_TIMEOUT), + PIS(TE_TRIGGER), + PIS(ACK_TRIGGER), + PIS(SYNC_LOST), + PIS(LDO_POWER_GOOD), + PIS(TA_TIMEOUT)); #undef PIS - - printk("\n"); } static void print_irq_status_vc(int channel, u32 status) @@ -569,28 +564,24 @@ static void print_irq_status_vc(int channel, u32 status) if (status == 0) return; -#ifndef VERBOSE_IRQ - if ((status & ~DSI_VC_IRQ_PACKET_SENT) == 0) + if (!verbose_irq && (status & ~DSI_VC_IRQ_PACKET_SENT) == 0) return; -#endif - printk(KERN_DEBUG "DSI VC(%d) IRQ 0x%x: ", channel, status); -#define PIS(x) \ - if (status & DSI_VC_IRQ_##x) \ - printk(#x " "); - PIS(CS); - PIS(ECC_CORR); -#ifdef VERBOSE_IRQ - PIS(PACKET_SENT); -#endif - PIS(FIFO_TX_OVF); - PIS(FIFO_RX_OVF); - PIS(BTA); - PIS(ECC_NO_CORR); - PIS(FIFO_TX_UDF); - PIS(PP_BUSY_CHANGE); +#define PIS(x) (status & DSI_VC_IRQ_##x) ? (#x " ") : "" + + pr_debug("DSI VC(%d) IRQ 0x%x: %s%s%s%s%s%s%s%s%s\n", + channel, + status, + PIS(CS), + PIS(ECC_CORR), + PIS(ECC_NO_CORR), + verbose_irq ? PIS(PACKET_SENT) : "", + PIS(BTA), + PIS(FIFO_TX_OVF), + PIS(FIFO_RX_OVF), + PIS(FIFO_TX_UDF), + PIS(PP_BUSY_CHANGE)); #undef PIS - printk("\n"); } static void print_irq_status_cio(u32 status) @@ -598,34 +589,31 @@ static void print_irq_status_cio(u32 status) if (status == 0) return; - printk(KERN_DEBUG "DSI CIO IRQ 0x%x: ", status); +#define PIS(x) (status & DSI_CIO_IRQ_##x) ? 
(#x " ") : "" -#define PIS(x) \ - if (status & DSI_CIO_IRQ_##x) \ - printk(#x " "); - PIS(ERRSYNCESC1); - PIS(ERRSYNCESC2); - PIS(ERRSYNCESC3); - PIS(ERRESC1); - PIS(ERRESC2); - PIS(ERRESC3); - PIS(ERRCONTROL1); - PIS(ERRCONTROL2); - PIS(ERRCONTROL3); - PIS(STATEULPS1); - PIS(STATEULPS2); - PIS(STATEULPS3); - PIS(ERRCONTENTIONLP0_1); - PIS(ERRCONTENTIONLP1_1); - PIS(ERRCONTENTIONLP0_2); - PIS(ERRCONTENTIONLP1_2); - PIS(ERRCONTENTIONLP0_3); - PIS(ERRCONTENTIONLP1_3); - PIS(ULPSACTIVENOT_ALL0); - PIS(ULPSACTIVENOT_ALL1); + pr_debug("DSI CIO IRQ 0x%x: %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + status, + PIS(ERRSYNCESC1), + PIS(ERRSYNCESC2), + PIS(ERRSYNCESC3), + PIS(ERRESC1), + PIS(ERRESC2), + PIS(ERRESC3), + PIS(ERRCONTROL1), + PIS(ERRCONTROL2), + PIS(ERRCONTROL3), + PIS(STATEULPS1), + PIS(STATEULPS2), + PIS(STATEULPS3), + PIS(ERRCONTENTIONLP0_1), + PIS(ERRCONTENTIONLP1_1), + PIS(ERRCONTENTIONLP0_2), + PIS(ERRCONTENTIONLP1_2), + PIS(ERRCONTENTIONLP0_3), + PIS(ERRCONTENTIONLP1_3), + PIS(ULPSACTIVENOT_ALL0), + PIS(ULPSACTIVENOT_ALL1)); #undef PIS - - printk("\n"); } #ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS @@ -1121,14 +1109,6 @@ static void _dsi_print_reset_status(struct platform_device *dsidev) * I/O. */ l = dsi_read_reg(dsidev, DSI_DSIPHY_CFG5); - printk(KERN_DEBUG "DSI resets: "); - - l = dsi_read_reg(dsidev, DSI_PLL_STATUS); - printk("PLL (%d) ", FLD_GET(l, 0, 0)); - - l = dsi_read_reg(dsidev, DSI_COMPLEXIO_CFG1); - printk("CIO (%d) ", FLD_GET(l, 29, 29)); - if (dss_has_feature(FEAT_DSI_REVERSE_TXCLKESC)) { b0 = 28; b1 = 27; @@ -1139,14 +1119,20 @@ static void _dsi_print_reset_status(struct platform_device *dsidev) b2 = 26; } - l = dsi_read_reg(dsidev, DSI_DSIPHY_CFG5); - printk("PHY (%x%x%x, %d, %d, %d)\n", - FLD_GET(l, b0, b0), - FLD_GET(l, b1, b1), - FLD_GET(l, b2, b2), - FLD_GET(l, 29, 29), - FLD_GET(l, 30, 30), - FLD_GET(l, 31, 31)); +#define DSI_FLD_GET(fld, start, end)\ + FLD_GET(dsi_read_reg(dsidev, DSI_##fld), start, end) + + pr_debug("DSI resets: PLL (%d) CIO (%d) PHY (%x%x%x, %d, %d, %d)\n", + DSI_FLD_GET(PLL_STATUS, 0, 0), + DSI_FLD_GET(COMPLEXIO_CFG1, 29, 29), + DSI_FLD_GET(DSIPHY_CFG5, b0, b0), + DSI_FLD_GET(DSIPHY_CFG5, b1, b1), + DSI_FLD_GET(DSIPHY_CFG5, b2, b2), + DSI_FLD_GET(DSIPHY_CFG5, 29, 29), + DSI_FLD_GET(DSIPHY_CFG5, 30, 30), + DSI_FLD_GET(DSIPHY_CFG5, 31, 31)); + +#undef DSI_FLD_GET } #else #define _dsi_print_reset_status(x) From 28bcd199cc4465733c1ac0c70135a385fff97c71 Mon Sep 17 00:00:00 2001 From: Chandrabhanu Mahapatra Date: Sat, 29 Sep 2012 13:57:31 +0530 Subject: [PATCH 0297/5831] OMAPDSS: Remove dss_debug variable All the debug prints have been replaced with pr_debug(). Thus, the dependency on dss_debug variable is replaced with dyndbg in dynamic debugging mode and DEBUG flag otherwise. So, the dss_debug variable is removed along with checks for DEBUG flag. 
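In practical terms this also retires the old "debug" module parameter (module_param_named(debug, dss_debug, ...) in core.c, removed below), so debug output is now enabled either by building with CONFIG_OMAP2_DSS_DEBUG or, when CONFIG_DYNAMIC_DEBUG is set, through the dynamic_debug control file as described in the earlier patches.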
Signed-off-by: Chandrabhanu Mahapatra Reviewed-by: Sumit Semwal Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/dss/core.c | 5 ----- drivers/video/omap2/dss/dispc.c | 8 ++------ drivers/video/omap2/dss/dsi.c | 7 ------- drivers/video/omap2/dss/dss.h | 4 ---- 4 files changed, 2 insertions(+), 22 deletions(-) diff --git a/drivers/video/omap2/dss/core.c b/drivers/video/omap2/dss/core.c index 826d64faeaa7..3794147b6eb0 100644 --- a/drivers/video/omap2/dss/core.c +++ b/drivers/video/omap2/dss/core.c @@ -53,11 +53,6 @@ static char *def_disp_name; module_param_named(def_disp, def_disp_name, charp, 0); MODULE_PARM_DESC(def_disp, "default display name"); -#ifdef DEBUG -bool dss_debug; -module_param_named(debug, dss_debug, bool, 0644); -#endif - const char *dss_get_default_display_name(void) { return core.default_display_name; diff --git a/drivers/video/omap2/dss/dispc.c b/drivers/video/omap2/dss/dispc.c index 67d9f3bdea39..b5204b490ceb 100644 --- a/drivers/video/omap2/dss/dispc.c +++ b/drivers/video/omap2/dss/dispc.c @@ -3669,7 +3669,6 @@ int omap_dispc_unregister_isr(omap_dispc_isr_t isr, void *arg, u32 mask) } EXPORT_SYMBOL(omap_dispc_unregister_isr); -#ifdef DEBUG static void print_irq_status(u32 status) { if ((status & dispc.irq_error_mask) == 0) @@ -3690,7 +3689,6 @@ static void print_irq_status(u32 status) dss_has_feature(FEAT_MGR_LCD3) ? PIS(SYNC_LOST3) : ""); #undef PIS } -#endif /* Called from dss.c. Note that we don't touch clocks here, * but we presume they are on because we got an IRQ. However, @@ -3723,10 +3721,8 @@ static irqreturn_t omap_dispc_irq_handler(int irq, void *arg) spin_unlock(&dispc.irq_stats_lock); #endif -#ifdef DEBUG - if (dss_debug) - print_irq_status(irqstatus); -#endif + print_irq_status(irqstatus); + /* Ack the interrupt. Do it here before clocks are possibly turned * off */ dispc_write_reg(DISPC_IRQSTATUS, irqstatus); diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c index 19daee9fd302..bbbafe370572 100644 --- a/drivers/video/omap2/dss/dsi.c +++ b/drivers/video/omap2/dss/dsi.c @@ -1095,15 +1095,11 @@ static inline void dsi_enable_pll_clock(struct platform_device *dsidev, } } -#ifdef DEBUG static void _dsi_print_reset_status(struct platform_device *dsidev) { u32 l; int b0, b1, b2; - if (!dss_debug) - return; - /* A dummy read using the SCP interface to any DSIPHY register is * required after DSIPHY reset to complete the reset of the DSI complex * I/O. 
*/ @@ -1134,9 +1130,6 @@ static void _dsi_print_reset_status(struct platform_device *dsidev) #undef DSI_FLD_GET } -#else -#define _dsi_print_reset_status(x) -#endif static inline int dsi_if_enable(struct platform_device *dsidev, bool enable) { diff --git a/drivers/video/omap2/dss/dss.h b/drivers/video/omap2/dss/dss.h index d8aeee49825b..75f841a4a1e1 100644 --- a/drivers/video/omap2/dss/dss.h +++ b/drivers/video/omap2/dss/dss.h @@ -23,10 +23,6 @@ #ifndef __OMAP2_DSS_H #define __OMAP2_DSS_H -#ifdef DEBUG -extern bool dss_debug; -#endif - #ifdef pr_fmt #undef pr_fmt #endif From 4e92920b4ba25a0a70dd056e0dd7b4b4fd1a3dc6 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 3 Oct 2012 12:42:06 +0100 Subject: [PATCH 0298/5831] regulator: lp8788-ldo: Staticise non-exported symbol Signed-off-by: Mark Brown --- drivers/regulator/lp8788-ldo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/lp8788-ldo.c b/drivers/regulator/lp8788-ldo.c index 6796eeb47dc6..e496ba075336 100644 --- a/drivers/regulator/lp8788-ldo.c +++ b/drivers/regulator/lp8788-ldo.c @@ -126,7 +126,7 @@ struct lp8788_ldo { }; /* DLDO 1, 2, 3, 9 voltage table */ -const int lp8788_dldo1239_vtbl[] = { +static const int lp8788_dldo1239_vtbl[] = { 1800000, 1900000, 2000000, 2100000, 2200000, 2300000, 2400000, 2500000, 2600000, 2700000, 2800000, 2900000, 3000000, 2850000, 2850000, 2850000, 2850000, 2850000, 2850000, 2850000, 2850000, 2850000, 2850000, 2850000, From 0c57067430a2b729bc08c92b17eb4f29d9bbfaae Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Sat, 6 Oct 2012 20:47:46 +0530 Subject: [PATCH 0299/5831] regulator: tps51632: Add tps51632 regulator driver The TPS51632 is a driverless step down controller with serial control. Advanced features such as D-Cap+ architecture with overlapping pulse support and OSR overshoot reduction provide fast transient response, lowest output capacitance and high efficiency. The TPS51632 supports both I2C and DVFS interfaces (through PWM) for dynamic control of the output voltage and current monitor telemetry. Add regulator driver for TPS51632. Signed-off-by: Laxman Dewangan Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 11 + drivers/regulator/Makefile | 1 + drivers/regulator/tps51632-regulator.c | 332 +++++++++++++++++++ include/linux/regulator/tps51632-regulator.h | 47 +++ 4 files changed, 391 insertions(+) create mode 100644 drivers/regulator/tps51632-regulator.c create mode 100644 include/linux/regulator/tps51632-regulator.h diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 67d47b59a66d..aa9e8a18262d 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -335,6 +335,17 @@ config REGULATOR_PALMAS on the muxing. This is handled automatically in the driver by reading the mux info from OTP. +config REGULATOR_TPS51632 + tristate "TI TPS51632 Power Regulator" + depends on I2C + select REGMAP_I2C + help + This driver supports TPS51632 voltage regulator chip. + The TPS52632 is 3-2-1 Phase D-Cap+ Step Down Driverless Controller + with Serial VID control and DVFS. + The voltage output can be configure through I2C interface or PWM + interface. 
+ config REGULATOR_TPS6105X tristate "TI TPS6105X Power regulators" depends on TPS6105X diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index e431eed8a878..ec1aec460bf6 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_REGULATOR_MC13783) += mc13783-regulator.o obj-$(CONFIG_REGULATOR_MC13892) += mc13892-regulator.o obj-$(CONFIG_REGULATOR_MC13XXX_CORE) += mc13xxx-regulator-core.o obj-$(CONFIG_REGULATOR_PALMAS) += palmas-regulator.o +obj-$(CONFIG_REGULATOR_TPS51632) += tps51632-regulator.o obj-$(CONFIG_REGULATOR_PCAP) += pcap-regulator.o obj-$(CONFIG_REGULATOR_PCF50633) += pcf50633-regulator.o obj-$(CONFIG_REGULATOR_RC5T583) += rc5t583-regulator.o diff --git a/drivers/regulator/tps51632-regulator.c b/drivers/regulator/tps51632-regulator.c new file mode 100644 index 000000000000..34603640d6d9 --- /dev/null +++ b/drivers/regulator/tps51632-regulator.c @@ -0,0 +1,332 @@ +/* + * tps51632-regulator.c -- TI TPS51632 + * + * Regulator driver for TPS51632 3-2-1 Phase D-Cap Step Down Driverless + * Controller with serial VID control and DVFS. + * + * Copyright (c) 2012, NVIDIA Corporation. + * + * Author: Laxman Dewangan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind, + * whether express or implied; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307, USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Register definitions */ +#define TPS51632_VOLTAGE_SELECT_REG 0x0 +#define TPS51632_VOLTAGE_BASE_REG 0x1 +#define TPS51632_OFFSET_REG 0x2 +#define TPS51632_IMON_REG 0x3 +#define TPS51632_VMAX_REG 0x4 +#define TPS51632_DVFS_CONTROL_REG 0x5 +#define TPS51632_POWER_STATE_REG 0x6 +#define TPS51632_SLEW_REGS 0x7 +#define TPS51632_FAULT_REG 0x14 + +#define TPS51632_MAX_REG 0x15 + +#define TPS51632_VOUT_MASK 0x7F +#define TPS51632_VOUT_OFFSET_MASK 0x1F +#define TPS51632_VMAX_MASK 0x7F +#define TPS51632_VMAX_LOCK 0x80 + +/* TPS51632_DVFS_CONTROL_REG */ +#define TPS51632_DVFS_PWMEN 0x1 +#define TPS51632_DVFS_STEP_20 0x2 +#define TPS51632_DVFS_VMAX_PG 0x4 +#define TPS51632_DVFS_PWMRST 0x8 +#define TPS51632_DVFS_OCA_EN 0x10 +#define TPS51632_DVFS_FCCM 0x20 + +/* TPS51632_POWER_STATE_REG */ +#define TPS51632_POWER_STATE_MASK 0x03 +#define TPS51632_POWER_STATE_MULTI_PHASE_CCM 0x0 +#define TPS51632_POWER_STATE_SINGLE_PHASE_CCM 0x1 +#define TPS51632_POWER_STATE_SINGLE_PHASE_DCM 0x2 + +#define TPS51632_MIN_VOLATGE 500000 +#define TPS51632_MAX_VOLATGE 1520000 +#define TPS51632_VOLATGE_STEP_10mV 10000 +#define TPS51632_VOLATGE_STEP_20mV 20000 +#define TPS51632_MAX_VSEL 0x7F +#define TPS51632_MIN_VSEL 0x19 +#define TPS51632_DEFAULT_RAMP_DELAY 6000 +#define TPS51632_VOLT_VSEL(uV) \ + (DIV_ROUND_UP(uV - TPS51632_MIN_VOLATGE, \ + TPS51632_VOLATGE_STEP_10mV) + \ + TPS51632_MIN_VSEL) + +/* TPS51632 chip information */ +struct tps51632_chip { + struct device *dev; + struct regulator_desc desc; + struct regulator_dev *rdev; + struct regmap *regmap; + bool 
enable_pwm_dvfs; +}; + +static int tps51632_dcdc_get_voltage_sel(struct regulator_dev *rdev) +{ + struct tps51632_chip *tps = rdev_get_drvdata(rdev); + unsigned int data; + int ret; + unsigned int reg = TPS51632_VOLTAGE_SELECT_REG; + int vsel; + + if (tps->enable_pwm_dvfs) + reg = TPS51632_VOLTAGE_BASE_REG; + + ret = regmap_read(tps->regmap, reg, &data); + if (ret < 0) { + dev_err(tps->dev, "reg read failed, err %d\n", ret); + return ret; + } + + vsel = data & TPS51632_VOUT_MASK; + + if (vsel < TPS51632_MIN_VSEL) + return 0; + else + return vsel - TPS51632_MIN_VSEL; +} + +static int tps51632_dcdc_set_voltage_sel(struct regulator_dev *rdev, + unsigned selector) +{ + struct tps51632_chip *tps = rdev_get_drvdata(rdev); + int vsel; + int ret; + unsigned int reg = TPS51632_VOLTAGE_SELECT_REG; + + if (tps->enable_pwm_dvfs) + reg = TPS51632_VOLTAGE_BASE_REG; + + vsel = selector + TPS51632_MIN_VSEL; + if (vsel > TPS51632_MAX_VSEL) + return -EINVAL; + + ret = regmap_write(tps->regmap, TPS51632_VOLTAGE_SELECT_REG, vsel); + if (ret < 0) + dev_err(tps->dev, "reg write failed, err %d\n", ret); + return ret; +} + +static int tps51632_dcdc_set_ramp_delay(struct regulator_dev *rdev, + int ramp_delay) +{ + struct tps51632_chip *tps = rdev_get_drvdata(rdev); + int bit = ramp_delay/6000; + int ret; + + if (bit) + bit--; + ret = regmap_write(tps->regmap, TPS51632_SLEW_REGS, BIT(bit)); + if (ret < 0) + dev_err(tps->dev, "SLEW reg write failed, err %d\n", ret); + return ret; +} + +static struct regulator_ops tps51632_dcdc_ops = { + .get_voltage_sel = tps51632_dcdc_get_voltage_sel, + .set_voltage_sel = tps51632_dcdc_set_voltage_sel, + .list_voltage = regulator_list_voltage_linear, + .set_voltage_time_sel = regulator_set_voltage_time_sel, + .set_ramp_delay = tps51632_dcdc_set_ramp_delay, +}; + +static int __devinit tps51632_init_dcdc(struct tps51632_chip *tps, + struct tps51632_regulator_platform_data *pdata) +{ + int ret; + uint8_t control = 0; + int vsel; + + if (!pdata->enable_pwm_dvfs) + goto skip_pwm_config; + + control |= TPS51632_DVFS_PWMEN; + tps->enable_pwm_dvfs = pdata->enable_pwm_dvfs; + vsel = TPS51632_VOLT_VSEL(pdata->base_voltage_uV); + ret = regmap_write(tps->regmap, TPS51632_VOLTAGE_BASE_REG, vsel); + if (ret < 0) { + dev_err(tps->dev, "BASE reg write failed, err %d\n", ret); + return ret; + } + + if (pdata->dvfs_step_20mV) + control |= TPS51632_DVFS_STEP_20; + + if (pdata->max_voltage_uV) { + unsigned int vmax; + /** + * TPS51632 hw behavior: VMAX register can be write only + * once as it get locked after first write. The lock get + * reset only when device is power-reset. + * Write register only when lock bit is not enabled. 
+ */ + ret = regmap_read(tps->regmap, TPS51632_VMAX_REG, &vmax); + if (ret < 0) { + dev_err(tps->dev, "VMAX read failed, err %d\n", ret); + return ret; + } + if (!(vmax & TPS51632_VMAX_LOCK)) { + vsel = TPS51632_VOLT_VSEL(pdata->max_voltage_uV); + ret = regmap_write(tps->regmap, TPS51632_VMAX_REG, + vsel); + if (ret < 0) { + dev_err(tps->dev, + "VMAX write failed, err %d\n", ret); + return ret; + } + } + } + +skip_pwm_config: + ret = regmap_write(tps->regmap, TPS51632_DVFS_CONTROL_REG, control); + if (ret < 0) + dev_err(tps->dev, "DVFS reg write failed, err %d\n", ret); + return ret; +} + +static bool rd_wr_reg(struct device *dev, unsigned int reg) +{ + if ((reg >= 0x8) && (reg <= 0x10)) + return false; + return true; +} + +static const struct regmap_config tps51632_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .writeable_reg = rd_wr_reg, + .readable_reg = rd_wr_reg, + .max_register = TPS51632_MAX_REG - 1, + .cache_type = REGCACHE_RBTREE, +}; + +static int __devinit tps51632_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct tps51632_regulator_platform_data *pdata; + struct regulator_dev *rdev; + struct tps51632_chip *tps; + int ret; + struct regulator_config config = { }; + + pdata = client->dev.platform_data; + if (!pdata) { + dev_err(&client->dev, "No Platform data\n"); + return -EINVAL; + } + + tps = devm_kzalloc(&client->dev, sizeof(*tps), GFP_KERNEL); + if (!tps) { + dev_err(&client->dev, "Memory allocation failed\n"); + return -ENOMEM; + } + + tps->dev = &client->dev; + tps->desc.name = id->name; + tps->desc.id = 0; + tps->desc.ramp_delay = TPS51632_DEFAULT_RAMP_DELAY; + tps->desc.min_uV = TPS51632_MIN_VOLATGE; + tps->desc.uV_step = TPS51632_VOLATGE_STEP_10mV; + tps->desc.n_voltages = (TPS51632_MAX_VSEL - TPS51632_MIN_VSEL) + 1; + tps->desc.ops = &tps51632_dcdc_ops; + tps->desc.type = REGULATOR_VOLTAGE; + tps->desc.owner = THIS_MODULE; + + tps->regmap = devm_regmap_init_i2c(client, &tps51632_regmap_config); + if (IS_ERR(tps->regmap)) { + ret = PTR_ERR(tps->regmap); + dev_err(&client->dev, "regmap init failed, err %d\n", ret); + return ret; + } + i2c_set_clientdata(client, tps); + + ret = tps51632_init_dcdc(tps, pdata); + if (ret < 0) { + dev_err(tps->dev, "Init failed, err = %d\n", ret); + return ret; + } + + /* Register the regulators */ + config.dev = &client->dev; + config.init_data = pdata->reg_init_data; + config.driver_data = tps; + config.regmap = tps->regmap; + config.of_node = client->dev.of_node; + + rdev = regulator_register(&tps->desc, &config); + if (IS_ERR(rdev)) { + dev_err(tps->dev, "regulator register failed\n"); + return PTR_ERR(rdev); + } + + tps->rdev = rdev; + return 0; +} + +static int __devexit tps51632_remove(struct i2c_client *client) +{ + struct tps51632_chip *tps = i2c_get_clientdata(client); + + regulator_unregister(tps->rdev); + return 0; +} + +static const struct i2c_device_id tps51632_id[] = { + {.name = "tps51632",}, + {}, +}; + +MODULE_DEVICE_TABLE(i2c, tps51632_id); + +static struct i2c_driver tps51632_i2c_driver = { + .driver = { + .name = "tps51632", + .owner = THIS_MODULE, + }, + .probe = tps51632_probe, + .remove = __devexit_p(tps51632_remove), + .id_table = tps51632_id, +}; + +static int __init tps51632_init(void) +{ + return i2c_add_driver(&tps51632_i2c_driver); +} +subsys_initcall(tps51632_init); + +static void __exit tps51632_cleanup(void) +{ + i2c_del_driver(&tps51632_i2c_driver); +} +module_exit(tps51632_cleanup); + +MODULE_AUTHOR("Laxman Dewangan "); +MODULE_DESCRIPTION("TPS51632 voltage regulator 
driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/regulator/tps51632-regulator.h b/include/linux/regulator/tps51632-regulator.h new file mode 100644 index 000000000000..d00841e1a75a --- /dev/null +++ b/include/linux/regulator/tps51632-regulator.h @@ -0,0 +1,47 @@ +/* + * tps51632-regulator.h -- TPS51632 regulator + * + * Interface for regulator driver for TPS51632 3-2-1 Phase D-Cap Step Down + * Driverless Controller with serial VID control and DVFS. + * + * Copyright (C) 2012 NVIDIA Corporation + + * Author: Laxman Dewangan + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +#ifndef __LINUX_REGULATOR_TPS51632_H +#define __LINUX_REGULATOR_TPS51632_H + +/* + * struct tps51632_regulator_platform_data - tps51632 regulator platform data. + * + * @reg_init_data: The regulator init data. + * @enable_pwm_dvfs: Enable PWM DVFS or not. + * @dvfs_step_20mV: Step for DVFS is 20mV or 10mV. + * @max_voltage_uV: Maximum possible voltage in PWM-DVFS mode. + * @base_voltage_uV: Base voltage when PWM-DVFS enabled. + */ +struct tps51632_regulator_platform_data { + struct regulator_init_data *reg_init_data; + bool enable_pwm_dvfs; + bool dvfs_step_20mV; + int max_voltage_uV; + int base_voltage_uV; +}; + +#endif /* __LINUX_REGULATOR_TPS51632_H */ From bd7a2b600ace90c8819495b639a744c8f5c68feb Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Mon, 24 Sep 2012 18:56:53 +0100 Subject: [PATCH 0300/5831] regulator: core: Support for continuous voltage range Some regulators can set any voltage within the constraints range, not being limited to specified operating points. This patch makes it possible to describe such regulator and makes the regulator_is_supported_voltage() function behave correctly. Signed-off-by: Pawel Moll Signed-off-by: Mark Brown --- drivers/regulator/core.c | 5 +++++ include/linux/regulator/driver.h | 3 +++ 2 files changed, 8 insertions(+) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 5c4829cba6a6..f7c74db7465c 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1979,6 +1979,11 @@ int regulator_is_supported_voltage(struct regulator *regulator, return ret; } + /* Any voltage within constrains range is fine? */ + if (rdev->desc->continuous_voltage_range) + return min_uV >= rdev->constraints->min_uV && + max_uV <= rdev->constraints->max_uV; + ret = regulator_count_voltages(regulator); if (ret < 0) return ret; diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 7932a3bf21bd..f2b72b230b9b 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -181,6 +181,8 @@ enum regulator_type { * @type: Indicates if the regulator is a voltage or current regulator. * @owner: Module providing the regulator, used for refcounting. 
* + * @continuous_voltage_range: Indicates if the regulator can set any + * voltage within constrains range. * @n_voltages: Number of selectors available for ops.list_voltage(). * * @min_uV: Voltage given by the lowest selector (if linear mapping) @@ -199,6 +201,7 @@ struct regulator_desc { const char *name; const char *supply_name; int id; + bool continuous_voltage_range; unsigned n_voltages; struct regulator_ops *ops; int irq; From 31e54086dd7bb86ad40f1d76a9063f2a95866b87 Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Mon, 24 Sep 2012 18:56:54 +0100 Subject: [PATCH 0301/5831] regulator: Versatile Express regulator driver Implementation of the regulator framework driver for the Versatile Express voltage control. Devices without voltage constraints (ie. "regulator-[min|max]-microvolt" properties in the DT node) are treated as fixed (or rather read-only) regulators. Signed-off-by: Pawel Moll Signed-off-by: Mark Brown --- .../bindings/regulator/vexpress.txt | 32 ++++ drivers/regulator/Kconfig | 7 + drivers/regulator/Makefile | 1 + drivers/regulator/vexpress.c | 146 ++++++++++++++++++ 4 files changed, 186 insertions(+) create mode 100644 Documentation/devicetree/bindings/regulator/vexpress.txt create mode 100644 drivers/regulator/vexpress.c diff --git a/Documentation/devicetree/bindings/regulator/vexpress.txt b/Documentation/devicetree/bindings/regulator/vexpress.txt new file mode 100644 index 000000000000..d775f72487aa --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/vexpress.txt @@ -0,0 +1,32 @@ +Versatile Express voltage regulators +------------------------------------ + +Requires node properties: +- "compatible" value: "arm,vexpress-volt" +- "arm,vexpress-sysreg,func" when controlled via vexpress-sysreg + (see Documentation/devicetree/bindings/arm/vexpress-sysreg.txt + for more details) + +Required regulator properties: +- "regulator-name" +- "regulator-always-on" + +Optional regulator properties: +- "regulator-min-microvolt" +- "regulator-max-microvolt" + +See Documentation/devicetree/bindings/regulator/regulator.txt +for more details about the regulator properties. + +When no "regulator-[min|max]-microvolt" properties are defined, +the device is treated as fixed (or rather "read-only") regulator. + +Example: + volt@0 { + compatible = "arm,vexpress-volt"; + arm,vexpress-sysreg,func = <2 0>; + regulator-name = "Cores"; + regulator-min-microvolt = <800000>; + regulator-max-microvolt = <1050000>; + regulator-always-on; + }; diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 67d47b59a66d..b44b019b9433 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -422,6 +422,13 @@ config REGULATOR_TWL4030 This driver supports the voltage regulators provided by this family of companion chips. +config REGULATOR_VEXPRESS + tristate "Versatile Express regulators" + depends on VEXPRESS_CONFIG + help + This driver provides support for voltage regulators available + on the ARM Ltd's Versatile Express platform. 
+ config REGULATOR_WM831X tristate "Wolfson Microelectronics WM831x PMIC regulators" depends on MFD_WM831X diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index e431eed8a878..9fa7a7bc42d8 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -57,6 +57,7 @@ obj-$(CONFIG_REGULATOR_TPS6586X) += tps6586x-regulator.o obj-$(CONFIG_REGULATOR_TPS65910) += tps65910-regulator.o obj-$(CONFIG_REGULATOR_TPS65912) += tps65912-regulator.o obj-$(CONFIG_REGULATOR_TWL4030) += twl-regulator.o +obj-$(CONFIG_REGULATOR_VEXPRESS) += vexpress.o obj-$(CONFIG_REGULATOR_WM831X) += wm831x-dcdc.o obj-$(CONFIG_REGULATOR_WM831X) += wm831x-isink.o obj-$(CONFIG_REGULATOR_WM831X) += wm831x-ldo.o diff --git a/drivers/regulator/vexpress.c b/drivers/regulator/vexpress.c new file mode 100644 index 000000000000..1702945a93a9 --- /dev/null +++ b/drivers/regulator/vexpress.c @@ -0,0 +1,146 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2012 ARM Limited + */ + +#define DRVNAME "vexpress-regulator" +#define pr_fmt(fmt) DRVNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +struct vexpress_regulator { + struct regulator_desc desc; + struct regulator_dev *regdev; + struct vexpress_config_func *func; +}; + +static int vexpress_regulator_get_voltage(struct regulator_dev *regdev) +{ + struct vexpress_regulator *reg = rdev_get_drvdata(regdev); + u32 uV; + int err = vexpress_config_read(reg->func, 0, &uV); + + return err ? 
err : uV; +} + +static int vexpress_regulator_set_voltage(struct regulator_dev *regdev, + int min_uV, int max_uV, unsigned *selector) +{ + struct vexpress_regulator *reg = rdev_get_drvdata(regdev); + + return vexpress_config_write(reg->func, 0, min_uV); +} + +static struct regulator_ops vexpress_regulator_ops_ro = { + .get_voltage = vexpress_regulator_get_voltage, +}; + +static struct regulator_ops vexpress_regulator_ops = { + .get_voltage = vexpress_regulator_get_voltage, + .set_voltage = vexpress_regulator_set_voltage, +}; + +static int vexpress_regulator_probe(struct platform_device *pdev) +{ + int err; + struct vexpress_regulator *reg; + struct regulator_init_data *init_data; + struct regulator_config config = { }; + + reg = devm_kzalloc(&pdev->dev, sizeof(*reg), GFP_KERNEL); + if (!reg) { + err = -ENOMEM; + goto error_kzalloc; + } + + reg->func = vexpress_config_func_get_by_dev(&pdev->dev); + if (!reg->func) { + err = -ENXIO; + goto error_get_func; + } + + reg->desc.name = dev_name(&pdev->dev); + reg->desc.type = REGULATOR_VOLTAGE; + reg->desc.owner = THIS_MODULE; + reg->desc.continuous_voltage_range = true; + + init_data = of_get_regulator_init_data(&pdev->dev, pdev->dev.of_node); + if (!init_data) { + err = -EINVAL; + goto error_get_regulator_init_data; + } + + init_data->constraints.apply_uV = 0; + if (init_data->constraints.min_uV && init_data->constraints.max_uV) + reg->desc.ops = &vexpress_regulator_ops; + else + reg->desc.ops = &vexpress_regulator_ops_ro; + + config.dev = &pdev->dev; + config.init_data = init_data; + config.driver_data = reg; + config.of_node = pdev->dev.of_node; + + reg->regdev = regulator_register(®->desc, &config); + if (IS_ERR(reg->regdev)) { + err = PTR_ERR(reg->regdev); + goto error_regulator_register; + } + + platform_set_drvdata(pdev, reg); + + return 0; + +error_regulator_register: +error_get_regulator_init_data: + vexpress_config_func_put(reg->func); +error_get_func: +error_kzalloc: + return err; +} + +static int __devexit vexpress_regulator_remove(struct platform_device *pdev) +{ + struct vexpress_regulator *reg = platform_get_drvdata(pdev); + + vexpress_config_func_put(reg->func); + regulator_unregister(reg->regdev); + + return 0; +} + +static struct of_device_id vexpress_regulator_of_match[] = { + { .compatible = "arm,vexpress-volt", }, +}; + +static struct platform_driver vexpress_regulator_driver = { + .probe = vexpress_regulator_probe, + .remove = __devexit_p(vexpress_regulator_remove), + .driver = { + .name = DRVNAME, + .owner = THIS_MODULE, + .of_match_table = vexpress_regulator_of_match, + }, +}; + +module_platform_driver(vexpress_regulator_driver); + +MODULE_AUTHOR("Pawel Moll "); +MODULE_DESCRIPTION("Versatile Express regulator"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:vexpress-regulator"); From 35b80920d4f0253fed03a1c3a345df8578dbd057 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Thu, 27 Sep 2012 15:44:17 -0700 Subject: [PATCH 0302/5831] crypto: crc32c - Rename crc32c-intel.c to crc32c-intel_glue.c This patch renames the crc32c-intel.c file to crc32c-intel_glue.c file in preparation for linking with the new crc32c-pcl-intel-asm.S file, which contains optimized crc32c calculation based on PCLMULQDQ instruction. 
Signed-off-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 1 + arch/x86/crypto/{crc32c-intel.c => crc32c-intel_glue.c} | 0 2 files changed, 1 insertion(+) rename arch/x86/crypto/{crc32c-intel.c => crc32c-intel_glue.c} (100%) diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 5bacb4a226ac..b95778311178 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -47,3 +47,4 @@ serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o +crc32c-intel-y := crc32c-intel_glue.o diff --git a/arch/x86/crypto/crc32c-intel.c b/arch/x86/crypto/crc32c-intel_glue.c similarity index 100% rename from arch/x86/crypto/crc32c-intel.c rename to arch/x86/crypto/crc32c-intel_glue.c From 6a8ce1ef3940e0cab5ff5f11e1cff5301f83fef6 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Thu, 27 Sep 2012 15:44:22 -0700 Subject: [PATCH 0303/5831] crypto: crc32c - Optimize CRC32C calculation with PCLMULQDQ instruction This patch adds the crc_pcl function that calculates CRC32C checksum using the PCLMULQDQ instruction on processors that support this feature. This will provide speedup over using CRC32 instruction only. The usage of PCLMULQDQ necessitate the invocation of kernel_fpu_begin and kernel_fpu_end and incur some overhead. So the new crc_pcl function is only invoked for buffer size of 512 bytes or more. Larger sized buffers will expect to see greater speedup. This feature is best used coupled with eager_fpu which reduces the kernel_fpu_begin/end overhead. For buffer size of 1K the speedup is around 1.6x and for buffer size greater than 4K, the speedup is around 3x compared to original implementation in crc32c-intel module. Test was performed on Sandy Bridge based platform with constant frequency set for cpu. A white paper detailing the algorithm can be found here: http://download.intel.com/design/intarch/papers/323405.pdf Signed-off-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 1 + arch/x86/crypto/crc32c-intel_glue.c | 81 ++++ arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 460 ++++++++++++++++++++++ crypto/Kconfig | 10 + 4 files changed, 552 insertions(+) create mode 100644 arch/x86/crypto/crc32c-pcl-intel-asm_64.S diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index b95778311178..84d7dbaba26e 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -48,3 +48,4 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o crc32c-intel-y := crc32c-intel_glue.o +crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index 493f959261f7..6812ad98355c 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -32,6 +32,8 @@ #include #include +#include +#include #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 @@ -44,6 +46,31 @@ #define REX_PRE #endif +#ifdef CONFIG_X86_64 +/* + * use carryless multiply version of crc32c when buffer + * size is >= 512 (when eager fpu is enabled) or + * >= 1024 (when eager fpu is disabled) to account + * for fpu state save/restore overhead. 
+ */ +#define CRC32C_PCL_BREAKEVEN_EAGERFPU 512 +#define CRC32C_PCL_BREAKEVEN_NOEAGERFPU 1024 + +asmlinkage unsigned int crc_pcl(const u8 *buffer, int len, + unsigned int crc_init); +static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU; +#if defined(X86_FEATURE_EAGER_FPU) +#define set_pcl_breakeven_point() \ +do { \ + if (!use_eager_fpu()) \ + crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU; \ +} while (0) +#else +#define set_pcl_breakeven_point() \ + (crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU) +#endif +#endif /* CONFIG_X86_64 */ + static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length) { while (length--) { @@ -154,6 +181,52 @@ static int crc32c_intel_cra_init(struct crypto_tfm *tfm) return 0; } +#ifdef CONFIG_X86_64 +static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + u32 *crcp = shash_desc_ctx(desc); + + /* + * use faster PCL version if datasize is large enough to + * overcome kernel fpu state save/restore overhead + */ + if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) { + kernel_fpu_begin(); + *crcp = crc_pcl(data, len, *crcp); + kernel_fpu_end(); + } else + *crcp = crc32c_intel_le_hw(*crcp, data, len); + return 0; +} + +static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) { + kernel_fpu_begin(); + *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp)); + kernel_fpu_end(); + } else + *(__le32 *)out = + ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len)); + return 0; +} + +static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32c_pcl_intel_finup(shash_desc_ctx(desc), data, len, out); +} + +static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32c_pcl_intel_finup(crypto_shash_ctx(desc->tfm), data, len, + out); +} +#endif /* CONFIG_X86_64 */ + static struct shash_alg alg = { .setkey = crc32c_intel_setkey, .init = crc32c_intel_init, @@ -184,6 +257,14 @@ static int __init crc32c_intel_mod_init(void) { if (!x86_match_cpu(crc32c_cpu_id)) return -ENODEV; +#ifdef CONFIG_X86_64 + if (cpu_has_pclmulqdq) { + alg.update = crc32c_pcl_intel_update; + alg.finup = crc32c_pcl_intel_finup; + alg.digest = crc32c_pcl_intel_digest; + set_pcl_breakeven_point(); + } +#endif return crypto_register_shash(&alg); } diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S new file mode 100644 index 000000000000..93c6d39237ac --- /dev/null +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -0,0 +1,460 @@ +/* + * Implement fast CRC32C with PCLMULQDQ instructions. (x86_64) + * + * The white paper on CRC32C calculations with PCLMULQDQ instruction can be + * downloaded from: + * http://download.intel.com/design/intarch/papers/323405.pdf + * + * Copyright (C) 2012 Intel Corporation. + * + * Authors: + * Wajdi Feghali + * James Guilford + * David Cote + * Tim Chen + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction + +.macro LABEL prefix n +\prefix\n\(): +.endm + +.macro JMPTBL_ENTRY i +.word crc_\i - crc_array +.endm + +.macro JNC_LESS_THAN j + jnc less_than_\j +.endm + +# Define threshold where buffers are considered "small" and routed to more +# efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so +# SMALL_SIZE can be no larger than 255. + +#define SMALL_SIZE 200 + +.if (SMALL_SIZE > 255) +.error "SMALL_ SIZE must be < 256" +.endif + +# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); + +.global crc_pcl +crc_pcl: +#define bufp %rdi +#define bufp_dw %edi +#define bufp_w %di +#define bufp_b %dil +#define bufptmp %rcx +#define block_0 %rcx +#define block_1 %rdx +#define block_2 %r11 +#define len %rsi +#define len_dw %esi +#define len_w %si +#define len_b %sil +#define crc_init_arg %rdx +#define tmp %rbx +#define crc_init %r8 +#define crc_init_dw %r8d +#define crc1 %r9 +#define crc2 %r10 + + pushq %rbx + pushq %rdi + pushq %rsi + + ## Move crc_init for Linux to a different + mov crc_init_arg, crc_init + + ################################################################ + ## 1) ALIGN: + ################################################################ + + mov bufp, bufptmp # rdi = *buf + neg bufp + and $7, bufp # calculate the unalignment amount of + # the address + je proc_block # Skip if aligned + + ## If len is less than 8 and we're unaligned, we need to jump + ## to special code to avoid reading beyond the end of the buffer + cmp $8, len + jae do_align + # less_than_8 expects length in upper 3 bits of len_dw + # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30] + shl $32-3+1, len_dw + jmp less_than_8_post_shl1 + +do_align: + #### Calculate CRC of unaligned bytes of the buffer (if any) + movq (bufptmp), tmp # load a quadward from the buffer + add bufp, bufptmp # align buffer pointer for quadword + # processing + sub bufp, len # update buffer length +align_loop: + crc32b %bl, crc_init_dw # compute crc32 of 1-byte + shr $8, tmp # get next byte + dec bufp + jne align_loop + +proc_block: + + ################################################################ + ## 2) PROCESS BLOCKS: + ################################################################ + + ## 
compute num of bytes to be processed + movq len, tmp # save num bytes in tmp + + cmpq $128*24, len + jae full_block + +continue_block: + cmpq $SMALL_SIZE, len + jb small + + ## len < 128*24 + movq $2731, %rax # 2731 = ceil(2^16 / 24) + mul len_dw + shrq $16, %rax + + ## eax contains floor(bytes / 24) = num 24-byte chunks to do + + ## process rax 24-byte chunks (128 >= rax >= 0) + + ## compute end address of each block + ## block 0 (base addr + RAX * 8) + ## block 1 (base addr + RAX * 16) + ## block 2 (base addr + RAX * 24) + lea (bufptmp, %rax, 8), block_0 + lea (block_0, %rax, 8), block_1 + lea (block_1, %rax, 8), block_2 + + xor crc1, crc1 + xor crc2, crc2 + + ## branch into array + lea jump_table(%rip), bufp + movzxw (bufp, %rax, 2), len + offset=crc_array-jump_table + lea offset(bufp, len, 1), bufp + jmp *bufp + + ################################################################ + ## 2a) PROCESS FULL BLOCKS: + ################################################################ +full_block: + movq $128,%rax + lea 128*8*2(block_0), block_1 + lea 128*8*3(block_0), block_2 + add $128*8*1, block_0 + + xor crc1,crc1 + xor crc2,crc2 + + # Fall thruogh into top of crc array (crc_128) + + ################################################################ + ## 3) CRC Array: + ################################################################ + +crc_array: + i=128 +.rept 128-1 +.altmacro +LABEL crc_ %i +.noaltmacro + crc32q -i*8(block_0), crc_init + crc32q -i*8(block_1), crc1 + crc32q -i*8(block_2), crc2 + i=(i-1) +.endr + +.altmacro +LABEL crc_ %i +.noaltmacro + crc32q -i*8(block_0), crc_init + crc32q -i*8(block_1), crc1 +# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet + + mov block_2, block_0 + + ################################################################ + ## 4) Combine three results: + ################################################################ + + lea (K_table-16)(%rip), bufp # first entry is for idx 1 + shlq $3, %rax # rax *= 8 + subq %rax, tmp # tmp -= rax*8 + shlq $1, %rax + subq %rax, tmp # tmp -= rax*16 + # (total tmp -= rax*24) + addq %rax, bufp + + movdqa (bufp), %xmm0 # 2 consts: K1:K2 + + movq crc_init, %xmm1 # CRC for block 1 + pclmulqdq $0x00,%xmm0,%xmm1 # Multiply by K2 + + movq crc1, %xmm2 # CRC for block 2 + pclmulqdq $0x10, %xmm0, %xmm2 # Multiply by K1 + + pxor %xmm2,%xmm1 + movq %xmm1, %rax + xor -i*8(block_2), %rax + mov crc2, crc_init + crc32 %rax, crc_init + +################################################################ +## 5) Check for end: +################################################################ + +LABEL crc_ 0 + mov tmp, len + cmp $128*24, tmp + jae full_block + cmp $24, tmp + jae continue_block + +less_than_24: + shl $32-4, len_dw # less_than_16 expects length + # in upper 4 bits of len_dw + jnc less_than_16 + crc32q (bufptmp), crc_init + crc32q 8(bufptmp), crc_init + jz do_return + add $16, bufptmp + # len is less than 8 if we got here + # less_than_8 expects length in upper 3 bits of len_dw + # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30] + shl $2, len_dw + jmp less_than_8_post_shl1 + + ####################################################################### + ## 6) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full) + ####################################################################### +small: + shl $32-8, len_dw # Prepare len_dw for less_than_256 + j=256 +.rept 5 # j = {256, 128, 64, 32, 16} +.altmacro +LABEL less_than_ %j # less_than_j: Length should be in + # upper lg(j) bits of len_dw + j=(j/2) + 
shl $1, len_dw # Get next MSB + JNC_LESS_THAN %j +.noaltmacro + i=0 +.rept (j/8) + crc32q i(bufptmp), crc_init # Compute crc32 of 8-byte data + i=i+8 +.endr + jz do_return # Return if remaining length is zero + add $j, bufptmp # Advance buf +.endr + +less_than_8: # Length should be stored in + # upper 3 bits of len_dw + shl $1, len_dw +less_than_8_post_shl1: + jnc less_than_4 + crc32l (bufptmp), crc_init_dw # CRC of 4 bytes + jz do_return # return if remaining data is zero + add $4, bufptmp +less_than_4: # Length should be stored in + # upper 2 bits of len_dw + shl $1, len_dw + jnc less_than_2 + crc32w (bufptmp), crc_init_dw # CRC of 2 bytes + jz do_return # return if remaining data is zero + add $2, bufptmp +less_than_2: # Length should be stored in the MSB + # of len_dw + shl $1, len_dw + jnc less_than_1 + crc32b (bufptmp), crc_init_dw # CRC of 1 byte +less_than_1: # Length should be zero +do_return: + movq crc_init, %rax + popq %rsi + popq %rdi + popq %rbx + ret + + ################################################################ + ## jump table Table is 129 entries x 2 bytes each + ################################################################ +.align 4 +jump_table: + i=0 +.rept 129 +.altmacro +JMPTBL_ENTRY %i +.noaltmacro + i=i+1 +.endr + ################################################################ + ## PCLMULQDQ tables + ## Table is 128 entries x 2 quad words each + ################################################################ +.data +.align 64 +K_table: + .quad 0x14cd00bd6,0x105ec76f0 + .quad 0x0ba4fc28e,0x14cd00bd6 + .quad 0x1d82c63da,0x0f20c0dfe + .quad 0x09e4addf8,0x0ba4fc28e + .quad 0x039d3b296,0x1384aa63a + .quad 0x102f9b8a2,0x1d82c63da + .quad 0x14237f5e6,0x01c291d04 + .quad 0x00d3b6092,0x09e4addf8 + .quad 0x0c96cfdc0,0x0740eef02 + .quad 0x18266e456,0x039d3b296 + .quad 0x0daece73e,0x0083a6eec + .quad 0x0ab7aff2a,0x102f9b8a2 + .quad 0x1248ea574,0x1c1733996 + .quad 0x083348832,0x14237f5e6 + .quad 0x12c743124,0x02ad91c30 + .quad 0x0b9e02b86,0x00d3b6092 + .quad 0x018b33a4e,0x06992cea2 + .quad 0x1b331e26a,0x0c96cfdc0 + .quad 0x17d35ba46,0x07e908048 + .quad 0x1bf2e8b8a,0x18266e456 + .quad 0x1a3e0968a,0x11ed1f9d8 + .quad 0x0ce7f39f4,0x0daece73e + .quad 0x061d82e56,0x0f1d0f55e + .quad 0x0d270f1a2,0x0ab7aff2a + .quad 0x1c3f5f66c,0x0a87ab8a8 + .quad 0x12ed0daac,0x1248ea574 + .quad 0x065863b64,0x08462d800 + .quad 0x11eef4f8e,0x083348832 + .quad 0x1ee54f54c,0x071d111a8 + .quad 0x0b3e32c28,0x12c743124 + .quad 0x0064f7f26,0x0ffd852c6 + .quad 0x0dd7e3b0c,0x0b9e02b86 + .quad 0x0f285651c,0x0dcb17aa4 + .quad 0x010746f3c,0x018b33a4e + .quad 0x1c24afea4,0x0f37c5aee + .quad 0x0271d9844,0x1b331e26a + .quad 0x08e766a0c,0x06051d5a2 + .quad 0x093a5f730,0x17d35ba46 + .quad 0x06cb08e5c,0x11d5ca20e + .quad 0x06b749fb2,0x1bf2e8b8a + .quad 0x1167f94f2,0x021f3d99c + .quad 0x0cec3662e,0x1a3e0968a + .quad 0x19329634a,0x08f158014 + .quad 0x0e6fc4e6a,0x0ce7f39f4 + .quad 0x08227bb8a,0x1a5e82106 + .quad 0x0b0cd4768,0x061d82e56 + .quad 0x13c2b89c4,0x188815ab2 + .quad 0x0d7a4825c,0x0d270f1a2 + .quad 0x10f5ff2ba,0x105405f3e + .quad 0x00167d312,0x1c3f5f66c + .quad 0x0f6076544,0x0e9adf796 + .quad 0x026f6a60a,0x12ed0daac + .quad 0x1a2adb74e,0x096638b34 + .quad 0x19d34af3a,0x065863b64 + .quad 0x049c3cc9c,0x1e50585a0 + .quad 0x068bce87a,0x11eef4f8e + .quad 0x1524fa6c6,0x19f1c69dc + .quad 0x16cba8aca,0x1ee54f54c + .quad 0x042d98888,0x12913343e + .quad 0x1329d9f7e,0x0b3e32c28 + .quad 0x1b1c69528,0x088f25a3a + .quad 0x02178513a,0x0064f7f26 + .quad 0x0e0ac139e,0x04e36f0b0 + .quad 0x0170076fa,0x0dd7e3b0c + .quad 
0x141a1a2e2,0x0bd6f81f8 + .quad 0x16ad828b4,0x0f285651c + .quad 0x041d17b64,0x19425cbba + .quad 0x1fae1cc66,0x010746f3c + .quad 0x1a75b4b00,0x18db37e8a + .quad 0x0f872e54c,0x1c24afea4 + .quad 0x01e41e9fc,0x04c144932 + .quad 0x086d8e4d2,0x0271d9844 + .quad 0x160f7af7a,0x052148f02 + .quad 0x05bb8f1bc,0x08e766a0c + .quad 0x0a90fd27a,0x0a3c6f37a + .quad 0x0b3af077a,0x093a5f730 + .quad 0x04984d782,0x1d22c238e + .quad 0x0ca6ef3ac,0x06cb08e5c + .quad 0x0234e0b26,0x063ded06a + .quad 0x1d88abd4a,0x06b749fb2 + .quad 0x04597456a,0x04d56973c + .quad 0x0e9e28eb4,0x1167f94f2 + .quad 0x07b3ff57a,0x19385bf2e + .quad 0x0c9c8b782,0x0cec3662e + .quad 0x13a9cba9e,0x0e417f38a + .quad 0x093e106a4,0x19329634a + .quad 0x167001a9c,0x14e727980 + .quad 0x1ddffc5d4,0x0e6fc4e6a + .quad 0x00df04680,0x0d104b8fc + .quad 0x02342001e,0x08227bb8a + .quad 0x00a2a8d7e,0x05b397730 + .quad 0x168763fa6,0x0b0cd4768 + .quad 0x1ed5a407a,0x0e78eb416 + .quad 0x0d2c3ed1a,0x13c2b89c4 + .quad 0x0995a5724,0x1641378f0 + .quad 0x19b1afbc4,0x0d7a4825c + .quad 0x109ffedc0,0x08d96551c + .quad 0x0f2271e60,0x10f5ff2ba + .quad 0x00b0bf8ca,0x00bf80dd2 + .quad 0x123888b7a,0x00167d312 + .quad 0x1e888f7dc,0x18dcddd1c + .quad 0x002ee03b2,0x0f6076544 + .quad 0x183e8d8fe,0x06a45d2b2 + .quad 0x133d7a042,0x026f6a60a + .quad 0x116b0f50c,0x1dd3e10e8 + .quad 0x05fabe670,0x1a2adb74e + .quad 0x130004488,0x0de87806c + .quad 0x000bcf5f6,0x19d34af3a + .quad 0x18f0c7078,0x014338754 + .quad 0x017f27698,0x049c3cc9c + .quad 0x058ca5f00,0x15e3e77ee + .quad 0x1af900c24,0x068bce87a + .quad 0x0b5cfca28,0x0dd07448e + .quad 0x0ded288f8,0x1524fa6c6 + .quad 0x059f229bc,0x1d8048348 + .quad 0x06d390dec,0x16cba8aca + .quad 0x037170390,0x0a3e3e02c + .quad 0x06353c1cc,0x042d98888 + .quad 0x0c4584f5c,0x0d73c7bea + .quad 0x1f16a3418,0x1329d9f7e + .quad 0x0531377e2,0x185137662 + .quad 0x1d8d9ca7c,0x1b1c69528 + .quad 0x0b25b29f2,0x18a08b5bc + .quad 0x19fb2a8b0,0x02178513a + .quad 0x1a08fe6ac,0x1da758ae0 + .quad 0x045cddf4e,0x0e0ac139e + .quad 0x1a91647f2,0x169cf9eb0 + .quad 0x1a0f717c4,0x0170076fa diff --git a/crypto/Kconfig b/crypto/Kconfig index 6563366bae80..b901b590635f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -324,9 +324,19 @@ config CRYPTO_CRC32C by iSCSI for header and data digests and by others. See Castagnoli93. Module will be crc32c. +config CRYPTO_CRC32C_X86_64 + bool + depends on X86 && 64BIT + select CRYPTO_HASH + help + In Intel processor with SSE4.2 supported, the processor will + support CRC32C calculation using hardware accelerated CRC32 + instruction optimized with PCLMULQDQ instruction when available. + config CRYPTO_CRC32C_INTEL tristate "CRC32c INTEL hardware acceleration" depends on X86 + select CRYPTO_CRC32C_X86_64 if 64BIT select CRYPTO_HASH help In Intel processor with SSE4.2 supported, the processor will From e3899e4df02720fcc8bc22320ce32a7c5594a585 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Thu, 27 Sep 2012 15:44:24 -0700 Subject: [PATCH 0304/5831] crypto: tcrypt - Added speed test in tcrypt for crc32c This patch adds a test case in tcrypt to perform speed test for crc32c checksum calculation. 
Signed-off-by: Tim Chen Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index e87fa60f5831..7a3c4500f06d 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1479,6 +1479,10 @@ static int do_test(int m) test_hash_speed("ghash-generic", sec, hash_speed_template_16); if (mode > 300 && mode < 400) break; + case 319: + test_hash_speed("crc32c", sec, generic_hash_speed_template); + if (mode > 300 && mode < 400) break; + case 399: break; From 7291a932c6e27d9768e374e9d648086636daf61c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 28 Sep 2012 12:52:25 +0800 Subject: [PATCH 0305/5831] crypto: talitos - convert to use be16_add_cpu() Convert cpu_to_be16(be16_to_cpu(E1) + E2) to use be16_add_cpu(). dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index da1112765a44..09b184adf31b 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -936,8 +936,7 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count, sg_count--; link_tbl_ptr--; } - link_tbl_ptr->len = cpu_to_be16(be16_to_cpu(link_tbl_ptr->len) - + cryptlen); + be16_add_cpu(&link_tbl_ptr->len, cryptlen); /* tag end of link table */ link_tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; From ba1ee070909fae01248b8117da1706f3cf2bfd1b Mon Sep 17 00:00:00 2001 From: Salman Qazi Date: Fri, 5 Oct 2012 14:24:14 -0700 Subject: [PATCH 0306/5831] crypto: vmac - Make VMAC work when blocks aren't aligned VMAC implementation, as it is, does not work with blocks that are not multiples of 128-bytes. Furthermore, this is a problem when using the implementation on scatterlists, even when the complete plain text is 128-byte multiple, as the pieces that get passed to vmac_update can be pretty much any size. I also added test cases for unaligned blocks. 
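For illustration, the buffering approach can be sketched as below; this is a simplified model, not the driver code itself, and it assumes the partial[]/partial_size fields this patch adds to struct vmac_ctx_t together with the 128-byte VMAC_NHBYTES block size:

static void buffered_update(struct vmac_ctx_t *ctx,
			    const unsigned char *p, unsigned int len)
{
	/* top up the carried-over partial block first */
	unsigned int room = VMAC_NHBYTES - ctx->partial_size;
	unsigned int fill = len < room ? len : room;

	memcpy(ctx->partial + ctx->partial_size, p, fill);
	ctx->partial_size += fill;
	p += fill;
	len -= fill;

	if (ctx->partial_size < VMAC_NHBYTES)
		return;		/* still less than one NH block buffered */

	/* hash the completed block, then every remaining whole block */
	vhash_update(ctx->partial, VMAC_NHBYTES, &ctx->__vmac_ctx);
	ctx->partial_size = 0;
	vhash_update(p, len - len % VMAC_NHBYTES, &ctx->__vmac_ctx);

	/* carry the tail over to the next update (or to final) */
	memcpy(ctx->partial, p + len - len % VMAC_NHBYTES, len % VMAC_NHBYTES);
	ctx->partial_size = len % VMAC_NHBYTES;
}

The early return for a zero-length input added to vhash_update() below is what keeps the whole-blocks call safe when no full block remains.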
Signed-off-by: Salman Qazi Signed-off-by: Herbert Xu --- crypto/testmgr.h | 33 +++++++++++++++++++++++++++++- crypto/vmac.c | 47 +++++++++++++++++++++++++++++++++++++++---- include/crypto/vmac.h | 2 ++ 3 files changed, 77 insertions(+), 5 deletions(-) diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 76d7f6cc82f5..f8365e913de9 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -1707,7 +1707,7 @@ static struct hash_testvec aes_xcbc128_tv_template[] = { } }; -#define VMAC_AES_TEST_VECTORS 8 +#define VMAC_AES_TEST_VECTORS 11 static char vmac_string1[128] = {'\x01', '\x01', '\x01', '\x01', '\x02', '\x03', '\x02', '\x02', '\x02', '\x04', '\x01', '\x07', @@ -1723,6 +1723,19 @@ static char vmac_string3[128] = {'a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c', }; +static char vmac_string4[17] = {'b', 'c', 'e', 'f', + 'i', 'j', 'l', 'm', + 'o', 'p', 'r', 's', + 't', 'u', 'w', 'x', 'z'}; + +static char vmac_string5[127] = {'r', 'm', 'b', 't', 'c', + 'o', 'l', 'k', ']', '%', + '9', '2', '7', '!', 'A'}; + +static char vmac_string6[129] = {'p', 't', '*', '7', 'l', + 'i', '!', '#', 'w', '0', + 'z', '/', '4', 'A', 'n'}; + static struct hash_testvec aes_vmac128_tv_template[] = { { .key = "\x00\x01\x02\x03\x04\x05\x06\x07" @@ -1776,6 +1789,24 @@ static struct hash_testvec aes_vmac128_tv_template[] = { .digest = "\x8b\x32\x8f\xe1\xed\x8f\xfa\xd4", .psize = 128, .ksize = 16, + }, { + .key = "a09b5cd!f#07K\x00\x00\x00", + .plaintext = vmac_string4, + .digest = "\xab\xa5\x0f\xea\x42\x4e\xa1\x5f", + .psize = sizeof(vmac_string4), + .ksize = 16, + }, { + .key = "a09b5cd!f#07K\x00\x00\x00", + .plaintext = vmac_string5, + .digest = "\x25\x31\x98\xbc\x1d\xe8\x67\x60", + .psize = sizeof(vmac_string5), + .ksize = 16, + }, { + .key = "a09b5cd!f#07K\x00\x00\x00", + .plaintext = vmac_string6, + .digest = "\xc4\xae\x9b\x47\x95\x65\xeb\x41", + .psize = sizeof(vmac_string6), + .ksize = 16, }, }; diff --git a/crypto/vmac.c b/crypto/vmac.c index f2338ca98368..2eb11a30c29c 100644 --- a/crypto/vmac.c +++ b/crypto/vmac.c @@ -375,6 +375,11 @@ static void vhash_update(const unsigned char *m, u64 pkh = ctx->polykey[0]; u64 pkl = ctx->polykey[1]; + if (!mbytes) + return; + + BUG_ON(mbytes % VMAC_NHBYTES); + mptr = (u64 *)m; i = mbytes / VMAC_NHBYTES; /* Must be non-zero */ @@ -454,7 +459,7 @@ do_l3: } static u64 vmac(unsigned char m[], unsigned int mbytes, - unsigned char n[16], u64 *tagl, + const unsigned char n[16], u64 *tagl, struct vmac_ctx_t *ctx) { u64 *in_n, *out_p; @@ -559,8 +564,33 @@ static int vmac_update(struct shash_desc *pdesc, const u8 *p, { struct crypto_shash *parent = pdesc->tfm; struct vmac_ctx_t *ctx = crypto_shash_ctx(parent); + int expand; + int min; - vhash_update(p, len, &ctx->__vmac_ctx); + expand = VMAC_NHBYTES - ctx->partial_size > 0 ? + VMAC_NHBYTES - ctx->partial_size : 0; + + min = len < expand ? 
len : expand; + + memcpy(ctx->partial + ctx->partial_size, p, min); + ctx->partial_size += min; + + if (len < expand) + return 0; + + vhash_update(ctx->partial, VMAC_NHBYTES, &ctx->__vmac_ctx); + ctx->partial_size = 0; + + len -= expand; + p += expand; + + if (len % VMAC_NHBYTES) { + memcpy(ctx->partial, p + len - (len % VMAC_NHBYTES), + len % VMAC_NHBYTES); + ctx->partial_size = len % VMAC_NHBYTES; + } + + vhash_update(p, len - len % VMAC_NHBYTES, &ctx->__vmac_ctx); return 0; } @@ -572,10 +602,20 @@ static int vmac_final(struct shash_desc *pdesc, u8 *out) vmac_t mac; u8 nonce[16] = {}; - mac = vmac(NULL, 0, nonce, NULL, ctx); + /* vmac() ends up accessing outside the array bounds that + * we specify. In appears to access up to the next 2-word + * boundary. We'll just be uber cautious and zero the + * unwritten bytes in the buffer. + */ + if (ctx->partial_size) { + memset(ctx->partial + ctx->partial_size, 0, + VMAC_NHBYTES - ctx->partial_size); + } + mac = vmac(ctx->partial, ctx->partial_size, nonce, NULL, ctx); memcpy(out, &mac, sizeof(vmac_t)); memset(&mac, 0, sizeof(vmac_t)); memset(&ctx->__vmac_ctx, 0, sizeof(struct vmac_ctx)); + ctx->partial_size = 0; return 0; } @@ -673,4 +713,3 @@ module_exit(vmac_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("VMAC hash algorithm"); - diff --git a/include/crypto/vmac.h b/include/crypto/vmac.h index c4467c55df1e..6b700c7b2fe1 100644 --- a/include/crypto/vmac.h +++ b/include/crypto/vmac.h @@ -56,6 +56,8 @@ typedef u64 vmac_t; struct vmac_ctx_t { struct crypto_cipher *child; struct vmac_ctx __vmac_ctx; + u8 partial[VMAC_NHBYTES]; /* partial block */ + int partial_size; /* size of the partial block */ }; #endif /* __CRYPTO_VMAC_H */ From b6755ffb0c03c2e763cc56f72cc2ceaba8ea2805 Mon Sep 17 00:00:00 2001 From: Sachin Bhamare Date: Thu, 9 Feb 2012 17:09:32 -0800 Subject: [PATCH 0307/5831] osduld: Add osdname & systemid sysfs at scsi_osd class This patch adds the support for following two read-only sysfs attributes to scsi_osd class members : osdname & systemid These attributes will show up as below in sysfs class hierarchy: /sys/class/scsi_osd/osdX/osdname /sys/class/scsi_osd/osdX/systemid The osdname & systemid are OSD device attributes which uniquely identify a device on the network, while it's IP and certainly it's /dev/osdX device path might change. Userspace utilities (e.g. mkfs.exofs) can parse these attributes to identify the correct OSD in safer and faster way. 
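As a rough illustration of the intended consumer, a userspace tool can resolve a device with a plain sysfs read; the helper below is hypothetical and only sketches the idea ("osd0" is a placeholder instance name):

#include <stdio.h>

/* Hypothetical helper, not part of this patch: read the osdname
 * attribute of one scsi_osd class member, e.g. "osd0". */
static int read_osdname(const char *osd, char *buf, size_t len)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/class/scsi_osd/%s/osdname", osd);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (!fgets(buf, len, f))
		buf[0] = '\0';
	fclose(f);
	return 0;
}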
(Today osd apps open each device in the system and send a attributes query for these, in order to access the user requested device) Signed-off-by: Sachin Bhamare Signed-off-by: Boaz Harrosh --- drivers/scsi/osd/osd_uld.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/scsi/osd/osd_uld.c b/drivers/scsi/osd/osd_uld.c index d4ed9eb52657..43754176a7b7 100644 --- a/drivers/scsi/osd/osd_uld.c +++ b/drivers/scsi/osd/osd_uld.c @@ -97,9 +97,37 @@ struct osd_dev_handle { static DEFINE_IDA(osd_minor_ida); +/* + * scsi sysfs attribute operations + */ +static ssize_t osdname_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct osd_uld_device *ould = container_of(dev, struct osd_uld_device, + class_dev); + return sprintf(buf, "%s\n", ould->odi.osdname); +} + +static ssize_t systemid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct osd_uld_device *ould = container_of(dev, struct osd_uld_device, + class_dev); + + memcpy(buf, ould->odi.systemid, ould->odi.systemid_len); + return ould->odi.systemid_len; +} + +static struct device_attribute osd_uld_attrs[] = { + __ATTR(osdname, S_IRUGO, osdname_show, NULL), + __ATTR(systemid, S_IRUGO, systemid_show, NULL), + __ATTR_NULL, +}; + static struct class osd_uld_class = { .owner = THIS_MODULE, .name = "scsi_osd", + .dev_attrs = osd_uld_attrs, }; /* From be5b779ae9ce64ede0a8f4939360b0320bb257e2 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 15 Oct 2012 23:42:55 +0200 Subject: [PATCH 0308/5831] random: make it possible to enable debugging without rebuild The module parameter that turns debugging mode (which basically means printing a few extra lines during runtime) is in '#if 0' block. Forcing everyone who would like to see how entropy is behaving on his system to rebuild seems to be a little bit too harsh. If we were concerned about speed, we could potentially turn 'debug' into a static key, but I don't think it's necessary. Drop the '#if 0' block to allow using the 'debug' parameter without rebuilding. Signed-off-by: Jiri Kosina Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index b86eae9b77df..9ac4443a1854 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -399,7 +399,6 @@ static DECLARE_WAIT_QUEUE_HEAD(random_read_wait); static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); static struct fasync_struct *fasync; -#if 0 static bool debug; module_param(debug, bool, 0644); #define DEBUG_ENT(fmt, arg...) do { \ @@ -410,9 +409,6 @@ module_param(debug, bool, 0644); blocking_pool.entropy_count,\ nonblocking_pool.entropy_count,\ ## arg); } while (0) -#else -#define DEBUG_ENT(fmt, arg...) do {} while (0) -#endif /********************************************************************** * From 42c0526c930523425ff6edc95b7235ce7ab9308d Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 26 Sep 2012 10:34:00 -0700 Subject: [PATCH 0309/5831] drm/i915: Extract PCU communication There is a special mechanism for communicating with the PCU already being used for the ring frequency stuff. As we'll be needing this for other commands, extract it now to make future code less error prone and the current code more reusable. I'm not entirely sure if this code matches 1:1 with the previous code behaviorally. Functionally however, it should be the same. 
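With the helpers extracted, a call site only deals with a mailbox command and a value. The ring-frequency readout in debugfs, for instance, reduces to roughly the following (illustrative only; it mirrors the hunk in the diff):

	u32 ia_freq = gpu_freq;	/* request goes in, result comes back */
	int ret;

	ret = sandybridge_pcode_read(dev_priv,
				     GEN6_PCODE_READ_MIN_FREQ_TABLE, &ia_freq);
	if (ret)
		DRM_DEBUG_DRIVER("pcode read failed: %d\n", ret);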
CC: Jesse Barnes Signed-off-by: Ben Widawsky Reviewed-by: Jesse Barnes [danvet: Fixup compile fail reported by Wu Fengguang.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 13 ++-- drivers/gpu/drm/i915/i915_drv.h | 3 + drivers/gpu/drm/i915/intel_pm.c | 96 ++++++++++++++++++----------- 3 files changed, 68 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 7a4b3f3aad3f..66475855c0ec 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1283,15 +1283,10 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) for (gpu_freq = dev_priv->rps.min_delay; gpu_freq <= dev_priv->rps.max_delay; gpu_freq++) { - I915_WRITE(GEN6_PCODE_DATA, gpu_freq); - I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | - GEN6_PCODE_READ_MIN_FREQ_TABLE); - if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & - GEN6_PCODE_READY) == 0, 10)) { - DRM_ERROR("pcode read of freq table timed out\n"); - continue; - } - ia_freq = I915_READ(GEN6_PCODE_DATA); + ia_freq = gpu_freq; + sandybridge_pcode_read(dev_priv, + GEN6_PCODE_READ_MIN_FREQ_TABLE, + &ia_freq); seq_printf(m, "%d\t\t%d\n", gpu_freq * GT_FREQUENCY_MULTIPLIER, ia_freq * 100); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9e446b621642..32814340f763 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1630,6 +1630,9 @@ void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv); void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv); int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv); +int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val); +int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val); + #define __i915_read(x, y) \ u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 07da990eb77d..0cc7dbf55bee 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2407,7 +2407,7 @@ static void gen6_enable_rps(struct drm_device *dev) u32 pcu_mbox, rc6_mask = 0; u32 gtfifodbg; int rc6_mode; - int i; + int i, ret; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -2503,30 +2503,16 @@ static void gen6_enable_rps(struct drm_device *dev) GEN6_RP_UP_BUSY_AVG | (IS_HASWELL(dev) ? 
GEN7_RP_DOWN_IDLE_AVG : GEN6_RP_DOWN_IDLE_CONT)); - if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, - 500)) - DRM_ERROR("timeout waiting for pcode mailbox to become idle\n"); - - I915_WRITE(GEN6_PCODE_DATA, 0); - I915_WRITE(GEN6_PCODE_MAILBOX, - GEN6_PCODE_READY | - GEN6_PCODE_WRITE_MIN_FREQ_TABLE); - if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, - 500)) - DRM_ERROR("timeout waiting for pcode mailbox to finish\n"); - - /* Check for overclock support */ - if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, - 500)) - DRM_ERROR("timeout waiting for pcode mailbox to become idle\n"); - I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_READ_OC_PARAMS); - pcu_mbox = I915_READ(GEN6_PCODE_DATA); - if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, - 500)) - DRM_ERROR("timeout waiting for pcode mailbox to finish\n"); - if (pcu_mbox & (1<<31)) { /* OC supported */ - dev_priv->rps.max_delay = pcu_mbox & 0xff; - DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 50); + ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0); + if (!ret) { + pcu_mbox = 0; + ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox); + if (ret && pcu_mbox & (1<<31)) { /* OC supported */ + dev_priv->rps.max_delay = pcu_mbox & 0xff; + DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 50); + } + } else { + DRM_DEBUG_DRIVER("Failed to set the min frequency\n"); } gen6_set_rps(dev_priv->dev, (gt_perf_status & 0xff00) >> 8); @@ -2581,17 +2567,11 @@ static void gen6_update_ring_freq(struct drm_device *dev) else ia_freq = max_ia_freq - ((diff * scaling_factor) / 2); ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); + ia_freq <<= GEN6_PCODE_FREQ_IA_RATIO_SHIFT; - I915_WRITE(GEN6_PCODE_DATA, - (ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT) | - gpu_freq); - I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | - GEN6_PCODE_WRITE_MIN_FREQ_TABLE); - if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & - GEN6_PCODE_READY) == 0, 10)) { - DRM_ERROR("pcode write of freq table timed out\n"); - continue; - } + sandybridge_pcode_write(dev_priv, + GEN6_PCODE_WRITE_MIN_FREQ_TABLE, + ia_freq | gpu_freq); } } @@ -4152,3 +4132,49 @@ void intel_gt_init(struct drm_device *dev) } } +int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val) +{ + WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); + + if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { + DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n"); + return -EAGAIN; + } + + I915_WRITE(GEN6_PCODE_DATA, *val); + I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); + + if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, + 500)) { + DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox); + return -ETIMEDOUT; + } + + *val = I915_READ(GEN6_PCODE_DATA); + I915_WRITE(GEN6_PCODE_DATA, 0); + + return 0; +} + +int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val) +{ + WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); + + if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { + DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n"); + return -EAGAIN; + } + + I915_WRITE(GEN6_PCODE_DATA, val); + I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); + + if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, + 500)) { + DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox); + return -ETIMEDOUT; + 
} + + I915_WRITE(GEN6_PCODE_DATA, 0); + + return 0; +} From 31643d54a739382626c27c0f2a12b3bbc22d1a38 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 26 Sep 2012 10:34:01 -0700 Subject: [PATCH 0310/5831] drm/i915: Workaround to bump rc6 voltage to 450 BIOS should be setting the minimum voltage for rc6 to be 450mV. Old or buggy BIOSen may not be doing this, so we correct it for them. Ideally customers should update the BIOS as only it would know the optimal values for the platform, so we leave that fact as a DRM_ERROR for the user to see. Unfortunately this isn't fixing any of the issues it was targeted to fix, but it is documented that we must do it. CC: Jesse Barnes CC: Matt Turner Signed-off-by: Ben Widawsky Reviewed-by: Jesse Barnes [danvet: bikeshedded loglevel of the "your bios is broken message" to debug.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 4 ++++ drivers/gpu/drm/i915/intel_pm.c | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8200c317f1fd..d7f516c4855a 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4225,6 +4225,10 @@ #define GEN6_READ_OC_PARAMS 0xc #define GEN6_PCODE_WRITE_MIN_FREQ_TABLE 0x8 #define GEN6_PCODE_READ_MIN_FREQ_TABLE 0x9 +#define GEN6_PCODE_WRITE_RC6VIDS 0x4 +#define GEN6_PCODE_READ_RC6VIDS 0x5 +#define GEN6_ENCODE_RC6_VID(mv) (((mv) / 5) - 245) < 0 ?: 0 +#define GEN6_DECODE_RC6_VID(vids) (((vids) * 5) > 0 ? ((vids) * 5) + 245 : 0) #define GEN6_PCODE_DATA 0x138128 #define GEN6_PCODE_FREQ_IA_RATIO_SHIFT 8 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0cc7dbf55bee..a56ca428c38e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2404,7 +2404,7 @@ static void gen6_enable_rps(struct drm_device *dev) struct intel_ring_buffer *ring; u32 rp_state_cap; u32 gt_perf_status; - u32 pcu_mbox, rc6_mask = 0; + u32 rc6vids, pcu_mbox, rc6_mask = 0; u32 gtfifodbg; int rc6_mode; int i, ret; @@ -2526,6 +2526,20 @@ static void gen6_enable_rps(struct drm_device *dev) /* enable all PM interrupts */ I915_WRITE(GEN6_PMINTRMSK, 0); + rc6vids = 0; + ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); + if (IS_GEN6(dev) && ret) { + DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); + } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { + DRM_DEBUG_DRIVER("You should update your BIOS. 
Correcting minimum rc6 voltage (%dmV->%dmV)\n", + GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); + rc6vids &= 0xffff00; + rc6vids |= GEN6_ENCODE_RC6_VID(450); + ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); + if (ret) + DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); + } + gen6_gt_force_wake_put(dev_priv); } From ecd8faeac4897619e6b26fbcdce62bb732d5a571 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 26 Sep 2012 10:34:02 -0700 Subject: [PATCH 0311/5831] drm/i915: Add rc6vids to debugfs CC: Jesse Barnes Signed-off-by: Ben Widawsky Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 66475855c0ec..3c5710f95a1d 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1069,7 +1069,7 @@ static int gen6_drpc_info(struct seq_file *m) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = dev->dev_private; - u32 rpmodectl1, gt_core_status, rcctl1; + u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0; unsigned forcewake_count; int count=0, ret; @@ -1097,6 +1097,7 @@ static int gen6_drpc_info(struct seq_file *m) rpmodectl1 = I915_READ(GEN6_RP_CONTROL); rcctl1 = I915_READ(GEN6_RC_CONTROL); + sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); mutex_unlock(&dev->struct_mutex); seq_printf(m, "Video Turbo Mode: %s\n", @@ -1149,6 +1150,12 @@ static int gen6_drpc_info(struct seq_file *m) seq_printf(m, "RC6++ residency since boot: %u\n", I915_READ(GEN6_GT_GFX_RC6pp)); + seq_printf(m, "RC6 voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff))); + seq_printf(m, "RC6+ voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff))); + seq_printf(m, "RC6++ voltage: %dmV\n", + GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff))); return 0; } From da6c56873927418aba585038e5c4d3d72a4d9b6c Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 16 Oct 2012 13:51:21 +0200 Subject: [PATCH 0312/5831] OMAPDSS: Correct check for the callback pointer in dss_dsi_disable_pads() Appear to be a copy-paste bug: the code was checking board_data->dsi_enable_pads while calling board_data->dsi_disable_pads. Signed-off-by: Peter Ujfalusi Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/dss/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/omap2/dss/core.c b/drivers/video/omap2/dss/core.c index 3794147b6eb0..d1ed2b4a5a2e 100644 --- a/drivers/video/omap2/dss/core.c +++ b/drivers/video/omap2/dss/core.c @@ -117,7 +117,7 @@ void dss_dsi_disable_pads(int dsi_id, unsigned lane_mask) { struct omap_dss_board_info *board_data = core.pdev->dev.platform_data; - if (!board_data->dsi_enable_pads) + if (!board_data->dsi_disable_pads) return; return board_data->dsi_disable_pads(dsi_id, lane_mask); From 9f667bff0f48d01a4474fa879cb384331584a4ee Mon Sep 17 00:00:00 2001 From: Thomas Abraham Date: Wed, 3 Oct 2012 08:30:12 +0900 Subject: [PATCH 0313/5831] spi/s3c64xx: use clk_prepare_enable and clk_disable_unprepare Convert clk_enable/clk_disable to clk_prepare_enable/clk_disable_unprepare calls as required by common clock framework. 
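The conversion is mechanical: with the common clock framework a clock has to be prepared (a possibly sleeping step) before it may be enabled, and the combined helpers do both in one call. Schematically:

	/* clk_prepare_enable(clk) is shorthand for: */
	ret = clk_prepare(clk);		/* may sleep, not for atomic context */
	if (!ret)
		ret = clk_enable(clk);	/* fast, atomic-safe part */

	/* clk_disable_unprepare(clk) is shorthand for: */
	clk_disable(clk);
	clk_unprepare(clk);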
Signed-off-by: Thomas Abraham Signed-off-by: Mark Brown --- drivers/spi/spi-s3c64xx.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index 1a81c90a4a71..06a5fe777434 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -516,7 +516,7 @@ static void s3c64xx_spi_config(struct s3c64xx_spi_driver_data *sdd) /* Disable Clock */ if (sdd->port_conf->clk_from_cmu) { - clk_disable(sdd->src_clk); + clk_disable_unprepare(sdd->src_clk); } else { val = readl(regs + S3C64XX_SPI_CLK_CFG); val &= ~S3C64XX_SPI_ENCLK_ENABLE; @@ -564,7 +564,7 @@ static void s3c64xx_spi_config(struct s3c64xx_spi_driver_data *sdd) /* There is half-multiplier before the SPI */ clk_set_rate(sdd->src_clk, sdd->cur_speed * 2); /* Enable Clock */ - clk_enable(sdd->src_clk); + clk_prepare_enable(sdd->src_clk); } else { /* Configure Clock */ val = readl(regs + S3C64XX_SPI_CLK_CFG); @@ -1302,7 +1302,7 @@ static int __init s3c64xx_spi_probe(struct platform_device *pdev) goto err3; } - if (clk_enable(sdd->clk)) { + if (clk_prepare_enable(sdd->clk)) { dev_err(&pdev->dev, "Couldn't enable clock 'spi'\n"); ret = -EBUSY; goto err4; @@ -1317,7 +1317,7 @@ static int __init s3c64xx_spi_probe(struct platform_device *pdev) goto err5; } - if (clk_enable(sdd->src_clk)) { + if (clk_prepare_enable(sdd->src_clk)) { dev_err(&pdev->dev, "Couldn't enable clock '%s'\n", clk_name); ret = -EBUSY; goto err6; @@ -1361,11 +1361,11 @@ static int __init s3c64xx_spi_probe(struct platform_device *pdev) err8: free_irq(irq, sdd); err7: - clk_disable(sdd->src_clk); + clk_disable_unprepare(sdd->src_clk); err6: clk_put(sdd->src_clk); err5: - clk_disable(sdd->clk); + clk_disable_unprepare(sdd->clk); err4: clk_put(sdd->clk); err3: @@ -1393,10 +1393,10 @@ static int s3c64xx_spi_remove(struct platform_device *pdev) free_irq(platform_get_irq(pdev, 0), sdd); - clk_disable(sdd->src_clk); + clk_disable_unprepare(sdd->src_clk); clk_put(sdd->src_clk); - clk_disable(sdd->clk); + clk_disable_unprepare(sdd->clk); clk_put(sdd->clk); if (!sdd->cntrlr_info->cfg_gpio && pdev->dev.of_node) @@ -1417,8 +1417,8 @@ static int s3c64xx_spi_suspend(struct device *dev) spi_master_suspend(master); /* Disable the clock */ - clk_disable(sdd->src_clk); - clk_disable(sdd->clk); + clk_disable_unprepare(sdd->src_clk); + clk_disable_unprepare(sdd->clk); if (!sdd->cntrlr_info->cfg_gpio && dev->of_node) s3c64xx_spi_dt_gpio_free(sdd); @@ -1440,8 +1440,8 @@ static int s3c64xx_spi_resume(struct device *dev) sci->cfg_gpio(); /* Enable the clock */ - clk_enable(sdd->src_clk); - clk_enable(sdd->clk); + clk_prepare_enable(sdd->src_clk); + clk_prepare_enable(sdd->clk); s3c64xx_spi_hwinit(sdd, sdd->port_id); @@ -1457,8 +1457,8 @@ static int s3c64xx_spi_runtime_suspend(struct device *dev) struct spi_master *master = dev_get_drvdata(dev); struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(master); - clk_disable(sdd->clk); - clk_disable(sdd->src_clk); + clk_disable_unprepare(sdd->clk); + clk_disable_unprepare(sdd->src_clk); return 0; } @@ -1468,8 +1468,8 @@ static int s3c64xx_spi_runtime_resume(struct device *dev) struct spi_master *master = dev_get_drvdata(dev); struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(master); - clk_enable(sdd->src_clk); - clk_enable(sdd->clk); + clk_prepare_enable(sdd->src_clk); + clk_prepare_enable(sdd->clk); return 0; } From 61d15963176d44ba898913389b65588e717b7c59 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 3 Oct 2012 
11:56:53 +0200 Subject: [PATCH 0314/5831] spi/bcm63xx: remove driver version As Grant Likely reported, this does not make any sense in a mainline kernel remove that driver version string. Reported-by: Grant Likely Signed-off-by: Florian Fainelli Signed-off-by: Mark Brown --- drivers/spi/spi-bcm63xx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c index a9f4049c6769..af191f560136 100644 --- a/drivers/spi/spi-bcm63xx.c +++ b/drivers/spi/spi-bcm63xx.c @@ -36,7 +36,6 @@ #include #define PFX KBUILD_MODNAME -#define DRV_VER "0.1.2" struct bcm63xx_spi { struct completion done; @@ -441,8 +440,8 @@ static int __devinit bcm63xx_spi_probe(struct platform_device *pdev) goto out_clk_disable; } - dev_info(dev, "at 0x%08x (irq %d, FIFOs size %d) v%s\n", - r->start, irq, bs->fifo_size, DRV_VER); + dev_info(dev, "at 0x%08x (irq %d, FIFOs size %d)\n", + r->start, irq, bs->fifo_size); return 0; From 965199573a9a50dae82ada92eb2b808b739ce949 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 3 Oct 2012 11:56:54 +0200 Subject: [PATCH 0315/5831] spi/bcm63xx: add missing spi_master_{resume,suspend} calls to PM callbacks The PM callbacks implemented by the spi-bcm63xx driver don't call spi_master_{resume,suspend}, fix that. Signed-off-by: Florian Fainelli Signed-off-by: Mark Brown --- drivers/spi/spi-bcm63xx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c index af191f560136..c7b569567f12 100644 --- a/drivers/spi/spi-bcm63xx.c +++ b/drivers/spi/spi-bcm63xx.c @@ -484,6 +484,8 @@ static int bcm63xx_spi_suspend(struct device *dev) platform_get_drvdata(to_platform_device(dev)); struct bcm63xx_spi *bs = spi_master_get_devdata(master); + spi_master_suspend(master); + clk_disable(bs->clk); return 0; @@ -497,6 +499,8 @@ static int bcm63xx_spi_resume(struct device *dev) clk_enable(bs->clk); + spi_master_resume(master); + return 0; } From b308fb208e23c3e765cefa45eb868c40c5b9c3e7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 3 Oct 2012 11:56:55 +0200 Subject: [PATCH 0316/5831] spi/bcm63xx: remove useless call to bcm63xx_spi_check_transfer() Calling bcm63xx_spi_check_transfer() with a NULL argument does not do anything useful that the core spi code is not doing already, remove this superfluous call. Signed-off-by: Florian Fainelli Signed-off-by: Mark Brown --- drivers/spi/spi-bcm63xx.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c index c7b569567f12..6d97047d9242 100644 --- a/drivers/spi/spi-bcm63xx.c +++ b/drivers/spi/spi-bcm63xx.c @@ -169,13 +169,6 @@ static int bcm63xx_spi_setup(struct spi_device *spi) return -EINVAL; } - ret = bcm63xx_spi_check_transfer(spi, NULL); - if (ret < 0) { - dev_err(&spi->dev, "setup: unsupported mode bits %x\n", - spi->mode & ~MODEBITS); - return ret; - } - dev_dbg(&spi->dev, "%s, mode %d, %u bits/w, %u nsec/bit\n", __func__, spi->mode & MODEBITS, spi->bits_per_word, 0); From 0384e90b853357d24935c65ba0e1bdd27faa6e58 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sun, 7 Oct 2012 18:19:44 +0200 Subject: [PATCH 0317/5831] spi/mcspi: allow configuration of pin directions Allow D0 to be an input and D1 to be an output, configurable via platform data and a new DT property. 
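For boards using platform data, selecting the swapped directions would look roughly like the fragment below (hypothetical board file, using the pin_dir field and MCSPI_PINDIR_D0_IN_D1_OUT constant added by this patch); devicetree users get the same effect from the new ti,pindir-d0-in-d1-out property documented in the binding update.

/* hypothetical board file fragment: D0 as input, D1 as output */
static struct omap2_mcspi_platform_config board_mcspi1_config = {
	.num_cs		= 1,
	.pin_dir	= MCSPI_PINDIR_D0_IN_D1_OUT,
};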
Based on a patch from Matus Ujhelyi Signed-off-by: Daniel Mack Signed-off-by: Mark Brown --- .../devicetree/bindings/spi/omap-spi.txt | 4 ++- drivers/spi/spi-omap2-mcspi.c | 25 +++++++++++++------ include/linux/platform_data/spi-omap2-mcspi.h | 4 +++ 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/omap-spi.txt b/Documentation/devicetree/bindings/spi/omap-spi.txt index 81df374adbb9..2ef0a6b85653 100644 --- a/Documentation/devicetree/bindings/spi/omap-spi.txt +++ b/Documentation/devicetree/bindings/spi/omap-spi.txt @@ -6,7 +6,9 @@ Required properties: - "ti,omap4-spi" for OMAP4+. - ti,spi-num-cs : Number of chipselect supported by the instance. - ti,hwmods: Name of the hwmod associated to the McSPI - +- ti,pindir-d0-in-d1-out: Select the D0 pin as input and D1 as + output. The default is D0 as output and + D1 as input. Example: diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 3542fdc664b1..51046332677c 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -130,6 +130,7 @@ struct omap2_mcspi { struct omap2_mcspi_dma *dma_channels; struct device *dev; struct omap2_mcspi_regs ctx; + unsigned int pin_dir:1; }; struct omap2_mcspi_cs { @@ -765,8 +766,15 @@ static int omap2_mcspi_setup_transfer(struct spi_device *spi, /* standard 4-wire master mode: SCK, MOSI/out, MISO/in, nCS * REVISIT: this controller could support SPI_3WIRE mode. */ - l &= ~(OMAP2_MCSPI_CHCONF_IS|OMAP2_MCSPI_CHCONF_DPE1); - l |= OMAP2_MCSPI_CHCONF_DPE0; + if (mcspi->pin_dir == MCSPI_PINDIR_D0_OUT_D1_IN) { + l &= ~OMAP2_MCSPI_CHCONF_IS; + l &= ~OMAP2_MCSPI_CHCONF_DPE1; + l |= OMAP2_MCSPI_CHCONF_DPE0; + } else { + l |= OMAP2_MCSPI_CHCONF_IS; + l |= OMAP2_MCSPI_CHCONF_DPE1; + l &= ~OMAP2_MCSPI_CHCONF_DPE0; + } /* wordlength */ l &= ~OMAP2_MCSPI_CHCONF_WL_MASK; @@ -1167,6 +1175,11 @@ static int __devinit omap2_mcspi_probe(struct platform_device *pdev) master->cleanup = omap2_mcspi_cleanup; master->dev.of_node = node; + dev_set_drvdata(&pdev->dev, master); + + mcspi = spi_master_get_devdata(master); + mcspi->master = master; + match = of_match_device(omap_mcspi_of_match, &pdev->dev); if (match) { u32 num_cs = 1; /* default number of chipselect */ @@ -1175,19 +1188,17 @@ static int __devinit omap2_mcspi_probe(struct platform_device *pdev) of_property_read_u32(node, "ti,spi-num-cs", &num_cs); master->num_chipselect = num_cs; master->bus_num = bus_num++; + if (of_get_property(node, "ti,pindir-d0-in-d1-out", NULL)) + mcspi->pin_dir = MCSPI_PINDIR_D0_IN_D1_OUT; } else { pdata = pdev->dev.platform_data; master->num_chipselect = pdata->num_cs; if (pdev->id != -1) master->bus_num = pdev->id; + mcspi->pin_dir = pdata->pin_dir; } regs_offset = pdata->regs_offset; - dev_set_drvdata(&pdev->dev, master); - - mcspi = spi_master_get_devdata(master); - mcspi->master = master; - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (r == NULL) { status = -ENODEV; diff --git a/include/linux/platform_data/spi-omap2-mcspi.h b/include/linux/platform_data/spi-omap2-mcspi.h index a357eb26bd25..ce70f7b5a8e1 100644 --- a/include/linux/platform_data/spi-omap2-mcspi.h +++ b/include/linux/platform_data/spi-omap2-mcspi.h @@ -7,9 +7,13 @@ #define OMAP4_MCSPI_REG_OFFSET 0x100 +#define MCSPI_PINDIR_D0_OUT_D1_IN 0 +#define MCSPI_PINDIR_D0_IN_D1_OUT 1 + struct omap2_mcspi_platform_config { unsigned short num_cs; unsigned int regs_offset; + unsigned int pin_dir:1; }; struct omap2_mcspi_dev_attr { From 5015a2e1679f9f56b574f3aab7006517106da83c Mon Sep 17 
00:00:00 2001 From: Ulf Hansson Date: Fri, 5 Oct 2012 17:51:11 +0200 Subject: [PATCH 0318/5831] spi/pl022: Revert "spi/pl022: fix spi-pl022 pm enable at probe" This reverts commit 6887237cd7da904184dab2750504040c68f3a080. Commit "spi/pl022: enable runtime PM" introduced runtime PM issues. Due to that commit, "spi/pl022: fix spi-pl022 pm enable at probe" was merged to fix part of those issues. Instead of adding another fix, let's clean up and revert everything back to when it was already fine. Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij Signed-off-by: Mark Brown --- drivers/spi/spi-pl022.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c index 919464102d33..5cf0643ae74c 100644 --- a/drivers/spi/spi-pl022.c +++ b/drivers/spi/spi-pl022.c @@ -2186,6 +2186,7 @@ pl022_probe(struct amba_device *adev, const struct amba_id *id) printk(KERN_INFO "pl022: mapped registers from 0x%08x to %p\n", adev->res.start, pl022->virtbase); + pm_runtime_enable(dev); pm_runtime_resume(dev); pl022->clk = devm_clk_get(&adev->dev, NULL); From 9f4e45f77e8a532c8a7e39268e844821dbf7fe91 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 17 Oct 2012 09:00:20 +0800 Subject: [PATCH 0319/5831] regulator: vexpress: Add terminating entry for vexpress_regulator_of_match table The of_device_id table is supposed to be zero-terminated. Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- drivers/regulator/vexpress.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/vexpress.c b/drivers/regulator/vexpress.c index 1702945a93a9..1d558111d60f 100644 --- a/drivers/regulator/vexpress.c +++ b/drivers/regulator/vexpress.c @@ -126,6 +126,7 @@ static int __devexit vexpress_regulator_remove(struct platform_device *pdev) static struct of_device_id vexpress_regulator_of_match[] = { { .compatible = "arm,vexpress-volt", }, + { } }; static struct platform_driver vexpress_regulator_driver = { From 33366d0e8be06b980784c732ebace26de08e6679 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 28 Sep 2012 13:54:35 +0300 Subject: [PATCH 0320/5831] OMAPDSS: add missing sizes.h includes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling on x86, we get compilation errors for dss.c and dispc.c: drivers/video/omap2/dss/dispc.c:126:11: error: ‘SZ_4K’ undeclared here (not in a function) include to fix compilation. Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/dss/dispc.c | 1 + drivers/video/omap2/dss/dss.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/video/omap2/dss/dispc.c b/drivers/video/omap2/dss/dispc.c index a5ab354f267a..c32ec4ae8b4d 100644 --- a/drivers/video/omap2/dss/dispc.c +++ b/drivers/video/omap2/dss/dispc.c @@ -36,6 +36,7 @@ #include #include #include +#include #include