Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block layer fixes from Jens Axboe:
"A small collection of fixes that should go in before -rc1. The pull
request contains:

- A two-patch fix for a regression with block enabled tagging caused
by a commit in the initial pull request. One patch is from Martin
and ensures that SCSI doesn't truncate 64-bit block flags, the
other one is from me and prevents us from double using struct
request queuelist for both completion and busy tags. This caused
anything from a boot crash for some, to crashes under load.

- A blk-mq fix for a potential soft stall when hot unplugging CPUs
with busy IO.

- A percpu_counter fix is also included here; it resolves a suspend
issue with virtio-blk caused by percpu counters having an inconsistent
state during CPU removal. Andrew sent this in separately a few days
ago, but it's here. JFYI.

- A few fixes for block integrity from Martin.

- A ratelimit fix for loop from Mike Galbraith, to avoid spewing too
much in error cases"

* 'for-linus' of git://git.kernel.dk/linux-block:
block: fix regression with block enabled tagging
scsi: Make sure cmd_flags are 64-bit
block: Ensure we only enable integrity metadata for reads and writes
block: Fix integrity verification
block: Fix for_each_bvec()
drivers/block/loop.c: ratelimit error messages
blk-mq: fix potential stall during CPU unplug with IO pending
percpu_counter: fix bad counter state during suspend

+53 -37
+1 -1
block/blk-core.c
··· 1307 1307 struct request_list *rl = blk_rq_rl(req); 1308 1308 1309 1309 BUG_ON(!list_empty(&req->queuelist)); 1310 - BUG_ON(!hlist_unhashed(&req->hash)); 1310 + BUG_ON(ELV_ON_HASH(req)); 1311 1311 1312 1312 blk_free_request(rl, req); 1313 1313 freed_request(rl, flags);
+6 -2
block/blk-mq.c
··· 956 956 unsigned int cpu) 957 957 { 958 958 struct blk_mq_hw_ctx *hctx = data; 959 + struct request_queue *q = hctx->queue; 959 960 struct blk_mq_ctx *ctx; 960 961 LIST_HEAD(tmp); 961 962 ··· 966 965 /* 967 966 * Move ctx entries to new CPU, if this one is going away. 968 967 */ 969 - ctx = __blk_mq_get_ctx(hctx->queue, cpu); 968 + ctx = __blk_mq_get_ctx(q, cpu); 970 969 971 970 spin_lock(&ctx->lock); 972 971 if (!list_empty(&ctx->rq_list)) { ··· 978 977 if (list_empty(&tmp)) 979 978 return; 980 979 981 - ctx = blk_mq_get_ctx(hctx->queue); 980 + ctx = blk_mq_get_ctx(q); 982 981 spin_lock(&ctx->lock); 983 982 984 983 while (!list_empty(&tmp)) { ··· 989 988 list_move_tail(&rq->queuelist, &ctx->rq_list); 990 989 } 991 990 991 + hctx = q->mq_ops->map_queue(q, ctx->cpu); 992 992 blk_mq_hctx_mark_pending(hctx, ctx); 993 993 994 994 spin_unlock(&ctx->lock); 995 995 blk_mq_put_ctx(ctx); 996 + 997 + blk_mq_run_hw_queue(hctx, true); 996 998 } 997 999 998 1000 static int blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx,
+6 -11
block/blk-softirq.c
··· 30 30 while (!list_empty(&local_list)) { 31 31 struct request *rq; 32 32 33 - rq = list_entry(local_list.next, struct request, queuelist); 34 - list_del_init(&rq->queuelist); 33 + rq = list_entry(local_list.next, struct request, ipi_list); 34 + list_del_init(&rq->ipi_list); 35 35 rq->q->softirq_done_fn(rq); 36 36 } 37 37 } ··· 45 45 46 46 local_irq_save(flags); 47 47 list = this_cpu_ptr(&blk_cpu_done); 48 - /* 49 - * We reuse queuelist for a list of requests to process. Since the 50 - * queuelist is used by the block layer only for requests waiting to be 51 - * submitted to the device it is unused now. 52 - */ 53 - list_add_tail(&rq->queuelist, list); 48 + list_add_tail(&rq->ipi_list, list); 54 49 55 - if (list->next == &rq->queuelist) 50 + if (list->next == &rq->ipi_list) 56 51 raise_softirq_irqoff(BLOCK_SOFTIRQ); 57 52 58 53 local_irq_restore(flags); ··· 136 141 struct list_head *list; 137 142 do_local: 138 143 list = this_cpu_ptr(&blk_cpu_done); 139 - list_add_tail(&req->queuelist, list); 144 + list_add_tail(&req->ipi_list, list); 140 145 141 146 /* 142 147 * if the list only contains our just added request, ··· 144 149 * entries there, someone already raised the irq but it 145 150 * hasn't run yet. 146 151 */ 147 - if (list->next == &req->queuelist) 152 + if (list->next == &req->ipi_list) 148 153 raise_softirq_irqoff(BLOCK_SOFTIRQ); 149 154 } else if (raise_blk_irq(ccpu, req)) 150 155 goto do_local;
+1 -1
block/blk.h
··· 78 78 /* 79 79 * Internal elevator interface 80 80 */ 81 - #define ELV_ON_HASH(rq) hash_hashed(&(rq)->hash) 81 + #define ELV_ON_HASH(rq) ((rq)->cmd_flags & REQ_HASHED) 82 82 83 83 void blk_insert_flush(struct request *rq); 84 84 void blk_abort_flushes(struct request_queue *q);
+2
block/elevator.c
··· 247 247 static inline void __elv_rqhash_del(struct request *rq) 248 248 { 249 249 hash_del(&rq->hash); 250 + rq->cmd_flags &= ~REQ_HASHED; 250 251 } 251 252 252 253 static void elv_rqhash_del(struct request_queue *q, struct request *rq) ··· 262 261 263 262 BUG_ON(ELV_ON_HASH(rq)); 264 263 hash_add(e->hash, &rq->hash, rq_hash_key(rq)); 264 + rq->cmd_flags |= REQ_HASHED; 265 265 } 266 266 267 267 static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
+4 -4
drivers/block/loop.c
··· 237 237 file_end_write(file); 238 238 if (likely(bw == len)) 239 239 return 0; 240 - printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", 240 + printk_ratelimited(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", 241 241 (unsigned long long)pos, len); 242 242 if (bw >= 0) 243 243 bw = -EIO; ··· 277 277 return __do_lo_send_write(lo->lo_backing_file, 278 278 page_address(page), bvec->bv_len, 279 279 pos); 280 - printk(KERN_ERR "loop: Transfer error at byte offset %llu, " 280 + printk_ratelimited(KERN_ERR "loop: Transfer error at byte offset %llu, " 281 281 "length %i.\n", (unsigned long long)pos, bvec->bv_len); 282 282 if (ret > 0) 283 283 ret = -EIO; ··· 316 316 out: 317 317 return ret; 318 318 fail: 319 - printk(KERN_ERR "loop: Failed to allocate temporary page for write.\n"); 319 + printk_ratelimited(KERN_ERR "loop: Failed to allocate temporary page for write.\n"); 320 320 ret = -ENOMEM; 321 321 goto out; 322 322 } ··· 345 345 size = p->bsize; 346 346 347 347 if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) { 348 - printk(KERN_ERR "loop: transfer error block %ld\n", 348 + printk_ratelimited(KERN_ERR "loop: transfer error block %ld\n", 349 349 page->index); 350 350 size = -EINVAL; 351 351 }
+2 -2
drivers/scsi/scsi_lib.c
··· 184 184 */ 185 185 int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, 186 186 int data_direction, void *buffer, unsigned bufflen, 187 - unsigned char *sense, int timeout, int retries, int flags, 187 + unsigned char *sense, int timeout, int retries, u64 flags, 188 188 int *resid) 189 189 { 190 190 struct request *req; ··· 235 235 int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd, 236 236 int data_direction, void *buffer, unsigned bufflen, 237 237 struct scsi_sense_hdr *sshdr, int timeout, int retries, 238 - int *resid, int flags) 238 + int *resid, u64 flags) 239 239 { 240 240 char *sense = NULL; 241 241 int result;
+12 -10
fs/bio-integrity.c
··· 182 182 */ 183 183 int bio_integrity_enabled(struct bio *bio) 184 184 { 185 + if (!bio_is_rw(bio)) 186 + return 0; 187 + 185 188 /* Already protected? */ 186 189 if (bio_integrity(bio)) 187 190 return 0; ··· 312 309 { 313 310 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); 314 311 struct blk_integrity_exchg bix; 315 - struct bio_vec bv; 316 - struct bvec_iter iter; 312 + struct bio_vec *bv; 317 313 sector_t sector; 318 - unsigned int sectors, ret = 0; 314 + unsigned int sectors, ret = 0, i; 319 315 void *prot_buf = bio->bi_integrity->bip_buf; 320 316 321 317 if (operate) ··· 325 323 bix.disk_name = bio->bi_bdev->bd_disk->disk_name; 326 324 bix.sector_size = bi->sector_size; 327 325 328 - bio_for_each_segment(bv, bio, iter) { 329 - void *kaddr = kmap_atomic(bv.bv_page); 330 - bix.data_buf = kaddr + bv.bv_offset; 331 - bix.data_size = bv.bv_len; 326 + bio_for_each_segment_all(bv, bio, i) { 327 + void *kaddr = kmap_atomic(bv->bv_page); 328 + bix.data_buf = kaddr + bv->bv_offset; 329 + bix.data_size = bv->bv_len; 332 330 bix.prot_buf = prot_buf; 333 331 bix.sector = sector; 334 332 335 - if (operate) { 333 + if (operate) 336 334 bi->generate_fn(&bix); 337 - } else { 335 + else { 338 336 ret = bi->verify_fn(&bix); 339 337 if (ret) { 340 338 kunmap_atomic(kaddr); ··· 342 340 } 343 341 } 344 342 345 - sectors = bv.bv_len / bi->sector_size; 343 + sectors = bv->bv_len / bi->sector_size; 346 344 sector += sectors; 347 345 prot_buf += sectors * bi->tuple_size; 348 346
+3 -3
include/linux/bio.h
··· 216 216 } 217 217 218 218 #define for_each_bvec(bvl, bio_vec, iter, start) \ 219 - for ((iter) = start; \ 220 - (bvl) = bvec_iter_bvec((bio_vec), (iter)), \ 221 - (iter).bi_size; \ 219 + for (iter = (start); \ 220 + (iter).bi_size && \ 221 + ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ 222 222 bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len)) 223 223 224 224
+2
include/linux/blk_types.h
··· 189 189 __REQ_KERNEL, /* direct IO to kernel pages */ 190 190 __REQ_PM, /* runtime pm request */ 191 191 __REQ_END, /* last of chain of requests */ 192 + __REQ_HASHED, /* on IO scheduler merge hash */ 192 193 __REQ_NR_BITS, /* stops here */ 193 194 }; 194 195 ··· 242 241 #define REQ_KERNEL (1ULL << __REQ_KERNEL) 243 242 #define REQ_PM (1ULL << __REQ_PM) 244 243 #define REQ_END (1ULL << __REQ_END) 244 + #define REQ_HASHED (1ULL << __REQ_HASHED) 245 245 246 246 #endif /* __LINUX_BLK_TYPES_H */
+12 -1
include/linux/blkdev.h
··· 118 118 struct bio *bio; 119 119 struct bio *biotail; 120 120 121 - struct hlist_node hash; /* merge hash */ 121 + /* 122 + * The hash is used inside the scheduler, and killed once the 123 + * request reaches the dispatch list. The ipi_list is only used 124 + * to queue the request for softirq completion, which is long 125 + * after the request has been unhashed (and even removed from 126 + * the dispatch list). 127 + */ 128 + union { 129 + struct hlist_node hash; /* merge hash */ 130 + struct list_head ipi_list; 131 + }; 132 + 122 133 /* 123 134 * The rb_node is only used inside the io scheduler, requests 124 135 * are pruned when moved to the dispatch queue. So let the
+2 -2
include/scsi/scsi_device.h
··· 423 423 extern int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, 424 424 int data_direction, void *buffer, unsigned bufflen, 425 425 unsigned char *sense, int timeout, int retries, 426 - int flag, int *resid); 426 + u64 flags, int *resid); 427 427 extern int scsi_execute_req_flags(struct scsi_device *sdev, 428 428 const unsigned char *cmd, int data_direction, void *buffer, 429 429 unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, 430 - int retries, int *resid, int flags); 430 + int retries, int *resid, u64 flags); 431 431 static inline int scsi_execute_req(struct scsi_device *sdev, 432 432 const unsigned char *cmd, int data_direction, void *buffer, 433 433 unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout,