Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xen-blkfront: use a different scatterlist for each request

In blkif_queue_request blkfront iterates over the scatterlist in order
to set the segments of the request, and in blkif_completion blkfront
iterates over the raw request, which makes it hard to know the exact
position of the source and destination memory regions.

This can be solved by allocating a scatterlist for each request, that
will be kept until the request is finished, allowing us to copy the
data back to the original memory without having to iterate over the
raw request.

Oracle-Bug: 16660413 - LARGE ASYNCHRONOUS READS APPEAR BROKEN ON 2.6.39-400
CC: stable@vger.kernel.org
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reported-and-Tested-by: Anne Milicia <anne.milicia@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

authored by

Roger Pau Monne and committed by
Konrad Rzeszutek Wilk
b7649158 bb642e83

+18 -25
+18 -25
drivers/block/xen-blkfront.c
··· 76 76 struct request *request; 77 77 struct grant **grants_used; 78 78 struct grant **indirect_grants; 79 + struct scatterlist *sg; 79 80 }; 80 81 81 82 struct split_bio { ··· 114 113 enum blkif_state connected; 115 114 int ring_ref; 116 115 struct blkif_front_ring ring; 117 - struct scatterlist *sg; 118 116 unsigned int evtchn, irq; 119 117 struct request_queue *rq; 120 118 struct work_struct work; ··· 438 438 req->nr_phys_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); 439 439 BUG_ON(info->max_indirect_segments && 440 440 req->nr_phys_segments > info->max_indirect_segments); 441 - nseg = blk_rq_map_sg(req->q, req, info->sg); 441 + nseg = blk_rq_map_sg(req->q, req, info->shadow[id].sg); 442 442 ring_req->u.rw.id = id; 443 443 if (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) { 444 444 /* ··· 469 469 } 470 470 ring_req->u.rw.nr_segments = nseg; 471 471 } 472 - for_each_sg(info->sg, sg, nseg, i) { 472 + for_each_sg(info->shadow[id].sg, sg, nseg, i) { 473 473 fsect = sg->offset >> 9; 474 474 lsect = fsect + (sg->length >> 9) - 1; 475 475 ··· 914 914 } 915 915 BUG_ON(info->persistent_gnts_c != 0); 916 916 917 - kfree(info->sg); 918 - info->sg = NULL; 919 917 for (i = 0; i < BLK_RING_SIZE; i++) { 920 918 /* 921 919 * Clear persistent grants present in requests already ··· 951 953 info->shadow[i].grants_used = NULL; 952 954 kfree(info->shadow[i].indirect_grants); 953 955 info->shadow[i].indirect_grants = NULL; 956 + kfree(info->shadow[i].sg); 957 + info->shadow[i].sg = NULL; 954 958 } 955 959 956 960 /* No more gnttab callback work. */ ··· 979 979 struct blkif_response *bret) 980 980 { 981 981 int i = 0; 982 - struct bio_vec *bvec; 983 - struct req_iterator iter; 984 - unsigned long flags; 982 + struct scatterlist *sg; 985 983 char *bvec_data; 986 984 void *shared_data; 987 - unsigned int offset = 0; 988 985 int nseg; 989 986 990 987 nseg = s->req.operation == BLKIF_OP_INDIRECT ? 
··· 994 997 * than PAGE_SIZE, we have to keep track of the current offset, 995 998 * to be sure we are copying the data from the right shared page. 996 999 */ 997 - rq_for_each_segment(bvec, s->request, iter) { 998 - BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE); 999 - if (bvec->bv_offset < offset) 1000 - i++; 1001 - BUG_ON(i >= nseg); 1000 + for_each_sg(s->sg, sg, nseg, i) { 1001 + BUG_ON(sg->offset + sg->length > PAGE_SIZE); 1002 1002 shared_data = kmap_atomic( 1003 1003 pfn_to_page(s->grants_used[i]->pfn)); 1004 - bvec_data = bvec_kmap_irq(bvec, &flags); 1005 - memcpy(bvec_data, shared_data + bvec->bv_offset, 1006 - bvec->bv_len); 1007 - bvec_kunmap_irq(bvec_data, &flags); 1004 + bvec_data = kmap_atomic(sg_page(sg)); 1005 + memcpy(bvec_data + sg->offset, 1006 + shared_data + sg->offset, 1007 + sg->length); 1008 + kunmap_atomic(bvec_data); 1008 1009 kunmap_atomic(shared_data); 1009 - offset = bvec->bv_offset + bvec->bv_len; 1010 1010 } 1011 1011 } 1012 1012 /* Add the persistent grant into the list of free grants */ ··· 1650 1656 xen_blkif_max_segments); 1651 1657 segs = info->max_indirect_segments; 1652 1658 } 1653 - info->sg = kzalloc(sizeof(info->sg[0]) * segs, GFP_KERNEL); 1654 - if (info->sg == NULL) 1655 - goto out_of_memory; 1656 - sg_init_table(info->sg, segs); 1657 1659 1658 1660 err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE); 1659 1661 if (err) ··· 1659 1669 info->shadow[i].grants_used = kzalloc( 1660 1670 sizeof(info->shadow[i].grants_used[0]) * segs, 1661 1671 GFP_NOIO); 1672 + info->shadow[i].sg = kzalloc(sizeof(info->shadow[i].sg[0]) * segs, GFP_NOIO); 1662 1673 if (info->max_indirect_segments) 1663 1674 info->shadow[i].indirect_grants = kzalloc( 1664 1675 sizeof(info->shadow[i].indirect_grants[0]) * 1665 1676 INDIRECT_GREFS(segs), 1666 1677 GFP_NOIO); 1667 1678 if ((info->shadow[i].grants_used == NULL) || 1679 + (info->shadow[i].sg == NULL) || 1668 1680 (info->max_indirect_segments && 1669 1681 
(info->shadow[i].indirect_grants == NULL))) 1670 1682 goto out_of_memory; 1683 + sg_init_table(info->shadow[i].sg, segs); 1671 1684 } 1672 1685 1673 1686 1674 1687 return 0; 1675 1688 1676 1689 out_of_memory: 1677 - kfree(info->sg); 1678 - info->sg = NULL; 1679 1690 for (i = 0; i < BLK_RING_SIZE; i++) { 1680 1691 kfree(info->shadow[i].grants_used); 1681 1692 info->shadow[i].grants_used = NULL; 1693 + kfree(info->shadow[i].sg); 1694 + info->shadow[i].sg = NULL; 1682 1695 kfree(info->shadow[i].indirect_grants); 1683 1696 info->shadow[i].indirect_grants = NULL; 1684 1697 }