Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull more block layer patches from Jens Axboe:
"A few late arrivals that I didn't fold into the first pull request,
so we had a chance to run some testing. This contains:

- NVMe:
    - Set of fixes from Keith
    - 4.4 and earlier gcc build fix from Andrew

- small set of xen-blk{back,front} fixes from Bob Liu.

- warnings fix for bogus inline statement in I_BDEV() from Geert.

- error code fixup for SG_IO ioctl from Paolo Bonzini"

* 'for-linus' of git://git.kernel.dk/linux-block:
drivers/block/nvme-core.c: fix build with gcc-4.4.4
bdi: Remove "inline" keyword from exported I_BDEV() implementation
block: fix bogus EFAULT error from SG_IO ioctl
NVMe: Fix filesystem deadlock on removal
NVMe: Failed controller initialization fixes
NVMe: Unify controller probe and resume
NVMe: Don't use fake status on cancelled command
NVMe: Fix device cleanup on initialization failure
drivers: xen-blkfront: only talk_to_blkback() when in XenbusStateInitialising
xen/block: add multi-page ring support
driver: xen-blkfront: move talk_to_blkback to a more suitable place
drivers: xen-blkback: delay pending_req allocation to connect_ring

+308 -171
+2 -2
block/scsi_ioctl.c
··· 326 326 goto out_put_request; 327 327 } 328 328 329 - ret = -EFAULT; 330 - if (blk_fill_sghdr_rq(q, rq, hdr, mode)) 329 + ret = blk_fill_sghdr_rq(q, rq, hdr, mode); 330 + if (ret < 0) 331 331 goto out_free_cdb; 332 332 333 333 ret = 0;
+80 -58
drivers/block/nvme-core.c
··· 193 193 return 0; 194 194 } 195 195 196 + static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) 197 + { 198 + struct nvme_queue *nvmeq = hctx->driver_data; 199 + 200 + nvmeq->tags = NULL; 201 + } 202 + 196 203 static int nvme_admin_init_request(void *data, struct request *req, 197 204 unsigned int hctx_idx, unsigned int rq_idx, 198 205 unsigned int numa_node) ··· 613 606 return; 614 607 } 615 608 if (req->cmd_type == REQ_TYPE_DRV_PRIV) { 616 - req->errors = status; 609 + if (cmd_rq->ctx == CMD_CTX_CANCELLED) 610 + req->errors = -EINTR; 611 + else 612 + req->errors = status; 617 613 } else { 618 614 req->errors = nvme_error_status(status); 619 615 } ··· 1171 1161 1172 1162 int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id) 1173 1163 { 1174 - struct nvme_command c = { 1175 - .identify.opcode = nvme_admin_identify, 1176 - .identify.cns = cpu_to_le32(1), 1177 - }; 1164 + struct nvme_command c = { }; 1178 1165 int error; 1166 + 1167 + /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ 1168 + c.identify.opcode = nvme_admin_identify; 1169 + c.identify.cns = cpu_to_le32(1); 1179 1170 1180 1171 *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); 1181 1172 if (!*id) ··· 1192 1181 int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, 1193 1182 struct nvme_id_ns **id) 1194 1183 { 1195 - struct nvme_command c = { 1196 - .identify.opcode = nvme_admin_identify, 1197 - .identify.nsid = cpu_to_le32(nsid), 1198 - }; 1184 + struct nvme_command c = { }; 1199 1185 int error; 1186 + 1187 + /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ 1188 + c.identify.opcode = nvme_admin_identify, 1189 + c.identify.nsid = cpu_to_le32(nsid), 1200 1190 1201 1191 *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); 1202 1192 if (!*id) ··· 1242 1230 1243 1231 int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log) 1244 1232 { 1245 - struct nvme_command c = { 1246 - .common.opcode = 
nvme_admin_get_log_page, 1247 - .common.nsid = cpu_to_le32(0xFFFFFFFF), 1248 - .common.cdw10[0] = cpu_to_le32( 1233 + struct nvme_command c = { }; 1234 + int error; 1235 + 1236 + c.common.opcode = nvme_admin_get_log_page, 1237 + c.common.nsid = cpu_to_le32(0xFFFFFFFF), 1238 + c.common.cdw10[0] = cpu_to_le32( 1249 1239 (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) | 1250 1240 NVME_LOG_SMART), 1251 - }; 1252 - int error; 1253 1241 1254 1242 *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL); 1255 1243 if (!*log) ··· 1618 1606 .queue_rq = nvme_queue_rq, 1619 1607 .map_queue = blk_mq_map_queue, 1620 1608 .init_hctx = nvme_admin_init_hctx, 1609 + .exit_hctx = nvme_admin_exit_hctx, 1621 1610 .init_request = nvme_admin_init_request, 1622 1611 .timeout = nvme_timeout, 1623 1612 }; ··· 1661 1648 } 1662 1649 if (!blk_get_queue(dev->admin_q)) { 1663 1650 nvme_dev_remove_admin(dev); 1651 + dev->admin_q = NULL; 1664 1652 return -ENODEV; 1665 1653 } 1666 1654 } else ··· 2363 2349 } 2364 2350 kfree(ctrl); 2365 2351 2366 - dev->tagset.ops = &nvme_mq_ops; 2367 - dev->tagset.nr_hw_queues = dev->online_queues - 1; 2368 - dev->tagset.timeout = NVME_IO_TIMEOUT; 2369 - dev->tagset.numa_node = dev_to_node(dev->dev); 2370 - dev->tagset.queue_depth = 2352 + if (!dev->tagset.tags) { 2353 + dev->tagset.ops = &nvme_mq_ops; 2354 + dev->tagset.nr_hw_queues = dev->online_queues - 1; 2355 + dev->tagset.timeout = NVME_IO_TIMEOUT; 2356 + dev->tagset.numa_node = dev_to_node(dev->dev); 2357 + dev->tagset.queue_depth = 2371 2358 min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; 2372 - dev->tagset.cmd_size = nvme_cmd_size(dev); 2373 - dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; 2374 - dev->tagset.driver_data = dev; 2359 + dev->tagset.cmd_size = nvme_cmd_size(dev); 2360 + dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; 2361 + dev->tagset.driver_data = dev; 2375 2362 2376 - if (blk_mq_alloc_tag_set(&dev->tagset)) 2377 - return 0; 2378 - 2363 + if (blk_mq_alloc_tag_set(&dev->tagset)) 2364 + return 0; 
2365 + } 2379 2366 schedule_work(&dev->scan_work); 2380 2367 return 0; 2381 2368 } ··· 2749 2734 put_device(dev->device); 2750 2735 nvme_free_namespaces(dev); 2751 2736 nvme_release_instance(dev); 2752 - blk_mq_free_tag_set(&dev->tagset); 2753 - blk_put_queue(dev->admin_q); 2737 + if (dev->tagset.tags) 2738 + blk_mq_free_tag_set(&dev->tagset); 2739 + if (dev->admin_q) 2740 + blk_put_queue(dev->admin_q); 2754 2741 kfree(dev->queues); 2755 2742 kfree(dev->entry); 2756 2743 kfree(dev); ··· 2883 2866 2884 2867 free_tags: 2885 2868 nvme_dev_remove_admin(dev); 2869 + blk_put_queue(dev->admin_q); 2870 + dev->admin_q = NULL; 2871 + dev->queues[0]->tags = NULL; 2886 2872 disable: 2887 2873 nvme_disable_queue(dev, 0); 2888 2874 nvme_dev_list_remove(dev); ··· 2927 2907 spin_unlock(&dev_list_lock); 2928 2908 } else { 2929 2909 nvme_unfreeze_queues(dev); 2930 - schedule_work(&dev->scan_work); 2910 + nvme_dev_add(dev); 2931 2911 nvme_set_irq_hints(dev); 2932 2912 } 2933 2913 return 0; 2934 2914 } 2935 2915 2916 + static void nvme_dead_ctrl(struct nvme_dev *dev) 2917 + { 2918 + dev_warn(dev->dev, "Device failed to resume\n"); 2919 + kref_get(&dev->kref); 2920 + if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", 2921 + dev->instance))) { 2922 + dev_err(dev->dev, 2923 + "Failed to start controller remove task\n"); 2924 + kref_put(&dev->kref, nvme_free_dev); 2925 + } 2926 + } 2927 + 2936 2928 static void nvme_dev_reset(struct nvme_dev *dev) 2937 2929 { 2930 + bool in_probe = work_busy(&dev->probe_work); 2931 + 2938 2932 nvme_dev_shutdown(dev); 2939 - if (nvme_dev_resume(dev)) { 2940 - dev_warn(dev->dev, "Device failed to resume\n"); 2941 - kref_get(&dev->kref); 2942 - if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", 2943 - dev->instance))) { 2944 - dev_err(dev->dev, 2945 - "Failed to start controller remove task\n"); 2946 - kref_put(&dev->kref, nvme_free_dev); 2947 - } 2933 + 2934 + /* Synchronize with device probe so that work will see failure status 2935 + * 
and exit gracefully without trying to schedule another reset */ 2936 + flush_work(&dev->probe_work); 2937 + 2938 + /* Fail this device if reset occured during probe to avoid 2939 + * infinite initialization loops. */ 2940 + if (in_probe) { 2941 + nvme_dead_ctrl(dev); 2942 + return; 2948 2943 } 2944 + /* Schedule device resume asynchronously so the reset work is available 2945 + * to cleanup errors that may occur during reinitialization */ 2946 + schedule_work(&dev->probe_work); 2949 2947 } 2950 2948 2951 2949 static void nvme_reset_failed_dev(struct work_struct *ws) ··· 2995 2957 2996 2958 if (!ret) { 2997 2959 flush_work(&dev->reset_work); 2960 + flush_work(&dev->probe_work); 2998 2961 return 0; 2999 2962 } 3000 2963 ··· 3092 3053 static void nvme_async_probe(struct work_struct *work) 3093 3054 { 3094 3055 struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work); 3095 - int result; 3096 3056 3097 - result = nvme_dev_start(dev); 3098 - if (result) 3099 - goto reset; 3100 - 3101 - if (dev->online_queues > 1) 3102 - result = nvme_dev_add(dev); 3103 - if (result) 3104 - goto reset; 3105 - 3106 - nvme_set_irq_hints(dev); 3107 - return; 3108 - reset: 3109 - spin_lock(&dev_list_lock); 3110 - if (!work_busy(&dev->reset_work)) { 3111 - dev->reset_workfn = nvme_reset_failed_dev; 3112 - queue_work(nvme_workq, &dev->reset_work); 3113 - } 3114 - spin_unlock(&dev_list_lock); 3057 + if (nvme_dev_resume(dev) && !work_busy(&dev->reset_work)) 3058 + nvme_dead_ctrl(dev); 3115 3059 } 3116 3060 3117 3061 static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) ··· 3126 3104 flush_work(&dev->reset_work); 3127 3105 flush_work(&dev->scan_work); 3128 3106 device_remove_file(dev->device, &dev_attr_reset_controller); 3129 - nvme_dev_shutdown(dev); 3130 3107 nvme_dev_remove(dev); 3108 + nvme_dev_shutdown(dev); 3131 3109 nvme_dev_remove_admin(dev); 3132 3110 device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance)); 3133 3111 nvme_free_queues(dev, 0);
+13
drivers/block/xen-blkback/blkback.c
··· 84 84 "Maximum number of grants to map persistently"); 85 85 86 86 /* 87 + * Maximum order of pages to be used for the shared ring between front and 88 + * backend, 4KB page granularity is used. 89 + */ 90 + unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER; 91 + module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO); 92 + MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); 93 + /* 87 94 * The LRU mechanism to clean the lists of persistent grants needs to 88 95 * be executed periodically. The time interval between consecutive executions 89 96 * of the purge mechanism is set in ms. ··· 1444 1437 1445 1438 if (!xen_domain()) 1446 1439 return -ENODEV; 1440 + 1441 + if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) { 1442 + pr_info("Invalid max_ring_order (%d), will use default max: %d.\n", 1443 + xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER); 1444 + xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER; 1445 + } 1447 1446 1448 1447 rc = xen_blkif_interface_init(); 1449 1448 if (rc)
+3 -1
drivers/block/xen-blkback/common.h
··· 44 44 #include <xen/interface/io/blkif.h> 45 45 #include <xen/interface/io/protocols.h> 46 46 47 + extern unsigned int xen_blkif_max_ring_order; 47 48 /* 48 49 * This is the maximum number of segments that would be allowed in indirect 49 50 * requests. This value will also be passed to the frontend. ··· 249 248 #define PERSISTENT_GNT_WAS_ACTIVE 1 250 249 251 250 /* Number of requests that we can fit in a ring */ 252 - #define XEN_BLKIF_REQS 32 251 + #define XEN_BLKIF_REQS_PER_PAGE 32 253 252 254 253 struct persistent_gnt { 255 254 struct page *page; ··· 321 320 struct work_struct free_work; 322 321 /* Thread shutdown wait queue. */ 323 322 wait_queue_head_t shutdown_wq; 323 + unsigned int nr_ring_pages; 324 324 }; 325 325 326 326 struct seg_buf {
+105 -62
drivers/block/xen-blkback/xenbus.c
··· 25 25 26 26 /* Enlarge the array size in order to fully show blkback name. */ 27 27 #define BLKBACK_NAME_LEN (20) 28 + #define RINGREF_NAME_LEN (20) 28 29 29 30 struct backend_info { 30 31 struct xenbus_device *dev; ··· 125 124 static struct xen_blkif *xen_blkif_alloc(domid_t domid) 126 125 { 127 126 struct xen_blkif *blkif; 128 - struct pending_req *req, *n; 129 - int i, j; 130 127 131 128 BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST); 132 129 ··· 150 151 151 152 INIT_LIST_HEAD(&blkif->pending_free); 152 153 INIT_WORK(&blkif->free_work, xen_blkif_deferred_free); 153 - 154 - for (i = 0; i < XEN_BLKIF_REQS; i++) { 155 - req = kzalloc(sizeof(*req), GFP_KERNEL); 156 - if (!req) 157 - goto fail; 158 - list_add_tail(&req->free_list, 159 - &blkif->pending_free); 160 - for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { 161 - req->segments[j] = kzalloc(sizeof(*req->segments[0]), 162 - GFP_KERNEL); 163 - if (!req->segments[j]) 164 - goto fail; 165 - } 166 - for (j = 0; j < MAX_INDIRECT_PAGES; j++) { 167 - req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]), 168 - GFP_KERNEL); 169 - if (!req->indirect_pages[j]) 170 - goto fail; 171 - } 172 - } 173 154 spin_lock_init(&blkif->pending_free_lock); 174 155 init_waitqueue_head(&blkif->pending_free_wq); 175 156 init_waitqueue_head(&blkif->shutdown_wq); 176 157 177 158 return blkif; 178 - 179 - fail: 180 - list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) { 181 - list_del(&req->free_list); 182 - for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { 183 - if (!req->segments[j]) 184 - break; 185 - kfree(req->segments[j]); 186 - } 187 - for (j = 0; j < MAX_INDIRECT_PAGES; j++) { 188 - if (!req->indirect_pages[j]) 189 - break; 190 - kfree(req->indirect_pages[j]); 191 - } 192 - kfree(req); 193 - } 194 - 195 - kmem_cache_free(xen_blkif_cachep, blkif); 196 - 197 - return ERR_PTR(-ENOMEM); 198 159 } 199 160 200 - static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref, 201 - unsigned 
int evtchn) 161 + static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref, 162 + unsigned int nr_grefs, unsigned int evtchn) 202 163 { 203 164 int err; 204 165 ··· 166 207 if (blkif->irq) 167 208 return 0; 168 209 169 - err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1, 210 + err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs, 170 211 &blkif->blk_ring); 171 212 if (err < 0) 172 213 return err; ··· 176 217 { 177 218 struct blkif_sring *sring; 178 219 sring = (struct blkif_sring *)blkif->blk_ring; 179 - BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); 220 + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE * nr_grefs); 180 221 break; 181 222 } 182 223 case BLKIF_PROTOCOL_X86_32: 183 224 { 184 225 struct blkif_x86_32_sring *sring_x86_32; 185 226 sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring; 186 - BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); 227 + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE * nr_grefs); 187 228 break; 188 229 } 189 230 case BLKIF_PROTOCOL_X86_64: 190 231 { 191 232 struct blkif_x86_64_sring *sring_x86_64; 192 233 sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring; 193 - BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); 234 + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE * nr_grefs); 194 235 break; 195 236 } 196 237 default: ··· 271 312 i++; 272 313 } 273 314 274 - WARN_ON(i != XEN_BLKIF_REQS); 315 + WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages)); 275 316 276 317 kmem_cache_free(xen_blkif_cachep, blkif); 277 318 } ··· 556 597 if (err) 557 598 goto fail; 558 599 600 + err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u", 601 + xen_blkif_max_ring_order); 602 + if (err) 603 + pr_warn("%s write out 'max-ring-page-order' failed\n", __func__); 604 + 559 605 err = xenbus_switch_state(dev, XenbusStateInitWait); 560 606 if (err) 561 607 goto fail; ··· 824 860 static int 
connect_ring(struct backend_info *be) 825 861 { 826 862 struct xenbus_device *dev = be->dev; 827 - unsigned long ring_ref; 828 - unsigned int evtchn; 863 + unsigned int ring_ref[XENBUS_MAX_RING_PAGES]; 864 + unsigned int evtchn, nr_grefs, ring_page_order; 829 865 unsigned int pers_grants; 830 866 char protocol[64] = ""; 831 - int err; 867 + struct pending_req *req, *n; 868 + int err, i, j; 832 869 833 870 pr_debug("%s %s\n", __func__, dev->otherend); 834 871 835 - err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", 836 - &ring_ref, "event-channel", "%u", &evtchn, NULL); 837 - if (err) { 838 - xenbus_dev_fatal(dev, err, 839 - "reading %s/ring-ref and event-channel", 872 + err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u", 873 + &evtchn); 874 + if (err != 1) { 875 + err = -EINVAL; 876 + xenbus_dev_fatal(dev, err, "reading %s/event-channel", 840 877 dev->otherend); 841 878 return err; 879 + } 880 + pr_info("event-channel %u\n", evtchn); 881 + 882 + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u", 883 + &ring_page_order); 884 + if (err != 1) { 885 + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", 886 + "%u", &ring_ref[0]); 887 + if (err != 1) { 888 + err = -EINVAL; 889 + xenbus_dev_fatal(dev, err, "reading %s/ring-ref", 890 + dev->otherend); 891 + return err; 892 + } 893 + nr_grefs = 1; 894 + pr_info("%s:using single page: ring-ref %d\n", dev->otherend, 895 + ring_ref[0]); 896 + } else { 897 + unsigned int i; 898 + 899 + if (ring_page_order > xen_blkif_max_ring_order) { 900 + err = -EINVAL; 901 + xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d", 902 + dev->otherend, ring_page_order, 903 + xen_blkif_max_ring_order); 904 + return err; 905 + } 906 + 907 + nr_grefs = 1 << ring_page_order; 908 + for (i = 0; i < nr_grefs; i++) { 909 + char ring_ref_name[RINGREF_NAME_LEN]; 910 + 911 + snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); 912 + err = xenbus_scanf(XBT_NIL, dev->otherend, 
ring_ref_name, 913 + "%u", &ring_ref[i]); 914 + if (err != 1) { 915 + err = -EINVAL; 916 + xenbus_dev_fatal(dev, err, "reading %s/%s", 917 + dev->otherend, ring_ref_name); 918 + return err; 919 + } 920 + pr_info("ring-ref%u: %u\n", i, ring_ref[i]); 921 + } 842 922 } 843 923 844 924 be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT; ··· 908 900 909 901 be->blkif->vbd.feature_gnt_persistent = pers_grants; 910 902 be->blkif->vbd.overflow_max_grants = 0; 903 + be->blkif->nr_ring_pages = nr_grefs; 911 904 912 - pr_info("ring-ref %ld, event-channel %d, protocol %d (%s) %s\n", 913 - ring_ref, evtchn, be->blkif->blk_protocol, protocol, 905 + pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n", 906 + nr_grefs, evtchn, be->blkif->blk_protocol, protocol, 914 907 pers_grants ? "persistent grants" : ""); 915 908 909 + for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) { 910 + req = kzalloc(sizeof(*req), GFP_KERNEL); 911 + if (!req) 912 + goto fail; 913 + list_add_tail(&req->free_list, &be->blkif->pending_free); 914 + for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { 915 + req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL); 916 + if (!req->segments[j]) 917 + goto fail; 918 + } 919 + for (j = 0; j < MAX_INDIRECT_PAGES; j++) { 920 + req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]), 921 + GFP_KERNEL); 922 + if (!req->indirect_pages[j]) 923 + goto fail; 924 + } 925 + } 926 + 916 927 /* Map the shared frame, irq etc. 
*/ 917 - err = xen_blkif_map(be->blkif, ring_ref, evtchn); 928 + err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn); 918 929 if (err) { 919 - xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", 920 - ring_ref, evtchn); 930 + xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn); 921 931 return err; 922 932 } 923 933 924 934 return 0; 935 + 936 + fail: 937 + list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) { 938 + list_del(&req->free_list); 939 + for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { 940 + if (!req->segments[j]) 941 + break; 942 + kfree(req->segments[j]); 943 + } 944 + for (j = 0; j < MAX_INDIRECT_PAGES; j++) { 945 + if (!req->indirect_pages[j]) 946 + break; 947 + kfree(req->indirect_pages[j]); 948 + } 949 + kfree(req); 950 + } 951 + return -ENOMEM; 925 952 } 926 953 927 954 static const struct xenbus_device_id xen_blkbk_ids[] = {
+104 -47
drivers/block/xen-blkfront.c
··· 98 98 module_param_named(max, xen_blkif_max_segments, int, S_IRUGO); 99 99 MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)"); 100 100 101 - #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) 101 + /* 102 + * Maximum order of pages to be used for the shared ring between front and 103 + * backend, 4KB page granularity is used. 104 + */ 105 + static unsigned int xen_blkif_max_ring_order; 106 + module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO); 107 + MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); 108 + 109 + #define BLK_RING_SIZE(info) __CONST_RING_SIZE(blkif, PAGE_SIZE * (info)->nr_ring_pages) 110 + #define BLK_MAX_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE * XENBUS_MAX_RING_PAGES) 111 + /* 112 + * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19 113 + * characters are enough. Define to 20 to keep consist with backend. 114 + */ 115 + #define RINGREF_NAME_LEN (20) 102 116 103 117 /* 104 118 * We have one of these per vbd, whether ide, scsi or 'other'. 
They ··· 128 114 int vdevice; 129 115 blkif_vdev_t handle; 130 116 enum blkif_state connected; 131 - int ring_ref; 117 + int ring_ref[XENBUS_MAX_RING_PAGES]; 118 + unsigned int nr_ring_pages; 132 119 struct blkif_front_ring ring; 133 120 unsigned int evtchn, irq; 134 121 struct request_queue *rq; 135 122 struct work_struct work; 136 123 struct gnttab_free_callback callback; 137 - struct blk_shadow shadow[BLK_RING_SIZE]; 124 + struct blk_shadow shadow[BLK_MAX_RING_SIZE]; 138 125 struct list_head grants; 139 126 struct list_head indirect_pages; 140 127 unsigned int persistent_gnts_c; ··· 154 139 static unsigned long *minors; 155 140 static DEFINE_SPINLOCK(minor_lock); 156 141 157 - #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ 158 - (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) 159 142 #define GRANT_INVALID_REF 0 160 143 161 144 #define PARTS_PER_DISK 16 ··· 183 170 static int get_id_from_freelist(struct blkfront_info *info) 184 171 { 185 172 unsigned long free = info->shadow_free; 186 - BUG_ON(free >= BLK_RING_SIZE); 173 + BUG_ON(free >= BLK_RING_SIZE(info)); 187 174 info->shadow_free = info->shadow[free].req.u.rw.id; 188 175 info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ 189 176 return free; ··· 996 983 } 997 984 } 998 985 999 - for (i = 0; i < BLK_RING_SIZE; i++) { 986 + for (i = 0; i < BLK_RING_SIZE(info); i++) { 1000 987 /* 1001 988 * Clear persistent grants present in requests already 1002 989 * on the shared ring ··· 1046 1033 flush_work(&info->work); 1047 1034 1048 1035 /* Free resources associated with old device channel. 
*/ 1049 - if (info->ring_ref != GRANT_INVALID_REF) { 1050 - gnttab_end_foreign_access(info->ring_ref, 0, 1051 - (unsigned long)info->ring.sring); 1052 - info->ring_ref = GRANT_INVALID_REF; 1053 - info->ring.sring = NULL; 1036 + for (i = 0; i < info->nr_ring_pages; i++) { 1037 + if (info->ring_ref[i] != GRANT_INVALID_REF) { 1038 + gnttab_end_foreign_access(info->ring_ref[i], 0, 0); 1039 + info->ring_ref[i] = GRANT_INVALID_REF; 1040 + } 1054 1041 } 1042 + free_pages((unsigned long)info->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE)); 1043 + info->ring.sring = NULL; 1044 + 1055 1045 if (info->irq) 1056 1046 unbind_from_irqhandler(info->irq, info); 1057 1047 info->evtchn = info->irq = 0; ··· 1173 1157 * never have given to it (we stamp it up to BLK_RING_SIZE - 1174 1158 * look in get_id_from_freelist. 1175 1159 */ 1176 - if (id >= BLK_RING_SIZE) { 1160 + if (id >= BLK_RING_SIZE(info)) { 1177 1161 WARN(1, "%s: response to %s has incorrect id (%ld)\n", 1178 1162 info->gd->disk_name, op_name(bret->operation), id); 1179 1163 /* We can't safely get the 'struct request' as ··· 1261 1245 struct blkfront_info *info) 1262 1246 { 1263 1247 struct blkif_sring *sring; 1264 - grant_ref_t gref; 1265 - int err; 1248 + int err, i; 1249 + unsigned long ring_size = info->nr_ring_pages * PAGE_SIZE; 1250 + grant_ref_t gref[XENBUS_MAX_RING_PAGES]; 1266 1251 1267 - info->ring_ref = GRANT_INVALID_REF; 1252 + for (i = 0; i < info->nr_ring_pages; i++) 1253 + info->ring_ref[i] = GRANT_INVALID_REF; 1268 1254 1269 - sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); 1255 + sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, 1256 + get_order(ring_size)); 1270 1257 if (!sring) { 1271 1258 xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); 1272 1259 return -ENOMEM; 1273 1260 } 1274 1261 SHARED_RING_INIT(sring); 1275 - FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); 1262 + FRONT_RING_INIT(&info->ring, sring, ring_size); 1276 1263 1277 - err = 
xenbus_grant_ring(dev, info->ring.sring, 1, &gref); 1264 + err = xenbus_grant_ring(dev, info->ring.sring, info->nr_ring_pages, gref); 1278 1265 if (err < 0) { 1279 - free_page((unsigned long)sring); 1266 + free_pages((unsigned long)sring, get_order(ring_size)); 1280 1267 info->ring.sring = NULL; 1281 1268 goto fail; 1282 1269 } 1283 - info->ring_ref = gref; 1270 + for (i = 0; i < info->nr_ring_pages; i++) 1271 + info->ring_ref[i] = gref[i]; 1284 1272 1285 1273 err = xenbus_alloc_evtchn(dev, &info->evtchn); 1286 1274 if (err) ··· 1312 1292 { 1313 1293 const char *message = NULL; 1314 1294 struct xenbus_transaction xbt; 1315 - int err; 1295 + int err, i; 1296 + unsigned int max_page_order = 0; 1297 + unsigned int ring_page_order = 0; 1298 + 1299 + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, 1300 + "max-ring-page-order", "%u", &max_page_order); 1301 + if (err != 1) 1302 + info->nr_ring_pages = 1; 1303 + else { 1304 + ring_page_order = min(xen_blkif_max_ring_order, max_page_order); 1305 + info->nr_ring_pages = 1 << ring_page_order; 1306 + } 1316 1307 1317 1308 /* Create shared ring, alloc event channel. 
*/ 1318 1309 err = setup_blkring(dev, info); ··· 1337 1306 goto destroy_blkring; 1338 1307 } 1339 1308 1340 - err = xenbus_printf(xbt, dev->nodename, 1341 - "ring-ref", "%u", info->ring_ref); 1342 - if (err) { 1343 - message = "writing ring-ref"; 1344 - goto abort_transaction; 1309 + if (info->nr_ring_pages == 1) { 1310 + err = xenbus_printf(xbt, dev->nodename, 1311 + "ring-ref", "%u", info->ring_ref[0]); 1312 + if (err) { 1313 + message = "writing ring-ref"; 1314 + goto abort_transaction; 1315 + } 1316 + } else { 1317 + err = xenbus_printf(xbt, dev->nodename, 1318 + "ring-page-order", "%u", ring_page_order); 1319 + if (err) { 1320 + message = "writing ring-page-order"; 1321 + goto abort_transaction; 1322 + } 1323 + 1324 + for (i = 0; i < info->nr_ring_pages; i++) { 1325 + char ring_ref_name[RINGREF_NAME_LEN]; 1326 + 1327 + snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); 1328 + err = xenbus_printf(xbt, dev->nodename, ring_ref_name, 1329 + "%u", info->ring_ref[i]); 1330 + if (err) { 1331 + message = "writing ring-ref"; 1332 + goto abort_transaction; 1333 + } 1334 + } 1345 1335 } 1346 1336 err = xenbus_printf(xbt, dev->nodename, 1347 1337 "event-channel", "%u", info->evtchn); ··· 1390 1338 goto destroy_blkring; 1391 1339 } 1392 1340 1341 + for (i = 0; i < BLK_RING_SIZE(info); i++) 1342 + info->shadow[i].req.u.rw.id = i+1; 1343 + info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; 1393 1344 xenbus_switch_state(dev, XenbusStateInitialised); 1394 1345 1395 1346 return 0; ··· 1416 1361 static int blkfront_probe(struct xenbus_device *dev, 1417 1362 const struct xenbus_device_id *id) 1418 1363 { 1419 - int err, vdevice, i; 1364 + int err, vdevice; 1420 1365 struct blkfront_info *info; 1421 1366 1422 1367 /* FIXME: Use dynamic device id if this is not set. 
*/ ··· 1477 1422 info->connected = BLKIF_STATE_DISCONNECTED; 1478 1423 INIT_WORK(&info->work, blkif_restart_queue); 1479 1424 1480 - for (i = 0; i < BLK_RING_SIZE; i++) 1481 - info->shadow[i].req.u.rw.id = i+1; 1482 - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; 1483 - 1484 1425 /* Front end dir is a number, which is used as the id. */ 1485 1426 info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); 1486 1427 dev_set_drvdata(&dev->dev, info); 1487 - 1488 - err = talk_to_blkback(dev, info); 1489 - if (err) { 1490 - kfree(info); 1491 - dev_set_drvdata(&dev->dev, NULL); 1492 - return err; 1493 - } 1494 1428 1495 1429 return 0; 1496 1430 } ··· 1520 1476 1521 1477 /* Stage 2: Set up free list. */ 1522 1478 memset(&info->shadow, 0, sizeof(info->shadow)); 1523 - for (i = 0; i < BLK_RING_SIZE; i++) 1479 + for (i = 0; i < BLK_RING_SIZE(info); i++) 1524 1480 info->shadow[i].req.u.rw.id = i+1; 1525 1481 info->shadow_free = info->ring.req_prod_pvt; 1526 - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; 1482 + info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; 1527 1483 1528 1484 rc = blkfront_setup_indirect(info); 1529 1485 if (rc) { ··· 1535 1491 blk_queue_max_segments(info->rq, segs); 1536 1492 bio_list_init(&bio_list); 1537 1493 INIT_LIST_HEAD(&requests); 1538 - for (i = 0; i < BLK_RING_SIZE; i++) { 1494 + for (i = 0; i < BLK_RING_SIZE(info); i++) { 1539 1495 /* Not in use? 
*/ 1540 1496 if (!copy[i].request) 1541 1497 continue; ··· 1741 1697 segs = info->max_indirect_segments; 1742 1698 } 1743 1699 1744 - err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE); 1700 + err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE(info)); 1745 1701 if (err) 1746 1702 goto out_of_memory; 1747 1703 ··· 1751 1707 * grants, we need to allocate a set of pages that can be 1752 1708 * used for mapping indirect grefs 1753 1709 */ 1754 - int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE; 1710 + int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE(info); 1755 1711 1756 1712 BUG_ON(!list_empty(&info->indirect_pages)); 1757 1713 for (i = 0; i < num; i++) { ··· 1762 1718 } 1763 1719 } 1764 1720 1765 - for (i = 0; i < BLK_RING_SIZE; i++) { 1721 + for (i = 0; i < BLK_RING_SIZE(info); i++) { 1766 1722 info->shadow[i].grants_used = kzalloc( 1767 1723 sizeof(info->shadow[i].grants_used[0]) * segs, 1768 1724 GFP_NOIO); ··· 1784 1740 return 0; 1785 1741 1786 1742 out_of_memory: 1787 - for (i = 0; i < BLK_RING_SIZE; i++) { 1743 + for (i = 0; i < BLK_RING_SIZE(info); i++) { 1788 1744 kfree(info->shadow[i].grants_used); 1789 1745 info->shadow[i].grants_used = NULL; 1790 1746 kfree(info->shadow[i].sg); ··· 1950 1906 dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state); 1951 1907 1952 1908 switch (backend_state) { 1953 - case XenbusStateInitialising: 1954 1909 case XenbusStateInitWait: 1910 + if (dev->state != XenbusStateInitialising) 1911 + break; 1912 + if (talk_to_blkback(dev, info)) { 1913 + kfree(info); 1914 + dev_set_drvdata(&dev->dev, NULL); 1915 + break; 1916 + } 1917 + case XenbusStateInitialising: 1955 1918 case XenbusStateInitialised: 1956 1919 case XenbusStateReconfiguring: 1957 1920 case XenbusStateReconfigured: ··· 2141 2090 2142 2091 if (!xen_domain()) 2143 2092 return -ENODEV; 2093 + 2094 + if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) { 2095 + pr_info("Invalid max_ring_order (%d), 
will use default max: %d.\n", 2096 + xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER); 2097 + xen_blkif_max_ring_order = 0; 2098 + } 2144 2099 2145 2100 if (!xen_has_pv_disk_devices()) 2146 2101 return -ENODEV;
+1 -1
fs/block_dev.c
··· 43 43 return container_of(inode, struct bdev_inode, vfs_inode); 44 44 } 45 45 46 - inline struct block_device *I_BDEV(struct inode *inode) 46 + struct block_device *I_BDEV(struct inode *inode) 47 47 { 48 48 return &BDEV_I(inode)->bdev; 49 49 }