Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mmc: bounce requests for simple hosts

Some hosts cannot do scatter/gather in hardware. Since not doing sg
is such a big performance hit, we (optionally) bounce the requests
to a simple linear buffer that we hand over to the driver.

Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>

+208 -15
+18
drivers/mmc/card/Kconfig
··· 14 14 mount the filesystem. Almost everyone wishing MMC support 15 15 should say Y or M here. 16 16 17 + config MMC_BLOCK_BOUNCE 18 + bool "Use bounce buffer for simple hosts" 19 + depends on MMC_BLOCK 20 + default y 21 + help 22 + SD/MMC is a high latency protocol where it is crucial to 23 + send large requests in order to get high performance. Many 24 + controllers, however, are restricted to contiguous memory 25 + (i.e. they can't do scatter-gather), something the kernel 26 + rarely can provide. 27 + 28 + Say Y here to help these restricted hosts by bouncing 29 + requests back and forth from a large buffer. You will get 30 + a big performance gain at the cost of up to 64 KiB of 31 + physical memory. 32 + 33 + If unsure, say Y here. 34 +
+6 -1
drivers/mmc/card/block.c
··· 262 262 } 263 263 264 264 brq.data.sg = mq->sg; 265 - brq.data.sg_len = blk_rq_map_sg(req->q, req, brq.data.sg); 265 + brq.data.sg_len = mmc_queue_map_sg(mq); 266 + 267 + mmc_queue_bounce_pre(mq); 266 268 267 269 if (brq.data.blocks != 268 270 (req->nr_sectors >> (md->block_bits - 9))) { ··· 281 279 } 282 280 283 281 mmc_wait_for_req(card->host, &brq.mrq); 282 + 283 + mmc_queue_bounce_post(mq); 284 + 284 285 if (brq.cmd.error) { 285 286 printk(KERN_ERR "%s: error %d sending read/write command\n", 286 287 req->rq_disk->disk_name, brq.cmd.error);
+177 -14
drivers/mmc/card/queue.c
··· 17 17 #include <linux/mmc/host.h> 18 18 #include "queue.h" 19 19 20 + #define MMC_QUEUE_BOUNCESZ 65536 21 + 20 22 #define MMC_QUEUE_SUSPENDED (1 << 0) 21 23 22 24 /* ··· 120 118 struct mmc_host *host = card->host; 121 119 u64 limit = BLK_BOUNCE_HIGH; 122 120 int ret; 121 + unsigned int bouncesz; 123 122 124 123 if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask) 125 124 limit = *mmc_dev(host)->dma_mask; ··· 130 127 if (!mq->queue) 131 128 return -ENOMEM; 132 129 133 - blk_queue_prep_rq(mq->queue, mmc_prep_request); 134 - blk_queue_bounce_limit(mq->queue, limit); 135 - blk_queue_max_sectors(mq->queue, host->max_req_size / 512); 136 - blk_queue_max_phys_segments(mq->queue, host->max_phys_segs); 137 - blk_queue_max_hw_segments(mq->queue, host->max_hw_segs); 138 - blk_queue_max_segment_size(mq->queue, host->max_seg_size); 139 - 140 130 mq->queue->queuedata = mq; 141 131 mq->req = NULL; 142 132 143 - mq->sg = kmalloc(sizeof(struct scatterlist) * host->max_phys_segs, 144 - GFP_KERNEL); 145 - if (!mq->sg) { 146 - ret = -ENOMEM; 147 - goto cleanup_queue; 133 + blk_queue_prep_rq(mq->queue, mmc_prep_request); 134 + 135 + #ifdef CONFIG_MMC_BLOCK_BOUNCE 136 + if (host->max_hw_segs == 1) { 137 + bouncesz = MMC_QUEUE_BOUNCESZ; 138 + 139 + if (bouncesz > host->max_req_size) 140 + bouncesz = host->max_req_size; 141 + if (bouncesz > host->max_seg_size) 142 + bouncesz = host->max_seg_size; 143 + 144 + mq->bounce_buf = kmalloc(bouncesz, GFP_KERNEL); 145 + if (!mq->bounce_buf) { 146 + printk(KERN_WARNING "%s: unable to allocate " 147 + "bounce buffer\n", mmc_card_name(card)); 148 + } else { 149 + blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_HIGH); 150 + blk_queue_max_sectors(mq->queue, bouncesz / 512); 151 + blk_queue_max_phys_segments(mq->queue, bouncesz / 512); 152 + blk_queue_max_hw_segments(mq->queue, bouncesz / 512); 153 + blk_queue_max_segment_size(mq->queue, bouncesz); 154 + 155 + mq->sg = kmalloc(sizeof(struct scatterlist), 156 + GFP_KERNEL); 157 + if (!mq->sg) { 158 
+ ret = -ENOMEM; 159 + goto free_bounce_buf; 160 + } 161 + 162 + mq->bounce_sg = kmalloc(sizeof(struct scatterlist) * 163 + bouncesz / 512, GFP_KERNEL); 164 + if (!mq->bounce_sg) { 165 + ret = -ENOMEM; 166 + goto free_sg; 167 + } 168 + } 169 + } 170 + #endif 171 + 172 + if (!mq->bounce_buf) { 173 + blk_queue_bounce_limit(mq->queue, limit); 174 + blk_queue_max_sectors(mq->queue, host->max_req_size / 512); 175 + blk_queue_max_phys_segments(mq->queue, host->max_phys_segs); 176 + blk_queue_max_hw_segments(mq->queue, host->max_hw_segs); 177 + blk_queue_max_segment_size(mq->queue, host->max_seg_size); 178 + 179 + mq->sg = kmalloc(sizeof(struct scatterlist) * 180 + host->max_phys_segs, GFP_KERNEL); 181 + if (!mq->sg) { 182 + ret = -ENOMEM; 183 + goto cleanup_queue; 184 + } 148 185 } 149 186 150 187 init_MUTEX(&mq->thread_sem); ··· 192 149 mq->thread = kthread_run(mmc_queue_thread, mq, "mmcqd"); 193 150 if (IS_ERR(mq->thread)) { 194 151 ret = PTR_ERR(mq->thread); 195 - goto free_sg; 152 + goto free_bounce_sg; 196 153 } 197 154 198 155 return 0; 199 - 156 + free_bounce_sg: 157 + if (mq->bounce_sg) 158 + kfree(mq->bounce_sg); 159 + mq->bounce_sg = NULL; 200 160 free_sg: 201 161 kfree(mq->sg); 202 162 mq->sg = NULL; 163 + free_bounce_buf: 164 + if (mq->bounce_buf) 165 + kfree(mq->bounce_buf); 166 + mq->bounce_buf = NULL; 203 167 cleanup_queue: 204 168 blk_cleanup_queue(mq->queue); 205 169 return ret; ··· 228 178 /* Then terminate our worker thread */ 229 179 kthread_stop(mq->thread); 230 180 181 + if (mq->bounce_sg) 182 + kfree(mq->bounce_sg); 183 + mq->bounce_sg = NULL; 184 + 231 185 kfree(mq->sg); 232 186 mq->sg = NULL; 187 + 188 + if (mq->bounce_buf) 189 + kfree(mq->bounce_buf); 190 + mq->bounce_buf = NULL; 233 191 234 192 blk_cleanup_queue(mq->queue); 235 193 ··· 287 229 blk_start_queue(q); 288 230 spin_unlock_irqrestore(q->queue_lock, flags); 289 231 } 232 + } 233 + 234 + static void copy_sg(struct scatterlist *dst, unsigned int dst_len, 235 + struct scatterlist *src, 
unsigned int src_len) 236 + { 237 + unsigned int chunk; 238 + char *dst_buf, *src_buf; 239 + unsigned int dst_size, src_size; 240 + 241 + dst_buf = NULL; 242 + src_buf = NULL; 243 + dst_size = 0; 244 + src_size = 0; 245 + 246 + while (src_len) { 247 + BUG_ON(dst_len == 0); 248 + 249 + if (dst_size == 0) { 250 + dst_buf = page_address(dst->page) + dst->offset; 251 + dst_size = dst->length; 252 + } 253 + 254 + if (src_size == 0) { 255 + src_buf = page_address(src->page) + src->offset; 256 + src_size = src->length; 257 + } 258 + 259 + chunk = min(dst_size, src_size); 260 + 261 + memcpy(dst_buf, src_buf, chunk); 262 + 263 + dst_buf += chunk; 264 + src_buf += chunk; 265 + dst_size -= chunk; 266 + src_size -= chunk; 267 + 268 + if (dst_size == 0) { 269 + dst++; 270 + dst_len--; 271 + } 272 + 273 + if (src_size == 0) { 274 + src++; 275 + src_len--; 276 + } 277 + } 278 + } 279 + 280 + unsigned int mmc_queue_map_sg(struct mmc_queue *mq) 281 + { 282 + unsigned int sg_len; 283 + 284 + if (!mq->bounce_buf) 285 + return blk_rq_map_sg(mq->queue, mq->req, mq->sg); 286 + 287 + BUG_ON(!mq->bounce_sg); 288 + 289 + sg_len = blk_rq_map_sg(mq->queue, mq->req, mq->bounce_sg); 290 + 291 + mq->bounce_sg_len = sg_len; 292 + 293 + /* 294 + * Shortcut in the event we only get a single entry. 
295 + */ 296 + if (sg_len == 1) { 297 + memcpy(mq->sg, mq->bounce_sg, sizeof(struct scatterlist)); 298 + return 1; 299 + } 300 + 301 + mq->sg[0].page = virt_to_page(mq->bounce_buf); 302 + mq->sg[0].offset = offset_in_page(mq->bounce_buf); 303 + mq->sg[0].length = 0; 304 + 305 + while (sg_len) { 306 + mq->sg[0].length += mq->bounce_sg[sg_len - 1].length; 307 + sg_len--; 308 + } 309 + 310 + return 1; 311 + } 312 + 313 + void mmc_queue_bounce_pre(struct mmc_queue *mq) 314 + { 315 + if (!mq->bounce_buf) 316 + return; 317 + 318 + if (mq->bounce_sg_len == 1) 319 + return; 320 + if (rq_data_dir(mq->req) != WRITE) 321 + return; 322 + 323 + copy_sg(mq->sg, 1, mq->bounce_sg, mq->bounce_sg_len); 324 + } 325 + 326 + void mmc_queue_bounce_post(struct mmc_queue *mq) 327 + { 328 + if (!mq->bounce_buf) 329 + return; 330 + 331 + if (mq->bounce_sg_len == 1) 332 + return; 333 + if (rq_data_dir(mq->req) != READ) 334 + return; 335 + 336 + copy_sg(mq->bounce_sg, mq->bounce_sg_len, mq->sg, 1); 290 337 } 291 338
+7
drivers/mmc/card/queue.h
··· 14 14 void *data; 15 15 struct request_queue *queue; 16 16 struct scatterlist *sg; 17 + char *bounce_buf; 18 + struct scatterlist *bounce_sg; 19 + unsigned int bounce_sg_len; 17 20 }; 18 21 19 22 extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *); 20 23 extern void mmc_cleanup_queue(struct mmc_queue *); 21 24 extern void mmc_queue_suspend(struct mmc_queue *); 22 25 extern void mmc_queue_resume(struct mmc_queue *); 26 + 27 + extern unsigned int mmc_queue_map_sg(struct mmc_queue *); 28 + extern void mmc_queue_bounce_pre(struct mmc_queue *); 29 + extern void mmc_queue_bounce_post(struct mmc_queue *); 23 30 24 31 #endif