Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mmc: block: Add CQE support

Add CQE support to the block driver, including:
- optionally using DCMD for flush requests
- "manually" issuing discard requests
- issuing read / write requests to the CQE
- supporting block-layer timeouts
- handling recovery
- supporting re-tuning

CQE offers 25% - 50% better random multi-threaded I/O. There is a slight
(e.g. 2%) drop in sequential read speed but no observable change to sequential
write.

CQE automatically sends the commands to complete requests. However it only
supports reads / writes and so-called "direct commands" (DCMD). Furthermore
DCMD is limited to one command at a time, but discards require 3 commands.
That makes issuing discards through CQE very awkward, but some CQEs don't
support DCMD anyway. So for discards, the existing non-CQE approach is
taken, where the mmc core code issues the 3 commands one at a time i.e.
mmc_erase(). DCMD is instead used for issuing flushes.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Linus Walleij <linus.walleij@linaro.org>

authored by

Adrian Hunter and committed by
Ulf Hansson
1e8e55b6 81196976

+326 -6
+148 -2
drivers/mmc/core/block.c
··· 112 112 #define MMC_BLK_WRITE BIT(1) 113 113 #define MMC_BLK_DISCARD BIT(2) 114 114 #define MMC_BLK_SECDISCARD BIT(3) 115 + #define MMC_BLK_CQE_RECOVERY BIT(4) 115 116 116 117 /* 117 118 * Only set in main mmc_blk_data associated ··· 1731 1730 *do_data_tag_p = do_data_tag; 1732 1731 } 1733 1732 1733 + #define MMC_CQE_RETRIES 2 1734 + 1735 + static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req) 1736 + { 1737 + struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); 1738 + struct mmc_request *mrq = &mqrq->brq.mrq; 1739 + struct request_queue *q = req->q; 1740 + struct mmc_host *host = mq->card->host; 1741 + unsigned long flags; 1742 + bool put_card; 1743 + int err; 1744 + 1745 + mmc_cqe_post_req(host, mrq); 1746 + 1747 + if (mrq->cmd && mrq->cmd->error) 1748 + err = mrq->cmd->error; 1749 + else if (mrq->data && mrq->data->error) 1750 + err = mrq->data->error; 1751 + else 1752 + err = 0; 1753 + 1754 + if (err) { 1755 + if (mqrq->retries++ < MMC_CQE_RETRIES) 1756 + blk_mq_requeue_request(req, true); 1757 + else 1758 + blk_mq_end_request(req, BLK_STS_IOERR); 1759 + } else if (mrq->data) { 1760 + if (blk_update_request(req, BLK_STS_OK, mrq->data->bytes_xfered)) 1761 + blk_mq_requeue_request(req, true); 1762 + else 1763 + __blk_mq_end_request(req, BLK_STS_OK); 1764 + } else { 1765 + blk_mq_end_request(req, BLK_STS_OK); 1766 + } 1767 + 1768 + spin_lock_irqsave(q->queue_lock, flags); 1769 + 1770 + mq->in_flight[mmc_issue_type(mq, req)] -= 1; 1771 + 1772 + put_card = (mmc_tot_in_flight(mq) == 0); 1773 + 1774 + mmc_cqe_check_busy(mq); 1775 + 1776 + spin_unlock_irqrestore(q->queue_lock, flags); 1777 + 1778 + if (!mq->cqe_busy) 1779 + blk_mq_run_hw_queues(q, true); 1780 + 1781 + if (put_card) 1782 + mmc_put_card(mq->card, &mq->ctx); 1783 + } 1784 + 1785 + void mmc_blk_cqe_recovery(struct mmc_queue *mq) 1786 + { 1787 + struct mmc_card *card = mq->card; 1788 + struct mmc_host *host = card->host; 1789 + int err; 1790 + 1791 + pr_debug("%s: CQE recovery 
start\n", mmc_hostname(host)); 1792 + 1793 + err = mmc_cqe_recovery(host); 1794 + if (err) 1795 + mmc_blk_reset(mq->blkdata, host, MMC_BLK_CQE_RECOVERY); 1796 + else 1797 + mmc_blk_reset_success(mq->blkdata, MMC_BLK_CQE_RECOVERY); 1798 + 1799 + pr_debug("%s: CQE recovery done\n", mmc_hostname(host)); 1800 + } 1801 + 1802 + static void mmc_blk_cqe_req_done(struct mmc_request *mrq) 1803 + { 1804 + struct mmc_queue_req *mqrq = container_of(mrq, struct mmc_queue_req, 1805 + brq.mrq); 1806 + struct request *req = mmc_queue_req_to_req(mqrq); 1807 + struct request_queue *q = req->q; 1808 + struct mmc_queue *mq = q->queuedata; 1809 + 1810 + /* 1811 + * Block layer timeouts race with completions which means the normal 1812 + * completion path cannot be used during recovery. 1813 + */ 1814 + if (mq->in_recovery) 1815 + mmc_blk_cqe_complete_rq(mq, req); 1816 + else 1817 + blk_mq_complete_request(req); 1818 + } 1819 + 1820 + static int mmc_blk_cqe_start_req(struct mmc_host *host, struct mmc_request *mrq) 1821 + { 1822 + mrq->done = mmc_blk_cqe_req_done; 1823 + mrq->recovery_notifier = mmc_cqe_recovery_notifier; 1824 + 1825 + return mmc_cqe_start_req(host, mrq); 1826 + } 1827 + 1828 + static struct mmc_request *mmc_blk_cqe_prep_dcmd(struct mmc_queue_req *mqrq, 1829 + struct request *req) 1830 + { 1831 + struct mmc_blk_request *brq = &mqrq->brq; 1832 + 1833 + memset(brq, 0, sizeof(*brq)); 1834 + 1835 + brq->mrq.cmd = &brq->cmd; 1836 + brq->mrq.tag = req->tag; 1837 + 1838 + return &brq->mrq; 1839 + } 1840 + 1841 + static int mmc_blk_cqe_issue_flush(struct mmc_queue *mq, struct request *req) 1842 + { 1843 + struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); 1844 + struct mmc_request *mrq = mmc_blk_cqe_prep_dcmd(mqrq, req); 1845 + 1846 + mrq->cmd->opcode = MMC_SWITCH; 1847 + mrq->cmd->arg = (MMC_SWITCH_MODE_WRITE_BYTE << 24) | 1848 + (EXT_CSD_FLUSH_CACHE << 16) | 1849 + (1 << 8) | 1850 + EXT_CSD_CMD_SET_NORMAL; 1851 + mrq->cmd->flags = MMC_CMD_AC | MMC_RSP_R1B; 1852 + 1853 + 
return mmc_blk_cqe_start_req(mq->card->host, mrq); 1854 + } 1855 + 1856 + static int mmc_blk_cqe_issue_rw_rq(struct mmc_queue *mq, struct request *req) 1857 + { 1858 + struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); 1859 + 1860 + mmc_blk_data_prep(mq, mqrq, 0, NULL, NULL); 1861 + 1862 + return mmc_blk_cqe_start_req(mq->card->host, &mqrq->brq.mrq); 1863 + } 1864 + 1734 1865 static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, 1735 1866 struct mmc_card *card, 1736 1867 int disable_multi, ··· 2171 2038 { 2172 2039 struct mmc_queue *mq = req->q->queuedata; 2173 2040 2174 - mmc_blk_mq_complete_rq(mq, req); 2041 + if (mq->use_cqe) 2042 + mmc_blk_cqe_complete_rq(mq, req); 2043 + else 2044 + mmc_blk_mq_complete_rq(mq, req); 2175 2045 } 2176 2046 2177 2047 static void mmc_blk_mq_poll_completion(struct mmc_queue *mq, ··· 2348 2212 2349 2213 static int mmc_blk_wait_for_idle(struct mmc_queue *mq, struct mmc_host *host) 2350 2214 { 2215 + if (mq->use_cqe) 2216 + return host->cqe_ops->cqe_wait_for_idle(host); 2217 + 2351 2218 return mmc_blk_rw_wait(mq, NULL); 2352 2219 } 2353 2220 ··· 2389 2250 return MMC_REQ_FAILED_TO_START; 2390 2251 } 2391 2252 return MMC_REQ_FINISHED; 2253 + case MMC_ISSUE_DCMD: 2392 2254 case MMC_ISSUE_ASYNC: 2393 2255 switch (req_op(req)) { 2256 + case REQ_OP_FLUSH: 2257 + ret = mmc_blk_cqe_issue_flush(mq, req); 2258 + break; 2394 2259 case REQ_OP_READ: 2395 2260 case REQ_OP_WRITE: 2396 - ret = mmc_blk_mq_issue_rw_rq(mq, req); 2261 + if (mq->use_cqe) 2262 + ret = mmc_blk_cqe_issue_rw_rq(mq, req); 2263 + else 2264 + ret = mmc_blk_mq_issue_rw_rq(mq, req); 2397 2265 break; 2398 2266 default: 2399 2267 WARN_ON_ONCE(1);
+2
drivers/mmc/core/block.h
··· 7 7 8 8 void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req); 9 9 10 + void mmc_blk_cqe_recovery(struct mmc_queue *mq); 11 + 10 12 enum mmc_issued; 11 13 12 14 enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req);
+158 -4
drivers/mmc/core/queue.c
··· 40 40 return BLKPREP_OK; 41 41 } 42 42 43 + static inline bool mmc_cqe_dcmd_busy(struct mmc_queue *mq) 44 + { 45 + /* Allow only 1 DCMD at a time */ 46 + return mq->in_flight[MMC_ISSUE_DCMD]; 47 + } 48 + 49 + void mmc_cqe_check_busy(struct mmc_queue *mq) 50 + { 51 + if ((mq->cqe_busy & MMC_CQE_DCMD_BUSY) && !mmc_cqe_dcmd_busy(mq)) 52 + mq->cqe_busy &= ~MMC_CQE_DCMD_BUSY; 53 + 54 + mq->cqe_busy &= ~MMC_CQE_QUEUE_FULL; 55 + } 56 + 57 + static inline bool mmc_cqe_can_dcmd(struct mmc_host *host) 58 + { 59 + return host->caps2 & MMC_CAP2_CQE_DCMD; 60 + } 61 + 62 + enum mmc_issue_type mmc_cqe_issue_type(struct mmc_host *host, 63 + struct request *req) 64 + { 65 + switch (req_op(req)) { 66 + case REQ_OP_DRV_IN: 67 + case REQ_OP_DRV_OUT: 68 + case REQ_OP_DISCARD: 69 + case REQ_OP_SECURE_ERASE: 70 + return MMC_ISSUE_SYNC; 71 + case REQ_OP_FLUSH: 72 + return mmc_cqe_can_dcmd(host) ? MMC_ISSUE_DCMD : MMC_ISSUE_SYNC; 73 + default: 74 + return MMC_ISSUE_ASYNC; 75 + } 76 + } 77 + 43 78 enum mmc_issue_type mmc_issue_type(struct mmc_queue *mq, struct request *req) 44 79 { 80 + struct mmc_host *host = mq->card->host; 81 + 82 + if (mq->use_cqe) 83 + return mmc_cqe_issue_type(host, req); 84 + 45 85 if (req_op(req) == REQ_OP_READ || req_op(req) == REQ_OP_WRITE) 46 86 return MMC_ISSUE_ASYNC; 47 87 48 88 return MMC_ISSUE_SYNC; 49 89 } 50 90 91 + static void __mmc_cqe_recovery_notifier(struct mmc_queue *mq) 92 + { 93 + if (!mq->recovery_needed) { 94 + mq->recovery_needed = true; 95 + schedule_work(&mq->recovery_work); 96 + } 97 + } 98 + 99 + void mmc_cqe_recovery_notifier(struct mmc_request *mrq) 100 + { 101 + struct mmc_queue_req *mqrq = container_of(mrq, struct mmc_queue_req, 102 + brq.mrq); 103 + struct request *req = mmc_queue_req_to_req(mqrq); 104 + struct request_queue *q = req->q; 105 + struct mmc_queue *mq = q->queuedata; 106 + unsigned long flags; 107 + 108 + spin_lock_irqsave(q->queue_lock, flags); 109 + __mmc_cqe_recovery_notifier(mq); 110 + 
spin_unlock_irqrestore(q->queue_lock, flags); 111 + } 112 + 113 + static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req) 114 + { 115 + struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); 116 + struct mmc_request *mrq = &mqrq->brq.mrq; 117 + struct mmc_queue *mq = req->q->queuedata; 118 + struct mmc_host *host = mq->card->host; 119 + enum mmc_issue_type issue_type = mmc_issue_type(mq, req); 120 + bool recovery_needed = false; 121 + 122 + switch (issue_type) { 123 + case MMC_ISSUE_ASYNC: 124 + case MMC_ISSUE_DCMD: 125 + if (host->cqe_ops->cqe_timeout(host, mrq, &recovery_needed)) { 126 + if (recovery_needed) 127 + __mmc_cqe_recovery_notifier(mq); 128 + return BLK_EH_RESET_TIMER; 129 + } 130 + /* No timeout */ 131 + return BLK_EH_HANDLED; 132 + default: 133 + /* Timeout is handled by mmc core */ 134 + return BLK_EH_RESET_TIMER; 135 + } 136 + } 137 + 51 138 static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req, 52 139 bool reserved) 53 140 { 54 - return BLK_EH_RESET_TIMER; 141 + struct request_queue *q = req->q; 142 + struct mmc_queue *mq = q->queuedata; 143 + unsigned long flags; 144 + int ret; 145 + 146 + spin_lock_irqsave(q->queue_lock, flags); 147 + 148 + if (mq->recovery_needed || !mq->use_cqe) 149 + ret = BLK_EH_RESET_TIMER; 150 + else 151 + ret = mmc_cqe_timed_out(req); 152 + 153 + spin_unlock_irqrestore(q->queue_lock, flags); 154 + 155 + return ret; 156 + } 157 + 158 + static void mmc_mq_recovery_handler(struct work_struct *work) 159 + { 160 + struct mmc_queue *mq = container_of(work, struct mmc_queue, 161 + recovery_work); 162 + struct request_queue *q = mq->queue; 163 + 164 + mmc_get_card(mq->card, &mq->ctx); 165 + 166 + mq->in_recovery = true; 167 + 168 + mmc_blk_cqe_recovery(mq); 169 + 170 + mq->in_recovery = false; 171 + 172 + spin_lock_irq(q->queue_lock); 173 + mq->recovery_needed = false; 174 + spin_unlock_irq(q->queue_lock); 175 + 176 + mmc_put_card(mq->card, &mq->ctx); 177 + 178 + blk_mq_run_hw_queues(q, true); 55 179 
} 56 180 57 181 static int mmc_queue_thread(void *d) ··· 347 223 struct request_queue *q = req->q; 348 224 struct mmc_queue *mq = q->queuedata; 349 225 struct mmc_card *card = mq->card; 226 + struct mmc_host *host = card->host; 350 227 enum mmc_issue_type issue_type; 351 228 enum mmc_issued issued; 352 - bool get_card; 229 + bool get_card, cqe_retune_ok; 353 230 int ret; 354 231 355 232 if (mmc_card_removed(mq->card)) { ··· 362 237 363 238 spin_lock_irq(q->queue_lock); 364 239 240 + if (mq->recovery_needed) { 241 + spin_unlock_irq(q->queue_lock); 242 + return BLK_STS_RESOURCE; 243 + } 244 + 365 245 switch (issue_type) { 246 + case MMC_ISSUE_DCMD: 247 + if (mmc_cqe_dcmd_busy(mq)) { 248 + mq->cqe_busy |= MMC_CQE_DCMD_BUSY; 249 + spin_unlock_irq(q->queue_lock); 250 + return BLK_STS_RESOURCE; 251 + } 252 + break; 366 253 case MMC_ISSUE_ASYNC: 367 254 break; 368 255 default: ··· 391 254 392 255 mq->in_flight[issue_type] += 1; 393 256 get_card = (mmc_tot_in_flight(mq) == 1); 257 + cqe_retune_ok = (mmc_cqe_qcnt(mq) == 1); 394 258 395 259 spin_unlock_irq(q->queue_lock); 396 260 ··· 402 264 403 265 if (get_card) 404 266 mmc_get_card(card, &mq->ctx); 267 + 268 + if (mq->use_cqe) { 269 + host->retune_now = host->need_retune && cqe_retune_ok && 270 + !host->hold_retune; 271 + } 405 272 406 273 blk_mq_start_request(req); 407 274 ··· 469 326 /* Initialize thread_sem even if it is not used */ 470 327 sema_init(&mq->thread_sem, 1); 471 328 329 + INIT_WORK(&mq->recovery_work, mmc_mq_recovery_handler); 472 330 INIT_WORK(&mq->complete_work, mmc_blk_mq_complete_work); 473 331 474 332 mutex_init(&mq->complete_lock); ··· 519 375 static int mmc_mq_init(struct mmc_queue *mq, struct mmc_card *card, 520 376 spinlock_t *lock) 521 377 { 378 + struct mmc_host *host = card->host; 522 379 int q_depth; 523 380 int ret; 524 381 525 - q_depth = MMC_QUEUE_DEPTH; 382 + /* 383 + * The queue depth for CQE must match the hardware because the request 384 + * tag is used to index the hardware queue. 
385 + */ 386 + if (mq->use_cqe) 387 + q_depth = min_t(int, card->ext_csd.cmdq_depth, host->cqe_qdepth); 388 + else 389 + q_depth = MMC_QUEUE_DEPTH; 526 390 527 391 ret = mmc_mq_init_queue(mq, q_depth, &mmc_mq_ops, lock); 528 392 if (ret) ··· 560 408 561 409 mq->card = card; 562 410 563 - if (mmc_host_use_blk_mq(host)) 411 + mq->use_cqe = host->cqe_enabled; 412 + 413 + if (mq->use_cqe || mmc_host_use_blk_mq(host)) 564 414 return mmc_mq_init(mq, card, lock); 565 415 566 416 mq->queue = blk_alloc_queue(GFP_KERNEL);
+18
drivers/mmc/core/queue.h
··· 17 17 18 18 enum mmc_issue_type { 19 19 MMC_ISSUE_SYNC, 20 + MMC_ISSUE_DCMD, 20 21 MMC_ISSUE_ASYNC, 21 22 MMC_ISSUE_MAX, 22 23 }; ··· 93 92 int qcnt; 94 93 95 94 int in_flight[MMC_ISSUE_MAX]; 95 + unsigned int cqe_busy; 96 + #define MMC_CQE_DCMD_BUSY BIT(0) 97 + #define MMC_CQE_QUEUE_FULL BIT(1) 98 + bool use_cqe; 99 + bool recovery_needed; 100 + bool in_recovery; 96 101 bool rw_wait; 97 102 bool waiting; 103 + struct work_struct recovery_work; 98 104 wait_queue_head_t wait; 99 105 struct request *complete_req; 100 106 struct mutex complete_lock; ··· 116 108 extern unsigned int mmc_queue_map_sg(struct mmc_queue *, 117 109 struct mmc_queue_req *); 118 110 111 + void mmc_cqe_check_busy(struct mmc_queue *mq); 112 + void mmc_cqe_recovery_notifier(struct mmc_request *mrq); 113 + 119 114 enum mmc_issue_type mmc_issue_type(struct mmc_queue *mq, struct request *req); 120 115 121 116 static inline int mmc_tot_in_flight(struct mmc_queue *mq) 122 117 { 123 118 return mq->in_flight[MMC_ISSUE_SYNC] + 119 + mq->in_flight[MMC_ISSUE_DCMD] + 120 + mq->in_flight[MMC_ISSUE_ASYNC]; 121 + } 122 + 123 + static inline int mmc_cqe_qcnt(struct mmc_queue *mq) 124 + { 125 + return mq->in_flight[MMC_ISSUE_DCMD] + 124 126 mq->in_flight[MMC_ISSUE_ASYNC]; 125 127 } 126 128