Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

async_tx: fix multiple dependency submission

Shrink struct dma_async_tx_descriptor and introduce
async_tx_channel_switch to properly inject a channel switch interrupt in
the descriptor stream. This simplifies the locking model as drivers no
longer need to handle dma_async_tx_descriptor.lock.

Acked-by: Shannon Nelson <shannon.nelson@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

+171 -48
+163 -36
crypto/async_tx/async_tx.c
··· 89 89 iter = tx; 90 90 91 91 /* find the root of the unsubmitted dependency chain */ 92 - while (iter->cookie == -EBUSY) { 92 + do { 93 93 parent = iter->parent; 94 - if (parent && parent->cookie == -EBUSY) 95 - iter = iter->parent; 96 - else 94 + if (!parent) 97 95 break; 98 - } 96 + else 97 + iter = parent; 98 + } while (parent); 99 + 100 + /* there is a small window for ->parent == NULL and 101 + * ->cookie == -EBUSY 102 + */ 103 + while (iter->cookie == -EBUSY) 104 + cpu_relax(); 99 105 100 106 status = dma_sync_wait(iter->chan, iter->cookie); 101 107 } while (status == DMA_IN_PROGRESS || (iter != tx)); ··· 117 111 void 118 112 async_tx_run_dependencies(struct dma_async_tx_descriptor *tx) 119 113 { 120 - struct dma_async_tx_descriptor *dep_tx, *_dep_tx; 121 - struct dma_device *dev; 114 + struct dma_async_tx_descriptor *next = tx->next; 122 115 struct dma_chan *chan; 123 116 124 - list_for_each_entry_safe(dep_tx, _dep_tx, &tx->depend_list, 125 - depend_node) { 126 - chan = dep_tx->chan; 127 - dev = chan->device; 128 - /* we can't depend on ourselves */ 129 - BUG_ON(chan == tx->chan); 130 - list_del(&dep_tx->depend_node); 131 - tx->tx_submit(dep_tx); 117 + if (!next) 118 + return; 132 119 133 - /* we need to poke the engine as client code does not 134 - * know about dependency submission events 135 - */ 136 - dev->device_issue_pending(chan); 120 + tx->next = NULL; 121 + chan = next->chan; 122 + 123 + /* keep submitting up until a channel switch is detected 124 + * in that case we will be called again as a result of 125 + * processing the interrupt from async_tx_channel_switch 126 + */ 127 + while (next && next->chan == chan) { 128 + struct dma_async_tx_descriptor *_next; 129 + 130 + spin_lock_bh(&next->lock); 131 + next->parent = NULL; 132 + _next = next->next; 133 + next->next = NULL; 134 + spin_unlock_bh(&next->lock); 135 + 136 + next->tx_submit(next); 137 + next = _next; 137 138 } 139 + 140 + chan->device->device_issue_pending(chan); 138 141 } 139 142 
EXPORT_SYMBOL_GPL(async_tx_run_dependencies); 140 143 ··· 412 397 } 413 398 #endif 414 399 400 + 401 + /** 402 + * async_tx_channel_switch - queue an interrupt descriptor with a dependency 403 + * pre-attached. 404 + * @depend_tx: the operation that must finish before the new operation runs 405 + * @tx: the new operation 406 + */ 407 + static void 408 + async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, 409 + struct dma_async_tx_descriptor *tx) 410 + { 411 + struct dma_chan *chan; 412 + struct dma_device *device; 413 + struct dma_async_tx_descriptor *intr_tx = (void *) ~0; 414 + 415 + /* first check to see if we can still append to depend_tx */ 416 + spin_lock_bh(&depend_tx->lock); 417 + if (depend_tx->parent && depend_tx->chan == tx->chan) { 418 + tx->parent = depend_tx; 419 + depend_tx->next = tx; 420 + intr_tx = NULL; 421 + } 422 + spin_unlock_bh(&depend_tx->lock); 423 + 424 + if (!intr_tx) 425 + return; 426 + 427 + chan = depend_tx->chan; 428 + device = chan->device; 429 + 430 + /* see if we can schedule an interrupt 431 + * otherwise poll for completion 432 + */ 433 + if (dma_has_cap(DMA_INTERRUPT, device->cap_mask)) 434 + intr_tx = device->device_prep_dma_interrupt(chan); 435 + else 436 + intr_tx = NULL; 437 + 438 + if (intr_tx) { 439 + intr_tx->callback = NULL; 440 + intr_tx->callback_param = NULL; 441 + tx->parent = intr_tx; 442 + /* safe to set ->next outside the lock since we know we are 443 + * not submitted yet 444 + */ 445 + intr_tx->next = tx; 446 + 447 + /* check if we need to append */ 448 + spin_lock_bh(&depend_tx->lock); 449 + if (depend_tx->parent) { 450 + intr_tx->parent = depend_tx; 451 + depend_tx->next = intr_tx; 452 + async_tx_ack(intr_tx); 453 + intr_tx = NULL; 454 + } 455 + spin_unlock_bh(&depend_tx->lock); 456 + 457 + if (intr_tx) { 458 + intr_tx->parent = NULL; 459 + intr_tx->tx_submit(intr_tx); 460 + async_tx_ack(intr_tx); 461 + } 462 + } else { 463 + if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) 464 + panic("%s: 
DMA_ERROR waiting for depend_tx\n", 465 + __func__); 466 + tx->tx_submit(tx); 467 + } 468 + } 469 + 470 + 471 + /** 472 + * submit_disposition - while holding depend_tx->lock we must avoid submitting 473 + * new operations to prevent a circular locking dependency with 474 + * drivers that already hold a channel lock when calling 475 + * async_tx_run_dependencies. 476 + * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock 477 + * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch 478 + * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly 479 + */ 480 + enum submit_disposition { 481 + ASYNC_TX_SUBMITTED, 482 + ASYNC_TX_CHANNEL_SWITCH, 483 + ASYNC_TX_DIRECT_SUBMIT, 484 + }; 485 + 415 486 void 416 487 async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, 417 488 enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, ··· 506 405 tx->callback = cb_fn; 507 406 tx->callback_param = cb_param; 508 407 509 - /* set this new tx to run after depend_tx if: 510 - * 1/ a dependency exists (depend_tx is !NULL) 511 - * 2/ the tx can not be submitted to the current channel 512 - */ 513 - if (depend_tx && depend_tx->chan != chan) { 514 - /* if ack is already set then we cannot be sure 515 - * we are referring to the correct operation 516 - */ 517 - BUG_ON(depend_tx->ack); 408 + if (depend_tx) { 409 + enum submit_disposition s; 518 410 519 - tx->parent = depend_tx; 411 + /* sanity check the dependency chain: 412 + * 1/ if ack is already set then we cannot be sure 413 + * we are referring to the correct operation 414 + * 2/ dependencies are 1:1 i.e. 
two transactions can 415 + * not depend on the same parent 416 + */ 417 + BUG_ON(depend_tx->ack || depend_tx->next || tx->parent); 418 + 419 + /* the lock prevents async_tx_run_dependencies from missing 420 + * the setting of ->next when ->parent != NULL 421 + */ 520 422 spin_lock_bh(&depend_tx->lock); 521 - list_add_tail(&tx->depend_node, &depend_tx->depend_list); 522 - if (depend_tx->cookie == 0) { 523 - struct dma_chan *dep_chan = depend_tx->chan; 524 - struct dma_device *dep_dev = dep_chan->device; 525 - dep_dev->device_dependency_added(dep_chan); 423 + if (depend_tx->parent) { 424 + /* we have a parent so we can not submit directly 425 + * if we are staying on the same channel: append 426 + * else: channel switch 427 + */ 428 + if (depend_tx->chan == chan) { 429 + tx->parent = depend_tx; 430 + depend_tx->next = tx; 431 + s = ASYNC_TX_SUBMITTED; 432 + } else 433 + s = ASYNC_TX_CHANNEL_SWITCH; 434 + } else { 435 + /* we do not have a parent so we may be able to submit 436 + * directly if we are staying on the same channel 437 + */ 438 + if (depend_tx->chan == chan) 439 + s = ASYNC_TX_DIRECT_SUBMIT; 440 + else 441 + s = ASYNC_TX_CHANNEL_SWITCH; 526 442 } 527 443 spin_unlock_bh(&depend_tx->lock); 528 444 529 - /* schedule an interrupt to trigger the channel switch */ 530 - async_trigger_callback(ASYNC_TX_ACK, depend_tx, NULL, NULL); 445 + switch (s) { 446 + case ASYNC_TX_SUBMITTED: 447 + break; 448 + case ASYNC_TX_CHANNEL_SWITCH: 449 + async_tx_channel_switch(depend_tx, tx); 450 + break; 451 + case ASYNC_TX_DIRECT_SUBMIT: 452 + tx->parent = NULL; 453 + tx->tx_submit(tx); 454 + break; 455 + } 531 456 } else { 532 457 tx->parent = NULL; 533 458 tx->tx_submit(tx);
+0 -2
drivers/dma/dmaengine.c
··· 600 600 { 601 601 tx->chan = chan; 602 602 spin_lock_init(&tx->lock); 603 - INIT_LIST_HEAD(&tx->depend_node); 604 - INIT_LIST_HEAD(&tx->depend_list); 605 603 } 606 604 EXPORT_SYMBOL(dma_async_tx_descriptor_init); 607 605
+5 -4
drivers/dma/iop-adma.c
··· 63 63 struct iop_adma_chan *iop_chan, dma_cookie_t cookie) 64 64 { 65 65 BUG_ON(desc->async_tx.cookie < 0); 66 - spin_lock_bh(&desc->async_tx.lock); 67 66 if (desc->async_tx.cookie > 0) { 68 67 cookie = desc->async_tx.cookie; 69 68 desc->async_tx.cookie = 0; ··· 100 101 101 102 /* run dependent operations */ 102 103 async_tx_run_dependencies(&desc->async_tx); 103 - spin_unlock_bh(&desc->async_tx.lock); 104 104 105 105 return cookie; 106 106 } ··· 273 275 274 276 static void iop_adma_tasklet(unsigned long data) 275 277 { 276 - struct iop_adma_chan *chan = (struct iop_adma_chan *) data; 277 - __iop_adma_slot_cleanup(chan); 278 + struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data; 279 + 280 + spin_lock(&iop_chan->lock); 281 + __iop_adma_slot_cleanup(iop_chan); 282 + spin_unlock(&iop_chan->lock); 278 283 } 279 284 280 285 static struct iop_adma_desc_slot *
+3 -6
include/linux/dmaengine.h
··· 221 221 * @callback: routine to call after this operation is complete 222 222 * @callback_param: general parameter to pass to the callback routine 223 223 * ---async_tx api specific fields--- 224 - * @depend_list: at completion this list of transactions are submitted 225 - * @depend_node: allow this transaction to be executed after another 226 - * transaction has completed, possibly on another channel 224 + * @next: at completion submit this descriptor 227 225 * @parent: pointer to the next level up in the dependency chain 228 - * @lock: protect the dependency list 226 + * @lock: protect the parent and next pointers 229 227 */ 230 228 struct dma_async_tx_descriptor { 231 229 dma_cookie_t cookie; ··· 234 236 dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx); 235 237 dma_async_tx_callback callback; 236 238 void *callback_param; 237 - struct list_head depend_list; 238 - struct list_head depend_node; 239 + struct dma_async_tx_descriptor *next; 239 240 struct dma_async_tx_descriptor *parent; 240 241 spinlock_t lock; 241 242 };