Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fsldma: fix controller lockups

Enabling poisoning in the dmapool API quickly showed that the DMA
controller was fetching descriptors that should not have been in use.
This has caused intermittent controller lockups during testing.

I have been unable to figure out the exact set of conditions which cause
this to happen. However, I believe it is related to the driver using the
hardware registers to track whether the controller is busy or not. The
code can incorrectly decide that the hardware is idle due to lag between
register writes and the hardware actually becoming busy.

To fix this, the driver has been reworked to explicitly track the state
of the hardware, rather than try to guess what it is doing based on the
register values.

This change has passed dmatest with 10 threads per channel and 100000
iterations per thread, repeated several times, without error. Previously,
this test would fail within a few seconds.

Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Authored by Ira Snyder; committed by Dan Williams.
f04cd407 31f4306c

+103 -126
+102 -126
drivers/dma/fsldma.c
··· 68 68 return DMA_IN(chan, &chan->regs->cdar, 64) & ~FSL_DMA_SNEN; 69 69 } 70 70 71 - static dma_addr_t get_ndar(struct fsldma_chan *chan) 72 - { 73 - return DMA_IN(chan, &chan->regs->ndar, 64); 74 - } 75 - 76 71 static u32 get_bcr(struct fsldma_chan *chan) 77 72 { 78 73 return DMA_IN(chan, &chan->regs->bcr, 32); ··· 138 143 case FSL_DMA_IP_85XX: 139 144 /* Set the channel to below modes: 140 145 * EIE - Error interrupt enable 141 - * EOSIE - End of segments interrupt enable (basic mode) 142 146 * EOLNIE - End of links interrupt enable 143 147 * BWC - Bandwidth sharing among channels 144 148 */ 145 149 DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_BWC 146 - | FSL_DMA_MR_EIE | FSL_DMA_MR_EOLNIE 147 - | FSL_DMA_MR_EOSIE, 32); 150 + | FSL_DMA_MR_EIE | FSL_DMA_MR_EOLNIE, 32); 148 151 break; 149 152 case FSL_DMA_IP_83XX: 150 153 /* Set the channel to below modes: ··· 161 168 return (!(sr & FSL_DMA_SR_CB)) || (sr & FSL_DMA_SR_CH); 162 169 } 163 170 171 + /* 172 + * Start the DMA controller 173 + * 174 + * Preconditions: 175 + * - the CDAR register must point to the start descriptor 176 + * - the MRn[CS] bit must be cleared 177 + */ 164 178 static void dma_start(struct fsldma_chan *chan) 165 179 { 166 180 u32 mode; 167 181 168 182 mode = DMA_IN(chan, &chan->regs->mr, 32); 169 183 170 - if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) { 171 - if (chan->feature & FSL_DMA_CHAN_PAUSE_EXT) { 172 - DMA_OUT(chan, &chan->regs->bcr, 0, 32); 173 - mode |= FSL_DMA_MR_EMP_EN; 174 - } else { 175 - mode &= ~FSL_DMA_MR_EMP_EN; 176 - } 184 + if (chan->feature & FSL_DMA_CHAN_PAUSE_EXT) { 185 + DMA_OUT(chan, &chan->regs->bcr, 0, 32); 186 + mode |= FSL_DMA_MR_EMP_EN; 187 + } else { 188 + mode &= ~FSL_DMA_MR_EMP_EN; 177 189 } 178 190 179 - if (chan->feature & FSL_DMA_CHAN_START_EXT) 191 + if (chan->feature & FSL_DMA_CHAN_START_EXT) { 180 192 mode |= FSL_DMA_MR_EMS_EN; 181 - else 193 + } else { 194 + mode &= ~FSL_DMA_MR_EMS_EN; 182 195 mode |= FSL_DMA_MR_CS; 196 + } 183 197 184 198 
DMA_OUT(chan, &chan->regs->mr, mode, 32); 185 199 } ··· 760 760 761 761 switch (cmd) { 762 762 case DMA_TERMINATE_ALL: 763 + spin_lock_irqsave(&chan->desc_lock, flags); 764 + 763 765 /* Halt the DMA engine */ 764 766 dma_halt(chan); 765 - 766 - spin_lock_irqsave(&chan->desc_lock, flags); 767 767 768 768 /* Remove and free all of the descriptors in the LD queue */ 769 769 fsldma_free_desc_list(chan, &chan->ld_pending); 770 770 fsldma_free_desc_list(chan, &chan->ld_running); 771 + chan->idle = true; 771 772 772 773 spin_unlock_irqrestore(&chan->desc_lock, flags); 773 774 return 0; ··· 806 805 } 807 806 808 807 /** 809 - * fsl_dma_update_completed_cookie - Update the completed cookie. 810 - * @chan : Freescale DMA channel 811 - * 812 - * CONTEXT: hardirq 813 - */ 814 - static void fsl_dma_update_completed_cookie(struct fsldma_chan *chan) 815 - { 816 - struct fsl_desc_sw *desc; 817 - unsigned long flags; 818 - dma_cookie_t cookie; 819 - 820 - spin_lock_irqsave(&chan->desc_lock, flags); 821 - 822 - if (list_empty(&chan->ld_running)) { 823 - chan_dbg(chan, "no running descriptors\n"); 824 - goto out_unlock; 825 - } 826 - 827 - /* Get the last descriptor, update the cookie to that */ 828 - desc = to_fsl_desc(chan->ld_running.prev); 829 - if (dma_is_idle(chan)) 830 - cookie = desc->async_tx.cookie; 831 - else { 832 - cookie = desc->async_tx.cookie - 1; 833 - if (unlikely(cookie < DMA_MIN_COOKIE)) 834 - cookie = DMA_MAX_COOKIE; 835 - } 836 - 837 - chan->completed_cookie = cookie; 838 - 839 - out_unlock: 840 - spin_unlock_irqrestore(&chan->desc_lock, flags); 841 - } 842 - 843 - /** 844 - * fsldma_desc_status - Check the status of a descriptor 845 - * @chan: Freescale DMA channel 846 - * @desc: DMA SW descriptor 847 - * 848 - * This function will return the status of the given descriptor 849 - */ 850 - static enum dma_status fsldma_desc_status(struct fsldma_chan *chan, 851 - struct fsl_desc_sw *desc) 852 - { 853 - return dma_async_is_complete(desc->async_tx.cookie, 854 - 
chan->completed_cookie, 855 - chan->common.cookie); 856 - } 857 - 858 - /** 859 808 * fsl_chan_ld_cleanup - Clean up link descriptors 860 809 * @chan : Freescale DMA channel 861 810 * 862 - * This function clean up the ld_queue of DMA channel. 811 + * This function is run after the queue of running descriptors has been 812 + * executed by the DMA engine. It will run any callbacks, and then free 813 + * the descriptors. 814 + * 815 + * HARDWARE STATE: idle 863 816 */ 864 817 static void fsl_chan_ld_cleanup(struct fsldma_chan *chan) 865 818 { ··· 822 867 823 868 spin_lock_irqsave(&chan->desc_lock, flags); 824 869 825 - chan_dbg(chan, "chan completed_cookie = %d\n", chan->completed_cookie); 870 + /* if the ld_running list is empty, there is nothing to do */ 871 + if (list_empty(&chan->ld_running)) { 872 + chan_dbg(chan, "no descriptors to cleanup\n"); 873 + goto out_unlock; 874 + } 875 + 876 + /* 877 + * Get the last descriptor, update the cookie to it 878 + * 879 + * This is done before callbacks run so that clients can check the 880 + * status of their DMA transfer inside the callback. 
881 + */ 882 + desc = to_fsl_desc(chan->ld_running.prev); 883 + chan->completed_cookie = desc->async_tx.cookie; 884 + chan_dbg(chan, "completed_cookie = %d\n", chan->completed_cookie); 885 + 886 + /* Run the callback for each descriptor, in order */ 826 887 list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) { 827 888 dma_async_tx_callback callback; 828 889 void *callback_param; 829 - 830 - if (fsldma_desc_status(chan, desc) == DMA_IN_PROGRESS) 831 - break; 832 890 833 891 /* Remove from the list of running transactions */ 834 892 list_del(&desc->node); ··· 866 898 dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys); 867 899 } 868 900 901 + out_unlock: 869 902 spin_unlock_irqrestore(&chan->desc_lock, flags); 870 903 } 871 904 ··· 874 905 * fsl_chan_xfer_ld_queue - transfer any pending transactions 875 906 * @chan : Freescale DMA channel 876 907 * 877 - * This will make sure that any pending transactions will be run. 878 - * If the DMA controller is idle, it will be started. Otherwise, 879 - * the DMA controller's interrupt handler will start any pending 880 - * transactions when it becomes idle. 
908 + * HARDWARE STATE: idle 881 909 */ 882 910 static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan) 883 911 { ··· 893 927 } 894 928 895 929 /* 896 - * The DMA controller is not idle, which means the interrupt 897 - * handler will start any queued transactions when it runs 898 - * at the end of the current transaction 930 + * The DMA controller is not idle, which means that the interrupt 931 + * handler will start any queued transactions when it runs after 932 + * this transaction finishes 899 933 */ 900 - if (!dma_is_idle(chan)) { 934 + if (!chan->idle) { 901 935 chan_dbg(chan, "DMA controller still busy\n"); 902 936 goto out_unlock; 903 937 } 904 - 905 - /* 906 - * TODO: 907 - * make sure the dma_halt() function really un-wedges the 908 - * controller as much as possible 909 - */ 910 - dma_halt(chan); 911 938 912 939 /* 913 940 * If there are some link descriptors which have not been ··· 911 952 * Move all elements from the queue of pending transactions 912 953 * onto the list of running transactions 913 954 */ 955 + chan_dbg(chan, "idle, starting controller\n"); 914 956 desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node); 915 957 list_splice_tail_init(&chan->ld_pending, &chan->ld_running); 958 + 959 + /* 960 + * The 85xx DMA controller doesn't clear the channel start bit 961 + * automatically at the end of a transfer. Therefore we must clear 962 + * it in software before starting the transfer. 
963 + */ 964 + if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) { 965 + u32 mode; 966 + 967 + mode = DMA_IN(chan, &chan->regs->mr, 32); 968 + mode &= ~FSL_DMA_MR_CS; 969 + DMA_OUT(chan, &chan->regs->mr, mode, 32); 970 + } 916 971 917 972 /* 918 973 * Program the descriptor's address into the DMA controller, 919 974 * then start the DMA transaction 920 975 */ 921 976 set_cdar(chan, desc->async_tx.phys); 977 + get_cdar(chan); 978 + 922 979 dma_start(chan); 980 + chan->idle = false; 923 981 924 982 out_unlock: 925 983 spin_unlock_irqrestore(&chan->desc_lock, flags); ··· 961 985 struct dma_tx_state *txstate) 962 986 { 963 987 struct fsldma_chan *chan = to_fsl_chan(dchan); 964 - dma_cookie_t last_used; 965 988 dma_cookie_t last_complete; 989 + dma_cookie_t last_used; 990 + unsigned long flags; 966 991 967 - fsl_chan_ld_cleanup(chan); 992 + spin_lock_irqsave(&chan->desc_lock, flags); 968 993 969 - last_used = dchan->cookie; 970 994 last_complete = chan->completed_cookie; 995 + last_used = dchan->cookie; 996 + 997 + spin_unlock_irqrestore(&chan->desc_lock, flags); 971 998 972 999 dma_set_tx_state(txstate, last_complete, last_used, 0); 973 - 974 1000 return dma_async_is_complete(cookie, last_complete, last_used); 975 1001 } 976 1002 ··· 983 1005 static irqreturn_t fsldma_chan_irq(int irq, void *data) 984 1006 { 985 1007 struct fsldma_chan *chan = data; 986 - int update_cookie = 0; 987 - int xfer_ld_q = 0; 988 1008 u32 stat; 989 1009 990 1010 /* save and clear the status register */ ··· 990 1014 set_sr(chan, stat); 991 1015 chan_dbg(chan, "irq: stat = 0x%x\n", stat); 992 1016 1017 + /* check that this was really our device */ 993 1018 stat &= ~(FSL_DMA_SR_CB | FSL_DMA_SR_CH); 994 1019 if (!stat) 995 1020 return IRQ_NONE; ··· 1005 1028 */ 1006 1029 if (stat & FSL_DMA_SR_PE) { 1007 1030 chan_dbg(chan, "irq: Programming Error INT\n"); 1008 - if (get_bcr(chan) == 0) { 1009 - /* BCR register is 0, this is a DMA_INTERRUPT async_tx. 
1010 - * Now, update the completed cookie, and continue the 1011 - * next uncompleted transfer. 1012 - */ 1013 - update_cookie = 1; 1014 - xfer_ld_q = 1; 1015 - } 1016 1031 stat &= ~FSL_DMA_SR_PE; 1017 - } 1018 - 1019 - /* 1020 - * If the link descriptor segment transfer finishes, 1021 - * we will recycle the used descriptor. 1022 - */ 1023 - if (stat & FSL_DMA_SR_EOSI) { 1024 - chan_dbg(chan, "irq: End-of-segments INT\n"); 1025 - chan_dbg(chan, "irq: clndar 0x%llx, nlndar 0x%llx\n", 1026 - (unsigned long long)get_cdar(chan), 1027 - (unsigned long long)get_ndar(chan)); 1028 - stat &= ~FSL_DMA_SR_EOSI; 1029 - update_cookie = 1; 1032 + if (get_bcr(chan) != 0) 1033 + chan_err(chan, "Programming Error!\n"); 1030 1034 } 1031 1035 1032 1036 /* ··· 1017 1059 if (stat & FSL_DMA_SR_EOCDI) { 1018 1060 chan_dbg(chan, "irq: End-of-Chain link INT\n"); 1019 1061 stat &= ~FSL_DMA_SR_EOCDI; 1020 - update_cookie = 1; 1021 - xfer_ld_q = 1; 1022 1062 } 1023 1063 1024 1064 /* ··· 1027 1071 if (stat & FSL_DMA_SR_EOLNI) { 1028 1072 chan_dbg(chan, "irq: End-of-link INT\n"); 1029 1073 stat &= ~FSL_DMA_SR_EOLNI; 1030 - xfer_ld_q = 1; 1031 1074 } 1032 1075 1033 - if (update_cookie) 1034 - fsl_dma_update_completed_cookie(chan); 1035 - if (xfer_ld_q) 1036 - fsl_chan_xfer_ld_queue(chan); 1037 - if (stat) 1038 - chan_dbg(chan, "irq: unhandled sr 0x%08x\n", stat); 1076 + /* check that the DMA controller is really idle */ 1077 + if (!dma_is_idle(chan)) 1078 + chan_err(chan, "irq: controller not idle!\n"); 1039 1079 1040 - chan_dbg(chan, "irq: Exit\n"); 1080 + /* check that we handled all of the bits */ 1081 + if (stat) 1082 + chan_err(chan, "irq: unhandled sr 0x%08x\n", stat); 1083 + 1084 + /* 1085 + * Schedule the tasklet to handle all cleanup of the current 1086 + * transaction. It will start a new transaction if there is 1087 + * one pending. 
1088 + */ 1041 1089 tasklet_schedule(&chan->tasklet); 1090 + chan_dbg(chan, "irq: Exit\n"); 1042 1091 return IRQ_HANDLED; 1043 1092 } 1044 1093 1045 1094 static void dma_do_tasklet(unsigned long data) 1046 1095 { 1047 1096 struct fsldma_chan *chan = (struct fsldma_chan *)data; 1097 + unsigned long flags; 1098 + 1099 + chan_dbg(chan, "tasklet entry\n"); 1100 + 1101 + /* run all callbacks, free all used descriptors */ 1048 1102 fsl_chan_ld_cleanup(chan); 1103 + 1104 + /* the channel is now idle */ 1105 + spin_lock_irqsave(&chan->desc_lock, flags); 1106 + chan->idle = true; 1107 + spin_unlock_irqrestore(&chan->desc_lock, flags); 1108 + 1109 + /* start any pending transactions automatically */ 1110 + fsl_chan_xfer_ld_queue(chan); 1111 + chan_dbg(chan, "tasklet exit\n"); 1049 1112 } 1050 1113 1051 1114 static irqreturn_t fsldma_ctrl_irq(int irq, void *data) ··· 1244 1269 spin_lock_init(&chan->desc_lock); 1245 1270 INIT_LIST_HEAD(&chan->ld_pending); 1246 1271 INIT_LIST_HEAD(&chan->ld_running); 1272 + chan->idle = true; 1247 1273 1248 1274 chan->common.device = &fdev->common; 1249 1275
+1
drivers/dma/fsldma.h
··· 148 148 int id; /* Raw id of this channel */ 149 149 struct tasklet_struct tasklet; 150 150 u32 feature; 151 + bool idle; /* DMA controller is idle */ 151 152 152 153 void (*toggle_ext_pause)(struct fsldma_chan *fsl_chan, int enable); 153 154 void (*toggle_ext_start)(struct fsldma_chan *fsl_chan, int enable);