Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dmaengine: ptdma: Initial driver for the AMD PTDMA

Add support for the AMD PTDMA controller. It performs high-bandwidth
memory-to-memory and IO copy operations. Device commands are managed
via a circular queue of 'descriptors', each of which specifies source
and destination addresses for copying a single buffer of data.

Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com>
Link: https://lore.kernel.org/r/1629208559-51964-2-git-send-email-Sanju.Mehta@amd.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>

authored by

Sanjay R Mehta and committed by
Vinod Koul
fa5d823b 64d57d2c

+834
+6
MAINTAINERS
··· 979 979 T: git https://gitlab.freedesktop.org/agd5f/linux.git 980 980 F: drivers/gpu/drm/amd/pm/powerplay/ 981 981 982 + +AMD PTDMA DRIVER 983 + +M: Sanjay R Mehta <sanju.mehta@amd.com> 984 + +L: dmaengine@vger.kernel.org 985 + +S: Maintained 986 + +F: drivers/dma/ptdma/ 987 + 982 988 AMD SEATTLE DEVICE TREE SUPPORT 983 989 M: Brijesh Singh <brijeshkumar.singh@amd.com> 984 990 M: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+2
drivers/dma/Kconfig
··· 738 738 739 739 source "drivers/dma/mediatek/Kconfig" 740 740 741 + source "drivers/dma/ptdma/Kconfig" 742 + 741 743 source "drivers/dma/qcom/Kconfig" 742 744 743 745 source "drivers/dma/dw/Kconfig"
+1
drivers/dma/Makefile
··· 16 16 obj-$(CONFIG_ALTERA_MSGDMA) += altera-msgdma.o 17 17 obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o 18 18 obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ 19 + obj-$(CONFIG_AMD_PTDMA) += ptdma/ 19 20 obj-$(CONFIG_AT_HDMAC) += at_hdmac.o 20 21 obj-$(CONFIG_AT_XDMAC) += at_xdmac.o 21 22 obj-$(CONFIG_AXI_DMAC) += dma-axi-dmac.o
+11
drivers/dma/ptdma/Kconfig
··· 1 + # SPDX-License-Identifier: GPL-2.0-only 2 + config AMD_PTDMA 3 + tristate "AMD PassThru DMA Engine" 4 + depends on X86_64 && PCI 5 + help 6 + Enable support for the AMD PTDMA controller. This controller 7 + provides DMA capabilities to perform high bandwidth memory to 8 + memory and IO copy operations. It performs DMA transfer through 9 + queue-based descriptor management. This DMA controller is intended 10 + to be used with AMD Non-Transparent Bridge devices and not for 11 + general purpose peripheral DMA.
+10
drivers/dma/ptdma/Makefile
··· 1 + # SPDX-License-Identifier: GPL-2.0-only 2 + # 3 + # AMD Passthru DMA driver 4 + # 5 + 6 + obj-$(CONFIG_AMD_PTDMA) += ptdma.o 7 + 8 + ptdma-objs := ptdma-dev.o 9 + 10 + ptdma-$(CONFIG_PCI) += ptdma-pci.o
+268
drivers/dma/ptdma/ptdma-dev.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * AMD Passthru DMA device driver 4 + * -- Based on the CCP driver 5 + * 6 + * Copyright (C) 2016,2021 Advanced Micro Devices, Inc. 7 + * 8 + * Author: Sanjay R Mehta <sanju.mehta@amd.com> 9 + * Author: Gary R Hook <gary.hook@amd.com> 10 + */ 11 + 12 + #include <linux/bitfield.h> 13 + #include <linux/dma-mapping.h> 14 + #include <linux/debugfs.h> 15 + #include <linux/interrupt.h> 16 + #include <linux/kernel.h> 17 + #include <linux/module.h> 18 + #include <linux/pci.h> 19 + 20 + #include "ptdma.h" 21 + 22 + /* Human-readable error strings */ 23 + static char *pt_error_codes[] = { 24 + "", 25 + "ERR 01: ILLEGAL_ENGINE", 26 + "ERR 03: ILLEGAL_FUNCTION_TYPE", 27 + "ERR 04: ILLEGAL_FUNCTION_MODE", 28 + "ERR 06: ILLEGAL_FUNCTION_SIZE", 29 + "ERR 08: ILLEGAL_FUNCTION_RSVD", 30 + "ERR 09: ILLEGAL_BUFFER_LENGTH", 31 + "ERR 10: VLSB_FAULT", 32 + "ERR 11: ILLEGAL_MEM_ADDR", 33 + "ERR 12: ILLEGAL_MEM_SEL", 34 + "ERR 13: ILLEGAL_CONTEXT_ID", 35 + "ERR 15: 0xF Reserved", 36 + "ERR 18: CMD_TIMEOUT", 37 + "ERR 19: IDMA0_AXI_SLVERR", 38 + "ERR 20: IDMA0_AXI_DECERR", 39 + "ERR 21: 0x15 Reserved", 40 + "ERR 22: IDMA1_AXI_SLAVE_FAULT", 41 + "ERR 23: IDMA1_AIXI_DECERR", 42 + "ERR 24: 0x18 Reserved", 43 + "ERR 27: 0x1B Reserved", 44 + "ERR 38: ODMA0_AXI_SLVERR", 45 + "ERR 39: ODMA0_AXI_DECERR", 46 + "ERR 40: 0x28 Reserved", 47 + "ERR 41: ODMA1_AXI_SLVERR", 48 + "ERR 42: ODMA1_AXI_DECERR", 49 + "ERR 43: LSB_PARITY_ERR", 50 + }; 51 + 52 + static void pt_log_error(struct pt_device *d, int e) 53 + { 54 + dev_err(d->dev, "PTDMA error: %s (0x%x)\n", pt_error_codes[e], e); 55 + } 56 + 57 + void pt_start_queue(struct pt_cmd_queue *cmd_q) 58 + { 59 + /* Turn on the run bit */ 60 + iowrite32(cmd_q->qcontrol | CMD_Q_RUN, cmd_q->reg_control); 61 + } 62 + 63 + void pt_stop_queue(struct pt_cmd_queue *cmd_q) 64 + { 65 + /* Turn off the run bit */ 66 + iowrite32(cmd_q->qcontrol & ~CMD_Q_RUN, cmd_q->reg_control); 67 + } 68 + 69 + static int 
pt_core_execute_cmd(struct ptdma_desc *desc, struct pt_cmd_queue *cmd_q) 70 + { 71 + bool soc = FIELD_GET(DWORD0_SOC, desc->dw0); 72 + u8 *q_desc = (u8 *)&cmd_q->qbase[cmd_q->qidx]; 73 + u32 tail; 74 + 75 + if (soc) { 76 + desc->dw0 |= FIELD_PREP(DWORD0_IOC, desc->dw0); 77 + desc->dw0 &= ~DWORD0_SOC; 78 + } 79 + mutex_lock(&cmd_q->q_mutex); 80 + 81 + /* Copy 32-byte command descriptor to hw queue. */ 82 + memcpy(q_desc, desc, 32); 83 + cmd_q->qidx = (cmd_q->qidx + 1) % CMD_Q_LEN; 84 + 85 + /* The data used by this command must be flushed to memory */ 86 + wmb(); 87 + 88 + /* Write the new tail address back to the queue register */ 89 + tail = lower_32_bits(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE); 90 + iowrite32(tail, cmd_q->reg_control + 0x0004); 91 + 92 + /* Turn the queue back on using our cached control register */ 93 + pt_start_queue(cmd_q); 94 + mutex_unlock(&cmd_q->q_mutex); 95 + 96 + return 0; 97 + } 98 + 99 + int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q, 100 + struct pt_passthru_engine *pt_engine) 101 + { 102 + struct ptdma_desc desc; 103 + 104 + cmd_q->cmd_error = 0; 105 + memset(&desc, 0, sizeof(desc)); 106 + desc.dw0 = CMD_DESC_DW0_VAL; 107 + desc.length = pt_engine->src_len; 108 + desc.src_lo = lower_32_bits(pt_engine->src_dma); 109 + desc.dw3.src_hi = upper_32_bits(pt_engine->src_dma); 110 + desc.dst_lo = lower_32_bits(pt_engine->dst_dma); 111 + desc.dw5.dst_hi = upper_32_bits(pt_engine->dst_dma); 112 + 113 + return pt_core_execute_cmd(&desc, cmd_q); 114 + } 115 + 116 + static inline void pt_core_disable_queue_interrupts(struct pt_device *pt) 117 + { 118 + iowrite32(0, pt->cmd_q.reg_control + 0x000C); 119 + } 120 + 121 + static inline void pt_core_enable_queue_interrupts(struct pt_device *pt) 122 + { 123 + iowrite32(SUPPORTED_INTERRUPTS, pt->cmd_q.reg_control + 0x000C); 124 + } 125 + 126 + static irqreturn_t pt_core_irq_handler(int irq, void *data) 127 + { 128 + struct pt_device *pt = data; 129 + struct pt_cmd_queue *cmd_q = 
&pt->cmd_q; 130 + u32 status; 131 + 132 + pt_core_disable_queue_interrupts(pt); 133 + status = ioread32(cmd_q->reg_control + 0x0010); 134 + if (status) { 135 + cmd_q->int_status = status; 136 + cmd_q->q_status = ioread32(cmd_q->reg_control + 0x0100); 137 + cmd_q->q_int_status = ioread32(cmd_q->reg_control + 0x0104); 138 + 139 + /* On error, only save the first error value */ 140 + if ((status & INT_ERROR) && !cmd_q->cmd_error) 141 + cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status); 142 + 143 + /* Acknowledge the interrupt */ 144 + iowrite32(status, cmd_q->reg_control + 0x0010); 145 + pt_core_enable_queue_interrupts(pt); 146 + } 147 + return IRQ_HANDLED; 148 + } 149 + 150 + int pt_core_init(struct pt_device *pt) 151 + { 152 + char dma_pool_name[MAX_DMAPOOL_NAME_LEN]; 153 + struct pt_cmd_queue *cmd_q = &pt->cmd_q; 154 + u32 dma_addr_lo, dma_addr_hi; 155 + struct device *dev = pt->dev; 156 + struct dma_pool *dma_pool; 157 + int ret; 158 + 159 + /* Allocate a dma pool for the queue */ 160 + snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q", dev_name(pt->dev)); 161 + 162 + dma_pool = dma_pool_create(dma_pool_name, dev, 163 + PT_DMAPOOL_MAX_SIZE, 164 + PT_DMAPOOL_ALIGN, 0); 165 + if (!dma_pool) 166 + return -ENOMEM; 167 + 168 + /* ptdma core initialisation */ 169 + iowrite32(CMD_CONFIG_VHB_EN, pt->io_regs + CMD_CONFIG_OFFSET); 170 + iowrite32(CMD_QUEUE_PRIO, pt->io_regs + CMD_QUEUE_PRIO_OFFSET); 171 + iowrite32(CMD_TIMEOUT_DISABLE, pt->io_regs + CMD_TIMEOUT_OFFSET); 172 + iowrite32(CMD_CLK_GATE_CONFIG, pt->io_regs + CMD_CLK_GATE_CTL_OFFSET); 173 + iowrite32(CMD_CONFIG_REQID, pt->io_regs + CMD_REQID_CONFIG_OFFSET); 174 + 175 + cmd_q->pt = pt; 176 + cmd_q->dma_pool = dma_pool; 177 + mutex_init(&cmd_q->q_mutex); 178 + 179 + /* Page alignment satisfies our needs for N <= 128 */ 180 + cmd_q->qsize = Q_SIZE(Q_DESC_SIZE); 181 + cmd_q->qbase = dma_alloc_coherent(dev, cmd_q->qsize, 182 + &cmd_q->qbase_dma, 183 + GFP_KERNEL); 184 + if (!cmd_q->qbase) { 185 + dev_err(dev, 
"unable to allocate command queue\n"); 186 + ret = -ENOMEM; 187 + goto e_dma_alloc; 188 + } 189 + 190 + cmd_q->qidx = 0; 191 + 192 + /* Preset some register values */ 193 + cmd_q->reg_control = pt->io_regs + CMD_Q_STATUS_INCR; 194 + 195 + /* Turn off the queues and disable interrupts until ready */ 196 + pt_core_disable_queue_interrupts(pt); 197 + 198 + cmd_q->qcontrol = 0; /* Start with nothing */ 199 + iowrite32(cmd_q->qcontrol, cmd_q->reg_control); 200 + 201 + ioread32(cmd_q->reg_control + 0x0104); 202 + ioread32(cmd_q->reg_control + 0x0100); 203 + 204 + /* Clear the interrupt status */ 205 + iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010); 206 + 207 + /* Request an irq */ 208 + ret = request_irq(pt->pt_irq, pt_core_irq_handler, 0, dev_name(pt->dev), pt); 209 + if (ret) 210 + goto e_pool; 211 + 212 + /* Update the device registers with queue information. */ 213 + cmd_q->qcontrol &= ~CMD_Q_SIZE; 214 + cmd_q->qcontrol |= FIELD_PREP(CMD_Q_SIZE, QUEUE_SIZE_VAL); 215 + 216 + cmd_q->qdma_tail = cmd_q->qbase_dma; 217 + dma_addr_lo = lower_32_bits(cmd_q->qdma_tail); 218 + iowrite32((u32)dma_addr_lo, cmd_q->reg_control + 0x0004); 219 + iowrite32((u32)dma_addr_lo, cmd_q->reg_control + 0x0008); 220 + 221 + dma_addr_hi = upper_32_bits(cmd_q->qdma_tail); 222 + cmd_q->qcontrol |= (dma_addr_hi << 16); 223 + iowrite32(cmd_q->qcontrol, cmd_q->reg_control); 224 + 225 + pt_core_enable_queue_interrupts(pt); 226 + 227 + return 0; 228 + 229 + e_dma_alloc: 230 + dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase, cmd_q->qbase_dma); 231 + 232 + e_pool: 233 + dev_err(dev, "unable to allocate an IRQ\n"); 234 + dma_pool_destroy(pt->cmd_q.dma_pool); 235 + 236 + return ret; 237 + } 238 + 239 + void pt_core_destroy(struct pt_device *pt) 240 + { 241 + struct device *dev = pt->dev; 242 + struct pt_cmd_queue *cmd_q = &pt->cmd_q; 243 + struct pt_cmd *cmd; 244 + 245 + /* Disable and clear interrupts */ 246 + pt_core_disable_queue_interrupts(pt); 247 + 248 + /* Turn off the run bit */ 
249 + pt_stop_queue(cmd_q); 250 + 251 + /* Clear the interrupt status */ 252 + iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010); 253 + ioread32(cmd_q->reg_control + 0x0104); 254 + ioread32(cmd_q->reg_control + 0x0100); 255 + 256 + free_irq(pt->pt_irq, pt); 257 + 258 + dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase, 259 + cmd_q->qbase_dma); 260 + 261 + /* Flush the cmd queue */ 262 + while (!list_empty(&pt->cmd)) { 263 + /* Invoke the callback directly with an error code */ 264 + cmd = list_first_entry(&pt->cmd, struct pt_cmd, entry); 265 + list_del(&cmd->entry); 266 + cmd->pt_cmd_callback(cmd->data, -ENODEV); 267 + } 268 + }
+243
drivers/dma/ptdma/ptdma-pci.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * AMD Passthru DMA device driver 4 + * -- Based on the CCP driver 5 + * 6 + * Copyright (C) 2016,2021 Advanced Micro Devices, Inc. 7 + * 8 + * Author: Sanjay R Mehta <sanju.mehta@amd.com> 9 + * Author: Tom Lendacky <thomas.lendacky@amd.com> 10 + * Author: Gary R Hook <gary.hook@amd.com> 11 + */ 12 + 13 + #include <linux/device.h> 14 + #include <linux/dma-mapping.h> 15 + #include <linux/delay.h> 16 + #include <linux/interrupt.h> 17 + #include <linux/kernel.h> 18 + #include <linux/kthread.h> 19 + #include <linux/module.h> 20 + #include <linux/pci_ids.h> 21 + #include <linux/pci.h> 22 + #include <linux/spinlock.h> 23 + 24 + #include "ptdma.h" 25 + 26 + struct pt_msix { 27 + int msix_count; 28 + struct msix_entry msix_entry; 29 + }; 30 + 31 + /* 32 + * pt_alloc_struct - allocate and initialize the pt_device struct 33 + * 34 + * @dev: device struct of the PTDMA 35 + */ 36 + static struct pt_device *pt_alloc_struct(struct device *dev) 37 + { 38 + struct pt_device *pt; 39 + 40 + pt = devm_kzalloc(dev, sizeof(*pt), GFP_KERNEL); 41 + 42 + if (!pt) 43 + return NULL; 44 + pt->dev = dev; 45 + 46 + INIT_LIST_HEAD(&pt->cmd); 47 + 48 + return pt; 49 + } 50 + 51 + static int pt_get_msix_irqs(struct pt_device *pt) 52 + { 53 + struct pt_msix *pt_msix = pt->pt_msix; 54 + struct device *dev = pt->dev; 55 + struct pci_dev *pdev = to_pci_dev(dev); 56 + int ret; 57 + 58 + pt_msix->msix_entry.entry = 0; 59 + 60 + ret = pci_enable_msix_range(pdev, &pt_msix->msix_entry, 1, 1); 61 + if (ret < 0) 62 + return ret; 63 + 64 + pt_msix->msix_count = ret; 65 + 66 + pt->pt_irq = pt_msix->msix_entry.vector; 67 + 68 + return 0; 69 + } 70 + 71 + static int pt_get_msi_irq(struct pt_device *pt) 72 + { 73 + struct device *dev = pt->dev; 74 + struct pci_dev *pdev = to_pci_dev(dev); 75 + int ret; 76 + 77 + ret = pci_enable_msi(pdev); 78 + if (ret) 79 + return ret; 80 + 81 + pt->pt_irq = pdev->irq; 82 + 83 + return 0; 84 + } 85 + 86 + static int 
pt_get_irqs(struct pt_device *pt) 87 + { 88 + struct device *dev = pt->dev; 89 + int ret; 90 + 91 + ret = pt_get_msix_irqs(pt); 92 + if (!ret) 93 + return 0; 94 + 95 + /* Couldn't get MSI-X vectors, try MSI */ 96 + dev_err(dev, "could not enable MSI-X (%d), trying MSI\n", ret); 97 + ret = pt_get_msi_irq(pt); 98 + if (!ret) 99 + return 0; 100 + 101 + /* Couldn't get MSI interrupt */ 102 + dev_err(dev, "could not enable MSI (%d)\n", ret); 103 + 104 + return ret; 105 + } 106 + 107 + static void pt_free_irqs(struct pt_device *pt) 108 + { 109 + struct pt_msix *pt_msix = pt->pt_msix; 110 + struct device *dev = pt->dev; 111 + struct pci_dev *pdev = to_pci_dev(dev); 112 + 113 + if (pt_msix->msix_count) 114 + pci_disable_msix(pdev); 115 + else if (pt->pt_irq) 116 + pci_disable_msi(pdev); 117 + 118 + pt->pt_irq = 0; 119 + } 120 + 121 + static int pt_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) 122 + { 123 + struct pt_device *pt; 124 + struct pt_msix *pt_msix; 125 + struct device *dev = &pdev->dev; 126 + void __iomem * const *iomap_table; 127 + int bar_mask; 128 + int ret = -ENOMEM; 129 + 130 + pt = pt_alloc_struct(dev); 131 + if (!pt) 132 + goto e_err; 133 + 134 + pt_msix = devm_kzalloc(dev, sizeof(*pt_msix), GFP_KERNEL); 135 + if (!pt_msix) 136 + goto e_err; 137 + 138 + pt->pt_msix = pt_msix; 139 + pt->dev_vdata = (struct pt_dev_vdata *)id->driver_data; 140 + if (!pt->dev_vdata) { 141 + ret = -ENODEV; 142 + dev_err(dev, "missing driver data\n"); 143 + goto e_err; 144 + } 145 + 146 + ret = pcim_enable_device(pdev); 147 + if (ret) { 148 + dev_err(dev, "pcim_enable_device failed (%d)\n", ret); 149 + goto e_err; 150 + } 151 + 152 + bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); 153 + ret = pcim_iomap_regions(pdev, bar_mask, "ptdma"); 154 + if (ret) { 155 + dev_err(dev, "pcim_iomap_regions failed (%d)\n", ret); 156 + goto e_err; 157 + } 158 + 159 + iomap_table = pcim_iomap_table(pdev); 160 + if (!iomap_table) { 161 + dev_err(dev, "pcim_iomap_table 
failed\n"); 162 + ret = -ENOMEM; 163 + goto e_err; 164 + } 165 + 166 + pt->io_regs = iomap_table[pt->dev_vdata->bar]; 167 + if (!pt->io_regs) { 168 + dev_err(dev, "ioremap failed\n"); 169 + ret = -ENOMEM; 170 + goto e_err; 171 + } 172 + 173 + ret = pt_get_irqs(pt); 174 + if (ret) 175 + goto e_err; 176 + 177 + pci_set_master(pdev); 178 + 179 + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); 180 + if (ret) { 181 + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); 182 + if (ret) { 183 + dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", 184 + ret); 185 + goto e_err; 186 + } 187 + } 188 + 189 + dev_set_drvdata(dev, pt); 190 + 191 + if (pt->dev_vdata) 192 + ret = pt_core_init(pt); 193 + 194 + if (ret) 195 + goto e_err; 196 + 197 + return 0; 198 + 199 + e_err: 200 + dev_err(dev, "initialization failed ret = %d\n", ret); 201 + 202 + return ret; 203 + } 204 + 205 + static void pt_pci_remove(struct pci_dev *pdev) 206 + { 207 + struct device *dev = &pdev->dev; 208 + struct pt_device *pt = dev_get_drvdata(dev); 209 + 210 + if (!pt) 211 + return; 212 + 213 + if (pt->dev_vdata) 214 + pt_core_destroy(pt); 215 + 216 + pt_free_irqs(pt); 217 + } 218 + 219 + static const struct pt_dev_vdata dev_vdata[] = { 220 + { 221 + .bar = 2, 222 + }, 223 + }; 224 + 225 + static const struct pci_device_id pt_pci_table[] = { 226 + { PCI_VDEVICE(AMD, 0x1498), (kernel_ulong_t)&dev_vdata[0] }, 227 + /* Last entry must be zero */ 228 + { 0, } 229 + }; 230 + MODULE_DEVICE_TABLE(pci, pt_pci_table); 231 + 232 + static struct pci_driver pt_pci_driver = { 233 + .name = "ptdma", 234 + .id_table = pt_pci_table, 235 + .probe = pt_pci_probe, 236 + .remove = pt_pci_remove, 237 + }; 238 + 239 + module_pci_driver(pt_pci_driver); 240 + 241 + MODULE_AUTHOR("Sanjay R Mehta <sanju.mehta@amd.com>"); 242 + MODULE_LICENSE("GPL"); 243 + MODULE_DESCRIPTION("AMD PassThru DMA driver");
+293
drivers/dma/ptdma/ptdma.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * AMD Passthru DMA device driver 4 + * -- Based on the CCP driver 5 + * 6 + * Copyright (C) 2016,2021 Advanced Micro Devices, Inc. 7 + * 8 + * Author: Sanjay R Mehta <sanju.mehta@amd.com> 9 + * Author: Tom Lendacky <thomas.lendacky@amd.com> 10 + * Author: Gary R Hook <gary.hook@amd.com> 11 + */ 12 + 13 + #ifndef __PT_DEV_H__ 14 + #define __PT_DEV_H__ 15 + 16 + #include <linux/device.h> 17 + #include <linux/pci.h> 18 + #include <linux/spinlock.h> 19 + #include <linux/mutex.h> 20 + #include <linux/list.h> 21 + #include <linux/wait.h> 22 + #include <linux/dmapool.h> 23 + 24 + #define MAX_PT_NAME_LEN 16 25 + #define MAX_DMAPOOL_NAME_LEN 32 26 + 27 + #define MAX_HW_QUEUES 1 28 + #define MAX_CMD_QLEN 100 29 + 30 + #define PT_ENGINE_PASSTHRU 5 31 + #define PT_OFFSET 0x0 32 + 33 + /* Register Mappings */ 34 + #define IRQ_MASK_REG 0x040 35 + #define IRQ_STATUS_REG 0x200 36 + 37 + #define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f) 38 + 39 + #define CMD_QUEUE_PRIO_OFFSET 0x00 40 + #define CMD_REQID_CONFIG_OFFSET 0x04 41 + #define CMD_TIMEOUT_OFFSET 0x08 42 + #define CMD_PT_VERSION 0x10 43 + 44 + #define CMD_Q_CONTROL_BASE 0x0000 45 + #define CMD_Q_TAIL_LO_BASE 0x0004 46 + #define CMD_Q_HEAD_LO_BASE 0x0008 47 + #define CMD_Q_INT_ENABLE_BASE 0x000C 48 + #define CMD_Q_INTERRUPT_STATUS_BASE 0x0010 49 + 50 + #define CMD_Q_STATUS_BASE 0x0100 51 + #define CMD_Q_INT_STATUS_BASE 0x0104 52 + #define CMD_Q_DMA_STATUS_BASE 0x0108 53 + #define CMD_Q_DMA_READ_STATUS_BASE 0x010C 54 + #define CMD_Q_DMA_WRITE_STATUS_BASE 0x0110 55 + #define CMD_Q_ABORT_BASE 0x0114 56 + #define CMD_Q_AX_CACHE_BASE 0x0118 57 + 58 + #define CMD_CONFIG_OFFSET 0x1120 59 + #define CMD_CLK_GATE_CTL_OFFSET 0x6004 60 + 61 + #define CMD_DESC_DW0_VAL 0x500012 62 + 63 + /* Address offset for virtual queue registers */ 64 + #define CMD_Q_STATUS_INCR 0x1000 65 + 66 + /* Bit masks */ 67 + #define CMD_CONFIG_REQID 0 68 + #define CMD_TIMEOUT_DISABLE 0 69 + #define 
CMD_CLK_DYN_GATING_DIS 0 70 + #define CMD_CLK_SW_GATE_MODE 0 71 + #define CMD_CLK_GATE_CTL 0 72 + #define CMD_QUEUE_PRIO GENMASK(2, 1) 73 + #define CMD_CONFIG_VHB_EN BIT(0) 74 + #define CMD_CLK_DYN_GATING_EN BIT(0) 75 + #define CMD_CLK_HW_GATE_MODE BIT(0) 76 + #define CMD_CLK_GATE_ON_DELAY BIT(12) 77 + #define CMD_CLK_GATE_OFF_DELAY BIT(12) 78 + 79 + #define CMD_CLK_GATE_CONFIG (CMD_CLK_GATE_CTL | \ 80 + CMD_CLK_HW_GATE_MODE | \ 81 + CMD_CLK_GATE_ON_DELAY | \ 82 + CMD_CLK_DYN_GATING_EN | \ 83 + CMD_CLK_GATE_OFF_DELAY) 84 + 85 + #define CMD_Q_LEN 32 86 + #define CMD_Q_RUN BIT(0) 87 + #define CMD_Q_HALT BIT(1) 88 + #define CMD_Q_MEM_LOCATION BIT(2) 89 + #define CMD_Q_SIZE_MASK GENMASK(4, 0) 90 + #define CMD_Q_SIZE GENMASK(7, 3) 91 + #define CMD_Q_SHIFT GENMASK(1, 0) 92 + #define QUEUE_SIZE_VAL ((ffs(CMD_Q_LEN) - 2) & \ 93 + CMD_Q_SIZE_MASK) 94 + #define Q_PTR_MASK (2 << (QUEUE_SIZE_VAL + 5) - 1) 95 + #define Q_DESC_SIZE sizeof(struct ptdma_desc) 96 + #define Q_SIZE(n) (CMD_Q_LEN * (n)) 97 + 98 + #define INT_COMPLETION BIT(0) 99 + #define INT_ERROR BIT(1) 100 + #define INT_QUEUE_STOPPED BIT(2) 101 + #define INT_EMPTY_QUEUE BIT(3) 102 + #define SUPPORTED_INTERRUPTS (INT_COMPLETION | INT_ERROR) 103 + 104 + /****** Local Storage Block ******/ 105 + #define LSB_START 0 106 + #define LSB_END 127 107 + #define LSB_COUNT (LSB_END - LSB_START + 1) 108 + 109 + #define PT_DMAPOOL_MAX_SIZE 64 110 + #define PT_DMAPOOL_ALIGN BIT(5) 111 + 112 + #define PT_PASSTHRU_BLOCKSIZE 512 113 + 114 + struct pt_device; 115 + 116 + struct pt_tasklet_data { 117 + struct completion completion; 118 + struct pt_cmd *cmd; 119 + }; 120 + 121 + /* 122 + * struct pt_passthru_engine - pass-through operation 123 + * without performing DMA mapping 124 + * @mask: mask to be applied to data 125 + * @mask_len: length in bytes of mask 126 + * @src_dma: data to be used for this operation 127 + * @dst_dma: data produced by this operation 128 + * @src_len: length in bytes of data used for this operation 129 + * 
130 + * Variables required to be set when calling pt_enqueue_cmd(): 131 + * - bit_mod, byte_swap, src, dst, src_len 132 + * - mask, mask_len if bit_mod is not PT_PASSTHRU_BITWISE_NOOP 133 + */ 134 + struct pt_passthru_engine { 135 + dma_addr_t mask; 136 + u32 mask_len; /* In bytes */ 137 + 138 + dma_addr_t src_dma, dst_dma; 139 + u64 src_len; /* In bytes */ 140 + }; 141 + 142 + /* 143 + * struct pt_cmd - PTDMA operation request 144 + * @entry: list element 145 + * @work: work element used for callbacks 146 + * @pt: PT device to be run on 147 + * @ret: operation return code 148 + * @flags: cmd processing flags 149 + * @engine: PTDMA operation to perform (passthru) 150 + * @engine_error: PT engine return code 151 + * @passthru: engine specific structures, refer to specific engine struct below 152 + * @callback: operation completion callback function 153 + * @data: parameter value to be supplied to the callback function 154 + * 155 + * Variables required to be set when calling pt_enqueue_cmd(): 156 + * - engine, callback 157 + * - See the operation structures below for what is required for each 158 + * operation. 
159 + */ 160 + struct pt_cmd { 161 + struct list_head entry; 162 + struct work_struct work; 163 + struct pt_device *pt; 164 + int ret; 165 + u32 engine; 166 + u32 engine_error; 167 + struct pt_passthru_engine passthru; 168 + /* Completion callback support */ 169 + void (*pt_cmd_callback)(void *data, int err); 170 + void *data; 171 + }; 172 + 173 + struct pt_cmd_queue { 174 + struct pt_device *pt; 175 + 176 + /* Queue dma pool */ 177 + struct dma_pool *dma_pool; 178 + 179 + /* Queue base address (not neccessarily aligned)*/ 180 + struct ptdma_desc *qbase; 181 + 182 + /* Aligned queue start address (per requirement) */ 183 + struct mutex q_mutex ____cacheline_aligned; 184 + unsigned int qidx; 185 + 186 + unsigned int qsize; 187 + dma_addr_t qbase_dma; 188 + dma_addr_t qdma_tail; 189 + 190 + unsigned int active; 191 + unsigned int suspended; 192 + 193 + /* Register addresses for queue */ 194 + void __iomem *reg_control; 195 + u32 qcontrol; /* Cached control register */ 196 + 197 + /* Status values from job */ 198 + u32 int_status; 199 + u32 q_status; 200 + u32 q_int_status; 201 + u32 cmd_error; 202 + } ____cacheline_aligned; 203 + 204 + struct pt_device { 205 + struct list_head entry; 206 + 207 + unsigned int ord; 208 + char name[MAX_PT_NAME_LEN]; 209 + 210 + struct device *dev; 211 + 212 + /* Bus specific device information */ 213 + struct pt_msix *pt_msix; 214 + 215 + struct pt_dev_vdata *dev_vdata; 216 + 217 + unsigned int pt_irq; 218 + 219 + /* I/O area used for device communication */ 220 + void __iomem *io_regs; 221 + 222 + spinlock_t cmd_lock ____cacheline_aligned; 223 + unsigned int cmd_count; 224 + struct list_head cmd; 225 + 226 + /* 227 + * The command queue. 
This represent the queue available on the 228 + * PTDMA that are available for processing cmds 229 + */ 230 + struct pt_cmd_queue cmd_q; 231 + 232 + wait_queue_head_t lsb_queue; 233 + 234 + struct pt_tasklet_data tdata; 235 + }; 236 + 237 + /* 238 + * descriptor for PTDMA commands 239 + * 8 32-bit words: 240 + * word 0: function; engine; control bits 241 + * word 1: length of source data 242 + * word 2: low 32 bits of source pointer 243 + * word 3: upper 16 bits of source pointer; source memory type 244 + * word 4: low 32 bits of destination pointer 245 + * word 5: upper 16 bits of destination pointer; destination memory type 246 + * word 6: reserved 32 bits 247 + * word 7: reserved 32 bits 248 + */ 249 + 250 + #define DWORD0_SOC BIT(0) 251 + #define DWORD0_IOC BIT(1) 252 + 253 + struct dword3 { 254 + unsigned int src_hi:16; 255 + unsigned int src_mem:2; 256 + unsigned int lsb_cxt_id:8; 257 + unsigned int rsvd1:5; 258 + unsigned int fixed:1; 259 + }; 260 + 261 + struct dword5 { 262 + unsigned int dst_hi:16; 263 + unsigned int dst_mem:2; 264 + unsigned int rsvd1:13; 265 + unsigned int fixed:1; 266 + }; 267 + 268 + struct ptdma_desc { 269 + u32 dw0; 270 + u32 length; 271 + u32 src_lo; 272 + struct dword3 dw3; 273 + u32 dst_lo; 274 + struct dword5 dw5; 275 + __le32 rsvd1; 276 + __le32 rsvd2; 277 + }; 278 + 279 + /* Structure to hold PT device data */ 280 + struct pt_dev_vdata { 281 + const unsigned int bar; 282 + }; 283 + 284 + int pt_core_init(struct pt_device *pt); 285 + void pt_core_destroy(struct pt_device *pt); 286 + 287 + int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q, 288 + struct pt_passthru_engine *pt_engine); 289 + 290 + void pt_start_queue(struct pt_cmd_queue *cmd_q); 291 + void pt_stop_queue(struct pt_cmd_queue *cmd_q); 292 + 293 + #endif