Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull second round of rdma updates from Doug Ledford:
"This can be split out into just two categories:

- fixes to the RDMA R/W API with regard to SG list length limits
(about 5 patches)

- fixes/features for the Intel hfi1 driver (everything else)

The hfi1 driver is still being brought to full feature support by
Intel, and they have a lot of people working on it, so that amounts to
almost the entirety of this pull request"

* tag 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (84 commits)
IB/hfi1: Add cache evict LRU list
IB/hfi1: Fix memory leak during unexpected shutdown
IB/hfi1: Remove unneeded mm argument in remove function
IB/hfi1: Consistently call ops->remove outside spinlock
IB/hfi1: Use evict mmu rb operation
IB/hfi1: Add evict operation to the mmu rb handler
IB/hfi1: Fix TID caching actions
IB/hfi1: Make the cache handler own its rb tree root
IB/hfi1: Make use of mm consistent
IB/hfi1: Fix user SDMA racy user request claim
IB/hfi1: Fix error condition that needs to clean up
IB/hfi1: Release node on insert failure
IB/hfi1: Validate SDMA user iovector count
IB/hfi1: Validate SDMA user request index
IB/hfi1: Use the same capability state for all shared contexts
IB/hfi1: Prevent null pointer dereference
IB/hfi1: Rename TID mmu_rb_* functions
IB/hfi1: Remove unneeded empty check in hfi1_mmu_rb_unregister()
IB/hfi1: Restructure hfi1_file_open
IB/hfi1: Make iovec loop index easy to understand
...

+4334 -3023
+11 -13
drivers/infiniband/core/rw.c
··· 58 58 return false; 59 59 } 60 60 61 - static inline u32 rdma_rw_max_sge(struct ib_device *dev, 62 - enum dma_data_direction dir) 63 - { 64 - return dir == DMA_TO_DEVICE ? 65 - dev->attrs.max_sge : dev->attrs.max_sge_rd; 66 - } 67 - 68 61 static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev) 69 62 { 70 63 /* arbitrary limit to avoid allocating gigantic resources */ 71 64 return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256); 72 65 } 73 66 67 + /* Caller must have zero-initialized *reg. */ 74 68 static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, 75 69 struct rdma_rw_reg_ctx *reg, struct scatterlist *sg, 76 70 u32 sg_cnt, u32 offset) ··· 108 114 u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset, 109 115 u64 remote_addr, u32 rkey, enum dma_data_direction dir) 110 116 { 117 + struct rdma_rw_reg_ctx *prev = NULL; 111 118 u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); 112 119 int i, j, ret = 0, count = 0; 113 120 ··· 120 125 } 121 126 122 127 for (i = 0; i < ctx->nr_ops; i++) { 123 - struct rdma_rw_reg_ctx *prev = i ? &ctx->reg[i - 1] : NULL; 124 128 struct rdma_rw_reg_ctx *reg = &ctx->reg[i]; 125 129 u32 nents = min(sg_cnt, pages_per_mr); 126 130 ··· 156 162 sg_cnt -= nents; 157 163 for (j = 0; j < nents; j++) 158 164 sg = sg_next(sg); 165 + prev = reg; 159 166 offset = 0; 160 167 } 168 + 169 + if (prev) 170 + prev->wr.wr.next = NULL; 161 171 162 172 ctx->type = RDMA_RW_MR; 163 173 return count; ··· 179 181 u64 remote_addr, u32 rkey, enum dma_data_direction dir) 180 182 { 181 183 struct ib_device *dev = qp->pd->device; 182 - u32 max_sge = rdma_rw_max_sge(dev, dir); 184 + u32 max_sge = dir == DMA_TO_DEVICE ? 
qp->max_write_sge : 185 + qp->max_read_sge; 183 186 struct ib_sge *sge; 184 187 u32 total_len = 0, i, j; 185 188 ··· 204 205 rdma_wr->wr.opcode = IB_WR_RDMA_READ; 205 206 rdma_wr->remote_addr = remote_addr + total_len; 206 207 rdma_wr->rkey = rkey; 208 + rdma_wr->wr.num_sge = nr_sge; 207 209 rdma_wr->wr.sg_list = sge; 208 210 209 211 for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) { 210 - rdma_wr->wr.num_sge++; 211 - 212 212 sge->addr = ib_sg_dma_address(dev, sg) + offset; 213 213 sge->length = ib_sg_dma_len(dev, sg) - offset; 214 214 sge->lkey = qp->pd->local_dma_lkey; ··· 218 220 offset = 0; 219 221 } 220 222 221 - if (i + 1 < ctx->nr_ops) 222 - rdma_wr->wr.next = &ctx->map.wrs[i + 1].wr; 223 + rdma_wr->wr.next = i + 1 < ctx->nr_ops ? 224 + &ctx->map.wrs[i + 1].wr : NULL; 223 225 } 224 226 225 227 ctx->type = RDMA_RW_MULTI_WR;
+9
drivers/infiniband/core/verbs.c
··· 825 825 } 826 826 } 827 827 828 + /* 829 + * Note: all hw drivers guarantee that max_send_sge is lower than 830 + * the device RDMA WRITE SGE limit but not all hw drivers ensure that 831 + * max_send_sge <= max_sge_rd. 832 + */ 833 + qp->max_write_sge = qp_init_attr->cap.max_send_sge; 834 + qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge, 835 + device->attrs.max_sge_rd); 836 + 828 837 return qp; 829 838 } 830 839 EXPORT_SYMBOL(ib_create_qp);
+2 -1
drivers/infiniband/hw/hfi1/Kconfig
··· 1 1 config INFINIBAND_HFI1 2 2 tristate "Intel OPA Gen1 support" 3 - depends on X86_64 && INFINIBAND_RDMAVT 3 + depends on X86_64 && INFINIBAND_RDMAVT && I2C 4 4 select MMU_NOTIFIER 5 5 select CRC32 6 + select I2C_ALGOBIT 6 7 ---help--- 7 8 This is a low-level driver for Intel OPA Gen1 adapter. 8 9 config HFI1_DEBUG_SDMA_ORDER
+1 -1
drivers/infiniband/hw/hfi1/Makefile
··· 10 10 hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ 11 11 eprom.o file_ops.o firmware.o \ 12 12 init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ 13 - qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \ 13 + qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ 14 14 uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ 15 15 verbs_txreq.o 16 16 hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
+439 -124
drivers/infiniband/hw/hfi1/affinity.c
··· 47 47 #include <linux/topology.h> 48 48 #include <linux/cpumask.h> 49 49 #include <linux/module.h> 50 + #include <linux/cpumask.h> 50 51 51 52 #include "hfi.h" 52 53 #include "affinity.h" 53 54 #include "sdma.h" 54 55 #include "trace.h" 56 + 57 + struct hfi1_affinity_node_list node_affinity = { 58 + .list = LIST_HEAD_INIT(node_affinity.list), 59 + .lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock), 60 + }; 55 61 56 62 /* Name of IRQ types, indexed by enum irq_type */ 57 63 static const char * const irq_type_names[] = { ··· 67 61 "OTHER", 68 62 }; 69 63 64 + /* Per NUMA node count of HFI devices */ 65 + static unsigned int *hfi1_per_node_cntr; 66 + 70 67 static inline void init_cpu_mask_set(struct cpu_mask_set *set) 71 68 { 72 69 cpumask_clear(&set->mask); ··· 78 69 } 79 70 80 71 /* Initialize non-HT cpu cores mask */ 81 - int init_real_cpu_mask(struct hfi1_devdata *dd) 72 + void init_real_cpu_mask(void) 82 73 { 83 - struct hfi1_affinity *info; 84 74 int possible, curr_cpu, i, ht; 85 75 86 - info = kzalloc(sizeof(*info), GFP_KERNEL); 87 - if (!info) 88 - return -ENOMEM; 89 - 90 - cpumask_clear(&info->real_cpu_mask); 76 + cpumask_clear(&node_affinity.real_cpu_mask); 91 77 92 78 /* Start with cpu online mask as the real cpu mask */ 93 - cpumask_copy(&info->real_cpu_mask, cpu_online_mask); 79 + cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask); 94 80 95 81 /* 96 82 * Remove HT cores from the real cpu mask. Do this in two steps below. 97 83 */ 98 - possible = cpumask_weight(&info->real_cpu_mask); 84 + possible = cpumask_weight(&node_affinity.real_cpu_mask); 99 85 ht = cpumask_weight(topology_sibling_cpumask( 100 - cpumask_first(&info->real_cpu_mask))); 86 + cpumask_first(&node_affinity.real_cpu_mask))); 101 87 /* 102 88 * Step 1. Skip over the first N HT siblings and use them as the 103 89 * "real" cores. Assumes that HT cores are not enumerated in 104 90 * succession (except in the single core case). 
105 91 */ 106 - curr_cpu = cpumask_first(&info->real_cpu_mask); 92 + curr_cpu = cpumask_first(&node_affinity.real_cpu_mask); 107 93 for (i = 0; i < possible / ht; i++) 108 - curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask); 94 + curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask); 109 95 /* 110 96 * Step 2. Remove the remaining HT siblings. Use cpumask_next() to 111 97 * skip any gaps. 112 98 */ 113 99 for (; i < possible; i++) { 114 - cpumask_clear_cpu(curr_cpu, &info->real_cpu_mask); 115 - curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask); 100 + cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask); 101 + curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask); 102 + } 103 + } 104 + 105 + int node_affinity_init(void) 106 + { 107 + int node; 108 + struct pci_dev *dev = NULL; 109 + const struct pci_device_id *ids = hfi1_pci_tbl; 110 + 111 + cpumask_clear(&node_affinity.proc.used); 112 + cpumask_copy(&node_affinity.proc.mask, cpu_online_mask); 113 + 114 + node_affinity.proc.gen = 0; 115 + node_affinity.num_core_siblings = 116 + cpumask_weight(topology_sibling_cpumask( 117 + cpumask_first(&node_affinity.proc.mask) 118 + )); 119 + node_affinity.num_online_nodes = num_online_nodes(); 120 + node_affinity.num_online_cpus = num_online_cpus(); 121 + 122 + /* 123 + * The real cpu mask is part of the affinity struct but it has to be 124 + * initialized early. It is needed to calculate the number of user 125 + * contexts in set_up_context_variables(). 
126 + */ 127 + init_real_cpu_mask(); 128 + 129 + hfi1_per_node_cntr = kcalloc(num_possible_nodes(), 130 + sizeof(*hfi1_per_node_cntr), GFP_KERNEL); 131 + if (!hfi1_per_node_cntr) 132 + return -ENOMEM; 133 + 134 + while (ids->vendor) { 135 + dev = NULL; 136 + while ((dev = pci_get_device(ids->vendor, ids->device, dev))) { 137 + node = pcibus_to_node(dev->bus); 138 + if (node < 0) 139 + node = numa_node_id(); 140 + 141 + hfi1_per_node_cntr[node]++; 142 + } 143 + ids++; 116 144 } 117 145 118 - dd->affinity = info; 119 146 return 0; 147 + } 148 + 149 + void node_affinity_destroy(void) 150 + { 151 + struct list_head *pos, *q; 152 + struct hfi1_affinity_node *entry; 153 + 154 + spin_lock(&node_affinity.lock); 155 + list_for_each_safe(pos, q, &node_affinity.list) { 156 + entry = list_entry(pos, struct hfi1_affinity_node, 157 + list); 158 + list_del(pos); 159 + kfree(entry); 160 + } 161 + spin_unlock(&node_affinity.lock); 162 + kfree(hfi1_per_node_cntr); 163 + } 164 + 165 + static struct hfi1_affinity_node *node_affinity_allocate(int node) 166 + { 167 + struct hfi1_affinity_node *entry; 168 + 169 + entry = kzalloc(sizeof(*entry), GFP_KERNEL); 170 + if (!entry) 171 + return NULL; 172 + entry->node = node; 173 + INIT_LIST_HEAD(&entry->list); 174 + 175 + return entry; 176 + } 177 + 178 + /* 179 + * It appends an entry to the list. 180 + * It *must* be called with node_affinity.lock held. 
181 + */ 182 + static void node_affinity_add_tail(struct hfi1_affinity_node *entry) 183 + { 184 + list_add_tail(&entry->list, &node_affinity.list); 185 + } 186 + 187 + /* It must be called with node_affinity.lock held */ 188 + static struct hfi1_affinity_node *node_affinity_lookup(int node) 189 + { 190 + struct list_head *pos; 191 + struct hfi1_affinity_node *entry; 192 + 193 + list_for_each(pos, &node_affinity.list) { 194 + entry = list_entry(pos, struct hfi1_affinity_node, list); 195 + if (entry->node == node) 196 + return entry; 197 + } 198 + 199 + return NULL; 120 200 } 121 201 122 202 /* ··· 219 121 * to the node relative 1 as necessary. 220 122 * 221 123 */ 222 - void hfi1_dev_affinity_init(struct hfi1_devdata *dd) 124 + int hfi1_dev_affinity_init(struct hfi1_devdata *dd) 223 125 { 224 126 int node = pcibus_to_node(dd->pcidev->bus); 225 - struct hfi1_affinity *info = dd->affinity; 127 + struct hfi1_affinity_node *entry; 226 128 const struct cpumask *local_mask; 227 129 int curr_cpu, possible, i; 228 130 ··· 230 132 node = numa_node_id(); 231 133 dd->node = node; 232 134 233 - spin_lock_init(&info->lock); 234 - 235 - init_cpu_mask_set(&info->def_intr); 236 - init_cpu_mask_set(&info->rcv_intr); 237 - init_cpu_mask_set(&info->proc); 238 - 239 135 local_mask = cpumask_of_node(dd->node); 240 136 if (cpumask_first(local_mask) >= nr_cpu_ids) 241 137 local_mask = topology_core_cpumask(0); 242 - /* Use the "real" cpu mask of this node as the default */ 243 - cpumask_and(&info->def_intr.mask, &info->real_cpu_mask, local_mask); 244 138 245 - /* fill in the receive list */ 246 - possible = cpumask_weight(&info->def_intr.mask); 247 - curr_cpu = cpumask_first(&info->def_intr.mask); 248 - if (possible == 1) { 249 - /* only one CPU, everyone will use it */ 250 - cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask); 251 - } else { 252 - /* 253 - * Retain the first CPU in the default list for the control 254 - * context. 
255 - */ 256 - curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask); 257 - /* 258 - * Remove the remaining kernel receive queues from 259 - * the default list and add them to the receive list. 260 - */ 261 - for (i = 0; i < dd->n_krcv_queues - 1; i++) { 262 - cpumask_clear_cpu(curr_cpu, &info->def_intr.mask); 263 - cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask); 264 - curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask); 265 - if (curr_cpu >= nr_cpu_ids) 266 - break; 139 + spin_lock(&node_affinity.lock); 140 + entry = node_affinity_lookup(dd->node); 141 + spin_unlock(&node_affinity.lock); 142 + 143 + /* 144 + * If this is the first time this NUMA node's affinity is used, 145 + * create an entry in the global affinity structure and initialize it. 146 + */ 147 + if (!entry) { 148 + entry = node_affinity_allocate(node); 149 + if (!entry) { 150 + dd_dev_err(dd, 151 + "Unable to allocate global affinity node\n"); 152 + return -ENOMEM; 267 153 } 154 + init_cpu_mask_set(&entry->def_intr); 155 + init_cpu_mask_set(&entry->rcv_intr); 156 + cpumask_clear(&entry->general_intr_mask); 157 + /* Use the "real" cpu mask of this node as the default */ 158 + cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask, 159 + local_mask); 160 + 161 + /* fill in the receive list */ 162 + possible = cpumask_weight(&entry->def_intr.mask); 163 + curr_cpu = cpumask_first(&entry->def_intr.mask); 164 + 165 + if (possible == 1) { 166 + /* only one CPU, everyone will use it */ 167 + cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask); 168 + cpumask_set_cpu(curr_cpu, &entry->general_intr_mask); 169 + } else { 170 + /* 171 + * The general/control context will be the first CPU in 172 + * the default list, so it is removed from the default 173 + * list and added to the general interrupt list. 
174 + */ 175 + cpumask_clear_cpu(curr_cpu, &entry->def_intr.mask); 176 + cpumask_set_cpu(curr_cpu, &entry->general_intr_mask); 177 + curr_cpu = cpumask_next(curr_cpu, 178 + &entry->def_intr.mask); 179 + 180 + /* 181 + * Remove the remaining kernel receive queues from 182 + * the default list and add them to the receive list. 183 + */ 184 + for (i = 0; 185 + i < (dd->n_krcv_queues - 1) * 186 + hfi1_per_node_cntr[dd->node]; 187 + i++) { 188 + cpumask_clear_cpu(curr_cpu, 189 + &entry->def_intr.mask); 190 + cpumask_set_cpu(curr_cpu, 191 + &entry->rcv_intr.mask); 192 + curr_cpu = cpumask_next(curr_cpu, 193 + &entry->def_intr.mask); 194 + if (curr_cpu >= nr_cpu_ids) 195 + break; 196 + } 197 + 198 + /* 199 + * If there ends up being 0 CPU cores leftover for SDMA 200 + * engines, use the same CPU cores as general/control 201 + * context. 202 + */ 203 + if (cpumask_weight(&entry->def_intr.mask) == 0) 204 + cpumask_copy(&entry->def_intr.mask, 205 + &entry->general_intr_mask); 206 + } 207 + 208 + spin_lock(&node_affinity.lock); 209 + node_affinity_add_tail(entry); 210 + spin_unlock(&node_affinity.lock); 268 211 } 269 212 270 - cpumask_copy(&info->proc.mask, cpu_online_mask); 271 - } 272 - 273 - void hfi1_dev_affinity_free(struct hfi1_devdata *dd) 274 - { 275 - kfree(dd->affinity); 213 + return 0; 276 214 } 277 215 278 216 int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) 279 217 { 280 218 int ret; 281 219 cpumask_var_t diff; 282 - struct cpu_mask_set *set; 220 + struct hfi1_affinity_node *entry; 221 + struct cpu_mask_set *set = NULL; 283 222 struct sdma_engine *sde = NULL; 284 223 struct hfi1_ctxtdata *rcd = NULL; 285 224 char extra[64]; ··· 329 194 if (!ret) 330 195 return -ENOMEM; 331 196 197 + spin_lock(&node_affinity.lock); 198 + entry = node_affinity_lookup(dd->node); 199 + spin_unlock(&node_affinity.lock); 200 + 332 201 switch (msix->type) { 333 202 case IRQ_SDMA: 334 203 sde = (struct sdma_engine *)msix->arg; 335 204 scnprintf(extra, 64, 
"engine %u", sde->this_idx); 336 - /* fall through */ 205 + set = &entry->def_intr; 206 + break; 337 207 case IRQ_GENERAL: 338 - set = &dd->affinity->def_intr; 208 + cpu = cpumask_first(&entry->general_intr_mask); 339 209 break; 340 210 case IRQ_RCVCTXT: 341 211 rcd = (struct hfi1_ctxtdata *)msix->arg; 342 - if (rcd->ctxt == HFI1_CTRL_CTXT) { 343 - set = &dd->affinity->def_intr; 344 - cpu = cpumask_first(&set->mask); 345 - } else { 346 - set = &dd->affinity->rcv_intr; 347 - } 212 + if (rcd->ctxt == HFI1_CTRL_CTXT) 213 + cpu = cpumask_first(&entry->general_intr_mask); 214 + else 215 + set = &entry->rcv_intr; 348 216 scnprintf(extra, 64, "ctxt %u", rcd->ctxt); 349 217 break; 350 218 default: ··· 356 218 } 357 219 358 220 /* 359 - * The control receive context is placed on a particular CPU, which 360 - * is set above. Skip accounting for it. Everything else finds its 361 - * CPU here. 221 + * The general and control contexts are placed on a particular 222 + * CPU, which is set above. Skip accounting for it. Everything else 223 + * finds its CPU here. 
362 224 */ 363 - if (cpu == -1) { 364 - spin_lock(&dd->affinity->lock); 225 + if (cpu == -1 && set) { 226 + spin_lock(&node_affinity.lock); 365 227 if (cpumask_equal(&set->mask, &set->used)) { 366 228 /* 367 229 * We've used up all the CPUs, bump up the generation ··· 373 235 cpumask_andnot(diff, &set->mask, &set->used); 374 236 cpu = cpumask_first(diff); 375 237 cpumask_set_cpu(cpu, &set->used); 376 - spin_unlock(&dd->affinity->lock); 238 + spin_unlock(&node_affinity.lock); 377 239 } 378 240 379 241 switch (msix->type) { ··· 401 263 { 402 264 struct cpu_mask_set *set = NULL; 403 265 struct hfi1_ctxtdata *rcd; 266 + struct hfi1_affinity_node *entry; 267 + 268 + spin_lock(&node_affinity.lock); 269 + entry = node_affinity_lookup(dd->node); 270 + spin_unlock(&node_affinity.lock); 404 271 405 272 switch (msix->type) { 406 273 case IRQ_SDMA: 274 + set = &entry->def_intr; 275 + break; 407 276 case IRQ_GENERAL: 408 - set = &dd->affinity->def_intr; 277 + /* Don't do accounting for general contexts */ 409 278 break; 410 279 case IRQ_RCVCTXT: 411 280 rcd = (struct hfi1_ctxtdata *)msix->arg; 412 - /* only do accounting for non control contexts */ 281 + /* Don't do accounting for control contexts */ 413 282 if (rcd->ctxt != HFI1_CTRL_CTXT) 414 - set = &dd->affinity->rcv_intr; 283 + set = &entry->rcv_intr; 415 284 break; 416 285 default: 417 286 return; 418 287 } 419 288 420 289 if (set) { 421 - spin_lock(&dd->affinity->lock); 290 + spin_lock(&node_affinity.lock); 422 291 cpumask_andnot(&set->used, &set->used, &msix->mask); 423 292 if (cpumask_empty(&set->used) && set->gen) { 424 293 set->gen--; 425 294 cpumask_copy(&set->used, &set->mask); 426 295 } 427 - spin_unlock(&dd->affinity->lock); 296 + spin_unlock(&node_affinity.lock); 428 297 } 429 298 430 299 irq_set_affinity_hint(msix->msix.vector, NULL); 431 300 cpumask_clear(&msix->mask); 432 301 } 433 302 434 - int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) 303 + /* This should be called with node_affinity.lock 
held */ 304 + static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask, 305 + struct hfi1_affinity_node_list *affinity) 435 306 { 436 - int cpu = -1, ret; 437 - cpumask_var_t diff, mask, intrs; 307 + int possible, curr_cpu, i; 308 + uint num_cores_per_socket = node_affinity.num_online_cpus / 309 + affinity->num_core_siblings / 310 + node_affinity.num_online_nodes; 311 + 312 + cpumask_copy(hw_thread_mask, &affinity->proc.mask); 313 + if (affinity->num_core_siblings > 0) { 314 + /* Removing other siblings not needed for now */ 315 + possible = cpumask_weight(hw_thread_mask); 316 + curr_cpu = cpumask_first(hw_thread_mask); 317 + for (i = 0; 318 + i < num_cores_per_socket * node_affinity.num_online_nodes; 319 + i++) 320 + curr_cpu = cpumask_next(curr_cpu, hw_thread_mask); 321 + 322 + for (; i < possible; i++) { 323 + cpumask_clear_cpu(curr_cpu, hw_thread_mask); 324 + curr_cpu = cpumask_next(curr_cpu, hw_thread_mask); 325 + } 326 + 327 + /* Identifying correct HW threads within physical cores */ 328 + cpumask_shift_left(hw_thread_mask, hw_thread_mask, 329 + num_cores_per_socket * 330 + node_affinity.num_online_nodes * 331 + hw_thread_no); 332 + } 333 + } 334 + 335 + int hfi1_get_proc_affinity(int node) 336 + { 337 + int cpu = -1, ret, i; 338 + struct hfi1_affinity_node *entry; 339 + cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask; 438 340 const struct cpumask *node_mask, 439 341 *proc_mask = tsk_cpus_allowed(current); 440 - struct cpu_mask_set *set = &dd->affinity->proc; 342 + struct hfi1_affinity_node_list *affinity = &node_affinity; 343 + struct cpu_mask_set *set = &affinity->proc; 441 344 442 345 /* 443 346 * check whether process/context affinity has already ··· 504 325 505 326 /* 506 327 * The process does not have a preset CPU affinity so find one to 507 - * recommend. We prefer CPUs on the same NUMA as the device. 
328 + * recommend using the following algorithm: 329 + * 330 + * For each user process that is opening a context on HFI Y: 331 + * a) If all cores are filled, reinitialize the bitmask 332 + * b) Fill real cores first, then HT cores (First set of HT 333 + * cores on all physical cores, then second set of HT core, 334 + * and, so on) in the following order: 335 + * 336 + * 1. Same NUMA node as HFI Y and not running an IRQ 337 + * handler 338 + * 2. Same NUMA node as HFI Y and running an IRQ handler 339 + * 3. Different NUMA node to HFI Y and not running an IRQ 340 + * handler 341 + * 4. Different NUMA node to HFI Y and running an IRQ 342 + * handler 343 + * c) Mark core as filled in the bitmask. As user processes are 344 + * done, clear cores from the bitmask. 508 345 */ 509 346 510 347 ret = zalloc_cpumask_var(&diff, GFP_KERNEL); 511 348 if (!ret) 512 349 goto done; 513 - ret = zalloc_cpumask_var(&mask, GFP_KERNEL); 350 + ret = zalloc_cpumask_var(&hw_thread_mask, GFP_KERNEL); 514 351 if (!ret) 515 352 goto free_diff; 516 - ret = zalloc_cpumask_var(&intrs, GFP_KERNEL); 353 + ret = zalloc_cpumask_var(&available_mask, GFP_KERNEL); 517 354 if (!ret) 518 - goto free_mask; 355 + goto free_hw_thread_mask; 356 + ret = zalloc_cpumask_var(&intrs_mask, GFP_KERNEL); 357 + if (!ret) 358 + goto free_available_mask; 519 359 520 - spin_lock(&dd->affinity->lock); 360 + spin_lock(&affinity->lock); 521 361 /* 522 - * If we've used all available CPUs, clear the mask and start 362 + * If we've used all available HW threads, clear the mask and start 523 363 * overloading. 524 364 */ 525 365 if (cpumask_equal(&set->mask, &set->used)) { ··· 546 348 cpumask_clear(&set->used); 547 349 } 548 350 549 - /* CPUs used by interrupt handlers */ 550 - cpumask_copy(intrs, (dd->affinity->def_intr.gen ? 551 - &dd->affinity->def_intr.mask : 552 - &dd->affinity->def_intr.used)); 553 - cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ? 
554 - &dd->affinity->rcv_intr.mask : 555 - &dd->affinity->rcv_intr.used)); 351 + /* 352 + * If NUMA node has CPUs used by interrupt handlers, include them in the 353 + * interrupt handler mask. 354 + */ 355 + entry = node_affinity_lookup(node); 356 + if (entry) { 357 + cpumask_copy(intrs_mask, (entry->def_intr.gen ? 358 + &entry->def_intr.mask : 359 + &entry->def_intr.used)); 360 + cpumask_or(intrs_mask, intrs_mask, (entry->rcv_intr.gen ? 361 + &entry->rcv_intr.mask : 362 + &entry->rcv_intr.used)); 363 + cpumask_or(intrs_mask, intrs_mask, &entry->general_intr_mask); 364 + } 556 365 hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl", 557 - cpumask_pr_args(intrs)); 366 + cpumask_pr_args(intrs_mask)); 367 + 368 + cpumask_copy(hw_thread_mask, &set->mask); 558 369 559 370 /* 560 - * If we don't have a NUMA node requested, preference is towards 561 - * device NUMA node 371 + * If HT cores are enabled, identify which HW threads within the 372 + * physical cores should be used. 562 373 */ 563 - if (node == -1) 564 - node = dd->node; 374 + if (affinity->num_core_siblings > 0) { 375 + for (i = 0; i < affinity->num_core_siblings; i++) { 376 + find_hw_thread_mask(i, hw_thread_mask, affinity); 377 + 378 + /* 379 + * If there's at least one available core for this HW 380 + * thread number, stop looking for a core. 381 + * 382 + * diff will always be not empty at least once in this 383 + * loop as the used mask gets reset when 384 + * (set->mask == set->used) before this loop. 
385 + */ 386 + cpumask_andnot(diff, hw_thread_mask, &set->used); 387 + if (!cpumask_empty(diff)) 388 + break; 389 + } 390 + } 391 + hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl", 392 + cpumask_pr_args(hw_thread_mask)); 393 + 565 394 node_mask = cpumask_of_node(node); 566 - hfi1_cdbg(PROC, "device on NUMA %u, CPUs %*pbl", node, 395 + hfi1_cdbg(PROC, "Device on NUMA %u, CPUs %*pbl", node, 567 396 cpumask_pr_args(node_mask)); 568 397 569 - /* diff will hold all unused cpus */ 570 - cpumask_andnot(diff, &set->mask, &set->used); 571 - hfi1_cdbg(PROC, "unused CPUs (all) %*pbl", cpumask_pr_args(diff)); 572 - 573 - /* get cpumask of available CPUs on preferred NUMA */ 574 - cpumask_and(mask, diff, node_mask); 575 - hfi1_cdbg(PROC, "available cpus on NUMA %*pbl", cpumask_pr_args(mask)); 398 + /* Get cpumask of available CPUs on preferred NUMA */ 399 + cpumask_and(available_mask, hw_thread_mask, node_mask); 400 + cpumask_andnot(available_mask, available_mask, &set->used); 401 + hfi1_cdbg(PROC, "Available CPUs on NUMA %u: %*pbl", node, 402 + cpumask_pr_args(available_mask)); 576 403 577 404 /* 578 405 * At first, we don't want to place processes on the same 579 - * CPUs as interrupt handlers. 406 + * CPUs as interrupt handlers. Then, CPUs running interrupt 407 + * handlers are used. 408 + * 409 + * 1) If diff is not empty, then there are CPUs not running 410 + * non-interrupt handlers available, so diff gets copied 411 + * over to available_mask. 412 + * 2) If diff is empty, then all CPUs not running interrupt 413 + * handlers are taken, so available_mask contains all 414 + * available CPUs running interrupt handlers. 415 + * 3) If available_mask is empty, then all CPUs on the 416 + * preferred NUMA node are taken, so other NUMA nodes are 417 + * used for process assignments using the same method as 418 + * the preferred NUMA node. 
580 419 */ 581 - cpumask_andnot(diff, mask, intrs); 420 + cpumask_andnot(diff, available_mask, intrs_mask); 582 421 if (!cpumask_empty(diff)) 583 - cpumask_copy(mask, diff); 422 + cpumask_copy(available_mask, diff); 584 423 585 - /* 586 - * if we don't have a cpu on the preferred NUMA, get 587 - * the list of the remaining available CPUs 588 - */ 589 - if (cpumask_empty(mask)) { 590 - cpumask_andnot(diff, &set->mask, &set->used); 591 - cpumask_andnot(mask, diff, node_mask); 424 + /* If we don't have CPUs on the preferred node, use other NUMA nodes */ 425 + if (cpumask_empty(available_mask)) { 426 + cpumask_andnot(available_mask, hw_thread_mask, &set->used); 427 + /* Excluding preferred NUMA cores */ 428 + cpumask_andnot(available_mask, available_mask, node_mask); 429 + hfi1_cdbg(PROC, 430 + "Preferred NUMA node cores are taken, cores available in other NUMA nodes: %*pbl", 431 + cpumask_pr_args(available_mask)); 432 + 433 + /* 434 + * At first, we don't want to place processes on the same 435 + * CPUs as interrupt handlers. 
436 + */ 437 + cpumask_andnot(diff, available_mask, intrs_mask); 438 + if (!cpumask_empty(diff)) 439 + cpumask_copy(available_mask, diff); 592 440 } 593 - hfi1_cdbg(PROC, "possible CPUs for process %*pbl", 594 - cpumask_pr_args(mask)); 441 + hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl", 442 + cpumask_pr_args(available_mask)); 595 443 596 - cpu = cpumask_first(mask); 444 + cpu = cpumask_first(available_mask); 597 445 if (cpu >= nr_cpu_ids) /* empty */ 598 446 cpu = -1; 599 447 else 600 448 cpumask_set_cpu(cpu, &set->used); 601 - spin_unlock(&dd->affinity->lock); 449 + spin_unlock(&affinity->lock); 450 + hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu); 602 451 603 - free_cpumask_var(intrs); 604 - free_mask: 605 - free_cpumask_var(mask); 452 + free_cpumask_var(intrs_mask); 453 + free_available_mask: 454 + free_cpumask_var(available_mask); 455 + free_hw_thread_mask: 456 + free_cpumask_var(hw_thread_mask); 606 457 free_diff: 607 458 free_cpumask_var(diff); 608 459 done: 609 460 return cpu; 610 461 } 611 462 612 - void hfi1_put_proc_affinity(struct hfi1_devdata *dd, int cpu) 463 + void hfi1_put_proc_affinity(int cpu) 613 464 { 614 - struct cpu_mask_set *set = &dd->affinity->proc; 465 + struct hfi1_affinity_node_list *affinity = &node_affinity; 466 + struct cpu_mask_set *set = &affinity->proc; 615 467 616 468 if (cpu < 0) 617 469 return; 618 - spin_lock(&dd->affinity->lock); 470 + spin_lock(&affinity->lock); 619 471 cpumask_clear_cpu(cpu, &set->used); 472 + hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu); 620 473 if (cpumask_empty(&set->used) && set->gen) { 621 474 set->gen--; 622 475 cpumask_copy(&set->used, &set->mask); 623 476 } 624 - spin_unlock(&dd->affinity->lock); 477 + spin_unlock(&affinity->lock); 625 478 } 626 479 480 + /* Prevents concurrent reads and writes of the sdma_affinity attrib */ 481 + static DEFINE_MUTEX(sdma_affinity_mutex); 482 + 483 + int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, 484 + size_t 
count) 485 + { 486 + struct hfi1_affinity_node *entry; 487 + struct cpumask mask; 488 + int ret, i; 489 + 490 + spin_lock(&node_affinity.lock); 491 + entry = node_affinity_lookup(dd->node); 492 + spin_unlock(&node_affinity.lock); 493 + 494 + if (!entry) 495 + return -EINVAL; 496 + 497 + ret = cpulist_parse(buf, &mask); 498 + if (ret) 499 + return ret; 500 + 501 + if (!cpumask_subset(&mask, cpu_online_mask) || cpumask_empty(&mask)) { 502 + dd_dev_warn(dd, "Invalid CPU mask\n"); 503 + return -EINVAL; 504 + } 505 + 506 + mutex_lock(&sdma_affinity_mutex); 507 + /* reset the SDMA interrupt affinity details */ 508 + init_cpu_mask_set(&entry->def_intr); 509 + cpumask_copy(&entry->def_intr.mask, &mask); 510 + /* 511 + * Reassign the affinity for each SDMA interrupt. 512 + */ 513 + for (i = 0; i < dd->num_msix_entries; i++) { 514 + struct hfi1_msix_entry *msix; 515 + 516 + msix = &dd->msix_entries[i]; 517 + if (msix->type != IRQ_SDMA) 518 + continue; 519 + 520 + ret = hfi1_get_irq_affinity(dd, msix); 521 + 522 + if (ret) 523 + break; 524 + } 525 + 526 + mutex_unlock(&sdma_affinity_mutex); 527 + return ret ? ret : strnlen(buf, PAGE_SIZE); 528 + } 529 + 530 + int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf) 531 + { 532 + struct hfi1_affinity_node *entry; 533 + 534 + spin_lock(&node_affinity.lock); 535 + entry = node_affinity_lookup(dd->node); 536 + spin_unlock(&node_affinity.lock); 537 + 538 + if (!entry) 539 + return -EINVAL; 540 + 541 + mutex_lock(&sdma_affinity_mutex); 542 + cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask); 543 + mutex_unlock(&sdma_affinity_mutex); 544 + return strnlen(buf, PAGE_SIZE); 545 + }
+31 -7
drivers/infiniband/hw/hfi1/affinity.h
··· 73 73 struct hfi1_affinity { 74 74 struct cpu_mask_set def_intr; 75 75 struct cpu_mask_set rcv_intr; 76 - struct cpu_mask_set proc; 77 76 struct cpumask real_cpu_mask; 78 77 /* spin lock to protect affinity struct */ 79 78 spinlock_t lock; ··· 81 82 struct hfi1_msix_entry; 82 83 83 84 /* Initialize non-HT cpu cores mask */ 84 - int init_real_cpu_mask(struct hfi1_devdata *); 85 + void init_real_cpu_mask(void); 85 86 /* Initialize driver affinity data */ 86 - void hfi1_dev_affinity_init(struct hfi1_devdata *); 87 - /* Free driver affinity data */ 88 - void hfi1_dev_affinity_free(struct hfi1_devdata *); 87 + int hfi1_dev_affinity_init(struct hfi1_devdata *); 89 88 /* 90 89 * Set IRQ affinity to a CPU. The function will determine the 91 90 * CPU and set the affinity to it. ··· 98 101 * Determine a CPU affinity for a user process, if the process does not 99 102 * have an affinity set yet. 100 103 */ 101 - int hfi1_get_proc_affinity(struct hfi1_devdata *, int); 104 + int hfi1_get_proc_affinity(int); 102 105 /* Release a CPU used by a user process. 
*/ 103 - void hfi1_put_proc_affinity(struct hfi1_devdata *, int); 106 + void hfi1_put_proc_affinity(int); 107 + 108 + int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf); 109 + int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, 110 + size_t count); 111 + 112 + struct hfi1_affinity_node { 113 + int node; 114 + struct cpu_mask_set def_intr; 115 + struct cpu_mask_set rcv_intr; 116 + struct cpumask general_intr_mask; 117 + struct list_head list; 118 + }; 119 + 120 + struct hfi1_affinity_node_list { 121 + struct list_head list; 122 + struct cpumask real_cpu_mask; 123 + struct cpu_mask_set proc; 124 + int num_core_siblings; 125 + int num_online_nodes; 126 + int num_online_cpus; 127 + /* protect affinity node list */ 128 + spinlock_t lock; 129 + }; 130 + 131 + int node_affinity_init(void); 132 + void node_affinity_destroy(void); 133 + extern struct hfi1_affinity_node_list node_affinity; 104 134 105 135 #endif /* _HFI1_AFFINITY_H */
+193 -103
drivers/infiniband/hw/hfi1/chip.c
··· 63 63 #include "efivar.h" 64 64 #include "platform.h" 65 65 #include "aspm.h" 66 + #include "affinity.h" 66 67 67 68 #define NUM_IB_PORTS 1 68 69 ··· 122 121 #define SEC_SC_HALTED 0x4 /* per-context only */ 123 122 #define SEC_SPC_FREEZE 0x8 /* per-HFI only */ 124 123 124 + #define DEFAULT_KRCVQS 2 125 125 #define MIN_KERNEL_KCTXTS 2 126 126 #define FIRST_KERNEL_KCTXT 1 127 127 /* sizes for both the QP and RSM map tables */ ··· 239 237 | CCE_STATUS_SDMA_PAUSED_SMASK) 240 238 /* all CceStatus sub-block RXE pause bits */ 241 239 #define ALL_RXE_PAUSE CCE_STATUS_RXE_PAUSED_SMASK 240 + 241 + #define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL 242 + #define CNTR_32BIT_MAX 0x00000000FFFFFFFF 242 243 243 244 /* 244 245 * CCE Error flags. ··· 3952 3947 return dd->sw_send_dma_eng_err_status_cnt[0]; 3953 3948 } 3954 3949 3950 + static u64 access_dc_rcv_err_cnt(const struct cntr_entry *entry, 3951 + void *context, int vl, int mode, 3952 + u64 data) 3953 + { 3954 + struct hfi1_devdata *dd = (struct hfi1_devdata *)context; 3955 + 3956 + u64 val = 0; 3957 + u64 csr = entry->csr; 3958 + 3959 + val = read_write_csr(dd, csr, mode, data); 3960 + if (mode == CNTR_MODE_R) { 3961 + val = val > CNTR_MAX - dd->sw_rcv_bypass_packet_errors ? 
3962 + CNTR_MAX : val + dd->sw_rcv_bypass_packet_errors; 3963 + } else if (mode == CNTR_MODE_W) { 3964 + dd->sw_rcv_bypass_packet_errors = 0; 3965 + } else { 3966 + dd_dev_err(dd, "Invalid cntr register access mode"); 3967 + return 0; 3968 + } 3969 + return val; 3970 + } 3971 + 3955 3972 #define def_access_sw_cpu(cntr) \ 3956 3973 static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry, \ 3957 3974 void *context, int vl, int mode, u64 data) \ ··· 4047 4020 CCE_SEND_CREDIT_INT_CNT, CNTR_NORMAL), 4048 4021 [C_DC_UNC_ERR] = DC_PERF_CNTR(DcUnctblErr, DCC_ERR_UNCORRECTABLE_CNT, 4049 4022 CNTR_SYNTH), 4050 - [C_DC_RCV_ERR] = DC_PERF_CNTR(DcRecvErr, DCC_ERR_PORTRCV_ERR_CNT, CNTR_SYNTH), 4023 + [C_DC_RCV_ERR] = CNTR_ELEM("DcRecvErr", DCC_ERR_PORTRCV_ERR_CNT, 0, CNTR_SYNTH, 4024 + access_dc_rcv_err_cnt), 4051 4025 [C_DC_FM_CFG_ERR] = DC_PERF_CNTR(DcFmCfgErr, DCC_ERR_FMCONFIG_ERR_CNT, 4052 4026 CNTR_SYNTH), 4053 4027 [C_DC_RMT_PHY_ERR] = DC_PERF_CNTR(DcRmtPhyErr, DCC_ERR_RCVREMOTE_PHY_ERR_CNT, ··· 8826 8798 return load_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, frame); 8827 8799 } 8828 8800 8829 - static void check_fabric_firmware_versions(struct hfi1_devdata *dd) 8830 - { 8831 - u32 frame, version, prod_id; 8832 - int ret, lane; 8833 - 8834 - /* 4 lanes */ 8835 - for (lane = 0; lane < 4; lane++) { 8836 - ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame); 8837 - if (ret) { 8838 - dd_dev_err(dd, 8839 - "Unable to read lane %d firmware details\n", 8840 - lane); 8841 - continue; 8842 - } 8843 - version = (frame >> SPICO_ROM_VERSION_SHIFT) 8844 - & SPICO_ROM_VERSION_MASK; 8845 - prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT) 8846 - & SPICO_ROM_PROD_ID_MASK; 8847 - dd_dev_info(dd, 8848 - "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n", 8849 - lane, version, prod_id); 8850 - } 8851 - } 8852 - 8853 8801 /* 8854 8802 * Read an idle LCB message. 
8855 8803 * ··· 9191 9187 unsigned long timeout; 9192 9188 9193 9189 /* 9194 - * Check for QSFP interrupt for t_init (SFF 8679) 9190 + * Some QSFP cables have a quirk that asserts the IntN line as a side 9191 + * effect of power up on plug-in. We ignore this false positive 9192 + * interrupt until the module has finished powering up by waiting for 9193 + * a minimum timeout of the module inrush initialization time of 9194 + * 500 ms (SFF 8679 Table 5-6) to ensure the voltage rails in the 9195 + * module have stabilized. 9196 + */ 9197 + msleep(500); 9198 + 9199 + /* 9200 + * Check for QSFP interrupt for t_init (SFF 8679 Table 8-1) 9195 9201 */ 9196 9202 timeout = jiffies + msecs_to_jiffies(2000); 9197 9203 while (1) { 9198 9204 mask = read_csr(dd, dd->hfi1_id ? 9199 9205 ASIC_QSFP2_IN : ASIC_QSFP1_IN); 9200 - if (!(mask & QSFP_HFI0_INT_N)) { 9201 - write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR : 9202 - ASIC_QSFP1_CLEAR, QSFP_HFI0_INT_N); 9206 + if (!(mask & QSFP_HFI0_INT_N)) 9203 9207 break; 9204 - } 9205 9208 if (time_after(jiffies, timeout)) { 9206 9209 dd_dev_info(dd, "%s: No IntN detected, reset complete\n", 9207 9210 __func__); ··· 9224 9213 u64 mask; 9225 9214 9226 9215 mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK); 9227 - if (enable) 9216 + if (enable) { 9217 + /* 9218 + * Clear the status register to avoid an immediate interrupt 9219 + * when we re-enable the IntN pin 9220 + */ 9221 + write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR : ASIC_QSFP1_CLEAR, 9222 + QSFP_HFI0_INT_N); 9228 9223 mask |= (u64)QSFP_HFI0_INT_N; 9229 - else 9224 + } else { 9230 9225 mask &= ~(u64)QSFP_HFI0_INT_N; 9226 + } 9231 9227 write_csr(dd, dd->hfi1_id ? 
ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask); 9232 9228 } 9233 9229 ··· 9648 9630 hfi1_put_tid(dd, i, PT_INVALID, 0, 0); 9649 9631 } 9650 9632 9651 - int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd, 9652 - struct hfi1_ctxt_info *kinfo) 9653 - { 9654 - kinfo->runtime_flags = (HFI1_MISC_GET() << HFI1_CAP_USER_SHIFT) | 9655 - HFI1_CAP_UGET(MASK) | HFI1_CAP_KGET(K2U); 9656 - return 0; 9657 - } 9658 - 9659 9633 struct hfi1_message_header *hfi1_get_msgheader( 9660 9634 struct hfi1_devdata *dd, __le32 *rhf_addr) 9661 9635 { ··· 9900 9890 return 0; 9901 9891 } 9902 9892 9893 + static const char *state_completed_string(u32 completed) 9894 + { 9895 + static const char * const state_completed[] = { 9896 + "EstablishComm", 9897 + "OptimizeEQ", 9898 + "VerifyCap" 9899 + }; 9900 + 9901 + if (completed < ARRAY_SIZE(state_completed)) 9902 + return state_completed[completed]; 9903 + 9904 + return "unknown"; 9905 + } 9906 + 9907 + static const char all_lanes_dead_timeout_expired[] = 9908 + "All lanes were inactive – was the interconnect media removed?"; 9909 + static const char tx_out_of_policy[] = 9910 + "Passing lanes on local port do not meet the local link width policy"; 9911 + static const char no_state_complete[] = 9912 + "State timeout occurred before link partner completed the state"; 9913 + static const char * const state_complete_reasons[] = { 9914 + [0x00] = "Reason unknown", 9915 + [0x01] = "Link was halted by driver, refer to LinkDownReason", 9916 + [0x02] = "Link partner reported failure", 9917 + [0x10] = "Unable to achieve frame sync on any lane", 9918 + [0x11] = 9919 + "Unable to find a common bit rate with the link partner", 9920 + [0x12] = 9921 + "Unable to achieve frame sync on sufficient lanes to meet the local link width policy", 9922 + [0x13] = 9923 + "Unable to identify preset equalization on sufficient lanes to meet the local link width policy", 9924 + [0x14] = no_state_complete, 9925 + [0x15] = 9926 + "State timeout occurred before link partner identified 
equalization presets", 9927 + [0x16] = 9928 + "Link partner completed the EstablishComm state, but the passing lanes do not meet the local link width policy", 9929 + [0x17] = tx_out_of_policy, 9930 + [0x20] = all_lanes_dead_timeout_expired, 9931 + [0x21] = 9932 + "Unable to achieve acceptable BER on sufficient lanes to meet the local link width policy", 9933 + [0x22] = no_state_complete, 9934 + [0x23] = 9935 + "Link partner completed the OptimizeEq state, but the passing lanes do not meet the local link width policy", 9936 + [0x24] = tx_out_of_policy, 9937 + [0x30] = all_lanes_dead_timeout_expired, 9938 + [0x31] = 9939 + "State timeout occurred waiting for host to process received frames", 9940 + [0x32] = no_state_complete, 9941 + [0x33] = 9942 + "Link partner completed the VerifyCap state, but the passing lanes do not meet the local link width policy", 9943 + [0x34] = tx_out_of_policy, 9944 + }; 9945 + 9946 + static const char *state_complete_reason_code_string(struct hfi1_pportdata *ppd, 9947 + u32 code) 9948 + { 9949 + const char *str = NULL; 9950 + 9951 + if (code < ARRAY_SIZE(state_complete_reasons)) 9952 + str = state_complete_reasons[code]; 9953 + 9954 + if (str) 9955 + return str; 9956 + return "Reserved"; 9957 + } 9958 + 9959 + /* describe the given last state complete frame */ 9960 + static void decode_state_complete(struct hfi1_pportdata *ppd, u32 frame, 9961 + const char *prefix) 9962 + { 9963 + struct hfi1_devdata *dd = ppd->dd; 9964 + u32 success; 9965 + u32 state; 9966 + u32 reason; 9967 + u32 lanes; 9968 + 9969 + /* 9970 + * Decode frame: 9971 + * [ 0: 0] - success 9972 + * [ 3: 1] - state 9973 + * [ 7: 4] - next state timeout 9974 + * [15: 8] - reason code 9975 + * [31:16] - lanes 9976 + */ 9977 + success = frame & 0x1; 9978 + state = (frame >> 1) & 0x7; 9979 + reason = (frame >> 8) & 0xff; 9980 + lanes = (frame >> 16) & 0xffff; 9981 + 9982 + dd_dev_err(dd, "Last %s LNI state complete frame 0x%08x:\n", 9983 + prefix, frame); 9984 + dd_dev_err(dd, " 
last reported state state: %s (0x%x)\n", 9985 + state_completed_string(state), state); 9986 + dd_dev_err(dd, " state successfully completed: %s\n", 9987 + success ? "yes" : "no"); 9988 + dd_dev_err(dd, " fail reason 0x%x: %s\n", 9989 + reason, state_complete_reason_code_string(ppd, reason)); 9990 + dd_dev_err(dd, " passing lane mask: 0x%x", lanes); 9991 + } 9992 + 9993 + /* 9994 + * Read the last state complete frames and explain them. This routine 9995 + * expects to be called if the link went down during link negotiation 9996 + * and initialization (LNI). That is, anywhere between polling and link up. 9997 + */ 9998 + static void check_lni_states(struct hfi1_pportdata *ppd) 9999 + { 10000 + u32 last_local_state; 10001 + u32 last_remote_state; 10002 + 10003 + read_last_local_state(ppd->dd, &last_local_state); 10004 + read_last_remote_state(ppd->dd, &last_remote_state); 10005 + 10006 + /* 10007 + * Don't report anything if there is nothing to report. A value of 10008 + * 0 means the link was taken down while polling and there was no 10009 + * training in-process. 10010 + */ 10011 + if (last_local_state == 0 && last_remote_state == 0) 10012 + return; 10013 + 10014 + decode_state_complete(ppd, last_local_state, "transmitted"); 10015 + decode_state_complete(ppd, last_remote_state, "received"); 10016 + } 10017 + 9903 10018 /* 9904 10019 * Helper for set_link_state(). Do not call except from that routine. 9905 10020 * Expects ppd->hls_mutex to be held. 
··· 10037 9902 { 10038 9903 struct hfi1_devdata *dd = ppd->dd; 10039 9904 u32 pstate, previous_state; 10040 - u32 last_local_state; 10041 - u32 last_remote_state; 10042 9905 int ret; 10043 9906 int do_transition; 10044 9907 int do_wait; ··· 10136 10003 } else if (previous_state 10137 10004 & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) { 10138 10005 /* went down while attempting link up */ 10139 - /* byte 1 of last_*_state is the failure reason */ 10140 - read_last_local_state(dd, &last_local_state); 10141 - read_last_remote_state(dd, &last_remote_state); 10142 - dd_dev_err(dd, 10143 - "LNI failure last states: local 0x%08x, remote 0x%08x\n", 10144 - last_local_state, last_remote_state); 10006 + check_lni_states(ppd); 10145 10007 } 10146 10008 10147 10009 /* the active link width (downgrade) is 0 on link down */ ··· 11796 11668 dd->cntrnames = NULL; 11797 11669 } 11798 11670 11799 - #define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL 11800 - #define CNTR_32BIT_MAX 0x00000000FFFFFFFF 11801 - 11802 11671 static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry, 11803 11672 u64 *psval, void *context, int vl) 11804 11673 { ··· 12450 12325 return ib_pstate; 12451 12326 } 12452 12327 12453 - /* 12454 - * Read/modify/write ASIC_QSFP register bits as selected by mask 12455 - * data: 0 or 1 in the positions depending on what needs to be written 12456 - * dir: 0 for read, 1 for write 12457 - * mask: select by setting 12458 - * I2CCLK (bit 0) 12459 - * I2CDATA (bit 1) 12460 - */ 12461 - u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir, 12462 - u32 mask) 12463 - { 12464 - u64 qsfp_oe, target_oe; 12465 - 12466 - target_oe = target ? 
ASIC_QSFP2_OE : ASIC_QSFP1_OE; 12467 - if (mask) { 12468 - /* We are writing register bits, so lock access */ 12469 - dir &= mask; 12470 - data &= mask; 12471 - 12472 - qsfp_oe = read_csr(dd, target_oe); 12473 - qsfp_oe = (qsfp_oe & ~(u64)mask) | (u64)dir; 12474 - write_csr(dd, target_oe, qsfp_oe); 12475 - } 12476 - /* We are exclusively reading bits here, but it is unlikely 12477 - * we'll get valid data when we set the direction of the pin 12478 - * in the same call, so read should call this function again 12479 - * to get valid data 12480 - */ 12481 - return read_csr(dd, target ? ASIC_QSFP2_IN : ASIC_QSFP1_IN); 12482 - } 12483 - 12484 12328 #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ 12485 12329 (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) 12486 12330 ··· 12874 12780 12875 12781 /* 12876 12782 * Kernel receive contexts: 12877 - * - min of 2 or 1 context/numa (excluding control context) 12878 12783 * - Context 0 - control context (VL15/multicast/error) 12879 12784 * - Context 1 - first kernel context 12880 12785 * - Context 2 - second kernel context ··· 12887 12794 */ 12888 12795 num_kernel_contexts = n_krcvqs + 1; 12889 12796 else 12890 - num_kernel_contexts = num_online_nodes() + 1; 12891 - num_kernel_contexts = 12892 - max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts); 12797 + num_kernel_contexts = DEFAULT_KRCVQS + 1; 12893 12798 /* 12894 12799 * Every kernel receive context needs an ACK send context. 
12895 12800 * one send context is allocated for each VL{0-7} and VL15 ··· 12906 12815 */ 12907 12816 if (num_user_contexts < 0) 12908 12817 num_user_contexts = 12909 - cpumask_weight(&dd->affinity->real_cpu_mask); 12818 + cpumask_weight(&node_affinity.real_cpu_mask); 12910 12819 12911 12820 total_contexts = num_kernel_contexts + num_user_contexts; 12912 12821 ··· 14232 14141 } 14233 14142 dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */ 14234 14143 spin_unlock_irqrestore(&hfi1_devs_lock, flags); 14144 + 14145 + /* first one through - set up i2c devices */ 14146 + if (!peer) 14147 + ret = set_up_i2c(dd, dd->asic_data); 14148 + 14235 14149 return ret; 14236 14150 } 14237 14151 ··· 14541 14445 (dd->revision >> CCE_REVISION_SW_SHIFT) 14542 14446 & CCE_REVISION_SW_MASK); 14543 14447 14544 - /* 14545 - * The real cpu mask is part of the affinity struct but has to be 14546 - * initialized earlier than the rest of the affinity struct because it 14547 - * is needed to calculate the number of user contexts in 14548 - * set_up_context_variables(). However, hfi1_dev_affinity_init(), 14549 - * which initializes the rest of the affinity struct members, 14550 - * depends on set_up_context_variables() for the number of kernel 14551 - * contexts, so it cannot be called before set_up_context_variables(). 
14552 - */ 14553 - ret = init_real_cpu_mask(dd); 14554 - if (ret) 14555 - goto bail_cleanup; 14556 - 14557 14448 ret = set_up_context_variables(dd); 14558 14449 if (ret) 14559 14450 goto bail_cleanup; ··· 14554 14471 /* set up KDETH QP prefix in both RX and TX CSRs */ 14555 14472 init_kdeth_qp(dd); 14556 14473 14557 - hfi1_dev_affinity_init(dd); 14474 + ret = hfi1_dev_affinity_init(dd); 14475 + if (ret) 14476 + goto bail_cleanup; 14558 14477 14559 14478 /* send contexts must be set up before receive contexts */ 14560 14479 ret = init_send_contexts(dd); ··· 14593 14508 /* set up LCB access - must be after set_up_interrupts() */ 14594 14509 init_lcb_access(dd); 14595 14510 14511 + /* 14512 + * Serial number is created from the base guid: 14513 + * [27:24] = base guid [38:35] 14514 + * [23: 0] = base guid [23: 0] 14515 + */ 14596 14516 snprintf(dd->serial, SERIAL_MAX, "0x%08llx\n", 14597 - dd->base_guid & 0xFFFFFF); 14517 + (dd->base_guid & 0xFFFFFF) | 14518 + ((dd->base_guid >> 11) & 0xF000000)); 14598 14519 14599 14520 dd->oui1 = dd->base_guid >> 56 & 0xFF; 14600 14521 dd->oui2 = dd->base_guid >> 48 & 0xFF; ··· 14609 14518 ret = load_firmware(dd); /* asymmetric with dispose_firmware() */ 14610 14519 if (ret) 14611 14520 goto bail_clear_intr; 14612 - check_fabric_firmware_versions(dd); 14613 14521 14614 14522 thermal_init(dd); 14615 14523
+1 -4
drivers/infiniband/hw/hfi1/chip.h
··· 640 640 /* SBus commands */ 641 641 #define RESET_SBUS_RECEIVER 0x20 642 642 #define WRITE_SBUS_RECEIVER 0x21 643 + #define READ_SBUS_RECEIVER 0x22 643 644 void sbus_request(struct hfi1_devdata *dd, 644 645 u8 receiver_addr, u8 data_addr, u8 command, u32 data_in); 645 646 int sbus_request_slow(struct hfi1_devdata *dd, ··· 1337 1336 void hfi1_clear_tids(struct hfi1_ctxtdata *rcd); 1338 1337 struct hfi1_message_header *hfi1_get_msgheader( 1339 1338 struct hfi1_devdata *dd, __le32 *rhf_addr); 1340 - int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd, 1341 - struct hfi1_ctxt_info *kinfo); 1342 - u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir, 1343 - u32 mask); 1344 1339 int hfi1_init_ctxt(struct send_context *sc); 1345 1340 void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, 1346 1341 u32 type, unsigned long pa, u16 order);
+4
drivers/infiniband/hw/hfi1/chip_registers.h
··· 471 471 #define ASIC_STS_SBUS_RESULT (ASIC + 0x000000000010) 472 472 #define ASIC_STS_SBUS_RESULT_DONE_SMASK 0x1ull 473 473 #define ASIC_STS_SBUS_RESULT_RCV_DATA_VALID_SMASK 0x2ull 474 + #define ASIC_STS_SBUS_RESULT_RESULT_CODE_SHIFT 2 475 + #define ASIC_STS_SBUS_RESULT_RESULT_CODE_MASK 0x7ull 476 + #define ASIC_STS_SBUS_RESULT_DATA_OUT_SHIFT 32 477 + #define ASIC_STS_SBUS_RESULT_DATA_OUT_MASK 0xFFFFFFFFull 474 478 #define ASIC_STS_THERM (ASIC + 0x000000000058) 475 479 #define ASIC_STS_THERM_CRIT_TEMP_MASK 0x7FFull 476 480 #define ASIC_STS_THERM_CRIT_TEMP_SHIFT 18
+28 -24
drivers/infiniband/hw/hfi1/driver.c
··· 392 392 u16 rlid; 393 393 u8 svc_type, sl, sc5; 394 394 395 - sc5 = (be16_to_cpu(rhdr->lrh[0]) >> 12) & 0xf; 396 - if (rhf_dc_info(packet->rhf)) 397 - sc5 |= 0x10; 395 + sc5 = hdr2sc(rhdr, packet->rhf); 398 396 sl = ibp->sc_to_sl[sc5]; 399 397 400 398 lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK; ··· 448 450 packet->rcv_flags = 0; 449 451 } 450 452 451 - static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr, 452 - struct hfi1_other_headers *ohdr, 453 - u64 rhf, u32 bth1, struct ib_grh *grh) 453 + void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, 454 + bool do_cnp) 454 455 { 455 456 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); 456 - u32 rqpn = 0; 457 - u16 rlid; 458 - u8 sc5, svc_type; 457 + struct hfi1_ib_header *hdr = pkt->hdr; 458 + struct hfi1_other_headers *ohdr = pkt->ohdr; 459 + struct ib_grh *grh = NULL; 460 + u32 rqpn = 0, bth1; 461 + u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]); 462 + u8 sc, svc_type; 463 + bool is_mcast = false; 464 + 465 + if (pkt->rcv_flags & HFI1_HAS_GRH) 466 + grh = &hdr->u.l.grh; 459 467 460 468 switch (qp->ibqp.qp_type) { 461 469 case IB_QPT_SMI: ··· 470 466 rlid = be16_to_cpu(hdr->lrh[3]); 471 467 rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; 472 468 svc_type = IB_CC_SVCTYPE_UD; 469 + is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && 470 + (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); 473 471 break; 474 472 case IB_QPT_UC: 475 473 rlid = qp->remote_ah_attr.dlid; ··· 487 481 return; 488 482 } 489 483 490 - sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; 491 - if (rhf_dc_info(rhf)) 492 - sc5 |= 0x10; 484 + sc = hdr2sc((struct hfi1_message_header *)hdr, pkt->rhf); 493 485 494 - if (bth1 & HFI1_FECN_SMASK) { 486 + bth1 = be32_to_cpu(ohdr->bth[1]); 487 + if (do_cnp && (bth1 & HFI1_FECN_SMASK)) { 495 488 u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]); 496 - u16 dlid = be16_to_cpu(hdr->lrh[1]); 497 489 498 - return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc5, grh); 490 + 
return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh); 499 491 } 500 492 501 - if (bth1 & HFI1_BECN_SMASK) { 493 + if (!is_mcast && (bth1 & HFI1_BECN_SMASK)) { 502 494 struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); 503 495 u32 lqpn = bth1 & RVT_QPN_MASK; 504 - u8 sl = ibp->sc_to_sl[sc5]; 496 + u8 sl = ibp->sc_to_sl[sc]; 505 497 506 498 process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type); 507 499 } 500 + 508 501 } 509 502 510 503 struct ps_mdata { ··· 601 596 struct rvt_qp *qp; 602 597 struct hfi1_ib_header *hdr; 603 598 struct hfi1_other_headers *ohdr; 604 - struct ib_grh *grh = NULL; 605 599 struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; 606 600 u64 rhf = rhf_to_cpu(rhf_addr); 607 601 u32 etype = rhf_rcv_type(rhf), qpn, bth1; ··· 620 616 hfi1_get_msgheader(dd, rhf_addr); 621 617 lnh = be16_to_cpu(hdr->lrh[0]) & 3; 622 618 623 - if (lnh == HFI1_LRH_BTH) { 619 + if (lnh == HFI1_LRH_BTH) 624 620 ohdr = &hdr->u.oth; 625 - } else if (lnh == HFI1_LRH_GRH) { 621 + else if (lnh == HFI1_LRH_GRH) 626 622 ohdr = &hdr->u.l.oth; 627 - grh = &hdr->u.l.grh; 628 - } else { 623 + else 629 624 goto next; /* just in case */ 630 - } 625 + 631 626 bth1 = be32_to_cpu(ohdr->bth[1]); 632 627 is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK)); 633 628 ··· 642 639 goto next; 643 640 } 644 641 645 - process_ecn(qp, hdr, ohdr, rhf, bth1, grh); 642 + process_ecn(qp, packet, true); 646 643 rcu_read_unlock(); 647 644 648 645 /* turn off BECN, FECN */ ··· 1365 1362 1366 1363 dd_dev_err(packet->rcd->dd, 1367 1364 "Bypass packets are not supported in normal operation. Dropping\n"); 1365 + incr_cntr64(&packet->rcd->dd->sw_rcv_bypass_packet_errors); 1368 1366 return RHF_RCV_CONTINUE; 1369 1367 } 1370 1368
+51 -40
drivers/infiniband/hw/hfi1/file_ops.c
··· 168 168 169 169 static int hfi1_file_open(struct inode *inode, struct file *fp) 170 170 { 171 + struct hfi1_filedata *fd; 171 172 struct hfi1_devdata *dd = container_of(inode->i_cdev, 172 173 struct hfi1_devdata, 173 174 user_cdev); ··· 177 176 kobject_get(&dd->kobj); 178 177 179 178 /* The real work is performed later in assign_ctxt() */ 180 - fp->private_data = kzalloc(sizeof(struct hfi1_filedata), GFP_KERNEL); 181 - if (fp->private_data) /* no cpu affinity by default */ 182 - ((struct hfi1_filedata *)fp->private_data)->rec_cpu_num = -1; 183 - return fp->private_data ? 0 : -ENOMEM; 179 + 180 + fd = kzalloc(sizeof(*fd), GFP_KERNEL); 181 + 182 + if (fd) { 183 + fd->rec_cpu_num = -1; /* no cpu affinity by default */ 184 + fd->mm = current->mm; 185 + } 186 + 187 + fp->private_data = fd; 188 + 189 + return fd ? 0 : -ENOMEM; 184 190 } 185 191 186 192 static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, ··· 400 392 struct hfi1_filedata *fd = kiocb->ki_filp->private_data; 401 393 struct hfi1_user_sdma_pkt_q *pq = fd->pq; 402 394 struct hfi1_user_sdma_comp_q *cq = fd->cq; 403 - int ret = 0, done = 0, reqs = 0; 395 + int done = 0, reqs = 0; 404 396 unsigned long dim = from->nr_segs; 405 397 406 - if (!cq || !pq) { 407 - ret = -EIO; 408 - goto done; 409 - } 398 + if (!cq || !pq) 399 + return -EIO; 410 400 411 - if (!iter_is_iovec(from) || !dim) { 412 - ret = -EINVAL; 413 - goto done; 414 - } 401 + if (!iter_is_iovec(from) || !dim) 402 + return -EINVAL; 415 403 416 404 hfi1_cdbg(SDMA, "SDMA request from %u:%u (%lu)", 417 405 fd->uctxt->ctxt, fd->subctxt, dim); 418 406 419 - if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) { 420 - ret = -ENOSPC; 421 - goto done; 422 - } 407 + if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) 408 + return -ENOSPC; 423 409 424 410 while (dim) { 411 + int ret; 425 412 unsigned long count = 0; 426 413 427 414 ret = hfi1_user_sdma_process_request( 428 415 kiocb->ki_filp, (struct iovec *)(from->iov + done), 429 416 dim, &count); 430 - if 
(ret) 431 - goto done; 417 + if (ret) { 418 + reqs = ret; 419 + break; 420 + } 432 421 dim -= count; 433 422 done += count; 434 423 reqs++; 435 424 } 436 - done: 437 - return ret ? ret : reqs; 425 + 426 + return reqs; 438 427 } 439 428 440 429 static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) ··· 723 718 hfi1_user_sdma_free_queues(fdata); 724 719 725 720 /* release the cpu */ 726 - hfi1_put_proc_affinity(dd, fdata->rec_cpu_num); 721 + hfi1_put_proc_affinity(fdata->rec_cpu_num); 727 722 728 723 /* 729 724 * Clear any left over, unhandled events so the next process that ··· 735 730 736 731 if (--uctxt->cnt) { 737 732 uctxt->active_slaves &= ~(1 << fdata->subctxt); 738 - uctxt->subpid[fdata->subctxt] = 0; 739 733 mutex_unlock(&hfi1_mutex); 740 734 goto done; 741 735 } ··· 760 756 write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE, 761 757 hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type)); 762 758 sc_disable(uctxt->sc); 763 - uctxt->pid = 0; 764 759 spin_unlock_irqrestore(&dd->uctxt_lock, flags); 765 760 766 761 dd->rcd[uctxt->ctxt] = NULL; ··· 821 818 ret = find_shared_ctxt(fp, uinfo); 822 819 if (ret < 0) 823 820 goto done_unlock; 824 - if (ret) 825 - fd->rec_cpu_num = hfi1_get_proc_affinity( 826 - fd->uctxt->dd, fd->uctxt->numa_id); 821 + if (ret) { 822 + fd->rec_cpu_num = 823 + hfi1_get_proc_affinity(fd->uctxt->numa_id); 824 + } 827 825 } 828 826 829 827 /* ··· 899 895 } 900 896 fd->uctxt = uctxt; 901 897 fd->subctxt = uctxt->cnt++; 902 - uctxt->subpid[fd->subctxt] = current->pid; 903 898 uctxt->active_slaves |= 1 << fd->subctxt; 904 899 ret = 1; 905 900 goto done; ··· 935 932 if (ctxt == dd->num_rcv_contexts) 936 933 return -EBUSY; 937 934 938 - fd->rec_cpu_num = hfi1_get_proc_affinity(dd, -1); 935 + /* 936 + * If we don't have a NUMA node requested, preference is towards 937 + * device NUMA node. 
938 + */ 939 + fd->rec_cpu_num = hfi1_get_proc_affinity(dd->node); 939 940 if (fd->rec_cpu_num != -1) 940 941 numa = cpu_to_node(fd->rec_cpu_num); 941 942 else ··· 983 976 return ret; 984 977 } 985 978 uctxt->userversion = uinfo->userversion; 986 - uctxt->pid = current->pid; 987 - uctxt->flags = HFI1_CAP_UGET(MASK); 979 + uctxt->flags = hfi1_cap_mask; /* save current flag state */ 988 980 init_waitqueue_head(&uctxt->wait); 989 981 strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm)); 990 982 memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)); ··· 1086 1080 hfi1_set_ctxt_jkey(uctxt->dd, uctxt->ctxt, uctxt->jkey); 1087 1081 1088 1082 rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; 1089 - if (HFI1_CAP_KGET_MASK(uctxt->flags, HDRSUPP)) 1083 + if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP)) 1090 1084 rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB; 1091 1085 /* 1092 1086 * Ignore the bit in the flags for now until proper 1093 1087 * support for multiple packet per rcv array entry is 1094 1088 * added. 1095 1089 */ 1096 - if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR)) 1090 + if (!HFI1_CAP_UGET_MASK(uctxt->flags, MULTI_PKT_EGR)) 1097 1091 rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; 1098 - if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL)) 1092 + if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_EGR_FULL)) 1099 1093 rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; 1100 - if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) 1094 + if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) 1101 1095 rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; 1102 1096 /* 1103 1097 * The RcvCtxtCtrl.TailUpd bit has to be explicitly written. ··· 1105 1099 * uses of the chip or ctxt. Therefore, add the rcvctrl op 1106 1100 * for both cases. 
1107 1101 */ 1108 - if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) 1102 + if (HFI1_CAP_UGET_MASK(uctxt->flags, DMA_RTAIL)) 1109 1103 rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; 1110 1104 else 1111 1105 rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS; ··· 1128 1122 int ret = 0; 1129 1123 1130 1124 memset(&cinfo, 0, sizeof(cinfo)); 1131 - ret = hfi1_get_base_kinfo(uctxt, &cinfo); 1132 - if (ret < 0) 1133 - goto done; 1125 + cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) & 1126 + HFI1_CAP_MISC_MASK) << HFI1_CAP_USER_SHIFT) | 1127 + HFI1_CAP_UGET_MASK(uctxt->flags, MASK) | 1128 + HFI1_CAP_KGET_MASK(uctxt->flags, K2U); 1129 + /* adjust flag if this fd is not able to cache */ 1130 + if (!fd->handler) 1131 + cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */ 1132 + 1134 1133 cinfo.num_active = hfi1_count_active_units(); 1135 1134 cinfo.unit = uctxt->dd->unit; 1136 1135 cinfo.ctxt = uctxt->ctxt; ··· 1157 1146 trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo); 1158 1147 if (copy_to_user(ubase, &cinfo, sizeof(cinfo))) 1159 1148 ret = -EFAULT; 1160 - done: 1149 + 1161 1150 return ret; 1162 1151 } 1163 1152
+125
drivers/infiniband/hw/hfi1/firmware.c
··· 206 206 /* the number of fabric SerDes on the SBus */ 207 207 #define NUM_FABRIC_SERDES 4 208 208 209 + /* ASIC_STS_SBUS_RESULT.RESULT_CODE value */ 210 + #define SBUS_READ_COMPLETE 0x4 211 + 209 212 /* SBus fabric SerDes addresses, one set per HFI */ 210 213 static const u8 fabric_serdes_addrs[2][NUM_FABRIC_SERDES] = { 211 214 { 0x01, 0x02, 0x03, 0x04 }, ··· 243 240 static void dispose_one_firmware(struct firmware_details *fdet); 244 241 static int load_fabric_serdes_firmware(struct hfi1_devdata *dd, 245 242 struct firmware_details *fdet); 243 + static void dump_fw_version(struct hfi1_devdata *dd); 246 244 247 245 /* 248 246 * Read a single 64-bit value from 8051 data memory. ··· 1083 1079 } 1084 1080 1085 1081 /* 1082 + * Read a value from the SBus. 1083 + * 1084 + * Requires the caller to be in fast mode 1085 + */ 1086 + static u32 sbus_read(struct hfi1_devdata *dd, u8 receiver_addr, u8 data_addr, 1087 + u32 data_in) 1088 + { 1089 + u64 reg; 1090 + int retries; 1091 + int success = 0; 1092 + u32 result = 0; 1093 + u32 result_code = 0; 1094 + 1095 + sbus_request(dd, receiver_addr, data_addr, READ_SBUS_RECEIVER, data_in); 1096 + 1097 + for (retries = 0; retries < 100; retries++) { 1098 + usleep_range(1000, 1200); /* arbitrary */ 1099 + reg = read_csr(dd, ASIC_STS_SBUS_RESULT); 1100 + result_code = (reg >> ASIC_STS_SBUS_RESULT_RESULT_CODE_SHIFT) 1101 + & ASIC_STS_SBUS_RESULT_RESULT_CODE_MASK; 1102 + if (result_code != SBUS_READ_COMPLETE) 1103 + continue; 1104 + 1105 + success = 1; 1106 + result = (reg >> ASIC_STS_SBUS_RESULT_DATA_OUT_SHIFT) 1107 + & ASIC_STS_SBUS_RESULT_DATA_OUT_MASK; 1108 + break; 1109 + } 1110 + 1111 + if (!success) { 1112 + dd_dev_err(dd, "%s: read failed, result code 0x%x\n", __func__, 1113 + result_code); 1114 + } 1115 + 1116 + return result; 1117 + } 1118 + 1119 + /* 1086 1120 * Turn off the SBus and fabric serdes spicos. 1087 1121 * 1088 1122 * + Must be called with Sbus fast mode turned on. 
··· 1678 1636 return ret; 1679 1637 } 1680 1638 1639 + dump_fw_version(dd); 1681 1640 return 0; 1682 1641 } 1683 1642 ··· 2096 2053 dd->base_guid = read_csr(dd, DC_DC8051_CFG_LOCAL_GUID); 2097 2054 dd_dev_info(dd, "GUID %llx", 2098 2055 (unsigned long long)dd->base_guid); 2056 + } 2057 + 2058 + /* read and display firmware version info */ 2059 + static void dump_fw_version(struct hfi1_devdata *dd) 2060 + { 2061 + u32 pcie_vers[NUM_PCIE_SERDES]; 2062 + u32 fabric_vers[NUM_FABRIC_SERDES]; 2063 + u32 sbus_vers; 2064 + int i; 2065 + int all_same; 2066 + int ret; 2067 + u8 rcv_addr; 2068 + 2069 + ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT); 2070 + if (ret) { 2071 + dd_dev_err(dd, "Unable to acquire SBus to read firmware versions\n"); 2072 + return; 2073 + } 2074 + 2075 + /* set fast mode */ 2076 + set_sbus_fast_mode(dd); 2077 + 2078 + /* read version for SBus Master */ 2079 + sbus_request(dd, SBUS_MASTER_BROADCAST, 0x02, WRITE_SBUS_RECEIVER, 0); 2080 + sbus_request(dd, SBUS_MASTER_BROADCAST, 0x07, WRITE_SBUS_RECEIVER, 0x1); 2081 + /* wait for interrupt to be processed */ 2082 + usleep_range(10000, 11000); 2083 + sbus_vers = sbus_read(dd, SBUS_MASTER_BROADCAST, 0x08, 0x1); 2084 + dd_dev_info(dd, "SBus Master firmware version 0x%08x\n", sbus_vers); 2085 + 2086 + /* read version for PCIe SerDes */ 2087 + all_same = 1; 2088 + pcie_vers[0] = 0; 2089 + for (i = 0; i < NUM_PCIE_SERDES; i++) { 2090 + rcv_addr = pcie_serdes_addrs[dd->hfi1_id][i]; 2091 + sbus_request(dd, rcv_addr, 0x03, WRITE_SBUS_RECEIVER, 0); 2092 + /* wait for interrupt to be processed */ 2093 + usleep_range(10000, 11000); 2094 + pcie_vers[i] = sbus_read(dd, rcv_addr, 0x04, 0x0); 2095 + if (i > 0 && pcie_vers[0] != pcie_vers[i]) 2096 + all_same = 0; 2097 + } 2098 + 2099 + if (all_same) { 2100 + dd_dev_info(dd, "PCIe SerDes firmware version 0x%x\n", 2101 + pcie_vers[0]); 2102 + } else { 2103 + dd_dev_warn(dd, "PCIe SerDes do not have the same firmware version\n"); 2104 + for (i = 0; i < 
NUM_PCIE_SERDES; i++) { 2105 + dd_dev_info(dd, 2106 + "PCIe SerDes lane %d firmware version 0x%x\n", 2107 + i, pcie_vers[i]); 2108 + } 2109 + } 2110 + 2111 + /* read version for fabric SerDes */ 2112 + all_same = 1; 2113 + fabric_vers[0] = 0; 2114 + for (i = 0; i < NUM_FABRIC_SERDES; i++) { 2115 + rcv_addr = fabric_serdes_addrs[dd->hfi1_id][i]; 2116 + sbus_request(dd, rcv_addr, 0x03, WRITE_SBUS_RECEIVER, 0); 2117 + /* wait for interrupt to be processed */ 2118 + usleep_range(10000, 11000); 2119 + fabric_vers[i] = sbus_read(dd, rcv_addr, 0x04, 0x0); 2120 + if (i > 0 && fabric_vers[0] != fabric_vers[i]) 2121 + all_same = 0; 2122 + } 2123 + 2124 + if (all_same) { 2125 + dd_dev_info(dd, "Fabric SerDes firmware version 0x%x\n", 2126 + fabric_vers[0]); 2127 + } else { 2128 + dd_dev_warn(dd, "Fabric SerDes do not have the same firmware version\n"); 2129 + for (i = 0; i < NUM_FABRIC_SERDES; i++) { 2130 + dd_dev_info(dd, 2131 + "Fabric SerDes lane %d firmware version 0x%x\n", 2132 + i, fabric_vers[i]); 2133 + } 2134 + } 2135 + 2136 + clear_sbus_fast_mode(dd); 2137 + release_chip_resource(dd, CR_SBUS); 2099 2138 }
+104 -17
drivers/infiniband/hw/hfi1/hfi.h
··· 62 62 #include <linux/cdev.h> 63 63 #include <linux/delay.h> 64 64 #include <linux/kthread.h> 65 + #include <linux/i2c.h> 66 + #include <linux/i2c-algo-bit.h> 65 67 #include <rdma/rdma_vt.h> 66 68 67 69 #include "chip_registers.h" ··· 255 253 /* chip offset of PIO buffers for this ctxt */ 256 254 u32 piobufs; 257 255 /* per-context configuration flags */ 258 - u32 flags; 256 + unsigned long flags; 259 257 /* per-context event flags for fileops/intr communication */ 260 258 unsigned long event_flags; 261 259 /* WAIT_RCV that timed out, no interrupt */ ··· 270 268 u32 urgent; 271 269 /* saved total number of polled urgent packets for poll edge trigger */ 272 270 u32 urgent_poll; 273 - /* pid of process using this ctxt */ 274 - pid_t pid; 275 - pid_t subpid[HFI1_MAX_SHARED_CTXTS]; 276 271 /* same size as task_struct .comm[], command that opened context */ 277 272 char comm[TASK_COMM_LEN]; 278 273 /* so file ops can get at unit */ ··· 364 365 u8 rcv_flags; 365 366 u8 etype; 366 367 }; 367 - 368 - static inline bool has_sc4_bit(struct hfi1_packet *p) 369 - { 370 - return !!rhf_dc_info(p->rhf); 371 - } 372 368 373 369 /* 374 370 * Private data for snoop/capture support. ··· 799 805 u8 triggers; /* temperature triggers */ 800 806 }; 801 807 808 + struct hfi1_i2c_bus { 809 + struct hfi1_devdata *controlling_dd; /* current controlling device */ 810 + struct i2c_adapter adapter; /* bus details */ 811 + struct i2c_algo_bit_data algo; /* bus algorithm details */ 812 + int num; /* bus number, 0 or 1 */ 813 + }; 814 + 802 815 /* common data between shared ASIC HFIs */ 803 816 struct hfi1_asic_data { 804 817 struct hfi1_devdata *dds[2]; /* back pointers */ 805 818 struct mutex asic_resource_mutex; 819 + struct hfi1_i2c_bus *i2c_bus0; 820 + struct hfi1_i2c_bus *i2c_bus1; 806 821 }; 807 822 808 823 /* device data struct now contains only "general per-device" info. 
··· 1131 1128 NUM_SEND_DMA_ENG_ERR_STATUS_COUNTERS]; 1132 1129 /* Software counter that aggregates all cce_err_status errors */ 1133 1130 u64 sw_cce_err_status_aggregate; 1134 - 1131 + /* Software counter that aggregates all bypass packet rcv errors */ 1132 + u64 sw_rcv_bypass_packet_errors; 1135 1133 /* receive interrupt functions */ 1136 1134 rhf_rcv_function_ptr *rhf_rcv_function_map; 1137 1135 rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; ··· 1188 1184 1189 1185 struct tid_rb_node; 1190 1186 struct mmu_rb_node; 1187 + struct mmu_rb_handler; 1191 1188 1192 1189 /* Private data for file operations */ 1193 1190 struct hfi1_filedata { ··· 1199 1194 /* for cpu affinity; -1 if none */ 1200 1195 int rec_cpu_num; 1201 1196 u32 tid_n_pinned; 1202 - struct rb_root tid_rb_root; 1197 + struct mmu_rb_handler *handler; 1203 1198 struct tid_rb_node **entry_to_rb; 1204 1199 spinlock_t tid_lock; /* protect tid_[limit,used] counters */ 1205 1200 u32 tid_limit; ··· 1208 1203 u32 invalid_tid_idx; 1209 1204 /* protect invalid_tids array and invalid_tid_idx */ 1210 1205 spinlock_t invalid_lock; 1206 + struct mm_struct *mm; 1211 1207 }; 1212 1208 1213 1209 extern struct list_head hfi1_dev_list; ··· 1242 1236 int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int); 1243 1237 void set_all_slowpath(struct hfi1_devdata *dd); 1244 1238 1239 + extern const struct pci_device_id hfi1_pci_tbl[]; 1240 + 1245 1241 /* receive packet handler dispositions */ 1246 1242 #define RCV_PKT_OK 0x0 /* keep going */ 1247 1243 #define RCV_PKT_LIMIT 0x1 /* stop, hit limit, start thread */ ··· 1269 1261 static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf) 1270 1262 { 1271 1263 return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) | 1272 - ((!!(rhf & RHF_DC_INFO_SMASK)) << 4); 1264 + ((!!(rhf_dc_info(rhf))) << 4); 1273 1265 } 1274 1266 1275 1267 static inline u16 generate_jkey(kuid_t uid) ··· 1579 1571 return &dd->pport[pidx].ibport_data; 1580 1572 } 1581 1573 1574 + void 
hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, 1575 + bool do_cnp); 1576 + static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, 1577 + bool do_cnp) 1578 + { 1579 + struct hfi1_other_headers *ohdr = pkt->ohdr; 1580 + u32 bth1; 1581 + 1582 + bth1 = be32_to_cpu(ohdr->bth[1]); 1583 + if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) { 1584 + hfi1_process_ecn_slowpath(qp, pkt, do_cnp); 1585 + return bth1 & HFI1_FECN_SMASK; 1586 + } 1587 + return false; 1588 + } 1589 + 1582 1590 /* 1583 1591 * Return the indexed PKEY from the port PKEY table. 1584 1592 */ ··· 1612 1588 } 1613 1589 1614 1590 /* 1615 - * Readers of cc_state must call get_cc_state() under rcu_read_lock(). 1616 - * Writers of cc_state must call get_cc_state() under cc_state_lock. 1591 + * Called by readers of cc_state only, must call under rcu_read_lock(). 1617 1592 */ 1618 1593 static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd) 1619 1594 { 1620 1595 return rcu_dereference(ppd->cc_state); 1596 + } 1597 + 1598 + /* 1599 + * Called by writers of cc_state only, must call under cc_state_lock. 
1600 + */ 1601 + static inline 1602 + struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) 1603 + { 1604 + return rcu_dereference_protected(ppd->cc_state, 1605 + lockdep_is_held(&ppd->cc_state_lock)); 1621 1606 } 1622 1607 1623 1608 /* ··· 1704 1671 */ 1705 1672 #define DEFAULT_RCVHDR_ENTSIZE 32 1706 1673 1707 - bool hfi1_can_pin_pages(struct hfi1_devdata *, u32, u32); 1708 - int hfi1_acquire_user_pages(unsigned long, size_t, bool, struct page **); 1709 - void hfi1_release_user_pages(struct mm_struct *, struct page **, size_t, bool); 1674 + bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm, 1675 + u32 nlocked, u32 npages); 1676 + int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, 1677 + size_t npages, bool writable, struct page **pages); 1678 + void hfi1_release_user_pages(struct mm_struct *mm, struct page **p, 1679 + size_t npages, bool dirty); 1710 1680 1711 1681 static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd) 1712 1682 { ··· 1985 1949 1986 1950 int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); 1987 1951 1952 + #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) 1953 + #define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev)) 1954 + 1955 + #define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } 1956 + #define show_packettype(etype) \ 1957 + __print_symbolic(etype, \ 1958 + packettype_name(EXPECTED), \ 1959 + packettype_name(EAGER), \ 1960 + packettype_name(IB), \ 1961 + packettype_name(ERROR), \ 1962 + packettype_name(BYPASS)) 1963 + 1964 + #define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode } 1965 + #define show_ib_opcode(opcode) \ 1966 + __print_symbolic(opcode, \ 1967 + ib_opcode_name(RC_SEND_FIRST), \ 1968 + ib_opcode_name(RC_SEND_MIDDLE), \ 1969 + ib_opcode_name(RC_SEND_LAST), \ 1970 + ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \ 1971 + ib_opcode_name(RC_SEND_ONLY), \ 1972 + 
ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \ 1973 + ib_opcode_name(RC_RDMA_WRITE_FIRST), \ 1974 + ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \ 1975 + ib_opcode_name(RC_RDMA_WRITE_LAST), \ 1976 + ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ 1977 + ib_opcode_name(RC_RDMA_WRITE_ONLY), \ 1978 + ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ 1979 + ib_opcode_name(RC_RDMA_READ_REQUEST), \ 1980 + ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \ 1981 + ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \ 1982 + ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \ 1983 + ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \ 1984 + ib_opcode_name(RC_ACKNOWLEDGE), \ 1985 + ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ 1986 + ib_opcode_name(RC_COMPARE_SWAP), \ 1987 + ib_opcode_name(RC_FETCH_ADD), \ 1988 + ib_opcode_name(UC_SEND_FIRST), \ 1989 + ib_opcode_name(UC_SEND_MIDDLE), \ 1990 + ib_opcode_name(UC_SEND_LAST), \ 1991 + ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \ 1992 + ib_opcode_name(UC_SEND_ONLY), \ 1993 + ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \ 1994 + ib_opcode_name(UC_RDMA_WRITE_FIRST), \ 1995 + ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \ 1996 + ib_opcode_name(UC_RDMA_WRITE_LAST), \ 1997 + ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ 1998 + ib_opcode_name(UC_RDMA_WRITE_ONLY), \ 1999 + ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ 2000 + ib_opcode_name(UD_SEND_ONLY), \ 2001 + ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \ 2002 + ib_opcode_name(CNP)) 1988 2003 #endif /* _HFI1_KERNEL_H */
+34 -17
drivers/infiniband/hw/hfi1/init.c
··· 64 64 #include "debugfs.h" 65 65 #include "verbs.h" 66 66 #include "aspm.h" 67 + #include "affinity.h" 67 68 68 69 #undef pr_fmt 69 70 #define pr_fmt(fmt) DRIVER_NAME ": " fmt ··· 475 474 void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, 476 475 struct hfi1_devdata *dd, u8 hw_pidx, u8 port) 477 476 { 478 - int i, size; 477 + int i; 479 478 uint default_pkey_idx; 479 + struct cc_state *cc_state; 480 480 481 481 ppd->dd = dd; 482 482 ppd->hw_pidx = hw_pidx; ··· 528 526 529 527 spin_lock_init(&ppd->cc_state_lock); 530 528 spin_lock_init(&ppd->cc_log_lock); 531 - size = sizeof(struct cc_state); 532 - RCU_INIT_POINTER(ppd->cc_state, kzalloc(size, GFP_KERNEL)); 533 - if (!rcu_dereference(ppd->cc_state)) 529 + cc_state = kzalloc(sizeof(*cc_state), GFP_KERNEL); 530 + RCU_INIT_POINTER(ppd->cc_state, cc_state); 531 + if (!cc_state) 534 532 goto bail; 535 533 return; 536 534 ··· 974 972 975 973 /* 976 974 * Release our hold on the shared asic data. If we are the last one, 977 - * free the structure. Must be holding hfi1_devs_lock. 975 + * return the structure to be finalized outside the lock. Must be 976 + * holding hfi1_devs_lock. 978 977 */ 979 - static void release_asic_data(struct hfi1_devdata *dd) 978 + static struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd) 980 979 { 980 + struct hfi1_asic_data *ad; 981 981 int other; 982 982 983 983 if (!dd->asic_data) 984 - return; 984 + return NULL; 985 985 dd->asic_data->dds[dd->hfi1_id] = NULL; 986 986 other = dd->hfi1_id ? 0 : 1; 987 - if (!dd->asic_data->dds[other]) { 988 - /* we are the last holder, free it */ 989 - kfree(dd->asic_data); 990 - } 987 + ad = dd->asic_data; 991 988 dd->asic_data = NULL; 989 + /* return NULL if the other dd still has a link */ 990 + return ad->dds[other] ? 
NULL : ad; 991 + } 992 + 993 + static void finalize_asic_data(struct hfi1_devdata *dd, 994 + struct hfi1_asic_data *ad) 995 + { 996 + clean_up_i2c(dd, ad); 997 + kfree(ad); 992 998 } 993 999 994 1000 static void __hfi1_free_devdata(struct kobject *kobj) 995 1001 { 996 1002 struct hfi1_devdata *dd = 997 1003 container_of(kobj, struct hfi1_devdata, kobj); 1004 + struct hfi1_asic_data *ad; 998 1005 unsigned long flags; 999 1006 1000 1007 spin_lock_irqsave(&hfi1_devs_lock, flags); 1001 1008 idr_remove(&hfi1_unit_table, dd->unit); 1002 1009 list_del(&dd->list); 1003 - release_asic_data(dd); 1010 + ad = release_asic_data(dd); 1004 1011 spin_unlock_irqrestore(&hfi1_devs_lock, flags); 1012 + if (ad) 1013 + finalize_asic_data(dd, ad); 1005 1014 free_platform_config(dd); 1006 1015 rcu_barrier(); /* wait for rcu callbacks to complete */ 1007 1016 free_percpu(dd->int_counter); 1008 1017 free_percpu(dd->rcv_limit); 1009 - hfi1_dev_affinity_free(dd); 1010 1018 free_percpu(dd->send_schedule); 1011 1019 rvt_dealloc_device(&dd->verbs_dev.rdi); 1012 1020 } ··· 1174 1162 #define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: " 1175 1163 #define PFX DRIVER_NAME ": " 1176 1164 1177 - static const struct pci_device_id hfi1_pci_tbl[] = { 1165 + const struct pci_device_id hfi1_pci_tbl[] = { 1178 1166 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL0) }, 1179 1167 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL1) }, 1180 1168 { 0, } ··· 1207 1195 int ret; 1208 1196 1209 1197 ret = dev_init(); 1198 + if (ret) 1199 + goto bail; 1200 + 1201 + ret = node_affinity_init(); 1210 1202 if (ret) 1211 1203 goto bail; 1212 1204 ··· 1294 1278 static void __exit hfi1_mod_cleanup(void) 1295 1279 { 1296 1280 pci_unregister_driver(&hfi1_pci_driver); 1281 + node_affinity_destroy(); 1297 1282 hfi1_wss_exit(); 1298 1283 hfi1_dbg_exit(); 1299 1284 hfi1_cpulist_count = 0; ··· 1328 1311 hrtimer_cancel(&ppd->cca_timer[i].hrtimer); 1329 1312 1330 1313 spin_lock(&ppd->cc_state_lock); 1331 - cc_state = 
get_cc_state(ppd); 1314 + cc_state = get_cc_state_protected(ppd); 1332 1315 RCU_INIT_POINTER(ppd->cc_state, NULL); 1333 1316 spin_unlock(&ppd->cc_state_lock); 1334 1317 ··· 1777 1760 1778 1761 hfi1_cdbg(PROC, 1779 1762 "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %zuKB\n", 1780 - rcd->ctxt, rcd->egrbufs.alloced, rcd->egrbufs.rcvtid_size, 1781 - rcd->egrbufs.size); 1763 + rcd->ctxt, rcd->egrbufs.alloced, 1764 + rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024); 1782 1765 1783 1766 /* 1784 1767 * Set the contexts rcv array head update threshold to the closest
+21 -39
drivers/infiniband/hw/hfi1/mad.c
··· 588 588 589 589 pi->port_phys_conf = (ppd->port_type & 0xf); 590 590 591 - #if PI_LED_ENABLE_SUP 592 591 pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4; 593 592 pi->port_states.ledenable_offlinereason |= 594 593 ppd->is_sm_config_started << 5; ··· 601 602 pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6; 602 603 pi->port_states.ledenable_offlinereason |= 603 604 ppd->offline_disabled_reason; 604 - #else 605 - pi->port_states.offline_reason = ppd->neighbor_normal << 4; 606 - pi->port_states.offline_reason |= ppd->is_sm_config_started << 5; 607 - pi->port_states.offline_reason |= ppd->offline_disabled_reason; 608 - #endif /* PI_LED_ENABLE_SUP */ 609 605 610 606 pi->port_states.portphysstate_portstate = 611 607 (hfi1_ibphys_portstate(ppd) << 4) | state; ··· 1746 1752 if (start_of_sm_config && (lstate == IB_PORT_INIT)) 1747 1753 ppd->is_sm_config_started = 1; 1748 1754 1749 - #if PI_LED_ENABLE_SUP 1750 1755 psi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4; 1751 1756 psi->port_states.ledenable_offlinereason |= 1752 1757 ppd->is_sm_config_started << 5; 1753 1758 psi->port_states.ledenable_offlinereason |= 1754 1759 ppd->offline_disabled_reason; 1755 - #else 1756 - psi->port_states.offline_reason = ppd->neighbor_normal << 4; 1757 - psi->port_states.offline_reason |= ppd->is_sm_config_started << 5; 1758 - psi->port_states.offline_reason |= ppd->offline_disabled_reason; 1759 - #endif /* PI_LED_ENABLE_SUP */ 1760 1760 1761 1761 psi->port_states.portphysstate_portstate = 1762 1762 (hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf); ··· 2418 2430 rsp->port_rcv_remote_physical_errors = 2419 2431 cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR, 2420 2432 CNTR_INVALID_VL)); 2421 - tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL); 2422 - tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL); 2423 - if (tmp2 < tmp) { 2424 - /* overflow/wrapped */ 2425 - rsp->local_link_integrity_errors = 
cpu_to_be64(~0); 2426 - } else { 2427 - rsp->local_link_integrity_errors = cpu_to_be64(tmp2); 2428 - } 2433 + rsp->local_link_integrity_errors = 2434 + cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY, 2435 + CNTR_INVALID_VL)); 2429 2436 tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL); 2430 2437 tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, 2431 2438 CNTR_INVALID_VL); ··· 2482 2499 cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL, 2483 2500 idx_from_vl(vl))); 2484 2501 2502 + rsp->vls[vfi].port_vl_xmit_discards = 2503 + cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL, 2504 + idx_from_vl(vl))); 2485 2505 vlinfo++; 2486 2506 vfi++; 2487 2507 } ··· 2515 2529 error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR, 2516 2530 CNTR_INVALID_VL); 2517 2531 /* local link integrity must be right-shifted by the lli resolution */ 2518 - tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL); 2519 - tmp += read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL); 2520 - error_counter_summary += (tmp >> res_lli); 2532 + error_counter_summary += (read_dev_cntr(dd, C_DC_RX_REPLAY, 2533 + CNTR_INVALID_VL) >> res_lli); 2521 2534 /* link error recovery must b right-shifted by the ler resolution */ 2522 2535 tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL); 2523 2536 tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL); ··· 2785 2800 rsp->port_rcv_constraint_errors = 2786 2801 cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR, 2787 2802 CNTR_INVALID_VL)); 2788 - tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL); 2789 - tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL); 2790 - if (tmp2 < tmp) { 2791 - /* overflow/wrapped */ 2792 - rsp->local_link_integrity_errors = cpu_to_be64(~0); 2793 - } else { 2794 - rsp->local_link_integrity_errors = cpu_to_be64(tmp2); 2795 - } 2803 + rsp->local_link_integrity_errors = 2804 + cpu_to_be64(read_dev_cntr(dd, C_DC_RX_REPLAY, 2805 + CNTR_INVALID_VL)); 2796 2806 
rsp->excessive_buffer_overruns = 2797 2807 cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL)); 2798 2808 } ··· 2863 2883 tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL); 2864 2884 2865 2885 rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff; 2866 - 2886 + rsp->port_rcv_errors = 2887 + cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL)); 2867 2888 vlinfo = &rsp->vls[0]; 2868 2889 vfi = 0; 2869 2890 vl_select_mask = be32_to_cpu(req->vl_select_mask); 2870 2891 for_each_set_bit(vl, (unsigned long *)&(vl_select_mask), 2871 2892 8 * sizeof(req->vl_select_mask)) { 2872 2893 memset(vlinfo, 0, sizeof(*vlinfo)); 2873 - /* vlinfo->vls[vfi].port_vl_xmit_discards ??? */ 2894 + rsp->vls[vfi].port_vl_xmit_discards = 2895 + cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL, 2896 + idx_from_vl(vl))); 2874 2897 vlinfo += 1; 2875 2898 vfi++; 2876 2899 } ··· 3145 3162 if (counter_select & CS_PORT_RCV_REMOTE_PHYSICAL_ERRORS) 3146 3163 write_dev_cntr(dd, C_DC_RMT_PHY_ERR, CNTR_INVALID_VL, 0); 3147 3164 3148 - if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS) { 3149 - write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0); 3165 + if (counter_select & CS_LOCAL_LINK_INTEGRITY_ERRORS) 3150 3166 write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0); 3151 - } 3152 3167 3153 3168 if (counter_select & CS_LINK_ERROR_RECOVERY) { 3154 3169 write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0); ··· 3204 3223 /* if (counter_select & CS_PORT_MARK_FECN) 3205 3224 * write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0); 3206 3225 */ 3207 - /* port_vl_xmit_discards ??? 
*/ 3226 + if (counter_select & C_SW_XMIT_DSCD_VL) 3227 + write_port_cntr(ppd, C_SW_XMIT_DSCD_VL, 3228 + idx_from_vl(vl), 0); 3208 3229 } 3209 3230 3210 3231 if (resp_len) ··· 3375 3392 */ 3376 3393 spin_lock(&ppd->cc_state_lock); 3377 3394 3378 - old_cc_state = get_cc_state(ppd); 3395 + old_cc_state = get_cc_state_protected(ppd); 3379 3396 if (!old_cc_state) { 3380 3397 /* never active, or shutting down */ 3381 3398 spin_unlock(&ppd->cc_state_lock); ··· 3943 3960 write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0); 3944 3961 write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL, 0); 3945 3962 /* LocalLinkIntegrityErrors */ 3946 - write_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL, 0); 3947 3963 write_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL, 0); 3948 3964 /* ExcessiveBufferOverruns */ 3949 3965 write_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL, 0);
-7
drivers/infiniband/hw/hfi1/mad.h
··· 48 48 #define _HFI1_MAD_H 49 49 50 50 #include <rdma/ib_pma.h> 51 - #define USE_PI_LED_ENABLE 1 /* 52 - * use led enabled bit in struct 53 - * opa_port_states, if available 54 - */ 55 51 #include <rdma/opa_smi.h> 56 52 #include <rdma/opa_port_info.h> 57 - #ifndef PI_LED_ENABLE_SUP 58 - #define PI_LED_ENABLE_SUP 0 59 - #endif 60 53 #include "opa_compat.h" 61 54 62 55 /*
+163 -119
drivers/infiniband/hw/hfi1/mmu_rb.c
··· 53 53 #include "trace.h" 54 54 55 55 struct mmu_rb_handler { 56 - struct list_head list; 57 56 struct mmu_notifier mn; 58 - struct rb_root *root; 57 + struct rb_root root; 58 + void *ops_arg; 59 59 spinlock_t lock; /* protect the RB tree */ 60 60 struct mmu_rb_ops *ops; 61 + struct mm_struct *mm; 62 + struct list_head lru_list; 63 + struct work_struct del_work; 64 + struct list_head del_list; 65 + struct workqueue_struct *wq; 61 66 }; 62 - 63 - static LIST_HEAD(mmu_rb_handlers); 64 - static DEFINE_SPINLOCK(mmu_rb_lock); /* protect mmu_rb_handlers list */ 65 67 66 68 static unsigned long mmu_node_start(struct mmu_rb_node *); 67 69 static unsigned long mmu_node_last(struct mmu_rb_node *); 68 - static struct mmu_rb_handler *find_mmu_handler(struct rb_root *); 69 70 static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *, 70 71 unsigned long); 71 72 static inline void mmu_notifier_range_start(struct mmu_notifier *, ··· 77 76 unsigned long, unsigned long); 78 77 static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *, 79 78 unsigned long, unsigned long); 79 + static void do_remove(struct mmu_rb_handler *handler, 80 + struct list_head *del_list); 81 + static void handle_remove(struct work_struct *work); 80 82 81 83 static struct mmu_notifier_ops mn_opts = { 82 84 .invalidate_page = mmu_notifier_page, ··· 99 95 return PAGE_ALIGN(node->addr + node->len) - 1; 100 96 } 101 97 102 - int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops) 98 + int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm, 99 + struct mmu_rb_ops *ops, 100 + struct workqueue_struct *wq, 101 + struct mmu_rb_handler **handler) 103 102 { 104 103 struct mmu_rb_handler *handlr; 105 - 106 - if (!ops->invalidate) 107 - return -EINVAL; 104 + int ret; 108 105 109 106 handlr = kmalloc(sizeof(*handlr), GFP_KERNEL); 110 107 if (!handlr) 111 108 return -ENOMEM; 112 109 113 - handlr->root = root; 110 + handlr->root = RB_ROOT; 114 111 handlr->ops = ops; 112 + 
handlr->ops_arg = ops_arg; 115 113 INIT_HLIST_NODE(&handlr->mn.hlist); 116 114 spin_lock_init(&handlr->lock); 117 115 handlr->mn.ops = &mn_opts; 118 - spin_lock(&mmu_rb_lock); 119 - list_add_tail_rcu(&handlr->list, &mmu_rb_handlers); 120 - spin_unlock(&mmu_rb_lock); 116 + handlr->mm = mm; 117 + INIT_WORK(&handlr->del_work, handle_remove); 118 + INIT_LIST_HEAD(&handlr->del_list); 119 + INIT_LIST_HEAD(&handlr->lru_list); 120 + handlr->wq = wq; 121 121 122 - return mmu_notifier_register(&handlr->mn, current->mm); 122 + ret = mmu_notifier_register(&handlr->mn, handlr->mm); 123 + if (ret) { 124 + kfree(handlr); 125 + return ret; 126 + } 127 + 128 + *handler = handlr; 129 + return 0; 123 130 } 124 131 125 - void hfi1_mmu_rb_unregister(struct rb_root *root) 132 + void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) 126 133 { 127 - struct mmu_rb_handler *handler = find_mmu_handler(root); 134 + struct mmu_rb_node *rbnode; 135 + struct rb_node *node; 128 136 unsigned long flags; 129 - 130 - if (!handler) 131 - return; 137 + struct list_head del_list; 132 138 133 139 /* Unregister first so we don't get any more notifications. */ 134 - if (current->mm) 135 - mmu_notifier_unregister(&handler->mn, current->mm); 140 + mmu_notifier_unregister(&handler->mn, handler->mm); 136 141 137 - spin_lock(&mmu_rb_lock); 138 - list_del_rcu(&handler->list); 139 - spin_unlock(&mmu_rb_lock); 140 - synchronize_rcu(); 142 + /* 143 + * Make sure the wq delete handler is finished running. It will not 144 + * be triggered once the mmu notifiers are unregistered above. 
145 + */ 146 + flush_work(&handler->del_work); 147 + 148 + INIT_LIST_HEAD(&del_list); 141 149 142 150 spin_lock_irqsave(&handler->lock, flags); 143 - if (!RB_EMPTY_ROOT(root)) { 144 - struct rb_node *node; 145 - struct mmu_rb_node *rbnode; 146 - 147 - while ((node = rb_first(root))) { 148 - rbnode = rb_entry(node, struct mmu_rb_node, node); 149 - rb_erase(node, root); 150 - if (handler->ops->remove) 151 - handler->ops->remove(root, rbnode, NULL); 152 - } 151 + while ((node = rb_first(&handler->root))) { 152 + rbnode = rb_entry(node, struct mmu_rb_node, node); 153 + rb_erase(node, &handler->root); 154 + /* move from LRU list to delete list */ 155 + list_move(&rbnode->list, &del_list); 153 156 } 154 157 spin_unlock_irqrestore(&handler->lock, flags); 158 + 159 + do_remove(handler, &del_list); 155 160 156 161 kfree(handler); 157 162 } 158 163 159 - int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) 164 + int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, 165 + struct mmu_rb_node *mnode) 160 166 { 161 - struct mmu_rb_handler *handler = find_mmu_handler(root); 162 167 struct mmu_rb_node *node; 163 168 unsigned long flags; 164 169 int ret = 0; 165 - 166 - if (!handler) 167 - return -EINVAL; 168 170 169 171 spin_lock_irqsave(&handler->lock, flags); 170 172 hfi1_cdbg(MMU, "Inserting node addr 0x%llx, len %u", mnode->addr, ··· 180 170 ret = -EINVAL; 181 171 goto unlock; 182 172 } 183 - __mmu_int_rb_insert(mnode, root); 173 + __mmu_int_rb_insert(mnode, &handler->root); 174 + list_add(&mnode->list, &handler->lru_list); 184 175 185 - if (handler->ops->insert) { 186 - ret = handler->ops->insert(root, mnode); 187 - if (ret) 188 - __mmu_int_rb_remove(mnode, root); 176 + ret = handler->ops->insert(handler->ops_arg, mnode); 177 + if (ret) { 178 + __mmu_int_rb_remove(mnode, &handler->root); 179 + list_del(&mnode->list); /* remove from LRU list */ 189 180 } 190 181 unlock: 191 182 spin_unlock_irqrestore(&handler->lock, flags); ··· 202 191 203 192 
hfi1_cdbg(MMU, "Searching for addr 0x%llx, len %u", addr, len); 204 193 if (!handler->ops->filter) { 205 - node = __mmu_int_rb_iter_first(handler->root, addr, 194 + node = __mmu_int_rb_iter_first(&handler->root, addr, 206 195 (addr + len) - 1); 207 196 } else { 208 - for (node = __mmu_int_rb_iter_first(handler->root, addr, 197 + for (node = __mmu_int_rb_iter_first(&handler->root, addr, 209 198 (addr + len) - 1); 210 199 node; 211 200 node = __mmu_int_rb_iter_next(node, addr, ··· 217 206 return node; 218 207 } 219 208 220 - /* Caller must *not* hold handler lock. */ 221 - static void __mmu_rb_remove(struct mmu_rb_handler *handler, 222 - struct mmu_rb_node *node, struct mm_struct *mm) 209 + struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler, 210 + unsigned long addr, unsigned long len) 211 + { 212 + struct mmu_rb_node *node; 213 + unsigned long flags; 214 + 215 + spin_lock_irqsave(&handler->lock, flags); 216 + node = __mmu_rb_search(handler, addr, len); 217 + if (node) { 218 + __mmu_int_rb_remove(node, &handler->root); 219 + list_del(&node->list); /* remove from LRU list */ 220 + } 221 + spin_unlock_irqrestore(&handler->lock, flags); 222 + 223 + return node; 224 + } 225 + 226 + void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg) 227 + { 228 + struct mmu_rb_node *rbnode, *ptr; 229 + struct list_head del_list; 230 + unsigned long flags; 231 + bool stop = false; 232 + 233 + INIT_LIST_HEAD(&del_list); 234 + 235 + spin_lock_irqsave(&handler->lock, flags); 236 + list_for_each_entry_safe_reverse(rbnode, ptr, &handler->lru_list, 237 + list) { 238 + if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg, 239 + &stop)) { 240 + __mmu_int_rb_remove(rbnode, &handler->root); 241 + /* move from LRU list to delete list */ 242 + list_move(&rbnode->list, &del_list); 243 + } 244 + if (stop) 245 + break; 246 + } 247 + spin_unlock_irqrestore(&handler->lock, flags); 248 + 249 + while (!list_empty(&del_list)) { 250 + rbnode = 
list_first_entry(&del_list, struct mmu_rb_node, list); 251 + list_del(&rbnode->list); 252 + handler->ops->remove(handler->ops_arg, rbnode); 253 + } 254 + } 255 + 256 + /* 257 + * It is up to the caller to ensure that this function does not race with the 258 + * mmu invalidate notifier which may be calling the users remove callback on 259 + * 'node'. 260 + */ 261 + void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, 262 + struct mmu_rb_node *node) 223 263 { 224 264 unsigned long flags; 225 265 ··· 278 216 hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr, 279 217 node->len); 280 218 spin_lock_irqsave(&handler->lock, flags); 281 - __mmu_int_rb_remove(node, handler->root); 219 + __mmu_int_rb_remove(node, &handler->root); 220 + list_del(&node->list); /* remove from LRU list */ 282 221 spin_unlock_irqrestore(&handler->lock, flags); 283 222 284 - if (handler->ops->remove) 285 - handler->ops->remove(handler->root, node, mm); 286 - } 287 - 288 - struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr, 289 - unsigned long len) 290 - { 291 - struct mmu_rb_handler *handler = find_mmu_handler(root); 292 - struct mmu_rb_node *node; 293 - unsigned long flags; 294 - 295 - if (!handler) 296 - return ERR_PTR(-EINVAL); 297 - 298 - spin_lock_irqsave(&handler->lock, flags); 299 - node = __mmu_rb_search(handler, addr, len); 300 - spin_unlock_irqrestore(&handler->lock, flags); 301 - 302 - return node; 303 - } 304 - 305 - struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *root, 306 - unsigned long addr, unsigned long len) 307 - { 308 - struct mmu_rb_handler *handler = find_mmu_handler(root); 309 - struct mmu_rb_node *node; 310 - unsigned long flags; 311 - 312 - if (!handler) 313 - return ERR_PTR(-EINVAL); 314 - 315 - spin_lock_irqsave(&handler->lock, flags); 316 - node = __mmu_rb_search(handler, addr, len); 317 - if (node) 318 - __mmu_int_rb_remove(node, handler->root); 319 - spin_unlock_irqrestore(&handler->lock, flags); 320 - 321 - return 
node; 322 - } 323 - 324 - void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node) 325 - { 326 - struct mmu_rb_handler *handler = find_mmu_handler(root); 327 - 328 - if (!handler || !node) 329 - return; 330 - 331 - __mmu_rb_remove(handler, node, NULL); 332 - } 333 - 334 - static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root) 335 - { 336 - struct mmu_rb_handler *handler; 337 - 338 - rcu_read_lock(); 339 - list_for_each_entry_rcu(handler, &mmu_rb_handlers, list) { 340 - if (handler->root == root) 341 - goto unlock; 342 - } 343 - handler = NULL; 344 - unlock: 345 - rcu_read_unlock(); 346 - return handler; 223 + handler->ops->remove(handler->ops_arg, node); 347 224 } 348 225 349 226 static inline void mmu_notifier_page(struct mmu_notifier *mn, ··· 305 304 { 306 305 struct mmu_rb_handler *handler = 307 306 container_of(mn, struct mmu_rb_handler, mn); 308 - struct rb_root *root = handler->root; 307 + struct rb_root *root = &handler->root; 309 308 struct mmu_rb_node *node, *ptr = NULL; 310 309 unsigned long flags; 310 + bool added = false; 311 311 312 312 spin_lock_irqsave(&handler->lock, flags); 313 313 for (node = __mmu_int_rb_iter_first(root, start, end - 1); ··· 317 315 ptr = __mmu_int_rb_iter_next(node, start, end - 1); 318 316 hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u", 319 317 node->addr, node->len); 320 - if (handler->ops->invalidate(root, node)) { 318 + if (handler->ops->invalidate(handler->ops_arg, node)) { 321 319 __mmu_int_rb_remove(node, root); 322 - if (handler->ops->remove) 323 - handler->ops->remove(root, node, mm); 320 + /* move from LRU list to delete list */ 321 + list_move(&node->list, &handler->del_list); 322 + added = true; 324 323 } 325 324 } 326 325 spin_unlock_irqrestore(&handler->lock, flags); 326 + 327 + if (added) 328 + queue_work(handler->wq, &handler->del_work); 329 + } 330 + 331 + /* 332 + * Call the remove function for the given handler and the list. 
This 333 + * is expected to be called with a delete list extracted from handler. 334 + * The caller should not be holding the handler lock. 335 + */ 336 + static void do_remove(struct mmu_rb_handler *handler, 337 + struct list_head *del_list) 338 + { 339 + struct mmu_rb_node *node; 340 + 341 + while (!list_empty(del_list)) { 342 + node = list_first_entry(del_list, struct mmu_rb_node, list); 343 + list_del(&node->list); 344 + handler->ops->remove(handler->ops_arg, node); 345 + } 346 + } 347 + 348 + /* 349 + * Work queue function to remove all nodes that have been queued up to 350 + * be removed. The key feature is that mm->mmap_sem is not being held 351 + * and the remove callback can sleep while taking it, if needed. 352 + */ 353 + static void handle_remove(struct work_struct *work) 354 + { 355 + struct mmu_rb_handler *handler = container_of(work, 356 + struct mmu_rb_handler, 357 + del_work); 358 + struct list_head del_list; 359 + unsigned long flags; 360 + 361 + /* remove anything that is queued to get removed */ 362 + spin_lock_irqsave(&handler->lock, flags); 363 + list_replace_init(&handler->del_list, &del_list); 364 + spin_unlock_irqrestore(&handler->lock, flags); 365 + 366 + do_remove(handler, &del_list); 327 367 }
+24 -13
drivers/infiniband/hw/hfi1/mmu_rb.h
··· 54 54 unsigned long len; 55 55 unsigned long __last; 56 56 struct rb_node node; 57 + struct list_head list; 57 58 }; 58 59 60 + /* 61 + * NOTE: filter, insert, invalidate, and evict must not sleep. Only remove is 62 + * allowed to sleep. 63 + */ 59 64 struct mmu_rb_ops { 60 - bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long); 61 - int (*insert)(struct rb_root *, struct mmu_rb_node *); 62 - void (*remove)(struct rb_root *, struct mmu_rb_node *, 63 - struct mm_struct *); 64 - int (*invalidate)(struct rb_root *, struct mmu_rb_node *); 65 + bool (*filter)(struct mmu_rb_node *node, unsigned long addr, 66 + unsigned long len); 67 + int (*insert)(void *ops_arg, struct mmu_rb_node *mnode); 68 + void (*remove)(void *ops_arg, struct mmu_rb_node *mnode); 69 + int (*invalidate)(void *ops_arg, struct mmu_rb_node *node); 70 + int (*evict)(void *ops_arg, struct mmu_rb_node *mnode, 71 + void *evict_arg, bool *stop); 65 72 }; 66 73 67 - int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops); 68 - void hfi1_mmu_rb_unregister(struct rb_root *); 69 - int hfi1_mmu_rb_insert(struct rb_root *, struct mmu_rb_node *); 70 - void hfi1_mmu_rb_remove(struct rb_root *, struct mmu_rb_node *); 71 - struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *, unsigned long, 72 - unsigned long); 73 - struct mmu_rb_node *hfi1_mmu_rb_extract(struct rb_root *, unsigned long, 74 - unsigned long); 74 + int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm, 75 + struct mmu_rb_ops *ops, 76 + struct workqueue_struct *wq, 77 + struct mmu_rb_handler **handler); 78 + void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler); 79 + int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, 80 + struct mmu_rb_node *mnode); 81 + void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg); 82 + void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, 83 + struct mmu_rb_node *mnode); 84 + struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler, 85 + 
unsigned long addr, unsigned long len); 75 86 76 87 #endif /* _HFI1_MMU_RB_H */
+64 -4
drivers/infiniband/hw/hfi1/pcie.c
··· 679 679 module_param(pcie_pset, uint, S_IRUGO); 680 680 MODULE_PARM_DESC(pcie_pset, "PCIe Eq Pset value to use, range is 0-10"); 681 681 682 + static uint pcie_ctle = 1; /* discrete on, integrated off */ 683 + module_param(pcie_ctle, uint, S_IRUGO); 684 + MODULE_PARM_DESC(pcie_ctle, "PCIe static CTLE mode, bit 0 - discrete on/off, bit 1 - integrated on/off"); 685 + 682 686 /* equalization columns */ 683 687 #define PREC 0 684 688 #define ATTN 1 ··· 718 714 { 0x03, 0x1e, 0x04 }, /* p8 */ 719 715 { 0x05, 0x1e, 0x00 }, /* p9 */ 720 716 { 0x00, 0x1e, 0x0a }, /* p10 */ 717 + }; 718 + 719 + static const u8 discrete_ctle_tunings[11][4] = { 720 + /* DC LF HF BW */ 721 + { 0x48, 0x0b, 0x04, 0x04 }, /* p0 */ 722 + { 0x60, 0x05, 0x0f, 0x0a }, /* p1 */ 723 + { 0x50, 0x09, 0x06, 0x06 }, /* p2 */ 724 + { 0x68, 0x05, 0x0f, 0x0a }, /* p3 */ 725 + { 0x80, 0x05, 0x0f, 0x0a }, /* p4 */ 726 + { 0x70, 0x05, 0x0f, 0x0a }, /* p5 */ 727 + { 0x68, 0x05, 0x0f, 0x0a }, /* p6 */ 728 + { 0x38, 0x0f, 0x00, 0x00 }, /* p7 */ 729 + { 0x48, 0x09, 0x06, 0x06 }, /* p8 */ 730 + { 0x60, 0x05, 0x0f, 0x0a }, /* p9 */ 731 + { 0x38, 0x0f, 0x00, 0x00 }, /* p10 */ 732 + }; 733 + 734 + static const u8 integrated_ctle_tunings[11][4] = { 735 + /* DC LF HF BW */ 736 + { 0x38, 0x0f, 0x00, 0x00 }, /* p0 */ 737 + { 0x38, 0x0f, 0x00, 0x00 }, /* p1 */ 738 + { 0x38, 0x0f, 0x00, 0x00 }, /* p2 */ 739 + { 0x38, 0x0f, 0x00, 0x00 }, /* p3 */ 740 + { 0x58, 0x0a, 0x05, 0x05 }, /* p4 */ 741 + { 0x48, 0x0a, 0x05, 0x05 }, /* p5 */ 742 + { 0x40, 0x0a, 0x05, 0x05 }, /* p6 */ 743 + { 0x38, 0x0f, 0x00, 0x00 }, /* p7 */ 744 + { 0x38, 0x0f, 0x00, 0x00 }, /* p8 */ 745 + { 0x38, 0x09, 0x06, 0x06 }, /* p9 */ 746 + { 0x38, 0x0e, 0x01, 0x01 }, /* p10 */ 721 747 }; 722 748 723 749 /* helper to format the value to write to hardware */ ··· 985 951 u32 status, err; 986 952 int ret; 987 953 int do_retry, retry_count = 0; 954 + int intnum = 0; 988 955 uint default_pset; 989 956 u16 target_vector, target_speed; 990 957 u16 lnkctl2, vendor; 
991 958 u8 div; 992 959 const u8 (*eq)[3]; 960 + const u8 (*ctle_tunings)[4]; 961 + uint static_ctle_mode; 993 962 int return_error = 0; 994 963 995 964 /* PCIe Gen3 is for the ASIC only */ ··· 1126 1089 div = 3; 1127 1090 eq = discrete_preliminary_eq; 1128 1091 default_pset = DEFAULT_DISCRETE_PSET; 1092 + ctle_tunings = discrete_ctle_tunings; 1093 + /* bit 0 - discrete on/off */ 1094 + static_ctle_mode = pcie_ctle & 0x1; 1129 1095 } else { 1130 1096 /* 400mV, FS=29, LF = 9 */ 1131 1097 fs = 29; ··· 1136 1096 div = 1; 1137 1097 eq = integrated_preliminary_eq; 1138 1098 default_pset = DEFAULT_MCP_PSET; 1099 + ctle_tunings = integrated_ctle_tunings; 1100 + /* bit 1 - integrated on/off */ 1101 + static_ctle_mode = (pcie_ctle >> 1) & 0x1; 1139 1102 } 1140 1103 pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL101, 1141 1104 (fs << ··· 1178 1135 * step 5c: Program gasket interrupts 1179 1136 */ 1180 1137 /* set the Rx Bit Rate to REFCLK ratio */ 1181 - write_gasket_interrupt(dd, 0, 0x0006, 0x0050); 1138 + write_gasket_interrupt(dd, intnum++, 0x0006, 0x0050); 1182 1139 /* disable pCal for PCIe Gen3 RX equalization */ 1183 - write_gasket_interrupt(dd, 1, 0x0026, 0x5b01); 1140 + /* select adaptive or static CTLE */ 1141 + write_gasket_interrupt(dd, intnum++, 0x0026, 1142 + 0x5b01 | (static_ctle_mode << 3)); 1184 1143 /* 1185 1144 * Enable iCal for PCIe Gen3 RX equalization, and set which 1186 1145 * evaluation of RX_EQ_EVAL will launch the iCal procedure. 
1187 1146 */ 1188 - write_gasket_interrupt(dd, 2, 0x0026, 0x5202); 1147 + write_gasket_interrupt(dd, intnum++, 0x0026, 0x5202); 1148 + 1149 + if (static_ctle_mode) { 1150 + /* apply static CTLE tunings */ 1151 + u8 pcie_dc, pcie_lf, pcie_hf, pcie_bw; 1152 + 1153 + pcie_dc = ctle_tunings[pcie_pset][0]; 1154 + pcie_lf = ctle_tunings[pcie_pset][1]; 1155 + pcie_hf = ctle_tunings[pcie_pset][2]; 1156 + pcie_bw = ctle_tunings[pcie_pset][3]; 1157 + write_gasket_interrupt(dd, intnum++, 0x0026, 0x0200 | pcie_dc); 1158 + write_gasket_interrupt(dd, intnum++, 0x0026, 0x0100 | pcie_lf); 1159 + write_gasket_interrupt(dd, intnum++, 0x0026, 0x0000 | pcie_hf); 1160 + write_gasket_interrupt(dd, intnum++, 0x0026, 0x5500 | pcie_bw); 1161 + } 1162 + 1189 1163 /* terminate list */ 1190 - write_gasket_interrupt(dd, 3, 0x0000, 0x0000); 1164 + write_gasket_interrupt(dd, intnum++, 0x0000, 0x0000); 1191 1165 1192 1166 /* 1193 1167 * step 5d: program XMT margin
+17 -4
drivers/infiniband/hw/hfi1/pio.c
··· 1952 1952 dd->vld[15].sc = sc_alloc(dd, SC_VL15, 1953 1953 dd->rcd[0]->rcvhdrqentsize, dd->node); 1954 1954 if (!dd->vld[15].sc) 1955 - goto nomem; 1955 + return -ENOMEM; 1956 + 1956 1957 hfi1_init_ctxt(dd->vld[15].sc); 1957 1958 dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048); 1958 1959 1959 - dd->kernel_send_context = kmalloc_node(dd->num_send_contexts * 1960 + dd->kernel_send_context = kzalloc_node(dd->num_send_contexts * 1960 1961 sizeof(struct send_context *), 1961 1962 GFP_KERNEL, dd->node); 1963 + if (!dd->kernel_send_context) 1964 + goto freesc15; 1965 + 1962 1966 dd->kernel_send_context[0] = dd->vld[15].sc; 1963 1967 1964 1968 for (i = 0; i < num_vls; i++) { ··· 2014 2010 if (pio_map_init(dd, ppd->port - 1, num_vls, NULL)) 2015 2011 goto nomem; 2016 2012 return 0; 2013 + 2017 2014 nomem: 2018 - sc_free(dd->vld[15].sc); 2019 - for (i = 0; i < num_vls; i++) 2015 + for (i = 0; i < num_vls; i++) { 2020 2016 sc_free(dd->vld[i].sc); 2017 + dd->vld[i].sc = NULL; 2018 + } 2019 + 2021 2020 for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) 2022 2021 sc_free(dd->kernel_send_context[i + 1]); 2022 + 2023 + kfree(dd->kernel_send_context); 2024 + dd->kernel_send_context = NULL; 2025 + 2026 + freesc15: 2027 + sc_free(dd->vld[15].sc); 2023 2028 return -ENOMEM; 2024 2029 } 2025 2030
+5 -15
drivers/infiniband/hw/hfi1/platform.c
··· 537 537 u8 precur = 0, attn = 0, postcur = 0, external_device_config = 0; 538 538 u8 *cache = ppd->qsfp_info.cache; 539 539 540 - /* Enable external device config if channel is limiting active */ 541 - read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, 542 - GENERAL_CONFIG, &config_data); 543 - config_data &= ~(0xff << ENABLE_EXT_DEV_CONFIG_SHIFT); 544 - config_data |= ((u32)limiting_active << ENABLE_EXT_DEV_CONFIG_SHIFT); 545 - ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS, 546 - GENERAL_CONFIG, config_data); 547 - if (ret != HCMD_SUCCESS) 548 - dd_dev_err( 549 - ppd->dd, 550 - "%s: Failed to set enable external device config\n", 551 - __func__); 552 - 553 - config_data = 0; /* re-init */ 554 540 /* Pass tuning method to 8051 */ 555 541 read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG, 556 542 &config_data); ··· 624 638 if (ret) 625 639 return ret; 626 640 641 + /* 642 + * We'll change the QSFP memory contents from here on out, thus we set a 643 + * flag here to remind ourselves to reset the QSFP module. This prevents 644 + * reuse of stale settings established in our previous pass through. 645 + */ 627 646 if (ppd->qsfp_info.reset_needed) { 628 647 reset_qsfp(ppd); 629 - ppd->qsfp_info.reset_needed = 0; 630 648 refresh_qsfp_cache(ppd, &ppd->qsfp_info); 631 649 } else { 632 650 ppd->qsfp_info.reset_needed = 1;
+65 -3
drivers/infiniband/hw/hfi1/qp.c
··· 52 52 #include <linux/seq_file.h> 53 53 #include <rdma/rdma_vt.h> 54 54 #include <rdma/rdmavt_qp.h> 55 + #include <rdma/ib_verbs.h> 55 56 56 57 #include "hfi.h" 57 58 #include "qp.h" ··· 114 113 16384, /* 1C */ 115 114 24576, /* 1D */ 116 115 32768 /* 1E */ 116 + }; 117 + 118 + const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = { 119 + [IB_WR_RDMA_WRITE] = { 120 + .length = sizeof(struct ib_rdma_wr), 121 + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), 122 + }, 123 + 124 + [IB_WR_RDMA_READ] = { 125 + .length = sizeof(struct ib_rdma_wr), 126 + .qpt_support = BIT(IB_QPT_RC), 127 + .flags = RVT_OPERATION_ATOMIC, 128 + }, 129 + 130 + [IB_WR_ATOMIC_CMP_AND_SWP] = { 131 + .length = sizeof(struct ib_atomic_wr), 132 + .qpt_support = BIT(IB_QPT_RC), 133 + .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE, 134 + }, 135 + 136 + [IB_WR_ATOMIC_FETCH_AND_ADD] = { 137 + .length = sizeof(struct ib_atomic_wr), 138 + .qpt_support = BIT(IB_QPT_RC), 139 + .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE, 140 + }, 141 + 142 + [IB_WR_RDMA_WRITE_WITH_IMM] = { 143 + .length = sizeof(struct ib_rdma_wr), 144 + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), 145 + }, 146 + 147 + [IB_WR_SEND] = { 148 + .length = sizeof(struct ib_send_wr), 149 + .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) | 150 + BIT(IB_QPT_UC) | BIT(IB_QPT_RC), 151 + }, 152 + 153 + [IB_WR_SEND_WITH_IMM] = { 154 + .length = sizeof(struct ib_send_wr), 155 + .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) | 156 + BIT(IB_QPT_UC) | BIT(IB_QPT_RC), 157 + }, 158 + 159 + [IB_WR_REG_MR] = { 160 + .length = sizeof(struct ib_reg_wr), 161 + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), 162 + .flags = RVT_OPERATION_LOCAL, 163 + }, 164 + 165 + [IB_WR_LOCAL_INV] = { 166 + .length = sizeof(struct ib_send_wr), 167 + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), 168 + .flags = RVT_OPERATION_LOCAL, 169 + }, 170 + 171 + [IB_WR_SEND_WITH_INV] = { 172 + .length = 
sizeof(struct ib_send_wr), 173 + .qpt_support = BIT(IB_QPT_RC), 174 + }, 175 + 117 176 }; 118 177 119 178 static void flush_tx_list(struct rvt_qp *qp) ··· 806 745 807 746 priv->owner = qp; 808 747 809 - priv->s_hdr = kzalloc_node(sizeof(*priv->s_hdr), gfp, rdi->dparms.node); 810 - if (!priv->s_hdr) { 748 + priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), gfp, 749 + rdi->dparms.node); 750 + if (!priv->s_ahg) { 811 751 kfree(priv); 812 752 return ERR_PTR(-ENOMEM); 813 753 } ··· 821 759 { 822 760 struct hfi1_qp_priv *priv = qp->priv; 823 761 824 - kfree(priv->s_hdr); 762 + kfree(priv->s_ahg); 825 763 kfree(priv); 826 764 } 827 765
+3 -1
drivers/infiniband/hw/hfi1/qp.h
··· 54 54 55 55 extern unsigned int hfi1_qp_table_size; 56 56 57 + extern const struct rvt_operation_params hfi1_post_parms[]; 58 + 57 59 /* 58 60 * free_ahg - clear ahg from QP 59 61 */ ··· 63 61 { 64 62 struct hfi1_qp_priv *priv = qp->priv; 65 63 66 - priv->s_hdr->ahgcount = 0; 64 + priv->s_ahg->ahgcount = 0; 67 65 qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR); 68 66 if (priv->s_sde && qp->s_ahgidx >= 0) 69 67 sdma_ahg_free(priv->s_sde, qp->s_ahgidx);
+301 -106
drivers/infiniband/hw/hfi1/qsfp.c
··· 50 50 #include <linux/vmalloc.h> 51 51 52 52 #include "hfi.h" 53 - #include "twsi.h" 53 + 54 + /* for the given bus number, return the CSR for reading an i2c line */ 55 + static inline u32 i2c_in_csr(u32 bus_num) 56 + { 57 + return bus_num ? ASIC_QSFP2_IN : ASIC_QSFP1_IN; 58 + } 59 + 60 + /* for the given bus number, return the CSR for writing an i2c line */ 61 + static inline u32 i2c_oe_csr(u32 bus_num) 62 + { 63 + return bus_num ? ASIC_QSFP2_OE : ASIC_QSFP1_OE; 64 + } 65 + 66 + static void hfi1_setsda(void *data, int state) 67 + { 68 + struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data; 69 + struct hfi1_devdata *dd = bus->controlling_dd; 70 + u64 reg; 71 + u32 target_oe; 72 + 73 + target_oe = i2c_oe_csr(bus->num); 74 + reg = read_csr(dd, target_oe); 75 + /* 76 + * The OE bit value is inverted and connected to the pin. When 77 + * OE is 0 the pin is left to be pulled up, when the OE is 1 78 + * the pin is driven low. This matches the "open drain" or "open 79 + * collector" convention. 80 + */ 81 + if (state) 82 + reg &= ~QSFP_HFI0_I2CDAT; 83 + else 84 + reg |= QSFP_HFI0_I2CDAT; 85 + write_csr(dd, target_oe, reg); 86 + /* do a read to force the write into the chip */ 87 + (void)read_csr(dd, target_oe); 88 + } 89 + 90 + static void hfi1_setscl(void *data, int state) 91 + { 92 + struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data; 93 + struct hfi1_devdata *dd = bus->controlling_dd; 94 + u64 reg; 95 + u32 target_oe; 96 + 97 + target_oe = i2c_oe_csr(bus->num); 98 + reg = read_csr(dd, target_oe); 99 + /* 100 + * The OE bit value is inverted and connected to the pin. When 101 + * OE is 0 the pin is left to be pulled up, when the OE is 1 102 + * the pin is driven low. This matches the "open drain" or "open 103 + * collector" convention. 
104 + */ 105 + if (state) 106 + reg &= ~QSFP_HFI0_I2CCLK; 107 + else 108 + reg |= QSFP_HFI0_I2CCLK; 109 + write_csr(dd, target_oe, reg); 110 + /* do a read to force the write into the chip */ 111 + (void)read_csr(dd, target_oe); 112 + } 113 + 114 + static int hfi1_getsda(void *data) 115 + { 116 + struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data; 117 + u64 reg; 118 + u32 target_in; 119 + 120 + hfi1_setsda(data, 1); /* clear OE so we do not pull line down */ 121 + udelay(2); /* 1us pull up + 250ns hold */ 122 + 123 + target_in = i2c_in_csr(bus->num); 124 + reg = read_csr(bus->controlling_dd, target_in); 125 + return !!(reg & QSFP_HFI0_I2CDAT); 126 + } 127 + 128 + static int hfi1_getscl(void *data) 129 + { 130 + struct hfi1_i2c_bus *bus = (struct hfi1_i2c_bus *)data; 131 + u64 reg; 132 + u32 target_in; 133 + 134 + hfi1_setscl(data, 1); /* clear OE so we do not pull line down */ 135 + udelay(2); /* 1us pull up + 250ns hold */ 136 + 137 + target_in = i2c_in_csr(bus->num); 138 + reg = read_csr(bus->controlling_dd, target_in); 139 + return !!(reg & QSFP_HFI0_I2CCLK); 140 + } 54 141 55 142 /* 56 - * QSFP support for hfi driver, using "Two Wire Serial Interface" driver 57 - * in twsi.c 143 + * Allocate and initialize the given i2c bus number. 144 + * Returns NULL on failure. 
58 145 */ 59 - #define I2C_MAX_RETRY 4 146 + static struct hfi1_i2c_bus *init_i2c_bus(struct hfi1_devdata *dd, 147 + struct hfi1_asic_data *ad, int num) 148 + { 149 + struct hfi1_i2c_bus *bus; 150 + int ret; 151 + 152 + bus = kzalloc(sizeof(*bus), GFP_KERNEL); 153 + if (!bus) 154 + return NULL; 155 + 156 + bus->controlling_dd = dd; 157 + bus->num = num; /* our bus number */ 158 + 159 + bus->algo.setsda = hfi1_setsda; 160 + bus->algo.setscl = hfi1_setscl; 161 + bus->algo.getsda = hfi1_getsda; 162 + bus->algo.getscl = hfi1_getscl; 163 + bus->algo.udelay = 5; 164 + bus->algo.timeout = usecs_to_jiffies(50); 165 + bus->algo.data = bus; 166 + 167 + bus->adapter.owner = THIS_MODULE; 168 + bus->adapter.algo_data = &bus->algo; 169 + bus->adapter.dev.parent = &dd->pcidev->dev; 170 + snprintf(bus->adapter.name, sizeof(bus->adapter.name), 171 + "hfi1_i2c%d", num); 172 + 173 + ret = i2c_bit_add_bus(&bus->adapter); 174 + if (ret) { 175 + dd_dev_info(dd, "%s: unable to add i2c bus %d, err %d\n", 176 + __func__, num, ret); 177 + kfree(bus); 178 + return NULL; 179 + } 180 + 181 + return bus; 182 + } 183 + 184 + /* 185 + * Initialize i2c buses. 186 + * Return 0 on success, -errno on error. 
187 + */ 188 + int set_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad) 189 + { 190 + ad->i2c_bus0 = init_i2c_bus(dd, ad, 0); 191 + ad->i2c_bus1 = init_i2c_bus(dd, ad, 1); 192 + if (!ad->i2c_bus0 || !ad->i2c_bus1) 193 + return -ENOMEM; 194 + return 0; 195 + }; 196 + 197 + static void clean_i2c_bus(struct hfi1_i2c_bus *bus) 198 + { 199 + if (bus) { 200 + i2c_del_adapter(&bus->adapter); 201 + kfree(bus); 202 + } 203 + } 204 + 205 + void clean_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad) 206 + { 207 + clean_i2c_bus(ad->i2c_bus0); 208 + ad->i2c_bus0 = NULL; 209 + clean_i2c_bus(ad->i2c_bus1); 210 + ad->i2c_bus1 = NULL; 211 + } 212 + 213 + static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c, 214 + u8 slave_addr, int offset, int offset_size, 215 + u8 *data, u16 len) 216 + { 217 + int ret; 218 + int num_msgs; 219 + u8 offset_bytes[2]; 220 + struct i2c_msg msgs[2]; 221 + 222 + switch (offset_size) { 223 + case 0: 224 + num_msgs = 1; 225 + msgs[0].addr = slave_addr; 226 + msgs[0].flags = 0; 227 + msgs[0].len = len; 228 + msgs[0].buf = data; 229 + break; 230 + case 2: 231 + offset_bytes[1] = (offset >> 8) & 0xff; 232 + /* fall through */ 233 + case 1: 234 + num_msgs = 2; 235 + offset_bytes[0] = offset & 0xff; 236 + 237 + msgs[0].addr = slave_addr; 238 + msgs[0].flags = 0; 239 + msgs[0].len = offset_size; 240 + msgs[0].buf = offset_bytes; 241 + 242 + msgs[1].addr = slave_addr; 243 + msgs[1].flags = I2C_M_NOSTART, 244 + msgs[1].len = len; 245 + msgs[1].buf = data; 246 + break; 247 + default: 248 + return -EINVAL; 249 + } 250 + 251 + i2c->controlling_dd = dd; 252 + ret = i2c_transfer(&i2c->adapter, msgs, num_msgs); 253 + if (ret != num_msgs) { 254 + dd_dev_err(dd, "%s: bus %d, i2c slave 0x%x, offset 0x%x, len 0x%x; write failed, ret %d\n", 255 + __func__, i2c->num, slave_addr, offset, len, ret); 256 + return ret < 0 ? 
ret : -EIO; 257 + } 258 + return 0; 259 + } 260 + 261 + static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus, 262 + u8 slave_addr, int offset, int offset_size, 263 + u8 *data, u16 len) 264 + { 265 + int ret; 266 + int num_msgs; 267 + u8 offset_bytes[2]; 268 + struct i2c_msg msgs[2]; 269 + 270 + switch (offset_size) { 271 + case 0: 272 + num_msgs = 1; 273 + msgs[0].addr = slave_addr; 274 + msgs[0].flags = I2C_M_RD; 275 + msgs[0].len = len; 276 + msgs[0].buf = data; 277 + break; 278 + case 2: 279 + offset_bytes[1] = (offset >> 8) & 0xff; 280 + /* fall through */ 281 + case 1: 282 + num_msgs = 2; 283 + offset_bytes[0] = offset & 0xff; 284 + 285 + msgs[0].addr = slave_addr; 286 + msgs[0].flags = 0; 287 + msgs[0].len = offset_size; 288 + msgs[0].buf = offset_bytes; 289 + 290 + msgs[1].addr = slave_addr; 291 + msgs[1].flags = I2C_M_RD, 292 + msgs[1].len = len; 293 + msgs[1].buf = data; 294 + break; 295 + default: 296 + return -EINVAL; 297 + } 298 + 299 + bus->controlling_dd = dd; 300 + ret = i2c_transfer(&bus->adapter, msgs, num_msgs); 301 + if (ret != num_msgs) { 302 + dd_dev_err(dd, "%s: bus %d, i2c slave 0x%x, offset 0x%x, len 0x%x; read failed, ret %d\n", 303 + __func__, bus->num, slave_addr, offset, len, ret); 304 + return ret < 0 ? ret : -EIO; 305 + } 306 + return 0; 307 + } 60 308 61 309 /* 62 310 * Raw i2c write. No set-up or lock checking. 311 + * 312 + * Return 0 on success, -errno on error. 
63 313 */ 64 314 static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, 65 315 int offset, void *bp, int len) 66 316 { 67 317 struct hfi1_devdata *dd = ppd->dd; 68 - int ret, cnt; 69 - u8 *buff = bp; 318 + struct hfi1_i2c_bus *bus; 319 + u8 slave_addr; 320 + int offset_size; 70 321 71 - cnt = 0; 72 - while (cnt < len) { 73 - int wlen = len - cnt; 74 - 75 - ret = hfi1_twsi_blk_wr(dd, target, i2c_addr, offset, 76 - buff + cnt, wlen); 77 - if (ret) { 78 - /* hfi1_twsi_blk_wr() 1 for error, else 0 */ 79 - return -EIO; 80 - } 81 - offset += wlen; 82 - cnt += wlen; 83 - } 84 - 85 - /* Must wait min 20us between qsfp i2c transactions */ 86 - udelay(20); 87 - 88 - return cnt; 322 + bus = target ? dd->asic_data->i2c_bus1 : dd->asic_data->i2c_bus0; 323 + slave_addr = (i2c_addr & 0xff) >> 1; /* convert to 7-bit addr */ 324 + offset_size = (i2c_addr >> 8) & 0x3; 325 + return i2c_bus_write(dd, bus, slave_addr, offset, offset_size, bp, len); 89 326 } 90 327 91 328 /* 92 329 * Caller must hold the i2c chain resource. 330 + * 331 + * Return number of bytes written, or -errno. 93 332 */ 94 333 int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, 95 334 void *bp, int len) ··· 338 99 if (!check_chip_resource(ppd->dd, i2c_target(target), __func__)) 339 100 return -EACCES; 340 101 341 - /* make sure the TWSI bus is in a sane state */ 342 - ret = hfi1_twsi_reset(ppd->dd, target); 343 - if (ret) { 344 - hfi1_dev_porterr(ppd->dd, ppd->port, 345 - "I2C chain %d write interface reset failed\n", 346 - target); 102 + ret = __i2c_write(ppd, target, i2c_addr, offset, bp, len); 103 + if (ret) 347 104 return ret; 348 - } 349 105 350 - return __i2c_write(ppd, target, i2c_addr, offset, bp, len); 106 + return len; 351 107 } 352 108 353 109 /* 354 110 * Raw i2c read. No set-up or lock checking. 111 + * 112 + * Return 0 on success, -errno on error. 
355 113 */ 356 114 static int __i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, 357 115 int offset, void *bp, int len) 358 116 { 359 117 struct hfi1_devdata *dd = ppd->dd; 360 - int ret, cnt, pass = 0; 361 - int orig_offset = offset; 118 + struct hfi1_i2c_bus *bus; 119 + u8 slave_addr; 120 + int offset_size; 362 121 363 - cnt = 0; 364 - while (cnt < len) { 365 - int rlen = len - cnt; 366 - 367 - ret = hfi1_twsi_blk_rd(dd, target, i2c_addr, offset, 368 - bp + cnt, rlen); 369 - /* Some QSFP's fail first try. Retry as experiment */ 370 - if (ret && cnt == 0 && ++pass < I2C_MAX_RETRY) 371 - continue; 372 - if (ret) { 373 - /* hfi1_twsi_blk_rd() 1 for error, else 0 */ 374 - ret = -EIO; 375 - goto exit; 376 - } 377 - offset += rlen; 378 - cnt += rlen; 379 - } 380 - 381 - ret = cnt; 382 - 383 - exit: 384 - if (ret < 0) { 385 - hfi1_dev_porterr(dd, ppd->port, 386 - "I2C chain %d read failed, addr 0x%x, offset 0x%x, len %d\n", 387 - target, i2c_addr, orig_offset, len); 388 - } 389 - 390 - /* Must wait min 20us between qsfp i2c transactions */ 391 - udelay(20); 392 - 393 - return ret; 122 + bus = target ? dd->asic_data->i2c_bus1 : dd->asic_data->i2c_bus0; 123 + slave_addr = (i2c_addr & 0xff) >> 1; /* convert to 7-bit addr */ 124 + offset_size = (i2c_addr >> 8) & 0x3; 125 + return i2c_bus_read(dd, bus, slave_addr, offset, offset_size, bp, len); 394 126 } 395 127 396 128 /* 397 129 * Caller must hold the i2c chain resource. 130 + * 131 + * Return number of bytes read, or -errno. 
398 132 */ 399 133 int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset, 400 134 void *bp, int len) ··· 377 165 if (!check_chip_resource(ppd->dd, i2c_target(target), __func__)) 378 166 return -EACCES; 379 167 380 - /* make sure the TWSI bus is in a sane state */ 381 - ret = hfi1_twsi_reset(ppd->dd, target); 382 - if (ret) { 383 - hfi1_dev_porterr(ppd->dd, ppd->port, 384 - "I2C chain %d read interface reset failed\n", 385 - target); 168 + ret = __i2c_read(ppd, target, i2c_addr, offset, bp, len); 169 + if (ret) 386 170 return ret; 387 - } 388 171 389 - return __i2c_read(ppd, target, i2c_addr, offset, bp, len); 172 + return len; 390 173 } 391 174 392 175 /* ··· 389 182 * by writing @addr = ((256 * n) + m) 390 183 * 391 184 * Caller must hold the i2c chain resource. 185 + * 186 + * Return number of bytes written or -errno. 392 187 */ 393 188 int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, 394 189 int len) ··· 398 189 int count = 0; 399 190 int offset; 400 191 int nwrite; 401 - int ret; 192 + int ret = 0; 402 193 u8 page; 403 194 404 195 if (!check_chip_resource(ppd->dd, i2c_target(target), __func__)) 405 196 return -EACCES; 406 - 407 - /* make sure the TWSI bus is in a sane state */ 408 - ret = hfi1_twsi_reset(ppd->dd, target); 409 - if (ret) { 410 - hfi1_dev_porterr(ppd->dd, ppd->port, 411 - "QSFP chain %d write interface reset failed\n", 412 - target); 413 - return ret; 414 - } 415 197 416 198 while (count < len) { 417 199 /* ··· 413 213 414 214 ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE, 415 215 QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1); 416 - if (ret != 1) { 216 + /* QSFPs require a 5-10msec delay after write operations */ 217 + mdelay(5); 218 + if (ret) { 417 219 hfi1_dev_porterr(ppd->dd, ppd->port, 418 220 "QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n", 419 221 target, ret); 420 - ret = -EIO; 421 222 break; 422 223 } 423 224 ··· 430 229 431 230 ret = __i2c_write(ppd, target, QSFP_DEV | 
QSFP_OFFSET_SIZE, 432 231 offset, bp + count, nwrite); 433 - if (ret <= 0) /* stop on error or nothing written */ 232 + /* QSFPs require a 5-10msec delay after write operations */ 233 + mdelay(5); 234 + if (ret) /* stop on error */ 434 235 break; 435 236 436 - count += ret; 437 - addr += ret; 237 + count += nwrite; 238 + addr += nwrite; 438 239 } 439 240 440 241 if (ret < 0) ··· 446 243 447 244 /* 448 245 * Perform a stand-alone single QSFP write. Acquire the resource, do the 449 - * read, then release the resource. 246 + * write, then release the resource. 450 247 */ 451 248 int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, 452 249 int len) ··· 469 266 * by reading @addr = ((256 * n) + m) 470 267 * 471 268 * Caller must hold the i2c chain resource. 269 + * 270 + * Return the number of bytes read or -errno. 472 271 */ 473 272 int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, 474 273 int len) ··· 478 273 int count = 0; 479 274 int offset; 480 275 int nread; 481 - int ret; 276 + int ret = 0; 482 277 u8 page; 483 278 484 279 if (!check_chip_resource(ppd->dd, i2c_target(target), __func__)) 485 280 return -EACCES; 486 - 487 - /* make sure the TWSI bus is in a sane state */ 488 - ret = hfi1_twsi_reset(ppd->dd, target); 489 - if (ret) { 490 - hfi1_dev_porterr(ppd->dd, ppd->port, 491 - "QSFP chain %d read interface reset failed\n", 492 - target); 493 - return ret; 494 - } 495 281 496 282 while (count < len) { 497 283 /* ··· 492 296 page = (u8)(addr / QSFP_PAGESIZE); 493 297 ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE, 494 298 QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1); 495 - if (ret != 1) { 299 + /* QSFPs require a 5-10msec delay after write operations */ 300 + mdelay(5); 301 + if (ret) { 496 302 hfi1_dev_porterr(ppd->dd, ppd->port, 497 303 "QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n", 498 304 target, ret); 499 - ret = -EIO; 500 305 break; 501 306 } 502 307 ··· 507 310 if (((addr % QSFP_RW_BOUNDARY) 
+ nread) > QSFP_RW_BOUNDARY) 508 311 nread = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY); 509 312 510 - /* QSFPs require a 5-10msec delay after write operations */ 511 - mdelay(5); 512 313 ret = __i2c_read(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE, 513 314 offset, bp + count, nread); 514 - if (ret <= 0) /* stop on error or nothing read */ 315 + if (ret) /* stop on error */ 515 316 break; 516 317 517 - count += ret; 518 - addr += ret; 318 + count += nread; 319 + addr += nread; 519 320 } 520 321 521 322 if (ret < 0)
+3
drivers/infiniband/hw/hfi1/qsfp.h
··· 238 238 int len); 239 239 int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp, 240 240 int len); 241 + struct hfi1_asic_data; 242 + int set_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad); 243 + void clean_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad);
+65 -25
drivers/infiniband/hw/hfi1/rc.c
··· 477 477 qp->s_flags |= RVT_S_WAIT_FENCE; 478 478 goto bail; 479 479 } 480 + /* 481 + * Local operations are processed immediately 482 + * after all prior requests have completed 483 + */ 484 + if (wqe->wr.opcode == IB_WR_REG_MR || 485 + wqe->wr.opcode == IB_WR_LOCAL_INV) { 486 + int local_ops = 0; 487 + int err = 0; 488 + 489 + if (qp->s_last != qp->s_cur) 490 + goto bail; 491 + if (++qp->s_cur == qp->s_size) 492 + qp->s_cur = 0; 493 + if (++qp->s_tail == qp->s_size) 494 + qp->s_tail = 0; 495 + if (!(wqe->wr.send_flags & 496 + RVT_SEND_COMPLETION_ONLY)) { 497 + err = rvt_invalidate_rkey( 498 + qp, 499 + wqe->wr.ex.invalidate_rkey); 500 + local_ops = 1; 501 + } 502 + hfi1_send_complete(qp, wqe, 503 + err ? IB_WC_LOC_PROT_ERR 504 + : IB_WC_SUCCESS); 505 + if (local_ops) 506 + atomic_dec(&qp->local_ops_pending); 507 + qp->s_hdrwords = 0; 508 + goto done_free_tx; 509 + } 510 + 480 511 newreq = 1; 481 512 qp->s_psn = wqe->psn; 482 513 } ··· 522 491 switch (wqe->wr.opcode) { 523 492 case IB_WR_SEND: 524 493 case IB_WR_SEND_WITH_IMM: 494 + case IB_WR_SEND_WITH_INV: 525 495 /* If no credit, return. 
*/ 526 496 if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && 527 497 cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { ··· 536 504 } 537 505 if (wqe->wr.opcode == IB_WR_SEND) { 538 506 qp->s_state = OP(SEND_ONLY); 539 - } else { 507 + } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { 540 508 qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE); 541 509 /* Immediate data comes after the BTH */ 542 510 ohdr->u.imm_data = wqe->wr.ex.imm_data; 511 + hwords += 1; 512 + } else { 513 + qp->s_state = OP(SEND_ONLY_WITH_INVALIDATE); 514 + /* Invalidate rkey comes after the BTH */ 515 + ohdr->u.ieth = cpu_to_be32( 516 + wqe->wr.ex.invalidate_rkey); 543 517 hwords += 1; 544 518 } 545 519 if (wqe->wr.send_flags & IB_SEND_SOLICITED) ··· 709 671 } 710 672 if (wqe->wr.opcode == IB_WR_SEND) { 711 673 qp->s_state = OP(SEND_LAST); 712 - } else { 674 + } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { 713 675 qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); 714 676 /* Immediate data comes after the BTH */ 715 677 ohdr->u.imm_data = wqe->wr.ex.imm_data; 678 + hwords += 1; 679 + } else { 680 + qp->s_state = OP(SEND_LAST_WITH_INVALIDATE); 681 + /* invalidate data comes after the BTH */ 682 + ohdr->u.ieth = cpu_to_be32(wqe->wr.ex.invalidate_rkey); 716 683 hwords += 1; 717 684 } 718 685 if (wqe->wr.send_flags & IB_SEND_SOLICITED) ··· 1090 1047 ibp->rvp.n_rc_timeouts++; 1091 1048 qp->s_flags &= ~RVT_S_TIMER; 1092 1049 del_timer(&qp->s_timer); 1093 - trace_hfi1_rc_timeout(qp, qp->s_last_psn + 1); 1050 + trace_hfi1_timeout(qp, qp->s_last_psn + 1); 1094 1051 restart_rc(qp, qp->s_last_psn + 1, 1); 1095 1052 hfi1_schedule_send(qp); 1096 1053 } ··· 1214 1171 * If we were waiting for sends to complete before re-sending, 1215 1172 * and they are now complete, restart sending. 
1216 1173 */ 1217 - trace_hfi1_rc_sendcomplete(qp, psn); 1174 + trace_hfi1_sendcomplete(qp, psn); 1218 1175 if (qp->s_flags & RVT_S_WAIT_PSN && 1219 1176 cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { 1220 1177 qp->s_flags &= ~RVT_S_WAIT_PSN; ··· 1610 1567 1611 1568 spin_lock_irqsave(&qp->s_lock, flags); 1612 1569 1613 - trace_hfi1_rc_ack(qp, psn); 1570 + trace_hfi1_ack(qp, psn); 1614 1571 1615 1572 /* Ignore invalid responses. */ 1616 1573 smp_read_barrier_depends(); /* see post_one_send */ ··· 1825 1782 u8 i, prev; 1826 1783 int old_req; 1827 1784 1828 - trace_hfi1_rc_rcv_error(qp, psn); 1785 + trace_hfi1_rcv_error(qp, psn); 1829 1786 if (diff > 0) { 1830 1787 /* 1831 1788 * Packet sequence error. ··· 2129 2086 u32 tlen = packet->tlen; 2130 2087 struct rvt_qp *qp = packet->qp; 2131 2088 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); 2132 - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); 2133 2089 struct hfi1_other_headers *ohdr = packet->ohdr; 2134 2090 u32 bth0, opcode; 2135 2091 u32 hdrsize = packet->hlen; ··· 2139 2097 int diff; 2140 2098 struct ib_reth *reth; 2141 2099 unsigned long flags; 2142 - u32 bth1; 2143 2100 int ret, is_fecn = 0; 2144 2101 int copy_last = 0; 2102 + u32 rkey; 2145 2103 2146 2104 bth0 = be32_to_cpu(ohdr->bth[0]); 2147 2105 if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0)) 2148 2106 return; 2149 2107 2150 - bth1 = be32_to_cpu(ohdr->bth[1]); 2151 - if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) { 2152 - if (bth1 & HFI1_BECN_SMASK) { 2153 - u16 rlid = qp->remote_ah_attr.dlid; 2154 - u32 lqpn, rqpn; 2155 - 2156 - lqpn = qp->ibqp.qp_num; 2157 - rqpn = qp->remote_qpn; 2158 - process_becn( 2159 - ppd, 2160 - qp->remote_ah_attr.sl, 2161 - rlid, lqpn, rqpn, 2162 - IB_CC_SVCTYPE_RC); 2163 - } 2164 - is_fecn = bth1 & HFI1_FECN_SMASK; 2165 - } 2108 + is_fecn = process_ecn(qp, packet, false); 2166 2109 2167 2110 psn = be32_to_cpu(ohdr->bth[2]); 2168 2111 opcode = (bth0 >> 24) & 0xff; ··· 2181 
2154 case OP(SEND_MIDDLE): 2182 2155 if (opcode == OP(SEND_MIDDLE) || 2183 2156 opcode == OP(SEND_LAST) || 2184 - opcode == OP(SEND_LAST_WITH_IMMEDIATE)) 2157 + opcode == OP(SEND_LAST_WITH_IMMEDIATE) || 2158 + opcode == OP(SEND_LAST_WITH_INVALIDATE)) 2185 2159 break; 2186 2160 goto nack_inv; 2187 2161 ··· 2198 2170 if (opcode == OP(SEND_MIDDLE) || 2199 2171 opcode == OP(SEND_LAST) || 2200 2172 opcode == OP(SEND_LAST_WITH_IMMEDIATE) || 2173 + opcode == OP(SEND_LAST_WITH_INVALIDATE) || 2201 2174 opcode == OP(RDMA_WRITE_MIDDLE) || 2202 2175 opcode == OP(RDMA_WRITE_LAST) || 2203 2176 opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) ··· 2247 2218 2248 2219 case OP(SEND_ONLY): 2249 2220 case OP(SEND_ONLY_WITH_IMMEDIATE): 2221 + case OP(SEND_ONLY_WITH_INVALIDATE): 2250 2222 ret = hfi1_rvt_get_rwqe(qp, 0); 2251 2223 if (ret < 0) 2252 2224 goto nack_op_err; ··· 2256 2226 qp->r_rcv_len = 0; 2257 2227 if (opcode == OP(SEND_ONLY)) 2258 2228 goto no_immediate_data; 2229 + if (opcode == OP(SEND_ONLY_WITH_INVALIDATE)) 2230 + goto send_last_inv; 2259 2231 /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */ 2260 2232 case OP(SEND_LAST_WITH_IMMEDIATE): 2261 2233 send_last_imm: 2262 2234 wc.ex.imm_data = ohdr->u.imm_data; 2263 2235 wc.wc_flags = IB_WC_WITH_IMM; 2236 + goto send_last; 2237 + case OP(SEND_LAST_WITH_INVALIDATE): 2238 + send_last_inv: 2239 + rkey = be32_to_cpu(ohdr->u.ieth); 2240 + if (rvt_invalidate_rkey(qp, rkey)) 2241 + goto no_immediate_data; 2242 + wc.ex.invalidate_rkey = rkey; 2243 + wc.wc_flags = IB_WC_WITH_INVALIDATE; 2264 2244 goto send_last; 2265 2245 case OP(RDMA_WRITE_LAST): 2266 2246 copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
+40 -17
drivers/infiniband/hw/hfi1/ruc.c
··· 372 372 int ret; 373 373 int copy_last = 0; 374 374 u32 to; 375 + int local_ops = 0; 375 376 376 377 rcu_read_lock(); 377 378 ··· 441 440 sqp->s_sge.num_sge = wqe->wr.num_sge; 442 441 sqp->s_len = wqe->length; 443 442 switch (wqe->wr.opcode) { 443 + case IB_WR_REG_MR: 444 + goto send_comp; 445 + 446 + case IB_WR_LOCAL_INV: 447 + if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) { 448 + if (rvt_invalidate_rkey(sqp, 449 + wqe->wr.ex.invalidate_rkey)) 450 + send_status = IB_WC_LOC_PROT_ERR; 451 + local_ops = 1; 452 + } 453 + goto send_comp; 454 + 455 + case IB_WR_SEND_WITH_INV: 456 + if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) { 457 + wc.wc_flags = IB_WC_WITH_INVALIDATE; 458 + wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey; 459 + } 460 + goto send; 461 + 444 462 case IB_WR_SEND_WITH_IMM: 445 463 wc.wc_flags = IB_WC_WITH_IMM; 446 464 wc.ex.imm_data = wqe->wr.ex.imm_data; 447 465 /* FALLTHROUGH */ 448 466 case IB_WR_SEND: 467 + send: 449 468 ret = hfi1_rvt_get_rwqe(qp, 0); 450 469 if (ret < 0) 451 470 goto op_err; ··· 604 583 flush_send: 605 584 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; 606 585 hfi1_send_complete(sqp, wqe, send_status); 586 + if (local_ops) { 587 + atomic_dec(&sqp->local_ops_pending); 588 + local_ops = 0; 589 + } 607 590 goto again; 608 591 609 592 rnr_nak: ··· 708 683 return sizeof(struct ib_grh) / sizeof(u32); 709 684 } 710 685 711 - #define BTH2_OFFSET (offsetof(struct hfi1_pio_header, hdr.u.oth.bth[2]) / 4) 686 + #define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, hdr.u.oth.bth[2]) / 4) 712 687 713 688 /** 714 - * build_ahg - create ahg in s_hdr 689 + * build_ahg - create ahg in s_ahg 715 690 * @qp: a pointer to QP 716 691 * @npsn: the next PSN for the request/response 717 692 * ··· 733 708 qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde); 734 709 if (qp->s_ahgidx >= 0) { 735 710 qp->s_ahgpsn = npsn; 736 - priv->s_hdr->tx_flags |= SDMA_TXREQ_F_AHG_COPY; 711 + priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY; 737 712 /* save to 
protect a change in another thread */ 738 - priv->s_hdr->sde = priv->s_sde; 739 - priv->s_hdr->ahgidx = qp->s_ahgidx; 713 + priv->s_ahg->ahgidx = qp->s_ahgidx; 740 714 qp->s_flags |= RVT_S_AHG_VALID; 741 715 } 742 716 } else { 743 717 /* subsequent middle after valid */ 744 718 if (qp->s_ahgidx >= 0) { 745 - priv->s_hdr->tx_flags |= SDMA_TXREQ_F_USE_AHG; 746 - priv->s_hdr->ahgidx = qp->s_ahgidx; 747 - priv->s_hdr->ahgcount++; 748 - priv->s_hdr->ahgdesc[0] = 719 + priv->s_ahg->tx_flags |= SDMA_TXREQ_F_USE_AHG; 720 + priv->s_ahg->ahgidx = qp->s_ahgidx; 721 + priv->s_ahg->ahgcount++; 722 + priv->s_ahg->ahgdesc[0] = 749 723 sdma_build_ahg_descriptor( 750 724 (__force u16)cpu_to_be16((u16)npsn), 751 725 BTH2_OFFSET, ··· 752 728 16); 753 729 if ((npsn & 0xffff0000) != 754 730 (qp->s_ahgpsn & 0xffff0000)) { 755 - priv->s_hdr->ahgcount++; 756 - priv->s_hdr->ahgdesc[1] = 731 + priv->s_ahg->ahgcount++; 732 + priv->s_ahg->ahgdesc[1] = 757 733 sdma_build_ahg_descriptor( 758 734 (__force u16)cpu_to_be16( 759 735 (u16)(npsn >> 16)), ··· 790 766 } 791 767 lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4; 792 768 /* 793 - * reset s_hdr/AHG fields 769 + * reset s_ahg/AHG fields 794 770 * 795 771 * This insures that the ahgentry/ahgcount 796 772 * are at a non-AHG default to protect ··· 800 776 * build_ahg() will modify as appropriate 801 777 * to use the AHG feature. 802 778 */ 803 - priv->s_hdr->tx_flags = 0; 804 - priv->s_hdr->ahgcount = 0; 805 - priv->s_hdr->ahgidx = 0; 806 - priv->s_hdr->sde = NULL; 779 + priv->s_ahg->tx_flags = 0; 780 + priv->s_ahg->ahgcount = 0; 781 + priv->s_ahg->ahgidx = 0; 807 782 if (qp->s_mig_state == IB_MIG_MIGRATED) 808 783 bth0 |= IB_BTH_MIG_REQ; 809 784 else ··· 913 890 */ 914 891 if (hfi1_verbs_send(qp, &ps)) 915 892 return; 916 - /* Record that s_hdr is empty. */ 893 + /* Record that s_ahg is empty. */ 917 894 qp->s_hdrwords = 0; 918 895 /* allow other tasks to run */ 919 896 if (unlikely(time_after(jiffies, timeout))) {
+25
drivers/infiniband/hw/hfi1/sysfs.c
··· 49 49 #include "hfi.h" 50 50 #include "mad.h" 51 51 #include "trace.h" 52 + #include "affinity.h" 52 53 53 54 /* 54 55 * Start of per-port congestion control structures and support code ··· 623 622 return ret; 624 623 } 625 624 625 + static ssize_t show_sdma_affinity(struct device *device, 626 + struct device_attribute *attr, char *buf) 627 + { 628 + struct hfi1_ibdev *dev = 629 + container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); 630 + struct hfi1_devdata *dd = dd_from_dev(dev); 631 + 632 + return hfi1_get_sdma_affinity(dd, buf); 633 + } 634 + 635 + static ssize_t store_sdma_affinity(struct device *device, 636 + struct device_attribute *attr, 637 + const char *buf, size_t count) 638 + { 639 + struct hfi1_ibdev *dev = 640 + container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); 641 + struct hfi1_devdata *dd = dd_from_dev(dev); 642 + 643 + return hfi1_set_sdma_affinity(dd, buf, count); 644 + } 645 + 626 646 /* 627 647 * end of per-unit (or driver, in some cases, but replicated 628 648 * per unit) functions ··· 658 636 static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); 659 637 static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL); 660 638 static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset); 639 + static DEVICE_ATTR(sdma_affinity, S_IWUSR | S_IRUGO, show_sdma_affinity, 640 + store_sdma_affinity); 661 641 662 642 static struct device_attribute *hfi1_attributes[] = { 663 643 &dev_attr_hw_rev, ··· 670 646 &dev_attr_boardversion, 671 647 &dev_attr_tempsense, 672 648 &dev_attr_chip_reset, 649 + &dev_attr_sdma_affinity, 673 650 }; 674 651 675 652 int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
+7 -1326
drivers/infiniband/hw/hfi1/trace.h
··· 44 44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 45 * 46 46 */ 47 - #undef TRACE_SYSTEM_VAR 48 - #define TRACE_SYSTEM_VAR hfi1 49 - 50 - #if !defined(__HFI1_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) 51 - #define __HFI1_TRACE_H 52 - 53 - #include <linux/tracepoint.h> 54 - #include <linux/trace_seq.h> 55 - 56 - #include "hfi.h" 57 - #include "mad.h" 58 - #include "sdma.h" 59 - 60 - #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) 61 - #define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev)) 62 - 63 - #define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } 64 - #define show_packettype(etype) \ 65 - __print_symbolic(etype, \ 66 - packettype_name(EXPECTED), \ 67 - packettype_name(EAGER), \ 68 - packettype_name(IB), \ 69 - packettype_name(ERROR), \ 70 - packettype_name(BYPASS)) 71 - 72 - #undef TRACE_SYSTEM 73 - #define TRACE_SYSTEM hfi1_rx 74 - 75 - TRACE_EVENT(hfi1_rcvhdr, 76 - TP_PROTO(struct hfi1_devdata *dd, 77 - u32 ctxt, 78 - u64 eflags, 79 - u32 etype, 80 - u32 hlen, 81 - u32 tlen, 82 - u32 updegr, 83 - u32 etail 84 - ), 85 - TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail), 86 - TP_STRUCT__entry(DD_DEV_ENTRY(dd) 87 - __field(u64, eflags) 88 - __field(u32, ctxt) 89 - __field(u32, etype) 90 - __field(u32, hlen) 91 - __field(u32, tlen) 92 - __field(u32, updegr) 93 - __field(u32, etail) 94 - ), 95 - TP_fast_assign(DD_DEV_ASSIGN(dd); 96 - __entry->eflags = eflags; 97 - __entry->ctxt = ctxt; 98 - __entry->etype = etype; 99 - __entry->hlen = hlen; 100 - __entry->tlen = tlen; 101 - __entry->updegr = updegr; 102 - __entry->etail = etail; 103 - ), 104 - TP_printk( 105 - "[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d", 106 - __get_str(dev), 107 - __entry->ctxt, 108 - __entry->eflags, 109 - __entry->etype, show_packettype(__entry->etype), 110 - __entry->hlen, 111 - __entry->tlen, 112 - __entry->updegr, 113 - __entry->etail 114 - ) 115 - ); 116 - 117 - 
TRACE_EVENT(hfi1_receive_interrupt, 118 - TP_PROTO(struct hfi1_devdata *dd, u32 ctxt), 119 - TP_ARGS(dd, ctxt), 120 - TP_STRUCT__entry(DD_DEV_ENTRY(dd) 121 - __field(u32, ctxt) 122 - __field(u8, slow_path) 123 - __field(u8, dma_rtail) 124 - ), 125 - TP_fast_assign(DD_DEV_ASSIGN(dd); 126 - __entry->ctxt = ctxt; 127 - if (dd->rcd[ctxt]->do_interrupt == 128 - &handle_receive_interrupt) { 129 - __entry->slow_path = 1; 130 - __entry->dma_rtail = 0xFF; 131 - } else if (dd->rcd[ctxt]->do_interrupt == 132 - &handle_receive_interrupt_dma_rtail){ 133 - __entry->dma_rtail = 1; 134 - __entry->slow_path = 0; 135 - } else if (dd->rcd[ctxt]->do_interrupt == 136 - &handle_receive_interrupt_nodma_rtail) { 137 - __entry->dma_rtail = 0; 138 - __entry->slow_path = 0; 139 - } 140 - ), 141 - TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d", 142 - __get_str(dev), 143 - __entry->ctxt, 144 - __entry->slow_path, 145 - __entry->dma_rtail 146 - ) 147 - ); 148 - 149 - TRACE_EVENT(hfi1_exp_tid_reg, 150 - TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr, 151 - u32 npages, unsigned long va, unsigned long pa, 152 - dma_addr_t dma), 153 - TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), 154 - TP_STRUCT__entry( 155 - __field(unsigned, ctxt) 156 - __field(u16, subctxt) 157 - __field(u32, rarr) 158 - __field(u32, npages) 159 - __field(unsigned long, va) 160 - __field(unsigned long, pa) 161 - __field(dma_addr_t, dma) 162 - ), 163 - TP_fast_assign( 164 - __entry->ctxt = ctxt; 165 - __entry->subctxt = subctxt; 166 - __entry->rarr = rarr; 167 - __entry->npages = npages; 168 - __entry->va = va; 169 - __entry->pa = pa; 170 - __entry->dma = dma; 171 - ), 172 - TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", 173 - __entry->ctxt, 174 - __entry->subctxt, 175 - __entry->rarr, 176 - __entry->npages, 177 - __entry->pa, 178 - __entry->va, 179 - __entry->dma 180 - ) 181 - ); 182 - 183 - TRACE_EVENT(hfi1_exp_tid_unreg, 184 - TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr, u32 npages, 185 - 
unsigned long va, unsigned long pa, dma_addr_t dma), 186 - TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), 187 - TP_STRUCT__entry( 188 - __field(unsigned, ctxt) 189 - __field(u16, subctxt) 190 - __field(u32, rarr) 191 - __field(u32, npages) 192 - __field(unsigned long, va) 193 - __field(unsigned long, pa) 194 - __field(dma_addr_t, dma) 195 - ), 196 - TP_fast_assign( 197 - __entry->ctxt = ctxt; 198 - __entry->subctxt = subctxt; 199 - __entry->rarr = rarr; 200 - __entry->npages = npages; 201 - __entry->va = va; 202 - __entry->pa = pa; 203 - __entry->dma = dma; 204 - ), 205 - TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", 206 - __entry->ctxt, 207 - __entry->subctxt, 208 - __entry->rarr, 209 - __entry->npages, 210 - __entry->pa, 211 - __entry->va, 212 - __entry->dma 213 - ) 214 - ); 215 - 216 - TRACE_EVENT(hfi1_exp_tid_inval, 217 - TP_PROTO(unsigned ctxt, u16 subctxt, unsigned long va, u32 rarr, 218 - u32 npages, dma_addr_t dma), 219 - TP_ARGS(ctxt, subctxt, va, rarr, npages, dma), 220 - TP_STRUCT__entry( 221 - __field(unsigned, ctxt) 222 - __field(u16, subctxt) 223 - __field(unsigned long, va) 224 - __field(u32, rarr) 225 - __field(u32, npages) 226 - __field(dma_addr_t, dma) 227 - ), 228 - TP_fast_assign( 229 - __entry->ctxt = ctxt; 230 - __entry->subctxt = subctxt; 231 - __entry->va = va; 232 - __entry->rarr = rarr; 233 - __entry->npages = npages; 234 - __entry->dma = dma; 235 - ), 236 - TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx", 237 - __entry->ctxt, 238 - __entry->subctxt, 239 - __entry->rarr, 240 - __entry->npages, 241 - __entry->va, 242 - __entry->dma 243 - ) 244 - ); 245 - 246 - TRACE_EVENT(hfi1_mmu_invalidate, 247 - TP_PROTO(unsigned ctxt, u16 subctxt, const char *type, 248 - unsigned long start, unsigned long end), 249 - TP_ARGS(ctxt, subctxt, type, start, end), 250 - TP_STRUCT__entry( 251 - __field(unsigned, ctxt) 252 - __field(u16, subctxt) 253 - __string(type, type) 254 - __field(unsigned long, start) 255 - 
__field(unsigned long, end) 256 - ), 257 - TP_fast_assign( 258 - __entry->ctxt = ctxt; 259 - __entry->subctxt = subctxt; 260 - __assign_str(type, type); 261 - __entry->start = start; 262 - __entry->end = end; 263 - ), 264 - TP_printk("[%3u:%02u] MMU Invalidate (%s) 0x%lx - 0x%lx", 265 - __entry->ctxt, 266 - __entry->subctxt, 267 - __get_str(type), 268 - __entry->start, 269 - __entry->end 270 - ) 271 - ); 272 - 273 - #undef TRACE_SYSTEM 274 - #define TRACE_SYSTEM hfi1_tx 275 - 276 - TRACE_EVENT(hfi1_piofree, 277 - TP_PROTO(struct send_context *sc, int extra), 278 - TP_ARGS(sc, extra), 279 - TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd) 280 - __field(u32, sw_index) 281 - __field(u32, hw_context) 282 - __field(int, extra) 283 - ), 284 - TP_fast_assign(DD_DEV_ASSIGN(sc->dd); 285 - __entry->sw_index = sc->sw_index; 286 - __entry->hw_context = sc->hw_context; 287 - __entry->extra = extra; 288 - ), 289 - TP_printk("[%s] ctxt %u(%u) extra %d", 290 - __get_str(dev), 291 - __entry->sw_index, 292 - __entry->hw_context, 293 - __entry->extra 294 - ) 295 - ); 296 - 297 - TRACE_EVENT(hfi1_wantpiointr, 298 - TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl), 299 - TP_ARGS(sc, needint, credit_ctrl), 300 - TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd) 301 - __field(u32, sw_index) 302 - __field(u32, hw_context) 303 - __field(u32, needint) 304 - __field(u64, credit_ctrl) 305 - ), 306 - TP_fast_assign(DD_DEV_ASSIGN(sc->dd); 307 - __entry->sw_index = sc->sw_index; 308 - __entry->hw_context = sc->hw_context; 309 - __entry->needint = needint; 310 - __entry->credit_ctrl = credit_ctrl; 311 - ), 312 - TP_printk("[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx", 313 - __get_str(dev), 314 - __entry->sw_index, 315 - __entry->hw_context, 316 - __entry->needint, 317 - (unsigned long long)__entry->credit_ctrl 318 - ) 319 - ); 320 - 321 - DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template, 322 - TP_PROTO(struct rvt_qp *qp, u32 flags), 323 - TP_ARGS(qp, flags), 324 - TP_STRUCT__entry( 325 - 
DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) 326 - __field(u32, qpn) 327 - __field(u32, flags) 328 - __field(u32, s_flags) 329 - ), 330 - TP_fast_assign( 331 - DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) 332 - __entry->flags = flags; 333 - __entry->qpn = qp->ibqp.qp_num; 334 - __entry->s_flags = qp->s_flags; 335 - ), 336 - TP_printk( 337 - "[%s] qpn 0x%x flags 0x%x s_flags 0x%x", 338 - __get_str(dev), 339 - __entry->qpn, 340 - __entry->flags, 341 - __entry->s_flags 342 - ) 343 - ); 344 - 345 - DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpwakeup, 346 - TP_PROTO(struct rvt_qp *qp, u32 flags), 347 - TP_ARGS(qp, flags)); 348 - 349 - DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep, 350 - TP_PROTO(struct rvt_qp *qp, u32 flags), 351 - TP_ARGS(qp, flags)); 352 - 353 - #undef TRACE_SYSTEM 354 - #define TRACE_SYSTEM hfi1_ibhdrs 355 - 356 - u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr); 357 - const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); 358 - 359 - #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) 360 - 361 - const char *parse_sdma_flags(struct trace_seq *p, u64 desc0, u64 desc1); 362 - 363 - #define __parse_sdma_flags(desc0, desc1) parse_sdma_flags(p, desc0, desc1) 364 - 365 - #define lrh_name(lrh) { HFI1_##lrh, #lrh } 366 - #define show_lnh(lrh) \ 367 - __print_symbolic(lrh, \ 368 - lrh_name(LRH_BTH), \ 369 - lrh_name(LRH_GRH)) 370 - 371 - #define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode } 372 - #define show_ib_opcode(opcode) \ 373 - __print_symbolic(opcode, \ 374 - ib_opcode_name(RC_SEND_FIRST), \ 375 - ib_opcode_name(RC_SEND_MIDDLE), \ 376 - ib_opcode_name(RC_SEND_LAST), \ 377 - ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \ 378 - ib_opcode_name(RC_SEND_ONLY), \ 379 - ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \ 380 - ib_opcode_name(RC_RDMA_WRITE_FIRST), \ 381 - ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \ 382 - ib_opcode_name(RC_RDMA_WRITE_LAST), \ 383 - 
ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ 384 - ib_opcode_name(RC_RDMA_WRITE_ONLY), \ 385 - ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ 386 - ib_opcode_name(RC_RDMA_READ_REQUEST), \ 387 - ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \ 388 - ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \ 389 - ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \ 390 - ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \ 391 - ib_opcode_name(RC_ACKNOWLEDGE), \ 392 - ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ 393 - ib_opcode_name(RC_COMPARE_SWAP), \ 394 - ib_opcode_name(RC_FETCH_ADD), \ 395 - ib_opcode_name(RC_SEND_LAST_WITH_INVALIDATE), \ 396 - ib_opcode_name(RC_SEND_ONLY_WITH_INVALIDATE), \ 397 - ib_opcode_name(UC_SEND_FIRST), \ 398 - ib_opcode_name(UC_SEND_MIDDLE), \ 399 - ib_opcode_name(UC_SEND_LAST), \ 400 - ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \ 401 - ib_opcode_name(UC_SEND_ONLY), \ 402 - ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \ 403 - ib_opcode_name(UC_RDMA_WRITE_FIRST), \ 404 - ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \ 405 - ib_opcode_name(UC_RDMA_WRITE_LAST), \ 406 - ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ 407 - ib_opcode_name(UC_RDMA_WRITE_ONLY), \ 408 - ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ 409 - ib_opcode_name(UD_SEND_ONLY), \ 410 - ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \ 411 - ib_opcode_name(CNP)) 412 - 413 - #define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x" 414 - #define BTH_PRN \ 415 - "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \ 416 - "f %d b %d qpn 0x%.6x a %d psn 0x%.8x" 417 - #define EHDR_PRN "%s" 418 - 419 - DECLARE_EVENT_CLASS(hfi1_ibhdr_template, 420 - TP_PROTO(struct hfi1_devdata *dd, 421 - struct hfi1_ib_header *hdr), 422 - TP_ARGS(dd, hdr), 423 - TP_STRUCT__entry( 424 - DD_DEV_ENTRY(dd) 425 - /* LRH */ 426 - __field(u8, vl) 427 - __field(u8, lver) 428 - __field(u8, sl) 429 - __field(u8, lnh) 430 - __field(u16, dlid) 431 - __field(u16, len) 432 - __field(u16, slid) 433 - /* 
BTH */ 434 - __field(u8, opcode) 435 - __field(u8, se) 436 - __field(u8, m) 437 - __field(u8, pad) 438 - __field(u8, tver) 439 - __field(u16, pkey) 440 - __field(u8, f) 441 - __field(u8, b) 442 - __field(u32, qpn) 443 - __field(u8, a) 444 - __field(u32, psn) 445 - /* extended headers */ 446 - __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr)) 447 - ), 448 - TP_fast_assign( 449 - struct hfi1_other_headers *ohdr; 450 - 451 - DD_DEV_ASSIGN(dd); 452 - /* LRH */ 453 - __entry->vl = 454 - (u8)(be16_to_cpu(hdr->lrh[0]) >> 12); 455 - __entry->lver = 456 - (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf; 457 - __entry->sl = 458 - (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; 459 - __entry->lnh = 460 - (u8)(be16_to_cpu(hdr->lrh[0]) & 3); 461 - __entry->dlid = 462 - be16_to_cpu(hdr->lrh[1]); 463 - /* allow for larger len */ 464 - __entry->len = 465 - be16_to_cpu(hdr->lrh[2]); 466 - __entry->slid = 467 - be16_to_cpu(hdr->lrh[3]); 468 - /* BTH */ 469 - if (__entry->lnh == HFI1_LRH_BTH) 470 - ohdr = &hdr->u.oth; 471 - else 472 - ohdr = &hdr->u.l.oth; 473 - __entry->opcode = 474 - (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; 475 - __entry->se = 476 - (be32_to_cpu(ohdr->bth[0]) >> 23) & 1; 477 - __entry->m = 478 - (be32_to_cpu(ohdr->bth[0]) >> 22) & 1; 479 - __entry->pad = 480 - (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 481 - __entry->tver = 482 - (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf; 483 - __entry->pkey = 484 - be32_to_cpu(ohdr->bth[0]) & 0xffff; 485 - __entry->f = 486 - (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) & 487 - HFI1_FECN_MASK; 488 - __entry->b = 489 - (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) & 490 - HFI1_BECN_MASK; 491 - __entry->qpn = 492 - be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; 493 - __entry->a = 494 - (be32_to_cpu(ohdr->bth[2]) >> 31) & 1; 495 - /* allow for larger PSN */ 496 - __entry->psn = 497 - be32_to_cpu(ohdr->bth[2]) & 0x7fffffff; 498 - /* extended headers */ 499 - memcpy(__get_dynamic_array(ehdrs), &ohdr->u, 500 - ibhdr_exhdr_len(hdr)); 501 - ), 502 - 
TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN, 503 - __get_str(dev), 504 - /* LRH */ 505 - __entry->vl, 506 - __entry->lver, 507 - __entry->sl, 508 - __entry->lnh, show_lnh(__entry->lnh), 509 - __entry->dlid, 510 - __entry->len, 511 - __entry->slid, 512 - /* BTH */ 513 - __entry->opcode, show_ib_opcode(__entry->opcode), 514 - __entry->se, 515 - __entry->m, 516 - __entry->pad, 517 - __entry->tver, 518 - __entry->pkey, 519 - __entry->f, 520 - __entry->b, 521 - __entry->qpn, 522 - __entry->a, 523 - __entry->psn, 524 - /* extended headers */ 525 - __parse_ib_ehdrs( 526 - __entry->opcode, 527 - (void *)__get_dynamic_array(ehdrs)) 528 - ) 529 - ); 530 - 531 - DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr, 532 - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), 533 - TP_ARGS(dd, hdr)); 534 - 535 - DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr, 536 - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), 537 - TP_ARGS(dd, hdr)); 538 - 539 - DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr, 540 - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), 541 - TP_ARGS(dd, hdr)); 542 - 543 - DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr, 544 - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), 545 - TP_ARGS(dd, hdr)); 546 - 547 - #define SNOOP_PRN \ 548 - "slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \ 549 - "svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]" 550 - 551 - #undef TRACE_SYSTEM 552 - #define TRACE_SYSTEM hfi1_snoop 553 - 554 - TRACE_EVENT(snoop_capture, 555 - TP_PROTO(struct hfi1_devdata *dd, 556 - int hdr_len, 557 - struct hfi1_ib_header *hdr, 558 - int data_len, 559 - void *data), 560 - TP_ARGS(dd, hdr_len, hdr, data_len, data), 561 - TP_STRUCT__entry( 562 - DD_DEV_ENTRY(dd) 563 - __field(u16, slid) 564 - __field(u16, dlid) 565 - __field(u32, qpn) 566 - __field(u8, opcode) 567 - __field(u8, sl) 568 - __field(u16, pkey) 569 - __field(u32, hdr_len) 570 - __field(u32, data_len) 571 - 
__field(u8, lnh) 572 - __dynamic_array(u8, raw_hdr, hdr_len) 573 - __dynamic_array(u8, raw_pkt, data_len) 574 - ), 575 - TP_fast_assign( 576 - struct hfi1_other_headers *ohdr; 577 - 578 - __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); 579 - if (__entry->lnh == HFI1_LRH_BTH) 580 - ohdr = &hdr->u.oth; 581 - else 582 - ohdr = &hdr->u.l.oth; 583 - DD_DEV_ASSIGN(dd); 584 - __entry->slid = be16_to_cpu(hdr->lrh[3]); 585 - __entry->dlid = be16_to_cpu(hdr->lrh[1]); 586 - __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; 587 - __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; 588 - __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; 589 - __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff; 590 - __entry->hdr_len = hdr_len; 591 - __entry->data_len = data_len; 592 - memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len); 593 - memcpy(__get_dynamic_array(raw_pkt), data, data_len); 594 - ), 595 - TP_printk( 596 - "[%s] " SNOOP_PRN, 597 - __get_str(dev), 598 - __entry->slid, 599 - __entry->dlid, 600 - __entry->qpn, 601 - __entry->opcode, 602 - show_ib_opcode(__entry->opcode), 603 - __entry->sl, 604 - __entry->pkey, 605 - __entry->hdr_len, 606 - __entry->data_len 607 - ) 608 - ); 609 - 610 - #undef TRACE_SYSTEM 611 - #define TRACE_SYSTEM hfi1_ctxts 612 - 613 - #define UCTXT_FMT \ 614 - "cred:%u, credaddr:0x%llx, piobase:0x%llx, rcvhdr_cnt:%u, " \ 615 - "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx" 616 - TRACE_EVENT(hfi1_uctxtdata, 617 - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt), 618 - TP_ARGS(dd, uctxt), 619 - TP_STRUCT__entry(DD_DEV_ENTRY(dd) 620 - __field(unsigned, ctxt) 621 - __field(u32, credits) 622 - __field(u64, hw_free) 623 - __field(u64, piobase) 624 - __field(u16, rcvhdrq_cnt) 625 - __field(u64, rcvhdrq_phys) 626 - __field(u32, eager_cnt) 627 - __field(u64, rcvegr_phys) 628 - ), 629 - TP_fast_assign(DD_DEV_ASSIGN(dd); 630 - __entry->ctxt = uctxt->ctxt; 631 - __entry->credits = uctxt->sc->credits; 632 - __entry->hw_free = 
(u64)uctxt->sc->hw_free; 633 - __entry->piobase = (u64)uctxt->sc->base_addr; 634 - __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt; 635 - __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys; 636 - __entry->eager_cnt = uctxt->egrbufs.alloced; 637 - __entry->rcvegr_phys = 638 - uctxt->egrbufs.rcvtids[0].phys; 639 - ), 640 - TP_printk("[%s] ctxt %u " UCTXT_FMT, 641 - __get_str(dev), 642 - __entry->ctxt, 643 - __entry->credits, 644 - __entry->hw_free, 645 - __entry->piobase, 646 - __entry->rcvhdrq_cnt, 647 - __entry->rcvhdrq_phys, 648 - __entry->eager_cnt, 649 - __entry->rcvegr_phys 650 - ) 651 - ); 652 - 653 - #define CINFO_FMT \ 654 - "egrtids:%u, egr_size:%u, hdrq_cnt:%u, hdrq_size:%u, sdma_ring_size:%u" 655 - TRACE_EVENT(hfi1_ctxt_info, 656 - TP_PROTO(struct hfi1_devdata *dd, unsigned ctxt, unsigned subctxt, 657 - struct hfi1_ctxt_info cinfo), 658 - TP_ARGS(dd, ctxt, subctxt, cinfo), 659 - TP_STRUCT__entry(DD_DEV_ENTRY(dd) 660 - __field(unsigned, ctxt) 661 - __field(unsigned, subctxt) 662 - __field(u16, egrtids) 663 - __field(u16, rcvhdrq_cnt) 664 - __field(u16, rcvhdrq_size) 665 - __field(u16, sdma_ring_size) 666 - __field(u32, rcvegr_size) 667 - ), 668 - TP_fast_assign(DD_DEV_ASSIGN(dd); 669 - __entry->ctxt = ctxt; 670 - __entry->subctxt = subctxt; 671 - __entry->egrtids = cinfo.egrtids; 672 - __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt; 673 - __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize; 674 - __entry->sdma_ring_size = cinfo.sdma_ring_size; 675 - __entry->rcvegr_size = cinfo.rcvegr_size; 676 - ), 677 - TP_printk("[%s] ctxt %u:%u " CINFO_FMT, 678 - __get_str(dev), 679 - __entry->ctxt, 680 - __entry->subctxt, 681 - __entry->egrtids, 682 - __entry->rcvegr_size, 683 - __entry->rcvhdrq_cnt, 684 - __entry->rcvhdrq_size, 685 - __entry->sdma_ring_size 686 - ) 687 - ); 688 - 689 - #undef TRACE_SYSTEM 690 - #define TRACE_SYSTEM hfi1_sma 691 - 692 - #define BCT_FORMAT \ 693 - "shared_limit %x vls 0-7 [%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x] 15 [%x,%x]" 694 - 695 - #define 
BCT(field) \ 696 - be16_to_cpu( \ 697 - ((struct buffer_control *)__get_dynamic_array(bct))->field \ 698 - ) 699 - 700 - DECLARE_EVENT_CLASS(hfi1_bct_template, 701 - TP_PROTO(struct hfi1_devdata *dd, 702 - struct buffer_control *bc), 703 - TP_ARGS(dd, bc), 704 - TP_STRUCT__entry(DD_DEV_ENTRY(dd) 705 - __dynamic_array(u8, bct, sizeof(*bc)) 706 - ), 707 - TP_fast_assign(DD_DEV_ASSIGN(dd); 708 - memcpy(__get_dynamic_array(bct), bc, 709 - sizeof(*bc)); 710 - ), 711 - TP_printk(BCT_FORMAT, 712 - BCT(overall_shared_limit), 713 - 714 - BCT(vl[0].dedicated), 715 - BCT(vl[0].shared), 716 - 717 - BCT(vl[1].dedicated), 718 - BCT(vl[1].shared), 719 - 720 - BCT(vl[2].dedicated), 721 - BCT(vl[2].shared), 722 - 723 - BCT(vl[3].dedicated), 724 - BCT(vl[3].shared), 725 - 726 - BCT(vl[4].dedicated), 727 - BCT(vl[4].shared), 728 - 729 - BCT(vl[5].dedicated), 730 - BCT(vl[5].shared), 731 - 732 - BCT(vl[6].dedicated), 733 - BCT(vl[6].shared), 734 - 735 - BCT(vl[7].dedicated), 736 - BCT(vl[7].shared), 737 - 738 - BCT(vl[15].dedicated), 739 - BCT(vl[15].shared) 740 - ) 741 - ); 742 - 743 - DEFINE_EVENT(hfi1_bct_template, bct_set, 744 - TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc), 745 - TP_ARGS(dd, bc)); 746 - 747 - DEFINE_EVENT(hfi1_bct_template, bct_get, 748 - TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc), 749 - TP_ARGS(dd, bc)); 750 - 751 - #undef TRACE_SYSTEM 752 - #define TRACE_SYSTEM hfi1_sdma 753 - 754 - TRACE_EVENT(hfi1_sdma_descriptor, 755 - TP_PROTO(struct sdma_engine *sde, 756 - u64 desc0, 757 - u64 desc1, 758 - u16 e, 759 - void *descp), 760 - TP_ARGS(sde, desc0, desc1, e, descp), 761 - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) 762 - __field(void *, descp) 763 - __field(u64, desc0) 764 - __field(u64, desc1) 765 - __field(u16, e) 766 - __field(u8, idx) 767 - ), 768 - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); 769 - __entry->desc0 = desc0; 770 - __entry->desc1 = desc1; 771 - __entry->idx = sde->this_idx; 772 - __entry->descp = descp; 773 - __entry->e = 
e; 774 - ), 775 - TP_printk( 776 - "[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u", 777 - __get_str(dev), 778 - __entry->idx, 779 - __parse_sdma_flags(__entry->desc0, __entry->desc1), 780 - (__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT) & 781 - SDMA_DESC0_PHY_ADDR_MASK, 782 - (u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT) & 783 - SDMA_DESC1_GENERATION_MASK), 784 - (u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT) & 785 - SDMA_DESC0_BYTE_COUNT_MASK), 786 - __entry->desc0, 787 - __entry->desc1, 788 - __entry->descp, 789 - __entry->e 790 - ) 791 - ); 792 - 793 - TRACE_EVENT(hfi1_sdma_engine_select, 794 - TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx), 795 - TP_ARGS(dd, sel, vl, idx), 796 - TP_STRUCT__entry(DD_DEV_ENTRY(dd) 797 - __field(u32, sel) 798 - __field(u8, vl) 799 - __field(u8, idx) 800 - ), 801 - TP_fast_assign(DD_DEV_ASSIGN(dd); 802 - __entry->sel = sel; 803 - __entry->vl = vl; 804 - __entry->idx = idx; 805 - ), 806 - TP_printk("[%s] selecting SDE %u sel 0x%x vl %u", 807 - __get_str(dev), 808 - __entry->idx, 809 - __entry->sel, 810 - __entry->vl 811 - ) 812 - ); 813 - 814 - DECLARE_EVENT_CLASS(hfi1_sdma_engine_class, 815 - TP_PROTO(struct sdma_engine *sde, u64 status), 816 - TP_ARGS(sde, status), 817 - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) 818 - __field(u64, status) 819 - __field(u8, idx) 820 - ), 821 - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); 822 - __entry->status = status; 823 - __entry->idx = sde->this_idx; 824 - ), 825 - TP_printk("[%s] SDE(%u) status %llx", 826 - __get_str(dev), 827 - __entry->idx, 828 - (unsigned long long)__entry->status 829 - ) 830 - ); 831 - 832 - DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_interrupt, 833 - TP_PROTO(struct sdma_engine *sde, u64 status), 834 - TP_ARGS(sde, status) 835 - ); 836 - 837 - DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_progress, 838 - TP_PROTO(struct sdma_engine *sde, u64 status), 839 - TP_ARGS(sde, status) 840 - ); 841 - 842 - 
DECLARE_EVENT_CLASS(hfi1_sdma_ahg_ad, 843 - TP_PROTO(struct sdma_engine *sde, int aidx), 844 - TP_ARGS(sde, aidx), 845 - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) 846 - __field(int, aidx) 847 - __field(u8, idx) 848 - ), 849 - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); 850 - __entry->idx = sde->this_idx; 851 - __entry->aidx = aidx; 852 - ), 853 - TP_printk("[%s] SDE(%u) aidx %d", 854 - __get_str(dev), 855 - __entry->idx, 856 - __entry->aidx 857 - ) 858 - ); 859 - 860 - DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_allocate, 861 - TP_PROTO(struct sdma_engine *sde, int aidx), 862 - TP_ARGS(sde, aidx)); 863 - 864 - DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_deallocate, 865 - TP_PROTO(struct sdma_engine *sde, int aidx), 866 - TP_ARGS(sde, aidx)); 867 - 868 - #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER 869 - TRACE_EVENT(hfi1_sdma_progress, 870 - TP_PROTO(struct sdma_engine *sde, 871 - u16 hwhead, 872 - u16 swhead, 873 - struct sdma_txreq *txp 874 - ), 875 - TP_ARGS(sde, hwhead, swhead, txp), 876 - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) 877 - __field(u64, sn) 878 - __field(u16, hwhead) 879 - __field(u16, swhead) 880 - __field(u16, txnext) 881 - __field(u16, tx_tail) 882 - __field(u16, tx_head) 883 - __field(u8, idx) 884 - ), 885 - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); 886 - __entry->hwhead = hwhead; 887 - __entry->swhead = swhead; 888 - __entry->tx_tail = sde->tx_tail; 889 - __entry->tx_head = sde->tx_head; 890 - __entry->txnext = txp ? txp->next_descq_idx : ~0; 891 - __entry->idx = sde->this_idx; 892 - __entry->sn = txp ? 
txp->sn : ~0; 893 - ), 894 - TP_printk( 895 - "[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u", 896 - __get_str(dev), 897 - __entry->idx, 898 - __entry->sn, 899 - __entry->hwhead, 900 - __entry->swhead, 901 - __entry->txnext, 902 - __entry->tx_head, 903 - __entry->tx_tail 904 - ) 905 - ); 906 - #else 907 - TRACE_EVENT(hfi1_sdma_progress, 908 - TP_PROTO(struct sdma_engine *sde, 909 - u16 hwhead, u16 swhead, 910 - struct sdma_txreq *txp 911 - ), 912 - TP_ARGS(sde, hwhead, swhead, txp), 913 - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) 914 - __field(u16, hwhead) 915 - __field(u16, swhead) 916 - __field(u16, txnext) 917 - __field(u16, tx_tail) 918 - __field(u16, tx_head) 919 - __field(u8, idx) 920 - ), 921 - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); 922 - __entry->hwhead = hwhead; 923 - __entry->swhead = swhead; 924 - __entry->tx_tail = sde->tx_tail; 925 - __entry->tx_head = sde->tx_head; 926 - __entry->txnext = txp ? txp->next_descq_idx : ~0; 927 - __entry->idx = sde->this_idx; 928 - ), 929 - TP_printk( 930 - "[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u", 931 - __get_str(dev), 932 - __entry->idx, 933 - __entry->hwhead, 934 - __entry->swhead, 935 - __entry->txnext, 936 - __entry->tx_head, 937 - __entry->tx_tail 938 - ) 939 - ); 940 - #endif 941 - 942 - DECLARE_EVENT_CLASS(hfi1_sdma_sn, 943 - TP_PROTO(struct sdma_engine *sde, u64 sn), 944 - TP_ARGS(sde, sn), 945 - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) 946 - __field(u64, sn) 947 - __field(u8, idx) 948 - ), 949 - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); 950 - __entry->sn = sn; 951 - __entry->idx = sde->this_idx; 952 - ), 953 - TP_printk("[%s] SDE(%u) sn %llu", 954 - __get_str(dev), 955 - __entry->idx, 956 - __entry->sn 957 - ) 958 - ); 959 - 960 - DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn, 961 - TP_PROTO( 962 - struct sdma_engine *sde, 963 - u64 sn 964 - ), 965 - TP_ARGS(sde, sn) 966 - ); 967 - 968 - DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_in_sn, 969 - TP_PROTO(struct 
sdma_engine *sde, u64 sn), 970 - TP_ARGS(sde, sn) 971 - ); 972 - 973 - #define USDMA_HDR_FORMAT \ 974 - "[%s:%u:%u:%u] PBC=(0x%x 0x%x) LRH=(0x%x 0x%x) BTH=(0x%x 0x%x 0x%x) KDETH=(0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x) TIDVal=0x%x" 975 - 976 - TRACE_EVENT(hfi1_sdma_user_header, 977 - TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req, 978 - struct hfi1_pkt_header *hdr, u32 tidval), 979 - TP_ARGS(dd, ctxt, subctxt, req, hdr, tidval), 980 - TP_STRUCT__entry( 981 - DD_DEV_ENTRY(dd) 982 - __field(u16, ctxt) 983 - __field(u8, subctxt) 984 - __field(u16, req) 985 - __field(__le32, pbc0) 986 - __field(__le32, pbc1) 987 - __field(__be32, lrh0) 988 - __field(__be32, lrh1) 989 - __field(__be32, bth0) 990 - __field(__be32, bth1) 991 - __field(__be32, bth2) 992 - __field(__le32, kdeth0) 993 - __field(__le32, kdeth1) 994 - __field(__le32, kdeth2) 995 - __field(__le32, kdeth3) 996 - __field(__le32, kdeth4) 997 - __field(__le32, kdeth5) 998 - __field(__le32, kdeth6) 999 - __field(__le32, kdeth7) 1000 - __field(__le32, kdeth8) 1001 - __field(u32, tidval) 1002 - ), 1003 - TP_fast_assign( 1004 - __le32 *pbc = (__le32 *)hdr->pbc; 1005 - __be32 *lrh = (__be32 *)hdr->lrh; 1006 - __be32 *bth = (__be32 *)hdr->bth; 1007 - __le32 *kdeth = (__le32 *)&hdr->kdeth; 1008 - 1009 - DD_DEV_ASSIGN(dd); 1010 - __entry->ctxt = ctxt; 1011 - __entry->subctxt = subctxt; 1012 - __entry->req = req; 1013 - __entry->pbc0 = pbc[0]; 1014 - __entry->pbc1 = pbc[1]; 1015 - __entry->lrh0 = be32_to_cpu(lrh[0]); 1016 - __entry->lrh1 = be32_to_cpu(lrh[1]); 1017 - __entry->bth0 = be32_to_cpu(bth[0]); 1018 - __entry->bth1 = be32_to_cpu(bth[1]); 1019 - __entry->bth2 = be32_to_cpu(bth[2]); 1020 - __entry->kdeth0 = kdeth[0]; 1021 - __entry->kdeth1 = kdeth[1]; 1022 - __entry->kdeth2 = kdeth[2]; 1023 - __entry->kdeth3 = kdeth[3]; 1024 - __entry->kdeth4 = kdeth[4]; 1025 - __entry->kdeth5 = kdeth[5]; 1026 - __entry->kdeth6 = kdeth[6]; 1027 - __entry->kdeth7 = kdeth[7]; 1028 - __entry->kdeth8 = kdeth[8]; 
1029 - __entry->tidval = tidval; 1030 - ), 1031 - TP_printk(USDMA_HDR_FORMAT, 1032 - __get_str(dev), 1033 - __entry->ctxt, 1034 - __entry->subctxt, 1035 - __entry->req, 1036 - __entry->pbc1, 1037 - __entry->pbc0, 1038 - __entry->lrh0, 1039 - __entry->lrh1, 1040 - __entry->bth0, 1041 - __entry->bth1, 1042 - __entry->bth2, 1043 - __entry->kdeth0, 1044 - __entry->kdeth1, 1045 - __entry->kdeth2, 1046 - __entry->kdeth3, 1047 - __entry->kdeth4, 1048 - __entry->kdeth5, 1049 - __entry->kdeth6, 1050 - __entry->kdeth7, 1051 - __entry->kdeth8, 1052 - __entry->tidval 1053 - ) 1054 - ); 1055 - 1056 - #define SDMA_UREQ_FMT \ 1057 - "[%s:%u:%u] ver/op=0x%x, iovcnt=%u, npkts=%u, frag=%u, idx=%u" 1058 - TRACE_EVENT(hfi1_sdma_user_reqinfo, 1059 - TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i), 1060 - TP_ARGS(dd, ctxt, subctxt, i), 1061 - TP_STRUCT__entry( 1062 - DD_DEV_ENTRY(dd); 1063 - __field(u16, ctxt) 1064 - __field(u8, subctxt) 1065 - __field(u8, ver_opcode) 1066 - __field(u8, iovcnt) 1067 - __field(u16, npkts) 1068 - __field(u16, fragsize) 1069 - __field(u16, comp_idx) 1070 - ), 1071 - TP_fast_assign( 1072 - DD_DEV_ASSIGN(dd); 1073 - __entry->ctxt = ctxt; 1074 - __entry->subctxt = subctxt; 1075 - __entry->ver_opcode = i[0] & 0xff; 1076 - __entry->iovcnt = (i[0] >> 8) & 0xff; 1077 - __entry->npkts = i[1]; 1078 - __entry->fragsize = i[2]; 1079 - __entry->comp_idx = i[3]; 1080 - ), 1081 - TP_printk(SDMA_UREQ_FMT, 1082 - __get_str(dev), 1083 - __entry->ctxt, 1084 - __entry->subctxt, 1085 - __entry->ver_opcode, 1086 - __entry->iovcnt, 1087 - __entry->npkts, 1088 - __entry->fragsize, 1089 - __entry->comp_idx 1090 - ) 1091 - ); 1092 - 1093 - #define usdma_complete_name(st) { st, #st } 1094 - #define show_usdma_complete_state(st) \ 1095 - __print_symbolic(st, \ 1096 - usdma_complete_name(FREE), \ 1097 - usdma_complete_name(QUEUED), \ 1098 - usdma_complete_name(COMPLETE), \ 1099 - usdma_complete_name(ERROR)) 1100 - 1101 - TRACE_EVENT(hfi1_sdma_user_completion, 1102 - 
TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 idx, 1103 - u8 state, int code), 1104 - TP_ARGS(dd, ctxt, subctxt, idx, state, code), 1105 - TP_STRUCT__entry( 1106 - DD_DEV_ENTRY(dd) 1107 - __field(u16, ctxt) 1108 - __field(u8, subctxt) 1109 - __field(u16, idx) 1110 - __field(u8, state) 1111 - __field(int, code) 1112 - ), 1113 - TP_fast_assign( 1114 - DD_DEV_ASSIGN(dd); 1115 - __entry->ctxt = ctxt; 1116 - __entry->subctxt = subctxt; 1117 - __entry->idx = idx; 1118 - __entry->state = state; 1119 - __entry->code = code; 1120 - ), 1121 - TP_printk("[%s:%u:%u:%u] SDMA completion state %s (%d)", 1122 - __get_str(dev), __entry->ctxt, __entry->subctxt, 1123 - __entry->idx, show_usdma_complete_state(__entry->state), 1124 - __entry->code) 1125 - ); 1126 - 1127 - const char *print_u32_array(struct trace_seq *, u32 *, int); 1128 - #define __print_u32_hex(arr, len) print_u32_array(p, arr, len) 1129 - 1130 - TRACE_EVENT(hfi1_sdma_user_header_ahg, 1131 - TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req, 1132 - u8 sde, u8 ahgidx, u32 *ahg, int len, u32 tidval), 1133 - TP_ARGS(dd, ctxt, subctxt, req, sde, ahgidx, ahg, len, tidval), 1134 - TP_STRUCT__entry( 1135 - DD_DEV_ENTRY(dd) 1136 - __field(u16, ctxt) 1137 - __field(u8, subctxt) 1138 - __field(u16, req) 1139 - __field(u8, sde) 1140 - __field(u8, idx) 1141 - __field(int, len) 1142 - __field(u32, tidval) 1143 - __array(u32, ahg, 10) 1144 - ), 1145 - TP_fast_assign( 1146 - DD_DEV_ASSIGN(dd); 1147 - __entry->ctxt = ctxt; 1148 - __entry->subctxt = subctxt; 1149 - __entry->req = req; 1150 - __entry->sde = sde; 1151 - __entry->idx = ahgidx; 1152 - __entry->len = len; 1153 - __entry->tidval = tidval; 1154 - memcpy(__entry->ahg, ahg, len * sizeof(u32)); 1155 - ), 1156 - TP_printk("[%s:%u:%u:%u] (SDE%u/AHG%u) ahg[0-%d]=(%s) TIDVal=0x%x", 1157 - __get_str(dev), 1158 - __entry->ctxt, 1159 - __entry->subctxt, 1160 - __entry->req, 1161 - __entry->sde, 1162 - __entry->idx, 1163 - __entry->len - 1, 1164 - 
__print_u32_hex(__entry->ahg, __entry->len), 1165 - __entry->tidval 1166 - ) 1167 - ); 1168 - 1169 - TRACE_EVENT(hfi1_sdma_state, 1170 - TP_PROTO(struct sdma_engine *sde, 1171 - const char *cstate, 1172 - const char *nstate 1173 - ), 1174 - TP_ARGS(sde, cstate, nstate), 1175 - TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) 1176 - __string(curstate, cstate) 1177 - __string(newstate, nstate) 1178 - ), 1179 - TP_fast_assign(DD_DEV_ASSIGN(sde->dd); 1180 - __assign_str(curstate, cstate); 1181 - __assign_str(newstate, nstate); 1182 - ), 1183 - TP_printk("[%s] current state %s new state %s", 1184 - __get_str(dev), 1185 - __get_str(curstate), 1186 - __get_str(newstate) 1187 - ) 1188 - ); 1189 - 1190 - #undef TRACE_SYSTEM 1191 - #define TRACE_SYSTEM hfi1_rc 1192 - 1193 - DECLARE_EVENT_CLASS(hfi1_rc_template, 1194 - TP_PROTO(struct rvt_qp *qp, u32 psn), 1195 - TP_ARGS(qp, psn), 1196 - TP_STRUCT__entry( 1197 - DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) 1198 - __field(u32, qpn) 1199 - __field(u32, s_flags) 1200 - __field(u32, psn) 1201 - __field(u32, s_psn) 1202 - __field(u32, s_next_psn) 1203 - __field(u32, s_sending_psn) 1204 - __field(u32, s_sending_hpsn) 1205 - __field(u32, r_psn) 1206 - ), 1207 - TP_fast_assign( 1208 - DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) 1209 - __entry->qpn = qp->ibqp.qp_num; 1210 - __entry->s_flags = qp->s_flags; 1211 - __entry->psn = psn; 1212 - __entry->s_psn = qp->s_psn; 1213 - __entry->s_next_psn = qp->s_next_psn; 1214 - __entry->s_sending_psn = qp->s_sending_psn; 1215 - __entry->s_sending_hpsn = qp->s_sending_hpsn; 1216 - __entry->r_psn = qp->r_psn; 1217 - ), 1218 - TP_printk( 1219 - "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x", 1220 - __get_str(dev), 1221 - __entry->qpn, 1222 - __entry->s_flags, 1223 - __entry->psn, 1224 - __entry->s_psn, 1225 - __entry->s_next_psn, 1226 - __entry->s_sending_psn, 1227 - __entry->s_sending_hpsn, 1228 - __entry->r_psn 1229 - ) 1230 - ); 1231 - 
1232 - DEFINE_EVENT(hfi1_rc_template, hfi1_rc_sendcomplete, 1233 - TP_PROTO(struct rvt_qp *qp, u32 psn), 1234 - TP_ARGS(qp, psn) 1235 - ); 1236 - 1237 - DEFINE_EVENT(hfi1_rc_template, hfi1_rc_ack, 1238 - TP_PROTO(struct rvt_qp *qp, u32 psn), 1239 - TP_ARGS(qp, psn) 1240 - ); 1241 - 1242 - DEFINE_EVENT(hfi1_rc_template, hfi1_rc_timeout, 1243 - TP_PROTO(struct rvt_qp *qp, u32 psn), 1244 - TP_ARGS(qp, psn) 1245 - ); 1246 - 1247 - DEFINE_EVENT(hfi1_rc_template, hfi1_rc_rcv_error, 1248 - TP_PROTO(struct rvt_qp *qp, u32 psn), 1249 - TP_ARGS(qp, psn) 1250 - ); 1251 - 1252 - #undef TRACE_SYSTEM 1253 - #define TRACE_SYSTEM hfi1_misc 1254 - 1255 - TRACE_EVENT(hfi1_interrupt, 1256 - TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry, 1257 - int src), 1258 - TP_ARGS(dd, is_entry, src), 1259 - TP_STRUCT__entry(DD_DEV_ENTRY(dd) 1260 - __array(char, buf, 64) 1261 - __field(int, src) 1262 - ), 1263 - TP_fast_assign(DD_DEV_ASSIGN(dd) 1264 - is_entry->is_name(__entry->buf, 64, 1265 - src - is_entry->start); 1266 - __entry->src = src; 1267 - ), 1268 - TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf, 1269 - __entry->src) 1270 - ); 1271 - 1272 - /* 1273 - * Note: 1274 - * This produces a REALLY ugly trace in the console output when the string is 1275 - * too long. 
1276 - */ 1277 - 1278 - #undef TRACE_SYSTEM 1279 - #define TRACE_SYSTEM hfi1_trace 1280 - 1281 - #define MAX_MSG_LEN 512 1282 - 1283 - DECLARE_EVENT_CLASS(hfi1_trace_template, 1284 - TP_PROTO(const char *function, struct va_format *vaf), 1285 - TP_ARGS(function, vaf), 1286 - TP_STRUCT__entry(__string(function, function) 1287 - __dynamic_array(char, msg, MAX_MSG_LEN) 1288 - ), 1289 - TP_fast_assign(__assign_str(function, function); 1290 - WARN_ON_ONCE(vsnprintf 1291 - (__get_dynamic_array(msg), 1292 - MAX_MSG_LEN, vaf->fmt, 1293 - *vaf->va) >= 1294 - MAX_MSG_LEN); 1295 - ), 1296 - TP_printk("(%s) %s", 1297 - __get_str(function), 1298 - __get_str(msg)) 1299 - ); 1300 - 1301 - /* 1302 - * It may be nice to macroize the __hfi1_trace but the va_* stuff requires an 1303 - * actual function to work and can not be in a macro. 1304 - */ 1305 - #define __hfi1_trace_def(lvl) \ 1306 - void __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \ 1307 - \ 1308 - DEFINE_EVENT(hfi1_trace_template, hfi1_ ##lvl, \ 1309 - TP_PROTO(const char *function, struct va_format *vaf), \ 1310 - TP_ARGS(function, vaf)) 1311 - 1312 - #define __hfi1_trace_fn(lvl) \ 1313 - void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \ 1314 - { \ 1315 - struct va_format vaf = { \ 1316 - .fmt = fmt, \ 1317 - }; \ 1318 - va_list args; \ 1319 - \ 1320 - va_start(args, fmt); \ 1321 - vaf.va = &args; \ 1322 - trace_hfi1_ ##lvl(func, &vaf); \ 1323 - va_end(args); \ 1324 - return; \ 1325 - } 1326 - 1327 - /* 1328 - * To create a new trace level simply define it below and as a __hfi1_trace_fn 1329 - * in trace.c. This will create all the hooks for calling 1330 - * hfi1_cdbg(LVL, fmt, ...); as well as take care of all 1331 - * the debugfs stuff. 
1332 - */ 1333 - __hfi1_trace_def(PKT); 1334 - __hfi1_trace_def(PROC); 1335 - __hfi1_trace_def(SDMA); 1336 - __hfi1_trace_def(LINKVERB); 1337 - __hfi1_trace_def(DEBUG); 1338 - __hfi1_trace_def(SNOOP); 1339 - __hfi1_trace_def(CNTR); 1340 - __hfi1_trace_def(PIO); 1341 - __hfi1_trace_def(DC8051); 1342 - __hfi1_trace_def(FIRMWARE); 1343 - __hfi1_trace_def(RCVCTRL); 1344 - __hfi1_trace_def(TID); 1345 - __hfi1_trace_def(MMU); 1346 - __hfi1_trace_def(IOCTL); 1347 - 1348 - #define hfi1_cdbg(which, fmt, ...) \ 1349 - __hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__) 1350 - 1351 - #define hfi1_dbg(fmt, ...) \ 1352 - hfi1_cdbg(DEBUG, fmt, ##__VA_ARGS__) 1353 - 1354 - /* 1355 - * Define HFI1_EARLY_DBG at compile time or here to enable early trace 1356 - * messages. Do not check in an enablement for this. 1357 - */ 1358 - 1359 - #ifdef HFI1_EARLY_DBG 1360 - #define hfi1_dbg_early(fmt, ...) \ 1361 - trace_printk(fmt, ##__VA_ARGS__) 1362 - #else 1363 - #define hfi1_dbg_early(fmt, ...) 1364 - #endif 1365 - 1366 - #endif /* __HFI1_TRACE_H */ 1367 - 1368 - #undef TRACE_INCLUDE_PATH 1369 - #undef TRACE_INCLUDE_FILE 1370 - #define TRACE_INCLUDE_PATH . 1371 - #define TRACE_INCLUDE_FILE trace 1372 - #include <trace/define_trace.h> 47 + #include "trace_dbg.h" 48 + #include "trace_misc.h" 49 + #include "trace_ctxts.h" 50 + #include "trace_ibhdrs.h" 51 + #include "trace_rc.h" 52 + #include "trace_rx.h" 53 + #include "trace_tx.h"
+141
drivers/infiniband/hw/hfi1/trace_ctxts.h
/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
/*
 * Trace events for the hfi1_ctxts trace system: snapshots of a context's
 * send/receive resources (hfi1_uctxtdata) and of a hfi1_ctxt_info
 * structure (hfi1_ctxt_info).
 *
 * The guard also admits TRACE_HEADER_MULTI_READ so that the include of
 * <trace/define_trace.h> at the bottom can read this file again.
 */
#if !defined(__HFI1_TRACE_CTXTS_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_CTXTS_H

#include <linux/tracepoint.h>
#include <linux/trace_seq.h>

#include "hfi.h"

#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_ctxts

/*
 * Format of the hfi1_uctxtdata payload; the conversions must stay in step
 * with the argument list of its TP_printk() below.
 */
#define UCTXT_FMT \
	"cred:%u, credaddr:0x%llx, piobase:0x%p, rcvhdr_cnt:%u, " \
	"rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx"
/*
 * hfi1_uctxtdata - record selected fields of a context.
 * @dd:    device; supplies the device name via DD_DEV_ENTRY/DD_DEV_ASSIGN
 * @uctxt: context whose fields are copied into the trace record
 *
 * uctxt->sc and uctxt->sc->hw_free are dereferenced unconditionally, as is
 * uctxt->egrbufs.rcvtids[0].
 * NOTE(review): callers presumably guarantee these are valid - confirm.
 */
TRACE_EVENT(hfi1_uctxtdata,
	    TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt),
	    TP_ARGS(dd, uctxt),
	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
			     __field(unsigned int, ctxt)
			     __field(u32, credits)
			     __field(u64, hw_free)
			     __field(void __iomem *, piobase)
			     __field(u16, rcvhdrq_cnt)
			     __field(u64, rcvhdrq_phys)
			     __field(u32, eager_cnt)
			     __field(u64, rcvegr_phys)
			     ),
	    TP_fast_assign(DD_DEV_ASSIGN(dd);
			   __entry->ctxt = uctxt->ctxt;
			   __entry->credits = uctxt->sc->credits;
			   /*
			    * Stores the little-endian value read through
			    * hw_free (converted to CPU order), not the
			    * pointer itself, although the format string
			    * labels it "credaddr".
			    */
			   __entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
			   __entry->piobase = uctxt->sc->base_addr;
			   __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
			   __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
			   __entry->eager_cnt = uctxt->egrbufs.alloced;
			   /* only the first rcvtids[] entry is recorded */
			   __entry->rcvegr_phys =
			   uctxt->egrbufs.rcvtids[0].phys;
			   ),
	    TP_printk("[%s] ctxt %u " UCTXT_FMT,
		      __get_str(dev),
		      __entry->ctxt,
		      __entry->credits,
		      __entry->hw_free,
		      __entry->piobase,
		      __entry->rcvhdrq_cnt,
		      __entry->rcvhdrq_phys,
		      __entry->eager_cnt,
		      __entry->rcvegr_phys
		      )
);

/*
 * Format of the hfi1_ctxt_info payload. The print order is egrtids,
 * rcvegr_size, rcvhdrq_cnt, rcvhdrq_size, sdma_ring_size - this differs
 * from the order in which the fields are declared in the record.
 */
#define CINFO_FMT \
	"egrtids:%u, egr_size:%u, hdrq_cnt:%u, hdrq_size:%u, sdma_ring_size:%u"
/*
 * hfi1_ctxt_info - record the contents of a struct hfi1_ctxt_info.
 * @dd:      device; supplies the device name
 * @ctxt:    context number, printed as "ctxt <ctxt>:<subctxt>"
 * @subctxt: sub-context number
 * @cinfo:   structure to record; note that it is passed by value
 */
TRACE_EVENT(hfi1_ctxt_info,
	    TP_PROTO(struct hfi1_devdata *dd, unsigned int ctxt,
		     unsigned int subctxt,
		     struct hfi1_ctxt_info cinfo),
	    TP_ARGS(dd, ctxt, subctxt, cinfo),
	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
			     __field(unsigned int, ctxt)
			     __field(unsigned int, subctxt)
			     __field(u16, egrtids)
			     __field(u16, rcvhdrq_cnt)
			     __field(u16, rcvhdrq_size)
			     __field(u16, sdma_ring_size)
			     __field(u32, rcvegr_size)
			     ),
	    TP_fast_assign(DD_DEV_ASSIGN(dd);
			    __entry->ctxt = ctxt;
			    __entry->subctxt = subctxt;
			    __entry->egrtids = cinfo.egrtids;
			    __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
			    /* the record's "size" is cinfo's entsize member */
			    __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
			    __entry->sdma_ring_size = cinfo.sdma_ring_size;
			    __entry->rcvegr_size = cinfo.rcvegr_size;
			    ),
	    TP_printk("[%s] ctxt %u:%u " CINFO_FMT,
		      __get_str(dev),
		      __entry->ctxt,
		      __entry->subctxt,
		      __entry->egrtids,
		      __entry->rcvegr_size,
		      __entry->rcvhdrq_cnt,
		      __entry->rcvhdrq_size,
		      __entry->sdma_ring_size
		      )
);

#endif /* __HFI1_TRACE_CTXTS_H */

/*
 * Deliberately outside the include guard: point define_trace.h back at
 * this file (trace_ctxts.h in the current directory).
 */
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace_ctxts
#include <trace/define_trace.h>
+155
drivers/infiniband/hw/hfi1/trace_dbg.h
··· 1 + /* 2 + * Copyright(c) 2015, 2016 Intel Corporation. 3 + * 4 + * This file is provided under a dual BSD/GPLv2 license. When using or 5 + * redistributing this file, you may do so under either license. 6 + * 7 + * GPL LICENSE SUMMARY 8 + * 9 + * This program is free software; you can redistribute it and/or modify 10 + * it under the terms of version 2 of the GNU General Public License as 11 + * published by the Free Software Foundation. 12 + * 13 + * This program is distributed in the hope that it will be useful, but 14 + * WITHOUT ANY WARRANTY; without even the implied warranty of 15 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 + * General Public License for more details. 17 + * 18 + * BSD LICENSE 19 + * 20 + * Redistribution and use in source and binary forms, with or without 21 + * modification, are permitted provided that the following conditions 22 + * are met: 23 + * 24 + * - Redistributions of source code must retain the above copyright 25 + * notice, this list of conditions and the following disclaimer. 26 + * - Redistributions in binary form must reproduce the above copyright 27 + * notice, this list of conditions and the following disclaimer in 28 + * the documentation and/or other materials provided with the 29 + * distribution. 30 + * - Neither the name of Intel Corporation nor the names of its 31 + * contributors may be used to endorse or promote products derived 32 + * from this software without specific prior written permission. 33 + * 34 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 38 + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 + * 46 + */ 47 + #if !defined(__HFI1_TRACE_EXTRA_H) || defined(TRACE_HEADER_MULTI_READ) 48 + #define __HFI1_TRACE_EXTRA_H 49 + 50 + #include <linux/tracepoint.h> 51 + #include <linux/trace_seq.h> 52 + 53 + #include "hfi.h" 54 + 55 + /* 56 + * Note: 57 + * This produces a REALLY ugly trace in the console output when the string is 58 + * too long. 59 + */ 60 + 61 + #undef TRACE_SYSTEM 62 + #define TRACE_SYSTEM hfi1_dbg 63 + 64 + #define MAX_MSG_LEN 512 65 + 66 + DECLARE_EVENT_CLASS(hfi1_trace_template, 67 + TP_PROTO(const char *function, struct va_format *vaf), 68 + TP_ARGS(function, vaf), 69 + TP_STRUCT__entry(__string(function, function) 70 + __dynamic_array(char, msg, MAX_MSG_LEN) 71 + ), 72 + TP_fast_assign(__assign_str(function, function); 73 + WARN_ON_ONCE(vsnprintf 74 + (__get_dynamic_array(msg), 75 + MAX_MSG_LEN, vaf->fmt, 76 + *vaf->va) >= 77 + MAX_MSG_LEN); 78 + ), 79 + TP_printk("(%s) %s", 80 + __get_str(function), 81 + __get_str(msg)) 82 + ); 83 + 84 + /* 85 + * It may be nice to macroize the __hfi1_trace but the va_* stuff requires an 86 + * actual function to work and can not be in a macro. 
87 + */ 88 + #define __hfi1_trace_def(lvl) \ 89 + void __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \ 90 + \ 91 + DEFINE_EVENT(hfi1_trace_template, hfi1_ ##lvl, \ 92 + TP_PROTO(const char *function, struct va_format *vaf), \ 93 + TP_ARGS(function, vaf)) 94 + 95 + #define __hfi1_trace_fn(lvl) \ 96 + void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \ 97 + { \ 98 + struct va_format vaf = { \ 99 + .fmt = fmt, \ 100 + }; \ 101 + va_list args; \ 102 + \ 103 + va_start(args, fmt); \ 104 + vaf.va = &args; \ 105 + trace_hfi1_ ##lvl(func, &vaf); \ 106 + va_end(args); \ 107 + return; \ 108 + } 109 + 110 + /* 111 + * To create a new trace level simply define it below and as a __hfi1_trace_fn 112 + * in trace.c. This will create all the hooks for calling 113 + * hfi1_cdbg(LVL, fmt, ...); as well as take care of all 114 + * the debugfs stuff. 115 + */ 116 + __hfi1_trace_def(PKT); 117 + __hfi1_trace_def(PROC); 118 + __hfi1_trace_def(SDMA); 119 + __hfi1_trace_def(LINKVERB); 120 + __hfi1_trace_def(DEBUG); 121 + __hfi1_trace_def(SNOOP); 122 + __hfi1_trace_def(CNTR); 123 + __hfi1_trace_def(PIO); 124 + __hfi1_trace_def(DC8051); 125 + __hfi1_trace_def(FIRMWARE); 126 + __hfi1_trace_def(RCVCTRL); 127 + __hfi1_trace_def(TID); 128 + __hfi1_trace_def(MMU); 129 + __hfi1_trace_def(IOCTL); 130 + 131 + #define hfi1_cdbg(which, fmt, ...) \ 132 + __hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__) 133 + 134 + #define hfi1_dbg(fmt, ...) \ 135 + hfi1_cdbg(DEBUG, fmt, ##__VA_ARGS__) 136 + 137 + /* 138 + * Define HFI1_EARLY_DBG at compile time or here to enable early trace 139 + * messages. Do not check in an enablement for this. 140 + */ 141 + 142 + #ifdef HFI1_EARLY_DBG 143 + #define hfi1_dbg_early(fmt, ...) \ 144 + trace_printk(fmt, ##__VA_ARGS__) 145 + #else 146 + #define hfi1_dbg_early(fmt, ...) 147 + #endif 148 + 149 + #endif /* __HFI1_TRACE_EXTRA_H */ 150 + 151 + #undef TRACE_INCLUDE_PATH 152 + #undef TRACE_INCLUDE_FILE 153 + #define TRACE_INCLUDE_PATH . 
154 + #define TRACE_INCLUDE_FILE trace_dbg 155 + #include <trace/define_trace.h>
+209
drivers/infiniband/hw/hfi1/trace_ibhdrs.h
··· 1 + /* 2 + * Copyright(c) 2015, 2016 Intel Corporation. 3 + * 4 + * This file is provided under a dual BSD/GPLv2 license. When using or 5 + * redistributing this file, you may do so under either license. 6 + * 7 + * GPL LICENSE SUMMARY 8 + * 9 + * This program is free software; you can redistribute it and/or modify 10 + * it under the terms of version 2 of the GNU General Public License as 11 + * published by the Free Software Foundation. 12 + * 13 + * This program is distributed in the hope that it will be useful, but 14 + * WITHOUT ANY WARRANTY; without even the implied warranty of 15 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 + * General Public License for more details. 17 + * 18 + * BSD LICENSE 19 + * 20 + * Redistribution and use in source and binary forms, with or without 21 + * modification, are permitted provided that the following conditions 22 + * are met: 23 + * 24 + * - Redistributions of source code must retain the above copyright 25 + * notice, this list of conditions and the following disclaimer. 26 + * - Redistributions in binary form must reproduce the above copyright 27 + * notice, this list of conditions and the following disclaimer in 28 + * the documentation and/or other materials provided with the 29 + * distribution. 30 + * - Neither the name of Intel Corporation nor the names of its 31 + * contributors may be used to endorse or promote products derived 32 + * from this software without specific prior written permission. 33 + * 34 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 38 + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 + * 46 + */ 47 + #if !defined(__HFI1_TRACE_IBHDRS_H) || defined(TRACE_HEADER_MULTI_READ) 48 + #define __HFI1_TRACE_IBHDRS_H 49 + 50 + #include <linux/tracepoint.h> 51 + #include <linux/trace_seq.h> 52 + 53 + #include "hfi.h" 54 + 55 + #undef TRACE_SYSTEM 56 + #define TRACE_SYSTEM hfi1_ibhdrs 57 + 58 + u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr); 59 + const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); 60 + 61 + #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) 62 + 63 + #define lrh_name(lrh) { HFI1_##lrh, #lrh } 64 + #define show_lnh(lrh) \ 65 + __print_symbolic(lrh, \ 66 + lrh_name(LRH_BTH), \ 67 + lrh_name(LRH_GRH)) 68 + 69 + #define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x" 70 + #define BTH_PRN \ 71 + "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \ 72 + "f %d b %d qpn 0x%.6x a %d psn 0x%.8x" 73 + #define EHDR_PRN "%s" 74 + 75 + DECLARE_EVENT_CLASS(hfi1_ibhdr_template, 76 + TP_PROTO(struct hfi1_devdata *dd, 77 + struct hfi1_ib_header *hdr), 78 + TP_ARGS(dd, hdr), 79 + TP_STRUCT__entry( 80 + DD_DEV_ENTRY(dd) 81 + /* LRH */ 82 + __field(u8, vl) 83 + __field(u8, lver) 84 + __field(u8, sl) 85 + __field(u8, lnh) 86 + __field(u16, dlid) 87 + __field(u16, len) 88 + __field(u16, slid) 89 + /* BTH */ 90 + __field(u8, opcode) 91 + __field(u8, se) 92 + __field(u8, m) 93 + __field(u8, pad) 94 + __field(u8, tver) 95 + __field(u16, pkey) 96 + 
__field(u8, f) 97 + __field(u8, b) 98 + __field(u32, qpn) 99 + __field(u8, a) 100 + __field(u32, psn) 101 + /* extended headers */ 102 + __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr)) 103 + ), 104 + TP_fast_assign( 105 + struct hfi1_other_headers *ohdr; 106 + 107 + DD_DEV_ASSIGN(dd); 108 + /* LRH */ 109 + __entry->vl = 110 + (u8)(be16_to_cpu(hdr->lrh[0]) >> 12); 111 + __entry->lver = 112 + (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf; 113 + __entry->sl = 114 + (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; 115 + __entry->lnh = 116 + (u8)(be16_to_cpu(hdr->lrh[0]) & 3); 117 + __entry->dlid = 118 + be16_to_cpu(hdr->lrh[1]); 119 + /* allow for larger len */ 120 + __entry->len = 121 + be16_to_cpu(hdr->lrh[2]); 122 + __entry->slid = 123 + be16_to_cpu(hdr->lrh[3]); 124 + /* BTH */ 125 + if (__entry->lnh == HFI1_LRH_BTH) 126 + ohdr = &hdr->u.oth; 127 + else 128 + ohdr = &hdr->u.l.oth; 129 + __entry->opcode = 130 + (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; 131 + __entry->se = 132 + (be32_to_cpu(ohdr->bth[0]) >> 23) & 1; 133 + __entry->m = 134 + (be32_to_cpu(ohdr->bth[0]) >> 22) & 1; 135 + __entry->pad = 136 + (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 137 + __entry->tver = 138 + (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf; 139 + __entry->pkey = 140 + be32_to_cpu(ohdr->bth[0]) & 0xffff; 141 + __entry->f = 142 + (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) & 143 + HFI1_FECN_MASK; 144 + __entry->b = 145 + (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) & 146 + HFI1_BECN_MASK; 147 + __entry->qpn = 148 + be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; 149 + __entry->a = 150 + (be32_to_cpu(ohdr->bth[2]) >> 31) & 1; 151 + /* allow for larger PSN */ 152 + __entry->psn = 153 + be32_to_cpu(ohdr->bth[2]) & 0x7fffffff; 154 + /* extended headers */ 155 + memcpy(__get_dynamic_array(ehdrs), &ohdr->u, 156 + ibhdr_exhdr_len(hdr)); 157 + ), 158 + TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN, 159 + __get_str(dev), 160 + /* LRH */ 161 + __entry->vl, 162 + __entry->lver, 163 + __entry->sl, 164 + 
__entry->lnh, show_lnh(__entry->lnh), 165 + __entry->dlid, 166 + __entry->len, 167 + __entry->slid, 168 + /* BTH */ 169 + __entry->opcode, show_ib_opcode(__entry->opcode), 170 + __entry->se, 171 + __entry->m, 172 + __entry->pad, 173 + __entry->tver, 174 + __entry->pkey, 175 + __entry->f, 176 + __entry->b, 177 + __entry->qpn, 178 + __entry->a, 179 + __entry->psn, 180 + /* extended headers */ 181 + __parse_ib_ehdrs( 182 + __entry->opcode, 183 + (void *)__get_dynamic_array(ehdrs)) 184 + ) 185 + ); 186 + 187 + DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr, 188 + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), 189 + TP_ARGS(dd, hdr)); 190 + 191 + DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr, 192 + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), 193 + TP_ARGS(dd, hdr)); 194 + 195 + DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr, 196 + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), 197 + TP_ARGS(dd, hdr)); 198 + 199 + DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr, 200 + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), 201 + TP_ARGS(dd, hdr)); 202 + 203 + #endif /* __HFI1_TRACE_IBHDRS_H */ 204 + 205 + #undef TRACE_INCLUDE_PATH 206 + #undef TRACE_INCLUDE_FILE 207 + #define TRACE_INCLUDE_PATH . 208 + #define TRACE_INCLUDE_FILE trace_ibhdrs 209 + #include <trace/define_trace.h>
+81
drivers/infiniband/hw/hfi1/trace_misc.h
··· 1 + /* 2 + * Copyright(c) 2015, 2016 Intel Corporation. 3 + * 4 + * This file is provided under a dual BSD/GPLv2 license. When using or 5 + * redistributing this file, you may do so under either license. 6 + * 7 + * GPL LICENSE SUMMARY 8 + * 9 + * This program is free software; you can redistribute it and/or modify 10 + * it under the terms of version 2 of the GNU General Public License as 11 + * published by the Free Software Foundation. 12 + * 13 + * This program is distributed in the hope that it will be useful, but 14 + * WITHOUT ANY WARRANTY; without even the implied warranty of 15 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 + * General Public License for more details. 17 + * 18 + * BSD LICENSE 19 + * 20 + * Redistribution and use in source and binary forms, with or without 21 + * modification, are permitted provided that the following conditions 22 + * are met: 23 + * 24 + * - Redistributions of source code must retain the above copyright 25 + * notice, this list of conditions and the following disclaimer. 26 + * - Redistributions in binary form must reproduce the above copyright 27 + * notice, this list of conditions and the following disclaimer in 28 + * the documentation and/or other materials provided with the 29 + * distribution. 30 + * - Neither the name of Intel Corporation nor the names of its 31 + * contributors may be used to endorse or promote products derived 32 + * from this software without specific prior written permission. 33 + * 34 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 38 + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 + * 46 + */ 47 + #if !defined(__HFI1_TRACE_MISC_H) || defined(TRACE_HEADER_MULTI_READ) 48 + #define __HFI1_TRACE_MISC_H 49 + 50 + #include <linux/tracepoint.h> 51 + #include <linux/trace_seq.h> 52 + 53 + #include "hfi.h" 54 + 55 + #undef TRACE_SYSTEM 56 + #define TRACE_SYSTEM hfi1_misc 57 + 58 + TRACE_EVENT(hfi1_interrupt, 59 + TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry, 60 + int src), 61 + TP_ARGS(dd, is_entry, src), 62 + TP_STRUCT__entry(DD_DEV_ENTRY(dd) 63 + __array(char, buf, 64) 64 + __field(int, src) 65 + ), 66 + TP_fast_assign(DD_DEV_ASSIGN(dd) 67 + is_entry->is_name(__entry->buf, 64, 68 + src - is_entry->start); 69 + __entry->src = src; 70 + ), 71 + TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf, 72 + __entry->src) 73 + ); 74 + 75 + #endif /* __HFI1_TRACE_MISC_H */ 76 + 77 + #undef TRACE_INCLUDE_PATH 78 + #undef TRACE_INCLUDE_FILE 79 + #define TRACE_INCLUDE_PATH . 80 + #define TRACE_INCLUDE_FILE trace_misc 81 + #include <trace/define_trace.h>
+123
drivers/infiniband/hw/hfi1/trace_rc.h
··· 1 + /* 2 + * Copyright(c) 2015, 2016 Intel Corporation. 3 + * 4 + * This file is provided under a dual BSD/GPLv2 license. When using or 5 + * redistributing this file, you may do so under either license. 6 + * 7 + * GPL LICENSE SUMMARY 8 + * 9 + * This program is free software; you can redistribute it and/or modify 10 + * it under the terms of version 2 of the GNU General Public License as 11 + * published by the Free Software Foundation. 12 + * 13 + * This program is distributed in the hope that it will be useful, but 14 + * WITHOUT ANY WARRANTY; without even the implied warranty of 15 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 + * General Public License for more details. 17 + * 18 + * BSD LICENSE 19 + * 20 + * Redistribution and use in source and binary forms, with or without 21 + * modification, are permitted provided that the following conditions 22 + * are met: 23 + * 24 + * - Redistributions of source code must retain the above copyright 25 + * notice, this list of conditions and the following disclaimer. 26 + * - Redistributions in binary form must reproduce the above copyright 27 + * notice, this list of conditions and the following disclaimer in 28 + * the documentation and/or other materials provided with the 29 + * distribution. 30 + * - Neither the name of Intel Corporation nor the names of its 31 + * contributors may be used to endorse or promote products derived 32 + * from this software without specific prior written permission. 33 + * 34 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 38 + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 + * 46 + */ 47 + #if !defined(__HFI1_TRACE_RC_H) || defined(TRACE_HEADER_MULTI_READ) 48 + #define __HFI1_TRACE_RC_H 49 + 50 + #include <linux/tracepoint.h> 51 + #include <linux/trace_seq.h> 52 + 53 + #include "hfi.h" 54 + 55 + #undef TRACE_SYSTEM 56 + #define TRACE_SYSTEM hfi1_rc 57 + 58 + DECLARE_EVENT_CLASS(hfi1_rc_template, 59 + TP_PROTO(struct rvt_qp *qp, u32 psn), 60 + TP_ARGS(qp, psn), 61 + TP_STRUCT__entry( 62 + DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) 63 + __field(u32, qpn) 64 + __field(u32, s_flags) 65 + __field(u32, psn) 66 + __field(u32, s_psn) 67 + __field(u32, s_next_psn) 68 + __field(u32, s_sending_psn) 69 + __field(u32, s_sending_hpsn) 70 + __field(u32, r_psn) 71 + ), 72 + TP_fast_assign( 73 + DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) 74 + __entry->qpn = qp->ibqp.qp_num; 75 + __entry->s_flags = qp->s_flags; 76 + __entry->psn = psn; 77 + __entry->s_psn = qp->s_psn; 78 + __entry->s_next_psn = qp->s_next_psn; 79 + __entry->s_sending_psn = qp->s_sending_psn; 80 + __entry->s_sending_hpsn = qp->s_sending_hpsn; 81 + __entry->r_psn = qp->r_psn; 82 + ), 83 + TP_printk( 84 + "[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x", 85 + __get_str(dev), 86 + __entry->qpn, 87 + __entry->s_flags, 88 + __entry->psn, 89 + __entry->s_psn, 90 + __entry->s_next_psn, 91 + __entry->s_sending_psn, 92 + __entry->s_sending_hpsn, 93 + __entry->r_psn 94 + ) 
95 + ); 96 + 97 + DEFINE_EVENT(hfi1_rc_template, hfi1_sendcomplete, 98 + TP_PROTO(struct rvt_qp *qp, u32 psn), 99 + TP_ARGS(qp, psn) 100 + ); 101 + 102 + DEFINE_EVENT(hfi1_rc_template, hfi1_ack, 103 + TP_PROTO(struct rvt_qp *qp, u32 psn), 104 + TP_ARGS(qp, psn) 105 + ); 106 + 107 + DEFINE_EVENT(hfi1_rc_template, hfi1_timeout, 108 + TP_PROTO(struct rvt_qp *qp, u32 psn), 109 + TP_ARGS(qp, psn) 110 + ); 111 + 112 + DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error, 113 + TP_PROTO(struct rvt_qp *qp, u32 psn), 114 + TP_ARGS(qp, psn) 115 + ); 116 + 117 + #endif /* __HFI1_TRACE_RC_H */ 118 + 119 + #undef TRACE_INCLUDE_PATH 120 + #undef TRACE_INCLUDE_FILE 121 + #define TRACE_INCLUDE_PATH . 122 + #define TRACE_INCLUDE_FILE trace_rc 123 + #include <trace/define_trace.h>
+322
drivers/infiniband/hw/hfi1/trace_rx.h
··· 1 + /* 2 + * Copyright(c) 2015, 2016 Intel Corporation. 3 + * 4 + * This file is provided under a dual BSD/GPLv2 license. When using or 5 + * redistributing this file, you may do so under either license. 6 + * 7 + * GPL LICENSE SUMMARY 8 + * 9 + * This program is free software; you can redistribute it and/or modify 10 + * it under the terms of version 2 of the GNU General Public License as 11 + * published by the Free Software Foundation. 12 + * 13 + * This program is distributed in the hope that it will be useful, but 14 + * WITHOUT ANY WARRANTY; without even the implied warranty of 15 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 + * General Public License for more details. 17 + * 18 + * BSD LICENSE 19 + * 20 + * Redistribution and use in source and binary forms, with or without 21 + * modification, are permitted provided that the following conditions 22 + * are met: 23 + * 24 + * - Redistributions of source code must retain the above copyright 25 + * notice, this list of conditions and the following disclaimer. 26 + * - Redistributions in binary form must reproduce the above copyright 27 + * notice, this list of conditions and the following disclaimer in 28 + * the documentation and/or other materials provided with the 29 + * distribution. 30 + * - Neither the name of Intel Corporation nor the names of its 31 + * contributors may be used to endorse or promote products derived 32 + * from this software without specific prior written permission. 33 + * 34 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 38 + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 + * 46 + */ 47 + #if !defined(__HFI1_TRACE_RX_H) || defined(TRACE_HEADER_MULTI_READ) 48 + #define __HFI1_TRACE_RX_H 49 + 50 + #include <linux/tracepoint.h> 51 + #include <linux/trace_seq.h> 52 + 53 + #include "hfi.h" 54 + 55 + #undef TRACE_SYSTEM 56 + #define TRACE_SYSTEM hfi1_rx 57 + 58 + TRACE_EVENT(hfi1_rcvhdr, 59 + TP_PROTO(struct hfi1_devdata *dd, 60 + u32 ctxt, 61 + u64 eflags, 62 + u32 etype, 63 + u32 hlen, 64 + u32 tlen, 65 + u32 updegr, 66 + u32 etail 67 + ), 68 + TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail), 69 + TP_STRUCT__entry(DD_DEV_ENTRY(dd) 70 + __field(u64, eflags) 71 + __field(u32, ctxt) 72 + __field(u32, etype) 73 + __field(u32, hlen) 74 + __field(u32, tlen) 75 + __field(u32, updegr) 76 + __field(u32, etail) 77 + ), 78 + TP_fast_assign(DD_DEV_ASSIGN(dd); 79 + __entry->eflags = eflags; 80 + __entry->ctxt = ctxt; 81 + __entry->etype = etype; 82 + __entry->hlen = hlen; 83 + __entry->tlen = tlen; 84 + __entry->updegr = updegr; 85 + __entry->etail = etail; 86 + ), 87 + TP_printk( 88 + "[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d", 89 + __get_str(dev), 90 + __entry->ctxt, 91 + __entry->eflags, 92 + __entry->etype, show_packettype(__entry->etype), 93 + __entry->hlen, 94 + __entry->tlen, 95 + __entry->updegr, 96 + __entry->etail 97 + ) 98 + ); 99 + 100 + TRACE_EVENT(hfi1_receive_interrupt, 101 + TP_PROTO(struct hfi1_devdata *dd, u32 ctxt), 102 + 
TP_ARGS(dd, ctxt), 103 + TP_STRUCT__entry(DD_DEV_ENTRY(dd) 104 + __field(u32, ctxt) 105 + __field(u8, slow_path) 106 + __field(u8, dma_rtail) 107 + ), 108 + TP_fast_assign(DD_DEV_ASSIGN(dd); 109 + __entry->ctxt = ctxt; 110 + if (dd->rcd[ctxt]->do_interrupt == 111 + &handle_receive_interrupt) { 112 + __entry->slow_path = 1; 113 + __entry->dma_rtail = 0xFF; 114 + } else if (dd->rcd[ctxt]->do_interrupt == 115 + &handle_receive_interrupt_dma_rtail){ 116 + __entry->dma_rtail = 1; 117 + __entry->slow_path = 0; 118 + } else if (dd->rcd[ctxt]->do_interrupt == 119 + &handle_receive_interrupt_nodma_rtail) { 120 + __entry->dma_rtail = 0; 121 + __entry->slow_path = 0; 122 + } 123 + ), 124 + TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d", 125 + __get_str(dev), 126 + __entry->ctxt, 127 + __entry->slow_path, 128 + __entry->dma_rtail 129 + ) 130 + ); 131 + 132 + TRACE_EVENT(hfi1_exp_tid_reg, 133 + TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, 134 + u32 npages, unsigned long va, unsigned long pa, 135 + dma_addr_t dma), 136 + TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), 137 + TP_STRUCT__entry( 138 + __field(unsigned int, ctxt) 139 + __field(u16, subctxt) 140 + __field(u32, rarr) 141 + __field(u32, npages) 142 + __field(unsigned long, va) 143 + __field(unsigned long, pa) 144 + __field(dma_addr_t, dma) 145 + ), 146 + TP_fast_assign( 147 + __entry->ctxt = ctxt; 148 + __entry->subctxt = subctxt; 149 + __entry->rarr = rarr; 150 + __entry->npages = npages; 151 + __entry->va = va; 152 + __entry->pa = pa; 153 + __entry->dma = dma; 154 + ), 155 + TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", 156 + __entry->ctxt, 157 + __entry->subctxt, 158 + __entry->rarr, 159 + __entry->npages, 160 + __entry->pa, 161 + __entry->va, 162 + __entry->dma 163 + ) 164 + ); 165 + 166 + TRACE_EVENT(hfi1_exp_tid_unreg, 167 + TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, 168 + unsigned long va, unsigned long pa, dma_addr_t dma), 169 + TP_ARGS(ctxt, subctxt, rarr, 
npages, va, pa, dma), 170 + TP_STRUCT__entry( 171 + __field(unsigned int, ctxt) 172 + __field(u16, subctxt) 173 + __field(u32, rarr) 174 + __field(u32, npages) 175 + __field(unsigned long, va) 176 + __field(unsigned long, pa) 177 + __field(dma_addr_t, dma) 178 + ), 179 + TP_fast_assign( 180 + __entry->ctxt = ctxt; 181 + __entry->subctxt = subctxt; 182 + __entry->rarr = rarr; 183 + __entry->npages = npages; 184 + __entry->va = va; 185 + __entry->pa = pa; 186 + __entry->dma = dma; 187 + ), 188 + TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", 189 + __entry->ctxt, 190 + __entry->subctxt, 191 + __entry->rarr, 192 + __entry->npages, 193 + __entry->pa, 194 + __entry->va, 195 + __entry->dma 196 + ) 197 + ); 198 + 199 + TRACE_EVENT(hfi1_exp_tid_inval, 200 + TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr, 201 + u32 npages, dma_addr_t dma), 202 + TP_ARGS(ctxt, subctxt, va, rarr, npages, dma), 203 + TP_STRUCT__entry( 204 + __field(unsigned int, ctxt) 205 + __field(u16, subctxt) 206 + __field(unsigned long, va) 207 + __field(u32, rarr) 208 + __field(u32, npages) 209 + __field(dma_addr_t, dma) 210 + ), 211 + TP_fast_assign( 212 + __entry->ctxt = ctxt; 213 + __entry->subctxt = subctxt; 214 + __entry->va = va; 215 + __entry->rarr = rarr; 216 + __entry->npages = npages; 217 + __entry->dma = dma; 218 + ), 219 + TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx", 220 + __entry->ctxt, 221 + __entry->subctxt, 222 + __entry->rarr, 223 + __entry->npages, 224 + __entry->va, 225 + __entry->dma 226 + ) 227 + ); 228 + 229 + TRACE_EVENT(hfi1_mmu_invalidate, 230 + TP_PROTO(unsigned int ctxt, u16 subctxt, const char *type, 231 + unsigned long start, unsigned long end), 232 + TP_ARGS(ctxt, subctxt, type, start, end), 233 + TP_STRUCT__entry( 234 + __field(unsigned int, ctxt) 235 + __field(u16, subctxt) 236 + __string(type, type) 237 + __field(unsigned long, start) 238 + __field(unsigned long, end) 239 + ), 240 + TP_fast_assign( 241 + 
__entry->ctxt = ctxt; 242 + __entry->subctxt = subctxt; 243 + __assign_str(type, type); 244 + __entry->start = start; 245 + __entry->end = end; 246 + ), 247 + TP_printk("[%3u:%02u] MMU Invalidate (%s) 0x%lx - 0x%lx", 248 + __entry->ctxt, 249 + __entry->subctxt, 250 + __get_str(type), 251 + __entry->start, 252 + __entry->end 253 + ) 254 + ); 255 + 256 + #define SNOOP_PRN \ 257 + "slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \ 258 + "svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]" 259 + 260 + TRACE_EVENT(snoop_capture, 261 + TP_PROTO(struct hfi1_devdata *dd, 262 + int hdr_len, 263 + struct hfi1_ib_header *hdr, 264 + int data_len, 265 + void *data), 266 + TP_ARGS(dd, hdr_len, hdr, data_len, data), 267 + TP_STRUCT__entry( 268 + DD_DEV_ENTRY(dd) 269 + __field(u16, slid) 270 + __field(u16, dlid) 271 + __field(u32, qpn) 272 + __field(u8, opcode) 273 + __field(u8, sl) 274 + __field(u16, pkey) 275 + __field(u32, hdr_len) 276 + __field(u32, data_len) 277 + __field(u8, lnh) 278 + __dynamic_array(u8, raw_hdr, hdr_len) 279 + __dynamic_array(u8, raw_pkt, data_len) 280 + ), 281 + TP_fast_assign( 282 + struct hfi1_other_headers *ohdr; 283 + 284 + __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); 285 + if (__entry->lnh == HFI1_LRH_BTH) 286 + ohdr = &hdr->u.oth; 287 + else 288 + ohdr = &hdr->u.l.oth; 289 + DD_DEV_ASSIGN(dd); 290 + __entry->slid = be16_to_cpu(hdr->lrh[3]); 291 + __entry->dlid = be16_to_cpu(hdr->lrh[1]); 292 + __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; 293 + __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; 294 + __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; 295 + __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff; 296 + __entry->hdr_len = hdr_len; 297 + __entry->data_len = data_len; 298 + memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len); 299 + memcpy(__get_dynamic_array(raw_pkt), data, data_len); 300 + ), 301 + TP_printk( 302 + "[%s] " SNOOP_PRN, 303 + __get_str(dev), 304 + __entry->slid, 305 + __entry->dlid, 
306 + __entry->qpn, 307 + __entry->opcode, 308 + show_ib_opcode(__entry->opcode), 309 + __entry->sl, 310 + __entry->pkey, 311 + __entry->hdr_len, 312 + __entry->data_len 313 + ) 314 + ); 315 + 316 + #endif /* __HFI1_TRACE_RX_H */ 317 + 318 + #undef TRACE_INCLUDE_PATH 319 + #undef TRACE_INCLUDE_FILE 320 + #define TRACE_INCLUDE_PATH . 321 + #define TRACE_INCLUDE_FILE trace_rx 322 + #include <trace/define_trace.h>
+642
drivers/infiniband/hw/hfi1/trace_tx.h
··· 1 + /* 2 + * Copyright(c) 2015, 2016 Intel Corporation. 3 + * 4 + * This file is provided under a dual BSD/GPLv2 license. When using or 5 + * redistributing this file, you may do so under either license. 6 + * 7 + * GPL LICENSE SUMMARY 8 + * 9 + * This program is free software; you can redistribute it and/or modify 10 + * it under the terms of version 2 of the GNU General Public License as 11 + * published by the Free Software Foundation. 12 + * 13 + * This program is distributed in the hope that it will be useful, but 14 + * WITHOUT ANY WARRANTY; without even the implied warranty of 15 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 + * General Public License for more details. 17 + * 18 + * BSD LICENSE 19 + * 20 + * Redistribution and use in source and binary forms, with or without 21 + * modification, are permitted provided that the following conditions 22 + * are met: 23 + * 24 + * - Redistributions of source code must retain the above copyright 25 + * notice, this list of conditions and the following disclaimer. 26 + * - Redistributions in binary form must reproduce the above copyright 27 + * notice, this list of conditions and the following disclaimer in 28 + * the documentation and/or other materials provided with the 29 + * distribution. 30 + * - Neither the name of Intel Corporation nor the names of its 31 + * contributors may be used to endorse or promote products derived 32 + * from this software without specific prior written permission. 33 + * 34 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 38 + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 + * 46 + */ 47 + #if !defined(__HFI1_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ) 48 + #define __HFI1_TRACE_TX_H 49 + 50 + #include <linux/tracepoint.h> 51 + #include <linux/trace_seq.h> 52 + 53 + #include "hfi.h" 54 + #include "mad.h" 55 + #include "sdma.h" 56 + 57 + const char *parse_sdma_flags(struct trace_seq *p, u64 desc0, u64 desc1); 58 + 59 + #define __parse_sdma_flags(desc0, desc1) parse_sdma_flags(p, desc0, desc1) 60 + 61 + #undef TRACE_SYSTEM 62 + #define TRACE_SYSTEM hfi1_tx 63 + 64 + TRACE_EVENT(hfi1_piofree, 65 + TP_PROTO(struct send_context *sc, int extra), 66 + TP_ARGS(sc, extra), 67 + TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd) 68 + __field(u32, sw_index) 69 + __field(u32, hw_context) 70 + __field(int, extra) 71 + ), 72 + TP_fast_assign(DD_DEV_ASSIGN(sc->dd); 73 + __entry->sw_index = sc->sw_index; 74 + __entry->hw_context = sc->hw_context; 75 + __entry->extra = extra; 76 + ), 77 + TP_printk("[%s] ctxt %u(%u) extra %d", 78 + __get_str(dev), 79 + __entry->sw_index, 80 + __entry->hw_context, 81 + __entry->extra 82 + ) 83 + ); 84 + 85 + TRACE_EVENT(hfi1_wantpiointr, 86 + TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl), 87 + TP_ARGS(sc, needint, credit_ctrl), 88 + TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd) 89 + __field(u32, sw_index) 90 + __field(u32, hw_context) 91 + __field(u32, needint) 92 + __field(u64, credit_ctrl) 93 + ), 94 + TP_fast_assign(DD_DEV_ASSIGN(sc->dd); 95 + 
__entry->sw_index = sc->sw_index; 96 + __entry->hw_context = sc->hw_context; 97 + __entry->needint = needint; 98 + __entry->credit_ctrl = credit_ctrl; 99 + ), 100 + TP_printk("[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx", 101 + __get_str(dev), 102 + __entry->sw_index, 103 + __entry->hw_context, 104 + __entry->needint, 105 + (unsigned long long)__entry->credit_ctrl 106 + ) 107 + ); 108 + 109 + DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template, 110 + TP_PROTO(struct rvt_qp *qp, u32 flags), 111 + TP_ARGS(qp, flags), 112 + TP_STRUCT__entry( 113 + DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) 114 + __field(u32, qpn) 115 + __field(u32, flags) 116 + __field(u32, s_flags) 117 + ), 118 + TP_fast_assign( 119 + DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) 120 + __entry->flags = flags; 121 + __entry->qpn = qp->ibqp.qp_num; 122 + __entry->s_flags = qp->s_flags; 123 + ), 124 + TP_printk( 125 + "[%s] qpn 0x%x flags 0x%x s_flags 0x%x", 126 + __get_str(dev), 127 + __entry->qpn, 128 + __entry->flags, 129 + __entry->s_flags 130 + ) 131 + ); 132 + 133 + DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpwakeup, 134 + TP_PROTO(struct rvt_qp *qp, u32 flags), 135 + TP_ARGS(qp, flags)); 136 + 137 + DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep, 138 + TP_PROTO(struct rvt_qp *qp, u32 flags), 139 + TP_ARGS(qp, flags)); 140 + 141 + TRACE_EVENT(hfi1_sdma_descriptor, 142 + TP_PROTO(struct sdma_engine *sde, 143 + u64 desc0, 144 + u64 desc1, 145 + u16 e, 146 + void *descp), 147 + TP_ARGS(sde, desc0, desc1, e, descp), 148 + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) 149 + __field(void *, descp) 150 + __field(u64, desc0) 151 + __field(u64, desc1) 152 + __field(u16, e) 153 + __field(u8, idx) 154 + ), 155 + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); 156 + __entry->desc0 = desc0; 157 + __entry->desc1 = desc1; 158 + __entry->idx = sde->this_idx; 159 + __entry->descp = descp; 160 + __entry->e = e; 161 + ), 162 + TP_printk( 163 + "[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to 
%p,%u", 164 + __get_str(dev), 165 + __entry->idx, 166 + __parse_sdma_flags(__entry->desc0, __entry->desc1), 167 + (__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT) & 168 + SDMA_DESC0_PHY_ADDR_MASK, 169 + (u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT) & 170 + SDMA_DESC1_GENERATION_MASK), 171 + (u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT) & 172 + SDMA_DESC0_BYTE_COUNT_MASK), 173 + __entry->desc0, 174 + __entry->desc1, 175 + __entry->descp, 176 + __entry->e 177 + ) 178 + ); 179 + 180 + TRACE_EVENT(hfi1_sdma_engine_select, 181 + TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx), 182 + TP_ARGS(dd, sel, vl, idx), 183 + TP_STRUCT__entry(DD_DEV_ENTRY(dd) 184 + __field(u32, sel) 185 + __field(u8, vl) 186 + __field(u8, idx) 187 + ), 188 + TP_fast_assign(DD_DEV_ASSIGN(dd); 189 + __entry->sel = sel; 190 + __entry->vl = vl; 191 + __entry->idx = idx; 192 + ), 193 + TP_printk("[%s] selecting SDE %u sel 0x%x vl %u", 194 + __get_str(dev), 195 + __entry->idx, 196 + __entry->sel, 197 + __entry->vl 198 + ) 199 + ); 200 + 201 + DECLARE_EVENT_CLASS(hfi1_sdma_engine_class, 202 + TP_PROTO(struct sdma_engine *sde, u64 status), 203 + TP_ARGS(sde, status), 204 + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) 205 + __field(u64, status) 206 + __field(u8, idx) 207 + ), 208 + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); 209 + __entry->status = status; 210 + __entry->idx = sde->this_idx; 211 + ), 212 + TP_printk("[%s] SDE(%u) status %llx", 213 + __get_str(dev), 214 + __entry->idx, 215 + (unsigned long long)__entry->status 216 + ) 217 + ); 218 + 219 + DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_interrupt, 220 + TP_PROTO(struct sdma_engine *sde, u64 status), 221 + TP_ARGS(sde, status) 222 + ); 223 + 224 + DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_progress, 225 + TP_PROTO(struct sdma_engine *sde, u64 status), 226 + TP_ARGS(sde, status) 227 + ); 228 + 229 + DECLARE_EVENT_CLASS(hfi1_sdma_ahg_ad, 230 + TP_PROTO(struct sdma_engine *sde, int aidx), 231 + TP_ARGS(sde, aidx), 232 
+ TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) 233 + __field(int, aidx) 234 + __field(u8, idx) 235 + ), 236 + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); 237 + __entry->idx = sde->this_idx; 238 + __entry->aidx = aidx; 239 + ), 240 + TP_printk("[%s] SDE(%u) aidx %d", 241 + __get_str(dev), 242 + __entry->idx, 243 + __entry->aidx 244 + ) 245 + ); 246 + 247 + DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_allocate, 248 + TP_PROTO(struct sdma_engine *sde, int aidx), 249 + TP_ARGS(sde, aidx)); 250 + 251 + DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_deallocate, 252 + TP_PROTO(struct sdma_engine *sde, int aidx), 253 + TP_ARGS(sde, aidx)); 254 + 255 + #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER 256 + TRACE_EVENT(hfi1_sdma_progress, 257 + TP_PROTO(struct sdma_engine *sde, 258 + u16 hwhead, 259 + u16 swhead, 260 + struct sdma_txreq *txp 261 + ), 262 + TP_ARGS(sde, hwhead, swhead, txp), 263 + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) 264 + __field(u64, sn) 265 + __field(u16, hwhead) 266 + __field(u16, swhead) 267 + __field(u16, txnext) 268 + __field(u16, tx_tail) 269 + __field(u16, tx_head) 270 + __field(u8, idx) 271 + ), 272 + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); 273 + __entry->hwhead = hwhead; 274 + __entry->swhead = swhead; 275 + __entry->tx_tail = sde->tx_tail; 276 + __entry->tx_head = sde->tx_head; 277 + __entry->txnext = txp ? txp->next_descq_idx : ~0; 278 + __entry->idx = sde->this_idx; 279 + __entry->sn = txp ? 
txp->sn : ~0; 280 + ), 281 + TP_printk( 282 + "[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u", 283 + __get_str(dev), 284 + __entry->idx, 285 + __entry->sn, 286 + __entry->hwhead, 287 + __entry->swhead, 288 + __entry->txnext, 289 + __entry->tx_head, 290 + __entry->tx_tail 291 + ) 292 + ); 293 + #else 294 + TRACE_EVENT(hfi1_sdma_progress, 295 + TP_PROTO(struct sdma_engine *sde, 296 + u16 hwhead, u16 swhead, 297 + struct sdma_txreq *txp 298 + ), 299 + TP_ARGS(sde, hwhead, swhead, txp), 300 + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) 301 + __field(u16, hwhead) 302 + __field(u16, swhead) 303 + __field(u16, txnext) 304 + __field(u16, tx_tail) 305 + __field(u16, tx_head) 306 + __field(u8, idx) 307 + ), 308 + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); 309 + __entry->hwhead = hwhead; 310 + __entry->swhead = swhead; 311 + __entry->tx_tail = sde->tx_tail; 312 + __entry->tx_head = sde->tx_head; 313 + __entry->txnext = txp ? txp->next_descq_idx : ~0; 314 + __entry->idx = sde->this_idx; 315 + ), 316 + TP_printk( 317 + "[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u", 318 + __get_str(dev), 319 + __entry->idx, 320 + __entry->hwhead, 321 + __entry->swhead, 322 + __entry->txnext, 323 + __entry->tx_head, 324 + __entry->tx_tail 325 + ) 326 + ); 327 + #endif 328 + 329 + DECLARE_EVENT_CLASS(hfi1_sdma_sn, 330 + TP_PROTO(struct sdma_engine *sde, u64 sn), 331 + TP_ARGS(sde, sn), 332 + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) 333 + __field(u64, sn) 334 + __field(u8, idx) 335 + ), 336 + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); 337 + __entry->sn = sn; 338 + __entry->idx = sde->this_idx; 339 + ), 340 + TP_printk("[%s] SDE(%u) sn %llu", 341 + __get_str(dev), 342 + __entry->idx, 343 + __entry->sn 344 + ) 345 + ); 346 + 347 + DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn, 348 + TP_PROTO( 349 + struct sdma_engine *sde, 350 + u64 sn 351 + ), 352 + TP_ARGS(sde, sn) 353 + ); 354 + 355 + DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_in_sn, 356 + TP_PROTO(struct 
sdma_engine *sde, u64 sn), 357 + TP_ARGS(sde, sn) 358 + ); 359 + 360 + #define USDMA_HDR_FORMAT \ 361 + "[%s:%u:%u:%u] PBC=(0x%x 0x%x) LRH=(0x%x 0x%x) BTH=(0x%x 0x%x 0x%x) KDETH=(0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x) TIDVal=0x%x" 362 + 363 + TRACE_EVENT(hfi1_sdma_user_header, 364 + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req, 365 + struct hfi1_pkt_header *hdr, u32 tidval), 366 + TP_ARGS(dd, ctxt, subctxt, req, hdr, tidval), 367 + TP_STRUCT__entry( 368 + DD_DEV_ENTRY(dd) 369 + __field(u16, ctxt) 370 + __field(u8, subctxt) 371 + __field(u16, req) 372 + __field(u32, pbc0) 373 + __field(u32, pbc1) 374 + __field(u32, lrh0) 375 + __field(u32, lrh1) 376 + __field(u32, bth0) 377 + __field(u32, bth1) 378 + __field(u32, bth2) 379 + __field(u32, kdeth0) 380 + __field(u32, kdeth1) 381 + __field(u32, kdeth2) 382 + __field(u32, kdeth3) 383 + __field(u32, kdeth4) 384 + __field(u32, kdeth5) 385 + __field(u32, kdeth6) 386 + __field(u32, kdeth7) 387 + __field(u32, kdeth8) 388 + __field(u32, tidval) 389 + ), 390 + TP_fast_assign( 391 + __le32 *pbc = (__le32 *)hdr->pbc; 392 + __be32 *lrh = (__be32 *)hdr->lrh; 393 + __be32 *bth = (__be32 *)hdr->bth; 394 + __le32 *kdeth = (__le32 *)&hdr->kdeth; 395 + 396 + DD_DEV_ASSIGN(dd); 397 + __entry->ctxt = ctxt; 398 + __entry->subctxt = subctxt; 399 + __entry->req = req; 400 + __entry->pbc0 = le32_to_cpu(pbc[0]); 401 + __entry->pbc1 = le32_to_cpu(pbc[1]); 402 + __entry->lrh0 = be32_to_cpu(lrh[0]); 403 + __entry->lrh1 = be32_to_cpu(lrh[1]); 404 + __entry->bth0 = be32_to_cpu(bth[0]); 405 + __entry->bth1 = be32_to_cpu(bth[1]); 406 + __entry->bth2 = be32_to_cpu(bth[2]); 407 + __entry->kdeth0 = le32_to_cpu(kdeth[0]); 408 + __entry->kdeth1 = le32_to_cpu(kdeth[1]); 409 + __entry->kdeth2 = le32_to_cpu(kdeth[2]); 410 + __entry->kdeth3 = le32_to_cpu(kdeth[3]); 411 + __entry->kdeth4 = le32_to_cpu(kdeth[4]); 412 + __entry->kdeth5 = le32_to_cpu(kdeth[5]); 413 + __entry->kdeth6 = le32_to_cpu(kdeth[6]); 414 + __entry->kdeth7 = 
le32_to_cpu(kdeth[7]); 415 + __entry->kdeth8 = le32_to_cpu(kdeth[8]); 416 + __entry->tidval = tidval; 417 + ), 418 + TP_printk(USDMA_HDR_FORMAT, 419 + __get_str(dev), 420 + __entry->ctxt, 421 + __entry->subctxt, 422 + __entry->req, 423 + __entry->pbc1, 424 + __entry->pbc0, 425 + __entry->lrh0, 426 + __entry->lrh1, 427 + __entry->bth0, 428 + __entry->bth1, 429 + __entry->bth2, 430 + __entry->kdeth0, 431 + __entry->kdeth1, 432 + __entry->kdeth2, 433 + __entry->kdeth3, 434 + __entry->kdeth4, 435 + __entry->kdeth5, 436 + __entry->kdeth6, 437 + __entry->kdeth7, 438 + __entry->kdeth8, 439 + __entry->tidval 440 + ) 441 + ); 442 + 443 + #define SDMA_UREQ_FMT \ 444 + "[%s:%u:%u] ver/op=0x%x, iovcnt=%u, npkts=%u, frag=%u, idx=%u" 445 + TRACE_EVENT(hfi1_sdma_user_reqinfo, 446 + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i), 447 + TP_ARGS(dd, ctxt, subctxt, i), 448 + TP_STRUCT__entry( 449 + DD_DEV_ENTRY(dd); 450 + __field(u16, ctxt) 451 + __field(u8, subctxt) 452 + __field(u8, ver_opcode) 453 + __field(u8, iovcnt) 454 + __field(u16, npkts) 455 + __field(u16, fragsize) 456 + __field(u16, comp_idx) 457 + ), 458 + TP_fast_assign( 459 + DD_DEV_ASSIGN(dd); 460 + __entry->ctxt = ctxt; 461 + __entry->subctxt = subctxt; 462 + __entry->ver_opcode = i[0] & 0xff; 463 + __entry->iovcnt = (i[0] >> 8) & 0xff; 464 + __entry->npkts = i[1]; 465 + __entry->fragsize = i[2]; 466 + __entry->comp_idx = i[3]; 467 + ), 468 + TP_printk(SDMA_UREQ_FMT, 469 + __get_str(dev), 470 + __entry->ctxt, 471 + __entry->subctxt, 472 + __entry->ver_opcode, 473 + __entry->iovcnt, 474 + __entry->npkts, 475 + __entry->fragsize, 476 + __entry->comp_idx 477 + ) 478 + ); 479 + 480 + #define usdma_complete_name(st) { st, #st } 481 + #define show_usdma_complete_state(st) \ 482 + __print_symbolic(st, \ 483 + usdma_complete_name(FREE), \ 484 + usdma_complete_name(QUEUED), \ 485 + usdma_complete_name(COMPLETE), \ 486 + usdma_complete_name(ERROR)) 487 + 488 + TRACE_EVENT(hfi1_sdma_user_completion, 489 + 
TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 idx, 490 + u8 state, int code), 491 + TP_ARGS(dd, ctxt, subctxt, idx, state, code), 492 + TP_STRUCT__entry( 493 + DD_DEV_ENTRY(dd) 494 + __field(u16, ctxt) 495 + __field(u8, subctxt) 496 + __field(u16, idx) 497 + __field(u8, state) 498 + __field(int, code) 499 + ), 500 + TP_fast_assign( 501 + DD_DEV_ASSIGN(dd); 502 + __entry->ctxt = ctxt; 503 + __entry->subctxt = subctxt; 504 + __entry->idx = idx; 505 + __entry->state = state; 506 + __entry->code = code; 507 + ), 508 + TP_printk("[%s:%u:%u:%u] SDMA completion state %s (%d)", 509 + __get_str(dev), __entry->ctxt, __entry->subctxt, 510 + __entry->idx, show_usdma_complete_state(__entry->state), 511 + __entry->code) 512 + ); 513 + 514 + const char *print_u32_array(struct trace_seq *, u32 *, int); 515 + #define __print_u32_hex(arr, len) print_u32_array(p, arr, len) 516 + 517 + TRACE_EVENT(hfi1_sdma_user_header_ahg, 518 + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 req, 519 + u8 sde, u8 ahgidx, u32 *ahg, int len, u32 tidval), 520 + TP_ARGS(dd, ctxt, subctxt, req, sde, ahgidx, ahg, len, tidval), 521 + TP_STRUCT__entry( 522 + DD_DEV_ENTRY(dd) 523 + __field(u16, ctxt) 524 + __field(u8, subctxt) 525 + __field(u16, req) 526 + __field(u8, sde) 527 + __field(u8, idx) 528 + __field(int, len) 529 + __field(u32, tidval) 530 + __array(u32, ahg, 10) 531 + ), 532 + TP_fast_assign( 533 + DD_DEV_ASSIGN(dd); 534 + __entry->ctxt = ctxt; 535 + __entry->subctxt = subctxt; 536 + __entry->req = req; 537 + __entry->sde = sde; 538 + __entry->idx = ahgidx; 539 + __entry->len = len; 540 + __entry->tidval = tidval; 541 + memcpy(__entry->ahg, ahg, len * sizeof(u32)); 542 + ), 543 + TP_printk("[%s:%u:%u:%u] (SDE%u/AHG%u) ahg[0-%d]=(%s) TIDVal=0x%x", 544 + __get_str(dev), 545 + __entry->ctxt, 546 + __entry->subctxt, 547 + __entry->req, 548 + __entry->sde, 549 + __entry->idx, 550 + __entry->len - 1, 551 + __print_u32_hex(__entry->ahg, __entry->len), 552 + __entry->tidval 553 + 
) 554 + ); 555 + 556 + TRACE_EVENT(hfi1_sdma_state, 557 + TP_PROTO(struct sdma_engine *sde, 558 + const char *cstate, 559 + const char *nstate 560 + ), 561 + TP_ARGS(sde, cstate, nstate), 562 + TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd) 563 + __string(curstate, cstate) 564 + __string(newstate, nstate) 565 + ), 566 + TP_fast_assign(DD_DEV_ASSIGN(sde->dd); 567 + __assign_str(curstate, cstate); 568 + __assign_str(newstate, nstate); 569 + ), 570 + TP_printk("[%s] current state %s new state %s", 571 + __get_str(dev), 572 + __get_str(curstate), 573 + __get_str(newstate) 574 + ) 575 + ); 576 + 577 + #define BCT_FORMAT \ 578 + "shared_limit %x vls 0-7 [%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x][%x,%x] 15 [%x,%x]" 579 + 580 + #define BCT(field) \ 581 + be16_to_cpu( \ 582 + ((struct buffer_control *)__get_dynamic_array(bct))->field \ 583 + ) 584 + 585 + DECLARE_EVENT_CLASS(hfi1_bct_template, 586 + TP_PROTO(struct hfi1_devdata *dd, 587 + struct buffer_control *bc), 588 + TP_ARGS(dd, bc), 589 + TP_STRUCT__entry(DD_DEV_ENTRY(dd) 590 + __dynamic_array(u8, bct, sizeof(*bc)) 591 + ), 592 + TP_fast_assign(DD_DEV_ASSIGN(dd); 593 + memcpy(__get_dynamic_array(bct), bc, 594 + sizeof(*bc)); 595 + ), 596 + TP_printk(BCT_FORMAT, 597 + BCT(overall_shared_limit), 598 + 599 + BCT(vl[0].dedicated), 600 + BCT(vl[0].shared), 601 + 602 + BCT(vl[1].dedicated), 603 + BCT(vl[1].shared), 604 + 605 + BCT(vl[2].dedicated), 606 + BCT(vl[2].shared), 607 + 608 + BCT(vl[3].dedicated), 609 + BCT(vl[3].shared), 610 + 611 + BCT(vl[4].dedicated), 612 + BCT(vl[4].shared), 613 + 614 + BCT(vl[5].dedicated), 615 + BCT(vl[5].shared), 616 + 617 + BCT(vl[6].dedicated), 618 + BCT(vl[6].shared), 619 + 620 + BCT(vl[7].dedicated), 621 + BCT(vl[7].shared), 622 + 623 + BCT(vl[15].dedicated), 624 + BCT(vl[15].shared) 625 + ) 626 + ); 627 + 628 + DEFINE_EVENT(hfi1_bct_template, bct_set, 629 + TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc), 630 + TP_ARGS(dd, bc)); 631 + 632 + DEFINE_EVENT(hfi1_bct_template, 
bct_get, 633 + TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc), 634 + TP_ARGS(dd, bc)); 635 + 636 + #endif /* __HFI1_TRACE_TX_H */ 637 + 638 + #undef TRACE_INCLUDE_PATH 639 + #undef TRACE_INCLUDE_FILE 640 + #define TRACE_INCLUDE_PATH . 641 + #define TRACE_INCLUDE_FILE trace_tx 642 + #include <trace/define_trace.h>
-489
drivers/infiniband/hw/hfi1/twsi.c
··· 1 - /* 2 - * Copyright(c) 2015, 2016 Intel Corporation. 3 - * 4 - * This file is provided under a dual BSD/GPLv2 license. When using or 5 - * redistributing this file, you may do so under either license. 6 - * 7 - * GPL LICENSE SUMMARY 8 - * 9 - * This program is free software; you can redistribute it and/or modify 10 - * it under the terms of version 2 of the GNU General Public License as 11 - * published by the Free Software Foundation. 12 - * 13 - * This program is distributed in the hope that it will be useful, but 14 - * WITHOUT ANY WARRANTY; without even the implied warranty of 15 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 - * General Public License for more details. 17 - * 18 - * BSD LICENSE 19 - * 20 - * Redistribution and use in source and binary forms, with or without 21 - * modification, are permitted provided that the following conditions 22 - * are met: 23 - * 24 - * - Redistributions of source code must retain the above copyright 25 - * notice, this list of conditions and the following disclaimer. 26 - * - Redistributions in binary form must reproduce the above copyright 27 - * notice, this list of conditions and the following disclaimer in 28 - * the documentation and/or other materials provided with the 29 - * distribution. 30 - * - Neither the name of Intel Corporation nor the names of its 31 - * contributors may be used to endorse or promote products derived 32 - * from this software without specific prior written permission. 33 - * 34 - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 38 - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 - * 46 - */ 47 - 48 - #include <linux/delay.h> 49 - #include <linux/pci.h> 50 - #include <linux/vmalloc.h> 51 - 52 - #include "hfi.h" 53 - #include "twsi.h" 54 - 55 - /* 56 - * "Two Wire Serial Interface" support. 57 - * 58 - * Originally written for a not-quite-i2c serial eeprom, which is 59 - * still used on some supported boards. Later boards have added a 60 - * variety of other uses, most board-specific, so the bit-boffing 61 - * part has been split off to this file, while the other parts 62 - * have been moved to chip-specific files. 63 - * 64 - * We have also dropped all pretense of fully generic (e.g. pretend 65 - * we don't know whether '1' is the higher voltage) interface, as 66 - * the restrictions of the generic i2c interface (e.g. no access from 67 - * driver itself) make it unsuitable for this use. 68 - */ 69 - 70 - #define READ_CMD 1 71 - #define WRITE_CMD 0 72 - 73 - /** 74 - * i2c_wait_for_writes - wait for a write 75 - * @dd: the hfi1_ib device 76 - * 77 - * We use this instead of udelay directly, so we can make sure 78 - * that previous register writes have been flushed all the way 79 - * to the chip. 
Since we are delaying anyway, the cost doesn't 80 - * hurt, and makes the bit twiddling more regular 81 - */ 82 - static void i2c_wait_for_writes(struct hfi1_devdata *dd, u32 target) 83 - { 84 - /* 85 - * implicit read of EXTStatus is as good as explicit 86 - * read of scratch, if all we want to do is flush 87 - * writes. 88 - */ 89 - hfi1_gpio_mod(dd, target, 0, 0, 0); 90 - rmb(); /* inlined, so prevent compiler reordering */ 91 - } 92 - 93 - /* 94 - * QSFP modules are allowed to hold SCL low for 500uSec. Allow twice that 95 - * for "almost compliant" modules 96 - */ 97 - #define SCL_WAIT_USEC 1000 98 - 99 - /* BUF_WAIT is time bus must be free between STOP or ACK and to next START. 100 - * Should be 20, but some chips need more. 101 - */ 102 - #define TWSI_BUF_WAIT_USEC 60 103 - 104 - static void scl_out(struct hfi1_devdata *dd, u32 target, u8 bit) 105 - { 106 - u32 mask; 107 - 108 - udelay(1); 109 - 110 - mask = QSFP_HFI0_I2CCLK; 111 - 112 - /* SCL is meant to be bare-drain, so never set "OUT", just DIR */ 113 - hfi1_gpio_mod(dd, target, 0, bit ? 0 : mask, mask); 114 - 115 - /* 116 - * Allow for slow slaves by simple 117 - * delay for falling edge, sampling on rise. 
118 - */ 119 - if (!bit) { 120 - udelay(2); 121 - } else { 122 - int rise_usec; 123 - 124 - for (rise_usec = SCL_WAIT_USEC; rise_usec > 0; rise_usec -= 2) { 125 - if (mask & hfi1_gpio_mod(dd, target, 0, 0, 0)) 126 - break; 127 - udelay(2); 128 - } 129 - if (rise_usec <= 0) 130 - dd_dev_err(dd, "SCL interface stuck low > %d uSec\n", 131 - SCL_WAIT_USEC); 132 - } 133 - i2c_wait_for_writes(dd, target); 134 - } 135 - 136 - static u8 scl_in(struct hfi1_devdata *dd, u32 target, int wait) 137 - { 138 - u32 read_val, mask; 139 - 140 - mask = QSFP_HFI0_I2CCLK; 141 - /* SCL is meant to be bare-drain, so never set "OUT", just DIR */ 142 - hfi1_gpio_mod(dd, target, 0, 0, mask); 143 - read_val = hfi1_gpio_mod(dd, target, 0, 0, 0); 144 - if (wait) 145 - i2c_wait_for_writes(dd, target); 146 - return (read_val & mask) >> GPIO_SCL_NUM; 147 - } 148 - 149 - static void sda_out(struct hfi1_devdata *dd, u32 target, u8 bit) 150 - { 151 - u32 mask; 152 - 153 - mask = QSFP_HFI0_I2CDAT; 154 - 155 - /* SDA is meant to be bare-drain, so never set "OUT", just DIR */ 156 - hfi1_gpio_mod(dd, target, 0, bit ? 
0 : mask, mask); 157 - 158 - i2c_wait_for_writes(dd, target); 159 - udelay(2); 160 - } 161 - 162 - static u8 sda_in(struct hfi1_devdata *dd, u32 target, int wait) 163 - { 164 - u32 read_val, mask; 165 - 166 - mask = QSFP_HFI0_I2CDAT; 167 - /* SDA is meant to be bare-drain, so never set "OUT", just DIR */ 168 - hfi1_gpio_mod(dd, target, 0, 0, mask); 169 - read_val = hfi1_gpio_mod(dd, target, 0, 0, 0); 170 - if (wait) 171 - i2c_wait_for_writes(dd, target); 172 - return (read_val & mask) >> GPIO_SDA_NUM; 173 - } 174 - 175 - /** 176 - * i2c_ackrcv - see if ack following write is true 177 - * @dd: the hfi1_ib device 178 - */ 179 - static int i2c_ackrcv(struct hfi1_devdata *dd, u32 target) 180 - { 181 - u8 ack_received; 182 - 183 - /* AT ENTRY SCL = LOW */ 184 - /* change direction, ignore data */ 185 - ack_received = sda_in(dd, target, 1); 186 - scl_out(dd, target, 1); 187 - ack_received = sda_in(dd, target, 1) == 0; 188 - scl_out(dd, target, 0); 189 - return ack_received; 190 - } 191 - 192 - static void stop_cmd(struct hfi1_devdata *dd, u32 target); 193 - 194 - /** 195 - * rd_byte - read a byte, sending STOP on last, else ACK 196 - * @dd: the hfi1_ib device 197 - * 198 - * Returns byte shifted out of device 199 - */ 200 - static int rd_byte(struct hfi1_devdata *dd, u32 target, int last) 201 - { 202 - int bit_cntr, data; 203 - 204 - data = 0; 205 - 206 - for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) { 207 - data <<= 1; 208 - scl_out(dd, target, 1); 209 - data |= sda_in(dd, target, 0); 210 - scl_out(dd, target, 0); 211 - } 212 - if (last) { 213 - scl_out(dd, target, 1); 214 - stop_cmd(dd, target); 215 - } else { 216 - sda_out(dd, target, 0); 217 - scl_out(dd, target, 1); 218 - scl_out(dd, target, 0); 219 - sda_out(dd, target, 1); 220 - } 221 - return data; 222 - } 223 - 224 - /** 225 - * wr_byte - write a byte, one bit at a time 226 - * @dd: the hfi1_ib device 227 - * @data: the byte to write 228 - * 229 - * Returns 0 if we got the following ack, otherwise 1 230 - */ 231 
- static int wr_byte(struct hfi1_devdata *dd, u32 target, u8 data) 232 - { 233 - int bit_cntr; 234 - u8 bit; 235 - 236 - for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) { 237 - bit = (data >> bit_cntr) & 1; 238 - sda_out(dd, target, bit); 239 - scl_out(dd, target, 1); 240 - scl_out(dd, target, 0); 241 - } 242 - return (!i2c_ackrcv(dd, target)) ? 1 : 0; 243 - } 244 - 245 - /* 246 - * issue TWSI start sequence: 247 - * (both clock/data high, clock high, data low while clock is high) 248 - */ 249 - static void start_seq(struct hfi1_devdata *dd, u32 target) 250 - { 251 - sda_out(dd, target, 1); 252 - scl_out(dd, target, 1); 253 - sda_out(dd, target, 0); 254 - udelay(1); 255 - scl_out(dd, target, 0); 256 - } 257 - 258 - /** 259 - * stop_seq - transmit the stop sequence 260 - * @dd: the hfi1_ib device 261 - * 262 - * (both clock/data low, clock high, data high while clock is high) 263 - */ 264 - static void stop_seq(struct hfi1_devdata *dd, u32 target) 265 - { 266 - scl_out(dd, target, 0); 267 - sda_out(dd, target, 0); 268 - scl_out(dd, target, 1); 269 - sda_out(dd, target, 1); 270 - } 271 - 272 - /** 273 - * stop_cmd - transmit the stop condition 274 - * @dd: the hfi1_ib device 275 - * 276 - * (both clock/data low, clock high, data high while clock is high) 277 - */ 278 - static void stop_cmd(struct hfi1_devdata *dd, u32 target) 279 - { 280 - stop_seq(dd, target); 281 - udelay(TWSI_BUF_WAIT_USEC); 282 - } 283 - 284 - /** 285 - * hfi1_twsi_reset - reset I2C communication 286 - * @dd: the hfi1_ib device 287 - * returns 0 if ok, -EIO on error 288 - */ 289 - int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target) 290 - { 291 - int clock_cycles_left = 9; 292 - u32 mask; 293 - 294 - /* Both SCL and SDA should be high. If not, there 295 - * is something wrong. 296 - */ 297 - mask = QSFP_HFI0_I2CCLK | QSFP_HFI0_I2CDAT; 298 - 299 - /* 300 - * Force pins to desired innocuous state. 
301 - * This is the default power-on state with out=0 and dir=0, 302 - * So tri-stated and should be floating high (barring HW problems) 303 - */ 304 - hfi1_gpio_mod(dd, target, 0, 0, mask); 305 - 306 - /* Check if SCL is low, if it is low then we have a slave device 307 - * misbehaving and there is not much we can do. 308 - */ 309 - if (!scl_in(dd, target, 0)) 310 - return -EIO; 311 - 312 - /* Check if SDA is low, if it is low then we have to clock SDA 313 - * up to 9 times for the device to release the bus 314 - */ 315 - while (clock_cycles_left--) { 316 - if (sda_in(dd, target, 0)) 317 - return 0; 318 - scl_out(dd, target, 0); 319 - scl_out(dd, target, 1); 320 - } 321 - 322 - return -EIO; 323 - } 324 - 325 - #define HFI1_TWSI_START 0x100 326 - #define HFI1_TWSI_STOP 0x200 327 - 328 - /* Write byte to TWSI, optionally prefixed with START or suffixed with 329 - * STOP. 330 - * returns 0 if OK (ACK received), else != 0 331 - */ 332 - static int twsi_wr(struct hfi1_devdata *dd, u32 target, int data, int flags) 333 - { 334 - int ret = 1; 335 - 336 - if (flags & HFI1_TWSI_START) 337 - start_seq(dd, target); 338 - 339 - /* Leaves SCL low (from i2c_ackrcv()) */ 340 - ret = wr_byte(dd, target, data); 341 - 342 - if (flags & HFI1_TWSI_STOP) 343 - stop_cmd(dd, target); 344 - return ret; 345 - } 346 - 347 - /* Added functionality for IBA7220-based cards */ 348 - #define HFI1_TEMP_DEV 0x98 349 - 350 - /* 351 - * hfi1_twsi_blk_rd 352 - * General interface for data transfer from twsi devices. 353 - * One vestige of its former role is that it recognizes a device 354 - * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part, 355 - * which responded to all TWSI device codes, interpreting them as 356 - * address within device. On all other devices found on board handled by 357 - * this driver, the device is followed by a N-byte "address" which selects 358 - * the "register" or "offset" within the device from which data should 359 - * be read. 
360 - */ 361 - int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr, 362 - void *buffer, int len) 363 - { 364 - u8 *bp = buffer; 365 - int ret = 1; 366 - int i; 367 - int offset_size; 368 - 369 - /* obtain the offset size, strip it from the device address */ 370 - offset_size = (dev >> 8) & 0xff; 371 - dev &= 0xff; 372 - 373 - /* allow at most a 2 byte offset */ 374 - if (offset_size > 2) 375 - goto bail; 376 - 377 - if (dev == HFI1_TWSI_NO_DEV) { 378 - /* legacy not-really-I2C */ 379 - addr = (addr << 1) | READ_CMD; 380 - ret = twsi_wr(dd, target, addr, HFI1_TWSI_START); 381 - } else { 382 - /* Actual I2C */ 383 - if (offset_size) { 384 - ret = twsi_wr(dd, target, 385 - dev | WRITE_CMD, HFI1_TWSI_START); 386 - if (ret) { 387 - stop_cmd(dd, target); 388 - goto bail; 389 - } 390 - 391 - for (i = 0; i < offset_size; i++) { 392 - ret = twsi_wr(dd, target, 393 - (addr >> (i * 8)) & 0xff, 0); 394 - udelay(TWSI_BUF_WAIT_USEC); 395 - if (ret) { 396 - dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n", 397 - i, addr); 398 - goto bail; 399 - } 400 - } 401 - } 402 - ret = twsi_wr(dd, target, dev | READ_CMD, HFI1_TWSI_START); 403 - } 404 - if (ret) { 405 - stop_cmd(dd, target); 406 - goto bail; 407 - } 408 - 409 - /* 410 - * block devices keeps clocking data out as long as we ack, 411 - * automatically incrementing the address. Some have "pages" 412 - * whose boundaries will not be crossed, but the handling 413 - * of these is left to the caller, who is in a better 414 - * position to know. 415 - */ 416 - while (len-- > 0) { 417 - /* 418 - * Get and store data, sending ACK if length remaining, 419 - * else STOP 420 - */ 421 - *bp++ = rd_byte(dd, target, !len); 422 - } 423 - 424 - ret = 0; 425 - 426 - bail: 427 - return ret; 428 - } 429 - 430 - /* 431 - * hfi1_twsi_blk_wr 432 - * General interface for data transfer to twsi devices. 
433 - * One vestige of its former role is that it recognizes a device 434 - * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part, 435 - * which responded to all TWSI device codes, interpreting them as 436 - * address within device. On all other devices found on board handled by 437 - * this driver, the device is followed by a N-byte "address" which selects 438 - * the "register" or "offset" within the device to which data should 439 - * be written. 440 - */ 441 - int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr, 442 - const void *buffer, int len) 443 - { 444 - const u8 *bp = buffer; 445 - int ret = 1; 446 - int i; 447 - int offset_size; 448 - 449 - /* obtain the offset size, strip it from the device address */ 450 - offset_size = (dev >> 8) & 0xff; 451 - dev &= 0xff; 452 - 453 - /* allow at most a 2 byte offset */ 454 - if (offset_size > 2) 455 - goto bail; 456 - 457 - if (dev == HFI1_TWSI_NO_DEV) { 458 - if (twsi_wr(dd, target, (addr << 1) | WRITE_CMD, 459 - HFI1_TWSI_START)) { 460 - goto failed_write; 461 - } 462 - } else { 463 - /* Real I2C */ 464 - if (twsi_wr(dd, target, dev | WRITE_CMD, HFI1_TWSI_START)) 465 - goto failed_write; 466 - } 467 - 468 - for (i = 0; i < offset_size; i++) { 469 - ret = twsi_wr(dd, target, (addr >> (i * 8)) & 0xff, 0); 470 - udelay(TWSI_BUF_WAIT_USEC); 471 - if (ret) { 472 - dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n", 473 - i, addr); 474 - goto bail; 475 - } 476 - } 477 - 478 - for (i = 0; i < len; i++) 479 - if (twsi_wr(dd, target, *bp++, 0)) 480 - goto failed_write; 481 - 482 - ret = 0; 483 - 484 - failed_write: 485 - stop_cmd(dd, target); 486 - 487 - bail: 488 - return ret; 489 - }
-65
drivers/infiniband/hw/hfi1/twsi.h
··· 1 - #ifndef _TWSI_H 2 - #define _TWSI_H 3 - /* 4 - * Copyright(c) 2015, 2016 Intel Corporation. 5 - * 6 - * This file is provided under a dual BSD/GPLv2 license. When using or 7 - * redistributing this file, you may do so under either license. 8 - * 9 - * GPL LICENSE SUMMARY 10 - * 11 - * This program is free software; you can redistribute it and/or modify 12 - * it under the terms of version 2 of the GNU General Public License as 13 - * published by the Free Software Foundation. 14 - * 15 - * This program is distributed in the hope that it will be useful, but 16 - * WITHOUT ANY WARRANTY; without even the implied warranty of 17 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 - * General Public License for more details. 19 - * 20 - * BSD LICENSE 21 - * 22 - * Redistribution and use in source and binary forms, with or without 23 - * modification, are permitted provided that the following conditions 24 - * are met: 25 - * 26 - * - Redistributions of source code must retain the above copyright 27 - * notice, this list of conditions and the following disclaimer. 28 - * - Redistributions in binary form must reproduce the above copyright 29 - * notice, this list of conditions and the following disclaimer in 30 - * the documentation and/or other materials provided with the 31 - * distribution. 32 - * - Neither the name of Intel Corporation nor the names of its 33 - * contributors may be used to endorse or promote products derived 34 - * from this software without specific prior written permission. 35 - * 36 - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 37 - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 38 - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 39 - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 40 - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 41 - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 42 - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 43 - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 44 - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 45 - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 46 - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 47 - * 48 - */ 49 - 50 - #define HFI1_TWSI_NO_DEV 0xFF 51 - 52 - struct hfi1_devdata; 53 - 54 - /* Bit position of SDA/SCL pins in ASIC_QSFP* registers */ 55 - #define GPIO_SDA_NUM 1 56 - #define GPIO_SCL_NUM 0 57 - 58 - /* these functions must be called with qsfp_lock held */ 59 - int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target); 60 - int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr, 61 - void *buffer, int len); 62 - int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr, 63 - const void *buffer, int len); 64 - 65 - #endif /* _TWSI_H */
+26 -35
drivers/infiniband/hw/hfi1/uc.c
··· 119 119 goto bail; 120 120 } 121 121 /* 122 + * Local operations are processed immediately 123 + * after all prior requests have completed. 124 + */ 125 + if (wqe->wr.opcode == IB_WR_REG_MR || 126 + wqe->wr.opcode == IB_WR_LOCAL_INV) { 127 + int local_ops = 0; 128 + int err = 0; 129 + 130 + if (qp->s_last != qp->s_cur) 131 + goto bail; 132 + if (++qp->s_cur == qp->s_size) 133 + qp->s_cur = 0; 134 + if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) { 135 + err = rvt_invalidate_rkey( 136 + qp, wqe->wr.ex.invalidate_rkey); 137 + local_ops = 1; 138 + } 139 + hfi1_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR 140 + : IB_WC_SUCCESS); 141 + if (local_ops) 142 + atomic_dec(&qp->local_ops_pending); 143 + qp->s_hdrwords = 0; 144 + goto done_free_tx; 145 + } 146 + /* 122 147 * Start a new request. 123 148 */ 124 149 qp->s_psn = wqe->psn; ··· 319 294 struct ib_reth *reth; 320 295 int has_grh = rcv_flags & HFI1_HAS_GRH; 321 296 int ret; 322 - u32 bth1; 323 297 324 298 bth0 = be32_to_cpu(ohdr->bth[0]); 325 299 if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0)) 326 300 return; 327 301 328 - bth1 = be32_to_cpu(ohdr->bth[1]); 329 - if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) { 330 - if (bth1 & HFI1_BECN_SMASK) { 331 - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); 332 - u32 rqpn, lqpn; 333 - u16 rlid = be16_to_cpu(hdr->lrh[3]); 334 - u8 sl, sc5; 335 - 336 - lqpn = bth1 & RVT_QPN_MASK; 337 - rqpn = qp->remote_qpn; 338 - 339 - sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl]; 340 - sl = ibp->sc_to_sl[sc5]; 341 - 342 - process_becn(ppd, sl, rlid, lqpn, rqpn, 343 - IB_CC_SVCTYPE_UC); 344 - } 345 - 346 - if (bth1 & HFI1_FECN_SMASK) { 347 - struct ib_grh *grh = NULL; 348 - u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]); 349 - u16 slid = be16_to_cpu(hdr->lrh[3]); 350 - u16 dlid = be16_to_cpu(hdr->lrh[1]); 351 - u32 src_qp = qp->remote_qpn; 352 - u8 sc5; 353 - 354 - sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl]; 355 - if (has_grh) 356 - grh = &hdr->u.l.grh; 357 - 358 - 
return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5, 359 - grh); 360 - } 361 - } 302 + process_ecn(qp, packet, true); 362 303 363 304 psn = be32_to_cpu(ohdr->bth[2]); 364 305 opcode = (bth0 >> 24) & 0xff;
+29 -57
drivers/infiniband/hw/hfi1/ud.c
··· 184 184 } 185 185 186 186 if (ah_attr->ah_flags & IB_AH_GRH) { 187 - hfi1_copy_sge(&qp->r_sge, &ah_attr->grh, 188 - sizeof(struct ib_grh), 1, 0); 187 + struct ib_grh grh; 188 + struct ib_global_route grd = ah_attr->grh; 189 + 190 + hfi1_make_grh(ibp, &grh, &grd, 0, 0); 191 + hfi1_copy_sge(&qp->r_sge, &grh, 192 + sizeof(grh), 1, 0); 189 193 wc.wc_flags |= IB_WC_GRH; 190 194 } else { 191 195 hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); ··· 434 430 qp->qkey : wqe->ud_wr.remote_qkey); 435 431 ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); 436 432 /* disarm any ahg */ 437 - priv->s_hdr->ahgcount = 0; 438 - priv->s_hdr->ahgidx = 0; 439 - priv->s_hdr->tx_flags = 0; 440 - priv->s_hdr->sde = NULL; 433 + priv->s_ahg->ahgcount = 0; 434 + priv->s_ahg->ahgidx = 0; 435 + priv->s_ahg->tx_flags = 0; 441 436 /* pbc */ 442 437 ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; 443 438 ··· 668 665 struct hfi1_other_headers *ohdr = packet->ohdr; 669 666 int opcode; 670 667 u32 hdrsize = packet->hlen; 671 - u32 pad; 672 668 struct ib_wc wc; 673 669 u32 qkey; 674 670 u32 src_qp; 675 671 u16 dlid, pkey; 676 672 int mgmt_pkey_idx = -1; 677 673 struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; 674 + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); 678 675 struct hfi1_ib_header *hdr = packet->hdr; 679 676 u32 rcv_flags = packet->rcv_flags; 680 677 void *data = packet->ebuf; ··· 683 680 bool has_grh = rcv_flags & HFI1_HAS_GRH; 684 681 u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf); 685 682 u32 bth1; 686 - int is_mcast; 687 - struct ib_grh *grh = NULL; 683 + u8 sl_from_sc, sl; 684 + u16 slid; 685 + u8 extra_bytes; 688 686 689 687 qkey = be32_to_cpu(ohdr->u.ud.deth[0]); 690 688 src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; 691 689 dlid = be16_to_cpu(hdr->lrh[1]); 692 - is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && 693 - (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); 694 690 bth1 = be32_to_cpu(ohdr->bth[1]); 695 - if (unlikely(bth1 & 
HFI1_BECN_SMASK)) { 696 - /* 697 - * In pre-B0 h/w the CNP_OPCODE is handled via an 698 - * error path. 699 - */ 700 - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); 701 - u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; 702 - u8 sl; 691 + slid = be16_to_cpu(hdr->lrh[3]); 692 + pkey = (u16)be32_to_cpu(ohdr->bth[0]); 693 + sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; 694 + extra_bytes = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 695 + extra_bytes += (SIZE_OF_CRC << 2); 696 + sl_from_sc = ibp->sc_to_sl[sc5]; 703 697 704 - sl = ibp->sc_to_sl[sc5]; 705 - 706 - process_becn(ppd, sl, 0, lqpn, 0, IB_CC_SVCTYPE_UD); 707 - } 708 - 709 - /* 710 - * The opcode is in the low byte when its in network order 711 - * (top byte when in host order). 712 - */ 713 698 opcode = be32_to_cpu(ohdr->bth[0]) >> 24; 714 699 opcode &= 0xff; 715 700 716 - pkey = (u16)be32_to_cpu(ohdr->bth[0]); 717 - 718 - if (!is_mcast && (opcode != IB_OPCODE_CNP) && bth1 & HFI1_FECN_SMASK) { 719 - u16 slid = be16_to_cpu(hdr->lrh[3]); 720 - 721 - return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5, grh); 722 - } 701 + process_ecn(qp, packet, (opcode != IB_OPCODE_CNP)); 723 702 /* 724 703 * Get the number of bytes the message was padded by 725 704 * and drop incomplete packets. 
726 705 */ 727 - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 728 - if (unlikely(tlen < (hdrsize + pad + 4))) 706 + if (unlikely(tlen < (hdrsize + extra_bytes))) 729 707 goto drop; 730 708 731 - tlen -= hdrsize + pad + 4; 709 + tlen -= hdrsize + extra_bytes; 732 710 733 711 /* 734 712 * Check that the permissive LID is only used on QP0 ··· 720 736 hdr->lrh[3] == IB_LID_PERMISSIVE)) 721 737 goto drop; 722 738 if (qp->ibqp.qp_num > 1) { 723 - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); 724 - u16 slid; 725 - 726 - slid = be16_to_cpu(hdr->lrh[3]); 727 739 if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) { 728 740 /* 729 741 * Traps will not be sent for packets dropped ··· 728 748 * IB spec (release 1.3, section 10.9.4) 729 749 */ 730 750 hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, 731 - pkey, 732 - (be16_to_cpu(hdr->lrh[0]) >> 4) & 733 - 0xF, 751 + pkey, sl, 734 752 src_qp, qp->ibqp.qp_num, 735 - be16_to_cpu(hdr->lrh[3]), 736 - be16_to_cpu(hdr->lrh[1])); 753 + slid, dlid); 737 754 return; 738 755 } 739 756 } else { ··· 740 763 goto drop; 741 764 } 742 765 if (unlikely(qkey != qp->qkey)) { 743 - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, 744 - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, 766 + hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, sl, 745 767 src_qp, qp->ibqp.qp_num, 746 - be16_to_cpu(hdr->lrh[3]), 747 - be16_to_cpu(hdr->lrh[1])); 768 + slid, dlid); 748 769 return; 749 770 } 750 771 /* Drop invalid MAD packets (see 13.5.3.1). 
*/ 751 772 if (unlikely(qp->ibqp.qp_num == 1 && 752 - (tlen > 2048 || 753 - (be16_to_cpu(hdr->lrh[0]) >> 12) == 15))) 773 + (tlen > 2048 || (sc5 == 0xF)))) 754 774 goto drop; 755 775 } else { 756 776 /* Received on QP0, and so by definition, this is an SMP */ 757 777 struct opa_smp *smp = (struct opa_smp *)data; 758 - u16 slid = be16_to_cpu(hdr->lrh[3]); 759 778 760 779 if (opa_smp_check(ibp, pkey, sc5, qp, slid, smp)) 761 780 goto drop; ··· 834 861 qp->ibqp.qp_type == IB_QPT_SMI) { 835 862 if (mgmt_pkey_idx < 0) { 836 863 if (net_ratelimit()) { 837 - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); 838 864 struct hfi1_devdata *dd = ppd->dd; 839 865 840 866 dd_dev_err(dd, "QP type %d mgmt_pkey_idx < 0 and packet not dropped???\n", ··· 846 874 wc.pkey_index = 0; 847 875 } 848 876 849 - wc.slid = be16_to_cpu(hdr->lrh[3]); 850 - wc.sl = ibp->sc_to_sl[sc5]; 877 + wc.slid = slid; 878 + wc.sl = sl_from_sc; 851 879 852 880 /* 853 881 * Save the LMC lower bits if the destination LID is a unicast LID.
+65 -59
drivers/infiniband/hw/hfi1/user_exp_rcv.c
··· 82 82 ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT)) 83 83 84 84 static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *, 85 - struct rb_root *); 85 + struct hfi1_filedata *); 86 86 static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *); 87 87 static int set_rcvarray_entry(struct file *, unsigned long, u32, 88 88 struct tid_group *, struct page **, unsigned); 89 - static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *); 90 - static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, 91 - struct mm_struct *); 92 - static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *); 89 + static int tid_rb_insert(void *, struct mmu_rb_node *); 90 + static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, 91 + struct tid_rb_node *tnode); 92 + static void tid_rb_remove(void *, struct mmu_rb_node *); 93 + static int tid_rb_invalidate(void *, struct mmu_rb_node *); 93 94 static int program_rcvarray(struct file *, unsigned long, struct tid_group *, 94 95 struct tid_pageset *, unsigned, u16, struct page **, 95 96 u32 *, unsigned *, unsigned *); 96 97 static int unprogram_rcvarray(struct file *, u32, struct tid_group **); 97 - static void clear_tid_node(struct hfi1_filedata *, u16, struct tid_rb_node *); 98 + static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); 98 99 99 100 static struct mmu_rb_ops tid_rb_ops = { 100 - .insert = mmu_rb_insert, 101 - .remove = mmu_rb_remove, 102 - .invalidate = mmu_rb_invalidate 101 + .insert = tid_rb_insert, 102 + .remove = tid_rb_remove, 103 + .invalidate = tid_rb_invalidate 103 104 }; 104 105 105 106 static inline u32 rcventry2tidinfo(u32 rcventry) ··· 163 162 164 163 spin_lock_init(&fd->tid_lock); 165 164 spin_lock_init(&fd->invalid_lock); 166 - fd->tid_rb_root = RB_ROOT; 167 165 168 166 if (!uctxt->subctxt_cnt || !fd->subctxt) { 169 167 exp_tid_group_init(&uctxt->tid_group_list); ··· 197 197 if (!fd->entry_to_rb) 198 198 return -ENOMEM; 199 
199 200 - if (!HFI1_CAP_IS_USET(TID_UNMAP)) { 200 + if (!HFI1_CAP_UGET_MASK(uctxt->flags, TID_UNMAP)) { 201 201 fd->invalid_tid_idx = 0; 202 202 fd->invalid_tids = kzalloc(uctxt->expected_count * 203 203 sizeof(u32), GFP_KERNEL); ··· 208 208 209 209 /* 210 210 * Register MMU notifier callbacks. If the registration 211 - * fails, continue but turn off the TID caching for 212 - * all user contexts. 211 + * fails, continue without TID caching for this context. 213 212 */ 214 - ret = hfi1_mmu_rb_register(&fd->tid_rb_root, &tid_rb_ops); 213 + ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops, 214 + dd->pport->hfi1_wq, 215 + &fd->handler); 215 216 if (ret) { 216 217 dd_dev_info(dd, 217 218 "Failed MMU notifier registration %d\n", 218 219 ret); 219 - HFI1_CAP_USET(TID_UNMAP); 220 220 ret = 0; 221 221 } 222 222 } ··· 235 235 * init. 236 236 */ 237 237 spin_lock(&fd->tid_lock); 238 - if (uctxt->subctxt_cnt && !HFI1_CAP_IS_USET(TID_UNMAP)) { 238 + if (uctxt->subctxt_cnt && fd->handler) { 239 239 u16 remainder; 240 240 241 241 fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt; ··· 261 261 * The notifier would have been removed when the process'es mm 262 262 * was freed. 
263 263 */ 264 - if (!HFI1_CAP_IS_USET(TID_UNMAP)) 265 - hfi1_mmu_rb_unregister(&fd->tid_rb_root); 264 + if (fd->handler) 265 + hfi1_mmu_rb_unregister(fd->handler); 266 266 267 267 kfree(fd->invalid_tids); 268 268 269 269 if (!uctxt->cnt) { 270 270 if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) 271 - unlock_exp_tids(uctxt, &uctxt->tid_full_list, 272 - &fd->tid_rb_root); 271 + unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); 273 272 if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) 274 - unlock_exp_tids(uctxt, &uctxt->tid_used_list, 275 - &fd->tid_rb_root); 273 + unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); 276 274 list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, 277 275 list) { 278 276 list_del_init(&grp->list); ··· 397 399 * pages, accept the amount pinned so far and program only that. 398 400 * User space knows how to deal with partially programmed buffers. 399 401 */ 400 - if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages)) { 402 + if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) { 401 403 ret = -ENOMEM; 402 404 goto bail; 403 405 } 404 406 405 - pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages); 407 + pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages); 406 408 if (pinned <= 0) { 407 409 ret = pinned; 408 410 goto bail; ··· 557 559 * for example), unpin all unmapped pages so we can pin them nex time. 
558 560 */ 559 561 if (mapped_pages != pinned) { 560 - hfi1_release_user_pages(current->mm, &pages[mapped_pages], 562 + hfi1_release_user_pages(fd->mm, &pages[mapped_pages], 561 563 pinned - mapped_pages, 562 564 false); 563 565 fd->tid_n_pinned -= pinned - mapped_pages; ··· 827 829 struct hfi1_ctxtdata *uctxt = fd->uctxt; 828 830 struct tid_rb_node *node; 829 831 struct hfi1_devdata *dd = uctxt->dd; 830 - struct rb_root *root = &fd->tid_rb_root; 831 832 dma_addr_t phys; 832 833 833 834 /* ··· 858 861 node->freed = false; 859 862 memcpy(node->pages, pages, sizeof(struct page *) * npages); 860 863 861 - if (HFI1_CAP_IS_USET(TID_UNMAP)) 862 - ret = mmu_rb_insert(root, &node->mmu); 864 + if (!fd->handler) 865 + ret = tid_rb_insert(fd, &node->mmu); 863 866 else 864 - ret = hfi1_mmu_rb_insert(root, &node->mmu); 867 + ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu); 865 868 866 869 if (ret) { 867 870 hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", ··· 901 904 node = fd->entry_to_rb[rcventry]; 902 905 if (!node || node->rcventry != (uctxt->expected_base + rcventry)) 903 906 return -EBADF; 904 - if (HFI1_CAP_IS_USET(TID_UNMAP)) 905 - mmu_rb_remove(&fd->tid_rb_root, &node->mmu, NULL); 906 - else 907 - hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu); 908 907 909 908 if (grp) 910 909 *grp = node->grp; 911 - clear_tid_node(fd, fd->subctxt, node); 910 + 911 + if (!fd->handler) 912 + cacheless_tid_rb_remove(fd, node); 913 + else 914 + hfi1_mmu_rb_remove(fd->handler, &node->mmu); 915 + 912 916 return 0; 913 917 } 914 918 915 - static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt, 916 - struct tid_rb_node *node) 919 + static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) 917 920 { 918 921 struct hfi1_ctxtdata *uctxt = fd->uctxt; 919 922 struct hfi1_devdata *dd = uctxt->dd; ··· 931 934 932 935 pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len, 933 936 PCI_DMA_FROMDEVICE); 934 - hfi1_release_user_pages(current->mm, 
node->pages, node->npages, true); 937 + hfi1_release_user_pages(fd->mm, node->pages, node->npages, true); 935 938 fd->tid_n_pinned -= node->npages; 936 939 937 940 node->grp->used--; ··· 946 949 kfree(node); 947 950 } 948 951 952 + /* 953 + * As a simple helper for hfi1_user_exp_rcv_free, this function deals with 954 + * clearing nodes in the non-cached case. 955 + */ 949 956 static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, 950 - struct exp_tid_set *set, struct rb_root *root) 957 + struct exp_tid_set *set, 958 + struct hfi1_filedata *fd) 951 959 { 952 960 struct tid_group *grp, *ptr; 953 - struct hfi1_filedata *fd = container_of(root, struct hfi1_filedata, 954 - tid_rb_root); 955 961 int i; 956 962 957 963 list_for_each_entry_safe(grp, ptr, &set->list, list) { ··· 969 969 uctxt->expected_base]; 970 970 if (!node || node->rcventry != rcventry) 971 971 continue; 972 - if (HFI1_CAP_IS_USET(TID_UNMAP)) 973 - mmu_rb_remove(&fd->tid_rb_root, 974 - &node->mmu, NULL); 975 - else 976 - hfi1_mmu_rb_remove(&fd->tid_rb_root, 977 - &node->mmu); 978 - clear_tid_node(fd, -1, node); 972 + 973 + cacheless_tid_rb_remove(fd, node); 979 974 } 980 975 } 981 976 } 982 977 } 983 978 984 - static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode) 979 + /* 980 + * Always return 0 from this function. A non-zero return indicates that the 981 + * remove operation will be called and that memory should be unpinned. 982 + * However, the driver cannot unpin out from under PSM. Instead, retain the 983 + * memory (by returning 0) and inform PSM that the memory is going away. PSM 984 + * will call back later when it has removed the memory from its list. 
985 + */ 986 + static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode) 985 987 { 986 - struct hfi1_filedata *fdata = 987 - container_of(root, struct hfi1_filedata, tid_rb_root); 988 + struct hfi1_filedata *fdata = arg; 988 989 struct hfi1_ctxtdata *uctxt = fdata->uctxt; 989 990 struct tid_rb_node *node = 990 991 container_of(mnode, struct tid_rb_node, mmu); ··· 1026 1025 return 0; 1027 1026 } 1028 1027 1029 - static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node) 1028 + static int tid_rb_insert(void *arg, struct mmu_rb_node *node) 1030 1029 { 1031 - struct hfi1_filedata *fdata = 1032 - container_of(root, struct hfi1_filedata, tid_rb_root); 1030 + struct hfi1_filedata *fdata = arg; 1033 1031 struct tid_rb_node *tnode = 1034 1032 container_of(node, struct tid_rb_node, mmu); 1035 1033 u32 base = fdata->uctxt->expected_base; ··· 1037 1037 return 0; 1038 1038 } 1039 1039 1040 - static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node, 1041 - struct mm_struct *mm) 1040 + static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, 1041 + struct tid_rb_node *tnode) 1042 1042 { 1043 - struct hfi1_filedata *fdata = 1044 - container_of(root, struct hfi1_filedata, tid_rb_root); 1045 - struct tid_rb_node *tnode = 1046 - container_of(node, struct tid_rb_node, mmu); 1047 1043 u32 base = fdata->uctxt->expected_base; 1048 1044 1049 1045 fdata->entry_to_rb[tnode->rcventry - base] = NULL; 1046 + clear_tid_node(fdata, tnode); 1047 + } 1048 + 1049 + static void tid_rb_remove(void *arg, struct mmu_rb_node *node) 1050 + { 1051 + struct hfi1_filedata *fdata = arg; 1052 + struct tid_rb_node *tnode = 1053 + container_of(node, struct tid_rb_node, mmu); 1054 + 1055 + cacheless_tid_rb_remove(fdata, tnode); 1050 1056 }
+10 -9
drivers/infiniband/hw/hfi1/user_pages.c
··· 68 68 * could keeping caching buffers. 69 69 * 70 70 */ 71 - bool hfi1_can_pin_pages(struct hfi1_devdata *dd, u32 nlocked, u32 npages) 71 + bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm, 72 + u32 nlocked, u32 npages) 72 73 { 73 74 unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit, 74 75 size = (cache_size * (1UL << 20)); /* convert to bytes */ ··· 90 89 /* Convert to number of pages */ 91 90 size = DIV_ROUND_UP(size, PAGE_SIZE); 92 91 93 - down_read(&current->mm->mmap_sem); 94 - pinned = current->mm->pinned_vm; 95 - up_read(&current->mm->mmap_sem); 92 + down_read(&mm->mmap_sem); 93 + pinned = mm->pinned_vm; 94 + up_read(&mm->mmap_sem); 96 95 97 96 /* First, check the absolute limit against all pinned pages. */ 98 97 if (pinned + npages >= ulimit && !can_lock) ··· 101 100 return ((nlocked + npages) <= size) || can_lock; 102 101 } 103 102 104 - int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable, 105 - struct page **pages) 103 + int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t npages, 104 + bool writable, struct page **pages) 106 105 { 107 106 int ret; 108 107 ··· 110 109 if (ret < 0) 111 110 return ret; 112 111 113 - down_write(&current->mm->mmap_sem); 114 - current->mm->pinned_vm += ret; 115 - up_write(&current->mm->mmap_sem); 112 + down_write(&mm->mmap_sem); 113 + mm->pinned_vm += ret; 114 + up_write(&mm->mmap_sem); 116 115 117 116 return ret; 118 117 }
+171 -130
drivers/infiniband/hw/hfi1/user_sdma.c
··· 145 145 /* Last packet in the request */ 146 146 #define TXREQ_FLAGS_REQ_LAST_PKT BIT(0) 147 147 148 - #define SDMA_REQ_IN_USE 0 148 + /* SDMA request flag bits */ 149 149 #define SDMA_REQ_FOR_THREAD 1 150 150 #define SDMA_REQ_SEND_DONE 2 151 151 #define SDMA_REQ_HAVE_AHG 3 ··· 183 183 struct sdma_mmu_node *node; 184 184 }; 185 185 186 - #define SDMA_CACHE_NODE_EVICT 0 187 - 188 186 struct sdma_mmu_node { 189 187 struct mmu_rb_node rb; 190 - struct list_head list; 191 188 struct hfi1_user_sdma_pkt_q *pq; 192 189 atomic_t refcount; 193 190 struct page **pages; 194 191 unsigned npages; 195 - unsigned long flags; 192 + }; 193 + 194 + /* evict operation argument */ 195 + struct evict_data { 196 + u32 cleared; /* count evicted so far */ 197 + u32 target; /* target count to evict */ 196 198 }; 197 199 198 200 struct user_sdma_request { ··· 307 305 unsigned seq); 308 306 static void activate_packet_queue(struct iowait *, int); 309 307 static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long); 310 - static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *); 311 - static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, 312 - struct mm_struct *); 313 - static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *); 308 + static int sdma_rb_insert(void *, struct mmu_rb_node *); 309 + static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, 310 + void *arg2, bool *stop); 311 + static void sdma_rb_remove(void *, struct mmu_rb_node *); 312 + static int sdma_rb_invalidate(void *, struct mmu_rb_node *); 314 313 315 314 static struct mmu_rb_ops sdma_rb_ops = { 316 315 .filter = sdma_rb_filter, 317 316 .insert = sdma_rb_insert, 317 + .evict = sdma_rb_evict, 318 318 .remove = sdma_rb_remove, 319 319 .invalidate = sdma_rb_invalidate 320 320 }; ··· 401 397 if (!pq->reqs) 402 398 goto pq_reqs_nomem; 403 399 400 + memsize = BITS_TO_LONGS(hfi1_sdma_comp_ring_size) * sizeof(long); 401 + pq->req_in_use = kzalloc(memsize, GFP_KERNEL); 
402 + if (!pq->req_in_use) 403 + goto pq_reqs_no_in_use; 404 + 404 405 INIT_LIST_HEAD(&pq->list); 405 406 pq->dd = dd; 406 407 pq->ctxt = uctxt->ctxt; ··· 414 405 pq->state = SDMA_PKT_Q_INACTIVE; 415 406 atomic_set(&pq->n_reqs, 0); 416 407 init_waitqueue_head(&pq->wait); 417 - pq->sdma_rb_root = RB_ROOT; 418 - INIT_LIST_HEAD(&pq->evict); 419 - spin_lock_init(&pq->evict_lock); 408 + atomic_set(&pq->n_locked, 0); 409 + pq->mm = fd->mm; 420 410 421 411 iowait_init(&pq->busy, 0, NULL, defer_packet_queue, 422 412 activate_packet_queue, NULL); ··· 445 437 cq->nentries = hfi1_sdma_comp_ring_size; 446 438 fd->cq = cq; 447 439 448 - ret = hfi1_mmu_rb_register(&pq->sdma_rb_root, &sdma_rb_ops); 440 + ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq, 441 + &pq->handler); 449 442 if (ret) { 450 443 dd_dev_err(dd, "Failed to register with MMU %d", ret); 451 444 goto done; ··· 462 453 cq_nomem: 463 454 kmem_cache_destroy(pq->txreq_cache); 464 455 pq_txreq_nomem: 456 + kfree(pq->req_in_use); 457 + pq_reqs_no_in_use: 465 458 kfree(pq->reqs); 466 459 pq_reqs_nomem: 467 460 kfree(pq); ··· 483 472 hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit, 484 473 uctxt->ctxt, fd->subctxt); 485 474 pq = fd->pq; 486 - hfi1_mmu_rb_unregister(&pq->sdma_rb_root); 487 475 if (pq) { 476 + if (pq->handler) 477 + hfi1_mmu_rb_unregister(pq->handler); 488 478 spin_lock_irqsave(&uctxt->sdma_qlock, flags); 489 479 if (!list_empty(&pq->list)) 490 480 list_del_init(&pq->list); ··· 496 484 pq->wait, 497 485 (ACCESS_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE)); 498 486 kfree(pq->reqs); 487 + kfree(pq->req_in_use); 499 488 kmem_cache_destroy(pq->txreq_cache); 500 489 kfree(pq); 501 490 fd->pq = NULL; ··· 509 496 return 0; 510 497 } 511 498 499 + static u8 dlid_to_selector(u16 dlid) 500 + { 501 + static u8 mapping[256]; 502 + static int initialized; 503 + static u8 next; 504 + int hash; 505 + 506 + if (!initialized) { 507 + memset(mapping, 0xFF, 256); 508 + initialized = 
1; 509 + } 510 + 511 + hash = ((dlid >> 8) ^ dlid) & 0xFF; 512 + if (mapping[hash] == 0xFF) { 513 + mapping[hash] = next; 514 + next = (next + 1) & 0x7F; 515 + } 516 + 517 + return mapping[hash]; 518 + } 519 + 512 520 int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, 513 521 unsigned long dim, unsigned long *count) 514 522 { 515 - int ret = 0, i = 0; 523 + int ret = 0, i; 516 524 struct hfi1_filedata *fd = fp->private_data; 517 525 struct hfi1_ctxtdata *uctxt = fd->uctxt; 518 526 struct hfi1_user_sdma_pkt_q *pq = fd->pq; ··· 545 511 struct user_sdma_request *req; 546 512 u8 opcode, sc, vl; 547 513 int req_queued = 0; 514 + u16 dlid; 515 + u8 selector; 548 516 549 517 if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) { 550 518 hfi1_cdbg( ··· 565 529 566 530 trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt, 567 531 (u16 *)&info); 568 - if (cq->comps[info.comp_idx].status == QUEUED || 569 - test_bit(SDMA_REQ_IN_USE, &pq->reqs[info.comp_idx].flags)) { 570 - hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in QUEUED state", 571 - dd->unit, uctxt->ctxt, fd->subctxt, 572 - info.comp_idx); 573 - return -EBADSLT; 532 + 533 + if (info.comp_idx >= hfi1_sdma_comp_ring_size) { 534 + hfi1_cdbg(SDMA, 535 + "[%u:%u:%u:%u] Invalid comp index", 536 + dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); 537 + return -EINVAL; 574 538 } 539 + 540 + /* 541 + * Sanity check the header io vector count. Need at least 1 vector 542 + * (header) and cannot be larger than the actual io vector count. 
543 + */ 544 + if (req_iovcnt(info.ctrl) < 1 || req_iovcnt(info.ctrl) > dim) { 545 + hfi1_cdbg(SDMA, 546 + "[%u:%u:%u:%u] Invalid iov count %d, dim %ld", 547 + dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx, 548 + req_iovcnt(info.ctrl), dim); 549 + return -EINVAL; 550 + } 551 + 575 552 if (!info.fragsize) { 576 553 hfi1_cdbg(SDMA, 577 554 "[%u:%u:%u:%u] Request does not specify fragsize", 578 555 dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); 579 556 return -EINVAL; 580 557 } 558 + 559 + /* Try to claim the request. */ 560 + if (test_and_set_bit(info.comp_idx, pq->req_in_use)) { 561 + hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in use", 562 + dd->unit, uctxt->ctxt, fd->subctxt, 563 + info.comp_idx); 564 + return -EBADSLT; 565 + } 581 566 /* 582 - * We've done all the safety checks that we can up to this point, 583 - * "allocate" the request entry. 567 + * All safety checks have been done and this request has been claimed. 584 568 */ 585 569 hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit, 586 570 uctxt->ctxt, fd->subctxt, info.comp_idx); 587 571 req = pq->reqs + info.comp_idx; 588 572 memset(req, 0, sizeof(*req)); 589 - /* Mark the request as IN_USE before we start filling it in. 
*/ 590 - set_bit(SDMA_REQ_IN_USE, &req->flags); 591 - req->data_iovs = req_iovcnt(info.ctrl) - 1; 573 + req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */ 592 574 req->pq = pq; 593 575 req->cq = cq; 594 576 req->status = -1; ··· 614 560 615 561 memcpy(&req->info, &info, sizeof(info)); 616 562 617 - if (req_opcode(info.ctrl) == EXPECTED) 563 + if (req_opcode(info.ctrl) == EXPECTED) { 564 + /* expected must have a TID info and at least one data vector */ 565 + if (req->data_iovs < 2) { 566 + SDMA_DBG(req, 567 + "Not enough vectors for expected request"); 568 + ret = -EINVAL; 569 + goto free_req; 570 + } 618 571 req->data_iovs--; 572 + } 619 573 620 574 if (!info.npkts || req->data_iovs > MAX_VECTORS_PER_REQ) { 621 575 SDMA_DBG(req, "Too many vectors (%u/%u)", req->data_iovs, 622 576 MAX_VECTORS_PER_REQ); 623 - return -EINVAL; 577 + ret = -EINVAL; 578 + goto free_req; 624 579 } 625 580 /* Copy the header from the user buffer */ 626 581 ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info), ··· 697 634 idx++; 698 635 699 636 /* Save all the IO vector structures */ 700 - while (i < req->data_iovs) { 637 + for (i = 0; i < req->data_iovs; i++) { 701 638 INIT_LIST_HEAD(&req->iovs[i].list); 702 639 memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec)); 703 640 ret = pin_vector_pages(req, &req->iovs[i]); ··· 705 642 req->status = ret; 706 643 goto free_req; 707 644 } 708 - req->data_len += req->iovs[i++].iov.iov_len; 645 + req->data_len += req->iovs[i].iov.iov_len; 709 646 } 710 647 SDMA_DBG(req, "total data length %u", req->data_len); 711 648 ··· 749 686 idx++; 750 687 } 751 688 689 + dlid = be16_to_cpu(req->hdr.lrh[1]); 690 + selector = dlid_to_selector(dlid); 691 + 752 692 /* Have to select the engine */ 753 693 req->sde = sdma_select_engine_vl(dd, 754 - (u32)(uctxt->ctxt + fd->subctxt), 694 + (u32)(uctxt->ctxt + fd->subctxt + 695 + selector), 755 696 vl); 756 697 if (!req->sde || !sdma_running(req->sde)) { 757 698 ret = -ECOMM; 
··· 833 766 * The size of the data of the first packet is in the header 834 767 * template. However, it includes the header and ICRC, which need 835 768 * to be subtracted. 769 + * The minimum representable packet data length in a header is 4 bytes, 770 + * therefore, when the data length request is less than 4 bytes, there's 771 + * only one packet, and the packet data length is equal to that of the 772 + * request data length. 836 773 * The size of the remaining packets is the minimum of the frag 837 774 * size (MTU) or remaining data in the request. 838 775 */ 839 776 u32 len; 840 777 841 778 if (!req->seqnum) { 842 - len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) - 843 - (sizeof(tx->hdr) - 4)); 779 + if (req->data_len < sizeof(u32)) 780 + len = req->data_len; 781 + else 782 + len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) - 783 + (sizeof(tx->hdr) - 4)); 844 784 } else if (req_opcode(req->info.ctrl) == EXPECTED) { 845 785 u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) * 846 786 PAGE_SIZE; ··· 874 800 len = min(req->data_len - req->sent, (u32)req->info.fragsize); 875 801 } 876 802 SDMA_DBG(req, "Data Length = %u", len); 803 + return len; 804 + } 805 + 806 + static inline u32 pad_len(u32 len) 807 + { 808 + if (len & (sizeof(u32) - 1)) 809 + len += sizeof(u32) - (len & (sizeof(u32) - 1)); 877 810 return len; 878 811 } 879 812 ··· 975 894 if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) { 976 895 if (!req->seqnum) { 977 896 u16 pbclen = le16_to_cpu(req->hdr.pbc[0]); 978 - u32 lrhlen = get_lrh_len(req->hdr, datalen); 897 + u32 lrhlen = get_lrh_len(req->hdr, 898 + pad_len(datalen)); 979 899 /* 980 900 * Copy the request header into the tx header 981 901 * because the HW needs a cacheline-aligned ··· 1130 1048 1131 1049 static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) 1132 1050 { 1133 - u32 cleared = 0; 1134 - struct sdma_mmu_node *node, *ptr; 1135 - struct list_head to_evict = LIST_HEAD_INIT(to_evict); 1051 + struct evict_data evict_data; 1136 1052 
1137 - spin_lock(&pq->evict_lock); 1138 - list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) { 1139 - /* Make sure that no one is still using the node. */ 1140 - if (!atomic_read(&node->refcount)) { 1141 - set_bit(SDMA_CACHE_NODE_EVICT, &node->flags); 1142 - list_del_init(&node->list); 1143 - list_add(&node->list, &to_evict); 1144 - cleared += node->npages; 1145 - if (cleared >= npages) 1146 - break; 1147 - } 1148 - } 1149 - spin_unlock(&pq->evict_lock); 1150 - 1151 - list_for_each_entry_safe(node, ptr, &to_evict, list) 1152 - hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb); 1153 - 1154 - return cleared; 1053 + evict_data.cleared = 0; 1054 + evict_data.target = npages; 1055 + hfi1_mmu_rb_evict(pq->handler, &evict_data); 1056 + return evict_data.cleared; 1155 1057 } 1156 1058 1157 1059 static int pin_vector_pages(struct user_sdma_request *req, 1158 - struct user_sdma_iovec *iovec) { 1060 + struct user_sdma_iovec *iovec) 1061 + { 1159 1062 int ret = 0, pinned, npages, cleared; 1160 1063 struct page **pages; 1161 1064 struct hfi1_user_sdma_pkt_q *pq = req->pq; 1162 1065 struct sdma_mmu_node *node = NULL; 1163 1066 struct mmu_rb_node *rb_node; 1164 1067 1165 - rb_node = hfi1_mmu_rb_extract(&pq->sdma_rb_root, 1068 + rb_node = hfi1_mmu_rb_extract(pq->handler, 1166 1069 (unsigned long)iovec->iov.iov_base, 1167 1070 iovec->iov.iov_len); 1168 1071 if (rb_node && !IS_ERR(rb_node)) ··· 1163 1096 node->rb.addr = (unsigned long)iovec->iov.iov_base; 1164 1097 node->pq = pq; 1165 1098 atomic_set(&node->refcount, 0); 1166 - INIT_LIST_HEAD(&node->list); 1167 1099 } 1168 1100 1169 1101 npages = num_user_pages(&iovec->iov); ··· 1177 1111 1178 1112 npages -= node->npages; 1179 1113 1180 - /* 1181 - * If rb_node is NULL, it means that this is brand new node 1182 - * and, therefore not on the eviction list. 
1183 - * If, however, the rb_node is non-NULL, it means that the 1184 - * node is already in RB tree and, therefore on the eviction 1185 - * list (nodes are unconditionally inserted in the eviction 1186 - * list). In that case, we have to remove the node prior to 1187 - * calling the eviction function in order to prevent it from 1188 - * freeing this node. 1189 - */ 1190 - if (rb_node) { 1191 - spin_lock(&pq->evict_lock); 1192 - list_del_init(&node->list); 1193 - spin_unlock(&pq->evict_lock); 1194 - } 1195 1114 retry: 1196 - if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) { 1115 + if (!hfi1_can_pin_pages(pq->dd, pq->mm, 1116 + atomic_read(&pq->n_locked), npages)) { 1197 1117 cleared = sdma_cache_evict(pq, npages); 1198 1118 if (cleared >= npages) 1199 1119 goto retry; 1200 1120 } 1201 - pinned = hfi1_acquire_user_pages( 1121 + pinned = hfi1_acquire_user_pages(pq->mm, 1202 1122 ((unsigned long)iovec->iov.iov_base + 1203 1123 (node->npages * PAGE_SIZE)), npages, 0, 1204 1124 pages + node->npages); ··· 1194 1142 goto bail; 1195 1143 } 1196 1144 if (pinned != npages) { 1197 - unpin_vector_pages(current->mm, pages, node->npages, 1145 + unpin_vector_pages(pq->mm, pages, node->npages, 1198 1146 pinned); 1199 1147 ret = -EFAULT; 1200 1148 goto bail; ··· 1204 1152 node->pages = pages; 1205 1153 node->npages += pinned; 1206 1154 npages = node->npages; 1207 - spin_lock(&pq->evict_lock); 1208 - list_add(&node->list, &pq->evict); 1209 - pq->n_locked += pinned; 1210 - spin_unlock(&pq->evict_lock); 1155 + atomic_add(pinned, &pq->n_locked); 1211 1156 } 1212 1157 iovec->pages = node->pages; 1213 1158 iovec->npages = npages; 1214 1159 iovec->node = node; 1215 1160 1216 - ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb); 1161 + ret = hfi1_mmu_rb_insert(req->pq->handler, &node->rb); 1217 1162 if (ret) { 1218 - spin_lock(&pq->evict_lock); 1219 - if (!list_empty(&node->list)) 1220 - list_del(&node->list); 1221 - pq->n_locked -= node->npages; 1222 - 
spin_unlock(&pq->evict_lock); 1163 + atomic_sub(node->npages, &pq->n_locked); 1164 + iovec->node = NULL; 1223 1165 goto bail; 1224 1166 } 1225 1167 return 0; 1226 1168 bail: 1227 1169 if (rb_node) 1228 - unpin_vector_pages(current->mm, node->pages, 0, node->npages); 1170 + unpin_vector_pages(pq->mm, node->pages, 0, node->npages); 1229 1171 kfree(node); 1230 1172 return ret; 1231 1173 } ··· 1227 1181 static void unpin_vector_pages(struct mm_struct *mm, struct page **pages, 1228 1182 unsigned start, unsigned npages) 1229 1183 { 1230 - hfi1_release_user_pages(mm, pages + start, npages, 0); 1184 + hfi1_release_user_pages(mm, pages + start, npages, false); 1231 1185 kfree(pages); 1232 1186 } 1233 1187 ··· 1238 1192 /* 1239 1193 * Perform safety checks for any type of packet: 1240 1194 * - transfer size is multiple of 64bytes 1241 - * - packet length is multiple of 4bytes 1242 - * - entire request length is multiple of 4bytes 1195 + * - packet length is multiple of 4 bytes 1243 1196 * - packet length is not larger than MTU size 1244 1197 * 1245 1198 * These checks are only done for the first packet of the 1246 1199 * transfer since the header is "given" to us by user space. 1247 1200 * For the remainder of the packets we compute the values. 
1248 1201 */ 1249 - if (req->info.fragsize % PIO_BLOCK_SIZE || 1250 - lrhlen & 0x3 || req->data_len & 0x3 || 1202 + if (req->info.fragsize % PIO_BLOCK_SIZE || lrhlen & 0x3 || 1251 1203 lrhlen > get_lrh_len(*hdr, req->info.fragsize)) 1252 1204 return -EINVAL; 1253 1205 ··· 1307 1263 struct hfi1_pkt_header *hdr = &tx->hdr; 1308 1264 u16 pbclen; 1309 1265 int ret; 1310 - u32 tidval = 0, lrhlen = get_lrh_len(*hdr, datalen); 1266 + u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen)); 1311 1267 1312 1268 /* Copy the header template to the request before modification */ 1313 1269 memcpy(hdr, &req->hdr, sizeof(*hdr)); ··· 1418 1374 struct hfi1_user_sdma_pkt_q *pq = req->pq; 1419 1375 struct hfi1_pkt_header *hdr = &req->hdr; 1420 1376 u16 pbclen = le16_to_cpu(hdr->pbc[0]); 1421 - u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, len); 1377 + u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len)); 1422 1378 1423 1379 if (PBC2LRH(pbclen) != lrhlen) { 1424 1380 /* PBC.PbcLengthDWs */ ··· 1578 1534 continue; 1579 1535 1580 1536 if (unpin) 1581 - hfi1_mmu_rb_remove(&req->pq->sdma_rb_root, 1537 + hfi1_mmu_rb_remove(req->pq->handler, 1582 1538 &node->rb); 1583 1539 else 1584 1540 atomic_dec(&node->refcount); 1585 1541 } 1586 1542 } 1587 1543 kfree(req->tids); 1588 - clear_bit(SDMA_REQ_IN_USE, &req->flags); 1544 + clear_bit(req->info.comp_idx, req->pq->req_in_use); 1589 1545 } 1590 1546 1591 1547 static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, ··· 1608 1564 return (bool)(node->addr == addr); 1609 1565 } 1610 1566 1611 - static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode) 1567 + static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode) 1612 1568 { 1613 1569 struct sdma_mmu_node *node = 1614 1570 container_of(mnode, struct sdma_mmu_node, rb); ··· 1617 1573 return 0; 1618 1574 } 1619 1575 1620 - static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode, 1621 - struct mm_struct *mm) 1576 + /* 1577 + * 
Return 1 to remove the node from the rb tree and call the remove op. 1578 + * 1579 + * Called with the rb tree lock held. 1580 + */ 1581 + static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, 1582 + void *evict_arg, bool *stop) 1583 + { 1584 + struct sdma_mmu_node *node = 1585 + container_of(mnode, struct sdma_mmu_node, rb); 1586 + struct evict_data *evict_data = evict_arg; 1587 + 1588 + /* is this node still being used? */ 1589 + if (atomic_read(&node->refcount)) 1590 + return 0; /* keep this node */ 1591 + 1592 + /* this node will be evicted, add its pages to our count */ 1593 + evict_data->cleared += node->npages; 1594 + 1595 + /* have enough pages been cleared? */ 1596 + if (evict_data->cleared >= evict_data->target) 1597 + *stop = true; 1598 + 1599 + return 1; /* remove this node */ 1600 + } 1601 + 1602 + static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode) 1622 1603 { 1623 1604 struct sdma_mmu_node *node = 1624 1605 container_of(mnode, struct sdma_mmu_node, rb); 1625 1606 1626 - spin_lock(&node->pq->evict_lock); 1627 - /* 1628 - * We've been called by the MMU notifier but this node has been 1629 - * scheduled for eviction. The eviction function will take care 1630 - * of freeing this node. 1631 - * We have to take the above lock first because we are racing 1632 - * against the setting of the bit in the eviction function. 1633 - */ 1634 - if (mm && test_bit(SDMA_CACHE_NODE_EVICT, &node->flags)) { 1635 - spin_unlock(&node->pq->evict_lock); 1636 - return; 1637 - } 1607 + atomic_sub(node->npages, &node->pq->n_locked); 1638 1608 1639 - if (!list_empty(&node->list)) 1640 - list_del(&node->list); 1641 - node->pq->n_locked -= node->npages; 1642 - spin_unlock(&node->pq->evict_lock); 1609 + unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages); 1643 1610 1644 - /* 1645 - * If mm is set, we are being called by the MMU notifier and we 1646 - * should not pass a mm_struct to unpin_vector_page(). 
This is to 1647 - * prevent a deadlock when hfi1_release_user_pages() attempts to 1648 - * take the mmap_sem, which the MMU notifier has already taken. 1649 - */ 1650 - unpin_vector_pages(mm ? NULL : current->mm, node->pages, 0, 1651 - node->npages); 1652 - /* 1653 - * If called by the MMU notifier, we have to adjust the pinned 1654 - * page count ourselves. 1655 - */ 1656 - if (mm) 1657 - mm->pinned_vm -= node->npages; 1658 1611 kfree(node); 1659 1612 } 1660 1613 1661 - static int sdma_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode) 1614 + static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode) 1662 1615 { 1663 1616 struct sdma_mmu_node *node = 1664 1617 container_of(mnode, struct sdma_mmu_node, rb);
+4 -4
drivers/infiniband/hw/hfi1/user_sdma.h
··· 63 63 struct hfi1_devdata *dd; 64 64 struct kmem_cache *txreq_cache; 65 65 struct user_sdma_request *reqs; 66 + unsigned long *req_in_use; 66 67 struct iowait busy; 67 68 unsigned state; 68 69 wait_queue_head_t wait; 69 70 unsigned long unpinned; 70 - struct rb_root sdma_rb_root; 71 - u32 n_locked; 72 - struct list_head evict; 73 - spinlock_t evict_lock; /* protect evict and n_locked */ 71 + struct mmu_rb_handler *handler; 72 + atomic_t n_locked; 73 + struct mm_struct *mm; 74 74 }; 75 75 76 76 struct hfi1_user_sdma_comp_q {
+42 -32
drivers/infiniband/hw/hfi1/verbs.c
··· 306 306 [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, 307 307 [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, 308 308 [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, 309 - [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD 309 + [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, 310 + [IB_WR_SEND_WITH_INV] = IB_WC_SEND, 311 + [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV, 312 + [IB_WR_REG_MR] = IB_WC_REG_MR 310 313 }; 311 314 312 315 /* ··· 381 378 [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = &hfi1_rc_rcv, 382 379 [IB_OPCODE_RC_COMPARE_SWAP] = &hfi1_rc_rcv, 383 380 [IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv, 381 + [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv, 382 + [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv, 384 383 /* UC */ 385 384 [IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv, 386 385 [IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv, ··· 545 540 /* 546 541 * Make sure the QP is ready and able to accept the given opcode. 547 542 */ 548 - static inline int qp_ok(int opcode, struct hfi1_packet *packet) 543 + static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet) 549 544 { 550 - struct hfi1_ibport *ibp; 551 - 552 545 if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) 553 - goto dropit; 546 + return NULL; 554 547 if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) || 555 548 (opcode == IB_OPCODE_CNP)) 556 - return 1; 557 - dropit: 558 - ibp = &packet->rcd->ppd->ibport_data; 559 - ibp->rvp.n_pkt_drops++; 560 - return 0; 549 + return opcode_handler_tbl[opcode]; 550 + 551 + return NULL; 561 552 } 562 553 563 554 /** ··· 572 571 struct hfi1_pportdata *ppd = rcd->ppd; 573 572 struct hfi1_ibport *ibp = &ppd->ibport_data; 574 573 struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; 574 + opcode_handler packet_handler; 575 575 unsigned long flags; 576 576 u32 qp_num; 577 577 int lnh; ··· 618 616 list_for_each_entry_rcu(p, &mcast->qp_list, list) { 619 617 packet->qp = p->qp; 620 618 spin_lock_irqsave(&packet->qp->r_lock, flags); 621 - if (likely((qp_ok(opcode, 
packet)))) 622 - opcode_handler_tbl[opcode](packet); 619 + packet_handler = qp_ok(opcode, packet); 620 + if (likely(packet_handler)) 621 + packet_handler(packet); 622 + else 623 + ibp->rvp.n_pkt_drops++; 623 624 spin_unlock_irqrestore(&packet->qp->r_lock, flags); 624 625 } 625 626 /* ··· 639 634 goto drop; 640 635 } 641 636 spin_lock_irqsave(&packet->qp->r_lock, flags); 642 - if (likely((qp_ok(opcode, packet)))) 643 - opcode_handler_tbl[opcode](packet); 637 + packet_handler = qp_ok(opcode, packet); 638 + if (likely(packet_handler)) 639 + packet_handler(packet); 640 + else 641 + ibp->rvp.n_pkt_drops++; 644 642 spin_unlock_irqrestore(&packet->qp->r_lock, flags); 645 643 rcu_read_unlock(); 646 644 } ··· 816 808 struct rvt_sge_state *ss, 817 809 u32 length, 818 810 struct verbs_txreq *tx, 819 - struct ahg_ib_header *ahdr, 811 + struct hfi1_ahg_info *ahg_info, 820 812 u64 pbc) 821 813 { 822 814 int ret = 0; 823 - struct hfi1_pio_header *phdr = &tx->phdr; 815 + struct hfi1_sdma_header *phdr = &tx->phdr; 824 816 u16 hdrbytes = tx->hdr_dwords << 2; 825 817 826 - if (!ahdr->ahgcount) { 818 + if (!ahg_info->ahgcount) { 827 819 ret = sdma_txinit_ahg( 828 820 &tx->txreq, 829 - ahdr->tx_flags, 821 + ahg_info->tx_flags, 830 822 hdrbytes + length, 831 - ahdr->ahgidx, 823 + ahg_info->ahgidx, 832 824 0, 833 825 NULL, 834 826 0, ··· 846 838 } else { 847 839 ret = sdma_txinit_ahg( 848 840 &tx->txreq, 849 - ahdr->tx_flags, 841 + ahg_info->tx_flags, 850 842 length, 851 - ahdr->ahgidx, 852 - ahdr->ahgcount, 853 - ahdr->ahgdesc, 843 + ahg_info->ahgidx, 844 + ahg_info->ahgcount, 845 + ahg_info->ahgdesc, 854 846 hdrbytes, 855 847 verbs_sdma_complete); 856 848 if (ret) ··· 868 860 u64 pbc) 869 861 { 870 862 struct hfi1_qp_priv *priv = qp->priv; 871 - struct ahg_ib_header *ahdr = priv->s_hdr; 863 + struct hfi1_ahg_info *ahg_info = priv->s_ahg; 872 864 u32 hdrwords = qp->s_hdrwords; 873 865 struct rvt_sge_state *ss = qp->s_cur_sge; 874 866 u32 len = qp->s_cur_size; ··· 896 888 plen); 897 889 
} 898 890 tx->wqe = qp->s_wqe; 899 - ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc); 891 + ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahg_info, pbc); 900 892 if (unlikely(ret)) 901 893 goto bail_build; 902 894 } ··· 1308 1300 rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | 1309 1301 IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | 1310 1302 IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | 1311 - IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; 1303 + IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE | 1304 + IB_DEVICE_MEM_MGT_EXTENSIONS; 1312 1305 rdi->dparms.props.page_size_cap = PAGE_SIZE; 1313 1306 rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; 1314 1307 rdi->dparms.props.vendor_part_id = dd->pcidev->device; 1315 1308 rdi->dparms.props.hw_ver = dd->minrev; 1316 1309 rdi->dparms.props.sys_image_guid = ib_hfi1_sys_image_guid; 1317 - rdi->dparms.props.max_mr_size = ~0ULL; 1310 + rdi->dparms.props.max_mr_size = U64_MAX; 1311 + rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX; 1318 1312 rdi->dparms.props.max_qp = hfi1_max_qps; 1319 1313 rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs; 1320 1314 rdi->dparms.props.max_sge = hfi1_max_sges; ··· 1705 1695 dd->verbs_dev.rdi.dparms.nports = dd->num_pports; 1706 1696 dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd); 1707 1697 1698 + /* post send table */ 1699 + dd->verbs_dev.rdi.post_parms = hfi1_post_parms; 1700 + 1708 1701 ppd = dd->pport; 1709 1702 for (i = 0; i < dd->num_pports; i++, ppd++) 1710 1703 rvt_init_port(&dd->verbs_dev.rdi, ··· 1758 1745 struct rvt_qp *qp = packet->qp; 1759 1746 u32 lqpn, rqpn = 0; 1760 1747 u16 rlid = 0; 1761 - u8 sl, sc5, sc4_bit, svc_type; 1762 - bool sc4_set = has_sc4_bit(packet); 1748 + u8 sl, sc5, svc_type; 1763 1749 1764 1750 switch (packet->qp->ibqp.qp_type) { 1765 1751 case IB_QPT_UC: ··· 1781 1769 return; 1782 1770 } 1783 1771 1784 - sc4_bit = sc4_set << 4; 1785 - sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 
0xf; 1786 - sc5 |= sc4_bit; 1772 + sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf); 1787 1773 sl = ibp->sc_to_sl[sc5]; 1788 1774 lqpn = qp->ibqp.qp_num; 1789 1775
+3 -5
drivers/infiniband/hw/hfi1/verbs.h
··· 178 178 } u; 179 179 } __packed; 180 180 181 - struct ahg_ib_header { 182 - struct sdma_engine *sde; 181 + struct hfi1_ahg_info { 183 182 u32 ahgdesc[2]; 184 183 u16 tx_flags; 185 184 u8 ahgcount; 186 185 u8 ahgidx; 187 - struct hfi1_ib_header ibh; 188 186 }; 189 187 190 - struct hfi1_pio_header { 188 + struct hfi1_sdma_header { 191 189 __le64 pbc; 192 190 struct hfi1_ib_header hdr; 193 191 } __packed; ··· 195 197 * pair is made common 196 198 */ 197 199 struct hfi1_qp_priv { 198 - struct ahg_ib_header *s_hdr; /* next header to send */ 200 + struct hfi1_ahg_info *s_ahg; /* ahg info for next header */ 199 201 struct sdma_engine *s_sde; /* current sde */ 200 202 struct send_context *s_sendcontext; /* current sendcontext */ 201 203 u8 s_sc; /* SC[0..4] for next packet */
+1 -1
drivers/infiniband/hw/hfi1/verbs_txreq.h
··· 56 56 #include "iowait.h" 57 57 58 58 struct verbs_txreq { 59 - struct hfi1_pio_header phdr; 59 + struct hfi1_sdma_header phdr; 60 60 struct sdma_txreq txreq; 61 61 struct rvt_qp *qp; 62 62 struct rvt_swqe *wqe;
+43
drivers/infiniband/hw/qib/qib_qp.c
··· 106 106 32768 /* 1E */ 107 107 }; 108 108 109 + const struct rvt_operation_params qib_post_parms[RVT_OPERATION_MAX] = { 110 + [IB_WR_RDMA_WRITE] = { 111 + .length = sizeof(struct ib_rdma_wr), 112 + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), 113 + }, 114 + 115 + [IB_WR_RDMA_READ] = { 116 + .length = sizeof(struct ib_rdma_wr), 117 + .qpt_support = BIT(IB_QPT_RC), 118 + .flags = RVT_OPERATION_ATOMIC, 119 + }, 120 + 121 + [IB_WR_ATOMIC_CMP_AND_SWP] = { 122 + .length = sizeof(struct ib_atomic_wr), 123 + .qpt_support = BIT(IB_QPT_RC), 124 + .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE, 125 + }, 126 + 127 + [IB_WR_ATOMIC_FETCH_AND_ADD] = { 128 + .length = sizeof(struct ib_atomic_wr), 129 + .qpt_support = BIT(IB_QPT_RC), 130 + .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE, 131 + }, 132 + 133 + [IB_WR_RDMA_WRITE_WITH_IMM] = { 134 + .length = sizeof(struct ib_rdma_wr), 135 + .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), 136 + }, 137 + 138 + [IB_WR_SEND] = { 139 + .length = sizeof(struct ib_send_wr), 140 + .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) | 141 + BIT(IB_QPT_UC) | BIT(IB_QPT_RC), 142 + }, 143 + 144 + [IB_WR_SEND_WITH_IMM] = { 145 + .length = sizeof(struct ib_send_wr), 146 + .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) | 147 + BIT(IB_QPT_UC) | BIT(IB_QPT_RC), 148 + }, 149 + 150 + }; 151 + 109 152 static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, 110 153 gfp_t gfp) 111 154 {
+6 -2
drivers/infiniband/hw/qib/qib_ud.c
··· 169 169 } 170 170 171 171 if (ah_attr->ah_flags & IB_AH_GRH) { 172 - qib_copy_sge(&qp->r_sge, &ah_attr->grh, 173 - sizeof(struct ib_grh), 1); 172 + struct ib_grh grh; 173 + struct ib_global_route grd = ah_attr->grh; 174 + 175 + qib_make_grh(ibp, &grh, &grd, 0, 0); 176 + qib_copy_sge(&qp->r_sge, &grh, 177 + sizeof(grh), 1); 174 178 wc.wc_flags |= IB_WC_GRH; 175 179 } else 176 180 qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+2
drivers/infiniband/hw/qib/qib_verbs.c
··· 1582 1582 rdi->dparms.props.max_total_mcast_qp_attach = 1583 1583 rdi->dparms.props.max_mcast_qp_attach * 1584 1584 rdi->dparms.props.max_mcast_grp; 1585 + /* post send table */ 1586 + dd->verbs_dev.rdi.post_parms = qib_post_parms; 1585 1587 } 1586 1588 1587 1589 /**
+2
drivers/infiniband/hw/qib/qib_verbs.h
··· 497 497 498 498 extern const u32 ib_qib_rnr_table[]; 499 499 500 + extern const struct rvt_operation_params qib_post_parms[]; 501 + 500 502 #endif /* QIB_VERBS_H */
+1
drivers/infiniband/sw/rdmavt/cq.c
··· 510 510 511 511 if (rdi->worker) 512 512 return 0; 513 + spin_lock_init(&rdi->n_cqs_lock); 513 514 rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL); 514 515 if (!rdi->worker) 515 516 return -ENOMEM;
+122 -2
drivers/infiniband/sw/rdmavt/mr.c
··· 140 140 init_completion(&mr->comp); 141 141 /* count returning the ptr to user */ 142 142 atomic_set(&mr->refcount, 1); 143 + atomic_set(&mr->lkey_invalid, 0); 143 144 mr->pd = pd; 144 145 mr->max_segs = count; 145 146 return 0; ··· 481 480 } 482 481 483 482 /** 483 + * rvt_set_page - page assignment function called by ib_sg_to_pages 484 + * @ibmr: memory region 485 + * @addr: dma address of mapped page 486 + * 487 + * Return: 0 on success 488 + */ 489 + static int rvt_set_page(struct ib_mr *ibmr, u64 addr) 490 + { 491 + struct rvt_mr *mr = to_imr(ibmr); 492 + u32 ps = 1 << mr->mr.page_shift; 493 + u32 mapped_segs = mr->mr.length >> mr->mr.page_shift; 494 + int m, n; 495 + 496 + if (unlikely(mapped_segs == mr->mr.max_segs)) 497 + return -ENOMEM; 498 + 499 + if (mr->mr.length == 0) { 500 + mr->mr.user_base = addr; 501 + mr->mr.iova = addr; 502 + } 503 + 504 + m = mapped_segs / RVT_SEGSZ; 505 + n = mapped_segs % RVT_SEGSZ; 506 + mr->mr.map[m]->segs[n].vaddr = (void *)addr; 507 + mr->mr.map[m]->segs[n].length = ps; 508 + mr->mr.length += ps; 509 + 510 + return 0; 511 + } 512 + 513 + /** 514 + * rvt_map_mr_sg - map sg list and set it the memory region 515 + * @ibmr: memory region 516 + * @sg: dma mapped scatterlist 517 + * @sg_nents: number of entries in sg 518 + * @sg_offset: offset in bytes into sg 519 + * 520 + * Return: number of sg elements mapped to the memory region 521 + */ 522 + int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, 523 + int sg_nents, unsigned int *sg_offset) 524 + { 525 + struct rvt_mr *mr = to_imr(ibmr); 526 + 527 + mr->mr.length = 0; 528 + mr->mr.page_shift = PAGE_SHIFT; 529 + return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, 530 + rvt_set_page); 531 + } 532 + 533 + /** 534 + * rvt_fast_reg_mr - fast register physical MR 535 + * @qp: the queue pair where the work request comes from 536 + * @ibmr: the memory region to be registered 537 + * @key: updated key for this memory region 538 + * @access: access flags for this memory 
region 539 + * 540 + * Returns 0 on success. 541 + */ 542 + int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key, 543 + int access) 544 + { 545 + struct rvt_mr *mr = to_imr(ibmr); 546 + 547 + if (qp->ibqp.pd != mr->mr.pd) 548 + return -EACCES; 549 + 550 + /* not applicable to dma MR or user MR */ 551 + if (!mr->mr.lkey || mr->umem) 552 + return -EINVAL; 553 + 554 + if ((key & 0xFFFFFF00) != (mr->mr.lkey & 0xFFFFFF00)) 555 + return -EINVAL; 556 + 557 + ibmr->lkey = key; 558 + ibmr->rkey = key; 559 + mr->mr.lkey = key; 560 + mr->mr.access_flags = access; 561 + atomic_set(&mr->mr.lkey_invalid, 0); 562 + 563 + return 0; 564 + } 565 + EXPORT_SYMBOL(rvt_fast_reg_mr); 566 + 567 + /** 568 + * rvt_invalidate_rkey - invalidate an MR rkey 569 + * @qp: queue pair associated with the invalidate op 570 + * @rkey: rkey to invalidate 571 + * 572 + * Returns 0 on success. 573 + */ 574 + int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey) 575 + { 576 + struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device); 577 + struct rvt_lkey_table *rkt = &dev->lkey_table; 578 + struct rvt_mregion *mr; 579 + 580 + if (rkey == 0) 581 + return -EINVAL; 582 + 583 + rcu_read_lock(); 584 + mr = rcu_dereference( 585 + rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]); 586 + if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) 587 + goto bail; 588 + 589 + atomic_set(&mr->lkey_invalid, 1); 590 + rcu_read_unlock(); 591 + return 0; 592 + 593 + bail: 594 + rcu_read_unlock(); 595 + return -EINVAL; 596 + } 597 + EXPORT_SYMBOL(rvt_invalidate_rkey); 598 + 599 + /** 484 600 * rvt_alloc_fmr - allocate a fast memory region 485 601 * @pd: the protection domain for this memory region 486 602 * @mr_access_flags: access flags for this memory region ··· 800 682 } 801 683 mr = rcu_dereference( 802 684 rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]); 803 - if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) 685 + if (unlikely(!mr || 
atomic_read(&mr->lkey_invalid) || 686 + mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) 804 687 goto bail; 805 688 806 689 off = sge->addr - mr->user_base; ··· 901 782 902 783 mr = rcu_dereference( 903 784 rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]); 904 - if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) 785 + if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || 786 + mr->lkey != rkey || qp->ibqp.pd != mr->pd)) 905 787 goto bail; 906 788 907 789 off = vaddr - mr->iova;
+2
drivers/infiniband/sw/rdmavt/mr.h
··· 82 82 struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, 83 83 enum ib_mr_type mr_type, 84 84 u32 max_num_sg); 85 + int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, 86 + int sg_nents, unsigned int *sg_offset); 85 87 struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, 86 88 struct ib_fmr_attr *fmr_attr); 87 89 int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+181 -65
drivers/infiniband/sw/rdmavt/qp.c
··· 435 435 for (n = 0; n < rvt_max_atomic(rdi); n++) { 436 436 struct rvt_ack_entry *e = &qp->s_ack_queue[n]; 437 437 438 - if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && 439 - e->rdma_sge.mr) { 438 + if (e->rdma_sge.mr) { 440 439 rvt_put_mr(e->rdma_sge.mr); 441 440 e->rdma_sge.mr = NULL; 442 441 } ··· 583 584 qp->r_rq.wq->tail = 0; 584 585 } 585 586 qp->r_sge.num_sge = 0; 587 + atomic_set(&qp->s_reserved_used, 0); 586 588 } 587 589 588 590 /** ··· 613 613 struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); 614 614 void *priv = NULL; 615 615 gfp_t gfp; 616 + size_t sqsize; 616 617 617 618 if (!rdi) 618 619 return ERR_PTR(-EINVAL); ··· 644 643 init_attr->cap.max_recv_wr == 0) 645 644 return ERR_PTR(-EINVAL); 646 645 } 647 - 646 + sqsize = 647 + init_attr->cap.max_send_wr + 1 + 648 + rdi->dparms.reserved_operations; 648 649 switch (init_attr->qp_type) { 649 650 case IB_QPT_SMI: 650 651 case IB_QPT_GSI: ··· 661 658 sizeof(struct rvt_swqe); 662 659 if (gfp == GFP_NOIO) 663 660 swq = __vmalloc( 664 - (init_attr->cap.max_send_wr + 1) * sz, 661 + sqsize * sz, 665 662 gfp | __GFP_ZERO, PAGE_KERNEL); 666 663 else 667 664 swq = vzalloc_node( 668 - (init_attr->cap.max_send_wr + 1) * sz, 665 + sqsize * sz, 669 666 rdi->dparms.node); 670 667 if (!swq) 671 668 return ERR_PTR(-ENOMEM); ··· 744 741 spin_lock_init(&qp->s_lock); 745 742 spin_lock_init(&qp->r_rq.lock); 746 743 atomic_set(&qp->refcount, 0); 744 + atomic_set(&qp->local_ops_pending, 0); 747 745 init_waitqueue_head(&qp->wait); 748 746 init_timer(&qp->s_timer); 749 747 qp->s_timer.data = (unsigned long)qp; 750 748 INIT_LIST_HEAD(&qp->rspwait); 751 749 qp->state = IB_QPS_RESET; 752 750 qp->s_wq = swq; 753 - qp->s_size = init_attr->cap.max_send_wr + 1; 751 + qp->s_size = sqsize; 754 752 qp->s_avail = init_attr->cap.max_send_wr; 755 753 qp->s_max_sge = init_attr->cap.max_send_sge; 756 754 if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) ··· 1336 1332 attr->sq_psn = qp->s_next_psn & rdi->dparms.psn_mask; 1337 1333 
attr->dest_qp_num = qp->remote_qpn; 1338 1334 attr->qp_access_flags = qp->qp_access_flags; 1339 - attr->cap.max_send_wr = qp->s_size - 1; 1335 + attr->cap.max_send_wr = qp->s_size - 1 - 1336 + rdi->dparms.reserved_operations; 1340 1337 attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1; 1341 1338 attr->cap.max_send_sge = qp->s_max_sge; 1342 1339 attr->cap.max_recv_sge = qp->r_rq.max_sge; ··· 1445 1440 } 1446 1441 1447 1442 /** 1448 - * qp_get_savail - return number of avail send entries 1449 - * 1443 + * rvt_qp_valid_operation - validate post send wr request 1450 1444 * @qp - the qp 1445 + * @post-parms - the post send table for the driver 1446 + * @wr - the work request 1447 + * 1448 + * The routine validates the operation based on the 1449 + * validation table an returns the length of the operation 1450 + * which can extend beyond the ib_send_bw. Operation 1451 + * dependent flags key atomic operation validation. 1452 + * 1453 + * There is an exception for UD qps that validates the pd and 1454 + * overrides the length to include the additional UD specific 1455 + * length. 1456 + * 1457 + * Returns a negative error or the length of the work request 1458 + * for building the swqe. 
1459 + */ 1460 + static inline int rvt_qp_valid_operation( 1461 + struct rvt_qp *qp, 1462 + const struct rvt_operation_params *post_parms, 1463 + struct ib_send_wr *wr) 1464 + { 1465 + int len; 1466 + 1467 + if (wr->opcode >= RVT_OPERATION_MAX || !post_parms[wr->opcode].length) 1468 + return -EINVAL; 1469 + if (!(post_parms[wr->opcode].qpt_support & BIT(qp->ibqp.qp_type))) 1470 + return -EINVAL; 1471 + if ((post_parms[wr->opcode].flags & RVT_OPERATION_PRIV) && 1472 + ibpd_to_rvtpd(qp->ibqp.pd)->user) 1473 + return -EINVAL; 1474 + if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC_SGE && 1475 + (wr->num_sge == 0 || 1476 + wr->sg_list[0].length < sizeof(u64) || 1477 + wr->sg_list[0].addr & (sizeof(u64) - 1))) 1478 + return -EINVAL; 1479 + if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC && 1480 + !qp->s_max_rd_atomic) 1481 + return -EINVAL; 1482 + len = post_parms[wr->opcode].length; 1483 + /* UD specific */ 1484 + if (qp->ibqp.qp_type != IB_QPT_UC && 1485 + qp->ibqp.qp_type != IB_QPT_RC) { 1486 + if (qp->ibqp.pd != ud_wr(wr)->ah->pd) 1487 + return -EINVAL; 1488 + len = sizeof(struct ib_ud_wr); 1489 + } 1490 + return len; 1491 + } 1492 + 1493 + /** 1494 + * rvt_qp_is_avail - determine queue capacity 1495 + * @qp - the qp 1496 + * @rdi - the rdmavt device 1497 + * @reserved_op - is reserved operation 1451 1498 * 1452 1499 * This assumes the s_hlock is held but the s_last 1453 1500 * qp variable is uncontrolled. 1501 + * 1502 + * For non reserved operations, the qp->s_avail 1503 + * may be changed. 1504 + * 1505 + * The return value is zero or a -ENOMEM. 
1454 1506 */ 1455 - static inline u32 qp_get_savail(struct rvt_qp *qp) 1507 + static inline int rvt_qp_is_avail( 1508 + struct rvt_qp *qp, 1509 + struct rvt_dev_info *rdi, 1510 + bool reserved_op) 1456 1511 { 1457 1512 u32 slast; 1458 - u32 ret; 1513 + u32 avail; 1514 + u32 reserved_used; 1459 1515 1516 + /* see rvt_qp_wqe_unreserve() */ 1517 + smp_mb__before_atomic(); 1518 + reserved_used = atomic_read(&qp->s_reserved_used); 1519 + if (unlikely(reserved_op)) { 1520 + /* see rvt_qp_wqe_unreserve() */ 1521 + smp_mb__before_atomic(); 1522 + if (reserved_used >= rdi->dparms.reserved_operations) 1523 + return -ENOMEM; 1524 + return 0; 1525 + } 1526 + /* non-reserved operations */ 1527 + if (likely(qp->s_avail)) 1528 + return 0; 1460 1529 smp_read_barrier_depends(); /* see rc.c */ 1461 1530 slast = ACCESS_ONCE(qp->s_last); 1462 1531 if (qp->s_head >= slast) 1463 - ret = qp->s_size - (qp->s_head - slast); 1532 + avail = qp->s_size - (qp->s_head - slast); 1464 1533 else 1465 - ret = slast - qp->s_head; 1466 - return ret - 1; 1534 + avail = slast - qp->s_head; 1535 + 1536 + /* see rvt_qp_wqe_unreserve() */ 1537 + smp_mb__before_atomic(); 1538 + reserved_used = atomic_read(&qp->s_reserved_used); 1539 + avail = avail - 1 - 1540 + (rdi->dparms.reserved_operations - reserved_used); 1541 + /* insure we don't assign a negative s_avail */ 1542 + if ((s32)avail <= 0) 1543 + return -ENOMEM; 1544 + qp->s_avail = avail; 1545 + if (WARN_ON(qp->s_avail > 1546 + (qp->s_size - 1 - rdi->dparms.reserved_operations))) 1547 + rvt_pr_err(rdi, 1548 + "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u", 1549 + qp->ibqp.qp_num, qp->s_size, qp->s_avail, 1550 + qp->s_head, qp->s_tail, qp->s_cur, 1551 + qp->s_acked, qp->s_last); 1552 + return 0; 1467 1553 } 1468 1554 1469 1555 /** ··· 1576 1480 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); 1577 1481 u8 log_pmtu; 1578 1482 int ret; 1483 + size_t cplen; 1484 + bool reserved_op; 
1485 + int local_ops_delayed = 0; 1486 + 1487 + BUILD_BUG_ON(IB_QPT_MAX >= (sizeof(u32) * BITS_PER_BYTE)); 1579 1488 1580 1489 /* IB spec says that num_sge == 0 is OK. */ 1581 1490 if (unlikely(wr->num_sge > qp->s_max_sge)) 1582 1491 return -EINVAL; 1583 1492 1493 + ret = rvt_qp_valid_operation(qp, rdi->post_parms, wr); 1494 + if (ret < 0) 1495 + return ret; 1496 + cplen = ret; 1497 + 1584 1498 /* 1585 - * Don't allow RDMA reads or atomic operations on UC or 1586 - * undefined operations. 1587 - * Make sure buffer is large enough to hold the result for atomics. 1499 + * Local operations include fast register and local invalidate. 1500 + * Fast register needs to be processed immediately because the 1501 + * registered lkey may be used by following work requests and the 1502 + * lkey needs to be valid at the time those requests are posted. 1503 + * Local invalidate can be processed immediately if fencing is 1504 + * not required and no previous local invalidate ops are pending. 1505 + * Signaled local operations that have been processed immediately 1506 + * need to have requests with "completion only" flags set posted 1507 + * to the send queue in order to generate completions. 
1588 1508 */ 1589 - if (qp->ibqp.qp_type == IB_QPT_UC) { 1590 - if ((unsigned)wr->opcode >= IB_WR_RDMA_READ) 1509 + if ((rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL)) { 1510 + switch (wr->opcode) { 1511 + case IB_WR_REG_MR: 1512 + ret = rvt_fast_reg_mr(qp, 1513 + reg_wr(wr)->mr, 1514 + reg_wr(wr)->key, 1515 + reg_wr(wr)->access); 1516 + if (ret || !(wr->send_flags & IB_SEND_SIGNALED)) 1517 + return ret; 1518 + break; 1519 + case IB_WR_LOCAL_INV: 1520 + if ((wr->send_flags & IB_SEND_FENCE) || 1521 + atomic_read(&qp->local_ops_pending)) { 1522 + local_ops_delayed = 1; 1523 + } else { 1524 + ret = rvt_invalidate_rkey( 1525 + qp, wr->ex.invalidate_rkey); 1526 + if (ret || !(wr->send_flags & IB_SEND_SIGNALED)) 1527 + return ret; 1528 + } 1529 + break; 1530 + default: 1591 1531 return -EINVAL; 1592 - } else if (qp->ibqp.qp_type != IB_QPT_RC) { 1593 - /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */ 1594 - if (wr->opcode != IB_WR_SEND && 1595 - wr->opcode != IB_WR_SEND_WITH_IMM) 1596 - return -EINVAL; 1597 - /* Check UD destination address PD */ 1598 - if (qp->ibqp.pd != ud_wr(wr)->ah->pd) 1599 - return -EINVAL; 1600 - } else if ((unsigned)wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) { 1601 - return -EINVAL; 1602 - } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && 1603 - (wr->num_sge == 0 || 1604 - wr->sg_list[0].length < sizeof(u64) || 1605 - wr->sg_list[0].addr & (sizeof(u64) - 1))) { 1606 - return -EINVAL; 1607 - } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { 1608 - return -EINVAL; 1532 + } 1609 1533 } 1534 + 1535 + reserved_op = rdi->post_parms[wr->opcode].flags & 1536 + RVT_OPERATION_USE_RESERVE; 1610 1537 /* check for avail */ 1611 - if (unlikely(!qp->s_avail)) { 1612 - qp->s_avail = qp_get_savail(qp); 1613 - if (WARN_ON(qp->s_avail > (qp->s_size - 1))) 1614 - rvt_pr_err(rdi, 1615 - "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u", 1616 - qp->ibqp.qp_num, 
qp->s_size, qp->s_avail, 1617 - qp->s_head, qp->s_tail, qp->s_cur, 1618 - qp->s_acked, qp->s_last); 1619 - if (!qp->s_avail) 1620 - return -ENOMEM; 1621 - } 1538 + ret = rvt_qp_is_avail(qp, rdi, reserved_op); 1539 + if (ret) 1540 + return ret; 1622 1541 next = qp->s_head + 1; 1623 1542 if (next >= qp->s_size) 1624 1543 next = 0; ··· 1642 1531 pd = ibpd_to_rvtpd(qp->ibqp.pd); 1643 1532 wqe = rvt_get_swqe_ptr(qp, qp->s_head); 1644 1533 1645 - if (qp->ibqp.qp_type != IB_QPT_UC && 1646 - qp->ibqp.qp_type != IB_QPT_RC) 1647 - memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); 1648 - else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || 1649 - wr->opcode == IB_WR_RDMA_WRITE || 1650 - wr->opcode == IB_WR_RDMA_READ) 1651 - memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr)); 1652 - else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || 1653 - wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) 1654 - memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr)); 1655 - else 1656 - memcpy(&wqe->wr, wr, sizeof(wqe->wr)); 1534 + /* cplen has length from above */ 1535 + memcpy(&wqe->wr, wr, cplen); 1657 1536 1658 1537 wqe->length = 0; 1659 1538 j = 0; ··· 1686 1585 atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); 1687 1586 } 1688 1587 1689 - wqe->ssn = qp->s_ssn++; 1690 - wqe->psn = qp->s_next_psn; 1691 - wqe->lpsn = wqe->psn + 1692 - (wqe->length ? ((wqe->length - 1) >> log_pmtu) : 0); 1693 - qp->s_next_psn = wqe->lpsn + 1; 1588 + if (rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) { 1589 + if (local_ops_delayed) 1590 + atomic_inc(&qp->local_ops_pending); 1591 + else 1592 + wqe->wr.send_flags |= RVT_SEND_COMPLETION_ONLY; 1593 + wqe->ssn = 0; 1594 + wqe->psn = 0; 1595 + wqe->lpsn = 0; 1596 + } else { 1597 + wqe->ssn = qp->s_ssn++; 1598 + wqe->psn = qp->s_next_psn; 1599 + wqe->lpsn = wqe->psn + 1600 + (wqe->length ? 
1601 + ((wqe->length - 1) >> log_pmtu) : 1602 + 0); 1603 + qp->s_next_psn = wqe->lpsn + 1; 1604 + } 1694 1605 trace_rvt_post_one_wr(qp, wqe); 1606 + if (unlikely(reserved_op)) 1607 + rvt_qp_wqe_reserve(qp, wqe); 1608 + else 1609 + qp->s_avail--; 1695 1610 smp_wmb(); /* see request builders */ 1696 - qp->s_avail--; 1697 1611 qp->s_head = next; 1698 1612 1699 1613 return 0;
+9 -1
drivers/infiniband/sw/rdmavt/vt.c
··· 370 370 REG_USER_MR, 371 371 DEREG_MR, 372 372 ALLOC_MR, 373 + MAP_MR_SG, 373 374 ALLOC_FMR, 374 375 MAP_PHYS_FMR, 375 376 UNMAP_FMR, ··· 529 528 post_send), 530 529 rvt_post_send)) 531 530 if (!rdi->driver_f.schedule_send || 532 - !rdi->driver_f.do_send) 531 + !rdi->driver_f.do_send || 532 + !rdi->post_parms) 533 533 return -EINVAL; 534 534 break; 535 535 ··· 633 631 check_driver_override(rdi, offsetof(struct ib_device, 634 632 alloc_mr), 635 633 rvt_alloc_mr); 634 + break; 635 + 636 + case MAP_MR_SG: 637 + check_driver_override(rdi, offsetof(struct ib_device, 638 + map_mr_sg), 639 + rvt_map_mr_sg); 636 640 break; 637 641 638 642 case MAP_PHYS_FMR:
-2
drivers/infiniband/ulp/isert/ib_isert.c
··· 137 137 attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; 138 138 attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX; 139 139 attr.cap.max_send_sge = device->ib_device->attrs.max_sge; 140 - isert_conn->max_sge = min(device->ib_device->attrs.max_sge, 141 - device->ib_device->attrs.max_sge_rd); 142 140 attr.cap.max_recv_sge = 1; 143 141 attr.sq_sig_type = IB_SIGNAL_REQ_WR; 144 142 attr.qp_type = IB_QPT_RC;
-1
drivers/infiniband/ulp/isert/ib_isert.h
··· 138 138 u32 responder_resources; 139 139 u32 initiator_depth; 140 140 bool pi_support; 141 - u32 max_sge; 142 141 struct iser_rx_desc *login_req_buf; 143 142 char *login_rsp_buf; 144 143 u64 login_req_dma;
+4 -6
drivers/infiniband/ulp/srpt/ib_srpt.c
··· 1601 1601 struct ib_qp_init_attr *qp_init; 1602 1602 struct srpt_port *sport = ch->sport; 1603 1603 struct srpt_device *sdev = sport->sdev; 1604 + const struct ib_device_attr *attrs = &sdev->device->attrs; 1604 1605 u32 srp_sq_size = sport->port_attrib.srp_sq_size; 1605 1606 int ret; 1606 1607 ··· 1639 1638 */ 1640 1639 qp_init->cap.max_send_wr = srp_sq_size / 2; 1641 1640 qp_init->cap.max_rdma_ctxs = srp_sq_size / 2; 1642 - qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE; 1641 + qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE); 1643 1642 qp_init->port_num = ch->sport->port; 1644 1643 1645 1644 ch->qp = ib_create_qp(sdev->pd, qp_init); ··· 2262 2261 container_of(cmd, struct srpt_send_ioctx, cmd); 2263 2262 struct srpt_rdma_ch *ch = ioctx->ch; 2264 2263 struct srpt_device *sdev = ch->sport->sdev; 2265 - struct ib_send_wr send_wr, *first_wr = NULL, *bad_wr; 2264 + struct ib_send_wr send_wr, *first_wr = &send_wr, *bad_wr; 2266 2265 struct ib_sge sge; 2267 2266 enum srpt_command_state state; 2268 2267 unsigned long flags; ··· 2303 2302 struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i]; 2304 2303 2305 2304 first_wr = rdma_rw_ctx_wrs(&ctx->rw, ch->qp, 2306 - ch->sport->port, NULL, 2307 - first_wr ? first_wr : &send_wr); 2305 + ch->sport->port, NULL, first_wr); 2308 2306 } 2309 - } else { 2310 - first_wr = &send_wr; 2311 2307 } 2312 2308 2313 2309 if (state != SRPT_STATE_MGMT)
+5 -1
drivers/infiniband/ulp/srpt/ib_srpt.h
··· 106 106 SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2, 107 107 108 108 SRPT_DEF_SG_TABLESIZE = 128, 109 - SRPT_DEF_SG_PER_WQE = 16, 109 + /* 110 + * An experimentally determined value that avoids that QP creation 111 + * fails due to "swiotlb buffer is full" on systems using the swiotlb. 112 + */ 113 + SRPT_MAX_SG_PER_WQE = 16, 110 114 111 115 MIN_SRPT_SQ_SIZE = 16, 112 116 DEF_SRPT_SQ_SIZE = 4096,
+6
include/rdma/ib_verbs.h
··· 1490 1490 struct ib_wq **ind_tbl; 1491 1491 }; 1492 1492 1493 + /* 1494 + * @max_write_sge: Maximum SGE elements per RDMA WRITE request. 1495 + * @max_read_sge: Maximum SGE elements per RDMA READ request. 1496 + */ 1493 1497 struct ib_qp { 1494 1498 struct ib_device *device; 1495 1499 struct ib_pd *pd; ··· 1515 1511 void (*event_handler)(struct ib_event *, void *); 1516 1512 void *qp_context; 1517 1513 u32 qp_num; 1514 + u32 max_write_sge; 1515 + u32 max_read_sge; 1518 1516 enum ib_qp_type qp_type; 1519 1517 struct ib_rwq_ind_table *rwq_ind_tbl; 1520 1518 };
-16
include/rdma/opa_port_info.h
··· 33 33 #if !defined(OPA_PORT_INFO_H) 34 34 #define OPA_PORT_INFO_H 35 35 36 - /* Temporary until HFI driver is updated */ 37 - #ifndef USE_PI_LED_ENABLE 38 - #define USE_PI_LED_ENABLE 0 39 - #endif 40 - 41 36 #define OPA_PORT_LINK_MODE_NOP 0 /* No change */ 42 37 #define OPA_PORT_LINK_MODE_OPA 4 /* Port mode is OPA */ 43 38 ··· 269 274 OPA_PI_MASK_MTU_CAP = 0x0F, 270 275 }; 271 276 272 - #if USE_PI_LED_ENABLE 273 277 struct opa_port_states { 274 278 u8 reserved; 275 279 u8 ledenable_offlinereason; /* 1 res, 1 bit, 6 bits */ 276 280 u8 reserved2; 277 281 u8 portphysstate_portstate; /* 4 bits, 4 bits */ 278 282 }; 279 - #define PI_LED_ENABLE_SUP 1 280 - #else 281 - struct opa_port_states { 282 - u8 reserved; 283 - u8 offline_reason; /* 2 res, 6 bits */ 284 - u8 reserved2; 285 - u8 portphysstate_portstate; /* 4 bits, 4 bits */ 286 - }; 287 - #define PI_LED_ENABLE_SUP 0 288 - #endif 289 283 290 284 struct opa_port_state_info { 291 285 struct opa_port_states port_states;
+7
include/rdma/rdma_vt.h
··· 158 158 u32 max_mad_size; 159 159 u8 qos_shift; 160 160 u8 max_rdma_atomic; 161 + u8 reserved_operations; 161 162 }; 162 163 163 164 /* Protection domain */ ··· 352 351 /* Driver specific properties */ 353 352 struct rvt_driver_params dparms; 354 353 354 + /* post send table */ 355 + const struct rvt_operation_params *post_parms; 356 + 355 357 struct rvt_mregion __rcu *dma_mr; 356 358 struct rvt_lkey_table lkey_table; 357 359 ··· 488 484 int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); 489 485 int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, 490 486 int port_index, u16 *pkey_table); 487 + int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key, 488 + int access); 489 + int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey); 491 490 int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, 492 491 u32 len, u64 vaddr, u32 rkey, int acc); 493 492 int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
+1
include/rdma/rdmavt_mr.h
··· 81 81 u32 mapsz; /* size of the map array */ 82 82 u8 page_shift; /* 0 - non unform/non powerof2 sizes */ 83 83 u8 lkey_published; /* in global table */ 84 + atomic_t lkey_invalid; /* true if current lkey is invalid */ 84 85 struct completion comp; /* complete when refcount goes to zero */ 85 86 atomic_t refcount; 86 87 struct rvt_segarray *map[0]; /* the segments */
+82 -10
include/rdma/rdmavt_qp.h
··· 145 145 (RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND) 146 146 147 147 /* 148 + * Internal send flags 149 + */ 150 + #define RVT_SEND_RESERVE_USED IB_SEND_RESERVED_START 151 + #define RVT_SEND_COMPLETION_ONLY (IB_SEND_RESERVED_START << 1) 152 + 153 + /* 148 154 * Send work request queue entry. 149 155 * The size of the sg_list is determined when the QP is created and stored 150 156 * in qp->s_max_sge. ··· 222 216 * to send a RDMA read response or atomic operation. 223 217 */ 224 218 struct rvt_ack_entry { 225 - u8 opcode; 226 - u8 sent; 219 + struct rvt_sge rdma_sge; 220 + u64 atomic_data; 227 221 u32 psn; 228 222 u32 lpsn; 229 - union { 230 - struct rvt_sge rdma_sge; 231 - u64 atomic_data; 232 - }; 223 + u8 opcode; 224 + u8 sent; 233 225 }; 234 226 235 227 #define RC_QP_SCALING_INTERVAL 5 236 228 237 - /* 238 - * Variables prefixed with s_ are for the requester (sender). 239 - * Variables prefixed with r_ are for the responder (receiver). 240 - * Variables prefixed with ack_ are for responder replies. 229 + #define RVT_OPERATION_PRIV 0x00000001 230 + #define RVT_OPERATION_ATOMIC 0x00000002 231 + #define RVT_OPERATION_ATOMIC_SGE 0x00000004 232 + #define RVT_OPERATION_LOCAL 0x00000008 233 + #define RVT_OPERATION_USE_RESERVE 0x00000010 234 + 235 + #define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1) 236 + 237 + /** 238 + * rvt_operation_params - op table entry 239 + * @length - the length to copy into the swqe entry 240 + * @qpt_support - a bit mask indicating QP type support 241 + * @flags - RVT_OPERATION flags (see above) 241 242 * 243 + * This supports table driven post send so that 244 + * the driver can have differing an potentially 245 + * different sets of operations. 246 + * 247 + **/ 248 + 249 + struct rvt_operation_params { 250 + size_t length; 251 + u32 qpt_support; 252 + u32 flags; 253 + }; 254 + 255 + /* 242 256 * Common variables are protected by both r_rq.lock and s_lock in that order 243 257 * which only happens in modify_qp() or changing the QP 'state'. 
244 258 */ ··· 333 307 u32 s_next_psn; /* PSN for next request */ 334 308 u32 s_avail; /* number of entries avail */ 335 309 u32 s_ssn; /* SSN of tail entry */ 310 + atomic_t s_reserved_used; /* reserved entries in use */ 336 311 337 312 spinlock_t s_lock ____cacheline_aligned_in_smp; 338 313 u32 s_flags; ··· 369 342 370 343 struct rvt_sge_state s_ack_rdma_sge; 371 344 struct timer_list s_timer; 345 + 346 + atomic_t local_ops_pending; /* number of fast_reg/local_inv reqs */ 372 347 373 348 /* 374 349 * This sge list MUST be last. Do not add anything below here. ··· 463 434 ((char *)rq->wq->wq + 464 435 (sizeof(struct rvt_rwqe) + 465 436 rq->max_sge * sizeof(struct ib_sge)) * n); 437 + } 438 + 439 + /** 440 + * rvt_qp_wqe_reserve - reserve operation 441 + * @qp - the rvt qp 442 + * @wqe - the send wqe 443 + * 444 + * This routine used in post send to record 445 + * a wqe relative reserved operation use. 446 + */ 447 + static inline void rvt_qp_wqe_reserve( 448 + struct rvt_qp *qp, 449 + struct rvt_swqe *wqe) 450 + { 451 + wqe->wr.send_flags |= RVT_SEND_RESERVE_USED; 452 + atomic_inc(&qp->s_reserved_used); 453 + } 454 + 455 + /** 456 + * rvt_qp_wqe_unreserve - clean reserved operation 457 + * @qp - the rvt qp 458 + * @wqe - the send wqe 459 + * 460 + * This decrements the reserve use count. 461 + * 462 + * This call MUST precede the change to 463 + * s_last to insure that post send sees a stable 464 + * s_avail. 465 + * 466 + * An smp_mp__after_atomic() is used to insure 467 + * the compiler does not juggle the order of the s_last 468 + * ring index and the decrementing of s_reserved_used. 
469 + */ 470 + static inline void rvt_qp_wqe_unreserve( 471 + struct rvt_qp *qp, 472 + struct rvt_swqe *wqe) 473 + { 474 + if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) { 475 + wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED; 476 + atomic_dec(&qp->s_reserved_used); 477 + /* insure no compiler re-order up to s_last change */ 478 + smp_mb__after_atomic(); 479 + } 466 480 } 467 481 468 482 extern const int ib_rvt_state_ops[];
+1 -1
include/uapi/rdma/hfi/hfi1_user.h
··· 75 75 * may not be implemented; the user code must deal with this if it 76 76 * cares, or it must abort after initialization reports the difference. 77 77 */ 78 - #define HFI1_USER_SWMINOR 1 78 + #define HFI1_USER_SWMINOR 2 79 79 80 80 /* 81 81 * We will encode the major/minor inside a single 32bit version number.