Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

IB/hfi1: Virtual Network Interface Controller (VNIC) HW support

HFI1 HW specific support for VNIC functionality.
Dynamically allocate a set of contexts for VNIC when the first vnic
port is instantiated. Allocate VNIC contexts from user contexts pool
and return them back to the same pool while freeing up. Set aside
enough MSI-X interrupts for VNIC contexts and assign them when the
contexts are allocated. On the receive side, use an RSM rule to
spread TCP/UDP streams among VNIC contexts.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Andrzej Kacprowski <andrzej.kacprowski@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

Authored by Vishwanathapura, Niranjana; committed by Doug Ledford.
2280740f d4829ea6

+661 -105
+9 -6
drivers/infiniband/hw/hfi1/aspm.h
··· 1 1 /* 2 - * Copyright(c) 2015, 2016 Intel Corporation. 2 + * Copyright(c) 2015-2017 Intel Corporation. 3 3 * 4 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 5 * redistributing this file, you may do so under either license. ··· 229 229 spin_unlock_irqrestore(&rcd->aspm_lock, flags); 230 230 } 231 231 232 - /* Disable interrupt processing for verbs contexts when PSM contexts are open */ 232 + /* 233 + * Disable interrupt processing for verbs contexts when PSM or VNIC contexts 234 + * are open. 235 + */ 233 236 static inline void aspm_disable_all(struct hfi1_devdata *dd) 234 237 { 235 238 struct hfi1_ctxtdata *rcd; 236 239 unsigned long flags; 237 240 unsigned i; 238 241 239 - for (i = 0; i < dd->first_user_ctxt; i++) { 242 + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { 240 243 rcd = dd->rcd[i]; 241 244 del_timer_sync(&rcd->aspm_timer); 242 245 spin_lock_irqsave(&rcd->aspm_lock, flags); ··· 263 260 if (aspm_mode != ASPM_MODE_DYNAMIC) 264 261 return; 265 262 266 - for (i = 0; i < dd->first_user_ctxt; i++) { 263 + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { 267 264 rcd = dd->rcd[i]; 268 265 spin_lock_irqsave(&rcd->aspm_lock, flags); 269 266 rcd->aspm_intr_enable = true; ··· 279 276 (unsigned long)rcd); 280 277 rcd->aspm_intr_supported = rcd->dd->aspm_supported && 281 278 aspm_mode == ASPM_MODE_DYNAMIC && 282 - rcd->ctxt < rcd->dd->first_user_ctxt; 279 + rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt; 283 280 } 284 281 285 282 static inline void aspm_init(struct hfi1_devdata *dd) ··· 289 286 spin_lock_init(&dd->aspm_lock); 290 287 dd->aspm_supported = aspm_hw_l1_supported(dd); 291 288 292 - for (i = 0; i < dd->first_user_ctxt; i++) 289 + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) 293 290 aspm_ctx_init(dd->rcd[i]); 294 291 295 292 /* Start with ASPM disabled */
+249 -42
drivers/infiniband/hw/hfi1/chip.c
··· 126 126 #define DEFAULT_KRCVQS 2 127 127 #define MIN_KERNEL_KCTXTS 2 128 128 #define FIRST_KERNEL_KCTXT 1 129 - /* sizes for both the QP and RSM map tables */ 130 - #define NUM_MAP_ENTRIES 256 131 - #define NUM_MAP_REGS 32 129 + 130 + /* 131 + * RSM instance allocation 132 + * 0 - Verbs 133 + * 1 - User Fecn Handling 134 + * 2 - Vnic 135 + */ 136 + #define RSM_INS_VERBS 0 137 + #define RSM_INS_FECN 1 138 + #define RSM_INS_VNIC 2 132 139 133 140 /* Bit offset into the GUID which carries HFI id information */ 134 141 #define GUID_HFI_INDEX_SHIFT 39 ··· 146 139 #define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3) 147 140 #define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4) 148 141 149 - /* RSM fields */ 150 - 142 + /* RSM fields for Verbs */ 151 143 /* packet type */ 152 144 #define IB_PACKET_TYPE 2ull 153 145 #define QW_SHIFT 6ull ··· 175 169 176 170 /* QPN[m+n:1] QW 1, OFFSET 1 */ 177 171 #define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull)) 172 + 173 + /* RSM fields for Vnic */ 174 + /* L2_TYPE: QW 0, OFFSET 61 - for match */ 175 + #define L2_TYPE_QW 0ull 176 + #define L2_TYPE_BIT_OFFSET 61ull 177 + #define L2_TYPE_OFFSET(off) ((L2_TYPE_QW << QW_SHIFT) | (off)) 178 + #define L2_TYPE_MATCH_OFFSET L2_TYPE_OFFSET(L2_TYPE_BIT_OFFSET) 179 + #define L2_TYPE_MASK 3ull 180 + #define L2_16B_VALUE 2ull 181 + 182 + /* L4_TYPE QW 1, OFFSET 0 - for match */ 183 + #define L4_TYPE_QW 1ull 184 + #define L4_TYPE_BIT_OFFSET 0ull 185 + #define L4_TYPE_OFFSET(off) ((L4_TYPE_QW << QW_SHIFT) | (off)) 186 + #define L4_TYPE_MATCH_OFFSET L4_TYPE_OFFSET(L4_TYPE_BIT_OFFSET) 187 + #define L4_16B_TYPE_MASK 0xFFull 188 + #define L4_16B_ETH_VALUE 0x78ull 189 + 190 + /* 16B VESWID - for select */ 191 + #define L4_16B_HDR_VESWID_OFFSET ((2 << QW_SHIFT) | (16ull)) 192 + /* 16B ENTROPY - for select */ 193 + #define L2_16B_ENTROPY_OFFSET ((1 << QW_SHIFT) | (32ull)) 178 194 179 195 /* defines to build power on SC2VL table */ 180 196 #define SC2VL_VAL( \ ··· 1075 1047 unsigned int *np); 1076 
1048 static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd); 1077 1049 static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms); 1050 + static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index); 1078 1051 1079 1052 /* 1080 1053 * Error interrupt table entry. This is used as input to the interrupt ··· 6732 6703 int i; 6733 6704 6734 6705 /* enable all kernel contexts */ 6735 - for (i = 0; i < dd->n_krcv_queues; i++) { 6706 + for (i = 0; i < dd->num_rcv_contexts; i++) { 6707 + struct hfi1_ctxtdata *rcd = dd->rcd[i]; 6708 + 6709 + /* Ensure all non-user contexts(including vnic) are enabled */ 6710 + if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER)) 6711 + continue; 6712 + 6736 6713 rcvmask = HFI1_RCVCTRL_CTXT_ENB; 6737 6714 /* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */ 6738 6715 rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ? ··· 8035 8000 if (likely(source < dd->num_rcv_contexts)) { 8036 8001 rcd = dd->rcd[source]; 8037 8002 if (rcd) { 8038 - if (source < dd->first_user_ctxt) 8003 + /* Check for non-user contexts, including vnic */ 8004 + if ((source < dd->first_dyn_alloc_ctxt) || 8005 + (rcd->sc && (rcd->sc->type == SC_KERNEL))) 8039 8006 rcd->do_interrupt(rcd, 0); 8040 8007 else 8041 8008 handle_user_interrupt(rcd); ··· 8065 8028 rcd = dd->rcd[source]; 8066 8029 if (rcd) { 8067 8030 /* only pay attention to user urgent interrupts */ 8068 - if (source >= dd->first_user_ctxt) 8031 + if ((source >= dd->first_dyn_alloc_ctxt) && 8032 + (!rcd->sc || (rcd->sc->type == SC_USER))) 8069 8033 handle_user_interrupt(rcd); 8070 8034 return; /* OK */ 8071 8035 } ··· 12880 12842 first_sdma = last_general; 12881 12843 last_sdma = first_sdma + dd->num_sdma; 12882 12844 first_rx = last_sdma; 12883 - last_rx = first_rx + dd->n_krcv_queues; 12845 + last_rx = first_rx + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT; 12846 + 12847 + /* VNIC MSIx interrupts get mapped when VNIC contexts are created */ 12848 + 
dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues; 12884 12849 12885 12850 /* 12886 12851 * Sanity check - the code expects all SDMA chip source ··· 12897 12856 const char *err_info; 12898 12857 irq_handler_t handler; 12899 12858 irq_handler_t thread = NULL; 12900 - void *arg; 12859 + void *arg = NULL; 12901 12860 int idx; 12902 12861 struct hfi1_ctxtdata *rcd = NULL; 12903 12862 struct sdma_engine *sde = NULL; ··· 12924 12883 } else if (first_rx <= i && i < last_rx) { 12925 12884 idx = i - first_rx; 12926 12885 rcd = dd->rcd[idx]; 12927 - /* no interrupt if no rcd */ 12928 - if (!rcd) 12929 - continue; 12930 - /* 12931 - * Set the interrupt register and mask for this 12932 - * context's interrupt. 12933 - */ 12934 - rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; 12935 - rcd->imask = ((u64)1) << 12936 - ((IS_RCVAVAIL_START + idx) % 64); 12937 - handler = receive_context_interrupt; 12938 - thread = receive_context_thread; 12939 - arg = rcd; 12940 - snprintf(me->name, sizeof(me->name), 12941 - DRIVER_NAME "_%d kctxt%d", dd->unit, idx); 12942 - err_info = "receive context"; 12943 - remap_intr(dd, IS_RCVAVAIL_START + idx, i); 12944 - me->type = IRQ_RCVCTXT; 12886 + if (rcd) { 12887 + /* 12888 + * Set the interrupt register and mask for this 12889 + * context's interrupt. 
12890 + */ 12891 + rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; 12892 + rcd->imask = ((u64)1) << 12893 + ((IS_RCVAVAIL_START + idx) % 64); 12894 + handler = receive_context_interrupt; 12895 + thread = receive_context_thread; 12896 + arg = rcd; 12897 + snprintf(me->name, sizeof(me->name), 12898 + DRIVER_NAME "_%d kctxt%d", 12899 + dd->unit, idx); 12900 + err_info = "receive context"; 12901 + remap_intr(dd, IS_RCVAVAIL_START + idx, i); 12902 + me->type = IRQ_RCVCTXT; 12903 + rcd->msix_intr = i; 12904 + } 12945 12905 } else { 12946 12906 /* not in our expected range - complain, then 12947 12907 * ignore it ··· 12980 12938 return ret; 12981 12939 } 12982 12940 12941 + void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd) 12942 + { 12943 + int i; 12944 + 12945 + if (!dd->num_msix_entries) { 12946 + synchronize_irq(dd->pcidev->irq); 12947 + return; 12948 + } 12949 + 12950 + for (i = 0; i < dd->vnic.num_ctxt; i++) { 12951 + struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; 12952 + struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; 12953 + 12954 + synchronize_irq(me->msix.vector); 12955 + } 12956 + } 12957 + 12958 + void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd) 12959 + { 12960 + struct hfi1_devdata *dd = rcd->dd; 12961 + struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; 12962 + 12963 + if (!me->arg) /* => no irq, no affinity */ 12964 + return; 12965 + 12966 + hfi1_put_irq_affinity(dd, me); 12967 + free_irq(me->msix.vector, me->arg); 12968 + 12969 + me->arg = NULL; 12970 + } 12971 + 12972 + void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) 12973 + { 12974 + struct hfi1_devdata *dd = rcd->dd; 12975 + struct hfi1_msix_entry *me; 12976 + int idx = rcd->ctxt; 12977 + void *arg = rcd; 12978 + int ret; 12979 + 12980 + rcd->msix_intr = dd->vnic.msix_idx++; 12981 + me = &dd->msix_entries[rcd->msix_intr]; 12982 + 12983 + /* 12984 + * Set the interrupt register and mask for this 12985 + * context's interrupt. 
12986 + */ 12987 + rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; 12988 + rcd->imask = ((u64)1) << 12989 + ((IS_RCVAVAIL_START + idx) % 64); 12990 + 12991 + snprintf(me->name, sizeof(me->name), 12992 + DRIVER_NAME "_%d kctxt%d", dd->unit, idx); 12993 + me->name[sizeof(me->name) - 1] = 0; 12994 + me->type = IRQ_RCVCTXT; 12995 + 12996 + remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr); 12997 + 12998 + ret = request_threaded_irq(me->msix.vector, receive_context_interrupt, 12999 + receive_context_thread, 0, me->name, arg); 13000 + if (ret) { 13001 + dd_dev_err(dd, "vnic irq request (vector %d, idx %d) fail %d\n", 13002 + me->msix.vector, idx, ret); 13003 + return; 13004 + } 13005 + /* 13006 + * assign arg after request_irq call, so it will be 13007 + * cleaned up 13008 + */ 13009 + me->arg = arg; 13010 + 13011 + ret = hfi1_get_irq_affinity(dd, me); 13012 + if (ret) { 13013 + dd_dev_err(dd, 13014 + "unable to pin IRQ %d\n", ret); 13015 + free_irq(me->msix.vector, me->arg); 13016 + } 13017 + } 13018 + 12983 13019 /* 12984 13020 * Set the general handler to accept all interrupts, remap all 12985 13021 * chip interrupts back to MSI-X 0. 
··· 13089 12969 * N interrupts - one per used SDMA engine 13090 12970 * M interrupt - one per kernel receive context 13091 12971 */ 13092 - total = 1 + dd->num_sdma + dd->n_krcv_queues; 12972 + total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT; 13093 12973 13094 12974 entries = kcalloc(total, sizeof(*entries), GFP_KERNEL); 13095 12975 if (!entries) { ··· 13154 13034 * 13155 13035 * num_rcv_contexts - number of contexts being used 13156 13036 * n_krcv_queues - number of kernel contexts 13157 - * first_user_ctxt - first non-kernel context in array of contexts 13037 + * first_dyn_alloc_ctxt - first dynamically allocated context 13038 + * in array of contexts 13158 13039 * freectxts - number of free user contexts 13159 13040 * num_send_contexts - number of PIO send contexts being used 13160 13041 */ ··· 13232 13111 total_contexts = num_kernel_contexts + num_user_contexts; 13233 13112 } 13234 13113 13235 - /* the first N are kernel contexts, the rest are user contexts */ 13114 + /* Accommodate VNIC contexts */ 13115 + if ((total_contexts + HFI1_NUM_VNIC_CTXT) <= dd->chip_rcv_contexts) 13116 + total_contexts += HFI1_NUM_VNIC_CTXT; 13117 + 13118 + /* the first N are kernel contexts, the rest are user/vnic contexts */ 13236 13119 dd->num_rcv_contexts = total_contexts; 13237 13120 dd->n_krcv_queues = num_kernel_contexts; 13238 - dd->first_user_ctxt = num_kernel_contexts; 13121 + dd->first_dyn_alloc_ctxt = num_kernel_contexts; 13239 13122 dd->num_user_contexts = num_user_contexts; 13240 13123 dd->freectxts = num_user_contexts; 13241 13124 dd_dev_info(dd, ··· 13695 13570 write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0); 13696 13571 for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++) 13697 13572 write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0); 13698 - for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) { 13699 - write_csr(dd, RCV_RSM_CFG + (8 * i), 0); 13700 - write_csr(dd, RCV_RSM_SELECT + (8 * i), 0); 13701 - write_csr(dd, RCV_RSM_MATCH + (8 * i), 0); 13702 - } 13573 + for 
(i = 0; i < RXE_NUM_RSM_INSTANCES; i++) 13574 + clear_rsm_rule(dd, i); 13703 13575 for (i = 0; i < 32; i++) 13704 13576 write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0); 13705 13577 ··· 14055 13933 (u64)rrd->value2 << RCV_RSM_MATCH_VALUE2_SHIFT); 14056 13934 } 14057 13935 13936 + /* 13937 + * Clear a receive side mapping rule. 13938 + */ 13939 + static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index) 13940 + { 13941 + write_csr(dd, RCV_RSM_CFG + (8 * rule_index), 0); 13942 + write_csr(dd, RCV_RSM_SELECT + (8 * rule_index), 0); 13943 + write_csr(dd, RCV_RSM_MATCH + (8 * rule_index), 0); 13944 + } 13945 + 14058 13946 /* return the number of RSM map table entries that will be used for QOS */ 14059 13947 static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, 14060 13948 unsigned int *np) ··· 14180 14048 rrd.value2 = LRH_SC_VALUE; 14181 14049 14182 14050 /* add rule 0 */ 14183 - add_rsm_rule(dd, 0, &rrd); 14051 + add_rsm_rule(dd, RSM_INS_VERBS, &rrd); 14184 14052 14185 14053 /* mark RSM map entries as used */ 14186 14054 rmt->used += rmt_entries; ··· 14210 14078 /* 14211 14079 * RSM will extract the destination context as an index into the 14212 14080 * map table. The destination contexts are a sequential block 14213 - * in the range first_user_ctxt...num_rcv_contexts-1 (inclusive). 14081 + * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive). 14214 14082 * Map entries are accessed as offset + extracted value. Adjust 14215 14083 * the added offset so this sequence can be placed anywhere in 14216 14084 * the table - as long as the entries themselves do not wrap. ··· 14218 14086 * start with that to allow for a "negative" offset. 
14219 14087 */ 14220 14088 offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used - 14221 - (int)dd->first_user_ctxt); 14089 + (int)dd->first_dyn_alloc_ctxt); 14222 14090 14223 - for (i = dd->first_user_ctxt, idx = rmt->used; 14091 + for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used; 14224 14092 i < dd->num_rcv_contexts; i++, idx++) { 14225 14093 /* replace with identity mapping */ 14226 14094 regoff = (idx % 8) * 8; ··· 14254 14122 rrd.value2 = 1; 14255 14123 14256 14124 /* add rule 1 */ 14257 - add_rsm_rule(dd, 1, &rrd); 14125 + add_rsm_rule(dd, RSM_INS_FECN, &rrd); 14258 14126 14259 14127 rmt->used += dd->num_user_contexts; 14128 + } 14129 + 14130 + /* Initialize RSM for VNIC */ 14131 + void hfi1_init_vnic_rsm(struct hfi1_devdata *dd) 14132 + { 14133 + u8 i, j; 14134 + u8 ctx_id = 0; 14135 + u64 reg; 14136 + u32 regoff; 14137 + struct rsm_rule_data rrd; 14138 + 14139 + if (hfi1_vnic_is_rsm_full(dd, NUM_VNIC_MAP_ENTRIES)) { 14140 + dd_dev_err(dd, "Vnic RSM disabled, rmt entries used = %d\n", 14141 + dd->vnic.rmt_start); 14142 + return; 14143 + } 14144 + 14145 + dev_dbg(&(dd)->pcidev->dev, "Vnic rsm start = %d, end %d\n", 14146 + dd->vnic.rmt_start, 14147 + dd->vnic.rmt_start + NUM_VNIC_MAP_ENTRIES); 14148 + 14149 + /* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */ 14150 + regoff = RCV_RSM_MAP_TABLE + (dd->vnic.rmt_start / 8) * 8; 14151 + reg = read_csr(dd, regoff); 14152 + for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) { 14153 + /* Update map register with vnic context */ 14154 + j = (dd->vnic.rmt_start + i) % 8; 14155 + reg &= ~(0xffllu << (j * 8)); 14156 + reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8); 14157 + /* Wrap up vnic ctx index */ 14158 + ctx_id %= dd->vnic.num_ctxt; 14159 + /* Write back map register */ 14160 + if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) { 14161 + dev_dbg(&(dd)->pcidev->dev, 14162 + "Vnic rsm map reg[%d] =0x%llx\n", 14163 + regoff - RCV_RSM_MAP_TABLE, reg); 14164 + 14165 + write_csr(dd, regoff, reg); 14166 + regoff 
+= 8; 14167 + if (i < (NUM_VNIC_MAP_ENTRIES - 1)) 14168 + reg = read_csr(dd, regoff); 14169 + } 14170 + } 14171 + 14172 + /* Add rule for vnic */ 14173 + rrd.offset = dd->vnic.rmt_start; 14174 + rrd.pkt_type = 4; 14175 + /* Match 16B packets */ 14176 + rrd.field1_off = L2_TYPE_MATCH_OFFSET; 14177 + rrd.mask1 = L2_TYPE_MASK; 14178 + rrd.value1 = L2_16B_VALUE; 14179 + /* Match ETH L4 packets */ 14180 + rrd.field2_off = L4_TYPE_MATCH_OFFSET; 14181 + rrd.mask2 = L4_16B_TYPE_MASK; 14182 + rrd.value2 = L4_16B_ETH_VALUE; 14183 + /* Calc context from veswid and entropy */ 14184 + rrd.index1_off = L4_16B_HDR_VESWID_OFFSET; 14185 + rrd.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES); 14186 + rrd.index2_off = L2_16B_ENTROPY_OFFSET; 14187 + rrd.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES); 14188 + add_rsm_rule(dd, RSM_INS_VNIC, &rrd); 14189 + 14190 + /* Enable RSM if not already enabled */ 14191 + add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); 14192 + } 14193 + 14194 + void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd) 14195 + { 14196 + clear_rsm_rule(dd, RSM_INS_VNIC); 14197 + 14198 + /* Disable RSM if used only by vnic */ 14199 + if (dd->vnic.rmt_start == 0) 14200 + clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); 14260 14201 } 14261 14202 14262 14203 static void init_rxe(struct hfi1_devdata *dd) ··· 14344 14139 init_qos(dd, rmt); 14345 14140 init_user_fecn_handling(dd, rmt); 14346 14141 complete_rsm_map_table(dd, rmt); 14142 + /* record number of used rsm map entries for vnic */ 14143 + dd->vnic.rmt_start = rmt->used; 14347 14144 kfree(rmt); 14348 14145 14349 14146 /*
+2
drivers/infiniband/hw/hfi1/chip.h
··· 1362 1362 int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey); 1363 1363 int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt); 1364 1364 void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality); 1365 + void hfi1_init_vnic_rsm(struct hfi1_devdata *dd); 1366 + void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd); 1365 1367 1366 1368 /* 1367 1369 * Interrupt source table.
+5 -5
drivers/infiniband/hw/hfi1/debugfs.c
··· 1 1 #ifdef CONFIG_DEBUG_FS 2 2 /* 3 - * Copyright(c) 2015, 2016 Intel Corporation. 3 + * Copyright(c) 2015-2017 Intel Corporation. 4 4 * 5 5 * This file is provided under a dual BSD/GPLv2 license. When using or 6 6 * redistributing this file, you may do so under either license. ··· 174 174 struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; 175 175 struct hfi1_devdata *dd = dd_from_dev(ibd); 176 176 177 - for (j = 0; j < dd->first_user_ctxt; j++) { 177 + for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { 178 178 if (!dd->rcd[j]) 179 179 continue; 180 180 n_packets += dd->rcd[j]->opstats->stats[i].n_packets; ··· 200 200 201 201 if (!*pos) 202 202 return SEQ_START_TOKEN; 203 - if (*pos >= dd->first_user_ctxt) 203 + if (*pos >= dd->first_dyn_alloc_ctxt) 204 204 return NULL; 205 205 return pos; 206 206 } ··· 214 214 return pos; 215 215 216 216 ++*pos; 217 - if (*pos >= dd->first_user_ctxt) 217 + if (*pos >= dd->first_dyn_alloc_ctxt) 218 218 return NULL; 219 219 return pos; 220 220 } ··· 1099 1099 struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; 1100 1100 struct hfi1_devdata *dd = dd_from_dev(ibd); 1101 1101 1102 - for (j = 0; j < dd->first_user_ctxt; j++) { 1102 + for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { 1103 1103 if (!dd->rcd[j]) 1104 1104 continue; 1105 1105 n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
+40 -12
drivers/infiniband/hw/hfi1/driver.c
··· 874 874 return last; 875 875 } 876 876 877 - static inline void set_all_nodma_rtail(struct hfi1_devdata *dd) 877 + static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt) 878 878 { 879 879 int i; 880 880 881 - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++) 881 + /* 882 + * For dynamically allocated kernel contexts (like vnic) switch 883 + * interrupt handler only for that context. Otherwise, switch 884 + * interrupt handler for all statically allocated kernel contexts. 885 + */ 886 + if (ctxt >= dd->first_dyn_alloc_ctxt) { 887 + dd->rcd[ctxt]->do_interrupt = 888 + &handle_receive_interrupt_nodma_rtail; 889 + return; 890 + } 891 + 892 + for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) 882 893 dd->rcd[i]->do_interrupt = 883 894 &handle_receive_interrupt_nodma_rtail; 884 895 } 885 896 886 - static inline void set_all_dma_rtail(struct hfi1_devdata *dd) 897 + static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt) 887 898 { 888 899 int i; 889 900 890 - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++) 901 + /* 902 + * For dynamically allocated kernel contexts (like vnic) switch 903 + * interrupt handler only for that context. Otherwise, switch 904 + * interrupt handler for all statically allocated kernel contexts. 
905 + */ 906 + if (ctxt >= dd->first_dyn_alloc_ctxt) { 907 + dd->rcd[ctxt]->do_interrupt = 908 + &handle_receive_interrupt_dma_rtail; 909 + return; 910 + } 911 + 912 + for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) 891 913 dd->rcd[i]->do_interrupt = 892 914 &handle_receive_interrupt_dma_rtail; 893 915 } ··· 919 897 int i; 920 898 921 899 /* HFI1_CTRL_CTXT must always use the slow path interrupt handler */ 922 - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++) 923 - dd->rcd[i]->do_interrupt = &handle_receive_interrupt; 900 + for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) { 901 + struct hfi1_ctxtdata *rcd = dd->rcd[i]; 902 + 903 + if ((i < dd->first_dyn_alloc_ctxt) || 904 + (rcd && rcd->sc && (rcd->sc->type == SC_KERNEL))) 905 + rcd->do_interrupt = &handle_receive_interrupt; 906 + } 924 907 } 925 908 926 909 static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, ··· 1035 1008 last = RCV_PKT_DONE; 1036 1009 if (needset) { 1037 1010 dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n"); 1038 - set_all_nodma_rtail(dd); 1011 + set_nodma_rtail(dd, rcd->ctxt); 1039 1012 needset = 0; 1040 1013 } 1041 1014 } else { ··· 1057 1030 if (needset) { 1058 1031 dd_dev_info(dd, 1059 1032 "Switching to DMA_RTAIL\n"); 1060 - set_all_dma_rtail(dd); 1033 + set_dma_rtail(dd, rcd->ctxt); 1061 1034 needset = 0; 1062 1035 } 1063 1036 } ··· 1106 1079 set_link_state(ppd, HLS_UP_ACTIVE); 1107 1080 1108 1081 /* 1109 - * Interrupt all kernel contexts that could have had an 1110 - * interrupt during auto activation. 1082 + * Interrupt all statically allocated kernel contexts that could 1083 + * have had an interrupt during auto activation. 
1111 1084 */ 1112 - for (i = HFI1_CTRL_CTXT; i < dd->first_user_ctxt; i++) 1085 + for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++) 1113 1086 force_recv_intr(dd->rcd[i]); 1114 1087 } 1115 1088 ··· 1323 1296 1324 1297 spin_lock_irqsave(&dd->uctxt_lock, flags); 1325 1298 if (dd->rcd) 1326 - for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) { 1299 + for (i = dd->first_dyn_alloc_ctxt; 1300 + i < dd->num_rcv_contexts; i++) { 1327 1301 if (!dd->rcd[i] || !dd->rcd[i]->cnt) 1328 1302 continue; 1329 1303 spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+17 -10
drivers/infiniband/hw/hfi1/file_ops.c
··· 1 1 /* 2 - * Copyright(c) 2015, 2016 Intel Corporation. 2 + * Copyright(c) 2015-2017 Intel Corporation. 3 3 * 4 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 5 * redistributing this file, you may do so under either license. ··· 586 586 * knows where it's own bitmap is within the page. 587 587 */ 588 588 memaddr = (unsigned long)(dd->events + 589 - ((uctxt->ctxt - dd->first_user_ctxt) * 590 - HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK; 589 + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * 590 + HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK; 591 591 memlen = PAGE_SIZE; 592 592 /* 593 593 * v3.7 removes VM_RESERVED but the effect is kept by ··· 756 756 * Clear any left over, unhandled events so the next process that 757 757 * gets this context doesn't get confused. 758 758 */ 759 - ev = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) * 759 + ev = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * 760 760 HFI1_MAX_SHARED_CTXTS) + fdata->subctxt; 761 761 *ev = 0; 762 762 ··· 909 909 910 910 if (!(dd && (dd->flags & HFI1_PRESENT) && dd->kregbase)) 911 911 continue; 912 - for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) { 912 + for (i = dd->first_dyn_alloc_ctxt; 913 + i < dd->num_rcv_contexts; i++) { 913 914 struct hfi1_ctxtdata *uctxt = dd->rcd[i]; 914 915 915 916 /* Skip ctxts which are not yet open */ 916 917 if (!uctxt || !uctxt->cnt) 917 918 continue; 919 + 920 + /* Skip dynamically allocted kernel contexts */ 921 + if (uctxt->sc && (uctxt->sc->type == SC_KERNEL)) 922 + continue; 923 + 918 924 /* Skip ctxt if it doesn't match the requested one */ 919 925 if (memcmp(uctxt->uuid, uinfo->uuid, 920 926 sizeof(uctxt->uuid)) || ··· 966 960 return -EIO; 967 961 } 968 962 969 - for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts; ctxt++) 963 + for (ctxt = dd->first_dyn_alloc_ctxt; 964 + ctxt < dd->num_rcv_contexts; ctxt++) 970 965 if (!dd->rcd[ctxt]) 971 966 break; 972 967 ··· 1313 1306 */ 1314 1307 binfo.user_regbase = 
HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt, 1315 1308 fd->subctxt, 0); 1316 - offset = offset_in_page((((uctxt->ctxt - dd->first_user_ctxt) * 1309 + offset = offset_in_page((((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * 1317 1310 HFI1_MAX_SHARED_CTXTS) + fd->subctxt) * 1318 1311 sizeof(*dd->events)); 1319 1312 binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt, ··· 1407 1400 } 1408 1401 1409 1402 spin_lock_irqsave(&dd->uctxt_lock, flags); 1410 - for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts; 1403 + for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts; 1411 1404 ctxt++) { 1412 1405 uctxt = dd->rcd[ctxt]; 1413 1406 if (uctxt) { 1414 1407 unsigned long *evs = dd->events + 1415 - (uctxt->ctxt - dd->first_user_ctxt) * 1408 + (uctxt->ctxt - dd->first_dyn_alloc_ctxt) * 1416 1409 HFI1_MAX_SHARED_CTXTS; 1417 1410 int i; 1418 1411 /* ··· 1484 1477 if (!dd->events) 1485 1478 return 0; 1486 1479 1487 - evs = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) * 1480 + evs = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * 1488 1481 HFI1_MAX_SHARED_CTXTS) + subctxt; 1489 1482 1490 1483 for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
+27 -2
drivers/infiniband/hw/hfi1/hfi.h
··· 54 54 #include <linux/list.h> 55 55 #include <linux/scatterlist.h> 56 56 #include <linux/slab.h> 57 + #include <linux/idr.h> 57 58 #include <linux/io.h> 58 59 #include <linux/fs.h> 59 60 #include <linux/completion.h> ··· 67 66 #include <linux/i2c-algo-bit.h> 68 67 #include <rdma/ib_hdrs.h> 69 68 #include <linux/rhashtable.h> 69 + #include <linux/netdevice.h> 70 70 #include <rdma/rdma_vt.h> 71 71 72 72 #include "chip_registers.h" ··· 280 278 struct hfi1_devdata *dd; 281 279 /* so functions that need physical port can get it easily */ 282 280 struct hfi1_pportdata *ppd; 281 + /* associated msix interrupt */ 282 + u32 msix_intr; 283 283 /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ 284 284 void *subctxt_uregbase; 285 285 /* An array of pages for the eager receive buffers * N */ ··· 818 814 struct hfi1_i2c_bus *i2c_bus1; 819 815 }; 820 816 817 + /* sizes for both the QP and RSM map tables */ 818 + #define NUM_MAP_ENTRIES 256 819 + #define NUM_MAP_REGS 32 820 + 821 821 /* 822 822 * Number of VNIC contexts used. Ensure it is less than or equal to 823 823 * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE). 824 824 */ 825 825 #define HFI1_NUM_VNIC_CTXT 8 826 826 827 + /* Number of VNIC RSM entries */ 828 + #define NUM_VNIC_MAP_ENTRIES 8 829 + 827 830 /* Virtual NIC information */ 828 831 struct hfi1_vnic_data { 832 + struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT]; 833 + u8 num_vports; 829 834 struct idr vesw_idr; 835 + u8 rmt_start; 836 + u8 num_ctxt; 837 + u32 msix_idx; 830 838 }; 831 839 832 840 struct hfi1_vnic_vport_info; ··· 1066 1050 /* MSI-X information */ 1067 1051 struct hfi1_msix_entry *msix_entries; 1068 1052 u32 num_msix_entries; 1053 + u32 first_dyn_msix_idx; 1069 1054 1070 1055 /* INTx information */ 1071 1056 u32 requested_intx_irq; /* did we request one? 
*/ ··· 1165 1148 u16 flags; 1166 1149 /* Number of physical ports available */ 1167 1150 u8 num_pports; 1168 - /* Lowest context number which can be used by user processes */ 1169 - u8 first_user_ctxt; 1151 + /* Lowest context number which can be used by user processes or VNIC */ 1152 + u8 first_dyn_alloc_ctxt; 1170 1153 /* adding a new field here would make it part of this cacheline */ 1171 1154 1172 1155 /* seqlock for sc2vl */ ··· 1213 1196 /* vnic data */ 1214 1197 struct hfi1_vnic_data vnic; 1215 1198 }; 1199 + 1200 + static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare) 1201 + { 1202 + return (dd->vnic.rmt_start + spare) > NUM_MAP_ENTRIES; 1203 + } 1216 1204 1217 1205 /* 8051 firmware version helper */ 1218 1206 #define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c)) ··· 1283 1261 int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int); 1284 1262 int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int); 1285 1263 void set_all_slowpath(struct hfi1_devdata *dd); 1264 + void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd); 1265 + void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd); 1266 + void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd); 1286 1267 1287 1268 extern const struct pci_device_id hfi1_pci_tbl[]; 1288 1269
+17 -12
drivers/infiniband/hw/hfi1/init.c
··· 140 140 goto nomem; 141 141 142 142 /* create one or more kernel contexts */ 143 - for (i = 0; i < dd->first_user_ctxt; ++i) { 143 + for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) { 144 144 struct hfi1_pportdata *ppd; 145 145 struct hfi1_ctxtdata *rcd; 146 146 ··· 215 215 u32 base; 216 216 217 217 if (dd->rcv_entries.nctxt_extra > 218 - dd->num_rcv_contexts - dd->first_user_ctxt) 218 + dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt) 219 219 kctxt_ngroups = (dd->rcv_entries.nctxt_extra - 220 - (dd->num_rcv_contexts - dd->first_user_ctxt)); 220 + (dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt)); 221 221 rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa); 222 222 if (rcd) { 223 223 u32 rcvtids, max_entries; ··· 239 239 * Calculate the context's RcvArray entry starting point. 240 240 * We do this here because we have to take into account all 241 241 * the RcvArray entries that previous context would have 242 - * taken and we have to account for any extra groups 243 - * assigned to the kernel or user contexts. 242 + * taken and we have to account for any extra groups assigned 243 + * to the static (kernel) or dynamic (vnic/user) contexts. 
244 244 */ 245 - if (ctxt < dd->first_user_ctxt) { 245 + if (ctxt < dd->first_dyn_alloc_ctxt) { 246 246 if (ctxt < kctxt_ngroups) { 247 247 base = ctxt * (dd->rcv_entries.ngroups + 1); 248 248 rcd->rcv_array_groups++; ··· 250 250 base = kctxt_ngroups + 251 251 (ctxt * dd->rcv_entries.ngroups); 252 252 } else { 253 - u16 ct = ctxt - dd->first_user_ctxt; 253 + u16 ct = ctxt - dd->first_dyn_alloc_ctxt; 254 254 255 255 base = ((dd->n_krcv_queues * dd->rcv_entries.ngroups) + 256 256 kctxt_ngroups); ··· 323 323 } 324 324 rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE; 325 325 326 - if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */ 326 + /* Applicable only for statically created kernel contexts */ 327 + if (ctxt < dd->first_dyn_alloc_ctxt) { 327 328 rcd->opstats = kzalloc_node(sizeof(*rcd->opstats), 328 329 GFP_KERNEL, numa); 329 330 if (!rcd->opstats) ··· 587 586 * Enable kernel ctxts' receive and receive interrupt. 588 587 * Other ctxts done as user opens and initializes them. 589 588 */ 590 - for (i = 0; i < dd->first_user_ctxt; ++i) { 589 + for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) { 591 590 rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB; 592 591 rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ? 593 592 HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS; ··· 716 715 } 717 716 718 717 /* dd->rcd can be NULL if early initialization failed */ 719 - for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) { 718 + for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) { 720 719 /* 721 720 * Set up the (kernel) rcvhdr queue and egr TIDs. 
If doing 722 721 * re-init, the simplest way to handle this is to free ··· 1536 1535 hfi1_device_remove(dd); 1537 1536 if (!ret) 1538 1537 hfi1_unregister_ib_device(dd); 1538 + hfi1_vnic_cleanup(dd); 1539 1539 postinit_cleanup(dd); 1540 1540 if (initfail) 1541 1541 ret = initfail; ··· 1623 1621 amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize * 1624 1622 sizeof(u32)); 1625 1623 1626 - gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? 1627 - GFP_USER : GFP_KERNEL; 1624 + if ((rcd->ctxt < dd->first_dyn_alloc_ctxt) || 1625 + (rcd->sc && (rcd->sc->type == SC_KERNEL))) 1626 + gfp_flags = GFP_KERNEL; 1627 + else 1628 + gfp_flags = GFP_USER; 1628 1629 rcd->rcvhdrq = dma_zalloc_coherent( 1629 1630 &dd->pcidev->dev, amt, &rcd->rcvhdrq_dma, 1630 1631 gfp_flags | __GFP_COMP);
+7 -3
drivers/infiniband/hw/hfi1/mad.c
··· 53 53 #include "mad.h" 54 54 #include "trace.h" 55 55 #include "qp.h" 56 + #include "vnic.h" 56 57 57 58 /* the reset value from the FM is supposed to be 0xffff, handle both */ 58 59 #define OPA_LINK_WIDTH_RESET_OLD 0x0fff ··· 651 650 OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0); 652 651 653 652 pi->port_packet_format.supported = 654 - cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B); 653 + cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B | 654 + OPA_PORT_PACKET_FORMAT_16B); 655 655 pi->port_packet_format.enabled = 656 - cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B); 656 + cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B | 657 + OPA_PORT_PACKET_FORMAT_16B); 657 658 658 659 /* flit_control.interleave is (OPA V1, version .76): 659 660 * bits use ··· 704 701 buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT; 705 702 pi->buffer_units = cpu_to_be32(buffer_units); 706 703 707 - pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported); 704 + pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported | 705 + OPA_CAP_MASK3_IsEthOnFabricSupported); 708 706 709 707 /* HFI supports a replay buffer 128 LTPs in size */ 710 708 pi->replay_depth.buffer = 0x80;
+18 -1
drivers/infiniband/hw/hfi1/pio.c
··· 1 1 /* 2 - * Copyright(c) 2015, 2016 Intel Corporation. 2 + * Copyright(c) 2015-2017 Intel Corporation. 3 3 * 4 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 5 * redistributing this file, you may do so under either license. ··· 703 703 { 704 704 struct send_context_info *sci; 705 705 struct send_context *sc = NULL; 706 + int req_type = type; 706 707 dma_addr_t dma; 707 708 unsigned long flags; 708 709 u64 reg; ··· 730 729 return NULL; 731 730 } 732 731 732 + /* 733 + * VNIC contexts are dynamically allocated. 734 + * Hence, pick a user context for VNIC. 735 + */ 736 + if (type == SC_VNIC) 737 + type = SC_USER; 738 + 733 739 spin_lock_irqsave(&dd->sc_lock, flags); 734 740 ret = sc_hw_alloc(dd, type, &sw_index, &hw_context); 735 741 if (ret) { ··· 744 736 free_percpu(sc->buffers_allocated); 745 737 kfree(sc); 746 738 return NULL; 739 + } 740 + 741 + /* 742 + * VNIC contexts are used by kernel driver. 743 + * Hence, mark them as kernel contexts. 744 + */ 745 + if (req_type == SC_VNIC) { 746 + dd->send_contexts[sw_index].type = SC_KERNEL; 747 + type = SC_KERNEL; 747 748 } 748 749 749 750 sci = &dd->send_contexts[sw_index];
+7 -1
drivers/infiniband/hw/hfi1/pio.h
··· 1 1 #ifndef _PIO_H 2 2 #define _PIO_H 3 3 /* 4 - * Copyright(c) 2015, 2016 Intel Corporation. 4 + * Copyright(c) 2015-2017 Intel Corporation. 5 5 * 6 6 * This file is provided under a dual BSD/GPLv2 license. When using or 7 7 * redistributing this file, you may do so under either license. ··· 53 53 #define SC_ACK 2 54 54 #define SC_USER 3 /* must be the last one: it may take all left */ 55 55 #define SC_MAX 4 /* count of send context types */ 56 + 57 + /* 58 + * SC_VNIC types are allocated (dynamically) from the user context pool, 59 + * (SC_USER) and used by kernel driver as kernel contexts (SC_KERNEL). 60 + */ 61 + #define SC_VNIC SC_MAX 56 62 57 63 /* invalid send context index */ 58 64 #define INVALID_SCI 0xff
+2 -2
drivers/infiniband/hw/hfi1/sysfs.c
··· 1 1 /* 2 - * Copyright(c) 2015, 2016 Intel Corporation. 2 + * Copyright(c) 2015-2017 Intel Corporation. 3 3 * 4 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 5 * redistributing this file, you may do so under either license. ··· 542 542 * give a more accurate picture of total contexts available. 543 543 */ 544 544 return scnprintf(buf, PAGE_SIZE, "%u\n", 545 - min(dd->num_rcv_contexts - dd->first_user_ctxt, 545 + min(dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt, 546 546 (u32)dd->sc_sizes[SC_USER].count)); 547 547 } 548 548
+4 -4
drivers/infiniband/hw/hfi1/user_exp_rcv.c
··· 1 1 /* 2 - * Copyright(c) 2015, 2016 Intel Corporation. 2 + * Copyright(c) 2015-2017 Intel Corporation. 3 3 * 4 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 5 * redistributing this file, you may do so under either license. ··· 607 607 struct hfi1_filedata *fd = fp->private_data; 608 608 struct hfi1_ctxtdata *uctxt = fd->uctxt; 609 609 unsigned long *ev = uctxt->dd->events + 610 - (((uctxt->ctxt - uctxt->dd->first_user_ctxt) * 610 + (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) * 611 611 HFI1_MAX_SHARED_CTXTS) + fd->subctxt); 612 612 u32 *array; 613 613 int ret = 0; ··· 1011 1011 * process in question. 1012 1012 */ 1013 1013 ev = uctxt->dd->events + 1014 - (((uctxt->ctxt - uctxt->dd->first_user_ctxt) * 1015 - HFI1_MAX_SHARED_CTXTS) + fdata->subctxt); 1014 + (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) * 1015 + HFI1_MAX_SHARED_CTXTS) + fdata->subctxt); 1016 1016 set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev); 1017 1017 } 1018 1018 fdata->invalid_tid_idx++;
+3 -2
drivers/infiniband/hw/hfi1/user_pages.c
··· 1 1 /* 2 - * Copyright(c) 2015, 2016 Intel Corporation. 2 + * Copyright(c) 2015-2017 Intel Corporation. 3 3 * 4 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 5 * redistributing this file, you may do so under either license. ··· 73 73 { 74 74 unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit, 75 75 size = (cache_size * (1UL << 20)); /* convert to bytes */ 76 - unsigned usr_ctxts = dd->num_rcv_contexts - dd->first_user_ctxt; 76 + unsigned int usr_ctxts = 77 + dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt; 77 78 bool can_lock = capable(CAP_IPC_LOCK); 78 79 79 80 /*
+5 -1
drivers/infiniband/hw/hfi1/verbs.c
··· 61 61 #include "qp.h" 62 62 #include "verbs_txreq.h" 63 63 #include "debugfs.h" 64 + #include "vnic.h" 64 65 65 66 static unsigned int hfi1_lkey_table_size = 16; 66 67 module_param_named(lkey_table_size, hfi1_lkey_table_size, uint, ··· 1290 1289 IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | 1291 1290 IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | 1292 1291 IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE | 1293 - IB_DEVICE_MEM_MGT_EXTENSIONS; 1292 + IB_DEVICE_MEM_MGT_EXTENSIONS | 1293 + IB_DEVICE_RDMA_NETDEV_OPA_VNIC; 1294 1294 rdi->dparms.props.page_size_cap = PAGE_SIZE; 1295 1295 rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; 1296 1296 rdi->dparms.props.vendor_part_id = dd->pcidev->device; ··· 1774 1772 ibdev->modify_device = modify_device; 1775 1773 ibdev->alloc_hw_stats = alloc_hw_stats; 1776 1774 ibdev->get_hw_stats = get_hw_stats; 1775 + ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn; 1776 + ibdev->free_rdma_netdev = hfi1_vnic_free_rn; 1777 1777 1778 1778 /* keep process mad in the driver */ 1779 1779 ibdev->process_mad = hfi1_process_mad;
+3
drivers/infiniband/hw/hfi1/vnic.h
··· 149 149 unsigned char name_assign_type, 150 150 void (*setup)(struct net_device *)); 151 151 void hfi1_vnic_free_rn(struct net_device *netdev); 152 + int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx, 153 + struct hfi1_vnic_vport_info *vinfo, 154 + struct sk_buff *skb, u64 pbc, u8 plen); 152 155 153 156 #endif /* _HFI1_VNIC_H */
+244 -1
drivers/infiniband/hw/hfi1/vnic_main.c
··· 62 62 63 63 static DEFINE_SPINLOCK(vport_cntr_lock); 64 64 65 + static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) 66 + { 67 + unsigned int rcvctrl_ops = 0; 68 + int ret; 69 + 70 + ret = hfi1_init_ctxt(uctxt->sc); 71 + if (ret) 72 + goto done; 73 + 74 + uctxt->do_interrupt = &handle_receive_interrupt; 75 + 76 + /* Now allocate the RcvHdr queue and eager buffers. */ 77 + ret = hfi1_create_rcvhdrq(dd, uctxt); 78 + if (ret) 79 + goto done; 80 + 81 + ret = hfi1_setup_eagerbufs(uctxt); 82 + if (ret) 83 + goto done; 84 + 85 + set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags); 86 + 87 + if (uctxt->rcvhdrtail_kvaddr) 88 + clear_rcvhdrtail(uctxt); 89 + 90 + rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; 91 + rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB; 92 + 93 + if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR)) 94 + rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; 95 + if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL)) 96 + rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; 97 + if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) 98 + rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; 99 + if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) 100 + rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; 101 + 102 + hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt); 103 + 104 + uctxt->is_vnic = true; 105 + done: 106 + return ret; 107 + } 108 + 109 + static int allocate_vnic_ctxt(struct hfi1_devdata *dd, 110 + struct hfi1_ctxtdata **vnic_ctxt) 111 + { 112 + struct hfi1_ctxtdata *uctxt; 113 + unsigned int ctxt; 114 + int ret; 115 + 116 + if (dd->flags & HFI1_FROZEN) 117 + return -EIO; 118 + 119 + for (ctxt = dd->first_dyn_alloc_ctxt; 120 + ctxt < dd->num_rcv_contexts; ctxt++) 121 + if (!dd->rcd[ctxt]) 122 + break; 123 + 124 + if (ctxt == dd->num_rcv_contexts) 125 + return -EBUSY; 126 + 127 + uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, dd->node); 128 + if (!uctxt) { 129 + dd_dev_err(dd, "Unable to create ctxtdata, failing open\n"); 130 + return -ENOMEM; 131 + } 132 + 133 + 
uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) | 134 + HFI1_CAP_KGET(NODROP_RHQ_FULL) | 135 + HFI1_CAP_KGET(NODROP_EGR_FULL) | 136 + HFI1_CAP_KGET(DMA_RTAIL); 137 + uctxt->seq_cnt = 1; 138 + 139 + /* Allocate and enable a PIO send context */ 140 + uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize, 141 + uctxt->numa_id); 142 + 143 + ret = uctxt->sc ? 0 : -ENOMEM; 144 + if (ret) 145 + goto bail; 146 + 147 + dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n", 148 + uctxt->sc->sw_index, uctxt->sc->hw_context); 149 + ret = sc_enable(uctxt->sc); 150 + if (ret) 151 + goto bail; 152 + 153 + if (dd->num_msix_entries) 154 + hfi1_set_vnic_msix_info(uctxt); 155 + 156 + hfi1_stats.sps_ctxts++; 157 + dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); 158 + *vnic_ctxt = uctxt; 159 + 160 + return ret; 161 + bail: 162 + /* 163 + * hfi1_free_ctxtdata() also releases send_context 164 + * structure if uctxt->sc is not null 165 + */ 166 + dd->rcd[uctxt->ctxt] = NULL; 167 + hfi1_free_ctxtdata(dd, uctxt); 168 + dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret); 169 + return ret; 170 + } 171 + 172 + static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, 173 + struct hfi1_ctxtdata *uctxt) 174 + { 175 + unsigned long flags; 176 + 177 + dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); 178 + flush_wc(); 179 + 180 + if (dd->num_msix_entries) 181 + hfi1_reset_vnic_msix_info(uctxt); 182 + 183 + spin_lock_irqsave(&dd->uctxt_lock, flags); 184 + /* 185 + * Disable receive context and interrupt available, reset all 186 + * RcvCtxtCtrl bits to default values. 187 + */ 188 + hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | 189 + HFI1_RCVCTRL_TIDFLOW_DIS | 190 + HFI1_RCVCTRL_INTRAVAIL_DIS | 191 + HFI1_RCVCTRL_ONE_PKT_EGR_DIS | 192 + HFI1_RCVCTRL_NO_RHQ_DROP_DIS | 193 + HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt); 194 + /* 195 + * VNIC contexts are allocated from user context pool. 196 + * Release them back to user context pool. 
197 + * 198 + * Reset context integrity checks to default. 199 + * (writes to CSRs probably belong in chip.c) 200 + */ 201 + write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE, 202 + hfi1_pkt_default_send_ctxt_mask(dd, SC_USER)); 203 + sc_disable(uctxt->sc); 204 + 205 + dd->send_contexts[uctxt->sc->sw_index].type = SC_USER; 206 + spin_unlock_irqrestore(&dd->uctxt_lock, flags); 207 + 208 + dd->rcd[uctxt->ctxt] = NULL; 209 + uctxt->event_flags = 0; 210 + 211 + hfi1_clear_tids(uctxt); 212 + hfi1_clear_ctxt_pkey(dd, uctxt->ctxt); 213 + 214 + hfi1_stats.sps_ctxts--; 215 + hfi1_free_ctxtdata(dd, uctxt); 216 + } 217 + 65 218 void hfi1_vnic_setup(struct hfi1_devdata *dd) 66 219 { 67 220 idr_init(&dd->vnic.vesw_idr); ··· 672 519 netif_tx_disable(vinfo->netdev); 673 520 idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id); 674 521 522 + /* ensure irqs see the change */ 523 + hfi1_vnic_synchronize_irq(dd); 524 + 675 525 /* remove unread skbs */ 676 526 for (i = 0; i < vinfo->num_rx_q; i++) { 677 527 struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; ··· 704 548 hfi1_vnic_down(vinfo); 705 549 mutex_unlock(&vinfo->lock); 706 550 return 0; 551 + } 552 + 553 + static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd, 554 + struct hfi1_ctxtdata **vnic_ctxt) 555 + { 556 + int rc; 557 + 558 + rc = allocate_vnic_ctxt(dd, vnic_ctxt); 559 + if (rc) { 560 + dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc); 561 + return rc; 562 + } 563 + 564 + rc = setup_vnic_ctxt(dd, *vnic_ctxt); 565 + if (rc) { 566 + dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc); 567 + deallocate_vnic_ctxt(dd, *vnic_ctxt); 568 + *vnic_ctxt = NULL; 569 + } 570 + 571 + return rc; 572 + } 573 + 574 + static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) 575 + { 576 + struct hfi1_devdata *dd = vinfo->dd; 577 + int i, rc = 0; 578 + 579 + mutex_lock(&hfi1_mutex); 580 + if (!dd->vnic.num_vports) 581 + dd->vnic.msix_idx = dd->first_dyn_msix_idx; 582 + 583 + for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; 
i++) { 584 + rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]); 585 + if (rc) 586 + break; 587 + dd->vnic.ctxt[i]->vnic_q_idx = i; 588 + } 589 + 590 + if (i < vinfo->num_rx_q) { 591 + /* 592 + * If required amount of contexts is not 593 + * allocated successfully then remaining contexts 594 + * are released. 595 + */ 596 + while (i-- > dd->vnic.num_ctxt) { 597 + deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); 598 + dd->vnic.ctxt[i] = NULL; 599 + } 600 + goto alloc_fail; 601 + } 602 + 603 + if (dd->vnic.num_ctxt != i) { 604 + dd->vnic.num_ctxt = i; 605 + hfi1_init_vnic_rsm(dd); 606 + } 607 + 608 + dd->vnic.num_vports++; 609 + alloc_fail: 610 + mutex_unlock(&hfi1_mutex); 611 + return rc; 612 + } 613 + 614 + static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo) 615 + { 616 + struct hfi1_devdata *dd = vinfo->dd; 617 + int i; 618 + 619 + mutex_lock(&hfi1_mutex); 620 + if (--dd->vnic.num_vports == 0) { 621 + for (i = 0; i < dd->vnic.num_ctxt; i++) { 622 + deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); 623 + dd->vnic.ctxt[i] = NULL; 624 + } 625 + hfi1_deinit_vnic_rsm(dd); 626 + dd->vnic.num_ctxt = 0; 627 + } 628 + mutex_unlock(&hfi1_mutex); 707 629 } 708 630 709 631 static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id) ··· 828 594 struct hfi1_vnic_vport_info *vinfo; 829 595 struct net_device *netdev; 830 596 struct rdma_netdev *rn; 831 - int i, size; 597 + int i, size, rc; 832 598 833 599 if (!port_num || (port_num > dd->num_pports)) 834 600 return ERR_PTR(-EINVAL); ··· 866 632 netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64); 867 633 } 868 634 635 + rc = hfi1_vnic_init(vinfo); 636 + if (rc) 637 + goto init_fail; 638 + 869 639 return netdev; 640 + init_fail: 641 + mutex_destroy(&vinfo->lock); 642 + free_netdev(netdev); 643 + return ERR_PTR(rc); 870 644 } 871 645 872 646 void hfi1_vnic_free_rn(struct net_device *netdev) 873 647 { 874 648 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 875 649 650 + hfi1_vnic_deinit(vinfo); 876 651 
mutex_destroy(&vinfo->lock); 877 652 free_netdev(netdev); 878 653 }
+2 -1
include/rdma/opa_port_info.h
··· 1 1 /* 2 - * Copyright (c) 2014 Intel Corporation. All rights reserved. 2 + * Copyright (c) 2014-2017 Intel Corporation. All rights reserved. 3 3 * 4 4 * This software is available to you under a choice of one of two 5 5 * licenses. You may choose to be licensed under the terms of the GNU ··· 127 127 #define OPA_LINK_WIDTH_3X 0x0004 128 128 #define OPA_LINK_WIDTH_4X 0x0008 129 129 130 + #define OPA_CAP_MASK3_IsEthOnFabricSupported (1 << 13) 130 131 #define OPA_CAP_MASK3_IsSnoopSupported (1 << 7) 131 132 #define OPA_CAP_MASK3_IsAsyncSC2VLSupported (1 << 6) 132 133 #define OPA_CAP_MASK3_IsAddrRangeConfigSupported (1 << 5)