Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

IB/mlx5: Support IB_WR_REG_SIG_MR

This patch implements IB_WR_REG_SIG_MR posted by the user.

Basically this WR involves 3 WQEs in order to prepare and properly
register the signature layout:

1. post UMR WR to register the sig_mr in one of two possible ways:
* In case the user registered a single MR for data so the UMR data segment
consists of:
- single klm (data MR) passed by the user
- BSF with signature attributes requested by the user.
* In case the user registered 2 MRs, one for data and one for protection,
the UMR consists of:
- strided block format which includes data and protection MRs and
their repetitive block format.
- BSF with signature attributes requested by the user.

2. post SET_PSV in order to set the memory domain initial
signature parameters passed by the user.
This SET_PSV WQE is posted unsignaled, with a solicited CQE.

3. post SET_PSV in order to set the wire domain initial
signature parameters passed by the user.
This SET_PSV WQE is posted unsignaled, with a solicited CQE.

* After this compound WR we place a small fence for next WR to come.

This patch also introduces some helper functions to set the BSF correctly
and to determine the signature format selectors.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>

authored by

Sagi Grimberg and committed by
Roland Dreier
e6631814 3bcdb17a

+483
+422
drivers/infiniband/hw/mlx5/qp.c
··· 1777 1777 return cpu_to_be64(result); 1778 1778 } 1779 1779 1780 + static __be64 sig_mkey_mask(void) 1781 + { 1782 + u64 result; 1783 + 1784 + result = MLX5_MKEY_MASK_LEN | 1785 + MLX5_MKEY_MASK_PAGE_SIZE | 1786 + MLX5_MKEY_MASK_START_ADDR | 1787 + MLX5_MKEY_MASK_EN_RINVAL | 1788 + MLX5_MKEY_MASK_KEY | 1789 + MLX5_MKEY_MASK_LR | 1790 + MLX5_MKEY_MASK_LW | 1791 + MLX5_MKEY_MASK_RR | 1792 + MLX5_MKEY_MASK_RW | 1793 + MLX5_MKEY_MASK_SMALL_FENCE | 1794 + MLX5_MKEY_MASK_FREE | 1795 + MLX5_MKEY_MASK_BSF_EN; 1796 + 1797 + return cpu_to_be64(result); 1798 + } 1799 + 1780 1800 static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, 1781 1801 struct ib_send_wr *wr, int li) 1782 1802 { ··· 1981 1961 return 0; 1982 1962 } 1983 1963 1964 + static u16 prot_field_size(enum ib_signature_type type) 1965 + { 1966 + switch (type) { 1967 + case IB_SIG_TYPE_T10_DIF: 1968 + return MLX5_DIF_SIZE; 1969 + default: 1970 + return 0; 1971 + } 1972 + } 1973 + 1974 + static u8 bs_selector(int block_size) 1975 + { 1976 + switch (block_size) { 1977 + case 512: return 0x1; 1978 + case 520: return 0x2; 1979 + case 4096: return 0x3; 1980 + case 4160: return 0x4; 1981 + case 1073741824: return 0x5; 1982 + default: return 0; 1983 + } 1984 + } 1985 + 1986 + static int format_selector(struct ib_sig_attrs *attr, 1987 + struct ib_sig_domain *domain, 1988 + int *selector) 1989 + { 1990 + 1991 + #define FORMAT_DIF_NONE 0 1992 + #define FORMAT_DIF_CRC_INC 8 1993 + #define FORMAT_DIF_CRC_NO_INC 12 1994 + #define FORMAT_DIF_CSUM_INC 13 1995 + #define FORMAT_DIF_CSUM_NO_INC 14 1996 + 1997 + switch (domain->sig.dif.type) { 1998 + case IB_T10DIF_NONE: 1999 + /* No DIF */ 2000 + *selector = FORMAT_DIF_NONE; 2001 + break; 2002 + case IB_T10DIF_TYPE1: /* Fall through */ 2003 + case IB_T10DIF_TYPE2: 2004 + switch (domain->sig.dif.bg_type) { 2005 + case IB_T10DIF_CRC: 2006 + *selector = FORMAT_DIF_CRC_INC; 2007 + break; 2008 + case IB_T10DIF_CSUM: 2009 + *selector = FORMAT_DIF_CSUM_INC; 2010 + break; 
2011 + default: 2012 + return 1; 2013 + } 2014 + break; 2015 + case IB_T10DIF_TYPE3: 2016 + switch (domain->sig.dif.bg_type) { 2017 + case IB_T10DIF_CRC: 2018 + *selector = domain->sig.dif.type3_inc_reftag ? 2019 + FORMAT_DIF_CRC_INC : 2020 + FORMAT_DIF_CRC_NO_INC; 2021 + break; 2022 + case IB_T10DIF_CSUM: 2023 + *selector = domain->sig.dif.type3_inc_reftag ? 2024 + FORMAT_DIF_CSUM_INC : 2025 + FORMAT_DIF_CSUM_NO_INC; 2026 + break; 2027 + default: 2028 + return 1; 2029 + } 2030 + break; 2031 + default: 2032 + return 1; 2033 + } 2034 + 2035 + return 0; 2036 + } 2037 + 2038 + static int mlx5_set_bsf(struct ib_mr *sig_mr, 2039 + struct ib_sig_attrs *sig_attrs, 2040 + struct mlx5_bsf *bsf, u32 data_size) 2041 + { 2042 + struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig; 2043 + struct mlx5_bsf_basic *basic = &bsf->basic; 2044 + struct ib_sig_domain *mem = &sig_attrs->mem; 2045 + struct ib_sig_domain *wire = &sig_attrs->wire; 2046 + int ret, selector; 2047 + 2048 + switch (sig_attrs->mem.sig_type) { 2049 + case IB_SIG_TYPE_T10_DIF: 2050 + if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF) 2051 + return -EINVAL; 2052 + 2053 + /* Input domain check byte mask */ 2054 + basic->check_byte_mask = sig_attrs->check_mask; 2055 + if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && 2056 + mem->sig.dif.type == wire->sig.dif.type) { 2057 + /* Same block structure */ 2058 + basic->bsf_size_sbs = 1 << 4; 2059 + if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) 2060 + basic->wire.copy_byte_mask = 0xff; 2061 + else 2062 + basic->wire.copy_byte_mask = 0x3f; 2063 + } else 2064 + basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval); 2065 + 2066 + basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); 2067 + basic->raw_data_size = cpu_to_be32(data_size); 2068 + 2069 + ret = format_selector(sig_attrs, mem, &selector); 2070 + if (ret) 2071 + return -EINVAL; 2072 + basic->m_bfs_psv = cpu_to_be32(selector << 24 | 2073 + msig->psv_memory.psv_idx); 2074 + 2075 + 
ret = format_selector(sig_attrs, wire, &selector); 2076 + if (ret) 2077 + return -EINVAL; 2078 + basic->w_bfs_psv = cpu_to_be32(selector << 24 | 2079 + msig->psv_wire.psv_idx); 2080 + break; 2081 + 2082 + default: 2083 + return -EINVAL; 2084 + } 2085 + 2086 + return 0; 2087 + } 2088 + 2089 + static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, 2090 + void **seg, int *size) 2091 + { 2092 + struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs; 2093 + struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; 2094 + struct mlx5_bsf *bsf; 2095 + u32 data_len = wr->sg_list->length; 2096 + u32 data_key = wr->sg_list->lkey; 2097 + u64 data_va = wr->sg_list->addr; 2098 + int ret; 2099 + int wqe_size; 2100 + 2101 + if (!wr->wr.sig_handover.prot) { 2102 + /** 2103 + * Source domain doesn't contain signature information 2104 + * So need construct: 2105 + * ------------------ 2106 + * | data_klm | 2107 + * ------------------ 2108 + * | BSF | 2109 + * ------------------ 2110 + **/ 2111 + struct mlx5_klm *data_klm = *seg; 2112 + 2113 + data_klm->bcount = cpu_to_be32(data_len); 2114 + data_klm->key = cpu_to_be32(data_key); 2115 + data_klm->va = cpu_to_be64(data_va); 2116 + wqe_size = ALIGN(sizeof(*data_klm), 64); 2117 + } else { 2118 + /** 2119 + * Source domain contains signature information 2120 + * So need construct a strided block format: 2121 + * --------------------------- 2122 + * | stride_block_ctrl | 2123 + * --------------------------- 2124 + * | data_klm | 2125 + * --------------------------- 2126 + * | prot_klm | 2127 + * --------------------------- 2128 + * | BSF | 2129 + * --------------------------- 2130 + **/ 2131 + struct mlx5_stride_block_ctrl_seg *sblock_ctrl; 2132 + struct mlx5_stride_block_entry *data_sentry; 2133 + struct mlx5_stride_block_entry *prot_sentry; 2134 + u32 prot_key = wr->wr.sig_handover.prot->lkey; 2135 + u64 prot_va = wr->wr.sig_handover.prot->addr; 2136 + u16 block_size = sig_attrs->mem.sig.dif.pi_interval; 2137 + 
int prot_size; 2138 + 2139 + sblock_ctrl = *seg; 2140 + data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl); 2141 + prot_sentry = (void *)data_sentry + sizeof(*data_sentry); 2142 + 2143 + prot_size = prot_field_size(sig_attrs->mem.sig_type); 2144 + if (!prot_size) { 2145 + pr_err("Bad block size given: %u\n", block_size); 2146 + return -EINVAL; 2147 + } 2148 + sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size + 2149 + prot_size); 2150 + sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP); 2151 + sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size); 2152 + sblock_ctrl->num_entries = cpu_to_be16(2); 2153 + 2154 + data_sentry->bcount = cpu_to_be16(block_size); 2155 + data_sentry->key = cpu_to_be32(data_key); 2156 + data_sentry->va = cpu_to_be64(data_va); 2157 + prot_sentry->bcount = cpu_to_be16(prot_size); 2158 + prot_sentry->key = cpu_to_be32(prot_key); 2159 + 2160 + if (prot_key == data_key && prot_va == data_va) { 2161 + /** 2162 + * The data and protection are interleaved 2163 + * in a single memory region 2164 + **/ 2165 + prot_sentry->va = cpu_to_be64(data_va + block_size); 2166 + prot_sentry->stride = cpu_to_be16(block_size + prot_size); 2167 + data_sentry->stride = prot_sentry->stride; 2168 + } else { 2169 + /* The data and protection are two different buffers */ 2170 + prot_sentry->va = cpu_to_be64(prot_va); 2171 + data_sentry->stride = cpu_to_be16(block_size); 2172 + prot_sentry->stride = cpu_to_be16(prot_size); 2173 + } 2174 + wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) + 2175 + sizeof(*prot_sentry), 64); 2176 + } 2177 + 2178 + *seg += wqe_size; 2179 + *size += wqe_size / 16; 2180 + if (unlikely((*seg == qp->sq.qend))) 2181 + *seg = mlx5_get_send_wqe(qp, 0); 2182 + 2183 + bsf = *seg; 2184 + ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); 2185 + if (ret) 2186 + return -EINVAL; 2187 + 2188 + *seg += sizeof(*bsf); 2189 + *size += sizeof(*bsf) / 16; 2190 + if (unlikely((*seg == qp->sq.qend))) 2191 + *seg = 
mlx5_get_send_wqe(qp, 0); 2192 + 2193 + return 0; 2194 + } 2195 + 2196 + static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, 2197 + struct ib_send_wr *wr, u32 nelements, 2198 + u32 length, u32 pdn) 2199 + { 2200 + struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; 2201 + u32 sig_key = sig_mr->rkey; 2202 + 2203 + memset(seg, 0, sizeof(*seg)); 2204 + 2205 + seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) | 2206 + MLX5_ACCESS_MODE_KLM; 2207 + seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); 2208 + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | 2209 + MLX5_MKEY_BSF_EN | pdn); 2210 + seg->len = cpu_to_be64(length); 2211 + seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements))); 2212 + seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); 2213 + } 2214 + 2215 + static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, 2216 + struct ib_send_wr *wr, u32 nelements) 2217 + { 2218 + memset(umr, 0, sizeof(*umr)); 2219 + 2220 + umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE; 2221 + umr->klm_octowords = get_klm_octo(nelements); 2222 + umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE); 2223 + umr->mkey_mask = sig_mkey_mask(); 2224 + } 2225 + 2226 + 2227 + static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, 2228 + void **seg, int *size) 2229 + { 2230 + struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr); 2231 + u32 pdn = get_pd(qp)->pdn; 2232 + u32 klm_oct_size; 2233 + int region_len, ret; 2234 + 2235 + if (unlikely(wr->num_sge != 1) || 2236 + unlikely(wr->wr.sig_handover.access_flags & 2237 + IB_ACCESS_REMOTE_ATOMIC) || 2238 + unlikely(!sig_mr->sig) || unlikely(!qp->signature_en)) 2239 + return -EINVAL; 2240 + 2241 + /* length of the protected region, data + protection */ 2242 + region_len = wr->sg_list->length; 2243 + if (wr->wr.sig_handover.prot) 2244 + region_len += wr->wr.sig_handover.prot->length; 2245 + 2246 + /** 2247 + * KLM octoword size - if protection 
was provided 2248 + * then we use strided block format (3 octowords), 2249 + * else we use single KLM (1 octoword) 2250 + **/ 2251 + klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1; 2252 + 2253 + set_sig_umr_segment(*seg, wr, klm_oct_size); 2254 + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); 2255 + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; 2256 + if (unlikely((*seg == qp->sq.qend))) 2257 + *seg = mlx5_get_send_wqe(qp, 0); 2258 + 2259 + set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn); 2260 + *seg += sizeof(struct mlx5_mkey_seg); 2261 + *size += sizeof(struct mlx5_mkey_seg) / 16; 2262 + if (unlikely((*seg == qp->sq.qend))) 2263 + *seg = mlx5_get_send_wqe(qp, 0); 2264 + 2265 + ret = set_sig_data_segment(wr, qp, seg, size); 2266 + if (ret) 2267 + return ret; 2268 + 2269 + return 0; 2270 + } 2271 + 2272 + static int set_psv_wr(struct ib_sig_domain *domain, 2273 + u32 psv_idx, void **seg, int *size) 2274 + { 2275 + struct mlx5_seg_set_psv *psv_seg = *seg; 2276 + 2277 + memset(psv_seg, 0, sizeof(*psv_seg)); 2278 + psv_seg->psv_num = cpu_to_be32(psv_idx); 2279 + switch (domain->sig_type) { 2280 + case IB_SIG_TYPE_T10_DIF: 2281 + psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 | 2282 + domain->sig.dif.app_tag); 2283 + psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); 2284 + 2285 + *seg += sizeof(*psv_seg); 2286 + *size += sizeof(*psv_seg) / 16; 2287 + break; 2288 + 2289 + default: 2290 + pr_err("Bad signature type given.\n"); 2291 + return 1; 2292 + } 2293 + 2294 + return 0; 2295 + } 2296 + 1984 2297 static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size, 1985 2298 struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp) 1986 2299 { ··· 2461 2108 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 2462 2109 struct mlx5_core_dev *mdev = &dev->mdev; 2463 2110 struct mlx5_ib_qp *qp = to_mqp(ibqp); 2111 + struct mlx5_ib_mr *mr; 2464 2112 struct mlx5_wqe_data_seg *dpseg; 2465 2113 struct mlx5_wqe_xrc_seg 
*xrc; 2466 2114 struct mlx5_bf *bf = qp->bf; ··· 2557 2203 num_sge = 0; 2558 2204 break; 2559 2205 2206 + case IB_WR_REG_SIG_MR: 2207 + qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; 2208 + mr = to_mmr(wr->wr.sig_handover.sig_mr); 2209 + 2210 + ctrl->imm = cpu_to_be32(mr->ibmr.rkey); 2211 + err = set_sig_umr_wr(wr, qp, &seg, &size); 2212 + if (err) { 2213 + mlx5_ib_warn(dev, "\n"); 2214 + *bad_wr = wr; 2215 + goto out; 2216 + } 2217 + 2218 + finish_wqe(qp, ctrl, size, idx, wr->wr_id, 2219 + nreq, get_fence(fence, wr), 2220 + next_fence, MLX5_OPCODE_UMR); 2221 + /* 2222 + * SET_PSV WQEs are not signaled and solicited 2223 + * on error 2224 + */ 2225 + wr->send_flags &= ~IB_SEND_SIGNALED; 2226 + wr->send_flags |= IB_SEND_SOLICITED; 2227 + err = begin_wqe(qp, &seg, &ctrl, wr, 2228 + &idx, &size, nreq); 2229 + if (err) { 2230 + mlx5_ib_warn(dev, "\n"); 2231 + err = -ENOMEM; 2232 + *bad_wr = wr; 2233 + goto out; 2234 + } 2235 + 2236 + err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem, 2237 + mr->sig->psv_memory.psv_idx, &seg, 2238 + &size); 2239 + if (err) { 2240 + mlx5_ib_warn(dev, "\n"); 2241 + *bad_wr = wr; 2242 + goto out; 2243 + } 2244 + 2245 + finish_wqe(qp, ctrl, size, idx, wr->wr_id, 2246 + nreq, get_fence(fence, wr), 2247 + next_fence, MLX5_OPCODE_SET_PSV); 2248 + err = begin_wqe(qp, &seg, &ctrl, wr, 2249 + &idx, &size, nreq); 2250 + if (err) { 2251 + mlx5_ib_warn(dev, "\n"); 2252 + err = -ENOMEM; 2253 + *bad_wr = wr; 2254 + goto out; 2255 + } 2256 + 2257 + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; 2258 + err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire, 2259 + mr->sig->psv_wire.psv_idx, &seg, 2260 + &size); 2261 + if (err) { 2262 + mlx5_ib_warn(dev, "\n"); 2263 + *bad_wr = wr; 2264 + goto out; 2265 + } 2266 + 2267 + finish_wqe(qp, ctrl, size, idx, wr->wr_id, 2268 + nreq, get_fence(fence, wr), 2269 + next_fence, MLX5_OPCODE_SET_PSV); 2270 + num_sge = 0; 2271 + goto skip_psv; 2272 + 2560 2273 default: 2561 2274 break; 2562 2275 } ··· 2707 2286 
finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, 2708 2287 get_fence(fence, wr), next_fence, 2709 2288 mlx5_ib_opcode[wr->opcode]); 2289 + skip_psv: 2710 2290 if (0) 2711 2291 dump_wqe(qp, idx, size); 2712 2292 }
+61
include/linux/mlx5/qp.h
··· 38 38 39 39 #define MLX5_INVALID_LKEY 0x100 40 40 #define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 5) 41 + #define MLX5_DIF_SIZE 8 42 + #define MLX5_STRIDE_BLOCK_OP 0x400 41 43 42 44 enum mlx5_qp_optpar { 43 45 MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, ··· 152 150 enum { 153 151 MLX5_RCV_DBR = 0, 154 152 MLX5_SND_DBR = 1, 153 + }; 154 + 155 + enum { 156 + MLX5_FLAGS_INLINE = 1<<7, 157 + MLX5_FLAGS_CHECK_FREE = 1<<5, 155 158 }; 156 159 157 160 struct mlx5_wqe_fmr_seg { ··· 284 277 285 278 struct mlx5_wqe_inline_seg { 286 279 __be32 byte_count; 280 + }; 281 + 282 + struct mlx5_bsf { 283 + struct mlx5_bsf_basic { 284 + u8 bsf_size_sbs; 285 + u8 check_byte_mask; 286 + union { 287 + u8 copy_byte_mask; 288 + u8 bs_selector; 289 + u8 rsvd_wflags; 290 + } wire; 291 + union { 292 + u8 bs_selector; 293 + u8 rsvd_mflags; 294 + } mem; 295 + __be32 raw_data_size; 296 + __be32 w_bfs_psv; 297 + __be32 m_bfs_psv; 298 + } basic; 299 + struct mlx5_bsf_ext { 300 + __be32 t_init_gen_pro_size; 301 + __be32 rsvd_epi_size; 302 + __be32 w_tfs_psv; 303 + __be32 m_tfs_psv; 304 + } ext; 305 + struct mlx5_bsf_inl { 306 + __be32 w_inl_vld; 307 + __be32 w_rsvd; 308 + __be64 w_block_format; 309 + __be32 m_inl_vld; 310 + __be32 m_rsvd; 311 + __be64 m_block_format; 312 + } inl; 313 + }; 314 + 315 + struct mlx5_klm { 316 + __be32 bcount; 317 + __be32 key; 318 + __be64 va; 319 + }; 320 + 321 + struct mlx5_stride_block_entry { 322 + __be16 stride; 323 + __be16 bcount; 324 + __be32 key; 325 + __be64 va; 326 + }; 327 + 328 + struct mlx5_stride_block_ctrl_seg { 329 + __be32 bcount_per_cycle; 330 + __be32 op; 331 + __be32 repeat_count; 332 + u16 rsvd; 333 + __be16 num_entries; 287 334 }; 288 335 289 336 struct mlx5_core_qp {