Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

crypto: inside-secure - Probe transform record cache RAM sizes

Probe the actual sizes of the transform record cache (TRC) data and
administration RAMs instead of making assumptions, and configure the
TRC based on the probed values.
This allows the driver to work with EIP197 HW that has TRC RAM
sizes different from those of the Marvell EIP197B/D variants.

Signed-off-by: Pascal van Leeuwen <pvanleeuwen@verimatrix.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

authored by

Pascal van Leeuwen and committed by
Herbert Xu
465527bc b2d92ac1

+203 -53
+191 -44
drivers/crypto/inside-secure/safexcel.c
··· 28 28 module_param(max_rings, uint, 0644); 29 29 MODULE_PARM_DESC(max_rings, "Maximum number of rings to use."); 30 30 31 - static void eip197_trc_cache_init(struct safexcel_crypto_priv *priv) 31 + static void eip197_trc_cache_setupvirt(struct safexcel_crypto_priv *priv) 32 32 { 33 - u32 val, htable_offset; 34 - int i, cs_rc_max, cs_ht_wc, cs_trc_rec_wc, cs_trc_lg_rec_wc; 35 - 36 - if (priv->version == EIP197D_MRVL) { 37 - cs_rc_max = EIP197D_CS_RC_MAX; 38 - cs_ht_wc = EIP197D_CS_HT_WC; 39 - cs_trc_rec_wc = EIP197D_CS_TRC_REC_WC; 40 - cs_trc_lg_rec_wc = EIP197D_CS_TRC_LG_REC_WC; 41 - } else { 42 - /* Default to minimum "safe" settings */ 43 - cs_rc_max = EIP197B_CS_RC_MAX; 44 - cs_ht_wc = EIP197B_CS_HT_WC; 45 - cs_trc_rec_wc = EIP197B_CS_TRC_REC_WC; 46 - cs_trc_lg_rec_wc = EIP197B_CS_TRC_LG_REC_WC; 47 - } 48 - 49 - /* Enable the record cache memory access */ 50 - val = readl(priv->base + EIP197_CS_RAM_CTRL); 51 - val &= ~EIP197_TRC_ENABLE_MASK; 52 - val |= EIP197_TRC_ENABLE_0; 53 - writel(val, priv->base + EIP197_CS_RAM_CTRL); 54 - 55 - /* Clear all ECC errors */ 56 - writel(0, priv->base + EIP197_TRC_ECCCTRL); 33 + int i; 57 34 58 35 /* 59 - * Make sure the cache memory is accessible by taking record cache into 60 - * reset. 36 + * Map all interfaces/rings to register index 0 37 + * so they can share contexts. Without this, the EIP197 will 38 + * assume each interface/ring to be in its own memory domain 39 + * i.e. have its own subset of UNIQUE memory addresses. 40 + * Which would cause records with the SAME memory address to 41 + * use DIFFERENT cache buffers, causing both poor cache utilization 42 + * AND serious coherence/invalidation issues. 
61 43 */ 62 - val = readl(priv->base + EIP197_TRC_PARAMS); 63 - val |= EIP197_TRC_PARAMS_SW_RESET; 64 - val &= ~EIP197_TRC_PARAMS_DATA_ACCESS; 65 - writel(val, priv->base + EIP197_TRC_PARAMS); 44 + for (i = 0; i < 4; i++) 45 + writel(0, priv->base + EIP197_FLUE_IFC_LUT(i)); 66 46 67 - /* Clear all records */ 47 + /* 48 + * Initialize other virtualization regs for cache 49 + * These may not be in their reset state ... 50 + */ 51 + for (i = 0; i < priv->config.rings; i++) { 52 + writel(0, priv->base + EIP197_FLUE_CACHEBASE_LO(i)); 53 + writel(0, priv->base + EIP197_FLUE_CACHEBASE_HI(i)); 54 + writel(EIP197_FLUE_CONFIG_MAGIC, 55 + priv->base + EIP197_FLUE_CONFIG(i)); 56 + } 57 + writel(0, priv->base + EIP197_FLUE_OFFSETS); 58 + writel(0, priv->base + EIP197_FLUE_ARC4_OFFSET); 59 + } 60 + 61 + static void eip197_trc_cache_banksel(struct safexcel_crypto_priv *priv, 62 + u32 addrmid, int *actbank) 63 + { 64 + u32 val; 65 + int curbank; 66 + 67 + curbank = addrmid >> 16; 68 + if (curbank != *actbank) { 69 + val = readl(priv->base + EIP197_CS_RAM_CTRL); 70 + val = (val & ~EIP197_CS_BANKSEL_MASK) | 71 + (curbank << EIP197_CS_BANKSEL_OFS); 72 + writel(val, priv->base + EIP197_CS_RAM_CTRL); 73 + *actbank = curbank; 74 + } 75 + } 76 + 77 + static u32 eip197_trc_cache_probe(struct safexcel_crypto_priv *priv, 78 + int maxbanks, u32 probemask) 79 + { 80 + u32 val, addrhi, addrlo, addrmid; 81 + int actbank; 82 + 83 + /* 84 + * And probe the actual size of the physically attached cache data RAM 85 + * Using a binary subdivision algorithm downto 32 byte cache lines. 
86 + */ 87 + addrhi = 1 << (16 + maxbanks); 88 + addrlo = 0; 89 + actbank = min(maxbanks - 1, 0); 90 + while ((addrhi - addrlo) > 32) { 91 + /* write marker to lowest address in top half */ 92 + addrmid = (addrhi + addrlo) >> 1; 93 + eip197_trc_cache_banksel(priv, addrmid, &actbank); 94 + writel((addrmid | (addrlo << 16)) & probemask, 95 + priv->base + EIP197_CLASSIFICATION_RAMS + 96 + (addrmid & 0xffff)); 97 + 98 + /* write marker to lowest address in bottom half */ 99 + eip197_trc_cache_banksel(priv, addrlo, &actbank); 100 + writel((addrlo | (addrhi << 16)) & probemask, 101 + priv->base + EIP197_CLASSIFICATION_RAMS + 102 + (addrlo & 0xffff)); 103 + 104 + /* read back marker from top half */ 105 + eip197_trc_cache_banksel(priv, addrmid, &actbank); 106 + val = readl(priv->base + EIP197_CLASSIFICATION_RAMS + 107 + (addrmid & 0xffff)); 108 + 109 + if (val == ((addrmid | (addrlo << 16)) & probemask)) { 110 + /* read back correct, continue with top half */ 111 + addrlo = addrmid; 112 + } else { 113 + /* not read back correct, continue with bottom half */ 114 + addrhi = addrmid; 115 + } 116 + } 117 + return addrhi; 118 + } 119 + 120 + static void eip197_trc_cache_clear(struct safexcel_crypto_priv *priv, 121 + int cs_rc_max, int cs_ht_wc) 122 + { 123 + int i; 124 + u32 htable_offset, val, offset; 125 + 126 + /* Clear all records in administration RAM */ 68 127 for (i = 0; i < cs_rc_max; i++) { 69 - u32 val, offset = EIP197_CLASSIFICATION_RAMS + i * EIP197_CS_RC_SIZE; 128 + offset = EIP197_CLASSIFICATION_RAMS + i * EIP197_CS_RC_SIZE; 70 129 71 130 writel(EIP197_CS_RC_NEXT(EIP197_RC_NULL) | 72 131 EIP197_CS_RC_PREV(EIP197_RC_NULL), 73 132 priv->base + offset); 74 133 75 - val = EIP197_CS_RC_NEXT(i+1) | EIP197_CS_RC_PREV(i-1); 134 + val = EIP197_CS_RC_NEXT(i + 1) | EIP197_CS_RC_PREV(i - 1); 76 135 if (i == 0) 77 136 val |= EIP197_CS_RC_PREV(EIP197_RC_NULL); 78 137 else if (i == cs_rc_max - 1) 79 138 val |= EIP197_CS_RC_NEXT(EIP197_RC_NULL); 80 - writel(val, priv->base + 
offset + sizeof(u32)); 139 + writel(val, priv->base + offset + 4); 140 + /* must also initialize the address key due to ECC! */ 141 + writel(0, priv->base + offset + 8); 142 + writel(0, priv->base + offset + 12); 81 143 } 82 144 83 145 /* Clear the hash table entries */ 84 146 htable_offset = cs_rc_max * EIP197_CS_RC_SIZE; 85 147 for (i = 0; i < cs_ht_wc; i++) 86 148 writel(GENMASK(29, 0), 87 - priv->base + EIP197_CLASSIFICATION_RAMS + htable_offset + i * sizeof(u32)); 149 + priv->base + EIP197_CLASSIFICATION_RAMS + 150 + htable_offset + i * sizeof(u32)); 151 + } 152 + 153 + static void eip197_trc_cache_init(struct safexcel_crypto_priv *priv) 154 + { 155 + u32 val, dsize, asize; 156 + int cs_rc_max, cs_ht_wc, cs_trc_rec_wc, cs_trc_lg_rec_wc; 157 + int cs_rc_abs_max, cs_ht_sz; 158 + int maxbanks; 159 + 160 + /* Setup (dummy) virtualization for cache */ 161 + eip197_trc_cache_setupvirt(priv); 162 + 163 + /* 164 + * Enable the record cache memory access and 165 + * probe the bank select width 166 + */ 167 + val = readl(priv->base + EIP197_CS_RAM_CTRL); 168 + val &= ~EIP197_TRC_ENABLE_MASK; 169 + val |= EIP197_TRC_ENABLE_0 | EIP197_CS_BANKSEL_MASK; 170 + writel(val, priv->base + EIP197_CS_RAM_CTRL); 171 + val = readl(priv->base + EIP197_CS_RAM_CTRL); 172 + maxbanks = ((val&EIP197_CS_BANKSEL_MASK)>>EIP197_CS_BANKSEL_OFS) + 1; 173 + 174 + /* Clear all ECC errors */ 175 + writel(0, priv->base + EIP197_TRC_ECCCTRL); 176 + 177 + /* 178 + * Make sure the cache memory is accessible by taking record cache into 179 + * reset. Need data memory access here, not admin access. 
180 + */ 181 + val = readl(priv->base + EIP197_TRC_PARAMS); 182 + val |= EIP197_TRC_PARAMS_SW_RESET | EIP197_TRC_PARAMS_DATA_ACCESS; 183 + writel(val, priv->base + EIP197_TRC_PARAMS); 184 + 185 + /* Probed data RAM size in bytes */ 186 + dsize = eip197_trc_cache_probe(priv, maxbanks, 0xffffffff); 187 + 188 + /* 189 + * Now probe the administration RAM size pretty much the same way 190 + * Except that only the lower 30 bits are writable and we don't need 191 + * bank selects 192 + */ 193 + val = readl(priv->base + EIP197_TRC_PARAMS); 194 + /* admin access now */ 195 + val &= ~(EIP197_TRC_PARAMS_DATA_ACCESS | EIP197_CS_BANKSEL_MASK); 196 + writel(val, priv->base + EIP197_TRC_PARAMS); 197 + 198 + /* Probed admin RAM size in admin words */ 199 + asize = eip197_trc_cache_probe(priv, 0, 0xbfffffff) >> 4; 200 + 201 + /* Clear any ECC errors detected while probing! */ 202 + writel(0, priv->base + EIP197_TRC_ECCCTRL); 203 + 204 + /* 205 + * Determine optimal configuration from RAM sizes 206 + * Note that we assume that the physical RAM configuration is sane 207 + * Therefore, we don't do any parameter error checking here ... 208 + */ 209 + 210 + /* For now, just use a single record format covering everything */ 211 + cs_trc_rec_wc = EIP197_CS_TRC_REC_WC; 212 + cs_trc_lg_rec_wc = EIP197_CS_TRC_REC_WC; 213 + 214 + /* 215 + * Step #1: How many records will physically fit? 216 + * Hard upper limit is 1023! 
217 + */ 218 + cs_rc_abs_max = min_t(uint, ((dsize >> 2) / cs_trc_lg_rec_wc), 1023); 219 + /* Step #2: Need at least 2 words in the admin RAM per record */ 220 + cs_rc_max = min_t(uint, cs_rc_abs_max, (asize >> 1)); 221 + /* Step #3: Determine log2 of hash table size */ 222 + cs_ht_sz = __fls(asize - cs_rc_max) - 2; 223 + /* Step #4: determine current size of hash table in dwords */ 224 + cs_ht_wc = 16<<cs_ht_sz; /* dwords, not admin words */ 225 + /* Step #5: add back excess words and see if we can fit more records */ 226 + cs_rc_max = min_t(uint, cs_rc_abs_max, asize - (cs_ht_wc >> 4)); 227 + 228 + /* Clear the cache RAMs */ 229 + eip197_trc_cache_clear(priv, cs_rc_max, cs_ht_wc); 88 230 89 231 /* Disable the record cache memory access */ 90 232 val = readl(priv->base + EIP197_CS_RAM_CTRL); ··· 246 104 /* Configure the record cache #2 */ 247 105 val = EIP197_TRC_PARAMS_RC_SZ_LARGE(cs_trc_lg_rec_wc) | 248 106 EIP197_TRC_PARAMS_BLK_TIMER_SPEED(1) | 249 - EIP197_TRC_PARAMS_HTABLE_SZ(2); 107 + EIP197_TRC_PARAMS_HTABLE_SZ(cs_ht_sz); 250 108 writel(val, priv->base + EIP197_TRC_PARAMS); 109 + 110 + dev_info(priv->dev, "TRC init: %dd,%da (%dr,%dh)\n", 111 + dsize, asize, cs_rc_max, cs_ht_wc + cs_ht_wc); 251 112 } 252 113 253 114 static void eip197_init_firmware(struct safexcel_crypto_priv *priv) ··· 274 129 /* clear the scratchpad RAM using 32 bit writes only */ 275 130 for (i = 0; i < EIP197_NUM_OF_SCRATCH_BLOCKS; i++) 276 131 writel(0, EIP197_PE(priv) + 277 - EIP197_PE_ICE_SCRATCH_RAM(pe) + (i<<2)); 132 + EIP197_PE_ICE_SCRATCH_RAM(pe) + (i << 2)); 278 133 279 134 /* Reset the IFPP engine to make its program mem accessible */ 280 135 writel(EIP197_PE_ICE_x_CTRL_SW_RESET | ··· 454 309 455 310 static int safexcel_hw_setup_cdesc_rings(struct safexcel_crypto_priv *priv) 456 311 { 457 - u32 hdw, cd_size_rnd, val; 312 + u32 cd_size_rnd, val; 458 313 int i, cd_fetch_cnt; 459 314 460 315 cd_size_rnd = (priv->config.cd_size + ··· 482 337 writel(EIP197_xDR_DESC_MODE_64BIT | 
(priv->config.cd_offset << 16) | 483 338 priv->config.cd_size, 484 339 EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_DESC_SIZE); 485 - writel(((cd_fetch_cnt * (cd_size_rnd << hdw)) << 16) | 340 + writel(((cd_fetch_cnt * 341 + (cd_size_rnd << priv->hwconfig.hwdataw)) << 16) | 486 342 (cd_fetch_cnt * priv->config.cd_offset), 487 343 EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_CFG); 488 344 ··· 502 356 503 357 static int safexcel_hw_setup_rdesc_rings(struct safexcel_crypto_priv *priv) 504 358 { 505 - u32 hdw, rd_size_rnd, val; 359 + u32 rd_size_rnd, val; 506 360 int i, rd_fetch_cnt; 507 361 508 362 /* determine number of RD's we can fetch into the FIFO as one block */ 509 363 rd_size_rnd = (EIP197_RD64_FETCH_SIZE + 510 - BIT(priv->hwconfig.hwdataw) - 1) >> 364 + (BIT(priv->hwconfig.hwdataw) - 1)) >> 511 365 priv->hwconfig.hwdataw; 512 366 if (priv->flags & SAFEXCEL_HW_EIP197) { 513 367 /* EIP197: try to fetch enough in 1 go to keep all pipes busy */ ··· 517 371 } else { 518 372 /* for the EIP97, just fetch all that fits minus 1 */ 519 373 rd_fetch_cnt = ((1 << priv->hwconfig.hwrfsize) / 520 - rd_size_rnd) - 1; 374 + rd_size_rnd) - 1; 521 375 } 522 376 523 377 for (i = 0; i < priv->config.rings; i++) { ··· 531 385 priv->config.rd_size, 532 386 EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_DESC_SIZE); 533 387 534 - writel(((rd_fetch_cnt * (rd_size_rnd << hdw)) << 16) | 388 + writel(((rd_fetch_cnt * 389 + (rd_size_rnd << priv->hwconfig.hwdataw)) << 16) | 535 390 (rd_fetch_cnt * priv->config.rd_offset), 536 391 EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_CFG); 537 392
+12 -9
drivers/crypto/inside-secure/safexcel.h
··· 178 178 #define EIP197_TRC_ECCADMINSTAT 0xf0838 179 179 #define EIP197_TRC_ECCDATASTAT 0xf083c 180 180 #define EIP197_TRC_ECCDATA 0xf0840 181 + #define EIP197_FLUE_CACHEBASE_LO(n) (0xf6000 + (32 * (n))) 182 + #define EIP197_FLUE_CACHEBASE_HI(n) (0xf6004 + (32 * (n))) 183 + #define EIP197_FLUE_CONFIG(n) (0xf6010 + (32 * (n))) 184 + #define EIP197_FLUE_OFFSETS 0xf6808 185 + #define EIP197_FLUE_ARC4_OFFSET 0xf680c 186 + #define EIP197_FLUE_IFC_LUT(n) (0xf6820 + (4 * (n))) 181 187 #define EIP197_CS_RAM_CTRL 0xf7ff0 182 188 183 189 /* EIP197_HIA_xDR_DESC_SIZE */ ··· 327 321 #define EIP197_ADDRESS_MODE BIT(8) 328 322 #define EIP197_CONTROL_MODE BIT(9) 329 323 324 + /* EIP197_FLUE_CONFIG */ 325 + #define EIP197_FLUE_CONFIG_MAGIC 0xc7000004 326 + 330 327 /* Context Control */ 331 328 struct safexcel_context_record { 332 329 u32 control0; ··· 404 395 #define EIP197_TRC_ENABLE_1 BIT(5) 405 396 #define EIP197_TRC_ENABLE_2 BIT(6) 406 397 #define EIP197_TRC_ENABLE_MASK GENMASK(6, 4) 398 + #define EIP197_CS_BANKSEL_MASK GENMASK(14, 12) 399 + #define EIP197_CS_BANKSEL_OFS 12 407 400 408 401 /* EIP197_TRC_PARAMS */ 409 402 #define EIP197_TRC_PARAMS_SW_RESET BIT(0) ··· 423 412 #define EIP197_TRC_PARAMS2_RC_SZ_SMALL(n) ((n) << 18) 424 413 425 414 /* Cache helpers */ 426 - #define EIP197B_CS_RC_MAX 52 427 - #define EIP197D_CS_RC_MAX 96 415 + #define EIP197_CS_TRC_REC_WC 64 428 416 #define EIP197_CS_RC_SIZE (4 * sizeof(u32)) 429 417 #define EIP197_CS_RC_NEXT(x) (x) 430 418 #define EIP197_CS_RC_PREV(x) ((x) << 10) 431 419 #define EIP197_RC_NULL 0x3ff 432 - #define EIP197B_CS_TRC_REC_WC 59 433 - #define EIP197D_CS_TRC_REC_WC 64 434 - #define EIP197B_CS_TRC_LG_REC_WC 73 435 - #define EIP197D_CS_TRC_LG_REC_WC 80 436 - #define EIP197B_CS_HT_WC 64 437 - #define EIP197D_CS_HT_WC 256 438 - 439 420 440 421 /* Result data */ 441 422 struct result_data_desc {