Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'v6.3-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto update from Herbert Xu:
"API:
- Use kmap_local instead of kmap_atomic
- Change request callback to take void pointer
- Print FIPS status in /proc/crypto (when enabled)

Algorithms:
- Add rfc4106/gcm support on arm64
- Add ARIA AVX2/512 support on x86

Drivers:
- Add TRNG driver for StarFive SoC
- Delete ux500/hash driver (subsumed by stm32/hash)
- Add zlib support in qat
- Add RSA support in aspeed"

* tag 'v6.3-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (156 commits)
crypto: x86/aria-avx - Do not use avx2 instructions
crypto: aspeed - Fix modular aspeed-acry
crypto: hisilicon/qm - fix coding style issues
crypto: hisilicon/qm - update comments to match function
crypto: hisilicon/qm - change function names
crypto: hisilicon/qm - use min() instead of min_t()
crypto: hisilicon/qm - remove some unused defines
crypto: proc - Print fips status
crypto: crypto4xx - Call dma_unmap_page when done
crypto: octeontx2 - Fix objects shared between several modules
crypto: nx - Fix sparse warnings
crypto: ecc - Silence sparse warning
tls: Pass rec instead of aead_req into tls_encrypt_done
crypto: api - Remove completion function scaffolding
tls: Remove completion function scaffolding
tipc: Remove completion function scaffolding
net: ipv6: Remove completion function scaffolding
net: ipv4: Remove completion function scaffolding
net: macsec: Remove completion function scaffolding
dm: Remove completion function scaffolding
...

+6382 -4056
+2 -2
Documentation/ABI/testing/sysfs-driver-qat
··· 1 1 What: /sys/bus/pci/devices/<BDF>/qat/state 2 2 Date: June 2022 3 - KernelVersion: 5.20 3 + KernelVersion: 6.0 4 4 Contact: qat-linux@intel.com 5 5 Description: (RW) Reports the current state of the QAT device. Write to 6 6 the file to start or stop the device. ··· 18 18 19 19 What: /sys/bus/pci/devices/<BDF>/qat/cfg_services 20 20 Date: June 2022 21 - KernelVersion: 5.20 21 + KernelVersion: 6.0 22 22 Contact: qat-linux@intel.com 23 23 Description: (RW) Reports the current configuration of the QAT device. 24 24 Write to the file to change the configured services.
+37
Documentation/devicetree/bindings/bus/aspeed,ast2600-ahbc.yaml
··· 1 + # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 + %YAML 1.2 3 + --- 4 + $id: http://devicetree.org/schemas/bus/aspeed,ast2600-ahbc.yaml# 5 + $schema: http://devicetree.org/meta-schemas/core.yaml# 6 + 7 + title: ASPEED Advanced High-Performance Bus Controller (AHBC) 8 + 9 + maintainers: 10 + - Neal Liu <neal_liu@aspeedtech.com> 11 + - Chia-Wei Wang <chiawei_wang@aspeedtech.com> 12 + 13 + description: | 14 + Advanced High-performance Bus Controller (AHBC) supports plenty of mechanisms 15 + including a priority arbiter, an address decoder and a data multiplexer 16 + to control the overall operations of Advanced High-performance Bus (AHB). 17 + 18 + properties: 19 + compatible: 20 + enum: 21 + - aspeed,ast2600-ahbc 22 + 23 + reg: 24 + maxItems: 1 25 + 26 + required: 27 + - compatible 28 + - reg 29 + 30 + additionalProperties: false 31 + 32 + examples: 33 + - | 34 + ahbc@1e600000 { 35 + compatible = "aspeed,ast2600-ahbc"; 36 + reg = <0x1e600000 0x100>; 37 + };
+25 -8
Documentation/devicetree/bindings/crypto/allwinner,sun8i-ce.yaml
··· 14 14 enum: 15 15 - allwinner,sun8i-h3-crypto 16 16 - allwinner,sun8i-r40-crypto 17 + - allwinner,sun20i-d1-crypto 17 18 - allwinner,sun50i-a64-crypto 18 19 - allwinner,sun50i-h5-crypto 19 20 - allwinner,sun50i-h6-crypto ··· 30 29 - description: Bus clock 31 30 - description: Module clock 32 31 - description: MBus clock 32 + - description: TRNG clock (RC oscillator) 33 33 minItems: 2 34 34 35 35 clock-names: ··· 38 36 - const: bus 39 37 - const: mod 40 38 - const: ram 39 + - const: trng 41 40 minItems: 2 42 41 43 42 resets: ··· 47 44 if: 48 45 properties: 49 46 compatible: 50 - const: allwinner,sun50i-h6-crypto 47 + enum: 48 + - allwinner,sun20i-d1-crypto 51 49 then: 52 50 properties: 53 51 clocks: 54 - minItems: 3 52 + minItems: 4 55 53 clock-names: 56 - minItems: 3 54 + minItems: 4 57 55 else: 58 - properties: 59 - clocks: 60 - maxItems: 2 61 - clock-names: 62 - maxItems: 2 56 + if: 57 + properties: 58 + compatible: 59 + const: allwinner,sun50i-h6-crypto 60 + then: 61 + properties: 62 + clocks: 63 + minItems: 3 64 + maxItems: 3 65 + clock-names: 66 + minItems: 3 67 + maxItems: 3 68 + else: 69 + properties: 70 + clocks: 71 + maxItems: 2 72 + clock-names: 73 + maxItems: 2 63 74 64 75 required: 65 76 - compatible
+49
Documentation/devicetree/bindings/crypto/aspeed,ast2600-acry.yaml
··· 1 + # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 + %YAML 1.2 3 + --- 4 + $id: http://devicetree.org/schemas/crypto/aspeed,ast2600-acry.yaml# 5 + $schema: http://devicetree.org/meta-schemas/core.yaml# 6 + 7 + title: ASPEED ACRY ECDSA/RSA Hardware Accelerator Engines 8 + 9 + maintainers: 10 + - Neal Liu <neal_liu@aspeedtech.com> 11 + 12 + description: 13 + The ACRY ECDSA/RSA engines is designed to accelerate the throughput 14 + of ECDSA/RSA signature and verification. Basically, ACRY can be 15 + divided into two independent engines - ECC Engine and RSA Engine. 16 + 17 + properties: 18 + compatible: 19 + enum: 20 + - aspeed,ast2600-acry 21 + 22 + reg: 23 + items: 24 + - description: acry base address & size 25 + - description: acry sram base address & size 26 + 27 + clocks: 28 + maxItems: 1 29 + 30 + interrupts: 31 + maxItems: 1 32 + 33 + required: 34 + - compatible 35 + - reg 36 + - clocks 37 + - interrupts 38 + 39 + additionalProperties: false 40 + 41 + examples: 42 + - | 43 + #include <dt-bindings/clock/ast2600-clock.h> 44 + acry: crypto@1e6fa000 { 45 + compatible = "aspeed,ast2600-acry"; 46 + reg = <0x1e6fa000 0x400>, <0x1e710000 0x1800>; 47 + interrupts = <160>; 48 + clocks = <&syscon ASPEED_CLK_GATE_RSACLK>; 49 + };
+22 -1
Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml
··· 6 6 7 7 title: STMicroelectronics STM32 HASH 8 8 9 + description: The STM32 HASH block is built on the HASH block found in 10 + the STn8820 SoC introduced in 2007, and subsequently used in the U8500 11 + SoC in 2010. 12 + 9 13 maintainers: 10 14 - Lionel Debieve <lionel.debieve@foss.st.com> 11 15 12 16 properties: 13 17 compatible: 14 18 enum: 19 + - st,stn8820-hash 20 + - stericsson,ux500-hash 15 21 - st,stm32f456-hash 16 22 - st,stm32f756-hash 17 23 ··· 47 41 maximum: 2 48 42 default: 0 49 43 44 + power-domains: 45 + maxItems: 1 46 + 50 47 required: 51 48 - compatible 52 49 - reg 53 50 - clocks 54 - - interrupts 51 + 52 + allOf: 53 + - if: 54 + properties: 55 + compatible: 56 + items: 57 + const: stericsson,ux500-hash 58 + then: 59 + properties: 60 + interrupts: false 61 + else: 62 + required: 63 + - interrupts 55 64 56 65 additionalProperties: false 57 66
+55
Documentation/devicetree/bindings/rng/starfive,jh7110-trng.yaml
··· 1 + # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 + %YAML 1.2 3 + --- 4 + $id: http://devicetree.org/schemas/rng/starfive,jh7110-trng.yaml# 5 + $schema: http://devicetree.org/meta-schemas/core.yaml# 6 + 7 + title: StarFive SoC TRNG Module 8 + 9 + maintainers: 10 + - Jia Jie Ho <jiajie.ho@starfivetech.com> 11 + 12 + properties: 13 + compatible: 14 + const: starfive,jh7110-trng 15 + 16 + reg: 17 + maxItems: 1 18 + 19 + clocks: 20 + items: 21 + - description: Hardware reference clock 22 + - description: AHB reference clock 23 + 24 + clock-names: 25 + items: 26 + - const: hclk 27 + - const: ahb 28 + 29 + resets: 30 + maxItems: 1 31 + 32 + interrupts: 33 + maxItems: 1 34 + 35 + required: 36 + - compatible 37 + - reg 38 + - clocks 39 + - clock-names 40 + - resets 41 + - interrupts 42 + 43 + additionalProperties: false 44 + 45 + examples: 46 + - | 47 + rng: rng@1600C000 { 48 + compatible = "starfive,jh7110-trng"; 49 + reg = <0x1600C000 0x4000>; 50 + clocks = <&clk 15>, <&clk 16>; 51 + clock-names = "hclk", "ahb"; 52 + resets = <&reset 3>; 53 + interrupts = <30>; 54 + }; 55 + ...
+7 -1
MAINTAINERS
··· 3149 3149 M: Neal Liu <neal_liu@aspeedtech.com> 3150 3150 L: linux-aspeed@lists.ozlabs.org (moderated for non-subscribers) 3151 3151 S: Maintained 3152 - F: Documentation/devicetree/bindings/crypto/aspeed,ast2500-hace.yaml 3152 + F: Documentation/devicetree/bindings/crypto/aspeed,* 3153 3153 F: drivers/crypto/aspeed/ 3154 3154 3155 3155 ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS ··· 19768 19768 F: Documentation/devicetree/bindings/reset/starfive,jh7100-reset.yaml 19769 19769 F: drivers/reset/reset-starfive-jh7100.c 19770 19770 F: include/dt-bindings/reset/starfive-jh7100.h 19771 + 19772 + STARFIVE TRNG DRIVER 19773 + M: Jia Jie Ho <jiajie.ho@starfivetech.com> 19774 + S: Supported 19775 + F: Documentation/devicetree/bindings/rng/starfive* 19776 + F: drivers/char/hw_random/jh7110-trng.c 19771 19777 19772 19778 STATIC BRANCH/CALL 19773 19779 M: Peter Zijlstra <peterz@infradead.org>
+13
arch/arm/boot/dts/aspeed-g6.dtsi
··· 98 98 <0x40466000 0x2000>; 99 99 }; 100 100 101 + ahbc: bus@1e600000 { 102 + compatible = "aspeed,ast2600-ahbc", "syscon"; 103 + reg = <0x1e600000 0x100>; 104 + }; 105 + 101 106 fmc: spi@1e620000 { 102 107 reg = <0x1e620000 0xc4>, <0x20000000 0x10000000>; 103 108 #address-cells = <1>; ··· 434 429 sbc: secure-boot-controller@1e6f2000 { 435 430 compatible = "aspeed,ast2600-sbc"; 436 431 reg = <0x1e6f2000 0x1000>; 432 + }; 433 + 434 + acry: crypto@1e6fa000 { 435 + compatible = "aspeed,ast2600-acry"; 436 + reg = <0x1e6fa000 0x400>, <0x1e710000 0x1800>; 437 + interrupts = <GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>; 438 + clocks = <&syscon ASPEED_CLK_GATE_RSACLK>; 439 + aspeed,ahbc = <&ahbc>; 437 440 }; 438 441 439 442 video: video@1e700000 {
+6 -8
arch/arm/crypto/sha1_glue.c
··· 21 21 22 22 #include "sha1.h" 23 23 24 - asmlinkage void sha1_block_data_order(u32 *digest, 25 - const unsigned char *data, unsigned int rounds); 24 + asmlinkage void sha1_block_data_order(struct sha1_state *digest, 25 + const u8 *data, int rounds); 26 26 27 27 int sha1_update_arm(struct shash_desc *desc, const u8 *data, 28 28 unsigned int len) 29 29 { 30 - /* make sure casting to sha1_block_fn() is safe */ 30 + /* make sure signature matches sha1_block_fn() */ 31 31 BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0); 32 32 33 - return sha1_base_do_update(desc, data, len, 34 - (sha1_block_fn *)sha1_block_data_order); 33 + return sha1_base_do_update(desc, data, len, sha1_block_data_order); 35 34 } 36 35 EXPORT_SYMBOL_GPL(sha1_update_arm); 37 36 38 37 static int sha1_final(struct shash_desc *desc, u8 *out) 39 38 { 40 - sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_block_data_order); 39 + sha1_base_do_finalize(desc, sha1_block_data_order); 41 40 return sha1_base_finish(desc, out); 42 41 } 43 42 44 43 int sha1_finup_arm(struct shash_desc *desc, const u8 *data, 45 44 unsigned int len, u8 *out) 46 45 { 47 - sha1_base_do_update(desc, data, len, 48 - (sha1_block_fn *)sha1_block_data_order); 46 + sha1_base_do_update(desc, data, len, sha1_block_data_order); 49 47 return sha1_final(desc, out); 50 48 } 51 49 EXPORT_SYMBOL_GPL(sha1_finup_arm);
+26 -31
arch/arm64/crypto/aes-ce-ccm-glue.c
··· 161 161 memcpy(buf, req->iv, AES_BLOCK_SIZE); 162 162 163 163 err = skcipher_walk_aead_encrypt(&walk, req, false); 164 - if (unlikely(err)) 165 - return err; 166 164 167 165 kernel_neon_begin(); 168 166 169 167 if (req->assoclen) 170 168 ccm_calculate_auth_mac(req, mac); 171 169 172 - do { 170 + while (walk.nbytes) { 173 171 u32 tail = walk.nbytes % AES_BLOCK_SIZE; 172 + bool final = walk.nbytes == walk.total; 174 173 175 - if (walk.nbytes == walk.total) 174 + if (final) 176 175 tail = 0; 177 176 178 177 ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr, 179 178 walk.nbytes - tail, ctx->key_enc, 180 179 num_rounds(ctx), mac, walk.iv); 181 180 182 - if (walk.nbytes == walk.total) 183 - ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); 181 + if (!final) 182 + kernel_neon_end(); 183 + err = skcipher_walk_done(&walk, tail); 184 + if (!final) 185 + kernel_neon_begin(); 186 + } 184 187 185 - kernel_neon_end(); 188 + ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); 186 189 187 - if (walk.nbytes) { 188 - err = skcipher_walk_done(&walk, tail); 189 - if (unlikely(err)) 190 - return err; 191 - if (unlikely(walk.nbytes)) 192 - kernel_neon_begin(); 193 - } 194 - } while (walk.nbytes); 190 + kernel_neon_end(); 195 191 196 192 /* copy authtag to end of dst */ 197 193 scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen, 198 194 crypto_aead_authsize(aead), 1); 199 195 200 - return 0; 196 + return err; 201 197 } 202 198 203 199 static int ccm_decrypt(struct aead_request *req) ··· 215 219 memcpy(buf, req->iv, AES_BLOCK_SIZE); 216 220 217 221 err = skcipher_walk_aead_decrypt(&walk, req, false); 218 - if (unlikely(err)) 219 - return err; 220 222 221 223 kernel_neon_begin(); 222 224 223 225 if (req->assoclen) 224 226 ccm_calculate_auth_mac(req, mac); 225 227 226 - do { 228 + while (walk.nbytes) { 227 229 u32 tail = walk.nbytes % AES_BLOCK_SIZE; 230 + bool final = walk.nbytes == walk.total; 228 231 229 - if (walk.nbytes == walk.total) 232 
+ if (final) 230 233 tail = 0; 231 234 232 235 ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr, 233 236 walk.nbytes - tail, ctx->key_enc, 234 237 num_rounds(ctx), mac, walk.iv); 235 238 236 - if (walk.nbytes == walk.total) 237 - ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); 239 + if (!final) 240 + kernel_neon_end(); 241 + err = skcipher_walk_done(&walk, tail); 242 + if (!final) 243 + kernel_neon_begin(); 244 + } 238 245 239 - kernel_neon_end(); 246 + ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); 240 247 241 - if (walk.nbytes) { 242 - err = skcipher_walk_done(&walk, tail); 243 - if (unlikely(err)) 244 - return err; 245 - if (unlikely(walk.nbytes)) 246 - kernel_neon_begin(); 247 - } 248 - } while (walk.nbytes); 248 + kernel_neon_end(); 249 + 250 + if (unlikely(err)) 251 + return err; 249 252 250 253 /* compare calculated auth tag with the stored one */ 251 254 scatterwalk_map_and_copy(buf, req->src,
+107 -38
arch/arm64/crypto/ghash-ce-glue.c
··· 9 9 #include <asm/simd.h> 10 10 #include <asm/unaligned.h> 11 11 #include <crypto/aes.h> 12 + #include <crypto/gcm.h> 12 13 #include <crypto/algapi.h> 13 14 #include <crypto/b128ops.h> 14 15 #include <crypto/gf128mul.h> ··· 29 28 30 29 #define GHASH_BLOCK_SIZE 16 31 30 #define GHASH_DIGEST_SIZE 16 32 - #define GCM_IV_SIZE 12 31 + 32 + #define RFC4106_NONCE_SIZE 4 33 33 34 34 struct ghash_key { 35 35 be128 k; ··· 45 43 46 44 struct gcm_aes_ctx { 47 45 struct crypto_aes_ctx aes_key; 46 + u8 nonce[RFC4106_NONCE_SIZE]; 48 47 struct ghash_key ghash_key; 49 48 }; 50 49 ··· 229 226 return 6 + ctx->key_length / 4; 230 227 } 231 228 232 - static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey, 233 - unsigned int keylen) 229 + static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *inkey, 230 + unsigned int keylen) 234 231 { 235 232 struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm); 236 233 u8 key[GHASH_BLOCK_SIZE]; ··· 261 258 return 0; 262 259 } 263 260 264 - static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) 261 + static int gcm_aes_setauthsize(struct crypto_aead *tfm, unsigned int authsize) 265 262 { 266 - switch (authsize) { 267 - case 4: 268 - case 8: 269 - case 12 ... 
16: 270 - break; 271 - default: 272 - return -EINVAL; 273 - } 274 - return 0; 263 + return crypto_gcm_check_authsize(authsize); 275 264 } 276 265 277 266 static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[], ··· 297 302 } 298 303 } 299 304 300 - static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[]) 305 + static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[], u32 len) 301 306 { 302 307 struct crypto_aead *aead = crypto_aead_reqtfm(req); 303 308 struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); 304 309 u8 buf[GHASH_BLOCK_SIZE]; 305 310 struct scatter_walk walk; 306 - u32 len = req->assoclen; 307 311 int buf_count = 0; 308 312 309 313 scatterwalk_start(&walk, req->src); ··· 332 338 } 333 339 } 334 340 335 - static int gcm_encrypt(struct aead_request *req) 341 + static int gcm_encrypt(struct aead_request *req, char *iv, int assoclen) 336 342 { 337 343 struct crypto_aead *aead = crypto_aead_reqtfm(req); 338 344 struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); 339 345 int nrounds = num_rounds(&ctx->aes_key); 340 346 struct skcipher_walk walk; 341 347 u8 buf[AES_BLOCK_SIZE]; 342 - u8 iv[AES_BLOCK_SIZE]; 343 348 u64 dg[2] = {}; 344 349 be128 lengths; 345 350 u8 *tag; 346 351 int err; 347 352 348 - lengths.a = cpu_to_be64(req->assoclen * 8); 353 + lengths.a = cpu_to_be64(assoclen * 8); 349 354 lengths.b = cpu_to_be64(req->cryptlen * 8); 350 355 351 - if (req->assoclen) 352 - gcm_calculate_auth_mac(req, dg); 356 + if (assoclen) 357 + gcm_calculate_auth_mac(req, dg, assoclen); 353 358 354 - memcpy(iv, req->iv, GCM_IV_SIZE); 355 - put_unaligned_be32(2, iv + GCM_IV_SIZE); 359 + put_unaligned_be32(2, iv + GCM_AES_IV_SIZE); 356 360 357 361 err = skcipher_walk_aead_encrypt(&walk, req, false); 358 362 ··· 395 403 return 0; 396 404 } 397 405 398 - static int gcm_decrypt(struct aead_request *req) 406 + static int gcm_decrypt(struct aead_request *req, char *iv, int assoclen) 399 407 { 400 408 struct crypto_aead *aead = 
crypto_aead_reqtfm(req); 401 409 struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); ··· 404 412 struct skcipher_walk walk; 405 413 u8 otag[AES_BLOCK_SIZE]; 406 414 u8 buf[AES_BLOCK_SIZE]; 407 - u8 iv[AES_BLOCK_SIZE]; 408 415 u64 dg[2] = {}; 409 416 be128 lengths; 410 417 u8 *tag; 411 418 int ret; 412 419 int err; 413 420 414 - lengths.a = cpu_to_be64(req->assoclen * 8); 421 + lengths.a = cpu_to_be64(assoclen * 8); 415 422 lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8); 416 423 417 - if (req->assoclen) 418 - gcm_calculate_auth_mac(req, dg); 424 + if (assoclen) 425 + gcm_calculate_auth_mac(req, dg, assoclen); 419 426 420 - memcpy(iv, req->iv, GCM_IV_SIZE); 421 - put_unaligned_be32(2, iv + GCM_IV_SIZE); 427 + put_unaligned_be32(2, iv + GCM_AES_IV_SIZE); 422 428 423 429 scatterwalk_map_and_copy(otag, req->src, 424 430 req->assoclen + req->cryptlen - authsize, ··· 461 471 return ret ? -EBADMSG : 0; 462 472 } 463 473 464 - static struct aead_alg gcm_aes_alg = { 465 - .ivsize = GCM_IV_SIZE, 474 + static int gcm_aes_encrypt(struct aead_request *req) 475 + { 476 + u8 iv[AES_BLOCK_SIZE]; 477 + 478 + memcpy(iv, req->iv, GCM_AES_IV_SIZE); 479 + return gcm_encrypt(req, iv, req->assoclen); 480 + } 481 + 482 + static int gcm_aes_decrypt(struct aead_request *req) 483 + { 484 + u8 iv[AES_BLOCK_SIZE]; 485 + 486 + memcpy(iv, req->iv, GCM_AES_IV_SIZE); 487 + return gcm_decrypt(req, iv, req->assoclen); 488 + } 489 + 490 + static int rfc4106_setkey(struct crypto_aead *tfm, const u8 *inkey, 491 + unsigned int keylen) 492 + { 493 + struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm); 494 + int err; 495 + 496 + keylen -= RFC4106_NONCE_SIZE; 497 + err = gcm_aes_setkey(tfm, inkey, keylen); 498 + if (err) 499 + return err; 500 + 501 + memcpy(ctx->nonce, inkey + keylen, RFC4106_NONCE_SIZE); 502 + return 0; 503 + } 504 + 505 + static int rfc4106_setauthsize(struct crypto_aead *tfm, unsigned int authsize) 506 + { 507 + return crypto_rfc4106_check_authsize(authsize); 508 + } 509 + 510 + static 
int rfc4106_encrypt(struct aead_request *req) 511 + { 512 + struct crypto_aead *aead = crypto_aead_reqtfm(req); 513 + struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); 514 + u8 iv[AES_BLOCK_SIZE]; 515 + 516 + memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE); 517 + memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE); 518 + 519 + return crypto_ipsec_check_assoclen(req->assoclen) ?: 520 + gcm_encrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE); 521 + } 522 + 523 + static int rfc4106_decrypt(struct aead_request *req) 524 + { 525 + struct crypto_aead *aead = crypto_aead_reqtfm(req); 526 + struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); 527 + u8 iv[AES_BLOCK_SIZE]; 528 + 529 + memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE); 530 + memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE); 531 + 532 + return crypto_ipsec_check_assoclen(req->assoclen) ?: 533 + gcm_decrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE); 534 + } 535 + 536 + static struct aead_alg gcm_aes_algs[] = {{ 537 + .ivsize = GCM_AES_IV_SIZE, 466 538 .chunksize = AES_BLOCK_SIZE, 467 539 .maxauthsize = AES_BLOCK_SIZE, 468 - .setkey = gcm_setkey, 469 - .setauthsize = gcm_setauthsize, 470 - .encrypt = gcm_encrypt, 471 - .decrypt = gcm_decrypt, 540 + .setkey = gcm_aes_setkey, 541 + .setauthsize = gcm_aes_setauthsize, 542 + .encrypt = gcm_aes_encrypt, 543 + .decrypt = gcm_aes_decrypt, 472 544 473 545 .base.cra_name = "gcm(aes)", 474 546 .base.cra_driver_name = "gcm-aes-ce", ··· 539 487 .base.cra_ctxsize = sizeof(struct gcm_aes_ctx) + 540 488 4 * sizeof(u64[2]), 541 489 .base.cra_module = THIS_MODULE, 542 - }; 490 + }, { 491 + .ivsize = GCM_RFC4106_IV_SIZE, 492 + .chunksize = AES_BLOCK_SIZE, 493 + .maxauthsize = AES_BLOCK_SIZE, 494 + .setkey = rfc4106_setkey, 495 + .setauthsize = rfc4106_setauthsize, 496 + .encrypt = rfc4106_encrypt, 497 + .decrypt = rfc4106_decrypt, 498 + 499 + .base.cra_name = "rfc4106(gcm(aes))", 500 + .base.cra_driver_name = "rfc4106-gcm-aes-ce", 501 + .base.cra_priority = 300, 502 + 
.base.cra_blocksize = 1, 503 + .base.cra_ctxsize = sizeof(struct gcm_aes_ctx) + 504 + 4 * sizeof(u64[2]), 505 + .base.cra_module = THIS_MODULE, 506 + }}; 543 507 544 508 static int __init ghash_ce_mod_init(void) 545 509 { ··· 563 495 return -ENODEV; 564 496 565 497 if (cpu_have_named_feature(PMULL)) 566 - return crypto_register_aead(&gcm_aes_alg); 498 + return crypto_register_aeads(gcm_aes_algs, 499 + ARRAY_SIZE(gcm_aes_algs)); 567 500 568 501 return crypto_register_shash(&ghash_alg); 569 502 } ··· 572 503 static void __exit ghash_ce_mod_exit(void) 573 504 { 574 505 if (cpu_have_named_feature(PMULL)) 575 - crypto_unregister_aead(&gcm_aes_alg); 506 + crypto_unregister_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs)); 576 507 else 577 508 crypto_unregister_shash(&ghash_alg); 578 509 }
+26 -22
arch/arm64/crypto/sm4-ce-ccm-glue.c
··· 166 166 unsigned int nbytes, u8 *mac)) 167 167 { 168 168 u8 __aligned(8) ctr0[SM4_BLOCK_SIZE]; 169 - int err; 169 + int err = 0; 170 170 171 171 /* preserve the initial ctr0 for the TAG */ 172 172 memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE); ··· 177 177 if (req->assoclen) 178 178 ccm_calculate_auth_mac(req, mac); 179 179 180 - do { 180 + while (walk->nbytes && walk->nbytes != walk->total) { 181 181 unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; 182 - const u8 *src = walk->src.virt.addr; 183 - u8 *dst = walk->dst.virt.addr; 184 182 185 - if (walk->nbytes == walk->total) 186 - tail = 0; 187 - 188 - if (walk->nbytes - tail) 189 - sm4_ce_ccm_crypt(rkey_enc, dst, src, walk->iv, 190 - walk->nbytes - tail, mac); 191 - 192 - if (walk->nbytes == walk->total) 193 - sm4_ce_ccm_final(rkey_enc, ctr0, mac); 183 + sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr, 184 + walk->src.virt.addr, walk->iv, 185 + walk->nbytes - tail, mac); 194 186 195 187 kernel_neon_end(); 196 188 197 - if (walk->nbytes) { 198 - err = skcipher_walk_done(walk, tail); 199 - if (err) 200 - return err; 201 - if (walk->nbytes) 202 - kernel_neon_begin(); 203 - } 204 - } while (walk->nbytes > 0); 189 + err = skcipher_walk_done(walk, tail); 205 190 206 - return 0; 191 + kernel_neon_begin(); 192 + } 193 + 194 + if (walk->nbytes) { 195 + sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr, 196 + walk->src.virt.addr, walk->iv, 197 + walk->nbytes, mac); 198 + 199 + sm4_ce_ccm_final(rkey_enc, ctr0, mac); 200 + 201 + kernel_neon_end(); 202 + 203 + err = skcipher_walk_done(walk, 0); 204 + } else { 205 + sm4_ce_ccm_final(rkey_enc, ctr0, mac); 206 + 207 + kernel_neon_end(); 208 + } 209 + 210 + return err; 207 211 } 208 212 209 213 static int ccm_encrypt(struct aead_request *req)
+25 -26
arch/arm64/crypto/sm4-ce-gcm-glue.c
··· 135 135 } 136 136 137 137 static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk, 138 - struct sm4_gcm_ctx *ctx, u8 ghash[], 138 + u8 ghash[], int err, 139 139 void (*sm4_ce_pmull_gcm_crypt)(const u32 *rkey_enc, 140 140 u8 *dst, const u8 *src, u8 *iv, 141 141 unsigned int nbytes, u8 *ghash, 142 142 const u8 *ghash_table, const u8 *lengths)) 143 143 { 144 + struct crypto_aead *aead = crypto_aead_reqtfm(req); 145 + struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead); 144 146 u8 __aligned(8) iv[SM4_BLOCK_SIZE]; 145 147 be128 __aligned(8) lengths; 146 - int err; 147 148 148 149 memset(ghash, 0, SM4_BLOCK_SIZE); 149 150 150 151 lengths.a = cpu_to_be64(req->assoclen * 8); 151 152 lengths.b = cpu_to_be64(walk->total * 8); 152 153 153 - memcpy(iv, walk->iv, GCM_IV_SIZE); 154 + memcpy(iv, req->iv, GCM_IV_SIZE); 154 155 put_unaligned_be32(2, iv + GCM_IV_SIZE); 155 156 156 157 kernel_neon_begin(); ··· 159 158 if (req->assoclen) 160 159 gcm_calculate_auth_mac(req, ghash); 161 160 162 - do { 161 + while (walk->nbytes) { 163 162 unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; 164 163 const u8 *src = walk->src.virt.addr; 165 164 u8 *dst = walk->dst.virt.addr; 166 165 167 166 if (walk->nbytes == walk->total) { 168 - tail = 0; 169 - 170 167 sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv, 171 168 walk->nbytes, ghash, 172 169 ctx->ghash_table, 173 170 (const u8 *)&lengths); 174 - } else if (walk->nbytes - tail) { 175 - sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv, 176 - walk->nbytes - tail, ghash, 177 - ctx->ghash_table, NULL); 171 + 172 + kernel_neon_end(); 173 + 174 + return skcipher_walk_done(walk, 0); 178 175 } 176 + 177 + sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv, 178 + walk->nbytes - tail, ghash, 179 + ctx->ghash_table, NULL); 179 180 180 181 kernel_neon_end(); 181 182 182 183 err = skcipher_walk_done(walk, tail); 183 - if (err) 184 - return err; 185 - if (walk->nbytes) 186 - kernel_neon_begin(); 187 - } while (walk->nbytes > 
0); 188 184 189 - return 0; 185 + kernel_neon_begin(); 186 + } 187 + 188 + sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, NULL, NULL, iv, 189 + walk->nbytes, ghash, ctx->ghash_table, 190 + (const u8 *)&lengths); 191 + 192 + kernel_neon_end(); 193 + 194 + return err; 190 195 } 191 196 192 197 static int gcm_encrypt(struct aead_request *req) 193 198 { 194 199 struct crypto_aead *aead = crypto_aead_reqtfm(req); 195 - struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead); 196 200 u8 __aligned(8) ghash[SM4_BLOCK_SIZE]; 197 201 struct skcipher_walk walk; 198 202 int err; 199 203 200 204 err = skcipher_walk_aead_encrypt(&walk, req, false); 201 - if (err) 202 - return err; 203 - 204 - err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_enc); 205 + err = gcm_crypt(req, &walk, ghash, err, sm4_ce_pmull_gcm_enc); 205 206 if (err) 206 207 return err; 207 208 ··· 218 215 { 219 216 struct crypto_aead *aead = crypto_aead_reqtfm(req); 220 217 unsigned int authsize = crypto_aead_authsize(aead); 221 - struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead); 222 218 u8 __aligned(8) ghash[SM4_BLOCK_SIZE]; 223 219 u8 authtag[SM4_BLOCK_SIZE]; 224 220 struct skcipher_walk walk; 225 221 int err; 226 222 227 223 err = skcipher_walk_aead_decrypt(&walk, req, false); 228 - if (err) 229 - return err; 230 - 231 - err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_dec); 224 + err = gcm_crypt(req, &walk, ghash, err, sm4_ce_pmull_gcm_dec); 232 225 if (err) 233 226 return err; 234 227
-4
arch/s390/crypto/aes_s390.c
··· 398 398 if (err) 399 399 return err; 400 400 401 - /* In fips mode only 128 bit or 256 bit keys are valid */ 402 - if (fips_enabled && key_len != 32 && key_len != 64) 403 - return -EINVAL; 404 - 405 401 /* Pick the correct function code based on the key length */ 406 402 fc = (key_len == 32) ? CPACF_KM_XTS_128 : 407 403 (key_len == 64) ? CPACF_KM_XTS_256 : 0;
+1 -1
arch/s390/crypto/paes_s390.c
··· 474 474 return rc; 475 475 476 476 /* 477 - * xts_check_key verifies the key length is not odd and makes 477 + * xts_verify_key verifies the key length is not odd and makes 478 478 * sure that the two keys are not the same. This can be done 479 479 * on the two protected keys as well 480 480 */
+5
arch/x86/Kconfig.assembler
··· 19 19 def_bool $(as-instr,tpause %ecx) 20 20 help 21 21 Supported by binutils >= 2.31.1 and LLVM integrated assembler >= V7 22 + 23 + config AS_GFNI 24 + def_bool $(as-instr,vgf2p8mulb %xmm0$(comma)%xmm1$(comma)%xmm2) 25 + help 26 + Supported by binutils >= 2.30 and LLVM integrated assembler
+38
arch/x86/crypto/Kconfig
··· 304 304 305 305 Processes 16 blocks in parallel. 306 306 307 + config CRYPTO_ARIA_AESNI_AVX2_X86_64 308 + tristate "Ciphers: ARIA with modes: ECB, CTR (AES-NI/AVX2/GFNI)" 309 + depends on X86 && 64BIT 310 + select CRYPTO_SKCIPHER 311 + select CRYPTO_SIMD 312 + select CRYPTO_ALGAPI 313 + select CRYPTO_ARIA 314 + select CRYPTO_ARIA_AESNI_AVX_X86_64 315 + help 316 + Length-preserving cipher: ARIA cipher algorithms 317 + (RFC 5794) with ECB and CTR modes 318 + 319 + Architecture: x86_64 using: 320 + - AES-NI (AES New Instructions) 321 + - AVX2 (Advanced Vector Extensions) 322 + - GFNI (Galois Field New Instructions) 323 + 324 + Processes 32 blocks in parallel. 325 + 326 + config CRYPTO_ARIA_GFNI_AVX512_X86_64 327 + tristate "Ciphers: ARIA with modes: ECB, CTR (AVX512/GFNI)" 328 + depends on X86 && 64BIT && AS_AVX512 && AS_GFNI 329 + select CRYPTO_SKCIPHER 330 + select CRYPTO_SIMD 331 + select CRYPTO_ALGAPI 332 + select CRYPTO_ARIA 333 + select CRYPTO_ARIA_AESNI_AVX_X86_64 334 + select CRYPTO_ARIA_AESNI_AVX2_X86_64 335 + help 336 + Length-preserving cipher: ARIA cipher algorithms 337 + (RFC 5794) with ECB and CTR modes 338 + 339 + Architecture: x86_64 using: 340 + - AVX512 (Advanced Vector Extensions) 341 + - GFNI (Galois Field New Instructions) 342 + 343 + Processes 64 blocks in parallel. 344 + 307 345 config CRYPTO_CHACHA20_X86_64 308 346 tristate "Ciphers: ChaCha20, XChaCha20, XChaCha12 (SSSE3/AVX2/AVX-512VL)" 309 347 depends on X86 && 64BIT
+6
arch/x86/crypto/Makefile
··· 103 103 obj-$(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64) += aria-aesni-avx-x86_64.o 104 104 aria-aesni-avx-x86_64-y := aria-aesni-avx-asm_64.o aria_aesni_avx_glue.o 105 105 106 + obj-$(CONFIG_CRYPTO_ARIA_AESNI_AVX2_X86_64) += aria-aesni-avx2-x86_64.o 107 + aria-aesni-avx2-x86_64-y := aria-aesni-avx2-asm_64.o aria_aesni_avx2_glue.o 108 + 109 + obj-$(CONFIG_CRYPTO_ARIA_GFNI_AVX512_X86_64) += aria-gfni-avx512-x86_64.o 110 + aria-gfni-avx512-x86_64-y := aria-gfni-avx512-asm_64.o aria_gfni_avx512_glue.o 111 + 106 112 quiet_cmd_perlasm = PERLASM $@ 107 113 cmd_perlasm = $(PERL) $< > $@ 108 114 $(obj)/%.S: $(src)/%.pl FORCE
+115 -55
arch/x86/crypto/aria-aesni-avx-asm_64.S
··· 8 8 9 9 #include <linux/linkage.h> 10 10 #include <linux/cfi_types.h> 11 + #include <asm/asm-offsets.h> 11 12 #include <asm/frame.h> 12 - 13 - /* struct aria_ctx: */ 14 - #define enc_key 0 15 - #define dec_key 272 16 - #define rounds 544 17 13 18 14 /* register macros */ 19 15 #define CTX %rdi ··· 267 271 268 272 #define aria_ark_8way(x0, x1, x2, x3, \ 269 273 x4, x5, x6, x7, \ 270 - t0, rk, idx, round) \ 274 + t0, t1, t2, rk, \ 275 + idx, round) \ 271 276 /* AddRoundKey */ \ 272 - vpbroadcastb ((round * 16) + idx + 3)(rk), t0; \ 273 - vpxor t0, x0, x0; \ 274 - vpbroadcastb ((round * 16) + idx + 2)(rk), t0; \ 275 - vpxor t0, x1, x1; \ 276 - vpbroadcastb ((round * 16) + idx + 1)(rk), t0; \ 277 - vpxor t0, x2, x2; \ 278 - vpbroadcastb ((round * 16) + idx + 0)(rk), t0; \ 279 - vpxor t0, x3, x3; \ 280 - vpbroadcastb ((round * 16) + idx + 7)(rk), t0; \ 281 - vpxor t0, x4, x4; \ 282 - vpbroadcastb ((round * 16) + idx + 6)(rk), t0; \ 283 - vpxor t0, x5, x5; \ 284 - vpbroadcastb ((round * 16) + idx + 5)(rk), t0; \ 285 - vpxor t0, x6, x6; \ 286 - vpbroadcastb ((round * 16) + idx + 4)(rk), t0; \ 287 - vpxor t0, x7, x7; 277 + vbroadcastss ((round * 16) + idx + 0)(rk), t0; \ 278 + vpsrld $24, t0, t2; \ 279 + vpshufb t1, t2, t2; \ 280 + vpxor t2, x0, x0; \ 281 + vpsrld $16, t0, t2; \ 282 + vpshufb t1, t2, t2; \ 283 + vpxor t2, x1, x1; \ 284 + vpsrld $8, t0, t2; \ 285 + vpshufb t1, t2, t2; \ 286 + vpxor t2, x2, x2; \ 287 + vpshufb t1, t0, t2; \ 288 + vpxor t2, x3, x3; \ 289 + vbroadcastss ((round * 16) + idx + 4)(rk), t0; \ 290 + vpsrld $24, t0, t2; \ 291 + vpshufb t1, t2, t2; \ 292 + vpxor t2, x4, x4; \ 293 + vpsrld $16, t0, t2; \ 294 + vpshufb t1, t2, t2; \ 295 + vpxor t2, x5, x5; \ 296 + vpsrld $8, t0, t2; \ 297 + vpshufb t1, t2, t2; \ 298 + vpxor t2, x6, x6; \ 299 + vpshufb t1, t0, t2; \ 300 + vpxor t2, x7, x7; 288 301 302 + #ifdef CONFIG_AS_GFNI 289 303 #define aria_sbox_8way_gfni(x0, x1, x2, x3, \ 290 304 x4, x5, x6, x7, \ 291 305 t0, t1, t2, t3, \ 292 306 t4, t5, t6, 
t7) \ 293 - vpbroadcastq .Ltf_s2_bitmatrix, t0; \ 294 - vpbroadcastq .Ltf_inv_bitmatrix, t1; \ 295 - vpbroadcastq .Ltf_id_bitmatrix, t2; \ 296 - vpbroadcastq .Ltf_aff_bitmatrix, t3; \ 297 - vpbroadcastq .Ltf_x2_bitmatrix, t4; \ 307 + vmovdqa .Ltf_s2_bitmatrix, t0; \ 308 + vmovdqa .Ltf_inv_bitmatrix, t1; \ 309 + vmovdqa .Ltf_id_bitmatrix, t2; \ 310 + vmovdqa .Ltf_aff_bitmatrix, t3; \ 311 + vmovdqa .Ltf_x2_bitmatrix, t4; \ 298 312 vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \ 299 313 vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \ 300 314 vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \ ··· 318 312 vgf2p8affineinvqb $0, t2, x3, x3; \ 319 313 vgf2p8affineinvqb $0, t2, x7, x7 320 314 315 + #endif /* CONFIG_AS_GFNI */ 316 + 321 317 #define aria_sbox_8way(x0, x1, x2, x3, \ 322 318 x4, x5, x6, x7, \ 323 319 t0, t1, t2, t3, \ 324 320 t4, t5, t6, t7) \ 325 - vpxor t7, t7, t7; \ 326 321 vmovdqa .Linv_shift_row, t0; \ 327 322 vmovdqa .Lshift_row, t1; \ 328 - vpbroadcastd .L0f0f0f0f, t6; \ 323 + vbroadcastss .L0f0f0f0f, t6; \ 329 324 vmovdqa .Ltf_lo__inv_aff__and__s2, t2; \ 330 325 vmovdqa .Ltf_hi__inv_aff__and__s2, t3; \ 331 326 vmovdqa .Ltf_lo__x2__and__fwd_aff, t4; \ ··· 421 414 y0, y1, y2, y3, \ 422 415 y4, y5, y6, y7, \ 423 416 mem_tmp, rk, round) \ 417 + vpxor y7, y7, y7; \ 424 418 aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 425 - y0, rk, 8, round); \ 419 + y0, y7, y2, rk, 8, round); \ 426 420 \ 427 421 aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ 428 422 y0, y1, y2, y3, y4, y5, y6, y7); \ ··· 438 430 x4, x5, x6, x7, \ 439 431 mem_tmp, 0); \ 440 432 aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 441 - y0, rk, 0, round); \ 433 + y0, y7, y2, rk, 0, round); \ 442 434 \ 443 435 aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ 444 436 y0, y1, y2, y3, y4, y5, y6, y7); \ ··· 476 468 y0, y1, y2, y3, \ 477 469 y4, y5, y6, y7, \ 478 470 mem_tmp, rk, round) \ 471 + vpxor y7, y7, y7; \ 479 472 aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 480 - y0, rk, 8, round); \ 473 + 
y0, y7, y2, rk, 8, round); \ 481 474 \ 482 475 aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 483 476 y0, y1, y2, y3, y4, y5, y6, y7); \ ··· 493 484 x4, x5, x6, x7, \ 494 485 mem_tmp, 0); \ 495 486 aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 496 - y0, rk, 0, round); \ 487 + y0, y7, y2, rk, 0, round); \ 497 488 \ 498 489 aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 499 490 y0, y1, y2, y3, y4, y5, y6, y7); \ ··· 531 522 y0, y1, y2, y3, \ 532 523 y4, y5, y6, y7, \ 533 524 mem_tmp, rk, round, last_round) \ 525 + vpxor y7, y7, y7; \ 534 526 aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 535 - y0, rk, 8, round); \ 527 + y0, y7, y2, rk, 8, round); \ 536 528 \ 537 529 aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ 538 530 y0, y1, y2, y3, y4, y5, y6, y7); \ 539 531 \ 540 532 aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 541 - y0, rk, 8, last_round); \ 533 + y0, y7, y2, rk, 8, last_round); \ 542 534 \ 543 535 aria_store_state_8way(x0, x1, x2, x3, \ 544 536 x4, x5, x6, x7, \ ··· 549 539 x4, x5, x6, x7, \ 550 540 mem_tmp, 0); \ 551 541 aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 552 - y0, rk, 0, round); \ 542 + y0, y7, y2, rk, 0, round); \ 553 543 \ 554 544 aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ 555 545 y0, y1, y2, y3, y4, y5, y6, y7); \ 556 546 \ 557 547 aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 558 - y0, rk, 0, last_round); \ 548 + y0, y7, y2, rk, 0, last_round); \ 559 549 \ 560 550 aria_load_state_8way(y0, y1, y2, y3, \ 561 551 y4, y5, y6, y7, \ 562 552 mem_tmp, 8); 563 553 554 + #ifdef CONFIG_AS_GFNI 564 555 #define aria_fe_gfni(x0, x1, x2, x3, \ 565 556 x4, x5, x6, x7, \ 566 557 y0, y1, y2, y3, \ 567 558 y4, y5, y6, y7, \ 568 559 mem_tmp, rk, round) \ 560 + vpxor y7, y7, y7; \ 569 561 aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 570 - y0, rk, 8, round); \ 562 + y0, y7, y2, rk, 8, round); \ 571 563 \ 572 564 aria_sbox_8way_gfni(x2, x3, x0, x1, \ 573 565 x6, x7, x4, x5, \ ··· 586 574 x4, x5, x6, x7, \ 587 575 mem_tmp, 0); \ 588 576 
aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 589 - y0, rk, 0, round); \ 577 + y0, y7, y2, rk, 0, round); \ 590 578 \ 591 579 aria_sbox_8way_gfni(x2, x3, x0, x1, \ 592 580 x6, x7, x4, x5, \ ··· 626 614 y0, y1, y2, y3, \ 627 615 y4, y5, y6, y7, \ 628 616 mem_tmp, rk, round) \ 617 + vpxor y7, y7, y7; \ 629 618 aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 630 - y0, rk, 8, round); \ 619 + y0, y7, y2, rk, 8, round); \ 631 620 \ 632 621 aria_sbox_8way_gfni(x0, x1, x2, x3, \ 633 622 x4, x5, x6, x7, \ ··· 645 632 x4, x5, x6, x7, \ 646 633 mem_tmp, 0); \ 647 634 aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 648 - y0, rk, 0, round); \ 635 + y0, y7, y2, rk, 0, round); \ 649 636 \ 650 637 aria_sbox_8way_gfni(x0, x1, x2, x3, \ 651 638 x4, x5, x6, x7, \ ··· 685 672 y0, y1, y2, y3, \ 686 673 y4, y5, y6, y7, \ 687 674 mem_tmp, rk, round, last_round) \ 675 + vpxor y7, y7, y7; \ 688 676 aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 689 - y0, rk, 8, round); \ 677 + y0, y7, y2, rk, 8, round); \ 690 678 \ 691 679 aria_sbox_8way_gfni(x2, x3, x0, x1, \ 692 680 x6, x7, x4, x5, \ ··· 695 681 y4, y5, y6, y7); \ 696 682 \ 697 683 aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 698 - y0, rk, 8, last_round); \ 684 + y0, y7, y2, rk, 8, last_round); \ 699 685 \ 700 686 aria_store_state_8way(x0, x1, x2, x3, \ 701 687 x4, x5, x6, x7, \ ··· 705 691 x4, x5, x6, x7, \ 706 692 mem_tmp, 0); \ 707 693 aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 708 - y0, rk, 0, round); \ 694 + y0, y7, y2, rk, 0, round); \ 709 695 \ 710 696 aria_sbox_8way_gfni(x2, x3, x0, x1, \ 711 697 x6, x7, x4, x5, \ ··· 713 699 y4, y5, y6, y7); \ 714 700 \ 715 701 aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 716 - y0, rk, 0, last_round); \ 702 + y0, y7, y2, rk, 0, last_round); \ 717 703 \ 718 704 aria_load_state_8way(y0, y1, y2, y3, \ 719 705 y4, y5, y6, y7, \ 720 706 mem_tmp, 8); 707 + 708 + #endif /* CONFIG_AS_GFNI */ 721 709 722 710 /* NB: section is mergeable, all elements must be aligned 16-byte blocks */ 723 711 
.section .rodata.cst16, "aM", @progbits, 16 ··· 772 756 .Ltf_hi__x2__and__fwd_aff: 773 757 .octa 0x3F893781E95FE1576CDA64D2BA0CB204 774 758 759 + #ifdef CONFIG_AS_GFNI 775 760 .section .rodata.cst8, "aM", @progbits, 8 776 761 .align 8 777 762 /* AES affine: */ 778 763 #define tf_aff_const BV8(1, 1, 0, 0, 0, 1, 1, 0) 779 764 .Ltf_aff_bitmatrix: 765 + .quad BM8X8(BV8(1, 0, 0, 0, 1, 1, 1, 1), 766 + BV8(1, 1, 0, 0, 0, 1, 1, 1), 767 + BV8(1, 1, 1, 0, 0, 0, 1, 1), 768 + BV8(1, 1, 1, 1, 0, 0, 0, 1), 769 + BV8(1, 1, 1, 1, 1, 0, 0, 0), 770 + BV8(0, 1, 1, 1, 1, 1, 0, 0), 771 + BV8(0, 0, 1, 1, 1, 1, 1, 0), 772 + BV8(0, 0, 0, 1, 1, 1, 1, 1)) 780 773 .quad BM8X8(BV8(1, 0, 0, 0, 1, 1, 1, 1), 781 774 BV8(1, 1, 0, 0, 0, 1, 1, 1), 782 775 BV8(1, 1, 1, 0, 0, 0, 1, 1), ··· 806 781 BV8(0, 0, 1, 0, 1, 0, 0, 1), 807 782 BV8(1, 0, 0, 1, 0, 1, 0, 0), 808 783 BV8(0, 1, 0, 0, 1, 0, 1, 0)) 784 + .quad BM8X8(BV8(0, 0, 1, 0, 0, 1, 0, 1), 785 + BV8(1, 0, 0, 1, 0, 0, 1, 0), 786 + BV8(0, 1, 0, 0, 1, 0, 0, 1), 787 + BV8(1, 0, 1, 0, 0, 1, 0, 0), 788 + BV8(0, 1, 0, 1, 0, 0, 1, 0), 789 + BV8(0, 0, 1, 0, 1, 0, 0, 1), 790 + BV8(1, 0, 0, 1, 0, 1, 0, 0), 791 + BV8(0, 1, 0, 0, 1, 0, 1, 0)) 809 792 810 793 /* S2: */ 811 794 #define tf_s2_const BV8(0, 1, 0, 0, 0, 1, 1, 1) 812 795 .Ltf_s2_bitmatrix: 796 + .quad BM8X8(BV8(0, 1, 0, 1, 0, 1, 1, 1), 797 + BV8(0, 0, 1, 1, 1, 1, 1, 1), 798 + BV8(1, 1, 1, 0, 1, 1, 0, 1), 799 + BV8(1, 1, 0, 0, 0, 0, 1, 1), 800 + BV8(0, 1, 0, 0, 0, 0, 1, 1), 801 + BV8(1, 1, 0, 0, 1, 1, 1, 0), 802 + BV8(0, 1, 1, 0, 0, 0, 1, 1), 803 + BV8(1, 1, 1, 1, 0, 1, 1, 0)) 813 804 .quad BM8X8(BV8(0, 1, 0, 1, 0, 1, 1, 1), 814 805 BV8(0, 0, 1, 1, 1, 1, 1, 1), 815 806 BV8(1, 1, 1, 0, 1, 1, 0, 1), ··· 846 805 BV8(0, 1, 1, 0, 1, 0, 1, 1), 847 806 BV8(1, 0, 1, 1, 1, 1, 0, 1), 848 807 BV8(1, 0, 0, 1, 0, 0, 1, 1)) 808 + .quad BM8X8(BV8(0, 0, 0, 1, 1, 0, 0, 0), 809 + BV8(0, 0, 1, 0, 0, 1, 1, 0), 810 + BV8(0, 0, 0, 0, 1, 0, 1, 0), 811 + BV8(1, 1, 1, 0, 0, 0, 1, 1), 812 + BV8(1, 1, 1, 0, 1, 1, 0, 0), 813 + 
BV8(0, 1, 1, 0, 1, 0, 1, 1), 814 + BV8(1, 0, 1, 1, 1, 1, 0, 1), 815 + BV8(1, 0, 0, 1, 0, 0, 1, 1)) 849 816 850 817 /* Identity matrix: */ 851 818 .Ltf_id_bitmatrix: ··· 865 816 BV8(0, 0, 0, 0, 0, 1, 0, 0), 866 817 BV8(0, 0, 0, 0, 0, 0, 1, 0), 867 818 BV8(0, 0, 0, 0, 0, 0, 0, 1)) 819 + .quad BM8X8(BV8(1, 0, 0, 0, 0, 0, 0, 0), 820 + BV8(0, 1, 0, 0, 0, 0, 0, 0), 821 + BV8(0, 0, 1, 0, 0, 0, 0, 0), 822 + BV8(0, 0, 0, 1, 0, 0, 0, 0), 823 + BV8(0, 0, 0, 0, 1, 0, 0, 0), 824 + BV8(0, 0, 0, 0, 0, 1, 0, 0), 825 + BV8(0, 0, 0, 0, 0, 0, 1, 0), 826 + BV8(0, 0, 0, 0, 0, 0, 0, 1)) 827 + #endif /* CONFIG_AS_GFNI */ 868 828 869 829 /* 4-bit mask */ 870 830 .section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 ··· 932 874 aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, 933 875 %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, 934 876 %rax, %r9, 10); 935 - cmpl $12, rounds(CTX); 877 + cmpl $12, ARIA_CTX_rounds(CTX); 936 878 jne .Laria_192; 937 879 aria_ff(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, 938 880 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, ··· 945 887 aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15, 946 888 %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, 947 889 %rax, %r9, 12); 948 - cmpl $14, rounds(CTX); 890 + cmpl $14, ARIA_CTX_rounds(CTX); 949 891 jne .Laria_256; 950 892 aria_ff(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, 951 893 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, ··· 981 923 982 924 FRAME_BEGIN 983 925 984 - leaq enc_key(CTX), %r9; 926 + leaq ARIA_CTX_enc_key(CTX), %r9; 985 927 986 928 inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, 987 929 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, ··· 1006 948 1007 949 FRAME_BEGIN 1008 950 1009 - leaq dec_key(CTX), %r9; 951 + leaq ARIA_CTX_dec_key(CTX), %r9; 1010 952 1011 953 inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, 1012 954 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, 
%xmm14, ··· 1114 1056 leaq (%rdx), %r11; 1115 1057 leaq (%rcx), %rsi; 1116 1058 leaq (%rcx), %rdx; 1117 - leaq enc_key(CTX), %r9; 1059 + leaq ARIA_CTX_enc_key(CTX), %r9; 1118 1060 1119 1061 call __aria_aesni_avx_crypt_16way; 1120 1062 ··· 1142 1084 RET; 1143 1085 SYM_FUNC_END(aria_aesni_avx_ctr_crypt_16way) 1144 1086 1087 + #ifdef CONFIG_AS_GFNI 1145 1088 SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way) 1146 1089 /* input: 1147 1090 * %r9: rk ··· 1216 1157 %xmm0, %xmm1, %xmm2, %xmm3, 1217 1158 %xmm4, %xmm5, %xmm6, %xmm7, 1218 1159 %rax, %r9, 10); 1219 - cmpl $12, rounds(CTX); 1160 + cmpl $12, ARIA_CTX_rounds(CTX); 1220 1161 jne .Laria_gfni_192; 1221 1162 aria_ff_gfni(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7, 1222 1163 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, ··· 1233 1174 %xmm0, %xmm1, %xmm2, %xmm3, 1234 1175 %xmm4, %xmm5, %xmm6, %xmm7, 1235 1176 %rax, %r9, 12); 1236 - cmpl $14, rounds(CTX); 1177 + cmpl $14, ARIA_CTX_rounds(CTX); 1237 1178 jne .Laria_gfni_256; 1238 1179 aria_ff_gfni(%xmm1, %xmm0, %xmm3, %xmm2, 1239 1180 %xmm4, %xmm5, %xmm6, %xmm7, ··· 1277 1218 1278 1219 FRAME_BEGIN 1279 1220 1280 - leaq enc_key(CTX), %r9; 1221 + leaq ARIA_CTX_enc_key(CTX), %r9; 1281 1222 1282 1223 inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, 1283 1224 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, ··· 1302 1243 1303 1244 FRAME_BEGIN 1304 1245 1305 - leaq dec_key(CTX), %r9; 1246 + leaq ARIA_CTX_dec_key(CTX), %r9; 1306 1247 1307 1248 inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, 1308 1249 %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, ··· 1334 1275 leaq (%rdx), %r11; 1335 1276 leaq (%rcx), %rsi; 1336 1277 leaq (%rcx), %rdx; 1337 - leaq enc_key(CTX), %r9; 1278 + leaq ARIA_CTX_enc_key(CTX), %r9; 1338 1279 1339 1280 call __aria_aesni_avx_gfni_crypt_16way; 1340 1281 ··· 1361 1302 FRAME_END 1362 1303 RET; 1363 1304 SYM_FUNC_END(aria_aesni_avx_gfni_ctr_crypt_16way) 1305 + #endif /* 
CONFIG_AS_GFNI */
+1441
arch/x86/crypto/aria-aesni-avx2-asm_64.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * ARIA Cipher 32-way parallel algorithm (AVX2) 4 + * 5 + * Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com> 6 + * 7 + */ 8 + 9 + #include <linux/linkage.h> 10 + #include <asm/frame.h> 11 + #include <asm/asm-offsets.h> 12 + #include <linux/cfi_types.h> 13 + 14 + /* register macros */ 15 + #define CTX %rdi 16 + 17 + #define ymm0_x xmm0 18 + #define ymm1_x xmm1 19 + #define ymm2_x xmm2 20 + #define ymm3_x xmm3 21 + #define ymm4_x xmm4 22 + #define ymm5_x xmm5 23 + #define ymm6_x xmm6 24 + #define ymm7_x xmm7 25 + #define ymm8_x xmm8 26 + #define ymm9_x xmm9 27 + #define ymm10_x xmm10 28 + #define ymm11_x xmm11 29 + #define ymm12_x xmm12 30 + #define ymm13_x xmm13 31 + #define ymm14_x xmm14 32 + #define ymm15_x xmm15 33 + 34 + #define BV8(a0, a1, a2, a3, a4, a5, a6, a7) \ 35 + ( (((a0) & 1) << 0) | \ 36 + (((a1) & 1) << 1) | \ 37 + (((a2) & 1) << 2) | \ 38 + (((a3) & 1) << 3) | \ 39 + (((a4) & 1) << 4) | \ 40 + (((a5) & 1) << 5) | \ 41 + (((a6) & 1) << 6) | \ 42 + (((a7) & 1) << 7) ) 43 + 44 + #define BM8X8(l0, l1, l2, l3, l4, l5, l6, l7) \ 45 + ( ((l7) << (0 * 8)) | \ 46 + ((l6) << (1 * 8)) | \ 47 + ((l5) << (2 * 8)) | \ 48 + ((l4) << (3 * 8)) | \ 49 + ((l3) << (4 * 8)) | \ 50 + ((l2) << (5 * 8)) | \ 51 + ((l1) << (6 * 8)) | \ 52 + ((l0) << (7 * 8)) ) 53 + 54 + #define inc_le128(x, minus_one, tmp) \ 55 + vpcmpeqq minus_one, x, tmp; \ 56 + vpsubq minus_one, x, x; \ 57 + vpslldq $8, tmp, tmp; \ 58 + vpsubq tmp, x, x; 59 + 60 + #define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \ 61 + vpand x, mask4bit, tmp0; \ 62 + vpandn x, mask4bit, x; \ 63 + vpsrld $4, x, x; \ 64 + \ 65 + vpshufb tmp0, lo_t, tmp0; \ 66 + vpshufb x, hi_t, x; \ 67 + vpxor tmp0, x, x; 68 + 69 + #define transpose_4x4(x0, x1, x2, x3, t1, t2) \ 70 + vpunpckhdq x1, x0, t2; \ 71 + vpunpckldq x1, x0, x0; \ 72 + \ 73 + vpunpckldq x3, x2, t1; \ 74 + vpunpckhdq x3, x2, x2; \ 75 + \ 76 + vpunpckhqdq t1, x0, x1; \ 77 + vpunpcklqdq t1, x0, x0; \ 
78 + \ 79 + vpunpckhqdq x2, t2, x3; \ 80 + vpunpcklqdq x2, t2, x2; 81 + 82 + #define byteslice_16x16b(a0, b0, c0, d0, \ 83 + a1, b1, c1, d1, \ 84 + a2, b2, c2, d2, \ 85 + a3, b3, c3, d3, \ 86 + st0, st1) \ 87 + vmovdqu d2, st0; \ 88 + vmovdqu d3, st1; \ 89 + transpose_4x4(a0, a1, a2, a3, d2, d3); \ 90 + transpose_4x4(b0, b1, b2, b3, d2, d3); \ 91 + vmovdqu st0, d2; \ 92 + vmovdqu st1, d3; \ 93 + \ 94 + vmovdqu a0, st0; \ 95 + vmovdqu a1, st1; \ 96 + transpose_4x4(c0, c1, c2, c3, a0, a1); \ 97 + transpose_4x4(d0, d1, d2, d3, a0, a1); \ 98 + \ 99 + vbroadcasti128 .Lshufb_16x16b, a0; \ 100 + vmovdqu st1, a1; \ 101 + vpshufb a0, a2, a2; \ 102 + vpshufb a0, a3, a3; \ 103 + vpshufb a0, b0, b0; \ 104 + vpshufb a0, b1, b1; \ 105 + vpshufb a0, b2, b2; \ 106 + vpshufb a0, b3, b3; \ 107 + vpshufb a0, a1, a1; \ 108 + vpshufb a0, c0, c0; \ 109 + vpshufb a0, c1, c1; \ 110 + vpshufb a0, c2, c2; \ 111 + vpshufb a0, c3, c3; \ 112 + vpshufb a0, d0, d0; \ 113 + vpshufb a0, d1, d1; \ 114 + vpshufb a0, d2, d2; \ 115 + vpshufb a0, d3, d3; \ 116 + vmovdqu d3, st1; \ 117 + vmovdqu st0, d3; \ 118 + vpshufb a0, d3, a0; \ 119 + vmovdqu d2, st0; \ 120 + \ 121 + transpose_4x4(a0, b0, c0, d0, d2, d3); \ 122 + transpose_4x4(a1, b1, c1, d1, d2, d3); \ 123 + vmovdqu st0, d2; \ 124 + vmovdqu st1, d3; \ 125 + \ 126 + vmovdqu b0, st0; \ 127 + vmovdqu b1, st1; \ 128 + transpose_4x4(a2, b2, c2, d2, b0, b1); \ 129 + transpose_4x4(a3, b3, c3, d3, b0, b1); \ 130 + vmovdqu st0, b0; \ 131 + vmovdqu st1, b1; \ 132 + /* does not adjust output bytes inside vectors */ 133 + 134 + #define debyteslice_16x16b(a0, b0, c0, d0, \ 135 + a1, b1, c1, d1, \ 136 + a2, b2, c2, d2, \ 137 + a3, b3, c3, d3, \ 138 + st0, st1) \ 139 + vmovdqu d2, st0; \ 140 + vmovdqu d3, st1; \ 141 + transpose_4x4(a0, a1, a2, a3, d2, d3); \ 142 + transpose_4x4(b0, b1, b2, b3, d2, d3); \ 143 + vmovdqu st0, d2; \ 144 + vmovdqu st1, d3; \ 145 + \ 146 + vmovdqu a0, st0; \ 147 + vmovdqu a1, st1; \ 148 + transpose_4x4(c0, c1, c2, c3, a0, a1); \ 149 + 
transpose_4x4(d0, d1, d2, d3, a0, a1); \ 150 + \ 151 + vbroadcasti128 .Lshufb_16x16b, a0; \ 152 + vmovdqu st1, a1; \ 153 + vpshufb a0, a2, a2; \ 154 + vpshufb a0, a3, a3; \ 155 + vpshufb a0, b0, b0; \ 156 + vpshufb a0, b1, b1; \ 157 + vpshufb a0, b2, b2; \ 158 + vpshufb a0, b3, b3; \ 159 + vpshufb a0, a1, a1; \ 160 + vpshufb a0, c0, c0; \ 161 + vpshufb a0, c1, c1; \ 162 + vpshufb a0, c2, c2; \ 163 + vpshufb a0, c3, c3; \ 164 + vpshufb a0, d0, d0; \ 165 + vpshufb a0, d1, d1; \ 166 + vpshufb a0, d2, d2; \ 167 + vpshufb a0, d3, d3; \ 168 + vmovdqu d3, st1; \ 169 + vmovdqu st0, d3; \ 170 + vpshufb a0, d3, a0; \ 171 + vmovdqu d2, st0; \ 172 + \ 173 + transpose_4x4(c0, d0, a0, b0, d2, d3); \ 174 + transpose_4x4(c1, d1, a1, b1, d2, d3); \ 175 + vmovdqu st0, d2; \ 176 + vmovdqu st1, d3; \ 177 + \ 178 + vmovdqu b0, st0; \ 179 + vmovdqu b1, st1; \ 180 + transpose_4x4(c2, d2, a2, b2, b0, b1); \ 181 + transpose_4x4(c3, d3, a3, b3, b0, b1); \ 182 + vmovdqu st0, b0; \ 183 + vmovdqu st1, b1; \ 184 + /* does not adjust output bytes inside vectors */ 185 + 186 + /* load blocks to registers and apply pre-whitening */ 187 + #define inpack16_pre(x0, x1, x2, x3, \ 188 + x4, x5, x6, x7, \ 189 + y0, y1, y2, y3, \ 190 + y4, y5, y6, y7, \ 191 + rio) \ 192 + vmovdqu (0 * 32)(rio), x0; \ 193 + vmovdqu (1 * 32)(rio), x1; \ 194 + vmovdqu (2 * 32)(rio), x2; \ 195 + vmovdqu (3 * 32)(rio), x3; \ 196 + vmovdqu (4 * 32)(rio), x4; \ 197 + vmovdqu (5 * 32)(rio), x5; \ 198 + vmovdqu (6 * 32)(rio), x6; \ 199 + vmovdqu (7 * 32)(rio), x7; \ 200 + vmovdqu (8 * 32)(rio), y0; \ 201 + vmovdqu (9 * 32)(rio), y1; \ 202 + vmovdqu (10 * 32)(rio), y2; \ 203 + vmovdqu (11 * 32)(rio), y3; \ 204 + vmovdqu (12 * 32)(rio), y4; \ 205 + vmovdqu (13 * 32)(rio), y5; \ 206 + vmovdqu (14 * 32)(rio), y6; \ 207 + vmovdqu (15 * 32)(rio), y7; 208 + 209 + /* byteslice pre-whitened blocks and store to temporary memory */ 210 + #define inpack16_post(x0, x1, x2, x3, \ 211 + x4, x5, x6, x7, \ 212 + y0, y1, y2, y3, \ 213 + y4, y5, 
y6, y7, \ 214 + mem_ab, mem_cd) \ 215 + byteslice_16x16b(x0, x1, x2, x3, \ 216 + x4, x5, x6, x7, \ 217 + y0, y1, y2, y3, \ 218 + y4, y5, y6, y7, \ 219 + (mem_ab), (mem_cd)); \ 220 + \ 221 + vmovdqu x0, 0 * 32(mem_ab); \ 222 + vmovdqu x1, 1 * 32(mem_ab); \ 223 + vmovdqu x2, 2 * 32(mem_ab); \ 224 + vmovdqu x3, 3 * 32(mem_ab); \ 225 + vmovdqu x4, 4 * 32(mem_ab); \ 226 + vmovdqu x5, 5 * 32(mem_ab); \ 227 + vmovdqu x6, 6 * 32(mem_ab); \ 228 + vmovdqu x7, 7 * 32(mem_ab); \ 229 + vmovdqu y0, 0 * 32(mem_cd); \ 230 + vmovdqu y1, 1 * 32(mem_cd); \ 231 + vmovdqu y2, 2 * 32(mem_cd); \ 232 + vmovdqu y3, 3 * 32(mem_cd); \ 233 + vmovdqu y4, 4 * 32(mem_cd); \ 234 + vmovdqu y5, 5 * 32(mem_cd); \ 235 + vmovdqu y6, 6 * 32(mem_cd); \ 236 + vmovdqu y7, 7 * 32(mem_cd); 237 + 238 + #define write_output(x0, x1, x2, x3, \ 239 + x4, x5, x6, x7, \ 240 + y0, y1, y2, y3, \ 241 + y4, y5, y6, y7, \ 242 + mem) \ 243 + vmovdqu x0, 0 * 32(mem); \ 244 + vmovdqu x1, 1 * 32(mem); \ 245 + vmovdqu x2, 2 * 32(mem); \ 246 + vmovdqu x3, 3 * 32(mem); \ 247 + vmovdqu x4, 4 * 32(mem); \ 248 + vmovdqu x5, 5 * 32(mem); \ 249 + vmovdqu x6, 6 * 32(mem); \ 250 + vmovdqu x7, 7 * 32(mem); \ 251 + vmovdqu y0, 8 * 32(mem); \ 252 + vmovdqu y1, 9 * 32(mem); \ 253 + vmovdqu y2, 10 * 32(mem); \ 254 + vmovdqu y3, 11 * 32(mem); \ 255 + vmovdqu y4, 12 * 32(mem); \ 256 + vmovdqu y5, 13 * 32(mem); \ 257 + vmovdqu y6, 14 * 32(mem); \ 258 + vmovdqu y7, 15 * 32(mem); \ 259 + 260 + #define aria_store_state_8way(x0, x1, x2, x3, \ 261 + x4, x5, x6, x7, \ 262 + mem_tmp, idx) \ 263 + vmovdqu x0, ((idx + 0) * 32)(mem_tmp); \ 264 + vmovdqu x1, ((idx + 1) * 32)(mem_tmp); \ 265 + vmovdqu x2, ((idx + 2) * 32)(mem_tmp); \ 266 + vmovdqu x3, ((idx + 3) * 32)(mem_tmp); \ 267 + vmovdqu x4, ((idx + 4) * 32)(mem_tmp); \ 268 + vmovdqu x5, ((idx + 5) * 32)(mem_tmp); \ 269 + vmovdqu x6, ((idx + 6) * 32)(mem_tmp); \ 270 + vmovdqu x7, ((idx + 7) * 32)(mem_tmp); 271 + 272 + #define aria_load_state_8way(x0, x1, x2, x3, \ 273 + x4, x5, x6, x7, \ 274 + 
mem_tmp, idx) \ 275 + vmovdqu ((idx + 0) * 32)(mem_tmp), x0; \ 276 + vmovdqu ((idx + 1) * 32)(mem_tmp), x1; \ 277 + vmovdqu ((idx + 2) * 32)(mem_tmp), x2; \ 278 + vmovdqu ((idx + 3) * 32)(mem_tmp), x3; \ 279 + vmovdqu ((idx + 4) * 32)(mem_tmp), x4; \ 280 + vmovdqu ((idx + 5) * 32)(mem_tmp), x5; \ 281 + vmovdqu ((idx + 6) * 32)(mem_tmp), x6; \ 282 + vmovdqu ((idx + 7) * 32)(mem_tmp), x7; 283 + 284 + #define aria_ark_8way(x0, x1, x2, x3, \ 285 + x4, x5, x6, x7, \ 286 + t0, rk, idx, round) \ 287 + /* AddRoundKey */ \ 288 + vpbroadcastb ((round * 16) + idx + 3)(rk), t0; \ 289 + vpxor t0, x0, x0; \ 290 + vpbroadcastb ((round * 16) + idx + 2)(rk), t0; \ 291 + vpxor t0, x1, x1; \ 292 + vpbroadcastb ((round * 16) + idx + 1)(rk), t0; \ 293 + vpxor t0, x2, x2; \ 294 + vpbroadcastb ((round * 16) + idx + 0)(rk), t0; \ 295 + vpxor t0, x3, x3; \ 296 + vpbroadcastb ((round * 16) + idx + 7)(rk), t0; \ 297 + vpxor t0, x4, x4; \ 298 + vpbroadcastb ((round * 16) + idx + 6)(rk), t0; \ 299 + vpxor t0, x5, x5; \ 300 + vpbroadcastb ((round * 16) + idx + 5)(rk), t0; \ 301 + vpxor t0, x6, x6; \ 302 + vpbroadcastb ((round * 16) + idx + 4)(rk), t0; \ 303 + vpxor t0, x7, x7; 304 + 305 + #ifdef CONFIG_AS_GFNI 306 + #define aria_sbox_8way_gfni(x0, x1, x2, x3, \ 307 + x4, x5, x6, x7, \ 308 + t0, t1, t2, t3, \ 309 + t4, t5, t6, t7) \ 310 + vpbroadcastq .Ltf_s2_bitmatrix, t0; \ 311 + vpbroadcastq .Ltf_inv_bitmatrix, t1; \ 312 + vpbroadcastq .Ltf_id_bitmatrix, t2; \ 313 + vpbroadcastq .Ltf_aff_bitmatrix, t3; \ 314 + vpbroadcastq .Ltf_x2_bitmatrix, t4; \ 315 + vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \ 316 + vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \ 317 + vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \ 318 + vgf2p8affineqb $(tf_inv_const), t1, x6, x6; \ 319 + vgf2p8affineinvqb $0, t2, x2, x2; \ 320 + vgf2p8affineinvqb $0, t2, x6, x6; \ 321 + vgf2p8affineinvqb $(tf_aff_const), t3, x0, x0; \ 322 + vgf2p8affineinvqb $(tf_aff_const), t3, x4, x4; \ 323 + vgf2p8affineqb $(tf_x2_const), t4, x3, 
x3; \ 324 + vgf2p8affineqb $(tf_x2_const), t4, x7, x7; \ 325 + vgf2p8affineinvqb $0, t2, x3, x3; \ 326 + vgf2p8affineinvqb $0, t2, x7, x7 327 + 328 + #endif /* CONFIG_AS_GFNI */ 329 + #define aria_sbox_8way(x0, x1, x2, x3, \ 330 + x4, x5, x6, x7, \ 331 + t0, t1, t2, t3, \ 332 + t4, t5, t6, t7) \ 333 + vpxor t7, t7, t7; \ 334 + vpxor t6, t6, t6; \ 335 + vbroadcasti128 .Linv_shift_row, t0; \ 336 + vbroadcasti128 .Lshift_row, t1; \ 337 + vbroadcasti128 .Ltf_lo__inv_aff__and__s2, t2; \ 338 + vbroadcasti128 .Ltf_hi__inv_aff__and__s2, t3; \ 339 + vbroadcasti128 .Ltf_lo__x2__and__fwd_aff, t4; \ 340 + vbroadcasti128 .Ltf_hi__x2__and__fwd_aff, t5; \ 341 + \ 342 + vextracti128 $1, x0, t6##_x; \ 343 + vaesenclast t7##_x, x0##_x, x0##_x; \ 344 + vaesenclast t7##_x, t6##_x, t6##_x; \ 345 + vinserti128 $1, t6##_x, x0, x0; \ 346 + \ 347 + vextracti128 $1, x4, t6##_x; \ 348 + vaesenclast t7##_x, x4##_x, x4##_x; \ 349 + vaesenclast t7##_x, t6##_x, t6##_x; \ 350 + vinserti128 $1, t6##_x, x4, x4; \ 351 + \ 352 + vextracti128 $1, x1, t6##_x; \ 353 + vaesenclast t7##_x, x1##_x, x1##_x; \ 354 + vaesenclast t7##_x, t6##_x, t6##_x; \ 355 + vinserti128 $1, t6##_x, x1, x1; \ 356 + \ 357 + vextracti128 $1, x5, t6##_x; \ 358 + vaesenclast t7##_x, x5##_x, x5##_x; \ 359 + vaesenclast t7##_x, t6##_x, t6##_x; \ 360 + vinserti128 $1, t6##_x, x5, x5; \ 361 + \ 362 + vextracti128 $1, x2, t6##_x; \ 363 + vaesdeclast t7##_x, x2##_x, x2##_x; \ 364 + vaesdeclast t7##_x, t6##_x, t6##_x; \ 365 + vinserti128 $1, t6##_x, x2, x2; \ 366 + \ 367 + vextracti128 $1, x6, t6##_x; \ 368 + vaesdeclast t7##_x, x6##_x, x6##_x; \ 369 + vaesdeclast t7##_x, t6##_x, t6##_x; \ 370 + vinserti128 $1, t6##_x, x6, x6; \ 371 + \ 372 + vpbroadcastd .L0f0f0f0f, t6; \ 373 + \ 374 + /* AES inverse shift rows */ \ 375 + vpshufb t0, x0, x0; \ 376 + vpshufb t0, x4, x4; \ 377 + vpshufb t0, x1, x1; \ 378 + vpshufb t0, x5, x5; \ 379 + vpshufb t1, x3, x3; \ 380 + vpshufb t1, x7, x7; \ 381 + vpshufb t1, x2, x2; \ 382 + vpshufb t1, x6, x6; 
\ 383 + \ 384 + /* affine transformation for S2 */ \ 385 + filter_8bit(x1, t2, t3, t6, t0); \ 386 + /* affine transformation for S2 */ \ 387 + filter_8bit(x5, t2, t3, t6, t0); \ 388 + \ 389 + /* affine transformation for X2 */ \ 390 + filter_8bit(x3, t4, t5, t6, t0); \ 391 + /* affine transformation for X2 */ \ 392 + filter_8bit(x7, t4, t5, t6, t0); \ 393 + \ 394 + vpxor t6, t6, t6; \ 395 + vextracti128 $1, x3, t6##_x; \ 396 + vaesdeclast t7##_x, x3##_x, x3##_x; \ 397 + vaesdeclast t7##_x, t6##_x, t6##_x; \ 398 + vinserti128 $1, t6##_x, x3, x3; \ 399 + \ 400 + vextracti128 $1, x7, t6##_x; \ 401 + vaesdeclast t7##_x, x7##_x, x7##_x; \ 402 + vaesdeclast t7##_x, t6##_x, t6##_x; \ 403 + vinserti128 $1, t6##_x, x7, x7; \ 404 + 405 + #define aria_diff_m(x0, x1, x2, x3, \ 406 + t0, t1, t2, t3) \ 407 + /* T = rotr32(X, 8); */ \ 408 + /* X ^= T */ \ 409 + vpxor x0, x3, t0; \ 410 + vpxor x1, x0, t1; \ 411 + vpxor x2, x1, t2; \ 412 + vpxor x3, x2, t3; \ 413 + /* X = T ^ rotr(X, 16); */ \ 414 + vpxor t2, x0, x0; \ 415 + vpxor x1, t3, t3; \ 416 + vpxor t0, x2, x2; \ 417 + vpxor t1, x3, x1; \ 418 + vmovdqu t3, x3; 419 + 420 + #define aria_diff_word(x0, x1, x2, x3, \ 421 + x4, x5, x6, x7, \ 422 + y0, y1, y2, y3, \ 423 + y4, y5, y6, y7) \ 424 + /* t1 ^= t2; */ \ 425 + vpxor y0, x4, x4; \ 426 + vpxor y1, x5, x5; \ 427 + vpxor y2, x6, x6; \ 428 + vpxor y3, x7, x7; \ 429 + \ 430 + /* t2 ^= t3; */ \ 431 + vpxor y4, y0, y0; \ 432 + vpxor y5, y1, y1; \ 433 + vpxor y6, y2, y2; \ 434 + vpxor y7, y3, y3; \ 435 + \ 436 + /* t0 ^= t1; */ \ 437 + vpxor x4, x0, x0; \ 438 + vpxor x5, x1, x1; \ 439 + vpxor x6, x2, x2; \ 440 + vpxor x7, x3, x3; \ 441 + \ 442 + /* t3 ^= t1; */ \ 443 + vpxor x4, y4, y4; \ 444 + vpxor x5, y5, y5; \ 445 + vpxor x6, y6, y6; \ 446 + vpxor x7, y7, y7; \ 447 + \ 448 + /* t2 ^= t0; */ \ 449 + vpxor x0, y0, y0; \ 450 + vpxor x1, y1, y1; \ 451 + vpxor x2, y2, y2; \ 452 + vpxor x3, y3, y3; \ 453 + \ 454 + /* t1 ^= t2; */ \ 455 + vpxor y0, x4, x4; \ 456 + vpxor y1, x5, x5; \ 
457 + vpxor y2, x6, x6; \ 458 + vpxor y3, x7, x7; 459 + 460 + #define aria_fe(x0, x1, x2, x3, \ 461 + x4, x5, x6, x7, \ 462 + y0, y1, y2, y3, \ 463 + y4, y5, y6, y7, \ 464 + mem_tmp, rk, round) \ 465 + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 466 + y0, rk, 8, round); \ 467 + \ 468 + aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ 469 + y0, y1, y2, y3, y4, y5, y6, y7); \ 470 + \ 471 + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ 472 + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ 473 + aria_store_state_8way(x0, x1, x2, x3, \ 474 + x4, x5, x6, x7, \ 475 + mem_tmp, 8); \ 476 + \ 477 + aria_load_state_8way(x0, x1, x2, x3, \ 478 + x4, x5, x6, x7, \ 479 + mem_tmp, 0); \ 480 + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 481 + y0, rk, 0, round); \ 482 + \ 483 + aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ 484 + y0, y1, y2, y3, y4, y5, y6, y7); \ 485 + \ 486 + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ 487 + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ 488 + aria_store_state_8way(x0, x1, x2, x3, \ 489 + x4, x5, x6, x7, \ 490 + mem_tmp, 0); \ 491 + aria_load_state_8way(y0, y1, y2, y3, \ 492 + y4, y5, y6, y7, \ 493 + mem_tmp, 8); \ 494 + aria_diff_word(x0, x1, x2, x3, \ 495 + x4, x5, x6, x7, \ 496 + y0, y1, y2, y3, \ 497 + y4, y5, y6, y7); \ 498 + /* aria_diff_byte() \ 499 + * T3 = ABCD -> BADC \ 500 + * T3 = y4, y5, y6, y7 -> y5, y4, y7, y6 \ 501 + * T0 = ABCD -> CDAB \ 502 + * T0 = x0, x1, x2, x3 -> x2, x3, x0, x1 \ 503 + * T1 = ABCD -> DCBA \ 504 + * T1 = x4, x5, x6, x7 -> x7, x6, x5, x4 \ 505 + */ \ 506 + aria_diff_word(x2, x3, x0, x1, \ 507 + x7, x6, x5, x4, \ 508 + y0, y1, y2, y3, \ 509 + y5, y4, y7, y6); \ 510 + aria_store_state_8way(x3, x2, x1, x0, \ 511 + x6, x7, x4, x5, \ 512 + mem_tmp, 0); 513 + 514 + #define aria_fo(x0, x1, x2, x3, \ 515 + x4, x5, x6, x7, \ 516 + y0, y1, y2, y3, \ 517 + y4, y5, y6, y7, \ 518 + mem_tmp, rk, round) \ 519 + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 520 + y0, rk, 8, round); \ 521 + \ 522 + 
aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 523 + y0, y1, y2, y3, y4, y5, y6, y7); \ 524 + \ 525 + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ 526 + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ 527 + aria_store_state_8way(x0, x1, x2, x3, \ 528 + x4, x5, x6, x7, \ 529 + mem_tmp, 8); \ 530 + \ 531 + aria_load_state_8way(x0, x1, x2, x3, \ 532 + x4, x5, x6, x7, \ 533 + mem_tmp, 0); \ 534 + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 535 + y0, rk, 0, round); \ 536 + \ 537 + aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 538 + y0, y1, y2, y3, y4, y5, y6, y7); \ 539 + \ 540 + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ 541 + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ 542 + aria_store_state_8way(x0, x1, x2, x3, \ 543 + x4, x5, x6, x7, \ 544 + mem_tmp, 0); \ 545 + aria_load_state_8way(y0, y1, y2, y3, \ 546 + y4, y5, y6, y7, \ 547 + mem_tmp, 8); \ 548 + aria_diff_word(x0, x1, x2, x3, \ 549 + x4, x5, x6, x7, \ 550 + y0, y1, y2, y3, \ 551 + y4, y5, y6, y7); \ 552 + /* aria_diff_byte() \ 553 + * T1 = ABCD -> BADC \ 554 + * T1 = x4, x5, x6, x7 -> x5, x4, x7, x6 \ 555 + * T2 = ABCD -> CDAB \ 556 + * T2 = y0, y1, y2, y3, -> y2, y3, y0, y1 \ 557 + * T3 = ABCD -> DCBA \ 558 + * T3 = y4, y5, y6, y7 -> y7, y6, y5, y4 \ 559 + */ \ 560 + aria_diff_word(x0, x1, x2, x3, \ 561 + x5, x4, x7, x6, \ 562 + y2, y3, y0, y1, \ 563 + y7, y6, y5, y4); \ 564 + aria_store_state_8way(x3, x2, x1, x0, \ 565 + x6, x7, x4, x5, \ 566 + mem_tmp, 0); 567 + 568 + #define aria_ff(x0, x1, x2, x3, \ 569 + x4, x5, x6, x7, \ 570 + y0, y1, y2, y3, \ 571 + y4, y5, y6, y7, \ 572 + mem_tmp, rk, round, last_round) \ 573 + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 574 + y0, rk, 8, round); \ 575 + \ 576 + aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ 577 + y0, y1, y2, y3, y4, y5, y6, y7); \ 578 + \ 579 + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 580 + y0, rk, 8, last_round); \ 581 + \ 582 + aria_store_state_8way(x0, x1, x2, x3, \ 583 + x4, x5, x6, x7, \ 584 + mem_tmp, 8); \ 585 + \ 
586 + aria_load_state_8way(x0, x1, x2, x3, \ 587 + x4, x5, x6, x7, \ 588 + mem_tmp, 0); \ 589 + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 590 + y0, rk, 0, round); \ 591 + \ 592 + aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5, \ 593 + y0, y1, y2, y3, y4, y5, y6, y7); \ 594 + \ 595 + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 596 + y0, rk, 0, last_round); \ 597 + \ 598 + aria_load_state_8way(y0, y1, y2, y3, \ 599 + y4, y5, y6, y7, \ 600 + mem_tmp, 8); 601 + #ifdef CONFIG_AS_GFNI 602 + #define aria_fe_gfni(x0, x1, x2, x3, \ 603 + x4, x5, x6, x7, \ 604 + y0, y1, y2, y3, \ 605 + y4, y5, y6, y7, \ 606 + mem_tmp, rk, round) \ 607 + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 608 + y0, rk, 8, round); \ 609 + \ 610 + aria_sbox_8way_gfni(x2, x3, x0, x1, \ 611 + x6, x7, x4, x5, \ 612 + y0, y1, y2, y3, \ 613 + y4, y5, y6, y7); \ 614 + \ 615 + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ 616 + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ 617 + aria_store_state_8way(x0, x1, x2, x3, \ 618 + x4, x5, x6, x7, \ 619 + mem_tmp, 8); \ 620 + \ 621 + aria_load_state_8way(x0, x1, x2, x3, \ 622 + x4, x5, x6, x7, \ 623 + mem_tmp, 0); \ 624 + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 625 + y0, rk, 0, round); \ 626 + \ 627 + aria_sbox_8way_gfni(x2, x3, x0, x1, \ 628 + x6, x7, x4, x5, \ 629 + y0, y1, y2, y3, \ 630 + y4, y5, y6, y7); \ 631 + \ 632 + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ 633 + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ 634 + aria_store_state_8way(x0, x1, x2, x3, \ 635 + x4, x5, x6, x7, \ 636 + mem_tmp, 0); \ 637 + aria_load_state_8way(y0, y1, y2, y3, \ 638 + y4, y5, y6, y7, \ 639 + mem_tmp, 8); \ 640 + aria_diff_word(x0, x1, x2, x3, \ 641 + x4, x5, x6, x7, \ 642 + y0, y1, y2, y3, \ 643 + y4, y5, y6, y7); \ 644 + /* aria_diff_byte() \ 645 + * T3 = ABCD -> BADC \ 646 + * T3 = y4, y5, y6, y7 -> y5, y4, y7, y6 \ 647 + * T0 = ABCD -> CDAB \ 648 + * T0 = x0, x1, x2, x3 -> x2, x3, x0, x1 \ 649 + * T1 = ABCD -> DCBA \ 650 + * T1 = x4, x5, x6, x7 -> 
x7, x6, x5, x4 \ 651 + */ \ 652 + aria_diff_word(x2, x3, x0, x1, \ 653 + x7, x6, x5, x4, \ 654 + y0, y1, y2, y3, \ 655 + y5, y4, y7, y6); \ 656 + aria_store_state_8way(x3, x2, x1, x0, \ 657 + x6, x7, x4, x5, \ 658 + mem_tmp, 0); 659 + 660 + #define aria_fo_gfni(x0, x1, x2, x3, \ 661 + x4, x5, x6, x7, \ 662 + y0, y1, y2, y3, \ 663 + y4, y5, y6, y7, \ 664 + mem_tmp, rk, round) \ 665 + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 666 + y0, rk, 8, round); \ 667 + \ 668 + aria_sbox_8way_gfni(x0, x1, x2, x3, \ 669 + x4, x5, x6, x7, \ 670 + y0, y1, y2, y3, \ 671 + y4, y5, y6, y7); \ 672 + \ 673 + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ 674 + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ 675 + aria_store_state_8way(x0, x1, x2, x3, \ 676 + x4, x5, x6, x7, \ 677 + mem_tmp, 8); \ 678 + \ 679 + aria_load_state_8way(x0, x1, x2, x3, \ 680 + x4, x5, x6, x7, \ 681 + mem_tmp, 0); \ 682 + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 683 + y0, rk, 0, round); \ 684 + \ 685 + aria_sbox_8way_gfni(x0, x1, x2, x3, \ 686 + x4, x5, x6, x7, \ 687 + y0, y1, y2, y3, \ 688 + y4, y5, y6, y7); \ 689 + \ 690 + aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \ 691 + aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \ 692 + aria_store_state_8way(x0, x1, x2, x3, \ 693 + x4, x5, x6, x7, \ 694 + mem_tmp, 0); \ 695 + aria_load_state_8way(y0, y1, y2, y3, \ 696 + y4, y5, y6, y7, \ 697 + mem_tmp, 8); \ 698 + aria_diff_word(x0, x1, x2, x3, \ 699 + x4, x5, x6, x7, \ 700 + y0, y1, y2, y3, \ 701 + y4, y5, y6, y7); \ 702 + /* aria_diff_byte() \ 703 + * T1 = ABCD -> BADC \ 704 + * T1 = x4, x5, x6, x7 -> x5, x4, x7, x6 \ 705 + * T2 = ABCD -> CDAB \ 706 + * T2 = y0, y1, y2, y3, -> y2, y3, y0, y1 \ 707 + * T3 = ABCD -> DCBA \ 708 + * T3 = y4, y5, y6, y7 -> y7, y6, y5, y4 \ 709 + */ \ 710 + aria_diff_word(x0, x1, x2, x3, \ 711 + x5, x4, x7, x6, \ 712 + y2, y3, y0, y1, \ 713 + y7, y6, y5, y4); \ 714 + aria_store_state_8way(x3, x2, x1, x0, \ 715 + x6, x7, x4, x5, \ 716 + mem_tmp, 0); 717 + 718 + #define 
aria_ff_gfni(x0, x1, x2, x3, \ 719 + x4, x5, x6, x7, \ 720 + y0, y1, y2, y3, \ 721 + y4, y5, y6, y7, \ 722 + mem_tmp, rk, round, last_round) \ 723 + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 724 + y0, rk, 8, round); \ 725 + \ 726 + aria_sbox_8way_gfni(x2, x3, x0, x1, \ 727 + x6, x7, x4, x5, \ 728 + y0, y1, y2, y3, \ 729 + y4, y5, y6, y7); \ 730 + \ 731 + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 732 + y0, rk, 8, last_round); \ 733 + \ 734 + aria_store_state_8way(x0, x1, x2, x3, \ 735 + x4, x5, x6, x7, \ 736 + mem_tmp, 8); \ 737 + \ 738 + aria_load_state_8way(x0, x1, x2, x3, \ 739 + x4, x5, x6, x7, \ 740 + mem_tmp, 0); \ 741 + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 742 + y0, rk, 0, round); \ 743 + \ 744 + aria_sbox_8way_gfni(x2, x3, x0, x1, \ 745 + x6, x7, x4, x5, \ 746 + y0, y1, y2, y3, \ 747 + y4, y5, y6, y7); \ 748 + \ 749 + aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \ 750 + y0, rk, 0, last_round); \ 751 + \ 752 + aria_load_state_8way(y0, y1, y2, y3, \ 753 + y4, y5, y6, y7, \ 754 + mem_tmp, 8); 755 + #endif /* CONFIG_AS_GFNI */ 756 + 757 + .section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32 758 + .align 32 759 + #define SHUFB_BYTES(idx) \ 760 + 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) 761 + .Lshufb_16x16b: 762 + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) 763 + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) 764 + 765 + .section .rodata.cst16, "aM", @progbits, 16 766 + .align 16 767 + /* For isolating SubBytes from AESENCLAST, inverse shift row */ 768 + .Linv_shift_row: 769 + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b 770 + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 771 + .Lshift_row: 772 + .byte 0x00, 0x05, 0x0a, 0x0f, 0x04, 0x09, 0x0e, 0x03 773 + .byte 0x08, 0x0d, 0x02, 0x07, 0x0c, 0x01, 0x06, 0x0b 774 + /* For CTR-mode IV byteswap */ 775 + .Lbswap128_mask: 776 + .byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 777 + .byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 
0x01, 0x00 778 + 779 + /* AES inverse affine and S2 combined: 780 + * 1 1 0 0 0 0 0 1 x0 0 781 + * 0 1 0 0 1 0 0 0 x1 0 782 + * 1 1 0 0 1 1 1 1 x2 0 783 + * 0 1 1 0 1 0 0 1 x3 1 784 + * 0 1 0 0 1 1 0 0 * x4 + 0 785 + * 0 1 0 1 1 0 0 0 x5 0 786 + * 0 0 0 0 0 1 0 1 x6 0 787 + * 1 1 1 0 0 1 1 1 x7 1 788 + */ 789 + .Ltf_lo__inv_aff__and__s2: 790 + .octa 0x92172DA81A9FA520B2370D883ABF8500 791 + .Ltf_hi__inv_aff__and__s2: 792 + .octa 0x2B15FFC1AF917B45E6D8320C625CB688 793 + 794 + /* X2 and AES forward affine combined: 795 + * 1 0 1 1 0 0 0 1 x0 0 796 + * 0 1 1 1 1 0 1 1 x1 0 797 + * 0 0 0 1 1 0 1 0 x2 1 798 + * 0 1 0 0 0 1 0 0 x3 0 799 + * 0 0 1 1 1 0 1 1 * x4 + 0 800 + * 0 1 0 0 1 0 0 0 x5 0 801 + * 1 1 0 1 0 0 1 1 x6 0 802 + * 0 1 0 0 1 0 1 0 x7 0 803 + */ 804 + .Ltf_lo__x2__and__fwd_aff: 805 + .octa 0xEFAE0544FCBD1657B8F95213ABEA4100 806 + .Ltf_hi__x2__and__fwd_aff: 807 + .octa 0x3F893781E95FE1576CDA64D2BA0CB204 808 + 809 + #ifdef CONFIG_AS_GFNI 810 + .section .rodata.cst8, "aM", @progbits, 8 811 + .align 8 812 + /* AES affine: */ 813 + #define tf_aff_const BV8(1, 1, 0, 0, 0, 1, 1, 0) 814 + .Ltf_aff_bitmatrix: 815 + .quad BM8X8(BV8(1, 0, 0, 0, 1, 1, 1, 1), 816 + BV8(1, 1, 0, 0, 0, 1, 1, 1), 817 + BV8(1, 1, 1, 0, 0, 0, 1, 1), 818 + BV8(1, 1, 1, 1, 0, 0, 0, 1), 819 + BV8(1, 1, 1, 1, 1, 0, 0, 0), 820 + BV8(0, 1, 1, 1, 1, 1, 0, 0), 821 + BV8(0, 0, 1, 1, 1, 1, 1, 0), 822 + BV8(0, 0, 0, 1, 1, 1, 1, 1)) 823 + 824 + /* AES inverse affine: */ 825 + #define tf_inv_const BV8(1, 0, 1, 0, 0, 0, 0, 0) 826 + .Ltf_inv_bitmatrix: 827 + .quad BM8X8(BV8(0, 0, 1, 0, 0, 1, 0, 1), 828 + BV8(1, 0, 0, 1, 0, 0, 1, 0), 829 + BV8(0, 1, 0, 0, 1, 0, 0, 1), 830 + BV8(1, 0, 1, 0, 0, 1, 0, 0), 831 + BV8(0, 1, 0, 1, 0, 0, 1, 0), 832 + BV8(0, 0, 1, 0, 1, 0, 0, 1), 833 + BV8(1, 0, 0, 1, 0, 1, 0, 0), 834 + BV8(0, 1, 0, 0, 1, 0, 1, 0)) 835 + 836 + /* S2: */ 837 + #define tf_s2_const BV8(0, 1, 0, 0, 0, 1, 1, 1) 838 + .Ltf_s2_bitmatrix: 839 + .quad BM8X8(BV8(0, 1, 0, 1, 0, 1, 1, 1), 840 + BV8(0, 0, 1, 1, 
1, 1, 1, 1), 841 + BV8(1, 1, 1, 0, 1, 1, 0, 1), 842 + BV8(1, 1, 0, 0, 0, 0, 1, 1), 843 + BV8(0, 1, 0, 0, 0, 0, 1, 1), 844 + BV8(1, 1, 0, 0, 1, 1, 1, 0), 845 + BV8(0, 1, 1, 0, 0, 0, 1, 1), 846 + BV8(1, 1, 1, 1, 0, 1, 1, 0)) 847 + 848 + /* X2: */ 849 + #define tf_x2_const BV8(0, 0, 1, 1, 0, 1, 0, 0) 850 + .Ltf_x2_bitmatrix: 851 + .quad BM8X8(BV8(0, 0, 0, 1, 1, 0, 0, 0), 852 + BV8(0, 0, 1, 0, 0, 1, 1, 0), 853 + BV8(0, 0, 0, 0, 1, 0, 1, 0), 854 + BV8(1, 1, 1, 0, 0, 0, 1, 1), 855 + BV8(1, 1, 1, 0, 1, 1, 0, 0), 856 + BV8(0, 1, 1, 0, 1, 0, 1, 1), 857 + BV8(1, 0, 1, 1, 1, 1, 0, 1), 858 + BV8(1, 0, 0, 1, 0, 0, 1, 1)) 859 + 860 + /* Identity matrix: */ 861 + .Ltf_id_bitmatrix: 862 + .quad BM8X8(BV8(1, 0, 0, 0, 0, 0, 0, 0), 863 + BV8(0, 1, 0, 0, 0, 0, 0, 0), 864 + BV8(0, 0, 1, 0, 0, 0, 0, 0), 865 + BV8(0, 0, 0, 1, 0, 0, 0, 0), 866 + BV8(0, 0, 0, 0, 1, 0, 0, 0), 867 + BV8(0, 0, 0, 0, 0, 1, 0, 0), 868 + BV8(0, 0, 0, 0, 0, 0, 1, 0), 869 + BV8(0, 0, 0, 0, 0, 0, 0, 1)) 870 + 871 + #endif /* CONFIG_AS_GFNI */ 872 + 873 + /* 4-bit mask */ 874 + .section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 875 + .align 4 876 + .L0f0f0f0f: 877 + .long 0x0f0f0f0f 878 + 879 + .text 880 + 881 + SYM_FUNC_START_LOCAL(__aria_aesni_avx2_crypt_32way) 882 + /* input: 883 + * %r9: rk 884 + * %rsi: dst 885 + * %rdx: src 886 + * %ymm0..%ymm15: byte-sliced blocks 887 + */ 888 + 889 + FRAME_BEGIN 890 + 891 + movq %rsi, %rax; 892 + leaq 8 * 32(%rax), %r8; 893 + 894 + inpack16_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, 895 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 896 + %ymm15, %rax, %r8); 897 + aria_fo(%ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15, 898 + %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, 899 + %rax, %r9, 0); 900 + aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, 901 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 902 + %ymm15, %rax, %r9, 1); 903 + aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15, 
904 + %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, 905 + %rax, %r9, 2); 906 + aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, 907 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 908 + %ymm15, %rax, %r9, 3); 909 + aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15, 910 + %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, 911 + %rax, %r9, 4); 912 + aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, 913 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 914 + %ymm15, %rax, %r9, 5); 915 + aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15, 916 + %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, 917 + %rax, %r9, 6); 918 + aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, 919 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 920 + %ymm15, %rax, %r9, 7); 921 + aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15, 922 + %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, 923 + %rax, %r9, 8); 924 + aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, 925 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 926 + %ymm15, %rax, %r9, 9); 927 + aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15, 928 + %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, 929 + %rax, %r9, 10); 930 + cmpl $12, ARIA_CTX_rounds(CTX); 931 + jne .Laria_192; 932 + aria_ff(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, 933 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 934 + %ymm15, %rax, %r9, 11, 12); 935 + jmp .Laria_end; 936 + .Laria_192: 937 + aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, 938 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 939 + %ymm15, %rax, %r9, 11); 940 + aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15, 941 + %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, 942 + %rax, %r9, 12); 943 + cmpl $14, ARIA_CTX_rounds(CTX); 944 + jne .Laria_256; 
945 + aria_ff(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, 946 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 947 + %ymm15, %rax, %r9, 13, 14); 948 + jmp .Laria_end; 949 + .Laria_256: 950 + aria_fe(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, 951 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 952 + %ymm15, %rax, %r9, 13); 953 + aria_fo(%ymm9, %ymm8, %ymm11, %ymm10, %ymm12, %ymm13, %ymm14, %ymm15, 954 + %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, 955 + %rax, %r9, 14); 956 + aria_ff(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, 957 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 958 + %ymm15, %rax, %r9, 15, 16); 959 + .Laria_end: 960 + debyteslice_16x16b(%ymm8, %ymm12, %ymm1, %ymm4, 961 + %ymm9, %ymm13, %ymm0, %ymm5, 962 + %ymm10, %ymm14, %ymm3, %ymm6, 963 + %ymm11, %ymm15, %ymm2, %ymm7, 964 + (%rax), (%r8)); 965 + 966 + FRAME_END 967 + RET; 968 + SYM_FUNC_END(__aria_aesni_avx2_crypt_32way) 969 + 970 + SYM_TYPED_FUNC_START(aria_aesni_avx2_encrypt_32way) 971 + /* input: 972 + * %rdi: ctx, CTX 973 + * %rsi: dst 974 + * %rdx: src 975 + */ 976 + 977 + FRAME_BEGIN 978 + 979 + leaq ARIA_CTX_enc_key(CTX), %r9; 980 + 981 + inpack16_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, 982 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 983 + %ymm15, %rdx); 984 + 985 + call __aria_aesni_avx2_crypt_32way; 986 + 987 + write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, 988 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 989 + %ymm15, %rax); 990 + 991 + FRAME_END 992 + RET; 993 + SYM_FUNC_END(aria_aesni_avx2_encrypt_32way) 994 + 995 + SYM_TYPED_FUNC_START(aria_aesni_avx2_decrypt_32way) 996 + /* input: 997 + * %rdi: ctx, CTX 998 + * %rsi: dst 999 + * %rdx: src 1000 + */ 1001 + 1002 + FRAME_BEGIN 1003 + 1004 + leaq ARIA_CTX_dec_key(CTX), %r9; 1005 + 1006 + inpack16_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, 1007 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, 
%ymm13, %ymm14, 1008 + %ymm15, %rdx); 1009 + 1010 + call __aria_aesni_avx2_crypt_32way; 1011 + 1012 + write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, 1013 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 1014 + %ymm15, %rax); 1015 + 1016 + FRAME_END 1017 + RET; 1018 + SYM_FUNC_END(aria_aesni_avx2_decrypt_32way) 1019 + 1020 + SYM_FUNC_START_LOCAL(__aria_aesni_avx2_ctr_gen_keystream_32way) 1021 + /* input: 1022 + * %rdi: ctx 1023 + * %rsi: dst 1024 + * %rdx: src 1025 + * %rcx: keystream 1026 + * %r8: iv (big endian, 128bit) 1027 + */ 1028 + 1029 + FRAME_BEGIN 1030 + movq 8(%r8), %r11; 1031 + bswapq %r11; 1032 + 1033 + vbroadcasti128 .Lbswap128_mask (%rip), %ymm6; 1034 + vpcmpeqd %ymm0, %ymm0, %ymm0; 1035 + vpsrldq $8, %ymm0, %ymm0; /* ab: -1:0 ; cd: -1:0 */ 1036 + vpaddq %ymm0, %ymm0, %ymm5; /* ab: -2:0 ; cd: -2:0 */ 1037 + 1038 + /* load IV and byteswap */ 1039 + vmovdqu (%r8), %xmm7; 1040 + vpshufb %xmm6, %xmm7, %xmm7; 1041 + vmovdqa %xmm7, %xmm3; 1042 + inc_le128(%xmm7, %xmm0, %xmm4); 1043 + vinserti128 $1, %xmm7, %ymm3, %ymm3; 1044 + vpshufb %ymm6, %ymm3, %ymm8; /* +1 ; +0 */ 1045 + 1046 + /* check need for handling 64-bit overflow and carry */ 1047 + cmpq $(0xffffffffffffffff - 32), %r11; 1048 + ja .Lhandle_ctr_carry; 1049 + 1050 + /* construct IVs */ 1051 + vpsubq %ymm5, %ymm3, %ymm3; /* +3 ; +2 */ 1052 + vpshufb %ymm6, %ymm3, %ymm9; 1053 + vpsubq %ymm5, %ymm3, %ymm3; /* +5 ; +4 */ 1054 + vpshufb %ymm6, %ymm3, %ymm10; 1055 + vpsubq %ymm5, %ymm3, %ymm3; /* +7 ; +6 */ 1056 + vpshufb %ymm6, %ymm3, %ymm11; 1057 + vpsubq %ymm5, %ymm3, %ymm3; /* +9 ; +8 */ 1058 + vpshufb %ymm6, %ymm3, %ymm12; 1059 + vpsubq %ymm5, %ymm3, %ymm3; /* +11 ; +10 */ 1060 + vpshufb %ymm6, %ymm3, %ymm13; 1061 + vpsubq %ymm5, %ymm3, %ymm3; /* +13 ; +12 */ 1062 + vpshufb %ymm6, %ymm3, %ymm14; 1063 + vpsubq %ymm5, %ymm3, %ymm3; /* +15 ; +14 */ 1064 + vpshufb %ymm6, %ymm3, %ymm15; 1065 + vmovdqu %ymm8, (0 * 32)(%rcx); 1066 + vmovdqu %ymm9, (1 * 32)(%rcx); 1067 + 
vmovdqu %ymm10, (2 * 32)(%rcx); 1068 + vmovdqu %ymm11, (3 * 32)(%rcx); 1069 + vmovdqu %ymm12, (4 * 32)(%rcx); 1070 + vmovdqu %ymm13, (5 * 32)(%rcx); 1071 + vmovdqu %ymm14, (6 * 32)(%rcx); 1072 + vmovdqu %ymm15, (7 * 32)(%rcx); 1073 + 1074 + vpsubq %ymm5, %ymm3, %ymm3; /* +17 ; +16 */ 1075 + vpshufb %ymm6, %ymm3, %ymm8; 1076 + vpsubq %ymm5, %ymm3, %ymm3; /* +19 ; +18 */ 1077 + vpshufb %ymm6, %ymm3, %ymm9; 1078 + vpsubq %ymm5, %ymm3, %ymm3; /* +21 ; +20 */ 1079 + vpshufb %ymm6, %ymm3, %ymm10; 1080 + vpsubq %ymm5, %ymm3, %ymm3; /* +23 ; +22 */ 1081 + vpshufb %ymm6, %ymm3, %ymm11; 1082 + vpsubq %ymm5, %ymm3, %ymm3; /* +25 ; +24 */ 1083 + vpshufb %ymm6, %ymm3, %ymm12; 1084 + vpsubq %ymm5, %ymm3, %ymm3; /* +27 ; +26 */ 1085 + vpshufb %ymm6, %ymm3, %ymm13; 1086 + vpsubq %ymm5, %ymm3, %ymm3; /* +29 ; +28 */ 1087 + vpshufb %ymm6, %ymm3, %ymm14; 1088 + vpsubq %ymm5, %ymm3, %ymm3; /* +31 ; +30 */ 1089 + vpshufb %ymm6, %ymm3, %ymm15; 1090 + vpsubq %ymm5, %ymm3, %ymm3; /* +32 */ 1091 + vpshufb %xmm6, %xmm3, %xmm3; 1092 + vmovdqu %xmm3, (%r8); 1093 + vmovdqu (0 * 32)(%rcx), %ymm0; 1094 + vmovdqu (1 * 32)(%rcx), %ymm1; 1095 + vmovdqu (2 * 32)(%rcx), %ymm2; 1096 + vmovdqu (3 * 32)(%rcx), %ymm3; 1097 + vmovdqu (4 * 32)(%rcx), %ymm4; 1098 + vmovdqu (5 * 32)(%rcx), %ymm5; 1099 + vmovdqu (6 * 32)(%rcx), %ymm6; 1100 + vmovdqu (7 * 32)(%rcx), %ymm7; 1101 + jmp .Lctr_carry_done; 1102 + 1103 + .Lhandle_ctr_carry: 1104 + /* construct IVs */ 1105 + inc_le128(%ymm3, %ymm0, %ymm4); 1106 + inc_le128(%ymm3, %ymm0, %ymm4); 1107 + vpshufb %ymm6, %ymm3, %ymm9; /* +3 ; +2 */ 1108 + inc_le128(%ymm3, %ymm0, %ymm4); 1109 + inc_le128(%ymm3, %ymm0, %ymm4); 1110 + vpshufb %ymm6, %ymm3, %ymm10; /* +5 ; +4 */ 1111 + inc_le128(%ymm3, %ymm0, %ymm4); 1112 + inc_le128(%ymm3, %ymm0, %ymm4); 1113 + vpshufb %ymm6, %ymm3, %ymm11; /* +7 ; +6 */ 1114 + inc_le128(%ymm3, %ymm0, %ymm4); 1115 + inc_le128(%ymm3, %ymm0, %ymm4); 1116 + vpshufb %ymm6, %ymm3, %ymm12; /* +9 ; +8 */ 1117 + inc_le128(%ymm3, %ymm0, %ymm4); 1118 
+ inc_le128(%ymm3, %ymm0, %ymm4); 1119 + vpshufb %ymm6, %ymm3, %ymm13; /* +11 ; +10 */ 1120 + inc_le128(%ymm3, %ymm0, %ymm4); 1121 + inc_le128(%ymm3, %ymm0, %ymm4); 1122 + vpshufb %ymm6, %ymm3, %ymm14; /* +13 ; +12 */ 1123 + inc_le128(%ymm3, %ymm0, %ymm4); 1124 + inc_le128(%ymm3, %ymm0, %ymm4); 1125 + vpshufb %ymm6, %ymm3, %ymm15; /* +15 ; +14 */ 1126 + vmovdqu %ymm8, (0 * 32)(%rcx); 1127 + vmovdqu %ymm9, (1 * 32)(%rcx); 1128 + vmovdqu %ymm10, (2 * 32)(%rcx); 1129 + vmovdqu %ymm11, (3 * 32)(%rcx); 1130 + vmovdqu %ymm12, (4 * 32)(%rcx); 1131 + vmovdqu %ymm13, (5 * 32)(%rcx); 1132 + vmovdqu %ymm14, (6 * 32)(%rcx); 1133 + vmovdqu %ymm15, (7 * 32)(%rcx); 1134 + 1135 + inc_le128(%ymm3, %ymm0, %ymm4); 1136 + inc_le128(%ymm3, %ymm0, %ymm4); 1137 + vpshufb %ymm6, %ymm3, %ymm8; /* +17 ; +16 */ 1138 + inc_le128(%ymm3, %ymm0, %ymm4); 1139 + inc_le128(%ymm3, %ymm0, %ymm4); 1140 + vpshufb %ymm6, %ymm3, %ymm9; /* +19 ; +18 */ 1141 + inc_le128(%ymm3, %ymm0, %ymm4); 1142 + inc_le128(%ymm3, %ymm0, %ymm4); 1143 + vpshufb %ymm6, %ymm3, %ymm10; /* +21 ; +20 */ 1144 + inc_le128(%ymm3, %ymm0, %ymm4); 1145 + inc_le128(%ymm3, %ymm0, %ymm4); 1146 + vpshufb %ymm6, %ymm3, %ymm11; /* +23 ; +22 */ 1147 + inc_le128(%ymm3, %ymm0, %ymm4); 1148 + inc_le128(%ymm3, %ymm0, %ymm4); 1149 + vpshufb %ymm6, %ymm3, %ymm12; /* +25 ; +24 */ 1150 + inc_le128(%ymm3, %ymm0, %ymm4); 1151 + inc_le128(%ymm3, %ymm0, %ymm4); 1152 + vpshufb %ymm6, %ymm3, %ymm13; /* +27 ; +26 */ 1153 + inc_le128(%ymm3, %ymm0, %ymm4); 1154 + inc_le128(%ymm3, %ymm0, %ymm4); 1155 + vpshufb %ymm6, %ymm3, %ymm14; /* +29 ; +28 */ 1156 + inc_le128(%ymm3, %ymm0, %ymm4); 1157 + inc_le128(%ymm3, %ymm0, %ymm4); 1158 + vpshufb %ymm6, %ymm3, %ymm15; /* +31 ; +30 */ 1159 + inc_le128(%ymm3, %ymm0, %ymm4); 1160 + vextracti128 $1, %ymm3, %xmm3; 1161 + vpshufb %xmm6, %xmm3, %xmm3; /* +32 */ 1162 + vmovdqu %xmm3, (%r8); 1163 + vmovdqu (0 * 32)(%rcx), %ymm0; 1164 + vmovdqu (1 * 32)(%rcx), %ymm1; 1165 + vmovdqu (2 * 32)(%rcx), %ymm2; 1166 + vmovdqu (3 * 
32)(%rcx), %ymm3; 1167 + vmovdqu (4 * 32)(%rcx), %ymm4; 1168 + vmovdqu (5 * 32)(%rcx), %ymm5; 1169 + vmovdqu (6 * 32)(%rcx), %ymm6; 1170 + vmovdqu (7 * 32)(%rcx), %ymm7; 1171 + 1172 + .Lctr_carry_done: 1173 + 1174 + FRAME_END 1175 + RET; 1176 + SYM_FUNC_END(__aria_aesni_avx2_ctr_gen_keystream_32way) 1177 + 1178 + SYM_TYPED_FUNC_START(aria_aesni_avx2_ctr_crypt_32way) 1179 + /* input: 1180 + * %rdi: ctx 1181 + * %rsi: dst 1182 + * %rdx: src 1183 + * %rcx: keystream 1184 + * %r8: iv (big endian, 128bit) 1185 + */ 1186 + FRAME_BEGIN 1187 + 1188 + call __aria_aesni_avx2_ctr_gen_keystream_32way; 1189 + 1190 + leaq (%rsi), %r10; 1191 + leaq (%rdx), %r11; 1192 + leaq (%rcx), %rsi; 1193 + leaq (%rcx), %rdx; 1194 + leaq ARIA_CTX_enc_key(CTX), %r9; 1195 + 1196 + call __aria_aesni_avx2_crypt_32way; 1197 + 1198 + vpxor (0 * 32)(%r11), %ymm1, %ymm1; 1199 + vpxor (1 * 32)(%r11), %ymm0, %ymm0; 1200 + vpxor (2 * 32)(%r11), %ymm3, %ymm3; 1201 + vpxor (3 * 32)(%r11), %ymm2, %ymm2; 1202 + vpxor (4 * 32)(%r11), %ymm4, %ymm4; 1203 + vpxor (5 * 32)(%r11), %ymm5, %ymm5; 1204 + vpxor (6 * 32)(%r11), %ymm6, %ymm6; 1205 + vpxor (7 * 32)(%r11), %ymm7, %ymm7; 1206 + vpxor (8 * 32)(%r11), %ymm8, %ymm8; 1207 + vpxor (9 * 32)(%r11), %ymm9, %ymm9; 1208 + vpxor (10 * 32)(%r11), %ymm10, %ymm10; 1209 + vpxor (11 * 32)(%r11), %ymm11, %ymm11; 1210 + vpxor (12 * 32)(%r11), %ymm12, %ymm12; 1211 + vpxor (13 * 32)(%r11), %ymm13, %ymm13; 1212 + vpxor (14 * 32)(%r11), %ymm14, %ymm14; 1213 + vpxor (15 * 32)(%r11), %ymm15, %ymm15; 1214 + write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, 1215 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 1216 + %ymm15, %r10); 1217 + 1218 + FRAME_END 1219 + RET; 1220 + SYM_FUNC_END(aria_aesni_avx2_ctr_crypt_32way) 1221 + 1222 + #ifdef CONFIG_AS_GFNI 1223 + SYM_FUNC_START_LOCAL(__aria_aesni_avx2_gfni_crypt_32way) 1224 + /* input: 1225 + * %r9: rk 1226 + * %rsi: dst 1227 + * %rdx: src 1228 + * %ymm0..%ymm15: 16 byte-sliced blocks 1229 + */ 1230 + 1231 
+ FRAME_BEGIN 1232 + 1233 + movq %rsi, %rax; 1234 + leaq 8 * 32(%rax), %r8; 1235 + 1236 + inpack16_post(%ymm0, %ymm1, %ymm2, %ymm3, 1237 + %ymm4, %ymm5, %ymm6, %ymm7, 1238 + %ymm8, %ymm9, %ymm10, %ymm11, 1239 + %ymm12, %ymm13, %ymm14, 1240 + %ymm15, %rax, %r8); 1241 + aria_fo_gfni(%ymm8, %ymm9, %ymm10, %ymm11, 1242 + %ymm12, %ymm13, %ymm14, %ymm15, 1243 + %ymm0, %ymm1, %ymm2, %ymm3, 1244 + %ymm4, %ymm5, %ymm6, %ymm7, 1245 + %rax, %r9, 0); 1246 + aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2, 1247 + %ymm4, %ymm5, %ymm6, %ymm7, 1248 + %ymm8, %ymm9, %ymm10, %ymm11, 1249 + %ymm12, %ymm13, %ymm14, 1250 + %ymm15, %rax, %r9, 1); 1251 + aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10, 1252 + %ymm12, %ymm13, %ymm14, %ymm15, 1253 + %ymm0, %ymm1, %ymm2, %ymm3, 1254 + %ymm4, %ymm5, %ymm6, %ymm7, 1255 + %rax, %r9, 2); 1256 + aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2, 1257 + %ymm4, %ymm5, %ymm6, %ymm7, 1258 + %ymm8, %ymm9, %ymm10, %ymm11, 1259 + %ymm12, %ymm13, %ymm14, 1260 + %ymm15, %rax, %r9, 3); 1261 + aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10, 1262 + %ymm12, %ymm13, %ymm14, %ymm15, 1263 + %ymm0, %ymm1, %ymm2, %ymm3, 1264 + %ymm4, %ymm5, %ymm6, %ymm7, 1265 + %rax, %r9, 4); 1266 + aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2, 1267 + %ymm4, %ymm5, %ymm6, %ymm7, 1268 + %ymm8, %ymm9, %ymm10, %ymm11, 1269 + %ymm12, %ymm13, %ymm14, 1270 + %ymm15, %rax, %r9, 5); 1271 + aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10, 1272 + %ymm12, %ymm13, %ymm14, %ymm15, 1273 + %ymm0, %ymm1, %ymm2, %ymm3, 1274 + %ymm4, %ymm5, %ymm6, %ymm7, 1275 + %rax, %r9, 6); 1276 + aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2, 1277 + %ymm4, %ymm5, %ymm6, %ymm7, 1278 + %ymm8, %ymm9, %ymm10, %ymm11, 1279 + %ymm12, %ymm13, %ymm14, 1280 + %ymm15, %rax, %r9, 7); 1281 + aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10, 1282 + %ymm12, %ymm13, %ymm14, %ymm15, 1283 + %ymm0, %ymm1, %ymm2, %ymm3, 1284 + %ymm4, %ymm5, %ymm6, %ymm7, 1285 + %rax, %r9, 8); 1286 + aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2, 1287 + %ymm4, %ymm5, %ymm6, %ymm7, 1288 + %ymm8, %ymm9, 
%ymm10, %ymm11, 1289 + %ymm12, %ymm13, %ymm14, 1290 + %ymm15, %rax, %r9, 9); 1291 + aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10, 1292 + %ymm12, %ymm13, %ymm14, %ymm15, 1293 + %ymm0, %ymm1, %ymm2, %ymm3, 1294 + %ymm4, %ymm5, %ymm6, %ymm7, 1295 + %rax, %r9, 10); 1296 + cmpl $12, ARIA_CTX_rounds(CTX); 1297 + jne .Laria_gfni_192; 1298 + aria_ff_gfni(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, 1299 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 1300 + %ymm15, %rax, %r9, 11, 12); 1301 + jmp .Laria_gfni_end; 1302 + .Laria_gfni_192: 1303 + aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2, 1304 + %ymm4, %ymm5, %ymm6, %ymm7, 1305 + %ymm8, %ymm9, %ymm10, %ymm11, 1306 + %ymm12, %ymm13, %ymm14, 1307 + %ymm15, %rax, %r9, 11); 1308 + aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10, 1309 + %ymm12, %ymm13, %ymm14, %ymm15, 1310 + %ymm0, %ymm1, %ymm2, %ymm3, 1311 + %ymm4, %ymm5, %ymm6, %ymm7, 1312 + %rax, %r9, 12); 1313 + cmpl $14, ARIA_CTX_rounds(CTX); 1314 + jne .Laria_gfni_256; 1315 + aria_ff_gfni(%ymm1, %ymm0, %ymm3, %ymm2, 1316 + %ymm4, %ymm5, %ymm6, %ymm7, 1317 + %ymm8, %ymm9, %ymm10, %ymm11, 1318 + %ymm12, %ymm13, %ymm14, 1319 + %ymm15, %rax, %r9, 13, 14); 1320 + jmp .Laria_gfni_end; 1321 + .Laria_gfni_256: 1322 + aria_fe_gfni(%ymm1, %ymm0, %ymm3, %ymm2, 1323 + %ymm4, %ymm5, %ymm6, %ymm7, 1324 + %ymm8, %ymm9, %ymm10, %ymm11, 1325 + %ymm12, %ymm13, %ymm14, 1326 + %ymm15, %rax, %r9, 13); 1327 + aria_fo_gfni(%ymm9, %ymm8, %ymm11, %ymm10, 1328 + %ymm12, %ymm13, %ymm14, %ymm15, 1329 + %ymm0, %ymm1, %ymm2, %ymm3, 1330 + %ymm4, %ymm5, %ymm6, %ymm7, 1331 + %rax, %r9, 14); 1332 + aria_ff_gfni(%ymm1, %ymm0, %ymm3, %ymm2, 1333 + %ymm4, %ymm5, %ymm6, %ymm7, 1334 + %ymm8, %ymm9, %ymm10, %ymm11, 1335 + %ymm12, %ymm13, %ymm14, 1336 + %ymm15, %rax, %r9, 15, 16); 1337 + .Laria_gfni_end: 1338 + debyteslice_16x16b(%ymm8, %ymm12, %ymm1, %ymm4, 1339 + %ymm9, %ymm13, %ymm0, %ymm5, 1340 + %ymm10, %ymm14, %ymm3, %ymm6, 1341 + %ymm11, %ymm15, %ymm2, %ymm7, 1342 + (%rax), (%r8)); 1343 + 1344 + 
FRAME_END 1345 + RET; 1346 + SYM_FUNC_END(__aria_aesni_avx2_gfni_crypt_32way) 1347 + 1348 + SYM_TYPED_FUNC_START(aria_aesni_avx2_gfni_encrypt_32way) 1349 + /* input: 1350 + * %rdi: ctx, CTX 1351 + * %rsi: dst 1352 + * %rdx: src 1353 + */ 1354 + 1355 + FRAME_BEGIN 1356 + 1357 + leaq ARIA_CTX_enc_key(CTX), %r9; 1358 + 1359 + inpack16_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, 1360 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 1361 + %ymm15, %rdx); 1362 + 1363 + call __aria_aesni_avx2_gfni_crypt_32way; 1364 + 1365 + write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, 1366 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 1367 + %ymm15, %rax); 1368 + 1369 + FRAME_END 1370 + RET; 1371 + SYM_FUNC_END(aria_aesni_avx2_gfni_encrypt_32way) 1372 + 1373 + SYM_TYPED_FUNC_START(aria_aesni_avx2_gfni_decrypt_32way) 1374 + /* input: 1375 + * %rdi: ctx, CTX 1376 + * %rsi: dst 1377 + * %rdx: src 1378 + */ 1379 + 1380 + FRAME_BEGIN 1381 + 1382 + leaq ARIA_CTX_dec_key(CTX), %r9; 1383 + 1384 + inpack16_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, 1385 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 1386 + %ymm15, %rdx); 1387 + 1388 + call __aria_aesni_avx2_gfni_crypt_32way; 1389 + 1390 + write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, 1391 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 1392 + %ymm15, %rax); 1393 + 1394 + FRAME_END 1395 + RET; 1396 + SYM_FUNC_END(aria_aesni_avx2_gfni_decrypt_32way) 1397 + 1398 + SYM_TYPED_FUNC_START(aria_aesni_avx2_gfni_ctr_crypt_32way) 1399 + /* input: 1400 + * %rdi: ctx 1401 + * %rsi: dst 1402 + * %rdx: src 1403 + * %rcx: keystream 1404 + * %r8: iv (big endian, 128bit) 1405 + */ 1406 + FRAME_BEGIN 1407 + 1408 + call __aria_aesni_avx2_ctr_gen_keystream_32way 1409 + 1410 + leaq (%rsi), %r10; 1411 + leaq (%rdx), %r11; 1412 + leaq (%rcx), %rsi; 1413 + leaq (%rcx), %rdx; 1414 + leaq ARIA_CTX_enc_key(CTX), %r9; 1415 + 1416 + call 
__aria_aesni_avx2_gfni_crypt_32way; 1417 + 1418 + vpxor (0 * 32)(%r11), %ymm1, %ymm1; 1419 + vpxor (1 * 32)(%r11), %ymm0, %ymm0; 1420 + vpxor (2 * 32)(%r11), %ymm3, %ymm3; 1421 + vpxor (3 * 32)(%r11), %ymm2, %ymm2; 1422 + vpxor (4 * 32)(%r11), %ymm4, %ymm4; 1423 + vpxor (5 * 32)(%r11), %ymm5, %ymm5; 1424 + vpxor (6 * 32)(%r11), %ymm6, %ymm6; 1425 + vpxor (7 * 32)(%r11), %ymm7, %ymm7; 1426 + vpxor (8 * 32)(%r11), %ymm8, %ymm8; 1427 + vpxor (9 * 32)(%r11), %ymm9, %ymm9; 1428 + vpxor (10 * 32)(%r11), %ymm10, %ymm10; 1429 + vpxor (11 * 32)(%r11), %ymm11, %ymm11; 1430 + vpxor (12 * 32)(%r11), %ymm12, %ymm12; 1431 + vpxor (13 * 32)(%r11), %ymm13, %ymm13; 1432 + vpxor (14 * 32)(%r11), %ymm14, %ymm14; 1433 + vpxor (15 * 32)(%r11), %ymm15, %ymm15; 1434 + write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7, 1435 + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, 1436 + %ymm15, %r10); 1437 + 1438 + FRAME_END 1439 + RET; 1440 + SYM_FUNC_END(aria_aesni_avx2_gfni_ctr_crypt_32way) 1441 + #endif /* CONFIG_AS_GFNI */
+47 -1
arch/x86/crypto/aria-avx.h
··· 5 5 #include <linux/types.h> 6 6 7 7 #define ARIA_AESNI_PARALLEL_BLOCKS 16 8 - #define ARIA_AESNI_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * 16) 8 + #define ARIA_AESNI_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * ARIA_AESNI_PARALLEL_BLOCKS) 9 + 10 + #define ARIA_AESNI_AVX2_PARALLEL_BLOCKS 32 11 + #define ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * ARIA_AESNI_AVX2_PARALLEL_BLOCKS) 12 + 13 + #define ARIA_GFNI_AVX512_PARALLEL_BLOCKS 64 14 + #define ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE (ARIA_BLOCK_SIZE * ARIA_GFNI_AVX512_PARALLEL_BLOCKS) 15 + 16 + asmlinkage void aria_aesni_avx_encrypt_16way(const void *ctx, u8 *dst, 17 + const u8 *src); 18 + asmlinkage void aria_aesni_avx_decrypt_16way(const void *ctx, u8 *dst, 19 + const u8 *src); 20 + asmlinkage void aria_aesni_avx_ctr_crypt_16way(const void *ctx, u8 *dst, 21 + const u8 *src, 22 + u8 *keystream, u8 *iv); 23 + asmlinkage void aria_aesni_avx_gfni_encrypt_16way(const void *ctx, u8 *dst, 24 + const u8 *src); 25 + asmlinkage void aria_aesni_avx_gfni_decrypt_16way(const void *ctx, u8 *dst, 26 + const u8 *src); 27 + asmlinkage void aria_aesni_avx_gfni_ctr_crypt_16way(const void *ctx, u8 *dst, 28 + const u8 *src, 29 + u8 *keystream, u8 *iv); 30 + 31 + asmlinkage void aria_aesni_avx2_encrypt_32way(const void *ctx, u8 *dst, 32 + const u8 *src); 33 + asmlinkage void aria_aesni_avx2_decrypt_32way(const void *ctx, u8 *dst, 34 + const u8 *src); 35 + asmlinkage void aria_aesni_avx2_ctr_crypt_32way(const void *ctx, u8 *dst, 36 + const u8 *src, 37 + u8 *keystream, u8 *iv); 38 + asmlinkage void aria_aesni_avx2_gfni_encrypt_32way(const void *ctx, u8 *dst, 39 + const u8 *src); 40 + asmlinkage void aria_aesni_avx2_gfni_decrypt_32way(const void *ctx, u8 *dst, 41 + const u8 *src); 42 + asmlinkage void aria_aesni_avx2_gfni_ctr_crypt_32way(const void *ctx, u8 *dst, 43 + const u8 *src, 44 + u8 *keystream, u8 *iv); 9 45 10 46 struct aria_avx_ops { 11 47 void (*aria_encrypt_16way)(const void *ctx, u8 *dst, const u8 *src); 12 48 void 
(*aria_decrypt_16way)(const void *ctx, u8 *dst, const u8 *src); 13 49 void (*aria_ctr_crypt_16way)(const void *ctx, u8 *dst, const u8 *src, 14 50 u8 *keystream, u8 *iv); 51 + void (*aria_encrypt_32way)(const void *ctx, u8 *dst, const u8 *src); 52 + void (*aria_decrypt_32way)(const void *ctx, u8 *dst, const u8 *src); 53 + void (*aria_ctr_crypt_32way)(const void *ctx, u8 *dst, const u8 *src, 54 + u8 *keystream, u8 *iv); 55 + void (*aria_encrypt_64way)(const void *ctx, u8 *dst, const u8 *src); 56 + void (*aria_decrypt_64way)(const void *ctx, u8 *dst, const u8 *src); 57 + void (*aria_ctr_crypt_64way)(const void *ctx, u8 *dst, const u8 *src, 58 + u8 *keystream, u8 *iv); 59 + 60 + 15 61 }; 16 62 #endif
+971
arch/x86/crypto/aria-gfni-avx512-asm_64.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * ARIA Cipher 64-way parallel algorithm (AVX512) 4 + * 5 + * Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com> 6 + * 7 + */ 8 + 9 + #include <linux/linkage.h> 10 + #include <asm/frame.h> 11 + #include <asm/asm-offsets.h> 12 + #include <linux/cfi_types.h> 13 + 14 + /* register macros */ 15 + #define CTX %rdi 16 + 17 + 18 + #define BV8(a0, a1, a2, a3, a4, a5, a6, a7) \ 19 + ( (((a0) & 1) << 0) | \ 20 + (((a1) & 1) << 1) | \ 21 + (((a2) & 1) << 2) | \ 22 + (((a3) & 1) << 3) | \ 23 + (((a4) & 1) << 4) | \ 24 + (((a5) & 1) << 5) | \ 25 + (((a6) & 1) << 6) | \ 26 + (((a7) & 1) << 7) ) 27 + 28 + #define BM8X8(l0, l1, l2, l3, l4, l5, l6, l7) \ 29 + ( ((l7) << (0 * 8)) | \ 30 + ((l6) << (1 * 8)) | \ 31 + ((l5) << (2 * 8)) | \ 32 + ((l4) << (3 * 8)) | \ 33 + ((l3) << (4 * 8)) | \ 34 + ((l2) << (5 * 8)) | \ 35 + ((l1) << (6 * 8)) | \ 36 + ((l0) << (7 * 8)) ) 37 + 38 + #define add_le128(out, in, lo_counter, hi_counter1) \ 39 + vpaddq lo_counter, in, out; \ 40 + vpcmpuq $1, lo_counter, out, %k1; \ 41 + kaddb %k1, %k1, %k1; \ 42 + vpaddq hi_counter1, out, out{%k1}; 43 + 44 + #define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \ 45 + vpandq x, mask4bit, tmp0; \ 46 + vpandqn x, mask4bit, x; \ 47 + vpsrld $4, x, x; \ 48 + \ 49 + vpshufb tmp0, lo_t, tmp0; \ 50 + vpshufb x, hi_t, x; \ 51 + vpxorq tmp0, x, x; 52 + 53 + #define transpose_4x4(x0, x1, x2, x3, t1, t2) \ 54 + vpunpckhdq x1, x0, t2; \ 55 + vpunpckldq x1, x0, x0; \ 56 + \ 57 + vpunpckldq x3, x2, t1; \ 58 + vpunpckhdq x3, x2, x2; \ 59 + \ 60 + vpunpckhqdq t1, x0, x1; \ 61 + vpunpcklqdq t1, x0, x0; \ 62 + \ 63 + vpunpckhqdq x2, t2, x3; \ 64 + vpunpcklqdq x2, t2, x2; 65 + 66 + #define byteslice_16x16b(a0, b0, c0, d0, \ 67 + a1, b1, c1, d1, \ 68 + a2, b2, c2, d2, \ 69 + a3, b3, c3, d3, \ 70 + st0, st1) \ 71 + vmovdqu64 d2, st0; \ 72 + vmovdqu64 d3, st1; \ 73 + transpose_4x4(a0, a1, a2, a3, d2, d3); \ 74 + transpose_4x4(b0, b1, b2, b3, d2, d3); \ 75 + vmovdqu64 
st0, d2; \ 76 + vmovdqu64 st1, d3; \ 77 + \ 78 + vmovdqu64 a0, st0; \ 79 + vmovdqu64 a1, st1; \ 80 + transpose_4x4(c0, c1, c2, c3, a0, a1); \ 81 + transpose_4x4(d0, d1, d2, d3, a0, a1); \ 82 + \ 83 + vbroadcasti64x2 .Lshufb_16x16b, a0; \ 84 + vmovdqu64 st1, a1; \ 85 + vpshufb a0, a2, a2; \ 86 + vpshufb a0, a3, a3; \ 87 + vpshufb a0, b0, b0; \ 88 + vpshufb a0, b1, b1; \ 89 + vpshufb a0, b2, b2; \ 90 + vpshufb a0, b3, b3; \ 91 + vpshufb a0, a1, a1; \ 92 + vpshufb a0, c0, c0; \ 93 + vpshufb a0, c1, c1; \ 94 + vpshufb a0, c2, c2; \ 95 + vpshufb a0, c3, c3; \ 96 + vpshufb a0, d0, d0; \ 97 + vpshufb a0, d1, d1; \ 98 + vpshufb a0, d2, d2; \ 99 + vpshufb a0, d3, d3; \ 100 + vmovdqu64 d3, st1; \ 101 + vmovdqu64 st0, d3; \ 102 + vpshufb a0, d3, a0; \ 103 + vmovdqu64 d2, st0; \ 104 + \ 105 + transpose_4x4(a0, b0, c0, d0, d2, d3); \ 106 + transpose_4x4(a1, b1, c1, d1, d2, d3); \ 107 + vmovdqu64 st0, d2; \ 108 + vmovdqu64 st1, d3; \ 109 + \ 110 + vmovdqu64 b0, st0; \ 111 + vmovdqu64 b1, st1; \ 112 + transpose_4x4(a2, b2, c2, d2, b0, b1); \ 113 + transpose_4x4(a3, b3, c3, d3, b0, b1); \ 114 + vmovdqu64 st0, b0; \ 115 + vmovdqu64 st1, b1; \ 116 + /* does not adjust output bytes inside vectors */ 117 + 118 + #define debyteslice_16x16b(a0, b0, c0, d0, \ 119 + a1, b1, c1, d1, \ 120 + a2, b2, c2, d2, \ 121 + a3, b3, c3, d3, \ 122 + st0, st1) \ 123 + vmovdqu64 d2, st0; \ 124 + vmovdqu64 d3, st1; \ 125 + transpose_4x4(a0, a1, a2, a3, d2, d3); \ 126 + transpose_4x4(b0, b1, b2, b3, d2, d3); \ 127 + vmovdqu64 st0, d2; \ 128 + vmovdqu64 st1, d3; \ 129 + \ 130 + vmovdqu64 a0, st0; \ 131 + vmovdqu64 a1, st1; \ 132 + transpose_4x4(c0, c1, c2, c3, a0, a1); \ 133 + transpose_4x4(d0, d1, d2, d3, a0, a1); \ 134 + \ 135 + vbroadcasti64x2 .Lshufb_16x16b, a0; \ 136 + vmovdqu64 st1, a1; \ 137 + vpshufb a0, a2, a2; \ 138 + vpshufb a0, a3, a3; \ 139 + vpshufb a0, b0, b0; \ 140 + vpshufb a0, b1, b1; \ 141 + vpshufb a0, b2, b2; \ 142 + vpshufb a0, b3, b3; \ 143 + vpshufb a0, a1, a1; \ 144 + vpshufb a0, 
c0, c0; \ 145 + vpshufb a0, c1, c1; \ 146 + vpshufb a0, c2, c2; \ 147 + vpshufb a0, c3, c3; \ 148 + vpshufb a0, d0, d0; \ 149 + vpshufb a0, d1, d1; \ 150 + vpshufb a0, d2, d2; \ 151 + vpshufb a0, d3, d3; \ 152 + vmovdqu64 d3, st1; \ 153 + vmovdqu64 st0, d3; \ 154 + vpshufb a0, d3, a0; \ 155 + vmovdqu64 d2, st0; \ 156 + \ 157 + transpose_4x4(c0, d0, a0, b0, d2, d3); \ 158 + transpose_4x4(c1, d1, a1, b1, d2, d3); \ 159 + vmovdqu64 st0, d2; \ 160 + vmovdqu64 st1, d3; \ 161 + \ 162 + vmovdqu64 b0, st0; \ 163 + vmovdqu64 b1, st1; \ 164 + transpose_4x4(c2, d2, a2, b2, b0, b1); \ 165 + transpose_4x4(c3, d3, a3, b3, b0, b1); \ 166 + vmovdqu64 st0, b0; \ 167 + vmovdqu64 st1, b1; \ 168 + /* does not adjust output bytes inside vectors */ 169 + 170 + /* load blocks to registers and apply pre-whitening */ 171 + #define inpack16_pre(x0, x1, x2, x3, \ 172 + x4, x5, x6, x7, \ 173 + y0, y1, y2, y3, \ 174 + y4, y5, y6, y7, \ 175 + rio) \ 176 + vmovdqu64 (0 * 64)(rio), x0; \ 177 + vmovdqu64 (1 * 64)(rio), x1; \ 178 + vmovdqu64 (2 * 64)(rio), x2; \ 179 + vmovdqu64 (3 * 64)(rio), x3; \ 180 + vmovdqu64 (4 * 64)(rio), x4; \ 181 + vmovdqu64 (5 * 64)(rio), x5; \ 182 + vmovdqu64 (6 * 64)(rio), x6; \ 183 + vmovdqu64 (7 * 64)(rio), x7; \ 184 + vmovdqu64 (8 * 64)(rio), y0; \ 185 + vmovdqu64 (9 * 64)(rio), y1; \ 186 + vmovdqu64 (10 * 64)(rio), y2; \ 187 + vmovdqu64 (11 * 64)(rio), y3; \ 188 + vmovdqu64 (12 * 64)(rio), y4; \ 189 + vmovdqu64 (13 * 64)(rio), y5; \ 190 + vmovdqu64 (14 * 64)(rio), y6; \ 191 + vmovdqu64 (15 * 64)(rio), y7; 192 + 193 + /* byteslice pre-whitened blocks and store to temporary memory */ 194 + #define inpack16_post(x0, x1, x2, x3, \ 195 + x4, x5, x6, x7, \ 196 + y0, y1, y2, y3, \ 197 + y4, y5, y6, y7, \ 198 + mem_ab, mem_cd) \ 199 + byteslice_16x16b(x0, x1, x2, x3, \ 200 + x4, x5, x6, x7, \ 201 + y0, y1, y2, y3, \ 202 + y4, y5, y6, y7, \ 203 + (mem_ab), (mem_cd)); \ 204 + \ 205 + vmovdqu64 x0, 0 * 64(mem_ab); \ 206 + vmovdqu64 x1, 1 * 64(mem_ab); \ 207 + vmovdqu64 x2, 2 * 
64(mem_ab); \ 208 + vmovdqu64 x3, 3 * 64(mem_ab); \ 209 + vmovdqu64 x4, 4 * 64(mem_ab); \ 210 + vmovdqu64 x5, 5 * 64(mem_ab); \ 211 + vmovdqu64 x6, 6 * 64(mem_ab); \ 212 + vmovdqu64 x7, 7 * 64(mem_ab); \ 213 + vmovdqu64 y0, 0 * 64(mem_cd); \ 214 + vmovdqu64 y1, 1 * 64(mem_cd); \ 215 + vmovdqu64 y2, 2 * 64(mem_cd); \ 216 + vmovdqu64 y3, 3 * 64(mem_cd); \ 217 + vmovdqu64 y4, 4 * 64(mem_cd); \ 218 + vmovdqu64 y5, 5 * 64(mem_cd); \ 219 + vmovdqu64 y6, 6 * 64(mem_cd); \ 220 + vmovdqu64 y7, 7 * 64(mem_cd); 221 + 222 + #define write_output(x0, x1, x2, x3, \ 223 + x4, x5, x6, x7, \ 224 + y0, y1, y2, y3, \ 225 + y4, y5, y6, y7, \ 226 + mem) \ 227 + vmovdqu64 x0, 0 * 64(mem); \ 228 + vmovdqu64 x1, 1 * 64(mem); \ 229 + vmovdqu64 x2, 2 * 64(mem); \ 230 + vmovdqu64 x3, 3 * 64(mem); \ 231 + vmovdqu64 x4, 4 * 64(mem); \ 232 + vmovdqu64 x5, 5 * 64(mem); \ 233 + vmovdqu64 x6, 6 * 64(mem); \ 234 + vmovdqu64 x7, 7 * 64(mem); \ 235 + vmovdqu64 y0, 8 * 64(mem); \ 236 + vmovdqu64 y1, 9 * 64(mem); \ 237 + vmovdqu64 y2, 10 * 64(mem); \ 238 + vmovdqu64 y3, 11 * 64(mem); \ 239 + vmovdqu64 y4, 12 * 64(mem); \ 240 + vmovdqu64 y5, 13 * 64(mem); \ 241 + vmovdqu64 y6, 14 * 64(mem); \ 242 + vmovdqu64 y7, 15 * 64(mem); \ 243 + 244 + #define aria_store_state_8way(x0, x1, x2, x3, \ 245 + x4, x5, x6, x7, \ 246 + mem_tmp, idx) \ 247 + vmovdqu64 x0, ((idx + 0) * 64)(mem_tmp); \ 248 + vmovdqu64 x1, ((idx + 1) * 64)(mem_tmp); \ 249 + vmovdqu64 x2, ((idx + 2) * 64)(mem_tmp); \ 250 + vmovdqu64 x3, ((idx + 3) * 64)(mem_tmp); \ 251 + vmovdqu64 x4, ((idx + 4) * 64)(mem_tmp); \ 252 + vmovdqu64 x5, ((idx + 5) * 64)(mem_tmp); \ 253 + vmovdqu64 x6, ((idx + 6) * 64)(mem_tmp); \ 254 + vmovdqu64 x7, ((idx + 7) * 64)(mem_tmp); 255 + 256 + #define aria_load_state_8way(x0, x1, x2, x3, \ 257 + x4, x5, x6, x7, \ 258 + mem_tmp, idx) \ 259 + vmovdqu64 ((idx + 0) * 64)(mem_tmp), x0; \ 260 + vmovdqu64 ((idx + 1) * 64)(mem_tmp), x1; \ 261 + vmovdqu64 ((idx + 2) * 64)(mem_tmp), x2; \ 262 + vmovdqu64 ((idx + 3) * 64)(mem_tmp), 
x3; \ 263 + vmovdqu64 ((idx + 4) * 64)(mem_tmp), x4; \ 264 + vmovdqu64 ((idx + 5) * 64)(mem_tmp), x5; \ 265 + vmovdqu64 ((idx + 6) * 64)(mem_tmp), x6; \ 266 + vmovdqu64 ((idx + 7) * 64)(mem_tmp), x7; 267 + 268 + #define aria_ark_16way(x0, x1, x2, x3, \ 269 + x4, x5, x6, x7, \ 270 + y0, y1, y2, y3, \ 271 + y4, y5, y6, y7, \ 272 + t0, rk, round) \ 273 + /* AddRoundKey */ \ 274 + vpbroadcastb ((round * 16) + 3)(rk), t0; \ 275 + vpxorq t0, x0, x0; \ 276 + vpbroadcastb ((round * 16) + 2)(rk), t0; \ 277 + vpxorq t0, x1, x1; \ 278 + vpbroadcastb ((round * 16) + 1)(rk), t0; \ 279 + vpxorq t0, x2, x2; \ 280 + vpbroadcastb ((round * 16) + 0)(rk), t0; \ 281 + vpxorq t0, x3, x3; \ 282 + vpbroadcastb ((round * 16) + 7)(rk), t0; \ 283 + vpxorq t0, x4, x4; \ 284 + vpbroadcastb ((round * 16) + 6)(rk), t0; \ 285 + vpxorq t0, x5, x5; \ 286 + vpbroadcastb ((round * 16) + 5)(rk), t0; \ 287 + vpxorq t0, x6, x6; \ 288 + vpbroadcastb ((round * 16) + 4)(rk), t0; \ 289 + vpxorq t0, x7, x7; \ 290 + vpbroadcastb ((round * 16) + 11)(rk), t0; \ 291 + vpxorq t0, y0, y0; \ 292 + vpbroadcastb ((round * 16) + 10)(rk), t0; \ 293 + vpxorq t0, y1, y1; \ 294 + vpbroadcastb ((round * 16) + 9)(rk), t0; \ 295 + vpxorq t0, y2, y2; \ 296 + vpbroadcastb ((round * 16) + 8)(rk), t0; \ 297 + vpxorq t0, y3, y3; \ 298 + vpbroadcastb ((round * 16) + 15)(rk), t0; \ 299 + vpxorq t0, y4, y4; \ 300 + vpbroadcastb ((round * 16) + 14)(rk), t0; \ 301 + vpxorq t0, y5, y5; \ 302 + vpbroadcastb ((round * 16) + 13)(rk), t0; \ 303 + vpxorq t0, y6, y6; \ 304 + vpbroadcastb ((round * 16) + 12)(rk), t0; \ 305 + vpxorq t0, y7, y7; 306 + 307 + #define aria_sbox_8way_gfni(x0, x1, x2, x3, \ 308 + x4, x5, x6, x7, \ 309 + t0, t1, t2, t3, \ 310 + t4, t5, t6, t7) \ 311 + vpbroadcastq .Ltf_s2_bitmatrix, t0; \ 312 + vpbroadcastq .Ltf_inv_bitmatrix, t1; \ 313 + vpbroadcastq .Ltf_id_bitmatrix, t2; \ 314 + vpbroadcastq .Ltf_aff_bitmatrix, t3; \ 315 + vpbroadcastq .Ltf_x2_bitmatrix, t4; \ 316 + vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \ 
317 + vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \ 318 + vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \ 319 + vgf2p8affineqb $(tf_inv_const), t1, x6, x6; \ 320 + vgf2p8affineinvqb $0, t2, x2, x2; \ 321 + vgf2p8affineinvqb $0, t2, x6, x6; \ 322 + vgf2p8affineinvqb $(tf_aff_const), t3, x0, x0; \ 323 + vgf2p8affineinvqb $(tf_aff_const), t3, x4, x4; \ 324 + vgf2p8affineqb $(tf_x2_const), t4, x3, x3; \ 325 + vgf2p8affineqb $(tf_x2_const), t4, x7, x7; \ 326 + vgf2p8affineinvqb $0, t2, x3, x3; \ 327 + vgf2p8affineinvqb $0, t2, x7, x7; 328 + 329 + #define aria_sbox_16way_gfni(x0, x1, x2, x3, \ 330 + x4, x5, x6, x7, \ 331 + y0, y1, y2, y3, \ 332 + y4, y5, y6, y7, \ 333 + t0, t1, t2, t3, \ 334 + t4, t5, t6, t7) \ 335 + vpbroadcastq .Ltf_s2_bitmatrix, t0; \ 336 + vpbroadcastq .Ltf_inv_bitmatrix, t1; \ 337 + vpbroadcastq .Ltf_id_bitmatrix, t2; \ 338 + vpbroadcastq .Ltf_aff_bitmatrix, t3; \ 339 + vpbroadcastq .Ltf_x2_bitmatrix, t4; \ 340 + vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \ 341 + vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \ 342 + vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \ 343 + vgf2p8affineqb $(tf_inv_const), t1, x6, x6; \ 344 + vgf2p8affineinvqb $0, t2, x2, x2; \ 345 + vgf2p8affineinvqb $0, t2, x6, x6; \ 346 + vgf2p8affineinvqb $(tf_aff_const), t3, x0, x0; \ 347 + vgf2p8affineinvqb $(tf_aff_const), t3, x4, x4; \ 348 + vgf2p8affineqb $(tf_x2_const), t4, x3, x3; \ 349 + vgf2p8affineqb $(tf_x2_const), t4, x7, x7; \ 350 + vgf2p8affineinvqb $0, t2, x3, x3; \ 351 + vgf2p8affineinvqb $0, t2, x7, x7; \ 352 + vgf2p8affineinvqb $(tf_s2_const), t0, y1, y1; \ 353 + vgf2p8affineinvqb $(tf_s2_const), t0, y5, y5; \ 354 + vgf2p8affineqb $(tf_inv_const), t1, y2, y2; \ 355 + vgf2p8affineqb $(tf_inv_const), t1, y6, y6; \ 356 + vgf2p8affineinvqb $0, t2, y2, y2; \ 357 + vgf2p8affineinvqb $0, t2, y6, y6; \ 358 + vgf2p8affineinvqb $(tf_aff_const), t3, y0, y0; \ 359 + vgf2p8affineinvqb $(tf_aff_const), t3, y4, y4; \ 360 + vgf2p8affineqb $(tf_x2_const), t4, y3, y3; \ 361 + 
vgf2p8affineqb $(tf_x2_const), t4, y7, y7; \ 362 + vgf2p8affineinvqb $0, t2, y3, y3; \ 363 + vgf2p8affineinvqb $0, t2, y7, y7; 364 + 365 + 366 + #define aria_diff_m(x0, x1, x2, x3, \ 367 + t0, t1, t2, t3) \ 368 + /* T = rotr32(X, 8); */ \ 369 + /* X ^= T */ \ 370 + vpxorq x0, x3, t0; \ 371 + vpxorq x1, x0, t1; \ 372 + vpxorq x2, x1, t2; \ 373 + vpxorq x3, x2, t3; \ 374 + /* X = T ^ rotr(X, 16); */ \ 375 + vpxorq t2, x0, x0; \ 376 + vpxorq x1, t3, t3; \ 377 + vpxorq t0, x2, x2; \ 378 + vpxorq t1, x3, x1; \ 379 + vmovdqu64 t3, x3; 380 + 381 + #define aria_diff_word(x0, x1, x2, x3, \ 382 + x4, x5, x6, x7, \ 383 + y0, y1, y2, y3, \ 384 + y4, y5, y6, y7) \ 385 + /* t1 ^= t2; */ \ 386 + vpxorq y0, x4, x4; \ 387 + vpxorq y1, x5, x5; \ 388 + vpxorq y2, x6, x6; \ 389 + vpxorq y3, x7, x7; \ 390 + \ 391 + /* t2 ^= t3; */ \ 392 + vpxorq y4, y0, y0; \ 393 + vpxorq y5, y1, y1; \ 394 + vpxorq y6, y2, y2; \ 395 + vpxorq y7, y3, y3; \ 396 + \ 397 + /* t0 ^= t1; */ \ 398 + vpxorq x4, x0, x0; \ 399 + vpxorq x5, x1, x1; \ 400 + vpxorq x6, x2, x2; \ 401 + vpxorq x7, x3, x3; \ 402 + \ 403 + /* t3 ^= t1; */ \ 404 + vpxorq x4, y4, y4; \ 405 + vpxorq x5, y5, y5; \ 406 + vpxorq x6, y6, y6; \ 407 + vpxorq x7, y7, y7; \ 408 + \ 409 + /* t2 ^= t0; */ \ 410 + vpxorq x0, y0, y0; \ 411 + vpxorq x1, y1, y1; \ 412 + vpxorq x2, y2, y2; \ 413 + vpxorq x3, y3, y3; \ 414 + \ 415 + /* t1 ^= t2; */ \ 416 + vpxorq y0, x4, x4; \ 417 + vpxorq y1, x5, x5; \ 418 + vpxorq y2, x6, x6; \ 419 + vpxorq y3, x7, x7; 420 + 421 + #define aria_fe_gfni(x0, x1, x2, x3, \ 422 + x4, x5, x6, x7, \ 423 + y0, y1, y2, y3, \ 424 + y4, y5, y6, y7, \ 425 + z0, z1, z2, z3, \ 426 + z4, z5, z6, z7, \ 427 + mem_tmp, rk, round) \ 428 + aria_ark_16way(x0, x1, x2, x3, x4, x5, x6, x7, \ 429 + y0, y1, y2, y3, y4, y5, y6, y7, \ 430 + z0, rk, round); \ 431 + \ 432 + aria_sbox_16way_gfni(x2, x3, x0, x1, \ 433 + x6, x7, x4, x5, \ 434 + y2, y3, y0, y1, \ 435 + y6, y7, y4, y5, \ 436 + z0, z1, z2, z3, \ 437 + z4, z5, z6, z7); \ 438 + \ 439 + 
aria_diff_m(x0, x1, x2, x3, z0, z1, z2, z3); \ 440 + aria_diff_m(x4, x5, x6, x7, z0, z1, z2, z3); \ 441 + aria_diff_m(y0, y1, y2, y3, z0, z1, z2, z3); \ 442 + aria_diff_m(y4, y5, y6, y7, z0, z1, z2, z3); \ 443 + aria_diff_word(x0, x1, x2, x3, \ 444 + x4, x5, x6, x7, \ 445 + y0, y1, y2, y3, \ 446 + y4, y5, y6, y7); \ 447 + /* aria_diff_byte() \ 448 + * T3 = ABCD -> BADC \ 449 + * T3 = y4, y5, y6, y7 -> y5, y4, y7, y6 \ 450 + * T0 = ABCD -> CDAB \ 451 + * T0 = x0, x1, x2, x3 -> x2, x3, x0, x1 \ 452 + * T1 = ABCD -> DCBA \ 453 + * T1 = x4, x5, x6, x7 -> x7, x6, x5, x4 \ 454 + */ \ 455 + aria_diff_word(x2, x3, x0, x1, \ 456 + x7, x6, x5, x4, \ 457 + y0, y1, y2, y3, \ 458 + y5, y4, y7, y6); \ 459 + 460 + 461 + #define aria_fo_gfni(x0, x1, x2, x3, \ 462 + x4, x5, x6, x7, \ 463 + y0, y1, y2, y3, \ 464 + y4, y5, y6, y7, \ 465 + z0, z1, z2, z3, \ 466 + z4, z5, z6, z7, \ 467 + mem_tmp, rk, round) \ 468 + aria_ark_16way(x0, x1, x2, x3, x4, x5, x6, x7, \ 469 + y0, y1, y2, y3, y4, y5, y6, y7, \ 470 + z0, rk, round); \ 471 + \ 472 + aria_sbox_16way_gfni(x0, x1, x2, x3, \ 473 + x4, x5, x6, x7, \ 474 + y0, y1, y2, y3, \ 475 + y4, y5, y6, y7, \ 476 + z0, z1, z2, z3, \ 477 + z4, z5, z6, z7); \ 478 + \ 479 + aria_diff_m(x0, x1, x2, x3, z0, z1, z2, z3); \ 480 + aria_diff_m(x4, x5, x6, x7, z0, z1, z2, z3); \ 481 + aria_diff_m(y0, y1, y2, y3, z0, z1, z2, z3); \ 482 + aria_diff_m(y4, y5, y6, y7, z0, z1, z2, z3); \ 483 + aria_diff_word(x0, x1, x2, x3, \ 484 + x4, x5, x6, x7, \ 485 + y0, y1, y2, y3, \ 486 + y4, y5, y6, y7); \ 487 + /* aria_diff_byte() \ 488 + * T1 = ABCD -> BADC \ 489 + * T1 = x4, x5, x6, x7 -> x5, x4, x7, x6 \ 490 + * T2 = ABCD -> CDAB \ 491 + * T2 = y0, y1, y2, y3, -> y2, y3, y0, y1 \ 492 + * T3 = ABCD -> DCBA \ 493 + * T3 = y4, y5, y6, y7 -> y7, y6, y5, y4 \ 494 + */ \ 495 + aria_diff_word(x0, x1, x2, x3, \ 496 + x5, x4, x7, x6, \ 497 + y2, y3, y0, y1, \ 498 + y7, y6, y5, y4); 499 + 500 + #define aria_ff_gfni(x0, x1, x2, x3, \ 501 + x4, x5, x6, x7, \ 502 + y0, y1, y2, 
y3, \ 503 + y4, y5, y6, y7, \ 504 + z0, z1, z2, z3, \ 505 + z4, z5, z6, z7, \ 506 + mem_tmp, rk, round, last_round) \ 507 + aria_ark_16way(x0, x1, x2, x3, \ 508 + x4, x5, x6, x7, \ 509 + y0, y1, y2, y3, \ 510 + y4, y5, y6, y7, \ 511 + z0, rk, round); \ 512 + aria_sbox_16way_gfni(x2, x3, x0, x1, \ 513 + x6, x7, x4, x5, \ 514 + y2, y3, y0, y1, \ 515 + y6, y7, y4, y5, \ 516 + z0, z1, z2, z3, \ 517 + z4, z5, z6, z7); \ 518 + aria_ark_16way(x0, x1, x2, x3, \ 519 + x4, x5, x6, x7, \ 520 + y0, y1, y2, y3, \ 521 + y4, y5, y6, y7, \ 522 + z0, rk, last_round); 523 + 524 + 525 + .section .rodata.cst64, "aM", @progbits, 64 526 + .align 64 527 + .Lcounter0123_lo: 528 + .quad 0, 0 529 + .quad 1, 0 530 + .quad 2, 0 531 + .quad 3, 0 532 + 533 + .section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32 534 + .align 32 535 + #define SHUFB_BYTES(idx) \ 536 + 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) 537 + .Lshufb_16x16b: 538 + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) 539 + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) 540 + 541 + .section .rodata.cst16, "aM", @progbits, 16 542 + .align 16 543 + 544 + .Lcounter4444_lo: 545 + .quad 4, 0 546 + .Lcounter8888_lo: 547 + .quad 8, 0 548 + .Lcounter16161616_lo: 549 + .quad 16, 0 550 + .Lcounter1111_hi: 551 + .quad 0, 1 552 + 553 + /* For CTR-mode IV byteswap */ 554 + .Lbswap128_mask: 555 + .byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 556 + .byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 557 + 558 + .section .rodata.cst8, "aM", @progbits, 8 559 + .align 8 560 + /* AES affine: */ 561 + #define tf_aff_const BV8(1, 1, 0, 0, 0, 1, 1, 0) 562 + .Ltf_aff_bitmatrix: 563 + .quad BM8X8(BV8(1, 0, 0, 0, 1, 1, 1, 1), 564 + BV8(1, 1, 0, 0, 0, 1, 1, 1), 565 + BV8(1, 1, 1, 0, 0, 0, 1, 1), 566 + BV8(1, 1, 1, 1, 0, 0, 0, 1), 567 + BV8(1, 1, 1, 1, 1, 0, 0, 0), 568 + BV8(0, 1, 1, 1, 1, 1, 0, 0), 569 + BV8(0, 0, 1, 1, 1, 1, 1, 0), 570 + BV8(0, 0, 0, 1, 1, 1, 1, 1)) 571 + 572 + /* AES inverse affine: 
*/ 573 + #define tf_inv_const BV8(1, 0, 1, 0, 0, 0, 0, 0) 574 + .Ltf_inv_bitmatrix: 575 + .quad BM8X8(BV8(0, 0, 1, 0, 0, 1, 0, 1), 576 + BV8(1, 0, 0, 1, 0, 0, 1, 0), 577 + BV8(0, 1, 0, 0, 1, 0, 0, 1), 578 + BV8(1, 0, 1, 0, 0, 1, 0, 0), 579 + BV8(0, 1, 0, 1, 0, 0, 1, 0), 580 + BV8(0, 0, 1, 0, 1, 0, 0, 1), 581 + BV8(1, 0, 0, 1, 0, 1, 0, 0), 582 + BV8(0, 1, 0, 0, 1, 0, 1, 0)) 583 + 584 + /* S2: */ 585 + #define tf_s2_const BV8(0, 1, 0, 0, 0, 1, 1, 1) 586 + .Ltf_s2_bitmatrix: 587 + .quad BM8X8(BV8(0, 1, 0, 1, 0, 1, 1, 1), 588 + BV8(0, 0, 1, 1, 1, 1, 1, 1), 589 + BV8(1, 1, 1, 0, 1, 1, 0, 1), 590 + BV8(1, 1, 0, 0, 0, 0, 1, 1), 591 + BV8(0, 1, 0, 0, 0, 0, 1, 1), 592 + BV8(1, 1, 0, 0, 1, 1, 1, 0), 593 + BV8(0, 1, 1, 0, 0, 0, 1, 1), 594 + BV8(1, 1, 1, 1, 0, 1, 1, 0)) 595 + 596 + /* X2: */ 597 + #define tf_x2_const BV8(0, 0, 1, 1, 0, 1, 0, 0) 598 + .Ltf_x2_bitmatrix: 599 + .quad BM8X8(BV8(0, 0, 0, 1, 1, 0, 0, 0), 600 + BV8(0, 0, 1, 0, 0, 1, 1, 0), 601 + BV8(0, 0, 0, 0, 1, 0, 1, 0), 602 + BV8(1, 1, 1, 0, 0, 0, 1, 1), 603 + BV8(1, 1, 1, 0, 1, 1, 0, 0), 604 + BV8(0, 1, 1, 0, 1, 0, 1, 1), 605 + BV8(1, 0, 1, 1, 1, 1, 0, 1), 606 + BV8(1, 0, 0, 1, 0, 0, 1, 1)) 607 + 608 + /* Identity matrix: */ 609 + .Ltf_id_bitmatrix: 610 + .quad BM8X8(BV8(1, 0, 0, 0, 0, 0, 0, 0), 611 + BV8(0, 1, 0, 0, 0, 0, 0, 0), 612 + BV8(0, 0, 1, 0, 0, 0, 0, 0), 613 + BV8(0, 0, 0, 1, 0, 0, 0, 0), 614 + BV8(0, 0, 0, 0, 1, 0, 0, 0), 615 + BV8(0, 0, 0, 0, 0, 1, 0, 0), 616 + BV8(0, 0, 0, 0, 0, 0, 1, 0), 617 + BV8(0, 0, 0, 0, 0, 0, 0, 1)) 618 + 619 + .text 620 + SYM_FUNC_START_LOCAL(__aria_gfni_avx512_crypt_64way) 621 + /* input: 622 + * %r9: rk 623 + * %rsi: dst 624 + * %rdx: src 625 + * %zmm0..%zmm15: byte-sliced blocks 626 + */ 627 + 628 + FRAME_BEGIN 629 + 630 + movq %rsi, %rax; 631 + leaq 8 * 64(%rax), %r8; 632 + 633 + inpack16_post(%zmm0, %zmm1, %zmm2, %zmm3, 634 + %zmm4, %zmm5, %zmm6, %zmm7, 635 + %zmm8, %zmm9, %zmm10, %zmm11, 636 + %zmm12, %zmm13, %zmm14, 637 + %zmm15, %rax, %r8); 638 + aria_fo_gfni(%zmm0, 
%zmm1, %zmm2, %zmm3, 639 + %zmm4, %zmm5, %zmm6, %zmm7, 640 + %zmm8, %zmm9, %zmm10, %zmm11, 641 + %zmm12, %zmm13, %zmm14, %zmm15, 642 + %zmm24, %zmm25, %zmm26, %zmm27, 643 + %zmm28, %zmm29, %zmm30, %zmm31, 644 + %rax, %r9, 0); 645 + aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0, 646 + %zmm6, %zmm7, %zmm4, %zmm5, 647 + %zmm9, %zmm8, %zmm11, %zmm10, 648 + %zmm12, %zmm13, %zmm14, %zmm15, 649 + %zmm24, %zmm25, %zmm26, %zmm27, 650 + %zmm28, %zmm29, %zmm30, %zmm31, 651 + %rax, %r9, 1); 652 + aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3, 653 + %zmm4, %zmm5, %zmm6, %zmm7, 654 + %zmm8, %zmm9, %zmm10, %zmm11, 655 + %zmm12, %zmm13, %zmm14, %zmm15, 656 + %zmm24, %zmm25, %zmm26, %zmm27, 657 + %zmm28, %zmm29, %zmm30, %zmm31, 658 + %rax, %r9, 2); 659 + aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0, 660 + %zmm6, %zmm7, %zmm4, %zmm5, 661 + %zmm9, %zmm8, %zmm11, %zmm10, 662 + %zmm12, %zmm13, %zmm14, %zmm15, 663 + %zmm24, %zmm25, %zmm26, %zmm27, 664 + %zmm28, %zmm29, %zmm30, %zmm31, 665 + %rax, %r9, 3); 666 + aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3, 667 + %zmm4, %zmm5, %zmm6, %zmm7, 668 + %zmm8, %zmm9, %zmm10, %zmm11, 669 + %zmm12, %zmm13, %zmm14, %zmm15, 670 + %zmm24, %zmm25, %zmm26, %zmm27, 671 + %zmm28, %zmm29, %zmm30, %zmm31, 672 + %rax, %r9, 4); 673 + aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0, 674 + %zmm6, %zmm7, %zmm4, %zmm5, 675 + %zmm9, %zmm8, %zmm11, %zmm10, 676 + %zmm12, %zmm13, %zmm14, %zmm15, 677 + %zmm24, %zmm25, %zmm26, %zmm27, 678 + %zmm28, %zmm29, %zmm30, %zmm31, 679 + %rax, %r9, 5); 680 + aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3, 681 + %zmm4, %zmm5, %zmm6, %zmm7, 682 + %zmm8, %zmm9, %zmm10, %zmm11, 683 + %zmm12, %zmm13, %zmm14, %zmm15, 684 + %zmm24, %zmm25, %zmm26, %zmm27, 685 + %zmm28, %zmm29, %zmm30, %zmm31, 686 + %rax, %r9, 6); 687 + aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0, 688 + %zmm6, %zmm7, %zmm4, %zmm5, 689 + %zmm9, %zmm8, %zmm11, %zmm10, 690 + %zmm12, %zmm13, %zmm14, %zmm15, 691 + %zmm24, %zmm25, %zmm26, %zmm27, 692 + %zmm28, %zmm29, %zmm30, %zmm31, 693 + %rax, %r9, 7); 694 + 
aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3, 695 + %zmm4, %zmm5, %zmm6, %zmm7, 696 + %zmm8, %zmm9, %zmm10, %zmm11, 697 + %zmm12, %zmm13, %zmm14, %zmm15, 698 + %zmm24, %zmm25, %zmm26, %zmm27, 699 + %zmm28, %zmm29, %zmm30, %zmm31, 700 + %rax, %r9, 8); 701 + aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0, 702 + %zmm6, %zmm7, %zmm4, %zmm5, 703 + %zmm9, %zmm8, %zmm11, %zmm10, 704 + %zmm12, %zmm13, %zmm14, %zmm15, 705 + %zmm24, %zmm25, %zmm26, %zmm27, 706 + %zmm28, %zmm29, %zmm30, %zmm31, 707 + %rax, %r9, 9); 708 + aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3, 709 + %zmm4, %zmm5, %zmm6, %zmm7, 710 + %zmm8, %zmm9, %zmm10, %zmm11, 711 + %zmm12, %zmm13, %zmm14, %zmm15, 712 + %zmm24, %zmm25, %zmm26, %zmm27, 713 + %zmm28, %zmm29, %zmm30, %zmm31, 714 + %rax, %r9, 10); 715 + cmpl $12, ARIA_CTX_rounds(CTX); 716 + jne .Laria_gfni_192; 717 + aria_ff_gfni(%zmm3, %zmm2, %zmm1, %zmm0, 718 + %zmm6, %zmm7, %zmm4, %zmm5, 719 + %zmm9, %zmm8, %zmm11, %zmm10, 720 + %zmm12, %zmm13, %zmm14, %zmm15, 721 + %zmm24, %zmm25, %zmm26, %zmm27, 722 + %zmm28, %zmm29, %zmm30, %zmm31, 723 + %rax, %r9, 11, 12); 724 + jmp .Laria_gfni_end; 725 + .Laria_gfni_192: 726 + aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0, 727 + %zmm6, %zmm7, %zmm4, %zmm5, 728 + %zmm9, %zmm8, %zmm11, %zmm10, 729 + %zmm12, %zmm13, %zmm14, %zmm15, 730 + %zmm24, %zmm25, %zmm26, %zmm27, 731 + %zmm28, %zmm29, %zmm30, %zmm31, 732 + %rax, %r9, 11); 733 + aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3, 734 + %zmm4, %zmm5, %zmm6, %zmm7, 735 + %zmm8, %zmm9, %zmm10, %zmm11, 736 + %zmm12, %zmm13, %zmm14, %zmm15, 737 + %zmm24, %zmm25, %zmm26, %zmm27, 738 + %zmm28, %zmm29, %zmm30, %zmm31, 739 + %rax, %r9, 12); 740 + cmpl $14, ARIA_CTX_rounds(CTX); 741 + jne .Laria_gfni_256; 742 + aria_ff_gfni(%zmm3, %zmm2, %zmm1, %zmm0, 743 + %zmm6, %zmm7, %zmm4, %zmm5, 744 + %zmm9, %zmm8, %zmm11, %zmm10, 745 + %zmm12, %zmm13, %zmm14, %zmm15, 746 + %zmm24, %zmm25, %zmm26, %zmm27, 747 + %zmm28, %zmm29, %zmm30, %zmm31, 748 + %rax, %r9, 13, 14); 749 + jmp .Laria_gfni_end; 750 + 
.Laria_gfni_256: 751 + aria_fe_gfni(%zmm3, %zmm2, %zmm1, %zmm0, 752 + %zmm6, %zmm7, %zmm4, %zmm5, 753 + %zmm9, %zmm8, %zmm11, %zmm10, 754 + %zmm12, %zmm13, %zmm14, %zmm15, 755 + %zmm24, %zmm25, %zmm26, %zmm27, 756 + %zmm28, %zmm29, %zmm30, %zmm31, 757 + %rax, %r9, 13); 758 + aria_fo_gfni(%zmm0, %zmm1, %zmm2, %zmm3, 759 + %zmm4, %zmm5, %zmm6, %zmm7, 760 + %zmm8, %zmm9, %zmm10, %zmm11, 761 + %zmm12, %zmm13, %zmm14, %zmm15, 762 + %zmm24, %zmm25, %zmm26, %zmm27, 763 + %zmm28, %zmm29, %zmm30, %zmm31, 764 + %rax, %r9, 14); 765 + aria_ff_gfni(%zmm3, %zmm2, %zmm1, %zmm0, 766 + %zmm6, %zmm7, %zmm4, %zmm5, 767 + %zmm9, %zmm8, %zmm11, %zmm10, 768 + %zmm12, %zmm13, %zmm14, %zmm15, 769 + %zmm24, %zmm25, %zmm26, %zmm27, 770 + %zmm28, %zmm29, %zmm30, %zmm31, 771 + %rax, %r9, 15, 16); 772 + .Laria_gfni_end: 773 + debyteslice_16x16b(%zmm9, %zmm12, %zmm3, %zmm6, 774 + %zmm8, %zmm13, %zmm2, %zmm7, 775 + %zmm11, %zmm14, %zmm1, %zmm4, 776 + %zmm10, %zmm15, %zmm0, %zmm5, 777 + (%rax), (%r8)); 778 + FRAME_END 779 + RET; 780 + SYM_FUNC_END(__aria_gfni_avx512_crypt_64way) 781 + 782 + SYM_TYPED_FUNC_START(aria_gfni_avx512_encrypt_64way) 783 + /* input: 784 + * %rdi: ctx, CTX 785 + * %rsi: dst 786 + * %rdx: src 787 + */ 788 + 789 + FRAME_BEGIN 790 + 791 + leaq ARIA_CTX_enc_key(CTX), %r9; 792 + 793 + inpack16_pre(%zmm0, %zmm1, %zmm2, %zmm3, %zmm4, %zmm5, %zmm6, %zmm7, 794 + %zmm8, %zmm9, %zmm10, %zmm11, %zmm12, %zmm13, %zmm14, 795 + %zmm15, %rdx); 796 + 797 + call __aria_gfni_avx512_crypt_64way; 798 + 799 + write_output(%zmm3, %zmm2, %zmm1, %zmm0, %zmm6, %zmm7, %zmm4, %zmm5, 800 + %zmm9, %zmm8, %zmm11, %zmm10, %zmm12, %zmm13, %zmm14, 801 + %zmm15, %rax); 802 + 803 + FRAME_END 804 + RET; 805 + SYM_FUNC_END(aria_gfni_avx512_encrypt_64way) 806 + 807 + SYM_TYPED_FUNC_START(aria_gfni_avx512_decrypt_64way) 808 + /* input: 809 + * %rdi: ctx, CTX 810 + * %rsi: dst 811 + * %rdx: src 812 + */ 813 + 814 + FRAME_BEGIN 815 + 816 + leaq ARIA_CTX_dec_key(CTX), %r9; 817 + 818 + inpack16_pre(%zmm0, %zmm1, 
%zmm2, %zmm3, %zmm4, %zmm5, %zmm6, %zmm7, 819 + %zmm8, %zmm9, %zmm10, %zmm11, %zmm12, %zmm13, %zmm14, 820 + %zmm15, %rdx); 821 + 822 + call __aria_gfni_avx512_crypt_64way; 823 + 824 + write_output(%zmm3, %zmm2, %zmm1, %zmm0, %zmm6, %zmm7, %zmm4, %zmm5, 825 + %zmm9, %zmm8, %zmm11, %zmm10, %zmm12, %zmm13, %zmm14, 826 + %zmm15, %rax); 827 + 828 + FRAME_END 829 + RET; 830 + SYM_FUNC_END(aria_gfni_avx512_decrypt_64way) 831 + 832 + SYM_FUNC_START_LOCAL(__aria_gfni_avx512_ctr_gen_keystream_64way) 833 + /* input: 834 + * %rdi: ctx 835 + * %rsi: dst 836 + * %rdx: src 837 + * %rcx: keystream 838 + * %r8: iv (big endian, 128bit) 839 + */ 840 + 841 + FRAME_BEGIN 842 + 843 + vbroadcasti64x2 .Lbswap128_mask (%rip), %zmm19; 844 + vmovdqa64 .Lcounter0123_lo (%rip), %zmm21; 845 + vbroadcasti64x2 .Lcounter4444_lo (%rip), %zmm22; 846 + vbroadcasti64x2 .Lcounter8888_lo (%rip), %zmm23; 847 + vbroadcasti64x2 .Lcounter16161616_lo (%rip), %zmm24; 848 + vbroadcasti64x2 .Lcounter1111_hi (%rip), %zmm25; 849 + 850 + /* load IV and byteswap */ 851 + movq 8(%r8), %r11; 852 + movq (%r8), %r10; 853 + bswapq %r11; 854 + bswapq %r10; 855 + vbroadcasti64x2 (%r8), %zmm20; 856 + vpshufb %zmm19, %zmm20, %zmm20; 857 + 858 + /* check need for handling 64-bit overflow and carry */ 859 + cmpq $(0xffffffffffffffff - 64), %r11; 860 + ja .Lload_ctr_carry; 861 + 862 + /* construct IVs */ 863 + vpaddq %zmm21, %zmm20, %zmm0; /* +0:+1:+2:+3 */ 864 + vpaddq %zmm22, %zmm0, %zmm1; /* +4:+5:+6:+7 */ 865 + vpaddq %zmm23, %zmm0, %zmm2; /* +8:+9:+10:+11 */ 866 + vpaddq %zmm23, %zmm1, %zmm3; /* +12:+13:+14:+15 */ 867 + vpaddq %zmm24, %zmm0, %zmm4; /* +16... */ 868 + vpaddq %zmm24, %zmm1, %zmm5; /* +20... */ 869 + vpaddq %zmm24, %zmm2, %zmm6; /* +24... */ 870 + vpaddq %zmm24, %zmm3, %zmm7; /* +28... */ 871 + vpaddq %zmm24, %zmm4, %zmm8; /* +32... */ 872 + vpaddq %zmm24, %zmm5, %zmm9; /* +36... */ 873 + vpaddq %zmm24, %zmm6, %zmm10; /* +40... */ 874 + vpaddq %zmm24, %zmm7, %zmm11; /* +44... 
*/ 875 + vpaddq %zmm24, %zmm8, %zmm12; /* +48... */ 876 + vpaddq %zmm24, %zmm9, %zmm13; /* +52... */ 877 + vpaddq %zmm24, %zmm10, %zmm14; /* +56... */ 878 + vpaddq %zmm24, %zmm11, %zmm15; /* +60... */ 879 + jmp .Lload_ctr_done; 880 + 881 + .Lload_ctr_carry: 882 + /* construct IVs */ 883 + add_le128(%zmm0, %zmm20, %zmm21, %zmm25); /* +0:+1:+2:+3 */ 884 + add_le128(%zmm1, %zmm0, %zmm22, %zmm25); /* +4:+5:+6:+7 */ 885 + add_le128(%zmm2, %zmm0, %zmm23, %zmm25); /* +8:+9:+10:+11 */ 886 + add_le128(%zmm3, %zmm1, %zmm23, %zmm25); /* +12:+13:+14:+15 */ 887 + add_le128(%zmm4, %zmm0, %zmm24, %zmm25); /* +16... */ 888 + add_le128(%zmm5, %zmm1, %zmm24, %zmm25); /* +20... */ 889 + add_le128(%zmm6, %zmm2, %zmm24, %zmm25); /* +24... */ 890 + add_le128(%zmm7, %zmm3, %zmm24, %zmm25); /* +28... */ 891 + add_le128(%zmm8, %zmm4, %zmm24, %zmm25); /* +32... */ 892 + add_le128(%zmm9, %zmm5, %zmm24, %zmm25); /* +36... */ 893 + add_le128(%zmm10, %zmm6, %zmm24, %zmm25); /* +40... */ 894 + add_le128(%zmm11, %zmm7, %zmm24, %zmm25); /* +44... */ 895 + add_le128(%zmm12, %zmm8, %zmm24, %zmm25); /* +48... */ 896 + add_le128(%zmm13, %zmm9, %zmm24, %zmm25); /* +52... */ 897 + add_le128(%zmm14, %zmm10, %zmm24, %zmm25); /* +56... */ 898 + add_le128(%zmm15, %zmm11, %zmm24, %zmm25); /* +60... */ 899 + 900 + .Lload_ctr_done: 901 + /* Byte-swap IVs and update counter. 
*/ 902 + addq $64, %r11; 903 + adcq $0, %r10; 904 + vpshufb %zmm19, %zmm15, %zmm15; 905 + vpshufb %zmm19, %zmm14, %zmm14; 906 + vpshufb %zmm19, %zmm13, %zmm13; 907 + vpshufb %zmm19, %zmm12, %zmm12; 908 + vpshufb %zmm19, %zmm11, %zmm11; 909 + vpshufb %zmm19, %zmm10, %zmm10; 910 + vpshufb %zmm19, %zmm9, %zmm9; 911 + vpshufb %zmm19, %zmm8, %zmm8; 912 + bswapq %r11; 913 + bswapq %r10; 914 + vpshufb %zmm19, %zmm7, %zmm7; 915 + vpshufb %zmm19, %zmm6, %zmm6; 916 + vpshufb %zmm19, %zmm5, %zmm5; 917 + vpshufb %zmm19, %zmm4, %zmm4; 918 + vpshufb %zmm19, %zmm3, %zmm3; 919 + vpshufb %zmm19, %zmm2, %zmm2; 920 + vpshufb %zmm19, %zmm1, %zmm1; 921 + vpshufb %zmm19, %zmm0, %zmm0; 922 + movq %r11, 8(%r8); 923 + movq %r10, (%r8); 924 + 925 + FRAME_END 926 + RET; 927 + SYM_FUNC_END(__aria_gfni_avx512_ctr_gen_keystream_64way) 928 + 929 + SYM_TYPED_FUNC_START(aria_gfni_avx512_ctr_crypt_64way) 930 + /* input: 931 + * %rdi: ctx 932 + * %rsi: dst 933 + * %rdx: src 934 + * %rcx: keystream 935 + * %r8: iv (big endian, 128bit) 936 + */ 937 + FRAME_BEGIN 938 + 939 + call __aria_gfni_avx512_ctr_gen_keystream_64way 940 + 941 + leaq (%rsi), %r10; 942 + leaq (%rdx), %r11; 943 + leaq (%rcx), %rsi; 944 + leaq (%rcx), %rdx; 945 + leaq ARIA_CTX_enc_key(CTX), %r9; 946 + 947 + call __aria_gfni_avx512_crypt_64way; 948 + 949 + vpxorq (0 * 64)(%r11), %zmm3, %zmm3; 950 + vpxorq (1 * 64)(%r11), %zmm2, %zmm2; 951 + vpxorq (2 * 64)(%r11), %zmm1, %zmm1; 952 + vpxorq (3 * 64)(%r11), %zmm0, %zmm0; 953 + vpxorq (4 * 64)(%r11), %zmm6, %zmm6; 954 + vpxorq (5 * 64)(%r11), %zmm7, %zmm7; 955 + vpxorq (6 * 64)(%r11), %zmm4, %zmm4; 956 + vpxorq (7 * 64)(%r11), %zmm5, %zmm5; 957 + vpxorq (8 * 64)(%r11), %zmm9, %zmm9; 958 + vpxorq (9 * 64)(%r11), %zmm8, %zmm8; 959 + vpxorq (10 * 64)(%r11), %zmm11, %zmm11; 960 + vpxorq (11 * 64)(%r11), %zmm10, %zmm10; 961 + vpxorq (12 * 64)(%r11), %zmm12, %zmm12; 962 + vpxorq (13 * 64)(%r11), %zmm13, %zmm13; 963 + vpxorq (14 * 64)(%r11), %zmm14, %zmm14; 964 + vpxorq (15 * 64)(%r11), %zmm15, 
%zmm15; 965 + write_output(%zmm3, %zmm2, %zmm1, %zmm0, %zmm6, %zmm7, %zmm4, %zmm5, 966 + %zmm9, %zmm8, %zmm11, %zmm10, %zmm12, %zmm13, %zmm14, 967 + %zmm15, %r10); 968 + 969 + FRAME_END 970 + RET; 971 + SYM_FUNC_END(aria_gfni_avx512_ctr_crypt_64way)
+254
arch/x86/crypto/aria_aesni_avx2_glue.c
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Glue Code for the AVX2/AES-NI/GFNI assembler implementation of the ARIA Cipher
 *
 * Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com>
 */

#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
#include <crypto/aria.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/types.h>

#include "ecb_cbc_helpers.h"
#include "aria-avx.h"

/*
 * 32-way parallel assembler entry points.  Exported so that other ARIA
 * glue modules (e.g. the AVX512 one) can fall back to these routines.
 */
asmlinkage void aria_aesni_avx2_encrypt_32way(const void *ctx, u8 *dst,
					      const u8 *src);
EXPORT_SYMBOL_GPL(aria_aesni_avx2_encrypt_32way);
asmlinkage void aria_aesni_avx2_decrypt_32way(const void *ctx, u8 *dst,
					      const u8 *src);
EXPORT_SYMBOL_GPL(aria_aesni_avx2_decrypt_32way);
asmlinkage void aria_aesni_avx2_ctr_crypt_32way(const void *ctx, u8 *dst,
						const u8 *src,
						u8 *keystream, u8 *iv);
EXPORT_SYMBOL_GPL(aria_aesni_avx2_ctr_crypt_32way);
#ifdef CONFIG_AS_GFNI
/* GFNI variants, only available when the assembler can emit GFNI insns. */
asmlinkage void aria_aesni_avx2_gfni_encrypt_32way(const void *ctx, u8 *dst,
						   const u8 *src);
EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_encrypt_32way);
asmlinkage void aria_aesni_avx2_gfni_decrypt_32way(const void *ctx, u8 *dst,
						   const u8 *src);
EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_decrypt_32way);
asmlinkage void aria_aesni_avx2_gfni_ctr_crypt_32way(const void *ctx, u8 *dst,
						     const u8 *src,
						     u8 *keystream, u8 *iv);
EXPORT_SYMBOL_GPL(aria_aesni_avx2_gfni_ctr_crypt_32way);
#endif /* CONFIG_AS_GFNI */

/* Dispatch table, filled in at module init based on detected CPU features. */
static struct aria_avx_ops aria_ops;

/*
 * Per-request scratch area: keystream buffer large enough for one 32-block
 * CTR batch.  Lives in the skcipher request context (see
 * aria_avx2_init_tfm) rather than on the stack because of its size.
 */
struct aria_avx2_request_ctx {
	u8 keystream[ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE];
};

/*
 * ECB encryption walk: consume 32 blocks, then 16 blocks per FPU section,
 * then single blocks with the generic C implementation.
 */
static int ecb_do_encrypt(struct skcipher_request *req, const u32 *rkey)
{
	ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS);
	ECB_BLOCK(ARIA_AESNI_AVX2_PARALLEL_BLOCKS, aria_ops.aria_encrypt_32way);
	ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_encrypt_16way);
	ECB_BLOCK(1, aria_encrypt);
	ECB_WALK_END();
}

/* ECB decryption walk; mirrors ecb_do_encrypt() with the decrypt helpers. */
static int ecb_do_decrypt(struct skcipher_request *req, const u32 *rkey)
{
	ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS);
	ECB_BLOCK(ARIA_AESNI_AVX2_PARALLEL_BLOCKS, aria_ops.aria_decrypt_32way);
	ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_decrypt_16way);
	ECB_BLOCK(1, aria_decrypt);
	ECB_WALK_END();
}

static int aria_avx2_ecb_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);

	return ecb_do_encrypt(req, ctx->enc_key[0]);
}

static int aria_avx2_ecb_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);

	return ecb_do_decrypt(req, ctx->dec_key[0]);
}

/* Delegate key expansion to the generic ARIA implementation. */
static int aria_avx2_set_key(struct crypto_skcipher *tfm, const u8 *key,
			     unsigned int keylen)
{
	return aria_set_key(&tfm->base, key, keylen);
}

/*
 * CTR en/decryption (same operation in both directions): process 32-block
 * and 16-block batches with the SIMD helpers, then whole single blocks
 * and a final partial block with the generic C cipher.  The FPU is only
 * held across one SIMD batch at a time to bound preemption latency.
 */
static int aria_avx2_ctr_encrypt(struct skcipher_request *req)
{
	struct aria_avx2_request_ctx *req_ctx = skcipher_request_ctx(req);
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) > 0) {
		const u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;

		while (nbytes >= ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE) {
			kernel_fpu_begin();
			aria_ops.aria_ctr_crypt_32way(ctx, dst, src,
						      &req_ctx->keystream[0],
						      walk.iv);
			kernel_fpu_end();
			dst += ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE;
			src += ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE;
			nbytes -= ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE;
		}

		while (nbytes >= ARIA_AESNI_PARALLEL_BLOCK_SIZE) {
			kernel_fpu_begin();
			aria_ops.aria_ctr_crypt_16way(ctx, dst, src,
						      &req_ctx->keystream[0],
						      walk.iv);
			kernel_fpu_end();
			dst += ARIA_AESNI_PARALLEL_BLOCK_SIZE;
			src += ARIA_AESNI_PARALLEL_BLOCK_SIZE;
			nbytes -= ARIA_AESNI_PARALLEL_BLOCK_SIZE;
		}

		/* whole single blocks: keystream = E(counter), dst = src ^ ks */
		while (nbytes >= ARIA_BLOCK_SIZE) {
			memcpy(&req_ctx->keystream[0], walk.iv, ARIA_BLOCK_SIZE);
			crypto_inc(walk.iv, ARIA_BLOCK_SIZE);

			aria_encrypt(ctx, &req_ctx->keystream[0],
				     &req_ctx->keystream[0]);

			crypto_xor_cpy(dst, src, &req_ctx->keystream[0],
				       ARIA_BLOCK_SIZE);
			dst += ARIA_BLOCK_SIZE;
			src += ARIA_BLOCK_SIZE;
			nbytes -= ARIA_BLOCK_SIZE;
		}

		/* trailing partial block, only possible on the last walk step */
		if (walk.nbytes == walk.total && nbytes > 0) {
			memcpy(&req_ctx->keystream[0], walk.iv,
			       ARIA_BLOCK_SIZE);
			crypto_inc(walk.iv, ARIA_BLOCK_SIZE);

			aria_encrypt(ctx, &req_ctx->keystream[0],
				     &req_ctx->keystream[0]);

			crypto_xor_cpy(dst, src, &req_ctx->keystream[0],
				       nbytes);
			dst += nbytes;
			src += nbytes;
			nbytes = 0;
		}
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}

/* Reserve per-request space for the CTR keystream buffer. */
static int aria_avx2_init_tfm(struct crypto_skcipher *tfm)
{
	crypto_skcipher_set_reqsize(tfm, sizeof(struct aria_avx2_request_ctx));

	return 0;
}

static struct skcipher_alg aria_algs[] = {
	{
		.base.cra_name		= "__ecb(aria)",
		.base.cra_driver_name	= "__ecb-aria-avx2",
		.base.cra_priority	= 500,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= ARIA_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct aria_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= ARIA_MIN_KEY_SIZE,
		.max_keysize		= ARIA_MAX_KEY_SIZE,
		.setkey			= aria_avx2_set_key,
		.encrypt		= aria_avx2_ecb_encrypt,
		.decrypt		= aria_avx2_ecb_decrypt,
	}, {
		.base.cra_name		= "__ctr(aria)",
		.base.cra_driver_name	= "__ctr-aria-avx2",
		.base.cra_priority	= 500,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL |
					  CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct aria_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= ARIA_MIN_KEY_SIZE,
		.max_keysize		= ARIA_MAX_KEY_SIZE,
		.ivsize			= ARIA_BLOCK_SIZE,
		.chunksize		= ARIA_BLOCK_SIZE,
		.setkey			= aria_avx2_set_key,
		.encrypt		= aria_avx2_ctr_encrypt,
		.decrypt		= aria_avx2_ctr_encrypt,
		.init			= aria_avx2_init_tfm,
	}
};

static struct simd_skcipher_alg *aria_simd_algs[ARRAY_SIZE(aria_algs)];

/*
 * Verify required CPU/OS features, pick the GFNI or AES-NI code paths,
 * and register the SIMD skcipher wrappers.
 */
static int __init aria_avx2_init(void)
{
	const char *feature_name;

	if (!boot_cpu_has(X86_FEATURE_AVX) ||
	    !boot_cpu_has(X86_FEATURE_AVX2) ||
	    !boot_cpu_has(X86_FEATURE_AES) ||
	    !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
		pr_info("AVX2 or AES-NI instructions are not detected.\n");
		return -ENODEV;
	}

	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
			       &feature_name)) {
		pr_info("CPU feature '%s' is not supported.\n", feature_name);
		return -ENODEV;
	}

	/* Prefer the GFNI routines when both CPU and assembler support them. */
	if (boot_cpu_has(X86_FEATURE_GFNI) && IS_ENABLED(CONFIG_AS_GFNI)) {
		aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way;
		aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way;
		aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way;
		aria_ops.aria_encrypt_32way = aria_aesni_avx2_gfni_encrypt_32way;
		aria_ops.aria_decrypt_32way = aria_aesni_avx2_gfni_decrypt_32way;
		aria_ops.aria_ctr_crypt_32way = aria_aesni_avx2_gfni_ctr_crypt_32way;
	} else {
		aria_ops.aria_encrypt_16way = aria_aesni_avx_encrypt_16way;
		aria_ops.aria_decrypt_16way = aria_aesni_avx_decrypt_16way;
		aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_ctr_crypt_16way;
		aria_ops.aria_encrypt_32way = aria_aesni_avx2_encrypt_32way;
		aria_ops.aria_decrypt_32way = aria_aesni_avx2_decrypt_32way;
		aria_ops.aria_ctr_crypt_32way = aria_aesni_avx2_ctr_crypt_32way;
	}

	return simd_register_skciphers_compat(aria_algs,
					      ARRAY_SIZE(aria_algs),
					      aria_simd_algs);
}

static void __exit aria_avx2_exit(void)
{
	simd_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs),
				  aria_simd_algs);
}

module_init(aria_avx2_init);
module_exit(aria_avx2_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Taehee Yoo <ap420073@gmail.com>");
MODULE_DESCRIPTION("ARIA Cipher Algorithm, AVX2/AES-NI/GFNI optimized");
MODULE_ALIAS_CRYPTO("aria");
MODULE_ALIAS_CRYPTO("aria-aesni-avx2");
+35 -14
arch/x86/crypto/aria_aesni_avx_glue.c
··· 18 18 19 19 asmlinkage void aria_aesni_avx_encrypt_16way(const void *ctx, u8 *dst, 20 20 const u8 *src); 21 + EXPORT_SYMBOL_GPL(aria_aesni_avx_encrypt_16way); 21 22 asmlinkage void aria_aesni_avx_decrypt_16way(const void *ctx, u8 *dst, 22 23 const u8 *src); 24 + EXPORT_SYMBOL_GPL(aria_aesni_avx_decrypt_16way); 23 25 asmlinkage void aria_aesni_avx_ctr_crypt_16way(const void *ctx, u8 *dst, 24 26 const u8 *src, 25 27 u8 *keystream, u8 *iv); 28 + EXPORT_SYMBOL_GPL(aria_aesni_avx_ctr_crypt_16way); 29 + #ifdef CONFIG_AS_GFNI 26 30 asmlinkage void aria_aesni_avx_gfni_encrypt_16way(const void *ctx, u8 *dst, 27 31 const u8 *src); 32 + EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_encrypt_16way); 28 33 asmlinkage void aria_aesni_avx_gfni_decrypt_16way(const void *ctx, u8 *dst, 29 34 const u8 *src); 35 + EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_decrypt_16way); 30 36 asmlinkage void aria_aesni_avx_gfni_ctr_crypt_16way(const void *ctx, u8 *dst, 31 37 const u8 *src, 32 38 u8 *keystream, u8 *iv); 39 + EXPORT_SYMBOL_GPL(aria_aesni_avx_gfni_ctr_crypt_16way); 40 + #endif /* CONFIG_AS_GFNI */ 33 41 34 42 static struct aria_avx_ops aria_ops; 43 + 44 + struct aria_avx_request_ctx { 45 + u8 keystream[ARIA_AESNI_PARALLEL_BLOCK_SIZE]; 46 + }; 35 47 36 48 static int ecb_do_encrypt(struct skcipher_request *req, const u32 *rkey) 37 49 { ··· 85 73 86 74 static int aria_avx_ctr_encrypt(struct skcipher_request *req) 87 75 { 76 + struct aria_avx_request_ctx *req_ctx = skcipher_request_ctx(req); 88 77 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 89 78 struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); 90 79 struct skcipher_walk walk; ··· 99 86 u8 *dst = walk.dst.virt.addr; 100 87 101 88 while (nbytes >= ARIA_AESNI_PARALLEL_BLOCK_SIZE) { 102 - u8 keystream[ARIA_AESNI_PARALLEL_BLOCK_SIZE]; 103 - 104 89 kernel_fpu_begin(); 105 - aria_ops.aria_ctr_crypt_16way(ctx, dst, src, keystream, 90 + aria_ops.aria_ctr_crypt_16way(ctx, dst, src, 91 + &req_ctx->keystream[0], 106 92 walk.iv); 107 93 
kernel_fpu_end(); 108 94 dst += ARIA_AESNI_PARALLEL_BLOCK_SIZE; ··· 110 98 } 111 99 112 100 while (nbytes >= ARIA_BLOCK_SIZE) { 113 - u8 keystream[ARIA_BLOCK_SIZE]; 114 - 115 - memcpy(keystream, walk.iv, ARIA_BLOCK_SIZE); 101 + memcpy(&req_ctx->keystream[0], walk.iv, ARIA_BLOCK_SIZE); 116 102 crypto_inc(walk.iv, ARIA_BLOCK_SIZE); 117 103 118 - aria_encrypt(ctx, keystream, keystream); 104 + aria_encrypt(ctx, &req_ctx->keystream[0], 105 + &req_ctx->keystream[0]); 119 106 120 - crypto_xor_cpy(dst, src, keystream, ARIA_BLOCK_SIZE); 107 + crypto_xor_cpy(dst, src, &req_ctx->keystream[0], 108 + ARIA_BLOCK_SIZE); 121 109 dst += ARIA_BLOCK_SIZE; 122 110 src += ARIA_BLOCK_SIZE; 123 111 nbytes -= ARIA_BLOCK_SIZE; 124 112 } 125 113 126 114 if (walk.nbytes == walk.total && nbytes > 0) { 127 - u8 keystream[ARIA_BLOCK_SIZE]; 128 - 129 - memcpy(keystream, walk.iv, ARIA_BLOCK_SIZE); 115 + memcpy(&req_ctx->keystream[0], walk.iv, 116 + ARIA_BLOCK_SIZE); 130 117 crypto_inc(walk.iv, ARIA_BLOCK_SIZE); 131 118 132 - aria_encrypt(ctx, keystream, keystream); 119 + aria_encrypt(ctx, &req_ctx->keystream[0], 120 + &req_ctx->keystream[0]); 133 121 134 - crypto_xor_cpy(dst, src, keystream, nbytes); 122 + crypto_xor_cpy(dst, src, &req_ctx->keystream[0], 123 + nbytes); 135 124 dst += nbytes; 136 125 src += nbytes; 137 126 nbytes = 0; ··· 141 128 } 142 129 143 130 return err; 131 + } 132 + 133 + static int aria_avx_init_tfm(struct crypto_skcipher *tfm) 134 + { 135 + crypto_skcipher_set_reqsize(tfm, sizeof(struct aria_avx_request_ctx)); 136 + 137 + return 0; 144 138 } 145 139 146 140 static struct skcipher_alg aria_algs[] = { ··· 180 160 .setkey = aria_avx_set_key, 181 161 .encrypt = aria_avx_ctr_encrypt, 182 162 .decrypt = aria_avx_ctr_encrypt, 163 + .init = aria_avx_init_tfm, 183 164 } 184 165 }; 185 166 ··· 203 182 return -ENODEV; 204 183 } 205 184 206 - if (boot_cpu_has(X86_FEATURE_GFNI)) { 185 + if (boot_cpu_has(X86_FEATURE_GFNI) && IS_ENABLED(CONFIG_AS_GFNI)) { 207 186 
aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way; 208 187 aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way; 209 188 aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way;
+250
arch/x86/crypto/aria_gfni_avx512_glue.c
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + * Glue Code for the AVX512/GFNI assembler implementation of the ARIA Cipher 4 + * 5 + * Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com> 6 + */ 7 + 8 + #include <crypto/algapi.h> 9 + #include <crypto/internal/simd.h> 10 + #include <crypto/aria.h> 11 + #include <linux/crypto.h> 12 + #include <linux/err.h> 13 + #include <linux/module.h> 14 + #include <linux/types.h> 15 + 16 + #include "ecb_cbc_helpers.h" 17 + #include "aria-avx.h" 18 + 19 + asmlinkage void aria_gfni_avx512_encrypt_64way(const void *ctx, u8 *dst, 20 + const u8 *src); 21 + asmlinkage void aria_gfni_avx512_decrypt_64way(const void *ctx, u8 *dst, 22 + const u8 *src); 23 + asmlinkage void aria_gfni_avx512_ctr_crypt_64way(const void *ctx, u8 *dst, 24 + const u8 *src, 25 + u8 *keystream, u8 *iv); 26 + 27 + static struct aria_avx_ops aria_ops; 28 + 29 + struct aria_avx512_request_ctx { 30 + u8 keystream[ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE]; 31 + }; 32 + 33 + static int ecb_do_encrypt(struct skcipher_request *req, const u32 *rkey) 34 + { 35 + ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS); 36 + ECB_BLOCK(ARIA_GFNI_AVX512_PARALLEL_BLOCKS, aria_ops.aria_encrypt_64way); 37 + ECB_BLOCK(ARIA_AESNI_AVX2_PARALLEL_BLOCKS, aria_ops.aria_encrypt_32way); 38 + ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_encrypt_16way); 39 + ECB_BLOCK(1, aria_encrypt); 40 + ECB_WALK_END(); 41 + } 42 + 43 + static int ecb_do_decrypt(struct skcipher_request *req, const u32 *rkey) 44 + { 45 + ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS); 46 + ECB_BLOCK(ARIA_GFNI_AVX512_PARALLEL_BLOCKS, aria_ops.aria_decrypt_64way); 47 + ECB_BLOCK(ARIA_AESNI_AVX2_PARALLEL_BLOCKS, aria_ops.aria_decrypt_32way); 48 + ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_decrypt_16way); 49 + ECB_BLOCK(1, aria_decrypt); 50 + ECB_WALK_END(); 51 + } 52 + 53 + static int aria_avx512_ecb_encrypt(struct skcipher_request *req) 54 + { 55 + struct 
crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 56 + struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); 57 + 58 + return ecb_do_encrypt(req, ctx->enc_key[0]); 59 + } 60 + 61 + static int aria_avx512_ecb_decrypt(struct skcipher_request *req) 62 + { 63 + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 64 + struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); 65 + 66 + return ecb_do_decrypt(req, ctx->dec_key[0]); 67 + } 68 + 69 + static int aria_avx512_set_key(struct crypto_skcipher *tfm, const u8 *key, 70 + unsigned int keylen) 71 + { 72 + return aria_set_key(&tfm->base, key, keylen); 73 + } 74 + 75 + static int aria_avx512_ctr_encrypt(struct skcipher_request *req) 76 + { 77 + struct aria_avx512_request_ctx *req_ctx = skcipher_request_ctx(req); 78 + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 79 + struct aria_ctx *ctx = crypto_skcipher_ctx(tfm); 80 + struct skcipher_walk walk; 81 + unsigned int nbytes; 82 + int err; 83 + 84 + err = skcipher_walk_virt(&walk, req, false); 85 + 86 + while ((nbytes = walk.nbytes) > 0) { 87 + const u8 *src = walk.src.virt.addr; 88 + u8 *dst = walk.dst.virt.addr; 89 + 90 + while (nbytes >= ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE) { 91 + kernel_fpu_begin(); 92 + aria_ops.aria_ctr_crypt_64way(ctx, dst, src, 93 + &req_ctx->keystream[0], 94 + walk.iv); 95 + kernel_fpu_end(); 96 + dst += ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE; 97 + src += ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE; 98 + nbytes -= ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE; 99 + } 100 + 101 + while (nbytes >= ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE) { 102 + kernel_fpu_begin(); 103 + aria_ops.aria_ctr_crypt_32way(ctx, dst, src, 104 + &req_ctx->keystream[0], 105 + walk.iv); 106 + kernel_fpu_end(); 107 + dst += ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE; 108 + src += ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE; 109 + nbytes -= ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE; 110 + } 111 + 112 + while (nbytes >= ARIA_AESNI_PARALLEL_BLOCK_SIZE) { 113 + kernel_fpu_begin(); 114 + 
aria_ops.aria_ctr_crypt_16way(ctx, dst, src, 115 + &req_ctx->keystream[0], 116 + walk.iv); 117 + kernel_fpu_end(); 118 + dst += ARIA_AESNI_PARALLEL_BLOCK_SIZE; 119 + src += ARIA_AESNI_PARALLEL_BLOCK_SIZE; 120 + nbytes -= ARIA_AESNI_PARALLEL_BLOCK_SIZE; 121 + } 122 + 123 + while (nbytes >= ARIA_BLOCK_SIZE) { 124 + memcpy(&req_ctx->keystream[0], walk.iv, 125 + ARIA_BLOCK_SIZE); 126 + crypto_inc(walk.iv, ARIA_BLOCK_SIZE); 127 + 128 + aria_encrypt(ctx, &req_ctx->keystream[0], 129 + &req_ctx->keystream[0]); 130 + 131 + crypto_xor_cpy(dst, src, &req_ctx->keystream[0], 132 + ARIA_BLOCK_SIZE); 133 + dst += ARIA_BLOCK_SIZE; 134 + src += ARIA_BLOCK_SIZE; 135 + nbytes -= ARIA_BLOCK_SIZE; 136 + } 137 + 138 + if (walk.nbytes == walk.total && nbytes > 0) { 139 + memcpy(&req_ctx->keystream[0], walk.iv, 140 + ARIA_BLOCK_SIZE); 141 + crypto_inc(walk.iv, ARIA_BLOCK_SIZE); 142 + 143 + aria_encrypt(ctx, &req_ctx->keystream[0], 144 + &req_ctx->keystream[0]); 145 + 146 + crypto_xor_cpy(dst, src, &req_ctx->keystream[0], 147 + nbytes); 148 + dst += nbytes; 149 + src += nbytes; 150 + nbytes = 0; 151 + } 152 + err = skcipher_walk_done(&walk, nbytes); 153 + } 154 + 155 + return err; 156 + } 157 + 158 + static int aria_avx512_init_tfm(struct crypto_skcipher *tfm) 159 + { 160 + crypto_skcipher_set_reqsize(tfm, 161 + sizeof(struct aria_avx512_request_ctx)); 162 + 163 + return 0; 164 + } 165 + 166 + static struct skcipher_alg aria_algs[] = { 167 + { 168 + .base.cra_name = "__ecb(aria)", 169 + .base.cra_driver_name = "__ecb-aria-avx512", 170 + .base.cra_priority = 600, 171 + .base.cra_flags = CRYPTO_ALG_INTERNAL, 172 + .base.cra_blocksize = ARIA_BLOCK_SIZE, 173 + .base.cra_ctxsize = sizeof(struct aria_ctx), 174 + .base.cra_module = THIS_MODULE, 175 + .min_keysize = ARIA_MIN_KEY_SIZE, 176 + .max_keysize = ARIA_MAX_KEY_SIZE, 177 + .setkey = aria_avx512_set_key, 178 + .encrypt = aria_avx512_ecb_encrypt, 179 + .decrypt = aria_avx512_ecb_decrypt, 180 + }, { 181 + .base.cra_name = "__ctr(aria)", 182 + 
.base.cra_driver_name = "__ctr-aria-avx512", 183 + .base.cra_priority = 600, 184 + .base.cra_flags = CRYPTO_ALG_INTERNAL | 185 + CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE, 186 + .base.cra_blocksize = 1, 187 + .base.cra_ctxsize = sizeof(struct aria_ctx), 188 + .base.cra_module = THIS_MODULE, 189 + .min_keysize = ARIA_MIN_KEY_SIZE, 190 + .max_keysize = ARIA_MAX_KEY_SIZE, 191 + .ivsize = ARIA_BLOCK_SIZE, 192 + .chunksize = ARIA_BLOCK_SIZE, 193 + .setkey = aria_avx512_set_key, 194 + .encrypt = aria_avx512_ctr_encrypt, 195 + .decrypt = aria_avx512_ctr_encrypt, 196 + .init = aria_avx512_init_tfm, 197 + } 198 + }; 199 + 200 + static struct simd_skcipher_alg *aria_simd_algs[ARRAY_SIZE(aria_algs)]; 201 + 202 + static int __init aria_avx512_init(void) 203 + { 204 + const char *feature_name; 205 + 206 + if (!boot_cpu_has(X86_FEATURE_AVX) || 207 + !boot_cpu_has(X86_FEATURE_AVX2) || 208 + !boot_cpu_has(X86_FEATURE_AVX512F) || 209 + !boot_cpu_has(X86_FEATURE_AVX512VL) || 210 + !boot_cpu_has(X86_FEATURE_GFNI) || 211 + !boot_cpu_has(X86_FEATURE_OSXSAVE)) { 212 + pr_info("AVX512/GFNI instructions are not detected.\n"); 213 + return -ENODEV; 214 + } 215 + 216 + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | 217 + XFEATURE_MASK_AVX512, &feature_name)) { 218 + pr_info("CPU feature '%s' is not supported.\n", feature_name); 219 + return -ENODEV; 220 + } 221 + 222 + aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way; 223 + aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way; 224 + aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way; 225 + aria_ops.aria_encrypt_32way = aria_aesni_avx2_gfni_encrypt_32way; 226 + aria_ops.aria_decrypt_32way = aria_aesni_avx2_gfni_decrypt_32way; 227 + aria_ops.aria_ctr_crypt_32way = aria_aesni_avx2_gfni_ctr_crypt_32way; 228 + aria_ops.aria_encrypt_64way = aria_gfni_avx512_encrypt_64way; 229 + aria_ops.aria_decrypt_64way = aria_gfni_avx512_decrypt_64way; 230 + aria_ops.aria_ctr_crypt_64way = 
aria_gfni_avx512_ctr_crypt_64way; 231 + 232 + return simd_register_skciphers_compat(aria_algs, 233 + ARRAY_SIZE(aria_algs), 234 + aria_simd_algs); 235 + } 236 + 237 + static void __exit aria_avx512_exit(void) 238 + { 239 + simd_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs), 240 + aria_simd_algs); 241 + } 242 + 243 + module_init(aria_avx512_init); 244 + module_exit(aria_avx512_exit); 245 + 246 + MODULE_LICENSE("GPL"); 247 + MODULE_AUTHOR("Taehee Yoo <ap420073@gmail.com>"); 248 + MODULE_DESCRIPTION("ARIA Cipher Algorithm, AVX512/GFNI optimized"); 249 + MODULE_ALIAS_CRYPTO("aria"); 250 + MODULE_ALIAS_CRYPTO("aria-gfni-avx512");
+28 -43
arch/x86/crypto/blowfish-x86_64-asm_64.S
··· 6 6 */ 7 7 8 8 #include <linux/linkage.h> 9 - #include <linux/cfi_types.h> 10 9 11 10 .file "blowfish-x86_64-asm.S" 12 11 .text ··· 99 100 bswapq RX0; \ 100 101 movq RX0, (RIO); 101 102 102 - #define xor_block() \ 103 - bswapq RX0; \ 104 - xorq RX0, (RIO); 105 - 106 - SYM_FUNC_START(__blowfish_enc_blk) 103 + SYM_FUNC_START(blowfish_enc_blk) 107 104 /* input: 108 105 * %rdi: ctx 109 106 * %rsi: dst 110 107 * %rdx: src 111 - * %rcx: bool, if true: xor output 112 108 */ 113 109 movq %r12, %r11; 114 110 ··· 124 130 add_roundkey_enc(16); 125 131 126 132 movq %r11, %r12; 127 - 128 133 movq %r10, RIO; 129 - test %cl, %cl; 130 - jnz .L__enc_xor; 131 134 132 135 write_block(); 133 136 RET; 134 - .L__enc_xor: 135 - xor_block(); 136 - RET; 137 - SYM_FUNC_END(__blowfish_enc_blk) 137 + SYM_FUNC_END(blowfish_enc_blk) 138 138 139 - SYM_TYPED_FUNC_START(blowfish_dec_blk) 139 + SYM_FUNC_START(blowfish_dec_blk) 140 140 /* input: 141 141 * %rdi: ctx 142 142 * %rsi: dst ··· 260 272 movq RX3, 24(RIO); 261 273 262 274 #define xor_block4() \ 263 - bswapq RX0; \ 264 - xorq RX0, (RIO); \ 275 + movq (RIO), RT0; \ 276 + bswapq RT0; \ 277 + xorq RT0, RX1; \ 265 278 \ 266 - bswapq RX1; \ 267 - xorq RX1, 8(RIO); \ 279 + movq 8(RIO), RT2; \ 280 + bswapq RT2; \ 281 + xorq RT2, RX2; \ 268 282 \ 269 - bswapq RX2; \ 270 - xorq RX2, 16(RIO); \ 271 - \ 272 - bswapq RX3; \ 273 - xorq RX3, 24(RIO); 283 + movq 16(RIO), RT3; \ 284 + bswapq RT3; \ 285 + xorq RT3, RX3; 274 286 275 - SYM_FUNC_START(__blowfish_enc_blk_4way) 287 + SYM_FUNC_START(blowfish_enc_blk_4way) 276 288 /* input: 277 289 * %rdi: ctx 278 290 * %rsi: dst 279 291 * %rdx: src 280 - * %rcx: bool, if true: xor output 281 292 */ 282 293 pushq %r12; 283 294 pushq %rbx; 284 - pushq %rcx; 285 295 286 296 movq %rdi, CTX 287 297 movq %rsi, %r11; ··· 299 313 round_enc4(14); 300 314 add_preloaded_roundkey4(); 301 315 302 - popq %r12; 303 316 movq %r11, RIO; 304 - 305 - test %r12b, %r12b; 306 - jnz .L__enc_xor4; 307 - 308 317 write_block4(); 309 
318 310 319 popq %rbx; 311 320 popq %r12; 312 321 RET; 322 + SYM_FUNC_END(blowfish_enc_blk_4way) 313 323 314 - .L__enc_xor4: 315 - xor_block4(); 316 - 317 - popq %rbx; 318 - popq %r12; 319 - RET; 320 - SYM_FUNC_END(__blowfish_enc_blk_4way) 321 - 322 - SYM_TYPED_FUNC_START(blowfish_dec_blk_4way) 324 + SYM_FUNC_START(__blowfish_dec_blk_4way) 323 325 /* input: 324 326 * %rdi: ctx 325 327 * %rsi: dst 326 328 * %rdx: src 329 + * %rcx: cbc (bool) 327 330 */ 328 331 pushq %r12; 329 332 pushq %rbx; 333 + pushq %rcx; 334 + pushq %rdx; 330 335 331 336 movq %rdi, CTX; 332 - movq %rsi, %r11 337 + movq %rsi, %r11; 333 338 movq %rdx, RIO; 334 339 335 340 preload_roundkey_dec(17); ··· 336 359 round_dec4(3); 337 360 add_preloaded_roundkey4(); 338 361 362 + popq RIO; 363 + popq %r12; 364 + testq %r12, %r12; 365 + jz .L_no_cbc_xor; 366 + 367 + xor_block4(); 368 + 369 + .L_no_cbc_xor: 339 370 movq %r11, RIO; 340 371 write_block4(); 341 372 ··· 351 366 popq %r12; 352 367 353 368 RET; 354 - SYM_FUNC_END(blowfish_dec_blk_4way) 369 + SYM_FUNC_END(__blowfish_dec_blk_4way)
+27 -173
arch/x86/crypto/blowfish_glue.c
··· 16 16 #include <linux/module.h> 17 17 #include <linux/types.h> 18 18 19 + #include "ecb_cbc_helpers.h" 20 + 19 21 /* regular block cipher functions */ 20 - asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, 21 - bool xor); 22 + asmlinkage void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); 22 23 asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); 23 24 24 25 /* 4-way parallel cipher functions */ 25 - asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, 26 - const u8 *src, bool xor); 27 - asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, 26 + asmlinkage void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, 28 27 const u8 *src); 28 + asmlinkage void __blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, 29 + const u8 *src, bool cbc); 29 30 30 - static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) 31 + static inline void blowfish_dec_ecb_4way(struct bf_ctx *ctx, u8 *dst, 32 + const u8 *src) 31 33 { 32 - __blowfish_enc_blk(ctx, dst, src, false); 34 + return __blowfish_dec_blk_4way(ctx, dst, src, false); 33 35 } 34 36 35 - static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, 36 - const u8 *src) 37 + static inline void blowfish_dec_cbc_4way(struct bf_ctx *ctx, u8 *dst, 38 + const u8 *src) 37 39 { 38 - __blowfish_enc_blk_4way(ctx, dst, src, false); 40 + return __blowfish_dec_blk_4way(ctx, dst, src, true); 39 41 } 40 42 41 43 static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) ··· 56 54 return blowfish_setkey(&tfm->base, key, keylen); 57 55 } 58 56 59 - static int ecb_crypt(struct skcipher_request *req, 60 - void (*fn)(struct bf_ctx *, u8 *, const u8 *), 61 - void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *)) 62 - { 63 - unsigned int bsize = BF_BLOCK_SIZE; 64 - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 65 - struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); 66 - struct skcipher_walk 
walk; 67 - unsigned int nbytes; 68 - int err; 69 - 70 - err = skcipher_walk_virt(&walk, req, false); 71 - 72 - while ((nbytes = walk.nbytes)) { 73 - u8 *wsrc = walk.src.virt.addr; 74 - u8 *wdst = walk.dst.virt.addr; 75 - 76 - /* Process four block batch */ 77 - if (nbytes >= bsize * 4) { 78 - do { 79 - fn_4way(ctx, wdst, wsrc); 80 - 81 - wsrc += bsize * 4; 82 - wdst += bsize * 4; 83 - nbytes -= bsize * 4; 84 - } while (nbytes >= bsize * 4); 85 - 86 - if (nbytes < bsize) 87 - goto done; 88 - } 89 - 90 - /* Handle leftovers */ 91 - do { 92 - fn(ctx, wdst, wsrc); 93 - 94 - wsrc += bsize; 95 - wdst += bsize; 96 - nbytes -= bsize; 97 - } while (nbytes >= bsize); 98 - 99 - done: 100 - err = skcipher_walk_done(&walk, nbytes); 101 - } 102 - 103 - return err; 104 - } 105 - 106 57 static int ecb_encrypt(struct skcipher_request *req) 107 58 { 108 - return ecb_crypt(req, blowfish_enc_blk, blowfish_enc_blk_4way); 59 + ECB_WALK_START(req, BF_BLOCK_SIZE, -1); 60 + ECB_BLOCK(4, blowfish_enc_blk_4way); 61 + ECB_BLOCK(1, blowfish_enc_blk); 62 + ECB_WALK_END(); 109 63 } 110 64 111 65 static int ecb_decrypt(struct skcipher_request *req) 112 66 { 113 - return ecb_crypt(req, blowfish_dec_blk, blowfish_dec_blk_4way); 114 - } 115 - 116 - static unsigned int __cbc_encrypt(struct bf_ctx *ctx, 117 - struct skcipher_walk *walk) 118 - { 119 - unsigned int bsize = BF_BLOCK_SIZE; 120 - unsigned int nbytes = walk->nbytes; 121 - u64 *src = (u64 *)walk->src.virt.addr; 122 - u64 *dst = (u64 *)walk->dst.virt.addr; 123 - u64 *iv = (u64 *)walk->iv; 124 - 125 - do { 126 - *dst = *src ^ *iv; 127 - blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); 128 - iv = dst; 129 - 130 - src += 1; 131 - dst += 1; 132 - nbytes -= bsize; 133 - } while (nbytes >= bsize); 134 - 135 - *(u64 *)walk->iv = *iv; 136 - return nbytes; 67 + ECB_WALK_START(req, BF_BLOCK_SIZE, -1); 68 + ECB_BLOCK(4, blowfish_dec_ecb_4way); 69 + ECB_BLOCK(1, blowfish_dec_blk); 70 + ECB_WALK_END(); 137 71 } 138 72 139 73 static int cbc_encrypt(struct 
skcipher_request *req) 140 74 { 141 - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 142 - struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); 143 - struct skcipher_walk walk; 144 - unsigned int nbytes; 145 - int err; 146 - 147 - err = skcipher_walk_virt(&walk, req, false); 148 - 149 - while (walk.nbytes) { 150 - nbytes = __cbc_encrypt(ctx, &walk); 151 - err = skcipher_walk_done(&walk, nbytes); 152 - } 153 - 154 - return err; 155 - } 156 - 157 - static unsigned int __cbc_decrypt(struct bf_ctx *ctx, 158 - struct skcipher_walk *walk) 159 - { 160 - unsigned int bsize = BF_BLOCK_SIZE; 161 - unsigned int nbytes = walk->nbytes; 162 - u64 *src = (u64 *)walk->src.virt.addr; 163 - u64 *dst = (u64 *)walk->dst.virt.addr; 164 - u64 ivs[4 - 1]; 165 - u64 last_iv; 166 - 167 - /* Start of the last block. */ 168 - src += nbytes / bsize - 1; 169 - dst += nbytes / bsize - 1; 170 - 171 - last_iv = *src; 172 - 173 - /* Process four block batch */ 174 - if (nbytes >= bsize * 4) { 175 - do { 176 - nbytes -= bsize * 4 - bsize; 177 - src -= 4 - 1; 178 - dst -= 4 - 1; 179 - 180 - ivs[0] = src[0]; 181 - ivs[1] = src[1]; 182 - ivs[2] = src[2]; 183 - 184 - blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src); 185 - 186 - dst[1] ^= ivs[0]; 187 - dst[2] ^= ivs[1]; 188 - dst[3] ^= ivs[2]; 189 - 190 - nbytes -= bsize; 191 - if (nbytes < bsize) 192 - goto done; 193 - 194 - *dst ^= *(src - 1); 195 - src -= 1; 196 - dst -= 1; 197 - } while (nbytes >= bsize * 4); 198 - } 199 - 200 - /* Handle leftovers */ 201 - for (;;) { 202 - blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src); 203 - 204 - nbytes -= bsize; 205 - if (nbytes < bsize) 206 - break; 207 - 208 - *dst ^= *(src - 1); 209 - src -= 1; 210 - dst -= 1; 211 - } 212 - 213 - done: 214 - *dst ^= *(u64 *)walk->iv; 215 - *(u64 *)walk->iv = last_iv; 216 - 217 - return nbytes; 75 + CBC_WALK_START(req, BF_BLOCK_SIZE, -1); 76 + CBC_ENC_BLOCK(blowfish_enc_blk); 77 + CBC_WALK_END(); 218 78 } 219 79 220 80 static int cbc_decrypt(struct skcipher_request *req) 
221 81 { 222 - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 223 - struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); 224 - struct skcipher_walk walk; 225 - unsigned int nbytes; 226 - int err; 227 - 228 - err = skcipher_walk_virt(&walk, req, false); 229 - 230 - while (walk.nbytes) { 231 - nbytes = __cbc_decrypt(ctx, &walk); 232 - err = skcipher_walk_done(&walk, nbytes); 233 - } 234 - 235 - return err; 82 + CBC_WALK_START(req, BF_BLOCK_SIZE, -1); 83 + CBC_DEC_BLOCK(4, blowfish_dec_cbc_4way); 84 + CBC_DEC_BLOCK(1, blowfish_dec_blk); 85 + CBC_WALK_END(); 236 86 } 237 87 238 88 static struct crypto_alg bf_cipher_alg = {
+15 -4
arch/x86/crypto/ecb_cbc_helpers.h
··· 13 13 14 14 #define ECB_WALK_START(req, bsize, fpu_blocks) do { \ 15 15 void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); \ 16 + const int __fpu_blocks = (fpu_blocks); \ 16 17 const int __bsize = (bsize); \ 17 18 struct skcipher_walk walk; \ 18 19 int err = skcipher_walk_virt(&walk, (req), false); \ 19 20 while (walk.nbytes > 0) { \ 20 21 unsigned int nbytes = walk.nbytes; \ 21 - bool do_fpu = (fpu_blocks) != -1 && \ 22 - nbytes >= (fpu_blocks) * __bsize; \ 22 + bool do_fpu = __fpu_blocks != -1 && \ 23 + nbytes >= __fpu_blocks * __bsize; \ 23 24 const u8 *src = walk.src.virt.addr; \ 24 25 u8 *dst = walk.dst.virt.addr; \ 25 26 u8 __maybe_unused buf[(bsize)]; \ ··· 36 35 } while (0) 37 36 38 37 #define ECB_BLOCK(blocks, func) do { \ 39 - while (nbytes >= (blocks) * __bsize) { \ 38 + const int __blocks = (blocks); \ 39 + if (do_fpu && __blocks < __fpu_blocks) { \ 40 + kernel_fpu_end(); \ 41 + do_fpu = false; \ 42 + } \ 43 + while (nbytes >= __blocks * __bsize) { \ 40 44 (func)(ctx, dst, src); \ 41 45 ECB_WALK_ADVANCE(blocks); \ 42 46 } \ ··· 59 53 } while (0) 60 54 61 55 #define CBC_DEC_BLOCK(blocks, func) do { \ 62 - while (nbytes >= (blocks) * __bsize) { \ 56 + const int __blocks = (blocks); \ 57 + if (do_fpu && __blocks < __fpu_blocks) { \ 58 + kernel_fpu_end(); \ 59 + do_fpu = false; \ 60 + } \ 61 + while (nbytes >= __blocks * __bsize) { \ 63 62 const u8 *__iv = src + ((blocks) - 1) * __bsize; \ 64 63 if (dst == src) \ 65 64 __iv = memcpy(buf, __iv, __bsize); \
+3 -3
arch/x86/crypto/ghash-clmulni-intel_asm.S
··· 4 4 * instructions. This file contains accelerated part of ghash 5 5 * implementation. More information about PCLMULQDQ can be found at: 6 6 * 7 - * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ 7 + * https://www.intel.com/content/dam/develop/external/us/en/documents/clmul-wp-rev-2-02-2014-04-20.pdf 8 8 * 9 9 * Copyright (c) 2009 Intel Corp. 10 10 * Author: Huang Ying <ying.huang@intel.com> ··· 88 88 RET 89 89 SYM_FUNC_END(__clmul_gf128mul_ble) 90 90 91 - /* void clmul_ghash_mul(char *dst, const u128 *shash) */ 91 + /* void clmul_ghash_mul(char *dst, const le128 *shash) */ 92 92 SYM_FUNC_START(clmul_ghash_mul) 93 93 FRAME_BEGIN 94 94 movups (%rdi), DATA ··· 104 104 105 105 /* 106 106 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, 107 - * const u128 *shash); 107 + * const le128 *shash); 108 108 */ 109 109 SYM_FUNC_START(clmul_ghash_update) 110 110 FRAME_BEGIN
+32 -13
arch/x86/crypto/ghash-clmulni-intel_glue.c
··· 19 19 #include <crypto/internal/simd.h> 20 20 #include <asm/cpu_device_id.h> 21 21 #include <asm/simd.h> 22 + #include <asm/unaligned.h> 22 23 23 24 #define GHASH_BLOCK_SIZE 16 24 25 #define GHASH_DIGEST_SIZE 16 25 26 26 - void clmul_ghash_mul(char *dst, const u128 *shash); 27 + void clmul_ghash_mul(char *dst, const le128 *shash); 27 28 28 29 void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, 29 - const u128 *shash); 30 + const le128 *shash); 30 31 31 32 struct ghash_async_ctx { 32 33 struct cryptd_ahash *cryptd_tfm; 33 34 }; 34 35 35 36 struct ghash_ctx { 36 - u128 shash; 37 + le128 shash; 37 38 }; 38 39 39 40 struct ghash_desc_ctx { ··· 55 54 const u8 *key, unsigned int keylen) 56 55 { 57 56 struct ghash_ctx *ctx = crypto_shash_ctx(tfm); 58 - be128 *x = (be128 *)key; 59 57 u64 a, b; 60 58 61 59 if (keylen != GHASH_BLOCK_SIZE) 62 60 return -EINVAL; 63 61 64 - /* perform multiplication by 'x' in GF(2^128) */ 65 - a = be64_to_cpu(x->a); 66 - b = be64_to_cpu(x->b); 67 - 68 - ctx->shash.a = (b << 1) | (a >> 63); 69 - ctx->shash.b = (a << 1) | (b >> 63); 70 - 62 + /* 63 + * GHASH maps bits to polynomial coefficients backwards, which makes it 64 + * hard to implement. But it can be shown that the GHASH multiplication 65 + * 66 + * D * K (mod x^128 + x^7 + x^2 + x + 1) 67 + * 68 + * (where D is a data block and K is the key) is equivalent to: 69 + * 70 + * bitreflect(D) * bitreflect(K) * x^(-127) 71 + * (mod x^128 + x^127 + x^126 + x^121 + 1) 72 + * 73 + * So, the code below precomputes: 74 + * 75 + * bitreflect(K) * x^(-127) (mod x^128 + x^127 + x^126 + x^121 + 1) 76 + * 77 + * ... but in Montgomery form (so that Montgomery multiplication can be 78 + * used), i.e. with an extra x^128 factor, which means actually: 79 + * 80 + * bitreflect(K) * x (mod x^128 + x^127 + x^126 + x^121 + 1) 81 + * 82 + * The within-a-byte part of bitreflect() cancels out GHASH's built-in 83 + * reflection, and thus bitreflect() is actually a byteswap. 
84 + */ 85 + a = get_unaligned_be64(key); 86 + b = get_unaligned_be64(key + 8); 87 + ctx->shash.a = cpu_to_le64((a << 1) | (b >> 63)); 88 + ctx->shash.b = cpu_to_le64((b << 1) | (a >> 63)); 71 89 if (a >> 63) 72 - ctx->shash.b ^= ((u64)0xc2) << 56; 73 - 90 + ctx->shash.a ^= cpu_to_le64((u64)0xc2 << 56); 74 91 return 0; 75 92 } 76 93
+8
arch/x86/kernel/asm-offsets.c
··· 7 7 #define COMPILE_OFFSETS 8 8 9 9 #include <linux/crypto.h> 10 + #include <crypto/aria.h> 10 11 #include <linux/sched.h> 11 12 #include <linux/stddef.h> 12 13 #include <linux/hardirq.h> ··· 111 110 OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack); 112 111 #ifdef CONFIG_CALL_DEPTH_TRACKING 113 112 OFFSET(X86_call_depth, pcpu_hot, call_depth); 113 + #endif 114 + #if IS_ENABLED(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64) 115 + /* Offset for fields in aria_ctx */ 116 + BLANK(); 117 + OFFSET(ARIA_CTX_enc_key, aria_ctx, enc_key); 118 + OFFSET(ARIA_CTX_dec_key, aria_ctx, dec_key); 119 + OFFSET(ARIA_CTX_rounds, aria_ctx, rounds); 114 120 #endif 115 121 116 122 }
+2 -3
crypto/adiantum.c
··· 308 308 return 0; 309 309 } 310 310 311 - static void adiantum_streamcipher_done(struct crypto_async_request *areq, 312 - int err) 311 + static void adiantum_streamcipher_done(void *data, int err) 313 312 { 314 - struct skcipher_request *req = areq->data; 313 + struct skcipher_request *req = data; 315 314 316 315 if (!err) 317 316 err = adiantum_finish(req);
+3 -3
crypto/af_alg.c
··· 1186 1186 1187 1187 /** 1188 1188 * af_alg_async_cb - AIO callback handler 1189 - * @_req: async request info 1189 + * @data: async request completion data 1190 1190 * @err: if non-zero, error result to be returned via ki_complete(); 1191 1191 * otherwise return the AIO output length via ki_complete(). 1192 1192 * ··· 1196 1196 * The number of bytes to be generated with the AIO operation must be set 1197 1197 * in areq->outlen before the AIO callback handler is invoked. 1198 1198 */ 1199 - void af_alg_async_cb(struct crypto_async_request *_req, int err) 1199 + void af_alg_async_cb(void *data, int err) 1200 1200 { 1201 - struct af_alg_async_req *areq = _req->data; 1201 + struct af_alg_async_req *areq = data; 1202 1202 struct sock *sk = areq->sk; 1203 1203 struct kiocb *iocb = areq->iocb; 1204 1204 unsigned int resultlen;
+79 -110
crypto/ahash.c
··· 45 45 unsigned int nbytes = min(walk->entrylen, 46 46 ((unsigned int)(PAGE_SIZE)) - offset); 47 47 48 - walk->data = kmap_atomic(walk->pg); 48 + walk->data = kmap_local_page(walk->pg); 49 49 walk->data += offset; 50 50 51 51 if (offset & alignmask) { ··· 95 95 } 96 96 } 97 97 98 - kunmap_atomic(walk->data); 98 + kunmap_local(walk->data); 99 99 crypto_yield(walk->flags); 100 100 101 101 if (err) ··· 190 190 } 191 191 EXPORT_SYMBOL_GPL(crypto_ahash_setkey); 192 192 193 - static inline unsigned int ahash_align_buffer_size(unsigned len, 194 - unsigned long mask) 195 - { 196 - return len + (mask & ~(crypto_tfm_ctx_alignment() - 1)); 197 - } 198 - 199 - static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt) 193 + static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt, 194 + bool has_state) 200 195 { 201 196 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 202 197 unsigned long alignmask = crypto_ahash_alignmask(tfm); 203 198 unsigned int ds = crypto_ahash_digestsize(tfm); 204 - struct ahash_request_priv *priv; 199 + struct ahash_request *subreq; 200 + unsigned int subreq_size; 201 + unsigned int reqsize; 202 + u8 *result; 203 + gfp_t gfp; 204 + u32 flags; 205 205 206 - priv = kmalloc(sizeof(*priv) + ahash_align_buffer_size(ds, alignmask), 207 - (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 208 - GFP_KERNEL : GFP_ATOMIC); 209 - if (!priv) 206 + subreq_size = sizeof(*subreq); 207 + reqsize = crypto_ahash_reqsize(tfm); 208 + reqsize = ALIGN(reqsize, crypto_tfm_ctx_alignment()); 209 + subreq_size += reqsize; 210 + subreq_size += ds; 211 + subreq_size += alignmask & ~(crypto_tfm_ctx_alignment() - 1); 212 + 213 + flags = ahash_request_flags(req); 214 + gfp = (flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; 215 + subreq = kmalloc(subreq_size, gfp); 216 + if (!subreq) 210 217 return -ENOMEM; 211 218 212 - /* 213 - * WARNING: Voodoo programming below! 
214 - * 215 - * The code below is obscure and hard to understand, thus explanation 216 - * is necessary. See include/crypto/hash.h and include/linux/crypto.h 217 - * to understand the layout of structures used here! 218 - * 219 - * The code here will replace portions of the ORIGINAL request with 220 - * pointers to new code and buffers so the hashing operation can store 221 - * the result in aligned buffer. We will call the modified request 222 - * an ADJUSTED request. 223 - * 224 - * The newly mangled request will look as such: 225 - * 226 - * req { 227 - * .result = ADJUSTED[new aligned buffer] 228 - * .base.complete = ADJUSTED[pointer to completion function] 229 - * .base.data = ADJUSTED[*req (pointer to self)] 230 - * .priv = ADJUSTED[new priv] { 231 - * .result = ORIGINAL(result) 232 - * .complete = ORIGINAL(base.complete) 233 - * .data = ORIGINAL(base.data) 234 - * } 235 - */ 219 + ahash_request_set_tfm(subreq, tfm); 220 + ahash_request_set_callback(subreq, flags, cplt, req); 236 221 237 - priv->result = req->result; 238 - priv->complete = req->base.complete; 239 - priv->data = req->base.data; 240 - priv->flags = req->base.flags; 222 + result = (u8 *)(subreq + 1) + reqsize; 223 + result = PTR_ALIGN(result, alignmask + 1); 241 224 242 - /* 243 - * WARNING: We do not backup req->priv here! The req->priv 244 - * is for internal use of the Crypto API and the 245 - * user must _NOT_ _EVER_ depend on it's content! 
246 - */ 225 + ahash_request_set_crypt(subreq, req->src, result, req->nbytes); 247 226 248 - req->result = PTR_ALIGN((u8 *)priv->ubuf, alignmask + 1); 249 - req->base.complete = cplt; 250 - req->base.data = req; 251 - req->priv = priv; 227 + if (has_state) { 228 + void *state; 229 + 230 + state = kmalloc(crypto_ahash_statesize(tfm), gfp); 231 + if (!state) { 232 + kfree(subreq); 233 + return -ENOMEM; 234 + } 235 + 236 + crypto_ahash_export(req, state); 237 + crypto_ahash_import(subreq, state); 238 + kfree_sensitive(state); 239 + } 240 + 241 + req->priv = subreq; 252 242 253 243 return 0; 254 244 } 255 245 256 246 static void ahash_restore_req(struct ahash_request *req, int err) 257 247 { 258 - struct ahash_request_priv *priv = req->priv; 248 + struct ahash_request *subreq = req->priv; 259 249 260 250 if (!err) 261 - memcpy(priv->result, req->result, 251 + memcpy(req->result, subreq->result, 262 252 crypto_ahash_digestsize(crypto_ahash_reqtfm(req))); 263 253 264 - /* Restore the original crypto request. */ 265 - req->result = priv->result; 266 - 267 - ahash_request_set_callback(req, priv->flags, 268 - priv->complete, priv->data); 269 254 req->priv = NULL; 270 255 271 - /* Free the req->priv.priv from the ADJUSTED request. 
*/ 272 - kfree_sensitive(priv); 256 + kfree_sensitive(subreq); 273 257 } 274 258 275 - static void ahash_notify_einprogress(struct ahash_request *req) 259 + static void ahash_op_unaligned_done(void *data, int err) 276 260 { 277 - struct ahash_request_priv *priv = req->priv; 278 - struct crypto_async_request oreq; 261 + struct ahash_request *areq = data; 279 262 280 - oreq.data = priv->data; 281 - 282 - priv->complete(&oreq, -EINPROGRESS); 283 - } 284 - 285 - static void ahash_op_unaligned_done(struct crypto_async_request *req, int err) 286 - { 287 - struct ahash_request *areq = req->data; 288 - 289 - if (err == -EINPROGRESS) { 290 - ahash_notify_einprogress(areq); 291 - return; 292 - } 293 - 294 - /* 295 - * Restore the original request, see ahash_op_unaligned() for what 296 - * goes where. 297 - * 298 - * The "struct ahash_request *req" here is in fact the "req.base" 299 - * from the ADJUSTED request from ahash_op_unaligned(), thus as it 300 - * is a pointer to self, it is also the ADJUSTED "req" . 301 - */ 263 + if (err == -EINPROGRESS) 264 + goto out; 302 265 303 266 /* First copy req->result into req->priv.result */ 304 267 ahash_restore_req(areq, err); 305 268 269 + out: 306 270 /* Complete the ORIGINAL request. 
*/ 307 - areq->base.complete(&areq->base, err); 271 + ahash_request_complete(areq, err); 308 272 } 309 273 310 274 static int ahash_op_unaligned(struct ahash_request *req, 311 - int (*op)(struct ahash_request *)) 275 + int (*op)(struct ahash_request *), 276 + bool has_state) 312 277 { 313 278 int err; 314 279 315 - err = ahash_save_req(req, ahash_op_unaligned_done); 280 + err = ahash_save_req(req, ahash_op_unaligned_done, has_state); 316 281 if (err) 317 282 return err; 318 283 319 - err = op(req); 284 + err = op(req->priv); 320 285 if (err == -EINPROGRESS || err == -EBUSY) 321 286 return err; 322 287 ··· 291 326 } 292 327 293 328 static int crypto_ahash_op(struct ahash_request *req, 294 - int (*op)(struct ahash_request *)) 329 + int (*op)(struct ahash_request *), 330 + bool has_state) 295 331 { 296 332 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 297 333 unsigned long alignmask = crypto_ahash_alignmask(tfm); 298 334 299 335 if ((unsigned long)req->result & alignmask) 300 - return ahash_op_unaligned(req, op); 336 + return ahash_op_unaligned(req, op, has_state); 301 337 302 338 return op(req); 303 339 } ··· 311 345 int ret; 312 346 313 347 crypto_stats_get(alg); 314 - ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final); 348 + ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final, true); 315 349 crypto_stats_ahash_final(nbytes, ret, alg); 316 350 return ret; 317 351 } ··· 325 359 int ret; 326 360 327 361 crypto_stats_get(alg); 328 - ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup); 362 + ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup, true); 329 363 crypto_stats_ahash_final(nbytes, ret, alg); 330 364 return ret; 331 365 } ··· 342 376 if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) 343 377 ret = -ENOKEY; 344 378 else 345 - ret = crypto_ahash_op(req, tfm->digest); 379 + ret = crypto_ahash_op(req, tfm->digest, false); 346 380 crypto_stats_ahash_final(nbytes, ret, alg); 347 381 return ret; 348 382 } 349 383 
EXPORT_SYMBOL_GPL(crypto_ahash_digest); 350 384 351 - static void ahash_def_finup_done2(struct crypto_async_request *req, int err) 385 + static void ahash_def_finup_done2(void *data, int err) 352 386 { 353 - struct ahash_request *areq = req->data; 387 + struct ahash_request *areq = data; 354 388 355 389 if (err == -EINPROGRESS) 356 390 return; 357 391 358 392 ahash_restore_req(areq, err); 359 393 360 - areq->base.complete(&areq->base, err); 394 + ahash_request_complete(areq, err); 361 395 } 362 396 363 397 static int ahash_def_finup_finish1(struct ahash_request *req, int err) 364 398 { 399 + struct ahash_request *subreq = req->priv; 400 + 365 401 if (err) 366 402 goto out; 367 403 368 - req->base.complete = ahash_def_finup_done2; 404 + subreq->base.complete = ahash_def_finup_done2; 369 405 370 - err = crypto_ahash_reqtfm(req)->final(req); 406 + err = crypto_ahash_reqtfm(req)->final(subreq); 371 407 if (err == -EINPROGRESS || err == -EBUSY) 372 408 return err; 373 409 ··· 378 410 return err; 379 411 } 380 412 381 - static void ahash_def_finup_done1(struct crypto_async_request *req, int err) 413 + static void ahash_def_finup_done1(void *data, int err) 382 414 { 383 - struct ahash_request *areq = req->data; 415 + struct ahash_request *areq = data; 416 + struct ahash_request *subreq; 384 417 385 - if (err == -EINPROGRESS) { 386 - ahash_notify_einprogress(areq); 387 - return; 388 - } 418 + if (err == -EINPROGRESS) 419 + goto out; 389 420 390 - areq->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; 421 + subreq = areq->priv; 422 + subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; 391 423 392 424 err = ahash_def_finup_finish1(areq, err); 393 - if (areq->priv) 425 + if (err == -EINPROGRESS || err == -EBUSY) 394 426 return; 395 427 396 - areq->base.complete(&areq->base, err); 428 + out: 429 + ahash_request_complete(areq, err); 397 430 } 398 431 399 432 static int ahash_def_finup(struct ahash_request *req) ··· 402 433 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 403 434 int 
err; 404 435 405 - err = ahash_save_req(req, ahash_def_finup_done1); 436 + err = ahash_save_req(req, ahash_def_finup_done1, true); 406 437 if (err) 407 438 return err; 408 439 409 - err = tfm->update(req); 440 + err = tfm->update(req->priv); 410 441 if (err == -EINPROGRESS || err == -EBUSY) 411 442 return err; 412 443
+2 -2
crypto/api.c
··· 643 643 } 644 644 EXPORT_SYMBOL_GPL(crypto_has_alg); 645 645 646 - void crypto_req_done(struct crypto_async_request *req, int err) 646 + void crypto_req_done(void *data, int err) 647 647 { 648 - struct crypto_wait *wait = req->data; 648 + struct crypto_wait *wait = data; 649 649 650 650 if (err == -EINPROGRESS) 651 651 return;
+4
crypto/aria_generic.c
··· 178 178 if (key_len != 16 && key_len != 24 && key_len != 32) 179 179 return -EINVAL; 180 180 181 + BUILD_BUG_ON(sizeof(ctx->enc_key) != 272); 182 + BUILD_BUG_ON(sizeof(ctx->dec_key) != 272); 183 + BUILD_BUG_ON(sizeof(int) != sizeof(ctx->rounds)); 184 + 181 185 ctx->key_length = key_len; 182 186 ctx->rounds = (key_len + 32) / 4; 183 187
+6 -8
crypto/authenc.c
··· 109 109 return err; 110 110 } 111 111 112 - static void authenc_geniv_ahash_done(struct crypto_async_request *areq, int err) 112 + static void authenc_geniv_ahash_done(void *data, int err) 113 113 { 114 - struct aead_request *req = areq->data; 114 + struct aead_request *req = data; 115 115 struct crypto_aead *authenc = crypto_aead_reqtfm(req); 116 116 struct aead_instance *inst = aead_alg_instance(authenc); 117 117 struct authenc_instance_ctx *ictx = aead_instance_ctx(inst); ··· 160 160 return 0; 161 161 } 162 162 163 - static void crypto_authenc_encrypt_done(struct crypto_async_request *req, 164 - int err) 163 + static void crypto_authenc_encrypt_done(void *data, int err) 165 164 { 166 - struct aead_request *areq = req->data; 165 + struct aead_request *areq = data; 167 166 168 167 if (err) 169 168 goto out; ··· 260 261 return crypto_skcipher_decrypt(skreq); 261 262 } 262 263 263 - static void authenc_verify_ahash_done(struct crypto_async_request *areq, 264 - int err) 264 + static void authenc_verify_ahash_done(void *data, int err) 265 265 { 266 - struct aead_request *req = areq->data; 266 + struct aead_request *req = data; 267 267 268 268 if (err) 269 269 goto out;
+6 -9
crypto/authencesn.c
··· 107 107 return 0; 108 108 } 109 109 110 - static void authenc_esn_geniv_ahash_done(struct crypto_async_request *areq, 111 - int err) 110 + static void authenc_esn_geniv_ahash_done(void *data, int err) 112 111 { 113 - struct aead_request *req = areq->data; 112 + struct aead_request *req = data; 114 113 115 114 err = err ?: crypto_authenc_esn_genicv_tail(req, 0); 116 115 aead_request_complete(req, err); ··· 152 153 } 153 154 154 155 155 - static void crypto_authenc_esn_encrypt_done(struct crypto_async_request *req, 156 - int err) 156 + static void crypto_authenc_esn_encrypt_done(void *data, int err) 157 157 { 158 - struct aead_request *areq = req->data; 158 + struct aead_request *areq = data; 159 159 160 160 if (!err) 161 161 err = crypto_authenc_esn_genicv(areq, 0); ··· 256 258 return crypto_skcipher_decrypt(skreq); 257 259 } 258 260 259 - static void authenc_esn_verify_ahash_done(struct crypto_async_request *areq, 260 - int err) 261 + static void authenc_esn_verify_ahash_done(void *data, int err) 261 262 { 262 - struct aead_request *req = areq->data; 263 + struct aead_request *req = data; 263 264 264 265 err = err ?: crypto_authenc_esn_decrypt_tail(req, 0); 265 266 authenc_esn_request_complete(req, err);
+4 -5
crypto/ccm.c
··· 224 224 return err; 225 225 } 226 226 227 - static void crypto_ccm_encrypt_done(struct crypto_async_request *areq, int err) 227 + static void crypto_ccm_encrypt_done(void *data, int err) 228 228 { 229 - struct aead_request *req = areq->data; 229 + struct aead_request *req = data; 230 230 struct crypto_aead *aead = crypto_aead_reqtfm(req); 231 231 struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req); 232 232 u8 *odata = pctx->odata; ··· 320 320 return err; 321 321 } 322 322 323 - static void crypto_ccm_decrypt_done(struct crypto_async_request *areq, 324 - int err) 323 + static void crypto_ccm_decrypt_done(void *data, int err) 325 324 { 326 - struct aead_request *req = areq->data; 325 + struct aead_request *req = data; 327 326 struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req); 328 327 struct crypto_aead *aead = crypto_aead_reqtfm(req); 329 328 unsigned int authsize = crypto_aead_authsize(aead);
+20 -20
crypto/chacha20poly1305.c
··· 115 115 return 0; 116 116 } 117 117 118 - static void chacha_decrypt_done(struct crypto_async_request *areq, int err) 118 + static void chacha_decrypt_done(void *data, int err) 119 119 { 120 - async_done_continue(areq->data, err, poly_verify_tag); 120 + async_done_continue(data, err, poly_verify_tag); 121 121 } 122 122 123 123 static int chacha_decrypt(struct aead_request *req) ··· 161 161 return chacha_decrypt(req); 162 162 } 163 163 164 - static void poly_tail_done(struct crypto_async_request *areq, int err) 164 + static void poly_tail_done(void *data, int err) 165 165 { 166 - async_done_continue(areq->data, err, poly_tail_continue); 166 + async_done_continue(data, err, poly_tail_continue); 167 167 } 168 168 169 169 static int poly_tail(struct aead_request *req) ··· 191 191 return poly_tail_continue(req); 192 192 } 193 193 194 - static void poly_cipherpad_done(struct crypto_async_request *areq, int err) 194 + static void poly_cipherpad_done(void *data, int err) 195 195 { 196 - async_done_continue(areq->data, err, poly_tail); 196 + async_done_continue(data, err, poly_tail); 197 197 } 198 198 199 199 static int poly_cipherpad(struct aead_request *req) ··· 220 220 return poly_tail(req); 221 221 } 222 222 223 - static void poly_cipher_done(struct crypto_async_request *areq, int err) 223 + static void poly_cipher_done(void *data, int err) 224 224 { 225 - async_done_continue(areq->data, err, poly_cipherpad); 225 + async_done_continue(data, err, poly_cipherpad); 226 226 } 227 227 228 228 static int poly_cipher(struct aead_request *req) ··· 250 250 return poly_cipherpad(req); 251 251 } 252 252 253 - static void poly_adpad_done(struct crypto_async_request *areq, int err) 253 + static void poly_adpad_done(void *data, int err) 254 254 { 255 - async_done_continue(areq->data, err, poly_cipher); 255 + async_done_continue(data, err, poly_cipher); 256 256 } 257 257 258 258 static int poly_adpad(struct aead_request *req) ··· 279 279 return poly_cipher(req); 280 280 } 281 281 
282 - static void poly_ad_done(struct crypto_async_request *areq, int err) 282 + static void poly_ad_done(void *data, int err) 283 283 { 284 - async_done_continue(areq->data, err, poly_adpad); 284 + async_done_continue(data, err, poly_adpad); 285 285 } 286 286 287 287 static int poly_ad(struct aead_request *req) ··· 303 303 return poly_adpad(req); 304 304 } 305 305 306 - static void poly_setkey_done(struct crypto_async_request *areq, int err) 306 + static void poly_setkey_done(void *data, int err) 307 307 { 308 - async_done_continue(areq->data, err, poly_ad); 308 + async_done_continue(data, err, poly_ad); 309 309 } 310 310 311 311 static int poly_setkey(struct aead_request *req) ··· 329 329 return poly_ad(req); 330 330 } 331 331 332 - static void poly_init_done(struct crypto_async_request *areq, int err) 332 + static void poly_init_done(void *data, int err) 333 333 { 334 - async_done_continue(areq->data, err, poly_setkey); 334 + async_done_continue(data, err, poly_setkey); 335 335 } 336 336 337 337 static int poly_init(struct aead_request *req) ··· 352 352 return poly_setkey(req); 353 353 } 354 354 355 - static void poly_genkey_done(struct crypto_async_request *areq, int err) 355 + static void poly_genkey_done(void *data, int err) 356 356 { 357 - async_done_continue(areq->data, err, poly_init); 357 + async_done_continue(data, err, poly_init); 358 358 } 359 359 360 360 static int poly_genkey(struct aead_request *req) ··· 391 391 return poly_init(req); 392 392 } 393 393 394 - static void chacha_encrypt_done(struct crypto_async_request *areq, int err) 394 + static void chacha_encrypt_done(void *data, int err) 395 395 { 396 - async_done_continue(areq->data, err, poly_genkey); 396 + async_done_continue(data, err, poly_genkey); 397 397 } 398 398 399 399 static int chacha_encrypt(struct aead_request *req)
+164 -136
crypto/cryptd.c
··· 72 72 }; 73 73 74 74 struct cryptd_skcipher_request_ctx { 75 - crypto_completion_t complete; 76 75 struct skcipher_request req; 77 76 }; 78 77 ··· 82 83 83 84 struct cryptd_hash_request_ctx { 84 85 crypto_completion_t complete; 86 + void *data; 85 87 struct shash_desc desc; 86 88 }; 87 89 ··· 92 92 }; 93 93 94 94 struct cryptd_aead_request_ctx { 95 - crypto_completion_t complete; 95 + struct aead_request req; 96 96 }; 97 97 98 98 static void cryptd_queue_worker(struct work_struct *work); ··· 177 177 return; 178 178 179 179 if (backlog) 180 - backlog->complete(backlog, -EINPROGRESS); 181 - req->complete(req, 0); 180 + crypto_request_complete(backlog, -EINPROGRESS); 181 + crypto_request_complete(req, 0); 182 182 183 183 if (cpu_queue->queue.qlen) 184 184 queue_work(cryptd_wq, &cpu_queue->work); ··· 237 237 return crypto_skcipher_setkey(child, key, keylen); 238 238 } 239 239 240 - static void cryptd_skcipher_complete(struct skcipher_request *req, int err) 240 + static struct skcipher_request *cryptd_skcipher_prepare( 241 + struct skcipher_request *req, int err) 241 242 { 243 + struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); 244 + struct skcipher_request *subreq = &rctx->req; 245 + struct cryptd_skcipher_ctx *ctx; 246 + struct crypto_skcipher *child; 247 + 248 + req->base.complete = subreq->base.complete; 249 + req->base.data = subreq->base.data; 250 + 251 + if (unlikely(err == -EINPROGRESS)) 252 + return NULL; 253 + 254 + ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); 255 + child = ctx->child; 256 + 257 + skcipher_request_set_tfm(subreq, child); 258 + skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, 259 + NULL, NULL); 260 + skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, 261 + req->iv); 262 + 263 + return subreq; 264 + } 265 + 266 + static void cryptd_skcipher_complete(struct skcipher_request *req, int err, 267 + crypto_completion_t complete) 268 + { 269 + struct cryptd_skcipher_request_ctx *rctx 
= skcipher_request_ctx(req); 242 270 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 243 271 struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); 244 - struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); 272 + struct skcipher_request *subreq = &rctx->req; 245 273 int refcnt = refcount_read(&ctx->refcnt); 246 274 247 275 local_bh_disable(); 248 - rctx->complete(&req->base, err); 276 + skcipher_request_complete(req, err); 249 277 local_bh_enable(); 250 278 251 - if (err != -EINPROGRESS && refcnt && refcount_dec_and_test(&ctx->refcnt)) 279 + if (unlikely(err == -EINPROGRESS)) { 280 + subreq->base.complete = req->base.complete; 281 + subreq->base.data = req->base.data; 282 + req->base.complete = complete; 283 + req->base.data = req; 284 + } else if (refcnt && refcount_dec_and_test(&ctx->refcnt)) 252 285 crypto_free_skcipher(tfm); 253 286 } 254 287 255 - static void cryptd_skcipher_encrypt(struct crypto_async_request *base, 256 - int err) 288 + static void cryptd_skcipher_encrypt(void *data, int err) 257 289 { 258 - struct skcipher_request *req = skcipher_request_cast(base); 259 - struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); 260 - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 261 - struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); 262 - struct skcipher_request *subreq = &rctx->req; 263 - struct crypto_skcipher *child = ctx->child; 290 + struct skcipher_request *req = data; 291 + struct skcipher_request *subreq; 264 292 265 - if (unlikely(err == -EINPROGRESS)) 266 - goto out; 293 + subreq = cryptd_skcipher_prepare(req, err); 294 + if (likely(subreq)) 295 + err = crypto_skcipher_encrypt(subreq); 267 296 268 - skcipher_request_set_tfm(subreq, child); 269 - skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, 270 - NULL, NULL); 271 - skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, 272 - req->iv); 273 - 274 - err = crypto_skcipher_encrypt(subreq); 275 - 
skcipher_request_zero(subreq); 276 - 277 - req->base.complete = rctx->complete; 278 - 279 - out: 280 - cryptd_skcipher_complete(req, err); 297 + cryptd_skcipher_complete(req, err, cryptd_skcipher_encrypt); 281 298 } 282 299 283 - static void cryptd_skcipher_decrypt(struct crypto_async_request *base, 284 - int err) 300 + static void cryptd_skcipher_decrypt(void *data, int err) 285 301 { 286 - struct skcipher_request *req = skcipher_request_cast(base); 287 - struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); 288 - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 289 - struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); 290 - struct skcipher_request *subreq = &rctx->req; 291 - struct crypto_skcipher *child = ctx->child; 302 + struct skcipher_request *req = data; 303 + struct skcipher_request *subreq; 292 304 293 - if (unlikely(err == -EINPROGRESS)) 294 - goto out; 305 + subreq = cryptd_skcipher_prepare(req, err); 306 + if (likely(subreq)) 307 + err = crypto_skcipher_decrypt(subreq); 295 308 296 - skcipher_request_set_tfm(subreq, child); 297 - skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, 298 - NULL, NULL); 299 - skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, 300 - req->iv); 301 - 302 - err = crypto_skcipher_decrypt(subreq); 303 - skcipher_request_zero(subreq); 304 - 305 - req->base.complete = rctx->complete; 306 - 307 - out: 308 - cryptd_skcipher_complete(req, err); 309 + cryptd_skcipher_complete(req, err, cryptd_skcipher_decrypt); 309 310 } 310 311 311 312 static int cryptd_skcipher_enqueue(struct skcipher_request *req, ··· 314 313 { 315 314 struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); 316 315 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 316 + struct skcipher_request *subreq = &rctx->req; 317 317 struct cryptd_queue *queue; 318 318 319 319 queue = cryptd_get_queue(crypto_skcipher_tfm(tfm)); 320 - rctx->complete = req->base.complete; 320 + 
subreq->base.complete = req->base.complete; 321 + subreq->base.data = req->base.data; 321 322 req->base.complete = compl; 323 + req->base.data = req; 322 324 323 325 return cryptd_enqueue_request(queue, &req->base); 324 326 } ··· 474 470 cryptd_get_queue(crypto_ahash_tfm(tfm)); 475 471 476 472 rctx->complete = req->base.complete; 473 + rctx->data = req->base.data; 477 474 req->base.complete = compl; 475 + req->base.data = req; 478 476 479 477 return cryptd_enqueue_request(queue, &req->base); 480 478 } 481 479 482 - static void cryptd_hash_complete(struct ahash_request *req, int err) 480 + static struct shash_desc *cryptd_hash_prepare(struct ahash_request *req, 481 + int err) 482 + { 483 + struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); 484 + 485 + req->base.complete = rctx->complete; 486 + req->base.data = rctx->data; 487 + 488 + if (unlikely(err == -EINPROGRESS)) 489 + return NULL; 490 + 491 + return &rctx->desc; 492 + } 493 + 494 + static void cryptd_hash_complete(struct ahash_request *req, int err, 495 + crypto_completion_t complete) 483 496 { 484 497 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 485 498 struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); 486 - struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); 487 499 int refcnt = refcount_read(&ctx->refcnt); 488 500 489 501 local_bh_disable(); 490 - rctx->complete(&req->base, err); 502 + ahash_request_complete(req, err); 491 503 local_bh_enable(); 492 504 493 - if (err != -EINPROGRESS && refcnt && refcount_dec_and_test(&ctx->refcnt)) 505 + if (err == -EINPROGRESS) { 506 + req->base.complete = complete; 507 + req->base.data = req; 508 + } else if (refcnt && refcount_dec_and_test(&ctx->refcnt)) 494 509 crypto_free_ahash(tfm); 495 510 } 496 511 497 - static void cryptd_hash_init(struct crypto_async_request *req_async, int err) 512 + static void cryptd_hash_init(void *data, int err) 498 513 { 499 - struct cryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); 514 + struct 
ahash_request *req = data; 515 + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 516 + struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); 500 517 struct crypto_shash *child = ctx->child; 501 - struct ahash_request *req = ahash_request_cast(req_async); 502 - struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); 503 - struct shash_desc *desc = &rctx->desc; 518 + struct shash_desc *desc; 504 519 505 - if (unlikely(err == -EINPROGRESS)) 520 + desc = cryptd_hash_prepare(req, err); 521 + if (unlikely(!desc)) 506 522 goto out; 507 523 508 524 desc->tfm = child; 509 525 510 526 err = crypto_shash_init(desc); 511 527 512 - req->base.complete = rctx->complete; 513 - 514 528 out: 515 - cryptd_hash_complete(req, err); 529 + cryptd_hash_complete(req, err, cryptd_hash_init); 516 530 } 517 531 518 532 static int cryptd_hash_init_enqueue(struct ahash_request *req) ··· 538 516 return cryptd_hash_enqueue(req, cryptd_hash_init); 539 517 } 540 518 541 - static void cryptd_hash_update(struct crypto_async_request *req_async, int err) 519 + static void cryptd_hash_update(void *data, int err) 542 520 { 543 - struct ahash_request *req = ahash_request_cast(req_async); 544 - struct cryptd_hash_request_ctx *rctx; 521 + struct ahash_request *req = data; 522 + struct shash_desc *desc; 545 523 546 - rctx = ahash_request_ctx(req); 524 + desc = cryptd_hash_prepare(req, err); 525 + if (likely(desc)) 526 + err = shash_ahash_update(req, desc); 547 527 548 - if (unlikely(err == -EINPROGRESS)) 549 - goto out; 550 - 551 - err = shash_ahash_update(req, &rctx->desc); 552 - 553 - req->base.complete = rctx->complete; 554 - 555 - out: 556 - cryptd_hash_complete(req, err); 528 + cryptd_hash_complete(req, err, cryptd_hash_update); 557 529 } 558 530 559 531 static int cryptd_hash_update_enqueue(struct ahash_request *req) ··· 555 539 return cryptd_hash_enqueue(req, cryptd_hash_update); 556 540 } 557 541 558 - static void cryptd_hash_final(struct crypto_async_request *req_async, int err) 542 + 
static void cryptd_hash_final(void *data, int err) 559 543 { 560 - struct ahash_request *req = ahash_request_cast(req_async); 561 - struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); 544 + struct ahash_request *req = data; 545 + struct shash_desc *desc; 562 546 563 - if (unlikely(err == -EINPROGRESS)) 564 - goto out; 547 + desc = cryptd_hash_prepare(req, err); 548 + if (likely(desc)) 549 + err = crypto_shash_final(desc, req->result); 565 550 566 - err = crypto_shash_final(&rctx->desc, req->result); 567 - 568 - req->base.complete = rctx->complete; 569 - 570 - out: 571 - cryptd_hash_complete(req, err); 551 + cryptd_hash_complete(req, err, cryptd_hash_final); 572 552 } 573 553 574 554 static int cryptd_hash_final_enqueue(struct ahash_request *req) ··· 572 560 return cryptd_hash_enqueue(req, cryptd_hash_final); 573 561 } 574 562 575 - static void cryptd_hash_finup(struct crypto_async_request *req_async, int err) 563 + static void cryptd_hash_finup(void *data, int err) 576 564 { 577 - struct ahash_request *req = ahash_request_cast(req_async); 578 - struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); 565 + struct ahash_request *req = data; 566 + struct shash_desc *desc; 579 567 580 - if (unlikely(err == -EINPROGRESS)) 581 - goto out; 568 + desc = cryptd_hash_prepare(req, err); 569 + if (likely(desc)) 570 + err = shash_ahash_finup(req, desc); 582 571 583 - err = shash_ahash_finup(req, &rctx->desc); 584 - 585 - req->base.complete = rctx->complete; 586 - 587 - out: 588 - cryptd_hash_complete(req, err); 572 + cryptd_hash_complete(req, err, cryptd_hash_finup); 589 573 } 590 574 591 575 static int cryptd_hash_finup_enqueue(struct ahash_request *req) ··· 589 581 return cryptd_hash_enqueue(req, cryptd_hash_finup); 590 582 } 591 583 592 - static void cryptd_hash_digest(struct crypto_async_request *req_async, int err) 584 + static void cryptd_hash_digest(void *data, int err) 593 585 { 594 - struct cryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); 586 + 
struct ahash_request *req = data; 587 + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 588 + struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); 595 589 struct crypto_shash *child = ctx->child; 596 - struct ahash_request *req = ahash_request_cast(req_async); 597 - struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); 598 - struct shash_desc *desc = &rctx->desc; 590 + struct shash_desc *desc; 599 591 600 - if (unlikely(err == -EINPROGRESS)) 592 + desc = cryptd_hash_prepare(req, err); 593 + if (unlikely(!desc)) 601 594 goto out; 602 595 603 596 desc->tfm = child; 604 597 605 598 err = shash_ahash_digest(req, desc); 606 599 607 - req->base.complete = rctx->complete; 608 - 609 600 out: 610 - cryptd_hash_complete(req, err); 601 + cryptd_hash_complete(req, err, cryptd_hash_digest); 611 602 } 612 603 613 604 static int cryptd_hash_digest_enqueue(struct ahash_request *req) ··· 719 712 } 720 713 721 714 static void cryptd_aead_crypt(struct aead_request *req, 722 - struct crypto_aead *child, 723 - int err, 724 - int (*crypt)(struct aead_request *req)) 715 + struct crypto_aead *child, int err, 716 + int (*crypt)(struct aead_request *req), 717 + crypto_completion_t compl) 725 718 { 726 719 struct cryptd_aead_request_ctx *rctx; 720 + struct aead_request *subreq; 727 721 struct cryptd_aead_ctx *ctx; 728 - crypto_completion_t compl; 729 722 struct crypto_aead *tfm; 730 723 int refcnt; 731 724 732 725 rctx = aead_request_ctx(req); 733 - compl = rctx->complete; 726 + subreq = &rctx->req; 727 + req->base.complete = subreq->base.complete; 728 + req->base.data = subreq->base.data; 734 729 735 730 tfm = crypto_aead_reqtfm(req); 736 731 737 732 if (unlikely(err == -EINPROGRESS)) 738 733 goto out; 739 - aead_request_set_tfm(req, child); 740 - err = crypt( req ); 734 + 735 + aead_request_set_tfm(subreq, child); 736 + aead_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, 737 + NULL, NULL); 738 + aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, 739 + 
req->iv); 740 + aead_request_set_ad(subreq, req->assoclen); 741 + 742 + err = crypt(subreq); 741 743 742 744 out: 743 745 ctx = crypto_aead_ctx(tfm); 744 746 refcnt = refcount_read(&ctx->refcnt); 745 747 746 748 local_bh_disable(); 747 - compl(&req->base, err); 749 + aead_request_complete(req, err); 748 750 local_bh_enable(); 749 751 750 - if (err != -EINPROGRESS && refcnt && refcount_dec_and_test(&ctx->refcnt)) 752 + if (err == -EINPROGRESS) { 753 + subreq->base.complete = req->base.complete; 754 + subreq->base.data = req->base.data; 755 + req->base.complete = compl; 756 + req->base.data = req; 757 + } else if (refcnt && refcount_dec_and_test(&ctx->refcnt)) 751 758 crypto_free_aead(tfm); 752 759 } 753 760 754 - static void cryptd_aead_encrypt(struct crypto_async_request *areq, int err) 761 + static void cryptd_aead_encrypt(void *data, int err) 755 762 { 756 - struct cryptd_aead_ctx *ctx = crypto_tfm_ctx(areq->tfm); 757 - struct crypto_aead *child = ctx->child; 758 - struct aead_request *req; 763 + struct aead_request *req = data; 764 + struct cryptd_aead_ctx *ctx; 765 + struct crypto_aead *child; 759 766 760 - req = container_of(areq, struct aead_request, base); 761 - cryptd_aead_crypt(req, child, err, crypto_aead_alg(child)->encrypt); 767 + ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); 768 + child = ctx->child; 769 + cryptd_aead_crypt(req, child, err, crypto_aead_alg(child)->encrypt, 770 + cryptd_aead_encrypt); 762 771 } 763 772 764 - static void cryptd_aead_decrypt(struct crypto_async_request *areq, int err) 773 + static void cryptd_aead_decrypt(void *data, int err) 765 774 { 766 - struct cryptd_aead_ctx *ctx = crypto_tfm_ctx(areq->tfm); 767 - struct crypto_aead *child = ctx->child; 768 - struct aead_request *req; 775 + struct aead_request *req = data; 776 + struct cryptd_aead_ctx *ctx; 777 + struct crypto_aead *child; 769 778 770 - req = container_of(areq, struct aead_request, base); 771 - cryptd_aead_crypt(req, child, err, crypto_aead_alg(child)->decrypt); 
779 + ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); 780 + child = ctx->child; 781 + cryptd_aead_crypt(req, child, err, crypto_aead_alg(child)->decrypt, 782 + cryptd_aead_decrypt); 772 783 } 773 784 774 785 static int cryptd_aead_enqueue(struct aead_request *req, ··· 795 770 struct cryptd_aead_request_ctx *rctx = aead_request_ctx(req); 796 771 struct crypto_aead *tfm = crypto_aead_reqtfm(req); 797 772 struct cryptd_queue *queue = cryptd_get_queue(crypto_aead_tfm(tfm)); 773 + struct aead_request *subreq = &rctx->req; 798 774 799 - rctx->complete = req->base.complete; 775 + subreq->base.complete = req->base.complete; 776 + subreq->base.data = req->base.data; 800 777 req->base.complete = compl; 778 + req->base.data = req; 801 779 return cryptd_enqueue_request(queue, &req->base); 802 780 } 803 781 ··· 828 800 829 801 ctx->child = cipher; 830 802 crypto_aead_set_reqsize( 831 - tfm, max((unsigned)sizeof(struct cryptd_aead_request_ctx), 832 - crypto_aead_reqsize(cipher))); 803 + tfm, sizeof(struct cryptd_aead_request_ctx) + 804 + crypto_aead_reqsize(cipher)); 833 805 return 0; 834 806 } 835 807
+4 -4
crypto/crypto_engine.c
··· 54 54 } 55 55 } 56 56 lockdep_assert_in_softirq(); 57 - req->complete(req, err); 57 + crypto_request_complete(req, err); 58 58 59 59 kthread_queue_work(engine->kworker, &engine->pump_requests); 60 60 } ··· 130 130 engine->cur_req = async_req; 131 131 132 132 if (backlog) 133 - backlog->complete(backlog, -EINPROGRESS); 133 + crypto_request_complete(backlog, -EINPROGRESS); 134 134 135 135 if (engine->busy) 136 136 was_busy = true; ··· 214 214 } 215 215 216 216 req_err_2: 217 - async_req->complete(async_req, ret); 217 + crypto_request_complete(async_req, ret); 218 218 219 219 retry: 220 220 /* If retry mechanism is supported, send new requests to engine */ ··· 499 499 * This has the form: 500 500 * callback(struct crypto_engine *engine) 501 501 * where: 502 - * @engine: the crypto engine structure. 502 + * engine: the crypto engine structure. 503 503 * @rt: whether this queue is set to run as a realtime task 504 504 * @qlen: maximum size of the crypto-engine queue 505 505 *
+6 -6
crypto/cts.c
··· 85 85 return crypto_skcipher_setkey(child, key, keylen); 86 86 } 87 87 88 - static void cts_cbc_crypt_done(struct crypto_async_request *areq, int err) 88 + static void cts_cbc_crypt_done(void *data, int err) 89 89 { 90 - struct skcipher_request *req = areq->data; 90 + struct skcipher_request *req = data; 91 91 92 92 if (err == -EINPROGRESS) 93 93 return; ··· 125 125 return crypto_skcipher_encrypt(subreq); 126 126 } 127 127 128 - static void crypto_cts_encrypt_done(struct crypto_async_request *areq, int err) 128 + static void crypto_cts_encrypt_done(void *data, int err) 129 129 { 130 - struct skcipher_request *req = areq->data; 130 + struct skcipher_request *req = data; 131 131 132 132 if (err) 133 133 goto out; ··· 219 219 return crypto_skcipher_decrypt(subreq); 220 220 } 221 221 222 - static void crypto_cts_decrypt_done(struct crypto_async_request *areq, int err) 222 + static void crypto_cts_decrypt_done(void *data, int err) 223 223 { 224 - struct skcipher_request *req = areq->data; 224 + struct skcipher_request *req = data; 225 225 226 226 if (err) 227 227 goto out;
+2 -3
crypto/dh.c
··· 503 503 return err; 504 504 } 505 505 506 - static void dh_safe_prime_complete_req(struct crypto_async_request *dh_req, 507 - int err) 506 + static void dh_safe_prime_complete_req(void *data, int err) 508 507 { 509 - struct kpp_request *req = dh_req->data; 508 + struct kpp_request *req = data; 510 509 511 510 kpp_request_complete(req, err); 512 511 }
+4 -2
crypto/ecc.c
··· 1384 1384 1385 1385 num_bits = max(vli_num_bits(u1, ndigits), vli_num_bits(u2, ndigits)); 1386 1386 i = num_bits - 1; 1387 - idx = (!!vli_test_bit(u1, i)) | ((!!vli_test_bit(u2, i)) << 1); 1387 + idx = !!vli_test_bit(u1, i); 1388 + idx |= (!!vli_test_bit(u2, i)) << 1; 1388 1389 point = points[idx]; 1389 1390 1390 1391 vli_set(rx, point->x, ndigits); ··· 1395 1394 1396 1395 for (--i; i >= 0; i--) { 1397 1396 ecc_point_double_jacobian(rx, ry, z, curve); 1398 - idx = (!!vli_test_bit(u1, i)) | ((!!vli_test_bit(u2, i)) << 1); 1397 + idx = !!vli_test_bit(u1, i); 1398 + idx |= (!!vli_test_bit(u2, i)) << 1; 1399 1399 point = points[idx]; 1400 1400 if (point) { 1401 1401 u64 tx[ECC_MAX_DIGITS];
+10 -5
crypto/essiv.c
··· 131 131 return crypto_aead_setauthsize(tctx->u.aead, authsize); 132 132 } 133 133 134 - static void essiv_skcipher_done(struct crypto_async_request *areq, int err) 134 + static void essiv_skcipher_done(void *data, int err) 135 135 { 136 - struct skcipher_request *req = areq->data; 136 + struct skcipher_request *req = data; 137 137 138 138 skcipher_request_complete(req, err); 139 139 } ··· 166 166 return essiv_skcipher_crypt(req, false); 167 167 } 168 168 169 - static void essiv_aead_done(struct crypto_async_request *areq, int err) 169 + static void essiv_aead_done(void *data, int err) 170 170 { 171 - struct aead_request *req = areq->data; 171 + struct aead_request *req = data; 172 172 struct essiv_aead_request_ctx *rctx = aead_request_ctx(req); 173 173 174 + if (err == -EINPROGRESS) 175 + goto out; 176 + 174 177 kfree(rctx->assoc); 178 + 179 + out: 175 180 aead_request_complete(req, err); 176 181 } 177 182 ··· 252 247 err = enc ? crypto_aead_encrypt(subreq) : 253 248 crypto_aead_decrypt(subreq); 254 249 255 - if (rctx->assoc && err != -EINPROGRESS) 250 + if (rctx->assoc && err != -EINPROGRESS && err != -EBUSY) 256 251 kfree(rctx->assoc); 257 252 return err; 258 253 }
+17 -19
crypto/gcm.c
··· 197 197 return len ? 16 - len : 0; 198 198 } 199 199 200 - static void gcm_hash_len_done(struct crypto_async_request *areq, int err); 200 + static void gcm_hash_len_done(void *data, int err); 201 201 202 202 static int gcm_hash_update(struct aead_request *req, 203 203 crypto_completion_t compl, ··· 246 246 return gctx->complete(req, flags); 247 247 } 248 248 249 - static void gcm_hash_len_done(struct crypto_async_request *areq, int err) 249 + static void gcm_hash_len_done(void *data, int err) 250 250 { 251 - struct aead_request *req = areq->data; 251 + struct aead_request *req = data; 252 252 253 253 if (err) 254 254 goto out; ··· 267 267 gcm_hash_len_continue(req, flags); 268 268 } 269 269 270 - static void gcm_hash_crypt_remain_done(struct crypto_async_request *areq, 271 - int err) 270 + static void gcm_hash_crypt_remain_done(void *data, int err) 272 271 { 273 - struct aead_request *req = areq->data; 272 + struct aead_request *req = data; 274 273 275 274 if (err) 276 275 goto out; ··· 297 298 return gcm_hash_crypt_remain_continue(req, flags); 298 299 } 299 300 300 - static void gcm_hash_crypt_done(struct crypto_async_request *areq, int err) 301 + static void gcm_hash_crypt_done(void *data, int err) 301 302 { 302 - struct aead_request *req = areq->data; 303 + struct aead_request *req = data; 303 304 304 305 if (err) 305 306 goto out; ··· 325 326 return gcm_hash_crypt_remain_continue(req, flags); 326 327 } 327 328 328 - static void gcm_hash_assoc_remain_done(struct crypto_async_request *areq, 329 - int err) 329 + static void gcm_hash_assoc_remain_done(void *data, int err) 330 330 { 331 - struct aead_request *req = areq->data; 331 + struct aead_request *req = data; 332 332 333 333 if (err) 334 334 goto out; ··· 353 355 return gcm_hash_assoc_remain_continue(req, flags); 354 356 } 355 357 356 - static void gcm_hash_assoc_done(struct crypto_async_request *areq, int err) 358 + static void gcm_hash_assoc_done(void *data, int err) 357 359 { 358 - struct aead_request 
*req = areq->data; 360 + struct aead_request *req = data; 359 361 360 362 if (err) 361 363 goto out; ··· 378 380 return gcm_hash_assoc_remain_continue(req, flags); 379 381 } 380 382 381 - static void gcm_hash_init_done(struct crypto_async_request *areq, int err) 383 + static void gcm_hash_init_done(void *data, int err) 382 384 { 383 - struct aead_request *req = areq->data; 385 + struct aead_request *req = data; 384 386 385 387 if (err) 386 388 goto out; ··· 431 433 return gcm_hash(req, flags); 432 434 } 433 435 434 - static void gcm_encrypt_done(struct crypto_async_request *areq, int err) 436 + static void gcm_encrypt_done(void *data, int err) 435 437 { 436 - struct aead_request *req = areq->data; 438 + struct aead_request *req = data; 437 439 438 440 if (err) 439 441 goto out; ··· 475 477 return crypto_memneq(iauth_tag, auth_tag, authsize) ? -EBADMSG : 0; 476 478 } 477 479 478 - static void gcm_decrypt_done(struct crypto_async_request *areq, int err) 480 + static void gcm_decrypt_done(void *data, int err) 479 481 { 480 - struct aead_request *req = areq->data; 482 + struct aead_request *req = data; 481 483 482 484 if (!err) 483 485 err = crypto_gcm_verify(req);
+2 -3
crypto/hctr2.c
··· 252 252 return 0; 253 253 } 254 254 255 - static void hctr2_xctr_done(struct crypto_async_request *areq, 256 - int err) 255 + static void hctr2_xctr_done(void *data, int err) 257 256 { 258 - struct skcipher_request *req = areq->data; 257 + struct skcipher_request *req = data; 259 258 260 259 if (!err) 261 260 err = hctr2_finish(req);
+2 -2
crypto/lrw.c
··· 205 205 return lrw_xor_tweak(req, true); 206 206 } 207 207 208 - static void lrw_crypt_done(struct crypto_async_request *areq, int err) 208 + static void lrw_crypt_done(void *data, int err) 209 209 { 210 - struct skcipher_request *req = areq->data; 210 + struct skcipher_request *req = data; 211 211 212 212 if (!err) { 213 213 struct lrw_request_ctx *rctx = skcipher_request_ctx(req);
+2 -2
crypto/pcrypt.c
··· 63 63 aead_request_complete(req->base.data, padata->info); 64 64 } 65 65 66 - static void pcrypt_aead_done(struct crypto_async_request *areq, int err) 66 + static void pcrypt_aead_done(void *data, int err) 67 67 { 68 - struct aead_request *req = areq->data; 68 + struct aead_request *req = data; 69 69 struct pcrypt_request *preq = aead_request_ctx(req); 70 70 struct padata_priv *padata = pcrypt_request_padata(preq); 71 71
+6
crypto/proc.c
··· 11 11 #include <linux/atomic.h> 12 12 #include <linux/init.h> 13 13 #include <linux/crypto.h> 14 + #include <linux/fips.h> 14 15 #include <linux/module.h> /* for module_name() */ 15 16 #include <linux/rwsem.h> 16 17 #include <linux/proc_fs.h> ··· 49 48 seq_printf(m, "internal : %s\n", 50 49 (alg->cra_flags & CRYPTO_ALG_INTERNAL) ? 51 50 "yes" : "no"); 51 + if (fips_enabled) { 52 + seq_printf(m, "fips : %s\n", 53 + (alg->cra_flags & CRYPTO_ALG_FIPS_INTERNAL) ? 54 + "no" : "yes"); 55 + } 52 56 53 57 if (alg->cra_flags & CRYPTO_ALG_LARVAL) { 54 58 seq_printf(m, "type : larval\n");
+22 -29
crypto/rsa-pkcs1pad.c
··· 190 190 if (likely(!pad_len)) 191 191 goto out; 192 192 193 - out_buf = kzalloc(ctx->key_size, GFP_KERNEL); 193 + out_buf = kzalloc(ctx->key_size, GFP_ATOMIC); 194 194 err = -ENOMEM; 195 195 if (!out_buf) 196 196 goto out; ··· 210 210 return err; 211 211 } 212 212 213 - static void pkcs1pad_encrypt_sign_complete_cb( 214 - struct crypto_async_request *child_async_req, int err) 213 + static void pkcs1pad_encrypt_sign_complete_cb(void *data, int err) 215 214 { 216 - struct akcipher_request *req = child_async_req->data; 217 - struct crypto_async_request async_req; 215 + struct akcipher_request *req = data; 218 216 219 217 if (err == -EINPROGRESS) 220 - return; 218 + goto out; 221 219 222 - async_req.data = req->base.data; 223 - async_req.tfm = crypto_akcipher_tfm(crypto_akcipher_reqtfm(req)); 224 - async_req.flags = child_async_req->flags; 225 - req->base.complete(&async_req, 226 - pkcs1pad_encrypt_sign_complete(req, err)); 220 + err = pkcs1pad_encrypt_sign_complete(req, err); 221 + 222 + out: 223 + akcipher_request_complete(req, err); 227 224 } 228 225 229 226 static int pkcs1pad_encrypt(struct akcipher_request *req) ··· 325 328 return err; 326 329 } 327 330 328 - static void pkcs1pad_decrypt_complete_cb( 329 - struct crypto_async_request *child_async_req, int err) 331 + static void pkcs1pad_decrypt_complete_cb(void *data, int err) 330 332 { 331 - struct akcipher_request *req = child_async_req->data; 332 - struct crypto_async_request async_req; 333 + struct akcipher_request *req = data; 333 334 334 335 if (err == -EINPROGRESS) 335 - return; 336 + goto out; 336 337 337 - async_req.data = req->base.data; 338 - async_req.tfm = crypto_akcipher_tfm(crypto_akcipher_reqtfm(req)); 339 - async_req.flags = child_async_req->flags; 340 - req->base.complete(&async_req, pkcs1pad_decrypt_complete(req, err)); 338 + err = pkcs1pad_decrypt_complete(req, err); 339 + 340 + out: 341 + akcipher_request_complete(req, err); 341 342 } 342 343 343 344 static int pkcs1pad_decrypt(struct 
akcipher_request *req) ··· 504 509 return err; 505 510 } 506 511 507 - static void pkcs1pad_verify_complete_cb( 508 - struct crypto_async_request *child_async_req, int err) 512 + static void pkcs1pad_verify_complete_cb(void *data, int err) 509 513 { 510 - struct akcipher_request *req = child_async_req->data; 511 - struct crypto_async_request async_req; 514 + struct akcipher_request *req = data; 512 515 513 516 if (err == -EINPROGRESS) 514 - return; 517 + goto out; 515 518 516 - async_req.data = req->base.data; 517 - async_req.tfm = crypto_akcipher_tfm(crypto_akcipher_reqtfm(req)); 518 - async_req.flags = child_async_req->flags; 519 - req->base.complete(&async_req, pkcs1pad_verify_complete(req, err)); 519 + err = pkcs1pad_verify_complete(req, err); 520 + 521 + out: 522 + akcipher_request_complete(req, err); 520 523 } 521 524 522 525 /*
+3 -4
crypto/seqiv.c
··· 23 23 struct aead_request *subreq = aead_request_ctx(req); 24 24 struct crypto_aead *geniv; 25 25 26 - if (err == -EINPROGRESS) 26 + if (err == -EINPROGRESS || err == -EBUSY) 27 27 return; 28 28 29 29 if (err) ··· 36 36 kfree_sensitive(subreq->iv); 37 37 } 38 38 39 - static void seqiv_aead_encrypt_complete(struct crypto_async_request *base, 40 - int err) 39 + static void seqiv_aead_encrypt_complete(void *data, int err) 41 40 { 42 - struct aead_request *req = base->data; 41 + struct aead_request *req = data; 43 42 44 43 seqiv_aead_encrypt_complete2(req, err); 45 44 aead_request_complete(req, err);
+2 -2
crypto/shash.c
··· 320 320 nbytes <= min(sg->length, ((unsigned int)(PAGE_SIZE)) - offset))) { 321 321 void *data; 322 322 323 - data = kmap_atomic(sg_page(sg)); 323 + data = kmap_local_page(sg_page(sg)); 324 324 err = crypto_shash_digest(desc, data + offset, nbytes, 325 325 req->result); 326 - kunmap_atomic(data); 326 + kunmap_local(data); 327 327 } else 328 328 err = crypto_shash_init(desc) ?: 329 329 shash_ahash_finup(req, desc);
+4 -18
crypto/skcipher.c
··· 42 42 43 43 static int skcipher_walk_next(struct skcipher_walk *walk); 44 44 45 - static inline void skcipher_unmap(struct scatter_walk *walk, void *vaddr) 46 - { 47 - if (PageHighMem(scatterwalk_page(walk))) 48 - kunmap_atomic(vaddr); 49 - } 50 - 51 - static inline void *skcipher_map(struct scatter_walk *walk) 52 - { 53 - struct page *page = scatterwalk_page(walk); 54 - 55 - return (PageHighMem(page) ? kmap_atomic(page) : page_address(page)) + 56 - offset_in_page(walk->offset); 57 - } 58 - 59 45 static inline void skcipher_map_src(struct skcipher_walk *walk) 60 46 { 61 - walk->src.virt.addr = skcipher_map(&walk->in); 47 + walk->src.virt.addr = scatterwalk_map(&walk->in); 62 48 } 63 49 64 50 static inline void skcipher_map_dst(struct skcipher_walk *walk) 65 51 { 66 - walk->dst.virt.addr = skcipher_map(&walk->out); 52 + walk->dst.virt.addr = scatterwalk_map(&walk->out); 67 53 } 68 54 69 55 static inline void skcipher_unmap_src(struct skcipher_walk *walk) 70 56 { 71 - skcipher_unmap(&walk->in, walk->src.virt.addr); 57 + scatterwalk_unmap(walk->src.virt.addr); 72 58 } 73 59 74 60 static inline void skcipher_unmap_dst(struct skcipher_walk *walk) 75 61 { 76 - skcipher_unmap(&walk->out, walk->dst.virt.addr); 62 + scatterwalk_unmap(walk->dst.virt.addr); 77 63 } 78 64 79 65 static inline gfp_t skcipher_walk_gfp(struct skcipher_walk *walk)
+4 -4
crypto/tcrypt.c
··· 2044 2044 2045 2045 case 211: 2046 2046 test_aead_speed("rfc4106(gcm(aes))", ENCRYPT, sec, 2047 - NULL, 0, 16, 16, aead_speed_template_20); 2047 + NULL, 0, 16, 16, aead_speed_template_20_28_36); 2048 2048 test_aead_speed("gcm(aes)", ENCRYPT, sec, 2049 2049 NULL, 0, 16, 8, speed_template_16_24_32); 2050 2050 test_aead_speed("rfc4106(gcm(aes))", DECRYPT, sec, 2051 - NULL, 0, 16, 16, aead_speed_template_20); 2051 + NULL, 0, 16, 16, aead_speed_template_20_28_36); 2052 2052 test_aead_speed("gcm(aes)", DECRYPT, sec, 2053 2053 NULL, 0, 16, 8, speed_template_16_24_32); 2054 2054 break; ··· 2074 2074 2075 2075 case 215: 2076 2076 test_mb_aead_speed("rfc4106(gcm(aes))", ENCRYPT, sec, NULL, 2077 - 0, 16, 16, aead_speed_template_20, num_mb); 2077 + 0, 16, 16, aead_speed_template_20_28_36, num_mb); 2078 2078 test_mb_aead_speed("gcm(aes)", ENCRYPT, sec, NULL, 0, 16, 8, 2079 2079 speed_template_16_24_32, num_mb); 2080 2080 test_mb_aead_speed("rfc4106(gcm(aes))", DECRYPT, sec, NULL, 2081 - 0, 16, 16, aead_speed_template_20, num_mb); 2081 + 0, 16, 16, aead_speed_template_20_28_36, num_mb); 2082 2082 test_mb_aead_speed("gcm(aes)", DECRYPT, sec, NULL, 0, 16, 8, 2083 2083 speed_template_16_24_32, num_mb); 2084 2084 break;
+1 -1
crypto/tcrypt.h
··· 62 62 * AEAD speed tests 63 63 */ 64 64 static u8 aead_speed_template_19[] = {19, 0}; 65 - static u8 aead_speed_template_20[] = {20, 0}; 65 + static u8 aead_speed_template_20_28_36[] = {20, 28, 36, 0}; 66 66 static u8 aead_speed_template_36[] = {36, 0}; 67 67 68 68 /*
+10 -6
crypto/testmgr.c
··· 357 357 { .proportion_of_total = 5000 }, 358 358 }, 359 359 }, { 360 + .name = "one src, two even splits dst", 361 + .inplace_mode = OUT_OF_PLACE, 362 + .src_divs = { { .proportion_of_total = 10000 } }, 363 + .dst_divs = { 364 + { .proportion_of_total = 5000 }, 365 + { .proportion_of_total = 5000 }, 366 + }, 367 + }, { 360 368 .name = "uneven misaligned splits, may sleep", 361 369 .req_flags = CRYPTO_TFM_REQ_MAY_SLEEP, 362 370 .src_divs = { ··· 4509 4501 }, { 4510 4502 #endif 4511 4503 .alg = "cbcmac(aes)", 4512 - .fips_allowed = 1, 4513 4504 .test = alg_test_hash, 4514 4505 .suite = { 4515 4506 .hash = __VECS(aes_cbcmac_tv_template) ··· 4789 4782 }, { 4790 4783 /* covered by drbg_nopr_hmac_sha256 test */ 4791 4784 .alg = "drbg_nopr_hmac_sha384", 4792 - .fips_allowed = 1, 4793 4785 .test = alg_test_null, 4794 4786 }, { 4795 4787 .alg = "drbg_nopr_hmac_sha512", ··· 4811 4805 }, { 4812 4806 /* covered by drbg_nopr_sha256 test */ 4813 4807 .alg = "drbg_nopr_sha384", 4814 - .fips_allowed = 1, 4815 4808 .test = alg_test_null, 4816 4809 }, { 4817 4810 .alg = "drbg_nopr_sha512", ··· 4846 4841 }, { 4847 4842 /* covered by drbg_pr_hmac_sha256 test */ 4848 4843 .alg = "drbg_pr_hmac_sha384", 4849 - .fips_allowed = 1, 4850 4844 .test = alg_test_null, 4851 4845 }, { 4852 4846 .alg = "drbg_pr_hmac_sha512", ··· 4865 4861 }, { 4866 4862 /* covered by drbg_pr_sha256 test */ 4867 4863 .alg = "drbg_pr_sha384", 4868 - .fips_allowed = 1, 4869 4864 .test = alg_test_null, 4870 4865 }, { 4871 4866 .alg = "drbg_pr_sha512", ··· 5038 5035 }, { 5039 5036 .alg = "ecdsa-nist-p256", 5040 5037 .test = alg_test_akcipher, 5038 + .fips_allowed = 1, 5041 5039 .suite = { 5042 5040 .akcipher = __VECS(ecdsa_nist_p256_tv_template) 5043 5041 } 5044 5042 }, { 5045 5043 .alg = "ecdsa-nist-p384", 5046 5044 .test = alg_test_akcipher, 5045 + .fips_allowed = 1, 5047 5046 .suite = { 5048 5047 .akcipher = __VECS(ecdsa_nist_p384_tv_template) 5049 5048 } ··· 5131 5126 }, { 5132 5127 .alg = "ghash", 5133 5128 
.test = alg_test_hash, 5134 - .fips_allowed = 1, 5135 5129 .suite = { 5136 5130 .hash = __VECS(ghash_tv_template) 5137 5131 }
+1 -1
crypto/wp512.c
··· 779 779 * The core Whirlpool transform. 780 780 */ 781 781 782 - static void wp512_process_buffer(struct wp512_ctx *wctx) { 782 + static __no_kmsan_checks void wp512_process_buffer(struct wp512_ctx *wctx) { 783 783 int i, r; 784 784 u64 K[8]; /* the round key */ 785 785 u64 block[8]; /* mu(buffer) */
+10 -10
crypto/xts.c
··· 140 140 return xts_xor_tweak(req, true, enc); 141 141 } 142 142 143 - static void xts_cts_done(struct crypto_async_request *areq, int err) 143 + static void xts_cts_done(void *data, int err) 144 144 { 145 - struct skcipher_request *req = areq->data; 145 + struct skcipher_request *req = data; 146 146 le128 b; 147 147 148 148 if (!err) { ··· 196 196 return 0; 197 197 } 198 198 199 - static void xts_encrypt_done(struct crypto_async_request *areq, int err) 199 + static void xts_encrypt_done(void *data, int err) 200 200 { 201 - struct skcipher_request *req = areq->data; 201 + struct skcipher_request *req = data; 202 202 203 203 if (!err) { 204 204 struct xts_request_ctx *rctx = skcipher_request_ctx(req); 205 205 206 - rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; 206 + rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; 207 207 err = xts_xor_tweak_post(req, true); 208 208 209 209 if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) { 210 210 err = xts_cts_final(req, crypto_skcipher_encrypt); 211 - if (err == -EINPROGRESS) 211 + if (err == -EINPROGRESS || err == -EBUSY) 212 212 return; 213 213 } 214 214 } ··· 216 216 skcipher_request_complete(req, err); 217 217 } 218 218 219 - static void xts_decrypt_done(struct crypto_async_request *areq, int err) 219 + static void xts_decrypt_done(void *data, int err) 220 220 { 221 - struct skcipher_request *req = areq->data; 221 + struct skcipher_request *req = data; 222 222 223 223 if (!err) { 224 224 struct xts_request_ctx *rctx = skcipher_request_ctx(req); 225 225 226 - rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; 226 + rctx->subreq.base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; 227 227 err = xts_xor_tweak_post(req, false); 228 228 229 229 if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) { 230 230 err = xts_cts_final(req, crypto_skcipher_decrypt); 231 - if (err == -EINPROGRESS) 231 + if (err == -EINPROGRESS || err == -EBUSY) 232 232 return; 233 233 } 234 234 }
+10
drivers/char/hw_random/Kconfig
··· 549 549 To compile this driver as a module, choose M here. 550 550 The module will be called cn10k_rng. If unsure, say Y. 551 551 552 + config HW_RANDOM_JH7110 553 + tristate "StarFive JH7110 Random Number Generator support" 554 + depends on SOC_STARFIVE || COMPILE_TEST 555 + help 556 + This driver provides support for the True Random Number 557 + Generator in StarFive JH7110 SoCs. 558 + 559 + To compile this driver as a module, choose M here. 560 + The module will be called jh7110-trng. 561 + 552 562 endif # HW_RANDOM 553 563 554 564 config UML_RANDOM
+1
drivers/char/hw_random/Makefile
··· 47 47 obj-$(CONFIG_HW_RANDOM_ARM_SMCCC_TRNG) += arm_smccc_trng.o 48 48 obj-$(CONFIG_HW_RANDOM_CN10K) += cn10k-rng.o 49 49 obj-$(CONFIG_HW_RANDOM_POLARFIRE_SOC) += mpfs-rng.o 50 + obj-$(CONFIG_HW_RANDOM_JH7110) += jh7110-trng.o
+393
drivers/char/hw_random/jh7110-trng.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * TRNG driver for the StarFive JH7110 SoC 4 + * 5 + * Copyright (C) 2022 StarFive Technology Co. 6 + */ 7 + 8 + #include <linux/clk.h> 9 + #include <linux/completion.h> 10 + #include <linux/delay.h> 11 + #include <linux/err.h> 12 + #include <linux/hw_random.h> 13 + #include <linux/interrupt.h> 14 + #include <linux/io.h> 15 + #include <linux/iopoll.h> 16 + #include <linux/kernel.h> 17 + #include <linux/module.h> 18 + #include <linux/of.h> 19 + #include <linux/platform_device.h> 20 + #include <linux/pm_runtime.h> 21 + #include <linux/random.h> 22 + #include <linux/reset.h> 23 + 24 + /* trng register offset */ 25 + #define STARFIVE_CTRL 0x00 26 + #define STARFIVE_STAT 0x04 27 + #define STARFIVE_MODE 0x08 28 + #define STARFIVE_SMODE 0x0C 29 + #define STARFIVE_IE 0x10 30 + #define STARFIVE_ISTAT 0x14 31 + #define STARFIVE_RAND0 0x20 32 + #define STARFIVE_RAND1 0x24 33 + #define STARFIVE_RAND2 0x28 34 + #define STARFIVE_RAND3 0x2C 35 + #define STARFIVE_RAND4 0x30 36 + #define STARFIVE_RAND5 0x34 37 + #define STARFIVE_RAND6 0x38 38 + #define STARFIVE_RAND7 0x3C 39 + #define STARFIVE_AUTO_RQSTS 0x60 40 + #define STARFIVE_AUTO_AGE 0x64 41 + 42 + /* CTRL CMD */ 43 + #define STARFIVE_CTRL_EXEC_NOP 0x0 44 + #define STARFIVE_CTRL_GENE_RANDNUM 0x1 45 + #define STARFIVE_CTRL_EXEC_RANDRESEED 0x2 46 + 47 + /* STAT */ 48 + #define STARFIVE_STAT_NONCE_MODE BIT(2) 49 + #define STARFIVE_STAT_R256 BIT(3) 50 + #define STARFIVE_STAT_MISSION_MODE BIT(8) 51 + #define STARFIVE_STAT_SEEDED BIT(9) 52 + #define STARFIVE_STAT_LAST_RESEED(x) ((x) << 16) 53 + #define STARFIVE_STAT_SRVC_RQST BIT(27) 54 + #define STARFIVE_STAT_RAND_GENERATING BIT(30) 55 + #define STARFIVE_STAT_RAND_SEEDING BIT(31) 56 + 57 + /* MODE */ 58 + #define STARFIVE_MODE_R256 BIT(3) 59 + 60 + /* SMODE */ 61 + #define STARFIVE_SMODE_NONCE_MODE BIT(2) 62 + #define STARFIVE_SMODE_MISSION_MODE BIT(8) 63 + #define STARFIVE_SMODE_MAX_REJECTS(x) ((x) << 16) 64 + 65 + /* IE */ 66 
+ #define STARFIVE_IE_RAND_RDY_EN BIT(0) 67 + #define STARFIVE_IE_SEED_DONE_EN BIT(1) 68 + #define STARFIVE_IE_LFSR_LOCKUP_EN BIT(4) 69 + #define STARFIVE_IE_GLBL_EN BIT(31) 70 + 71 + #define STARFIVE_IE_ALL (STARFIVE_IE_GLBL_EN | \ 72 + STARFIVE_IE_RAND_RDY_EN | \ 73 + STARFIVE_IE_SEED_DONE_EN | \ 74 + STARFIVE_IE_LFSR_LOCKUP_EN) 75 + 76 + /* ISTAT */ 77 + #define STARFIVE_ISTAT_RAND_RDY BIT(0) 78 + #define STARFIVE_ISTAT_SEED_DONE BIT(1) 79 + #define STARFIVE_ISTAT_LFSR_LOCKUP BIT(4) 80 + 81 + #define STARFIVE_RAND_LEN sizeof(u32) 82 + 83 + #define to_trng(p) container_of(p, struct starfive_trng, rng) 84 + 85 + enum reseed { 86 + RANDOM_RESEED, 87 + NONCE_RESEED, 88 + }; 89 + 90 + enum mode { 91 + PRNG_128BIT, 92 + PRNG_256BIT, 93 + }; 94 + 95 + struct starfive_trng { 96 + struct device *dev; 97 + void __iomem *base; 98 + struct clk *hclk; 99 + struct clk *ahb; 100 + struct reset_control *rst; 101 + struct hwrng rng; 102 + struct completion random_done; 103 + struct completion reseed_done; 104 + u32 mode; 105 + u32 mission; 106 + u32 reseed; 107 + /* protects against concurrent write to ctrl register */ 108 + spinlock_t write_lock; 109 + }; 110 + 111 + static u16 autoreq; 112 + module_param(autoreq, ushort, 0); 113 + MODULE_PARM_DESC(autoreq, "Auto-reseeding after random number requests by host reaches specified counter:\n" 114 + " 0 - disable counter\n" 115 + " other - reload value for internal counter"); 116 + 117 + static u16 autoage; 118 + module_param(autoage, ushort, 0); 119 + MODULE_PARM_DESC(autoage, "Auto-reseeding after specified timer countdowns to 0:\n" 120 + " 0 - disable timer\n" 121 + " other - reload value for internal timer"); 122 + 123 + static inline int starfive_trng_wait_idle(struct starfive_trng *trng) 124 + { 125 + u32 stat; 126 + 127 + return readl_relaxed_poll_timeout(trng->base + STARFIVE_STAT, stat, 128 + !(stat & (STARFIVE_STAT_RAND_GENERATING | 129 + STARFIVE_STAT_RAND_SEEDING)), 130 + 10, 100000); 131 + } 132 + 133 + static inline 
void starfive_trng_irq_mask_clear(struct starfive_trng *trng) 134 + { 135 + /* clear register: ISTAT */ 136 + u32 data = readl(trng->base + STARFIVE_ISTAT); 137 + 138 + writel(data, trng->base + STARFIVE_ISTAT); 139 + } 140 + 141 + static int starfive_trng_cmd(struct starfive_trng *trng, u32 cmd, bool wait) 142 + { 143 + int wait_time = 1000; 144 + 145 + /* allow up to 40 us for wait == 0 */ 146 + if (!wait) 147 + wait_time = 40; 148 + 149 + switch (cmd) { 150 + case STARFIVE_CTRL_GENE_RANDNUM: 151 + reinit_completion(&trng->random_done); 152 + spin_lock_irq(&trng->write_lock); 153 + writel(cmd, trng->base + STARFIVE_CTRL); 154 + spin_unlock_irq(&trng->write_lock); 155 + if (!wait_for_completion_timeout(&trng->random_done, usecs_to_jiffies(wait_time))) 156 + return -ETIMEDOUT; 157 + break; 158 + case STARFIVE_CTRL_EXEC_RANDRESEED: 159 + reinit_completion(&trng->reseed_done); 160 + spin_lock_irq(&trng->write_lock); 161 + writel(cmd, trng->base + STARFIVE_CTRL); 162 + spin_unlock_irq(&trng->write_lock); 163 + if (!wait_for_completion_timeout(&trng->reseed_done, usecs_to_jiffies(wait_time))) 164 + return -ETIMEDOUT; 165 + break; 166 + default: 167 + return -EINVAL; 168 + } 169 + 170 + return 0; 171 + } 172 + 173 + static int starfive_trng_init(struct hwrng *rng) 174 + { 175 + struct starfive_trng *trng = to_trng(rng); 176 + u32 mode, intr = 0; 177 + 178 + /* setup Auto Request/Age register */ 179 + writel(autoage, trng->base + STARFIVE_AUTO_AGE); 180 + writel(autoreq, trng->base + STARFIVE_AUTO_RQSTS); 181 + 182 + /* clear register: ISTAT */ 183 + starfive_trng_irq_mask_clear(trng); 184 + 185 + intr |= STARFIVE_IE_ALL; 186 + writel(intr, trng->base + STARFIVE_IE); 187 + 188 + mode = readl(trng->base + STARFIVE_MODE); 189 + 190 + switch (trng->mode) { 191 + case PRNG_128BIT: 192 + mode &= ~STARFIVE_MODE_R256; 193 + break; 194 + case PRNG_256BIT: 195 + mode |= STARFIVE_MODE_R256; 196 + break; 197 + default: 198 + mode |= STARFIVE_MODE_R256; 199 + break; 200 + } 201 + 
202 + writel(mode, trng->base + STARFIVE_MODE); 203 + 204 + return starfive_trng_cmd(trng, STARFIVE_CTRL_EXEC_RANDRESEED, 1); 205 + } 206 + 207 + static irqreturn_t starfive_trng_irq(int irq, void *priv) 208 + { 209 + u32 status; 210 + struct starfive_trng *trng = (struct starfive_trng *)priv; 211 + 212 + status = readl(trng->base + STARFIVE_ISTAT); 213 + if (status & STARFIVE_ISTAT_RAND_RDY) { 214 + writel(STARFIVE_ISTAT_RAND_RDY, trng->base + STARFIVE_ISTAT); 215 + complete(&trng->random_done); 216 + } 217 + 218 + if (status & STARFIVE_ISTAT_SEED_DONE) { 219 + writel(STARFIVE_ISTAT_SEED_DONE, trng->base + STARFIVE_ISTAT); 220 + complete(&trng->reseed_done); 221 + } 222 + 223 + if (status & STARFIVE_ISTAT_LFSR_LOCKUP) { 224 + writel(STARFIVE_ISTAT_LFSR_LOCKUP, trng->base + STARFIVE_ISTAT); 225 + /* SEU occurred, reseeding required*/ 226 + spin_lock(&trng->write_lock); 227 + writel(STARFIVE_CTRL_EXEC_RANDRESEED, trng->base + STARFIVE_CTRL); 228 + spin_unlock(&trng->write_lock); 229 + } 230 + 231 + return IRQ_HANDLED; 232 + } 233 + 234 + static void starfive_trng_cleanup(struct hwrng *rng) 235 + { 236 + struct starfive_trng *trng = to_trng(rng); 237 + 238 + writel(0, trng->base + STARFIVE_CTRL); 239 + 240 + reset_control_assert(trng->rst); 241 + clk_disable_unprepare(trng->hclk); 242 + clk_disable_unprepare(trng->ahb); 243 + } 244 + 245 + static int starfive_trng_read(struct hwrng *rng, void *buf, size_t max, bool wait) 246 + { 247 + struct starfive_trng *trng = to_trng(rng); 248 + int ret; 249 + 250 + pm_runtime_get_sync(trng->dev); 251 + 252 + if (trng->mode == PRNG_256BIT) 253 + max = min_t(size_t, max, (STARFIVE_RAND_LEN * 8)); 254 + else 255 + max = min_t(size_t, max, (STARFIVE_RAND_LEN * 4)); 256 + 257 + if (wait) { 258 + ret = starfive_trng_wait_idle(trng); 259 + if (ret) 260 + return -ETIMEDOUT; 261 + } 262 + 263 + ret = starfive_trng_cmd(trng, STARFIVE_CTRL_GENE_RANDNUM, wait); 264 + if (ret) 265 + return ret; 266 + 267 + memcpy_fromio(buf, trng->base + 
STARFIVE_RAND0, max); 268 + 269 + pm_runtime_put_sync_autosuspend(trng->dev); 270 + 271 + return max; 272 + } 273 + 274 + static int starfive_trng_probe(struct platform_device *pdev) 275 + { 276 + int ret; 277 + int irq; 278 + struct starfive_trng *trng; 279 + 280 + trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL); 281 + if (!trng) 282 + return -ENOMEM; 283 + 284 + platform_set_drvdata(pdev, trng); 285 + trng->dev = &pdev->dev; 286 + 287 + trng->base = devm_platform_ioremap_resource(pdev, 0); 288 + if (IS_ERR(trng->base)) 289 + return dev_err_probe(&pdev->dev, PTR_ERR(trng->base), 290 + "Error remapping memory for platform device.\n"); 291 + 292 + irq = platform_get_irq(pdev, 0); 293 + if (irq < 0) 294 + return irq; 295 + 296 + init_completion(&trng->random_done); 297 + init_completion(&trng->reseed_done); 298 + spin_lock_init(&trng->write_lock); 299 + 300 + ret = devm_request_irq(&pdev->dev, irq, starfive_trng_irq, 0, pdev->name, 301 + (void *)trng); 302 + if (ret) 303 + return dev_err_probe(&pdev->dev, irq, 304 + "Failed to register interrupt handler\n"); 305 + 306 + trng->hclk = devm_clk_get(&pdev->dev, "hclk"); 307 + if (IS_ERR(trng->hclk)) 308 + return dev_err_probe(&pdev->dev, PTR_ERR(trng->hclk), 309 + "Error getting hardware reference clock\n"); 310 + 311 + trng->ahb = devm_clk_get(&pdev->dev, "ahb"); 312 + if (IS_ERR(trng->ahb)) 313 + return dev_err_probe(&pdev->dev, PTR_ERR(trng->ahb), 314 + "Error getting ahb reference clock\n"); 315 + 316 + trng->rst = devm_reset_control_get_shared(&pdev->dev, NULL); 317 + if (IS_ERR(trng->rst)) 318 + return dev_err_probe(&pdev->dev, PTR_ERR(trng->rst), 319 + "Error getting hardware reset line\n"); 320 + 321 + clk_prepare_enable(trng->hclk); 322 + clk_prepare_enable(trng->ahb); 323 + reset_control_deassert(trng->rst); 324 + 325 + trng->rng.name = dev_driver_string(&pdev->dev); 326 + trng->rng.init = starfive_trng_init; 327 + trng->rng.cleanup = starfive_trng_cleanup; 328 + trng->rng.read = starfive_trng_read; 
329 + 330 + trng->mode = PRNG_256BIT; 331 + trng->mission = 1; 332 + trng->reseed = RANDOM_RESEED; 333 + 334 + pm_runtime_use_autosuspend(&pdev->dev); 335 + pm_runtime_set_autosuspend_delay(&pdev->dev, 100); 336 + pm_runtime_enable(&pdev->dev); 337 + 338 + ret = devm_hwrng_register(&pdev->dev, &trng->rng); 339 + if (ret) { 340 + pm_runtime_disable(&pdev->dev); 341 + 342 + reset_control_assert(trng->rst); 343 + clk_disable_unprepare(trng->ahb); 344 + clk_disable_unprepare(trng->hclk); 345 + 346 + return dev_err_probe(&pdev->dev, ret, "Failed to register hwrng\n"); 347 + } 348 + 349 + return 0; 350 + } 351 + 352 + static int __maybe_unused starfive_trng_suspend(struct device *dev) 353 + { 354 + struct starfive_trng *trng = dev_get_drvdata(dev); 355 + 356 + clk_disable_unprepare(trng->hclk); 357 + clk_disable_unprepare(trng->ahb); 358 + 359 + return 0; 360 + } 361 + 362 + static int __maybe_unused starfive_trng_resume(struct device *dev) 363 + { 364 + struct starfive_trng *trng = dev_get_drvdata(dev); 365 + 366 + clk_prepare_enable(trng->hclk); 367 + clk_prepare_enable(trng->ahb); 368 + 369 + return 0; 370 + } 371 + 372 + static DEFINE_SIMPLE_DEV_PM_OPS(starfive_trng_pm_ops, starfive_trng_suspend, 373 + starfive_trng_resume); 374 + 375 + static const struct of_device_id trng_dt_ids[] __maybe_unused = { 376 + { .compatible = "starfive,jh7110-trng" }, 377 + { } 378 + }; 379 + MODULE_DEVICE_TABLE(of, trng_dt_ids); 380 + 381 + static struct platform_driver starfive_trng_driver = { 382 + .probe = starfive_trng_probe, 383 + .driver = { 384 + .name = "jh7110-trng", 385 + .pm = &starfive_trng_pm_ops, 386 + .of_match_table = of_match_ptr(trng_dt_ids), 387 + }, 388 + }; 389 + 390 + module_platform_driver(starfive_trng_driver); 391 + 392 + MODULE_LICENSE("GPL"); 393 + MODULE_DESCRIPTION("StarFive True Random Number Generator");
-10
drivers/crypto/Kconfig
··· 390 390 source "drivers/crypto/nx/Kconfig" 391 391 endif 392 392 393 - config CRYPTO_DEV_UX500 394 - tristate "Driver for ST-Ericsson UX500 crypto hardware acceleration" 395 - depends on ARCH_U8500 396 - help 397 - Driver for ST-Ericsson UX500 crypto engine. 398 - 399 - if CRYPTO_DEV_UX500 400 - source "drivers/crypto/ux500/Kconfig" 401 - endif # if CRYPTO_DEV_UX500 402 - 403 393 config CRYPTO_DEV_ATMEL_AUTHENC 404 394 bool "Support for Atmel IPSEC/SSL hw accelerator" 405 395 depends on ARCH_AT91 || COMPILE_TEST
-1
drivers/crypto/Makefile
··· 43 43 obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/ 44 44 obj-y += stm32/ 45 45 obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o 46 - obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ 47 46 obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/ 48 47 obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ 49 48 obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/
+1
drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
··· 118 118 { "bus", 0, 200000000 }, 119 119 { "mod", 300000000, 0 }, 120 120 { "ram", 0, 400000000 }, 121 + { "trng", 0, 0 }, 121 122 }, 122 123 .esr = ESR_D1, 123 124 .prng = CE_ALG_PRNG,
+1 -1
drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h
··· 105 105 106 106 #define MAX_SG 8 107 107 108 - #define CE_MAX_CLOCKS 3 108 + #define CE_MAX_CLOCKS 4 109 109 110 110 #define MAXFLOW 4 111 111
+2 -2
drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
··· 452 452 } 453 453 kfree_sensitive(op->key); 454 454 op->keylen = keylen; 455 - op->key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA); 455 + op->key = kmemdup(key, keylen, GFP_KERNEL); 456 456 if (!op->key) 457 457 return -ENOMEM; 458 458 ··· 475 475 476 476 kfree_sensitive(op->key); 477 477 op->keylen = keylen; 478 - op->key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA); 478 + op->key = kmemdup(key, keylen, GFP_KERNEL); 479 479 if (!op->key) 480 480 return -ENOMEM; 481 481
+8 -5
drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
··· 16 16 #include <linux/interrupt.h> 17 17 #include <linux/io.h> 18 18 #include <linux/irq.h> 19 + #include <linux/kernel.h> 19 20 #include <linux/module.h> 20 21 #include <linux/of.h> 21 22 #include <linux/of_device.h> ··· 528 527 init_completion(&ss->flows[i].complete); 529 528 530 529 ss->flows[i].biv = devm_kmalloc(ss->dev, AES_BLOCK_SIZE, 531 - GFP_KERNEL | GFP_DMA); 530 + GFP_KERNEL); 532 531 if (!ss->flows[i].biv) { 533 532 err = -ENOMEM; 534 533 goto error_engine; ··· 536 535 537 536 for (j = 0; j < MAX_SG; j++) { 538 537 ss->flows[i].iv[j] = devm_kmalloc(ss->dev, AES_BLOCK_SIZE, 539 - GFP_KERNEL | GFP_DMA); 538 + GFP_KERNEL); 540 539 if (!ss->flows[i].iv[j]) { 541 540 err = -ENOMEM; 542 541 goto error_engine; ··· 545 544 546 545 /* the padding could be up to two block. */ 547 546 ss->flows[i].pad = devm_kmalloc(ss->dev, MAX_PAD_SIZE, 548 - GFP_KERNEL | GFP_DMA); 547 + GFP_KERNEL); 549 548 if (!ss->flows[i].pad) { 550 549 err = -ENOMEM; 551 550 goto error_engine; 552 551 } 553 - ss->flows[i].result = devm_kmalloc(ss->dev, SHA256_DIGEST_SIZE, 554 - GFP_KERNEL | GFP_DMA); 552 + ss->flows[i].result = 553 + devm_kmalloc(ss->dev, max(SHA256_DIGEST_SIZE, 554 + dma_get_cache_alignment()), 555 + GFP_KERNEL); 555 556 if (!ss->flows[i].result) { 556 557 err = -ENOMEM; 557 558 goto error_engine;
+2 -2
drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
··· 79 79 memcpy(tfmctx->key, key, keylen); 80 80 } 81 81 82 - tfmctx->ipad = kzalloc(bs, GFP_KERNEL | GFP_DMA); 82 + tfmctx->ipad = kzalloc(bs, GFP_KERNEL); 83 83 if (!tfmctx->ipad) 84 84 return -ENOMEM; 85 - tfmctx->opad = kzalloc(bs, GFP_KERNEL | GFP_DMA); 85 + tfmctx->opad = kzalloc(bs, GFP_KERNEL); 86 86 if (!tfmctx->opad) { 87 87 ret = -ENOMEM; 88 88 goto err_opad;
+9 -2
drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c
··· 11 11 */ 12 12 #include "sun8i-ss.h" 13 13 #include <linux/dma-mapping.h> 14 + #include <linux/kernel.h> 15 + #include <linux/mm.h> 14 16 #include <linux/pm_runtime.h> 15 17 #include <crypto/internal/rng.h> 16 18 ··· 27 25 ctx->seed = NULL; 28 26 } 29 27 if (!ctx->seed) 30 - ctx->seed = kmalloc(slen, GFP_KERNEL | GFP_DMA); 28 + ctx->seed = kmalloc(slen, GFP_KERNEL); 31 29 if (!ctx->seed) 32 30 return -ENOMEM; 33 31 ··· 60 58 struct sun8i_ss_rng_tfm_ctx *ctx = crypto_rng_ctx(tfm); 61 59 struct rng_alg *alg = crypto_rng_alg(tfm); 62 60 struct sun8i_ss_alg_template *algt; 61 + unsigned int todo_with_padding; 63 62 struct sun8i_ss_dev *ss; 64 63 dma_addr_t dma_iv, dma_dst; 65 64 unsigned int todo; ··· 84 81 todo = dlen + PRNG_SEED_SIZE + PRNG_DATA_SIZE; 85 82 todo -= todo % PRNG_DATA_SIZE; 86 83 87 - d = kzalloc(todo, GFP_KERNEL | GFP_DMA); 84 + todo_with_padding = ALIGN(todo, dma_get_cache_alignment()); 85 + if (todo_with_padding < todo || todo < dlen) 86 + return -EOVERFLOW; 87 + 88 + d = kzalloc(todo_with_padding, GFP_KERNEL); 88 89 if (!d) 89 90 return -ENOMEM; 90 91
+4 -6
drivers/crypto/amcc/crypto4xx_core.c
··· 522 522 { 523 523 struct skcipher_request *req; 524 524 struct scatterlist *dst; 525 - dma_addr_t addr; 526 525 527 526 req = skcipher_request_cast(pd_uinfo->async_req); 528 527 ··· 530 531 req->cryptlen, req->dst); 531 532 } else { 532 533 dst = pd_uinfo->dest_va; 533 - addr = dma_map_page(dev->core_dev->device, sg_page(dst), 534 - dst->offset, dst->length, DMA_FROM_DEVICE); 534 + dma_unmap_page(dev->core_dev->device, pd->dest, dst->length, 535 + DMA_FROM_DEVICE); 535 536 } 536 537 537 538 if (pd_uinfo->sa_va->sa_command_0.bf.save_iv == SA_SAVE_IV) { ··· 556 557 struct ahash_request *ahash_req; 557 558 558 559 ahash_req = ahash_request_cast(pd_uinfo->async_req); 559 - ctx = crypto_tfm_ctx(ahash_req->base.tfm); 560 + ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(ahash_req)); 560 561 561 - crypto4xx_copy_digest_to_dst(ahash_req->result, pd_uinfo, 562 - crypto_tfm_ctx(ahash_req->base.tfm)); 562 + crypto4xx_copy_digest_to_dst(ahash_req->result, pd_uinfo, ctx); 563 563 crypto4xx_ret_sg_desc(dev, pd_uinfo); 564 564 565 565 if (pd_uinfo->state & PD_ENTRY_BUSY)
+11
drivers/crypto/aspeed/Kconfig
··· 46 46 crypto driver. 47 47 Supports AES/DES symmetric-key encryption and decryption 48 48 with ECB/CBC/CFB/OFB/CTR options. 49 + 50 + config CRYPTO_DEV_ASPEED_ACRY 51 + bool "Enable Aspeed ACRY RSA Engine" 52 + depends on CRYPTO_DEV_ASPEED 53 + select CRYPTO_ENGINE 54 + select CRYPTO_RSA 55 + help 56 + Select here to enable Aspeed ECC/RSA Engine (ACRY) 57 + RSA driver. 58 + Supports 256 bits to 4096 bits RSA encryption/decryption 59 + and signature/verification.
+4
drivers/crypto/aspeed/Makefile
··· 5 5 aspeed_crypto-objs := aspeed-hace.o \ 6 6 $(hace-hash-y) \ 7 7 $(hace-crypto-y) 8 + 9 + aspeed_acry-$(CONFIG_CRYPTO_DEV_ASPEED_ACRY) += aspeed-acry.o 10 + 11 + obj-$(CONFIG_CRYPTO_DEV_ASPEED) += $(aspeed_acry-y)
+828
drivers/crypto/aspeed/aspeed-acry.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + /* 3 + * Copyright 2021 Aspeed Technology Inc. 4 + */ 5 + #include <crypto/akcipher.h> 6 + #include <crypto/algapi.h> 7 + #include <crypto/engine.h> 8 + #include <crypto/internal/akcipher.h> 9 + #include <crypto/internal/rsa.h> 10 + #include <crypto/scatterwalk.h> 11 + #include <linux/clk.h> 12 + #include <linux/platform_device.h> 13 + #include <linux/module.h> 14 + #include <linux/of_address.h> 15 + #include <linux/of_irq.h> 16 + #include <linux/of.h> 17 + #include <linux/of_device.h> 18 + #include <linux/mfd/syscon.h> 19 + #include <linux/interrupt.h> 20 + #include <linux/count_zeros.h> 21 + #include <linux/err.h> 22 + #include <linux/dma-mapping.h> 23 + #include <linux/regmap.h> 24 + 25 + #ifdef CONFIG_CRYPTO_DEV_ASPEED_DEBUG 26 + #define ACRY_DBG(d, fmt, ...) \ 27 + dev_info((d)->dev, "%s() " fmt, __func__, ##__VA_ARGS__) 28 + #else 29 + #define ACRY_DBG(d, fmt, ...) \ 30 + dev_dbg((d)->dev, "%s() " fmt, __func__, ##__VA_ARGS__) 31 + #endif 32 + 33 + /***************************** 34 + * * 35 + * ACRY register definitions * 36 + * * 37 + * ***************************/ 38 + #define ASPEED_ACRY_TRIGGER 0x000 /* ACRY Engine Control: trigger */ 39 + #define ASPEED_ACRY_DMA_CMD 0x048 /* ACRY Engine Control: Command */ 40 + #define ASPEED_ACRY_DMA_SRC_BASE 0x04C /* ACRY DRAM base address for DMA */ 41 + #define ASPEED_ACRY_DMA_LEN 0x050 /* ACRY Data Length of DMA */ 42 + #define ASPEED_ACRY_RSA_KEY_LEN 0x058 /* ACRY RSA Exp/Mod Key Length (Bits) */ 43 + #define ASPEED_ACRY_INT_MASK 0x3F8 /* ACRY Interrupt Mask */ 44 + #define ASPEED_ACRY_STATUS 0x3FC /* ACRY Interrupt Status */ 45 + 46 + /* rsa trigger */ 47 + #define ACRY_CMD_RSA_TRIGGER BIT(0) 48 + #define ACRY_CMD_DMA_RSA_TRIGGER BIT(1) 49 + 50 + /* rsa dma cmd */ 51 + #define ACRY_CMD_DMA_SRAM_MODE_RSA (0x3 << 4) 52 + #define ACRY_CMD_DMEM_AHB BIT(8) 53 + #define ACRY_CMD_DMA_SRAM_AHB_ENGINE 0 54 + 55 + /* rsa key len */ 56 + #define RSA_E_BITS_LEN(x) ((x) << 16) 
57 + #define RSA_M_BITS_LEN(x) (x) 58 + 59 + /* acry isr */ 60 + #define ACRY_RSA_ISR BIT(1) 61 + 62 + #define ASPEED_ACRY_BUFF_SIZE 0x1800 /* DMA buffer size */ 63 + #define ASPEED_ACRY_SRAM_MAX_LEN 2048 /* ACRY SRAM maximum length (Bytes) */ 64 + #define ASPEED_ACRY_RSA_MAX_KEY_LEN 512 /* ACRY RSA maximum key length (Bytes) */ 65 + 66 + #define CRYPTO_FLAGS_BUSY BIT(1) 67 + #define BYTES_PER_DWORD 4 68 + 69 + /***************************** 70 + * * 71 + * AHBC register definitions * 72 + * * 73 + * ***************************/ 74 + #define AHBC_REGION_PROT 0x240 75 + #define REGION_ACRYM BIT(23) 76 + 77 + #define ast_acry_write(acry, val, offset) \ 78 + writel((val), (acry)->regs + (offset)) 79 + 80 + #define ast_acry_read(acry, offset) \ 81 + readl((acry)->regs + (offset)) 82 + 83 + struct aspeed_acry_dev; 84 + 85 + typedef int (*aspeed_acry_fn_t)(struct aspeed_acry_dev *); 86 + 87 + struct aspeed_acry_dev { 88 + void __iomem *regs; 89 + struct device *dev; 90 + int irq; 91 + struct clk *clk; 92 + struct regmap *ahbc; 93 + 94 + struct akcipher_request *req; 95 + struct tasklet_struct done_task; 96 + aspeed_acry_fn_t resume; 97 + unsigned long flags; 98 + 99 + /* ACRY output SRAM buffer */ 100 + void __iomem *acry_sram; 101 + 102 + /* ACRY input DMA buffer */ 103 + void *buf_addr; 104 + dma_addr_t buf_dma_addr; 105 + 106 + struct crypto_engine *crypt_engine_rsa; 107 + 108 + /* ACRY SRAM memory mapped */ 109 + int exp_dw_mapping[ASPEED_ACRY_RSA_MAX_KEY_LEN]; 110 + int mod_dw_mapping[ASPEED_ACRY_RSA_MAX_KEY_LEN]; 111 + int data_byte_mapping[ASPEED_ACRY_SRAM_MAX_LEN]; 112 + }; 113 + 114 + struct aspeed_acry_ctx { 115 + struct crypto_engine_ctx enginectx; 116 + struct aspeed_acry_dev *acry_dev; 117 + 118 + struct rsa_key key; 119 + int enc; 120 + u8 *n; 121 + u8 *e; 122 + u8 *d; 123 + size_t n_sz; 124 + size_t e_sz; 125 + size_t d_sz; 126 + 127 + aspeed_acry_fn_t trigger; 128 + 129 + struct crypto_akcipher *fallback_tfm; 130 + }; 131 + 132 + struct aspeed_acry_alg { 
133 + struct aspeed_acry_dev *acry_dev; 134 + struct akcipher_alg akcipher; 135 + }; 136 + 137 + enum aspeed_rsa_key_mode { 138 + ASPEED_RSA_EXP_MODE = 0, 139 + ASPEED_RSA_MOD_MODE, 140 + ASPEED_RSA_DATA_MODE, 141 + }; 142 + 143 + static inline struct akcipher_request * 144 + akcipher_request_cast(struct crypto_async_request *req) 145 + { 146 + return container_of(req, struct akcipher_request, base); 147 + } 148 + 149 + static int aspeed_acry_do_fallback(struct akcipher_request *req) 150 + { 151 + struct crypto_akcipher *cipher = crypto_akcipher_reqtfm(req); 152 + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher); 153 + int err; 154 + 155 + akcipher_request_set_tfm(req, ctx->fallback_tfm); 156 + 157 + if (ctx->enc) 158 + err = crypto_akcipher_encrypt(req); 159 + else 160 + err = crypto_akcipher_decrypt(req); 161 + 162 + akcipher_request_set_tfm(req, cipher); 163 + 164 + return err; 165 + } 166 + 167 + static bool aspeed_acry_need_fallback(struct akcipher_request *req) 168 + { 169 + struct crypto_akcipher *cipher = crypto_akcipher_reqtfm(req); 170 + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher); 171 + 172 + return ctx->key.n_sz > ASPEED_ACRY_RSA_MAX_KEY_LEN; 173 + } 174 + 175 + static int aspeed_acry_handle_queue(struct aspeed_acry_dev *acry_dev, 176 + struct akcipher_request *req) 177 + { 178 + if (aspeed_acry_need_fallback(req)) { 179 + ACRY_DBG(acry_dev, "SW fallback\n"); 180 + return aspeed_acry_do_fallback(req); 181 + } 182 + 183 + return crypto_transfer_akcipher_request_to_engine(acry_dev->crypt_engine_rsa, req); 184 + } 185 + 186 + static int aspeed_acry_do_request(struct crypto_engine *engine, void *areq) 187 + { 188 + struct akcipher_request *req = akcipher_request_cast(areq); 189 + struct crypto_akcipher *cipher = crypto_akcipher_reqtfm(req); 190 + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher); 191 + struct aspeed_acry_dev *acry_dev = ctx->acry_dev; 192 + 193 + acry_dev->req = req; 194 + acry_dev->flags |= CRYPTO_FLAGS_BUSY; 195 + 
196 + return ctx->trigger(acry_dev); 197 + } 198 + 199 + static int aspeed_acry_complete(struct aspeed_acry_dev *acry_dev, int err) 200 + { 201 + struct akcipher_request *req = acry_dev->req; 202 + 203 + acry_dev->flags &= ~CRYPTO_FLAGS_BUSY; 204 + 205 + crypto_finalize_akcipher_request(acry_dev->crypt_engine_rsa, req, err); 206 + 207 + return err; 208 + } 209 + 210 + /* 211 + * Copy Data to DMA buffer for engine used. 212 + */ 213 + static void aspeed_acry_rsa_sg_copy_to_buffer(struct aspeed_acry_dev *acry_dev, 214 + u8 *buf, struct scatterlist *src, 215 + size_t nbytes) 216 + { 217 + static u8 dram_buffer[ASPEED_ACRY_SRAM_MAX_LEN]; 218 + int i = 0, j; 219 + int data_idx; 220 + 221 + ACRY_DBG(acry_dev, "\n"); 222 + 223 + scatterwalk_map_and_copy(dram_buffer, src, 0, nbytes, 0); 224 + 225 + for (j = nbytes - 1; j >= 0; j--) { 226 + data_idx = acry_dev->data_byte_mapping[i]; 227 + buf[data_idx] = dram_buffer[j]; 228 + i++; 229 + } 230 + 231 + for (; i < ASPEED_ACRY_SRAM_MAX_LEN; i++) { 232 + data_idx = acry_dev->data_byte_mapping[i]; 233 + buf[data_idx] = 0; 234 + } 235 + } 236 + 237 + /* 238 + * Copy Exp/Mod to DMA buffer for engine used. 
239 + * 240 + * Params: 241 + * - mode 0 : Exponential 242 + * - mode 1 : Modulus 243 + * 244 + * Example: 245 + * - DRAM memory layout: 246 + * D[0], D[4], D[8], D[12] 247 + * - ACRY SRAM memory layout should reverse the order of source data: 248 + * D[12], D[8], D[4], D[0] 249 + */ 250 + static int aspeed_acry_rsa_ctx_copy(struct aspeed_acry_dev *acry_dev, void *buf, 251 + const void *xbuf, size_t nbytes, 252 + enum aspeed_rsa_key_mode mode) 253 + { 254 + const u8 *src = xbuf; 255 + __le32 *dw_buf = buf; 256 + int nbits, ndw; 257 + int i, j, idx; 258 + u32 data = 0; 259 + 260 + ACRY_DBG(acry_dev, "nbytes:%zu, mode:%d\n", nbytes, mode); 261 + 262 + if (nbytes > ASPEED_ACRY_RSA_MAX_KEY_LEN) 263 + return -ENOMEM; 264 + 265 + /* Remove the leading zeros */ 266 + while (nbytes > 0 && src[0] == 0) { 267 + src++; 268 + nbytes--; 269 + } 270 + 271 + nbits = nbytes * 8; 272 + if (nbytes > 0) 273 + nbits -= count_leading_zeros(src[0]) - (BITS_PER_LONG - 8); 274 + 275 + /* double-world alignment */ 276 + ndw = DIV_ROUND_UP(nbytes, BYTES_PER_DWORD); 277 + 278 + if (nbytes > 0) { 279 + i = BYTES_PER_DWORD - nbytes % BYTES_PER_DWORD; 280 + i %= BYTES_PER_DWORD; 281 + 282 + for (j = ndw; j > 0; j--) { 283 + for (; i < BYTES_PER_DWORD; i++) { 284 + data <<= 8; 285 + data |= *src++; 286 + } 287 + 288 + i = 0; 289 + 290 + if (mode == ASPEED_RSA_EXP_MODE) 291 + idx = acry_dev->exp_dw_mapping[j - 1]; 292 + else if (mode == ASPEED_RSA_MOD_MODE) 293 + idx = acry_dev->mod_dw_mapping[j - 1]; 294 + 295 + dw_buf[idx] = cpu_to_le32(data); 296 + } 297 + } 298 + 299 + return nbits; 300 + } 301 + 302 + static int aspeed_acry_rsa_transfer(struct aspeed_acry_dev *acry_dev) 303 + { 304 + struct akcipher_request *req = acry_dev->req; 305 + u8 __iomem *sram_buffer = acry_dev->acry_sram; 306 + struct scatterlist *out_sg = req->dst; 307 + static u8 dram_buffer[ASPEED_ACRY_SRAM_MAX_LEN]; 308 + int leading_zero = 1; 309 + int result_nbytes; 310 + int i = 0, j; 311 + int data_idx; 312 + 313 + /* Set 
Data Memory to AHB(CPU) Access Mode */ 314 + ast_acry_write(acry_dev, ACRY_CMD_DMEM_AHB, ASPEED_ACRY_DMA_CMD); 315 + 316 + /* Disable ACRY SRAM protection */ 317 + regmap_update_bits(acry_dev->ahbc, AHBC_REGION_PROT, 318 + REGION_ACRYM, 0); 319 + 320 + result_nbytes = ASPEED_ACRY_SRAM_MAX_LEN; 321 + 322 + for (j = ASPEED_ACRY_SRAM_MAX_LEN - 1; j >= 0; j--) { 323 + data_idx = acry_dev->data_byte_mapping[j]; 324 + if (readb(sram_buffer + data_idx) == 0 && leading_zero) { 325 + result_nbytes--; 326 + } else { 327 + leading_zero = 0; 328 + dram_buffer[i] = readb(sram_buffer + data_idx); 329 + i++; 330 + } 331 + } 332 + 333 + ACRY_DBG(acry_dev, "result_nbytes:%d, req->dst_len:%d\n", 334 + result_nbytes, req->dst_len); 335 + 336 + if (result_nbytes <= req->dst_len) { 337 + scatterwalk_map_and_copy(dram_buffer, out_sg, 0, result_nbytes, 338 + 1); 339 + req->dst_len = result_nbytes; 340 + 341 + } else { 342 + dev_err(acry_dev->dev, "RSA engine error!\n"); 343 + } 344 + 345 + memzero_explicit(acry_dev->buf_addr, ASPEED_ACRY_BUFF_SIZE); 346 + 347 + return aspeed_acry_complete(acry_dev, 0); 348 + } 349 + 350 + static int aspeed_acry_rsa_trigger(struct aspeed_acry_dev *acry_dev) 351 + { 352 + struct akcipher_request *req = acry_dev->req; 353 + struct crypto_akcipher *cipher = crypto_akcipher_reqtfm(req); 354 + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher); 355 + int ne, nm; 356 + 357 + if (!ctx->n || !ctx->n_sz) { 358 + dev_err(acry_dev->dev, "%s: key n is not set\n", __func__); 359 + return -EINVAL; 360 + } 361 + 362 + memzero_explicit(acry_dev->buf_addr, ASPEED_ACRY_BUFF_SIZE); 363 + 364 + /* Copy source data to DMA buffer */ 365 + aspeed_acry_rsa_sg_copy_to_buffer(acry_dev, acry_dev->buf_addr, 366 + req->src, req->src_len); 367 + 368 + nm = aspeed_acry_rsa_ctx_copy(acry_dev, acry_dev->buf_addr, ctx->n, 369 + ctx->n_sz, ASPEED_RSA_MOD_MODE); 370 + if (ctx->enc) { 371 + if (!ctx->e || !ctx->e_sz) { 372 + dev_err(acry_dev->dev, "%s: key e is not set\n", 373 + 
__func__); 374 + return -EINVAL; 375 + } 376 + /* Copy key e to DMA buffer */ 377 + ne = aspeed_acry_rsa_ctx_copy(acry_dev, acry_dev->buf_addr, 378 + ctx->e, ctx->e_sz, 379 + ASPEED_RSA_EXP_MODE); 380 + } else { 381 + if (!ctx->d || !ctx->d_sz) { 382 + dev_err(acry_dev->dev, "%s: key d is not set\n", 383 + __func__); 384 + return -EINVAL; 385 + } 386 + /* Copy key d to DMA buffer */ 387 + ne = aspeed_acry_rsa_ctx_copy(acry_dev, acry_dev->buf_addr, 388 + ctx->key.d, ctx->key.d_sz, 389 + ASPEED_RSA_EXP_MODE); 390 + } 391 + 392 + ast_acry_write(acry_dev, acry_dev->buf_dma_addr, 393 + ASPEED_ACRY_DMA_SRC_BASE); 394 + ast_acry_write(acry_dev, (ne << 16) + nm, 395 + ASPEED_ACRY_RSA_KEY_LEN); 396 + ast_acry_write(acry_dev, ASPEED_ACRY_BUFF_SIZE, 397 + ASPEED_ACRY_DMA_LEN); 398 + 399 + acry_dev->resume = aspeed_acry_rsa_transfer; 400 + 401 + /* Enable ACRY SRAM protection */ 402 + regmap_update_bits(acry_dev->ahbc, AHBC_REGION_PROT, 403 + REGION_ACRYM, REGION_ACRYM); 404 + 405 + ast_acry_write(acry_dev, ACRY_RSA_ISR, ASPEED_ACRY_INT_MASK); 406 + ast_acry_write(acry_dev, ACRY_CMD_DMA_SRAM_MODE_RSA | 407 + ACRY_CMD_DMA_SRAM_AHB_ENGINE, ASPEED_ACRY_DMA_CMD); 408 + 409 + /* Trigger RSA engines */ 410 + ast_acry_write(acry_dev, ACRY_CMD_RSA_TRIGGER | 411 + ACRY_CMD_DMA_RSA_TRIGGER, ASPEED_ACRY_TRIGGER); 412 + 413 + return 0; 414 + } 415 + 416 + static int aspeed_acry_rsa_enc(struct akcipher_request *req) 417 + { 418 + struct crypto_akcipher *cipher = crypto_akcipher_reqtfm(req); 419 + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher); 420 + struct aspeed_acry_dev *acry_dev = ctx->acry_dev; 421 + 422 + ctx->trigger = aspeed_acry_rsa_trigger; 423 + ctx->enc = 1; 424 + 425 + return aspeed_acry_handle_queue(acry_dev, req); 426 + } 427 + 428 + static int aspeed_acry_rsa_dec(struct akcipher_request *req) 429 + { 430 + struct crypto_akcipher *cipher = crypto_akcipher_reqtfm(req); 431 + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(cipher); 432 + struct aspeed_acry_dev *acry_dev 
= ctx->acry_dev; 433 + 434 + ctx->trigger = aspeed_acry_rsa_trigger; 435 + ctx->enc = 0; 436 + 437 + return aspeed_acry_handle_queue(acry_dev, req); 438 + } 439 + 440 + static u8 *aspeed_rsa_key_copy(u8 *src, size_t len) 441 + { 442 + return kmemdup(src, len, GFP_KERNEL); 443 + } 444 + 445 + static int aspeed_rsa_set_n(struct aspeed_acry_ctx *ctx, u8 *value, 446 + size_t len) 447 + { 448 + ctx->n_sz = len; 449 + ctx->n = aspeed_rsa_key_copy(value, len); 450 + if (!ctx->n) 451 + return -ENOMEM; 452 + 453 + return 0; 454 + } 455 + 456 + static int aspeed_rsa_set_e(struct aspeed_acry_ctx *ctx, u8 *value, 457 + size_t len) 458 + { 459 + ctx->e_sz = len; 460 + ctx->e = aspeed_rsa_key_copy(value, len); 461 + if (!ctx->e) 462 + return -ENOMEM; 463 + 464 + return 0; 465 + } 466 + 467 + static int aspeed_rsa_set_d(struct aspeed_acry_ctx *ctx, u8 *value, 468 + size_t len) 469 + { 470 + ctx->d_sz = len; 471 + ctx->d = aspeed_rsa_key_copy(value, len); 472 + if (!ctx->d) 473 + return -ENOMEM; 474 + 475 + return 0; 476 + } 477 + 478 + static void aspeed_rsa_key_free(struct aspeed_acry_ctx *ctx) 479 + { 480 + kfree_sensitive(ctx->n); 481 + kfree_sensitive(ctx->e); 482 + kfree_sensitive(ctx->d); 483 + ctx->n_sz = 0; 484 + ctx->e_sz = 0; 485 + ctx->d_sz = 0; 486 + } 487 + 488 + static int aspeed_acry_rsa_setkey(struct crypto_akcipher *tfm, const void *key, 489 + unsigned int keylen, int priv) 490 + { 491 + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm); 492 + struct aspeed_acry_dev *acry_dev = ctx->acry_dev; 493 + int ret; 494 + 495 + if (priv) 496 + ret = rsa_parse_priv_key(&ctx->key, key, keylen); 497 + else 498 + ret = rsa_parse_pub_key(&ctx->key, key, keylen); 499 + 500 + if (ret) { 501 + dev_err(acry_dev->dev, "rsa parse key failed, ret:0x%x\n", 502 + ret); 503 + return ret; 504 + } 505 + 506 + /* Aspeed engine supports up to 4096 bits, 507 + * Use software fallback instead. 
508 + */ 509 + if (ctx->key.n_sz > ASPEED_ACRY_RSA_MAX_KEY_LEN) 510 + return 0; 511 + 512 + ret = aspeed_rsa_set_n(ctx, (u8 *)ctx->key.n, ctx->key.n_sz); 513 + if (ret) 514 + goto err; 515 + 516 + ret = aspeed_rsa_set_e(ctx, (u8 *)ctx->key.e, ctx->key.e_sz); 517 + if (ret) 518 + goto err; 519 + 520 + if (priv) { 521 + ret = aspeed_rsa_set_d(ctx, (u8 *)ctx->key.d, ctx->key.d_sz); 522 + if (ret) 523 + goto err; 524 + } 525 + 526 + return 0; 527 + 528 + err: 529 + dev_err(acry_dev->dev, "rsa set key failed\n"); 530 + aspeed_rsa_key_free(ctx); 531 + 532 + return ret; 533 + } 534 + 535 + static int aspeed_acry_rsa_set_pub_key(struct crypto_akcipher *tfm, 536 + const void *key, 537 + unsigned int keylen) 538 + { 539 + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm); 540 + int ret; 541 + 542 + ret = crypto_akcipher_set_pub_key(ctx->fallback_tfm, key, keylen); 543 + if (ret) 544 + return ret; 545 + 546 + return aspeed_acry_rsa_setkey(tfm, key, keylen, 0); 547 + } 548 + 549 + static int aspeed_acry_rsa_set_priv_key(struct crypto_akcipher *tfm, 550 + const void *key, 551 + unsigned int keylen) 552 + { 553 + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm); 554 + int ret; 555 + 556 + ret = crypto_akcipher_set_priv_key(ctx->fallback_tfm, key, keylen); 557 + if (ret) 558 + return ret; 559 + 560 + return aspeed_acry_rsa_setkey(tfm, key, keylen, 1); 561 + } 562 + 563 + static unsigned int aspeed_acry_rsa_max_size(struct crypto_akcipher *tfm) 564 + { 565 + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm); 566 + 567 + if (ctx->key.n_sz > ASPEED_ACRY_RSA_MAX_KEY_LEN) 568 + return crypto_akcipher_maxsize(ctx->fallback_tfm); 569 + 570 + return ctx->n_sz; 571 + } 572 + 573 + static int aspeed_acry_rsa_init_tfm(struct crypto_akcipher *tfm) 574 + { 575 + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm); 576 + struct akcipher_alg *alg = crypto_akcipher_alg(tfm); 577 + const char *name = crypto_tfm_alg_name(&tfm->base); 578 + struct aspeed_acry_alg *acry_alg; 579 + 580 + 
acry_alg = container_of(alg, struct aspeed_acry_alg, akcipher); 581 + 582 + ctx->acry_dev = acry_alg->acry_dev; 583 + 584 + ctx->fallback_tfm = crypto_alloc_akcipher(name, 0, CRYPTO_ALG_ASYNC | 585 + CRYPTO_ALG_NEED_FALLBACK); 586 + if (IS_ERR(ctx->fallback_tfm)) { 587 + dev_err(ctx->acry_dev->dev, "ERROR: Cannot allocate fallback for %s %ld\n", 588 + name, PTR_ERR(ctx->fallback_tfm)); 589 + return PTR_ERR(ctx->fallback_tfm); 590 + } 591 + 592 + ctx->enginectx.op.do_one_request = aspeed_acry_do_request; 593 + ctx->enginectx.op.prepare_request = NULL; 594 + ctx->enginectx.op.unprepare_request = NULL; 595 + 596 + return 0; 597 + } 598 + 599 + static void aspeed_acry_rsa_exit_tfm(struct crypto_akcipher *tfm) 600 + { 601 + struct aspeed_acry_ctx *ctx = akcipher_tfm_ctx(tfm); 602 + 603 + crypto_free_akcipher(ctx->fallback_tfm); 604 + } 605 + 606 + static struct aspeed_acry_alg aspeed_acry_akcipher_algs[] = { 607 + { 608 + .akcipher = { 609 + .encrypt = aspeed_acry_rsa_enc, 610 + .decrypt = aspeed_acry_rsa_dec, 611 + .sign = aspeed_acry_rsa_dec, 612 + .verify = aspeed_acry_rsa_enc, 613 + .set_pub_key = aspeed_acry_rsa_set_pub_key, 614 + .set_priv_key = aspeed_acry_rsa_set_priv_key, 615 + .max_size = aspeed_acry_rsa_max_size, 616 + .init = aspeed_acry_rsa_init_tfm, 617 + .exit = aspeed_acry_rsa_exit_tfm, 618 + .base = { 619 + .cra_name = "rsa", 620 + .cra_driver_name = "aspeed-rsa", 621 + .cra_priority = 300, 622 + .cra_flags = CRYPTO_ALG_TYPE_AKCIPHER | 623 + CRYPTO_ALG_ASYNC | 624 + CRYPTO_ALG_KERN_DRIVER_ONLY | 625 + CRYPTO_ALG_NEED_FALLBACK, 626 + .cra_module = THIS_MODULE, 627 + .cra_ctxsize = sizeof(struct aspeed_acry_ctx), 628 + }, 629 + }, 630 + }, 631 + }; 632 + 633 + static void aspeed_acry_register(struct aspeed_acry_dev *acry_dev) 634 + { 635 + int i, rc; 636 + 637 + for (i = 0; i < ARRAY_SIZE(aspeed_acry_akcipher_algs); i++) { 638 + aspeed_acry_akcipher_algs[i].acry_dev = acry_dev; 639 + rc = crypto_register_akcipher(&aspeed_acry_akcipher_algs[i].akcipher); 
640 + if (rc) { 641 + ACRY_DBG(acry_dev, "Failed to register %s\n", 642 + aspeed_acry_akcipher_algs[i].akcipher.base.cra_name); 643 + } 644 + } 645 + } 646 + 647 + static void aspeed_acry_unregister(struct aspeed_acry_dev *acry_dev) 648 + { 649 + int i; 650 + 651 + for (i = 0; i < ARRAY_SIZE(aspeed_acry_akcipher_algs); i++) 652 + crypto_unregister_akcipher(&aspeed_acry_akcipher_algs[i].akcipher); 653 + } 654 + 655 + /* ACRY interrupt service routine. */ 656 + static irqreturn_t aspeed_acry_irq(int irq, void *dev) 657 + { 658 + struct aspeed_acry_dev *acry_dev = (struct aspeed_acry_dev *)dev; 659 + u32 sts; 660 + 661 + sts = ast_acry_read(acry_dev, ASPEED_ACRY_STATUS); 662 + ast_acry_write(acry_dev, sts, ASPEED_ACRY_STATUS); 663 + 664 + ACRY_DBG(acry_dev, "irq sts:0x%x\n", sts); 665 + 666 + if (sts & ACRY_RSA_ISR) { 667 + /* Stop RSA engine */ 668 + ast_acry_write(acry_dev, 0, ASPEED_ACRY_TRIGGER); 669 + 670 + if (acry_dev->flags & CRYPTO_FLAGS_BUSY) 671 + tasklet_schedule(&acry_dev->done_task); 672 + else 673 + dev_err(acry_dev->dev, "RSA no active requests.\n"); 674 + } 675 + 676 + return IRQ_HANDLED; 677 + } 678 + 679 + /* 680 + * ACRY SRAM has its own memory layout. 681 + * Set the DRAM to SRAM indexing for future used. 682 + */ 683 + static void aspeed_acry_sram_mapping(struct aspeed_acry_dev *acry_dev) 684 + { 685 + int i, j = 0; 686 + 687 + for (i = 0; i < (ASPEED_ACRY_SRAM_MAX_LEN / BYTES_PER_DWORD); i++) { 688 + acry_dev->exp_dw_mapping[i] = j; 689 + acry_dev->mod_dw_mapping[i] = j + 4; 690 + acry_dev->data_byte_mapping[(i * 4)] = (j + 8) * 4; 691 + acry_dev->data_byte_mapping[(i * 4) + 1] = (j + 8) * 4 + 1; 692 + acry_dev->data_byte_mapping[(i * 4) + 2] = (j + 8) * 4 + 2; 693 + acry_dev->data_byte_mapping[(i * 4) + 3] = (j + 8) * 4 + 3; 694 + j++; 695 + j = j % 4 ? 
j : j + 8; 696 + } 697 + } 698 + 699 + static void aspeed_acry_done_task(unsigned long data) 700 + { 701 + struct aspeed_acry_dev *acry_dev = (struct aspeed_acry_dev *)data; 702 + 703 + (void)acry_dev->resume(acry_dev); 704 + } 705 + 706 + static const struct of_device_id aspeed_acry_of_matches[] = { 707 + { .compatible = "aspeed,ast2600-acry", }, 708 + {}, 709 + }; 710 + 711 + static int aspeed_acry_probe(struct platform_device *pdev) 712 + { 713 + struct aspeed_acry_dev *acry_dev; 714 + struct device *dev = &pdev->dev; 715 + struct resource *res; 716 + int rc; 717 + 718 + acry_dev = devm_kzalloc(dev, sizeof(struct aspeed_acry_dev), 719 + GFP_KERNEL); 720 + if (!acry_dev) 721 + return -ENOMEM; 722 + 723 + acry_dev->dev = dev; 724 + 725 + platform_set_drvdata(pdev, acry_dev); 726 + 727 + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 728 + acry_dev->regs = devm_ioremap_resource(dev, res); 729 + if (IS_ERR(acry_dev->regs)) 730 + return PTR_ERR(acry_dev->regs); 731 + 732 + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); 733 + acry_dev->acry_sram = devm_ioremap_resource(dev, res); 734 + if (IS_ERR(acry_dev->acry_sram)) 735 + return PTR_ERR(acry_dev->acry_sram); 736 + 737 + /* Get irq number and register it */ 738 + acry_dev->irq = platform_get_irq(pdev, 0); 739 + if (acry_dev->irq < 0) 740 + return -ENXIO; 741 + 742 + rc = devm_request_irq(dev, acry_dev->irq, aspeed_acry_irq, 0, 743 + dev_name(dev), acry_dev); 744 + if (rc) { 745 + dev_err(dev, "Failed to request irq.\n"); 746 + return rc; 747 + } 748 + 749 + acry_dev->clk = devm_clk_get_enabled(dev, NULL); 750 + if (IS_ERR(acry_dev->clk)) { 751 + dev_err(dev, "Failed to get acry clk\n"); 752 + return PTR_ERR(acry_dev->clk); 753 + } 754 + 755 + acry_dev->ahbc = syscon_regmap_lookup_by_phandle(dev->of_node, 756 + "aspeed,ahbc"); 757 + if (IS_ERR(acry_dev->ahbc)) { 758 + dev_err(dev, "Failed to get AHBC regmap\n"); 759 + return -ENODEV; 760 + } 761 + 762 + /* Initialize crypto hardware engine structure for 
RSA */ 763 + acry_dev->crypt_engine_rsa = crypto_engine_alloc_init(dev, true); 764 + if (!acry_dev->crypt_engine_rsa) { 765 + rc = -ENOMEM; 766 + goto clk_exit; 767 + } 768 + 769 + rc = crypto_engine_start(acry_dev->crypt_engine_rsa); 770 + if (rc) 771 + goto err_engine_rsa_start; 772 + 773 + tasklet_init(&acry_dev->done_task, aspeed_acry_done_task, 774 + (unsigned long)acry_dev); 775 + 776 + /* Set Data Memory to AHB(CPU) Access Mode */ 777 + ast_acry_write(acry_dev, ACRY_CMD_DMEM_AHB, ASPEED_ACRY_DMA_CMD); 778 + 779 + /* Initialize ACRY SRAM index */ 780 + aspeed_acry_sram_mapping(acry_dev); 781 + 782 + acry_dev->buf_addr = dmam_alloc_coherent(dev, ASPEED_ACRY_BUFF_SIZE, 783 + &acry_dev->buf_dma_addr, 784 + GFP_KERNEL); 785 + memzero_explicit(acry_dev->buf_addr, ASPEED_ACRY_BUFF_SIZE); 786 + 787 + aspeed_acry_register(acry_dev); 788 + 789 + dev_info(dev, "Aspeed ACRY Accelerator successfully registered\n"); 790 + 791 + return 0; 792 + 793 + err_engine_rsa_start: 794 + crypto_engine_exit(acry_dev->crypt_engine_rsa); 795 + clk_exit: 796 + clk_disable_unprepare(acry_dev->clk); 797 + 798 + return rc; 799 + } 800 + 801 + static int aspeed_acry_remove(struct platform_device *pdev) 802 + { 803 + struct aspeed_acry_dev *acry_dev = platform_get_drvdata(pdev); 804 + 805 + aspeed_acry_unregister(acry_dev); 806 + crypto_engine_exit(acry_dev->crypt_engine_rsa); 807 + tasklet_kill(&acry_dev->done_task); 808 + clk_disable_unprepare(acry_dev->clk); 809 + 810 + return 0; 811 + } 812 + 813 + MODULE_DEVICE_TABLE(of, aspeed_acry_of_matches); 814 + 815 + static struct platform_driver aspeed_acry_driver = { 816 + .probe = aspeed_acry_probe, 817 + .remove = aspeed_acry_remove, 818 + .driver = { 819 + .name = KBUILD_MODNAME, 820 + .of_match_table = aspeed_acry_of_matches, 821 + }, 822 + }; 823 + 824 + module_platform_driver(aspeed_acry_driver); 825 + 826 + MODULE_AUTHOR("Neal Liu <neal_liu@aspeedtech.com>"); 827 + MODULE_DESCRIPTION("ASPEED ACRY driver for hardware RSA Engine"); 828 + 
MODULE_LICENSE("GPL");
+1 -4
drivers/crypto/aspeed/aspeed-hace.c
··· 99 99 const struct of_device_id *hace_dev_id; 100 100 struct aspeed_engine_hash *hash_engine; 101 101 struct aspeed_hace_dev *hace_dev; 102 - struct resource *res; 103 102 int rc; 104 103 105 104 hace_dev = devm_kzalloc(&pdev->dev, sizeof(struct aspeed_hace_dev), ··· 117 118 hash_engine = &hace_dev->hash_engine; 118 119 crypto_engine = &hace_dev->crypto_engine; 119 120 120 - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 121 - 122 121 platform_set_drvdata(pdev, hace_dev); 123 122 124 - hace_dev->regs = devm_ioremap_resource(&pdev->dev, res); 123 + hace_dev->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); 125 124 if (IS_ERR(hace_dev->regs)) 126 125 return PTR_ERR(hace_dev->regs); 127 126
+1 -1
drivers/crypto/aspeed/aspeed-hace.h
··· 183 183 struct aspeed_hace_dev *hace_dev; 184 184 unsigned long flags; /* hmac flag */ 185 185 186 - struct aspeed_sha_hmac_ctx base[0]; 186 + struct aspeed_sha_hmac_ctx base[]; 187 187 }; 188 188 189 189 struct aspeed_sham_reqctx {
+4 -3
drivers/crypto/atmel-aes.c
··· 554 554 } 555 555 556 556 if (dd->is_async) 557 - dd->areq->complete(dd->areq, err); 557 + crypto_request_complete(dd->areq, err); 558 558 559 559 tasklet_schedule(&dd->queue_task); 560 560 ··· 955 955 return ret; 956 956 957 957 if (backlog) 958 - backlog->complete(backlog, -EINPROGRESS); 958 + crypto_request_complete(backlog, -EINPROGRESS); 959 959 960 960 ctx = crypto_tfm_ctx(areq->tfm); 961 961 ··· 1879 1879 struct atmel_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); 1880 1880 int err; 1881 1881 1882 - err = xts_check_key(crypto_skcipher_tfm(tfm), key, keylen); 1882 + err = xts_verify_key(tfm, key, keylen); 1883 1883 if (err) 1884 1884 return err; 1885 1885 ··· 2510 2510 /* keep only major version number */ 2511 2511 switch (dd->hw_version & 0xff0) { 2512 2512 case 0x700: 2513 + case 0x600: 2513 2514 case 0x500: 2514 2515 dd->caps.has_dualbuff = 1; 2515 2516 dd->caps.has_cfb64 = 1;
+1 -2
drivers/crypto/atmel-ecc.c
··· 313 313 314 314 static int atmel_ecc_probe(struct i2c_client *client) 315 315 { 316 - const struct i2c_device_id *id = i2c_client_get_device_id(client); 317 316 struct atmel_i2c_client_priv *i2c_priv; 318 317 int ret; 319 318 320 - ret = atmel_i2c_probe(client, id); 319 + ret = atmel_i2c_probe(client); 321 320 if (ret) 322 321 return ret; 323 322
+2 -2
drivers/crypto/atmel-i2c.c
··· 59 59 * Read the word from Configuration zone that contains the lock bytes 60 60 * (UserExtra, Selector, LockValue, LockConfig). 61 61 */ 62 - cmd->param1 = CONFIG_ZONE; 62 + cmd->param1 = CONFIGURATION_ZONE; 63 63 cmd->param2 = cpu_to_le16(DEVICE_LOCK_ADDR); 64 64 cmd->count = READ_COUNT; 65 65 ··· 324 324 return ret; 325 325 } 326 326 327 - int atmel_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) 327 + int atmel_i2c_probe(struct i2c_client *client) 328 328 { 329 329 struct atmel_i2c_client_priv *i2c_priv; 330 330 struct device *dev = &client->dev;
+2 -2
drivers/crypto/atmel-i2c.h
··· 63 63 #define STATUS_WAKE_SUCCESSFUL 0x11 64 64 65 65 /* Definitions for eeprom organization */ 66 - #define CONFIG_ZONE 0 66 + #define CONFIGURATION_ZONE 0 67 67 68 68 /* Definitions for Indexes common to all commands */ 69 69 #define RSP_DATA_IDX 1 /* buffer index of data in response */ ··· 167 167 struct atmel_i2c_cmd cmd; 168 168 }; 169 169 170 - int atmel_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id); 170 + int atmel_i2c_probe(struct i2c_client *client); 171 171 172 172 void atmel_i2c_enqueue(struct atmel_i2c_work_data *work_data, 173 173 void (*cbk)(struct atmel_i2c_work_data *work_data,
+5 -5
drivers/crypto/atmel-sha.c
··· 292 292 clk_disable(dd->iclk); 293 293 294 294 if ((dd->is_async || dd->force_complete) && req->base.complete) 295 - req->base.complete(&req->base, err); 295 + ahash_request_complete(req, err); 296 296 297 297 /* handle new request */ 298 298 tasklet_schedule(&dd->queue_task); ··· 1080 1080 return ret; 1081 1081 1082 1082 if (backlog) 1083 - backlog->complete(backlog, -EINPROGRESS); 1083 + crypto_request_complete(backlog, -EINPROGRESS); 1084 1084 1085 1085 ctx = crypto_tfm_ctx(async_req->tfm); 1086 1086 ··· 2099 2099 unsigned int digestlen; 2100 2100 }; 2101 2101 2102 - static void atmel_sha_authenc_complete(struct crypto_async_request *areq, 2103 - int err) 2102 + static void atmel_sha_authenc_complete(void *data, int err) 2104 2103 { 2105 - struct ahash_request *req = areq->data; 2104 + struct ahash_request *req = data; 2106 2105 struct atmel_sha_authenc_reqctx *authctx = ahash_request_ctx(req); 2107 2106 2108 2107 authctx->cb(authctx->aes_dev, err, authctx->base.dd->is_async); ··· 2508 2509 /* keep only major version number */ 2509 2510 switch (dd->hw_version & 0xff0) { 2510 2511 case 0x700: 2512 + case 0x600: 2511 2513 case 0x510: 2512 2514 dd->caps.has_dma = 1; 2513 2515 dd->caps.has_dualbuff = 1;
+1 -2
drivers/crypto/atmel-sha204a.c
··· 93 93 94 94 static int atmel_sha204a_probe(struct i2c_client *client) 95 95 { 96 - const struct i2c_device_id *id = i2c_client_get_device_id(client); 97 96 struct atmel_i2c_client_priv *i2c_priv; 98 97 int ret; 99 98 100 - ret = atmel_i2c_probe(client, id); 99 + ret = atmel_i2c_probe(client); 101 100 if (ret) 102 101 return ret; 103 102
+2 -2
drivers/crypto/atmel-tdes.c
··· 590 590 if (!err && (rctx->mode & TDES_FLAGS_OPMODE_MASK) != TDES_FLAGS_ECB) 591 591 atmel_tdes_set_iv_as_last_ciphertext_block(dd); 592 592 593 - req->base.complete(&req->base, err); 593 + skcipher_request_complete(req, err); 594 594 } 595 595 596 596 static int atmel_tdes_handle_queue(struct atmel_tdes_dev *dd, ··· 619 619 return ret; 620 620 621 621 if (backlog) 622 - backlog->complete(backlog, -EINPROGRESS); 622 + crypto_request_complete(backlog, -EINPROGRESS); 623 623 624 624 req = skcipher_request_cast(async_req); 625 625
+7 -7
drivers/crypto/axis/artpec6_crypto.c
··· 1621 1621 crypto_skcipher_ctx(cipher); 1622 1622 int ret; 1623 1623 1624 - ret = xts_check_key(&cipher->base, key, keylen); 1624 + ret = xts_verify_key(cipher, key, keylen); 1625 1625 if (ret) 1626 1626 return ret; 1627 1627 ··· 2143 2143 2144 2144 list_for_each_entry_safe(req, n, &complete_in_progress, 2145 2145 complete_in_progress) { 2146 - req->req->complete(req->req, -EINPROGRESS); 2146 + crypto_request_complete(req->req, -EINPROGRESS); 2147 2147 } 2148 2148 } 2149 2149 2150 2150 static void artpec6_crypto_complete_crypto(struct crypto_async_request *req) 2151 2151 { 2152 - req->complete(req, 0); 2152 + crypto_request_complete(req, 0); 2153 2153 } 2154 2154 2155 2155 static void ··· 2161 2161 scatterwalk_map_and_copy(cipher_req->iv, cipher_req->src, 2162 2162 cipher_req->cryptlen - AES_BLOCK_SIZE, 2163 2163 AES_BLOCK_SIZE, 0); 2164 - req->complete(req, 0); 2164 + skcipher_request_complete(cipher_req, 0); 2165 2165 } 2166 2166 2167 2167 static void ··· 2173 2173 scatterwalk_map_and_copy(cipher_req->iv, cipher_req->dst, 2174 2174 cipher_req->cryptlen - AES_BLOCK_SIZE, 2175 2175 AES_BLOCK_SIZE, 0); 2176 - req->complete(req, 0); 2176 + skcipher_request_complete(cipher_req, 0); 2177 2177 } 2178 2178 2179 2179 static void artpec6_crypto_complete_aead(struct crypto_async_request *req) ··· 2211 2211 } 2212 2212 } 2213 2213 2214 - req->complete(req, result); 2214 + aead_request_complete(areq, result); 2215 2215 } 2216 2216 2217 2217 static void artpec6_crypto_complete_hash(struct crypto_async_request *req) 2218 2218 { 2219 - req->complete(req, 0); 2219 + crypto_request_complete(req, 0); 2220 2220 } 2221 2221 2222 2222
+34 -64
drivers/crypto/bcm/cipher.c
··· 1614 1614 spu_chunk_cleanup(rctx); 1615 1615 1616 1616 if (areq) 1617 - areq->complete(areq, err); 1617 + crypto_request_complete(areq, err); 1618 1618 } 1619 1619 1620 1620 /** ··· 2570 2570 return payload_len > ctx->max_payload; 2571 2571 } 2572 2572 2573 - static void aead_complete(struct crypto_async_request *areq, int err) 2574 - { 2575 - struct aead_request *req = 2576 - container_of(areq, struct aead_request, base); 2577 - struct iproc_reqctx_s *rctx = aead_request_ctx(req); 2578 - struct crypto_aead *aead = crypto_aead_reqtfm(req); 2579 - 2580 - flow_log("%s() err:%d\n", __func__, err); 2581 - 2582 - areq->tfm = crypto_aead_tfm(aead); 2583 - 2584 - areq->complete = rctx->old_complete; 2585 - areq->data = rctx->old_data; 2586 - 2587 - areq->complete(areq, err); 2588 - } 2589 - 2590 2573 static int aead_do_fallback(struct aead_request *req, bool is_encrypt) 2591 2574 { 2592 2575 struct crypto_aead *aead = crypto_aead_reqtfm(req); 2593 2576 struct crypto_tfm *tfm = crypto_aead_tfm(aead); 2594 2577 struct iproc_reqctx_s *rctx = aead_request_ctx(req); 2595 2578 struct iproc_ctx_s *ctx = crypto_tfm_ctx(tfm); 2596 - int err; 2597 - u32 req_flags; 2579 + struct aead_request *subreq; 2598 2580 2599 2581 flow_log("%s() enc:%u\n", __func__, is_encrypt); 2600 2582 2601 - if (ctx->fallback_cipher) { 2602 - /* Store the cipher tfm and then use the fallback tfm */ 2603 - rctx->old_tfm = tfm; 2604 - aead_request_set_tfm(req, ctx->fallback_cipher); 2605 - /* 2606 - * Save the callback and chain ourselves in, so we can restore 2607 - * the tfm 2608 - */ 2609 - rctx->old_complete = req->base.complete; 2610 - rctx->old_data = req->base.data; 2611 - req_flags = aead_request_flags(req); 2612 - aead_request_set_callback(req, req_flags, aead_complete, req); 2613 - err = is_encrypt ? 
crypto_aead_encrypt(req) : 2614 - crypto_aead_decrypt(req); 2583 + if (!ctx->fallback_cipher) 2584 + return -EINVAL; 2615 2585 2616 - if (err == 0) { 2617 - /* 2618 - * fallback was synchronous (did not return 2619 - * -EINPROGRESS). So restore request state here. 2620 - */ 2621 - aead_request_set_callback(req, req_flags, 2622 - rctx->old_complete, req); 2623 - req->base.data = rctx->old_data; 2624 - aead_request_set_tfm(req, aead); 2625 - flow_log("%s() fallback completed successfully\n\n", 2626 - __func__); 2627 - } 2628 - } else { 2629 - err = -EINVAL; 2630 - } 2586 + subreq = &rctx->req; 2587 + aead_request_set_tfm(subreq, ctx->fallback_cipher); 2588 + aead_request_set_callback(subreq, aead_request_flags(req), 2589 + req->base.complete, req->base.data); 2590 + aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, 2591 + req->iv); 2592 + aead_request_set_ad(subreq, req->assoclen); 2631 2593 2632 - return err; 2594 + return is_encrypt ? crypto_aead_encrypt(req) : 2595 + crypto_aead_decrypt(req); 2633 2596 } 2634 2597 2635 2598 static int aead_enqueue(struct aead_request *req, bool is_encrypt) ··· 4206 4243 4207 4244 static int aead_cra_init(struct crypto_aead *aead) 4208 4245 { 4246 + unsigned int reqsize = sizeof(struct iproc_reqctx_s); 4209 4247 struct crypto_tfm *tfm = crypto_aead_tfm(aead); 4210 4248 struct iproc_ctx_s *ctx = crypto_tfm_ctx(tfm); 4211 4249 struct crypto_alg *alg = tfm->__crt_alg; ··· 4218 4254 4219 4255 flow_log("%s()\n", __func__); 4220 4256 4221 - crypto_aead_set_reqsize(aead, sizeof(struct iproc_reqctx_s)); 4222 4257 ctx->is_esp = false; 4223 4258 ctx->salt_len = 0; 4224 4259 ctx->salt_offset = 0; ··· 4226 4263 get_random_bytes(ctx->iv, MAX_IV_SIZE); 4227 4264 flow_dump(" iv: ", ctx->iv, MAX_IV_SIZE); 4228 4265 4229 - if (!err) { 4230 - if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) { 4231 - flow_log("%s() creating fallback cipher\n", __func__); 4266 + if (err) 4267 + goto out; 4232 4268 4233 - ctx->fallback_cipher = 4234 - 
crypto_alloc_aead(alg->cra_name, 0, 4235 - CRYPTO_ALG_ASYNC | 4236 - CRYPTO_ALG_NEED_FALLBACK); 4237 - if (IS_ERR(ctx->fallback_cipher)) { 4238 - pr_err("%s() Error: failed to allocate fallback for %s\n", 4239 - __func__, alg->cra_name); 4240 - return PTR_ERR(ctx->fallback_cipher); 4241 - } 4242 - } 4269 + if (!(alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK)) 4270 + goto reqsize; 4271 + 4272 + flow_log("%s() creating fallback cipher\n", __func__); 4273 + 4274 + ctx->fallback_cipher = crypto_alloc_aead(alg->cra_name, 0, 4275 + CRYPTO_ALG_ASYNC | 4276 + CRYPTO_ALG_NEED_FALLBACK); 4277 + if (IS_ERR(ctx->fallback_cipher)) { 4278 + pr_err("%s() Error: failed to allocate fallback for %s\n", 4279 + __func__, alg->cra_name); 4280 + return PTR_ERR(ctx->fallback_cipher); 4243 4281 } 4244 4282 4283 + reqsize += crypto_aead_reqsize(ctx->fallback_cipher); 4284 + 4285 + reqsize: 4286 + crypto_aead_set_reqsize(aead, reqsize); 4287 + 4288 + out: 4245 4289 return err; 4246 4290 } 4247 4291
+2 -5
drivers/crypto/bcm/cipher.h
··· 339 339 /* hmac context */ 340 340 bool is_sw_hmac; 341 341 342 - /* aead context */ 343 - struct crypto_tfm *old_tfm; 344 - crypto_completion_t old_complete; 345 - void *old_data; 346 - 347 342 gfp_t gfp; 348 343 349 344 /* Buffers used to build SPU request and response messages */ 350 345 struct spu_msg_buf msg_buf; 346 + 347 + struct aead_request req; 351 348 }; 352 349 353 350 /*
+1 -1
drivers/crypto/caam/blob_gen.c
··· 83 83 output_len = info->input_len - CAAM_BLOB_OVERHEAD; 84 84 } 85 85 86 - desc = kzalloc(CAAM_BLOB_DESC_BYTES_MAX, GFP_KERNEL | GFP_DMA); 86 + desc = kzalloc(CAAM_BLOB_DESC_BYTES_MAX, GFP_KERNEL); 87 87 if (!desc) 88 88 return -ENOMEM; 89 89
+10 -6
drivers/crypto/caam/caamalg.c
··· 59 59 #include <crypto/engine.h> 60 60 #include <crypto/xts.h> 61 61 #include <asm/unaligned.h> 62 + #include <linux/dma-mapping.h> 63 + #include <linux/kernel.h> 62 64 63 65 /* 64 66 * crypto alg ··· 1381 1379 sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); 1382 1380 1383 1381 /* allocate space for base edesc and hw desc commands, link tables */ 1384 - edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes, 1385 - GFP_DMA | flags); 1382 + edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes, flags); 1386 1383 if (!edesc) { 1387 1384 caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0, 1388 1385 0, 0, 0); ··· 1609 1608 u8 *iv; 1610 1609 int ivsize = crypto_skcipher_ivsize(skcipher); 1611 1610 int dst_sg_idx, sec4_sg_ents, sec4_sg_bytes; 1611 + unsigned int aligned_size; 1612 1612 1613 1613 src_nents = sg_nents_for_len(req->src, req->cryptlen); 1614 1614 if (unlikely(src_nents < 0)) { ··· 1683 1681 /* 1684 1682 * allocate space for base edesc and hw desc commands, link tables, IV 1685 1683 */ 1686 - edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes + ivsize, 1687 - GFP_DMA | flags); 1688 - if (!edesc) { 1684 + aligned_size = ALIGN(ivsize, __alignof__(*edesc)); 1685 + aligned_size += sizeof(*edesc) + desc_bytes + sec4_sg_bytes; 1686 + aligned_size = ALIGN(aligned_size, dma_get_cache_alignment()); 1687 + iv = kzalloc(aligned_size, flags); 1688 + if (!iv) { 1689 1689 dev_err(jrdev, "could not allocate extended descriptor\n"); 1690 1690 caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0, 1691 1691 0, 0, 0); 1692 1692 return ERR_PTR(-ENOMEM); 1693 1693 } 1694 1694 1695 + edesc = (void *)(iv + ALIGN(ivsize, __alignof__(*edesc))); 1695 1696 edesc->src_nents = src_nents; 1696 1697 edesc->dst_nents = dst_nents; 1697 1698 edesc->mapped_src_nents = mapped_src_nents; ··· 1706 1701 1707 1702 /* Make sure IV is located in a DMAable area */ 1708 1703 if (ivsize) { 1709 - iv = (u8 *)edesc->sec4_sg + sec4_sg_bytes; 1710 1704 
memcpy(iv, req->iv, ivsize); 1711 1705 1712 1706 iv_dma = dma_map_single(jrdev, iv, ivsize, DMA_BIDIRECTIONAL);
+10 -6
drivers/crypto/caam/caamalg_qi.c
··· 20 20 #include "caamalg_desc.h" 21 21 #include <crypto/xts.h> 22 22 #include <asm/unaligned.h> 23 + #include <linux/dma-mapping.h> 24 + #include <linux/kernel.h> 23 25 24 26 /* 25 27 * crypto alg ··· 961 959 return (struct aead_edesc *)drv_ctx; 962 960 963 961 /* allocate space for base edesc and hw desc commands, link tables */ 964 - edesc = qi_cache_alloc(GFP_DMA | flags); 962 + edesc = qi_cache_alloc(flags); 965 963 if (unlikely(!edesc)) { 966 964 dev_err(qidev, "could not allocate extended descriptor\n"); 967 965 return ERR_PTR(-ENOMEM); ··· 1319 1317 qm_sg_ents = 1 + pad_sg_nents(qm_sg_ents); 1320 1318 1321 1319 qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry); 1322 - if (unlikely(offsetof(struct skcipher_edesc, sgt) + qm_sg_bytes + 1323 - ivsize > CAAM_QI_MEMCACHE_SIZE)) { 1320 + if (unlikely(ALIGN(ivsize, __alignof__(*edesc)) + 1321 + offsetof(struct skcipher_edesc, sgt) + qm_sg_bytes > 1322 + CAAM_QI_MEMCACHE_SIZE)) { 1324 1323 dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n", 1325 1324 qm_sg_ents, ivsize); 1326 1325 caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0, ··· 1330 1327 } 1331 1328 1332 1329 /* allocate space for base edesc, link tables and IV */ 1333 - edesc = qi_cache_alloc(GFP_DMA | flags); 1334 - if (unlikely(!edesc)) { 1330 + iv = qi_cache_alloc(flags); 1331 + if (unlikely(!iv)) { 1335 1332 dev_err(qidev, "could not allocate extended descriptor\n"); 1336 1333 caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0, 1337 1334 0, DMA_NONE, 0, 0); 1338 1335 return ERR_PTR(-ENOMEM); 1339 1336 } 1340 1337 1338 + edesc = (void *)(iv + ALIGN(ivsize, __alignof__(*edesc))); 1339 + 1341 1340 /* Make sure IV is located in a DMAable area */ 1342 1341 sg_table = &edesc->sgt[0]; 1343 - iv = (u8 *)(sg_table + qm_sg_ents); 1344 1342 memcpy(iv, req->iv, ivsize); 1345 1343 1346 1344 iv_dma = dma_map_single(qidev, iv, ivsize, DMA_BIDIRECTIONAL);
+33 -23
drivers/crypto/caam/caamalg_qi2.c
··· 16 16 #include "caamalg_desc.h" 17 17 #include "caamhash_desc.h" 18 18 #include "dpseci-debugfs.h" 19 + #include <linux/dma-mapping.h> 19 20 #include <linux/fsl/mc.h> 21 + #include <linux/kernel.h> 20 22 #include <soc/fsl/dpaa2-io.h> 21 23 #include <soc/fsl/dpaa2-fd.h> 22 24 #include <crypto/xts.h> ··· 372 370 struct dpaa2_sg_entry *sg_table; 373 371 374 372 /* allocate space for base edesc, link tables and IV */ 375 - edesc = qi_cache_zalloc(GFP_DMA | flags); 373 + edesc = qi_cache_zalloc(flags); 376 374 if (unlikely(!edesc)) { 377 375 dev_err(dev, "could not allocate extended descriptor\n"); 378 376 return ERR_PTR(-ENOMEM); ··· 1191 1189 } 1192 1190 1193 1191 /* allocate space for base edesc, link tables and IV */ 1194 - edesc = qi_cache_zalloc(GFP_DMA | flags); 1192 + edesc = qi_cache_zalloc(flags); 1195 1193 if (unlikely(!edesc)) { 1196 1194 dev_err(dev, "could not allocate extended descriptor\n"); 1197 1195 caam_unmap(dev, req->src, req->dst, src_nents, dst_nents, 0, ··· 3222 3220 int ret = -ENOMEM; 3223 3221 struct dpaa2_fl_entry *in_fle, *out_fle; 3224 3222 3225 - req_ctx = kzalloc(sizeof(*req_ctx), GFP_KERNEL | GFP_DMA); 3223 + req_ctx = kzalloc(sizeof(*req_ctx), GFP_KERNEL); 3226 3224 if (!req_ctx) 3227 3225 return -ENOMEM; 3228 3226 3229 3227 in_fle = &req_ctx->fd_flt[1]; 3230 3228 out_fle = &req_ctx->fd_flt[0]; 3231 3229 3232 - flc = kzalloc(sizeof(*flc), GFP_KERNEL | GFP_DMA); 3230 + flc = kzalloc(sizeof(*flc), GFP_KERNEL); 3233 3231 if (!flc) 3234 3232 goto err_flc; 3235 3233 ··· 3318 3316 dev_dbg(ctx->dev, "keylen %d blocksize %d\n", keylen, blocksize); 3319 3317 3320 3318 if (keylen > blocksize) { 3321 - hashed_key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA); 3319 + unsigned int aligned_len = 3320 + ALIGN(keylen, dma_get_cache_alignment()); 3321 + 3322 + if (aligned_len < keylen) 3323 + return -EOVERFLOW; 3324 + 3325 + hashed_key = kmemdup(key, aligned_len, GFP_KERNEL); 3322 3326 if (!hashed_key) 3323 3327 return -ENOMEM; 3324 3328 ret = 
hash_digest_key(ctx, &keylen, hashed_key, digestsize); ··· 3419 3411 DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, 3420 3412 ctx->ctx_len, 1); 3421 3413 3422 - req->base.complete(&req->base, ecode); 3414 + ahash_request_complete(req, ecode); 3423 3415 } 3424 3416 3425 3417 static void ahash_done_bi(void *cbk_ctx, u32 status) ··· 3457 3449 DUMP_PREFIX_ADDRESS, 16, 4, req->result, 3458 3450 crypto_ahash_digestsize(ahash), 1); 3459 3451 3460 - req->base.complete(&req->base, ecode); 3452 + ahash_request_complete(req, ecode); 3461 3453 } 3462 3454 3463 3455 static void ahash_done_ctx_src(void *cbk_ctx, u32 status) ··· 3484 3476 DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, 3485 3477 ctx->ctx_len, 1); 3486 3478 3487 - req->base.complete(&req->base, ecode); 3479 + ahash_request_complete(req, ecode); 3488 3480 } 3489 3481 3490 3482 static void ahash_done_ctx_dst(void *cbk_ctx, u32 status) ··· 3522 3514 DUMP_PREFIX_ADDRESS, 16, 4, req->result, 3523 3515 crypto_ahash_digestsize(ahash), 1); 3524 3516 3525 - req->base.complete(&req->base, ecode); 3517 + ahash_request_complete(req, ecode); 3526 3518 } 3527 3519 3528 3520 static int ahash_update_ctx(struct ahash_request *req) ··· 3568 3560 } 3569 3561 3570 3562 /* allocate space for base edesc and link tables */ 3571 - edesc = qi_cache_zalloc(GFP_DMA | flags); 3563 + edesc = qi_cache_zalloc(flags); 3572 3564 if (!edesc) { 3573 3565 dma_unmap_sg(ctx->dev, req->src, src_nents, 3574 3566 DMA_TO_DEVICE); ··· 3662 3654 int ret; 3663 3655 3664 3656 /* allocate space for base edesc and link tables */ 3665 - edesc = qi_cache_zalloc(GFP_DMA | flags); 3657 + edesc = qi_cache_zalloc(flags); 3666 3658 if (!edesc) 3667 3659 return -ENOMEM; 3668 3660 ··· 3751 3743 } 3752 3744 3753 3745 /* allocate space for base edesc and link tables */ 3754 - edesc = qi_cache_zalloc(GFP_DMA | flags); 3746 + edesc = qi_cache_zalloc(flags); 3755 3747 if (!edesc) { 3756 3748 dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE); 3757 3749 return -ENOMEM; ··· 
3844 3836 } 3845 3837 3846 3838 /* allocate space for base edesc and link tables */ 3847 - edesc = qi_cache_zalloc(GFP_DMA | flags); 3839 + edesc = qi_cache_zalloc(flags); 3848 3840 if (!edesc) { 3849 3841 dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE); 3850 3842 return ret; ··· 3921 3913 int ret = -ENOMEM; 3922 3914 3923 3915 /* allocate space for base edesc and link tables */ 3924 - edesc = qi_cache_zalloc(GFP_DMA | flags); 3916 + edesc = qi_cache_zalloc(flags); 3925 3917 if (!edesc) 3926 3918 return ret; 3927 3919 ··· 4020 4012 } 4021 4013 4022 4014 /* allocate space for base edesc and link tables */ 4023 - edesc = qi_cache_zalloc(GFP_DMA | flags); 4015 + edesc = qi_cache_zalloc(flags); 4024 4016 if (!edesc) { 4025 4017 dma_unmap_sg(ctx->dev, req->src, src_nents, 4026 4018 DMA_TO_DEVICE); ··· 4133 4125 } 4134 4126 4135 4127 /* allocate space for base edesc and link tables */ 4136 - edesc = qi_cache_zalloc(GFP_DMA | flags); 4128 + edesc = qi_cache_zalloc(flags); 4137 4129 if (!edesc) { 4138 4130 dma_unmap_sg(ctx->dev, req->src, src_nents, DMA_TO_DEVICE); 4139 4131 return ret; ··· 4238 4230 } 4239 4231 4240 4232 /* allocate space for base edesc and link tables */ 4241 - edesc = qi_cache_zalloc(GFP_DMA | flags); 4233 + edesc = qi_cache_zalloc(flags); 4242 4234 if (!edesc) { 4243 4235 dma_unmap_sg(ctx->dev, req->src, src_nents, 4244 4236 DMA_TO_DEVICE); ··· 4934 4926 { 4935 4927 struct dpseci_congestion_notification_cfg cong_notif_cfg = { 0 }; 4936 4928 struct device *dev = priv->dev; 4929 + unsigned int alignmask; 4937 4930 int err; 4938 4931 4939 4932 /* ··· 4945 4936 !(priv->dpseci_attr.options & DPSECI_OPT_HAS_CG)) 4946 4937 return 0; 4947 4938 4948 - priv->cscn_mem = kzalloc(DPAA2_CSCN_SIZE + DPAA2_CSCN_ALIGN, 4949 - GFP_KERNEL | GFP_DMA); 4939 + alignmask = DPAA2_CSCN_ALIGN - 1; 4940 + alignmask |= dma_get_cache_alignment() - 1; 4941 + priv->cscn_mem = kzalloc(ALIGN(DPAA2_CSCN_SIZE, alignmask + 1), 4942 + GFP_KERNEL); 4950 4943 if (!priv->cscn_mem) 
4951 4944 return -ENOMEM; 4952 4945 4953 - priv->cscn_mem_aligned = PTR_ALIGN(priv->cscn_mem, DPAA2_CSCN_ALIGN); 4954 - priv->cscn_dma = dma_map_single(dev, priv->cscn_mem_aligned, 4946 + priv->cscn_dma = dma_map_single(dev, priv->cscn_mem, 4955 4947 DPAA2_CSCN_SIZE, DMA_FROM_DEVICE); 4956 4948 if (dma_mapping_error(dev, priv->cscn_dma)) { 4957 4949 dev_err(dev, "Error mapping CSCN memory area\n"); ··· 5184 5174 priv->domain = iommu_get_domain_for_dev(dev); 5185 5175 5186 5176 qi_cache = kmem_cache_create("dpaa2_caamqicache", CAAM_QI_MEMCACHE_SIZE, 5187 - 0, SLAB_CACHE_DMA, NULL); 5177 + 0, 0, NULL); 5188 5178 if (!qi_cache) { 5189 5179 dev_err(dev, "Can't allocate SEC cache\n"); 5190 5180 return -ENOMEM; ··· 5461 5451 dma_sync_single_for_cpu(priv->dev, priv->cscn_dma, 5462 5452 DPAA2_CSCN_SIZE, 5463 5453 DMA_FROM_DEVICE); 5464 - if (unlikely(dpaa2_cscn_state_congested(priv->cscn_mem_aligned))) { 5454 + if (unlikely(dpaa2_cscn_state_congested(priv->cscn_mem))) { 5465 5455 dev_dbg_ratelimited(dev, "Dropping request\n"); 5466 5456 return -EBUSY; 5467 5457 }
+4 -6
drivers/crypto/caam/caamalg_qi2.h
··· 7 7 #ifndef _CAAMALG_QI2_H_ 8 8 #define _CAAMALG_QI2_H_ 9 9 10 + #include <crypto/internal/skcipher.h> 11 + #include <linux/compiler_attributes.h> 10 12 #include <soc/fsl/dpaa2-io.h> 11 13 #include <soc/fsl/dpaa2-fd.h> 12 14 #include <linux/threads.h> 13 15 #include <linux/netdevice.h> 14 16 #include "dpseci.h" 15 17 #include "desc_constr.h" 16 - #include <crypto/skcipher.h> 17 18 18 19 #define DPAA2_CAAM_STORE_SIZE 16 19 20 /* NAPI weight *must* be a multiple of the store size. */ ··· 37 36 * @tx_queue_attr: array of Tx queue attributes 38 37 * @cscn_mem: pointer to memory region containing the congestion SCN 39 38 * it's size is larger than to accommodate alignment 40 - * @cscn_mem_aligned: pointer to congestion SCN; it is computed as 41 - * PTR_ALIGN(cscn_mem, DPAA2_CSCN_ALIGN) 42 39 * @cscn_dma: dma address used by the QMAN to write CSCN messages 43 40 * @dev: device associated with the DPSECI object 44 41 * @mc_io: pointer to MC portal's I/O object ··· 57 58 58 59 /* congestion */ 59 60 void *cscn_mem; 60 - void *cscn_mem_aligned; 61 61 dma_addr_t cscn_dma; 62 62 63 63 struct device *dev; ··· 156 158 struct caam_flc { 157 159 u32 flc[16]; 158 160 u32 sh_desc[MAX_SDLEN]; 159 - } ____cacheline_aligned; 161 + } __aligned(CRYPTO_DMA_ALIGN); 160 162 161 163 enum optype { 162 164 ENCRYPT = 0, ··· 178 180 * @edesc: extended descriptor; points to one of {skcipher,aead}_edesc 179 181 */ 180 182 struct caam_request { 181 - struct dpaa2_fl_entry fd_flt[2]; 183 + struct dpaa2_fl_entry fd_flt[2] __aligned(CRYPTO_DMA_ALIGN); 182 184 dma_addr_t fd_flt_dma; 183 185 struct caam_flc *flc; 184 186 dma_addr_t flc_dma;
+13 -5
drivers/crypto/caam/caamhash.c
··· 66 66 #include "key_gen.h" 67 67 #include "caamhash_desc.h" 68 68 #include <crypto/engine.h> 69 + #include <linux/dma-mapping.h> 70 + #include <linux/kernel.h> 69 71 70 72 #define CAAM_CRA_PRIORITY 3000 71 73 ··· 367 365 dma_addr_t key_dma; 368 366 int ret; 369 367 370 - desc = kmalloc(CAAM_CMD_SZ * 8 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA); 368 + desc = kmalloc(CAAM_CMD_SZ * 8 + CAAM_PTR_SZ * 2, GFP_KERNEL); 371 369 if (!desc) { 372 370 dev_err(jrdev, "unable to allocate key input memory\n"); 373 371 return -ENOMEM; ··· 434 432 dev_dbg(jrdev, "keylen %d\n", keylen); 435 433 436 434 if (keylen > blocksize) { 437 - hashed_key = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA); 435 + unsigned int aligned_len = 436 + ALIGN(keylen, dma_get_cache_alignment()); 437 + 438 + if (aligned_len < keylen) 439 + return -EOVERFLOW; 440 + 441 + hashed_key = kmemdup(key, keylen, GFP_KERNEL); 438 442 if (!hashed_key) 439 443 return -ENOMEM; 440 444 ret = hash_digest_key(ctx, &keylen, hashed_key, digestsize); ··· 614 606 * by CAAM, not crypto engine. 615 607 */ 616 608 if (!has_bklog) 617 - req->base.complete(&req->base, ecode); 609 + ahash_request_complete(req, ecode); 618 610 else 619 611 crypto_finalize_hash_request(jrp->engine, req, ecode); 620 612 } ··· 676 668 * by CAAM, not crypto engine. 677 669 */ 678 670 if (!has_bklog) 679 - req->base.complete(&req->base, ecode); 671 + ahash_request_complete(req, ecode); 680 672 else 681 673 crypto_finalize_hash_request(jrp->engine, req, ecode); 682 674 ··· 710 702 struct ahash_edesc *edesc; 711 703 unsigned int sg_size = sg_num * sizeof(struct sec4_sg_entry); 712 704 713 - edesc = kzalloc(sizeof(*edesc) + sg_size, GFP_DMA | flags); 705 + edesc = kzalloc(sizeof(*edesc) + sg_size, flags); 714 706 if (!edesc) { 715 707 dev_err(ctx->jrdev, "could not allocate extended descriptor\n"); 716 708 return NULL;
+17 -14
drivers/crypto/caam/caampkc.c
··· 16 16 #include "desc_constr.h" 17 17 #include "sg_sw_sec4.h" 18 18 #include "caampkc.h" 19 + #include <linux/dma-mapping.h> 20 + #include <linux/kernel.h> 19 21 20 22 #define DESC_RSA_PUB_LEN (2 * CAAM_CMD_SZ + SIZEOF_RSA_PUB_PDB) 21 23 #define DESC_RSA_PRIV_F1_LEN (2 * CAAM_CMD_SZ + \ ··· 312 310 sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); 313 311 314 312 /* allocate space for base edesc, hw desc commands and link tables */ 315 - edesc = kzalloc(sizeof(*edesc) + desclen + sec4_sg_bytes, 316 - GFP_DMA | flags); 313 + edesc = kzalloc(sizeof(*edesc) + desclen + sec4_sg_bytes, flags); 317 314 if (!edesc) 318 315 goto dst_fail; 319 316 ··· 899 898 if (!nbytes) 900 899 return NULL; 901 900 902 - dst = kzalloc(dstlen, GFP_DMA | GFP_KERNEL); 901 + dst = kzalloc(dstlen, GFP_KERNEL); 903 902 if (!dst) 904 903 return NULL; 905 904 ··· 911 910 /** 912 911 * caam_read_raw_data - Read a raw byte stream as a positive integer. 913 912 * The function skips buffer's leading zeros, copies the remained data 914 - * to a buffer allocated in the GFP_DMA | GFP_KERNEL zone and returns 913 + * to a buffer allocated in the GFP_KERNEL zone and returns 915 914 * the address of the new buffer. 916 915 * 917 916 * @buf : The data to read ··· 924 923 if (!*nbytes) 925 924 return NULL; 926 925 927 - return kmemdup(buf, *nbytes, GFP_DMA | GFP_KERNEL); 926 + return kmemdup(buf, *nbytes, GFP_KERNEL); 928 927 } 929 928 930 929 static int caam_rsa_check_key_length(unsigned int len) ··· 950 949 return ret; 951 950 952 951 /* Copy key in DMA zone */ 953 - rsa_key->e = kmemdup(raw_key.e, raw_key.e_sz, GFP_DMA | GFP_KERNEL); 952 + rsa_key->e = kmemdup(raw_key.e, raw_key.e_sz, GFP_KERNEL); 954 953 if (!rsa_key->e) 955 954 goto err; 956 955 957 956 /* 958 957 * Skip leading zeros and copy the positive integer to a buffer 959 - * allocated in the GFP_DMA | GFP_KERNEL zone. The decryption descriptor 958 + * allocated in the GFP_KERNEL zone. 
The decryption descriptor 960 959 * expects a positive integer for the RSA modulus and uses its length as 961 960 * decryption output length. 962 961 */ ··· 984 983 struct caam_rsa_key *rsa_key = &ctx->key; 985 984 size_t p_sz = raw_key->p_sz; 986 985 size_t q_sz = raw_key->q_sz; 986 + unsigned aligned_size; 987 987 988 988 rsa_key->p = caam_read_raw_data(raw_key->p, &p_sz); 989 989 if (!rsa_key->p) ··· 996 994 goto free_p; 997 995 rsa_key->q_sz = q_sz; 998 996 999 - rsa_key->tmp1 = kzalloc(raw_key->p_sz, GFP_DMA | GFP_KERNEL); 997 + aligned_size = ALIGN(raw_key->p_sz, dma_get_cache_alignment()); 998 + rsa_key->tmp1 = kzalloc(aligned_size, GFP_KERNEL); 1000 999 if (!rsa_key->tmp1) 1001 1000 goto free_q; 1002 1001 1003 - rsa_key->tmp2 = kzalloc(raw_key->q_sz, GFP_DMA | GFP_KERNEL); 1002 + aligned_size = ALIGN(raw_key->q_sz, dma_get_cache_alignment()); 1003 + rsa_key->tmp2 = kzalloc(aligned_size, GFP_KERNEL); 1004 1004 if (!rsa_key->tmp2) 1005 1005 goto free_tmp1; 1006 1006 ··· 1055 1051 return ret; 1056 1052 1057 1053 /* Copy key in DMA zone */ 1058 - rsa_key->d = kmemdup(raw_key.d, raw_key.d_sz, GFP_DMA | GFP_KERNEL); 1054 + rsa_key->d = kmemdup(raw_key.d, raw_key.d_sz, GFP_KERNEL); 1059 1055 if (!rsa_key->d) 1060 1056 goto err; 1061 1057 1062 - rsa_key->e = kmemdup(raw_key.e, raw_key.e_sz, GFP_DMA | GFP_KERNEL); 1058 + rsa_key->e = kmemdup(raw_key.e, raw_key.e_sz, GFP_KERNEL); 1063 1059 if (!rsa_key->e) 1064 1060 goto err; 1065 1061 1066 1062 /* 1067 1063 * Skip leading zeros and copy the positive integer to a buffer 1068 - * allocated in the GFP_DMA | GFP_KERNEL zone. The decryption descriptor 1064 + * allocated in the GFP_KERNEL zone. The decryption descriptor 1069 1065 * expects a positive integer for the RSA modulus and uses its length as 1070 1066 * decryption output length. 
1071 1067 */ ··· 1189 1185 return 0; 1190 1186 1191 1187 /* allocate zero buffer, used for padding input */ 1192 - zero_buffer = kzalloc(CAAM_RSA_MAX_INPUT_SIZE - 1, GFP_DMA | 1193 - GFP_KERNEL); 1188 + zero_buffer = kzalloc(CAAM_RSA_MAX_INPUT_SIZE - 1, GFP_KERNEL); 1194 1189 if (!zero_buffer) 1195 1190 return -ENOMEM; 1196 1191
+9 -3
drivers/crypto/caam/caamprng.c
··· 8 8 9 9 #include <linux/completion.h> 10 10 #include <crypto/internal/rng.h> 11 + #include <linux/dma-mapping.h> 12 + #include <linux/kernel.h> 11 13 #include "compat.h" 12 14 #include "regs.h" 13 15 #include "intern.h" ··· 77 75 const u8 *src, unsigned int slen, 78 76 u8 *dst, unsigned int dlen) 79 77 { 78 + unsigned int aligned_dlen = ALIGN(dlen, dma_get_cache_alignment()); 80 79 struct caam_prng_ctx ctx; 81 80 struct device *jrdev; 82 81 dma_addr_t dst_dma; ··· 85 82 u8 *buf; 86 83 int ret; 87 84 88 - buf = kzalloc(dlen, GFP_KERNEL); 85 + if (aligned_dlen < dlen) 86 + return -EOVERFLOW; 87 + 88 + buf = kzalloc(aligned_dlen, GFP_KERNEL); 89 89 if (!buf) 90 90 return -ENOMEM; 91 91 ··· 100 94 return ret; 101 95 } 102 96 103 - desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL | GFP_DMA); 97 + desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL); 104 98 if (!desc) { 105 99 ret = -ENOMEM; 106 100 goto out1; ··· 162 156 return ret; 163 157 } 164 158 165 - desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL | GFP_DMA); 159 + desc = kzalloc(CAAM_PRNG_MAX_DESC_LEN, GFP_KERNEL); 166 160 if (!desc) { 167 161 caam_jr_free(jrdev); 168 162 return -ENOMEM;
+7 -4
drivers/crypto/caam/caamrng.c
··· 12 12 #include <linux/hw_random.h> 13 13 #include <linux/completion.h> 14 14 #include <linux/atomic.h> 15 + #include <linux/dma-mapping.h> 16 + #include <linux/kernel.h> 15 17 #include <linux/kfifo.h> 16 18 17 19 #include "compat.h" ··· 178 176 int err; 179 177 180 178 ctx->desc_sync = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN, 181 - GFP_DMA | GFP_KERNEL); 179 + GFP_KERNEL); 182 180 if (!ctx->desc_sync) 183 181 return -ENOMEM; 184 182 185 183 ctx->desc_async = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN, 186 - GFP_DMA | GFP_KERNEL); 184 + GFP_KERNEL); 187 185 if (!ctx->desc_async) 188 186 return -ENOMEM; 189 187 190 - if (kfifo_alloc(&ctx->fifo, CAAM_RNG_MAX_FIFO_STORE_SIZE, 191 - GFP_DMA | GFP_KERNEL)) 188 + if (kfifo_alloc(&ctx->fifo, ALIGN(CAAM_RNG_MAX_FIFO_STORE_SIZE, 189 + dma_get_cache_alignment()), 190 + GFP_KERNEL)) 192 191 return -ENOMEM; 193 192 194 193 INIT_WORK(&ctx->worker, caam_rng_worker);
+2 -2
drivers/crypto/caam/ctrl.c
··· 199 199 u32 *desc, status; 200 200 int sh_idx, ret = 0; 201 201 202 - desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL | GFP_DMA); 202 + desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL); 203 203 if (!desc) 204 204 return -ENOMEM; 205 205 ··· 276 276 int ret = 0, sh_idx; 277 277 278 278 ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; 279 - desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL | GFP_DMA); 279 + desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL); 280 280 if (!desc) 281 281 return -ENOMEM; 282 282
+2 -1
drivers/crypto/caam/desc_constr.h
··· 163 163 { 164 164 u32 *offset = desc_end(desc); 165 165 166 - if (len) /* avoid sparse warning: memcpy with byte count of 0 */ 166 + /* Avoid gcc warning: memcpy with data == NULL */ 167 + if (!IS_ENABLED(CONFIG_CRYPTO_DEV_FSL_CAAM_DEBUG) || data) 167 168 memcpy(offset, data, len); 168 169 169 170 (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) +
+1 -1
drivers/crypto/caam/key_gen.c
··· 64 64 if (local_max > max_keylen) 65 65 return -EINVAL; 66 66 67 - desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA); 67 + desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL); 68 68 if (!desc) { 69 69 dev_err(jrdev, "unable to allocate key input memory\n"); 70 70 return ret;
+2 -2
drivers/crypto/caam/qi.c
··· 614 614 struct qman_fq *fq; 615 615 int ret; 616 616 617 - fq = kzalloc(sizeof(*fq), GFP_KERNEL | GFP_DMA); 617 + fq = kzalloc(sizeof(*fq), GFP_KERNEL); 618 618 if (!fq) 619 619 return -ENOMEM; 620 620 ··· 756 756 } 757 757 758 758 qi_cache = kmem_cache_create("caamqicache", CAAM_QI_MEMCACHE_SIZE, 0, 759 - SLAB_CACHE_DMA, NULL); 759 + 0, NULL); 760 760 if (!qi_cache) { 761 761 dev_err(qidev, "Can't allocate CAAM cache\n"); 762 762 free_rsp_fqs();
+8 -4
drivers/crypto/caam/qi.h
··· 9 9 #ifndef __QI_H__ 10 10 #define __QI_H__ 11 11 12 + #include <crypto/algapi.h> 13 + #include <linux/compiler_attributes.h> 12 14 #include <soc/fsl/qman.h> 13 15 #include "compat.h" 14 16 #include "desc.h" ··· 60 58 * @qidev: device pointer for CAAM/QI backend 61 59 */ 62 60 struct caam_drv_ctx { 63 - u32 prehdr[2]; 64 - u32 sh_desc[MAX_SDLEN]; 61 + struct { 62 + u32 prehdr[2]; 63 + u32 sh_desc[MAX_SDLEN]; 64 + } __aligned(CRYPTO_DMA_ALIGN); 65 65 dma_addr_t context_a; 66 66 struct qman_fq *req_fq; 67 67 struct qman_fq *rsp_fq; ··· 71 67 int cpu; 72 68 enum optype op_type; 73 69 struct device *qidev; 74 - } ____cacheline_aligned; 70 + }; 75 71 76 72 /** 77 73 * caam_drv_req - The request structure the driver application should fill while ··· 92 88 struct caam_drv_ctx *drv_ctx; 93 89 caam_qi_cbk cbk; 94 90 void *app_ctx; 95 - } ____cacheline_aligned; 91 + } __aligned(CRYPTO_DMA_ALIGN); 96 92 97 93 /** 98 94 * caam_drv_ctx_init - Initialise a CAAM/QI driver context
+4 -6
drivers/crypto/cavium/cpt/cptvf_algs.c
··· 28 28 { 29 29 struct crypto_async_request *req = (struct crypto_async_request *)arg; 30 30 31 - req->complete(req, !status); 31 + crypto_request_complete(req, !status); 32 32 } 33 33 34 34 static inline void update_input_iv(struct cpt_request_info *req_info, ··· 232 232 static int cvm_xts_setkey(struct crypto_skcipher *cipher, const u8 *key, 233 233 u32 keylen) 234 234 { 235 - struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); 236 - struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm); 235 + struct cvm_enc_ctx *ctx = crypto_skcipher_ctx(cipher); 237 236 int err; 238 237 const u8 *key1 = key; 239 238 const u8 *key2 = key + (keylen / 2); 240 239 241 - err = xts_check_key(tfm, key, keylen); 240 + err = xts_verify_key(cipher, key, keylen); 242 241 if (err) 243 242 return err; 244 243 ctx->key_len = keylen; ··· 288 289 static int cvm_setkey(struct crypto_skcipher *cipher, const u8 *key, 289 290 u32 keylen, u8 cipher_type) 290 291 { 291 - struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); 292 - struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm); 292 + struct cvm_enc_ctx *ctx = crypto_skcipher_ctx(cipher); 293 293 294 294 ctx->cipher_type = cipher_type; 295 295 if (!cvm_validate_keylen(ctx, keylen)) {
+2 -2
drivers/crypto/cavium/nitrox/nitrox_aead.c
··· 199 199 err = -EINVAL; 200 200 } 201 201 202 - areq->base.complete(&areq->base, err); 202 + aead_request_complete(areq, err); 203 203 } 204 204 205 205 static inline bool nitrox_aes_gcm_assoclen_supported(unsigned int assoclen) ··· 434 434 err = -EINVAL; 435 435 } 436 436 437 - areq->base.complete(&areq->base, err); 437 + aead_request_complete(areq, err); 438 438 } 439 439 440 440 static int nitrox_rfc4106_enc(struct aead_request *areq)
+3 -5
drivers/crypto/cavium/nitrox/nitrox_skcipher.c
··· 337 337 static int nitrox_aes_xts_setkey(struct crypto_skcipher *cipher, 338 338 const u8 *key, unsigned int keylen) 339 339 { 340 - struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); 341 - struct nitrox_crypto_ctx *nctx = crypto_tfm_ctx(tfm); 340 + struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(cipher); 342 341 struct flexi_crypto_context *fctx; 343 342 int aes_keylen, ret; 344 343 345 - ret = xts_check_key(tfm, key, keylen); 344 + ret = xts_verify_key(cipher, key, keylen); 346 345 if (ret) 347 346 return ret; 348 347 ··· 361 362 static int nitrox_aes_ctr_rfc3686_setkey(struct crypto_skcipher *cipher, 362 363 const u8 *key, unsigned int keylen) 363 364 { 364 - struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); 365 - struct nitrox_crypto_ctx *nctx = crypto_tfm_ctx(tfm); 365 + struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(cipher); 366 366 struct flexi_crypto_context *fctx; 367 367 int aes_keylen; 368 368
+6 -6
drivers/crypto/ccp/ccp-crypto-main.c
··· 146 146 /* Only propagate the -EINPROGRESS if necessary */ 147 147 if (crypto_cmd->ret == -EBUSY) { 148 148 crypto_cmd->ret = -EINPROGRESS; 149 - req->complete(req, -EINPROGRESS); 149 + crypto_request_complete(req, -EINPROGRESS); 150 150 } 151 151 152 152 return; ··· 159 159 held = ccp_crypto_cmd_complete(crypto_cmd, &backlog); 160 160 if (backlog) { 161 161 backlog->ret = -EINPROGRESS; 162 - backlog->req->complete(backlog->req, -EINPROGRESS); 162 + crypto_request_complete(backlog->req, -EINPROGRESS); 163 163 } 164 164 165 165 /* Transition the state from -EBUSY to -EINPROGRESS first */ 166 166 if (crypto_cmd->ret == -EBUSY) 167 - req->complete(req, -EINPROGRESS); 167 + crypto_request_complete(req, -EINPROGRESS); 168 168 169 169 /* Completion callbacks */ 170 170 ret = err; 171 171 if (ctx->complete) 172 172 ret = ctx->complete(req, ret); 173 - req->complete(req, ret); 173 + crypto_request_complete(req, ret); 174 174 175 175 /* Submit the next cmd */ 176 176 while (held) { ··· 186 186 ctx = crypto_tfm_ctx_dma(held->req->tfm); 187 187 if (ctx->complete) 188 188 ret = ctx->complete(held->req, ret); 189 - held->req->complete(held->req, ret); 189 + crypto_request_complete(held->req, ret); 190 190 191 191 next = ccp_crypto_cmd_complete(held, &backlog); 192 192 if (backlog) { 193 193 backlog->ret = -EINPROGRESS; 194 - backlog->req->complete(backlog->req, -EINPROGRESS); 194 + crypto_request_complete(backlog->req, -EINPROGRESS); 195 195 } 196 196 197 197 kfree(held);
+17 -4
drivers/crypto/ccp/ccp-dmaengine.c
··· 642 642 chan = ccp->ccp_dma_chan + i; 643 643 dma_chan = &chan->dma_chan; 644 644 645 - if (dma_chan->client_count) 646 - dma_release_channel(dma_chan); 647 - 648 645 tasklet_kill(&chan->cleanup_tasklet); 649 646 list_del_rcu(&dma_chan->device_node); 647 + } 648 + } 649 + 650 + static void ccp_dma_release_channels(struct ccp_device *ccp) 651 + { 652 + struct ccp_dma_chan *chan; 653 + struct dma_chan *dma_chan; 654 + unsigned int i; 655 + 656 + for (i = 0; i < ccp->cmd_q_count; i++) { 657 + chan = ccp->ccp_dma_chan + i; 658 + dma_chan = &chan->dma_chan; 659 + 660 + if (dma_chan->client_count) 661 + dma_release_channel(dma_chan); 650 662 } 651 663 } 652 664 ··· 782 770 if (!dmaengine) 783 771 return; 784 772 785 - ccp_dma_release(ccp); 773 + ccp_dma_release_channels(ccp); 786 774 dma_async_device_unregister(dma_dev); 775 + ccp_dma_release(ccp); 787 776 788 777 kmem_cache_destroy(ccp->dma_desc_cache); 789 778 kmem_cache_destroy(ccp->dma_cmd_cache);
+14 -2
drivers/crypto/ccp/sev-dev.c
··· 26 26 #include <linux/fs_struct.h> 27 27 28 28 #include <asm/smp.h> 29 + #include <asm/cacheflush.h> 29 30 30 31 #include "psp-dev.h" 31 32 #include "sev-dev.h" ··· 57 56 MODULE_FIRMWARE("amd/amd_sev_fam17h_model0xh.sbin"); /* 1st gen EPYC */ 58 57 MODULE_FIRMWARE("amd/amd_sev_fam17h_model3xh.sbin"); /* 2nd gen EPYC */ 59 58 MODULE_FIRMWARE("amd/amd_sev_fam19h_model0xh.sbin"); /* 3rd gen EPYC */ 59 + MODULE_FIRMWARE("amd/amd_sev_fam19h_model1xh.sbin"); /* 4th gen EPYC */ 60 60 61 61 static bool psp_dead; 62 62 static int psp_timeout; ··· 883 881 input_address = (void __user *)input.address; 884 882 885 883 if (input.address && input.length) { 886 - id_blob = kzalloc(input.length, GFP_KERNEL); 884 + /* 885 + * The length of the ID shouldn't be assumed by software since 886 + * it may change in the future. The allocation size is limited 887 + * to 1 << (PAGE_SHIFT + MAX_ORDER - 1) by the page allocator. 888 + * If the allocation fails, simply return ENOMEM rather than 889 + * warning in the kernel log. 890 + */ 891 + id_blob = kzalloc(input.length, GFP_KERNEL | __GFP_NOWARN); 887 892 if (!id_blob) 888 893 return -ENOMEM; 889 894 ··· 1336 1327 1337 1328 /* Obtain the TMR memory area for SEV-ES use */ 1338 1329 sev_es_tmr = sev_fw_alloc(SEV_ES_TMR_SIZE); 1339 - if (!sev_es_tmr) 1330 + if (sev_es_tmr) 1331 + /* Must flush the cache before giving it to the firmware */ 1332 + clflush_cache_range(sev_es_tmr, SEV_ES_TMR_SIZE); 1333 + else 1340 1334 dev_warn(sev->dev, 1341 1335 "SEV: TMR allocation failed, SEV-ES support unavailable\n"); 1342 1336
+23 -23
drivers/crypto/ccp/sp-pci.c
··· 342 342 343 343 #ifdef CONFIG_CRYPTO_DEV_SP_PSP 344 344 static const struct sev_vdata sevv1 = { 345 - .cmdresp_reg = 0x10580, 346 - .cmdbuff_addr_lo_reg = 0x105e0, 347 - .cmdbuff_addr_hi_reg = 0x105e4, 345 + .cmdresp_reg = 0x10580, /* C2PMSG_32 */ 346 + .cmdbuff_addr_lo_reg = 0x105e0, /* C2PMSG_56 */ 347 + .cmdbuff_addr_hi_reg = 0x105e4, /* C2PMSG_57 */ 348 348 }; 349 349 350 350 static const struct sev_vdata sevv2 = { 351 - .cmdresp_reg = 0x10980, 352 - .cmdbuff_addr_lo_reg = 0x109e0, 353 - .cmdbuff_addr_hi_reg = 0x109e4, 351 + .cmdresp_reg = 0x10980, /* C2PMSG_32 */ 352 + .cmdbuff_addr_lo_reg = 0x109e0, /* C2PMSG_56 */ 353 + .cmdbuff_addr_hi_reg = 0x109e4, /* C2PMSG_57 */ 354 354 }; 355 355 356 356 static const struct tee_vdata teev1 = { 357 - .cmdresp_reg = 0x10544, 358 - .cmdbuff_addr_lo_reg = 0x10548, 359 - .cmdbuff_addr_hi_reg = 0x1054c, 360 - .ring_wptr_reg = 0x10550, 361 - .ring_rptr_reg = 0x10554, 357 + .cmdresp_reg = 0x10544, /* C2PMSG_17 */ 358 + .cmdbuff_addr_lo_reg = 0x10548, /* C2PMSG_18 */ 359 + .cmdbuff_addr_hi_reg = 0x1054c, /* C2PMSG_19 */ 360 + .ring_wptr_reg = 0x10550, /* C2PMSG_20 */ 361 + .ring_rptr_reg = 0x10554, /* C2PMSG_21 */ 362 362 }; 363 363 364 364 static const struct psp_vdata pspv1 = { 365 365 .sev = &sevv1, 366 - .feature_reg = 0x105fc, 367 - .inten_reg = 0x10610, 368 - .intsts_reg = 0x10614, 366 + .feature_reg = 0x105fc, /* C2PMSG_63 */ 367 + .inten_reg = 0x10610, /* P2CMSG_INTEN */ 368 + .intsts_reg = 0x10614, /* P2CMSG_INTSTS */ 369 369 }; 370 370 371 371 static const struct psp_vdata pspv2 = { 372 372 .sev = &sevv2, 373 - .feature_reg = 0x109fc, 374 - .inten_reg = 0x10690, 375 - .intsts_reg = 0x10694, 373 + .feature_reg = 0x109fc, /* C2PMSG_63 */ 374 + .inten_reg = 0x10690, /* P2CMSG_INTEN */ 375 + .intsts_reg = 0x10694, /* P2CMSG_INTSTS */ 376 376 }; 377 377 378 378 static const struct psp_vdata pspv3 = { 379 379 .tee = &teev1, 380 - .feature_reg = 0x109fc, 381 - .inten_reg = 0x10690, 382 - .intsts_reg = 0x10694, 380 + 
.feature_reg = 0x109fc, /* C2PMSG_63 */ 381 + .inten_reg = 0x10690, /* P2CMSG_INTEN */ 382 + .intsts_reg = 0x10694, /* P2CMSG_INTSTS */ 383 383 }; 384 384 385 385 static const struct psp_vdata pspv4 = { 386 386 .sev = &sevv2, 387 387 .tee = &teev1, 388 - .feature_reg = 0x109fc, 389 - .inten_reg = 0x10690, 390 - .intsts_reg = 0x10694, 388 + .feature_reg = 0x109fc, /* C2PMSG_63 */ 389 + .inten_reg = 0x10690, /* P2CMSG_INTEN */ 390 + .intsts_reg = 0x10694, /* P2CMSG_INTSTS */ 391 391 }; 392 392 393 393 #endif
+1 -1
drivers/crypto/ccree/cc_cipher.c
··· 460 460 } 461 461 462 462 if (ctx_p->cipher_mode == DRV_CIPHER_XTS && 463 - xts_check_key(tfm, key, keylen)) { 463 + xts_verify_key(sktfm, key, keylen)) { 464 464 dev_dbg(dev, "weak XTS key"); 465 465 return -EINVAL; 466 466 }
+3 -3
drivers/crypto/chelsio/chcr_algo.c
··· 220 220 reqctx->verify = VERIFY_HW; 221 221 } 222 222 chcr_dec_wrcount(dev); 223 - req->base.complete(&req->base, err); 223 + aead_request_complete(req, err); 224 224 225 225 return err; 226 226 } ··· 1235 1235 complete(&ctx->cbc_aes_aio_done); 1236 1236 } 1237 1237 chcr_dec_wrcount(dev); 1238 - req->base.complete(&req->base, err); 1238 + skcipher_request_complete(req, err); 1239 1239 return err; 1240 1240 } 1241 1241 ··· 2132 2132 2133 2133 out: 2134 2134 chcr_dec_wrcount(dev); 2135 - req->base.complete(&req->base, err); 2135 + ahash_request_complete(req, err); 2136 2136 } 2137 2137 2138 2138 /*
+2 -2
drivers/crypto/hifn_795x.c
··· 1705 1705 hifn_cipher_walk_exit(&rctx->walk); 1706 1706 } 1707 1707 1708 - req->base.complete(&req->base, error); 1708 + skcipher_request_complete(req, error); 1709 1709 } 1710 1710 1711 1711 static void hifn_clear_rings(struct hifn_device *dev, int error) ··· 2054 2054 break; 2055 2055 2056 2056 if (backlog) 2057 - backlog->complete(backlog, -EINPROGRESS); 2057 + crypto_request_complete(backlog, -EINPROGRESS); 2058 2058 2059 2059 req = skcipher_request_cast(async_req); 2060 2060
+4 -4
drivers/crypto/hisilicon/Kconfig
··· 27 27 select CRYPTO_SHA256 28 28 select CRYPTO_SHA512 29 29 select CRYPTO_SM4_GENERIC 30 - depends on PCI && PCI_MSI 30 + depends on PCI_MSI 31 31 depends on UACCE || UACCE=n 32 32 depends on ARM64 || (COMPILE_TEST && 64BIT) 33 33 depends on ACPI ··· 42 42 config CRYPTO_DEV_HISI_QM 43 43 tristate 44 44 depends on ARM64 || COMPILE_TEST 45 - depends on PCI && PCI_MSI 45 + depends on PCI_MSI 46 46 depends on UACCE || UACCE=n 47 47 depends on ACPI 48 48 help ··· 51 51 52 52 config CRYPTO_DEV_HISI_ZIP 53 53 tristate "Support for HiSilicon ZIP accelerator" 54 - depends on PCI && PCI_MSI 54 + depends on PCI_MSI 55 55 depends on ARM64 || (COMPILE_TEST && 64BIT) 56 56 depends on !CPU_BIG_ENDIAN || COMPILE_TEST 57 57 depends on UACCE || UACCE=n ··· 62 62 63 63 config CRYPTO_DEV_HISI_HPRE 64 64 tristate "Support for HISI HPRE accelerator" 65 - depends on PCI && PCI_MSI 65 + depends on PCI_MSI 66 66 depends on UACCE || UACCE=n 67 67 depends on ARM64 || (COMPILE_TEST && 64BIT) 68 68 depends on ACPI
+19 -35
drivers/crypto/hisilicon/qm.c
··· 95 95 #define QM_VFT_CFG_RDY 0x10006c 96 96 #define QM_VFT_CFG_OP_WR 0x100058 97 97 #define QM_VFT_CFG_TYPE 0x10005c 98 - #define QM_SQC_VFT 0x0 99 - #define QM_CQC_VFT 0x1 100 98 #define QM_VFT_CFG 0x100060 101 99 #define QM_VFT_CFG_OP_ENABLE 0x100054 102 100 #define QM_PM_CTRL 0x100148 ··· 116 118 #define QM_SQC_VFT_BASE_SHIFT_V2 28 117 119 #define QM_SQC_VFT_BASE_MASK_V2 GENMASK(15, 0) 118 120 #define QM_SQC_VFT_NUM_SHIFT_V2 45 119 - #define QM_SQC_VFT_NUM_MASK_v2 GENMASK(9, 0) 121 + #define QM_SQC_VFT_NUM_MASK_V2 GENMASK(9, 0) 120 122 121 123 #define QM_ABNORMAL_INT_SOURCE 0x100000 122 124 #define QM_ABNORMAL_INT_MASK 0x100004 ··· 162 164 163 165 /* interfunction communication */ 164 166 #define QM_IFC_READY_STATUS 0x100128 165 - #define QM_IFC_C_STS_M 0x10012C 166 167 #define QM_IFC_INT_SET_P 0x100130 167 168 #define QM_IFC_INT_CFG 0x100134 168 169 #define QM_IFC_INT_SOURCE_P 0x100138 ··· 195 198 196 199 #define PCI_BAR_2 2 197 200 #define PCI_BAR_4 4 198 - #define QM_SQE_DATA_ALIGN_MASK GENMASK(6, 0) 199 201 #define QMC_ALIGN(sz) ALIGN(sz, 32) 200 202 201 203 #define QM_DBG_READ_LEN 256 ··· 208 212 #define QM_DRIVER_REMOVING 0 209 213 #define QM_RST_SCHED 1 210 214 #define QM_QOS_PARAM_NUM 2 211 - #define QM_QOS_VAL_NUM 1 212 - #define QM_QOS_BDF_PARAM_NUM 4 213 215 #define QM_QOS_MAX_VAL 1000 214 216 #define QM_QOS_RATE 100 215 217 #define QM_QOS_EXPAND_RATE 1000 ··· 219 225 #define QM_SHAPER_FACTOR_CBS_B_SHIFT 15 220 226 #define QM_SHAPER_FACTOR_CBS_S_SHIFT 19 221 227 #define QM_SHAPER_CBS_B 1 222 - #define QM_SHAPER_CBS_S 16 223 228 #define QM_SHAPER_VFT_OFFSET 6 224 - #define WAIT_FOR_QOS_VF 100 225 229 #define QM_QOS_MIN_ERROR_RATE 5 226 - #define QM_QOS_TYPICAL_NUM 8 227 230 #define QM_SHAPER_MIN_CBS_S 8 228 231 #define QM_QOS_TICK 0x300U 229 232 #define QM_QOS_DIVISOR_CLK 0x1f40U 230 233 #define QM_QOS_MAX_CIR_B 200 231 234 #define QM_QOS_MIN_CIR_B 100 232 235 #define QM_QOS_MAX_CIR_U 6 233 - #define QM_QOS_MAX_CIR_S 11 234 236 #define 
QM_AUTOSUSPEND_DELAY 3000 235 237 236 238 #define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \ 237 - (((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \ 238 - ((pg_sz) << QM_CQ_PAGE_SIZE_SHIFT) | \ 239 - ((buf_sz) << QM_CQ_BUF_SIZE_SHIFT) | \ 239 + (((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \ 240 + ((pg_sz) << QM_CQ_PAGE_SIZE_SHIFT) | \ 241 + ((buf_sz) << QM_CQ_BUF_SIZE_SHIFT) | \ 240 242 ((cqe_sz) << QM_CQ_CQE_SIZE_SHIFT)) 241 243 242 244 #define QM_MK_CQC_DW3_V2(cqe_sz, cq_depth) \ 243 245 ((((u32)cq_depth) - 1) | ((cqe_sz) << QM_CQ_CQE_SIZE_SHIFT)) 244 246 245 247 #define QM_MK_SQC_W13(priority, orders, alg_type) \ 246 - (((priority) << QM_SQ_PRIORITY_SHIFT) | \ 247 - ((orders) << QM_SQ_ORDERS_SHIFT) | \ 248 + (((priority) << QM_SQ_PRIORITY_SHIFT) | \ 249 + ((orders) << QM_SQ_ORDERS_SHIFT) | \ 248 250 (((alg_type) & QM_SQ_TYPE_MASK) << QM_SQ_TYPE_SHIFT)) 249 251 250 252 #define QM_MK_SQC_DW3_V1(hop_num, pg_sz, buf_sz, sqe_sz) \ 251 - (((hop_num) << QM_SQ_HOP_NUM_SHIFT) | \ 252 - ((pg_sz) << QM_SQ_PAGE_SIZE_SHIFT) | \ 253 - ((buf_sz) << QM_SQ_BUF_SIZE_SHIFT) | \ 253 + (((hop_num) << QM_SQ_HOP_NUM_SHIFT) | \ 254 + ((pg_sz) << QM_SQ_PAGE_SIZE_SHIFT) | \ 255 + ((buf_sz) << QM_SQ_BUF_SIZE_SHIFT) | \ 254 256 ((u32)ilog2(sqe_sz) << QM_SQ_SQE_SIZE_SHIFT)) 255 257 256 258 #define QM_MK_SQC_DW3_V2(sqe_sz, sq_depth) \ ··· 696 706 697 707 doorbell = qn | ((u64)cmd << QM_DB_CMD_SHIFT_V2) | 698 708 ((u64)randata << QM_DB_RAND_SHIFT_V2) | 699 - ((u64)index << QM_DB_INDEX_SHIFT_V2) | 709 + ((u64)index << QM_DB_INDEX_SHIFT_V2) | 700 710 ((u64)priority << QM_DB_PRIORITY_SHIFT_V2); 701 711 702 712 writeq(doorbell, io_base); ··· 895 905 } 896 906 } 897 907 898 - static bool do_qm_irq(struct hisi_qm *qm) 908 + static bool do_qm_eq_irq(struct hisi_qm *qm) 899 909 { 900 910 struct qm_eqe *eqe = qm->eqe + qm->status.eq_head; 901 911 struct hisi_qm_poll_data *poll_data; ··· 915 925 return false; 916 926 } 917 927 918 - static irqreturn_t qm_irq(int irq, void *data) 928 + static irqreturn_t 
qm_eq_irq(int irq, void *data) 919 929 { 920 930 struct hisi_qm *qm = data; 921 931 bool ret; 922 932 923 - ret = do_qm_irq(qm); 933 + ret = do_qm_eq_irq(qm); 924 934 if (ret) 925 935 return IRQ_HANDLED; 926 936 ··· 1294 1304 sqc_vft = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) | 1295 1305 ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << 32); 1296 1306 *base = QM_SQC_VFT_BASE_MASK_V2 & (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2); 1297 - *number = (QM_SQC_VFT_NUM_MASK_v2 & 1307 + *number = (QM_SQC_VFT_NUM_MASK_V2 & 1298 1308 (sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1; 1299 1309 1300 1310 return 0; ··· 1882 1892 * @qm: The qm we create a qp from. 1883 1893 * @alg_type: Accelerator specific algorithm type in sqc. 1884 1894 * 1885 - * return created qp, -EBUSY if all qps in qm allocated, -ENOMEM if allocating 1886 - * qp memory fails. 1895 + * Return created qp, negative error code if failed. 1887 1896 */ 1888 1897 static struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type) 1889 1898 { ··· 2051 2062 * @arg: Accelerator specific argument. 2052 2063 * 2053 2064 * After this function, qp can receive request from user. Return 0 if 2054 - * successful, Return -EBUSY if failed. 2065 + * successful, negative error code if failed. 2055 2066 */ 2056 2067 int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg) 2057 2068 { ··· 3063 3074 return 0; 3064 3075 } 3065 3076 3066 - 3067 3077 /** 3068 3078 * qm_clear_queues() - Clear all queues memory in a qm. 3069 3079 * @qm: The qm in which the queues will be cleared. 
··· 3359 3371 act_q_num = q_num; 3360 3372 } 3361 3373 3362 - act_q_num = min_t(int, act_q_num, max_qp_num); 3374 + act_q_num = min(act_q_num, max_qp_num); 3363 3375 ret = hisi_qm_set_vft(qm, i, q_base, act_q_num); 3364 3376 if (ret) { 3365 3377 for (j = num_vfs; j > i; j--) ··· 3546 3558 qos_val = ir / QM_QOS_RATE; 3547 3559 ret = scnprintf(tbuf, QM_DBG_READ_LEN, "%u\n", qos_val); 3548 3560 3549 - ret = simple_read_from_buffer(buf, count, pos, tbuf, ret); 3561 + ret = simple_read_from_buffer(buf, count, pos, tbuf, ret); 3550 3562 3551 3563 err_get_status: 3552 3564 clear_bit(QM_RESETTING, &qm->misc_ctl); ··· 4037 4049 if (!qm->err_status.is_dev_ecc_mbit && 4038 4050 qm->err_status.is_qm_ecc_mbit && 4039 4051 qm->err_ini->close_axi_master_ooo) { 4040 - 4041 4052 qm->err_ini->close_axi_master_ooo(qm); 4042 - 4043 4053 } else if (qm->err_status.is_dev_ecc_mbit && 4044 4054 !qm->err_status.is_qm_ecc_mbit && 4045 4055 !qm->err_ini->close_axi_master_ooo) { 4046 - 4047 4056 nfe_enb = readl(qm->io_base + QM_RAS_NFE_ENABLE); 4048 4057 writel(nfe_enb & QM_RAS_NFE_MBIT_DISABLE, 4049 4058 qm->io_base + QM_RAS_NFE_ENABLE); ··· 4484 4499 return IRQ_HANDLED; 4485 4500 } 4486 4501 4487 - 4488 4502 /** 4489 4503 * hisi_qm_dev_shutdown() - Shutdown device. 4490 4504 * @pdev: The device will be shutdown. ··· 4887 4903 return 0; 4888 4904 4889 4905 irq_vector = val & QM_IRQ_VECTOR_MASK; 4890 - ret = request_irq(pci_irq_vector(pdev, irq_vector), qm_irq, 0, qm->dev_name, qm); 4906 + ret = request_irq(pci_irq_vector(pdev, irq_vector), qm_eq_irq, 0, qm->dev_name, qm); 4891 4907 if (ret) 4892 4908 dev_err(&pdev->dev, "failed to request eq irq, ret = %d", ret); 4893 4909
+3 -3
drivers/crypto/hisilicon/sec/sec_algs.c
··· 504 504 kfifo_avail(&ctx->queue->softqueue) > 505 505 backlog_req->num_elements)) { 506 506 sec_send_request(backlog_req, ctx->queue); 507 - backlog_req->req_base->complete(backlog_req->req_base, 508 - -EINPROGRESS); 507 + crypto_request_complete(backlog_req->req_base, 508 + -EINPROGRESS); 509 509 list_del(&backlog_req->backlog_head); 510 510 } 511 511 } ··· 534 534 if (skreq->src != skreq->dst) 535 535 dma_unmap_sg(dev, skreq->dst, sec_req->len_out, 536 536 DMA_BIDIRECTIONAL); 537 - skreq->base.complete(&skreq->base, sec_req->err); 537 + skcipher_request_complete(skreq, sec_req->err); 538 538 } 539 539 } 540 540
+4 -6
drivers/crypto/hisilicon/sec2/sec_crypto.c
··· 1459 1459 break; 1460 1460 1461 1461 backlog_sk_req = backlog_req->c_req.sk_req; 1462 - backlog_sk_req->base.complete(&backlog_sk_req->base, 1463 - -EINPROGRESS); 1462 + skcipher_request_complete(backlog_sk_req, -EINPROGRESS); 1464 1463 atomic64_inc(&ctx->sec->debug.dfx.recv_busy_cnt); 1465 1464 } 1466 1465 1467 - sk_req->base.complete(&sk_req->base, err); 1466 + skcipher_request_complete(sk_req, err); 1468 1467 } 1469 1468 1470 1469 static void set_aead_auth_iv(struct sec_ctx *ctx, struct sec_req *req) ··· 1735 1736 break; 1736 1737 1737 1738 backlog_aead_req = backlog_req->aead_req.aead_req; 1738 - backlog_aead_req->base.complete(&backlog_aead_req->base, 1739 - -EINPROGRESS); 1739 + aead_request_complete(backlog_aead_req, -EINPROGRESS); 1740 1740 atomic64_inc(&c->sec->debug.dfx.recv_busy_cnt); 1741 1741 } 1742 1742 1743 - a_req->base.complete(&a_req->base, err); 1743 + aead_request_complete(a_req, err); 1744 1744 } 1745 1745 1746 1746 static void sec_request_uninit(struct sec_ctx *ctx, struct sec_req *req)
-1
drivers/crypto/hisilicon/sgl.c
··· 249 249 dev_err(dev, "Get SGL error!\n"); 250 250 dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL); 251 251 return ERR_PTR(-ENOMEM); 252 - 253 252 } 254 253 curr_hw_sgl->entry_length_in_sgl = cpu_to_le16(pool->sge_nr); 255 254 curr_hw_sge = curr_hw_sgl->sge_entries;
+6 -6
drivers/crypto/img-hash.c
··· 157 157 writel_relaxed(value, hdev->io_base + offset); 158 158 } 159 159 160 - static inline u32 img_hash_read_result_queue(struct img_hash_dev *hdev) 160 + static inline __be32 img_hash_read_result_queue(struct img_hash_dev *hdev) 161 161 { 162 - return be32_to_cpu(img_hash_read(hdev, CR_RESULT_QUEUE)); 162 + return cpu_to_be32(img_hash_read(hdev, CR_RESULT_QUEUE)); 163 163 } 164 164 165 165 static void img_hash_start(struct img_hash_dev *hdev, bool dma) ··· 283 283 static void img_hash_copy_hash(struct ahash_request *req) 284 284 { 285 285 struct img_hash_request_ctx *ctx = ahash_request_ctx(req); 286 - u32 *hash = (u32 *)ctx->digest; 286 + __be32 *hash = (__be32 *)ctx->digest; 287 287 int i; 288 288 289 - for (i = (ctx->digsize / sizeof(u32)) - 1; i >= 0; i--) 289 + for (i = (ctx->digsize / sizeof(*hash)) - 1; i >= 0; i--) 290 290 hash[i] = img_hash_read_result_queue(ctx->hdev); 291 291 } 292 292 ··· 308 308 DRIVER_FLAGS_CPU | DRIVER_FLAGS_BUSY | DRIVER_FLAGS_FINAL); 309 309 310 310 if (req->base.complete) 311 - req->base.complete(&req->base, err); 311 + ahash_request_complete(req, err); 312 312 } 313 313 314 314 static int img_hash_write_via_dma(struct img_hash_dev *hdev) ··· 526 526 return res; 527 527 528 528 if (backlog) 529 - backlog->complete(backlog, -EINPROGRESS); 529 + crypto_request_complete(backlog, -EINPROGRESS); 530 530 531 531 req = ahash_request_cast(async_req); 532 532 hdev->req = req;
+2 -13
drivers/crypto/inside-secure/safexcel.c
··· 850 850 goto request_failed; 851 851 852 852 if (backlog) 853 - backlog->complete(backlog, -EINPROGRESS); 853 + crypto_request_complete(backlog, -EINPROGRESS); 854 854 855 855 /* In case the send() helper did not issue any command to push 856 856 * to the engine because the input data was cached, continue to ··· 970 970 } while (!cdesc->last_seg); 971 971 } 972 972 973 - void safexcel_inv_complete(struct crypto_async_request *req, int error) 974 - { 975 - struct safexcel_inv_result *result = req->data; 976 - 977 - if (error == -EINPROGRESS) 978 - return; 979 - 980 - result->error = error; 981 - complete(&result->completion); 982 - } 983 - 984 973 int safexcel_invalidate_cache(struct crypto_async_request *async, 985 974 struct safexcel_crypto_priv *priv, 986 975 dma_addr_t ctxr_dma, int ring) ··· 1039 1050 1040 1051 if (should_complete) { 1041 1052 local_bh_disable(); 1042 - req->complete(req, ret); 1053 + crypto_request_complete(req, ret); 1043 1054 local_bh_enable(); 1044 1055 } 1045 1056
-6
drivers/crypto/inside-secure/safexcel.h
··· 884 884 } alg; 885 885 }; 886 886 887 - struct safexcel_inv_result { 888 - struct completion completion; 889 - int error; 890 - }; 891 - 892 887 void safexcel_dequeue(struct safexcel_crypto_priv *priv, int ring); 893 888 int safexcel_rdesc_check_errors(struct safexcel_crypto_priv *priv, 894 889 void *rdp); ··· 922 927 struct crypto_async_request *req); 923 928 inline struct crypto_async_request * 924 929 safexcel_rdr_req_get(struct safexcel_crypto_priv *priv, int ring); 925 - void safexcel_inv_complete(struct crypto_async_request *req, int error); 926 930 int safexcel_hmac_setkey(struct safexcel_context *base, const u8 *key, 927 931 unsigned int keylen, const char *alg, 928 932 unsigned int state_sz);
+10 -11
drivers/crypto/inside-secure/safexcel_cipher.c
··· 1091 1091 static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm, 1092 1092 struct crypto_async_request *base, 1093 1093 struct safexcel_cipher_req *sreq, 1094 - struct safexcel_inv_result *result) 1094 + struct crypto_wait *result) 1095 1095 { 1096 1096 struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm); 1097 1097 struct safexcel_crypto_priv *priv = ctx->base.priv; 1098 1098 int ring = ctx->base.ring; 1099 - 1100 - init_completion(&result->completion); 1099 + int err; 1101 1100 1102 1101 ctx = crypto_tfm_ctx(base->tfm); 1103 1102 ctx->base.exit_inv = true; ··· 1109 1110 queue_work(priv->ring[ring].workqueue, 1110 1111 &priv->ring[ring].work_data.work); 1111 1112 1112 - wait_for_completion(&result->completion); 1113 + err = crypto_wait_req(-EINPROGRESS, result); 1113 1114 1114 - if (result->error) { 1115 + if (err) { 1115 1116 dev_warn(priv->dev, 1116 1117 "cipher: sync: invalidate: completion error %d\n", 1117 - result->error); 1118 - return result->error; 1118 + err); 1119 + return err; 1119 1120 } 1120 1121 1121 1122 return 0; ··· 1125 1126 { 1126 1127 EIP197_REQUEST_ON_STACK(req, skcipher, EIP197_SKCIPHER_REQ_SIZE); 1127 1128 struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); 1128 - struct safexcel_inv_result result = {}; 1129 + DECLARE_CRYPTO_WAIT(result); 1129 1130 1130 1131 memset(req, 0, sizeof(struct skcipher_request)); 1131 1132 1132 1133 skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, 1133 - safexcel_inv_complete, &result); 1134 + crypto_req_done, &result); 1134 1135 skcipher_request_set_tfm(req, __crypto_skcipher_cast(tfm)); 1135 1136 1136 1137 return safexcel_cipher_exit_inv(tfm, &req->base, sreq, &result); ··· 1140 1141 { 1141 1142 EIP197_REQUEST_ON_STACK(req, aead, EIP197_AEAD_REQ_SIZE); 1142 1143 struct safexcel_cipher_req *sreq = aead_request_ctx(req); 1143 - struct safexcel_inv_result result = {}; 1144 + DECLARE_CRYPTO_WAIT(result); 1144 1145 1145 1146 memset(req, 0, sizeof(struct aead_request)); 1146 1147 1147 
1148 aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, 1148 - safexcel_inv_complete, &result); 1149 + crypto_req_done, &result); 1149 1150 aead_request_set_tfm(req, __crypto_aead_cast(tfm)); 1150 1151 1151 1152 return safexcel_cipher_exit_inv(tfm, &req->base, sreq, &result);
+14 -40
drivers/crypto/inside-secure/safexcel_hash.c
··· 625 625 struct safexcel_crypto_priv *priv = ctx->base.priv; 626 626 EIP197_REQUEST_ON_STACK(req, ahash, EIP197_AHASH_REQ_SIZE); 627 627 struct safexcel_ahash_req *rctx = ahash_request_ctx_dma(req); 628 - struct safexcel_inv_result result = {}; 628 + DECLARE_CRYPTO_WAIT(result); 629 629 int ring = ctx->base.ring; 630 + int err; 630 631 631 632 memset(req, 0, EIP197_AHASH_REQ_SIZE); 632 633 633 634 /* create invalidation request */ 634 635 init_completion(&result.completion); 635 636 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, 636 - safexcel_inv_complete, &result); 637 + crypto_req_done, &result); 637 638 638 639 ahash_request_set_tfm(req, __crypto_ahash_cast(tfm)); 639 640 ctx = crypto_tfm_ctx(req->base.tfm); ··· 648 647 queue_work(priv->ring[ring].workqueue, 649 648 &priv->ring[ring].work_data.work); 650 649 651 - wait_for_completion(&result.completion); 650 + err = crypto_wait_req(-EINPROGRESS, &result); 652 651 653 - if (result.error) { 654 - dev_warn(priv->dev, "hash: completion error (%d)\n", 655 - result.error); 656 - return result.error; 652 + if (err) { 653 + dev_warn(priv->dev, "hash: completion error (%d)\n", err); 654 + return err; 657 655 } 658 656 659 657 return 0; ··· 1042 1042 return safexcel_ahash_finup(areq); 1043 1043 } 1044 1044 1045 - struct safexcel_ahash_result { 1046 - struct completion completion; 1047 - int error; 1048 - }; 1049 - 1050 - static void safexcel_ahash_complete(struct crypto_async_request *req, int error) 1051 - { 1052 - struct safexcel_ahash_result *result = req->data; 1053 - 1054 - if (error == -EINPROGRESS) 1055 - return; 1056 - 1057 - result->error = error; 1058 - complete(&result->completion); 1059 - } 1060 - 1061 1045 static int safexcel_hmac_init_pad(struct ahash_request *areq, 1062 1046 unsigned int blocksize, const u8 *key, 1063 1047 unsigned int keylen, u8 *ipad, u8 *opad) 1064 1048 { 1065 - struct safexcel_ahash_result result; 1049 + DECLARE_CRYPTO_WAIT(result); 1066 1050 struct scatterlist sg; 1067 
1051 int ret, i; 1068 1052 u8 *keydup; ··· 1059 1075 return -ENOMEM; 1060 1076 1061 1077 ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_BACKLOG, 1062 - safexcel_ahash_complete, &result); 1078 + crypto_req_done, &result); 1063 1079 sg_init_one(&sg, keydup, keylen); 1064 1080 ahash_request_set_crypt(areq, &sg, ipad, keylen); 1065 - init_completion(&result.completion); 1066 1081 1067 1082 ret = crypto_ahash_digest(areq); 1068 - if (ret == -EINPROGRESS || ret == -EBUSY) { 1069 - wait_for_completion_interruptible(&result.completion); 1070 - ret = result.error; 1071 - } 1083 + ret = crypto_wait_req(ret, &result); 1072 1084 1073 1085 /* Avoid leaking */ 1074 1086 kfree_sensitive(keydup); ··· 1089 1109 static int safexcel_hmac_init_iv(struct ahash_request *areq, 1090 1110 unsigned int blocksize, u8 *pad, void *state) 1091 1111 { 1092 - struct safexcel_ahash_result result; 1093 1112 struct safexcel_ahash_req *req; 1113 + DECLARE_CRYPTO_WAIT(result); 1094 1114 struct scatterlist sg; 1095 1115 int ret; 1096 1116 1097 1117 ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_BACKLOG, 1098 - safexcel_ahash_complete, &result); 1118 + crypto_req_done, &result); 1099 1119 sg_init_one(&sg, pad, blocksize); 1100 1120 ahash_request_set_crypt(areq, &sg, pad, blocksize); 1101 - init_completion(&result.completion); 1102 1121 1103 1122 ret = crypto_ahash_init(areq); 1104 1123 if (ret) ··· 1108 1129 req->last_req = true; 1109 1130 1110 1131 ret = crypto_ahash_update(areq); 1111 - if (ret && ret != -EINPROGRESS && ret != -EBUSY) 1112 - return ret; 1132 + ret = crypto_wait_req(ret, &result); 1113 1133 1114 - wait_for_completion_interruptible(&result.completion); 1115 - if (result.error) 1116 - return result.error; 1117 - 1118 - return crypto_ahash_export(areq, state); 1134 + return ret ?: crypto_ahash_export(areq, state); 1119 1135 } 1120 1136 1121 1137 static int __safexcel_hmac_setkey(const char *alg, const u8 *key,
+2 -2
drivers/crypto/ixp4xx_crypto.c
··· 382 382 if (req_ctx->hmac_virt) 383 383 finish_scattered_hmac(crypt); 384 384 385 - req->base.complete(&req->base, failed); 385 + aead_request_complete(req, failed); 386 386 break; 387 387 } 388 388 case CTL_FLAG_PERFORM_ABLK: { ··· 407 407 free_buf_chain(dev, req_ctx->dst, crypt->dst_buf); 408 408 409 409 free_buf_chain(dev, req_ctx->src, crypt->src_buf); 410 - req->base.complete(&req->base, failed); 410 + skcipher_request_complete(req, failed); 411 411 break; 412 412 } 413 413 case CTL_FLAG_GEN_ICV:
+2 -2
drivers/crypto/marvell/cesa/cesa.c
··· 66 66 return; 67 67 68 68 if (backlog) 69 - backlog->complete(backlog, -EINPROGRESS); 69 + crypto_request_complete(backlog, -EINPROGRESS); 70 70 71 71 ctx = crypto_tfm_ctx(req->tfm); 72 72 ctx->ops->step(req); ··· 106 106 { 107 107 ctx->ops->cleanup(req); 108 108 local_bh_disable(); 109 - req->complete(req, res); 109 + crypto_request_complete(req, res); 110 110 local_bh_enable(); 111 111 } 112 112
+8 -33
drivers/crypto/marvell/cesa/hash.c
··· 1104 1104 } 1105 1105 }; 1106 1106 1107 - struct mv_cesa_ahash_result { 1108 - struct completion completion; 1109 - int error; 1110 - }; 1111 - 1112 - static void mv_cesa_hmac_ahash_complete(struct crypto_async_request *req, 1113 - int error) 1114 - { 1115 - struct mv_cesa_ahash_result *result = req->data; 1116 - 1117 - if (error == -EINPROGRESS) 1118 - return; 1119 - 1120 - result->error = error; 1121 - complete(&result->completion); 1122 - } 1123 - 1124 1107 static int mv_cesa_ahmac_iv_state_init(struct ahash_request *req, u8 *pad, 1125 1108 void *state, unsigned int blocksize) 1126 1109 { 1127 - struct mv_cesa_ahash_result result; 1110 + DECLARE_CRYPTO_WAIT(result); 1128 1111 struct scatterlist sg; 1129 1112 int ret; 1130 1113 1131 1114 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, 1132 - mv_cesa_hmac_ahash_complete, &result); 1115 + crypto_req_done, &result); 1133 1116 sg_init_one(&sg, pad, blocksize); 1134 1117 ahash_request_set_crypt(req, &sg, pad, blocksize); 1135 - init_completion(&result.completion); 1136 1118 1137 1119 ret = crypto_ahash_init(req); 1138 1120 if (ret) 1139 1121 return ret; 1140 1122 1141 1123 ret = crypto_ahash_update(req); 1142 - if (ret && ret != -EINPROGRESS) 1143 - return ret; 1124 + ret = crypto_wait_req(ret, &result); 1144 1125 1145 - wait_for_completion_interruptible(&result.completion); 1146 - if (result.error) 1147 - return result.error; 1126 + if (ret) 1127 + return ret; 1148 1128 1149 1129 ret = crypto_ahash_export(req, state); 1150 1130 if (ret) ··· 1138 1158 u8 *ipad, u8 *opad, 1139 1159 unsigned int blocksize) 1140 1160 { 1141 - struct mv_cesa_ahash_result result; 1161 + DECLARE_CRYPTO_WAIT(result); 1142 1162 struct scatterlist sg; 1143 1163 int ret; 1144 1164 int i; ··· 1152 1172 return -ENOMEM; 1153 1173 1154 1174 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, 1155 - mv_cesa_hmac_ahash_complete, 1156 - &result); 1175 + crypto_req_done, &result); 1157 1176 sg_init_one(&sg, keydup, keylen); 1158 
1177 ahash_request_set_crypt(req, &sg, ipad, keylen); 1159 - init_completion(&result.completion); 1160 1178 1161 1179 ret = crypto_ahash_digest(req); 1162 - if (ret == -EINPROGRESS) { 1163 - wait_for_completion_interruptible(&result.completion); 1164 - ret = result.error; 1165 - } 1180 + ret = crypto_wait_req(ret, &result); 1166 1181 1167 1182 /* Set the memory region to 0 to avoid any leak. */ 1168 1183 kfree_sensitive(keydup);
+1 -1
drivers/crypto/marvell/cesa/tdma.c
··· 168 168 req); 169 169 170 170 if (backlog) 171 - backlog->complete(backlog, -EINPROGRESS); 171 + crypto_request_complete(backlog, -EINPROGRESS); 172 172 } 173 173 174 174 if (res || tdma->cur_dma == tdma_cur)
+3 -3
drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
··· 138 138 139 139 complete: 140 140 if (areq) 141 - areq->complete(areq, status); 141 + crypto_request_complete(areq, status); 142 142 } 143 143 144 144 static void output_iv_copyback(struct crypto_async_request *areq) ··· 188 188 pdev = cpt_info->pdev; 189 189 do_request_cleanup(pdev, cpt_info); 190 190 } 191 - areq->complete(areq, status); 191 + crypto_request_complete(areq, status); 192 192 } 193 193 } 194 194 ··· 398 398 const u8 *key1 = key; 399 399 int ret; 400 400 401 - ret = xts_check_key(crypto_skcipher_tfm(tfm), key, keylen); 401 + ret = xts_verify_key(tfm, key, keylen); 402 402 if (ret) 403 403 return ret; 404 404 ctx->key_len = keylen;
+5 -6
drivers/crypto/marvell/octeontx2/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 - obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += rvu_cptpf.o rvu_cptvf.o 2 + obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += rvu_cptcommon.o rvu_cptpf.o rvu_cptvf.o 3 3 4 + rvu_cptcommon-objs := cn10k_cpt.o otx2_cptlf.o otx2_cpt_mbox_common.o 4 5 rvu_cptpf-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \ 5 - otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o \ 6 - cn10k_cpt.o otx2_cpt_devlink.o 7 - rvu_cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o otx2_cptlf.o \ 8 - otx2_cpt_mbox_common.o otx2_cptvf_reqmgr.o \ 9 - otx2_cptvf_algs.o cn10k_cpt.o 6 + otx2_cptpf_ucode.o otx2_cpt_devlink.o 7 + rvu_cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o \ 8 + otx2_cptvf_reqmgr.o otx2_cptvf_algs.o 10 9 11 10 ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af
+7 -2
drivers/crypto/marvell/octeontx2/cn10k_cpt.c
··· 7 7 #include "otx2_cptlf.h" 8 8 #include "cn10k_cpt.h" 9 9 10 + static void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num, 11 + struct otx2_cptlf_info *lf); 12 + 10 13 static struct cpt_hw_ops otx2_hw_ops = { 11 14 .send_cmd = otx2_cpt_send_cmd, 12 15 .cpt_get_compcode = otx2_cpt_get_compcode, ··· 22 19 .cpt_get_uc_compcode = cn10k_cpt_get_uc_compcode, 23 20 }; 24 21 25 - void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num, 26 - struct otx2_cptlf_info *lf) 22 + static void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num, 23 + struct otx2_cptlf_info *lf) 27 24 { 28 25 void __iomem *lmtline = lf->lmtline; 29 26 u64 val = (lf->slot & 0x7FF); ··· 71 68 72 69 return 0; 73 70 } 71 + EXPORT_SYMBOL_NS_GPL(cn10k_cptpf_lmtst_init, CRYPTO_DEV_OCTEONTX2_CPT); 74 72 75 73 int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf) 76 74 { ··· 95 91 96 92 return 0; 97 93 } 94 + EXPORT_SYMBOL_NS_GPL(cn10k_cptvf_lmtst_init, CRYPTO_DEV_OCTEONTX2_CPT);
-2
drivers/crypto/marvell/octeontx2/cn10k_cpt.h
··· 28 28 return ((struct cn9k_cpt_res_s *)result)->uc_compcode; 29 29 } 30 30 31 - void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num, 32 - struct otx2_cptlf_info *lf); 33 31 int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf); 34 32 int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf); 35 33
-2
drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
··· 145 145 146 146 int otx2_cpt_send_af_reg_requests(struct otx2_mbox *mbox, 147 147 struct pci_dev *pdev); 148 - int otx2_cpt_add_read_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, 149 - u64 reg, u64 *val, int blkaddr); 150 148 int otx2_cpt_add_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, 151 149 u64 reg, u64 val, int blkaddr); 152 150 int otx2_cpt_read_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev,
+12 -2
drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c
··· 19 19 } 20 20 return ret; 21 21 } 22 + EXPORT_SYMBOL_NS_GPL(otx2_cpt_send_mbox_msg, CRYPTO_DEV_OCTEONTX2_CPT); 22 23 23 24 int otx2_cpt_send_ready_msg(struct otx2_mbox *mbox, struct pci_dev *pdev) 24 25 { ··· 37 36 38 37 return otx2_cpt_send_mbox_msg(mbox, pdev); 39 38 } 39 + EXPORT_SYMBOL_NS_GPL(otx2_cpt_send_ready_msg, CRYPTO_DEV_OCTEONTX2_CPT); 40 40 41 41 int otx2_cpt_send_af_reg_requests(struct otx2_mbox *mbox, struct pci_dev *pdev) 42 42 { 43 43 return otx2_cpt_send_mbox_msg(mbox, pdev); 44 44 } 45 + EXPORT_SYMBOL_NS_GPL(otx2_cpt_send_af_reg_requests, CRYPTO_DEV_OCTEONTX2_CPT); 45 46 46 - int otx2_cpt_add_read_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, 47 - u64 reg, u64 *val, int blkaddr) 47 + static int otx2_cpt_add_read_af_reg(struct otx2_mbox *mbox, 48 + struct pci_dev *pdev, u64 reg, 49 + u64 *val, int blkaddr) 48 50 { 49 51 struct cpt_rd_wr_reg_msg *reg_msg; 50 52 ··· 95 91 96 92 return 0; 97 93 } 94 + EXPORT_SYMBOL_NS_GPL(otx2_cpt_add_write_af_reg, CRYPTO_DEV_OCTEONTX2_CPT); 98 95 99 96 int otx2_cpt_read_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, 100 97 u64 reg, u64 *val, int blkaddr) ··· 108 103 109 104 return otx2_cpt_send_mbox_msg(mbox, pdev); 110 105 } 106 + EXPORT_SYMBOL_NS_GPL(otx2_cpt_read_af_reg, CRYPTO_DEV_OCTEONTX2_CPT); 111 107 112 108 int otx2_cpt_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, 113 109 u64 reg, u64 val, int blkaddr) ··· 121 115 122 116 return otx2_cpt_send_mbox_msg(mbox, pdev); 123 117 } 118 + EXPORT_SYMBOL_NS_GPL(otx2_cpt_write_af_reg, CRYPTO_DEV_OCTEONTX2_CPT); 124 119 125 120 int otx2_cpt_attach_rscrs_msg(struct otx2_cptlfs_info *lfs) 126 121 { ··· 177 170 178 171 return ret; 179 172 } 173 + EXPORT_SYMBOL_NS_GPL(otx2_cpt_detach_rsrcs_msg, CRYPTO_DEV_OCTEONTX2_CPT); 180 174 181 175 int otx2_cpt_msix_offset_msg(struct otx2_cptlfs_info *lfs) 182 176 { ··· 210 202 } 211 203 return ret; 212 204 } 205 + EXPORT_SYMBOL_NS_GPL(otx2_cpt_msix_offset_msg, CRYPTO_DEV_OCTEONTX2_CPT); 213 206 214 207 
int otx2_cpt_sync_mbox_msg(struct otx2_mbox *mbox) 215 208 { ··· 225 216 226 217 return otx2_mbox_check_rsp_msgs(mbox, 0); 227 218 } 219 + EXPORT_SYMBOL_NS_GPL(otx2_cpt_sync_mbox_msg, CRYPTO_DEV_OCTEONTX2_CPT);
+11
drivers/crypto/marvell/octeontx2/otx2_cptlf.c
··· 274 274 } 275 275 cptlf_disable_intrs(lfs); 276 276 } 277 + EXPORT_SYMBOL_NS_GPL(otx2_cptlf_unregister_interrupts, 278 + CRYPTO_DEV_OCTEONTX2_CPT); 277 279 278 280 static int cptlf_do_register_interrrupts(struct otx2_cptlfs_info *lfs, 279 281 int lf_num, int irq_offset, ··· 323 321 otx2_cptlf_unregister_interrupts(lfs); 324 322 return ret; 325 323 } 324 + EXPORT_SYMBOL_NS_GPL(otx2_cptlf_register_interrupts, CRYPTO_DEV_OCTEONTX2_CPT); 326 325 327 326 void otx2_cptlf_free_irqs_affinity(struct otx2_cptlfs_info *lfs) 328 327 { ··· 337 334 free_cpumask_var(lfs->lf[slot].affinity_mask); 338 335 } 339 336 } 337 + EXPORT_SYMBOL_NS_GPL(otx2_cptlf_free_irqs_affinity, CRYPTO_DEV_OCTEONTX2_CPT); 340 338 341 339 int otx2_cptlf_set_irqs_affinity(struct otx2_cptlfs_info *lfs) 342 340 { ··· 370 366 otx2_cptlf_free_irqs_affinity(lfs); 371 367 return ret; 372 368 } 369 + EXPORT_SYMBOL_NS_GPL(otx2_cptlf_set_irqs_affinity, CRYPTO_DEV_OCTEONTX2_CPT); 373 370 374 371 int otx2_cptlf_init(struct otx2_cptlfs_info *lfs, u8 eng_grp_mask, int pri, 375 372 int lfs_num) ··· 427 422 lfs->lfs_num = 0; 428 423 return ret; 429 424 } 425 + EXPORT_SYMBOL_NS_GPL(otx2_cptlf_init, CRYPTO_DEV_OCTEONTX2_CPT); 430 426 431 427 void otx2_cptlf_shutdown(struct otx2_cptlfs_info *lfs) 432 428 { ··· 437 431 /* Send request to detach LFs */ 438 432 otx2_cpt_detach_rsrcs_msg(lfs); 439 433 } 434 + EXPORT_SYMBOL_NS_GPL(otx2_cptlf_shutdown, CRYPTO_DEV_OCTEONTX2_CPT); 435 + 436 + MODULE_AUTHOR("Marvell"); 437 + MODULE_DESCRIPTION("Marvell RVU CPT Common module"); 438 + MODULE_LICENSE("GPL");
+2
drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
··· 831 831 832 832 module_pci_driver(otx2_cpt_pci_driver); 833 833 834 + MODULE_IMPORT_NS(CRYPTO_DEV_OCTEONTX2_CPT); 835 + 834 836 MODULE_AUTHOR("Marvell"); 835 837 MODULE_DESCRIPTION(OTX2_CPT_DRV_STRING); 836 838 MODULE_LICENSE("GPL v2");
+3 -3
drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
··· 120 120 otx2_cpt_info_destroy(pdev, inst_info); 121 121 } 122 122 if (areq) 123 - areq->complete(areq, status); 123 + crypto_request_complete(areq, status); 124 124 } 125 125 126 126 static void output_iv_copyback(struct crypto_async_request *areq) ··· 170 170 pdev = inst_info->pdev; 171 171 otx2_cpt_info_destroy(pdev, inst_info); 172 172 } 173 - areq->complete(areq, status); 173 + crypto_request_complete(areq, status); 174 174 } 175 175 } 176 176 ··· 412 412 const u8 *key1 = key; 413 413 int ret; 414 414 415 - ret = xts_check_key(crypto_skcipher_tfm(tfm), key, keylen); 415 + ret = xts_verify_key(tfm, key, keylen); 416 416 if (ret) 417 417 return ret; 418 418 ctx->key_len = keylen;
+2
drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c
··· 429 429 430 430 module_pci_driver(otx2_cptvf_pci_driver); 431 431 432 + MODULE_IMPORT_NS(CRYPTO_DEV_OCTEONTX2_CPT); 433 + 432 434 MODULE_AUTHOR("Marvell"); 433 435 MODULE_DESCRIPTION("Marvell RVU CPT Virtual Function Driver"); 434 436 MODULE_LICENSE("GPL v2");
+4 -4
drivers/crypto/mxs-dcp.c
··· 413 413 set_current_state(TASK_RUNNING); 414 414 415 415 if (backlog) 416 - backlog->complete(backlog, -EINPROGRESS); 416 + crypto_request_complete(backlog, -EINPROGRESS); 417 417 418 418 if (arq) { 419 419 ret = mxs_dcp_aes_block_crypt(arq); 420 - arq->complete(arq, ret); 420 + crypto_request_complete(arq, ret); 421 421 } 422 422 } 423 423 ··· 709 709 set_current_state(TASK_RUNNING); 710 710 711 711 if (backlog) 712 - backlog->complete(backlog, -EINPROGRESS); 712 + crypto_request_complete(backlog, -EINPROGRESS); 713 713 714 714 if (arq) { 715 715 ret = dcp_sha_req_to_buf(arq); 716 - arq->complete(arq, ret); 716 + crypto_request_complete(arq, ret); 717 717 } 718 718 } 719 719
+5 -8
drivers/crypto/nx/nx-common-powernv.c
··· 72 72 unsigned int inlen, unsigned char *out, 73 73 unsigned int *outlenp, void *workmem, int fc); 74 74 75 - /** 75 + /* 76 76 * setup_indirect_dde - Setup an indirect DDE 77 77 * 78 78 * The DDE is setup with the DDE count, byte count, and address of ··· 89 89 dde->address = cpu_to_be64(nx842_get_pa(ddl)); 90 90 } 91 91 92 - /** 92 + /* 93 93 * setup_direct_dde - Setup single DDE from buffer 94 94 * 95 95 * The DDE is setup with the buffer and length. The buffer must be properly ··· 111 111 return l; 112 112 } 113 113 114 - /** 114 + /* 115 115 * setup_ddl - Setup DDL from buffer 116 116 * 117 117 * Returns: ··· 181 181 CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__, \ 182 182 (unsigned long)be64_to_cpu((csb)->address)) 183 183 184 - /** 185 - * wait_for_csb 186 - */ 187 184 static int wait_for_csb(struct nx842_workmem *wmem, 188 185 struct coprocessor_status_block *csb) 189 186 { ··· 629 632 * @inlen: input buffer size 630 633 * @out: output buffer pointer 631 634 * @outlenp: output buffer size pointer 632 - * @workmem: working memory buffer pointer, size determined by 633 - * nx842_powernv_driver.workmem_size 635 + * @wmem: working memory buffer pointer, size determined by 636 + * nx842_powernv_driver.workmem_size 634 637 * 635 638 * Returns: see @nx842_powernv_exec() 636 639 */
+4 -2
drivers/crypto/nx/nx-common-pseries.c
··· 123 123 atomic64_t decomp_times[32]; 124 124 }; 125 125 126 - static struct nx842_devdata { 126 + struct nx842_devdata { 127 127 struct vio_dev *vdev; 128 128 struct device *dev; 129 129 struct ibm_nx842_counters *counters; 130 130 unsigned int max_sg_len; 131 131 unsigned int max_sync_size; 132 132 unsigned int max_sync_sg; 133 - } __rcu *devdata; 133 + }; 134 + 135 + static struct nx842_devdata __rcu *devdata; 134 136 static DEFINE_SPINLOCK(devdata_mutex); 135 137 136 138 #define NX842_COUNTER_INC(_x) \
+1 -1
drivers/crypto/qat/qat_common/adf_transport_access_macros.h
··· 37 37 #define ADF_SIZE_TO_RING_SIZE_IN_BYTES(SIZE) ((1 << (SIZE - 1)) << 7) 38 38 #define ADF_RING_SIZE_IN_BYTES_TO_SIZE(SIZE) ((1 << (SIZE - 1)) >> 7) 39 39 40 - /* Minimum ring bufer size for memory allocation */ 40 + /* Minimum ring buffer size for memory allocation */ 41 41 #define ADF_RING_SIZE_BYTES_MIN(SIZE) \ 42 42 ((SIZE < ADF_SIZE_TO_RING_SIZE_IN_BYTES(ADF_RING_SIZE_4K)) ? \ 43 43 ADF_SIZE_TO_RING_SIZE_IN_BYTES(ADF_RING_SIZE_4K) : SIZE)
+3 -3
drivers/crypto/qat/qat_common/qat_algs.c
··· 435 435 } else if (aes_v2_capable && mode == ICP_QAT_HW_CIPHER_CTR_MODE) { 436 436 ICP_QAT_FW_LA_SLICE_TYPE_SET(header->serv_specif_flags, 437 437 ICP_QAT_FW_LA_USE_UCS_SLICE_TYPE); 438 - keylen = round_up(keylen, 16); 439 438 memcpy(cd->ucs_aes.key, key, keylen); 439 + keylen = round_up(keylen, 16); 440 440 } else { 441 441 memcpy(cd->aes.key, key, keylen); 442 442 } ··· 676 676 qat_bl_free_bufl(inst->accel_dev, &qat_req->buf); 677 677 if (unlikely(qat_res != ICP_QAT_FW_COMN_STATUS_FLAG_OK)) 678 678 res = -EBADMSG; 679 - areq->base.complete(&areq->base, res); 679 + aead_request_complete(areq, res); 680 680 } 681 681 682 682 static void qat_alg_update_iv_ctr_mode(struct qat_crypto_request *qat_req) ··· 752 752 753 753 memcpy(sreq->iv, qat_req->iv, AES_BLOCK_SIZE); 754 754 755 - sreq->base.complete(&sreq->base, res); 755 + skcipher_request_complete(sreq, res); 756 756 } 757 757 758 758 void qat_alg_callback(void *resp)
+2 -1
drivers/crypto/qat/qat_common/qat_algs_send.c
··· 1 1 // SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) 2 2 /* Copyright(c) 2022 Intel Corporation */ 3 + #include <crypto/algapi.h> 3 4 #include "adf_transport.h" 4 5 #include "qat_algs_send.h" 5 6 #include "qat_crypto.h" ··· 35 34 break; 36 35 } 37 36 list_del(&req->list); 38 - req->base->complete(req->base, -EINPROGRESS); 37 + crypto_request_complete(req->base, -EINPROGRESS); 39 38 } 40 39 spin_unlock_bh(&backlog->lock); 41 40 }
+71 -44
drivers/crypto/qat/qat_common/qat_bl.c
··· 26 26 bl_dma_dir = blp != blpout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL; 27 27 28 28 for (i = 0; i < bl->num_bufs; i++) 29 - dma_unmap_single(dev, bl->bufers[i].addr, 30 - bl->bufers[i].len, bl_dma_dir); 29 + dma_unmap_single(dev, bl->buffers[i].addr, 30 + bl->buffers[i].len, bl_dma_dir); 31 31 32 32 dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE); 33 33 ··· 36 36 37 37 if (blp != blpout) { 38 38 for (i = 0; i < blout->num_mapped_bufs; i++) { 39 - dma_unmap_single(dev, blout->bufers[i].addr, 40 - blout->bufers[i].len, 39 + dma_unmap_single(dev, blout->buffers[i].addr, 40 + blout->buffers[i].len, 41 41 DMA_FROM_DEVICE); 42 42 } 43 43 dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE); ··· 53 53 struct qat_request_buffs *buf, 54 54 dma_addr_t extra_dst_buff, 55 55 size_t sz_extra_dst_buff, 56 + unsigned int sskip, 57 + unsigned int dskip, 56 58 gfp_t flags) 57 59 { 58 60 struct device *dev = &GET_DEV(accel_dev); ··· 65 63 dma_addr_t blp = DMA_MAPPING_ERROR; 66 64 dma_addr_t bloutp = DMA_MAPPING_ERROR; 67 65 struct scatterlist *sg; 68 - size_t sz_out, sz = struct_size(bufl, bufers, n); 66 + size_t sz_out, sz = struct_size(bufl, buffers, n); 69 67 int node = dev_to_node(&GET_DEV(accel_dev)); 68 + unsigned int left; 70 69 int bufl_dma_dir; 71 70 72 71 if (unlikely(!n)) ··· 89 86 bufl_dma_dir = sgl != sglout ? 
DMA_TO_DEVICE : DMA_BIDIRECTIONAL; 90 87 91 88 for (i = 0; i < n; i++) 92 - bufl->bufers[i].addr = DMA_MAPPING_ERROR; 89 + bufl->buffers[i].addr = DMA_MAPPING_ERROR; 90 + 91 + left = sskip; 93 92 94 93 for_each_sg(sgl, sg, n, i) { 95 94 int y = sg_nctr; ··· 99 94 if (!sg->length) 100 95 continue; 101 96 102 - bufl->bufers[y].addr = dma_map_single(dev, sg_virt(sg), 103 - sg->length, 104 - bufl_dma_dir); 105 - bufl->bufers[y].len = sg->length; 106 - if (unlikely(dma_mapping_error(dev, bufl->bufers[y].addr))) 97 + if (left >= sg->length) { 98 + left -= sg->length; 99 + continue; 100 + } 101 + bufl->buffers[y].addr = dma_map_single(dev, sg_virt(sg) + left, 102 + sg->length - left, 103 + bufl_dma_dir); 104 + bufl->buffers[y].len = sg->length; 105 + if (unlikely(dma_mapping_error(dev, bufl->buffers[y].addr))) 107 106 goto err_in; 108 107 sg_nctr++; 108 + if (left) { 109 + bufl->buffers[y].len -= left; 110 + left = 0; 111 + } 109 112 } 110 113 bufl->num_bufs = sg_nctr; 111 114 blp = dma_map_single(dev, bufl, sz, DMA_TO_DEVICE); ··· 124 111 buf->sz = sz; 125 112 /* Handle out of place operation */ 126 113 if (sgl != sglout) { 127 - struct qat_alg_buf *bufers; 114 + struct qat_alg_buf *buffers; 128 115 int extra_buff = extra_dst_buff ? 
1 : 0; 129 116 int n_sglout = sg_nents(sglout); 130 117 131 118 n = n_sglout + extra_buff; 132 - sz_out = struct_size(buflout, bufers, n); 119 + sz_out = struct_size(buflout, buffers, n); 120 + left = dskip; 121 + 133 122 sg_nctr = 0; 134 123 135 124 if (n > QAT_MAX_BUFF_DESC) { ··· 144 129 buf->sgl_dst_valid = true; 145 130 } 146 131 147 - bufers = buflout->bufers; 132 + buffers = buflout->buffers; 148 133 for (i = 0; i < n; i++) 149 - bufers[i].addr = DMA_MAPPING_ERROR; 134 + buffers[i].addr = DMA_MAPPING_ERROR; 150 135 151 136 for_each_sg(sglout, sg, n_sglout, i) { 152 137 int y = sg_nctr; ··· 154 139 if (!sg->length) 155 140 continue; 156 141 157 - bufers[y].addr = dma_map_single(dev, sg_virt(sg), 158 - sg->length, 159 - DMA_FROM_DEVICE); 160 - if (unlikely(dma_mapping_error(dev, bufers[y].addr))) 142 + if (left >= sg->length) { 143 + left -= sg->length; 144 + continue; 145 + } 146 + buffers[y].addr = dma_map_single(dev, sg_virt(sg) + left, 147 + sg->length - left, 148 + DMA_FROM_DEVICE); 149 + if (unlikely(dma_mapping_error(dev, buffers[y].addr))) 161 150 goto err_out; 162 - bufers[y].len = sg->length; 151 + buffers[y].len = sg->length; 163 152 sg_nctr++; 153 + if (left) { 154 + buffers[y].len -= left; 155 + left = 0; 156 + } 164 157 } 165 158 if (extra_buff) { 166 - bufers[sg_nctr].addr = extra_dst_buff; 167 - bufers[sg_nctr].len = sz_extra_dst_buff; 159 + buffers[sg_nctr].addr = extra_dst_buff; 160 + buffers[sg_nctr].len = sz_extra_dst_buff; 168 161 } 169 162 170 163 buflout->num_bufs = sg_nctr; ··· 197 174 198 175 n = sg_nents(sglout); 199 176 for (i = 0; i < n; i++) { 200 - if (buflout->bufers[i].addr == extra_dst_buff) 177 + if (buflout->buffers[i].addr == extra_dst_buff) 201 178 break; 202 - if (!dma_mapping_error(dev, buflout->bufers[i].addr)) 203 - dma_unmap_single(dev, buflout->bufers[i].addr, 204 - buflout->bufers[i].len, 179 + if (!dma_mapping_error(dev, buflout->buffers[i].addr)) 180 + dma_unmap_single(dev, buflout->buffers[i].addr, 181 + 
buflout->buffers[i].len, 205 182 DMA_FROM_DEVICE); 206 183 } 207 184 ··· 214 191 215 192 n = sg_nents(sgl); 216 193 for (i = 0; i < n; i++) 217 - if (!dma_mapping_error(dev, bufl->bufers[i].addr)) 218 - dma_unmap_single(dev, bufl->bufers[i].addr, 219 - bufl->bufers[i].len, 194 + if (!dma_mapping_error(dev, bufl->buffers[i].addr)) 195 + dma_unmap_single(dev, bufl->buffers[i].addr, 196 + bufl->buffers[i].len, 220 197 bufl_dma_dir); 221 198 222 199 if (!buf->sgl_src_valid) ··· 235 212 { 236 213 dma_addr_t extra_dst_buff = 0; 237 214 size_t sz_extra_dst_buff = 0; 215 + unsigned int sskip = 0; 216 + unsigned int dskip = 0; 238 217 239 218 if (params) { 240 219 extra_dst_buff = params->extra_dst_buff; 241 220 sz_extra_dst_buff = params->sz_extra_dst_buff; 221 + sskip = params->sskip; 222 + dskip = params->dskip; 242 223 } 243 224 244 225 return __qat_bl_sgl_to_bufl(accel_dev, sgl, sglout, buf, 245 226 extra_dst_buff, sz_extra_dst_buff, 246 - flags); 227 + sskip, dskip, flags); 247 228 } 248 229 249 230 static void qat_bl_sgl_unmap(struct adf_accel_dev *accel_dev, ··· 258 231 int i; 259 232 260 233 for (i = 0; i < n; i++) 261 - if (!dma_mapping_error(dev, bl->bufers[i].addr)) 262 - dma_unmap_single(dev, bl->bufers[i].addr, 263 - bl->bufers[i].len, DMA_FROM_DEVICE); 234 + if (!dma_mapping_error(dev, bl->buffers[i].addr)) 235 + dma_unmap_single(dev, bl->buffers[i].addr, 236 + bl->buffers[i].len, DMA_FROM_DEVICE); 264 237 } 265 238 266 239 static int qat_bl_sgl_map(struct adf_accel_dev *accel_dev, ··· 275 248 size_t sz; 276 249 277 250 n = sg_nents(sgl); 278 - sz = struct_size(bufl, bufers, n); 251 + sz = struct_size(bufl, buffers, n); 279 252 bufl = kzalloc_node(sz, GFP_KERNEL, node); 280 253 if (unlikely(!bufl)) 281 254 return -ENOMEM; 282 255 283 256 for (i = 0; i < n; i++) 284 - bufl->bufers[i].addr = DMA_MAPPING_ERROR; 257 + bufl->buffers[i].addr = DMA_MAPPING_ERROR; 285 258 286 259 sg_nctr = 0; 287 260 for_each_sg(sgl, sg, n, i) { ··· 290 263 if (!sg->length) 291 264 
continue; 292 265 293 - bufl->bufers[y].addr = dma_map_single(dev, sg_virt(sg), 294 - sg->length, 295 - DMA_FROM_DEVICE); 296 - bufl->bufers[y].len = sg->length; 297 - if (unlikely(dma_mapping_error(dev, bufl->bufers[y].addr))) 266 + bufl->buffers[y].addr = dma_map_single(dev, sg_virt(sg), 267 + sg->length, 268 + DMA_FROM_DEVICE); 269 + bufl->buffers[y].len = sg->length; 270 + if (unlikely(dma_mapping_error(dev, bufl->buffers[y].addr))) 298 271 goto err_map; 299 272 sg_nctr++; 300 273 } ··· 307 280 308 281 err_map: 309 282 for (i = 0; i < n; i++) 310 - if (!dma_mapping_error(dev, bufl->bufers[i].addr)) 311 - dma_unmap_single(dev, bufl->bufers[i].addr, 312 - bufl->bufers[i].len, 283 + if (!dma_mapping_error(dev, bufl->buffers[i].addr)) 284 + dma_unmap_single(dev, bufl->buffers[i].addr, 285 + bufl->buffers[i].len, 313 286 DMA_FROM_DEVICE); 314 287 kfree(bufl); 315 288 *bl = NULL; ··· 378 351 if (ret) 379 352 return ret; 380 353 381 - new_bl_size = struct_size(new_bl, bufers, new_bl->num_bufs); 354 + new_bl_size = struct_size(new_bl, buffers, new_bl->num_bufs); 382 355 383 356 /* Map new firmware SGL descriptor */ 384 357 new_blp = dma_map_single(dev, new_bl, new_bl_size, DMA_TO_DEVICE);
+3 -1
drivers/crypto/qat/qat_common/qat_bl.h
··· 18 18 u64 resrvd; 19 19 u32 num_bufs; 20 20 u32 num_mapped_bufs; 21 - struct qat_alg_buf bufers[]; 21 + struct qat_alg_buf buffers[]; 22 22 } __packed; 23 23 24 24 struct qat_alg_fixed_buf_list { ··· 42 42 struct qat_sgl_to_bufl_params { 43 43 dma_addr_t extra_dst_buff; 44 44 size_t sz_extra_dst_buff; 45 + unsigned int sskip; 46 + unsigned int dskip; 45 47 }; 46 48 47 49 void qat_bl_free_bufl(struct adf_accel_dev *accel_dev,
+157 -12
drivers/crypto/qat/qat_common/qat_comp_algs.c
··· 13 13 #include "qat_compression.h" 14 14 #include "qat_algs_send.h" 15 15 16 + #define QAT_RFC_1950_HDR_SIZE 2 17 + #define QAT_RFC_1950_FOOTER_SIZE 4 18 + #define QAT_RFC_1950_CM_DEFLATE 8 19 + #define QAT_RFC_1950_CM_DEFLATE_CINFO_32K 7 20 + #define QAT_RFC_1950_CM_MASK 0x0f 21 + #define QAT_RFC_1950_CM_OFFSET 4 22 + #define QAT_RFC_1950_DICT_MASK 0x20 23 + #define QAT_RFC_1950_COMP_HDR 0x785e 24 + 16 25 static DEFINE_MUTEX(algs_lock); 17 26 static unsigned int active_devs; 18 27 ··· 30 21 COMPRESSION = 1, 31 22 }; 32 23 24 + struct qat_compression_req; 25 + 33 26 struct qat_compression_ctx { 34 27 u8 comp_ctx[QAT_COMP_CTX_SIZE]; 35 28 struct qat_compression_instance *inst; 29 + int (*qat_comp_callback)(struct qat_compression_req *qat_req, void *resp); 36 30 }; 37 31 38 32 struct qat_dst { ··· 106 94 107 95 err: 108 96 qat_bl_free_bufl(accel_dev, qat_bufs); 109 - areq->base.complete(&areq->base, ret); 97 + acomp_request_complete(areq, ret); 98 + } 99 + 100 + static int parse_zlib_header(u16 zlib_h) 101 + { 102 + int ret = -EINVAL; 103 + __be16 header; 104 + u8 *header_p; 105 + u8 cmf, flg; 106 + 107 + header = cpu_to_be16(zlib_h); 108 + header_p = (u8 *)&header; 109 + 110 + flg = header_p[0]; 111 + cmf = header_p[1]; 112 + 113 + if (cmf >> QAT_RFC_1950_CM_OFFSET > QAT_RFC_1950_CM_DEFLATE_CINFO_32K) 114 + return ret; 115 + 116 + if ((cmf & QAT_RFC_1950_CM_MASK) != QAT_RFC_1950_CM_DEFLATE) 117 + return ret; 118 + 119 + if (flg & QAT_RFC_1950_DICT_MASK) 120 + return ret; 121 + 122 + return 0; 123 + } 124 + 125 + static int qat_comp_rfc1950_callback(struct qat_compression_req *qat_req, 126 + void *resp) 127 + { 128 + struct acomp_req *areq = qat_req->acompress_req; 129 + enum direction dir = qat_req->dir; 130 + __be32 qat_produced_adler; 131 + 132 + qat_produced_adler = cpu_to_be32(qat_comp_get_produced_adler32(resp)); 133 + 134 + if (dir == COMPRESSION) { 135 + __be16 zlib_header; 136 + 137 + zlib_header = cpu_to_be16(QAT_RFC_1950_COMP_HDR); 138 + 
scatterwalk_map_and_copy(&zlib_header, areq->dst, 0, QAT_RFC_1950_HDR_SIZE, 1); 139 + areq->dlen += QAT_RFC_1950_HDR_SIZE; 140 + 141 + scatterwalk_map_and_copy(&qat_produced_adler, areq->dst, areq->dlen, 142 + QAT_RFC_1950_FOOTER_SIZE, 1); 143 + areq->dlen += QAT_RFC_1950_FOOTER_SIZE; 144 + } else { 145 + __be32 decomp_adler; 146 + int footer_offset; 147 + int consumed; 148 + 149 + consumed = qat_comp_get_consumed_ctr(resp); 150 + footer_offset = consumed + QAT_RFC_1950_HDR_SIZE; 151 + if (footer_offset + QAT_RFC_1950_FOOTER_SIZE > areq->slen) 152 + return -EBADMSG; 153 + 154 + scatterwalk_map_and_copy(&decomp_adler, areq->src, footer_offset, 155 + QAT_RFC_1950_FOOTER_SIZE, 0); 156 + 157 + if (qat_produced_adler != decomp_adler) 158 + return -EBADMSG; 159 + } 160 + return 0; 110 161 } 111 162 112 163 static void qat_comp_generic_callback(struct qat_compression_req *qat_req, ··· 242 167 res = 0; 243 168 areq->dlen = produced; 244 169 170 + if (ctx->qat_comp_callback) 171 + res = ctx->qat_comp_callback(qat_req, resp); 172 + 245 173 end: 246 174 qat_bl_free_bufl(accel_dev, &qat_req->buf); 247 - areq->base.complete(&areq->base, res); 175 + acomp_request_complete(areq, res); 248 176 } 249 177 250 178 void qat_comp_alg_callback(void *resp) ··· 293 215 memset(ctx, 0, sizeof(*ctx)); 294 216 } 295 217 296 - static int qat_comp_alg_compress_decompress(struct acomp_req *areq, 297 - enum direction dir) 218 + static int qat_comp_alg_rfc1950_init_tfm(struct crypto_acomp *acomp_tfm) 219 + { 220 + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm); 221 + struct qat_compression_ctx *ctx = crypto_tfm_ctx(tfm); 222 + int ret; 223 + 224 + ret = qat_comp_alg_init_tfm(acomp_tfm); 225 + ctx->qat_comp_callback = &qat_comp_rfc1950_callback; 226 + 227 + return ret; 228 + } 229 + 230 + static int qat_comp_alg_compress_decompress(struct acomp_req *areq, enum direction dir, 231 + unsigned int shdr, unsigned int sftr, 232 + unsigned int dhdr, unsigned int dftr) 298 233 { 299 234 struct 
qat_compression_req *qat_req = acomp_request_ctx(areq); 300 235 struct crypto_acomp *acomp_tfm = crypto_acomp_reqtfm(areq); 301 236 struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm); 302 237 struct qat_compression_ctx *ctx = crypto_tfm_ctx(tfm); 303 238 struct qat_compression_instance *inst = ctx->inst; 304 - struct qat_sgl_to_bufl_params *p_params = NULL; 305 239 gfp_t f = qat_algs_alloc_flags(&areq->base); 306 - struct qat_sgl_to_bufl_params params; 307 - unsigned int slen = areq->slen; 308 - unsigned int dlen = areq->dlen; 240 + struct qat_sgl_to_bufl_params params = {0}; 241 + int slen = areq->slen - shdr - sftr; 242 + int dlen = areq->dlen - dhdr - dftr; 309 243 dma_addr_t sfbuf, dfbuf; 310 244 u8 *req = qat_req->req; 311 245 size_t ovf_buff_sz; 312 246 int ret; 247 + 248 + params.sskip = shdr; 249 + params.dskip = dhdr; 313 250 314 251 if (!areq->src || !slen) 315 252 return -EINVAL; ··· 347 254 if (!areq->dst) 348 255 return -ENOMEM; 349 256 257 + dlen -= dhdr + dftr; 350 258 areq->dlen = dlen; 351 259 qat_req->dst.resubmitted = false; 352 260 } ··· 356 262 params.extra_dst_buff = inst->dc_data->ovf_buff_p; 357 263 ovf_buff_sz = inst->dc_data->ovf_buff_sz; 358 264 params.sz_extra_dst_buff = ovf_buff_sz; 359 - p_params = &params; 360 265 } 361 266 362 267 ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, areq->src, areq->dst, 363 - &qat_req->buf, p_params, f); 268 + &qat_req->buf, &params, f); 364 269 if (unlikely(ret)) 365 270 return ret; 366 271 ··· 392 299 393 300 static int qat_comp_alg_compress(struct acomp_req *req) 394 301 { 395 - return qat_comp_alg_compress_decompress(req, COMPRESSION); 302 + return qat_comp_alg_compress_decompress(req, COMPRESSION, 0, 0, 0, 0); 396 303 } 397 304 398 305 static int qat_comp_alg_decompress(struct acomp_req *req) 399 306 { 400 - return qat_comp_alg_compress_decompress(req, DECOMPRESSION); 307 + return qat_comp_alg_compress_decompress(req, DECOMPRESSION, 0, 0, 0, 0); 308 + } 309 + 310 + static int 
qat_comp_alg_rfc1950_compress(struct acomp_req *req) 311 + { 312 + if (!req->dst && req->dlen != 0) 313 + return -EINVAL; 314 + 315 + if (req->dst && req->dlen <= QAT_RFC_1950_HDR_SIZE + QAT_RFC_1950_FOOTER_SIZE) 316 + return -EINVAL; 317 + 318 + return qat_comp_alg_compress_decompress(req, COMPRESSION, 0, 0, 319 + QAT_RFC_1950_HDR_SIZE, 320 + QAT_RFC_1950_FOOTER_SIZE); 321 + } 322 + 323 + static int qat_comp_alg_rfc1950_decompress(struct acomp_req *req) 324 + { 325 + struct crypto_acomp *acomp_tfm = crypto_acomp_reqtfm(req); 326 + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm); 327 + struct qat_compression_ctx *ctx = crypto_tfm_ctx(tfm); 328 + struct adf_accel_dev *accel_dev = ctx->inst->accel_dev; 329 + u16 zlib_header; 330 + int ret; 331 + 332 + if (req->slen <= QAT_RFC_1950_HDR_SIZE + QAT_RFC_1950_FOOTER_SIZE) 333 + return -EBADMSG; 334 + 335 + scatterwalk_map_and_copy(&zlib_header, req->src, 0, QAT_RFC_1950_HDR_SIZE, 0); 336 + 337 + ret = parse_zlib_header(zlib_header); 338 + if (ret) { 339 + dev_dbg(&GET_DEV(accel_dev), "Error parsing zlib header\n"); 340 + return ret; 341 + } 342 + 343 + return qat_comp_alg_compress_decompress(req, DECOMPRESSION, QAT_RFC_1950_HDR_SIZE, 344 + QAT_RFC_1950_FOOTER_SIZE, 0, 0); 401 345 } 402 346 403 347 static struct acomp_alg qat_acomp[] = { { ··· 450 320 .exit = qat_comp_alg_exit_tfm, 451 321 .compress = qat_comp_alg_compress, 452 322 .decompress = qat_comp_alg_decompress, 323 + .dst_free = sgl_free, 324 + .reqsize = sizeof(struct qat_compression_req), 325 + }, { 326 + .base = { 327 + .cra_name = "zlib-deflate", 328 + .cra_driver_name = "qat_zlib_deflate", 329 + .cra_priority = 4001, 330 + .cra_flags = CRYPTO_ALG_ASYNC, 331 + .cra_ctxsize = sizeof(struct qat_compression_ctx), 332 + .cra_module = THIS_MODULE, 333 + }, 334 + .init = qat_comp_alg_rfc1950_init_tfm, 335 + .exit = qat_comp_alg_exit_tfm, 336 + .compress = qat_comp_alg_rfc1950_compress, 337 + .decompress = qat_comp_alg_rfc1950_decompress, 453 338 .dst_free = 
sgl_free, 454 339 .reqsize = sizeof(struct qat_compression_req), 455 340 } };
+1 -1
drivers/crypto/qat/qat_common/qat_compression.c
··· 72 72 } 73 73 74 74 if (!accel_dev) { 75 - pr_info("QAT: Could not find a device on node %d\n", node); 75 + pr_debug_ratelimited("QAT: Could not find a device on node %d\n", node); 76 76 /* Get any started device */ 77 77 list_for_each(itr, adf_devmgr_get_head()) { 78 78 struct adf_accel_dev *tmp_dev;
+1 -1
drivers/crypto/qat/qat_common/qat_crypto.c
··· 70 70 } 71 71 72 72 if (!accel_dev) { 73 - pr_info("QAT: Could not find a device on node %d\n", node); 73 + pr_debug_ratelimited("QAT: Could not find a device on node %d\n", node); 74 74 /* Get any started device */ 75 75 list_for_each_entry(tmp_dev, adf_devmgr_get_head(), list) { 76 76 if (adf_dev_started(tmp_dev) &&
+2 -2
drivers/crypto/qce/core.c
··· 107 107 108 108 if (backlog) { 109 109 spin_lock_bh(&qce->lock); 110 - backlog->complete(backlog, -EINPROGRESS); 110 + crypto_request_complete(backlog, -EINPROGRESS); 111 111 spin_unlock_bh(&qce->lock); 112 112 } 113 113 ··· 132 132 spin_unlock_irqrestore(&qce->lock, flags); 133 133 134 134 if (req) 135 - req->complete(req, qce->result); 135 + crypto_request_complete(req, qce->result); 136 136 137 137 qce_handle_queue(qce, NULL); 138 138 }
+4 -4
drivers/crypto/s5p-sss.c
··· 499 499 /* Calls the completion. Cannot be called with dev->lock hold. */ 500 500 static void s5p_aes_complete(struct skcipher_request *req, int err) 501 501 { 502 - req->base.complete(&req->base, err); 502 + skcipher_request_complete(req, err); 503 503 } 504 504 505 505 static void s5p_unset_outdata(struct s5p_aes_dev *dev) ··· 1355 1355 spin_unlock_irqrestore(&dd->hash_lock, flags); 1356 1356 1357 1357 if (req->base.complete) 1358 - req->base.complete(&req->base, err); 1358 + ahash_request_complete(req, err); 1359 1359 } 1360 1360 1361 1361 /** ··· 1397 1397 return ret; 1398 1398 1399 1399 if (backlog) 1400 - backlog->complete(backlog, -EINPROGRESS); 1400 + crypto_request_complete(backlog, -EINPROGRESS); 1401 1401 1402 1402 req = ahash_request_cast(async_req); 1403 1403 dd->hash_req = req; ··· 1991 1991 spin_unlock_irqrestore(&dev->lock, flags); 1992 1992 1993 1993 if (backlog) 1994 - backlog->complete(backlog, -EINPROGRESS); 1994 + crypto_request_complete(backlog, -EINPROGRESS); 1995 1995 1996 1996 dev->req = skcipher_request_cast(async_req); 1997 1997 dev->ctx = crypto_tfm_ctx(dev->req->base.tfm);
+2 -2
drivers/crypto/sahara.c
··· 1049 1049 spin_unlock_bh(&dev->queue_spinlock); 1050 1050 1051 1051 if (backlog) 1052 - backlog->complete(backlog, -EINPROGRESS); 1052 + crypto_request_complete(backlog, -EINPROGRESS); 1053 1053 1054 1054 if (async_req) { 1055 1055 if (crypto_tfm_alg_type(async_req->tfm) == ··· 1065 1065 ret = sahara_aes_process(req); 1066 1066 } 1067 1067 1068 - async_req->complete(async_req, ret); 1068 + crypto_request_complete(async_req, ret); 1069 1069 1070 1070 continue; 1071 1071 }
+11 -26
drivers/crypto/stm32/stm32-cryp.c
··· 597 597 598 598 static void stm32_cryp_write_ccm_first_header(struct stm32_cryp *cryp) 599 599 { 600 - unsigned int i; 601 600 size_t written; 602 601 size_t len; 603 602 u32 alen = cryp->areq->assoclen; ··· 622 623 written = min_t(size_t, AES_BLOCK_SIZE - len, alen); 623 624 624 625 scatterwalk_copychunks((char *)block + len, &cryp->in_walk, written, 0); 625 - for (i = 0; i < AES_BLOCK_32; i++) 626 - stm32_cryp_write(cryp, cryp->caps->din, block[i]); 626 + 627 + writesl(cryp->regs + cryp->caps->din, block, AES_BLOCK_32); 627 628 628 629 cryp->header_in -= written; 629 630 ··· 1362 1363 u32 out_tag[AES_BLOCK_32]; 1363 1364 1364 1365 /* Get and write tag */ 1365 - for (i = 0; i < AES_BLOCK_32; i++) 1366 - out_tag[i] = stm32_cryp_read(cryp, cryp->caps->dout); 1367 - 1366 + readsl(cryp->regs + cryp->caps->dout, out_tag, AES_BLOCK_32); 1368 1367 scatterwalk_copychunks(out_tag, &cryp->out_walk, cryp->authsize, 1); 1369 1368 } else { 1370 1369 /* Get and check tag */ 1371 1370 u32 in_tag[AES_BLOCK_32], out_tag[AES_BLOCK_32]; 1372 1371 1373 1372 scatterwalk_copychunks(in_tag, &cryp->in_walk, cryp->authsize, 0); 1374 - 1375 - for (i = 0; i < AES_BLOCK_32; i++) 1376 - out_tag[i] = stm32_cryp_read(cryp, cryp->caps->dout); 1373 + readsl(cryp->regs + cryp->caps->dout, out_tag, AES_BLOCK_32); 1377 1374 1378 1375 if (crypto_memneq(in_tag, out_tag, cryp->authsize)) 1379 1376 ret = -EBADMSG; ··· 1410 1415 1411 1416 static void stm32_cryp_irq_read_data(struct stm32_cryp *cryp) 1412 1417 { 1413 - unsigned int i; 1414 1418 u32 block[AES_BLOCK_32]; 1415 1419 1416 - for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) 1417 - block[i] = stm32_cryp_read(cryp, cryp->caps->dout); 1418 - 1420 + readsl(cryp->regs + cryp->caps->dout, block, cryp->hw_blocksize / sizeof(u32)); 1419 1421 scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, 1420 1422 cryp->payload_out), 1); 1421 1423 cryp->payload_out -= min_t(size_t, cryp->hw_blocksize, ··· 1421 1429 1422 1430 
static void stm32_cryp_irq_write_block(struct stm32_cryp *cryp) 1423 1431 { 1424 - unsigned int i; 1425 1432 u32 block[AES_BLOCK_32] = {0}; 1426 1433 1427 1434 scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, cryp->hw_blocksize, 1428 1435 cryp->payload_in), 0); 1429 - for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) 1430 - stm32_cryp_write(cryp, cryp->caps->din, block[i]); 1431 - 1436 + writesl(cryp->regs + cryp->caps->din, block, cryp->hw_blocksize / sizeof(u32)); 1432 1437 cryp->payload_in -= min_t(size_t, cryp->hw_blocksize, cryp->payload_in); 1433 1438 } 1434 1439 ··· 1469 1480 * Same code as stm32_cryp_irq_read_data(), but we want to store 1470 1481 * block value 1471 1482 */ 1472 - for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) 1473 - block[i] = stm32_cryp_read(cryp, cryp->caps->dout); 1483 + readsl(cryp->regs + cryp->caps->dout, block, cryp->hw_blocksize / sizeof(u32)); 1474 1484 1475 1485 scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, 1476 1486 cryp->payload_out), 1); ··· 1487 1499 stm32_cryp_write(cryp, cryp->caps->cr, cfg); 1488 1500 1489 1501 /* f) write padded data */ 1490 - for (i = 0; i < AES_BLOCK_32; i++) 1491 - stm32_cryp_write(cryp, cryp->caps->din, block[i]); 1502 + writesl(cryp->regs + cryp->caps->din, block, AES_BLOCK_32); 1492 1503 1493 1504 /* g) Empty fifo out */ 1494 1505 err = stm32_cryp_wait_output(cryp); ··· 1567 1580 * Same code as stm32_cryp_irq_read_data(), but we want to store 1568 1581 * block value 1569 1582 */ 1570 - for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) 1571 - block[i] = stm32_cryp_read(cryp, cryp->caps->dout); 1583 + readsl(cryp->regs + cryp->caps->dout, block, cryp->hw_blocksize / sizeof(u32)); 1572 1584 1573 1585 scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, 1574 1586 cryp->payload_out), 1); ··· 1646 1660 1647 1661 static void stm32_cryp_irq_write_gcmccm_header(struct stm32_cryp *cryp) 1648 1662 { 1649 - unsigned int 
i; 1650 1663 u32 block[AES_BLOCK_32] = {0}; 1651 1664 size_t written; 1652 1665 1653 1666 written = min_t(size_t, AES_BLOCK_SIZE, cryp->header_in); 1654 1667 1655 1668 scatterwalk_copychunks(block, &cryp->in_walk, written, 0); 1656 - for (i = 0; i < AES_BLOCK_32; i++) 1657 - stm32_cryp_write(cryp, cryp->caps->din, block[i]); 1669 + 1670 + writesl(cryp->regs + cryp->caps->din, block, AES_BLOCK_32); 1658 1671 1659 1672 cryp->header_in -= written; 1660 1673
+227 -39
drivers/crypto/stm32/stm32-hash.c
··· 32 32 #define HASH_CR 0x00 33 33 #define HASH_DIN 0x04 34 34 #define HASH_STR 0x08 35 + #define HASH_UX500_HREG(x) (0x0c + ((x) * 0x04)) 35 36 #define HASH_IMR 0x20 36 37 #define HASH_SR 0x24 37 38 #define HASH_CSR(x) (0x0F8 + ((x) * 0x04)) ··· 54 53 #define HASH_CR_ALGO_MD5 0x80 55 54 #define HASH_CR_ALGO_SHA224 0x40000 56 55 #define HASH_CR_ALGO_SHA256 0x40080 56 + 57 + #define HASH_CR_UX500_EMPTYMSG BIT(20) 58 + #define HASH_CR_UX500_ALGO_SHA1 BIT(7) 59 + #define HASH_CR_UX500_ALGO_SHA256 0x0 57 60 58 61 /* Interrupt */ 59 62 #define HASH_DINIE BIT(0) ··· 120 115 struct stm32_hash_ctx { 121 116 struct crypto_engine_ctx enginectx; 122 117 struct stm32_hash_dev *hdev; 118 + struct crypto_shash *xtfm; 123 119 unsigned long flags; 124 120 125 121 u8 key[HASH_MAX_KEY_SIZE]; ··· 163 157 struct stm32_hash_pdata { 164 158 struct stm32_hash_algs_info *algs_info; 165 159 size_t algs_info_size; 160 + bool has_sr; 161 + bool has_mdmat; 162 + bool broken_emptymsg; 163 + bool ux500; 166 164 }; 167 165 168 166 struct stm32_hash_dev { ··· 178 168 phys_addr_t phys_base; 179 169 u32 dma_mode; 180 170 u32 dma_maxburst; 171 + bool polled; 181 172 182 173 struct ahash_request *req; 183 174 struct crypto_engine *engine; ··· 218 207 static inline int stm32_hash_wait_busy(struct stm32_hash_dev *hdev) 219 208 { 220 209 u32 status; 210 + 211 + /* The Ux500 lacks the special status register, we poll the DCAL bit instead */ 212 + if (!hdev->pdata->has_sr) 213 + return readl_relaxed_poll_timeout(hdev->io_base + HASH_STR, status, 214 + !(status & HASH_STR_DCAL), 10, 10000); 221 215 222 216 return readl_relaxed_poll_timeout(hdev->io_base + HASH_SR, status, 223 217 !(status & HASH_SR_BUSY), 10, 10000); ··· 265 249 return 0; 266 250 } 267 251 268 - static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev) 252 + static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev, int bufcnt) 269 253 { 270 254 struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); 271 255 struct 
crypto_ahash *tfm = crypto_ahash_reqtfm(hdev->req); ··· 279 263 reg |= HASH_CR_ALGO_MD5; 280 264 break; 281 265 case HASH_FLAGS_SHA1: 282 - reg |= HASH_CR_ALGO_SHA1; 266 + if (hdev->pdata->ux500) 267 + reg |= HASH_CR_UX500_ALGO_SHA1; 268 + else 269 + reg |= HASH_CR_ALGO_SHA1; 283 270 break; 284 271 case HASH_FLAGS_SHA224: 285 272 reg |= HASH_CR_ALGO_SHA224; 286 273 break; 287 274 case HASH_FLAGS_SHA256: 288 - reg |= HASH_CR_ALGO_SHA256; 275 + if (hdev->pdata->ux500) 276 + reg |= HASH_CR_UX500_ALGO_SHA256; 277 + else 278 + reg |= HASH_CR_ALGO_SHA256; 289 279 break; 290 280 default: 291 281 reg |= HASH_CR_ALGO_MD5; ··· 306 284 reg |= HASH_CR_LKEY; 307 285 } 308 286 309 - stm32_hash_write(hdev, HASH_IMR, HASH_DCIE); 287 + /* 288 + * On the Ux500 we need to set a special flag to indicate that 289 + * the message is zero length. 290 + */ 291 + if (hdev->pdata->ux500 && bufcnt == 0) 292 + reg |= HASH_CR_UX500_EMPTYMSG; 293 + 294 + if (!hdev->polled) 295 + stm32_hash_write(hdev, HASH_IMR, HASH_DCIE); 310 296 311 297 stm32_hash_write(hdev, HASH_CR, reg); 312 298 ··· 375 345 376 346 hdev->flags |= HASH_FLAGS_CPU; 377 347 378 - stm32_hash_write_ctrl(hdev); 348 + stm32_hash_write_ctrl(hdev, length); 379 349 380 350 if (stm32_hash_wait_busy(hdev)) 381 351 return -ETIMEDOUT; ··· 392 362 stm32_hash_write(hdev, HASH_DIN, buffer[count]); 393 363 394 364 if (final) { 365 + if (stm32_hash_wait_busy(hdev)) 366 + return -ETIMEDOUT; 367 + 395 368 stm32_hash_set_nblw(hdev, length); 396 369 reg = stm32_hash_read(hdev, HASH_STR); 397 370 reg |= HASH_STR_DCAL; ··· 432 399 if (final) { 433 400 bufcnt = rctx->bufcnt; 434 401 rctx->bufcnt = 0; 435 - err = stm32_hash_xmit_cpu(hdev, rctx->buffer, bufcnt, 436 - (rctx->flags & HASH_FLAGS_FINUP)); 402 + err = stm32_hash_xmit_cpu(hdev, rctx->buffer, bufcnt, 1); 403 + 404 + /* If we have an IRQ, wait for that, else poll for completion */ 405 + if (hdev->polled) { 406 + if (stm32_hash_wait_busy(hdev)) 407 + return -ETIMEDOUT; 408 + hdev->flags |= 
HASH_FLAGS_OUTPUT_READY; 409 + err = 0; 410 + } 437 411 } 438 412 439 413 return err; ··· 471 431 472 432 reg = stm32_hash_read(hdev, HASH_CR); 473 433 474 - if (mdma) 475 - reg |= HASH_CR_MDMAT; 476 - else 477 - reg &= ~HASH_CR_MDMAT; 478 - 434 + if (!hdev->pdata->has_mdmat) { 435 + if (mdma) 436 + reg |= HASH_CR_MDMAT; 437 + else 438 + reg &= ~HASH_CR_MDMAT; 439 + } 479 440 reg |= HASH_CR_DMAE; 480 441 481 442 stm32_hash_write(hdev, HASH_CR, reg); ··· 597 556 if (rctx->nents < 0) 598 557 return -EINVAL; 599 558 600 - stm32_hash_write_ctrl(hdev); 559 + stm32_hash_write_ctrl(hdev, rctx->total); 601 560 602 561 if (hdev->flags & HASH_FLAGS_HMAC) { 603 562 err = stm32_hash_hmac_dma_send(hdev); ··· 784 743 else 785 744 err = stm32_hash_xmit_cpu(hdev, rctx->buffer, buflen, 1); 786 745 746 + /* If we have an IRQ, wait for that, else poll for completion */ 747 + if (hdev->polled) { 748 + if (stm32_hash_wait_busy(hdev)) 749 + return -ETIMEDOUT; 750 + hdev->flags |= HASH_FLAGS_OUTPUT_READY; 751 + /* Caller will call stm32_hash_finish_req() */ 752 + err = 0; 753 + } 787 754 788 755 return err; 756 + } 757 + 758 + static void stm32_hash_emptymsg_fallback(struct ahash_request *req) 759 + { 760 + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); 761 + struct stm32_hash_ctx *ctx = crypto_ahash_ctx(ahash); 762 + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); 763 + struct stm32_hash_dev *hdev = rctx->hdev; 764 + int ret; 765 + 766 + dev_dbg(hdev->dev, "use fallback message size 0 key size %d\n", 767 + ctx->keylen); 768 + 769 + if (!ctx->xtfm) { 770 + dev_err(hdev->dev, "no fallback engine\n"); 771 + return; 772 + } 773 + 774 + if (ctx->keylen) { 775 + ret = crypto_shash_setkey(ctx->xtfm, ctx->key, ctx->keylen); 776 + if (ret) { 777 + dev_err(hdev->dev, "failed to set key ret=%d\n", ret); 778 + return; 779 + } 780 + } 781 + 782 + ret = crypto_shash_tfm_digest(ctx->xtfm, NULL, 0, rctx->digest); 783 + if (ret) 784 + dev_err(hdev->dev, "shash digest error\n"); 789 
785 } 790 786 791 787 static void stm32_hash_copy_hash(struct ahash_request *req) 792 788 { 793 789 struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); 790 + struct stm32_hash_dev *hdev = rctx->hdev; 794 791 __be32 *hash = (void *)rctx->digest; 795 792 unsigned int i, hashsize; 793 + 794 + if (hdev->pdata->broken_emptymsg && !req->nbytes) 795 + return stm32_hash_emptymsg_fallback(req); 796 796 797 797 switch (rctx->flags & HASH_FLAGS_ALGO_MASK) { 798 798 case HASH_FLAGS_MD5: ··· 852 770 return; 853 771 } 854 772 855 - for (i = 0; i < hashsize / sizeof(u32); i++) 856 - hash[i] = cpu_to_be32(stm32_hash_read(rctx->hdev, 857 - HASH_HREG(i))); 773 + for (i = 0; i < hashsize / sizeof(u32); i++) { 774 + if (hdev->pdata->ux500) 775 + hash[i] = cpu_to_be32(stm32_hash_read(hdev, 776 + HASH_UX500_HREG(i))); 777 + else 778 + hash[i] = cpu_to_be32(stm32_hash_read(hdev, 779 + HASH_HREG(i))); 780 + } 858 781 } 859 782 860 783 static int stm32_hash_finish(struct ahash_request *req) ··· 1048 961 struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); 1049 962 u32 *preg; 1050 963 unsigned int i; 964 + int ret; 1051 965 1052 966 pm_runtime_get_sync(hdev->dev); 1053 967 1054 - while ((stm32_hash_read(hdev, HASH_SR) & HASH_SR_BUSY)) 1055 - cpu_relax(); 968 + ret = stm32_hash_wait_busy(hdev); 969 + if (ret) 970 + return ret; 1056 971 1057 972 rctx->hw_context = kmalloc_array(3 + HASH_CSR_REGISTER_NUMBER, 1058 973 sizeof(u32), ··· 1062 973 1063 974 preg = rctx->hw_context; 1064 975 1065 - *preg++ = stm32_hash_read(hdev, HASH_IMR); 976 + if (!hdev->pdata->ux500) 977 + *preg++ = stm32_hash_read(hdev, HASH_IMR); 1066 978 *preg++ = stm32_hash_read(hdev, HASH_STR); 1067 979 *preg++ = stm32_hash_read(hdev, HASH_CR); 1068 980 for (i = 0; i < HASH_CSR_REGISTER_NUMBER; i++) ··· 1092 1002 1093 1003 pm_runtime_get_sync(hdev->dev); 1094 1004 1095 - stm32_hash_write(hdev, HASH_IMR, *preg++); 1005 + if (!hdev->pdata->ux500) 1006 + stm32_hash_write(hdev, HASH_IMR, *preg++); 1096 1007 
stm32_hash_write(hdev, HASH_STR, *preg++); 1097 1008 stm32_hash_write(hdev, HASH_CR, *preg); 1098 1009 reg = *preg++ | HASH_CR_INIT; ··· 1125 1034 return 0; 1126 1035 } 1127 1036 1037 + static int stm32_hash_init_fallback(struct crypto_tfm *tfm) 1038 + { 1039 + struct stm32_hash_ctx *ctx = crypto_tfm_ctx(tfm); 1040 + struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); 1041 + const char *name = crypto_tfm_alg_name(tfm); 1042 + struct crypto_shash *xtfm; 1043 + 1044 + /* The fallback is only needed on Ux500 */ 1045 + if (!hdev->pdata->ux500) 1046 + return 0; 1047 + 1048 + xtfm = crypto_alloc_shash(name, 0, CRYPTO_ALG_NEED_FALLBACK); 1049 + if (IS_ERR(xtfm)) { 1050 + dev_err(hdev->dev, "failed to allocate %s fallback\n", 1051 + name); 1052 + return PTR_ERR(xtfm); 1053 + } 1054 + dev_info(hdev->dev, "allocated %s fallback\n", name); 1055 + ctx->xtfm = xtfm; 1056 + 1057 + return 0; 1058 + } 1059 + 1128 1060 static int stm32_hash_cra_init_algs(struct crypto_tfm *tfm, 1129 1061 const char *algs_hmac_name) 1130 1062 { ··· 1164 1050 ctx->enginectx.op.do_one_request = stm32_hash_one_request; 1165 1051 ctx->enginectx.op.prepare_request = stm32_hash_prepare_req; 1166 1052 ctx->enginectx.op.unprepare_request = NULL; 1167 - return 0; 1053 + 1054 + return stm32_hash_init_fallback(tfm); 1168 1055 } 1169 1056 1170 1057 static int stm32_hash_cra_init(struct crypto_tfm *tfm) ··· 1191 1076 static int stm32_hash_cra_sha256_init(struct crypto_tfm *tfm) 1192 1077 { 1193 1078 return stm32_hash_cra_init_algs(tfm, "sha256"); 1079 + } 1080 + 1081 + static void stm32_hash_cra_exit(struct crypto_tfm *tfm) 1082 + { 1083 + struct stm32_hash_ctx *ctx = crypto_tfm_ctx(tfm); 1084 + 1085 + if (ctx->xtfm) 1086 + crypto_free_shash(ctx->xtfm); 1194 1087 } 1195 1088 1196 1089 static irqreturn_t stm32_hash_irq_thread(int irq, void *dev_id) ··· 1244 1121 return IRQ_NONE; 1245 1122 } 1246 1123 1247 - static struct ahash_alg algs_md5_sha1[] = { 1124 + static struct ahash_alg algs_md5[] = { 1248 1125 { 
1249 1126 .init = stm32_hash_init, 1250 1127 .update = stm32_hash_update, ··· 1266 1143 .cra_ctxsize = sizeof(struct stm32_hash_ctx), 1267 1144 .cra_alignmask = 3, 1268 1145 .cra_init = stm32_hash_cra_init, 1146 + .cra_exit = stm32_hash_cra_exit, 1269 1147 .cra_module = THIS_MODULE, 1270 1148 } 1271 1149 } ··· 1293 1169 .cra_ctxsize = sizeof(struct stm32_hash_ctx), 1294 1170 .cra_alignmask = 3, 1295 1171 .cra_init = stm32_hash_cra_md5_init, 1172 + .cra_exit = stm32_hash_cra_exit, 1296 1173 .cra_module = THIS_MODULE, 1297 1174 } 1298 1175 } 1299 1176 }, 1177 + }; 1178 + 1179 + static struct ahash_alg algs_sha1[] = { 1300 1180 { 1301 1181 .init = stm32_hash_init, 1302 1182 .update = stm32_hash_update, ··· 1322 1194 .cra_ctxsize = sizeof(struct stm32_hash_ctx), 1323 1195 .cra_alignmask = 3, 1324 1196 .cra_init = stm32_hash_cra_init, 1197 + .cra_exit = stm32_hash_cra_exit, 1325 1198 .cra_module = THIS_MODULE, 1326 1199 } 1327 1200 } ··· 1349 1220 .cra_ctxsize = sizeof(struct stm32_hash_ctx), 1350 1221 .cra_alignmask = 3, 1351 1222 .cra_init = stm32_hash_cra_sha1_init, 1223 + .cra_exit = stm32_hash_cra_exit, 1352 1224 .cra_module = THIS_MODULE, 1353 1225 } 1354 1226 } 1355 1227 }, 1356 1228 }; 1357 1229 1358 - static struct ahash_alg algs_sha224_sha256[] = { 1230 + static struct ahash_alg algs_sha224[] = { 1359 1231 { 1360 1232 .init = stm32_hash_init, 1361 1233 .update = stm32_hash_update, ··· 1378 1248 .cra_ctxsize = sizeof(struct stm32_hash_ctx), 1379 1249 .cra_alignmask = 3, 1380 1250 .cra_init = stm32_hash_cra_init, 1251 + .cra_exit = stm32_hash_cra_exit, 1381 1252 .cra_module = THIS_MODULE, 1382 1253 } 1383 1254 } ··· 1405 1274 .cra_ctxsize = sizeof(struct stm32_hash_ctx), 1406 1275 .cra_alignmask = 3, 1407 1276 .cra_init = stm32_hash_cra_sha224_init, 1277 + .cra_exit = stm32_hash_cra_exit, 1408 1278 .cra_module = THIS_MODULE, 1409 1279 } 1410 1280 } 1411 1281 }, 1282 + }; 1283 + 1284 + static struct ahash_alg algs_sha256[] = { 1412 1285 { 1413 1286 .init = 
stm32_hash_init, 1414 1287 .update = stm32_hash_update, ··· 1434 1299 .cra_ctxsize = sizeof(struct stm32_hash_ctx), 1435 1300 .cra_alignmask = 3, 1436 1301 .cra_init = stm32_hash_cra_init, 1302 + .cra_exit = stm32_hash_cra_exit, 1437 1303 .cra_module = THIS_MODULE, 1438 1304 } 1439 1305 } ··· 1461 1325 .cra_ctxsize = sizeof(struct stm32_hash_ctx), 1462 1326 .cra_alignmask = 3, 1463 1327 .cra_init = stm32_hash_cra_sha256_init, 1328 + .cra_exit = stm32_hash_cra_exit, 1464 1329 .cra_module = THIS_MODULE, 1465 1330 } 1466 1331 } ··· 1507 1370 return 0; 1508 1371 } 1509 1372 1373 + static struct stm32_hash_algs_info stm32_hash_algs_info_ux500[] = { 1374 + { 1375 + .algs_list = algs_sha1, 1376 + .size = ARRAY_SIZE(algs_sha1), 1377 + }, 1378 + { 1379 + .algs_list = algs_sha256, 1380 + .size = ARRAY_SIZE(algs_sha256), 1381 + }, 1382 + }; 1383 + 1384 + static const struct stm32_hash_pdata stm32_hash_pdata_ux500 = { 1385 + .algs_info = stm32_hash_algs_info_ux500, 1386 + .algs_info_size = ARRAY_SIZE(stm32_hash_algs_info_ux500), 1387 + .broken_emptymsg = true, 1388 + .ux500 = true, 1389 + }; 1390 + 1510 1391 static struct stm32_hash_algs_info stm32_hash_algs_info_stm32f4[] = { 1511 1392 { 1512 - .algs_list = algs_md5_sha1, 1513 - .size = ARRAY_SIZE(algs_md5_sha1), 1393 + .algs_list = algs_md5, 1394 + .size = ARRAY_SIZE(algs_md5), 1395 + }, 1396 + { 1397 + .algs_list = algs_sha1, 1398 + .size = ARRAY_SIZE(algs_sha1), 1514 1399 }, 1515 1400 }; 1516 1401 1517 1402 static const struct stm32_hash_pdata stm32_hash_pdata_stm32f4 = { 1518 1403 .algs_info = stm32_hash_algs_info_stm32f4, 1519 1404 .algs_info_size = ARRAY_SIZE(stm32_hash_algs_info_stm32f4), 1405 + .has_sr = true, 1406 + .has_mdmat = true, 1520 1407 }; 1521 1408 1522 1409 static struct stm32_hash_algs_info stm32_hash_algs_info_stm32f7[] = { 1523 1410 { 1524 - .algs_list = algs_md5_sha1, 1525 - .size = ARRAY_SIZE(algs_md5_sha1), 1411 + .algs_list = algs_md5, 1412 + .size = ARRAY_SIZE(algs_md5), 1526 1413 }, 1527 1414 { 
1528 - .algs_list = algs_sha224_sha256, 1529 - .size = ARRAY_SIZE(algs_sha224_sha256), 1415 + .algs_list = algs_sha1, 1416 + .size = ARRAY_SIZE(algs_sha1), 1417 + }, 1418 + { 1419 + .algs_list = algs_sha224, 1420 + .size = ARRAY_SIZE(algs_sha224), 1421 + }, 1422 + { 1423 + .algs_list = algs_sha256, 1424 + .size = ARRAY_SIZE(algs_sha256), 1530 1425 }, 1531 1426 }; 1532 1427 1533 1428 static const struct stm32_hash_pdata stm32_hash_pdata_stm32f7 = { 1534 1429 .algs_info = stm32_hash_algs_info_stm32f7, 1535 1430 .algs_info_size = ARRAY_SIZE(stm32_hash_algs_info_stm32f7), 1431 + .has_sr = true, 1432 + .has_mdmat = true, 1536 1433 }; 1537 1434 1538 1435 static const struct of_device_id stm32_hash_of_match[] = { 1436 + { 1437 + .compatible = "stericsson,ux500-hash", 1438 + .data = &stm32_hash_pdata_ux500, 1439 + }, 1539 1440 { 1540 1441 .compatible = "st,stm32f456-hash", 1541 1442 .data = &stm32_hash_pdata_stm32f4, ··· 1627 1452 if (ret) 1628 1453 return ret; 1629 1454 1630 - irq = platform_get_irq(pdev, 0); 1631 - if (irq < 0) 1455 + irq = platform_get_irq_optional(pdev, 0); 1456 + if (irq < 0 && irq != -ENXIO) 1632 1457 return irq; 1633 1458 1634 - ret = devm_request_threaded_irq(dev, irq, stm32_hash_irq_handler, 1635 - stm32_hash_irq_thread, IRQF_ONESHOT, 1636 - dev_name(dev), hdev); 1637 - if (ret) { 1638 - dev_err(dev, "Cannot grab IRQ\n"); 1639 - return ret; 1459 + if (irq > 0) { 1460 + ret = devm_request_threaded_irq(dev, irq, 1461 + stm32_hash_irq_handler, 1462 + stm32_hash_irq_thread, 1463 + IRQF_ONESHOT, 1464 + dev_name(dev), hdev); 1465 + if (ret) { 1466 + dev_err(dev, "Cannot grab IRQ\n"); 1467 + return ret; 1468 + } 1469 + } else { 1470 + dev_info(dev, "No IRQ, use polling mode\n"); 1471 + hdev->polled = true; 1640 1472 } 1641 1473 1642 1474 hdev->clk = devm_clk_get(&pdev->dev, NULL); ··· 1685 1503 case 0: 1686 1504 break; 1687 1505 case -ENOENT: 1688 - dev_dbg(dev, "DMA mode not available\n"); 1506 + case -ENODEV: 1507 + dev_info(dev, "DMA mode not 
available\n"); 1689 1508 break; 1690 1509 default: 1510 + dev_err(dev, "DMA init error %d\n", ret); 1691 1511 goto err_dma; 1692 1512 } 1693 1513 ··· 1708 1524 if (ret) 1709 1525 goto err_engine_start; 1710 1526 1711 - hdev->dma_mode = stm32_hash_read(hdev, HASH_HWCFGR); 1527 + if (hdev->pdata->ux500) 1528 + /* FIXME: implement DMA mode for Ux500 */ 1529 + hdev->dma_mode = 0; 1530 + else 1531 + hdev->dma_mode = stm32_hash_read(hdev, HASH_HWCFGR); 1712 1532 1713 1533 /* Register algos */ 1714 1534 ret = stm32_hash_register_algs(hdev);
+3 -3
drivers/crypto/talitos.c
··· 1393 1393 alloc_len += sizeof(struct talitos_desc); 1394 1394 alloc_len += ivsize; 1395 1395 1396 - edesc = kmalloc(alloc_len, GFP_DMA | flags); 1396 + edesc = kmalloc(ALIGN(alloc_len, dma_get_cache_alignment()), flags); 1397 1397 if (!edesc) 1398 1398 return ERR_PTR(-ENOMEM); 1399 1399 if (ivsize) { ··· 1560 1560 1561 1561 kfree(edesc); 1562 1562 1563 - areq->base.complete(&areq->base, err); 1563 + skcipher_request_complete(areq, err); 1564 1564 } 1565 1565 1566 1566 static int common_nonsnoop(struct talitos_edesc *edesc, ··· 1759 1759 1760 1760 kfree(edesc); 1761 1761 1762 - areq->base.complete(&areq->base, err); 1762 + ahash_request_complete(areq, err); 1763 1763 } 1764 1764 1765 1765 /*
-22
drivers/crypto/ux500/Kconfig
··· 1 - # SPDX-License-Identifier: GPL-2.0-only 2 - # 3 - # Copyright (C) ST-Ericsson SA 2010 4 - # Author: Shujuan Chen (shujuan.chen@stericsson.com) 5 - # 6 - 7 - config CRYPTO_DEV_UX500_HASH 8 - tristate "UX500 crypto driver for HASH block" 9 - depends on CRYPTO_DEV_UX500 10 - select CRYPTO_HASH 11 - select CRYPTO_SHA1 12 - select CRYPTO_SHA256 13 - help 14 - This selects the hash driver for the UX500_HASH hardware. 15 - Depends on UX500/STM DMA if running in DMA mode. 16 - 17 - config CRYPTO_DEV_UX500_DEBUG 18 - bool "Activate ux500 platform debug-mode for crypto and hash block" 19 - depends on CRYPTO_DEV_UX500_CRYP || CRYPTO_DEV_UX500_HASH 20 - help 21 - Say Y if you want to add debug prints to ux500_hash and 22 - ux500_cryp devices.
-7
drivers/crypto/ux500/Makefile
··· 1 - # SPDX-License-Identifier: GPL-2.0-only 2 - # 3 - # Copyright (C) ST-Ericsson SA 2010 4 - # Author: Shujuan Chen (shujuan.chen@stericsson.com) 5 - # 6 - 7 - obj-$(CONFIG_CRYPTO_DEV_UX500_HASH) += hash/
-11
drivers/crypto/ux500/hash/Makefile
··· 1 - # SPDX-License-Identifier: GPL-2.0-only 2 - # 3 - # Copyright (C) ST-Ericsson SA 2010 4 - # Author: Shujuan Chen (shujuan.chen@stericsson.com) 5 - # 6 - ifdef CONFIG_CRYPTO_DEV_UX500_DEBUG 7 - CFLAGS_hash_core.o := -DDEBUG 8 - endif 9 - 10 - obj-$(CONFIG_CRYPTO_DEV_UX500_HASH) += ux500_hash.o 11 - ux500_hash-objs := hash_core.o
-398
drivers/crypto/ux500/hash/hash_alg.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0-only */ 2 - /* 3 - * Copyright (C) ST-Ericsson SA 2010 4 - * Author: Shujuan Chen (shujuan.chen@stericsson.com) 5 - * Author: Joakim Bech (joakim.xx.bech@stericsson.com) 6 - * Author: Berne Hebark (berne.hebark@stericsson.com)) 7 - */ 8 - #ifndef _HASH_ALG_H 9 - #define _HASH_ALG_H 10 - 11 - #include <linux/bitops.h> 12 - 13 - #define HASH_BLOCK_SIZE 64 14 - #define HASH_DMA_FIFO 4 15 - #define HASH_DMA_ALIGN_SIZE 4 16 - #define HASH_DMA_PERFORMANCE_MIN_SIZE 1024 17 - #define HASH_BYTES_PER_WORD 4 18 - 19 - /* Maximum value of the length's high word */ 20 - #define HASH_HIGH_WORD_MAX_VAL 0xFFFFFFFFUL 21 - 22 - /* Power on Reset values HASH registers */ 23 - #define HASH_RESET_CR_VALUE 0x0 24 - #define HASH_RESET_STR_VALUE 0x0 25 - 26 - /* Number of context swap registers */ 27 - #define HASH_CSR_COUNT 52 28 - 29 - #define HASH_RESET_CSRX_REG_VALUE 0x0 30 - #define HASH_RESET_CSFULL_REG_VALUE 0x0 31 - #define HASH_RESET_CSDATAIN_REG_VALUE 0x0 32 - 33 - #define HASH_RESET_INDEX_VAL 0x0 34 - #define HASH_RESET_BIT_INDEX_VAL 0x0 35 - #define HASH_RESET_BUFFER_VAL 0x0 36 - #define HASH_RESET_LEN_HIGH_VAL 0x0 37 - #define HASH_RESET_LEN_LOW_VAL 0x0 38 - 39 - /* Control register bitfields */ 40 - #define HASH_CR_RESUME_MASK 0x11FCF 41 - 42 - #define HASH_CR_SWITCHON_POS 31 43 - #define HASH_CR_SWITCHON_MASK BIT(31) 44 - 45 - #define HASH_CR_EMPTYMSG_POS 20 46 - #define HASH_CR_EMPTYMSG_MASK BIT(20) 47 - 48 - #define HASH_CR_DINF_POS 12 49 - #define HASH_CR_DINF_MASK BIT(12) 50 - 51 - #define HASH_CR_NBW_POS 8 52 - #define HASH_CR_NBW_MASK 0x00000F00UL 53 - 54 - #define HASH_CR_LKEY_POS 16 55 - #define HASH_CR_LKEY_MASK BIT(16) 56 - 57 - #define HASH_CR_ALGO_POS 7 58 - #define HASH_CR_ALGO_MASK BIT(7) 59 - 60 - #define HASH_CR_MODE_POS 6 61 - #define HASH_CR_MODE_MASK BIT(6) 62 - 63 - #define HASH_CR_DATAFORM_POS 4 64 - #define HASH_CR_DATAFORM_MASK (BIT(4) | BIT(5)) 65 - 66 - #define HASH_CR_DMAE_POS 3 67 - #define 
HASH_CR_DMAE_MASK BIT(3) 68 - 69 - #define HASH_CR_INIT_POS 2 70 - #define HASH_CR_INIT_MASK BIT(2) 71 - 72 - #define HASH_CR_PRIVN_POS 1 73 - #define HASH_CR_PRIVN_MASK BIT(1) 74 - 75 - #define HASH_CR_SECN_POS 0 76 - #define HASH_CR_SECN_MASK BIT(0) 77 - 78 - /* Start register bitfields */ 79 - #define HASH_STR_DCAL_POS 8 80 - #define HASH_STR_DCAL_MASK BIT(8) 81 - #define HASH_STR_DEFAULT 0x0 82 - 83 - #define HASH_STR_NBLW_POS 0 84 - #define HASH_STR_NBLW_MASK 0x0000001FUL 85 - 86 - #define HASH_NBLW_MAX_VAL 0x1F 87 - 88 - /* PrimeCell IDs */ 89 - #define HASH_P_ID0 0xE0 90 - #define HASH_P_ID1 0x05 91 - #define HASH_P_ID2 0x38 92 - #define HASH_P_ID3 0x00 93 - #define HASH_CELL_ID0 0x0D 94 - #define HASH_CELL_ID1 0xF0 95 - #define HASH_CELL_ID2 0x05 96 - #define HASH_CELL_ID3 0xB1 97 - 98 - #define HASH_SET_BITS(reg_name, mask) \ 99 - writel_relaxed((readl_relaxed(reg_name) | mask), reg_name) 100 - 101 - #define HASH_CLEAR_BITS(reg_name, mask) \ 102 - writel_relaxed((readl_relaxed(reg_name) & ~mask), reg_name) 103 - 104 - #define HASH_PUT_BITS(reg, val, shift, mask) \ 105 - writel_relaxed(((readl(reg) & ~(mask)) | \ 106 - (((u32)val << shift) & (mask))), reg) 107 - 108 - #define HASH_SET_DIN(val, len) writesl(&device_data->base->din, (val), (len)) 109 - 110 - #define HASH_INITIALIZE \ 111 - HASH_PUT_BITS( \ 112 - &device_data->base->cr, \ 113 - 0x01, HASH_CR_INIT_POS, \ 114 - HASH_CR_INIT_MASK) 115 - 116 - #define HASH_SET_DATA_FORMAT(data_format) \ 117 - HASH_PUT_BITS( \ 118 - &device_data->base->cr, \ 119 - (u32) (data_format), HASH_CR_DATAFORM_POS, \ 120 - HASH_CR_DATAFORM_MASK) 121 - #define HASH_SET_NBLW(val) \ 122 - HASH_PUT_BITS( \ 123 - &device_data->base->str, \ 124 - (u32) (val), HASH_STR_NBLW_POS, \ 125 - HASH_STR_NBLW_MASK) 126 - #define HASH_SET_DCAL \ 127 - HASH_PUT_BITS( \ 128 - &device_data->base->str, \ 129 - 0x01, HASH_STR_DCAL_POS, \ 130 - HASH_STR_DCAL_MASK) 131 - 132 - /* Hardware access method */ 133 - enum hash_mode { 134 - 
HASH_MODE_CPU, 135 - HASH_MODE_DMA 136 - }; 137 - 138 - /** 139 - * struct uint64 - Structure to handle 64 bits integers. 140 - * @high_word: Most significant bits. 141 - * @low_word: Least significant bits. 142 - * 143 - * Used to handle 64 bits integers. 144 - */ 145 - struct uint64 { 146 - u32 high_word; 147 - u32 low_word; 148 - }; 149 - 150 - /** 151 - * struct hash_register - Contains all registers in ux500 hash hardware. 152 - * @cr: HASH control register (0x000). 153 - * @din: HASH data input register (0x004). 154 - * @str: HASH start register (0x008). 155 - * @hx: HASH digest register 0..7 (0x00c-0x01C). 156 - * @padding0: Reserved (0x02C). 157 - * @itcr: Integration test control register (0x080). 158 - * @itip: Integration test input register (0x084). 159 - * @itop: Integration test output register (0x088). 160 - * @padding1: Reserved (0x08C). 161 - * @csfull: HASH context full register (0x0F8). 162 - * @csdatain: HASH context swap data input register (0x0FC). 163 - * @csrx: HASH context swap register 0..51 (0x100-0x1CC). 164 - * @padding2: Reserved (0x1D0). 165 - * @periphid0: HASH peripheral identification register 0 (0xFE0). 166 - * @periphid1: HASH peripheral identification register 1 (0xFE4). 167 - * @periphid2: HASH peripheral identification register 2 (0xFE8). 168 - * @periphid3: HASH peripheral identification register 3 (0xFEC). 169 - * @cellid0: HASH PCell identification register 0 (0xFF0). 170 - * @cellid1: HASH PCell identification register 1 (0xFF4). 171 - * @cellid2: HASH PCell identification register 2 (0xFF8). 172 - * @cellid3: HASH PCell identification register 3 (0xFFC). 173 - * 174 - * The device communicates to the HASH via 32-bit-wide control registers 175 - * accessible via the 32-bit width AMBA rev. 2.0 AHB Bus. Below is a structure 176 - * with the registers used. 
177 - */ 178 - struct hash_register { 179 - u32 cr; 180 - u32 din; 181 - u32 str; 182 - u32 hx[8]; 183 - 184 - u32 padding0[(0x080 - 0x02C) / sizeof(u32)]; 185 - 186 - u32 itcr; 187 - u32 itip; 188 - u32 itop; 189 - 190 - u32 padding1[(0x0F8 - 0x08C) / sizeof(u32)]; 191 - 192 - u32 csfull; 193 - u32 csdatain; 194 - u32 csrx[HASH_CSR_COUNT]; 195 - 196 - u32 padding2[(0xFE0 - 0x1D0) / sizeof(u32)]; 197 - 198 - u32 periphid0; 199 - u32 periphid1; 200 - u32 periphid2; 201 - u32 periphid3; 202 - 203 - u32 cellid0; 204 - u32 cellid1; 205 - u32 cellid2; 206 - u32 cellid3; 207 - }; 208 - 209 - /** 210 - * struct hash_state - Hash context state. 211 - * @temp_cr: Temporary HASH Control Register. 212 - * @str_reg: HASH Start Register. 213 - * @din_reg: HASH Data Input Register. 214 - * @csr[52]: HASH Context Swap Registers 0-39. 215 - * @csfull: HASH Context Swap Registers 40 ie Status flags. 216 - * @csdatain: HASH Context Swap Registers 41 ie Input data. 217 - * @buffer: Working buffer for messages going to the hardware. 218 - * @length: Length of the part of message hashed so far (floor(N/64) * 64). 219 - * @index: Valid number of bytes in buffer (N % 64). 220 - * @bit_index: Valid number of bits in buffer (N % 8). 221 - * 222 - * This structure is used between context switches, i.e. when ongoing jobs are 223 - * interupted with new jobs. When this happens we need to store intermediate 224 - * results in software. 225 - * 226 - * WARNING: "index" is the member of the structure, to be sure that "buffer" 227 - * is aligned on a 4-bytes boundary. This is highly implementation dependent 228 - * and MUST be checked whenever this code is ported on new platforms. 
229 - */ 230 - struct hash_state { 231 - u32 temp_cr; 232 - u32 str_reg; 233 - u32 din_reg; 234 - u32 csr[52]; 235 - u32 csfull; 236 - u32 csdatain; 237 - u32 buffer[HASH_BLOCK_SIZE / sizeof(u32)]; 238 - struct uint64 length; 239 - u8 index; 240 - u8 bit_index; 241 - }; 242 - 243 - /** 244 - * enum hash_device_id - HASH device ID. 245 - * @HASH_DEVICE_ID_0: Hash hardware with ID 0 246 - * @HASH_DEVICE_ID_1: Hash hardware with ID 1 247 - */ 248 - enum hash_device_id { 249 - HASH_DEVICE_ID_0 = 0, 250 - HASH_DEVICE_ID_1 = 1 251 - }; 252 - 253 - /** 254 - * enum hash_data_format - HASH data format. 255 - * @HASH_DATA_32_BITS: 32 bits data format 256 - * @HASH_DATA_16_BITS: 16 bits data format 257 - * @HASH_DATA_8_BITS: 8 bits data format. 258 - * @HASH_DATA_1_BITS: 1 bit data format. 259 - */ 260 - enum hash_data_format { 261 - HASH_DATA_32_BITS = 0x0, 262 - HASH_DATA_16_BITS = 0x1, 263 - HASH_DATA_8_BITS = 0x2, 264 - HASH_DATA_1_BIT = 0x3 265 - }; 266 - 267 - /** 268 - * enum hash_algo - Enumeration for selecting between SHA1 or SHA2 algorithm. 269 - * @HASH_ALGO_SHA1: Indicates that SHA1 is used. 270 - * @HASH_ALGO_SHA2: Indicates that SHA2 (SHA256) is used. 271 - */ 272 - enum hash_algo { 273 - HASH_ALGO_SHA1 = 0x0, 274 - HASH_ALGO_SHA256 = 0x1 275 - }; 276 - 277 - /** 278 - * enum hash_op - Enumeration for selecting between HASH or HMAC mode. 279 - * @HASH_OPER_MODE_HASH: Indicates usage of normal HASH mode. 280 - * @HASH_OPER_MODE_HMAC: Indicates usage of HMAC. 281 - */ 282 - enum hash_op { 283 - HASH_OPER_MODE_HASH = 0x0, 284 - HASH_OPER_MODE_HMAC = 0x1 285 - }; 286 - 287 - /** 288 - * struct hash_config - Configuration data for the hardware. 289 - * @data_format: Format of data entered into the hash data in register. 290 - * @algorithm: Algorithm selection bit. 291 - * @oper_mode: Operating mode selection bit. 
292 - */ 293 - struct hash_config { 294 - int data_format; 295 - int algorithm; 296 - int oper_mode; 297 - }; 298 - 299 - /** 300 - * struct hash_dma - Structure used for dma. 301 - * @mask: DMA capabilities bitmap mask. 302 - * @complete: Used to maintain state for a "completion". 303 - * @chan_mem2hash: DMA channel. 304 - * @cfg_mem2hash: DMA channel configuration. 305 - * @sg_len: Scatterlist length. 306 - * @sg: Scatterlist. 307 - * @nents: Number of sg entries. 308 - */ 309 - struct hash_dma { 310 - dma_cap_mask_t mask; 311 - struct completion complete; 312 - struct dma_chan *chan_mem2hash; 313 - void *cfg_mem2hash; 314 - int sg_len; 315 - struct scatterlist *sg; 316 - int nents; 317 - }; 318 - 319 - /** 320 - * struct hash_ctx - The context used for hash calculations. 321 - * @key: The key used in the operation. 322 - * @keylen: The length of the key. 323 - * @state: The state of the current calculations. 324 - * @config: The current configuration. 325 - * @digestsize: The size of current digest. 326 - * @device: Pointer to the device structure. 327 - */ 328 - struct hash_ctx { 329 - u8 *key; 330 - u32 keylen; 331 - struct hash_config config; 332 - int digestsize; 333 - struct hash_device_data *device; 334 - }; 335 - 336 - /** 337 - * struct hash_ctx - The request context used for hash calculations. 338 - * @state: The state of the current calculations. 339 - * @dma_mode: Used in special cases (workaround), e.g. need to change to 340 - * cpu mode, if not supported/working in dma mode. 341 - * @updated: Indicates if hardware is initialized for new operations. 342 - */ 343 - struct hash_req_ctx { 344 - struct hash_state state; 345 - bool dma_mode; 346 - u8 updated; 347 - }; 348 - 349 - /** 350 - * struct hash_device_data - structure for a hash device. 351 - * @base: Pointer to virtual base address of the hash device. 352 - * @phybase: Pointer to physical memory location of the hash device. 353 - * @list_node: For inclusion in klist. 
354 - * @dev: Pointer to the device dev structure. 355 - * @ctx_lock: Spinlock for current_ctx. 356 - * @current_ctx: Pointer to the currently allocated context. 357 - * @power_state: TRUE = power state on, FALSE = power state off. 358 - * @power_state_lock: Spinlock for power_state. 359 - * @regulator: Pointer to the device's power control. 360 - * @clk: Pointer to the device's clock control. 361 - * @restore_dev_state: TRUE = saved state, FALSE = no saved state. 362 - * @dma: Structure used for dma. 363 - */ 364 - struct hash_device_data { 365 - struct hash_register __iomem *base; 366 - phys_addr_t phybase; 367 - struct klist_node list_node; 368 - struct device *dev; 369 - spinlock_t ctx_lock; 370 - struct hash_ctx *current_ctx; 371 - bool power_state; 372 - spinlock_t power_state_lock; 373 - struct regulator *regulator; 374 - struct clk *clk; 375 - bool restore_dev_state; 376 - struct hash_state state; /* Used for saving and resuming state */ 377 - struct hash_dma dma; 378 - }; 379 - 380 - int hash_check_hw(struct hash_device_data *device_data); 381 - 382 - int hash_setconfiguration(struct hash_device_data *device_data, 383 - struct hash_config *config); 384 - 385 - void hash_begin(struct hash_device_data *device_data, struct hash_ctx *ctx); 386 - 387 - void hash_get_digest(struct hash_device_data *device_data, 388 - u8 *digest, int algorithm); 389 - 390 - int hash_hw_update(struct ahash_request *req); 391 - 392 - int hash_save_state(struct hash_device_data *device_data, 393 - struct hash_state *state); 394 - 395 - int hash_resume_state(struct hash_device_data *device_data, 396 - const struct hash_state *state); 397 - 398 - #endif
-1966
drivers/crypto/ux500/hash/hash_core.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* 3 - * Cryptographic API. 4 - * Support for Nomadik hardware crypto engine. 5 - 6 - * Copyright (C) ST-Ericsson SA 2010 7 - * Author: Shujuan Chen <shujuan.chen@stericsson.com> for ST-Ericsson 8 - * Author: Joakim Bech <joakim.xx.bech@stericsson.com> for ST-Ericsson 9 - * Author: Berne Hebark <berne.herbark@stericsson.com> for ST-Ericsson. 10 - * Author: Niklas Hernaeus <niklas.hernaeus@stericsson.com> for ST-Ericsson. 11 - * Author: Andreas Westin <andreas.westin@stericsson.com> for ST-Ericsson. 12 - */ 13 - 14 - #define pr_fmt(fmt) "hashX hashX: " fmt 15 - 16 - #include <linux/clk.h> 17 - #include <linux/device.h> 18 - #include <linux/dma-mapping.h> 19 - #include <linux/err.h> 20 - #include <linux/init.h> 21 - #include <linux/io.h> 22 - #include <linux/klist.h> 23 - #include <linux/kernel.h> 24 - #include <linux/module.h> 25 - #include <linux/mod_devicetable.h> 26 - #include <linux/platform_device.h> 27 - #include <linux/crypto.h> 28 - 29 - #include <linux/regulator/consumer.h> 30 - #include <linux/dmaengine.h> 31 - #include <linux/bitops.h> 32 - 33 - #include <crypto/internal/hash.h> 34 - #include <crypto/sha1.h> 35 - #include <crypto/sha2.h> 36 - #include <crypto/scatterwalk.h> 37 - #include <crypto/algapi.h> 38 - 39 - #include <linux/platform_data/crypto-ux500.h> 40 - 41 - #include "hash_alg.h" 42 - 43 - static int hash_mode; 44 - module_param(hash_mode, int, 0); 45 - MODULE_PARM_DESC(hash_mode, "CPU or DMA mode. 
CPU = 0 (default), DMA = 1"); 46 - 47 - /* HMAC-SHA1, no key */ 48 - static const u8 zero_message_hmac_sha1[SHA1_DIGEST_SIZE] = { 49 - 0xfb, 0xdb, 0x1d, 0x1b, 0x18, 0xaa, 0x6c, 0x08, 50 - 0x32, 0x4b, 0x7d, 0x64, 0xb7, 0x1f, 0xb7, 0x63, 51 - 0x70, 0x69, 0x0e, 0x1d 52 - }; 53 - 54 - /* HMAC-SHA256, no key */ 55 - static const u8 zero_message_hmac_sha256[SHA256_DIGEST_SIZE] = { 56 - 0xb6, 0x13, 0x67, 0x9a, 0x08, 0x14, 0xd9, 0xec, 57 - 0x77, 0x2f, 0x95, 0xd7, 0x78, 0xc3, 0x5f, 0xc5, 58 - 0xff, 0x16, 0x97, 0xc4, 0x93, 0x71, 0x56, 0x53, 59 - 0xc6, 0xc7, 0x12, 0x14, 0x42, 0x92, 0xc5, 0xad 60 - }; 61 - 62 - /** 63 - * struct hash_driver_data - data specific to the driver. 64 - * 65 - * @device_list: A list of registered devices to choose from. 66 - * @device_allocation: A semaphore initialized with number of devices. 67 - */ 68 - struct hash_driver_data { 69 - struct klist device_list; 70 - struct semaphore device_allocation; 71 - }; 72 - 73 - static struct hash_driver_data driver_data; 74 - 75 - /* Declaration of functions */ 76 - /** 77 - * hash_messagepad - Pads a message and write the nblw bits. 78 - * @device_data: Structure for the hash device. 79 - * @message: Last word of a message 80 - * @index_bytes: The number of bytes in the last message 81 - * 82 - * This function manages the final part of the digest calculation, when less 83 - * than 512 bits (64 bytes) remain in message. This means index_bytes < 64. 84 - * 85 - */ 86 - static void hash_messagepad(struct hash_device_data *device_data, 87 - const u32 *message, u8 index_bytes); 88 - 89 - /** 90 - * release_hash_device - Releases a previously allocated hash device. 91 - * @device_data: Structure for the hash device. 
92 - * 93 - */ 94 - static void release_hash_device(struct hash_device_data *device_data) 95 - { 96 - spin_lock(&device_data->ctx_lock); 97 - device_data->current_ctx->device = NULL; 98 - device_data->current_ctx = NULL; 99 - spin_unlock(&device_data->ctx_lock); 100 - 101 - /* 102 - * The down_interruptible part for this semaphore is called in 103 - * cryp_get_device_data. 104 - */ 105 - up(&driver_data.device_allocation); 106 - } 107 - 108 - static void hash_dma_setup_channel(struct hash_device_data *device_data, 109 - struct device *dev) 110 - { 111 - struct hash_platform_data *platform_data = dev->platform_data; 112 - struct dma_slave_config conf = { 113 - .direction = DMA_MEM_TO_DEV, 114 - .dst_addr = device_data->phybase + HASH_DMA_FIFO, 115 - .dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES, 116 - .dst_maxburst = 16, 117 - }; 118 - 119 - dma_cap_zero(device_data->dma.mask); 120 - dma_cap_set(DMA_SLAVE, device_data->dma.mask); 121 - 122 - device_data->dma.cfg_mem2hash = platform_data->mem_to_engine; 123 - device_data->dma.chan_mem2hash = 124 - dma_request_channel(device_data->dma.mask, 125 - platform_data->dma_filter, 126 - device_data->dma.cfg_mem2hash); 127 - 128 - dmaengine_slave_config(device_data->dma.chan_mem2hash, &conf); 129 - 130 - init_completion(&device_data->dma.complete); 131 - } 132 - 133 - static void hash_dma_callback(void *data) 134 - { 135 - struct hash_ctx *ctx = data; 136 - 137 - complete(&ctx->device->dma.complete); 138 - } 139 - 140 - static int hash_set_dma_transfer(struct hash_ctx *ctx, struct scatterlist *sg, 141 - int len, enum dma_data_direction direction) 142 - { 143 - struct dma_async_tx_descriptor *desc = NULL; 144 - struct dma_chan *channel = NULL; 145 - 146 - if (direction != DMA_TO_DEVICE) { 147 - dev_err(ctx->device->dev, "%s: Invalid DMA direction\n", 148 - __func__); 149 - return -EFAULT; 150 - } 151 - 152 - sg->length = ALIGN(sg->length, HASH_DMA_ALIGN_SIZE); 153 - 154 - channel = ctx->device->dma.chan_mem2hash; 155 - 
ctx->device->dma.sg = sg; 156 - ctx->device->dma.sg_len = dma_map_sg(channel->device->dev, 157 - ctx->device->dma.sg, ctx->device->dma.nents, 158 - direction); 159 - 160 - if (!ctx->device->dma.sg_len) { 161 - dev_err(ctx->device->dev, "%s: Could not map the sg list (TO_DEVICE)\n", 162 - __func__); 163 - return -EFAULT; 164 - } 165 - 166 - dev_dbg(ctx->device->dev, "%s: Setting up DMA for buffer (TO_DEVICE)\n", 167 - __func__); 168 - desc = dmaengine_prep_slave_sg(channel, 169 - ctx->device->dma.sg, ctx->device->dma.sg_len, 170 - DMA_MEM_TO_DEV, DMA_CTRL_ACK | DMA_PREP_INTERRUPT); 171 - if (!desc) { 172 - dev_err(ctx->device->dev, 173 - "%s: dmaengine_prep_slave_sg() failed!\n", __func__); 174 - return -EFAULT; 175 - } 176 - 177 - desc->callback = hash_dma_callback; 178 - desc->callback_param = ctx; 179 - 180 - dmaengine_submit(desc); 181 - dma_async_issue_pending(channel); 182 - 183 - return 0; 184 - } 185 - 186 - static void hash_dma_done(struct hash_ctx *ctx) 187 - { 188 - struct dma_chan *chan; 189 - 190 - chan = ctx->device->dma.chan_mem2hash; 191 - dmaengine_terminate_all(chan); 192 - dma_unmap_sg(chan->device->dev, ctx->device->dma.sg, 193 - ctx->device->dma.nents, DMA_TO_DEVICE); 194 - } 195 - 196 - static int hash_dma_write(struct hash_ctx *ctx, 197 - struct scatterlist *sg, int len) 198 - { 199 - int error = hash_set_dma_transfer(ctx, sg, len, DMA_TO_DEVICE); 200 - if (error) { 201 - dev_dbg(ctx->device->dev, 202 - "%s: hash_set_dma_transfer() failed\n", __func__); 203 - return error; 204 - } 205 - 206 - return len; 207 - } 208 - 209 - /** 210 - * get_empty_message_digest - Returns a pre-calculated digest for 211 - * the empty message. 212 - * @device_data: Structure for the hash device. 213 - * @zero_hash: Buffer to return the empty message digest. 214 - * @zero_hash_size: Hash size of the empty message digest. 215 - * @zero_digest: True if zero_digest returned. 
216 - */ 217 - static int get_empty_message_digest( 218 - struct hash_device_data *device_data, 219 - u8 *zero_hash, u32 *zero_hash_size, bool *zero_digest) 220 - { 221 - int ret = 0; 222 - struct hash_ctx *ctx = device_data->current_ctx; 223 - *zero_digest = false; 224 - 225 - /** 226 - * Caller responsible for ctx != NULL. 227 - */ 228 - 229 - if (HASH_OPER_MODE_HASH == ctx->config.oper_mode) { 230 - if (HASH_ALGO_SHA1 == ctx->config.algorithm) { 231 - memcpy(zero_hash, &sha1_zero_message_hash[0], 232 - SHA1_DIGEST_SIZE); 233 - *zero_hash_size = SHA1_DIGEST_SIZE; 234 - *zero_digest = true; 235 - } else if (HASH_ALGO_SHA256 == 236 - ctx->config.algorithm) { 237 - memcpy(zero_hash, &sha256_zero_message_hash[0], 238 - SHA256_DIGEST_SIZE); 239 - *zero_hash_size = SHA256_DIGEST_SIZE; 240 - *zero_digest = true; 241 - } else { 242 - dev_err(device_data->dev, "%s: Incorrect algorithm!\n", 243 - __func__); 244 - ret = -EINVAL; 245 - goto out; 246 - } 247 - } else if (HASH_OPER_MODE_HMAC == ctx->config.oper_mode) { 248 - if (!ctx->keylen) { 249 - if (HASH_ALGO_SHA1 == ctx->config.algorithm) { 250 - memcpy(zero_hash, &zero_message_hmac_sha1[0], 251 - SHA1_DIGEST_SIZE); 252 - *zero_hash_size = SHA1_DIGEST_SIZE; 253 - *zero_digest = true; 254 - } else if (HASH_ALGO_SHA256 == ctx->config.algorithm) { 255 - memcpy(zero_hash, &zero_message_hmac_sha256[0], 256 - SHA256_DIGEST_SIZE); 257 - *zero_hash_size = SHA256_DIGEST_SIZE; 258 - *zero_digest = true; 259 - } else { 260 - dev_err(device_data->dev, "%s: Incorrect algorithm!\n", 261 - __func__); 262 - ret = -EINVAL; 263 - goto out; 264 - } 265 - } else { 266 - dev_dbg(device_data->dev, 267 - "%s: Continue hash calculation, since hmac key available\n", 268 - __func__); 269 - } 270 - } 271 - out: 272 - 273 - return ret; 274 - } 275 - 276 - /** 277 - * hash_disable_power - Request to disable power and clock. 278 - * @device_data: Structure for the hash device. 279 - * @save_device_state: If true, saves the current hw state. 
280 - * 281 - * This function request for disabling power (regulator) and clock, 282 - * and could also save current hw state. 283 - */ 284 - static int hash_disable_power(struct hash_device_data *device_data, 285 - bool save_device_state) 286 - { 287 - int ret = 0; 288 - struct device *dev = device_data->dev; 289 - 290 - spin_lock(&device_data->power_state_lock); 291 - if (!device_data->power_state) 292 - goto out; 293 - 294 - if (save_device_state) { 295 - hash_save_state(device_data, 296 - &device_data->state); 297 - device_data->restore_dev_state = true; 298 - } 299 - 300 - clk_disable(device_data->clk); 301 - ret = regulator_disable(device_data->regulator); 302 - if (ret) 303 - dev_err(dev, "%s: regulator_disable() failed!\n", __func__); 304 - 305 - device_data->power_state = false; 306 - 307 - out: 308 - spin_unlock(&device_data->power_state_lock); 309 - 310 - return ret; 311 - } 312 - 313 - /** 314 - * hash_enable_power - Request to enable power and clock. 315 - * @device_data: Structure for the hash device. 316 - * @restore_device_state: If true, restores a previous saved hw state. 317 - * 318 - * This function request for enabling power (regulator) and clock, 319 - * and could also restore a previously saved hw state. 
320 - */ 321 - static int hash_enable_power(struct hash_device_data *device_data, 322 - bool restore_device_state) 323 - { 324 - int ret = 0; 325 - struct device *dev = device_data->dev; 326 - 327 - spin_lock(&device_data->power_state_lock); 328 - if (!device_data->power_state) { 329 - ret = regulator_enable(device_data->regulator); 330 - if (ret) { 331 - dev_err(dev, "%s: regulator_enable() failed!\n", 332 - __func__); 333 - goto out; 334 - } 335 - ret = clk_enable(device_data->clk); 336 - if (ret) { 337 - dev_err(dev, "%s: clk_enable() failed!\n", __func__); 338 - ret = regulator_disable( 339 - device_data->regulator); 340 - goto out; 341 - } 342 - device_data->power_state = true; 343 - } 344 - 345 - if (device_data->restore_dev_state) { 346 - if (restore_device_state) { 347 - device_data->restore_dev_state = false; 348 - hash_resume_state(device_data, &device_data->state); 349 - } 350 - } 351 - out: 352 - spin_unlock(&device_data->power_state_lock); 353 - 354 - return ret; 355 - } 356 - 357 - /** 358 - * hash_get_device_data - Checks for an available hash device and return it. 359 - * @ctx: Structure for the hash context. 360 - * @device_data: Structure for the hash device. 361 - * 362 - * This function check for an available hash device and return it to 363 - * the caller. 364 - * Note! Caller need to release the device, calling up(). 
365 - */ 366 - static int hash_get_device_data(struct hash_ctx *ctx, 367 - struct hash_device_data **device_data) 368 - { 369 - int ret; 370 - struct klist_iter device_iterator; 371 - struct klist_node *device_node; 372 - struct hash_device_data *local_device_data = NULL; 373 - 374 - /* Wait until a device is available */ 375 - ret = down_interruptible(&driver_data.device_allocation); 376 - if (ret) 377 - return ret; /* Interrupted */ 378 - 379 - /* Select a device */ 380 - klist_iter_init(&driver_data.device_list, &device_iterator); 381 - device_node = klist_next(&device_iterator); 382 - while (device_node) { 383 - local_device_data = container_of(device_node, 384 - struct hash_device_data, list_node); 385 - spin_lock(&local_device_data->ctx_lock); 386 - /* current_ctx allocates a device, NULL = unallocated */ 387 - if (local_device_data->current_ctx) { 388 - device_node = klist_next(&device_iterator); 389 - } else { 390 - local_device_data->current_ctx = ctx; 391 - ctx->device = local_device_data; 392 - spin_unlock(&local_device_data->ctx_lock); 393 - break; 394 - } 395 - spin_unlock(&local_device_data->ctx_lock); 396 - } 397 - klist_iter_exit(&device_iterator); 398 - 399 - if (!device_node) { 400 - /** 401 - * No free device found. 402 - * Since we allocated a device with down_interruptible, this 403 - * should not be able to happen. 404 - * Number of available devices, which are contained in 405 - * device_allocation, is therefore decremented by not doing 406 - * an up(device_allocation). 407 - */ 408 - return -EBUSY; 409 - } 410 - 411 - *device_data = local_device_data; 412 - 413 - return 0; 414 - } 415 - 416 - /** 417 - * hash_hw_write_key - Writes the key to the hardware registries. 418 - * 419 - * @device_data: Structure for the hash device. 420 - * @key: Key to be written. 421 - * @keylen: The lengt of the key. 422 - * 423 - * Note! This function DOES NOT write to the NBLW registry, even though 424 - * specified in the hw design spec. 
Either due to incorrect info in the 425 - * spec or due to a bug in the hw. 426 - */ 427 - static void hash_hw_write_key(struct hash_device_data *device_data, 428 - const u8 *key, unsigned int keylen) 429 - { 430 - u32 word = 0; 431 - int nwords = 1; 432 - 433 - HASH_CLEAR_BITS(&device_data->base->str, HASH_STR_NBLW_MASK); 434 - 435 - while (keylen >= 4) { 436 - u32 *key_word = (u32 *)key; 437 - 438 - HASH_SET_DIN(key_word, nwords); 439 - keylen -= 4; 440 - key += 4; 441 - } 442 - 443 - /* Take care of the remaining bytes in the last word */ 444 - if (keylen) { 445 - word = 0; 446 - while (keylen) { 447 - word |= (key[keylen - 1] << (8 * (keylen - 1))); 448 - keylen--; 449 - } 450 - 451 - HASH_SET_DIN(&word, nwords); 452 - } 453 - 454 - while (readl(&device_data->base->str) & HASH_STR_DCAL_MASK) 455 - cpu_relax(); 456 - 457 - HASH_SET_DCAL; 458 - 459 - while (readl(&device_data->base->str) & HASH_STR_DCAL_MASK) 460 - cpu_relax(); 461 - } 462 - 463 - /** 464 - * init_hash_hw - Initialise the hash hardware for a new calculation. 465 - * @device_data: Structure for the hash device. 466 - * @ctx: The hash context. 467 - * 468 - * This function will enable the bits needed to clear and start a new 469 - * calculation. 470 - */ 471 - static int init_hash_hw(struct hash_device_data *device_data, 472 - struct hash_ctx *ctx) 473 - { 474 - int ret = 0; 475 - 476 - ret = hash_setconfiguration(device_data, &ctx->config); 477 - if (ret) { 478 - dev_err(device_data->dev, "%s: hash_setconfiguration() failed!\n", 479 - __func__); 480 - return ret; 481 - } 482 - 483 - hash_begin(device_data, ctx); 484 - 485 - if (ctx->config.oper_mode == HASH_OPER_MODE_HMAC) 486 - hash_hw_write_key(device_data, ctx->key, ctx->keylen); 487 - 488 - return ret; 489 - } 490 - 491 - /** 492 - * hash_get_nents - Return number of entries (nents) in scatterlist (sg). 493 - * 494 - * @sg: Scatterlist. 495 - * @size: Size in bytes. 496 - * @aligned: True if sg data aligned to work in DMA mode. 
497 - * 498 - */ 499 - static int hash_get_nents(struct scatterlist *sg, int size, bool *aligned) 500 - { 501 - int nents = 0; 502 - bool aligned_data = true; 503 - 504 - while (size > 0 && sg) { 505 - nents++; 506 - size -= sg->length; 507 - 508 - /* hash_set_dma_transfer will align last nent */ 509 - if ((aligned && !IS_ALIGNED(sg->offset, HASH_DMA_ALIGN_SIZE)) || 510 - (!IS_ALIGNED(sg->length, HASH_DMA_ALIGN_SIZE) && size > 0)) 511 - aligned_data = false; 512 - 513 - sg = sg_next(sg); 514 - } 515 - 516 - if (aligned) 517 - *aligned = aligned_data; 518 - 519 - if (size != 0) 520 - return -EFAULT; 521 - 522 - return nents; 523 - } 524 - 525 - /** 526 - * hash_dma_valid_data - checks for dma valid sg data. 527 - * @sg: Scatterlist. 528 - * @datasize: Datasize in bytes. 529 - * 530 - * NOTE! This function checks for dma valid sg data, since dma 531 - * only accept datasizes of even wordsize. 532 - */ 533 - static bool hash_dma_valid_data(struct scatterlist *sg, int datasize) 534 - { 535 - bool aligned; 536 - 537 - /* Need to include at least one nent, else error */ 538 - if (hash_get_nents(sg, datasize, &aligned) < 1) 539 - return false; 540 - 541 - return aligned; 542 - } 543 - 544 - /** 545 - * ux500_hash_init - Common hash init function for SHA1/SHA2 (SHA256). 546 - * @req: The hash request for the job. 547 - * 548 - * Initialize structures. 
549 - */ 550 - static int ux500_hash_init(struct ahash_request *req) 551 - { 552 - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 553 - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); 554 - struct hash_req_ctx *req_ctx = ahash_request_ctx(req); 555 - 556 - if (!ctx->key) 557 - ctx->keylen = 0; 558 - 559 - memset(&req_ctx->state, 0, sizeof(struct hash_state)); 560 - req_ctx->updated = 0; 561 - if (hash_mode == HASH_MODE_DMA) { 562 - if (req->nbytes < HASH_DMA_ALIGN_SIZE) { 563 - req_ctx->dma_mode = false; /* Don't use DMA */ 564 - 565 - pr_debug("%s: DMA mode, but direct to CPU mode for data size < %d\n", 566 - __func__, HASH_DMA_ALIGN_SIZE); 567 - } else { 568 - if (req->nbytes >= HASH_DMA_PERFORMANCE_MIN_SIZE && 569 - hash_dma_valid_data(req->src, req->nbytes)) { 570 - req_ctx->dma_mode = true; 571 - } else { 572 - req_ctx->dma_mode = false; 573 - pr_debug("%s: DMA mode, but use CPU mode for datalength < %d or non-aligned data, except in last nent\n", 574 - __func__, 575 - HASH_DMA_PERFORMANCE_MIN_SIZE); 576 - } 577 - } 578 - } 579 - return 0; 580 - } 581 - 582 - /** 583 - * hash_processblock - This function processes a single block of 512 bits (64 584 - * bytes), word aligned, starting at message. 585 - * @device_data: Structure for the hash device. 586 - * @message: Block (512 bits) of message to be written to 587 - * the HASH hardware. 588 - * @length: Message length 589 - * 590 - */ 591 - static void hash_processblock(struct hash_device_data *device_data, 592 - const u32 *message, int length) 593 - { 594 - int len = length / HASH_BYTES_PER_WORD; 595 - /* 596 - * NBLW bits. Reset the number of bits in last word (NBLW). 597 - */ 598 - HASH_CLEAR_BITS(&device_data->base->str, HASH_STR_NBLW_MASK); 599 - 600 - /* 601 - * Write message data to the HASH_DIN register. 602 - */ 603 - HASH_SET_DIN(message, len); 604 - } 605 - 606 - /** 607 - * hash_messagepad - Pads a message and write the nblw bits. 608 - * @device_data: Structure for the hash device. 
609 - * @message: Last word of a message. 610 - * @index_bytes: The number of bytes in the last message. 611 - * 612 - * This function manages the final part of the digest calculation, when less 613 - * than 512 bits (64 bytes) remain in message. This means index_bytes < 64. 614 - * 615 - */ 616 - static void hash_messagepad(struct hash_device_data *device_data, 617 - const u32 *message, u8 index_bytes) 618 - { 619 - int nwords = 1; 620 - 621 - /* 622 - * Clear hash str register, only clear NBLW 623 - * since DCAL will be reset by hardware. 624 - */ 625 - HASH_CLEAR_BITS(&device_data->base->str, HASH_STR_NBLW_MASK); 626 - 627 - /* Main loop */ 628 - while (index_bytes >= 4) { 629 - HASH_SET_DIN(message, nwords); 630 - index_bytes -= 4; 631 - message++; 632 - } 633 - 634 - if (index_bytes) 635 - HASH_SET_DIN(message, nwords); 636 - 637 - while (readl(&device_data->base->str) & HASH_STR_DCAL_MASK) 638 - cpu_relax(); 639 - 640 - /* num_of_bytes == 0 => NBLW <- 0 (32 bits valid in DATAIN) */ 641 - HASH_SET_NBLW(index_bytes * 8); 642 - dev_dbg(device_data->dev, "%s: DIN=0x%08x NBLW=%lu\n", 643 - __func__, readl_relaxed(&device_data->base->din), 644 - readl_relaxed(&device_data->base->str) & HASH_STR_NBLW_MASK); 645 - HASH_SET_DCAL; 646 - dev_dbg(device_data->dev, "%s: after dcal -> DIN=0x%08x NBLW=%lu\n", 647 - __func__, readl_relaxed(&device_data->base->din), 648 - readl_relaxed(&device_data->base->str) & HASH_STR_NBLW_MASK); 649 - 650 - while (readl(&device_data->base->str) & HASH_STR_DCAL_MASK) 651 - cpu_relax(); 652 - } 653 - 654 - /** 655 - * hash_incrementlength - Increments the length of the current message. 656 - * @ctx: Hash context 657 - * @incr: Length of message processed already 658 - * 659 - * Overflow cannot occur, because conditions for overflow are checked in 660 - * hash_hw_update. 
661 - */ 662 - static void hash_incrementlength(struct hash_req_ctx *ctx, u32 incr) 663 - { 664 - ctx->state.length.low_word += incr; 665 - 666 - /* Check for wrap-around */ 667 - if (ctx->state.length.low_word < incr) 668 - ctx->state.length.high_word++; 669 - } 670 - 671 - /** 672 - * hash_setconfiguration - Sets the required configuration for the hash 673 - * hardware. 674 - * @device_data: Structure for the hash device. 675 - * @config: Pointer to a configuration structure. 676 - */ 677 - int hash_setconfiguration(struct hash_device_data *device_data, 678 - struct hash_config *config) 679 - { 680 - int ret = 0; 681 - 682 - if (config->algorithm != HASH_ALGO_SHA1 && 683 - config->algorithm != HASH_ALGO_SHA256) 684 - return -EPERM; 685 - 686 - /* 687 - * DATAFORM bits. Set the DATAFORM bits to 0b11, which means the data 688 - * to be written to HASH_DIN is considered as 32 bits. 689 - */ 690 - HASH_SET_DATA_FORMAT(config->data_format); 691 - 692 - /* 693 - * ALGO bit. Set to 0b1 for SHA-1 and 0b0 for SHA-256 694 - */ 695 - switch (config->algorithm) { 696 - case HASH_ALGO_SHA1: 697 - HASH_SET_BITS(&device_data->base->cr, HASH_CR_ALGO_MASK); 698 - break; 699 - 700 - case HASH_ALGO_SHA256: 701 - HASH_CLEAR_BITS(&device_data->base->cr, HASH_CR_ALGO_MASK); 702 - break; 703 - 704 - default: 705 - dev_err(device_data->dev, "%s: Incorrect algorithm\n", 706 - __func__); 707 - return -EPERM; 708 - } 709 - 710 - /* 711 - * MODE bit. This bit selects between HASH or HMAC mode for the 712 - * selected algorithm. 0b0 = HASH and 0b1 = HMAC. 
713 - */ 714 - if (HASH_OPER_MODE_HASH == config->oper_mode) 715 - HASH_CLEAR_BITS(&device_data->base->cr, 716 - HASH_CR_MODE_MASK); 717 - else if (HASH_OPER_MODE_HMAC == config->oper_mode) { 718 - HASH_SET_BITS(&device_data->base->cr, HASH_CR_MODE_MASK); 719 - if (device_data->current_ctx->keylen > HASH_BLOCK_SIZE) { 720 - /* Truncate key to blocksize */ 721 - dev_dbg(device_data->dev, "%s: LKEY set\n", __func__); 722 - HASH_SET_BITS(&device_data->base->cr, 723 - HASH_CR_LKEY_MASK); 724 - } else { 725 - dev_dbg(device_data->dev, "%s: LKEY cleared\n", 726 - __func__); 727 - HASH_CLEAR_BITS(&device_data->base->cr, 728 - HASH_CR_LKEY_MASK); 729 - } 730 - } else { /* Wrong hash mode */ 731 - ret = -EPERM; 732 - dev_err(device_data->dev, "%s: HASH_INVALID_PARAMETER!\n", 733 - __func__); 734 - } 735 - return ret; 736 - } 737 - 738 - /** 739 - * hash_begin - This routine resets some globals and initializes the hash 740 - * hardware. 741 - * @device_data: Structure for the hash device. 742 - * @ctx: Hash context. 743 - */ 744 - void hash_begin(struct hash_device_data *device_data, struct hash_ctx *ctx) 745 - { 746 - /* HW and SW initializations */ 747 - /* Note: there is no need to initialize buffer and digest members */ 748 - 749 - while (readl(&device_data->base->str) & HASH_STR_DCAL_MASK) 750 - cpu_relax(); 751 - 752 - /* 753 - * INIT bit. Set this bit to 0b1 to reset the HASH processor core and 754 - * prepare the initialize the HASH accelerator to compute the message 755 - * digest of a new message. 756 - */ 757 - HASH_INITIALIZE; 758 - 759 - /* 760 - * NBLW bits. Reset the number of bits in last word (NBLW). 
761 - */ 762 - HASH_CLEAR_BITS(&device_data->base->str, HASH_STR_NBLW_MASK); 763 - } 764 - 765 - static int hash_process_data(struct hash_device_data *device_data, 766 - struct hash_ctx *ctx, struct hash_req_ctx *req_ctx, 767 - int msg_length, u8 *data_buffer, u8 *buffer, 768 - u8 *index) 769 - { 770 - int ret = 0; 771 - u32 count; 772 - 773 - do { 774 - if ((*index + msg_length) < HASH_BLOCK_SIZE) { 775 - for (count = 0; count < msg_length; count++) { 776 - buffer[*index + count] = 777 - *(data_buffer + count); 778 - } 779 - *index += msg_length; 780 - msg_length = 0; 781 - } else { 782 - if (req_ctx->updated) { 783 - ret = hash_resume_state(device_data, 784 - &device_data->state); 785 - memmove(req_ctx->state.buffer, 786 - device_data->state.buffer, 787 - HASH_BLOCK_SIZE); 788 - if (ret) { 789 - dev_err(device_data->dev, 790 - "%s: hash_resume_state() failed!\n", 791 - __func__); 792 - goto out; 793 - } 794 - } else { 795 - ret = init_hash_hw(device_data, ctx); 796 - if (ret) { 797 - dev_err(device_data->dev, 798 - "%s: init_hash_hw() failed!\n", 799 - __func__); 800 - goto out; 801 - } 802 - req_ctx->updated = 1; 803 - } 804 - /* 805 - * If 'data_buffer' is four byte aligned and 806 - * local buffer does not have any data, we can 807 - * write data directly from 'data_buffer' to 808 - * HW peripheral, otherwise we first copy data 809 - * to a local buffer 810 - */ 811 - if (IS_ALIGNED((unsigned long)data_buffer, 4) && 812 - (0 == *index)) 813 - hash_processblock(device_data, 814 - (const u32 *)data_buffer, 815 - HASH_BLOCK_SIZE); 816 - else { 817 - for (count = 0; 818 - count < (u32)(HASH_BLOCK_SIZE - *index); 819 - count++) { 820 - buffer[*index + count] = 821 - *(data_buffer + count); 822 - } 823 - hash_processblock(device_data, 824 - (const u32 *)buffer, 825 - HASH_BLOCK_SIZE); 826 - } 827 - hash_incrementlength(req_ctx, HASH_BLOCK_SIZE); 828 - data_buffer += (HASH_BLOCK_SIZE - *index); 829 - 830 - msg_length -= (HASH_BLOCK_SIZE - *index); 831 - *index = 0; 
832 - 833 - ret = hash_save_state(device_data, 834 - &device_data->state); 835 - 836 - memmove(device_data->state.buffer, 837 - req_ctx->state.buffer, 838 - HASH_BLOCK_SIZE); 839 - if (ret) { 840 - dev_err(device_data->dev, "%s: hash_save_state() failed!\n", 841 - __func__); 842 - goto out; 843 - } 844 - } 845 - } while (msg_length != 0); 846 - out: 847 - 848 - return ret; 849 - } 850 - 851 - /** 852 - * hash_dma_final - The hash dma final function for SHA1/SHA256. 853 - * @req: The hash request for the job. 854 - */ 855 - static int hash_dma_final(struct ahash_request *req) 856 - { 857 - int ret = 0; 858 - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 859 - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); 860 - struct hash_req_ctx *req_ctx = ahash_request_ctx(req); 861 - struct hash_device_data *device_data; 862 - u8 digest[SHA256_DIGEST_SIZE]; 863 - int bytes_written = 0; 864 - 865 - ret = hash_get_device_data(ctx, &device_data); 866 - if (ret) 867 - return ret; 868 - 869 - dev_dbg(device_data->dev, "%s: (ctx=0x%lx)!\n", __func__, 870 - (unsigned long)ctx); 871 - 872 - if (req_ctx->updated) { 873 - ret = hash_resume_state(device_data, &device_data->state); 874 - 875 - if (ret) { 876 - dev_err(device_data->dev, "%s: hash_resume_state() failed!\n", 877 - __func__); 878 - goto out; 879 - } 880 - } else { 881 - ret = hash_setconfiguration(device_data, &ctx->config); 882 - if (ret) { 883 - dev_err(device_data->dev, 884 - "%s: hash_setconfiguration() failed!\n", 885 - __func__); 886 - goto out; 887 - } 888 - 889 - /* Enable DMA input */ 890 - if (hash_mode != HASH_MODE_DMA || !req_ctx->dma_mode) { 891 - HASH_CLEAR_BITS(&device_data->base->cr, 892 - HASH_CR_DMAE_MASK); 893 - } else { 894 - HASH_SET_BITS(&device_data->base->cr, 895 - HASH_CR_DMAE_MASK); 896 - HASH_SET_BITS(&device_data->base->cr, 897 - HASH_CR_PRIVN_MASK); 898 - } 899 - 900 - HASH_INITIALIZE; 901 - 902 - if (ctx->config.oper_mode == HASH_OPER_MODE_HMAC) 903 - hash_hw_write_key(device_data, ctx->key, 
ctx->keylen); 904 - 905 - /* Number of bits in last word = (nbytes * 8) % 32 */ 906 - HASH_SET_NBLW((req->nbytes * 8) % 32); 907 - req_ctx->updated = 1; 908 - } 909 - 910 - /* Store the nents in the dma struct. */ 911 - ctx->device->dma.nents = hash_get_nents(req->src, req->nbytes, NULL); 912 - if (!ctx->device->dma.nents) { 913 - dev_err(device_data->dev, "%s: ctx->device->dma.nents = 0\n", 914 - __func__); 915 - ret = ctx->device->dma.nents; 916 - goto out; 917 - } 918 - 919 - bytes_written = hash_dma_write(ctx, req->src, req->nbytes); 920 - if (bytes_written != req->nbytes) { 921 - dev_err(device_data->dev, "%s: hash_dma_write() failed!\n", 922 - __func__); 923 - ret = bytes_written; 924 - goto out; 925 - } 926 - 927 - wait_for_completion(&ctx->device->dma.complete); 928 - hash_dma_done(ctx); 929 - 930 - while (readl(&device_data->base->str) & HASH_STR_DCAL_MASK) 931 - cpu_relax(); 932 - 933 - if (ctx->config.oper_mode == HASH_OPER_MODE_HMAC && ctx->key) { 934 - unsigned int keylen = ctx->keylen; 935 - u8 *key = ctx->key; 936 - 937 - dev_dbg(device_data->dev, "%s: keylen: %d\n", 938 - __func__, ctx->keylen); 939 - hash_hw_write_key(device_data, key, keylen); 940 - } 941 - 942 - hash_get_digest(device_data, digest, ctx->config.algorithm); 943 - memcpy(req->result, digest, ctx->digestsize); 944 - 945 - out: 946 - release_hash_device(device_data); 947 - 948 - /** 949 - * Allocated in setkey, and only used in HMAC. 950 - */ 951 - kfree(ctx->key); 952 - 953 - return ret; 954 - } 955 - 956 - /** 957 - * hash_hw_final - The final hash calculation function 958 - * @req: The hash request for the job. 
959 - */ 960 - static int hash_hw_final(struct ahash_request *req) 961 - { 962 - int ret = 0; 963 - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 964 - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); 965 - struct hash_req_ctx *req_ctx = ahash_request_ctx(req); 966 - struct hash_device_data *device_data; 967 - u8 digest[SHA256_DIGEST_SIZE]; 968 - 969 - ret = hash_get_device_data(ctx, &device_data); 970 - if (ret) 971 - return ret; 972 - 973 - dev_dbg(device_data->dev, "%s: (ctx=0x%lx)!\n", __func__, 974 - (unsigned long)ctx); 975 - 976 - if (req_ctx->updated) { 977 - ret = hash_resume_state(device_data, &device_data->state); 978 - 979 - if (ret) { 980 - dev_err(device_data->dev, 981 - "%s: hash_resume_state() failed!\n", __func__); 982 - goto out; 983 - } 984 - } else if (req->nbytes == 0 && ctx->keylen == 0) { 985 - u8 zero_hash[SHA256_DIGEST_SIZE]; 986 - u32 zero_hash_size = 0; 987 - bool zero_digest = false; 988 - /** 989 - * Use a pre-calculated empty message digest 990 - * (workaround since hw return zeroes, hw bug!?) 991 - */ 992 - ret = get_empty_message_digest(device_data, &zero_hash[0], 993 - &zero_hash_size, &zero_digest); 994 - if (!ret && likely(zero_hash_size == ctx->digestsize) && 995 - zero_digest) { 996 - memcpy(req->result, &zero_hash[0], ctx->digestsize); 997 - goto out; 998 - } else if (!ret && !zero_digest) { 999 - dev_dbg(device_data->dev, 1000 - "%s: HMAC zero msg with key, continue...\n", 1001 - __func__); 1002 - } else { 1003 - dev_err(device_data->dev, 1004 - "%s: ret=%d, or wrong digest size? %s\n", 1005 - __func__, ret, 1006 - zero_hash_size == ctx->digestsize ? 
1007 - "true" : "false"); 1008 - /* Return error */ 1009 - goto out; 1010 - } 1011 - } else if (req->nbytes == 0 && ctx->keylen > 0) { 1012 - ret = -EPERM; 1013 - dev_err(device_data->dev, "%s: Empty message with keylength > 0, NOT supported\n", 1014 - __func__); 1015 - goto out; 1016 - } 1017 - 1018 - if (!req_ctx->updated) { 1019 - ret = init_hash_hw(device_data, ctx); 1020 - if (ret) { 1021 - dev_err(device_data->dev, 1022 - "%s: init_hash_hw() failed!\n", __func__); 1023 - goto out; 1024 - } 1025 - } 1026 - 1027 - if (req_ctx->state.index) { 1028 - hash_messagepad(device_data, req_ctx->state.buffer, 1029 - req_ctx->state.index); 1030 - } else { 1031 - HASH_SET_DCAL; 1032 - while (readl(&device_data->base->str) & HASH_STR_DCAL_MASK) 1033 - cpu_relax(); 1034 - } 1035 - 1036 - if (ctx->config.oper_mode == HASH_OPER_MODE_HMAC && ctx->key) { 1037 - unsigned int keylen = ctx->keylen; 1038 - u8 *key = ctx->key; 1039 - 1040 - dev_dbg(device_data->dev, "%s: keylen: %d\n", 1041 - __func__, ctx->keylen); 1042 - hash_hw_write_key(device_data, key, keylen); 1043 - } 1044 - 1045 - hash_get_digest(device_data, digest, ctx->config.algorithm); 1046 - memcpy(req->result, digest, ctx->digestsize); 1047 - 1048 - out: 1049 - release_hash_device(device_data); 1050 - 1051 - /** 1052 - * Allocated in setkey, and only used in HMAC. 1053 - */ 1054 - kfree(ctx->key); 1055 - 1056 - return ret; 1057 - } 1058 - 1059 - /** 1060 - * hash_hw_update - Updates current HASH computation hashing another part of 1061 - * the message. 1062 - * @req: Byte array containing the message to be hashed (caller 1063 - * allocated). 
1064 - */ 1065 - int hash_hw_update(struct ahash_request *req) 1066 - { 1067 - int ret = 0; 1068 - u8 index = 0; 1069 - u8 *buffer; 1070 - struct hash_device_data *device_data; 1071 - u8 *data_buffer; 1072 - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 1073 - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); 1074 - struct hash_req_ctx *req_ctx = ahash_request_ctx(req); 1075 - struct crypto_hash_walk walk; 1076 - int msg_length; 1077 - 1078 - index = req_ctx->state.index; 1079 - buffer = (u8 *)req_ctx->state.buffer; 1080 - 1081 - ret = hash_get_device_data(ctx, &device_data); 1082 - if (ret) 1083 - return ret; 1084 - 1085 - msg_length = crypto_hash_walk_first(req, &walk); 1086 - 1087 - /* Empty message ("") is correct indata */ 1088 - if (msg_length == 0) { 1089 - ret = 0; 1090 - goto release_dev; 1091 - } 1092 - 1093 - /* Check if ctx->state.length + msg_length 1094 - overflows */ 1095 - if (msg_length > (req_ctx->state.length.low_word + msg_length) && 1096 - HASH_HIGH_WORD_MAX_VAL == req_ctx->state.length.high_word) { 1097 - pr_err("%s: HASH_MSG_LENGTH_OVERFLOW!\n", __func__); 1098 - ret = crypto_hash_walk_done(&walk, -EPERM); 1099 - goto release_dev; 1100 - } 1101 - 1102 - /* Main loop */ 1103 - while (0 != msg_length) { 1104 - data_buffer = walk.data; 1105 - ret = hash_process_data(device_data, ctx, req_ctx, msg_length, 1106 - data_buffer, buffer, &index); 1107 - 1108 - if (ret) { 1109 - dev_err(device_data->dev, "%s: hash_internal_hw_update() failed!\n", 1110 - __func__); 1111 - crypto_hash_walk_done(&walk, ret); 1112 - goto release_dev; 1113 - } 1114 - 1115 - msg_length = crypto_hash_walk_done(&walk, 0); 1116 - } 1117 - 1118 - req_ctx->state.index = index; 1119 - dev_dbg(device_data->dev, "%s: indata length=%d, bin=%d\n", 1120 - __func__, req_ctx->state.index, req_ctx->state.bit_index); 1121 - 1122 - release_dev: 1123 - release_hash_device(device_data); 1124 - 1125 - return ret; 1126 - } 1127 - 1128 - /** 1129 - * hash_resume_state - Function that resumes 
the state of an calculation. 1130 - * @device_data: Pointer to the device structure. 1131 - * @device_state: The state to be restored in the hash hardware 1132 - */ 1133 - int hash_resume_state(struct hash_device_data *device_data, 1134 - const struct hash_state *device_state) 1135 - { 1136 - u32 temp_cr; 1137 - s32 count; 1138 - int hash_mode = HASH_OPER_MODE_HASH; 1139 - 1140 - if (NULL == device_state) { 1141 - dev_err(device_data->dev, "%s: HASH_INVALID_PARAMETER!\n", 1142 - __func__); 1143 - return -EPERM; 1144 - } 1145 - 1146 - /* Check correctness of index and length members */ 1147 - if (device_state->index > HASH_BLOCK_SIZE || 1148 - (device_state->length.low_word % HASH_BLOCK_SIZE) != 0) { 1149 - dev_err(device_data->dev, "%s: HASH_INVALID_PARAMETER!\n", 1150 - __func__); 1151 - return -EPERM; 1152 - } 1153 - 1154 - /* 1155 - * INIT bit. Set this bit to 0b1 to reset the HASH processor core and 1156 - * prepare the initialize the HASH accelerator to compute the message 1157 - * digest of a new message. 
1158 - */ 1159 - HASH_INITIALIZE; 1160 - 1161 - temp_cr = device_state->temp_cr; 1162 - writel_relaxed(temp_cr & HASH_CR_RESUME_MASK, &device_data->base->cr); 1163 - 1164 - if (readl(&device_data->base->cr) & HASH_CR_MODE_MASK) 1165 - hash_mode = HASH_OPER_MODE_HMAC; 1166 - else 1167 - hash_mode = HASH_OPER_MODE_HASH; 1168 - 1169 - for (count = 0; count < HASH_CSR_COUNT; count++) { 1170 - if ((count >= 36) && (hash_mode == HASH_OPER_MODE_HASH)) 1171 - break; 1172 - 1173 - writel_relaxed(device_state->csr[count], 1174 - &device_data->base->csrx[count]); 1175 - } 1176 - 1177 - writel_relaxed(device_state->csfull, &device_data->base->csfull); 1178 - writel_relaxed(device_state->csdatain, &device_data->base->csdatain); 1179 - 1180 - writel_relaxed(device_state->str_reg, &device_data->base->str); 1181 - writel_relaxed(temp_cr, &device_data->base->cr); 1182 - 1183 - return 0; 1184 - } 1185 - 1186 - /** 1187 - * hash_save_state - Function that saves the state of hardware. 1188 - * @device_data: Pointer to the device structure. 1189 - * @device_state: The strucure where the hardware state should be saved. 1190 - */ 1191 - int hash_save_state(struct hash_device_data *device_data, 1192 - struct hash_state *device_state) 1193 - { 1194 - u32 temp_cr; 1195 - u32 count; 1196 - int hash_mode = HASH_OPER_MODE_HASH; 1197 - 1198 - if (NULL == device_state) { 1199 - dev_err(device_data->dev, "%s: HASH_INVALID_PARAMETER!\n", 1200 - __func__); 1201 - return -ENOTSUPP; 1202 - } 1203 - 1204 - /* Write dummy value to force digest intermediate calculation. This 1205 - * actually makes sure that there isn't any ongoing calculation in the 1206 - * hardware. 
1207 - */ 1208 - while (readl(&device_data->base->str) & HASH_STR_DCAL_MASK) 1209 - cpu_relax(); 1210 - 1211 - temp_cr = readl_relaxed(&device_data->base->cr); 1212 - 1213 - device_state->str_reg = readl_relaxed(&device_data->base->str); 1214 - 1215 - device_state->din_reg = readl_relaxed(&device_data->base->din); 1216 - 1217 - if (readl(&device_data->base->cr) & HASH_CR_MODE_MASK) 1218 - hash_mode = HASH_OPER_MODE_HMAC; 1219 - else 1220 - hash_mode = HASH_OPER_MODE_HASH; 1221 - 1222 - for (count = 0; count < HASH_CSR_COUNT; count++) { 1223 - if ((count >= 36) && (hash_mode == HASH_OPER_MODE_HASH)) 1224 - break; 1225 - 1226 - device_state->csr[count] = 1227 - readl_relaxed(&device_data->base->csrx[count]); 1228 - } 1229 - 1230 - device_state->csfull = readl_relaxed(&device_data->base->csfull); 1231 - device_state->csdatain = readl_relaxed(&device_data->base->csdatain); 1232 - 1233 - device_state->temp_cr = temp_cr; 1234 - 1235 - return 0; 1236 - } 1237 - 1238 - /** 1239 - * hash_check_hw - This routine checks for peripheral Ids and PCell Ids. 1240 - * @device_data: 1241 - * 1242 - */ 1243 - int hash_check_hw(struct hash_device_data *device_data) 1244 - { 1245 - /* Checking Peripheral Ids */ 1246 - if (HASH_P_ID0 == readl_relaxed(&device_data->base->periphid0) && 1247 - HASH_P_ID1 == readl_relaxed(&device_data->base->periphid1) && 1248 - HASH_P_ID2 == readl_relaxed(&device_data->base->periphid2) && 1249 - HASH_P_ID3 == readl_relaxed(&device_data->base->periphid3) && 1250 - HASH_CELL_ID0 == readl_relaxed(&device_data->base->cellid0) && 1251 - HASH_CELL_ID1 == readl_relaxed(&device_data->base->cellid1) && 1252 - HASH_CELL_ID2 == readl_relaxed(&device_data->base->cellid2) && 1253 - HASH_CELL_ID3 == readl_relaxed(&device_data->base->cellid3)) { 1254 - return 0; 1255 - } 1256 - 1257 - dev_err(device_data->dev, "%s: HASH_UNSUPPORTED_HW!\n", __func__); 1258 - return -ENOTSUPP; 1259 - } 1260 - 1261 - /** 1262 - * hash_get_digest - Gets the digest. 
1263 - * @device_data: Pointer to the device structure. 1264 - * @digest: User allocated byte array for the calculated digest. 1265 - * @algorithm: The algorithm in use. 1266 - */ 1267 - void hash_get_digest(struct hash_device_data *device_data, 1268 - u8 *digest, int algorithm) 1269 - { 1270 - u32 temp_hx_val, count; 1271 - int loop_ctr; 1272 - 1273 - if (algorithm != HASH_ALGO_SHA1 && algorithm != HASH_ALGO_SHA256) { 1274 - dev_err(device_data->dev, "%s: Incorrect algorithm %d\n", 1275 - __func__, algorithm); 1276 - return; 1277 - } 1278 - 1279 - if (algorithm == HASH_ALGO_SHA1) 1280 - loop_ctr = SHA1_DIGEST_SIZE / sizeof(u32); 1281 - else 1282 - loop_ctr = SHA256_DIGEST_SIZE / sizeof(u32); 1283 - 1284 - dev_dbg(device_data->dev, "%s: digest array:(0x%lx)\n", 1285 - __func__, (unsigned long)digest); 1286 - 1287 - /* Copy result into digest array */ 1288 - for (count = 0; count < loop_ctr; count++) { 1289 - temp_hx_val = readl_relaxed(&device_data->base->hx[count]); 1290 - digest[count * 4] = (u8) ((temp_hx_val >> 24) & 0xFF); 1291 - digest[count * 4 + 1] = (u8) ((temp_hx_val >> 16) & 0xFF); 1292 - digest[count * 4 + 2] = (u8) ((temp_hx_val >> 8) & 0xFF); 1293 - digest[count * 4 + 3] = (u8) ((temp_hx_val >> 0) & 0xFF); 1294 - } 1295 - } 1296 - 1297 - /** 1298 - * ahash_update - The hash update function for SHA1/SHA2 (SHA256). 1299 - * @req: The hash request for the job. 1300 - */ 1301 - static int ahash_update(struct ahash_request *req) 1302 - { 1303 - int ret = 0; 1304 - struct hash_req_ctx *req_ctx = ahash_request_ctx(req); 1305 - 1306 - if (hash_mode != HASH_MODE_DMA || !req_ctx->dma_mode) 1307 - ret = hash_hw_update(req); 1308 - /* Skip update for DMA, all data will be passed to DMA in final */ 1309 - 1310 - if (ret) { 1311 - pr_err("%s: hash_hw_update() failed!\n", __func__); 1312 - } 1313 - 1314 - return ret; 1315 - } 1316 - 1317 - /** 1318 - * ahash_final - The hash final function for SHA1/SHA2 (SHA256). 1319 - * @req: The hash request for the job. 
1320 - */ 1321 - static int ahash_final(struct ahash_request *req) 1322 - { 1323 - int ret = 0; 1324 - struct hash_req_ctx *req_ctx = ahash_request_ctx(req); 1325 - 1326 - pr_debug("%s: data size: %d\n", __func__, req->nbytes); 1327 - 1328 - if ((hash_mode == HASH_MODE_DMA) && req_ctx->dma_mode) 1329 - ret = hash_dma_final(req); 1330 - else 1331 - ret = hash_hw_final(req); 1332 - 1333 - if (ret) { 1334 - pr_err("%s: hash_hw/dma_final() failed\n", __func__); 1335 - } 1336 - 1337 - return ret; 1338 - } 1339 - 1340 - static int hash_setkey(struct crypto_ahash *tfm, 1341 - const u8 *key, unsigned int keylen, int alg) 1342 - { 1343 - int ret = 0; 1344 - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); 1345 - 1346 - /** 1347 - * Freed in final. 1348 - */ 1349 - ctx->key = kmemdup(key, keylen, GFP_KERNEL); 1350 - if (!ctx->key) { 1351 - pr_err("%s: Failed to allocate ctx->key for %d\n", 1352 - __func__, alg); 1353 - return -ENOMEM; 1354 - } 1355 - ctx->keylen = keylen; 1356 - 1357 - return ret; 1358 - } 1359 - 1360 - static int ahash_sha1_init(struct ahash_request *req) 1361 - { 1362 - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 1363 - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); 1364 - 1365 - ctx->config.data_format = HASH_DATA_8_BITS; 1366 - ctx->config.algorithm = HASH_ALGO_SHA1; 1367 - ctx->config.oper_mode = HASH_OPER_MODE_HASH; 1368 - ctx->digestsize = SHA1_DIGEST_SIZE; 1369 - 1370 - return ux500_hash_init(req); 1371 - } 1372 - 1373 - static int ahash_sha256_init(struct ahash_request *req) 1374 - { 1375 - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 1376 - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); 1377 - 1378 - ctx->config.data_format = HASH_DATA_8_BITS; 1379 - ctx->config.algorithm = HASH_ALGO_SHA256; 1380 - ctx->config.oper_mode = HASH_OPER_MODE_HASH; 1381 - ctx->digestsize = SHA256_DIGEST_SIZE; 1382 - 1383 - return ux500_hash_init(req); 1384 - } 1385 - 1386 - static int ahash_sha1_digest(struct ahash_request *req) 1387 - { 1388 - int ret2, 
ret1; 1389 - 1390 - ret1 = ahash_sha1_init(req); 1391 - if (ret1) 1392 - goto out; 1393 - 1394 - ret1 = ahash_update(req); 1395 - ret2 = ahash_final(req); 1396 - 1397 - out: 1398 - return ret1 ? ret1 : ret2; 1399 - } 1400 - 1401 - static int ahash_sha256_digest(struct ahash_request *req) 1402 - { 1403 - int ret2, ret1; 1404 - 1405 - ret1 = ahash_sha256_init(req); 1406 - if (ret1) 1407 - goto out; 1408 - 1409 - ret1 = ahash_update(req); 1410 - ret2 = ahash_final(req); 1411 - 1412 - out: 1413 - return ret1 ? ret1 : ret2; 1414 - } 1415 - 1416 - static int ahash_noimport(struct ahash_request *req, const void *in) 1417 - { 1418 - return -ENOSYS; 1419 - } 1420 - 1421 - static int ahash_noexport(struct ahash_request *req, void *out) 1422 - { 1423 - return -ENOSYS; 1424 - } 1425 - 1426 - static int hmac_sha1_init(struct ahash_request *req) 1427 - { 1428 - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 1429 - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); 1430 - 1431 - ctx->config.data_format = HASH_DATA_8_BITS; 1432 - ctx->config.algorithm = HASH_ALGO_SHA1; 1433 - ctx->config.oper_mode = HASH_OPER_MODE_HMAC; 1434 - ctx->digestsize = SHA1_DIGEST_SIZE; 1435 - 1436 - return ux500_hash_init(req); 1437 - } 1438 - 1439 - static int hmac_sha256_init(struct ahash_request *req) 1440 - { 1441 - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); 1442 - struct hash_ctx *ctx = crypto_ahash_ctx(tfm); 1443 - 1444 - ctx->config.data_format = HASH_DATA_8_BITS; 1445 - ctx->config.algorithm = HASH_ALGO_SHA256; 1446 - ctx->config.oper_mode = HASH_OPER_MODE_HMAC; 1447 - ctx->digestsize = SHA256_DIGEST_SIZE; 1448 - 1449 - return ux500_hash_init(req); 1450 - } 1451 - 1452 - static int hmac_sha1_digest(struct ahash_request *req) 1453 - { 1454 - int ret2, ret1; 1455 - 1456 - ret1 = hmac_sha1_init(req); 1457 - if (ret1) 1458 - goto out; 1459 - 1460 - ret1 = ahash_update(req); 1461 - ret2 = ahash_final(req); 1462 - 1463 - out: 1464 - return ret1 ? 
ret1 : ret2; 1465 - } 1466 - 1467 - static int hmac_sha256_digest(struct ahash_request *req) 1468 - { 1469 - int ret2, ret1; 1470 - 1471 - ret1 = hmac_sha256_init(req); 1472 - if (ret1) 1473 - goto out; 1474 - 1475 - ret1 = ahash_update(req); 1476 - ret2 = ahash_final(req); 1477 - 1478 - out: 1479 - return ret1 ? ret1 : ret2; 1480 - } 1481 - 1482 - static int hmac_sha1_setkey(struct crypto_ahash *tfm, 1483 - const u8 *key, unsigned int keylen) 1484 - { 1485 - return hash_setkey(tfm, key, keylen, HASH_ALGO_SHA1); 1486 - } 1487 - 1488 - static int hmac_sha256_setkey(struct crypto_ahash *tfm, 1489 - const u8 *key, unsigned int keylen) 1490 - { 1491 - return hash_setkey(tfm, key, keylen, HASH_ALGO_SHA256); 1492 - } 1493 - 1494 - struct hash_algo_template { 1495 - struct hash_config conf; 1496 - struct ahash_alg hash; 1497 - }; 1498 - 1499 - static int hash_cra_init(struct crypto_tfm *tfm) 1500 - { 1501 - struct hash_ctx *ctx = crypto_tfm_ctx(tfm); 1502 - struct crypto_alg *alg = tfm->__crt_alg; 1503 - struct hash_algo_template *hash_alg; 1504 - 1505 - hash_alg = container_of(__crypto_ahash_alg(alg), 1506 - struct hash_algo_template, 1507 - hash); 1508 - 1509 - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), 1510 - sizeof(struct hash_req_ctx)); 1511 - 1512 - ctx->config.data_format = HASH_DATA_8_BITS; 1513 - ctx->config.algorithm = hash_alg->conf.algorithm; 1514 - ctx->config.oper_mode = hash_alg->conf.oper_mode; 1515 - 1516 - ctx->digestsize = hash_alg->hash.halg.digestsize; 1517 - 1518 - return 0; 1519 - } 1520 - 1521 - static struct hash_algo_template hash_algs[] = { 1522 - { 1523 - .conf.algorithm = HASH_ALGO_SHA1, 1524 - .conf.oper_mode = HASH_OPER_MODE_HASH, 1525 - .hash = { 1526 - .init = ux500_hash_init, 1527 - .update = ahash_update, 1528 - .final = ahash_final, 1529 - .digest = ahash_sha1_digest, 1530 - .export = ahash_noexport, 1531 - .import = ahash_noimport, 1532 - .halg.digestsize = SHA1_DIGEST_SIZE, 1533 - .halg.statesize = sizeof(struct hash_ctx), 
1534 - .halg.base = { 1535 - .cra_name = "sha1", 1536 - .cra_driver_name = "sha1-ux500", 1537 - .cra_flags = CRYPTO_ALG_ASYNC, 1538 - .cra_blocksize = SHA1_BLOCK_SIZE, 1539 - .cra_ctxsize = sizeof(struct hash_ctx), 1540 - .cra_init = hash_cra_init, 1541 - .cra_module = THIS_MODULE, 1542 - } 1543 - } 1544 - }, 1545 - { 1546 - .conf.algorithm = HASH_ALGO_SHA256, 1547 - .conf.oper_mode = HASH_OPER_MODE_HASH, 1548 - .hash = { 1549 - .init = ux500_hash_init, 1550 - .update = ahash_update, 1551 - .final = ahash_final, 1552 - .digest = ahash_sha256_digest, 1553 - .export = ahash_noexport, 1554 - .import = ahash_noimport, 1555 - .halg.digestsize = SHA256_DIGEST_SIZE, 1556 - .halg.statesize = sizeof(struct hash_ctx), 1557 - .halg.base = { 1558 - .cra_name = "sha256", 1559 - .cra_driver_name = "sha256-ux500", 1560 - .cra_flags = CRYPTO_ALG_ASYNC, 1561 - .cra_blocksize = SHA256_BLOCK_SIZE, 1562 - .cra_ctxsize = sizeof(struct hash_ctx), 1563 - .cra_init = hash_cra_init, 1564 - .cra_module = THIS_MODULE, 1565 - } 1566 - } 1567 - }, 1568 - { 1569 - .conf.algorithm = HASH_ALGO_SHA1, 1570 - .conf.oper_mode = HASH_OPER_MODE_HMAC, 1571 - .hash = { 1572 - .init = ux500_hash_init, 1573 - .update = ahash_update, 1574 - .final = ahash_final, 1575 - .digest = hmac_sha1_digest, 1576 - .setkey = hmac_sha1_setkey, 1577 - .export = ahash_noexport, 1578 - .import = ahash_noimport, 1579 - .halg.digestsize = SHA1_DIGEST_SIZE, 1580 - .halg.statesize = sizeof(struct hash_ctx), 1581 - .halg.base = { 1582 - .cra_name = "hmac(sha1)", 1583 - .cra_driver_name = "hmac-sha1-ux500", 1584 - .cra_flags = CRYPTO_ALG_ASYNC, 1585 - .cra_blocksize = SHA1_BLOCK_SIZE, 1586 - .cra_ctxsize = sizeof(struct hash_ctx), 1587 - .cra_init = hash_cra_init, 1588 - .cra_module = THIS_MODULE, 1589 - } 1590 - } 1591 - }, 1592 - { 1593 - .conf.algorithm = HASH_ALGO_SHA256, 1594 - .conf.oper_mode = HASH_OPER_MODE_HMAC, 1595 - .hash = { 1596 - .init = ux500_hash_init, 1597 - .update = ahash_update, 1598 - .final = ahash_final, 
1599 - .digest = hmac_sha256_digest, 1600 - .setkey = hmac_sha256_setkey, 1601 - .export = ahash_noexport, 1602 - .import = ahash_noimport, 1603 - .halg.digestsize = SHA256_DIGEST_SIZE, 1604 - .halg.statesize = sizeof(struct hash_ctx), 1605 - .halg.base = { 1606 - .cra_name = "hmac(sha256)", 1607 - .cra_driver_name = "hmac-sha256-ux500", 1608 - .cra_flags = CRYPTO_ALG_ASYNC, 1609 - .cra_blocksize = SHA256_BLOCK_SIZE, 1610 - .cra_ctxsize = sizeof(struct hash_ctx), 1611 - .cra_init = hash_cra_init, 1612 - .cra_module = THIS_MODULE, 1613 - } 1614 - } 1615 - } 1616 - }; 1617 - 1618 - static int ahash_algs_register_all(struct hash_device_data *device_data) 1619 - { 1620 - int ret; 1621 - int i; 1622 - int count; 1623 - 1624 - for (i = 0; i < ARRAY_SIZE(hash_algs); i++) { 1625 - ret = crypto_register_ahash(&hash_algs[i].hash); 1626 - if (ret) { 1627 - count = i; 1628 - dev_err(device_data->dev, "%s: alg registration failed\n", 1629 - hash_algs[i].hash.halg.base.cra_driver_name); 1630 - goto unreg; 1631 - } 1632 - } 1633 - return 0; 1634 - unreg: 1635 - for (i = 0; i < count; i++) 1636 - crypto_unregister_ahash(&hash_algs[i].hash); 1637 - return ret; 1638 - } 1639 - 1640 - static void ahash_algs_unregister_all(struct hash_device_data *device_data) 1641 - { 1642 - int i; 1643 - 1644 - for (i = 0; i < ARRAY_SIZE(hash_algs); i++) 1645 - crypto_unregister_ahash(&hash_algs[i].hash); 1646 - } 1647 - 1648 - /** 1649 - * ux500_hash_probe - Function that probes the hash hardware. 1650 - * @pdev: The platform device. 
1651 - */ 1652 - static int ux500_hash_probe(struct platform_device *pdev) 1653 - { 1654 - int ret = 0; 1655 - struct resource *res = NULL; 1656 - struct hash_device_data *device_data; 1657 - struct device *dev = &pdev->dev; 1658 - 1659 - device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_KERNEL); 1660 - if (!device_data) { 1661 - ret = -ENOMEM; 1662 - goto out; 1663 - } 1664 - 1665 - device_data->dev = dev; 1666 - device_data->current_ctx = NULL; 1667 - 1668 - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 1669 - if (!res) { 1670 - dev_dbg(dev, "%s: platform_get_resource() failed!\n", __func__); 1671 - ret = -ENODEV; 1672 - goto out; 1673 - } 1674 - 1675 - device_data->phybase = res->start; 1676 - device_data->base = devm_ioremap_resource(dev, res); 1677 - if (IS_ERR(device_data->base)) { 1678 - ret = PTR_ERR(device_data->base); 1679 - goto out; 1680 - } 1681 - spin_lock_init(&device_data->ctx_lock); 1682 - spin_lock_init(&device_data->power_state_lock); 1683 - 1684 - /* Enable power for HASH1 hardware block */ 1685 - device_data->regulator = regulator_get(dev, "v-ape"); 1686 - if (IS_ERR(device_data->regulator)) { 1687 - dev_err(dev, "%s: regulator_get() failed!\n", __func__); 1688 - ret = PTR_ERR(device_data->regulator); 1689 - device_data->regulator = NULL; 1690 - goto out; 1691 - } 1692 - 1693 - /* Enable the clock for HASH1 hardware block */ 1694 - device_data->clk = devm_clk_get(dev, NULL); 1695 - if (IS_ERR(device_data->clk)) { 1696 - dev_err(dev, "%s: clk_get() failed!\n", __func__); 1697 - ret = PTR_ERR(device_data->clk); 1698 - goto out_regulator; 1699 - } 1700 - 1701 - ret = clk_prepare(device_data->clk); 1702 - if (ret) { 1703 - dev_err(dev, "%s: clk_prepare() failed!\n", __func__); 1704 - goto out_regulator; 1705 - } 1706 - 1707 - /* Enable device power (and clock) */ 1708 - ret = hash_enable_power(device_data, false); 1709 - if (ret) { 1710 - dev_err(dev, "%s: hash_enable_power() failed!\n", __func__); 1711 - goto out_clk_unprepare; 
1712 - } 1713 - 1714 - ret = hash_check_hw(device_data); 1715 - if (ret) { 1716 - dev_err(dev, "%s: hash_check_hw() failed!\n", __func__); 1717 - goto out_power; 1718 - } 1719 - 1720 - if (hash_mode == HASH_MODE_DMA) 1721 - hash_dma_setup_channel(device_data, dev); 1722 - 1723 - platform_set_drvdata(pdev, device_data); 1724 - 1725 - /* Put the new device into the device list... */ 1726 - klist_add_tail(&device_data->list_node, &driver_data.device_list); 1727 - /* ... and signal that a new device is available. */ 1728 - up(&driver_data.device_allocation); 1729 - 1730 - ret = ahash_algs_register_all(device_data); 1731 - if (ret) { 1732 - dev_err(dev, "%s: ahash_algs_register_all() failed!\n", 1733 - __func__); 1734 - goto out_power; 1735 - } 1736 - 1737 - dev_info(dev, "successfully registered\n"); 1738 - return 0; 1739 - 1740 - out_power: 1741 - hash_disable_power(device_data, false); 1742 - 1743 - out_clk_unprepare: 1744 - clk_unprepare(device_data->clk); 1745 - 1746 - out_regulator: 1747 - regulator_put(device_data->regulator); 1748 - 1749 - out: 1750 - return ret; 1751 - } 1752 - 1753 - /** 1754 - * ux500_hash_remove - Function that removes the hash device from the platform. 1755 - * @pdev: The platform device. 1756 - */ 1757 - static int ux500_hash_remove(struct platform_device *pdev) 1758 - { 1759 - struct hash_device_data *device_data; 1760 - struct device *dev = &pdev->dev; 1761 - 1762 - device_data = platform_get_drvdata(pdev); 1763 - if (!device_data) { 1764 - dev_err(dev, "%s: platform_get_drvdata() failed!\n", __func__); 1765 - return -ENOMEM; 1766 - } 1767 - 1768 - /* Try to decrease the number of available devices. 
*/ 1769 - if (down_trylock(&driver_data.device_allocation)) 1770 - return -EBUSY; 1771 - 1772 - /* Check that the device is free */ 1773 - spin_lock(&device_data->ctx_lock); 1774 - /* current_ctx allocates a device, NULL = unallocated */ 1775 - if (device_data->current_ctx) { 1776 - /* The device is busy */ 1777 - spin_unlock(&device_data->ctx_lock); 1778 - /* Return the device to the pool. */ 1779 - up(&driver_data.device_allocation); 1780 - return -EBUSY; 1781 - } 1782 - 1783 - spin_unlock(&device_data->ctx_lock); 1784 - 1785 - /* Remove the device from the list */ 1786 - if (klist_node_attached(&device_data->list_node)) 1787 - klist_remove(&device_data->list_node); 1788 - 1789 - /* If this was the last device, remove the services */ 1790 - if (list_empty(&driver_data.device_list.k_list)) 1791 - ahash_algs_unregister_all(device_data); 1792 - 1793 - if (hash_disable_power(device_data, false)) 1794 - dev_err(dev, "%s: hash_disable_power() failed\n", 1795 - __func__); 1796 - 1797 - clk_unprepare(device_data->clk); 1798 - regulator_put(device_data->regulator); 1799 - 1800 - return 0; 1801 - } 1802 - 1803 - /** 1804 - * ux500_hash_shutdown - Function that shutdown the hash device. 1805 - * @pdev: The platform device 1806 - */ 1807 - static void ux500_hash_shutdown(struct platform_device *pdev) 1808 - { 1809 - struct hash_device_data *device_data; 1810 - 1811 - device_data = platform_get_drvdata(pdev); 1812 - if (!device_data) { 1813 - dev_err(&pdev->dev, "%s: platform_get_drvdata() failed!\n", 1814 - __func__); 1815 - return; 1816 - } 1817 - 1818 - /* Check that the device is free */ 1819 - spin_lock(&device_data->ctx_lock); 1820 - /* current_ctx allocates a device, NULL = unallocated */ 1821 - if (!device_data->current_ctx) { 1822 - if (down_trylock(&driver_data.device_allocation)) 1823 - dev_dbg(&pdev->dev, "%s: Cryp still in use! 
Shutting down anyway...\n", 1824 - __func__); 1825 - /** 1826 - * (Allocate the device) 1827 - * Need to set this to non-null (dummy) value, 1828 - * to avoid usage if context switching. 1829 - */ 1830 - device_data->current_ctx++; 1831 - } 1832 - spin_unlock(&device_data->ctx_lock); 1833 - 1834 - /* Remove the device from the list */ 1835 - if (klist_node_attached(&device_data->list_node)) 1836 - klist_remove(&device_data->list_node); 1837 - 1838 - /* If this was the last device, remove the services */ 1839 - if (list_empty(&driver_data.device_list.k_list)) 1840 - ahash_algs_unregister_all(device_data); 1841 - 1842 - if (hash_disable_power(device_data, false)) 1843 - dev_err(&pdev->dev, "%s: hash_disable_power() failed\n", 1844 - __func__); 1845 - } 1846 - 1847 - #ifdef CONFIG_PM_SLEEP 1848 - /** 1849 - * ux500_hash_suspend - Function that suspends the hash device. 1850 - * @dev: Device to suspend. 1851 - */ 1852 - static int ux500_hash_suspend(struct device *dev) 1853 - { 1854 - int ret; 1855 - struct hash_device_data *device_data; 1856 - struct hash_ctx *temp_ctx = NULL; 1857 - 1858 - device_data = dev_get_drvdata(dev); 1859 - if (!device_data) { 1860 - dev_err(dev, "%s: platform_get_drvdata() failed!\n", __func__); 1861 - return -ENOMEM; 1862 - } 1863 - 1864 - spin_lock(&device_data->ctx_lock); 1865 - if (!device_data->current_ctx) 1866 - device_data->current_ctx++; 1867 - spin_unlock(&device_data->ctx_lock); 1868 - 1869 - if (device_data->current_ctx == ++temp_ctx) { 1870 - if (down_interruptible(&driver_data.device_allocation)) 1871 - dev_dbg(dev, "%s: down_interruptible() failed\n", 1872 - __func__); 1873 - ret = hash_disable_power(device_data, false); 1874 - 1875 - } else { 1876 - ret = hash_disable_power(device_data, true); 1877 - } 1878 - 1879 - if (ret) 1880 - dev_err(dev, "%s: hash_disable_power()\n", __func__); 1881 - 1882 - return ret; 1883 - } 1884 - 1885 - /** 1886 - * ux500_hash_resume - Function that resume the hash device. 
1887 - * @dev: Device to resume. 1888 - */ 1889 - static int ux500_hash_resume(struct device *dev) 1890 - { 1891 - int ret = 0; 1892 - struct hash_device_data *device_data; 1893 - struct hash_ctx *temp_ctx = NULL; 1894 - 1895 - device_data = dev_get_drvdata(dev); 1896 - if (!device_data) { 1897 - dev_err(dev, "%s: platform_get_drvdata() failed!\n", __func__); 1898 - return -ENOMEM; 1899 - } 1900 - 1901 - spin_lock(&device_data->ctx_lock); 1902 - if (device_data->current_ctx == ++temp_ctx) 1903 - device_data->current_ctx = NULL; 1904 - spin_unlock(&device_data->ctx_lock); 1905 - 1906 - if (!device_data->current_ctx) 1907 - up(&driver_data.device_allocation); 1908 - else 1909 - ret = hash_enable_power(device_data, true); 1910 - 1911 - if (ret) 1912 - dev_err(dev, "%s: hash_enable_power() failed!\n", __func__); 1913 - 1914 - return ret; 1915 - } 1916 - #endif 1917 - 1918 - static SIMPLE_DEV_PM_OPS(ux500_hash_pm, ux500_hash_suspend, ux500_hash_resume); 1919 - 1920 - static const struct of_device_id ux500_hash_match[] = { 1921 - { .compatible = "stericsson,ux500-hash" }, 1922 - { }, 1923 - }; 1924 - MODULE_DEVICE_TABLE(of, ux500_hash_match); 1925 - 1926 - static struct platform_driver hash_driver = { 1927 - .probe = ux500_hash_probe, 1928 - .remove = ux500_hash_remove, 1929 - .shutdown = ux500_hash_shutdown, 1930 - .driver = { 1931 - .name = "hash1", 1932 - .of_match_table = ux500_hash_match, 1933 - .pm = &ux500_hash_pm, 1934 - } 1935 - }; 1936 - 1937 - /** 1938 - * ux500_hash_mod_init - The kernel module init function. 1939 - */ 1940 - static int __init ux500_hash_mod_init(void) 1941 - { 1942 - klist_init(&driver_data.device_list, NULL, NULL); 1943 - /* Initialize the semaphore to 0 devices (locked state) */ 1944 - sema_init(&driver_data.device_allocation, 0); 1945 - 1946 - return platform_driver_register(&hash_driver); 1947 - } 1948 - 1949 - /** 1950 - * ux500_hash_mod_fini - The kernel module exit function. 
1951 - */ 1952 - static void __exit ux500_hash_mod_fini(void) 1953 - { 1954 - platform_driver_unregister(&hash_driver); 1955 - } 1956 - 1957 - module_init(ux500_hash_mod_init); 1958 - module_exit(ux500_hash_mod_fini); 1959 - 1960 - MODULE_DESCRIPTION("Driver for ST-Ericsson UX500 HASH engine."); 1961 - MODULE_LICENSE("GPL"); 1962 - 1963 - MODULE_ALIAS_CRYPTO("sha1-all"); 1964 - MODULE_ALIAS_CRYPTO("sha256-all"); 1965 - MODULE_ALIAS_CRYPTO("hmac-sha1-all"); 1966 - MODULE_ALIAS_CRYPTO("hmac-sha256-all");
+1 -1
drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
··· 116 116 struct virtio_crypto_session_input *input; 117 117 struct virtio_crypto_ctrl_request *vc_ctrl_req; 118 118 119 - pkey = kmemdup(key, keylen, GFP_ATOMIC); 119 + pkey = kmemdup(key, keylen, GFP_KERNEL); 120 120 if (!pkey) 121 121 return -ENOMEM; 122 122
+3 -5
drivers/md/dm-crypt.c
··· 1458 1458 return r; 1459 1459 } 1460 1460 1461 - static void kcryptd_async_done(struct crypto_async_request *async_req, 1462 - int error); 1461 + static void kcryptd_async_done(void *async_req, int error); 1463 1462 1464 1463 static int crypt_alloc_req_skcipher(struct crypt_config *cc, 1465 1464 struct convert_context *ctx) ··· 2146 2147 crypt_dec_pending(io); 2147 2148 } 2148 2149 2149 - static void kcryptd_async_done(struct crypto_async_request *async_req, 2150 - int error) 2150 + static void kcryptd_async_done(void *data, int error) 2151 2151 { 2152 - struct dm_crypt_request *dmreq = async_req->data; 2152 + struct dm_crypt_request *dmreq = data; 2153 2153 struct convert_context *ctx = dmreq->ctx; 2154 2154 struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx); 2155 2155 struct crypt_config *cc = io->cc;
+2 -2
drivers/md/dm-integrity.c
··· 955 955 async_tx_issue_pending_all(); 956 956 } 957 957 958 - static void complete_journal_encrypt(struct crypto_async_request *req, int err) 958 + static void complete_journal_encrypt(void *data, int err) 959 959 { 960 - struct journal_completion *comp = req->data; 960 + struct journal_completion *comp = data; 961 961 if (unlikely(err)) { 962 962 if (likely(err == -EINPROGRESS)) { 963 963 complete(&comp->ic->crypto_backoff);
+4 -4
drivers/net/macsec.c
··· 528 528 } 529 529 } 530 530 531 - static void macsec_encrypt_done(struct crypto_async_request *base, int err) 531 + static void macsec_encrypt_done(void *data, int err) 532 532 { 533 - struct sk_buff *skb = base->data; 533 + struct sk_buff *skb = data; 534 534 struct net_device *dev = skb->dev; 535 535 struct macsec_dev *macsec = macsec_priv(dev); 536 536 struct macsec_tx_sa *sa = macsec_skb_cb(skb)->tx_sa; ··· 835 835 u64_stats_update_end(&stats->syncp); 836 836 } 837 837 838 - static void macsec_decrypt_done(struct crypto_async_request *base, int err) 838 + static void macsec_decrypt_done(void *data, int err) 839 839 { 840 - struct sk_buff *skb = base->data; 840 + struct sk_buff *skb = data; 841 841 struct net_device *dev = skb->dev; 842 842 struct macsec_dev *macsec = macsec_priv(dev); 843 843 struct macsec_rx_sa *rx_sa = macsec_skb_cb(skb)->rx_sa;
+3 -27
fs/ecryptfs/crypto.c
··· 260 260 return i; 261 261 } 262 262 263 - struct extent_crypt_result { 264 - struct completion completion; 265 - int rc; 266 - }; 267 - 268 - static void extent_crypt_complete(struct crypto_async_request *req, int rc) 269 - { 270 - struct extent_crypt_result *ecr = req->data; 271 - 272 - if (rc == -EINPROGRESS) 273 - return; 274 - 275 - ecr->rc = rc; 276 - complete(&ecr->completion); 277 - } 278 - 279 263 /** 280 264 * crypt_scatterlist 281 265 * @crypt_stat: Pointer to the crypt_stat struct to initialize. ··· 277 293 unsigned char *iv, int op) 278 294 { 279 295 struct skcipher_request *req = NULL; 280 - struct extent_crypt_result ecr; 296 + DECLARE_CRYPTO_WAIT(ecr); 281 297 int rc = 0; 282 298 283 299 if (unlikely(ecryptfs_verbosity > 0)) { ··· 286 302 ecryptfs_dump_hex(crypt_stat->key, 287 303 crypt_stat->key_size); 288 304 } 289 - 290 - init_completion(&ecr.completion); 291 305 292 306 mutex_lock(&crypt_stat->cs_tfm_mutex); 293 307 req = skcipher_request_alloc(crypt_stat->tfm, GFP_NOFS); ··· 297 315 298 316 skcipher_request_set_callback(req, 299 317 CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, 300 - extent_crypt_complete, &ecr); 318 + crypto_req_done, &ecr); 301 319 /* Consider doing this once, when the file is opened */ 302 320 if (!(crypt_stat->flags & ECRYPTFS_KEY_SET)) { 303 321 rc = crypto_skcipher_setkey(crypt_stat->tfm, crypt_stat->key, ··· 316 334 skcipher_request_set_crypt(req, src_sg, dst_sg, size, iv); 317 335 rc = op == ENCRYPT ? crypto_skcipher_encrypt(req) : 318 336 crypto_skcipher_decrypt(req); 319 - if (rc == -EINPROGRESS || rc == -EBUSY) { 320 - struct extent_crypt_result *ecr = req->base.data; 321 - 322 - wait_for_completion(&ecr->completion); 323 - rc = ecr->rc; 324 - reinit_completion(&ecr->completion); 325 - } 337 + rc = crypto_wait_req(rc, &ecr); 326 338 out: 327 339 skcipher_request_free(req); 328 340 return rc;
+10 -10
include/crypto/aead.h
··· 27 27 * 28 28 * For example: authenc(hmac(sha256), cbc(aes)) 29 29 * 30 - * The example code provided for the symmetric key cipher operation 31 - * applies here as well. Naturally all *skcipher* symbols must be exchanged 32 - * the *aead* pendants discussed in the following. In addition, for the AEAD 33 - * operation, the aead_request_set_ad function must be used to set the 34 - * pointer to the associated data memory location before performing the 35 - * encryption or decryption operation. In case of an encryption, the associated 36 - * data memory is filled during the encryption operation. For decryption, the 37 - * associated data memory must contain data that is used to verify the integrity 38 - * of the decrypted data. Another deviation from the asynchronous block cipher 30 + * The example code provided for the symmetric key cipher operation applies 31 + * here as well. Naturally all *skcipher* symbols must be exchanged the *aead* 32 + * pendants discussed in the following. In addition, for the AEAD operation, 33 + * the aead_request_set_ad function must be used to set the pointer to the 34 + * associated data memory location before performing the encryption or 35 + * decryption operation. Another deviation from the asynchronous block cipher 39 36 * operation is that the caller should explicitly check for -EBADMSG of the 40 37 * crypto_aead_decrypt. That error indicates an authentication error, i.e. 41 38 * a breach in the integrity of the message. In essence, that -EBADMSG error ··· 46 49 * 47 50 * The destination scatterlist has the same layout, except that the plaintext 48 51 * (resp. ciphertext) will grow (resp. shrink) by the authentication tag size 49 - * during encryption (resp. decryption). 52 + * during encryption (resp. decryption). The authentication tag is generated 53 + * during the encryption operation and appended to the ciphertext. 
During 54 + * decryption, the authentication tag is consumed along with the ciphertext and 55 + * used to verify the integrity of the plaintext and the associated data. 50 56 * 51 57 * In-place encryption/decryption is enabled by using the same scatterlist 52 58 * pointer for both the source and destination.
+6
include/crypto/algapi.h
··· 302 302 CRYPTO_MSG_ALG_LOADED, 303 303 }; 304 304 305 + static inline void crypto_request_complete(struct crypto_async_request *req, 306 + int err) 307 + { 308 + req->complete(req->data, err); 309 + } 310 + 305 311 #endif /* _CRYPTO_ALGAPI_H */
+1 -3
include/crypto/if_alg.h
··· 21 21 22 22 #define ALG_MAX_PAGES 16 23 23 24 - struct crypto_async_request; 25 - 26 24 struct alg_sock { 27 25 /* struct sock must be the first member of struct alg_sock */ 28 26 struct sock sk; ··· 233 235 ssize_t af_alg_sendpage(struct socket *sock, struct page *page, 234 236 int offset, size_t size, int flags); 235 237 void af_alg_free_resources(struct af_alg_async_req *areq); 236 - void af_alg_async_cb(struct crypto_async_request *_req, int err); 238 + void af_alg_async_cb(void *data, int err); 237 239 __poll_t af_alg_poll(struct file *file, struct socket *sock, 238 240 poll_table *wait); 239 241 struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk,
+1 -1
include/crypto/internal/acompress.h
··· 28 28 static inline void acomp_request_complete(struct acomp_req *req, 29 29 int err) 30 30 { 31 - req->base.complete(&req->base, err); 31 + crypto_request_complete(&req->base, err); 32 32 } 33 33 34 34 static inline const char *acomp_alg_name(struct crypto_acomp *tfm)
+1 -1
include/crypto/internal/aead.h
··· 82 82 83 83 static inline void aead_request_complete(struct aead_request *req, int err) 84 84 { 85 - req->base.complete(&req->base, err); 85 + crypto_request_complete(&req->base, err); 86 86 } 87 87 88 88 static inline u32 aead_request_flags(struct aead_request *req)
+1 -1
include/crypto/internal/akcipher.h
··· 69 69 static inline void akcipher_request_complete(struct akcipher_request *req, 70 70 int err) 71 71 { 72 - req->base.complete(&req->base, err); 72 + crypto_request_complete(&req->base, err); 73 73 } 74 74 75 75 static inline const char *akcipher_alg_name(struct crypto_akcipher *tfm)
+1 -1
include/crypto/internal/hash.h
··· 199 199 200 200 static inline void ahash_request_complete(struct ahash_request *req, int err) 201 201 { 202 - req->base.complete(&req->base, err); 202 + crypto_request_complete(&req->base, err); 203 203 } 204 204 205 205 static inline u32 ahash_request_flags(struct ahash_request *req)
+1 -1
include/crypto/internal/kpp.h
··· 85 85 86 86 static inline void kpp_request_complete(struct kpp_request *req, int err) 87 87 { 88 - req->base.complete(&req->base, err); 88 + crypto_request_complete(&req->base, err); 89 89 } 90 90 91 91 static inline const char *kpp_alg_name(struct crypto_kpp *tfm)
+1 -1
include/crypto/internal/skcipher.h
··· 94 94 95 95 static inline void skcipher_request_complete(struct skcipher_request *req, int err) 96 96 { 97 - req->base.complete(&req->base, err); 97 + crypto_request_complete(&req->base, err); 98 98 } 99 99 100 100 int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn,
+2 -2
include/crypto/scatterwalk.h
··· 53 53 54 54 static inline void scatterwalk_unmap(void *vaddr) 55 55 { 56 - kunmap_atomic(vaddr); 56 + kunmap_local(vaddr); 57 57 } 58 58 59 59 static inline void scatterwalk_start(struct scatter_walk *walk, ··· 65 65 66 66 static inline void *scatterwalk_map(struct scatter_walk *walk) 67 67 { 68 - return kmap_atomic(scatterwalk_page(walk)) + 68 + return kmap_local_page(scatterwalk_page(walk)) + 69 69 offset_in_page(walk->offset); 70 70 } 71 71
+11 -18
include/crypto/xts.h
··· 8 8 9 9 #define XTS_BLOCK_SIZE 16 10 10 11 - static inline int xts_check_key(struct crypto_tfm *tfm, 12 - const u8 *key, unsigned int keylen) 13 - { 14 - /* 15 - * key consists of keys of equal size concatenated, therefore 16 - * the length must be even. 17 - */ 18 - if (keylen % 2) 19 - return -EINVAL; 20 - 21 - /* ensure that the AES and tweak key are not identical */ 22 - if (fips_enabled && !crypto_memneq(key, key + (keylen / 2), keylen / 2)) 23 - return -EINVAL; 24 - 25 - return 0; 26 - } 27 - 28 11 static inline int xts_verify_key(struct crypto_skcipher *tfm, 29 12 const u8 *key, unsigned int keylen) 30 13 { ··· 18 35 if (keylen % 2) 19 36 return -EINVAL; 20 37 21 - /* ensure that the AES and tweak key are not identical */ 38 + /* 39 + * In FIPS mode only a combined key length of either 256 or 40 + * 512 bits is allowed, c.f. FIPS 140-3 IG C.I. 41 + */ 42 + if (fips_enabled && keylen != 32 && keylen != 64) 43 + return -EINVAL; 44 + 45 + /* 46 + * Ensure that the AES and tweak key are not identical when 47 + * in FIPS mode or the FORBID_WEAK_KEYS flag is set. 48 + */ 22 49 if ((fips_enabled || (crypto_skcipher_get_flags(tfm) & 23 50 CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) && 24 51 !crypto_memneq(key, key + (keylen / 2), keylen / 2))
+2 -2
include/linux/crypto.h
··· 176 176 struct crypto_tfm; 177 177 struct crypto_type; 178 178 179 - typedef void (*crypto_completion_t)(struct crypto_async_request *req, int err); 179 + typedef void (*crypto_completion_t)(void *req, int err); 180 180 181 181 /** 182 182 * DOC: Block Cipher Context Data Structures ··· 595 595 /* 596 596 * Async ops completion helper functioons 597 597 */ 598 - void crypto_req_done(struct crypto_async_request *req, int err); 598 + void crypto_req_done(void *req, int err); 599 599 600 600 static inline int crypto_wait_req(int err, struct crypto_wait *wait) 601 601 {
+2 -3
include/linux/hisi_acc_qm.h
··· 122 122 }; 123 123 124 124 enum qm_hw_ver { 125 - QM_HW_UNKNOWN = -1, 126 125 QM_HW_V1 = 0x20, 127 126 QM_HW_V2 = 0x21, 128 127 QM_HW_V3 = 0x30, ··· 308 309 const struct hisi_qm_err_ini *err_ini; 309 310 struct hisi_qm_err_info err_info; 310 311 struct hisi_qm_err_status err_status; 311 - unsigned long misc_ctl; /* driver removing and reset sched */ 312 + /* driver removing and reset sched */ 313 + unsigned long misc_ctl; 312 314 /* Device capability bit */ 313 315 unsigned long caps; 314 316 ··· 332 332 333 333 const char *algs; 334 334 bool use_sva; 335 - bool is_frozen; 336 335 337 336 resource_size_t phys_base; 338 337 resource_size_t db_phys_base;
+22 -3
lib/crypto/blake2s-selftest.c
··· 545 545 0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, }, 546 546 }; 547 547 548 - bool __init blake2s_selftest(void) 548 + static bool __init noinline_for_stack blake2s_digest_test(void) 549 549 { 550 550 u8 key[BLAKE2S_KEY_SIZE]; 551 551 u8 buf[ARRAY_SIZE(blake2s_testvecs)]; ··· 589 589 } 590 590 } 591 591 592 + return success; 593 + } 594 + 595 + static bool __init noinline_for_stack blake2s_random_test(void) 596 + { 597 + struct blake2s_state state; 598 + bool success = true; 599 + int i, l; 600 + 592 601 for (i = 0; i < 32; ++i) { 593 602 enum { TEST_ALIGNMENT = 16 }; 594 - u8 unaligned_block[BLAKE2S_BLOCK_SIZE + TEST_ALIGNMENT - 1] 603 + u8 blocks[BLAKE2S_BLOCK_SIZE * 2 + TEST_ALIGNMENT - 1] 595 604 __aligned(TEST_ALIGNMENT); 596 - u8 blocks[BLAKE2S_BLOCK_SIZE * 2]; 605 + u8 *unaligned_block = blocks + BLAKE2S_BLOCK_SIZE; 597 606 struct blake2s_state state1, state2; 598 607 599 608 get_random_bytes(blocks, sizeof(blocks)); ··· 636 627 } 637 628 } 638 629 } 630 + 631 + return success; 632 + } 633 + 634 + bool __init blake2s_selftest(void) 635 + { 636 + bool success; 637 + 638 + success = blake2s_digest_test(); 639 + success &= blake2s_random_test(); 639 640 640 641 return success; 641 642 }
+2 -1
lib/mpi/mpicoder.c
··· 504 504 505 505 while (sg_miter_next(&miter)) { 506 506 buff = miter.addr; 507 - len = miter.length; 507 + len = min_t(unsigned, miter.length, nbytes); 508 + nbytes -= len; 508 509 509 510 for (x = 0; x < len; x++) { 510 511 a <<= 8;
+6 -31
net/bluetooth/ecdh_helper.c
··· 25 25 #include <linux/scatterlist.h> 26 26 #include <crypto/ecdh.h> 27 27 28 - struct ecdh_completion { 29 - struct completion completion; 30 - int err; 31 - }; 32 - 33 - static void ecdh_complete(struct crypto_async_request *req, int err) 34 - { 35 - struct ecdh_completion *res = req->data; 36 - 37 - if (err == -EINPROGRESS) 38 - return; 39 - 40 - res->err = err; 41 - complete(&res->completion); 42 - } 43 - 44 28 static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits) 45 29 { 46 30 int i; ··· 44 60 int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64], 45 61 u8 secret[32]) 46 62 { 63 + DECLARE_CRYPTO_WAIT(result); 47 64 struct kpp_request *req; 48 65 u8 *tmp; 49 - struct ecdh_completion result; 50 66 struct scatterlist src, dst; 51 67 int err; 52 68 ··· 60 76 goto free_tmp; 61 77 } 62 78 63 - init_completion(&result.completion); 64 - 65 79 swap_digits((u64 *)public_key, (u64 *)tmp, 4); /* x */ 66 80 swap_digits((u64 *)&public_key[32], (u64 *)&tmp[32], 4); /* y */ 67 81 ··· 68 86 kpp_request_set_input(req, &src, 64); 69 87 kpp_request_set_output(req, &dst, 32); 70 88 kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, 71 - ecdh_complete, &result); 89 + crypto_req_done, &result); 72 90 err = crypto_kpp_compute_shared_secret(req); 73 - if (err == -EINPROGRESS) { 74 - wait_for_completion(&result.completion); 75 - err = result.err; 76 - } 91 + err = crypto_wait_req(err, &result); 77 92 if (err < 0) { 78 93 pr_err("alg: ecdh: compute shared secret failed. 
err %d\n", 79 94 err); ··· 144 165 */ 145 166 int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64]) 146 167 { 168 + DECLARE_CRYPTO_WAIT(result); 147 169 struct kpp_request *req; 148 170 u8 *tmp; 149 - struct ecdh_completion result; 150 171 struct scatterlist dst; 151 172 int err; 152 173 ··· 160 181 goto free_tmp; 161 182 } 162 183 163 - init_completion(&result.completion); 164 184 sg_init_one(&dst, tmp, 64); 165 185 kpp_request_set_input(req, NULL, 0); 166 186 kpp_request_set_output(req, &dst, 64); 167 187 kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, 168 - ecdh_complete, &result); 188 + crypto_req_done, &result); 169 189 170 190 err = crypto_kpp_generate_public_key(req); 171 - if (err == -EINPROGRESS) { 172 - wait_for_completion(&result.completion); 173 - err = result.err; 174 - } 191 + err = crypto_wait_req(err, &result); 175 192 if (err < 0) 176 193 goto free_all; 177 194
+4 -4
net/ipv4/ah4.c
··· 117 117 return 0; 118 118 } 119 119 120 - static void ah_output_done(struct crypto_async_request *base, int err) 120 + static void ah_output_done(void *data, int err) 121 121 { 122 122 u8 *icv; 123 123 struct iphdr *iph; 124 - struct sk_buff *skb = base->data; 124 + struct sk_buff *skb = data; 125 125 struct xfrm_state *x = skb_dst(skb)->xfrm; 126 126 struct ah_data *ahp = x->data; 127 127 struct iphdr *top_iph = ip_hdr(skb); ··· 262 262 return err; 263 263 } 264 264 265 - static void ah_input_done(struct crypto_async_request *base, int err) 265 + static void ah_input_done(void *data, int err) 266 266 { 267 267 u8 *auth_data; 268 268 u8 *icv; 269 269 struct iphdr *work_iph; 270 - struct sk_buff *skb = base->data; 270 + struct sk_buff *skb = data; 271 271 struct xfrm_state *x = xfrm_input_state(skb); 272 272 struct ah_data *ahp = x->data; 273 273 struct ip_auth_hdr *ah = ip_auth_hdr(skb);
+10 -10
net/ipv4/esp4.c
··· 244 244 } 245 245 #endif 246 246 247 - static void esp_output_done(struct crypto_async_request *base, int err) 247 + static void esp_output_done(void *data, int err) 248 248 { 249 - struct sk_buff *skb = base->data; 249 + struct sk_buff *skb = data; 250 250 struct xfrm_offload *xo = xfrm_offload(skb); 251 251 void *tmp; 252 252 struct xfrm_state *x; ··· 332 332 return esph; 333 333 } 334 334 335 - static void esp_output_done_esn(struct crypto_async_request *base, int err) 335 + static void esp_output_done_esn(void *data, int err) 336 336 { 337 - struct sk_buff *skb = base->data; 337 + struct sk_buff *skb = data; 338 338 339 339 esp_output_restore_header(skb); 340 - esp_output_done(base, err); 340 + esp_output_done(data, err); 341 341 } 342 342 343 343 static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb, ··· 830 830 } 831 831 EXPORT_SYMBOL_GPL(esp_input_done2); 832 832 833 - static void esp_input_done(struct crypto_async_request *base, int err) 833 + static void esp_input_done(void *data, int err) 834 834 { 835 - struct sk_buff *skb = base->data; 835 + struct sk_buff *skb = data; 836 836 837 837 xfrm_input_resume(skb, esp_input_done2(skb, err)); 838 838 } ··· 860 860 } 861 861 } 862 862 863 - static void esp_input_done_esn(struct crypto_async_request *base, int err) 863 + static void esp_input_done_esn(void *data, int err) 864 864 { 865 - struct sk_buff *skb = base->data; 865 + struct sk_buff *skb = data; 866 866 867 867 esp_input_restore_header(skb); 868 - esp_input_done(base, err); 868 + esp_input_done(data, err); 869 869 } 870 870 871 871 /*
+4 -4
net/ipv6/ah6.c
··· 281 281 return 0; 282 282 } 283 283 284 - static void ah6_output_done(struct crypto_async_request *base, int err) 284 + static void ah6_output_done(void *data, int err) 285 285 { 286 286 int extlen; 287 287 u8 *iph_base; 288 288 u8 *icv; 289 - struct sk_buff *skb = base->data; 289 + struct sk_buff *skb = data; 290 290 struct xfrm_state *x = skb_dst(skb)->xfrm; 291 291 struct ah_data *ahp = x->data; 292 292 struct ipv6hdr *top_iph = ipv6_hdr(skb); ··· 451 451 return err; 452 452 } 453 453 454 - static void ah6_input_done(struct crypto_async_request *base, int err) 454 + static void ah6_input_done(void *data, int err) 455 455 { 456 456 u8 *auth_data; 457 457 u8 *icv; 458 458 u8 *work_iph; 459 - struct sk_buff *skb = base->data; 459 + struct sk_buff *skb = data; 460 460 struct xfrm_state *x = xfrm_input_state(skb); 461 461 struct ah_data *ahp = x->data; 462 462 struct ip_auth_hdr *ah = ip_auth_hdr(skb);
+10 -10
net/ipv6/esp6.c
··· 278 278 } 279 279 } 280 280 281 - static void esp_output_done(struct crypto_async_request *base, int err) 281 + static void esp_output_done(void *data, int err) 282 282 { 283 - struct sk_buff *skb = base->data; 283 + struct sk_buff *skb = data; 284 284 struct xfrm_offload *xo = xfrm_offload(skb); 285 285 void *tmp; 286 286 struct xfrm_state *x; ··· 368 368 return esph; 369 369 } 370 370 371 - static void esp_output_done_esn(struct crypto_async_request *base, int err) 371 + static void esp_output_done_esn(void *data, int err) 372 372 { 373 - struct sk_buff *skb = base->data; 373 + struct sk_buff *skb = data; 374 374 375 375 esp_output_restore_header(skb); 376 - esp_output_done(base, err); 376 + esp_output_done(data, err); 377 377 } 378 378 379 379 static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb, ··· 879 879 } 880 880 EXPORT_SYMBOL_GPL(esp6_input_done2); 881 881 882 - static void esp_input_done(struct crypto_async_request *base, int err) 882 + static void esp_input_done(void *data, int err) 883 883 { 884 - struct sk_buff *skb = base->data; 884 + struct sk_buff *skb = data; 885 885 886 886 xfrm_input_resume(skb, esp6_input_done2(skb, err)); 887 887 } ··· 909 909 } 910 910 } 911 911 912 - static void esp_input_done_esn(struct crypto_async_request *base, int err) 912 + static void esp_input_done_esn(void *data, int err) 913 913 { 914 - struct sk_buff *skb = base->data; 914 + struct sk_buff *skb = data; 915 915 916 916 esp_input_restore_header(skb); 917 - esp_input_done(base, err); 917 + esp_input_done(data, err); 918 918 } 919 919 920 920 static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
+6 -6
net/tipc/crypto.c
··· 267 267 struct tipc_bearer *b, 268 268 struct tipc_media_addr *dst, 269 269 struct tipc_node *__dnode); 270 - static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err); 270 + static void tipc_aead_encrypt_done(void *data, int err); 271 271 static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead, 272 272 struct sk_buff *skb, struct tipc_bearer *b); 273 - static void tipc_aead_decrypt_done(struct crypto_async_request *base, int err); 273 + static void tipc_aead_decrypt_done(void *data, int err); 274 274 static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr); 275 275 static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead, 276 276 u8 tx_key, struct sk_buff *skb, ··· 830 830 return rc; 831 831 } 832 832 833 - static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err) 833 + static void tipc_aead_encrypt_done(void *data, int err) 834 834 { 835 - struct sk_buff *skb = base->data; 835 + struct sk_buff *skb = data; 836 836 struct tipc_crypto_tx_ctx *tx_ctx = TIPC_SKB_CB(skb)->crypto_ctx; 837 837 struct tipc_bearer *b = tx_ctx->bearer; 838 838 struct tipc_aead *aead = tx_ctx->aead; ··· 954 954 return rc; 955 955 } 956 956 957 - static void tipc_aead_decrypt_done(struct crypto_async_request *base, int err) 957 + static void tipc_aead_decrypt_done(void *data, int err) 958 958 { 959 - struct sk_buff *skb = base->data; 959 + struct sk_buff *skb = data; 960 960 struct tipc_crypto_rx_ctx *rx_ctx = TIPC_SKB_CB(skb)->crypto_ctx; 961 961 struct tipc_bearer *b = rx_ctx->bearer; 962 962 struct tipc_aead *aead = rx_ctx->aead;
+2
net/tls/tls.h
··· 70 70 char content_type; 71 71 struct scatterlist sg_content_type; 72 72 73 + struct sock *sk; 74 + 73 75 char aad_space[TLS_AAD_SPACE_SIZE]; 74 76 u8 iv_data[MAX_IV_SIZE]; 75 77 struct aead_request aead_req;
+29 -13
net/tls/tls_sw.c
··· 38 38 #include <linux/bug.h> 39 39 #include <linux/sched/signal.h> 40 40 #include <linux/module.h> 41 + #include <linux/kernel.h> 41 42 #include <linux/splice.h> 42 43 #include <crypto/aead.h> 43 44 ··· 58 57 }; 59 58 60 59 struct tls_decrypt_ctx { 60 + struct sock *sk; 61 61 u8 iv[MAX_IV_SIZE]; 62 62 u8 aad[TLS_MAX_AAD_SIZE]; 63 63 u8 tail; ··· 179 177 return sub; 180 178 } 181 179 182 - static void tls_decrypt_done(struct crypto_async_request *req, int err) 180 + static void tls_decrypt_done(void *data, int err) 183 181 { 184 - struct aead_request *aead_req = (struct aead_request *)req; 182 + struct aead_request *aead_req = data; 183 + struct crypto_aead *aead = crypto_aead_reqtfm(aead_req); 185 184 struct scatterlist *sgout = aead_req->dst; 186 185 struct scatterlist *sgin = aead_req->src; 187 186 struct tls_sw_context_rx *ctx; 187 + struct tls_decrypt_ctx *dctx; 188 188 struct tls_context *tls_ctx; 189 189 struct scatterlist *sg; 190 190 unsigned int pages; 191 191 struct sock *sk; 192 + int aead_size; 192 193 193 - sk = (struct sock *)req->data; 194 + aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead); 195 + aead_size = ALIGN(aead_size, __alignof__(*dctx)); 196 + dctx = (void *)((u8 *)aead_req + aead_size); 197 + 198 + sk = dctx->sk; 194 199 tls_ctx = tls_get_ctx(sk); 195 200 ctx = tls_sw_ctx_rx(tls_ctx); 196 201 ··· 249 240 if (darg->async) { 250 241 aead_request_set_callback(aead_req, 251 242 CRYPTO_TFM_REQ_MAY_BACKLOG, 252 - tls_decrypt_done, sk); 243 + tls_decrypt_done, aead_req); 253 244 atomic_inc(&ctx->decrypt_pending); 254 245 } else { 255 246 aead_request_set_callback(aead_req, ··· 345 336 sg_set_buf(&rec->sg_aead_out[0], rec->aad_space, prot->aad_size); 346 337 sg_unmark_end(&rec->sg_aead_out[1]); 347 338 339 + rec->sk = sk; 340 + 348 341 return rec; 349 342 } 350 343 ··· 428 417 return rc; 429 418 } 430 419 431 - static void tls_encrypt_done(struct crypto_async_request *req, int err) 420 + static void tls_encrypt_done(void *data, int err) 
432 421 { 433 - struct aead_request *aead_req = (struct aead_request *)req; 434 - struct sock *sk = req->data; 435 - struct tls_context *tls_ctx = tls_get_ctx(sk); 436 - struct tls_prot_info *prot = &tls_ctx->prot_info; 437 - struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); 422 + struct tls_sw_context_tx *ctx; 423 + struct tls_context *tls_ctx; 424 + struct tls_prot_info *prot; 425 + struct tls_rec *rec = data; 438 426 struct scatterlist *sge; 439 427 struct sk_msg *msg_en; 440 - struct tls_rec *rec; 441 428 bool ready = false; 429 + struct sock *sk; 442 430 int pending; 443 431 444 - rec = container_of(aead_req, struct tls_rec, aead_req); 445 432 msg_en = &rec->msg_encrypted; 433 + 434 + sk = rec->sk; 435 + tls_ctx = tls_get_ctx(sk); 436 + prot = &tls_ctx->prot_info; 437 + ctx = tls_sw_ctx_tx(tls_ctx); 446 438 447 439 sge = sk_msg_elem(msg_en, msg_en->sg.curr); 448 440 sge->offset -= prot->prepend_size; ··· 534 520 data_len, rec->iv_data); 535 521 536 522 aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, 537 - tls_encrypt_done, sk); 523 + tls_encrypt_done, rec); 538 524 539 525 /* Add the record in tx_list */ 540 526 list_add_tail((struct list_head *)&rec->list, &ctx->tx_list); ··· 1499 1485 * Both structs are variable length. 1500 1486 */ 1501 1487 aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv); 1488 + aead_size = ALIGN(aead_size, __alignof__(*dctx)); 1502 1489 mem = kmalloc(aead_size + struct_size(dctx, sg, n_sgin + n_sgout), 1503 1490 sk->sk_allocation); 1504 1491 if (!mem) { ··· 1510 1495 /* Segment the allocated memory */ 1511 1496 aead_req = (struct aead_request *)mem; 1512 1497 dctx = (struct tls_decrypt_ctx *)(mem + aead_size); 1498 + dctx->sk = sk; 1513 1499 sgin = &dctx->sg[0]; 1514 1500 sgout = &dctx->sg[n_sgin]; 1515 1501
+5 -25
security/keys/dh.c
··· 64 64 kfree_sensitive(dh->g); 65 65 } 66 66 67 - struct dh_completion { 68 - struct completion completion; 69 - int err; 70 - }; 71 - 72 - static void dh_crypto_done(struct crypto_async_request *req, int err) 73 - { 74 - struct dh_completion *compl = req->data; 75 - 76 - if (err == -EINPROGRESS) 77 - return; 78 - 79 - compl->err = err; 80 - complete(&compl->completion); 81 - } 82 - 83 67 static int kdf_alloc(struct crypto_shash **hash, char *hashname) 84 68 { 85 69 struct crypto_shash *tfm; ··· 130 146 struct keyctl_dh_params pcopy; 131 147 struct dh dh_inputs; 132 148 struct scatterlist outsg; 133 - struct dh_completion compl; 149 + DECLARE_CRYPTO_WAIT(compl); 134 150 struct crypto_kpp *tfm; 135 151 struct kpp_request *req; 136 152 uint8_t *secret; ··· 250 266 251 267 kpp_request_set_input(req, NULL, 0); 252 268 kpp_request_set_output(req, &outsg, outlen); 253 - init_completion(&compl.completion); 254 269 kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | 255 270 CRYPTO_TFM_REQ_MAY_SLEEP, 256 - dh_crypto_done, &compl); 271 + crypto_req_done, &compl); 257 272 258 273 /* 259 274 * For DH, generate_public_key and generate_shared_secret are 260 275 * the same calculation 261 276 */ 262 277 ret = crypto_kpp_generate_public_key(req); 263 - if (ret == -EINPROGRESS) { 264 - wait_for_completion(&compl.completion); 265 - ret = compl.err; 266 - if (ret) 267 - goto out6; 268 - } 278 + ret = crypto_wait_req(ret, &compl); 279 + if (ret) 280 + goto out6; 269 281 270 282 if (kdfcopy) { 271 283 /*