A game about forced loneliness, made by TACStudios
at master 135 lines 5.8 kB view raw
using Unity.Burst.Intrinsics;
using Unity.Collections.LowLevel.Unsafe;

namespace Unity.Collections
{
    [GenerateTestsForBurstCompatibility]
    public static partial class xxHash3
    {
        /// <summary>
        /// AVX2 implementation of the long-input hashing loop: accumulates every stripe of
        /// <paramref name="input"/> into <paramref name="acc"/> and, when <paramref name="dest"/> is not null,
        /// also copies all <paramref name="length"/> input bytes to <paramref name="dest"/>.
        /// Does nothing when AVX2 is not supported.
        /// </summary>
        /// <param name="acc">Accumulator, read and written as two 256-bit vectors.</param>
        /// <param name="input">Start of the data to hash.</param>
        /// <param name="dest">Optional copy destination; pass null to hash without copying.</param>
        /// <param name="length">Number of input bytes. NOTE(review): the tail pass reads the last STRIPE_LEN bytes
        /// of the input, so this presumably must be at least STRIPE_LEN - confirm against the caller.</param>
        /// <param name="secret">Secret key material, consumed SECRET_CONSUME_RATE bytes further per stripe.</param>
        /// <param name="isHash64">Forwarded to <see cref="Avx2Accumulate"/>, which does not use it.</param>
        internal static unsafe void Avx2HashLongInternalLoop(ulong* acc, byte* input, byte* dest, long length, byte* secret, int isHash64)
        {
            if (X86.Avx2.IsAvx2Supported)
            {
                // Whole blocks of BLOCK_LEN bytes. Counting from (length - 1) guarantees that at least
                // one byte is always left over for the final stripe handled below.
                var nb_blocks = (length - 1) / BLOCK_LEN;

                // The index is a long so that the byte offset n * BLOCK_LEN is computed in 64-bit
                // arithmetic and cannot wrap for very large inputs.
                for (long n = 0; n < nb_blocks; n++)
                {
                    var blockOffset = n * BLOCK_LEN;
                    Avx2Accumulate(acc, input + blockOffset, dest == null ? null : dest + blockOffset, secret, NB_ROUNDS, isHash64);
                    Avx2ScrambleAcc(acc, secret + SECRET_KEY_SIZE - STRIPE_LEN);
                }

                // Whole stripes of the last, partial block (again leaving the final stripe out).
                var blocksLength = nb_blocks * BLOCK_LEN;
                var nbStripes = ((length - 1) - blocksLength) / STRIPE_LEN;
                Avx2Accumulate(acc, input + blocksLength, dest == null ? null : dest + blocksLength, secret, nbStripes, isHash64);

                // Final stripe: the last STRIPE_LEN bytes of the input, which may overlap the stripe
                // processed just before. It is not copied here because of that possible overlap.
                var p = input + length - STRIPE_LEN;
                Avx2Accumulate512(acc, p, null, secret + SECRET_KEY_SIZE - STRIPE_LEN - SECRET_LASTACC_START);

                if (dest != null)
                {
                    // Copy everything the stripe loops above did not write to dest. When length is not a
                    // multiple of STRIPE_LEN this is length % STRIPE_LEN bytes; when it is an exact
                    // multiple, this is the whole last stripe, which would otherwise never be copied.
                    var copied = blocksLength + nbStripes * STRIPE_LEN;
                    var remaining = length - copied;
                    if (remaining > 0)
                    {
                        UnsafeUtility.MemCpy(dest + copied, input + copied, remaining);
                    }
                }
            }
        }

        /// <summary>
        /// Scrambles the accumulator in place. For every 64-bit lane:
        /// <c>acc = ((acc ^ (acc >> 47)) ^ secret) * PRIME32_1</c>, keeping the low 64 bits of the product.
        /// Does nothing when AVX2 is not supported.
        /// </summary>
        /// <param name="acc">Accumulator, read and written as two 256-bit vectors.</param>
        /// <param name="secret">Secret key material; 64 bytes are read with unaligned loads.</param>
        internal static unsafe void Avx2ScrambleAcc(ulong* acc, byte* secret)
        {
            if (X86.Avx2.IsAvx2Supported)
            {
                var xAcc = (v256*) acc;
                var xSecret = (v256*) secret;
                var prime32 = X86.Avx.mm256_set1_epi32(unchecked((int) PRIME32_1));

                // First bank
                var acc_vec = xAcc[0];
                var shifted = X86.Avx2.mm256_srli_epi64(acc_vec, 47);
                var data_vec = X86.Avx2.mm256_xor_si256(acc_vec, shifted);

                var key_vec = X86.Avx.mm256_loadu_si256(xSecret + 0);
                var data_key = X86.Avx2.mm256_xor_si256(data_vec, key_vec);

                // mm256_mul_epu32 only multiplies the low 32 bits of each 64-bit lane, so the 64x32-bit
                // product is assembled from two partial products: low half, plus high half shifted by 32.
                var data_key_hi = X86.Avx2.mm256_shuffle_epi32(data_key, X86.Sse.SHUFFLE(0, 3, 0, 1));
                var prod_lo = X86.Avx2.mm256_mul_epu32(data_key, prime32);
                var prod_hi = X86.Avx2.mm256_mul_epu32(data_key_hi, prime32);

                xAcc[0] = X86.Avx2.mm256_add_epi64(prod_lo, X86.Avx2.mm256_slli_epi64(prod_hi, 32));

                // Second bank
                acc_vec = xAcc[1];
                shifted = X86.Avx2.mm256_srli_epi64(acc_vec, 47);
                data_vec = X86.Avx2.mm256_xor_si256(acc_vec, shifted);

                key_vec = X86.Avx.mm256_loadu_si256(xSecret + 1);
                data_key = X86.Avx2.mm256_xor_si256(data_vec, key_vec);

                data_key_hi = X86.Avx2.mm256_shuffle_epi32(data_key, X86.Sse.SHUFFLE(0, 3, 0, 1));
                prod_lo = X86.Avx2.mm256_mul_epu32(data_key, prime32);
                prod_hi = X86.Avx2.mm256_mul_epu32(data_key_hi, prime32);

                xAcc[1] = X86.Avx2.mm256_add_epi64(prod_lo, X86.Avx2.mm256_slli_epi64(prod_hi, 32));
            }
        }

        /// <summary>
        /// Accumulates <paramref name="nbStripes"/> consecutive stripes of STRIPE_LEN bytes, advancing the
        /// secret by SECRET_CONSUME_RATE bytes per stripe. Does nothing when AVX2 is not supported.
        /// </summary>
        /// <param name="acc">Accumulator, read and written as two 256-bit vectors.</param>
        /// <param name="input">First stripe to accumulate.</param>
        /// <param name="dest">Optional copy destination for the processed stripes; may be null.</param>
        /// <param name="secret">Secret key material for the first stripe.</param>
        /// <param name="nbStripes">Number of stripes to process; zero or negative processes nothing.</param>
        /// <param name="isHash64">Not used by this implementation.</param>
        internal static unsafe void Avx2Accumulate(ulong* acc, byte* input, byte* dest, byte* secret, long nbStripes,
            int isHash64)
        {
            if (X86.Avx2.IsAvx2Supported)
            {
                // Long index: matches the type of nbStripes and keeps the offset math in 64 bits.
                for (long n = 0; n < nbStripes; n++)
                {
                    var stripeOffset = n * STRIPE_LEN;
                    Avx2Accumulate512(acc, input + stripeOffset, dest == null ? null : dest + stripeOffset,
                        secret + n * SECRET_CONSUME_RATE);
                }
            }
        }

        /// <summary>
        /// Accumulates one 64-byte stripe. For every 64-bit lane, with <c>k = data ^ secret</c>:
        /// <c>acc += low32(k) * high32(k) + neighbour</c>, where <c>neighbour</c> is the input lane that shares
        /// the same 128-bit half. When <paramref name="dest"/> is not null the 64 input bytes are also stored
        /// to it unchanged. Does nothing when AVX2 is not supported.
        /// </summary>
        /// <param name="acc">Accumulator, read and written as two 256-bit vectors.</param>
        /// <param name="input">Stripe to accumulate; 64 bytes are read with unaligned loads.</param>
        /// <param name="dest">Optional copy destination (64 bytes, unaligned stores); may be null.</param>
        /// <param name="secret">Secret key material; 64 bytes are read with unaligned loads.</param>
        internal static unsafe void Avx2Accumulate512(ulong* acc, byte* input, byte* dest, byte* secret)
        {
            if (X86.Avx2.IsAvx2Supported)
            {
                var xAcc = (v256*) acc;
                var xSecret = (v256*) secret;
                var xInput = (v256*) input;

                // First bank
                var data_vec = X86.Avx.mm256_loadu_si256(xInput + 0);
                var key_vec = X86.Avx.mm256_loadu_si256(xSecret + 0);
                var data_key = X86.Avx2.mm256_xor_si256(data_vec, key_vec);

                if (dest != null)
                {
                    X86.Avx.mm256_storeu_si256(dest, data_vec);
                }

                // Move the high 32 bits of each 64-bit lane into the low position so that
                // mm256_mul_epu32 computes low32 * high32 for every lane.
                var data_key_hi = X86.Avx2.mm256_shuffle_epi32(data_key, X86.Sse.SHUFFLE(0, 3, 0, 1));
                var product = X86.Avx2.mm256_mul_epu32(data_key, data_key_hi);

                // Swap the two 64-bit lanes inside each 128-bit half of the raw input.
                var data_swap = X86.Avx2.mm256_shuffle_epi32(data_vec, X86.Sse.SHUFFLE(1, 0, 3, 2));
                var sum = X86.Avx2.mm256_add_epi64(xAcc[0], data_swap);

                xAcc[0] = X86.Avx2.mm256_add_epi64(product, sum);

                // Second bank
                data_vec = X86.Avx.mm256_loadu_si256(xInput + 1);
                key_vec = X86.Avx.mm256_loadu_si256(xSecret + 1);
                data_key = X86.Avx2.mm256_xor_si256(data_vec, key_vec);

                if (dest != null)
                {
                    X86.Avx.mm256_storeu_si256(dest + 32, data_vec);
                }

                data_key_hi = X86.Avx2.mm256_shuffle_epi32(data_key, X86.Sse.SHUFFLE(0, 3, 0, 1));
                product = X86.Avx2.mm256_mul_epu32(data_key, data_key_hi);
                data_swap = X86.Avx2.mm256_shuffle_epi32(data_vec, X86.Sse.SHUFFLE(1, 0, 3, 2));
                sum = X86.Avx2.mm256_add_epi64(xAcc[1], data_swap);

                xAcc[1] = X86.Avx2.mm256_add_epi64(product, sum);
            }
        }
    }
}