// A game about forced loneliness, made by TACStudios
1using Unity.Burst.Intrinsics;
2using Unity.Collections.LowLevel.Unsafe;
3
4namespace Unity.Collections
5{
6 [GenerateTestsForBurstCompatibility]
7 public static partial class xxHash3
8 {
/// <summary>
/// AVX2 version of the long-input accumulation loop. Folds <paramref name="input"/> into
/// <paramref name="acc"/> stripe by stripe and, when <paramref name="dest"/> is not null,
/// copies all <paramref name="length"/> input bytes into it.
/// </summary>
/// <remarks>
/// The body only executes when <c>X86.Avx2.IsAvx2Supported</c> is true; otherwise the call is a no-op.
/// NOTE(review): assumes <paramref name="length"/> is at least <c>STRIPE_LEN</c>, because the final
/// stripe is read from <c>input + length - STRIPE_LEN</c> — confirm against callers.
/// </remarks>
/// <param name="acc">Accumulators, updated in place.</param>
/// <param name="input">Start of the data to hash.</param>
/// <param name="dest">Optional copy target; pass null to hash without copying.</param>
/// <param name="length">Number of bytes available at <paramref name="input"/>.</param>
/// <param name="secret">Secret key material consumed by the accumulate and scramble steps.</param>
/// <param name="isHash64">Forwarded unchanged to <c>Avx2Accumulate</c>.</param>
internal static unsafe void Avx2HashLongInternalLoop(ulong* acc, byte* input, byte* dest, long length, byte* secret, int isHash64)
{
    if (X86.Avx2.IsAvx2Supported)
    {
        // Whole blocks. Dividing (length - 1) always leaves at least one byte for the final-stripe step below.
        var nb_blocks = (length - 1) / BLOCK_LEN;

        // The index is a long so that n * BLOCK_LEN cannot wrap in 32-bit arithmetic on very large inputs.
        for (long n = 0; n < nb_blocks; n++)
        {
            var blockOffset = n * BLOCK_LEN;
            Avx2Accumulate(acc, input + blockOffset, dest == null ? null : dest + blockOffset, secret, NB_ROUNDS, isHash64);
            Avx2ScrambleAcc(acc, secret + SECRET_KEY_SIZE - STRIPE_LEN);
        }

        // Whole stripes of the last, partial block.
        var consumed = nb_blocks * BLOCK_LEN;
        var nbStripes = ((length - 1) - consumed) / STRIPE_LEN;
        Avx2Accumulate(acc, input + consumed, dest == null ? null : dest + consumed, secret, nbStripes, isHash64);

        // Final stripe: always the last STRIPE_LEN bytes of the input. It may overlap bytes already
        // accumulated above, which is why it is hashed with a null destination.
        var p = input + length - STRIPE_LEN;
        Avx2Accumulate512(acc, p, null, secret + SECRET_KEY_SIZE - STRIPE_LEN - SECRET_LASTACC_START);

        if (dest != null)
        {
            // The stripe loops copied exactly this many bytes. Everything after that still has to be
            // copied, including a complete last stripe when length is an exact multiple of STRIPE_LEN
            // (length % STRIPE_LEN would be 0 in that case and the stripe would be left out).
            var copied = consumed + nbStripes * STRIPE_LEN;
            var remaining = length - copied;
            if (remaining > 0)
            {
                UnsafeUtility.MemCpy(dest + copied, input + copied, remaining);
            }
        }
    }
}
37
/// <summary>
/// Scrambles the accumulators in place using 64 bytes of secret, processed as two 256-bit banks.
/// </summary>
/// <remarks>
/// For each 64-bit lane: <c>x = acc ^ (acc &gt;&gt; 47) ^ key</c>, then the lane becomes
/// <c>lo32(x) * PRIME32_1 + ((hi32(x) * PRIME32_1) &lt;&lt; 32)</c>.
/// The body only executes when <c>X86.Avx2.IsAvx2Supported</c> is true.
/// </remarks>
/// <param name="acc">Accumulators, read and written as two <c>v256</c> values.</param>
/// <param name="secret">Secret bytes; two unaligned 256-bit loads are taken from this address.</param>
internal static unsafe void Avx2ScrambleAcc(ulong* acc, byte* secret)
{
    if (X86.Avx2.IsAvx2Supported)
    {
        var accBanks = (v256*) acc;
        var secretBanks = (v256*) secret;
        var primeLanes = X86.Avx.mm256_set1_epi32(unchecked((int) PRIME32_1));

        // Two banks of 256 bits each, handled identically.
        for (var bank = 0; bank < 2; bank++)
        {
            var current = accBanks[bank];
            var folded = X86.Avx2.mm256_xor_si256(current, X86.Avx2.mm256_srli_epi64(current, 47));
            var keyed = X86.Avx2.mm256_xor_si256(folded, X86.Avx.mm256_loadu_si256(secretBanks + bank));

            // Bring the upper 32 bits of every 64-bit lane down so mul_epu32 can multiply them.
            var keyedHigh = X86.Avx2.mm256_shuffle_epi32(keyed, X86.Sse.SHUFFLE(0, 3, 0, 1));
            var lowProduct = X86.Avx2.mm256_mul_epu32(keyed, primeLanes);
            var highProduct = X86.Avx2.mm256_mul_epu32(keyedHigh, primeLanes);

            accBanks[bank] = X86.Avx2.mm256_add_epi64(lowProduct, X86.Avx2.mm256_slli_epi64(highProduct, 32));
        }
    }
}
75
/// <summary>
/// Runs <c>Avx2Accumulate512</c> over <paramref name="nbStripes"/> consecutive stripes.
/// </summary>
/// <remarks>
/// Stripe <c>i</c> reads from <c>input + i * STRIPE_LEN</c> and uses the secret at
/// <c>secret + i * SECRET_CONSUME_RATE</c>. The body only executes when
/// <c>X86.Avx2.IsAvx2Supported</c> is true.
/// </remarks>
/// <param name="acc">Accumulators, updated in place.</param>
/// <param name="input">First stripe of input data.</param>
/// <param name="dest">Optional copy target for the stripes; null disables copying.</param>
/// <param name="secret">Secret for the first stripe.</param>
/// <param name="nbStripes">Number of stripes to process.</param>
/// <param name="isHash64">Not used by this implementation.</param>
internal static unsafe void Avx2Accumulate(ulong* acc, byte* input, byte* dest, byte* secret, long nbStripes,
    int isHash64)
{
    if (X86.Avx2.IsAvx2Supported)
    {
        for (var stripe = 0; stripe < nbStripes; stripe++)
        {
            var offset = stripe * STRIPE_LEN;

            // Only derive a destination address when the caller asked for a copy.
            byte* stripeDest = null;
            if (dest != null)
            {
                stripeDest = dest + offset;
            }

            Avx2Accumulate512(acc, input + offset, stripeDest, secret + stripe * SECRET_CONSUME_RATE);
        }
    }
}
89
/// <summary>
/// Accumulates one 64-byte stripe of input into the accumulators, optionally copying the stripe to
/// <paramref name="dest"/>.
/// </summary>
/// <remarks>
/// The stripe is processed as two 256-bit banks. For each bank, with <c>k = data ^ key</c>, the
/// accumulator gains <c>lo32(k) * hi32(k)</c> per 64-bit lane plus the input with its 64-bit halves
/// swapped inside each 128-bit lane. The body only executes when <c>X86.Avx2.IsAvx2Supported</c> is true.
/// </remarks>
/// <param name="acc">Accumulators, read and written as two <c>v256</c> values.</param>
/// <param name="input">Stripe to hash; read with unaligned 256-bit loads.</param>
/// <param name="dest">Optional copy target receiving the 64 input bytes; null disables copying.</param>
/// <param name="secret">Secret bytes for this stripe; read with unaligned 256-bit loads.</param>
internal static unsafe void Avx2Accumulate512(ulong* acc, byte* input, byte* dest, byte* secret)
{
    if (X86.Avx2.IsAvx2Supported)
    {
        var accBanks = (v256*) acc;
        var secretBanks = (v256*) secret;
        var inputBanks = (v256*) input;

        // Two banks of 256 bits (32 bytes) each, handled identically.
        for (var bank = 0; bank < 2; bank++)
        {
            var data = X86.Avx.mm256_loadu_si256(inputBanks + bank);
            var keyed = X86.Avx2.mm256_xor_si256(data, X86.Avx.mm256_loadu_si256(secretBanks + bank));

            if (dest != null)
            {
                // Copy the raw input, not the keyed value.
                X86.Avx.mm256_storeu_si256(dest + bank * 32, data);
            }

            // Upper 32 bits of each 64-bit lane moved down so mul_epu32 multiplies low by high.
            var keyedHigh = X86.Avx2.mm256_shuffle_epi32(keyed, X86.Sse.SHUFFLE(0, 3, 0, 1));
            var product = X86.Avx2.mm256_mul_epu32(keyed, keyedHigh);

            // Input with adjacent 64-bit lanes exchanged.
            var swapped = X86.Avx2.mm256_shuffle_epi32(data, X86.Sse.SHUFFLE(1, 0, 3, 2));
            var withInput = X86.Avx2.mm256_add_epi64(accBanks[bank], swapped);

            accBanks[bank] = X86.Avx2.mm256_add_epi64(product, withInput);
        }
    }
}
133
134 }
135}