A game about forced loneliness, made by TACStudios
// Ref: https://github.com/knarkowicz/GPURealTimeBC6H/blob/master/bin/compress.hlsl
// Doc: https://msdn.microsoft.com/en-us/library/windows/desktop/hh308952(v=vs.85).aspx

// Measure compression error.
// Returns the sum over the three channels of the squared log2 ratio
// log2((b + 1) / (a + 1)).
float CalcMSLE(float3 a, float3 b)
{
    float3 err = log2((b + 1.0) / (a + 1.0));
    err = err * err;
    return err.x + err.y + err.z;
}

// Quantization helpers
// QuantizeN rescales the half-float bit pattern of x (as returned by f32tof16)
// so that the bit pattern 0x7c00 (0x7bff + 1) maps to 2^N.
float3 Quantize7(float3 x)
{
    return (f32tof16(x) * 128.0) / (0x7bff + 1.0);
}

float3 Quantize9(float3 x)
{
    return (f32tof16(x) * 512.0) / (0x7bff + 1.0);
}

float3 Quantize10(float3 x)
{
    return (f32tof16(x) * 1024.0) / (0x7bff + 1.0);
}

// UnquantizeN expands an N-bit quantized value to the 16-bit range,
// adding 0x8000 before the division by 2^N.
float3 Unquantize7(float3 x)
{
    return (x * 65536.0 + 0x8000) / 128.0;
}

float3 Unquantize9(float3 x)
{
    return (x * 65536.0 + 0x8000) / 512.0;
}

float3 Unquantize10(float3 x)
{
    return (x * 65536.0 + 0x8000) / 1024.0;
}

// BC6H Helpers
// Compute index of a texel projected against endpoints.
// texelPos is normalized to the [endPoint0Pos, endPoint1Pos] segment, scaled,
// biased, and clamped to the 3-bit index range [0, 7].
uint ComputeIndex3(float texelPos, float endPoint0Pos, float endPoint1Pos)
{
    float r = (texelPos - endPoint0Pos) / (endPoint1Pos - endPoint0Pos);
    return (uint) clamp(r * 6.98182f + 0.00909f + 0.5f, 0.0, 7.0);
}

// Same as ComputeIndex3, but for the 4-bit index range [0, 15].
uint ComputeIndex4(float texelPos, float endPoint0Pos, float endPoint1Pos)
{
    float r = (texelPos - endPoint0Pos) / (endPoint1Pos - endPoint0Pos);
    return (uint) clamp(r * 14.93333f + 0.03333f + 0.5f, 0.0, 15.0);
}

// Converts each component of v1 to an integer, keeps only the bits selected by
// mask, and sets signFlag on components that were negative. The result is
// written back to v1.
void SignExtend(inout float3 v1, uint mask, uint signFlag)
{
    int3 v = (int3) v1;
    v.x = (v.x & mask) | (v.x < 0 ? signFlag : 0);
    v.y = (v.y & mask) | (v.y < 0 ? signFlag : 0);
    v.z = (v.z & mask) | (v.z < 0 ? signFlag : 0);
    v1 = v;
}

// 2nd step for unquantize.
// Blends the two unquantized endpoints with a weight out of 64 (with a +32
// bias), scales by 31/4096, and reinterprets the truncated result as
// half-float bits.
float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight)
{
    float3 comp = (endpoint0Unq * (64.0 - weight) + endpoint1Unq * weight + 32.0) * (31.0 / 4096.0);
    return f16tof32(uint3(comp));
}

// BC6H Modes
// Encodes a 4x4 texel block with two 10-bit-per-channel endpoints and one
// 4-bit index per texel.
//   block     : receives the 128-bit encoded block; every component is
//               overwritten, so the incoming value does not matter.
//   blockMSLE : receives the compression error of the encoded block
//               (sum of CalcMSLE over the 16 texels).
//   texels    : the 16 source texels.
void EncodeMode11(inout uint4 block, inout float blockMSLE, float3 texels[16])
{
    // compute endpoints (min/max RGB bbox)
    float3 blockMin = texels[0];
    float3 blockMax = texels[0];
    uint i;
    for (i = 1; i < 16; ++i)
    {
        blockMin = min(blockMin, texels[i]);
        blockMax = max(blockMax, texels[i]);
    }

    // refine endpoints in log2 RGB space - find the second min and max value
    float3 refinedBlockMin = blockMax;
    float3 refinedBlockMax = blockMin;
    for (i = 0; i < 16; ++i)
    {
        // Channels equal to the block extreme are skipped by substituting the
        // current refined value, so only the next-closest value can win.
        float3 minTexel = float3(
            (texels[i].x == blockMin.x) ? refinedBlockMin.x : texels[i].x,
            (texels[i].y == blockMin.y) ? refinedBlockMin.y : texels[i].y,
            (texels[i].z == blockMin.z) ? refinedBlockMin.z : texels[i].z
        );

        float3 maxTexel = float3(
            (texels[i].x == blockMax.x) ? refinedBlockMax.x : texels[i].x,
            (texels[i].y == blockMax.y) ? refinedBlockMax.y : texels[i].y,
            (texels[i].z == blockMax.z) ? refinedBlockMax.z : texels[i].z
        );

        refinedBlockMin = min(refinedBlockMin, minTexel);
        refinedBlockMax = max(refinedBlockMax, maxTexel);
    }

    // Pull each endpoint inward towards the second extreme, by at most 1/32 of
    // the block extent in log2 space.
    float3 logBlockMax = log2(blockMax + 1.0);
    float3 logBlockMin = log2(blockMin + 1.0);
    float3 logRefinedBlockMax = log2(refinedBlockMax + 1.0);
    float3 logRefinedBlockMin = log2(refinedBlockMin + 1.0);
    float3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0 / 32.0);
    logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt);
    logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt);
    blockMin = exp2(logBlockMin) - 1.0;
    blockMax = exp2(logBlockMax) - 1.0;

    // NOTE(review): when every texel is identical, blockDir sums to zero and
    // this is a 0/0 division - confirm the resulting indices are acceptable.
    float3 blockDir = blockMax - blockMin;
    blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z);

    float3 endpoint0 = Quantize10(blockMin);
    float3 endpoint1 = Quantize10(blockMax);
    float endPoint0Pos = f32tof16(dot(blockMin, blockDir));
    float endPoint1Pos = f32tof16(dot(blockMax, blockDir));

    // check if endpoint swap is required
    // Index 0 is stored in only 3 bits below, so texel 0 must land in the
    // lower half of the index range.
    float fixupTexelPos = f32tof16(dot(texels[0], blockDir));
    uint fixupIndex = ComputeIndex4(fixupTexelPos, endPoint0Pos, endPoint1Pos);
    if (fixupIndex > 7)
    {
        Swap(endPoint0Pos, endPoint1Pos);
        Swap(endpoint0, endpoint1);
    }

    // compute indices
    uint indices[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    for (i = 0; i < 16; ++i)
    {
        float texelPos = f32tof16(dot(texels[i], blockDir));
        indices[i] = ComputeIndex4(texelPos, endPoint0Pos, endPoint1Pos);
    }

    // compute compression error (MSLE)
    float3 endpoint0Unq = Unquantize10(endpoint0);
    float3 endpoint1Unq = Unquantize10(endpoint1);
    float msle = 0.0;
    for (i = 0; i < 16; ++i)
    {
        float weight = floor((indices[i] * 64.0) / 15.0 + 0.5);
        float3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight);

        msle += CalcMSLE(texels[i], texelUnc);
    }

    // encode block for mode 11
    blockMSLE = msle;
    block.x = 0x03;

    // endpoints
    // The first write to each of y/z/w is a plain assignment so that stale
    // bits in the incoming block cannot leak into the encoded output.
    block.x |= (uint) endpoint0.x << 5;
    block.x |= (uint) endpoint0.y << 15;
    block.x |= (uint) endpoint0.z << 25;
    block.y  = (uint) endpoint0.z >> 7;
    block.y |= (uint) endpoint1.x << 3;
    block.y |= (uint) endpoint1.y << 13;
    block.y |= (uint) endpoint1.z << 23;
    block.z  = (uint) endpoint1.z >> 9;

    // indices
    block.z |= indices[0] << 1;
    block.z |= indices[1] << 4;
    block.z |= indices[2] << 8;
    block.z |= indices[3] << 12;
    block.z |= indices[4] << 16;
    block.z |= indices[5] << 20;
    block.z |= indices[6] << 24;
    block.z |= indices[7] << 28;
    block.w  = indices[8] << 0;
    block.w |= indices[9] << 4;
    block.w |= indices[10] << 8;
    block.w |= indices[11] << 12;
    block.w |= indices[12] << 16;
    block.w |= indices[13] << 20;
    block.w |= indices[14] << 24;
    block.w |= indices[15] << 28;
}