//
// Shared and general purpose code for the encoding implementations.
//
//
// Copyright (c) 2016 Intel Corporation
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify,
// merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be included in all copies
// or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
// CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
// OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
///////////////////////////////////////////////////////////
// Typedefs.
//
// Short aliases for the sized unsigned integer types used throughout this file.
typedef unsigned int8 uint8;
typedef unsigned int16 uint16;
typedef unsigned int32 uint32;
typedef unsigned int64 uint64;
///////////////////////////////////////////////////////////
// The following helpers isolate performance warnings
//
// int32
// Returns ptr[idx] for each program instance. idx is a per-instance index into
// uniform memory; routing the access through this helper keeps the compiler's
// performance warning on a single line.
inline int32 gather_int(const uniform int32* const uniform ptr, int idx)
{
    const int32 gathered = ptr[idx]; // (perf warning expected)
    return gathered;
}
// Writes value to ptr[idx], where ptr points to varying int32 storage and idx is
// a per-instance index. value is declared uint32 and is converted to the int32
// element type when stored.
inline void scatter_int(varying int32* uniform ptr, int idx, uint32 value)
{
    const int32 stored = (int32)value;
    ptr[idx] = stored; // (perf warning expected)
}
// uint8
// Returns ptr[idx] for each program instance; ptr points to uniform bytes.
inline unsigned int8 gather_uint8(const uniform unsigned int8* const uniform ptr, int idx)
{
    const unsigned int8 gathered = ptr[idx]; // (perf warning expected)
    return gathered;
}
// Same as above, but ptr points to varying bytes.
inline unsigned int8 gather_uint8(const varying unsigned int8* const uniform ptr, int idx)
{
    const unsigned int8 gathered = ptr[idx]; // (perf warning expected)
    return gathered;
}
// uint16
// Returns ptr[idx] for each program instance; ptr points to uniform 16-bit values.
inline unsigned int16 gather_uint16(const uniform unsigned int16* const uniform ptr, int idx)
{
    const unsigned int16 gathered = ptr[idx]; // (perf warning expected)
    return gathered;
}
// Same as above, but ptr points to varying 16-bit values.
inline unsigned int16 gather_uint16(const varying unsigned int16* const uniform ptr, int idx)
{
    const unsigned int16 gathered = ptr[idx]; // (perf warning expected)
    return gathered;
}
// uint32
// Returns ptr[idx] for each program instance; ptr points to uniform 32-bit values.
inline unsigned int32 gather_uint(const uniform unsigned int32* const uniform ptr, int idx)
{
    const unsigned int32 gathered = ptr[idx]; // (perf warning expected)
    return gathered;
}
// Same as above, but ptr points to varying 32-bit values.
inline unsigned int32 gather_uint(const varying unsigned int32* const uniform ptr, int idx)
{
    const unsigned int32 gathered = ptr[idx]; // (perf warning expected)
    return gathered;
}
// Writes value to ptr[idx] for each program instance; ptr points to uniform
// 32-bit storage.
inline void scatter_uint(uniform unsigned int32* ptr, int idx, uint32 value)
{
    const uint32 stored = value;
    ptr[idx] = stored; // (perf warning expected)
}
// Shifts the unsigned per-instance value v right by a uniform bit count.
inline uint32 shift_right(uint32 v, const uniform int bits)
{
    const uint32 shifted = v >> bits; // (perf warning expected)
    return shifted;
}
// float
// Returns ptr[idx] for each program instance; ptr points to uniform floats.
inline float gather_float(uniform float* uniform ptr, int idx)
{
    const float gathered = ptr[idx]; // (perf warning expected)
    return gathered;
}
// Same as above, but ptr points to varying floats.
inline float gather_float(varying float* uniform ptr, int idx)
{
    const float gathered = ptr[idx]; // (perf warning expected)
    return gathered;
}
// Writes value to ptr[idx] for each program instance; ptr points to uniform floats.
inline void scatter_float(uniform float* uniform ptr, int idx, float value)
{
    const float stored = value;
    ptr[idx] = stored; // (perf warning expected)
}
// Same as above, but ptr points to varying floats.
inline void scatter_float(varying float* uniform ptr, int idx, float value)
{
    const float stored = value;
    ptr[idx] = stored; // (perf warning expected)
}
///////////////////////////////////////////////////////////
// Swapping helpers
// Exchanges the values of a and b.
inline void swap(float& a, float& b)
{
    // The temporary must be a float: an int temporary would truncate the
    // fractional part of the value that ends up in b.
    float t = a;
    a = b;
    b = t;
}
// Exchanges the values of the two integers a and b.
inline void swap(int& a, int& b)
{
    const int old_b = b;
    b = a;
    a = old_b;
}
// Exchanges the values of the two 32-bit unsigned integers a and b.
inline void swap(uint32& a, uint32& b)
{
    const uint32 old_b = b;
    b = a;
    a = old_b;
}
// Exchanges the values of the two bytes a and b.
inline void swap(uint8& a, uint8& b)
{
    const uint8 old_b = b;
    b = a;
    a = old_b;
}
// Exchanges the first n elements of u with the first n elements of v.
inline void swap_ints(int u[], int v[], uniform int n)
{
    for (uniform int k = 0; k < n; k++)
    {
        const int from_v = v[k];
        v[k] = u[k];
        u[k] = from_v;
    }
}
// Exchanges the first n elements of u with the first n elements of v.
inline void swap_uints(uint32 u[], uint32 v[], uniform int n)
{
    for (uniform int k = 0; k < n; k++)
    {
        const uint32 from_v = v[k];
        v[k] = u[k];
        u[k] = from_v;
    }
}
// Reverses the order of the four bytes of v.
inline uint32 bswap32(uint32 v)
{
    const uint32 byte3 = (v >> 24) & 255; // most significant input byte
    const uint32 byte2 = (v >> 16) & 255;
    const uint32 byte1 = (v >> 8) & 255;
    const uint32 byte0 = (v >> 0) & 255;  // least significant input byte

    // The four fields occupy disjoint bit ranges, so OR-ing them together
    // yields the same value as summing them.
    return (byte3 << 0) | (byte2 << 8) | (byte1 << 16) | (byte0 << 24);
}
///////////////////////////////////////////////////////////
// Scalar math helpers
// Returns v squared.
inline float sq(float v)
{
    const float squared = v * v;
    return squared;
}
// Returns 2 raised to the power x, computed as a left shift of 1.
inline int pow2(int x)
{
    const int result = 1 << x;
    return result;
}
// Clamps v to [a, b] with integer bounds by converting the bounds to float and
// forwarding to the float overload of clamp.
inline float clamp(float v, int a, int b)
{
    const float lo = (float)a;
    const float hi = (float)b;
    return clamp(v, lo, hi);
}
///////////////////////////////////////////////////////////
// Vector/Matrix/Tensor math helpers
// vector dot product.
// Dot product of two 3-component vectors.
inline float dot3(float a[3], float b[3])
{
    const float xx = a[0] * b[0];
    const float yy = a[1] * b[1];
    const float zz = a[2] * b[2];
    return xx + yy + zz;
}
// Dot product of two 4-component vectors.
inline float dot4(float a[4], float b[4])
{
    const float xx = a[0] * b[0];
    const float yy = a[1] * b[1];
    const float zz = a[2] * b[2];
    const float ww = a[3] * b[3];
    return xx + yy + zz + ww;
}
// a = covar*b, where a and b are 3-component vectors and covar is a symmetric
// 3x3 matrix stored as its upper triangle, row by row:
//     | c0 c1 c2 |
//     | c1 c3 c4 |
//     | c2 c4 c5 |
inline void ssymv(float a[3], float covar[6], float b[3])
{
    a[0] = b[0]*covar[0] + b[1]*covar[1] + b[2]*covar[2];
    a[1] = b[0]*covar[1] + b[1]*covar[3] + b[2]*covar[4];
    a[2] = b[0]*covar[2] + b[1]*covar[4] + b[2]*covar[5];
}
// 3-component product a = covar*b using the packed 4x4 symmetric layout
// (10 entries, upper triangle row by row). Only the top-left 3x3 part is read:
//     | c0 c1 c2 |
//     | c1 c4 c5 |
//     | c2 c5 c7 |
// a[3] and b[3] are not touched.
inline void ssymv3(float a[4], float covar[10], float b[4])
{
    a[0] = b[0]*covar[0] + b[1]*covar[1] + b[2]*covar[2];
    a[1] = b[0]*covar[1] + b[1]*covar[4] + b[2]*covar[5];
    a[2] = b[0]*covar[2] + b[1]*covar[5] + b[2]*covar[7];
}
// a = covar*b, where a and b are 4-component vectors and covar is a symmetric
// 4x4 matrix stored as its upper triangle, row by row:
//     | c0 c1 c2 c3 |
//     | c1 c4 c5 c6 |
//     | c2 c5 c7 c8 |
//     | c3 c6 c8 c9 |
inline void ssymv4(float a[4], float covar[10], float b[4])
{
    a[0] = b[0]*covar[0] + b[1]*covar[1] + b[2]*covar[2] + b[3]*covar[3];
    a[1] = b[0]*covar[1] + b[1]*covar[4] + b[2]*covar[5] + b[3]*covar[6];
    a[2] = b[0]*covar[2] + b[1]*covar[5] + b[2]*covar[7] + b[3]*covar[8];
    a[3] = b[0]*covar[3] + b[1]*covar[6] + b[2]*covar[8] + b[3]*covar[9];
}
///////////////////////////////////////////////////////////
// Bit manipulation helpers.
//
// Appends v to the bit stream stored in data at bit offset *pos, then advances
// *pos by bits. v must be smaller than 2^bits (asserted). The value is OR-ed
// into data, so bits that are already set there stay set. When the value
// crosses a 32-bit word boundary, its upper part goes into the next word.
inline void put_bits(uint32 data[5], uniform int* uniform pos, uniform int bits, int v)
{
    assert(v<pow2(bits));

    const uniform int word = *pos / 32;   // word that receives the low part
    const uniform int offset = *pos % 32; // bit position inside that word

    data[word] |= ((uint32)v) << offset;
    if (offset + bits > 32)
        data[word + 1] |= shift_right(v, 32 - offset);

    *pos += bits;
}
///////////////////////////////////////////////////////////
// Helper for pixel access in variable sized block.
//
// Reads the value of plane p at column x, row y. Planes are 64 entries apart
// and rows are 8 entries apart.
inline float get_pixel(float pixels[], uniform int p, uniform int x, uniform int y)
{
    uniform static const int row_stride = 8;
    uniform static const int plane_stride = 64;
    const uniform int offset = plane_stride * p + row_stride * y + x;
    return pixels[offset];
}
// Writes value into plane p at column x, row y. Planes are 64 entries apart
// and rows are 8 entries apart.
inline void set_pixel(float pixels[], uniform int p, uniform int x, uniform int y, float value)
{
    uniform static const int row_stride = 8;
    uniform static const int plane_stride = 64;
    const uniform int offset = plane_stride * p + row_stride * y + x;
    pixels[offset] = value;
}
///////////////////////////////////////////////////////////
// Generic helpers
///////////////////////////////////////////////////////////
// Axis computation.
//
// Power iteration on the packed symmetric 3x3 matrix covar, starting from
// (1,1,1). Each iteration multiplies the current vector by covar; on every
// second iteration (odd index) the vector is rescaled to unit length.
// The final vector is written to axis. With powerIterations == 0 the result is
// the start vector (1,1,1).
inline void compute_axis3(float axis[3], float covar[6], uniform const int powerIterations)
{
    float vec[3] = {1,1,1};

    for (uniform int iter = 0; iter < powerIterations; iter++)
    {
        // axis is used as scratch space for the product.
        ssymv(axis, covar, vec);

        vec[0] = axis[0];
        vec[1] = axis[1];
        vec[2] = axis[2];

        const uniform bool renormalize = (iter % 2 == 1);
        if (renormalize)
        {
            float norm_sq = 0;
            for (uniform int c = 0; c < 3; c++)
                norm_sq += axis[c]*axis[c];

            const float rnorm = rsqrt(norm_sq);
            vec[0] *= rnorm;
            vec[1] *= rnorm;
            vec[2] *= rnorm;
        }
    }

    axis[0] = vec[0];
    axis[1] = vec[1];
    axis[2] = vec[2];
}
// Power iteration on the packed symmetric matrix covar for 3 or 4 channels,
// starting from an all-ones vector. Each iteration multiplies the current
// vector by covar (ssymv3 or ssymv4, chosen by channels); on every second
// iteration (odd index) the vector is rescaled to unit length. The first
// `channels` components of the final vector are written to axis.
// For any other channel count no product is computed and the contents of axis
// are used as they are.
inline void compute_axis(float axis[4], float covar[10], uniform const int powerIterations, uniform int channels)
{
    float vec[4] = {1,1,1,1};

    for (uniform int iter = 0; iter < powerIterations; iter++)
    {
        // axis is used as scratch space for the product.
        if (channels == 3)
            ssymv3(axis, covar, vec);
        else if (channels == 4)
            ssymv4(axis, covar, vec);

        for (uniform int c = 0; c < channels; c++)
            vec[c] = axis[c];

        const uniform bool renormalize = (iter % 2 == 1);
        if (renormalize)
        {
            float norm_sq = 0;
            for (uniform int c = 0; c < channels; c++)
                norm_sq += axis[c]*axis[c];

            const float rnorm = rsqrt(norm_sq);
            for (uniform int c = 0; c < channels; c++)
                vec[c] *= rnorm;
        }
    }

    for (uniform int c = 0; c < channels; c++)
        axis[c] = vec[c];
}
///////////////////////////////////////////////////////////
// Sorting helpers
//
// Selection sort that stops early: after the call, the first partial_count
// entries of list hold its partial_count smallest values in ascending order.
// The remaining entries keep the other values in unspecified order.
inline void partial_sort_list(int list[], uniform int length, uniform int partial_count)
{
    for (uniform int slot = 0; slot < partial_count; slot++)
    {
        // Find the smallest value in list[slot .. length-1]; on ties the
        // earliest position wins.
        int min_value = list[slot];
        int min_pos = slot;
        for (uniform int j = slot + 1; j < length; j++)
        {
            const int candidate = list[j];
            if (candidate < min_value)
            {
                min_value = candidate;
                min_pos = j;
            }
        }

        // Swap it into place. min_pos differs per program instance, so the
        // first half of the swap goes through the scatter helper.
        scatter_int(list, min_pos, list[slot]);
        list[slot] = min_value;
    }
}
///////////////////////////////////////////////////////////
// Input surface structures.
//
// Describes an uncompressed input image for the RGBA loaders.
struct rgba_surface
{
    uint8* ptr;  // start of the pixel data
    int width;   // NOTE(review): presumably in pixels - confirm against callers
    int height;  // NOTE(review): presumably in pixels - confirm against callers
    int stride;  // byte offset from one row to the next (loaders index ptr by row*stride)
};
// Surface descriptor with the same layout as rgba_surface.
// NOTE(review): presumably describes a two-channel image - confirm against callers.
struct rg_surface
{
    uint8* ptr;  // start of the pixel data
    int width;
    int height;
    int stride;  // NOTE(review): presumably the row pitch in bytes - confirm
};
// Surface descriptor with the same layout as rgba_surface.
// NOTE(review): presumably describes a single-channel image - confirm against callers.
struct red_surface
{
    uint8* ptr;  // start of the pixel data
    int width;
    int height;
    int stride;  // NOTE(review): presumably the row pitch in bytes - confirm
};
// Surface definitions.
// Declares a surface descriptor struct called `name` with the fields:
//   Ptr    - start of the surface data
//   Width  - NOTE(review): presumably in pixels - confirm against callers
//   Height - NOTE(review): presumably in pixels - confirm against callers
//   Stride - byte offset from one row to the next (the loaders in this file
//            index Ptr, a uint8 pointer, by row * Stride)
// Every surface type declared below has this same layout; only the type name
// differs, so the pixel format is carried by the type alone.
// NOTE(review): the macro name is spelled "DELARE" (sic); it is left unchanged
// because other files may use it.
#define DELARE_SURFACE(name) \
struct name \
{ \
uint8* Ptr; \
int32 Width; \
int32 Height; \
int32 Stride; \
};
// 8 bit unsigned int/component
DELARE_SURFACE(Surface_R8)
DELARE_SURFACE(Surface_RG8)
DELARE_SURFACE(Surface_RGBA8)
// 8 bit signed int/component
DELARE_SURFACE(Surface_R8S)
DELARE_SURFACE(Surface_RG8S)
// 16 bit unsigned int/component
DELARE_SURFACE(Surface_R16)
DELARE_SURFACE(Surface_RG16)
DELARE_SURFACE(Surface_RGBA16)
// 16 bit signed int/component
DELARE_SURFACE(Surface_R16S)
DELARE_SURFACE(Surface_RG16S)
// 16 bit float/component (uint16_t for lack of a built-in type).
DELARE_SURFACE(Surface_R16F)
DELARE_SURFACE(Surface_RG16F)
DELARE_SURFACE(Surface_RGBA16F)
// 32 bit float/component
DELARE_SURFACE(Surface_R32F)
DELARE_SURFACE(Surface_RG32F)
DELARE_SURFACE(Surface_RGBA32F)
///////////////////////////////////////////////////////////
// Load data from input surface structures.
//
// Loads one 4x4 block of an 8-bit single-channel surface into Block as floats
// in the range 0..255.
//   Block                - output, row-major: Block[Row*4 + Col]
//   InputSurface         - source surface; Stride is the byte offset between rows
//   HorizontalBlockIndex - block column, per program instance
//   VerticalBlockIndex   - block row, shared by all program instances
inline void LoadUncompressed4x4Block_R8( float Block[16], uniform Surface_R8* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex )
{
    const uniform int BlockHeight = 4;
    const uniform int BlockWidth = 4;

    for ( uniform int Row=0; Row<BlockHeight; Row++ )
    {
        uniform uint32* uniform SurfaceRowStartPtr = (uint32*)&InputSurface->Ptr[(VerticalBlockIndex*BlockHeight+Row)*InputSurface->Stride];

        // The four 8-bit pixels of a block row fill exactly one dword, so the
        // dword index within the surface row is the block index itself and
        // needs no scale factor.
        uint32 RRRR = gather_uint( SurfaceRowStartPtr, HorizontalBlockIndex );

        // Pixel 0 is in the lowest byte of the dword.
        Block[Row*BlockWidth+0] = (int)((RRRR>> 0)&0xFF);
        Block[Row*BlockWidth+1] = (int)((RRRR>> 8)&0xFF);
        Block[Row*BlockWidth+2] = (int)((RRRR>>16)&0xFF);
        Block[Row*BlockWidth+3] = (int)((RRRR>>24)&0xFF);
    }
}
// Loads one 4x4 block of an 8-bit two-channel surface as floats in 0..255,
// split into channel planes: red in InterleavedBlock[0..15], green in
// InterleavedBlock[16..31], each plane row-major (Row*4 + Col).
//   InputSurface         - source surface; Stride is the byte offset between rows
//   HorizontalBlockIndex - block column, per program instance
//   VerticalBlockIndex   - block row, shared by all program instances
inline void LoadUncompressed4x4BlockInterleaved_RG8( float InterleavedBlock[32], uniform Surface_RG8* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex )
{
    const uniform int BlockHeight = 4;
    const uniform int BlockWidth = 4;

    for ( uniform int Row=0; Row<BlockHeight; Row++ )
    {
        uniform uint32* uniform SurfaceRowStartPtr = (uint32*)&InputSurface->Ptr[(VerticalBlockIndex*BlockHeight+Row)*InputSurface->Stride];

        // A block row is 4 pixels * 2 bytes = two dwords, each holding two
        // RG pairs with the first pixel in the low half.
        uint32 RG0RG1 = gather_uint( SurfaceRowStartPtr, HorizontalBlockIndex*2+0 );
        uint32 RG2RG3 = gather_uint( SurfaceRowStartPtr, HorizontalBlockIndex*2+1 );

        InterleavedBlock[16*0+Row*BlockWidth+0] = (int)((RG0RG1>> 0)&0xFF);
        InterleavedBlock[16*1+Row*BlockWidth+0] = (int)((RG0RG1>> 8)&0xFF);
        InterleavedBlock[16*0+Row*BlockWidth+1] = (int)((RG0RG1>>16)&0xFF);
        InterleavedBlock[16*1+Row*BlockWidth+1] = (int)((RG0RG1>>24)&0xFF);
        InterleavedBlock[16*0+Row*BlockWidth+2] = (int)((RG2RG3>> 0)&0xFF);
        InterleavedBlock[16*1+Row*BlockWidth+2] = (int)((RG2RG3>> 8)&0xFF);
        InterleavedBlock[16*0+Row*BlockWidth+3] = (int)((RG2RG3>>16)&0xFF);
        InterleavedBlock[16*1+Row*BlockWidth+3] = (int)((RG2RG3>>24)&0xFF);
    }
}
// Loads one 4x4 block of an 8-bit four-channel surface as floats in 0..255,
// split into channel planes of 16 entries each (red, green, blue, alpha in that
// order), each plane row-major (Row*4 + Col).
//   InputSurface         - source surface; Stride is the byte offset between rows
//   HorizontalBlockIndex - block column, per program instance
//   VerticalBlockIndex   - block row, shared by all program instances
inline void LoadUncompressed4x4BlockInterleaved_RGBA8( float InterleavedBlock[64], uniform Surface_RGBA8* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex )
{
    const uniform int BlockHeight = 4;
    const uniform int BlockWidth = 4;

    for ( uniform int Row=0; Row<BlockHeight; Row++ )
    {
        // The row start depends only on Row, so it is computed once per row.
        uniform uint32* uniform SurfaceRowStartPtr = (uint32*)&InputSurface->Ptr[(VerticalBlockIndex*BlockHeight+Row)*InputSurface->Stride];

        for ( uniform int Col=0; Col<BlockWidth; Col++ )
        {
            // One dword per pixel, red in the lowest byte.
            uint32 RGBA = gather_uint( SurfaceRowStartPtr, HorizontalBlockIndex*BlockWidth+Col );
            InterleavedBlock[16*0+Row*BlockWidth+Col] = (int)((RGBA>> 0)&0xFF);
            InterleavedBlock[16*1+Row*BlockWidth+Col] = (int)((RGBA>> 8)&0xFF);
            InterleavedBlock[16*2+Row*BlockWidth+Col] = (int)((RGBA>>16)&0xFF);
            InterleavedBlock[16*3+Row*BlockWidth+Col] = (int)((RGBA>>24)&0xFF);
        }
    }
}
// Loads one 4x4 block of an 8-bit four-channel surface as floats in 0..255,
// keeping only red, green and blue. The output is split into three planes of
// 16 entries each (red, green, blue), each plane row-major (Row*4 + Col); the
// fourth byte of every pixel is ignored.
//   InputSurface         - source surface; Stride is the byte offset between rows
//   HorizontalBlockIndex - block column, per program instance
//   VerticalBlockIndex   - block row, shared by all program instances
inline void LoadUncompressed4x4BlockInterleavedNoAlpha_RGBA8( float InterleavedBlock[48], uniform Surface_RGBA8* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex )
{
    const uniform int BlockHeight = 4;
    const uniform int BlockWidth = 4;

    for ( uniform int Row=0; Row<BlockHeight; Row++ )
    {
        // The row start depends only on Row, so it is computed once per row.
        uniform uint32* uniform SurfaceRowStartPtr = (uint32*)&InputSurface->Ptr[(VerticalBlockIndex*BlockHeight+Row)*InputSurface->Stride];

        for ( uniform int Col=0; Col<BlockWidth; Col++ )
        {
            // One dword per pixel, red in the lowest byte.
            uint32 RGBX = gather_uint( SurfaceRowStartPtr, HorizontalBlockIndex*BlockWidth+Col );
            InterleavedBlock[16*0+Row*BlockWidth+Col] = (int)((RGBX>> 0)&0xFF);
            InterleavedBlock[16*1+Row*BlockWidth+Col] = (int)((RGBX>> 8)&0xFF);
            InterleavedBlock[16*2+Row*BlockWidth+Col] = (int)((RGBX>>16)&0xFF);
        }
    }
}
// Loads one BlockWidth x BlockHeight block of an 8-bit four-channel surface
// into pixels as floats in 0..255. Each channel goes to its own plane through
// set_pixel (plane 0 = lowest byte of the pixel ... plane 3 = highest byte).
//   InputSurface         - source surface; stride is the byte offset between rows
//   HorizontalBlockIndex - block column, per program instance
//   VerticalBlockIndex   - block row, per program instance
// NOTE(review): set_pixel lays rows 8 entries apart and planes 64 entries
// apart, so this presumably expects blocks of at most 8x8 - confirm with callers.
inline void LoadUncompressedVariableBlockInterleaved_RGBA8( float pixels[], uniform rgba_surface InputSurface[], int HorizontalBlockIndex, int VerticalBlockIndex, uniform int BlockWidth, uniform int BlockHeight )
{
    for (uniform int y = 0; y < BlockHeight; y++)
    {
        for (uniform int x = 0; x < BlockWidth; x++)
        {
            // Byte offset of the pixel inside the surface (4 bytes per pixel),
            // divided by 4 to index the data as dwords.
            uint32 rgba = gather_uint((uint32*)InputSurface->ptr, ((VerticalBlockIndex * BlockHeight + y)*InputSurface->stride + (HorizontalBlockIndex * BlockWidth + x) * 4)/4);

            set_pixel(pixels, 0, x, y, (int)((rgba >> 0) & 255));
            set_pixel(pixels, 1, x, y, (int)((rgba >> 8) & 255));
            set_pixel(pixels, 2, x, y, (int)((rgba >> 16) & 255));
            set_pixel(pixels, 3, x, y, (int)((rgba >> 24) & 255));
        }
    }
}
// Loads one 4x4 block of a 16-bit single-channel surface into Block. Values
// are rescaled from 0..65535 to 0..255 (multiplied by 255/65535).
//   Block                - output, row-major: Block[Row*4 + Col]
//   InputSurface         - source surface; Stride is the byte offset between rows
//   HorizontalBlockIndex - block column, per program instance
//   VerticalBlockIndex   - block row, shared by all program instances
inline void LoadUncompressed4x4Block_R16( float Block[16], uniform Surface_R16* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex )
{
    const uniform int BlockHeight = 4;
    const uniform int BlockWidth = 4;

    for ( uniform int Row=0; Row<BlockHeight; Row++ )
    {
        uniform uint32* uniform SurfaceRowStartPtr = (uint32*)&InputSurface->Ptr[(VerticalBlockIndex*BlockHeight+Row)*InputSurface->Stride];

        // A block row is 4 pixels * 2 bytes = two dwords, each holding two
        // 16-bit values with the first pixel in the low half.
        uint32 R0R1 = gather_uint( SurfaceRowStartPtr, HorizontalBlockIndex*2+0 );
        uint32 R2R3 = gather_uint( SurfaceRowStartPtr, HorizontalBlockIndex*2+1 );

        Block[Row*BlockWidth+0] = ((int)((R0R1>> 0)&0xFFFFu))*(255f/65535f);
        Block[Row*BlockWidth+1] = ((int)((R0R1>>16)&0xFFFFu))*(255f/65535f);
        Block[Row*BlockWidth+2] = ((int)((R2R3>> 0)&0xFFFFu))*(255f/65535f);
        Block[Row*BlockWidth+3] = ((int)((R2R3>>16)&0xFFFFu))*(255f/65535f);
    }
}
// Loads one 4x4 block of a 16-bit two-channel surface, rescaled from 0..65535
// to 0..255 and split into channel planes: red in InterleavedBlock[0..15],
// green in InterleavedBlock[16..31], each plane row-major (Row*4 + Col).
//   InputSurface         - source surface; Stride is the byte offset between rows
//   HorizontalBlockIndex - block column, per program instance
//   VerticalBlockIndex   - block row, shared by all program instances
inline void LoadUncompressed4x4BlockInterleaved_RG16( float InterleavedBlock[32], uniform Surface_RG16* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex )
{
    const uniform int BlockHeight = 4;
    const uniform int BlockWidth = 4;

    for ( uniform int Row=0; Row<BlockHeight; Row++ )
    {
        const uniform int SurfaceRow = VerticalBlockIndex*BlockHeight+Row;
        uniform uint32* uniform SurfaceRowStartPtr = (uint32*)&InputSurface->Ptr[SurfaceRow*InputSurface->Stride];

        for ( uniform int Col=0; Col<BlockWidth; Col++ )
        {
            // One dword per pixel: red in the low half, green in the high half.
            const uint32 Pixel = gather_uint( SurfaceRowStartPtr, HorizontalBlockIndex*BlockWidth+Col );
            const uniform int Texel = Row*BlockWidth+Col;

            InterleavedBlock[16*0+Texel] = ((int)((Pixel>> 0)&0xFFFFu))*(255f/65535f);
            InterleavedBlock[16*1+Texel] = ((int)((Pixel>>16)&0xFFFFu))*(255f/65535f);
        }
    }
}
// Loads one 4x4 block of a 16-bit four-channel surface, rescaled from 0..65535
// to 0..255 and split into four channel planes of 16 entries each (red, green,
// blue, alpha in that order), each plane row-major (Row*4 + Col).
//   InputSurface         - source surface; Stride is the byte offset between rows
//   HorizontalBlockIndex - block column, per program instance
//   VerticalBlockIndex   - block row, shared by all program instances
inline void LoadUncompressed4x4BlockInterleaved_RGBA16( float InterleavedBlock[64], uniform Surface_RGBA16* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex )
{
    const uniform int BlockHeight = 4;
    const uniform int BlockWidth = 4;

    for ( uniform int Row=0; Row<BlockHeight; Row++ )
    {
        const uniform int SurfaceRow = VerticalBlockIndex*BlockHeight+Row;
        uniform uint32* uniform SurfaceRowStartPtr = (uint32*)&InputSurface->Ptr[SurfaceRow*InputSurface->Stride];

        for ( uniform int Col=0; Col<BlockWidth; Col++ )
        {
            // Two dwords per pixel: (red, green) followed by (blue, alpha),
            // the first channel of each pair in the low half.
            const int FirstDword = (HorizontalBlockIndex*BlockWidth+Col)*2;
            const uint32 RedGreen = gather_uint( SurfaceRowStartPtr, FirstDword+0 );
            const uint32 BlueAlpha = gather_uint( SurfaceRowStartPtr, FirstDword+1 );
            const uniform int Texel = Row*BlockWidth+Col;

            InterleavedBlock[16*0+Texel] = ((int)((RedGreen>> 0)&0xFFFFu))*(255f/65535f);
            InterleavedBlock[16*1+Texel] = ((int)((RedGreen>>16)&0xFFFFu))*(255f/65535f);
            InterleavedBlock[16*2+Texel] = ((int)((BlueAlpha>> 0)&0xFFFFu))*(255f/65535f);
            InterleavedBlock[16*3+Texel] = ((int)((BlueAlpha>>16)&0xFFFFu))*(255f/65535f);
        }
    }
}
// Loads one 4x4 block of a 16-bit four-channel surface, keeping only red, green
// and blue. Values are rescaled from 0..65535 to 0..255 and split into three
// planes of 16 entries each (red, green, blue), each plane row-major
// (Row*4 + Col); the fourth channel of every pixel is ignored.
//   InputSurface         - source surface; Stride is the byte offset between rows
//   HorizontalBlockIndex - block column, per program instance
//   VerticalBlockIndex   - block row, shared by all program instances
inline void LoadUncompressed4x4BlockInterleavedNoAlpha_RGBA16( float InterleavedBlock[48], uniform Surface_RGBA16* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex )
{
    const uniform int BlockHeight = 4;
    const uniform int BlockWidth = 4;

    for ( uniform int Row=0; Row<BlockHeight; Row++ )
    {
        const uniform int SurfaceRow = VerticalBlockIndex*BlockHeight+Row;
        uniform uint32* uniform SurfaceRowStartPtr = (uint32*)&InputSurface->Ptr[SurfaceRow*InputSurface->Stride];

        for ( uniform int Col=0; Col<BlockWidth; Col++ )
        {
            // Two dwords per pixel: (red, green) followed by (blue, unused),
            // the first channel of each pair in the low half.
            const int FirstDword = (HorizontalBlockIndex*BlockWidth+Col)*2;
            const uint32 RedGreen = gather_uint( SurfaceRowStartPtr, FirstDword+0 );
            const uint32 BlueUnused = gather_uint( SurfaceRowStartPtr, FirstDword+1 );
            const uniform int Texel = Row*BlockWidth+Col;

            InterleavedBlock[16*0+Texel] = ((int)((RedGreen>> 0)&0xFFFFu))*(255f/65535f);
            InterleavedBlock[16*1+Texel] = ((int)((RedGreen>>16)&0xFFFFu))*(255f/65535f);
            InterleavedBlock[16*2+Texel] = ((int)((BlueUnused>> 0)&0xFFFFu))*(255f/65535f);
        }
    }
}
// Loads one 4x4 block from a surface with four 16-bit channels per pixel
// (8 bytes per pixel). The raw 16-bit values of the first three channels are
// stored unscaled in planes 0..2 (16 entries each, row-major y*4 + x) and
// plane 3 is filled with zeros.
//
// The function writes all four planes, i.e. indices 0..63, so the destination
// must hold 64 floats; the parameter is declared with that size.
//   InputSurface         - source surface; stride is the byte offset between rows
//   HorizontalBlockIndex - block column, per program instance
//   VerticalBlockIndex   - block row, shared by all program instances
inline void load_4x4_block_interleaved_16bit( float InterleavedBlock[64], uniform rgba_surface* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex )
{
    for (uniform int y = 0; y<4; y++)
    {
        // Row base pointers offset by 0, 2 and 4 bytes so that the low 16 bits
        // of each loaded dword are the first, second and third channel.
        // NOTE(review): the +2 pointer is not dword aligned ("2 byte aligned
        // load?" in the original) - confirm this is acceptable on all targets.
        const uniform int row_offset = (VerticalBlockIndex * 4 + y)*InputSurface->stride;
        uniform uint32* uniform src_ptr_r = (uint32*)&InputSurface->ptr[row_offset + 0];
        uniform uint32* uniform src_ptr_g = (uint32*)&InputSurface->ptr[row_offset + 2];
        uniform uint32* uniform src_ptr_b = (uint32*)&InputSurface->ptr[row_offset + 4];

        for (uniform int x = 0; x<4; x++)
        {
            // Each pixel spans two dwords, hence the factor of 2.
            uint32 xr = gather_uint(src_ptr_r, (HorizontalBlockIndex * 4 + x) * 2);
            uint32 xg = gather_uint(src_ptr_g, (HorizontalBlockIndex * 4 + x) * 2);
            uint32 xb = gather_uint(src_ptr_b, (HorizontalBlockIndex * 4 + x) * 2);

            InterleavedBlock[16 * 0 + y * 4 + x] = (int)(xr & 0xFFFF);
            InterleavedBlock[16 * 1 + y * 4 + x] = (int)(xg & 0xFFFF);
            InterleavedBlock[16 * 2 + y * 4 + x] = (int)(xb & 0xFFFF);
            InterleavedBlock[16 * 3 + y * 4 + x] = 0;
        }
    }
}
///////////////////////////////////////////////////////////
// Store data.
//
// Writes the NumDataDwords dwords of one compressed block to the output buffer.
//   OutputBase           - start of the compressed output
//   TextureWidth         - used with NumDataDwords to form the byte offset of a
//                          row of blocks: VerticalBlockIndex*TextureWidth*NumDataDwords
//   HorizontalBlockIndex - block column, per program instance
//   VerticalBlockIndex   - block row, shared by all program instances
//   Data                 - the block's dwords, per program instance
//   NumDataDwords        - number of dwords per block
// NOTE(review): TextureWidth and NumDataDwords are declared varying while the
// loop counter is uniform; callers presumably pass the same value on every
// program instance - confirm.
inline void StoreCompressedBlock( uniform uint8 OutputBase[], int TextureWidth, int HorizontalBlockIndex, uniform int VerticalBlockIndex, uint32 Data[], int NumDataDwords )
{
    // The row start does not depend on the loop counter, so it is computed once.
    uniform uint32* OutputRow = (uint32*)&OutputBase[(VerticalBlockIndex)*TextureWidth*NumDataDwords];

    for ( uniform int k=0; k<NumDataDwords; k++ )
    {
        scatter_uint( OutputRow, HorizontalBlockIndex*NumDataDwords+k, Data[k]);
    }
}