// // Shared and general purpose code for the encoding implementations. // // // Copyright (c) 2016 Intel Corporation // // Permission is hereby granted, free of charge, to any person obtaining a copy of this // software and associated documentation files (the "Software"), to deal in the Software // without restriction, including without limitation the rights to use, copy, modify, // merge, publish, distribute, sublicense, and/or sell copies of the Software, and to // permit persons to whom the Software is furnished to do so, subject to the following // conditions: // // The above copyright notice and this permission notice shall be included in all copies // or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, // INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A // PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF // CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE // OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // /////////////////////////////////////////////////////////// // Typedefs. 
//
#if !ISPC_UINT_IS_DEFINED
typedef unsigned int8 uint8;
typedef unsigned int16 uint16;
typedef unsigned int32 uint32;
typedef unsigned int64 uint64;
#endif

///////////////////////////////////////////////////////////
// The following helpers isolate performance warnings
//
// Every helper below wraps exactly one indexed load, indexed store or shift.
//

// int32

// Returns ptr[idx].
inline int32 gather_int(const uniform int32* const uniform ptr, int idx)
{
    return ptr[idx]; // (perf warning expected)
}

// Stores value into ptr[idx].
// NOTE(review): value is declared uint32 although the destination elements
// are int32 - confirm whether that is intentional.
inline void scatter_int(varying int32* uniform ptr, int idx, uint32 value)
{
    ptr[idx] = value; // (perf warning expected)
}

// uint8

// Returns ptr[idx]; overload for a pointer to uniform elements.
inline unsigned int8 gather_uint8(const uniform unsigned int8* const uniform ptr, int idx)
{
    return ptr[idx]; // (perf warning expected)
}

// Returns ptr[idx]; overload for a pointer to varying elements.
inline unsigned int8 gather_uint8(const varying unsigned int8* const uniform ptr, int idx)
{
    return ptr[idx]; // (perf warning expected)
}

// uint16

// Returns ptr[idx]; overload for a pointer to uniform elements.
inline unsigned int16 gather_uint16(const uniform unsigned int16* const uniform ptr, int idx)
{
    return ptr[idx]; // (perf warning expected)
}

// Returns ptr[idx]; overload for a pointer to varying elements.
inline unsigned int16 gather_uint16(const varying unsigned int16* const uniform ptr, int idx)
{
    return ptr[idx]; // (perf warning expected)
}

// uint32

// Returns ptr[idx]; overload for a pointer to uniform elements.
inline unsigned int32 gather_uint(const uniform unsigned int32* const uniform ptr, int idx)
{
    return ptr[idx]; // (perf warning expected)
}

// Returns ptr[idx]; overload for a pointer to varying elements.
inline unsigned int32 gather_uint(const varying unsigned int32* const uniform ptr, int idx)
{
    return ptr[idx]; // (perf warning expected)
}

// Stores value into ptr[idx].
// NOTE(review): unlike the sibling helpers, the pointer itself carries no
// explicit uniform/varying qualifier here - confirm that this is intended.
inline void scatter_uint(uniform unsigned int32* ptr, int idx, uint32 value)
{
    ptr[idx] = value; // (perf warning expected)
}

// Returns v shifted right by bits; the shift count is uniform.
inline uint32 shift_right(uint32 v, const uniform int bits)
{
    return v>>bits; // (perf warning expected)
}

// float

// Returns ptr[idx]; overload for a pointer to uniform elements.
inline float gather_float(uniform float* uniform ptr, int idx)
{
    return ptr[idx]; // (perf warning expected)
}

// Returns ptr[idx]; overload for a pointer to varying elements.
inline float gather_float(varying float* uniform ptr, int idx)
{
    return ptr[idx]; // (perf warning expected)
}

// Stores value into ptr[idx]; overload for a pointer to uniform elements.
inline void scatter_float(uniform float* uniform ptr, int idx, float value)
{
    ptr[idx] = value; // (perf warning expected)
}

// Stores value into ptr[idx]; overload for a pointer to varying elements.
inline void scatter_float(varying float* uniform ptr, int idx, float value)
{
    ptr[idx] = value; // (perf warning expected)
}

///////////////////////////////////////////////////////////
// Swapping helpers

// Exchanges a and b.
// The temporary has to be a float: with an int temporary the value moved
// into b would lose its fractional part.
inline void swap(float& a, float& b)
{
    float t = a;
    a = b;
    b = t;
}

// Exchanges a and b.
inline void swap(int& a, int& b)
{
    int t = a;
    a = b;
    b = t;
}

// Exchanges a and b.
inline void swap(uint32& a, uint32& b)
{
    uint32 t = a;
    a = b;
    b = t;
}

// Exchanges a and b.
inline void swap(uint8& a, uint8& b)
{
    uint8 t = a;
    a = b;
    b = t;
}

// NOTE(review): the next line is reproduced verbatim and is damaged in this
// copy of the file. The loop header of swap_ints runs straight into the tail
// of a routine that reverses the four bytes of v; the text in between is
// missing. Restore it from a pristine copy before editing.
inline void swap_ints(int u[], int v[], uniform int n) { for (uniform int i=0; i> 24) & 255) << 0; r += ((v >> 16) & 255) << 8; r += ((v >> 8) & 255) << 16; r += ((v >> 0) & 255) << 24; return r; }

///////////////////////////////////////////////////////////
// Scalar math helpers

// Returns v squared.
inline float sq(float v)
{
    return v*v;
}

// NOTE(review): the next line is reproduced verbatim and is damaged in this
// copy of the file. The body of pow2 runs straight into the tail of a routine
// that ORs v into data[] at bit offset *pos and then advances *pos by bits;
// the text in between is missing. Restore it from a pristine copy before
// editing.
inline int pow2(int x) { return 1<32) data[*pos/32+1] |= shift_right(v, 32-*pos%32); *pos += bits; }

///////////////////////////////////////////////////////////
// Helper for pixel access in variable sized block.
//

// Returns pixels[64*p + 8*y + x].
inline float get_pixel(float pixels[], uniform int p, uniform int x, uniform int y)
{
    uniform static const int ystride = 8;
    uniform static const int pstride = 64;

    return pixels[pstride * p + ystride * y + x];
}

// Stores value into pixels[64*p + 8*y + x].
inline void set_pixel(float pixels[], uniform int p, uniform int x, uniform int y, float value)
{
    uniform static const int ystride = 8;
    uniform static const int pstride = 64;

    pixels[pstride * p + ystride * y + x] = value;
}

///////////////////////////////////////////////////////////
// Generic helpers

///////////////////////////////////////////////////////////
// Axis computation.
//

// NOTE(review): the next line is reproduced verbatim and is damaged in this
// copy of the file. The power-iteration loop header of compute_axis3 runs
// straight into the tail of a selection-sort style routine over list[]; the
// text in between is missing. Restore it from a pristine copy before editing.
inline void compute_axis3(float axis[3], float covar[6], uniform const int powerIterations) { float vec[3] = {1,1,1}; for (uniform int i=0; i list[i]) { best_value = list[i]; best_idx = i; } } // swap scatter_int(list, best_idx, list[k]); list[k] = best_value; } }

///////////////////////////////////////////////////////////
// Input surface structures.
// struct rgba_surface { uint8* ptr; int width, height, stride; }; struct rg_surface { uint8* ptr; int width, height, stride; }; struct red_surface { uint8* ptr; int width, height, stride; }; // Surface definitions. #define DELARE_SURFACE(name) \ struct name \ { \ uint8* Ptr; \ int32 Width; \ int32 Height; \ int32 Stride; \ }; // 8 bit unsigned int/component DELARE_SURFACE(Surface_R8) DELARE_SURFACE(Surface_RG8) DELARE_SURFACE(Surface_RGBA8) // 8 bit signed int/component DELARE_SURFACE(Surface_R8S) DELARE_SURFACE(Surface_RG8S) // 16 bit unsigned int/component DELARE_SURFACE(Surface_R16) DELARE_SURFACE(Surface_RG16) DELARE_SURFACE(Surface_RGBA16) // 16 bit signed int/component DELARE_SURFACE(Surface_R16S) DELARE_SURFACE(Surface_RG16S) // 16 bit float/component (uint16_t in lack of buildin type). DELARE_SURFACE(Surface_R16F) DELARE_SURFACE(Surface_RG16F) DELARE_SURFACE(Surface_RGBA16F) // 32 bit float/component DELARE_SURFACE(Surface_R32F) DELARE_SURFACE(Surface_RG32F) DELARE_SURFACE(Surface_RGBA32F) /////////////////////////////////////////////////////////// // Load data from input surface structures. // inline void LoadUncompressed4x4Block_R8( float Block[16], uniform Surface_R8* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex ) { const uniform int BlockHeight = 4; const uniform int BlockWidth = 4; for ( uniform int Row=0; RowPtr[(VerticalBlockIndex*BlockHeight+Row)*InputSurface->Stride]; uint32 RRRR = gather_uint( SurfaceRowStartPtr, HorizontalBlockIndex ); // *(BlockWidth/sizeof(uint32)) breaks things for reasons I can't figure out. 
Block[Row*BlockWidth+0] = (int)((RRRR>> 0)&0xFF); Block[Row*BlockWidth+1] = (int)((RRRR>> 8)&0xFF); Block[Row*BlockWidth+2] = (int)((RRRR>>16)&0xFF); Block[Row*BlockWidth+3] = (int)((RRRR>>24)&0xFF); } } inline void LoadUncompressed4x4BlockInterleaved_RG8( float InterleavedBlock[32], uniform Surface_RG8* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex ) { const uniform int BlockHeight = 4; const uniform int BlockWidth = 4; for ( uniform int Row=0; RowPtr[(VerticalBlockIndex*BlockHeight+Row)*InputSurface->Stride]; uint32 RG0RG1 = gather_uint( SurfaceRowStartPtr, HorizontalBlockIndex*2+0 ); uint32 RG2RG3 = gather_uint( SurfaceRowStartPtr, HorizontalBlockIndex*2+1 ); InterleavedBlock[16*0+Row*BlockWidth+0] = (int)((RG0RG1>> 0)&0xFF); InterleavedBlock[16*1+Row*BlockWidth+0] = (int)((RG0RG1>> 8)&0xFF); InterleavedBlock[16*0+Row*BlockWidth+1] = (int)((RG0RG1>>16)&0xFF); InterleavedBlock[16*1+Row*BlockWidth+1] = (int)((RG0RG1>>24)&0xFF); InterleavedBlock[16*0+Row*BlockWidth+2] = (int)((RG2RG3>> 0)&0xFF); InterleavedBlock[16*1+Row*BlockWidth+2] = (int)((RG2RG3>> 8)&0xFF); InterleavedBlock[16*0+Row*BlockWidth+3] = (int)((RG2RG3>>16)&0xFF); InterleavedBlock[16*1+Row*BlockWidth+3] = (int)((RG2RG3>>24)&0xFF); } } inline void LoadUncompressed4x4BlockInterleaved_RGBA8( float InterleavedBlock[64], uniform Surface_RGBA8* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex ) { const uniform int BlockHeight = 4; const uniform int BlockWidth = 4; for ( uniform int Row=0; RowPtr[(VerticalBlockIndex*BlockHeight+Row)*InputSurface->Stride]; uint32 RGBA = gather_uint( SurfaceRowStartPtr, HorizontalBlockIndex*BlockWidth+Col ); InterleavedBlock[16*0+Row*BlockWidth+Col] = (int)((RGBA>> 0)&0xFF); InterleavedBlock[16*1+Row*BlockWidth+Col] = (int)((RGBA>> 8)&0xFF); InterleavedBlock[16*2+Row*BlockWidth+Col] = (int)((RGBA>>16)&0xFF); InterleavedBlock[16*3+Row*BlockWidth+Col] = (int)((RGBA>>24)&0xFF); } } } inline void 
LoadUncompressed4x4BlockInterleavedNoAlpha_RGBA8( float InterleavedBlock[48], uniform Surface_RGBA8* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex ) { const uniform int BlockHeight = 4; const uniform int BlockWidth = 4; for ( uniform int Row=0; RowPtr[(VerticalBlockIndex*BlockHeight+Row)*InputSurface->Stride]; uint32 RGBX = gather_uint( SurfaceRowStartPtr, HorizontalBlockIndex*BlockWidth+Col ); InterleavedBlock[16*0+Row*BlockWidth+Col] = (int)((RGBX>> 0)&0xFF); InterleavedBlock[16*1+Row*BlockWidth+Col] = (int)((RGBX>> 8)&0xFF); InterleavedBlock[16*2+Row*BlockWidth+Col] = (int)((RGBX>>16)&0xFF); } } } inline void LoadUncompressedVariableBlockInterleaved_RGBA8( float pixels[], uniform rgba_surface InputSurface[], int HorizontalBlockIndex, int VerticalBlockIndex, uniform int BlockWidth, uniform int BlockHeight ) { uniform int pitch = BlockWidth * BlockHeight; for (uniform int y = 0; y < BlockHeight; y++) { for (uniform int x = 0; x < BlockWidth; x++) { uint32 rgba = gather_uint((uint32*)InputSurface->ptr, ((VerticalBlockIndex * BlockHeight + y)*InputSurface->stride + (HorizontalBlockIndex * BlockWidth + x) * 4)/4); set_pixel(pixels, 0, x, y, (int)((rgba >> 0) & 255)); set_pixel(pixels, 1, x, y, (int)((rgba >> 8) & 255)); set_pixel(pixels, 2, x, y, (int)((rgba >> 16) & 255)); set_pixel(pixels, 3, x, y, (int)((rgba >> 24) & 255)); } } } inline void LoadUncompressed4x4Block_R16( float Block[16], uniform Surface_R16* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex ) { const uniform int BlockHeight = 4; const uniform int BlockWidth = 4; for ( uniform int Row=0; RowPtr[(VerticalBlockIndex*BlockHeight+Row)*InputSurface->Stride]; uint32 R0R1 = gather_uint( SurfaceRowStartPtr, HorizontalBlockIndex*2+0 ); uint32 R2R3 = gather_uint( SurfaceRowStartPtr, HorizontalBlockIndex*2+1 ); Block[Row*BlockWidth+0] = ((int)((R0R1>> 0)&0xFFFFu))*(255f/65535f); Block[Row*BlockWidth+1] = ((int)((R0R1>>16)&0xFFFFu))*(255f/65535f); 
Block[Row*BlockWidth+2] = ((int)((R2R3>> 0)&0xFFFFu))*(255f/65535f); Block[Row*BlockWidth+3] = ((int)((R2R3>>16)&0xFFFFu))*(255f/65535f); } } inline void LoadUncompressed4x4BlockInterleaved_RG16( float InterleavedBlock[32], uniform Surface_RG16* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex ) { const uniform int BlockHeight = 4; const uniform int BlockWidth = 4; for ( uniform int Row=0; RowPtr[(VerticalBlockIndex*BlockHeight+Row)*InputSurface->Stride]; uint32 RG = gather_uint( SurfaceRowStartPtr, HorizontalBlockIndex*BlockWidth+Col ); InterleavedBlock[16*0+Row*BlockWidth+Col] = ((int)((RG>> 0)&0xFFFFu))*(255f/65535f); InterleavedBlock[16*1+Row*BlockWidth+Col] = ((int)((RG>>16)&0xFFFFu))*(255f/65535f); } } } inline void LoadUncompressed4x4BlockInterleaved_RGBA16( float InterleavedBlock[64], uniform Surface_RGBA16* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex ) { const uniform int BlockHeight = 4; const uniform int BlockWidth = 4; for ( uniform int Row=0; RowPtr[(VerticalBlockIndex*BlockHeight+Row)*InputSurface->Stride]; uint32 RG = gather_uint( SurfaceRowStartPtr, (HorizontalBlockIndex*BlockWidth+Col)*2+0 ); uint32 BA = gather_uint( SurfaceRowStartPtr, (HorizontalBlockIndex*BlockWidth+Col)*2+1 ); InterleavedBlock[16*0+Row*BlockWidth+Col] = ((int)((RG>> 0)&0xFFFFu))*(255f/65535f); InterleavedBlock[16*1+Row*BlockWidth+Col] = ((int)((RG>>16)&0xFFFFu))*(255f/65535f); InterleavedBlock[16*2+Row*BlockWidth+Col] = ((int)((BA>> 0)&0xFFFFu))*(255f/65535f); InterleavedBlock[16*3+Row*BlockWidth+Col] = ((int)((BA>>16)&0xFFFFu))*(255f/65535f); } } } inline void LoadUncompressed4x4BlockInterleavedNoAlpha_RGBA16( float InterleavedBlock[48], uniform Surface_RGBA16* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex ) { const uniform int BlockHeight = 4; const uniform int BlockWidth = 4; for ( uniform int Row=0; RowPtr[(VerticalBlockIndex*BlockHeight+Row)*InputSurface->Stride]; uint32 RG = 
gather_uint( SurfaceRowStartPtr, (HorizontalBlockIndex*BlockWidth+Col)*2+0 ); uint32 BX = gather_uint( SurfaceRowStartPtr, (HorizontalBlockIndex*BlockWidth+Col)*2+1 ); InterleavedBlock[16*0+Row*BlockWidth+Col] = ((int)((RG>> 0)&0xFFFFu))*(255f/65535f); InterleavedBlock[16*1+Row*BlockWidth+Col] = ((int)((RG>>16)&0xFFFFu))*(255f/65535f); InterleavedBlock[16*2+Row*BlockWidth+Col] = ((int)((BX>> 0)&0xFFFFu))*(255f/65535f); } } } inline void load_4x4_block_interleaved_16bit( float InterleavedBlock[48], uniform rgba_surface* uniform InputSurface, int HorizontalBlockIndex, uniform int VerticalBlockIndex ) { for (uniform int y = 0; y<4; y++) { for (uniform int x = 0; x<4; x++) { uniform uint32* uniform src_ptr_r = (uint32*)&InputSurface->ptr[(VerticalBlockIndex * 4 + y)*InputSurface->stride + 0]; uniform uint32* uniform src_ptr_g = (uint32*)&InputSurface->ptr[(VerticalBlockIndex * 4 + y)*InputSurface->stride + 2]; uniform uint32* uniform src_ptr_b = (uint32*)&InputSurface->ptr[(VerticalBlockIndex * 4 + y)*InputSurface->stride + 4]; uint32 xr = gather_uint(src_ptr_r, (HorizontalBlockIndex * 4 + x) * 2); uint32 xg = gather_uint(src_ptr_g, (HorizontalBlockIndex * 4 + x) * 2); // 2 byte aligned load? uint32 xb = gather_uint(src_ptr_b, (HorizontalBlockIndex * 4 + x) * 2); InterleavedBlock[16 * 0 + y * 4 + x] = (int)(xr & 0xFFFF); InterleavedBlock[16 * 1 + y * 4 + x] = (int)(xg & 0xFFFF); InterleavedBlock[16 * 2 + y * 4 + x] = (int)(xb & 0xFFFF); InterleavedBlock[16 * 3 + y * 4 + x] = 0; } } } /////////////////////////////////////////////////////////// // Store data. // inline void StoreCompressedBlock( uniform uint8 OutputBase[], int TextureWidth, int HorizontalBlockIndex, uniform int VerticalBlockIndex, uint32 Data[], int NumDataDwords ) { for ( uniform int k=0; k