Optimize NVTT texture compression using AVX2

- Load the proper delay-loaded DLL depending on the instruction-set support of the current platform.
  - Replaced reciprocal approximation with proper division.
    - This improves performance as the division is faster than a reciprocal with refinement on recent CPUs.
    - This improves quality as the division provides higher precision.
    - It fixes a nasty non-determinism in the DDC, because reciprocal approximations yield different results on different CPUs.
  - Configured so that AVX2 and SSE2 results are exactly the same (see config.h).
  - More details can be found in config.h

  - 80.773s -> 47.547s for single-threaded 8k texture compression
  - 18m34s -> 14m50s for recompressing all textures of Reverb P_World during level loading.

#rb Francis.Hurteau, Geoff.Evans

[CL 14362917 by danny couture in ue5-main branch]
This commit is contained in:
danny couture
2020-09-22 05:48:20 -04:00
parent 2ed9d18f8f
commit 90ed5f144f
24 changed files with 953 additions and 18 deletions

View File

@@ -2032,7 +2032,9 @@ static bool CompressMipChain(
int32 MipCount = MipChain.Num();
check(MipCount >= (int32)CompressorCaps.NumMipsInTail);
const bool bImageHasAlphaChannel = DetectAlphaChannel(MipChain[0]);
const int32 MinAsyncCompressionSize = 128;
// This number was too small (128) for current hardware and caused too many
// context switches for work taking < 1ms. Bump the value for 2020 CPUs.
const int32 MinAsyncCompressionSize = 512;
const bool bAllowParallelBuild = TextureFormat->AllowParallelBuild();
bool bCompressionSucceeded = true;
int32 FirstMipTailIndex = MipCount;
@@ -2225,7 +2227,14 @@ public:
{
#if PLATFORM_WINDOWS
#if PLATFORM_64BITS
nvTextureToolsHandle = FPlatformProcess::GetDllHandle(*(FPaths::EngineDir() / TEXT("Binaries/ThirdParty/nvTextureTools/Win64/nvtt_64.dll")));
if (FWindowsPlatformMisc::HasAVX2InstructionSupport())
{
nvTextureToolsHandle = FPlatformProcess::GetDllHandle(*(FPaths::EngineDir() / TEXT("Binaries/ThirdParty/nvTextureTools/Win64/AVX2/nvtt_64.dll")));
}
else
{
nvTextureToolsHandle = FPlatformProcess::GetDllHandle(*(FPaths::EngineDir() / TEXT("Binaries/ThirdParty/nvTextureTools/Win64/nvtt_64.dll")));
}
#else //32-bit platform
nvTextureToolsHandle = FPlatformProcess::GetDllHandle(*(FPaths::EngineDir() / TEXT("Binaries/ThirdParty/nvTextureTools/Win32/nvtt_.dll")));
#endif