You've already forked UnrealEngineUWP
mirror of
https://github.com/izzy2lost/UnrealEngineUWP.git
synced 2026-03-26 18:15:20 -07:00
#lockdown Nick.Penwarden #rb None ========================== MAJOR FEATURES + CHANGES ========================== Change 3358140 on 2017/03/22 by Rolando.Caloca DR - Fix copy to cube face - Compile fix when using dump layer - Add new error enum Change 3358301 on 2017/03/22 by Mitchell.Wilson Initial check in of LODs in InfiltratorForward. First pass on optimization in level. Adding a visibility track for SceneCapture2D in tunnel section. Change 3358477 on 2017/03/22 by Mitchell.Wilson Updating Skeletal Mesh DPW_Robot_Export to resolve screen size being too low for LOD1. Cleaned up LOD1 which was showing some visible popping when transitioning. Change 3358529 on 2017/03/22 by Mark.Satterthwaite Globally disable clang's "constant-logical-operand" warning when running under Distcc - it is much easier and less invasive than constantly fixing the code. Change 3358745 on 2017/03/22 by Mark.Satterthwaite Disable another warning (parentheses-equality) under Distcc because again the separation of preprocessing from compilation means it turns up where it isn't expected. Change 3358837 on 2017/03/22 by Joe.Graf Merge of pull request #3214 for the RenderDocPlugin #CodeReview: matt.kuhlenschmidt, marcus.wassmer #rb: marcus.wassmer Change 3359112 on 2017/03/22 by Ben.Salem Update perf monitor to include frame time by default. Also, use only game/PIE world timers when in editor, instead of all worlds combined. #tests Ran several Showdown test runs with plugin! Change 3359363 on 2017/03/22 by Joe.Graf First pass at non-unity & no pch compilation Change 3359449 on 2017/03/22 by Joe.Graf Added missing null check when exporting a EXR on Linux (UE-40268) #CodeReview: dmitry.rekman #rb: n/a Change 3360349 on 2017/03/23 by Guillaume.Abadie Fixes TAA's AA_FORCE_ALPHA_CLAMP causing DOF layouts. 
#jira UE-42920 Change 3360405 on 2017/03/23 by Marcus.Wassmer Better method for detecting Kepler Change 3360718 on 2017/03/23 by Daniel.Wright Planar reflections handle views smaller than the render target in a general way * Fixes planar reflections with adaptive pixel density (ViewFamily size larger than actual views combined) * Planar reflections are now supported in splitscreen Change 3360758 on 2017/03/23 by Daniel.Wright [Copy] Added new light property bCastVolumetricShadow, which defaults to true for directional and sky lights, but false for point / spot lights as supporting volumetric fog shadowing has significant GPU overhead Change 3360762 on 2017/03/23 by Daniel.Wright [Copy] Texture flags are now properly routed to RHICreateTexture3D from the render target pool Change 3360768 on 2017/03/23 by Daniel.Wright [Copy] Disabled GPUProfiler histogram by default, controlled by r.ProfileGPU.ShowEventHistogram Change 3360770 on 2017/03/23 by Daniel.Wright [Copy] Disabled fast clears on CustomDepth, saves .2ms on xbox Change 3360771 on 2017/03/23 by Daniel.Wright [Copy] Particle lights no longer force tiled deferred lighting. Tiled deferred lighting is only used if enough unshadowed lights + particle lights are on screen. Saves 1.5ms Xbox with one particle light. Change 3360774 on 2017/03/23 by Daniel.Wright [Copy] Distance field cvar comments Change 3360782 on 2017/03/23 by Daniel.Wright [Copy] Disabled selection color on Volume materials Change 3360795 on 2017/03/23 by Daniel.Wright [Copy] Volume materials now specify Albedo and Extinction, which is more intuitive than Scattering and Absorption. Albedo is [0-1] reflectance, while Extinction is a world space density. 
Change 3360799 on 2017/03/23 by Daniel.Wright [Copy] Cinematic scalability levels get 2x volumetric fog resolution in x and y Change 3360806 on 2017/03/23 by Daniel.Wright [Copy] Fixed volumetric fog being offset when viewport min is not 0 Change 3360809 on 2017/03/23 by Daniel.Wright [Copy] Volumetric fog now adds a bias to the inverse squared light falloff denominator, prevents extreme aliasing from the hotspot. Can be controlled with r.VolumetricFog.InverseSquaredLightDistanceBiasScale. Change 3361651 on 2017/03/23 by Brian.Karis Higher quality sharp SSR at quality 4 Change 3361678 on 2017/03/23 by Brian.Karis Fresnel darkens diffuse for clearcoat. Change 3361683 on 2017/03/23 by Brian.Karis Fixed SSR artifact Change 3361691 on 2017/03/23 by Brian.Karis Changed min roughness limit Change 3361707 on 2017/03/23 by Brian.Karis Added inverse film tone map Change 3361726 on 2017/03/23 by Brian.Karis Better precision inverse Change 3361758 on 2017/03/23 by Brian.Karis Material flag normal curvature to roughness is no longer forward only. Change 3361765 on 2017/03/23 by Brian.Karis Update ACES Change 3361774 on 2017/03/23 by Brian.Karis Cleaned up alpha support and disabled screen edge clipping. Change 3362478 on 2017/03/24 by Guillaume.Abadie Cherry pick 3316084's PostProcessing.cpp: Fixes a bug in Circle DOF where the apply pass was no longer using the downres DOF's TAA output. 
#author Brian.Karis #jira UE-42920 Change 3362738 on 2017/03/24 by Rolando.Caloca DR - Hide scene capture on IF Change 3362890 on 2017/03/24 by Guillaume.Abadie Renames r.SceneAlpha to r.PostProcessing.PropagateAlpha Change 3363665 on 2017/03/24 by Mark.Satterthwaite PR #3414: Add command line option "-noheartbeatthread" to disable heart beat thread (Contributed by JeffRous) Change 3363866 on 2017/03/24 by Arne.Schober DR - Updated NVAPI #RB Marcus.Wassmer Change 3364300 on 2017/03/24 by Brian.Karis SSR use dynamic velocity Change 3364372 on 2017/03/24 by Brian.Karis Fix changing off axis projection velocities. Change 3364373 on 2017/03/24 by Brian.Karis Enabled velocity drawing in scene captures Change 3365531 on 2017/03/27 by Guillaume.Abadie Computes the material's screen position material expression directly from the pixel shader SvPosition Change 3365764 on 2017/03/27 by Chris.Bunner Lowering severity of crash for missing values in scalability.ini. #jira UE-41331 Change 3365916 on 2017/03/27 by Guillaume.Abadie Exposes the viewport offset within the view property material expression Change 3365979 on 2017/03/27 by Brian.Karis Fixed skylight intensity from double applying Change 3365987 on 2017/03/27 by Brian.Karis Stopped post process indirect lighting intensity from scaling skylight reflections Change 3365991 on 2017/03/27 by Brian.Karis Fix for static analysis Change 3366028 on 2017/03/27 by Daniel.Wright Volumetric fog supports static shadowing from Stationary lights * Using bilinear on static shadowmap depths + 1 PCF to smooth out results Change 3366029 on 2017/03/27 by Daniel.Wright Static shadow depth maps for Stationary point and spot lights are 2x higher res by default (4x more texels), which is more appropriate for volumetric fog Change 3366055 on 2017/03/27 by Guillaume.Abadie Cherry picks 3251469: Implements scene capture component's CaptureSortPriority to control GPU execution order in order to manage inter dependencies. 
Change 3366447 on 2017/03/27 by Simon.Tourangeau Fix IES light profile importer. - Bug in the LM-63-1986 format importer. Change3366836on 2017/03/27 by Brian.Karis ClearUAV now supports int types Change 3367435 on 2017/03/28 by Benjamin.Hyder Submitting Decal Automation map for initial approval Change 3367572 on 2017/03/28 by Chris.Bunner Changed ClampedPow {max(abs(x),0.00001)} to PositiveClampedPow {max(x,0)} to give more expected results to Power node in material graphs. #jira UE-42989 Change 3367756 on 2017/03/28 by Olaf.Piesche Niagara material usage flags Change 3367835 on 2017/03/28 by Marcus.Wassmer Fix crash when TileRenderer runs before anything else. Make explicit behavior when rendering at a time when there is no valid scene. Change 3367837 on 2017/03/28 by Marcus.Wassmer Missed a file. Change 3367838 on 2017/03/28 by Richard.Wallis Updated items from original shelved version by Mark Satt: - Added MetalBackend.cpp to change main function string to have an initial crc + code length zero's **Description below taken from Mark Satt's original verison of this in CL3343280** Updated for Dev-Rendering's PSOs & integrates Richard's work on RHI shader libraries. Replace the FShaderCache's cook-time binary shader cache with Dmitriy Dyomin's standalone FShaderCodeLibrary that saves all shader byte-code arrays to files named by the FSHAHash. This de-duplicates shaders so we only ever store the byte code once. Includes optional support for generating a platform specific library file - which Metal implements to provide a single Metal library. The platform-native implementation can perform more de-duplication and in the case of Metal has lower file overheads and will compress more efficiently. - All of the support code for the FShaderCache's cook caching is gone, which affects all platforms. The FShaderCodeLibrary is currently supported by Cook-By-The-Book but can be used with iterate or child cookers - only DLC cooking requires further work. 
- With further modifications it should be possible to support Cook-on-the-Fly as well (output directories would be needed in FShaderCodeLibrary::InitForCooking) and the file-access pattern should be changed to use async. IO so that Material loading is not considered complete until all required byte-code arrays are loaded into the FShaderCodeLibrary. - For Metal archiving shaders this way will compile with debug information and the FShaderCodeLibrary, with some help from extensions to IShaderFormat, will save the debug information out into separate files during cooking - these can then be used to debug the game without having to locally recompile, recook & repackage but the shipped byte-code is stripped. Global shader caches are also subject to de-duplication in the library in order to support Metal's shader stripping. - File Move operations need to respect the 'Replace' flag - for FShaderCodeLibrary to work we need Move to be atomic. - This bumps the object version and will cause all content to recook. - Native library support is optional - only Metal currently implements one, but so could Vulkan and D3D12. For Metal the big advantages are further de-duplication where different materials generate the same MetalSL text but a different FSHAHash, that the single Metal library has lower overhead and that as a single file it all compresses far better (esp. with LZMA - 5x smaller). Change 3367854 on 2017/03/28 by Mark.Satterthwaite Don't track or record draw call resources for non-OpenGL shader platforms in the shader-cache as it is unnecessary and makes it slower on the CPU than it needs to be. Change 3367877 on 2017/03/28 by Brian.Karis Fixed linux build hopefully Change 3368001 on 2017/03/28 by Mark.Satterthwaite Compile fixes from Richard's checkin caused by not having visibility to all platforms from my original shelves. Change 3368019 on 2017/03/28 by Mark.Satterthwaite And another fix for Windows compilation of MetalShaderFormat. 
Change 3368042 on 2017/03/28 by Mark.Satterthwaite And a couple of simpler MSVC errors. Change 3368271 on 2017/03/28 by Mark.Satterthwaite Make SceneRenderTargets compile again. Change 3368691 on 2017/03/28 by Daniel.Wright [Copy from BenW] Renamed r.Shadow.MaxCSMShadowResolution to r.Shadow.MaxCSMResolution to match scalability inis Change 3369689 on 2017/03/29 by Marcus.Wassmer Fix non editor compile for now Change 3369862 on 2017/03/29 by Marcus.Wassmer Get the rest of the things compiling again. Change 3369896 on 2017/03/29 by Chris.Bunner Enabling AMD HDR support by default. #jira UE-42113 Change 3370535 on 2017/03/29 by Marcus.Wassmer DR - Fix template explicit instantiation for ClearUAV permutations #RB Brian.Karis, Arne.Schober Change 3370704 on 2017/03/29 by Rolando.Caloca DR - Rewrote GPU Skin Cache - Per section buffers - Limited memory per non-editor worlds (control with r.SkinCache.SceneMemoryLimitInMB) Copied from 3370529 Change 3371389 on 2017/03/30 by Richard.Wallis Remove temp working directories after archive packages built. Change 3371641 on 2017/03/30 by Rolando.Caloca DR - Copy 3371640 (fix mem leak) Change 3372436 on 2017/03/30 by Uriel.Doyon Added flags in UPrimitiveComponent to keep track of its state in the streaming manager. This allows to avoid unnecessary callback and processing in begin destroy reattach and being destroy logic. Removed the limitation of only processing UMeshComponent when handling spawed primitive. This releases the level manager from having to manage dynamic primitives. This improves performance by not having to manage dynamic references in the level manager. Primitives managed as dynamic now have a callback when ever their proxy is udpated, handling many cases automatically where previously a manual callback to notify would have been required. Fixed an issue where primitives with no reference to streaming textures would loose they dynamic state because of lack of references in the streamer. 
Change 3372740 on 2017/03/30 by Chris.Bunner [Experimental] Partial compute post process pipeline (r.PostProcess.PreferCompute). StencilSceneTexture added to deferred list. A few known issues to be fixed in a follow-up CL. Change 3372765 on 2017/03/30 by Uriel.Doyon Disabled concurrent call to NotifyPrimitiveUpdated while we don't have a safe concurrent update Change 3372979 on 2017/03/30 by Richard.Hinckley #jira UE-43501 The stencil buffer can now use single-channel bitmasks that ignore depth. This makes it possible to detect overlaps between stencil objects. Change 3373053 on 2017/03/30 by Simon.Tourangeau LPV Fade support - mostly integrated from CL 2959511 Change3373272on 2017/03/30 by Uriel.Doyon Added support for the concurrent update of dynamic primitives by the streaming manager. Change 3373450 on 2017/03/30 by Rolando.Caloca DR - FNT - Fix bad data for odd texcoord channels used on skin cache passthrough factory Copy 3373364 #jira UE-43492 Change 3373470 on 2017/03/30 by Marcus.Wassmer Nvidia Aftermath support Change 3374187 on 2017/03/31 by Chris.Bunner Volume texture support for CombineLUTs/Tonemap compute pass. Refactored common param code to shared sub-class in CombineLUTs and Tonemap PS/CS. Skip compute post process out-of-bounds writes. Unsigned type conversion fixes. Trimmed compute post process shader inputs. Change 3374233 on 2017/03/31 by Chris.Bunner Removed several redundant post process compute fences and resource transitions. Added testing CVar to force compute post processes to async (r.PostProcess.ForceAsyncDispatch). Change 3374412 on 2017/03/31 by Rolando.Caloca DR - Fix static analysis Change 3374544 on 2017/03/31 by Richard.Wallis FShaderCache Parallel-Context-Aware Merged with FShaderCache Single Library. Future Work - This was done before Engine PSO were in so this now needs a refector in the recording and playback on pipeline states instead an emulate PSO in OpenGL Driver. 
- Remove FShaderCacheState and replace the logic with FGraphicsPipelineStateInitializer which should be able to record from the RHI current pipeline state - This would reduce the Locking required as it's naturally per thread/context and only the final record would need a lock Change 3374588 on 2017/03/31 by Richard.Wallis Windows Compile Fixes Change 3374810 on 2017/03/31 by Benjamin.Hyder updating recommended GPU drivers Change 3375207 on 2017/03/31 by Rolando.Caloca DR - vk - Fixed swapchain format selection for some Linux platforms Change 3375248 on 2017/03/31 by Rolando.Caloca DR - vk - Prefer D32S8 Change 3375495 on 2017/03/31 by Rolando.Caloca DR - vk - Update to sdk 1.0.42.2 Change 3375496 on 2017/03/31 by Rolando.Caloca DR - Force compiling with updated Vulkan SDK Change 3375636 on 2017/03/31 by Mark.Satterthwaite Copying Metal improvements from task stream, with some modifications: - Off-by-default implementations for MTLFence & MTLHeap, including some small changes to the RHI interface for parallel contexts. - Support for Apple's Instruments "Points of Interest" tool. - Consolidation of some Mac & iOS compiler, memory and thread handling code. - Fixes for Metal not having implicit buffer SRV typecasting for DistanceField effects. - Improvements to the internal FMetalDebug layer, still off by default. - Limited support for Xcode automatic code-signing for iOS/tvOS. - Minimisation of render-target changes in some rendering code, esp. SceneOcclusion, DBufferDecals. - Added RHISetResourceAliasability_RenderThread to FDynamicRHI for RHIs to implement simple render-target aliasing. - Added FApplePlatformObject, a custom block allocator for Objective-C types (with NSZombie support) which is now used in MetalRHI to decrease allocation costs of Objective-C types. - Smattering of lesser fixes. Change 3375654 on 2017/03/31 by Mark.Satterthwaite Incremental Windows build fix. 
Change 3375656 on 2017/04/01 by Mark.Satterthwaite Correct extern declaration, including the module export macro which Mac unhelpfully doesn't enforce (for now...). Change 3375797 on 2017/04/01 by Mark.Satterthwaite Nullability qualifiers to fix Mac build-farm compilation: perversely this is not a problem for local builds... Change 3375798 on 2017/04/01 by Mark.Satterthwaite Fix the first mis-merge in ParticleGpuSimulation - these changes clearly weren't properly resolved in the task-stream. Change 3375835 on 2017/04/01 by Mark.Satterthwaite Try again with nullability and fix the occlusion changes as the PSO work wasn't merged correctly. Change 3376143 on 2017/04/02 by Mark.Satterthwaite Switch back to flat dSYMs for Dev-Rendering - they don't work with Instruments etc. but they are required by our build system. Change 3376324 on 2017/04/03 by Chris.Bunner Fixed cvar re-registration log spam and flagged a testing-only cvar as such. Change 3376726 on 2017/04/03 by Benjamin.Hyder Submitting initial HDR test map (WIP) Change 3376756 on 2017/04/03 by Guillaume.Abadie Fixes scene captures ordering's backward compatibility. Before, 2d scene captures were rendered before cube scene captures. The CaptureSortPriority broke backward compatibility by settings this new member to 0 in the USceneCaptureComponent's constructor. Since it is a higher come first policy, this CL set the default of this value to 1 in USceneCaptureComponent2D's constructor. Change 3377378 on 2017/04/03 by Arne.Schober DR - Fix ShaderRecompiling over and over again #RB Chris.Bunner Change 3377512 on 2017/04/03 by Daniel.Wright [Copy] Fixed profilegpu in d3d12 - initialize FLongGPUTaskPS when it is safe to do so, and fixed FSlateRHIRenderer's incorrect usage of draw events Change 3377518 on 2017/04/03 by Daniel.Wright [Copy] Distance field atlas coalesces updates to reduce RHIUpdateTexture3D memory overhead on d3d12 Change 3377526 on 2017/04/03 by Daniel.Wright [Copy] "Ran out of GPU queries!" 
log only happens once Change 3377535 on 2017/04/03 by Daniel.Wright [Copy] Fixed unreferenced local variable Change 3377539 on 2017/04/03 by Daniel.Wright [Copy] Xbox One RHIGetResourceInfo takes ESRAM into account - fixes render target pool 'VRamInKB request failed' messages Change 3377546 on 2017/04/03 by Daniel.Wright [Copy] Added r.LightMaxDrawDistanceScale for local light scalability Change 3377553 on 2017/04/03 by Daniel.Wright [Copy] Removed NEW_ESRAM_ALLOCATOR define and old unused path Change 3377560 on 2017/04/03 by Daniel.Wright [Copy] Fixed two d3d12 refcounting bugs causing -norhithread crashes Change 3377565 on 2017/04/03 by Daniel.Wright [Copy] Fixed Xbox One deleting GPU resources before the GPU is done reading from them (GRHINeedsExtraDeletionLatency was false) Change 3377572 on 2017/04/03 by Daniel.Wright [Copy] Disabled point / spot lights with MaxDrawDistance on LowPC Change 3377586 on 2017/04/03 by Daniel.Wright Fixed compile error Change 3377699 on 2017/04/03 by David.Hill FFT Code. Moved over from raven and refactored #review-3374589 @guillaume.abadie Change 3377910 on 2017/04/03 by David.Hill GPU FFT: Fix Linux Build adding a missing template<> to an IMPLEMENT_SHADER_TYPE Change 3378751 on 2017/04/04 by Marcus.Wassmer HQ particle lights now spawn attached to the same socket as their parent module. Change 3378819 on 2017/04/04 by Richard.Wallis Should be no need to protect shader cache against RHI thread now. Change 3378823 on 2017/04/04 by Richard.Wallis FRHIShaderLibrary Opaque Type - Base FRHIShaderLibrary has no Create*Shader functions and is passed to Overloaded RHICreate*Shader functions instead of creation directly through the library. - Assumed that only Native libraries will end up in the RHICreate*Shader functions. - ShaderCache and ShaderCode Libraries now inherit from a common factory interface. 
Change 3378883 on 2017/04/04 by Arne.Schober DR - Fix DCC build Change 3378885 on 2017/04/04 by Richard.Wallis Metal resource cast compile fix post merge. Change 3378946 on 2017/04/04 by Chris.Bunner SM4 assert fix. Change 3378953 on 2017/04/04 by Chris.Bunner Fixed type-correctness on legacy BreakMA material nodes and set more flexible formats to global attributes which should result in much more forgiving graphs for users. Allowed material nodes to opt out of mask-based pin coloration. #tests Compiled most Paragon materials + QAGame test maps. #jira UE-39885 Change 3379189 on 2017/04/04 by Arne.Schober DR - Fix aftermath staging Change 3379229 on 2017/04/04 by Arne.Schober DR - Fix missing include Change 3379374 on 2017/04/04 by Mark.Satterthwaite Revert an accidentally merged change in MacPlatformProcess that relies on further changes from the Metal task stream. Change 3379505 on 2017/04/04 by Rolando.Caloca DR - Fix mismatched interpolators Change 3379539 on 2017/04/04 by Mark.Satterthwaite No FFT for any hlslcc platform - the IR for one or more RWTexture2D isn't quite right... #jira UE-43626 Change 3379561 on 2017/04/04 by Rolando.Caloca DR - Fix root signature issues on D3D12 PC Change 3379590 on 2017/04/04 by Mark.Satterthwaite Back out changelist 3379539 & change the shader slightly instead, the HLSLCC library generates bogus IR when you have an inout RWTexture. #jira UE-43626 Change 3379917 on 2017/04/04 by Uriel.Doyon Fix to input mismatch Change 3380578 on 2017/04/05 by Chris.Bunner Shader type fixes. #jira UE-43652 Change 3380639 on 2017/04/05 by Rolando.Caloca DR - Expose GetOrCreate PSO and document Change 3380821 on 2017/04/05 by Guillaume.Abadie Fixes a crash in USceneCaptureComponent::UpdateDeferredCaptures() #jira UE-43642 Change 3381092 on 2017/04/05 by Guillaume.Abadie Cherry pick 3362517: Implements TAA's scene color unpremultiplication from alpha channel to reduce DOF alpha channel temporal ghosting. 
This CL take the oportunity to transform AA_ALPHA to an compile time enumeration, and add a basic TAA compile time configuration validation to improve readability of the different TAA passes' configurations. Change 3381300 on 2017/04/05 by Mark.Satterthwaite Quick fix for changes to MetalRHI's render-thread safe texture creation not correctly handling AVFoundation video player handing us an IOSurface. #jira UE-43597 Change3381359on 2017/04/05 by Guillaume.Abadie Back out changelist 3381092 Change 3381421 on 2017/04/05 by Mark.Satterthwaite Amended CL #3380995 from Richard Wallis to address crash in the Material Editor under the validation layer - when there are no textures bound the default pass descriptor assigns store actions, which means we can't override them with our deferred store actions. #jira UE-43689 Change 3381422 on 2017/04/05 by Mark.Satterthwaite Absolute time queries can't be batched in Metal but I also can't rely on them being started with a call to BeginQuery - only EndQuery. #jira UE-43691 Change3381503on 2017/04/05 by Daniel.Wright More intuitive controls for Volumetric Fog * Removed ScatteringScale / AbsorptionScale on Exponential Height Fog and added Albedo / Extinction * InscatteringColorCubemap is now supported by Volumetric Fog * Particle lights have a default VolumetricScatteringIntensity of 0 to avoid trailing * Tweaked GVolumetricFogDepthDistributionScale better for nearby details * Volume Materials have twice the interpolators available Change 3381527 on 2017/04/05 by Mark.Satterthwaite Disable Private GPU storage for PVRTC texture formats on iOS Metal - these require more changes to the blit-encoder usage as PVRTC has strange requirements. Change 3381671 on 2017/04/05 by Mark.Satterthwaite Better error message for failure to compile shaders remotely from PC for Metal. 
Change 3381769 on 2017/04/05 by Rolando.Caloca DR - Added lock texture array 2d on Vulkan Change 3382003 on 2017/04/05 by Mark.Satterthwaite Remove the automatic Metal aliasing/re-use when releasing some resource types as it doesn't work as intended. Change 3382030 on 2017/04/05 by Zachary.Wilson Fix compiling Metal text shaders from PC broken in merge from task stream. #submitter mark.satterthwaite #jira UE-43652 Change 3382880 on 2017/04/06 by Mark.Satterthwaite Michael Trepka's CL #3379927: VolumetricFogVoxelization implementation for Mac Change 3383315 on 2017/04/06 by Mark.Satterthwaite Partially revert CL #3382003 - the emulated Metal heaps require invoking makeAliasable in order to reclaim memory. #jira UE-43739 Change 3384639 on 2017/04/07 by Marcus.Wassmer Move ShaderResource version bump to RenderingObjectVersion Change 3384704 on 2017/04/07 by Mark.Satterthwaite Compile fix for merge. Change 3384933 on 2017/04/07 by Rolando.Caloca DR - Fix skin cache crash with BP (copy 3384714) Change 3385104 on 2017/04/07 by Mark.Satterthwaite Fix MetalRHI's abs(int2) handling - it can't be translated to fabs(int2) as that won't compile. Also rebuild hlslcc for my sanity. #jira UE-43783 Change 3385105 on 2017/04/07 by Mark.Satterthwaite Force a shader rebuild to ensure that everybody picks up the fix for #jira UE-43783 #jira UE-43783 Change 3385118 on 2017/04/07 by Arne.Schober DR - [OR-37359] - Fix disapearing Decals when StencilLod Fade is enabled #RB none Change 3385149 on 2017/04/07 by Marcus.Wassmer Fix skincache motion blur Change 3385189 on 2017/04/07 by Rolando.Caloca DR - Fix swapchain format for editor on Vulkan Change 3385287 on 2017/04/07 by Mark.Satterthwaite Enable SM5 on Intel as of 10.12.4 and later. 
Change 3385347 on 2017/04/07 by Rolando.Caloca DR - Temp fix for GL4 corruption on editor #jira UE-43785 Change 3385363 on 2017/04/07 by Rolando.Caloca DR - Actually fix all win platforms for GL bug #jira UE-43785 Change 3385557 on 2017/04/07 by Arne.Schober DR - [UE-43205] - Fix mesh paint #RB none Change 3385608 on 2017/04/07 by Daniel.Wright Fixed SampleCmp being used on a non-depth texture, causing a d3d error Change 3385980 on 2017/04/10 by Rolando.Caloca DR - Remove transition functions RHIClearColor* RHIClearDepthStencilTexture Change 3386042 on 2017/04/10 by Rolando.Caloca DR - Fix metal merge issue Change 3386157 on 2017/04/10 by Rolando.Caloca DR - Remove VS2013 libs generation off hlslcc & glslang (to match main) Change 3386356 on 2017/04/10 by Chris.Bunner Resolving merge errors. Change 3386414 on 2017/04/10 by Chris.Bunner Resolved merge issue in RendererScene.cpp. Change 3386700 on 2017/04/10 by Mark.Satterthwaite Silence documentation warnings. Change 3387178 on 2017/04/10 by Chris.Bunner Removed invalid mask correction on MakeMA material nodes. Change 3388177 on 2017/04/11 by Marcus.Wassmer Disable ensure that is no longer relevant now that we bind clear colors on texture creation Change 3388261 on 2017/04/11 by Chris.Bunner Static analysis fix. [CL 3388266 by Chris Bunner in Main branch]
3199 lines
109 KiB
C++
3199 lines
109 KiB
C++
// Copyright 1998-2017 Epic Games, Inc. All Rights Reserved.
|
|
|
|
/*=============================================================================
|
|
OpenGLShaders.cpp: OpenGL shader RHI implementation.
|
|
=============================================================================*/
|
|
|
|
#include "OpenGLShaders.h"
|
|
#include "HAL/PlatformFilemanager.h"
|
|
#include "HAL/FileManager.h"
|
|
#include "Misc/Paths.h"
|
|
#include "Serialization/MemoryWriter.h"
|
|
#include "Serialization/MemoryReader.h"
|
|
#include "OpenGLDrvPrivate.h"
|
|
#include "Shader.h"
|
|
#include "GlobalShader.h"
|
|
|
|
#define CHECK_FOR_GL_SHADERS_TO_REPLACE 0
|
|
|
|
#if PLATFORM_WINDOWS
|
|
#include <mmintrin.h>
|
|
#elif PLATFORM_MAC
|
|
#include <xmmintrin.h>
|
|
#endif
|
|
#include "SceneUtils.h"
|
|
|
|
/** Number of float components in one float4. */
const uint32 NumFloatsInFloat4 = 4;

/** Size of one float4 in bytes (four floats, i.e. 16). */
const uint32 SizeOfFloat4 = NumFloatsInFloat4 * static_cast<uint32>(sizeof(float));
|
|
|
|
/**
 * Grows this dirty range so that it also covers the span
 * [NewStartVector, NewStartVector + NewNumVectors).
 *
 * The range is stored as a start index plus a count, so StartVector + NumVectors
 * is its exclusive end. When the range is currently empty (NumVectors == 0) it
 * simply adopts the new span.
 *
 * @param NewStartVector	Index of the first vector in the newly dirtied span.
 * @param NewNumVectors		Number of vectors in the newly dirtied span.
 */
FORCEINLINE void FOpenGLShaderParameterCache::FRange::MarkDirtyRange(uint32 NewStartVector, uint32 NewNumVectors)
{
	if (NumVectors > 0)
	{
		// Exclusive ends of the current and the incoming span.
		const uint32 High = StartVector + NumVectors;
		const uint32 NewHigh = NewStartVector + NewNumVectors;

		const uint32 MaxVector = FMath::Max(High, NewHigh);
		const uint32 MinVector = FMath::Min(StartVector, NewStartVector);

		StartVector = MinVector;
		// MaxVector is an exclusive end, so the difference already is the vector count.
		// Adding 1 here would dirty one vector past the union on every merge and
		// disagree with the exact count stored by the empty-range branch below.
		NumVectors = MaxVector - MinVector;
	}
	else
	{
		// Nothing was dirty yet: the new span becomes the whole range.
		StartVector = NewStartVector;
		NumVectors = NewNumVectors;
	}
}
|
|
|
|
/**
|
|
* Verify that an OpenGL program has linked successfully.
|
|
*/
|
|
static bool VerifyLinkedProgram(GLuint Program)
|
|
{
|
|
SCOPE_CYCLE_COUNTER(STAT_OpenGLShaderLinkVerifyTime);
|
|
|
|
#if UE_BUILD_DEBUG || DEBUG_GL_SHADERS
|
|
GLint LinkStatus = 0;
|
|
glGetProgramiv(Program, GL_LINK_STATUS, &LinkStatus);
|
|
if (LinkStatus != GL_TRUE)
|
|
{
|
|
GLint LogLength;
|
|
ANSICHAR DefaultLog[] = "No log";
|
|
ANSICHAR *CompileLog = DefaultLog;
|
|
glGetProgramiv(Program, GL_INFO_LOG_LENGTH, &LogLength);
|
|
if (LogLength > 1)
|
|
{
|
|
CompileLog = (ANSICHAR *)FMemory::Malloc(LogLength);
|
|
glGetProgramInfoLog(Program, LogLength, NULL, CompileLog);
|
|
}
|
|
|
|
UE_LOG(LogRHI,Error,TEXT("Failed to link program. Compile log:\n%s"),
|
|
ANSI_TO_TCHAR(CompileLog));
|
|
|
|
if (LogLength > 1)
|
|
{
|
|
FMemory::Free(CompileLog);
|
|
}
|
|
return false;
|
|
}
|
|
#endif
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Verify that an OpenGL shader has compiled successfully.
|
|
*/
|
|
static bool VerifyCompiledShader(GLuint Shader, const ANSICHAR* GlslCode )
{
	SCOPE_CYCLE_COUNTER(STAT_OpenGLShaderCompileVerifyTime);

	// All verification work is compiled in only for debug builds or when
	// DEBUG_GL_SHADERS is set; otherwise the function reports success.
#if UE_BUILD_DEBUG || DEBUG_GL_SHADERS
	// With separate shader objects the handle may name a program rather than
	// a shader; in that case the link result is what gets verified.
	if (FOpenGL::SupportsSeparateShaderObjects() && glIsProgram(Shader))
	{
		bool const bCompiledOK = VerifyLinkedProgram(Shader);
#if DEBUG_GL_SHADERS
		if (!bCompiledOK && GlslCode)
		{
			UE_LOG(LogRHI,Error,TEXT("Shader:\n%s"),ANSI_TO_TCHAR(GlslCode));
		}
#endif
		return bCompiledOK;
	}

	// Start from "failed" so that a query which writes nothing (e.g. an
	// invalid handle) is never read back as an uninitialized value.
	GLint CompileStatus = GL_FALSE;
	glGetShaderiv(Shader, GL_COMPILE_STATUS, &CompileStatus);
	if (CompileStatus != GL_TRUE)
	{
		GLint LogLength = 0;
		ANSICHAR DefaultLog[] = "No log";
		ANSICHAR* CompileLog = DefaultLog;
		glGetShaderiv(Shader, GL_INFO_LOG_LENGTH, &LogLength);
#if PLATFORM_ANDROID
		if (LogLength == 0)
		{
			// make it big anyway
			// there was a bug in android 2.2 where glGetShaderiv would return 0 even though there was a error message
			// https://code.google.com/p/android/issues/detail?id=9953
			LogLength = 4096;
		}
#endif
		if (LogLength > 1)
		{
			CompileLog = (ANSICHAR*)FMemory::Malloc(LogLength);
			// Pre-terminate the buffer: if the driver writes nothing we log
			// an empty string instead of reading uninitialized memory.
			CompileLog[0] = '\0';
			glGetShaderInfoLog(Shader, LogLength, nullptr, CompileLog);
		}

#if DEBUG_GL_SHADERS
		if (GlslCode)
		{
			UE_LOG(LogRHI,Error,TEXT("Shader:\n%s"),ANSI_TO_TCHAR(GlslCode));
		}
#endif
		UE_LOG(LogRHI,Fatal,TEXT("Failed to compile shader. Compile log:\n%s"), ANSI_TO_TCHAR(CompileLog));

		// Only free what was heap-allocated above; DefaultLog lives on the stack.
		if (LogLength > 1)
		{
			FMemory::Free(CompileLog);
		}
		return false;
	}
#endif
	return true;
}
|
|
|
|
/**
 * Verifies a program (or program pipeline) object.
 * @param Program - GL name of the object to verify.
 * @returns the link verification result when separate shader objects are not
 *          supported; otherwise the IsProgramPipeline result when
 *          DEBUG_GL_SHADERS is set, and true when it is not.
 */
static bool VerifyProgramPipeline(GLuint Program)
{
	if (!FOpenGL::SupportsSeparateShaderObjects())
	{
		return VerifyLinkedProgram(Program);
	}

	// Don't try and validate SSOs here - the draw state matters to SSOs and it definitely can't be guaranteed to be valid at this stage
#if DEBUG_GL_SHADERS
	return FOpenGL::IsProgramPipeline(Program);
#else
	return true;
#endif
}
|
|
|
|
// ============================================================================================================================
|
|
|
|
class FOpenGLCompiledShaderKey
|
|
{
|
|
public:
|
|
FOpenGLCompiledShaderKey(
|
|
GLenum InTypeEnum,
|
|
uint32 InCodeSize,
|
|
uint32 InCodeCRC
|
|
)
|
|
: TypeEnum(InTypeEnum)
|
|
, CodeSize(InCodeSize)
|
|
, CodeCRC(InCodeCRC)
|
|
{}
|
|
|
|
friend bool operator ==(const FOpenGLCompiledShaderKey& A,const FOpenGLCompiledShaderKey& B)
|
|
{
|
|
return A.TypeEnum == B.TypeEnum && A.CodeSize == B.CodeSize && A.CodeCRC == B.CodeCRC;
|
|
}
|
|
|
|
friend uint32 GetTypeHash(const FOpenGLCompiledShaderKey &Key)
|
|
{
|
|
return GetTypeHash(Key.TypeEnum) ^ GetTypeHash(Key.CodeSize) ^ GetTypeHash(Key.CodeCRC);
|
|
}
|
|
|
|
private:
|
|
GLenum TypeEnum;
|
|
uint32 CodeSize;
|
|
uint32 CodeCRC;
|
|
};
|
|
|
|
typedef TMap<FOpenGLCompiledShaderKey,GLuint> FOpenGLCompiledShaderCache;
|
|
|
|
static FOpenGLCompiledShaderCache& GetOpenGLCompiledShaderCache()
|
|
{
|
|
static FOpenGLCompiledShaderCache CompiledShaderCache;
|
|
return CompiledShaderCache;
|
|
}
|
|
|
|
// ============================================================================================================================
|
|
|
|
|
|
/**
 * Maps a GL shader type enum to a short lowercase stage name.
 * @returns the stage name, or a null pointer for an unrecognized enum.
 */
static const TCHAR* ShaderNameFromShaderType(GLenum ShaderType)
{
	const TCHAR* StageName = nullptr;
	switch (ShaderType)
	{
	case GL_VERTEX_SHADER:
		StageName = TEXT("vertex");
		break;
	case GL_FRAGMENT_SHADER:
		StageName = TEXT("fragment");
		break;
	case GL_GEOMETRY_SHADER:
		StageName = TEXT("geometry");
		break;
	case GL_TESS_CONTROL_SHADER:
		StageName = TEXT("hull");
		break;
	case GL_TESS_EVALUATION_SHADER:
		StageName = TEXT("domain");
		break;
	case GL_COMPUTE_SHADER:
		StageName = TEXT("compute");
		break;
	default:
		break;
	}
	return StageName;
}
|
|
|
|
// ============================================================================================================================
|
|
|
|
namespace
|
|
{
|
|
/**
 * Appends a C string to a character array that holds a null-terminated string.
 * A non-empty Dest is assumed to end with its terminator, so the new text is
 * inserted just before the last element; an empty Dest receives Source
 * together with Source's terminator.
 */
inline void AppendCString(TArray<ANSICHAR> & Dest, const ANSICHAR * Source)
{
	const int32 SourceLen = FCStringAnsi::Strlen(Source);
	if (Dest.Num() == 0)
	{
		// Nothing stored yet: copy the text and its terminator.
		Dest.Append(Source, SourceLen + 1);
		return;
	}

	// Keep the existing last element (the terminator) at the end.
	Dest.Insert(Source, SourceLen, Dest.Num() - 1);
}
|
|
|
|
/**
 * Replaces every occurrence of Source inside Dest with Replacement.
 * @param Dest - Character array holding a null-terminated string; edited in place.
 * @param Source - Text to search for. An empty pattern is ignored.
 * @param Replacement - Text written in place of each match (may be empty).
 *
 * Scanning resumes after the inserted replacement, so the replacement text is
 * never re-examined and the loop terminates even when Replacement contains
 * Source.
 */
inline void ReplaceCString(TArray<ANSICHAR> & Dest, const ANSICHAR * Source, const ANSICHAR * Replacement)
{
	const int32 SourceLen = FCStringAnsi::Strlen(Source);
	const int32 ReplacementLen = FCStringAnsi::Strlen(Replacement);

	// An empty pattern matches everywhere and would never make progress;
	// an empty array has no terminated string to search.
	if (SourceLen == 0 || Dest.Num() == 0)
	{
		return;
	}

	int32 SearchFrom = 0;
	for (;;)
	{
		// Dest may reallocate on every edit, so always search from fresh data.
		const ANSICHAR* FoundPointer = FCStringAnsi::Strstr(Dest.GetData() + SearchFrom, Source);
		if (FoundPointer == nullptr)
		{
			break;
		}

		const int32 FoundIndex = (int32)(FoundPointer - Dest.GetData());
		Dest.RemoveAt(FoundIndex, SourceLen);
		Dest.Insert(Replacement, ReplacementLen, FoundIndex);

		// Skip past what was just written.
		SearchFrom = FoundIndex + ReplacementLen;
	}
}
|
|
|
|
/**
 * Finds the end of the line that starts at Text.
 * @returns a pointer to the first '\n' in Text, or to the string's null
 *          terminator when there is no '\n'.
 */
inline const ANSICHAR * CStringEndOfLine(const ANSICHAR * Text)
{
	if (const ANSICHAR* Newline = FCStringAnsi::Strchr(Text, '\n'))
	{
		return Newline;
	}
	return Text + FCStringAnsi::Strlen(Text);
}
|
|
|
|
inline bool CStringIsBlankLine(const ANSICHAR * Text)
|
|
{
|
|
while (!FCharAnsi::IsLinebreak(*Text))
|
|
{
|
|
if (!FCharAnsi::IsWhitespace(*Text))
|
|
{
|
|
return false;
|
|
}
|
|
++Text;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
 * Counts the non-overlapping occurrences of TargetString in Source.
 * @param Source - Character array holding a null-terminated string.
 * @param TargetString - Text to look for.
 * @returns the number of matches; 0 when the target is empty or Source holds
 *          no characters.
 */
inline int CStringCountOccurances(TArray<ANSICHAR> & Source, const ANSICHAR * TargetString)
{
	const int32 TargetLen = FCStringAnsi::Strlen(TargetString);

	// An empty target would match at the same position forever, and an empty
	// array has no terminated string to scan.
	if (TargetLen == 0 || Source.Num() == 0)
	{
		return 0;
	}

	int Count = 0;
	const ANSICHAR* Cursor = Source.GetData();
	while ((Cursor = FCStringAnsi::Strstr(Cursor, TargetString)) != nullptr)
	{
		++Count;
		// Continue after this match so matches never overlap.
		Cursor += TargetLen;
	}
	return Count;
}
|
|
|
|
/**
 * Moves the leading run of lines that start with '#' (or are blank) from the
 * front of Source to the end of the string held in Dest.
 * @param Dest - Receives the moved lines; kept null-terminated and made to end in '\n'.
 * @param Source - The moved characters are removed from its front.
 * @returns true if anything was moved, false if Source does not start with such a line.
 */
inline bool MoveHashLines(TArray<ANSICHAR> & Dest, TArray<ANSICHAR> & Source)
{
	// Advance line by line while the current line is a '#' line or blank.
	const ANSICHAR* Cursor = Source.GetData();
	while (Cursor[0] == '#' || CStringIsBlankLine(Cursor))
	{
		const ANSICHAR* LineEnd = CStringEndOfLine(Cursor);
		// Step over the newline if there is one; otherwise stop on the terminator.
		Cursor = (LineEnd[0] == '\n') ? (LineEnd + 1) : LineEnd;
	}

	const int32 PrefixLength = Cursor - Source.GetData();
	if (PrefixLength <= 0)
	{
		return false;
	}

	if (Dest.Num() == 0)
	{
		// Dest holds nothing yet: copy the prefix and terminate it.
		Dest.Append(Source.GetData(), PrefixLength);
		Dest.Append("", 1);
	}
	else
	{
		// Insert ahead of Dest's existing terminator.
		Dest.Insert(Source.GetData(), PrefixLength, Dest.Num() - 1);
	}

	// Make sure the moved text ends with a newline (Last(1) is the character before the terminator).
	if (Dest.Last(1) != '\n')
	{
		Dest.Insert("\n", 1, Dest.Num() - 1);
	}

	Source.RemoveAt(0, PrefixLength);
	return true;
}
|
|
}
|
|
|
|
/** Hashes a character array by taking the CRC32 of all of its stored bytes. */
inline uint32 GetTypeHash(FAnsiCharArray const& CharArray)
{
	const int32 NumBytes = CharArray.Num() * sizeof(ANSICHAR);
	return FCrc::MemCrc32(CharArray.GetData(), NumBytes);
}
|
|
|
|
/**
 * Binds named shader inputs/outputs to explicit locations on a GL program,
 * when the current max RHI shader platform needs bind locations.
 * @param TypeEnum - GL shader stage; only vertex and fragment stages bind anything.
 * @param Resource - GL program name passed to the bind calls.
 * @param InOutMask - One bit per index: vertex attributes "in_ATTRIBUTE<N>", or
 *                    fragment outputs "out_Target<N>" (the top bit is masked off as the depth bit).
 * @param RemapTable - Attribute index remapping; must be non-null when
 *                     FOpenGL::NeedsVertexAttribRemapTable() is true.
 */
static void BindShaderLocations(GLenum TypeEnum, GLuint Resource, uint16 InOutMask, const uint8 * RemapTable = nullptr)
{
	if (!OpenGLShaderPlatformNeedsBindLocation(GMaxRHIShaderPlatform))
	{
		return;
	}

	ANSICHAR Name[32] = {0};
	switch (TypeEnum)
	{
	case GL_VERTEX_SHADER:
		{
			FCStringAnsi::Strcpy(Name, "in_ATTRIBUTE");
			uint32 Index = 0;
			for (uint32 Remaining = InOutMask; Remaining != 0; Remaining >>= 1, ++Index)
			{
				if ((Remaining & 0x1) == 0)
				{
					continue;
				}

				// Write the decimal index right after the 12-character prefix.
				// The mask is 16 bits wide, so a leading '1' covers all two-digit indices.
				ANSICHAR* Suffix = Name + 12;
				if (Index >= 10)
				{
					*Suffix++ = '1';
				}
				*Suffix++ = '0' + (Index % 10);
				*Suffix = 0;

				if (FOpenGL::NeedsVertexAttribRemapTable())
				{
					check(RemapTable != nullptr);
					uint32 MappedAttributeIndex = RemapTable[Index];
					check(MappedAttributeIndex < NUM_OPENGL_VERTEX_STREAMS);
					glBindAttribLocation(Resource, MappedAttributeIndex, Name);
				}
				else
				{
					glBindAttribLocation(Resource, Index, Name);
				}
			}
			break;
		}
	case GL_FRAGMENT_SHADER:
		{
			FCStringAnsi::Strcpy(Name, "out_Target");
			uint32 Index = 0;
			// mask out the depth bit
			for (uint32 Remaining = (InOutMask) & 0x7fff; Remaining != 0; Remaining >>= 1, ++Index)
			{
				if ((Remaining & 0x1) == 0)
				{
					continue;
				}

				// Write the decimal index right after the 10-character prefix.
				ANSICHAR* Suffix = Name + 10;
				if (Index >= 10)
				{
					*Suffix++ = '1';
				}
				*Suffix++ = '0' + (Index % 10);
				*Suffix = 0;

				FOpenGL::BindFragDataLocation(Resource, Index, Name);
			}
			break;
		}
	case GL_GEOMETRY_SHADER:
	case GL_COMPUTE_SHADER:
	case GL_TESS_CONTROL_SHADER:
	case GL_TESS_EVALUATION_SHADER:
		// Nothing to bind for these stages.
		break;
	default:
		check(0);
		break;
	}
}
|
|
|
|
// Helper to compile a shader and return success, logging errors if necessary.
|
|
GLint CompileCurrentShader(const GLuint Resource, const FAnsiCharArray& GlslCode)
|
|
{
|
|
const ANSICHAR * GlslCodeString = GlslCode.GetData();
|
|
int32 GlslCodeLength = GlslCode.Num() - 1;
|
|
|
|
glShaderSource(Resource, 1, (const GLchar**)&GlslCodeString, &GlslCodeLength);
|
|
glCompileShader(Resource);
|
|
|
|
GLint CompileStatus = GL_TRUE;
|
|
#if PLATFORM_ANDROID
|
|
// On Android the same shader is compiled with different hacks to find the right one(s) to apply so don't cache unless successful if currently testing them
|
|
if (FOpenGL::IsCheckingShaderCompilerHacks())
|
|
{
|
|
glGetShaderiv(Resource, GL_COMPILE_STATUS, &CompileStatus);
|
|
}
|
|
#endif
|
|
#if (PLATFORM_HTML5 || PLATFORM_ANDROID || PLATFORM_IOS) && !UE_BUILD_SHIPPING
|
|
if (!FOpenGL::IsCheckingShaderCompilerHacks())
|
|
{
|
|
glGetShaderiv(Resource, GL_COMPILE_STATUS, &CompileStatus);
|
|
if (CompileStatus == GL_FALSE)
|
|
{
|
|
char Msg[2048];
|
|
glGetShaderInfoLog(Resource, 2048, nullptr, Msg);
|
|
UE_LOG(LogRHI, Error, TEXT("Shader compile failed: %s\n Original Source is (len %d) %s"), ANSI_TO_TCHAR(Msg), GlslCodeLength, ANSI_TO_TCHAR(GlslCodeString));
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#if PLATFORM_IOS // fix for running out of memory in the driver when compiling/linking a lot of shaders on the first frame
|
|
if (FOpenGL::IsLimitingShaderCompileCount())
|
|
{
|
|
static int CompileCount = 0;
|
|
CompileCount++;
|
|
if (CompileCount == 2500)
|
|
{
|
|
glFlush();
|
|
CompileCount = 0;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
return CompileStatus;
|
|
}
|
|
|
|
/**
|
|
* Compiles an OpenGL shader using the given GLSL microcode.
|
|
* @returns the compiled shader upon success.
|
|
*/
|
|
template <typename ShaderType>
|
|
ShaderType* CompileOpenGLShader(const TArray<uint8>& InShaderCode)
|
|
{
|
|
SCOPE_CYCLE_COUNTER(STAT_OpenGLShaderCompileTime);
|
|
VERIFY_GL_SCOPE();
|
|
|
|
FShaderCodeReader ShaderCode(InShaderCode);
|
|
|
|
ShaderType* Shader = nullptr;
|
|
const GLenum TypeEnum = ShaderType::TypeEnum;
|
|
FMemoryReader Ar(InShaderCode, true);
|
|
|
|
Ar.SetLimitSize(ShaderCode.GetActualShaderCodeSize());
|
|
|
|
FOpenGLCodeHeader Header = { 0 };
|
|
|
|
Ar << Header;
|
|
// Suppress static code analysis warning about a potential comparison of two constants
|
|
CA_SUPPRESS(6326);
|
|
if (Header.GlslMarker != 0x474c534c
|
|
|| (TypeEnum == GL_VERTEX_SHADER && Header.FrequencyMarker != 0x5653)
|
|
|| (TypeEnum == GL_FRAGMENT_SHADER && Header.FrequencyMarker != 0x5053)
|
|
|| (TypeEnum == GL_GEOMETRY_SHADER && Header.FrequencyMarker != 0x4753)
|
|
|| (TypeEnum == GL_COMPUTE_SHADER && Header.FrequencyMarker != 0x4353 && FOpenGL::SupportsComputeShaders())
|
|
|| (TypeEnum == GL_TESS_CONTROL_SHADER && Header.FrequencyMarker != 0x4853 && FOpenGL::SupportsTessellation()) /* hull shader*/
|
|
|| (TypeEnum == GL_TESS_EVALUATION_SHADER && Header.FrequencyMarker != 0x4453 && FOpenGL::SupportsTessellation()) /* domain shader*/
|
|
)
|
|
{
|
|
UE_LOG(LogRHI,Fatal,
|
|
TEXT("Corrupt shader bytecode. GlslMarker=0x%08x FrequencyMarker=0x%04x"),
|
|
Header.GlslMarker,
|
|
Header.FrequencyMarker
|
|
);
|
|
return nullptr;
|
|
}
|
|
|
|
int32 CodeOffset = Ar.Tell();
|
|
|
|
// The code as given to us.
|
|
FAnsiCharArray GlslCodeOriginal;
|
|
AppendCString(GlslCodeOriginal, (ANSICHAR*)InShaderCode.GetData() + CodeOffset);
|
|
uint32 GlslCodeOriginalCRC = FCrc::MemCrc_DEPRECATED(GlslCodeOriginal.GetData(), GlslCodeOriginal.Num());
|
|
|
|
// The amended code we actually compile.
|
|
FAnsiCharArray GlslCode;
|
|
|
|
// Find the existing compiled shader in the cache.
|
|
FOpenGLCompiledShaderKey Key(TypeEnum, GlslCodeOriginal.Num(), GlslCodeOriginalCRC);
|
|
GLuint Resource = GetOpenGLCompiledShaderCache().FindRef(Key);
|
|
if (!Resource)
|
|
{
|
|
#if CHECK_FOR_GL_SHADERS_TO_REPLACE
|
|
{
|
|
// 1. Check for specific file
|
|
FString PotentialShaderFileName = FString::Printf(TEXT("%s-%d-0x%x.txt"), ShaderNameFromShaderType(TypeEnum), GlslCodeOriginal.Num(), GlslCodeOriginalCRC);
|
|
FString PotentialShaderFile = FPaths::ProfilingDir();
|
|
PotentialShaderFile *= PotentialShaderFileName;
|
|
|
|
UE_LOG( LogRHI, Log, TEXT("Looking for shader file '%s' for potential replacement."), *PotentialShaderFileName );
|
|
|
|
int64 FileSize = IFileManager::Get().FileSize(*PotentialShaderFile);
|
|
if( FileSize > 0 )
|
|
{
|
|
FArchive* Ar = IFileManager::Get().CreateFileReader(*PotentialShaderFile);
|
|
if( Ar != NULL )
|
|
{
|
|
UE_LOG(LogRHI, Log, TEXT("Replacing %s shader with length %d and CRC 0x%x with the one from a file."), (TypeEnum == GL_VERTEX_SHADER) ? TEXT("vertex") : ((TypeEnum == GL_FRAGMENT_SHADER) ? TEXT("fragment") : TEXT("geometry")), GlslCodeOriginal.Num(), GlslCodeOriginalCRC);
|
|
|
|
// read in the file
|
|
GlslCodeOriginal.Empty();
|
|
GlslCodeOriginal.AddUninitialized(FileSize + 1);
|
|
Ar->Serialize(GlslCodeOriginal.GetData(), FileSize);
|
|
delete Ar;
|
|
GlslCodeOriginal[FileSize] = 0;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
Resource = FOpenGL::CreateShader(TypeEnum);
|
|
|
|
// get a modified version of the shader based on device capabilities to compile (destructive to GlslCodeOriginal copy)
|
|
FOpenGLShaderDeviceCapabilities Capabilities;
|
|
GetCurrentOpenGLShaderDeviceCapabilities(Capabilities);
|
|
GLSLToDeviceCompatibleGLSL(GlslCodeOriginal, Header.ShaderName, TypeEnum, Capabilities, GlslCode);
|
|
|
|
GLint CompileStatus = GL_TRUE;
|
|
|
|
// Save the code and defer compilation if our device supports program binaries and we're not checking for shader compatibility.
|
|
if (!FOpenGLProgramBinaryCache::DeferShaderCompilation(Resource, GlslCode))
|
|
{
|
|
CompileStatus = CompileCurrentShader(Resource, GlslCode);
|
|
}
|
|
|
|
if ( CompileStatus == GL_TRUE )
|
|
{
|
|
if (Capabilities.bSupportsSeparateShaderObjects)
|
|
{
|
|
ANSICHAR Buf[32] = {0};
|
|
// Create separate shader program
|
|
GLuint SeparateResource = FOpenGL::CreateProgram();
|
|
FOpenGL::ProgramParameter( SeparateResource, GL_PROGRAM_SEPARABLE, GL_TRUE );
|
|
glAttachShader(SeparateResource, Resource);
|
|
|
|
glLinkProgram(SeparateResource);
|
|
bool const bLinkedOK = VerifyLinkedProgram(SeparateResource);
|
|
if (!bLinkedOK)
|
|
{
|
|
const ANSICHAR* GlslCodeString = GlslCode.GetData();
|
|
check(VerifyCompiledShader(Resource, GlslCodeString));
|
|
}
|
|
|
|
#if ENABLE_UNIFORM_BUFFER_LAYOUT_VERIFICATION
|
|
void VerifyUniformBufferLayouts(GLuint Program);
|
|
VerifyUniformBufferLayouts(SeparateResource);
|
|
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_VERIFICATION
|
|
|
|
Resource = SeparateResource;
|
|
}
|
|
|
|
// Cache it; compile status will be checked later on link (always caching will prevent multiple attempts to compile a failed shader)
|
|
GetOpenGLCompiledShaderCache().Add(Key, Resource);
|
|
}
|
|
}
|
|
|
|
Shader = new ShaderType();
|
|
Shader->Resource = Resource;
|
|
Shader->Bindings = Header.Bindings;
|
|
Shader->UniformBuffersCopyInfo = Header.UniformBuffersCopyInfo;
|
|
|
|
// If there is no shader cache then we must assign the hash here
|
|
if (FOpenGL::SupportsSeparateShaderObjects() && !FShaderCache::GetShaderCache())
|
|
{
|
|
// Just use the CRC - if it isn't being cached & logged we'll be dependent on the CRC alone anyway
|
|
FSHAHash Hash;
|
|
FMemory::Memcpy(Hash.Hash, &GlslCodeOriginalCRC, sizeof(uint32));
|
|
Shader->SetHash(Hash);
|
|
}
|
|
|
|
#if DEBUG_GL_SHADERS
|
|
Shader->GlslCode = GlslCode;
|
|
Shader->GlslCodeString = (ANSICHAR*)Shader->GlslCode.GetData();
|
|
#endif
|
|
|
|
return Shader;
|
|
}
|
|
|
|
/**
 * Fills Capabilities with the shader-related capabilities of the current device.
 * The structure is zeroed first, so any field not assigned for the compiled
 * platform stays zero.
 */
void OPENGLDRV_API GetCurrentOpenGLShaderDeviceCapabilities(FOpenGLShaderDeviceCapabilities& Capabilities)
{
	FMemory::Memzero(Capabilities);

	// Platform-specific part: target platform plus whichever flags that platform sets.
#if PLATFORM_DESKTOP
	Capabilities.TargetPlatform = EOpenGLShaderTargetPlatform::OGLSTP_Desktop;
#elif PLATFORM_ANDROID
	Capabilities.TargetPlatform = EOpenGLShaderTargetPlatform::OGLSTP_Android;
	Capabilities.bUseES30ShadingLanguage = FOpenGL::UseES30ShadingLanguage();
	Capabilities.bSupportsStandardDerivativesExtension = FOpenGL::SupportsStandardDerivativesExtension();
	Capabilities.bSupportsRenderTargetFormat_PF_FloatRGBA = GSupportsRenderTargetFormat_PF_FloatRGBA;
	Capabilities.bSupportsShaderFramebufferFetch = FOpenGL::SupportsShaderFramebufferFetch();
	Capabilities.bRequiresARMShaderFramebufferFetchDepthStencilUndef = FOpenGL::RequiresARMShaderFramebufferFetchDepthStencilUndef();
	Capabilities.bRequiresDontEmitPrecisionForTextureSamplers = FOpenGL::RequiresDontEmitPrecisionForTextureSamplers();
	Capabilities.bSupportsShaderTextureLod = FOpenGL::SupportsShaderTextureLod();
	Capabilities.bSupportsShaderTextureCubeLod = FOpenGL::SupportsShaderTextureCubeLod();
	Capabilities.bRequiresTextureCubeLodEXTToTextureCubeLodDefine = FOpenGL::RequiresTextureCubeLodEXTToTextureCubeLodDefine();
	Capabilities.bRequiresGLFragCoordVaryingLimitHack = FOpenGL::RequiresGLFragCoordVaryingLimitHack();
	Capabilities.MaxVaryingVectors = FOpenGL::GetMaxVaryingVectors();
	Capabilities.bRequiresTexture2DPrecisionHack = FOpenGL::RequiresTexture2DPrecisionHack();
#elif PLATFORM_HTML5
	Capabilities.TargetPlatform = EOpenGLShaderTargetPlatform::OGLSTP_HTML5;
	Capabilities.bUseES30ShadingLanguage = FOpenGL::UseES30ShadingLanguage();
	Capabilities.bSupportsShaderTextureLod = FOpenGL::SupportsShaderTextureLod();
#elif PLATFORM_IOS
	Capabilities.TargetPlatform = EOpenGLShaderTargetPlatform::OGLSTP_iOS;
#else
	Capabilities.TargetPlatform = EOpenGLShaderTargetPlatform::OGLSTP_Unknown;
#endif

	// Set on every platform.
	Capabilities.MaxRHIShaderPlatform = GMaxRHIShaderPlatform;
	Capabilities.bSupportsSeparateShaderObjects = FOpenGL::SupportsSeparateShaderObjects();

#if OPENGL_ES2 || OPENGL_ESDEFERRED
	Capabilities.bRequiresUEShaderFramebufferFetchDef = FOpenGL::RequiresUEShaderFramebufferFetchDef();
#endif
}
|
|
|
|
/**
 * Builds the GLSL text that is actually handed to the driver.
 *
 * A preamble (version line, extensions, capability-dependent #defines) is
 * written to GlslCode, followed by the possibly edited original source.
 *
 * @param GlslCodeOriginal - Source as delivered; modified in place (text replaced, leading '#' lines moved out).
 * @param ShaderName - Optional name emitted as a "// name" comment line.
 * @param TypeEnum - GL shader stage being compiled.
 * @param Capabilities - Device capabilities that drive the emitted workarounds.
 * @param GlslCode - Output; receives preamble plus source.
 */
void OPENGLDRV_API GLSLToDeviceCompatibleGLSL(FAnsiCharArray& GlslCodeOriginal, const FString& ShaderName, GLenum TypeEnum, const FOpenGLShaderDeviceCapabilities& Capabilities, FAnsiCharArray& GlslCode)
{
	// Whether shader was compiled for ES 3.1
	const bool bES31 = (FCStringAnsi::Strstr(GlslCodeOriginal.GetData(), "#version 310 es") != nullptr);

	// Whether we need to emit mobile multi-view code or not.
	const bool bEmitMobileMultiView = (FCStringAnsi::Strstr(GlslCodeOriginal.GetData(), "gl_ViewID_OVR") != nullptr);

	const bool bIsAndroid = (Capabilities.TargetPlatform == EOpenGLShaderTargetPlatform::OGLSTP_Android);
	const bool bIsHTML5 = (Capabilities.TargetPlatform == EOpenGLShaderTargetPlatform::OGLSTP_HTML5);
	const bool bIsDesktop = (Capabilities.TargetPlatform == EOpenGLShaderTargetPlatform::OGLSTP_Desktop);
	const bool bIsIOS = (Capabilities.TargetPlatform == EOpenGLShaderTargetPlatform::OGLSTP_iOS);

	// ---- Version line -------------------------------------------------------
	if (bIsAndroid || bIsHTML5)
	{
		if (IsES2Platform(Capabilities.MaxRHIShaderPlatform) && !bES31)
		{
			// #version NNN has to be the first line in the file, so it has to be added before anything else.
			AppendCString(GlslCode, Capabilities.bUseES30ShadingLanguage ? "#version 300 es\n" : "#version 100\n");
			ReplaceCString(GlslCodeOriginal, "#version 100", "");
		}
	}
	else if (bIsDesktop && PLATFORM_MAC)
	{
		AppendCString(GlslCode, "#version 330\n");
		ReplaceCString(GlslCodeOriginal, "#version 150", "");
	}
	else if (bIsIOS)
	{
		AppendCString(GlslCode, "#version 100\n");
		ReplaceCString(GlslCodeOriginal, "#version 100", "");
	}

	// ---- Mobile multi-view --------------------------------------------------
	if (bEmitMobileMultiView)
	{
		MoveHashLines(GlslCode, GlslCodeOriginal);

		if (GSupportsMobileMultiView)
		{
			AppendCString(GlslCode,
				"\n\n"
				"#extension GL_OVR_multiview2 : enable\n"
				"\n\n");
		}
		else
		{
			// Strip out multi-view for devices that don't support it.
			AppendCString(GlslCode, "#define gl_ViewID_OVR 0\n");
		}
	}

	// ---- Interface location macros ------------------------------------------
	// Only desktop with separable shader platform can use GL_ARB_separate_shader_objects for reduced shader compile/link hitches
	// however ES3.1 relies on layout(location=) support
	bool const bNeedsBindLocation = OpenGLShaderPlatformNeedsBindLocation(Capabilities.MaxRHIShaderPlatform) && !bES31;
	if (OpenGLShaderPlatformSeparable(Capabilities.MaxRHIShaderPlatform) || !bNeedsBindLocation)
	{
		// Move version tag & extensions before beginning all other operations
		MoveHashLines(GlslCode, GlslCodeOriginal);

		// OpenGL SM5 shader platforms require location declarations for the layout, but don't necessarily use SSOs
		if (!(Capabilities.bSupportsSeparateShaderObjects || !bNeedsBindLocation))
		{
			AppendCString(GlslCode, "#define INTERFACE_LOCATION(Pos) \n");
			AppendCString(GlslCode, "#define INTERFACE_BLOCK(Pos, Interp, Modifiers, Semantic, PreType, PostType) Modifiers Semantic { Interp PreType PostType; }\n");
		}
		else if (bIsDesktop)
		{
			AppendCString(GlslCode, "#extension GL_ARB_separate_shader_objects : enable\n");
			AppendCString(GlslCode, "#define INTERFACE_LOCATION(Pos) layout(location=Pos) \n");
			AppendCString(GlslCode, "#define INTERFACE_BLOCK(Pos, Interp, Modifiers, Semantic, PreType, PostType) layout(location=Pos) Interp Modifiers struct { PreType PostType; }\n");
		}
		else
		{
			AppendCString(GlslCode, "#define INTERFACE_LOCATION(Pos) layout(location=Pos) \n");
			AppendCString(GlslCode, "#define INTERFACE_BLOCK(Pos, Interp, Modifiers, Semantic, PreType, PostType) layout(location=Pos) Modifiers Semantic { PreType PostType; }\n");
		}
	}

	// ---- Shader name comment ------------------------------------------------
	if (ShaderName.IsEmpty() == false)
	{
		AppendCString(GlslCode, "// ");
		AppendCString(GlslCode, TCHAR_TO_ANSI(ShaderName.GetCharArray().GetData()));
		AppendCString(GlslCode, "\n");
	}

	if (bEmitMobileMultiView && GSupportsMobileMultiView && TypeEnum == GL_VERTEX_SHADER)
	{
		AppendCString(GlslCode,
			"\n\n"
			"layout(num_views = 2) in;\n"
			"\n\n");
	}

	if (Capabilities.bRequiresUEShaderFramebufferFetchDef && TypeEnum == GL_FRAGMENT_SHADER)
	{
		// Some devices (Zenfone5) support GL_EXT_shader_framebuffer_fetch but do not define GL_EXT_shader_framebuffer_fetch in GLSL compiler
		// We can't define anything with GL_, so we use UE_EXT_shader_framebuffer_fetch to enable frame buffer fetch
		AppendCString(GlslCode, "#define UE_EXT_shader_framebuffer_fetch 1\n");
	}

	// ---- Platform workarounds -----------------------------------------------
	if (bIsAndroid)
	{
		// Temporary patch to remove #extension GL_OES_standard_derivaties if not supported
		// NOTE(review): the branch below runs when bSupportsStandardDerivativesExtension is SET,
		// which reads as the opposite of the comment above - confirm the flag's meaning.
		if (Capabilities.bSupportsStandardDerivativesExtension
			&& FCStringAnsi::Strstr(GlslCodeOriginal.GetData(), "#extension GL_OES_standard_derivatives") != nullptr)
		{
			// Replace the extension enable with dFdx, dFdy, and fwidth definitions so shader will compile.
			// Currently SimpleElementPixelShader.usf is the most likely place this will come from for mobile
			// as it is used for distance field text rendering (GammaDistanceFieldMain) so use a constant
			// for the texture step rate of 1/512. This will not work for other use cases.
			ReplaceCString(GlslCodeOriginal, "#extension GL_OES_standard_derivatives : enable",
				"#define dFdx(a) (0.001953125)\n"
				"#define dFdy(a) (0.001953125)\n"
				"#define fwidth(a) (0.00390625)\n");
		}

		if (IsES2Platform(Capabilities.MaxRHIShaderPlatform) && !bES31)
		{
			// Select the 32bpp HDR encode mode.
			if (Capabilities.bSupportsRenderTargetFormat_PF_FloatRGBA || !IsMobileHDR())
			{
				AppendCString(GlslCode, "#define HDR_32BPP_ENCODE_MODE 0.0\n");
			}
			else if (!Capabilities.bSupportsShaderFramebufferFetch)
			{
				// mosaic
				AppendCString(GlslCode, "#define HDR_32BPP_ENCODE_MODE 1.0\n");
			}
			else
			{
				AppendCString(GlslCode, "#define HDR_32BPP_ENCODE_MODE 2.0\n");
			}

			if (Capabilities.bRequiresARMShaderFramebufferFetchDepthStencilUndef && TypeEnum == GL_FRAGMENT_SHADER)
			{
				// This is to avoid a bug in Adreno drivers that define GL_ARM_shader_framebuffer_fetch_depth_stencil even when device does not support this extension
				// OpenGL ES 3.1 V@127.0 (GIT@I1af360237c)
				AppendCString(GlslCode, "#undef GL_ARM_shader_framebuffer_fetch_depth_stencil\n");
			}

			// This #define fixes compiler errors on Android (which doesn't seem to support textureCubeLodEXT)
			if (Capabilities.bUseES30ShadingLanguage)
			{
				// Texture function remapping shared by the vertex and fragment paths.
				const ANSICHAR* const SharedTextureDefines =
					"#define texture2D texture \n"
					"#define texture2DProj textureProj \n"
					"#define texture2DLod textureLod \n"
					"#define texture2DLodEXT textureLod \n"
					"#define texture2DProjLod textureProjLod \n"
					"#define textureCube texture \n"
					"#define textureCubeLod textureLod \n"
					"#define textureCubeLodEXT textureLod \n"
					"#define texture3D texture \n"
					"#define texture3DProj textureProj \n"
					"#define texture3DLod textureLod \n";

				if (TypeEnum == GL_VERTEX_SHADER)
				{
					AppendCString(GlslCode, SharedTextureDefines);

					ReplaceCString(GlslCodeOriginal, "attribute", "in");
					ReplaceCString(GlslCodeOriginal, "varying", "out");
				}
				else if (TypeEnum == GL_FRAGMENT_SHADER)
				{
					// #extension directives have to come before any non-# directives. Because
					// we add non-# stuff below and the #extension directives
					// get added to the incoming shader source we move any # directives
					// to be right after the #version to ensure they are always correct.
					MoveHashLines(GlslCode, GlslCodeOriginal);

					AppendCString(GlslCode, SharedTextureDefines);
					AppendCString(GlslCode,
						"#define texture3DProjLod textureProjLod \n"
						"\n"
						"#define gl_FragColor out_FragColor \n"
						"#ifdef EXT_shader_framebuffer_fetch_enabled \n"
						"inout mediump vec4 out_FragColor; \n"
						"#else \n"
						"out mediump vec4 out_FragColor; \n"
						"#endif \n");

					ReplaceCString(GlslCodeOriginal, "varying", "in");
				}
			}
			else if (TypeEnum == GL_FRAGMENT_SHADER)
			{
				// Apply #defines to deal with incompatible sections of code

				if (Capabilities.bRequiresDontEmitPrecisionForTextureSamplers)
				{
					AppendCString(GlslCode,
						"#define DONTEMITSAMPLERDEFAULTPRECISION \n");
				}

				if (!Capabilities.bSupportsShaderTextureLod || !Capabilities.bSupportsShaderTextureCubeLod)
				{
					AppendCString(GlslCode,
						"#define DONTEMITEXTENSIONSHADERTEXTURELODENABLE \n"
						"#define texture2DLodEXT(a, b, c) texture2D(a, b) \n"
						"#define textureCubeLodEXT(a, b, c) textureCube(a, b) \n");
				}
				else if (Capabilities.bRequiresTextureCubeLodEXTToTextureCubeLodDefine)
				{
					AppendCString(GlslCode,
						"#define textureCubeLodEXT textureCubeLod \n");
				}

				// Deal with gl_FragCoord using one of the varying vectors and shader possibly exceeding the limit
				if (Capabilities.bRequiresGLFragCoordVaryingLimitHack
					&& CStringCountOccurances(GlslCodeOriginal, "vec4 var_TEXCOORD") >= Capabilities.MaxVaryingVectors)
				{
					// It is likely gl_FragCoord is used for mosaic color output so use an appropriate constant
					ReplaceCString(GlslCodeOriginal, "gl_FragCoord.xy", "vec2(400.5,240.5)");
				}

				if (Capabilities.bRequiresTexture2DPrecisionHack)
				{
					AppendCString(GlslCode, "#define TEXCOORDPRECISIONWORKAROUND \n");
				}
			}
		}
	}
	else if (bIsHTML5)
	{
		// HTML5 use case is much simpler, use a separate chunk of code from android.
		if (!Capabilities.bSupportsShaderTextureLod)
		{
			AppendCString(GlslCode,
				"#define DONTEMITEXTENSIONSHADERTEXTURELODENABLE \n"
				"#define texture2DLodEXT(a, b, c) texture2D(a, b) \n"
				"#define textureCubeLodEXT(a, b, c) textureCube(a, b) \n");
		}
	}

	// ---- Clip space convention ----------------------------------------------
	AppendCString(GlslCode, FOpenGL::SupportsClipControl()
		? "#define HLSLCC_DX11ClipSpace 0 \n"
		: "#define HLSLCC_DX11ClipSpace 1 \n");

	// Append the possibly edited shader to the one we will compile.
	// This is to make it easier to debug as we can see the whole
	// shader source.
	AppendCString(GlslCode, "\n\n");
	AppendCString(GlslCode, GlslCodeOriginal.GetData());
}
|
|
|
|
/**
|
|
* Helper for constructing strings of the form XXXXX##.
|
|
* @param Str - The string to build.
|
|
* @param Offset - Offset into the string at which to set the number.
|
|
* @param Index - Number to set. Must be in the range [0,100).
|
|
*/
|
|
static ANSICHAR* SetIndex(ANSICHAR* Str, int32 Offset, int32 Index)
|
|
{
|
|
check(Index >= 0 && Index < 100);
|
|
|
|
Str += Offset;
|
|
if (Index >= 10)
|
|
{
|
|
*Str++ = '0' + (ANSICHAR)(Index / 10);
|
|
}
|
|
*Str++ = '0' + (ANSICHAR)(Index % 10);
|
|
*Str = '\0';
|
|
return Str;
|
|
}
|
|
|
|
/** Creates a vertex shader from serialized shader code via CompileOpenGLShader. */
FVertexShaderRHIRef FOpenGLDynamicRHI::RHICreateVertexShader(const TArray<uint8>& Code)
{
	FOpenGLVertexShader* const CompiledShader = CompileOpenGLShader<FOpenGLVertexShader>(Code);
	return CompiledShader;
}
|
|
|
|
/** Creates a pixel shader from serialized shader code via CompileOpenGLShader. */
FPixelShaderRHIRef FOpenGLDynamicRHI::RHICreatePixelShader(const TArray<uint8>& Code)
{
	FOpenGLPixelShader* const CompiledShader = CompileOpenGLShader<FOpenGLPixelShader>(Code);
	return CompiledShader;
}
|
|
|
|
/** Creates a geometry shader from serialized shader code via CompileOpenGLShader. */
FGeometryShaderRHIRef FOpenGLDynamicRHI::RHICreateGeometryShader(const TArray<uint8>& Code)
{
	FOpenGLGeometryShader* const CompiledShader = CompileOpenGLShader<FOpenGLGeometryShader>(Code);
	return CompiledShader;
}
|
|
|
|
/** Creates a hull shader from serialized shader code; asserts the max feature level is at least SM5. */
FHullShaderRHIRef FOpenGLDynamicRHI::RHICreateHullShader(const TArray<uint8>& Code)
{
	check(GMaxRHIFeatureLevel >= ERHIFeatureLevel::SM5);

	FOpenGLHullShader* const CompiledShader = CompileOpenGLShader<FOpenGLHullShader>(Code);
	return CompiledShader;
}
|
|
|
|
/** Creates a domain shader from serialized shader code; asserts the max feature level is at least SM5. */
FDomainShaderRHIRef FOpenGLDynamicRHI::RHICreateDomainShader(const TArray<uint8>& Code)
{
	check(GMaxRHIFeatureLevel >= ERHIFeatureLevel::SM5);

	FOpenGLDomainShader* const CompiledShader = CompileOpenGLShader<FOpenGLDomainShader>(Code);
	return CompiledShader;
}
|
|
|
|
/**
 * Stream output is not implemented on this render path: logs a Fatal message
 * and returns a null shader reference. All parameters are ignored.
 */
FGeometryShaderRHIRef FOpenGLDynamicRHI::RHICreateGeometryShaderWithStreamOutput(const TArray<uint8>& Code, const FStreamOutElementList& ElementList, uint32 NumStrides, const uint32* Strides, int32 RasterizedStream)
{
	UE_LOG(LogRHI, Fatal,TEXT("OpenGL Render path does not support stream output!"));
	return FGeometryShaderRHIRef();
}
|
|
|
|
|
|
/**
 * Marks the per-stage parameter caches dirty.
 * @param ShaderParameters - Array of caches indexed by shader stage.
 * @param UpdateCompute - true: stages from SHADER_STAGE_COMPUTE up to NUM_SHADER_STAGES;
 *                        false: stages from SHADER_STAGE_VERTEX up to NUM_NON_COMPUTE_SHADER_STAGES.
 */
static void MarkShaderParameterCachesDirty(FOpenGLShaderParameterCache* ShaderParameters, bool UpdateCompute)
{
	int32 FirstStage = CrossCompiler::SHADER_STAGE_VERTEX;
	int32 EndStage = CrossCompiler::NUM_NON_COMPUTE_SHADER_STAGES;
	if (UpdateCompute)
	{
		FirstStage = CrossCompiler::SHADER_STAGE_COMPUTE;
		EndStage = CrossCompiler::NUM_SHADER_STAGES;
	}

	for (int32 StageIndex = FirstStage; StageIndex < EndStage; ++StageIndex)
	{
		ShaderParameters[StageIndex].MarkAllDirty();
	}
}
|
|
|
|
/**
 * Binds a run of uniform buffers to consecutive GL_UNIFORM_BUFFER binding points.
 * @param ContextState - Cached GL state; updated to reflect every bind performed.
 * @param NumUniformBuffers - Number of entries in BoundUniformBuffers.
 * @param BoundUniformBuffers - Buffers to bind; an invalid reference binds a shared zero-filled dummy buffer.
 * @param FirstUniformBuffer - Binding index used for entry 0.
 * @param ForceUpdate - Bind even when the cached state already matches.
 */
void FOpenGLDynamicRHI::BindUniformBufferBase(FOpenGLContextState& ContextState, int32 NumUniformBuffers, FUniformBufferRHIRef* BoundUniformBuffers, uint32 FirstUniformBuffer, bool ForceUpdate)
{
	SCOPE_CYCLE_COUNTER_DETAILED(STAT_OpenGLUniformBindTime);
	checkSlow(IsInRenderingThread());

	for (int32 BufferIndex = 0; BufferIndex < NumUniformBuffers; ++BufferIndex)
	{
		const int32 BindIndex = FirstUniformBuffer + BufferIndex;

		GLuint Buffer = 0;
		uint32 Offset = 0;
		uint32 Size = ZERO_FILLED_DUMMY_UNIFORM_BUFFER_SIZE;

		if (IsValidRef(BoundUniformBuffers[BufferIndex]))
		{
			FRHIUniformBuffer* UB = BoundUniformBuffers[BufferIndex].GetReference();
			FOpenGLUniformBuffer* GLUniformBuffer = (FOpenGLUniformBuffer*)UB;
			Buffer = GLUniformBuffer->Resource;
			Size = GLUniformBuffer->GetSize();
#if SUBALLOCATED_CONSTANT_BUFFER
			Offset = GLUniformBuffer->Offset;
#endif
		}
		else
		{
			// Nothing bound in this slot: use the dummy buffer, creating it on first need.
			if (PendingState.ZeroFilledDummyUniformBuffer == 0)
			{
				void* ZeroBuffer = FMemory::Malloc(ZERO_FILLED_DUMMY_UNIFORM_BUFFER_SIZE);
				FMemory::Memzero(ZeroBuffer,ZERO_FILLED_DUMMY_UNIFORM_BUFFER_SIZE);
				FOpenGL::GenBuffers(1, &PendingState.ZeroFilledDummyUniformBuffer);
				check(PendingState.ZeroFilledDummyUniformBuffer != 0);
				CachedBindUniformBuffer(ContextState,PendingState.ZeroFilledDummyUniformBuffer);
				glBufferData(GL_UNIFORM_BUFFER, ZERO_FILLED_DUMMY_UNIFORM_BUFFER_SIZE, ZeroBuffer, GL_STATIC_DRAW);
				FMemory::Free(ZeroBuffer);
				IncrementBufferMemory(GL_UNIFORM_BUFFER, false, ZERO_FILLED_DUMMY_UNIFORM_BUFFER_SIZE);
			}

			Buffer = PendingState.ZeroFilledDummyUniformBuffer;
		}

		// Rebind when forced, or when the cached buffer or offset for this slot differs.
		const bool bBufferChanged = (Buffer != 0) && (ContextState.UniformBuffers[BindIndex] != Buffer);
		const bool bOffsetChanged = (ContextState.UniformBufferOffsets[BindIndex] != Offset);
		if (ForceUpdate || bBufferChanged || bOffsetChanged)
		{
			FOpenGL::BindBufferRange(GL_UNIFORM_BUFFER, BindIndex, Buffer, Offset, Size);
			ContextState.UniformBuffers[BindIndex] = Buffer;
			ContextState.UniformBufferOffsets[BindIndex] = Offset;
			ContextState.UniformBufferBound = Buffer; // yes, calling glBindBufferRange also changes uniform buffer binding.
		}
	}
}
|
|
|
|
// ============================================================================================================================
|
|
|
|
struct FOpenGLUniformName
|
|
{
|
|
FOpenGLUniformName()
|
|
{
|
|
FMemory::Memzero(Buffer);
|
|
}
|
|
|
|
ANSICHAR Buffer[10];
|
|
|
|
friend bool operator ==(const FOpenGLUniformName& A,const FOpenGLUniformName& B)
|
|
{
|
|
return FMemory::Memcmp(A.Buffer, B.Buffer, sizeof(A.Buffer)) == 0;
|
|
}
|
|
|
|
friend uint32 GetTypeHash(const FOpenGLUniformName &Key)
|
|
{
|
|
return FCrc::MemCrc32(Key.Buffer, sizeof(Key.Buffer));
|
|
}
|
|
};
|
|
|
|
/** Accessor for the function-local cache mapping program -> (uniform block name -> block index). */
static TMap<GLuint, TMap<FOpenGLUniformName, int64>>& GetOpenGLUniformBlockLocations()
{
	typedef TMap<GLuint, TMap<FOpenGLUniformName, int64>> FUniformBlockLocationCache;

	static FUniformBlockLocationCache LocationCache;
	return LocationCache;
}
|
|
|
|
/** Accessor for the function-local cache mapping program -> (uniform block index -> binding point). */
static TMap<GLuint, TMap<int64, int64>>& GetOpenGLUniformBlockBindings()
{
	typedef TMap<GLuint, TMap<int64, int64>> FUniformBlockBindingCache;

	static FUniformBlockBindingCache BindingCache;
	return BindingCache;
}
|
|
|
|
/**
 * Returns the index of a named uniform block in a program, querying GL only the first time a given
 * (program, name) pair is seen and serving later requests from the cache.
 *
 * @param Program			GL program to query
 * @param UniformBlockName	Name of the uniform block; the whole buffer acts as the cache key
 * @return					The value reported by FOpenGL::GetUniformBlockIndex for this block
 */
static GLuint GetOpenGLProgramUniformBlockIndex(GLuint Program, const FOpenGLUniformName& UniformBlockName)
{
	TMap<FOpenGLUniformName, int64>& ProgramLocations = GetOpenGLUniformBlockLocations().FindOrAdd(Program);

	if (int64* CachedIndex = ProgramLocations.Find(UniformBlockName))
	{
		return *CachedIndex;
	}

	// First request for this name: ask GL and remember the answer.
	int64& NewIndex = ProgramLocations.Emplace(UniformBlockName);
	NewIndex = (int64)FOpenGL::GetUniformBlockIndex(Program, UniformBlockName.Buffer);
	return NewIndex;
}
|
|
|
|
/**
 * Assigns a binding point to a uniform block of a program (despite the "Get" prefix this is a setter).
 * The last binding applied per (program, block index) is cached, and the GL call is skipped when the
 * requested binding is already the cached one.
 *
 * @param Program				GL program owning the uniform block
 * @param UniformBlockIndex		Index of the uniform block within the program
 * @param UniformBlockBinding	Binding point to assign to the block
 */
static void GetOpenGLProgramUniformBlockBinding(GLuint Program, GLuint UniformBlockIndex, GLuint UniformBlockBinding)
{
	TMap<int64, int64>& ProgramBindings = GetOpenGLUniformBlockBindings().FindOrAdd(Program);

	int64* CachedBinding = ProgramBindings.Find(UniformBlockIndex);
	if (CachedBinding == nullptr)
	{
		// Unknown block: seed the cache with -1, which never equals a requested binding.
		CachedBinding = &(ProgramBindings.Emplace(UniformBlockIndex));
		*CachedBinding = -1;
	}
	check(CachedBinding);

	const int64 RequestedBinding = static_cast<int64>(UniformBlockBinding);
	if (*CachedBinding != RequestedBinding)
	{
		*CachedBinding = RequestedBinding;
		FOpenGL::UniformBlockBinding(Program, UniformBlockIndex, UniformBlockBinding);
	}
}
|
|
|
|
// ============================================================================================================================
|
|
|
|
class FOpenGLLinkedProgram
|
|
{
|
|
public:
|
|
FOpenGLLinkedProgramConfiguration Config;
|
|
|
|
struct FPackedUniformInfo
|
|
{
|
|
GLint Location;
|
|
uint8 ArrayType; // OGL_PACKED_ARRAYINDEX_TYPE
|
|
uint8 Index; // OGL_PACKED_INDEX_TYPE
|
|
};
|
|
|
|
// Holds information needed per stage regarding packed uniform globals and uniform buffers
|
|
struct FStagePackedUniformInfo
|
|
{
|
|
// Packed Uniform Arrays (regular globals); array elements per precision/type
|
|
TArray<FPackedUniformInfo> PackedUniformInfos;
|
|
|
|
// Packed Uniform Buffers; outer array is per Uniform Buffer; inner array is per precision/type
|
|
TArray<TArray<FPackedUniformInfo>> PackedUniformBufferInfos;
|
|
|
|
// Holds the unique ID of the last uniform buffer uploaded to the program; since we don't reuse uniform buffers
|
|
// (can't modify existing ones), we use this as a check for dirty/need to mem copy on Mobile
|
|
TArray<uint32> LastEmulatedUniformBufferSet;
|
|
};
|
|
FStagePackedUniformInfo StagePackedUniformInfo[CrossCompiler::NUM_SHADER_STAGES];
|
|
|
|
GLuint Program;
|
|
bool bUsingTessellation;
|
|
bool bDrawn;
|
|
|
|
TBitArray<> TextureStageNeeds;
|
|
TBitArray<> UAVStageNeeds;
|
|
int32 MaxTextureStage;
|
|
|
|
TArray<FOpenGLBindlessSamplerInfo> Samplers;
|
|
|
|
FOpenGLLinkedProgram()
|
|
: Program(0), bUsingTessellation(false), bDrawn(false), MaxTextureStage(-1)
|
|
{
|
|
TextureStageNeeds.Init( false, FOpenGL::GetMaxCombinedTextureImageUnits() );
|
|
UAVStageNeeds.Init( false, OGL_MAX_COMPUTE_STAGE_UAV_UNITS );
|
|
}
|
|
|
|
~FOpenGLLinkedProgram()
|
|
{
|
|
check(Program);
|
|
FOpenGL::DeleteProgramPipelines(1, &Program);
|
|
|
|
if (!FOpenGL::SupportsSeparateShaderObjects())
|
|
{
|
|
GetOpenGLUniformBlockLocations().Remove(Program);
|
|
GetOpenGLUniformBlockBindings().Remove(Program);
|
|
}
|
|
}
|
|
|
|
// Rebind the uniform blocks when changing the separable shader pipeline as different stages will have different uniform block arrangements. Does nothing for non-separable GLs.
|
|
void VerifyUniformBlockBindings( int Stage, uint32 FirstUniformBuffer );
|
|
|
|
void ConfigureShaderStage( int Stage, uint32 FirstUniformBuffer );
|
|
|
|
// Make sure GlobalArrays (created from shader reflection) matches our info (from the cross compiler)
|
|
static inline void SortPackedUniformInfos(const TArray<FPackedUniformInfo>& ReflectedUniformInfos, const TArray<CrossCompiler::FPackedArrayInfo>& PackedGlobalArrays, TArray<FPackedUniformInfo>& OutPackedUniformInfos)
|
|
{
|
|
check(OutPackedUniformInfos.Num() == 0);
|
|
OutPackedUniformInfos.Empty(PackedGlobalArrays.Num());
|
|
for (int32 Index = 0; Index < PackedGlobalArrays.Num(); ++Index)
|
|
{
|
|
auto& PackedArray = PackedGlobalArrays[Index];
|
|
FPackedUniformInfo OutInfo = {-1, PackedArray.TypeName, CrossCompiler::PACKED_TYPEINDEX_MAX};
|
|
|
|
// Find this Global Array in the reflection list
|
|
for (int32 FindIndex = 0; FindIndex < ReflectedUniformInfos.Num(); ++FindIndex)
|
|
{
|
|
auto& ReflectedInfo = ReflectedUniformInfos[FindIndex];
|
|
if (ReflectedInfo.ArrayType == PackedArray.TypeName)
|
|
{
|
|
OutInfo = ReflectedInfo;
|
|
break;
|
|
}
|
|
}
|
|
|
|
OutPackedUniformInfos.Add(OutInfo);
|
|
}
|
|
}
|
|
};
|
|
|
|
typedef TMap<FOpenGLLinkedProgramConfiguration,FOpenGLLinkedProgram*> FOpenGLProgramsForReuse;
|
|
|
|
static FOpenGLProgramsForReuse& GetOpenGLProgramsCache()
|
|
{
|
|
static FOpenGLProgramsForReuse ProgramsCache;
|
|
return ProgramsCache;
|
|
}
|
|
|
|
// This short queue preceding released programs cache is here because usually the programs are requested again
|
|
// very shortly after they're released, so looking through recently released programs first provides tangible
|
|
// performance improvement.
|
|
|
|
#define LAST_RELEASED_PROGRAMS_CACHE_COUNT 10
|
|
|
|
static FOpenGLLinkedProgram* StaticLastReleasedPrograms[LAST_RELEASED_PROGRAMS_CACHE_COUNT] = { 0 };
|
|
static int32 StaticLastReleasedProgramsIndex = 0;
|
|
|
|
// ============================================================================================================================
|
|
|
|
/**
 * Counts how many bits of a bit array are set.
 *
 * @param Array	Bit array to inspect
 * @return		Number of bits whose value is true
 */
static int32 CountSetBits(const TBitArray<>& Array)
{
	int32 NumSetBits = 0;
	for (TBitArray<>::FConstIterator It(Array); It; ++It)
	{
		if (It.GetValue())
		{
			++NumSetBits;
		}
	}
	return NumSetBits;
}
|
|
|
|
void FOpenGLLinkedProgram::VerifyUniformBlockBindings( int Stage, uint32 FirstUniformBuffer )
|
|
{
|
|
if ( FOpenGL::SupportsSeparateShaderObjects() && FOpenGL::SupportsUniformBuffers() )
|
|
{
|
|
FOpenGLUniformName Name;
|
|
Name.Buffer[0] = CrossCompiler::ShaderStageIndexToTypeName(Stage);
|
|
Name.Buffer[1] = 'b';
|
|
|
|
GLuint StageProgram = Config.Shaders[Stage].Resource;
|
|
|
|
for (int32 BufferIndex = 0; BufferIndex < Config.Shaders[Stage].Bindings.NumUniformBuffers; ++BufferIndex)
|
|
{
|
|
SetIndex(Name.Buffer, 2, BufferIndex);
|
|
GLint Location = GetOpenGLProgramUniformBlockIndex(StageProgram, Name);
|
|
if (Location >= 0)
|
|
{
|
|
GetOpenGLProgramUniformBlockBinding(StageProgram, Location, FirstUniformBuffer + BufferIndex);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void FOpenGLLinkedProgram::ConfigureShaderStage( int Stage, uint32 FirstUniformBuffer )
|
|
{
|
|
static const GLint FirstTextureUnit[CrossCompiler::NUM_SHADER_STAGES] =
|
|
{
|
|
FOpenGL::GetFirstVertexTextureUnit(),
|
|
FOpenGL::GetFirstPixelTextureUnit(),
|
|
FOpenGL::GetFirstGeometryTextureUnit(),
|
|
FOpenGL::GetFirstHullTextureUnit(),
|
|
FOpenGL::GetFirstDomainTextureUnit(),
|
|
FOpenGL::GetFirstComputeTextureUnit()
|
|
};
|
|
static const GLint FirstUAVUnit[CrossCompiler::NUM_SHADER_STAGES] =
|
|
{
|
|
OGL_UAV_NOT_SUPPORTED_FOR_GRAPHICS_UNIT,
|
|
OGL_UAV_NOT_SUPPORTED_FOR_GRAPHICS_UNIT,
|
|
OGL_UAV_NOT_SUPPORTED_FOR_GRAPHICS_UNIT,
|
|
OGL_UAV_NOT_SUPPORTED_FOR_GRAPHICS_UNIT,
|
|
OGL_UAV_NOT_SUPPORTED_FOR_GRAPHICS_UNIT,
|
|
FOpenGL::GetFirstComputeUAVUnit()
|
|
};
|
|
|
|
// verify that only CS uses UAVs
|
|
check((Stage != CrossCompiler::SHADER_STAGE_COMPUTE) ? (CountSetBits(UAVStageNeeds) == 0) : true);
|
|
|
|
SCOPE_CYCLE_COUNTER(STAT_OpenGLShaderBindParameterTime);
|
|
VERIFY_GL_SCOPE();
|
|
|
|
FOpenGLUniformName Name;
|
|
Name.Buffer[0] = CrossCompiler::ShaderStageIndexToTypeName(Stage);
|
|
|
|
GLuint StageProgram = FOpenGL::SupportsSeparateShaderObjects() ? Config.Shaders[Stage].Resource : Program;
|
|
|
|
// Bind Global uniform arrays (vu_h, pu_i, etc)
|
|
{
|
|
Name.Buffer[1] = 'u';
|
|
Name.Buffer[2] = '_';
|
|
Name.Buffer[3] = 0;
|
|
Name.Buffer[4] = 0;
|
|
|
|
TArray<FPackedUniformInfo> PackedUniformInfos;
|
|
for (uint8 Index = 0; Index < CrossCompiler::PACKED_TYPEINDEX_MAX; ++Index)
|
|
{
|
|
uint8 ArrayIndexType = CrossCompiler::PackedTypeIndexToTypeName(Index);
|
|
Name.Buffer[3] = ArrayIndexType;
|
|
GLint Location = glGetUniformLocation(StageProgram, Name.Buffer);
|
|
if ((int32)Location != -1)
|
|
{
|
|
FPackedUniformInfo Info = {Location, ArrayIndexType, Index};
|
|
PackedUniformInfos.Add(Info);
|
|
}
|
|
}
|
|
|
|
SortPackedUniformInfos(PackedUniformInfos, Config.Shaders[Stage].Bindings.PackedGlobalArrays, StagePackedUniformInfo[Stage].PackedUniformInfos);
|
|
}
|
|
|
|
// Bind uniform buffer packed arrays (vc0_h, pc2_i, etc)
|
|
{
|
|
Name.Buffer[1] = 'c';
|
|
Name.Buffer[2] = 0;
|
|
Name.Buffer[3] = 0;
|
|
Name.Buffer[4] = 0;
|
|
Name.Buffer[5] = 0;
|
|
Name.Buffer[6] = 0;
|
|
for (uint8 UB = 0; UB < Config.Shaders[Stage].Bindings.NumUniformBuffers; ++UB)
|
|
{
|
|
TArray<FPackedUniformInfo> PackedBuffers;
|
|
ANSICHAR* Str = SetIndex(Name.Buffer, 2, UB);
|
|
*Str++ = '_';
|
|
Str[1] = 0;
|
|
for (uint8 Index = 0; Index < CrossCompiler::PACKED_TYPEINDEX_MAX; ++Index)
|
|
{
|
|
uint8 ArrayIndexType = CrossCompiler::PackedTypeIndexToTypeName(Index);
|
|
Str[0] = ArrayIndexType;
|
|
GLint Location = glGetUniformLocation(StageProgram, Name.Buffer);
|
|
if ((int32)Location != -1)
|
|
{
|
|
FPackedUniformInfo Info = {Location, ArrayIndexType, Index};
|
|
PackedBuffers.Add(Info);
|
|
}
|
|
}
|
|
|
|
StagePackedUniformInfo[Stage].PackedUniformBufferInfos.Add(PackedBuffers);
|
|
}
|
|
}
|
|
|
|
// Reserve and setup Space for Emulated Uniform Buffers
|
|
StagePackedUniformInfo[Stage].LastEmulatedUniformBufferSet.Empty(Config.Shaders[Stage].Bindings.NumUniformBuffers);
|
|
StagePackedUniformInfo[Stage].LastEmulatedUniformBufferSet.AddZeroed(Config.Shaders[Stage].Bindings.NumUniformBuffers);
|
|
|
|
// Bind samplers.
|
|
Name.Buffer[1] = 's';
|
|
Name.Buffer[2] = 0;
|
|
Name.Buffer[3] = 0;
|
|
Name.Buffer[4] = 0;
|
|
int32 LastFoundIndex = -1;
|
|
for (int32 SamplerIndex = 0; SamplerIndex < Config.Shaders[Stage].Bindings.NumSamplers; ++SamplerIndex)
|
|
{
|
|
SetIndex(Name.Buffer, 2, SamplerIndex);
|
|
GLint Location = glGetUniformLocation(StageProgram, Name.Buffer);
|
|
if (Location == -1)
|
|
{
|
|
if (LastFoundIndex != -1)
|
|
{
|
|
// It may be an array of samplers. Get the initial element location, if available, and count from it.
|
|
SetIndex(Name.Buffer, 2, LastFoundIndex);
|
|
int32 OffsetOfArraySpecifier = (LastFoundIndex>9)?4:3;
|
|
int32 ArrayIndex = SamplerIndex-LastFoundIndex;
|
|
Name.Buffer[OffsetOfArraySpecifier] = '[';
|
|
ANSICHAR* EndBracket = SetIndex(Name.Buffer, OffsetOfArraySpecifier+1, ArrayIndex);
|
|
*EndBracket++ = ']';
|
|
*EndBracket = 0;
|
|
Location = glGetUniformLocation(StageProgram, Name.Buffer);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
LastFoundIndex = SamplerIndex;
|
|
}
|
|
|
|
if (Location != -1)
|
|
{
|
|
if ( OpenGLConsoleVariables::bBindlessTexture == 0 || !FOpenGL::SupportsBindlessTexture())
|
|
{
|
|
// Non-bindless, setup the unit info
|
|
FOpenGL::ProgramUniform1i(StageProgram, Location, FirstTextureUnit[Stage] + SamplerIndex);
|
|
TextureStageNeeds[ FirstTextureUnit[Stage] + SamplerIndex ] = true;
|
|
MaxTextureStage = FMath::Max( MaxTextureStage, FirstTextureUnit[Stage] + SamplerIndex);
|
|
}
|
|
else
|
|
{
|
|
//Bindless, save off the slot information
|
|
FOpenGLBindlessSamplerInfo Info;
|
|
Info.Handle = Location;
|
|
Info.Slot = FirstTextureUnit[Stage] + SamplerIndex;
|
|
Samplers.Add(Info);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Bind UAVs/images.
|
|
Name.Buffer[1] = 'i';
|
|
Name.Buffer[2] = 0;
|
|
Name.Buffer[3] = 0;
|
|
Name.Buffer[4] = 0;
|
|
int32 LastFoundUAVIndex = -1;
|
|
for (int32 UAVIndex = 0; UAVIndex < Config.Shaders[Stage].Bindings.NumUAVs; ++UAVIndex)
|
|
{
|
|
SetIndex(Name.Buffer, 2, UAVIndex);
|
|
GLint Location = glGetUniformLocation(StageProgram, Name.Buffer);
|
|
if (Location == -1)
|
|
{
|
|
if (LastFoundUAVIndex != -1)
|
|
{
|
|
// It may be an array of UAVs. Get the initial element location, if available, and count from it.
|
|
SetIndex(Name.Buffer, 2, LastFoundUAVIndex);
|
|
int32 OffsetOfArraySpecifier = (LastFoundUAVIndex>9)?4:3;
|
|
int32 ArrayIndex = UAVIndex-LastFoundUAVIndex;
|
|
Name.Buffer[OffsetOfArraySpecifier] = '[';
|
|
ANSICHAR* EndBracket = SetIndex(Name.Buffer, OffsetOfArraySpecifier+1, ArrayIndex);
|
|
*EndBracket++ = ']';
|
|
*EndBracket = '\0';
|
|
Location = glGetUniformLocation(StageProgram, Name.Buffer);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
LastFoundUAVIndex = UAVIndex;
|
|
}
|
|
|
|
if (Location != -1)
|
|
{
|
|
// compute shaders have layout(binding) for images
|
|
// glUniform1i(Location, FirstUAVUnit[Stage] + UAVIndex);
|
|
|
|
UAVStageNeeds[ FirstUAVUnit[Stage] + UAVIndex ] = true;
|
|
}
|
|
}
|
|
|
|
// Bind uniform buffers.
|
|
if (FOpenGL::SupportsUniformBuffers())
|
|
{
|
|
Name.Buffer[1] = 'b';
|
|
Name.Buffer[2] = 0;
|
|
Name.Buffer[3] = 0;
|
|
Name.Buffer[4] = 0;
|
|
for (int32 BufferIndex = 0; BufferIndex < Config.Shaders[Stage].Bindings.NumUniformBuffers; ++BufferIndex)
|
|
{
|
|
SetIndex(Name.Buffer, 2, BufferIndex);
|
|
GLint Location = GetOpenGLProgramUniformBlockIndex(StageProgram, Name);
|
|
if (Location >= 0)
|
|
{
|
|
GetOpenGLProgramUniformBlockBinding(StageProgram, Location, FirstUniformBuffer + BufferIndex);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#if ENABLE_UNIFORM_BUFFER_LAYOUT_VERIFICATION
|
|
|
|
#define ENABLE_UNIFORM_BUFFER_LAYOUT_NAME_MANGLING_CL1862097 1
|
|
/*
|
|
As of CL 1862097 uniform buffer names are mangled to avoid collisions between variables referenced
|
|
in different shaders of the same program
|
|
|
|
layout(std140) uniform _vb0
|
|
{
|
|
#define View View_vb0
|
|
anon_struct_0000 View;
|
|
};
|
|
|
|
layout(std140) uniform _vb1
|
|
{
|
|
#define Primitive Primitive_vb1
|
|
anon_struct_0001 Primitive;
|
|
};
|
|
*/
|
|
|
|
|
|
struct UniformData
|
|
{
|
|
UniformData(uint32 InOffset, uint32 InArrayElements)
|
|
: Offset(InOffset)
|
|
, ArrayElements(InArrayElements)
|
|
{
|
|
}
|
|
uint32 Offset;
|
|
uint32 ArrayElements;
|
|
|
|
bool operator == (const UniformData& RHS) const
|
|
{
|
|
return Offset == RHS.Offset && ArrayElements == RHS.ArrayElements;
|
|
}
|
|
bool operator != (const UniformData& RHS) const
|
|
{
|
|
return !(*this == RHS);
|
|
}
|
|
};
|
|
#if ENABLE_UNIFORM_BUFFER_LAYOUT_NAME_MANGLING_CL1862097
|
|
static void VerifyUniformLayout(const FString& BlockName, const TCHAR* UniformName, const UniformData& GLSLUniform)
|
|
#else
|
|
static void VerifyUniformLayout(const TCHAR* UniformName, const UniformData& GLSLUniform)
|
|
#endif //#if ENABLE_UNIFORM_BUFFER_LAYOUT_NAME_MANGLING_CL1862097
|
|
{
|
|
static TMap<FString, UniformData> Uniforms;
|
|
|
|
if(!Uniforms.Num())
|
|
{
|
|
for (TLinkedList<FUniformBufferStruct*>::TIterator StructIt(FUniformBufferStruct::GetStructList()); StructIt; StructIt.Next())
|
|
{
|
|
#if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
|
|
UE_LOG(LogRHI, Log, TEXT("UniformBufferStruct %s %s %d"),
|
|
StructIt->GetStructTypeName(),
|
|
StructIt->GetShaderVariableName(),
|
|
StructIt->GetSize()
|
|
);
|
|
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
|
|
const TArray<FUniformBufferStruct::FMember>& StructMembers = StructIt->GetMembers();
|
|
for(int32 MemberIndex = 0;MemberIndex < StructMembers.Num();++MemberIndex)
|
|
{
|
|
const FUniformBufferStruct::FMember& Member = StructMembers[MemberIndex];
|
|
|
|
FString BaseTypeName;
|
|
switch(Member.GetBaseType())
|
|
{
|
|
case UBMT_STRUCT: BaseTypeName = TEXT("struct"); break;
|
|
case UBMT_BOOL: BaseTypeName = TEXT("bool"); break;
|
|
case UBMT_INT32: BaseTypeName = TEXT("int"); break;
|
|
case UBMT_UINT32: BaseTypeName = TEXT("uint"); break;
|
|
case UBMT_FLOAT32: BaseTypeName = TEXT("float"); break;
|
|
case UBMT_TEXTURE: BaseTypeName = TEXT("texture"); break;
|
|
case UBMT_SAMPLER: BaseTypeName = TEXT("sampler"); break;
|
|
default: UE_LOG(LogShaders, Fatal,TEXT("Unrecognized uniform buffer struct member base type."));
|
|
};
|
|
#if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
|
|
UE_LOG(LogRHI, Log, TEXT(" +%d %s%dx%d %s[%d]"),
|
|
Member.GetOffset(),
|
|
*BaseTypeName,
|
|
Member.GetNumRows(),
|
|
Member.GetNumColumns(),
|
|
Member.GetName(),
|
|
Member.GetNumElements()
|
|
);
|
|
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
|
|
FString CompositeName = FString(StructIt->GetShaderVariableName()) + TEXT("_") + Member.GetName();
|
|
|
|
// GLSL returns array members with a "[0]" suffix
|
|
if(Member.GetNumElements())
|
|
{
|
|
CompositeName += TEXT("[0]");
|
|
}
|
|
|
|
check(!Uniforms.Contains(CompositeName));
|
|
Uniforms.Add(CompositeName, UniformData(Member.GetOffset(), Member.GetNumElements()));
|
|
}
|
|
}
|
|
}
|
|
|
|
#if ENABLE_UNIFORM_BUFFER_LAYOUT_NAME_MANGLING_CL1862097
|
|
/* unmangle the uniform name by stripping the block name from it
|
|
|
|
layout(std140) uniform _vb0
|
|
{
|
|
#define View View_vb0
|
|
anon_struct_0000 View;
|
|
};
|
|
*/
|
|
FString RequestedUniformName(UniformName);
|
|
RequestedUniformName = RequestedUniformName.Replace(*BlockName, TEXT(""));
|
|
if(RequestedUniformName.StartsWith(TEXT(".")))
|
|
{
|
|
RequestedUniformName = RequestedUniformName.RightChop(1);
|
|
}
|
|
#else
|
|
FString RequestedUniformName = UniformName;
|
|
#endif
|
|
|
|
const UniformData* FoundUniform = Uniforms.Find(RequestedUniformName);
|
|
|
|
// MaterialTemplate uniform buffer does not have an entry in the FUniformBufferStructs list, so skipping it here
|
|
if(!(RequestedUniformName.StartsWith("Material_") || RequestedUniformName.StartsWith("MaterialCollection")))
|
|
{
|
|
if(!FoundUniform || (*FoundUniform != GLSLUniform))
|
|
{
|
|
UE_LOG(LogRHI, Fatal, TEXT("uniform buffer member %s in the GLSL source doesn't match it's declaration in it's FUniformBufferStruct"), *RequestedUniformName);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
 * Walks every active uniform block of a program and verifies each active uniform in it against the
 * engine-side declarations via VerifyUniformLayout. With ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP set, the
 * blocks and uniforms are also written to the log.
 *
 * @param Program	GL program whose uniform blocks are inspected
 */
static void VerifyUniformBufferLayouts(GLuint Program)
{
	GLint NumBlocks = 0;
	glGetProgramiv(Program, GL_ACTIVE_UNIFORM_BLOCKS, &NumBlocks);

#if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
	UE_LOG(LogRHI, Log, TEXT("program %d has %d uniform blocks"), Program, NumBlocks);
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP

	for(GLint BlockIndex = 0; BlockIndex < NumBlocks; ++BlockIndex)
	{
		// Scratch buffer: first receives the block name, later each uniform name.
		const GLsizei BufferSize = 256;
		char Buffer[BufferSize] = {0};
		GLsizei Length = 0;

		GLint ActiveUniforms = 0;
		GLint BlockBytes = 0;

		glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS, &ActiveUniforms);
		glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_DATA_SIZE, &BlockBytes);
		glGetActiveUniformBlockName(Program, BlockIndex, BufferSize, &Length, Buffer);

#if ENABLE_UNIFORM_BUFFER_LAYOUT_NAME_MANGLING_CL1862097
		// Copy the block name now; Buffer is overwritten by the per-uniform queries below.
		FString BlockName(Buffer);
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_NAME_MANGLING_CL1862097

		// One letter per shader stage that references this block (only used by the dump output).
		FString ReferencedBy;
		{
			GLint ReferencedByVS = 0;
			GLint ReferencedByPS = 0;
			GLint ReferencedByGS = 0;
			GLint ReferencedByHS = 0;
			GLint ReferencedByDS = 0;
			GLint ReferencedByCS = 0;

			glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_REFERENCED_BY_VERTEX_SHADER, &ReferencedByVS);
			glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_REFERENCED_BY_FRAGMENT_SHADER, &ReferencedByPS);
#ifdef GL_UNIFORM_BLOCK_REFERENCED_BY_GEOMETRY_SHADER
			glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_REFERENCED_BY_GEOMETRY_SHADER, &ReferencedByGS);
#endif
			if (GMaxRHIFeatureLevel >= ERHIFeatureLevel::SM5)
			{
#ifdef GL_UNIFORM_BLOCK_REFERENCED_BY_TESS_CONTROL_SHADER
				glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_REFERENCED_BY_TESS_CONTROL_SHADER, &ReferencedByHS);
				glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_REFERENCED_BY_TESS_EVALUATION_SHADER, &ReferencedByDS);
#endif
#ifdef GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER
				glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER, &ReferencedByCS);
#endif
			}

			if(ReferencedByVS) {ReferencedBy += TEXT("V");}
			if(ReferencedByHS) {ReferencedBy += TEXT("H");}
			if(ReferencedByDS) {ReferencedBy += TEXT("D");}
			if(ReferencedByGS) {ReferencedBy += TEXT("G");}
			if(ReferencedByPS) {ReferencedBy += TEXT("P");}
			if(ReferencedByCS) {ReferencedBy += TEXT("C");}
		}
#if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
		UE_LOG(LogRHI, Log, TEXT(" [%d] uniform block (%s) = %s, %d active uniforms, %d bytes {"),
			BlockIndex,
			*ReferencedBy,
			ANSI_TO_TCHAR(Buffer),
			ActiveUniforms,
			BlockBytes
			);
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
		if(ActiveUniforms)
		{
			// the other TArrays copy construct this to get the proper array size
			// Sized with zero-filled slots so every entry has a defined value before GL fills it in.
			TArray<GLint> ActiveUniformIndices;
			ActiveUniformIndices.AddZeroed(ActiveUniforms);

			glGetActiveUniformBlockiv(Program, BlockIndex, GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES, ActiveUniformIndices.GetData());

			TArray<GLint> ActiveUniformOffsets(ActiveUniformIndices);
			glGetActiveUniformsiv(Program, ActiveUniforms, reinterpret_cast<const GLuint*>(ActiveUniformIndices.GetData()), GL_UNIFORM_OFFSET, ActiveUniformOffsets.GetData());

			TArray<GLint> ActiveUniformSizes(ActiveUniformIndices);
			glGetActiveUniformsiv(Program, ActiveUniforms, reinterpret_cast<const GLuint*>(ActiveUniformIndices.GetData()), GL_UNIFORM_SIZE, ActiveUniformSizes.GetData());

			TArray<GLint> ActiveUniformTypes(ActiveUniformIndices);
			glGetActiveUniformsiv(Program, ActiveUniforms, reinterpret_cast<const GLuint*>(ActiveUniformIndices.GetData()), GL_UNIFORM_TYPE, ActiveUniformTypes.GetData());

			TArray<GLint> ActiveUniformArrayStrides(ActiveUniformIndices);
			glGetActiveUniformsiv(Program, ActiveUniforms, reinterpret_cast<const GLuint*>(ActiveUniformIndices.GetData()), GL_UNIFORM_ARRAY_STRIDE, ActiveUniformArrayStrides.GetData());

			extern const TCHAR* GetGLUniformTypeString( GLint UniformType );

			for(GLint i = 0; i < ActiveUniformIndices.Num(); ++i)
			{
				const GLint UniformIndex = ActiveUniformIndices[i];
				GLsizei Size = 0;
				GLenum Type = 0;
				glGetActiveUniform(Program, UniformIndex , BufferSize, &Length, &Size, &Type, Buffer);

#if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP
				UE_LOG(LogRHI, Log, TEXT(" [%d] +%d %s %s %d elements %d array stride"),
					UniformIndex,
					ActiveUniformOffsets[i],
					GetGLUniformTypeString(ActiveUniformTypes[i]),
					ANSI_TO_TCHAR(Buffer),
					ActiveUniformSizes[i],
					ActiveUniformArrayStrides[i]
					);
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_DUMP

				const UniformData GLSLUniform
					(
					ActiveUniformOffsets[i],
					ActiveUniformArrayStrides[i] > 0 ? ActiveUniformSizes[i] : 0 // GLSL has 1 as array size for non-array uniforms, but FUniformBufferStruct assumes 0
					);
#if ENABLE_UNIFORM_BUFFER_LAYOUT_NAME_MANGLING_CL1862097
				VerifyUniformLayout(BlockName, ANSI_TO_TCHAR(Buffer), GLSLUniform);
#else
				VerifyUniformLayout(ANSI_TO_TCHAR(Buffer), GLSLUniform);
#endif
			}
		}
	}
}
|
|
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_VERIFICATION
|
|
|
|
/**
|
|
* Link vertex and pixel shaders in to an OpenGL program.
|
|
*/
|
|
static FOpenGLLinkedProgram* LinkProgram( const FOpenGLLinkedProgramConfiguration& Config)
|
|
{
|
|
ANSICHAR Buf[32] = {0};
|
|
|
|
SCOPE_CYCLE_COUNTER(STAT_OpenGLShaderLinkTime);
|
|
VERIFY_GL_SCOPE();
|
|
|
|
// ensure that compute shaders are always alone
|
|
check( (Config.Shaders[CrossCompiler::SHADER_STAGE_VERTEX].Resource == 0) != (Config.Shaders[CrossCompiler::SHADER_STAGE_COMPUTE].Resource == 0));
|
|
check( (Config.Shaders[CrossCompiler::SHADER_STAGE_PIXEL].Resource == 0) != (Config.Shaders[CrossCompiler::SHADER_STAGE_COMPUTE].Resource == 0));
|
|
|
|
GLuint Program = 0;
|
|
FOpenGL::GenProgramPipelines(1, &Program);
|
|
|
|
bool bShouldLinkProgram = true;
|
|
if (FOpenGLProgramBinaryCache::IsEnabled())
|
|
{
|
|
// Try to create program from a saved binary
|
|
bShouldLinkProgram = !FOpenGLProgramBinaryCache::UseCachedProgram(Program, Config);
|
|
if (bShouldLinkProgram)
|
|
{
|
|
// In case there is no saved binary in the cache, compile required shaders we have deferred before
|
|
FOpenGLProgramBinaryCache::CompilePendingShaders(Config);
|
|
}
|
|
}
|
|
|
|
if (bShouldLinkProgram)
|
|
{
|
|
if (Config.Shaders[CrossCompiler::SHADER_STAGE_VERTEX].Resource)
|
|
{
|
|
FOpenGL::UseProgramStages(Program, GL_VERTEX_SHADER_BIT, Config.Shaders[CrossCompiler::SHADER_STAGE_VERTEX].Resource);
|
|
}
|
|
if (Config.Shaders[CrossCompiler::SHADER_STAGE_PIXEL].Resource)
|
|
{
|
|
FOpenGL::UseProgramStages(Program, GL_FRAGMENT_SHADER_BIT, Config.Shaders[CrossCompiler::SHADER_STAGE_PIXEL].Resource);
|
|
}
|
|
if (Config.Shaders[CrossCompiler::SHADER_STAGE_GEOMETRY].Resource)
|
|
{
|
|
FOpenGL::UseProgramStages(Program, GL_GEOMETRY_SHADER_BIT, Config.Shaders[CrossCompiler::SHADER_STAGE_GEOMETRY].Resource);
|
|
}
|
|
if (Config.Shaders[CrossCompiler::SHADER_STAGE_HULL].Resource)
|
|
{
|
|
FOpenGL::UseProgramStages(Program, GL_TESS_CONTROL_SHADER_BIT, Config.Shaders[CrossCompiler::SHADER_STAGE_HULL].Resource);
|
|
}
|
|
if (Config.Shaders[CrossCompiler::SHADER_STAGE_DOMAIN].Resource)
|
|
{
|
|
FOpenGL::UseProgramStages(Program, GL_TESS_EVALUATION_SHADER_BIT, Config.Shaders[CrossCompiler::SHADER_STAGE_DOMAIN].Resource);
|
|
}
|
|
if (Config.Shaders[CrossCompiler::SHADER_STAGE_COMPUTE].Resource)
|
|
{
|
|
FOpenGL::UseProgramStages(Program, GL_COMPUTE_SHADER_BIT, Config.Shaders[CrossCompiler::SHADER_STAGE_COMPUTE].Resource);
|
|
}
|
|
|
|
if( !FOpenGL::SupportsSeparateShaderObjects() )
|
|
{
|
|
// E.g. GLSL_430 uses layout(location=xx) instead of having to call glBindAttribLocation and glBindFragDataLocation
|
|
if (OpenGLShaderPlatformNeedsBindLocation(GMaxRHIShaderPlatform))
|
|
{
|
|
// Bind attribute indices.
|
|
if (Config.Shaders[CrossCompiler::SHADER_STAGE_VERTEX].Resource)
|
|
{
|
|
auto& VertexBindings = Config.Shaders[CrossCompiler::SHADER_STAGE_VERTEX].Bindings;
|
|
BindShaderLocations(GL_VERTEX_SHADER, Program, VertexBindings.InOutMask, VertexBindings.VertexAttributeRemap);
|
|
}
|
|
|
|
// Bind frag data locations.
|
|
if (Config.Shaders[CrossCompiler::SHADER_STAGE_PIXEL].Resource)
|
|
{
|
|
BindShaderLocations(GL_FRAGMENT_SHADER, Program, Config.Shaders[CrossCompiler::SHADER_STAGE_PIXEL].Bindings.InOutMask);
|
|
}
|
|
}
|
|
|
|
// Link.
|
|
glLinkProgram(Program);
|
|
|
|
if (FOpenGLProgramBinaryCache::IsEnabled())
|
|
{
|
|
FOpenGLProgramBinaryCache::CacheProgram(Program, Config);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!VerifyProgramPipeline(Program))
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
FOpenGL::BindProgramPipeline(Program);
|
|
|
|
FOpenGLLinkedProgram* LinkedProgram = new FOpenGLLinkedProgram;
|
|
LinkedProgram->Config = Config;
|
|
LinkedProgram->Program = Program;
|
|
LinkedProgram->bUsingTessellation = Config.Shaders[CrossCompiler::SHADER_STAGE_HULL].Resource && Config.Shaders[CrossCompiler::SHADER_STAGE_DOMAIN].Resource;
|
|
|
|
if (Config.Shaders[CrossCompiler::SHADER_STAGE_VERTEX].Resource)
|
|
{
|
|
LinkedProgram->ConfigureShaderStage(
|
|
CrossCompiler::SHADER_STAGE_VERTEX,
|
|
OGL_FIRST_UNIFORM_BUFFER
|
|
);
|
|
check(LinkedProgram->StagePackedUniformInfo[CrossCompiler::SHADER_STAGE_VERTEX].PackedUniformInfos.Num() <= Config.Shaders[CrossCompiler::SHADER_STAGE_VERTEX].Bindings.PackedGlobalArrays.Num());
|
|
}
|
|
|
|
if (Config.Shaders[CrossCompiler::SHADER_STAGE_PIXEL].Resource)
|
|
{
|
|
LinkedProgram->ConfigureShaderStage(
|
|
CrossCompiler::SHADER_STAGE_PIXEL,
|
|
OGL_FIRST_UNIFORM_BUFFER +
|
|
Config.Shaders[CrossCompiler::SHADER_STAGE_VERTEX].Bindings.NumUniformBuffers
|
|
);
|
|
check(LinkedProgram->StagePackedUniformInfo[CrossCompiler::SHADER_STAGE_PIXEL].PackedUniformInfos.Num() <= Config.Shaders[CrossCompiler::SHADER_STAGE_PIXEL].Bindings.PackedGlobalArrays.Num());
|
|
}
|
|
|
|
if (Config.Shaders[CrossCompiler::SHADER_STAGE_GEOMETRY].Resource)
|
|
{
|
|
LinkedProgram->ConfigureShaderStage(
|
|
CrossCompiler::SHADER_STAGE_GEOMETRY,
|
|
OGL_FIRST_UNIFORM_BUFFER +
|
|
Config.Shaders[CrossCompiler::SHADER_STAGE_VERTEX].Bindings.NumUniformBuffers +
|
|
Config.Shaders[CrossCompiler::SHADER_STAGE_PIXEL].Bindings.NumUniformBuffers
|
|
);
|
|
check(LinkedProgram->StagePackedUniformInfo[CrossCompiler::SHADER_STAGE_GEOMETRY].PackedUniformInfos.Num() <= Config.Shaders[CrossCompiler::SHADER_STAGE_GEOMETRY].Bindings.PackedGlobalArrays.Num());
|
|
}
|
|
|
|
if (Config.Shaders[CrossCompiler::SHADER_STAGE_HULL].Resource)
|
|
{
|
|
LinkedProgram->ConfigureShaderStage(
|
|
CrossCompiler::SHADER_STAGE_HULL,
|
|
OGL_FIRST_UNIFORM_BUFFER +
|
|
Config.Shaders[CrossCompiler::SHADER_STAGE_VERTEX].Bindings.NumUniformBuffers +
|
|
Config.Shaders[CrossCompiler::SHADER_STAGE_PIXEL].Bindings.NumUniformBuffers +
|
|
Config.Shaders[CrossCompiler::SHADER_STAGE_GEOMETRY].Bindings.NumUniformBuffers
|
|
);
|
|
}
|
|
|
|
if (Config.Shaders[CrossCompiler::SHADER_STAGE_DOMAIN].Resource)
|
|
{
|
|
LinkedProgram->ConfigureShaderStage(
|
|
CrossCompiler::SHADER_STAGE_DOMAIN,
|
|
OGL_FIRST_UNIFORM_BUFFER +
|
|
Config.Shaders[CrossCompiler::SHADER_STAGE_VERTEX].Bindings.NumUniformBuffers +
|
|
Config.Shaders[CrossCompiler::SHADER_STAGE_PIXEL].Bindings.NumUniformBuffers +
|
|
Config.Shaders[CrossCompiler::SHADER_STAGE_GEOMETRY].Bindings.NumUniformBuffers +
|
|
Config.Shaders[CrossCompiler::SHADER_STAGE_HULL].Bindings.NumUniformBuffers
|
|
);
|
|
}
|
|
|
|
if (Config.Shaders[CrossCompiler::SHADER_STAGE_COMPUTE].Resource)
|
|
{
|
|
LinkedProgram->ConfigureShaderStage(
|
|
CrossCompiler::SHADER_STAGE_COMPUTE,
|
|
OGL_FIRST_UNIFORM_BUFFER
|
|
);
|
|
check(LinkedProgram->StagePackedUniformInfo[CrossCompiler::SHADER_STAGE_COMPUTE].PackedUniformInfos.Num() <= Config.Shaders[CrossCompiler::SHADER_STAGE_COMPUTE].Bindings.PackedGlobalArrays.Num());
|
|
}
|
|
#if ENABLE_UNIFORM_BUFFER_LAYOUT_VERIFICATION
|
|
VerifyUniformBufferLayouts(Program);
|
|
#endif // #if ENABLE_UNIFORM_BUFFER_LAYOUT_VERIFICATION
|
|
return LinkedProgram;
|
|
}
|
|
|
|
/**
 * Creates a compute shader from serialized shader code and links it into its own program.
 *
 * @param Code	Shader code handed to CompileOpenGLShader; also hashed when a shader cache is active.
 * @return The compute shader, with LinkedProgram set to the result of LinkProgram.
 */
FComputeShaderRHIRef FOpenGLDynamicRHI::RHICreateComputeShader(const TArray<uint8>& Code)
{
	check(GMaxRHIFeatureLevel >= ERHIFeatureLevel::SM5);

	FOpenGLComputeShader* ComputeShader = CompileOpenGLShader<FOpenGLComputeShader>(Code);

	// Validate the pointer before it is dereferenced anywhere below.
	check( ComputeShader != 0);

	if (!ComputeShader->bSuccessfullyCompiled)
	{
		const ANSICHAR* GlslCode = NULL;
#if DEBUG_GL_SHADERS
		GlslCode = ComputeShader->GlslCodeString;
#endif
		ComputeShader->bSuccessfullyCompiled = VerifyCompiledShader(ComputeShader->Resource, GlslCode);
	}

	// @todo WARNING: We have to hash here because of the way we immediately link and don't afford the cache a chance to set the OutputHash from ShaderCore.
	if (FShaderCache::GetShaderCache())
	{
		FSHAHash Hash;
		FSHA1::HashBuffer(Code.GetData(), Code.Num(), Hash.Hash);
		ComputeShader->SetHash(Hash);
	}

	// A compute program consists of the compute stage only.
	FOpenGLLinkedProgramConfiguration Config;

	Config.Shaders[CrossCompiler::SHADER_STAGE_COMPUTE].Resource = ComputeShader->Resource;
	Config.Shaders[CrossCompiler::SHADER_STAGE_COMPUTE].Hash = ComputeShader->GetHash();
	Config.Shaders[CrossCompiler::SHADER_STAGE_COMPUTE].Bindings = ComputeShader->Bindings;

	ComputeShader->LinkedProgram = LinkProgram( Config );

	if (ComputeShader->LinkedProgram == NULL)
	{
#if DEBUG_GL_SHADERS
		// Dump the GLSL to help diagnose the link failure before asserting.
		if (ComputeShader->bSuccessfullyCompiled)
		{
			UE_LOG(LogRHI,Error,TEXT("Compute Shader:\n%s"),ANSI_TO_TCHAR(ComputeShader->GlslCode.GetData()));
		}
#endif //DEBUG_GL_SHADERS
		checkf(ComputeShader->LinkedProgram, TEXT("Compute shader failed to compile & link."));
	}

	return ComputeShader;
}
|
|
|
|
template<class TOpenGLStage>
|
|
static FString GetShaderStageSource(TOpenGLStage* Shader)
|
|
{
|
|
FString Source;
|
|
#if DEBUG_GL_SHADERS
|
|
Source = Shader->GlslCodeString;
|
|
#else
|
|
GLsizei NumShaders = 0;
|
|
glGetProgramiv(Shader->Resource, GL_ATTACHED_SHADERS, (GLint*)&NumShaders);
|
|
if(NumShaders > 0)
|
|
{
|
|
GLuint* Shaders = (GLuint*)alloca(sizeof(GLuint)*NumShaders);
|
|
glGetAttachedShaders(Shader->Resource, NumShaders, &NumShaders, Shaders);
|
|
for(int32 i = 0; i < NumShaders; i++)
|
|
{
|
|
GLint Len = 0;
|
|
glGetShaderiv(Shaders[i], GL_SHADER_SOURCE_LENGTH, &Len);
|
|
if(Len > 0)
|
|
{
|
|
ANSICHAR* Code = new ANSICHAR[Len + 1];
|
|
glGetShaderSource(Shaders[i], Len + 1, &Len, Code);
|
|
Source += Code;
|
|
delete [] Code;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
return Source;
|
|
}
|
|
|
|
// ============================================================================================================================
|
|
|
|
struct FOpenGLShaderVaryingMapping
|
|
{
|
|
FAnsiCharArray Name;
|
|
int32 WriteLoc;
|
|
int32 ReadLoc;
|
|
};
|
|
|
|
typedef TMap<FOpenGLLinkedProgramConfiguration,FOpenGLLinkedProgramConfiguration::ShaderInfo> FOpenGLSeparateShaderObjectCache;
|
|
|
|
static FOpenGLSeparateShaderObjectCache& GetOpenGLSeparateShaderObjectCache()
|
|
{
|
|
static FOpenGLSeparateShaderObjectCache SeparateShaderObjectCache;
|
|
return SeparateShaderObjectCache;
|
|
}
|
|
|
|
template<class TOpenGLStage0, class TOpenGLStage1>
|
|
static void BindShaderStage(FOpenGLLinkedProgramConfiguration::ShaderInfo& ShaderInfo, TOpenGLStage0* NextStage, FOpenGLLinkedProgramConfiguration::ShaderInfo& PrevInfo, TOpenGLStage1* PrevStage)
|
|
{
|
|
check(NextStage && PrevStage);
|
|
|
|
GLuint NextStageResource = NextStage->Resource;
|
|
FOpenGLShaderBindings NextStageBindings = NextStage->Bindings;
|
|
|
|
if ( FOpenGL::SupportsSeparateShaderObjects() )
|
|
{
|
|
FOpenGLLinkedProgramConfiguration Config;
|
|
Config.Shaders[0] = PrevInfo;
|
|
Config.Shaders[1] = ShaderInfo;
|
|
FOpenGLLinkedProgramConfiguration::ShaderInfo* PrevResource = GetOpenGLSeparateShaderObjectCache().Find(Config);
|
|
if(PrevResource)
|
|
{
|
|
PrevInfo.Bindings = PrevResource->Bindings;
|
|
PrevInfo.Resource = PrevResource->Resource;
|
|
}
|
|
else
|
|
{
|
|
FOpenGLShaderBindings& PrevStageBindings = PrevStage->Bindings;
|
|
TMap<FAnsiCharArray, int32> PrevStageVaryings;
|
|
for (int32 i = 0; i < PrevStageBindings.OutputVaryings.Num(); i++)
|
|
{
|
|
FAnsiCharArray Name = PrevStageBindings.OutputVaryings[i].Varying;
|
|
if ( Name.Num() >= 4 && (FCStringAnsi::Strncmp(Name.GetData(), "out_", 4) == 0 || FCStringAnsi::Strncmp(Name.GetData(), "var_", 4) == 0) )
|
|
{
|
|
Name.RemoveAt(0, 4);
|
|
}
|
|
PrevStageVaryings.Add(Name, PrevStageBindings.OutputVaryings[i].Location);
|
|
}
|
|
|
|
bool bInterpolatorMatches = true;
|
|
|
|
TMap<FAnsiCharArray, int32> NextStageVaryings;
|
|
TArray<FString> InputErrors;
|
|
TArray<FOpenGLShaderVaryingMapping> VaryingMapping;
|
|
for (int32 i = 0; i < NextStageBindings.InputVaryings.Num(); i++)
|
|
{
|
|
FAnsiCharArray Name = NextStageBindings.InputVaryings[i].Varying;
|
|
if ( Name.Num() >= 3 && FCStringAnsi::Strncmp(Name.GetData(), "in_", 3) == 0 )
|
|
{
|
|
Name.RemoveAt(0, 3);
|
|
}
|
|
if ( Name.Num() >= 4 && FCStringAnsi::Strncmp(Name.GetData(), "var_", 4) == 0 )
|
|
{
|
|
Name.RemoveAt(0, 4);
|
|
}
|
|
NextStageVaryings.Add(Name, NextStageBindings.InputVaryings[i].Location);
|
|
if( PrevStageVaryings.Contains(Name) )
|
|
{
|
|
int32& PrevLocation = PrevStageVaryings.FindChecked(Name);
|
|
if(PrevLocation != NextStageBindings.InputVaryings[i].Location)
|
|
{
|
|
if(PrevLocation >= 0 && NextStageBindings.InputVaryings[i].Location >= 0)
|
|
{
|
|
FOpenGLShaderVaryingMapping Pair;
|
|
Pair.Name = Name;
|
|
Pair.WriteLoc = PrevLocation;
|
|
Pair.ReadLoc = NextStageBindings.InputVaryings[i].Location;
|
|
VaryingMapping.Add(Pair);
|
|
UE_LOG(LogRHI,Warning,TEXT("Separate Shader Object Binding Warning: Input %s @ %d of stage 0x%x written by stage 0x%x at wrong location %d"), ANSI_TO_TCHAR(NextStageBindings.InputVaryings[i].Varying.GetData()), NextStageBindings.InputVaryings[i].Location, TOpenGLStage0::TypeEnum, TOpenGLStage1::TypeEnum, PrevLocation);
|
|
}
|
|
else if(NextStageBindings.InputVaryings[i].Location == -1)
|
|
{
|
|
InputErrors.Add(FString::Printf(TEXT("Separate Shader Object Binding Error: Input %s of stage 0x%x written by stage 0x%x at location %d, can't be rewritten."), ANSI_TO_TCHAR(NextStageBindings.InputVaryings[i].Varying.GetData()), TOpenGLStage0::TypeEnum, TOpenGLStage1::TypeEnum, PrevLocation));
|
|
}
|
|
else
|
|
{
|
|
InputErrors.Add(FString::Printf(TEXT("Separate Shader Object Binding Error: Input %s @ %d of stage 0x%x written by stage 0x%x without location, can't be rewritten."), ANSI_TO_TCHAR(NextStageBindings.InputVaryings[i].Varying.GetData()), NextStageBindings.InputVaryings[i].Location, TOpenGLStage0::TypeEnum, TOpenGLStage1::TypeEnum));
|
|
}
|
|
bInterpolatorMatches = false;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
InputErrors.Add(FString::Printf(TEXT("Separate Shader Object Binding Error: Input %s @ %d of stage 0x%x not written by stage 0x%x"), ANSI_TO_TCHAR(NextStageBindings.InputVaryings[i].Varying.GetData()), NextStageBindings.InputVaryings[i].Location, TOpenGLStage0::TypeEnum, TOpenGLStage1::TypeEnum));
|
|
bInterpolatorMatches = false;
|
|
}
|
|
}
|
|
|
|
TArray<FOpenGLShaderVarying> OutputElimination;
|
|
for (int32 i = 0; i < PrevStageBindings.OutputVaryings.Num(); i++)
|
|
{
|
|
if ( PrevStageBindings.OutputVaryings[i].Location == -1 )
|
|
{
|
|
FAnsiCharArray Name = PrevStageBindings.OutputVaryings[i].Varying;
|
|
if ( Name.Num() >= 4 && (FCStringAnsi::Strncmp(Name.GetData(), "out_", 4) == 0 || FCStringAnsi::Strncmp(Name.GetData(), "var_", 4) == 0) )
|
|
{
|
|
Name.RemoveAt(0, 4);
|
|
}
|
|
if( !NextStageVaryings.Contains(Name) )
|
|
{
|
|
OutputElimination.Add(PrevStageBindings.OutputVaryings[i]);
|
|
UE_LOG(LogRHI,Warning,TEXT("Separate Shader Object Binding Warning: Named output %s of stage 0x%x not read by stage 0x%x"), ANSI_TO_TCHAR(PrevStageBindings.OutputVaryings[i].Varying.GetData()), TOpenGLStage1::TypeEnum, TOpenGLStage0::TypeEnum);
|
|
bInterpolatorMatches = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
if(!bInterpolatorMatches)
|
|
{
|
|
if(InputErrors.Num() == 0)
|
|
{
|
|
FOpenGLCodeHeader Header;
|
|
Header.GlslMarker = 0x474c534c;
|
|
CA_SUPPRESS(6326);
|
|
switch ((int32)TOpenGLStage1::StaticFrequency)
|
|
{
|
|
case SF_Vertex:
|
|
Header.FrequencyMarker = 0x5653;
|
|
break;
|
|
case SF_Pixel:
|
|
Header.FrequencyMarker = 0x5053;
|
|
break;
|
|
case SF_Geometry:
|
|
Header.FrequencyMarker = 0x4753;
|
|
break;
|
|
case SF_Hull:
|
|
Header.FrequencyMarker = 0x4853;
|
|
break;
|
|
case SF_Domain:
|
|
Header.FrequencyMarker = 0x4453;
|
|
break;
|
|
case SF_Compute:
|
|
Header.FrequencyMarker = 0x4353;
|
|
break;
|
|
default:
|
|
UE_LOG(LogRHI, Fatal, TEXT("Invalid shader frequency: %d"), (int32)TOpenGLStage1::StaticFrequency);
|
|
}
|
|
Header.Bindings = PrevStage->Bindings;
|
|
Header.UniformBuffersCopyInfo = PrevStage->UniformBuffersCopyInfo;
|
|
|
|
TArray<FString> PrevLines;
|
|
FString PrevSource = GetShaderStageSource<TOpenGLStage1>(PrevStage);
|
|
PrevSource.ParseIntoArrayLines(PrevLines);
|
|
bool const bOutputElimination = OutputElimination.Num() > 0;
|
|
for(FOpenGLShaderVarying Output : OutputElimination)
|
|
{
|
|
for(int32 i = 0; i < PrevLines.Num(); i++)
|
|
{
|
|
if(PrevLines[i].Contains(Output.Varying.GetData()))
|
|
{
|
|
PrevLines[i].Empty();
|
|
}
|
|
}
|
|
for(int32 i = 0; i < Header.Bindings.OutputVaryings.Num(); i++)
|
|
{
|
|
if(Output == Header.Bindings.OutputVaryings[i])
|
|
{
|
|
Header.Bindings.OutputVaryings.RemoveAt(i);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
OutputElimination.Empty();
|
|
|
|
bool const bVaryingRemapping = VaryingMapping.Num() > 0;
|
|
|
|
if (OutputElimination.Num() == 0 && VaryingMapping.Num() == 0 && (bOutputElimination || bVaryingRemapping))
|
|
{
|
|
FString NewPrevSource;
|
|
for(FString Line : PrevLines)
|
|
{
|
|
if(!Line.IsEmpty())
|
|
{
|
|
NewPrevSource += Line + TEXT("\n");
|
|
}
|
|
}
|
|
|
|
TArray<uint8> Bytes;
|
|
FMemoryWriter Ar(Bytes);
|
|
Ar << Header;
|
|
TArray<ANSICHAR> Chars;
|
|
int32 Len = FCStringAnsi::Strlen(TCHAR_TO_ANSI(*NewPrevSource)) + 1;
|
|
Chars.Append(TCHAR_TO_ANSI(*NewPrevSource), Len);
|
|
Ar.Serialize(Chars.GetData(), Chars.Num());
|
|
|
|
TRefCountPtr<TOpenGLStage1> NewPrev(CompileOpenGLShader<TOpenGLStage1>(Bytes));
|
|
PrevInfo.Bindings = Header.Bindings;
|
|
PrevInfo.Resource = NewPrev->Resource;
|
|
}
|
|
|
|
bInterpolatorMatches = (OutputElimination.Num() == 0 && VaryingMapping.Num() == 0);
|
|
}
|
|
else
|
|
{
|
|
for(int32 i = 0; i < InputErrors.Num(); i++)
|
|
{
|
|
UE_LOG(LogRHI, Error, TEXT("%s"), *InputErrors[i]);
|
|
}
|
|
}
|
|
|
|
if(!bInterpolatorMatches)
|
|
{
|
|
FString PrevShaderStageSource = GetShaderStageSource<TOpenGLStage1>(PrevStage);
|
|
FString NextShaderStageSource = GetShaderStageSource<TOpenGLStage0>(NextStage);
|
|
UE_LOG(LogRHI, Error, TEXT("Separate Shader Object Stage 0x%x:\n%s"), TOpenGLStage1::TypeEnum, *PrevShaderStageSource);
|
|
UE_LOG(LogRHI, Error, TEXT("Separate Shader Object Stage 0x%x:\n%s"), TOpenGLStage0::TypeEnum, *NextShaderStageSource);
|
|
}
|
|
}
|
|
|
|
GetOpenGLSeparateShaderObjectCache().Add(Config, PrevInfo);
|
|
}
|
|
}
|
|
|
|
ShaderInfo.Bindings = NextStageBindings;
|
|
ShaderInfo.Resource = NextStageResource;
|
|
ShaderInfo.Hash = NextStage->GetHash();
|
|
}
|
|
|
|
// ============================================================================================================================
|
|
|
|
FBoundShaderStateRHIRef FOpenGLDynamicRHI::RHICreateBoundShaderState(
|
|
FVertexDeclarationRHIParamRef VertexDeclarationRHI,
|
|
FVertexShaderRHIParamRef VertexShaderRHI,
|
|
FHullShaderRHIParamRef HullShaderRHI,
|
|
FDomainShaderRHIParamRef DomainShaderRHI,
|
|
FPixelShaderRHIParamRef PixelShaderRHI,
|
|
FGeometryShaderRHIParamRef GeometryShaderRHI
|
|
)
|
|
{
|
|
check(IsInRenderingThread());
|
|
|
|
VERIFY_GL_SCOPE();
|
|
|
|
SCOPE_CYCLE_COUNTER(STAT_OpenGLCreateBoundShaderStateTime);
|
|
|
|
if(!PixelShaderRHI)
|
|
{
|
|
// use special null pixel shader when PixelShader was set to NULL
|
|
PixelShaderRHI = TShaderMapRef<FNULLPS>(GetGlobalShaderMap(GMaxRHIFeatureLevel))->GetPixelShader();
|
|
}
|
|
|
|
// Check for an existing bound shader state which matches the parameters
|
|
FCachedBoundShaderStateLink* CachedBoundShaderStateLink = GetCachedBoundShaderState(
|
|
VertexDeclarationRHI,
|
|
VertexShaderRHI,
|
|
PixelShaderRHI,
|
|
HullShaderRHI,
|
|
DomainShaderRHI,
|
|
GeometryShaderRHI
|
|
);
|
|
|
|
if(CachedBoundShaderStateLink)
|
|
{
|
|
// If we've already created a bound shader state with these parameters, reuse it.
|
|
return CachedBoundShaderStateLink->BoundShaderState;
|
|
}
|
|
else
|
|
{
|
|
FOpenGLVertexShader* VertexShader = ResourceCast(VertexShaderRHI);
|
|
FOpenGLPixelShader* PixelShader = ResourceCast(PixelShaderRHI);
|
|
FOpenGLHullShader* HullShader = ResourceCast(HullShaderRHI);
|
|
FOpenGLDomainShader* DomainShader = ResourceCast(DomainShaderRHI);
|
|
FOpenGLGeometryShader* GeometryShader = ResourceCast(GeometryShaderRHI);
|
|
|
|
FOpenGLLinkedProgramConfiguration Config;
|
|
|
|
check(VertexShader);
|
|
check(PixelShader);
|
|
|
|
// Fill-in the configuration
|
|
Config.Shaders[CrossCompiler::SHADER_STAGE_VERTEX].Bindings = VertexShader->Bindings;
|
|
Config.Shaders[CrossCompiler::SHADER_STAGE_VERTEX].Resource = VertexShader->Resource;
|
|
Config.Shaders[CrossCompiler::SHADER_STAGE_VERTEX].Hash = VertexShader->GetHash();
|
|
|
|
if ( FOpenGL::SupportsTessellation())
|
|
{
|
|
if ( HullShader)
|
|
{
|
|
check(VertexShader);
|
|
BindShaderStage(Config.Shaders[CrossCompiler::SHADER_STAGE_HULL], HullShader, Config.Shaders[CrossCompiler::SHADER_STAGE_VERTEX], VertexShader);
|
|
}
|
|
if ( DomainShader)
|
|
{
|
|
check(HullShader);
|
|
BindShaderStage(Config.Shaders[CrossCompiler::SHADER_STAGE_DOMAIN], DomainShader, Config.Shaders[CrossCompiler::SHADER_STAGE_HULL], HullShader);
|
|
}
|
|
}
|
|
|
|
if (GeometryShader)
|
|
{
|
|
check(DomainShader || VertexShader);
|
|
if ( DomainShader )
|
|
{
|
|
BindShaderStage(Config.Shaders[CrossCompiler::SHADER_STAGE_GEOMETRY], GeometryShader, Config.Shaders[CrossCompiler::SHADER_STAGE_DOMAIN], DomainShader);
|
|
}
|
|
else
|
|
{
|
|
BindShaderStage(Config.Shaders[CrossCompiler::SHADER_STAGE_GEOMETRY], GeometryShader, Config.Shaders[CrossCompiler::SHADER_STAGE_VERTEX], VertexShader);
|
|
}
|
|
}
|
|
|
|
check(DomainShader || GeometryShader || VertexShader);
|
|
if ( DomainShader )
|
|
{
|
|
BindShaderStage(Config.Shaders[CrossCompiler::SHADER_STAGE_PIXEL], PixelShader, Config.Shaders[CrossCompiler::SHADER_STAGE_DOMAIN], DomainShader);
|
|
}
|
|
else if ( GeometryShader )
|
|
{
|
|
BindShaderStage(Config.Shaders[CrossCompiler::SHADER_STAGE_PIXEL], PixelShader, Config.Shaders[CrossCompiler::SHADER_STAGE_GEOMETRY], GeometryShader);
|
|
}
|
|
else
|
|
{
|
|
BindShaderStage(Config.Shaders[CrossCompiler::SHADER_STAGE_PIXEL], PixelShader, Config.Shaders[CrossCompiler::SHADER_STAGE_VERTEX], VertexShader);
|
|
}
|
|
|
|
// Check if we already have such a program in released programs cache. Use it, if we do.
|
|
FOpenGLLinkedProgram* LinkedProgram = 0;
|
|
|
|
int32 Index = StaticLastReleasedProgramsIndex;
|
|
for( int CacheIndex = 0; CacheIndex < LAST_RELEASED_PROGRAMS_CACHE_COUNT; ++CacheIndex )
|
|
{
|
|
FOpenGLLinkedProgram* Prog = StaticLastReleasedPrograms[Index];
|
|
if( Prog && Prog->Config == Config )
|
|
{
|
|
StaticLastReleasedPrograms[Index] = 0;
|
|
LinkedProgram = Prog;
|
|
break;
|
|
}
|
|
Index = (Index == LAST_RELEASED_PROGRAMS_CACHE_COUNT-1) ? 0 : Index+1;
|
|
}
|
|
|
|
if (!LinkedProgram)
|
|
{
|
|
FOpenGLLinkedProgram** CachedProgram = GetOpenGLProgramsCache().Find( Config);
|
|
|
|
if (CachedProgram)
|
|
{
|
|
LinkedProgram = *CachedProgram;
|
|
}
|
|
else
|
|
{
|
|
// In case ProgramBinaryCache is enabled we defer shader compilation, look LinkProgram
|
|
if (!FOpenGLProgramBinaryCache::IsEnabled())
|
|
{
|
|
const ANSICHAR* GlslCode = NULL;
|
|
if (!VertexShader->bSuccessfullyCompiled)
|
|
{
|
|
#if DEBUG_GL_SHADERS
|
|
GlslCode = VertexShader->GlslCodeString;
|
|
#endif
|
|
VertexShader->bSuccessfullyCompiled = VerifyCompiledShader(VertexShader->Resource, GlslCode);
|
|
}
|
|
if (!PixelShader->bSuccessfullyCompiled)
|
|
{
|
|
#if DEBUG_GL_SHADERS
|
|
GlslCode = PixelShader->GlslCodeString;
|
|
#endif
|
|
PixelShader->bSuccessfullyCompiled = VerifyCompiledShader(PixelShader->Resource, GlslCode);
|
|
}
|
|
if (GeometryShader && !GeometryShader->bSuccessfullyCompiled)
|
|
{
|
|
#if DEBUG_GL_SHADERS
|
|
GlslCode = GeometryShader->GlslCodeString;
|
|
#endif
|
|
GeometryShader->bSuccessfullyCompiled = VerifyCompiledShader(GeometryShader->Resource, GlslCode);
|
|
}
|
|
if (FOpenGL::SupportsTessellation())
|
|
{
|
|
if (HullShader && !HullShader->bSuccessfullyCompiled)
|
|
{
|
|
#if DEBUG_GL_SHADERS
|
|
GlslCode = HullShader->GlslCodeString;
|
|
#endif
|
|
HullShader->bSuccessfullyCompiled = VerifyCompiledShader(HullShader->Resource, GlslCode);
|
|
}
|
|
if (DomainShader && !DomainShader->bSuccessfullyCompiled)
|
|
{
|
|
#if DEBUG_GL_SHADERS
|
|
GlslCode = DomainShader->GlslCodeString;
|
|
#endif
|
|
DomainShader->bSuccessfullyCompiled = VerifyCompiledShader(DomainShader->Resource, GlslCode);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Make sure we have OpenGL context set up, and invalidate the parameters cache and current program (as we'll link a new one soon)
|
|
GetContextStateForCurrentContext().Program = -1;
|
|
MarkShaderParameterCachesDirty(PendingState.ShaderParameters, false);
|
|
|
|
// Link program, using the data provided in config
|
|
LinkedProgram = LinkProgram(Config);
|
|
|
|
// Add this program to the cache
|
|
GetOpenGLProgramsCache().Add(Config,LinkedProgram);
|
|
|
|
if (LinkedProgram == NULL)
|
|
{
|
|
#if DEBUG_GL_SHADERS
|
|
if (VertexShader->bSuccessfullyCompiled)
|
|
{
|
|
UE_LOG(LogRHI,Error,TEXT("Vertex Shader:\n%s"),ANSI_TO_TCHAR(VertexShader->GlslCode.GetData()));
|
|
}
|
|
if (PixelShader->bSuccessfullyCompiled)
|
|
{
|
|
UE_LOG(LogRHI,Error,TEXT("Pixel Shader:\n%s"),ANSI_TO_TCHAR(PixelShader->GlslCode.GetData()));
|
|
}
|
|
if (GeometryShader && GeometryShader->bSuccessfullyCompiled)
|
|
{
|
|
UE_LOG(LogRHI,Error,TEXT("Geometry Shader:\n%s"),ANSI_TO_TCHAR(GeometryShader->GlslCode.GetData()));
|
|
}
|
|
if ( FOpenGL::SupportsTessellation() )
|
|
{
|
|
if (HullShader && HullShader->bSuccessfullyCompiled)
|
|
{
|
|
UE_LOG(LogRHI,Error,TEXT("Hull Shader:\n%s"),ANSI_TO_TCHAR(HullShader->GlslCode.GetData()));
|
|
}
|
|
if (DomainShader && DomainShader->bSuccessfullyCompiled)
|
|
{
|
|
UE_LOG(LogRHI,Error,TEXT("Domain Shader:\n%s"),ANSI_TO_TCHAR(DomainShader->GlslCode.GetData()));
|
|
}
|
|
}
|
|
#endif //DEBUG_GL_SHADERS
|
|
check(LinkedProgram);
|
|
}
|
|
}
|
|
}
|
|
|
|
if(FShaderCache::IsPrebindCall(FShaderCache::GetDefaultCacheState()) && !VertexDeclarationRHI)
|
|
{
|
|
return nullptr;
|
|
}
|
|
else
|
|
{
|
|
check(VertexDeclarationRHI);
|
|
|
|
FOpenGLVertexDeclaration* VertexDeclaration = ResourceCast(VertexDeclarationRHI);
|
|
FOpenGLBoundShaderState* BoundShaderState = new FOpenGLBoundShaderState(
|
|
LinkedProgram,
|
|
VertexDeclarationRHI,
|
|
VertexShaderRHI,
|
|
PixelShaderRHI,
|
|
GeometryShaderRHI,
|
|
HullShaderRHI,
|
|
DomainShaderRHI
|
|
);
|
|
|
|
FShaderCache::LogBoundShaderState(FShaderCache::GetDefaultCacheState(), FOpenGL::GetShaderPlatform(), VertexDeclarationRHI, VertexShaderRHI, PixelShaderRHI, HullShaderRHI, DomainShaderRHI, GeometryShaderRHI, BoundShaderState);
|
|
|
|
return BoundShaderState;
|
|
}
|
|
}
|
|
}
|
|
|
|
void DestroyShadersAndPrograms()
|
|
{
|
|
GetOpenGLUniformBlockLocations().Empty();
|
|
GetOpenGLUniformBlockBindings().Empty();
|
|
|
|
FOpenGLProgramsForReuse& ProgramCache = GetOpenGLProgramsCache();
|
|
|
|
for( TMap<FOpenGLLinkedProgramConfiguration,FOpenGLLinkedProgram*>::TIterator It( ProgramCache ); It; ++It )
|
|
{
|
|
delete It.Value();
|
|
}
|
|
ProgramCache.Empty();
|
|
|
|
StaticLastReleasedProgramsIndex = 0;
|
|
|
|
FOpenGLCompiledShaderCache& ShaderCache = GetOpenGLCompiledShaderCache();
|
|
for( TMap<FOpenGLCompiledShaderKey,GLuint>::TIterator It( ShaderCache ); It; ++It )
|
|
{
|
|
FOpenGL::DeleteShader(It.Value());
|
|
}
|
|
ShaderCache.Empty();
|
|
}
|
|
|
|
struct FSamplerPair
|
|
{
|
|
GLuint Texture;
|
|
GLuint Sampler;
|
|
|
|
friend bool operator ==(const FSamplerPair& A,const FSamplerPair& B)
|
|
{
|
|
return A.Texture == B.Texture && A.Sampler == B.Sampler;
|
|
}
|
|
|
|
friend uint32 GetTypeHash(const FSamplerPair &Key)
|
|
{
|
|
return Key.Texture ^ (Key.Sampler << 18);
|
|
}
|
|
};
|
|
|
|
static TMap<FSamplerPair, GLuint64> BindlessSamplerMap;
|
|
|
|
/**
 * Uploads a bindless handle for every listed sampler whose slot has a texture bound.
 * Handles are created and made resident on first use and reused from BindlessSamplerMap afterwards.
 * Does nothing when bindless textures are disabled by console variable or unsupported.
 *
 * @param ContextState	Not used by this function.
 * @param Samplers		Bindless sampler descriptions (texture slot and uniform handle location).
 */
void FOpenGLDynamicRHI::SetupBindlessTextures( FOpenGLContextState& ContextState, const TArray<FOpenGLBindlessSamplerInfo> &Samplers )
{
	const bool bBindlessActive = OpenGLConsoleVariables::bBindlessTexture != 0 && FOpenGL::SupportsBindlessTexture();
	if (!bBindlessActive)
	{
		return;
	}

	// Bind all textures via Bindless
	for (int32 SamplerIndex = 0; SamplerIndex < Samplers.Num(); SamplerIndex++)
	{
		const FOpenGLBindlessSamplerInfo& Info = Samplers[SamplerIndex];

		FSamplerPair Key;
		Key.Texture = PendingState.Textures[Info.Slot].Resource;
		Key.Sampler = (PendingState.SamplerStates[Info.Slot] != NULL) ? PendingState.SamplerStates[Info.Slot]->Resource : 0;

		// Slots without a texture are skipped entirely.
		if (!Key.Texture)
		{
			continue;
		}

		GLuint64 BindlessHandle = 0xffffffff;
		if (const GLuint64* ExistingHandle = BindlessSamplerMap.Find(Key))
		{
			BindlessHandle = *ExistingHandle;
		}
		else
		{
			// First use of this pair: create the handle, make it resident and remember it.
			BindlessHandle = Key.Sampler
				? FOpenGL::GetTextureSamplerHandle( Key.Texture, Key.Sampler)
				: FOpenGL::GetTextureHandle( Key.Texture);

			FOpenGL::MakeTextureHandleResident( BindlessHandle);

			BindlessSamplerMap.Add( Key, BindlessHandle);
		}

		FOpenGL::UniformHandleui64( Info.Handle, BindlessHandle);
	}
}
|
|
|
|
/**
 * Makes the pending bound shader state's program current and binds its uniform buffers.
 *
 * Uniform buffer binding points are handed out consecutively, starting at
 * OGL_FIRST_UNIFORM_BUFFER, in the order vertex, pixel, geometry, hull, domain.
 * Uniform buffers and bindless textures are skipped when emulated uniform buffers are in use.
 *
 * @param ContextState	State of the current GL context; its Program and bUsingTessellation are updated.
 */
void FOpenGLDynamicRHI::BindPendingShaderState( FOpenGLContextState& ContextState )
{
	SCOPE_CYCLE_COUNTER_DETAILED(STAT_OpenGLShaderBindTime);
	VERIFY_GL_SCOPE();

	const auto& BoundState = PendingState.BoundShaderState;
	FOpenGLLinkedProgram* const Linked = BoundState->LinkedProgram;

	bool bForceRebind = false;

	const GLuint ProgramName = Linked->Program;
	if (ContextState.Program != ProgramName)
	{
		FOpenGL::BindProgramPipeline(ProgramName);
		ContextState.Program = ProgramName;
		ContextState.bUsingTessellation = Linked->bUsingTessellation;
		MarkShaderParameterCachesDirty(PendingState.ShaderParameters, false);
		// A program switch forces uniform buffer rebinding only when separate shader objects are supported.
		bForceRebind = FOpenGL::SupportsSeparateShaderObjects();
	}

	if (GUseEmulatedUniformBuffers)
	{
		return;
	}

	int32 FirstBinding = OGL_FIRST_UNIFORM_BUFFER;

	// Vertex stage
	{
		const int32 NumBuffers = BoundState->VertexShader->Bindings.NumUniformBuffers;
		Linked->VerifyUniformBlockBindings(CrossCompiler::SHADER_STAGE_VERTEX, FirstBinding);
		BindUniformBufferBase(ContextState, NumBuffers, PendingState.BoundUniformBuffers[SF_Vertex], FirstBinding, bForceRebind);
		FirstBinding += NumBuffers;
	}

	// Pixel stage
	{
		const int32 NumBuffers = BoundState->PixelShader->Bindings.NumUniformBuffers;
		Linked->VerifyUniformBlockBindings(CrossCompiler::SHADER_STAGE_PIXEL, FirstBinding);
		BindUniformBufferBase(ContextState, NumBuffers, PendingState.BoundUniformBuffers[SF_Pixel], FirstBinding, bForceRebind);
		FirstBinding += NumBuffers;
	}

	// Geometry stage (optional)
	if (BoundState->GeometryShader)
	{
		const int32 NumBuffers = BoundState->GeometryShader->Bindings.NumUniformBuffers;
		Linked->VerifyUniformBlockBindings(CrossCompiler::SHADER_STAGE_GEOMETRY, FirstBinding);
		BindUniformBufferBase(ContextState, NumBuffers, PendingState.BoundUniformBuffers[SF_Geometry], FirstBinding, bForceRebind);
		FirstBinding += NumBuffers;
	}

	// Hull stage (optional)
	if (BoundState->HullShader)
	{
		const int32 NumBuffers = BoundState->HullShader->Bindings.NumUniformBuffers;
		Linked->VerifyUniformBlockBindings(CrossCompiler::SHADER_STAGE_HULL, FirstBinding);
		BindUniformBufferBase(ContextState, NumBuffers, PendingState.BoundUniformBuffers[SF_Hull], FirstBinding, bForceRebind);
		FirstBinding += NumBuffers;
	}

	// Domain stage (optional)
	if (BoundState->DomainShader)
	{
		const int32 NumBuffers = BoundState->DomainShader->Bindings.NumUniformBuffers;
		Linked->VerifyUniformBlockBindings(CrossCompiler::SHADER_STAGE_DOMAIN, FirstBinding);
		BindUniformBufferBase(ContextState, NumBuffers, PendingState.BoundUniformBuffers[SF_Domain], FirstBinding, bForceRebind);
		FirstBinding += NumBuffers;
	}

	SetupBindlessTextures( ContextState, Linked->Samplers );
}
|
|
|
|
/**
 * Stores the linked program together with the OpenGL-specific vertex declaration and
 * shader objects behind the given RHI references, and sets up the cache link for them.
 */
FOpenGLBoundShaderState::FOpenGLBoundShaderState(
	FOpenGLLinkedProgram* InLinkedProgram,
	FVertexDeclarationRHIParamRef InVertexDeclarationRHI,
	FVertexShaderRHIParamRef InVertexShaderRHI,
	FPixelShaderRHIParamRef InPixelShaderRHI,
	FGeometryShaderRHIParamRef InGeometryShaderRHI,
	FHullShaderRHIParamRef InHullShaderRHI,
	FDomainShaderRHIParamRef InDomainShaderRHI
	)
	:	CacheLink(InVertexDeclarationRHI, InVertexShaderRHI, InPixelShaderRHI,
		InHullShaderRHI, InDomainShaderRHI, InGeometryShaderRHI, this)
{
	// Resolve every RHI reference to its OpenGL implementation type first...
	FOpenGLVertexDeclaration* GLVertexDeclaration = FOpenGLDynamicRHI::ResourceCast(InVertexDeclarationRHI);
	FOpenGLVertexShader* GLVertexShader = FOpenGLDynamicRHI::ResourceCast(InVertexShaderRHI);
	FOpenGLPixelShader* GLPixelShader = FOpenGLDynamicRHI::ResourceCast(InPixelShaderRHI);
	FOpenGLHullShader* GLHullShader = FOpenGLDynamicRHI::ResourceCast(InHullShaderRHI);
	FOpenGLDomainShader* GLDomainShader = FOpenGLDynamicRHI::ResourceCast(InDomainShaderRHI);
	FOpenGLGeometryShader* GLGeometryShader = FOpenGLDynamicRHI::ResourceCast(InGeometryShaderRHI);

	// ...then store them on the state.
	VertexDeclaration = GLVertexDeclaration;
	VertexShader = GLVertexShader;
	PixelShader = GLPixelShader;
	GeometryShader = GLGeometryShader;
	HullShader = GLHullShader;
	DomainShader = GLDomainShader;
	LinkedProgram = InLinkedProgram;
}
|
|
|
|
/**
 * Records the linked program in the ring of recently released programs, so that
 * RHICreateBoundShaderState can pick it up again, and reports the program to OnProgramDeletion.
 */
FOpenGLBoundShaderState::~FOpenGLBoundShaderState()
{
	check(LinkedProgram);

	// Overwrite the slot at the ring's write position, then advance it with wrap-around.
	StaticLastReleasedPrograms[StaticLastReleasedProgramsIndex] = LinkedProgram;
	++StaticLastReleasedProgramsIndex;
	if (StaticLastReleasedProgramsIndex == LAST_RELEASED_PROGRAMS_CACHE_COUNT)
	{
		StaticLastReleasedProgramsIndex = 0;
	}

	OnProgramDeletion(LinkedProgram->Program);
}
|
|
|
|
/** Returns the linked program's TextureStageNeeds entry for the given texture stage index. */
bool FOpenGLBoundShaderState::NeedsTextureStage(int32 TextureStageIndex)
{
	const bool bStageNeeded = LinkedProgram->TextureStageNeeds[TextureStageIndex];
	return bStageNeeded;
}
|
|
|
|
/** Returns the linked program's MaxTextureStage value. */
int32 FOpenGLBoundShaderState::MaxTextureStageUsed()
{
	const int32 HighestStage = LinkedProgram->MaxTextureStage;
	return HighestStage;
}
|
|
|
|
bool FOpenGLBoundShaderState::RequiresDriverInstantiation()
|
|
{
|
|
check(LinkedProgram);
|
|
bool const bDrawn = LinkedProgram->bDrawn;
|
|
LinkedProgram->bDrawn = true;
|
|
return !bDrawn;
|
|
}
|
|
|
|
/** Returns the linked program's TextureStageNeeds entry for the given texture stage index. */
bool FOpenGLComputeShader::NeedsTextureStage(int32 TextureStageIndex)
{
	const bool bStageNeeded = LinkedProgram->TextureStageNeeds[TextureStageIndex];
	return bStageNeeded;
}
|
|
|
|
/** Returns the linked program's MaxTextureStage value. */
int32 FOpenGLComputeShader::MaxTextureStageUsed()
{
	const int32 HighestStage = LinkedProgram->MaxTextureStage;
	return HighestStage;
}
|
|
|
|
/** Returns the linked program's UAVStageNeeds entry for the given UAV stage index. */
bool FOpenGLComputeShader::NeedsUAVStage(int32 UAVStageIndex)
{
	const bool bStageNeeded = LinkedProgram->UAVStageNeeds[UAVStageIndex];
	return bStageNeeded;
}
|
|
|
|
/**
 * Makes the compute shader's linked program current and binds its uniform buffers.
 * Uniform buffers and bindless textures are skipped when emulated uniform buffers are in use.
 *
 * @param ContextState		State of the current GL context; its Program is updated on a switch.
 * @param ComputeShaderRHI	Compute shader whose linked program is bound.
 */
void FOpenGLDynamicRHI::BindPendingComputeShaderState(FOpenGLContextState& ContextState, FComputeShaderRHIParamRef ComputeShaderRHI)
{
	VERIFY_GL_SCOPE();
	FOpenGLComputeShader* ComputeShader = ResourceCast(ComputeShaderRHI);

	const GLuint ProgramName = ComputeShader->LinkedProgram->Program;

	// A program switch always forces the uniform buffers to be rebound.
	const bool bProgramChanged = (ContextState.Program != ProgramName);
	if (bProgramChanged)
	{
		FOpenGL::BindProgramPipeline(ProgramName);
		ContextState.Program = ProgramName;
		MarkShaderParameterCachesDirty(PendingState.ShaderParameters, true);
	}

	if (GUseEmulatedUniformBuffers)
	{
		return;
	}

	ComputeShader->LinkedProgram->VerifyUniformBlockBindings(CrossCompiler::SHADER_STAGE_COMPUTE, OGL_FIRST_UNIFORM_BUFFER);
	BindUniformBufferBase(
		ContextState,
		ComputeShader->Bindings.NumUniformBuffers,
		PendingState.BoundUniformBuffers[SF_Compute],
		OGL_FIRST_UNIFORM_BUFFER,
		bProgramChanged);
	SetupBindlessTextures( ContextState, ComputeShader->LinkedProgram->Samplers );
}
|
|
|
|
/** Constructor. */
|
|
FOpenGLShaderParameterCache::FOpenGLShaderParameterCache() :
|
|
GlobalUniformArraySize(-1)
|
|
{
|
|
for (int32 ArrayIndex = 0; ArrayIndex < CrossCompiler::PACKED_TYPEINDEX_MAX; ++ArrayIndex)
|
|
{
|
|
PackedGlobalUniformDirty[ArrayIndex].StartVector = 0;
|
|
PackedGlobalUniformDirty[ArrayIndex].NumVectors = 0;
|
|
}
|
|
}
|
|
|
|
/**
 * Allocates and zeroes the packed global uniform storage and its scratch copy.
 * Each of the two allocations is carved into PACKED_TYPEINDEX_MAX equally sized arrays,
 * and every array is marked fully dirty. Must only be called on an uninitialized cache.
 *
 * @param UniformArraySize	Requested size in bytes of one uniform array; rounded up to a multiple of SizeOfFloat4.
 */
void FOpenGLShaderParameterCache::InitializeResources(int32 UniformArraySize)
{
	check(GlobalUniformArraySize == -1);

	// Uniform arrays have to be multiples of float4s.
	UniformArraySize = Align(UniformArraySize,SizeOfFloat4);

	const auto TotalBytes = UniformArraySize * CrossCompiler::PACKED_TYPEINDEX_MAX;

	PackedGlobalUniforms[0] = (uint8*)FMemory::Malloc(TotalBytes);
	PackedUniformsScratch[0] = (uint8*)FMemory::Malloc(TotalBytes);

	FMemory::Memzero(PackedGlobalUniforms[0], TotalBytes);
	FMemory::Memzero(PackedUniformsScratch[0], TotalBytes);

	// Array N starts N * UniformArraySize bytes into its allocation.
	for (int32 TypeIndex = 1; TypeIndex < CrossCompiler::PACKED_TYPEINDEX_MAX; ++TypeIndex)
	{
		PackedGlobalUniforms[TypeIndex] = PackedGlobalUniforms[0] + TypeIndex * UniformArraySize;
		PackedUniformsScratch[TypeIndex] = PackedUniformsScratch[0] + TypeIndex * UniformArraySize;
	}
	GlobalUniformArraySize = UniformArraySize;

	for (int32 TypeIndex = 0; TypeIndex < CrossCompiler::PACKED_TYPEINDEX_MAX; ++TypeIndex)
	{
		auto& DirtyRange = PackedGlobalUniformDirty[TypeIndex];
		DirtyRange.StartVector = 0;
		DirtyRange.NumVectors = UniformArraySize / SizeOfFloat4;
	}
}
|
|
|
|
/** Destructor. */
|
|
FOpenGLShaderParameterCache::~FOpenGLShaderParameterCache()
|
|
{
|
|
if (GlobalUniformArraySize > 0)
|
|
{
|
|
FMemory::Free(PackedUniformsScratch[0]);
|
|
FMemory::Free(PackedGlobalUniforms[0]);
|
|
}
|
|
|
|
FMemory::Memzero(PackedUniformsScratch);
|
|
FMemory::Memzero(PackedGlobalUniforms);
|
|
|
|
GlobalUniformArraySize = -1;
|
|
}
|
|
|
|
/**
|
|
* Marks all uniform arrays as dirty.
|
|
*/
|
|
void FOpenGLShaderParameterCache::MarkAllDirty()
|
|
{
|
|
for (int32 ArrayIndex = 0; ArrayIndex < CrossCompiler::PACKED_TYPEINDEX_MAX; ++ArrayIndex)
|
|
{
|
|
PackedGlobalUniformDirty[ArrayIndex].StartVector = 0;
|
|
PackedGlobalUniformDirty[ArrayIndex].NumVectors = GlobalUniformArraySize / SizeOfFloat4;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Set parameter values.
|
|
*/
|
|
void FOpenGLShaderParameterCache::Set(uint32 BufferIndexName, uint32 ByteOffset, uint32 NumBytes, const void* NewValues)
|
|
{
|
|
uint32 BufferIndex = CrossCompiler::PackedTypeNameToTypeIndex(BufferIndexName);
|
|
check(GlobalUniformArraySize != -1);
|
|
check(BufferIndex < CrossCompiler::PACKED_TYPEINDEX_MAX);
|
|
check(ByteOffset + NumBytes <= (uint32)GlobalUniformArraySize);
|
|
PackedGlobalUniformDirty[BufferIndex].MarkDirtyRange(ByteOffset / SizeOfFloat4, (NumBytes + SizeOfFloat4 - 1) / SizeOfFloat4);
|
|
FMemory::Memcpy(PackedGlobalUniforms[BufferIndex] + ByteOffset, NewValues, NumBytes);
|
|
}
|
|
|
|
/**
|
|
* Commit shader parameters to the currently bound program.
|
|
* @param ParameterTable - Information on the bound uniform arrays for the program.
|
|
*/
|
|
void FOpenGLShaderParameterCache::CommitPackedGlobals(const FOpenGLLinkedProgram* LinkedProgram, int32 Stage)
|
|
{
|
|
SCOPE_CYCLE_COUNTER(STAT_OpenGLUniformCommitTime);
|
|
VERIFY_GL_SCOPE();
|
|
const uint32 BytesPerRegister = 16;
|
|
|
|
/**
|
|
* Note that this always uploads the entire uniform array when it is dirty.
|
|
* The arrays are marked dirty either when the bound shader state changes or
|
|
* a value in the array is modified. OpenGL actually caches uniforms per-
|
|
* program. If we shadowed those per-program uniforms we could avoid calling
|
|
* glUniform4?v for values that have not changed since the last invocation
|
|
* of the program.
|
|
*
|
|
* It's unclear whether the driver does the same thing and whether there is
|
|
* a performance benefit. Even if there is, this type of caching makes any
|
|
* multithreading vastly more difficult, so for now uniforms are not cached
|
|
* per-program.
|
|
*/
|
|
const TArray<FOpenGLLinkedProgram::FPackedUniformInfo>& PackedUniforms = LinkedProgram->StagePackedUniformInfo[Stage].PackedUniformInfos;
|
|
const TArray<CrossCompiler::FPackedArrayInfo>& PackedArrays = LinkedProgram->Config.Shaders[Stage].Bindings.PackedGlobalArrays;
|
|
for (int32 PackedUniform = 0; PackedUniform < PackedUniforms.Num(); ++PackedUniform)
|
|
{
|
|
const FOpenGLLinkedProgram::FPackedUniformInfo& UniformInfo = PackedUniforms[PackedUniform];
|
|
if (UniformInfo.Location < 0)
|
|
{
|
|
// Probably this uniform array was optimized away in a linked program
|
|
continue;
|
|
}
|
|
|
|
const uint32 ArrayIndex = UniformInfo.Index;
|
|
check(ArrayIndex < CrossCompiler::PACKED_TYPEINDEX_MAX);
|
|
const int32 NumVectors = PackedArrays[PackedUniform].Size / BytesPerRegister;
|
|
GLint Location = UniformInfo.Location;
|
|
const void* UniformData = PackedGlobalUniforms[ArrayIndex];
|
|
|
|
// This has to be >=. If LowVector == HighVector it means that particular vector was written to.
|
|
if (PackedGlobalUniformDirty[ArrayIndex].NumVectors > 0)
|
|
{
|
|
const int32 StartVector = PackedGlobalUniformDirty[ArrayIndex].StartVector;
|
|
int32 NumDirtyVectors = FMath::Min((int32)PackedGlobalUniformDirty[ArrayIndex].NumVectors, NumVectors - StartVector);
|
|
check(NumDirtyVectors);
|
|
UniformData = (uint8*)UniformData + StartVector * SizeOfFloat4;
|
|
Location += StartVector;
|
|
switch (UniformInfo.Index)
|
|
{
|
|
case CrossCompiler::PACKED_TYPEINDEX_HIGHP:
|
|
case CrossCompiler::PACKED_TYPEINDEX_MEDIUMP:
|
|
case CrossCompiler::PACKED_TYPEINDEX_LOWP:
|
|
FOpenGL::ProgramUniform4fv(LinkedProgram->Config.Shaders[Stage].Resource, Location, NumDirtyVectors, (GLfloat*)UniformData);
|
|
break;
|
|
|
|
case CrossCompiler::PACKED_TYPEINDEX_INT:
|
|
FOpenGL::ProgramUniform4iv(LinkedProgram->Config.Shaders[Stage].Resource, Location, NumDirtyVectors, (GLint*)UniformData);
|
|
break;
|
|
|
|
case CrossCompiler::PACKED_TYPEINDEX_UINT:
|
|
FOpenGL::ProgramUniform4uiv(LinkedProgram->Config.Shaders[Stage].Resource, Location, NumDirtyVectors, (GLuint*)UniformData);
|
|
break;
|
|
}
|
|
|
|
PackedGlobalUniformDirty[ArrayIndex].StartVector = 0;
|
|
PackedGlobalUniformDirty[ArrayIndex].NumVectors = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
 * Commits emulated uniform buffers for one shader stage.
 * With flattened uniform buffers the data is merged into the packed global arrays and marked
 * dirty, so that a later CommitPackedGlobals call uploads it. Otherwise each buffer that changed
 * since the last commit is split into the scratch arrays and uploaded to the program right away.
 * @param LinkedProgram - Linked program; its per-stage record of last committed buffer IDs is updated.
 * @param Stage - Index of the shader stage within LinkedProgram.
 * @param RHIUniformBuffers - Bound uniform buffers, indexed by buffer slot; every used slot must be non-null.
 * @param UniformBuffersCopyInfo - Copy commands describing where each piece of buffer data goes.
 *        NOTE(review): the early-out logic below relies on this list being ordered by SourceUBIndex - confirm against the producer.
 */
void FOpenGLShaderParameterCache::CommitPackedUniformBuffers(FOpenGLLinkedProgram* LinkedProgram, int32 Stage, FUniformBufferRHIRef* RHIUniformBuffers, const TArray<CrossCompiler::FUniformBufferCopyInfo>& UniformBuffersCopyInfo)
{
	SCOPE_CYCLE_COUNTER(STAT_OpenGLConstantBufferUpdateTime);
	VERIFY_GL_SCOPE();

	// Uniform Buffers are split into precision/type; the list of RHI UBs is traversed and if a new one was set, its
	// contents are copied per precision/type into corresponding scratch buffers which are then uploaded to the program
	const FOpenGLShaderBindings& Bindings = LinkedProgram->Config.Shaders[Stage].Bindings;
	check(Bindings.NumUniformBuffers <= FOpenGLRHIState::MAX_UNIFORM_BUFFERS_PER_SHADER_STAGE);

	if (Bindings.bFlattenUB)
	{
		// Index of the first copy command that has not been consumed by an earlier buffer.
		int32 LastInfoIndex = 0;
		for (int32 BufferIndex = 0; BufferIndex < Bindings.NumUniformBuffers; ++BufferIndex)
		{
			const FOpenGLUniformBuffer* UniformBuffer = (FOpenGLUniformBuffer*)RHIUniformBuffers[BufferIndex].GetReference();
			check(UniformBuffer);
			const uint32* RESTRICT SourceData = UniformBuffer->EmulatedBufferData->Data.GetData();
			for (int32 InfoIndex = LastInfoIndex; InfoIndex < UniformBuffersCopyInfo.Num(); ++InfoIndex)
			{
				const CrossCompiler::FUniformBufferCopyInfo& Info = UniformBuffersCopyInfo[InfoIndex];
				if (Info.SourceUBIndex == BufferIndex)
				{
					check((Info.DestOffsetInFloats + Info.SizeInFloats) * sizeof(float) <= (uint32)GlobalUniformArraySize);
					float* RESTRICT ScratchMem = (float*)PackedGlobalUniforms[Info.DestUBTypeIndex];
					ScratchMem += Info.DestOffsetInFloats;
					FMemory::Memcpy(ScratchMem, SourceData + Info.SourceOffsetInFloats, Info.SizeInFloats * sizeof(float));

					// Mark every vector the copy touches. Rounding only the size up would miss the last
					// vector when the destination offset is not float4-aligned and the copy crosses a
					// vector boundary, so the range is derived from the first and one-past-last vectors.
					const auto FirstVector = Info.DestOffsetInFloats / NumFloatsInFloat4;
					const auto EndVector = (Info.DestOffsetInFloats + Info.SizeInFloats + NumFloatsInFloat4 - 1) / NumFloatsInFloat4;
					// A zero-sized copy touches nothing, regardless of alignment.
					const auto NumDirtyVectors = (Info.SizeInFloats > 0) ? (EndVector - FirstVector) : 0;
					PackedGlobalUniformDirty[Info.DestUBTypeIndex].MarkDirtyRange(FirstVector, NumDirtyVectors);
				}
				else
				{
					// First command of another buffer: resume from here for the next buffer.
					LastInfoIndex = InfoIndex;
					break;
				}
			}
		}
	}
	else
	{
		const auto& PackedUniformBufferInfos = LinkedProgram->StagePackedUniformInfo[Stage].PackedUniformBufferInfos;
		int32 LastCopyInfoIndex = 0;
		// Unique IDs of the buffers committed last time; used to skip buffers that have not changed.
		auto& EmulatedUniformBufferSet = LinkedProgram->StagePackedUniformInfo[Stage].LastEmulatedUniformBufferSet;
		for (int32 BufferIndex = 0; BufferIndex < Bindings.NumUniformBuffers; ++BufferIndex)
		{
			const FOpenGLUniformBuffer* UniformBuffer = (FOpenGLUniformBuffer*)RHIUniformBuffers[BufferIndex].GetReference();
			check(UniformBuffer);
			if (EmulatedUniformBufferSet[BufferIndex] != UniformBuffer->UniqueID)
			{
				EmulatedUniformBufferSet[BufferIndex] = UniformBuffer->UniqueID;

				// Go through the list of copy commands and perform the appropriate copy into the scratch buffer
				for (int32 InfoIndex = LastCopyInfoIndex; InfoIndex < UniformBuffersCopyInfo.Num(); ++InfoIndex)
				{
					const CrossCompiler::FUniformBufferCopyInfo& Info = UniformBuffersCopyInfo[InfoIndex];
					if (Info.SourceUBIndex == BufferIndex)
					{
						// Same bounds guarantee as the flattened path; the scratch arrays have the same size as the global ones.
						check((Info.DestOffsetInFloats + Info.SizeInFloats) * sizeof(float) <= (uint32)GlobalUniformArraySize);
						const uint32* RESTRICT SourceData = UniformBuffer->EmulatedBufferData->Data.GetData();
						SourceData += Info.SourceOffsetInFloats;
						float* RESTRICT ScratchMem = (float*)PackedUniformsScratch[Info.DestUBTypeIndex];
						ScratchMem += Info.DestOffsetInFloats;
						FMemory::Memcpy(ScratchMem, SourceData, Info.SizeInFloats * sizeof(float));
					}
					else if (Info.SourceUBIndex > BufferIndex)
					{
						// Done finding current copies
						LastCopyInfoIndex = InfoIndex;
						break;
					}

					// keep going since we could have skipped this loop when skipping cached UBs...
				}

				// Upload the split buffers to the program
				const auto& UniformBufferUploadInfoList = PackedUniformBufferInfos[BufferIndex];
				auto& UBInfo = Bindings.PackedUniformBuffers[BufferIndex];
				for (int32 InfoIndex = 0; InfoIndex < UniformBufferUploadInfoList.Num(); ++InfoIndex)
				{
					const auto& UniformInfo = UniformBufferUploadInfoList[InfoIndex];
					const void* RESTRICT UniformData = PackedUniformsScratch[UniformInfo.Index];
					int32 NumVectors = UBInfo[InfoIndex].Size / SizeOfFloat4;
					check(UniformInfo.ArrayType == UBInfo[InfoIndex].TypeName);
					switch (UniformInfo.Index)
					{
					case CrossCompiler::PACKED_TYPEINDEX_HIGHP:
					case CrossCompiler::PACKED_TYPEINDEX_MEDIUMP:
					case CrossCompiler::PACKED_TYPEINDEX_LOWP:
						FOpenGL::ProgramUniform4fv(LinkedProgram->Config.Shaders[Stage].Resource, UniformInfo.Location, NumVectors, (GLfloat*)UniformData);
						break;

					case CrossCompiler::PACKED_TYPEINDEX_INT:
						FOpenGL::ProgramUniform4iv(LinkedProgram->Config.Shaders[Stage].Resource, UniformInfo.Location, NumVectors, (GLint*)UniformData);
						break;

					case CrossCompiler::PACKED_TYPEINDEX_UINT:
						FOpenGL::ProgramUniform4uiv(LinkedProgram->Config.Shaders[Stage].Resource, UniformInfo.Location, NumVectors, (GLuint*)UniformData);
						break;
					}
				}
			}
		}
	}
}
|
|
|
|
|
|
// Currently only Android platform can use binary program cache
|
|
TAutoConsoleVariable<int32> FOpenGLProgramBinaryCache::CVarUseProgramBinaryCache(
|
|
TEXT("r.UseProgramBinaryCache"),
|
|
0,
|
|
TEXT("If true, enables binary program cache"),
|
|
ECVF_ReadOnly | ECVF_RenderThreadSafe
|
|
);
|
|
|
|
FOpenGLProgramBinaryCache* FOpenGLProgramBinaryCache::CachePtr = nullptr;
|
|
|
|
/**
 * Constructor. Only stores the directory path that cached program binaries are addressed by.
 * @param InCachePath - Directory used as the prefix of every cached program file name.
 */
FOpenGLProgramBinaryCache::FOpenGLProgramBinaryCache(const FString& InCachePath)
	: CachePath(InCachePath)
{
	// Intentionally empty.
}
|
|
|
|
/** Destructor. Intentionally empty. */
FOpenGLProgramBinaryCache::~FOpenGLProgramBinaryCache()
{
}
|
|
|
|
bool FOpenGLProgramBinaryCache::IsEnabled()
|
|
{
|
|
return CachePtr != nullptr;
|
|
}
|
|
|
|
void FOpenGLProgramBinaryCache::Initialize()
|
|
{
|
|
check(CachePtr == nullptr);
|
|
// Can be enabled only on Android platform right now
|
|
bool bEnableCache = PLATFORM_ANDROID ? (CVarUseProgramBinaryCache.GetValueOnAnyThread() != 0) : false;
|
|
|
|
if (bEnableCache && FOpenGL::SupportsProgramBinary())
|
|
{
|
|
FString CacheFolderPath;
|
|
#if PLATFORM_ANDROID
|
|
extern FString GExternalFilePath;
|
|
CacheFolderPath = GExternalFilePath / TEXT("ProgramBinaryCache");
|
|
|
|
#else
|
|
CacheFolderPath = FPaths::GameSavedDir() / TEXT("ProgramBinaryCache");
|
|
#endif
|
|
|
|
ANSICHAR* GLVersion = (ANSICHAR*)glGetString(GL_VERSION);
|
|
ANSICHAR* GLRenderer = (ANSICHAR*)glGetString(GL_RENDERER);
|
|
FString HashString;
|
|
HashString.Append(GLVersion);
|
|
HashString.Append(GLRenderer);
|
|
FSHAHash VersionHash;
|
|
FSHA1::HashBuffer(TCHAR_TO_ANSI(*HashString), HashString.Len(), VersionHash.Hash);
|
|
CacheFolderPath = CacheFolderPath / VersionHash.ToString();
|
|
|
|
IPlatformFile& PlatformFile = FPlatformFileManager::Get().GetPlatformFile();
|
|
if (!PlatformFile.CreateDirectoryTree(*CacheFolderPath))
|
|
{
|
|
UE_LOG(LogRHI, Warning, TEXT("Failed to create directory for a program binary cache. Cache will be disabled: %s"), *CacheFolderPath);
|
|
}
|
|
else
|
|
{
|
|
CachePtr = new FOpenGLProgramBinaryCache(CacheFolderPath);
|
|
UE_LOG(LogRHI, Log, TEXT("Using program binary cache: %s"), *CacheFolderPath);
|
|
}
|
|
}
|
|
}
|
|
|
|
void FOpenGLProgramBinaryCache::Shutdown()
|
|
{
|
|
if (CachePtr)
|
|
{
|
|
delete CachePtr;
|
|
CachePtr = nullptr;
|
|
}
|
|
}
|
|
|
|
/**
 * Stores the shader source for later compilation instead of compiling it now.
 * @param Shader - GL shader object the source belongs to; used as the lookup key.
 * @param GlslCode - Shader source to keep (stored compressed when compression succeeds).
 * @return true if the source was stored and compilation is deferred, false if the caller has to compile it.
 */
bool FOpenGLProgramBinaryCache::DeferShaderCompilation(GLuint Shader, const TArray<ANSICHAR>& GlslCode)
{
#if PLATFORM_ANDROID
	// No deferral while the shader compiler hack checks are running.
	const bool bDeferralAllowed = !FOpenGL::IsCheckingShaderCompilerHacks();
#else
	const bool bDeferralAllowed = true;
#endif

	if (!CachePtr || !bDeferralAllowed)
	{
		return false;
	}

	FPendingShaderCode StoredCode;
	CompressShader(GlslCode, StoredCode);
	CachePtr->ShadersPendingCompilation.Add(Shader, MoveTemp(StoredCode));
	return true;
}
|
|
|
|
/**
 * Retrieves the binary of a linked program from the driver and saves it to the cache.
 * Does nothing when the cache is disabled or when the driver returns no binary.
 * @param Program - GL program object to read the binary from.
 * @param Config - Program configuration; determines the cache file name.
 */
void FOpenGLProgramBinaryCache::CacheProgram(GLuint Program, const FOpenGLLinkedProgramConfiguration& Config)
{
	if (!CachePtr)
	{
		return;
	}

	// Initialized so that a failed query reads as "no binary" instead of an indeterminate value.
	GLint BinaryLength = 0;
	glGetProgramiv(Program, GL_PROGRAM_BINARY_LENGTH, &BinaryLength);
	if (BinaryLength <= 0)
	{
		return;
	}

	TArray<uint8> ProgramBinary;
	// BinaryFormat will be stored at the start of ProgramBinary array
	ProgramBinary.SetNumUninitialized(BinaryLength + sizeof(GLenum));
	uint8* ProgramBinaryPtr = ProgramBinary.GetData();

	// Number of bytes the driver actually wrote; stays 0 if the retrieval fails.
	GLint WrittenLength = 0;
	FOpenGL::GetProgramBinary(Program, BinaryLength, &WrittenLength, (GLenum*)ProgramBinaryPtr, ProgramBinaryPtr + sizeof(GLenum));
	if (WrittenLength <= 0 || WrittenLength > BinaryLength)
	{
		// Nothing usable was returned; saving the uninitialized buffer would poison the cache.
		return;
	}

	// Keep only the bytes that were written, so the stored size matches the binary exactly.
	ProgramBinary.SetNum(WrittenLength + sizeof(GLenum), false);
	CachePtr->SaveProgramBinary(Config, ProgramBinary);
}
|
|
|
|
/**
 * Tries to set up a program from a previously cached binary.
 * @param Program - GL program object to load the binary into.
 * @param Config - Program configuration; determines the cache file name.
 * @return true if a cached binary was found and handed to the driver, false if the cache is
 *         disabled, there is no cached binary, or the cached data is too small to be valid.
 */
bool FOpenGLProgramBinaryCache::UseCachedProgram(GLuint Program, const FOpenGLLinkedProgramConfiguration& Config)
{
	if (CachePtr)
	{
		TArray<uint8> ProgramBinary;
		if (CachePtr->LoadProgramBinary(Config, ProgramBinary))
		{
			const int32 BinarySize = ProgramBinary.Num();
			const int32 FormatSize = (int32)sizeof(GLenum);

			// The data must hold the format header plus at least one byte of binary. Anything shorter
			// (empty or truncated file) would be read out of bounds and yield a negative length.
			if (BinarySize > FormatSize)
			{
				uint8* ProgramBinaryPtr = ProgramBinary.GetData();
				// BinaryFormat is stored at the start of ProgramBinary array
				FOpenGL::ProgramBinary(Program, ((GLenum*)ProgramBinaryPtr)[0], ProgramBinaryPtr + FormatSize, BinarySize - FormatSize);
				return true;
			}
		}
	}
	return false;
}
|
|
|
|
void FOpenGLProgramBinaryCache::CompilePendingShaders(const FOpenGLLinkedProgramConfiguration& Config)
|
|
{
|
|
if (CachePtr)
|
|
{
|
|
for (int32 StageIdx = 0; StageIdx < ARRAY_COUNT(Config.Shaders); ++StageIdx)
|
|
{
|
|
GLuint ShaderResource = Config.Shaders[StageIdx].Resource;
|
|
FPendingShaderCode* PendingShaderCodePtr = CachePtr->ShadersPendingCompilation.Find(ShaderResource);
|
|
if (PendingShaderCodePtr)
|
|
{
|
|
TArray<ANSICHAR> GlslCode;
|
|
UncompressShader(*PendingShaderCodePtr, GlslCode);
|
|
CompileCurrentShader(ShaderResource, GlslCode);
|
|
CachePtr->ShadersPendingCompilation.Remove(ShaderResource);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
 * Builds the cache file name for a program configuration.
 * @param Config - Program configuration listing the shader of each stage.
 * @return The cache path, a "/" and the concatenated hash strings of all stages that have a shader.
 */
FString FOpenGLProgramBinaryCache::GetProgramBinaryFilename(const FOpenGLLinkedProgramConfiguration& Config) const
{
	FString Result = CachePath + TEXT("/");

	for (const auto& StageShader : Config.Shaders)
	{
		if (StageShader.Resource == 0)
		{
			// Stages without a shader do not contribute to the name.
			continue;
		}
		Result.Append(StageShader.Hash.ToString());
	}

	return Result;
}
|
|
|
|
bool FOpenGLProgramBinaryCache::LoadProgramBinary(const FOpenGLLinkedProgramConfiguration& Config, TArray<uint8>& OutBinary) const
|
|
{
|
|
FString ProgramFilename = GetProgramBinaryFilename(Config);
|
|
|
|
IPlatformFile& PlatformFile = FPlatformFileManager::Get().GetPlatformFile();
|
|
TUniquePtr<IFileHandle> FileHandle(PlatformFile.OpenRead(*ProgramFilename));
|
|
if (FileHandle.IsValid())
|
|
{
|
|
int64 BinarySize = FileHandle->Size();
|
|
OutBinary.SetNum((int32)BinarySize);
|
|
FileHandle->Read(OutBinary.GetData(), BinarySize);
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
void FOpenGLProgramBinaryCache::SaveProgramBinary(const FOpenGLLinkedProgramConfiguration& Config, const TArray<uint8>& InBinary) const
|
|
{
|
|
FString ProgramFilename = GetProgramBinaryFilename(Config);
|
|
|
|
IPlatformFile& PlatformFile = FPlatformFileManager::Get().GetPlatformFile();
|
|
TUniquePtr<IFileHandle> FileHandle(PlatformFile.OpenWrite(*ProgramFilename));
|
|
if (FileHandle.IsValid())
|
|
{
|
|
FileHandle->Write(InBinary.GetData(), InBinary.Num());
|
|
}
|
|
}
|
|
|
|
void FOpenGLProgramBinaryCache::CompressShader(const TArray<ANSICHAR>& InGlslCode, FPendingShaderCode& OutCompressedShader)
|
|
{
|
|
check(InGlslCode.GetTypeSize() == sizeof(uint8));
|
|
check(OutCompressedShader.GlslCode.GetTypeSize() == sizeof(uint8));
|
|
|
|
int32 UncompressedSize = InGlslCode.Num();
|
|
int32 CompressedSize = UncompressedSize * 4.f / 3.f;
|
|
OutCompressedShader.GlslCode.Empty(CompressedSize);
|
|
OutCompressedShader.GlslCode.SetNum(CompressedSize);
|
|
|
|
OutCompressedShader.bCompressed = FCompression::CompressMemory(
|
|
(ECompressionFlags)(COMPRESS_ZLIB | COMPRESS_BiasMemory),
|
|
(void*)OutCompressedShader.GlslCode.GetData(),
|
|
CompressedSize,
|
|
(void*)InGlslCode.GetData(),
|
|
UncompressedSize);
|
|
|
|
if (OutCompressedShader.bCompressed)
|
|
{
|
|
// shrink buffer
|
|
OutCompressedShader.GlslCode.SetNum(CompressedSize, true);
|
|
}
|
|
else
|
|
{
|
|
OutCompressedShader.GlslCode = InGlslCode;
|
|
}
|
|
|
|
OutCompressedShader.UncompressedSize = UncompressedSize;
|
|
|
|
}
|
|
|
|
void FOpenGLProgramBinaryCache::UncompressShader(const FPendingShaderCode& InCompressedShader, TArray<ANSICHAR>& OutGlslCode)
|
|
{
|
|
check(OutGlslCode.GetTypeSize() == sizeof(uint8));
|
|
check(InCompressedShader.GlslCode.GetTypeSize() == sizeof(uint8));
|
|
|
|
if (InCompressedShader.bCompressed)
|
|
{
|
|
int32 UncompressedSize = InCompressedShader.UncompressedSize;
|
|
OutGlslCode.Empty(UncompressedSize);
|
|
OutGlslCode.SetNum(UncompressedSize);
|
|
|
|
bool bResult = FCompression::UncompressMemory(
|
|
(ECompressionFlags)(COMPRESS_ZLIB | COMPRESS_BiasMemory),
|
|
(void*)OutGlslCode.GetData(),
|
|
UncompressedSize,
|
|
(void*)InCompressedShader.GlslCode.GetData(),
|
|
InCompressedShader.GlslCode.Num());
|
|
|
|
check(bResult);
|
|
}
|
|
else
|
|
{
|
|
OutGlslCode = InCompressedShader.GlslCode;
|
|
}
|
|
}
|
|
|