Merge pull request #18642 from hrydgard/vulkan-barrier-optimization

Vulkan barrier optimizations
This commit is contained in:
Henrik Rydgård
2023-12-30 11:14:53 +01:00
committed by GitHub
12 changed files with 283 additions and 170 deletions

View File

@@ -11,3 +11,97 @@ void VulkanBarrier::Flush(VkCommandBuffer cmd) {
dstStageMask_ = 0;
dependencyFlags_ = 0;
}
// Queues an image layout transition where the caller specifies every access and stage mask.
//
// No Vulkan command is issued from this function: the barrier is appended to the pending list
// and the stage masks are OR-ed into the batch-wide masks held by this object.
//
// image            Image to transition. Must not be VK_NULL_HANDLE (debug-asserted).
// baseMip          First mip level covered by the barrier.
// numMipLevels     Number of mip levels covered, starting at baseMip.
// numLayers        Number of array layers covered, always starting at layer 0.
// aspectMask       Image aspects (color/depth/stencil) the barrier applies to.
// oldImageLayout   Layout the image is transitioned from.
// newImageLayout   Layout the image is transitioned to.
// srcAccessMask / dstAccessMask   Access masks stored in the barrier.
// srcStageMask / dstStageMask     Stage masks merged into the batch-wide masks.
void VulkanBarrier::TransitionImage(
	VkImage image, int baseMip, int numMipLevels, int numLayers, VkImageAspectFlags aspectMask,
	VkImageLayout oldImageLayout, VkImageLayout newImageLayout,
	VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask,
	VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask
) {
	_dbg_assert_(image != VK_NULL_HANDLE);

	// Fold this transition's requirements into the state shared by the whole batch.
	dependencyFlags_ |= VK_DEPENDENCY_BY_REGION_BIT;
	dstStageMask_ |= dstStageMask;
	srcStageMask_ |= srcStageMask;

	// The entry comes from push_uninitialized(), so every member is written explicitly below.
	VkImageMemoryBarrier &entry = imageBarriers_.push_uninitialized();
	entry.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	entry.pNext = nullptr;

	// No queue family ownership transfer.
	entry.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	entry.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

	entry.image = image;
	entry.oldLayout = oldImageLayout;
	entry.newLayout = newImageLayout;
	entry.srcAccessMask = srcAccessMask;
	entry.dstAccessMask = dstAccessMask;

	auto &range = entry.subresourceRange;
	range.aspectMask = aspectMask;
	range.baseMipLevel = baseMip;
	range.levelCount = numMipLevels;
	range.baseArrayLayer = 0;
	// NOTE: We could usually use VK_REMAINING_ARRAY_LAYERS/VK_REMAINING_MIP_LEVELS, but really old Mali drivers have problems with those.
	range.layerCount = numLayers;
}
// Queues an image layout transition, deriving the access masks and pipeline stages from the
// old and new layouts instead of taking them from the caller.
//
// Only the layouts handled by the two switches below are supported; any other layout trips an
// assert. The only supported destination is VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, which is
// mapped to a fragment-shader read.
//
// No Vulkan command is issued from this function: the barrier is appended to the pending list
// and the derived stage bits are OR-ed into the batch-wide stage masks.
//
// image            Image to transition. Must not be VK_NULL_HANDLE (debug-asserted).
// baseMip          First mip level covered by the barrier.
// numMipLevels     Number of mip levels covered, starting at baseMip.
// numLayers        Number of array layers covered, always starting at layer 0.
// aspectMask       Image aspects the barrier applies to.
// oldImageLayout   Layout the image is transitioned from.
// newImageLayout   Layout the image is transitioned to.
void VulkanBarrier::TransitionImageAuto(
	VkImage image, int baseMip, int numMipLevels, int numLayers, VkImageAspectFlags aspectMask,
	VkImageLayout oldImageLayout, VkImageLayout newImageLayout) {
	_dbg_assert_(image != VK_NULL_HANDLE);

	VkAccessFlags srcAccessMask = 0;
	VkAccessFlags dstAccessMask = 0;

	// Source side: what must have completed before the transition may happen.
	switch (oldImageLayout) {
	case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
		// Assert aspect here?
		srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
		srcStageMask_ |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
		break;
	case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
		// Assert aspect here?
		srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
		srcStageMask_ |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
		break;
	case VK_IMAGE_LAYOUT_UNDEFINED:
		if (aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
			// Kept as-is from the original code, which itself noted that this "seems wrong":
			// color images coming from UNDEFINED are treated like color attachments.
			srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
			srcStageMask_ |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
		} else {
			// There are no prior contents to wait for, so srcAccessMask stays 0. We still have to
			// contribute a source stage: if this is the only barrier in the batch, the source stage
			// mask would otherwise be left at 0, which vkCmdPipelineBarrier does not accept without
			// the synchronization2 feature. TOP_OF_PIPE adds no extra waiting.
			srcStageMask_ |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
		}
		break;
	case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
		srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
		srcStageMask_ |= VK_PIPELINE_STAGE_TRANSFER_BIT;
		break;
	case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
		srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
		srcStageMask_ |= VK_PIPELINE_STAGE_TRANSFER_BIT;
		break;
	default:
		_assert_msg_(false, "Unexpected oldLayout: %d", (int)oldImageLayout);
		break;
	}

	// Destination side: what is going to consume the image after the transition.
	switch (newImageLayout) {
	case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
		dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
		dstStageMask_ |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
		break;
	default:
		_assert_msg_(false, "Unexpected newLayout: %d", (int)newImageLayout);
		break;
	}

	// The entry comes from push_uninitialized(), so every member is written explicitly below.
	VkImageMemoryBarrier &imageBarrier = imageBarriers_.push_uninitialized();
	imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
	imageBarrier.pNext = nullptr;
	imageBarrier.srcAccessMask = srcAccessMask;
	imageBarrier.dstAccessMask = dstAccessMask;
	imageBarrier.oldLayout = oldImageLayout;
	imageBarrier.newLayout = newImageLayout;
	imageBarrier.image = image;
	imageBarrier.subresourceRange.aspectMask = aspectMask;
	imageBarrier.subresourceRange.baseMipLevel = baseMip;
	imageBarrier.subresourceRange.levelCount = numMipLevels;
	imageBarrier.subresourceRange.layerCount = numLayers;  // NOTE: We could usually use VK_REMAINING_ARRAY_LAYERS/VK_REMAINING_MIP_LEVELS, but really old Mali drivers have problems with those.
	imageBarrier.subresourceRange.baseArrayLayer = 0;
	// No queue family ownership transfer.
	imageBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
	imageBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
}

View File

@@ -6,9 +6,54 @@
#include "Common/Log.h"
#include "Common/GPU/Vulkan/VulkanLoader.h"
#include "Common/Data/Collections/FastVec.h"
#include "Common/Data/Collections/TinySet.h"
class VulkanContext;
// Accumulates image memory barriers, together with the union of their stage masks and
// dependency flags, and submits all of them with a single vkCmdPipelineBarrier in Flush().
class VulkanBarrierBatch {
public:
	// Debug builds assert that the batch was flushed before being destroyed.
	~VulkanBarrierBatch() {
		_dbg_assert_(imageBarriers_.empty());
	}

	// Appends a barrier for `image` and merges the given stage masks and dependency flags into
	// the batch-wide ones.
	//
	// The returned barrier is pre-filled for the common case: color aspect, mip level 0,
	// array layer 0, one level, one layer, no queue family transfer. This function does NOT
	// write oldLayout, newLayout, srcAccessMask or dstAccessMask; the caller must set those
	// (and override any default that doesn't fit) through the returned pointer.
	//
	// NOTE(review): the pointer refers to an element of the internal vector; presumably it
	// should not be kept across further Add() calls - verify against FastVec.
	VkImageMemoryBarrier *Add(VkImage image, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags) {
		dependencyFlags_ |= dependencyFlags;
		dstStageMask_ |= dstStageMask;
		srcStageMask_ |= srcStageMask;

		VkImageMemoryBarrier *entry = &imageBarriers_.push_uninitialized();

		// Initialize good defaults for the usual things.
		entry->sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
		entry->pNext = nullptr;
		entry->image = image;
		entry->srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		entry->dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

		auto &range = entry->subresourceRange;
		range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
		range.baseMipLevel = 0;
		range.levelCount = 1;
		range.baseArrayLayer = 0;
		range.layerCount = 1;
		return entry;
	}

	// Records all pending barriers into `cmd` as one pipeline barrier, then resets the batch
	// so it can be reused. Does nothing when no barriers are pending.
	void Flush(VkCommandBuffer cmd) {
		if (imageBarriers_.empty()) {
			return;
		}
		vkCmdPipelineBarrier(cmd, srcStageMask_, dstStageMask_, dependencyFlags_, 0, nullptr, 0, nullptr, (uint32_t)imageBarriers_.size(), imageBarriers_.data());
		imageBarriers_.clear();
		dependencyFlags_ = 0;
		dstStageMask_ = 0;
		srcStageMask_ = 0;
	}

	// True when no barriers are waiting to be flushed.
	bool empty() const { return imageBarriers_.empty(); }

private:
	// Barriers queued since the last Flush().
	FastVec<VkImageMemoryBarrier> imageBarriers_;
	// Union of the masks/flags passed to Add() since the last Flush().
	VkPipelineStageFlags srcStageMask_ = 0;
	VkPipelineStageFlags dstStageMask_ = 0;
	VkDependencyFlags dependencyFlags_ = 0;
};
// Collects multiple barriers into one, then flushes it.
// Reusable after a flush, in case you want to reuse the allocation made by the vector.
// However, not thread safe in any way!
@@ -16,101 +61,19 @@ class VulkanBarrier {
public:
VulkanBarrier() : imageBarriers_(4) {}
bool empty() const { return imageBarriers_.empty(); }
void TransitionImage(
VkImage image, int baseMip, int numMipLevels, int numLayers, VkImageAspectFlags aspectMask,
VkImageLayout oldImageLayout, VkImageLayout newImageLayout,
VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask,
VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask
) {
_dbg_assert_(image != VK_NULL_HANDLE);
srcStageMask_ |= srcStageMask;
dstStageMask_ |= dstStageMask;
dependencyFlags_ |= VK_DEPENDENCY_BY_REGION_BIT;
VkImageMemoryBarrier &imageBarrier = imageBarriers_.push_uninitialized();
imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
imageBarrier.pNext = nullptr;
imageBarrier.srcAccessMask = srcAccessMask;
imageBarrier.dstAccessMask = dstAccessMask;
imageBarrier.oldLayout = oldImageLayout;
imageBarrier.newLayout = newImageLayout;
imageBarrier.image = image;
imageBarrier.subresourceRange.aspectMask = aspectMask;
imageBarrier.subresourceRange.baseMipLevel = baseMip;
imageBarrier.subresourceRange.levelCount = numMipLevels;
imageBarrier.subresourceRange.layerCount = numLayers; // NOTE: We could usually use VK_REMAINING_ARRAY_LAYERS/VK_REMAINING_MIP_LEVELS, but really old Mali drivers have problems with those.
imageBarrier.subresourceRange.baseArrayLayer = 0;
imageBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imageBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
}
);
// Automatically determines access and stage masks from layouts.
// Not universally usable, but works for PPSSPP's use.
void TransitionImageAuto(
VkImage image, int baseMip, int numMipLevels, int numLayers, VkImageAspectFlags aspectMask, VkImageLayout oldImageLayout, VkImageLayout newImageLayout
) {
_dbg_assert_(image != VK_NULL_HANDLE);
VkAccessFlags srcAccessMask = 0;
VkAccessFlags dstAccessMask = 0;
switch (oldImageLayout) {
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
// Assert aspect here?
srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
srcStageMask_ |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
// Assert aspect here?
srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
srcStageMask_ |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
break;
case VK_IMAGE_LAYOUT_UNDEFINED:
// Actually this seems wrong?
if (aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
srcStageMask_ |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
}
break;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
srcStageMask_ |= VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
srcStageMask_ |= VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
default:
_assert_msg_(false, "Unexpected oldLayout: %d", (int)oldImageLayout);
break;
}
switch (newImageLayout) {
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
dstStageMask_ |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
break;
default:
_assert_msg_(false, "Unexpected newLayout: %d", (int)newImageLayout);
break;
}
VkImageMemoryBarrier &imageBarrier = imageBarriers_.push_uninitialized();
imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
imageBarrier.pNext = nullptr;
imageBarrier.srcAccessMask = srcAccessMask;
imageBarrier.dstAccessMask = dstAccessMask;
imageBarrier.oldLayout = oldImageLayout;
imageBarrier.newLayout = newImageLayout;
imageBarrier.image = image;
imageBarrier.subresourceRange.aspectMask = aspectMask;
imageBarrier.subresourceRange.baseMipLevel = baseMip;
imageBarrier.subresourceRange.levelCount = numMipLevels;
imageBarrier.subresourceRange.layerCount = numLayers; // NOTE: We could usually use VK_REMAINING_ARRAY_LAYERS/VK_REMAINING_MIP_LEVELS, but really old Mali drivers have problems with those.
imageBarrier.subresourceRange.baseArrayLayer = 0;
imageBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imageBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
}
void TransitionImageAuto(VkImage image, int baseMip, int numMipLevels, int numLayers, VkImageAspectFlags aspectMask,
VkImageLayout oldImageLayout, VkImageLayout newImageLayout);
void Flush(VkCommandBuffer cmd);

View File

@@ -87,6 +87,15 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback(
// False positive
// https://github.com/KhronosGroup/Vulkan-ValidationLayers/issues/3615
return false;
case 1835555994: // [AMD] [NVIDIA] Performance warning : Pipeline VkPipeline 0xa808d50000000033[global_texcolor] was bound twice in the frame.
// Benign perf warnings.
return false;
case 1810669668:
// Clear value but no LOAD_OP_CLEAR. Not worth fixing right now.
return false;
default:
break;
}

View File

@@ -56,23 +56,23 @@ void VKRImage::Delete(VulkanContext *vulkan) {
}
}
VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag)
VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VulkanBarrierBatch *barriers, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag)
: vulkan_(vk), tag_(tag), width(_width), height(_height), numLayers(_numLayers) {
_dbg_assert_(tag);
CreateImage(vulkan_, initCmd, color, width, height, numLayers, VK_SAMPLE_COUNT_1_BIT, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
CreateImage(vulkan_, barriers, initCmd, color, width, height, numLayers, VK_SAMPLE_COUNT_1_BIT, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
if (createDepthStencilBuffer) {
CreateImage(vulkan_, initCmd, depth, width, height, numLayers, VK_SAMPLE_COUNT_1_BIT, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);
CreateImage(vulkan_, barriers, initCmd, depth, width, height, numLayers, VK_SAMPLE_COUNT_1_BIT, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);
}
if (_multiSampleLevel > 0) {
sampleCount = MultiSampleLevelToFlagBits(_multiSampleLevel);
// TODO: Create a different tag for these?
CreateImage(vulkan_, initCmd, msaaColor, width, height, numLayers, sampleCount, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
CreateImage(vulkan_, barriers, initCmd, msaaColor, width, height, numLayers, sampleCount, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
if (createDepthStencilBuffer) {
CreateImage(vulkan_, initCmd, msaaDepth, width, height, numLayers, sampleCount, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);
CreateImage(vulkan_, barriers, initCmd, msaaDepth, width, height, numLayers, sampleCount, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);
}
} else {
sampleCount = VK_SAMPLE_COUNT_1_BIT;
@@ -161,7 +161,7 @@ VKRFramebuffer::~VKRFramebuffer() {
// NOTE: If numLayers > 1, it will create an array texture, rather than a normal 2D texture.
// This requires a different sampling path!
void VKRFramebuffer::CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int width, int height, int numLayers, VkSampleCountFlagBits sampleCount, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag) {
void VKRFramebuffer::CreateImage(VulkanContext *vulkan, VulkanBarrierBatch *barriers, VkCommandBuffer cmd, VKRImage &img, int width, int height, int numLayers, VkSampleCountFlagBits sampleCount, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag) {
// We don't support more exotic layer setups for now. Mono or stereo.
_dbg_assert_(numLayers == 1 || numLayers == 2);
@@ -251,10 +251,14 @@ void VKRFramebuffer::CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKR
return;
}
TransitionImageLayout2(cmd, img.image, 0, 1, numLayers, aspects,
VK_IMAGE_LAYOUT_UNDEFINED, initialLayout,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, dstStage,
0, dstAccessMask);
VkImageMemoryBarrier *barrier = barriers->Add(img.image, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, dstStage, 0);
barrier->subresourceRange.layerCount = numLayers;
barrier->subresourceRange.aspectMask = aspects;
barrier->oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
barrier->newLayout = initialLayout;
barrier->srcAccessMask = 0;
barrier->dstAccessMask = dstAccessMask;
img.layout = initialLayout;
img.format = format;
img.sampleCount = sampleCount;

View File

@@ -4,6 +4,7 @@
#include "Common/GPU/Vulkan/VulkanContext.h"
class VKRRenderPass;
class VulkanBarrierBatch;
// Pipelines need to be created for the right type of render pass.
// TODO: Rename to RenderPassFlags?
@@ -57,7 +58,7 @@ struct VKRImage {
class VKRFramebuffer {
public:
VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag);
VKRFramebuffer(VulkanContext *vk, VulkanBarrierBatch *barriers, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag);
~VKRFramebuffer();
VkFramebuffer Get(VKRRenderPass *compatibleRenderPass, RenderPassType rpType);
@@ -94,7 +95,7 @@ public:
VulkanContext *Vulkan() const { return vulkan_; }
private:
static void CreateImage(VulkanContext *vulkan, VkCommandBuffer cmd, VKRImage &img, int width, int height, int numLayers, VkSampleCountFlagBits sampleCount, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag);
static void CreateImage(VulkanContext *vulkan, VulkanBarrierBatch *barriers, VkCommandBuffer cmd, VKRImage &img, int width, int height, int numLayers, VkSampleCountFlagBits sampleCount, VkFormat format, VkImageLayout initialLayout, bool color, const char *tag);
VkFramebuffer framebuf[(size_t)RenderPassType::TYPE_COUNT]{};

View File

@@ -5,6 +5,7 @@
#include "Common/GPU/Vulkan/VulkanAlloc.h"
#include "Common/GPU/Vulkan/VulkanImage.h"
#include "Common/GPU/Vulkan/VulkanMemory.h"
#include "Common/GPU/Vulkan/VulkanBarrier.h"
#include "Common/StringUtils.h"
using namespace PPSSPP_VK;
@@ -90,20 +91,21 @@ bool VulkanTexture::CreateDirect(VkCommandBuffer cmd, int w, int h, int depth, i
vulkan_->SetDebugName(image_, VK_OBJECT_TYPE_IMAGE, tag_);
// Write a command to transition the image to the requested layout, if it's not already that layout.
// TODO: We may generate mipmaps right after, so can't add to the end of frame batch. Well actually depending
// on the amount of mips we probably sometimes can..
if (initialLayout != VK_IMAGE_LAYOUT_UNDEFINED && initialLayout != VK_IMAGE_LAYOUT_PREINITIALIZED) {
VkPipelineStageFlags dstStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
VkAccessFlagBits dstAccessFlags;
switch (initialLayout) {
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
TransitionImageLayout2(cmd, image_, 0, numMips, 1, VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_LAYOUT_UNDEFINED, initialLayout,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, VK_ACCESS_TRANSFER_WRITE_BIT);
dstStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
dstAccessFlags = VK_ACCESS_TRANSFER_WRITE_BIT;
break;
case VK_IMAGE_LAYOUT_GENERAL:
// We use this initial layout when we're about to write to the image using a compute shader, only.
TransitionImageLayout2(cmd, image_, 0, numMips, 1, VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_LAYOUT_UNDEFINED, initialLayout,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0, VK_ACCESS_SHADER_WRITE_BIT);
dstStage = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
dstAccessFlags = VK_ACCESS_SHADER_READ_BIT;
break;
default:
// If you planned to use UploadMip, you want VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL. After the
@@ -111,6 +113,10 @@ bool VulkanTexture::CreateDirect(VkCommandBuffer cmd, int w, int h, int depth, i
_assert_(false);
break;
}
TransitionImageLayout2(cmd, image_, 0, numMips, 1, VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_LAYOUT_UNDEFINED, initialLayout,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, dstStage,
0, dstAccessFlags);
}
// Create the view while we're at it.
@@ -205,24 +211,29 @@ void VulkanTexture::GenerateMips(VkCommandBuffer cmd, int firstMipToGenerate, bo
_assert_msg_(firstMipToGenerate > 0, "Cannot generate the first level");
_assert_msg_(firstMipToGenerate < numMips_, "Can't generate levels beyond storage");
VulkanBarrierBatch batch;
// Transition the pre-set levels to GENERAL.
TransitionImageLayout2(cmd, image_, 0, firstMipToGenerate, 1, VK_IMAGE_ASPECT_COLOR_BIT,
fromCompute ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
VK_IMAGE_LAYOUT_GENERAL,
fromCompute ? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT : VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
fromCompute ? VK_ACCESS_SHADER_WRITE_BIT : VK_ACCESS_TRANSFER_WRITE_BIT,
VK_ACCESS_TRANSFER_READ_BIT);
// Do the same with the uninitialized levels, transition from UNDEFINED.
TransitionImageLayout2(cmd, image_, firstMipToGenerate, numMips_ - firstMipToGenerate, 1,
VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_GENERAL,
VkImageMemoryBarrier *barrier = batch.Add(image_,
fromCompute ? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT : VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0);
barrier->oldLayout = fromCompute ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier->newLayout = VK_IMAGE_LAYOUT_GENERAL;
barrier->srcAccessMask = fromCompute ? VK_ACCESS_SHADER_WRITE_BIT : VK_ACCESS_TRANSFER_WRITE_BIT;
barrier->dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barrier->subresourceRange.levelCount = firstMipToGenerate;
barrier = batch.Add(image_,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0,
VK_ACCESS_TRANSFER_WRITE_BIT);
VK_PIPELINE_STAGE_TRANSFER_BIT, 0);
barrier->subresourceRange.baseMipLevel = firstMipToGenerate;
barrier->subresourceRange.levelCount = numMips_ - firstMipToGenerate;
barrier->oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
barrier->newLayout = VK_IMAGE_LAYOUT_GENERAL;
barrier->srcAccessMask = 0;
barrier->dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
batch.Flush(cmd);
// Now we can blit and barrier the whole pipeline.
for (int mip = firstMipToGenerate; mip < numMips_; mip++) {
@@ -248,35 +259,45 @@ void VulkanTexture::GenerateMips(VkCommandBuffer cmd, int firstMipToGenerate, bo
vkCmdBlitImage(cmd, image_, VK_IMAGE_LAYOUT_GENERAL, image_, VK_IMAGE_LAYOUT_GENERAL, 1, &blit, VK_FILTER_LINEAR);
TransitionImageLayout2(cmd, image_, mip, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
barrier = batch.Add(image_, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0);
barrier->subresourceRange.baseMipLevel = mip;
barrier->oldLayout = VK_IMAGE_LAYOUT_GENERAL;
barrier->newLayout = VK_IMAGE_LAYOUT_GENERAL;
barrier->srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier->dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
batch.Flush(cmd);
}
}
void VulkanTexture::EndCreate(VkCommandBuffer cmd, bool vertexTexture, VkPipelineStageFlags prevStage, VkImageLayout layout) {
TransitionImageLayout2(cmd, image_, 0, numMips_, 1,
VK_IMAGE_ASPECT_COLOR_BIT,
layout, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
prevStage, vertexTexture ? VK_PIPELINE_STAGE_VERTEX_SHADER_BIT : VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
prevStage == VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT ? VK_ACCESS_SHADER_WRITE_BIT : VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
VulkanBarrierBatch batch;
VkImageMemoryBarrier *barrier = batch.Add(image_, prevStage, vertexTexture ? VK_PIPELINE_STAGE_VERTEX_SHADER_BIT : VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0);
barrier->subresourceRange.levelCount = numMips_;
barrier->oldLayout = layout;
barrier->newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
barrier->srcAccessMask = prevStage == VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT ? VK_ACCESS_SHADER_WRITE_BIT : VK_ACCESS_TRANSFER_WRITE_BIT;
barrier->dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
batch.Flush(cmd);
}
void VulkanTexture::PrepareForTransferDst(VkCommandBuffer cmd, int levels) {
TransitionImageLayout2(cmd, image_, 0, levels, 1,
VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
VulkanBarrierBatch batch;
VkImageMemoryBarrier *barrier = batch.Add(image_, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0);
barrier->subresourceRange.levelCount = levels;
barrier->srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
barrier->dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier->oldLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
barrier->newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
batch.Flush(cmd);
}
void VulkanTexture::RestoreAfterTransferDst(VkCommandBuffer cmd, int levels) {
TransitionImageLayout2(cmd, image_, 0, levels, 1,
VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
void VulkanTexture::RestoreAfterTransferDst(int levels, VulkanBarrierBatch *barriers) {
VkImageMemoryBarrier *barrier = barriers->Add(image_, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0);
barrier->subresourceRange.levelCount = levels;
barrier->srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier->dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
barrier->oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier->newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
VkImageView VulkanTexture::CreateViewForMip(int mip) {

View File

@@ -7,6 +7,8 @@ class VulkanDeviceAllocator;
VK_DEFINE_HANDLE(VmaAllocation);
class VulkanBarrierBatch;
struct TextureCopyBatch {
std::vector<VkBufferImageCopy> copies;
VkBuffer buffer = VK_NULL_HANDLE;
@@ -39,7 +41,7 @@ public:
// For updating levels after creation. Careful with the timelines!
void PrepareForTransferDst(VkCommandBuffer cmd, int levels);
void RestoreAfterTransferDst(VkCommandBuffer cmd, int levels);
void RestoreAfterTransferDst(int levels, VulkanBarrierBatch *barriers);
// When loading mips from compute shaders, you need to pass VK_IMAGE_LAYOUT_GENERAL to the above function.
// In addition, ignore UploadMip and GenerateMip, and instead use GetViewForMip. Make sure to delete the returned views when used.

View File

@@ -80,7 +80,7 @@ void VulkanQueueRunner::DestroyDeviceObjects() {
renderPasses_.Clear();
}
bool VulkanQueueRunner::CreateSwapchain(VkCommandBuffer cmdInit) {
bool VulkanQueueRunner::CreateSwapchain(VkCommandBuffer cmdInit, VulkanBarrierBatch *barriers) {
VkResult res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, nullptr);
_dbg_assert_(res == VK_SUCCESS);
@@ -123,7 +123,7 @@ bool VulkanQueueRunner::CreateSwapchain(VkCommandBuffer cmdInit) {
delete[] swapchainImages;
// Must be before InitBackbufferRenderPass.
if (InitDepthStencilBuffer(cmdInit)) {
if (InitDepthStencilBuffer(cmdInit, barriers)) {
InitBackbufferFramebuffers(vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight());
}
return true;
@@ -157,7 +157,7 @@ bool VulkanQueueRunner::InitBackbufferFramebuffers(int width, int height) {
return true;
}
bool VulkanQueueRunner::InitDepthStencilBuffer(VkCommandBuffer cmd) {
bool VulkanQueueRunner::InitDepthStencilBuffer(VkCommandBuffer cmd, VulkanBarrierBatch *barriers) {
const VkFormat depth_format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat;
int aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
VkImageCreateInfo image_info = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
@@ -189,12 +189,14 @@ bool VulkanQueueRunner::InitDepthStencilBuffer(VkCommandBuffer cmd) {
vulkan_->SetDebugName(depth_.image, VK_OBJECT_TYPE_IMAGE, "BackbufferDepth");
TransitionImageLayout2(cmd, depth_.image, 0, 1, 1,
aspectMask,
VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
VkImageMemoryBarrier *barrier = barriers->Add(depth_.image,
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
0, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, 0);
barrier->subresourceRange.aspectMask = aspectMask;
barrier->oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
barrier->newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
barrier->srcAccessMask = 0;
barrier->dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
VkImageViewCreateInfo depth_view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
depth_view_info.image = depth_.image;

View File

@@ -230,7 +230,7 @@ public:
// Swapchain
void DestroyBackBuffers();
bool CreateSwapchain(VkCommandBuffer cmdInit);
bool CreateSwapchain(VkCommandBuffer cmdInit, VulkanBarrierBatch *barriers);
bool HasBackbuffers() const {
return !framebuffers_.empty();
@@ -277,7 +277,7 @@ public:
private:
bool InitBackbufferFramebuffers(int width, int height);
bool InitDepthStencilBuffer(VkCommandBuffer cmd); // Used for non-buffered rendering.
bool InitDepthStencilBuffer(VkCommandBuffer cmd, VulkanBarrierBatch *barriers); // Used for non-buffered rendering.
VKRRenderPass *PerformBindFramebufferAsRenderTarget(const VKRStep &pass, VkCommandBuffer cmd);
void PerformRenderPass(const VKRStep &pass, VkCommandBuffer cmd, int curFrame, QueueProfileContext &profile);

View File

@@ -291,7 +291,7 @@ bool VulkanRenderManager::CreateBackbuffers() {
VkCommandBuffer cmdInit = GetInitCmd();
if (!queueRunner_.CreateSwapchain(cmdInit)) {
if (!queueRunner_.CreateSwapchain(cmdInit, &postInitBarrier_)) {
return false;
}
@@ -1411,6 +1411,11 @@ void VulkanRenderManager::Finish() {
int curFrame = vulkan_->GetCurFrame();
FrameData &frameData = frameData_[curFrame];
if (!postInitBarrier_.empty()) {
VkCommandBuffer buffer = frameData.GetInitCmd(vulkan_);
postInitBarrier_.Flush(buffer);
}
VLOG("PUSH: Frame[%d]", curFrame);
VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::SUBMIT);
task->frame = curFrame;
@@ -1561,7 +1566,12 @@ void VulkanRenderManager::FlushSync() {
int curFrame = vulkan_->GetCurFrame();
FrameData &frameData = frameData_[curFrame];
if (!postInitBarrier_.empty()) {
VkCommandBuffer buffer = frameData.GetInitCmd(vulkan_);
postInitBarrier_.Flush(buffer);
}
if (useRenderThread_) {
{
VLOG("PUSH: Frame[%d]", curFrame);
@@ -1584,11 +1594,10 @@ void VulkanRenderManager::FlushSync() {
frameData.syncDone = false;
}
} else {
VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::SYNC);
task->frame = curFrame;
task->steps = std::move(steps_);
Run(*task);
delete task;
VKRRenderThreadTask task(VKRRunType::SYNC);
task.frame = curFrame;
task.steps = std::move(steps_);
Run(task);
steps_.clear();
}
}

View File

@@ -16,6 +16,7 @@
#include "Common/Thread/Promise.h"
#include "Common/System/Display.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/GPU/Vulkan/VulkanBarrier.h"
#include "Common/Data/Convert/SmallDataConvert.h"
#include "Common/Data/Collections/FastVec.h"
#include "Common/Math/math_util.h"
@@ -535,6 +536,12 @@ public:
return outOfDateFrames_ > VulkanContext::MAX_INFLIGHT_FRAMES;
}
void Invalidate(InvalidationFlags flags);
// Returns the render manager's shared barrier batch. Finish() and FlushSync() flush this
// batch into the current frame's init command buffer whenever it is non-empty.
VulkanBarrierBatch &PostInitBarrier() {
return postInitBarrier_;
}
void ResetStats();
void DrainAndBlockCompileQueue();
void ReleaseCompileQueue();
@@ -626,6 +633,8 @@ private:
SimpleStat renderCPUTimeMs_;
SimpleStat descUpdateTimeMs_;
VulkanBarrierBatch postInitBarrier_;
std::function<void(InvalidationCallbackFlags)> invalidationCallback_;
uint64_t frameIdGen_ = FRAME_TIME_HISTORY_LENGTH;

View File

@@ -331,8 +331,8 @@ public:
: vulkan_(vulkan), mipLevels_(desc.mipLevels) {
format_ = desc.format;
}
bool Create(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const TextureDesc &desc);
void Update(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const uint8_t *const *data, TextureCallback callback, int numLevels);
bool Create(VkCommandBuffer cmd, VulkanBarrierBatch *postBarriers, VulkanPushPool *pushBuffer, const TextureDesc &desc);
void Update(VkCommandBuffer cmd, VulkanBarrierBatch *postBarriers, VulkanPushPool *pushBuffer, const uint8_t *const *data, TextureCallback callback, int numLevels);
~VKTexture() {
Destroy();
@@ -759,7 +759,7 @@ enum class TextureState {
PENDING_DESTRUCTION,
};
bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const TextureDesc &desc) {
bool VKTexture::Create(VkCommandBuffer cmd, VulkanBarrierBatch *postBarriers, VulkanPushPool *pushBuffer, const TextureDesc &desc) {
// Zero-sized textures not allowed.
_assert_(desc.width * desc.height * desc.depth > 0); // remember to set depth to 1!
if (desc.width * desc.height * desc.depth <= 0) {
@@ -799,25 +799,24 @@ bool VKTexture::Create(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const Te
return true;
}
void VKTexture::Update(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const uint8_t * const *data, TextureCallback initDataCallback, int numLevels) {
void VKTexture::Update(VkCommandBuffer cmd, VulkanBarrierBatch *postBarriers, VulkanPushPool *pushBuffer, const uint8_t * const *data, TextureCallback initDataCallback, int numLevels) {
// Before we can use UpdateInternal, we need to transition the image to the same state as after CreateDirect,
// making it ready for writing.
vkTex_->PrepareForTransferDst(cmd, numLevels);
UpdateInternal(cmd, pushBuffer, data, initDataCallback, numLevels);
vkTex_->RestoreAfterTransferDst(cmd, numLevels);
vkTex_->RestoreAfterTransferDst(numLevels, postBarriers);
}
void VKTexture::UpdateInternal(VkCommandBuffer cmd, VulkanPushPool *pushBuffer, const uint8_t * const *data, TextureCallback initDataCallback, int numLevels) {
int w = width_;
int h = height_;
int d = depth_;
int i;
VkFormat vulkanFormat = DataFormatToVulkan(format_);
int bpp = GetBpp(vulkanFormat);
int bytesPerPixel = bpp / 8;
TextureCopyBatch batch;
batch.reserve(numLevels);
for (i = 0; i < numLevels; i++) {
for (int i = 0; i < numLevels; i++) {
uint32_t offset;
VkBuffer buf;
size_t size = w * h * d * bytesPerPixel;
@@ -1294,7 +1293,7 @@ Texture *VKContext::CreateTexture(const TextureDesc &desc) {
return nullptr;
}
VKTexture *tex = new VKTexture(vulkan_, initCmd, push_, desc);
if (tex->Create(initCmd, push_, desc)) {
if (tex->Create(initCmd, &renderManager_.PostInitBarrier(), push_, desc)) {
return tex;
} else {
ERROR_LOG(G3D, "Failed to create texture");
@@ -1314,7 +1313,7 @@ void VKContext::UpdateTextureLevels(Texture *texture, const uint8_t **data, Text
VKTexture *tex = (VKTexture *)texture;
_dbg_assert_(numLevels <= tex->NumLevels());
tex->Update(initCmd, push_, data, initDataCallback, numLevels);
tex->Update(initCmd, &renderManager_.PostInitBarrier(), push_, data, initDataCallback, numLevels);
}
static inline void CopySide(VkStencilOpState &dest, const StencilSetup &src) {
@@ -1627,7 +1626,7 @@ Framebuffer *VKContext::CreateFramebuffer(const FramebufferDesc &desc) {
_assert_(desc.height > 0);
VkCommandBuffer cmd = renderManager_.GetInitCmd();
VKRFramebuffer *vkrfb = new VKRFramebuffer(vulkan_, cmd, renderManager_.GetQueueRunner()->GetCompatibleRenderPass(), desc.width, desc.height, desc.numLayers, desc.multiSampleLevel, desc.z_stencil, desc.tag);
VKRFramebuffer *vkrfb = new VKRFramebuffer(vulkan_, &renderManager_.PostInitBarrier(), cmd, renderManager_.GetQueueRunner()->GetCompatibleRenderPass(), desc.width, desc.height, desc.numLayers, desc.multiSampleLevel, desc.z_stencil, desc.tag);
return new VKFramebuffer(vkrfb, desc.multiSampleLevel);
}