It's kinda almost working!

This commit is contained in:
Henrik Rydgård
2023-05-24 10:24:54 +02:00
parent 0472cc2b79
commit d135ce2d62
5 changed files with 86 additions and 11 deletions
+4
View File
@@ -142,6 +142,10 @@ public:
return bindOffset; return bindOffset;
} }
// Translates a byte offset into a CPU-side address relative to writePtr_.
// No bounds or mapping checks are done here; the caller must pass an offset
// that lies inside memory it previously obtained from this buffer.
uint8_t *GetPtr(uint32_t offset) {
	return &writePtr_[offset];
}
// If you didn't use all of the previous allocation you just made (obviously can't be another one), // If you didn't use all of the previous allocation you just made (obviously can't be another one),
// you can return memory to the buffer by specifying the offset up until which you wrote data. // you can return memory to the buffer by specifying the offset up until which you wrote data.
void Rewind(uint32_t offset) { void Rewind(uint32_t offset) {
+2 -2
View File
@@ -1236,8 +1236,8 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
// TODO: Add fast path for glBindVertexBuffer // TODO: Add fast path for glBindVertexBuffer
GLRInputLayout *layout = c.draw.inputLayout; GLRInputLayout *layout = c.draw.inputLayout;
// TODO: We really shouldn't need null checks here, right? // TODO: We really shouldn't need null checks here, right?
GLuint buf = c.draw.vertexBuffer ? c.draw.vertexBuffer->buffer_ : 0; GLuint buf = c.draw.vertexBuffer->buffer_;
_dbg_assert_(!c.draw.vertexBuffer || !c.draw.vertexBuffer->Mapped()); _dbg_assert_(!c.draw.vertexBuffer->Mapped());
if (buf != curArrayBuffer) { if (buf != curArrayBuffer) {
glBindBuffer(GL_ARRAY_BUFFER, buf); glBindBuffer(GL_ARRAY_BUFFER, buf);
curArrayBuffer = buf; curArrayBuffer = buf;
+4 -2
View File
@@ -750,22 +750,24 @@ public:
} }
void Draw(GLRInputLayout *inputLayout, GLRBuffer *vertexBuffer, uint32_t vertexOffset, GLenum mode, int first, int count) { void Draw(GLRInputLayout *inputLayout, GLRBuffer *vertexBuffer, uint32_t vertexOffset, GLenum mode, int first, int count) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); _dbg_assert_(vertexBuffer && curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::DRAW; data.cmd = GLRRenderCommand::DRAW;
data.draw.inputLayout = inputLayout; data.draw.inputLayout = inputLayout;
data.draw.vertexOffset = vertexOffset; data.draw.vertexOffset = vertexOffset;
data.draw.vertexBuffer = vertexBuffer; data.draw.vertexBuffer = vertexBuffer;
data.draw.indexBuffer = nullptr; data.draw.indexBuffer = nullptr;
data.draw.indexOffset = 0;
data.draw.mode = mode; data.draw.mode = mode;
data.draw.first = first; data.draw.first = first;
data.draw.count = count; data.draw.count = count;
data.draw.indexType = 0; data.draw.indexType = 0;
data.draw.instances = 1;
} }
// Would really love to have a basevertex parameter, but impossible in unextended GLES, without glDrawElementsBaseVertex, unfortunately. // Would really love to have a basevertex parameter, but impossible in unextended GLES, without glDrawElementsBaseVertex, unfortunately.
void DrawIndexed(GLRInputLayout *inputLayout, GLRBuffer *vertexBuffer, uint32_t vertexOffset, GLRBuffer *indexBuffer, uint32_t indexOffset, GLenum mode, int count, GLenum indexType, int instances = 1) { void DrawIndexed(GLRInputLayout *inputLayout, GLRBuffer *vertexBuffer, uint32_t vertexOffset, GLRBuffer *indexBuffer, uint32_t indexOffset, GLenum mode, int count, GLenum indexType, int instances = 1) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); _dbg_assert_(vertexBuffer && curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
GLRRenderData &data = curRenderStep_->commands.push_uninitialized(); GLRRenderData &data = curRenderStep_->commands.push_uninitialized();
data.cmd = GLRRenderCommand::DRAW; data.cmd = GLRRenderCommand::DRAW;
data.draw.inputLayout = inputLayout; data.draw.inputLayout = inputLayout;
+63 -6
View File
@@ -234,6 +234,55 @@ void DrawEngineGLES::Invalidate(InvalidationCallbackFlags flags) {
} }
} }
// Copies `count` 16-bit indices from `src` to `dst`, adding `offset` to each one.
// An offset of zero degenerates to a plain memcpy.
// The sum is truncated back to 16 bits, so the caller must guarantee that
// (largest index + offset) still fits in a uint16_t.
static void CopyIndicesWithOffset(uint16_t *dst, const uint16_t *src, uint32_t count, uint16_t offset) {
	if (offset != 0) {
		// TODO: SIMD-ify.
		const uint16_t *const srcEnd = src + count;
		while (src != srcEnd) {
			// If this wraps, the calculations made before calling us were wrong.
			*dst++ = (uint16_t)(*src++ + offset);
		}
	} else {
		// Nothing to add, a straight block copy does the job.
		memcpy(dst, src, count * sizeof(uint16_t));
	}
}
// Ends the current vertex reservation, if there is one: rewinds the frame's vertex
// push buffer to the point up to which the reservation was actually consumed, then
// clears the reservation tracking so the next AllocateVertices call starts fresh.
void DrawEngineGLES::ReleaseReservedPushMemory(FrameData &frameData) {
	if (!curVBuffer_) {
		// No reservation is active, nothing to hand back.
		return;
	}

	frameData.pushVertex->Rewind(curVBufferOffset_);

	// Resetting everything is a bit excessive, but makes stale state easy to spot when debugging.
	curVBufferEnd_ = 0;
	curVBufferBindOffset_ = GLPushBuffer::INVALID_OFFSET;
	curVBufferOffset_ = GLPushBuffer::INVALID_OFFSET;
	curVBuffer_ = nullptr;
}
const int RESERVATION_SIZE = 256 * 1024;
// Hands out room for `count` vertices of `stride` bytes each from the frame's vertex push buffer.
//
// Space is carved out of a larger reservation so that consecutive draws can share one
// buffer binding and only differ in their starting vertex:
//   *vertexBuffer - the GL buffer the vertices end up in.
//   *bindOffset   - byte offset of the start of the reservation (what the buffer gets bound at).
//   *vertexOffset - index of the first returned vertex relative to *bindOffset, in units of `stride`.
// Returns the CPU pointer to write the vertex data to.
//
// *vertexOffset is only meaningful if every allocation within a reservation uses the same
// stride; DoFlush releases the reservation whenever the stride changes to guarantee that.
u8 *DrawEngineGLES::AllocateVertices(FrameData &frameData, int stride, int count, GLRBuffer **vertexBuffer, uint32_t *bindOffset, uint32_t *vertexOffset) {
	// stride is used as a divisor below, and a negative count would produce a bogus size.
	_dbg_assert_(stride > 0 && count >= 0);
	const uint32_t size = (uint32_t)stride * (uint32_t)count;

	if (curVBuffer_ && (curVBufferOffset_ + size <= curVBufferEnd_)) {
		// Fast path: the request fits in what is left of the current reservation.
		_dbg_assert_(curVBufferOffset_ != GLPushBuffer::INVALID_OFFSET && curVBufferBindOffset_ != GLPushBuffer::INVALID_OFFSET);
		*bindOffset = curVBufferBindOffset_;
		uint8_t *retval = frameData.pushVertex->GetPtr(curVBufferOffset_);
		*vertexOffset = (curVBufferOffset_ - curVBufferBindOffset_) / stride;
		*vertexBuffer = curVBuffer_;
		curVBufferOffset_ += size;
		return retval;
	}

	// OK, no available reserved space to grab. Let's allocate more and start over.
	// A single request can be bigger than the default reservation; in that case the
	// reservation must grow to fit it, otherwise the caller would write past the end
	// of the allocation and curVBufferOffset_ would end up beyond curVBufferEnd_.
	// NOTE(review): the unused tail of the previous reservation (if any) is not rewound
	// here - confirm whether that waste is acceptable.
	const uint32_t reservation = size > (uint32_t)RESERVATION_SIZE ? size : (uint32_t)RESERVATION_SIZE;
	u8 *dest = (u8 *)frameData.pushVertex->Allocate(reservation, 4, &curVBuffer_, &curVBufferBindOffset_);
	curVBufferEnd_ = curVBufferBindOffset_ + reservation;
	*bindOffset = curVBufferBindOffset_;
	*vertexOffset = 0;  // First allocation in a new reservation starts right at the bind offset.
	*vertexBuffer = curVBuffer_;
	curVBufferOffset_ = curVBufferBindOffset_ + size;
	return dest;
}
void DrawEngineGLES::DoFlush() { void DrawEngineGLES::DoFlush() {
PROFILE_THIS_SCOPE("flush"); PROFILE_THIS_SCOPE("flush");
FrameData &frameData = frameData_[render_->GetCurFrame()]; FrameData &frameData = frameData_[render_->GetCurFrame()];
@@ -275,17 +324,23 @@ void DrawEngineGLES::DoFlush() {
int vertexCount = 0; int vertexCount = 0;
bool useElements = true; bool useElements = true;
GLRInputLayout *inputLayout = SetupDecFmtForDraw(dec_->GetDecVtxFmt()); GLRInputLayout *inputLayout = SetupDecFmtForDraw(dec_->GetDecVtxFmt());
int stride = inputLayout->entries[0].stride;
uint32_t vertexOffset = 0;
if (!lastInputLayout_ || stride != lastInputLayout_->entries[0].stride) {
ReleaseReservedPushMemory(frameData);
}
lastInputLayout_ = inputLayout;
if (decOptions_.applySkinInDecode && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) { if (decOptions_.applySkinInDecode && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) {
// If software skinning, we've already predecoded into "decoded_", and indices // If software skinning, we've already predecoded into "decoded_", and indices
// into decIndex_. So push that content. // into decIndex_. So push that content.
uint32_t size = decodedVerts_ * dec_->GetDecVtxFmt().stride; u8 *dest = AllocateVertices(frameData, stride, decodedVerts_, &vertexBuffer, &vertexBufferOffset, &vertexOffset);
u8 *dest = (u8 *)frameData.pushVertex->Allocate(size, 4, &vertexBuffer, &vertexBufferOffset); memcpy(dest, decoded_, decodedVerts_ * stride);
memcpy(dest, decoded_, size);
} else { } else {
// Figure out how much pushbuffer space we need to allocate. // Figure out how much pushbuffer space we need to allocate.
int vertsToDecode = ComputeNumVertsToDecode(); int vertsToDecode = ComputeNumVertsToDecode();
u8 *dest = (u8 *)frameData.pushVertex->Allocate(vertsToDecode * dec_->GetDecVtxFmt().stride, 4, &vertexBuffer, &vertexBufferOffset); u8 *dest = AllocateVertices(frameData, stride, vertsToDecode, &vertexBuffer, &vertexBufferOffset, &vertexOffset);
// Indices are decoded in here. // Indices are decoded in here.
DecodeVerts(dest); DecodeVerts(dest);
} }
@@ -306,7 +361,7 @@ void DrawEngineGLES::DoFlush() {
void *dest = frameData.pushIndex->Allocate(esz, 2, &indexBuffer, &indexBufferOffset); void *dest = frameData.pushIndex->Allocate(esz, 2, &indexBuffer, &indexBufferOffset);
// TODO: When we need to apply an index offset, we can apply it directly when copying the indices here. // TODO: When we need to apply an index offset, we can apply it directly when copying the indices here.
// Of course, minding the maximum value of 65535... // Of course, minding the maximum value of 65535...
memcpy(dest, decIndex_, esz); CopyIndicesWithOffset((uint16_t *)dest, decIndex_, indexGen.VertexCount(), vertexOffset);
} }
prim = indexGen.Prim(); prim = indexGen.Prim();
@@ -327,6 +382,7 @@ void DrawEngineGLES::DoFlush() {
LinkedShader *program = shaderManager_->ApplyFragmentShader(vsid, vshader, pipelineState_, framebufferManager_->UseBufferedRendering()); LinkedShader *program = shaderManager_->ApplyFragmentShader(vsid, vshader, pipelineState_, framebufferManager_->UseBufferedRendering());
if (useElements) { if (useElements) {
// The vertexOffset is applied directly to the indices above.
render_->DrawIndexed(inputLayout, render_->DrawIndexed(inputLayout,
vertexBuffer, vertexBufferOffset, vertexBuffer, vertexBufferOffset,
indexBuffer, indexBufferOffset, indexBuffer, indexBufferOffset,
@@ -334,7 +390,7 @@ void DrawEngineGLES::DoFlush() {
} else { } else {
render_->Draw( render_->Draw(
inputLayout, vertexBuffer, vertexBufferOffset, inputLayout, vertexBuffer, vertexBufferOffset,
glprim[prim], 0, vertexCount); glprim[prim], vertexOffset, vertexCount);
} }
} else { } else {
PROFILE_THIS_SCOPE("soft"); PROFILE_THIS_SCOPE("soft");
@@ -429,6 +485,7 @@ void DrawEngineGLES::DoFlush() {
} }
if (result.action == SW_DRAW_PRIMITIVES) { if (result.action == SW_DRAW_PRIMITIVES) {
ReleaseReservedPushMemory(frameData);
if (result.drawIndexed) { if (result.drawIndexed) {
vertexBufferOffset = (uint32_t)frameData.pushVertex->Push(result.drawBuffer, maxIndex * sizeof(TransformedVertex), 4, &vertexBuffer); vertexBufferOffset = (uint32_t)frameData.pushVertex->Push(result.drawBuffer, maxIndex * sizeof(TransformedVertex), 4, &vertexBuffer);
indexBufferOffset = (uint32_t)frameData.pushIndex->Push(inds, sizeof(uint16_t) * result.drawNumTrans, 2, &indexBuffer); indexBufferOffset = (uint32_t)frameData.pushIndex->Push(inds, sizeof(uint16_t) * result.drawNumTrans, 2, &indexBuffer);
+13 -1
View File
@@ -126,13 +126,25 @@ private:
GLPushBuffer *pushVertex; GLPushBuffer *pushVertex;
GLPushBuffer *pushIndex; GLPushBuffer *pushIndex;
}; };
FrameData frameData_[GLRenderManager::MAX_INFLIGHT_FRAMES];
// Manage suballocating pushbuffer reservations
void ReleaseReservedPushMemory(FrameData &frameData);
u8 *AllocateVertices(FrameData &frameData, int stride, int count, GLRBuffer **vertexBuffer, uint32_t *bindOffset, uint32_t *vertexOffset);
FrameData frameData_[GLRenderManager::MAX_INFLIGHT_FRAMES]{};
DenseHashMap<uint32_t, GLRInputLayout *, nullptr> inputLayoutMap_; DenseHashMap<uint32_t, GLRInputLayout *, nullptr> inputLayoutMap_;
GLRInputLayout *softwareInputLayout_ = nullptr; GLRInputLayout *softwareInputLayout_ = nullptr;
GLRInputLayout *lastInputLayout_ = nullptr;
GLRenderManager *render_; GLRenderManager *render_;
// These are ONLY touched within AllocateVertices and ReleaseReservedPushMemory, to isolate the logic properly.
GLRBuffer *curVBuffer_ = nullptr;
uint32_t curVBufferBindOffset_ = 0;
uint32_t curVBufferOffset_ = 0;
uint32_t curVBufferEnd_ = 0;
// Other // Other
ShaderManagerGLES *shaderManager_ = nullptr; ShaderManagerGLES *shaderManager_ = nullptr;
TextureCacheGLES *textureCache_ = nullptr; TextureCacheGLES *textureCache_ = nullptr;