#include "Common/MemoryUtil.h"
#include "Common/GPU/OpenGL/GLMemory.h"
#include "Common/GPU/OpenGL/GLRenderManager.h"
#include "Common/GPU/OpenGL/GLFeatures.h"
#include "Common/Data/Text/Parsers.h"

extern std::thread::id renderThreadId;
#if MAX_LOGLEVEL >= DEBUG_LEVEL
static bool OnRenderThread() {
	return std::this_thread::get_id() == renderThreadId;
}
#endif

void *GLRBuffer::Map(GLBufferStrategy strategy) {
	_assert_(buffer_ != 0);

	GLbitfield access = GL_MAP_WRITE_BIT;
	if ((strategy & GLBufferStrategy::MASK_FLUSH) != 0) {
		access |= GL_MAP_FLUSH_EXPLICIT_BIT;
	}
	if ((strategy & GLBufferStrategy::MASK_INVALIDATE) != 0) {
		access |= GL_MAP_INVALIDATE_BUFFER_BIT;
	}

	void *p = nullptr;
	bool allowNativeBuffer = strategy != GLBufferStrategy::SUBDATA;
	if (allowNativeBuffer) {
		glBindBuffer(target_, buffer_);

		if (gl_extensions.ARB_buffer_storage || gl_extensions.EXT_buffer_storage) {
#if !PPSSPP_PLATFORM(IOS)
			if (!hasStorage_) {
				GLbitfield storageFlags = access & ~(GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_FLUSH_EXPLICIT_BIT);
#ifdef USING_GLES2
#ifdef GL_EXT_buffer_storage
				glBufferStorageEXT(target_, size_, nullptr, storageFlags);
#endif
#else
				glBufferStorage(target_, size_, nullptr, storageFlags);
#endif
				hasStorage_ = true;
			}
#endif
			p = glMapBufferRange(target_, 0, size_, access);
		} else if (gl_extensions.VersionGEThan(3, 0, 0)) {
			// GLES3 or desktop 3.
			p = glMapBufferRange(target_, 0, size_, access);
		} else if (!gl_extensions.IsGLES) {
#ifndef USING_GLES2
			p = glMapBuffer(target_, GL_READ_WRITE);
#endif
		}
	}

	mapped_ = p != nullptr;
	return p;
}

bool GLRBuffer::Unmap() {
	glBindBuffer(target_, buffer_);
	mapped_ = false;
	return glUnmapBuffer(target_) == GL_TRUE;
}

GLPushBuffer::GLPushBuffer(GLRenderManager *render, GLuint target, size_t size, const char *tag) : render_(render), nextBufferSize_(size), target_(target), tag_(tag) {
	AddBuffer();
	RegisterGPUMemoryManager(this);
}

GLPushBuffer::~GLPushBuffer() {
	UnregisterGPUMemoryManager(this);
	Destroy(true);
}

void GLPushBuffer::Map() {
	_assert_(!writePtr_);
	auto &info = buffers_[buf_];
	writePtr_ = info.deviceMemory ? info.deviceMemory : info.localMemory;
	info.flushOffset = 0;
	// Force alignment.  This is needed for PushAligned() to work as expected.
	while ((intptr_t)writePtr_ & 15) {
		writePtr_++;
		offset_++;
		info.flushOffset++;
	}
	_assert_(writePtr_);
}

void GLPushBuffer::Unmap() {
	_assert_(writePtr_);
	if (!buffers_[buf_].deviceMemory) {
		// Here we simply upload the data to the last buffer.
		// Might be worth trying with size_ instead of offset_, so the driver can replace
		// the whole buffer. At least if it's close.
		render_->BufferSubdata(buffers_[buf_].buffer, 0, offset_, buffers_[buf_].localMemory, false);
	} else {
		buffers_[buf_].flushOffset = offset_;
	}
	writePtr_ = nullptr;
}

void GLPushBuffer::Flush() {
	// Must be called from the render thread.
	_dbg_assert_(OnRenderThread());
	if (buf_ >= buffers_.size()) {
		_dbg_assert_msg_(false, "buf_ somehow got out of sync: %d vs %d", (int)buf_, (int)buffers_.size());
		return;
	}

	buffers_[buf_].flushOffset = offset_;
	if (!buffers_[buf_].deviceMemory && writePtr_) {
		auto &info = buffers_[buf_];
		if (info.flushOffset != 0) {
			_assert_(info.buffer->buffer_);
			glBindBuffer(target_, info.buffer->buffer_);
			glBufferSubData(target_, 0, info.flushOffset, info.localMemory);
		}

		// Here we will submit all the draw calls, with the already known buffer and offsets.
		// Might as well reset the write pointer here and start over the current buffer.
		writePtr_ = info.localMemory;
		offset_ = 0;
		info.flushOffset = 0;
	}

	// For device memory, we flush all buffers here.
	if ((strategy_ & GLBufferStrategy::MASK_FLUSH) != 0) {
		for (auto &info : buffers_) {
			if (info.flushOffset == 0 || !info.deviceMemory)
				continue;

			glBindBuffer(target_, info.buffer->buffer_);
			glFlushMappedBufferRange(target_, 0, info.flushOffset);
			info.flushOffset = 0;
		}
	}
}

void GLPushBuffer::AddBuffer() {
	// INFO_LOG(Log::G3D, "GLPushBuffer(%s): Allocating %d bytes", tag_, size_);
	BufInfo info;
	info.localMemory = (uint8_t *)AllocateAlignedMemory(nextBufferSize_, 16);
	_assert_msg_(info.localMemory != 0, "GLPushBuffer alloc fail: %d (%s)", (int)nextBufferSize_, tag_);
	info.buffer = render_->CreateBuffer(target_, nextBufferSize_, GL_DYNAMIC_DRAW);
	info.size = nextBufferSize_;
	buf_ = buffers_.size();
	buffers_.push_back(info);
}

void GLPushBuffer::Destroy(bool onRenderThread) {
	if (buf_ == -1)
		return;  // Already destroyed
	for (BufInfo &info : buffers_) {
		// This will automatically unmap device memory, if needed.
		// NOTE: We immediately delete the buffer, don't go through the deleter, if we're on the render thread.
		if (onRenderThread) {
			delete info.buffer;
		} else {
			render_->DeleteBuffer(info.buffer);
		}
		FreeAlignedMemory(info.localMemory);
	}
	buffers_.clear();
	buf_ = -1;
}

void GLPushBuffer::NextBuffer(size_t minSize) {
	// First, unmap the current memory.
	Unmap();

	buf_++;
	if (buf_ >= buffers_.size() || minSize > nextBufferSize_) {
		// Before creating the buffer, adjust to the new size_ if necessary.
		while (nextBufferSize_ < minSize) {
			nextBufferSize_ <<= 1;
		}
		AddBuffer();
	}

	// Now, move to the next buffer and map it.
	offset_ = 0;
	Map();
}

void GLPushBuffer::Defragment() {
	_dbg_assert_msg_(!OnRenderThread(), "Defragment must not run on the render thread");

	if (buffers_.size() <= 1) {
		// Let's take this opportunity to jettison any localMemory we don't need.
		for (auto &info : buffers_) {
			if (info.deviceMemory) {
				FreeAlignedMemory(info.localMemory);
				info.localMemory = nullptr;
			}
		}
		return;
	}

	// Okay, we have more than one.  Destroy them all and start over with a larger one.

	// When calling AddBuffer, we sometimes increase size_. So if we allocated multiple buffers in a frame,
	// they won't all have the same size. Sum things up properly.
	size_t newSize = 0;
	for (int i = 0; i < (int)buffers_.size(); i++) {
		newSize += buffers_[i].size;
	}

	Destroy(false);

	// Set some sane but very free limits. If there's another spike, we'll just allocate more anyway.
	nextBufferSize_ = std::min(std::max(newSize, (size_t)65536), (size_t)(512 * 1024 * 1024));
	AddBuffer();
}

size_t GLPushBuffer::GetTotalSize() const {
	size_t sum = 0;
	// When calling AddBuffer, we sometimes increase size_. So if we allocated multiple buffers in a frame,
	// they won't all have the same size. Sum things up properly.
	if (buffers_.size() > 1) {
		for (int i = 0; i < (int)buffers_.size() - 1; i++) {
			sum += buffers_[i].size;
		}
	}
	sum += offset_;
	return sum;
}

void GLPushBuffer::MapDevice(GLBufferStrategy strategy) {
	_dbg_assert_msg_(OnRenderThread(), "MapDevice must run on render thread");

	strategy_ = strategy;
	if (strategy_ == GLBufferStrategy::SUBDATA) {
		return;
	}

	bool mapChanged = false;
	for (auto &info : buffers_) {
		if (!info.buffer->buffer_ || info.deviceMemory) {
			// Can't map - no device buffer associated yet or already mapped.
			continue;
		}

		info.deviceMemory = (uint8_t *)info.buffer->Map(strategy_);
		mapChanged = mapChanged || info.deviceMemory != nullptr;

		if (!info.deviceMemory && !info.localMemory) {
			// Somehow it failed, let's dodge crashing.
			info.localMemory = (uint8_t *)AllocateAlignedMemory(info.buffer->size_, 16);
			mapChanged = true;
		}

		_dbg_assert_msg_(info.localMemory || info.deviceMemory, "Local or device memory must succeed");
	}

	if (writePtr_ && mapChanged) {
		// This can happen during a sync.  Remap.
		writePtr_ = nullptr;
		Map();
	}
}

void GLPushBuffer::UnmapDevice() {
	_dbg_assert_msg_(OnRenderThread(), "UnmapDevice must run on render thread");

	for (auto &info : buffers_) {
		if (info.deviceMemory) {
			// TODO: Technically this can return false?
			info.buffer->Unmap();
			info.deviceMemory = nullptr;
		}
	}
}

void GLPushBuffer::GetDebugString(char *buffer, size_t bufSize) const {
	snprintf(buffer, bufSize, "%s: %s/%s (%d)", tag_, NiceSizeFormat(this->offset_).c_str(), NiceSizeFormat(this->nextBufferSize_).c_str(), (int)buffers_.size());
}