From 4b3fabbfc93be53034b6f7d50031d7877c689c3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fredrik=20H=C3=B6glund?= Date: Wed, 3 Dec 2014 19:41:38 +0100 Subject: [PATCH] Add support for GL_ARB_buffer_storage Use a persistently mapped buffer object for the vertex streaming buffer when GL_ARB_buffer_storage is supported. --- libkwineffects/kwinglutils.cpp | 197 +++++++++++++++++++++++++++++++++ libkwineffects/kwinglutils.h | 14 +++ scene_opengl.cpp | 4 + 3 files changed, 215 insertions(+) diff --git a/libkwineffects/kwinglutils.cpp b/libkwineffects/kwinglutils.cpp index d66d1ee408..9ef69efa89 100644 --- a/libkwineffects/kwinglutils.cpp +++ b/libkwineffects/kwinglutils.cpp @@ -40,12 +40,22 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. #include #include +#include <deque> + #include #define DEBUG_GLRENDERTARGET 0 #define MAKE_GL_VERSION(major, minor, release) ( ((major) << 16) | ((minor) << 8) | (release) ) +#ifdef __GNUC__ +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) +#else +# define likely(x) (x) +# define unlikely(x) (x) +#endif + namespace KWin { // Variables @@ -1492,6 +1502,33 @@ struct VertexAttrib }; +// ------------------------------------------------------------------ + + + +struct BufferFence +{ + GLsync sync; + intptr_t nextEnd; + + bool signaled() const + { + GLint value; + glGetSynciv(sync, GL_SYNC_STATUS, 1, nullptr, &value); + return value == GL_SIGNALED; + } +}; + + +static void deleteAll(std::deque<BufferFence> &fences) +{ + for (const BufferFence &fence : fences) + glDeleteSync(fence.sync); + + fences.clear(); +} + + //********************************* // GLVertexBufferPrivate //********************************* @@ -1500,12 +1537,16 @@ class GLVertexBufferPrivate public: GLVertexBufferPrivate(GLVertexBuffer::UsageHint usageHint) : vertexCount(0) + , persistent(false) , useColor(false) , color(0, 0, 0, 255) , bufferSize(0) + , bufferEnd(0) , mappedSize(0) + , frameSize(0) , nextOffset(0) , baseAddress(0) + , map(nullptr) {
glGenBuffers(1, &buffer); @@ -1523,7 +1564,10 @@ public: } ~GLVertexBufferPrivate() { + deleteAll(fences); + glDeleteBuffers(1, &buffer); + map = nullptr; } void interleaveArrays(float *array, int dim, const float *vertices, const float *texcoords, int count); @@ -1531,21 +1575,31 @@ public: void unbindArrays(); void reallocateBuffer(size_t size); GLvoid *mapNextFreeRange(size_t size); + void reallocatePersistentBuffer(size_t size); + bool awaitFence(intptr_t offset); + GLvoid *getIdleRange(size_t size); GLuint buffer; GLenum usage; int stride; int vertexCount; static GLVertexBuffer *streamingBuffer; + static bool haveBufferStorage; + static bool haveSyncFences; static bool hasMapBufferRange; static bool supportsIndexedQuads; QByteArray dataStore; + bool persistent; bool useColor; QVector4D color; size_t bufferSize; + intptr_t bufferEnd; size_t mappedSize; + size_t frameSize; intptr_t nextOffset; intptr_t baseAddress; + uint8_t *map; + std::deque<BufferFence> fences; VertexAttrib attrib[VertexAttributeCount]; Bitfield enabledArrays; #ifndef KWIN_HAVE_OPENGLES @@ -1556,6 +1610,8 @@ public: bool GLVertexBufferPrivate::hasMapBufferRange = false; bool GLVertexBufferPrivate::supportsIndexedQuads = false; GLVertexBuffer *GLVertexBufferPrivate::streamingBuffer = nullptr; +bool GLVertexBufferPrivate::haveBufferStorage = false; +bool GLVertexBufferPrivate::haveSyncFences = false; #ifndef KWIN_HAVE_OPENGLES IndexBuffer *GLVertexBufferPrivate::s_indexBuffer = nullptr; #endif @@ -1626,6 +1682,95 @@ void GLVertexBufferPrivate::unbindArrays() glDisableVertexAttribArray(it.next()); } +void GLVertexBufferPrivate::reallocatePersistentBuffer(size_t size) +{ + if (buffer != 0) { + // This also unmaps and unbinds the buffer + glDeleteBuffers(1, &buffer); + buffer = 0; + + deleteAll(fences); + } + + if (buffer == 0) + glGenBuffers(1, &buffer); + + // Round the size up to 64 kb + size_t minSize = 128 * 1024; + bufferSize = align(qMax(size, minSize), 64 * 1024); + + const GLbitfield storage = 
GL_DYNAMIC_STORAGE_BIT; + const GLbitfield access = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; + + glBindBuffer(GL_ARRAY_BUFFER, buffer); + glBufferStorage(GL_ARRAY_BUFFER, bufferSize, nullptr, storage | access); + + map = (uint8_t *) glMapBufferRange(GL_ARRAY_BUFFER, 0, bufferSize, access); + + nextOffset = 0; + bufferEnd = bufferSize; +} + +bool GLVertexBufferPrivate::awaitFence(intptr_t end) +{ + // Skip fences until we reach the end offset + while (!fences.empty() && fences.front().nextEnd < end) { + glDeleteSync(fences.front().sync); + fences.pop_front(); + } + + assert(!fences.empty()); + + // Wait on the next fence + const BufferFence &fence = fences.front(); + + if (!fence.signaled()) { + qDebug() << "Stalling on VBO fence"; + const GLenum ret = glClientWaitSync(fence.sync, GL_SYNC_FLUSH_COMMANDS_BIT, 1000000000); + + if (ret == GL_TIMEOUT_EXPIRED || ret == GL_WAIT_FAILED) { + qCritical() << "Wait failed"; + return false; + } + } + + glDeleteSync(fence.sync); + + // Update the end pointer + bufferEnd = fence.nextEnd; + fences.pop_front(); + + return true; +} + +GLvoid *GLVertexBufferPrivate::getIdleRange(size_t size) +{ + if (unlikely(size > bufferSize)) + reallocatePersistentBuffer(size * 2); + + // Handle wrap-around + if (unlikely(nextOffset + size > bufferSize)) { + nextOffset = 0; + bufferEnd -= bufferSize; + + for (BufferFence &fence : fences) + fence.nextEnd -= bufferSize; + + // Emit a fence now + BufferFence fence; + fence.sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + fence.nextEnd = bufferSize; + fences.emplace_back(fence); + } + + if (unlikely(nextOffset + intptr_t(size) > bufferEnd)) { + if (!awaitFence(nextOffset + size)) + return nullptr; + } + + return map + nextOffset; +} + void GLVertexBufferPrivate::reallocateBuffer(size_t size) { // Round the size up to 4 Kb for streaming/dynamic buffers. 
@@ -1700,6 +1845,10 @@ void GLVertexBuffer::setData(int vertexCount, int dim, const float* vertices, co GLvoid *GLVertexBuffer::map(size_t size) { d->mappedSize = size; + d->frameSize += size; + + if (d->persistent) + return d->getIdleRange(size); glBindBuffer(GL_ARRAY_BUFFER, d->buffer); @@ -1719,6 +1868,13 @@ GLvoid *GLVertexBuffer::map(size_t size) void GLVertexBuffer::unmap() { + if (d->persistent) { + d->baseAddress = d->nextOffset; + d->nextOffset += align(d->mappedSize, 16); // Align to 16 bytes for SSE + d->mappedSize = 0; + return; + } + bool preferBufferSubData = GLPlatform::instance()->preferBufferSubData(); if (GLVertexBufferPrivate::hasMapBufferRange && !preferBufferSubData) { @@ -1871,11 +2027,44 @@ void GLVertexBuffer::reset() d->vertexCount = 0; } +void GLVertexBuffer::endOfFrame() +{ + if (!d->persistent) + return; + + // Emit a fence if we have uploaded data + if (d->frameSize > 0) { + BufferFence fence; + fence.sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + fence.nextEnd = d->nextOffset + d->bufferSize; + + d->fences.emplace_back(fence); + d->frameSize = 0; + } +} + +void GLVertexBuffer::framePosted() +{ + if (!d->persistent) + return; + + // Remove finished fences from the list and update the bufferEnd offset + while (d->fences.size() > 1 && d->fences.front().signaled()) { + const BufferFence &fence = d->fences.front(); + glDeleteSync(fence.sync); + + d->bufferEnd = fence.nextEnd; + d->fences.pop_front(); + } +} + void GLVertexBuffer::initStatic() { #ifdef KWIN_HAVE_OPENGLES GLVertexBufferPrivate::hasMapBufferRange = hasGLExtension(QByteArrayLiteral("GL_EXT_map_buffer_range")); GLVertexBufferPrivate::supportsIndexedQuads = false; + GLVertexBufferPrivate::haveBufferStorage = false; + GLVertexBufferPrivate::haveSyncFences = false; #else bool haveBaseVertex = hasGLVersion(3, 2) || hasGLExtension(QByteArrayLiteral("GL_ARB_draw_elements_base_vertex")); bool haveCopyBuffer = hasGLVersion(3, 1) || 
hasGLExtension(QByteArrayLiteral("GL_ARB_copy_buffer")); @@ -1884,8 +2073,16 @@ void GLVertexBuffer::initStatic() GLVertexBufferPrivate::hasMapBufferRange = haveMapBufferRange; GLVertexBufferPrivate::supportsIndexedQuads = haveBaseVertex && haveCopyBuffer && haveMapBufferRange; GLVertexBufferPrivate::s_indexBuffer = nullptr; + GLVertexBufferPrivate::haveBufferStorage = hasGLVersion(4, 4) || hasGLExtension("GL_ARB_buffer_storage"); + GLVertexBufferPrivate::haveSyncFences = hasGLVersion(3, 2) || hasGLExtension("GL_ARB_sync"); #endif GLVertexBufferPrivate::streamingBuffer = new GLVertexBuffer(GLVertexBuffer::Stream); + + if (GLVertexBufferPrivate::haveBufferStorage && GLVertexBufferPrivate::haveSyncFences) { + if (qgetenv("KWIN_PERSISTENT_VBO") != QByteArrayLiteral("0")) { + GLVertexBufferPrivate::streamingBuffer->d->persistent = true; + } + } } void GLVertexBuffer::cleanup() diff --git a/libkwineffects/kwinglutils.h b/libkwineffects/kwinglutils.h index eb997c7f8a..3667f4d717 100644 --- a/libkwineffects/kwinglutils.h +++ b/libkwineffects/kwinglutils.h @@ -729,6 +729,20 @@ public: **/ void reset(); + /** + * Notifies the vertex buffer that we are done painting the frame. + * + * @internal + */ + void endOfFrame(); + + /** + * Notifies the vertex buffer that we have posted the frame. + * + * @internal + */ + void framePosted(); + /** * @internal */ diff --git a/scene_opengl.cpp b/scene_opengl.cpp index 6d8fceb954..5e29f78fe9 100644 --- a/scene_opengl.cpp +++ b/scene_opengl.cpp @@ -664,8 +664,12 @@ qint64 SceneOpenGL::paint(QRegion damage, ToplevelList toplevels) } #endif + GLVertexBuffer::streamingBuffer()->endOfFrame(); + m_backend->endRenderingFrame(validRegion, updateRegion); + GLVertexBuffer::streamingBuffer()->framePosted(); + if (m_currentFence) { if (!m_syncManager->updateFences()) { qDebug() << "Aborting explicit synchronization with the X command stream.";