/*
 KWin - the KDE window manager
 This file is part of the KDE project.

Copyright (C) 2006-2007 Rivo Laks <rivolaks@hot.ee>
Copyright (C) 2010, 2011 Martin Gräßlin <mgraesslin@kde.org>
*/

#include "kwinglutils.h"

// need to call GLTexturePrivate::initStatic()
#include "kwingltexture_p.h"

#include "kwineffects.h"
#include "kwinglplatform.h"
#include "logging_p.h"

#include <QPixmap>
#include <QImage>
#include <QHash>
#include <QFile>
#include <QVector2D>
#include <QVector3D>
#include <QVector4D>
#include <QMatrix4x4>
#include <QVarLengthArray>

#include <array>
#include <deque>

#include <math.h>


// Branch-prediction hints. GCC-compatible compilers map them onto
// __builtin_expect; every other compiler simply evaluates the condition.
#ifdef __GNUC__
#  define likely(x)   __builtin_expect(!!(x), 1)
#  define unlikely(x) __builtin_expect(!!(x), 0)
#else
#  define likely(x)   (x)
#  define unlikely(x) (x)
#endif

namespace KWin
// Variables
// Names of the OpenGL extensions collected by initGL(); queried through
// hasGLExtension() and returned as a whole by openGLExtensions().
static QList<QByteArray> glExtensions;

// Functions

// Fills glExtensions with the extension names reported by the driver.
// With GL >= 3.0 the names are queried one by one through glGetStringi();
// otherwise the space-separated GL_EXTENSIONS string is split.
// NOTE(review): resolveFunction is not referenced by any statement visible
// here, and nothing follows the final comment - confirm which initialisation
// calls belong at the end of this function.
void initGL(std::function<resolveFuncPtr(const char*)> resolveFunction)
    // Get list of supported OpenGL extensions
    if (hasGLVersion(3, 0)) {
        int count;
        glGetIntegerv(GL_NUM_EXTENSIONS, &count);

        for (int i = 0; i < count; i++) {
            const QByteArray name = (const char *) glGetStringi(GL_EXTENSIONS, i);
            glExtensions << name;
    } else
        glExtensions = QByteArray((const char*)glGetString(GL_EXTENSIONS)).split(' ');

    // handle OpenGL extensions functions


// Presumably the teardown counterpart of initGL().
// NOTE(review): no body is visible for this function - confirm what it is
// expected to release.
void cleanupGL()


bool hasGLVersion(int major, int minor, int release)
    return GLPlatform::instance()->glVersion() >= kVersionNumber(major, minor, release);

bool hasGLExtension(const QByteArray &extension)
    return glExtensions.contains(extension);

// Returns a copy of the collected extension list.
QList<QByteArray> openGLExtensions()
{
    const QList<QByteArray> &extensions = glExtensions;
    return extensions;
}

// Translates a glGetError() code into its symbolic name. Codes without a
// dedicated case are rendered as a hexadecimal number.
static QString formatGLError(GLenum err)
{
    switch(err) {
    case GL_NO_ERROR:          return QStringLiteral("GL_NO_ERROR");
    case GL_INVALID_ENUM:      return QStringLiteral("GL_INVALID_ENUM");
    case GL_INVALID_VALUE:     return QStringLiteral("GL_INVALID_VALUE");
    case GL_INVALID_OPERATION: return QStringLiteral("GL_INVALID_OPERATION");
    case GL_INVALID_FRAMEBUFFER_OPERATION:
                               return QStringLiteral("GL_INVALID_FRAMEBUFFER_OPERATION");
    case GL_STACK_OVERFLOW:    return QStringLiteral("GL_STACK_OVERFLOW");
    case GL_STACK_UNDERFLOW:   return QStringLiteral("GL_STACK_UNDERFLOW");
    case GL_OUT_OF_MEMORY:     return QStringLiteral("GL_OUT_OF_MEMORY");
    default: return QLatin1String("0x") + QString::number(err, 16);
    }
}

bool checkGLError(const char* txt)
    GLenum err = glGetError();
    if (err == GL_CONTEXT_LOST) {
        qCWarning(LIBKWINGLUTILS) << "GL error: context lost";
        return true;
    bool hasError = false;
    while (err != GL_NO_ERROR) {
        qCWarning(LIBKWINGLUTILS) << "GL error (" << txt << "): " << formatGLError(err);
        hasError = true;
        err = glGetError();
        if (err == GL_CONTEXT_LOST) {
            qCWarning(LIBKWINGLUTILS) << "GL error: context lost";
    return hasError;

// GLShader

// Creates an empty program object. The shader starts out invalid, with no
// uniform locations resolved; the ExplicitLinking bit of flags is stored in
// mExplicitLinking.
GLShader::GLShader(unsigned int flags)
    : mValid(false)
    , mLocationsResolved(false)
    , mExplicitLinking(flags & ExplicitLinking)
{
    mProgram = glCreateProgram();
}

// Creates a program object and immediately loads it from the two given
// source files. The result of loadFromFiles() is not checked here.
GLShader::GLShader(const QString& vertexfile, const QString& fragmentfile, unsigned int flags)
    : mValid(false)
    , mLocationsResolved(false)
    , mExplicitLinking(flags & ExplicitLinking)
{
    mProgram = glCreateProgram();
    loadFromFiles(vertexfile, fragmentfile);
}

    if (mProgram) {

bool GLShader::loadFromFiles(const QString &vertexFile, const QString &fragmentFile)
    QFile vf(vertexFile);
    if (!vf.open(QIODevice::ReadOnly)) {
        qCCritical(LIBKWINGLUTILS) << "Couldn't open" << vertexFile << "for reading!";
        return false;
    const QByteArray vertexSource = vf.readAll();

    QFile ff(fragmentFile);
    if (!ff.open(QIODevice::ReadOnly)) {
        qCCritical(LIBKWINGLUTILS) << "Couldn't open" << fragmentFile << "for reading!";
        return false;
    const QByteArray fragmentSource = ff.readAll();

    return load(vertexSource, fragmentSource);

bool GLShader::link()
    // Be optimistic
    mValid = true;


    // Get the program info log
    int maxLength, length;
    glGetProgramiv(mProgram, GL_INFO_LOG_LENGTH, &maxLength);

    QByteArray log(maxLength, 0);
    glGetProgramInfoLog(mProgram, maxLength, &length, log.data());

    // Make sure the program linked successfully
    int status;
    glGetProgramiv(mProgram, GL_LINK_STATUS, &status);

    if (status == 0) {
        qCCritical(LIBKWINGLUTILS) << "Failed to link shader:" << endl << log;
        mValid = false;
    } else if (length > 0) {
        qCDebug(LIBKWINGLUTILS) << "Shader link log:" << log;

    return mValid;

// Builds the text that is handed to the GLSL compiler: an optional preamble
// followed by the caller's source.
//  - GLES with GLSL < 3.0: a default float precision statement is prepended.
//  - shader debugging enabled: KWIN_SHADER_DEBUG is defined.
//  - GLES with GLSL >= 3.0: a "#version 140" directive in the source is
//    rewritten to "#version 300 es" plus a float precision statement.
// shaderType is currently not used.
const QByteArray GLShader::prepareSource(GLenum shaderType, const QByteArray &source) const
{
    Q_UNUSED(shaderType)

    // Prepare the source code
    QByteArray ba;
    if (GLPlatform::instance()->isGLES() && GLPlatform::instance()->glslVersion() < kVersionNumber(3, 0)) {
        ba.append("precision highp float;\n");
    }
    if (ShaderManager::instance()->isShaderDebug()) {
        ba.append("#define KWIN_SHADER_DEBUG 1\n");
    }
    // The actual shader code; the replacement below operates on it.
    ba.append(source);
    if (GLPlatform::instance()->isGLES() && GLPlatform::instance()->glslVersion() >= kVersionNumber(3, 0)) {
        ba.replace("#version 140", "#version 300 es\n\nprecision highp float;\n");
    }

    return ba;
}

// Compiles source as a shader of the given type and, on success, attaches
// it to program. Compile failures are logged as critical with the info log;
// a non-empty log on success is logged at debug level.
// Returns true when compilation succeeded.
bool GLShader::compile(GLuint program, GLenum shaderType, const QByteArray &source) const
{
    GLuint shader = glCreateShader(shaderType);

    QByteArray preparedSource = prepareSource(shaderType, source);
    const char* src = preparedSource.constData();
    glShaderSource(shader, 1, &src, nullptr);

    // Compile the shader
    glCompileShader(shader);

    // Get the shader info log
    int maxLength = 0;
    int length = 0;
    glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &maxLength);

    QByteArray log(maxLength, 0);
    glGetShaderInfoLog(shader, maxLength, &length, log.data());

    // Check the status
    int status = 0;
    glGetShaderiv(shader, GL_COMPILE_STATUS, &status);

    if (status == 0) {
        const char *typeName = (shaderType == GL_VERTEX_SHADER ? "vertex" : "fragment");
        qCCritical(LIBKWINGLUTILS) << "Failed to compile" << typeName << "shader:" << endl << log;
    } else if (length > 0) {
        qCDebug(LIBKWINGLUTILS) << "Shader compile log:" << log;
    }

    if (status != 0) {
        glAttachShader(program, shader);
    }

    // An attached shader is only flagged for deletion and lives on until it
    // is detached; an unattached one is freed right away. Either way the
    // shader object no longer leaks.
    glDeleteShader(shader);

    return status != 0;
}

bool GLShader::load(const QByteArray &vertexSource, const QByteArray &fragmentSource)
    // Make sure shaders are actually supported
    if (!(GLPlatform::instance()->supports(GLSL) &&
        // we lack shader branching for Texture2DRectangle everywhere - and it's probably not worth it
        GLPlatform::instance()->supports(TextureNPOT))) {
        qCCritical(LIBKWINGLUTILS) << "Shaders are not supported";
        return false;

    mValid = false;

    // Compile the vertex shader
    if (!vertexSource.isEmpty()) {
        bool success = compile(mProgram, GL_VERTEX_SHADER, vertexSource);

        if (!success)
            return false;

    // Compile the fragment shader
    if (!fragmentSource.isEmpty()) {
        bool success = compile(mProgram, GL_FRAGMENT_SHADER, fragmentSource);

        if (!success)
            return false;

    if (mExplicitLinking)
        return true;

    // link() sets mValid
    return link();

void GLShader::bindAttributeLocation(const char *name, int index)
    glBindAttribLocation(mProgram, index, name);

void GLShader::bindFragDataLocation(const char *name, int index)
    if (!GLPlatform::instance()->isGLES() && (hasGLVersion(3, 0) || hasGLExtension(QByteArrayLiteral("GL_EXT_gpu_shader4"))))
        glBindFragDataLocation(mProgram, index, name);

void GLShader::bind()

void GLShader::unbind()

void GLShader::resolveLocations()
    if (mLocationsResolved)

    mMatrixLocation[TextureMatrix]              = uniformLocation("textureMatrix");
    mMatrixLocation[ProjectionMatrix]           = uniformLocation("projection");
    mMatrixLocation[ModelViewMatrix]            = uniformLocation("modelview");
    mMatrixLocation[ModelViewProjectionMatrix]  = uniformLocation("modelViewProjectionMatrix");
    mMatrixLocation[WindowTransformation]       = uniformLocation("windowTransformation");
    mMatrixLocation[ScreenTransformation]       = uniformLocation("screenTransformation");

    mVec2Location[Offset] = uniformLocation("offset");

    mVec4Location[ModulationConstant] = uniformLocation("modulation");

    mFloatLocation[Saturation]    = uniformLocation("saturation");

    mColorLocation[Color] = uniformLocation("geometryColor");

    mLocationsResolved = true;

int GLShader::uniformLocation(const char *name)
    const int location = glGetUniformLocation(mProgram, name);
    return location;

// Setters for the well-known uniforms. Each one first makes sure the cached
// locations have been looked up (resolveLocations() is a no-op once that has
// happened) and then forwards to the location-based overload, whose result
// it returns.

bool GLShader::setUniform(GLShader::MatrixUniform uniform, const QMatrix4x4 &matrix)
{
    resolveLocations();
    return setUniform(mMatrixLocation[uniform], matrix);
}

bool GLShader::setUniform(GLShader::Vec2Uniform uniform, const QVector2D &value)
{
    resolveLocations();
    return setUniform(mVec2Location[uniform], value);
}

bool GLShader::setUniform(GLShader::Vec4Uniform uniform, const QVector4D &value)
{
    resolveLocations();
    return setUniform(mVec4Location[uniform], value);
}

bool GLShader::setUniform(GLShader::FloatUniform uniform, float value)
{
    resolveLocations();
    return setUniform(mFloatLocation[uniform], value);
}

bool GLShader::setUniform(GLShader::IntUniform uniform, int value)
{
    resolveLocations();
    return setUniform(mIntLocation[uniform], value);
}

bool GLShader::setUniform(GLShader::ColorUniform uniform, const QVector4D &value)
{
    resolveLocations();
    return setUniform(mColorLocation[uniform], value);
}

bool GLShader::setUniform(GLShader::ColorUniform uniform, const QColor &value)
{
    resolveLocations();
    return setUniform(mColorLocation[uniform], value);
}

bool GLShader::setUniform(const char *name, float value)
    const int location = uniformLocation(name);
    return setUniform(location, value);

bool GLShader::setUniform(const char *name, int value)
    const int location = uniformLocation(name);
    return setUniform(location, value);

bool GLShader::setUniform(const char *name, const QVector2D& value)
    const int location = uniformLocation(name);
    return setUniform(location, value);

bool GLShader::setUniform(const char *name, const QVector3D& value)
    const int location = uniformLocation(name);
    return setUniform(location, value);

bool GLShader::setUniform(const char *name, const QVector4D& value)
    const int location = uniformLocation(name);
    return setUniform(location, value);

bool GLShader::setUniform(const char *name, const QMatrix4x4& value)
    const int location = uniformLocation(name);
    return setUniform(location, value);

bool GLShader::setUniform(const char *name, const QColor& color)
    const int location = uniformLocation(name);
    return setUniform(location, color);

bool GLShader::setUniform(int location, float value)
    if (location >= 0) {
        glUniform1f(location, value);
    return (location >= 0);

bool GLShader::setUniform(int location, int value)
    if (location >= 0) {
        glUniform1i(location, value);
    return (location >= 0);

bool GLShader::setUniform(int location, const QVector2D &value)
    if (location >= 0) {
        glUniform2fv(location, 1, (const GLfloat*)&value);
    return (location >= 0);

bool GLShader::setUniform(int location, const QVector3D &value)
    if (location >= 0) {
        glUniform3fv(location, 1, (const GLfloat*)&value);
    return (location >= 0);

bool GLShader::setUniform(int location, const QVector4D &value)
    if (location >= 0) {
        glUniform4fv(location, 1, (const GLfloat*)&value);
    return (location >= 0);

bool GLShader::setUniform(int location, const QMatrix4x4 &value)
    if (location >= 0) {
        GLfloat m[16];
        const auto *data = value.constData();
        // i is column, j is row for m
        for (int i = 0; i < 16; ++i) {
            m[i] = data[i];
        glUniformMatrix4fv(location, 1, GL_FALSE, m);
    return (location >= 0);

bool GLShader::setUniform(int location, const QColor &color)
    if (location >= 0) {
        glUniform4f(location, color.redF(), color.greenF(), color.blueF(), color.alphaF());
    return (location >= 0);

int GLShader::attributeLocation(const char* name)
    int location = glGetAttribLocation(mProgram, name);
    return location;

bool GLShader::setAttribute(const char* name, float value)
    int location = attributeLocation(name);
    if (location >= 0) {
        glVertexAttrib1f(location, value);
    return (location >= 0);

// Reads the 4x4 float uniform called name back from mProgram. Returns a
// default-constructed QMatrix4x4 when the uniform has no valid location.
QMatrix4x4 GLShader::getUniformMatrix4x4(const char* name)
{
    const int location = uniformLocation(name);
    if (location < 0) {
        return QMatrix4x4();
    }

    GLfloat m[16];
    glGetnUniformfv(mProgram, location, sizeof(m), m);
    // The QMatrix4x4 constructor takes its arguments row by row, so the
    // elements read back are passed in transposed order.
    return QMatrix4x4(m[0], m[4], m[8],  m[12],
                      m[1], m[5], m[9],  m[13],
                      m[2], m[6], m[10], m[14],
                      m[3], m[7], m[11], m[15]);
}

// ShaderManager
// The single ShaderManager, created on first use by instance() and deleted
// again by cleanup().
ShaderManager *ShaderManager::s_shaderManager = nullptr;

// Returns the shared ShaderManager, creating it on the first call.
ShaderManager *ShaderManager::instance()
{
    if (s_shaderManager == nullptr) {
        s_shaderManager = new ShaderManager();
    }
    return s_shaderManager;
}

void ShaderManager::cleanup()
    delete s_shaderManager;
    s_shaderManager = nullptr;

    m_debug = qstrcmp(qgetenv("KWIN_GL_DEBUG"), "1") == 0;

    const qint64 coreVersionNumber = GLPlatform::instance()->isGLES() ? kVersionNumber(3, 0) : kVersionNumber(1, 40);
    if (GLPlatform::instance()->glslVersion() >= coreVersionNumber) {
        m_resourcePath = QStringLiteral(":/effect-shaders-1.40/");
    } else {
        m_resourcePath = QStringLiteral(":/effect-shaders-1.10/");

    while (!m_boundShaders.isEmpty()) {


static bool fuzzyCompare(const QVector4D &lhs, const QVector4D &rhs)
    const float epsilon = 1.0f / 255.0f;

    return lhs[0] >= rhs[0] - epsilon && lhs[0] <= rhs[0] + epsilon &&
           lhs[1] >= rhs[1] - epsilon && lhs[1] <= rhs[1] + epsilon &&
           lhs[2] >= rhs[2] - epsilon && lhs[2] <= rhs[2] + epsilon &&
           lhs[3] >= rhs[3] - epsilon && lhs[3] <= rhs[3] + epsilon;

static bool checkPixel(int x, int y, const QVector4D &expected, const char *file, int line)
    uint8_t data[4];
    glReadnPixels(x, y, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, 4, data);

    const QVector4D pixel{data[0] / 255.f, data[1] / 255.f, data[2] / 255.f, data[3] / 255.f};

    if (fuzzyCompare(pixel, expected))
        return true;

    QMessageLogger(file, line, nullptr).warning() << "Pixel was" << pixel << "expected" << expected;
    return false;

// Calls checkPixel() with the file name and line number of the call site,
// so that the warning of a failed check points at the failing test line.
#define CHECK_PIXEL(x, y, expected) \
    checkPixel(x, y, expected, __FILE__, __LINE__)

// Blends color with its gray value: saturation 0 gives the gray value,
// saturation 1 the unchanged color. The w component of color is used as-is
// in the gray value. Uses the same weights as the generated fragment shader.
static QVector4D adjustSaturation(const QVector4D &color, float saturation)
{
    const QVector3D weights{0.2126, 0.7152, 0.0722};
    const float gray = QVector3D::dotProduct(color.toVector3D(), weights);
    const QVector4D desaturated{gray, gray, gray, color.w()};
    return desaturated * (1.0f - saturation) + color * saturation;
}

// Exercises several generated shaders (uniform color, texture mapping,
// saturation, modulation, saturation + modulation) by drawing a 32x32 quad
// built from two triangles and comparing four read-back pixels per test with
// the expected colors.
// Returns true when every check passed and also when the test is skipped
// (no framebuffer object support, an NVIDIA Quadro renderer, or a Mesa
// driver of version 17.0 or newer). An invalid shader makes the test fail.
// NOTE(review): no statement in this view enables `fbo`, binds `src`,
// unmaps the vertex buffer, clears the target or pops the pushed shaders -
// confirm these steps against the complete file.
bool ShaderManager::selfTest()
    bool pass = true;

    if (!GLRenderTarget::supported()) {
        qCWarning(LIBKWINGLUTILS) << "Framebuffer objects not supported - skipping shader tests";
        return true;
    if (GLPlatform::instance()->isNvidia() && GLPlatform::instance()->glRendererString().contains("Quadro")) {
        qCWarning(LIBKWINGLUTILS) << "Skipping self test as it is reported to return false positive results on Quadro hardware";
        return true;
    if (GLPlatform::instance()->isMesaDriver() && GLPlatform::instance()->mesaVersion() >= kVersionNumber(17, 0)) {
        qCWarning(LIBKWINGLUTILS) << "Skipping self test as it is reported to return false positive results on Mesa drivers";
        return true;

    // Create the source texture
    // A 2x2 image with one distinct color per texel.
    QImage image(2, 2, QImage::Format_ARGB32_Premultiplied);
    image.setPixel(0, 0, 0xffff0000); // Red
    image.setPixel(1, 0, 0xff00ff00); // Green
    image.setPixel(0, 1, 0xff0000ff); // Blue
    image.setPixel(1, 1, 0xffffffff); // White

    GLTexture src(image);

    // Create the render target
    GLTexture dst(GL_RGBA8, 32, 32);

    GLRenderTarget fbo(dst);

    // Set up the vertex buffer
    GLVertexBuffer *vbo = GLVertexBuffer::streamingBuffer();

    const GLVertexAttrib attribs[] {
        { VA_Position, 2, GL_FLOAT, offsetof(GLVertex2D, position) },
        { VA_TexCoord, 2, GL_FLOAT, offsetof(GLVertex2D, texcoord) },

    vbo->setAttribLayout(attribs, 2, sizeof(GLVertex2D));

    // Two triangles covering the 32x32 area; texture coordinates span 0..1.
    GLVertex2D *verts = (GLVertex2D*) vbo->map(6 * sizeof(GLVertex2D));
    verts[0] = GLVertex2D{{0,   0}, {0, 0}}; // Top left
    verts[1] = GLVertex2D{{0,  32}, {0, 1}}; // Bottom left
    verts[2] = GLVertex2D{{32,  0}, {1, 0}}; // Top right

    verts[3] = GLVertex2D{{32,  0}, {1, 0}}; // Top right
    verts[4] = GLVertex2D{{0,  32}, {0, 1}}; // Bottom left
    verts[5] = GLVertex2D{{32, 32}, {1, 1}}; // Bottom right


    glViewport(0, 0, 32, 32);
    glClearColor(0, 0, 0, 0);

    // Set up the projection matrix
    QMatrix4x4 matrix;
    matrix.ortho(QRect(0, 0, 32, 32));

    // Bind the source texture

    const QVector4D red   {1.0f, 0.0f, 0.0f, 1.0f};
    const QVector4D green {0.0f, 1.0f, 0.0f, 1.0f};
    const QVector4D blue  {0.0f, 0.0f, 1.0f, 1.0f};
    const QVector4D white {1.0f, 1.0f, 1.0f, 1.0f};

    // Note: To see the line number in error messages, set
    //       QT_MESSAGE_PATTERN="%{message} (%{file}:%{line})"

    // Test solid color
    GLShader *shader = pushShader(ShaderTrait::UniformColor);
    if (shader->isValid()) {

        shader->setUniform(GLShader::ModelViewProjectionMatrix, matrix);
        shader->setUniform(GLShader::Color, green);
        vbo->draw(GL_TRIANGLES, 0, 6);

        // `CHECK_PIXEL(...) && pass` keeps evaluating every check even after
        // an earlier one has failed, so all mismatches get logged.
        pass = CHECK_PIXEL(8,  24, green) && pass;
        pass = CHECK_PIXEL(24, 24, green) && pass;
        pass = CHECK_PIXEL(8,   8, green) && pass;
        pass = CHECK_PIXEL(24,  8, green) && pass;
    } else {
        pass = false;

    // Test texture mapping
    shader = pushShader(ShaderTrait::MapTexture);
    if (shader->isValid()) {

        shader->setUniform(GLShader::ModelViewProjectionMatrix, matrix);
        vbo->draw(GL_TRIANGLES, 0, 6);

        pass = CHECK_PIXEL(8,  24, red)   && pass;
        pass = CHECK_PIXEL(24, 24, green) && pass;
        pass = CHECK_PIXEL(8,   8, blue)  && pass;
        pass = CHECK_PIXEL(24,  8, white) && pass;
    } else {
        pass = false;

    // Test saturation filter
    shader = pushShader(ShaderTrait::MapTexture | ShaderTrait::AdjustSaturation);
    if (shader->isValid()) {

        const float saturation = .3;

        shader->setUniform(GLShader::ModelViewProjectionMatrix, matrix);
        shader->setUniform(GLShader::Saturation, saturation);
        vbo->draw(GL_TRIANGLES, 0, 6);

        // Expected values are computed on the CPU by adjustSaturation().
        pass = CHECK_PIXEL(8,  24, adjustSaturation(red,   saturation)) && pass;
        pass = CHECK_PIXEL(24, 24, adjustSaturation(green, saturation)) && pass;
        pass = CHECK_PIXEL(8,  8,  adjustSaturation(blue,  saturation)) && pass;
        pass = CHECK_PIXEL(24, 8,  adjustSaturation(white, saturation)) && pass;
    } else {
        pass = false;

    // Test modulation filter
    shader = pushShader(ShaderTrait::MapTexture | ShaderTrait::Modulate);
    if (shader->isValid()) {

        const QVector4D modulation{.3f, .4f, .5f, .6f};

        shader->setUniform(GLShader::ModelViewProjectionMatrix, matrix);
        shader->setUniform(GLShader::ModulationConstant, modulation);
        vbo->draw(GL_TRIANGLES, 0, 6);

        pass = CHECK_PIXEL(8,  24, red   * modulation) && pass;
        pass = CHECK_PIXEL(24, 24, green * modulation) && pass;
        pass = CHECK_PIXEL(8,   8, blue  * modulation) && pass;
        pass = CHECK_PIXEL(24,  8, white * modulation) && pass;
    } else {
        pass = false;

    // Test saturation + modulation
    shader = pushShader(ShaderTrait::MapTexture | ShaderTrait::AdjustSaturation | ShaderTrait::Modulate);
    if (shader->isValid()) {

        const QVector4D modulation{.3f, .4f, .5f, .6f};
        const float saturation = .3;

        shader->setUniform(GLShader::ModelViewProjectionMatrix, matrix);
        shader->setUniform(GLShader::ModulationConstant, modulation);
        shader->setUniform(GLShader::Saturation, saturation);
        vbo->draw(GL_TRIANGLES, 0, 6);

        // The expected color applies modulation first and saturation second.
        pass = CHECK_PIXEL(8,  24, adjustSaturation(red   * modulation, saturation)) && pass;
        pass = CHECK_PIXEL(24, 24, adjustSaturation(green * modulation, saturation)) && pass;
        pass = CHECK_PIXEL(8,  8,  adjustSaturation(blue  * modulation, saturation)) && pass;
        pass = CHECK_PIXEL(24, 8,  adjustSaturation(white * modulation, saturation)) && pass;
    } else {
        pass = false;


    return pass;

// Generates the GLSL source of a vertex shader for the given traits.
// Desktop GLSL >= 1.40 and GLSL ES >= 3.00 get a #version directive and the
// in/out qualifiers; older versions use attribute/varying. The shader
// transforms `position` by `modelViewProjectionMatrix` and, with MapTexture,
// forwards the texture coordinate as `texcoord0`.
QByteArray ShaderManager::generateVertexSource(ShaderTraits traits) const
{
    QByteArray source;
    QTextStream stream(&source);

    GLPlatform * const gl = GLPlatform::instance();
    QByteArray attribute, varying;

    if (!gl->isGLES()) {
        const bool glsl_140 = gl->glslVersion() >= kVersionNumber(1, 40);

        attribute = glsl_140 ? QByteArrayLiteral("in")  : QByteArrayLiteral("attribute");
        varying   = glsl_140 ? QByteArrayLiteral("out") : QByteArrayLiteral("varying");

        if (glsl_140) {
            stream << "#version 140\n\n";
        }
    } else {
        const bool glsl_es_300 = gl->glslVersion() >= kVersionNumber(3, 0);

        attribute = glsl_es_300 ? QByteArrayLiteral("in")  : QByteArrayLiteral("attribute");
        varying   = glsl_es_300 ? QByteArrayLiteral("out") : QByteArrayLiteral("varying");

        if (glsl_es_300) {
            stream << "#version 300 es\n\n";
        }
    }

    stream << attribute << " vec4 position;\n";
    if (traits & ShaderTrait::MapTexture) {
        stream << attribute << " vec4 texcoord;\n\n";
        stream << varying << " vec2 texcoord0;\n\n";
    } else {
        stream << "\n";
    }

    stream << "uniform mat4 modelViewProjectionMatrix;\n\n";

    stream << "void main()\n{\n";
    if (traits & ShaderTrait::MapTexture) {
        stream << "    texcoord0 = texcoord.st;\n";
    }

    stream << "    gl_Position = modelViewProjectionMatrix * position;\n";
    stream << "}\n";

    // QTextStream buffers what is written to it; flush so that `source`
    // holds the complete text before it is returned.
    stream.flush();
    return source;
}

// Generates the GLSL source of a fragment shader for the given traits.
// Desktop GLSL >= 1.40 and GLSL ES >= 3.00 use in / texture() / a declared
// `fragColor` output; older versions use varying / texture2D() /
// gl_FragColor. With MapTexture the texel is sampled and optionally
// modulated and desaturated; otherwise, with UniformColor, the output is
// `geometryColor`.
QByteArray ShaderManager::generateFragmentSource(ShaderTraits traits) const
{
    QByteArray source;
    QTextStream stream(&source);

    GLPlatform * const gl = GLPlatform::instance();
    QByteArray varying, output, textureLookup;

    if (!gl->isGLES()) {
        const bool glsl_140 = gl->glslVersion() >= kVersionNumber(1, 40);

        if (glsl_140) {
            stream << "#version 140\n\n";
        }

        varying       = glsl_140 ? QByteArrayLiteral("in")         : QByteArrayLiteral("varying");
        textureLookup = glsl_140 ? QByteArrayLiteral("texture")    : QByteArrayLiteral("texture2D");
        output        = glsl_140 ? QByteArrayLiteral("fragColor")  : QByteArrayLiteral("gl_FragColor");
    } else {
        const bool glsl_es_300 = GLPlatform::instance()->glslVersion() >= kVersionNumber(3, 0);

        if (glsl_es_300) {
            stream << "#version 300 es\n\n";
        }

        // From the GLSL ES specification:
        //     "The fragment language has no default precision qualifier for floating point types."
        stream << "precision highp float;\n\n";

        varying       = glsl_es_300 ? QByteArrayLiteral("in")         : QByteArrayLiteral("varying");
        textureLookup = glsl_es_300 ? QByteArrayLiteral("texture")    : QByteArrayLiteral("texture2D");
        output        = glsl_es_300 ? QByteArrayLiteral("fragColor")  : QByteArrayLiteral("gl_FragColor");
    }

    if (traits & ShaderTrait::MapTexture) {
        stream << "uniform sampler2D sampler;\n";

        if (traits & ShaderTrait::Modulate) {
            stream << "uniform vec4 modulation;\n";
        }
        if (traits & ShaderTrait::AdjustSaturation) {
            stream << "uniform float saturation;\n";
        }

        stream << "\n" << varying << " vec2 texcoord0;\n";

    } else if (traits & ShaderTrait::UniformColor) {
        stream << "uniform vec4 geometryColor;\n";
    }

    // Only the built-in gl_FragColor needs no declaration.
    if (output != QByteArrayLiteral("gl_FragColor")) {
        stream << "\nout vec4 " << output << ";\n";
    }

    stream << "\nvoid main(void)\n{\n";
    if (traits & ShaderTrait::MapTexture) {
        if (traits & (ShaderTrait::Modulate | ShaderTrait::AdjustSaturation)) {
            stream << "    vec4 texel = " << textureLookup << "(sampler, texcoord0);\n";

            if (traits & ShaderTrait::Modulate) {
                stream << "    texel *= modulation;\n";
            }
            if (traits & ShaderTrait::AdjustSaturation) {
                stream << "    texel.rgb = mix(vec3(dot(texel.rgb, vec3(0.2126, 0.7152, 0.0722))), texel.rgb, saturation);\n";
            }

            stream << "    " << output << " = texel;\n";
        } else {
            stream << "    " << output << " = " << textureLookup << "(sampler, texcoord0);\n";
        }
    } else if (traits & ShaderTrait::UniformColor) {
        stream << "    " << output << " = geometryColor;\n";
    }

    stream << "}";

    // QTextStream buffers what is written to it; flush so that `source`
    // holds the complete text before it is returned.
    stream.flush();
    return source;
}

// Builds a shader for the given traits from generated sources only, by
// delegating to generateCustomShader() without custom source code.
GLShader *ShaderManager::generateShader(ShaderTraits traits)
{
    GLShader *generated = generateCustomShader(traits);
    return generated;
}

// Creates a shader for the given traits. An empty vertexSource or
// fragmentSource is replaced by the generated source for that stage.
// The attribute and fragment output locations are bound before the program
// is linked. The caller receives ownership of the returned shader; check
// isValid() to find out whether compiling and linking succeeded.
GLShader *ShaderManager::generateCustomShader(ShaderTraits traits, const QByteArray &vertexSource, const QByteArray &fragmentSource)
{
    const QByteArray vertex   = vertexSource.isEmpty() ? generateVertexSource(traits) : vertexSource;
    const QByteArray fragment = fragmentSource.isEmpty() ? generateFragmentSource(traits) : fragmentSource;

    // Explicit linking: load() only compiles, so the locations can still be
    // bound before the link step below.
    GLShader *shader = new GLShader(GLShader::ExplicitLinking);
    shader->load(vertex, fragment);

    shader->bindAttributeLocation("position", VA_Position);
    shader->bindAttributeLocation("texcoord", VA_TexCoord);
    shader->bindFragDataLocation("fragColor", 0);

    // link() is what marks the shader as valid.
    shader->link();

    return shader;
}

// Creates a shader whose stages are read from files below m_resourcePath.
// An empty file name leaves that stage to generateCustomShader(). When a
// named file cannot be read or is empty, a default-constructed GLShader is
// returned instead.
GLShader *ShaderManager::generateShaderFromResources(ShaderTraits traits, const QString &vertexFile, const QString &fragmentFile)
{
    // Returns the file contents, or an empty array (after logging) when the
    // file cannot be opened.
    const auto readResource = [this] (const QString &fileName) {
        QFile resource(m_resourcePath + fileName);
        if (!resource.open(QIODevice::ReadOnly)) {
            qCCritical(LIBKWINGLUTILS) << "Failed to read shader " << fileName;
            return QByteArray();
        }
        return resource.readAll();
    };

    QByteArray vertexCode;
    QByteArray fragmentCode;

    if (!vertexFile.isEmpty()) {
        vertexCode = readResource(vertexFile);
        if (vertexCode.isEmpty()) {
            return new GLShader();
        }
    }

    if (!fragmentFile.isEmpty()) {
        fragmentCode = readResource(fragmentFile);
        if (fragmentCode.isEmpty()) {
            return new GLShader();
        }
    }

    return generateCustomShader(traits, vertexCode, fragmentCode);
}

// Returns the shader for the given traits from m_shaderHash, generating and
// storing it on first request.
GLShader *ShaderManager::shader(ShaderTraits traits)
{
    if (GLShader *cached = m_shaderHash.value(traits)) {
        return cached;
    }

    GLShader *created = generateShader(traits);
    m_shaderHash.insert(traits, created);
    return created;
}

// Returns the shader on top of the bound-shader stack, or nullptr when the
// stack is empty.
GLShader *ShaderManager::getBoundShader() const
{
    return m_boundShaders.isEmpty() ? nullptr : m_boundShaders.top();
}

bool ShaderManager::isShaderBound() const
    return !m_boundShaders.isEmpty();

bool ShaderManager::isShaderDebug() const
    return m_debug;

// Fetches (or generates) the shader for the given traits, pushes it onto
// the bound-shader stack and returns it.
GLShader *ShaderManager::pushShader(ShaderTraits traits)
{
    GLShader *shader = this->shader(traits);
    pushShader(shader);
    return shader;
}

// Pushes shader onto the bound-shader stack and makes it the active one.
// Every call adds a stack entry, so each push needs a matching popShader().
void ShaderManager::pushShader(GLShader *shader)
{
    // only bind shader if it is not already bound
    if (shader != getBoundShader()) {
        shader->bind();
    }
    m_boundShaders.push(shader);
}

void ShaderManager::popShader()
    if (m_boundShaders.isEmpty()) {
    GLShader *shader = m_boundShaders.pop();
    if (m_boundShaders.isEmpty()) {
        // no more shader bound - unbind
    } else if (shader != m_boundShaders.top()) {
        // only rebind if a different shader is on top of stack

// Binds the fragment output "fragColor" of shader to color number 0.
void ShaderManager::bindFragDataLocations(GLShader *shader)
{
    shader->bindFragDataLocation("fragColor", 0);
}

// Binds the attributes "vertex" and "texCoord" of shader to the position
// and texture coordinate attribute indices.
void ShaderManager::bindAttributeLocations(GLShader *shader) const
{
    shader->bindAttributeLocation("vertex",   VA_Position);
    shader->bindAttributeLocation("texCoord", VA_TexCoord);
}

// Creates a shader from the given vertex and fragment source code, binds
// the standard attribute and fragment output locations and links it.
// The caller receives ownership of the returned shader; check isValid() to
// find out whether compiling and linking succeeded.
GLShader *ShaderManager::loadShaderFromCode(const QByteArray &vertexSource, const QByteArray &fragmentSource)
{
    // Explicit linking: load() only compiles, so the locations can still be
    // bound before the program is linked.
    GLShader *shader = new GLShader(GLShader::ExplicitLinking);
    shader->load(vertexSource, fragmentSource);
    bindAttributeLocations(shader);
    bindFragDataLocations(shader);
    shader->link();
    return shader;
}

/***  GLRenderTarget  ***/
// Capability flags, set by initStatic() and reset by cleanup().
bool GLRenderTarget::sSupported = false;
bool GLRenderTarget::s_blitSupported = false;
// Stack of render targets managed by pushRenderTarget()/popRenderTarget().
QStack<GLRenderTarget*> GLRenderTarget::s_renderTargets = QStack<GLRenderTarget*>();
QSize GLRenderTarget::s_virtualScreenSize;
// Geometry and scale used by blitFromFramebuffer() to map source rectangles.
QRect GLRenderTarget::s_virtualScreenGeometry;
qreal GLRenderTarget::s_virtualScreenScale = 1.0;
// GL_VIEWPORT as it was when the first target was pushed; restored by
// popRenderTarget() once the stack is empty again.
GLint GLRenderTarget::s_virtualScreenViewport[4];

// Determines whether framebuffer objects and framebuffer blits can be used.
// On GLES render targets are always reported as supported and blitting
// requires version 3.0. On desktop GL either feature is available with
// GL 3.0 or GL_ARB_framebuffer_object.
// NOTE(review): both desktop expressions end in a dangling `||` - a further
// alternative (presumably another extension check) is missing from this
// view; confirm against the complete file.
void GLRenderTarget::initStatic()
    if (GLPlatform::instance()->isGLES()) {
        sSupported = true;
        s_blitSupported = hasGLVersion(3, 0);
    } else {
        sSupported = hasGLVersion(3, 0) ||
            hasGLExtension(QByteArrayLiteral("GL_ARB_framebuffer_object")) ||

        s_blitSupported = hasGLVersion(3, 0) ||
            hasGLExtension(QByteArrayLiteral("GL_ARB_framebuffer_object")) ||

void GLRenderTarget::cleanup()
    sSupported = false;
    s_blitSupported = false;

bool GLRenderTarget::isRenderTargetBound()
    return !s_renderTargets.isEmpty();

bool GLRenderTarget::blitSupported()
    return s_blitSupported;

// Makes target the current render target and puts it on the stack.
// When the stack was empty, the current viewport is saved first so that
// popRenderTarget() can restore it later.
void GLRenderTarget::pushRenderTarget(GLRenderTarget* target)
{
    if (s_renderTargets.isEmpty()) {
        glGetIntegerv(GL_VIEWPORT, s_virtualScreenViewport);
    }

    target->enable();
    s_renderTargets.push(target);
}

// Puts a whole stack of render targets on top of the current stack and
// makes the topmost of them the current render target. An empty argument
// leaves everything untouched. When the stack was empty, the current
// viewport is saved first so that popRenderTarget() can restore it later.
void GLRenderTarget::pushRenderTargets(QStack <GLRenderTarget*> targets)
{
    if (targets.isEmpty()) {
        return;
    }

    if (s_renderTargets.isEmpty()) {
        glGetIntegerv(GL_VIEWPORT, s_virtualScreenViewport);
    }

    targets.top()->enable();
    s_renderTargets.append(targets);
}

// Removes the current render target from the stack and returns it.
// If another target remains on the stack it becomes current again;
// otherwise the popped target is disabled and the viewport saved by the
// first push is restored. Returns nullptr when the stack is empty.
GLRenderTarget* GLRenderTarget::popRenderTarget()
{
    if (s_renderTargets.isEmpty()) {
        return nullptr;
    }

    GLRenderTarget* ret = s_renderTargets.pop();

    if (!s_renderTargets.isEmpty()) {
        s_renderTargets.top()->enable();
    } else {
        ret->disable();
        glViewport (s_virtualScreenViewport[0], s_virtualScreenViewport[1], s_virtualScreenViewport[2], s_virtualScreenViewport[3]);
    }

    return ret;
}

    // Reset variables
    mValid = false;
    mTexture = GLTexture();

// Creates a render target that draws into the texture color.
// The framebuffer object is set up right away when render targets are
// supported and the texture is not null; otherwise the target stays invalid
// and an error is logged.
GLRenderTarget::GLRenderTarget(const GLTexture& color)
{
    // Reset variables
    mValid = false;

    mTexture = color;

    // Make sure FBO is supported
    if (sSupported && !mTexture.isNull()) {
        // initFBO() sets mValid on success.
        initFBO();
    } else {
        qCCritical(LIBKWINGLUTILS) << "Render targets aren't supported!";
    }
}

    if (mValid) {
        glDeleteFramebuffers(1, &mFramebuffer);

bool GLRenderTarget::enable()
    if (!mValid) {

    if (!valid()) {
        qCCritical(LIBKWINGLUTILS) << "Can't enable invalid render target!";
        return false;

    glBindFramebuffer(GL_FRAMEBUFFER, mFramebuffer);
    glViewport(0, 0, mTexture.width(), mTexture.height());

    return true;

bool GLRenderTarget::disable()
    if (!mValid) {

    if (!valid()) {
        qCCritical(LIBKWINGLUTILS) << "Can't disable invalid render target!";
        return false;

    glBindFramebuffer(GL_FRAMEBUFFER, 0);

    return true;

// Translates a glCheckFramebufferStatus() result into its symbolic name.
// Values without a dedicated case are rendered as "Unknown (0x...)".
static QString formatFramebufferStatus(GLenum status)
{
    switch(status) {
    case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT:
        // An attachment is the wrong type / is invalid / has 0 width or height
        return QStringLiteral("GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT");
    case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT:
        // There are no images attached to the framebuffer
        return QStringLiteral("GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT");
    case GL_FRAMEBUFFER_UNSUPPORTED:
        // A format or the combination of formats of the attachments is unsupported
        return QStringLiteral("GL_FRAMEBUFFER_UNSUPPORTED");
    case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS_EXT:
        // Not all attached images have the same width and height
        return QStringLiteral("GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS_EXT");
    case GL_FRAMEBUFFER_INCOMPLETE_FORMATS_EXT:
        // The color attachments don't have the same format
        return QStringLiteral("GL_FRAMEBUFFER_INCOMPLETE_FORMATS_EXT");
    case GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE:
        // The attachments don't have the same number of samples
        return QStringLiteral("GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE");
    case GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER:
        // The draw buffer is missing
        return QStringLiteral("GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER");
    case GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER:
        // The read buffer is missing
        return QStringLiteral("GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER");
    default:
        return QStringLiteral("Unknown (0x") + QString::number(status, 16) + QStringLiteral(")");
    }
}

// Creates the framebuffer object for this render target and attaches
// mTexture to it. glGetError() is checked after every GL call; each failure
// is logged and the framebuffer object created so far is deleted again.
// mValid is set to true on the last line of the function.
// NOTE(review): no early return is visible in any of the error branches, so
// as written mValid = true is reached even after a failure - confirm.
void GLRenderTarget::initFBO()
    // Report (but do not act on) an error that was already pending on entry,
    // so that it is not attributed to the calls below.
    GLenum err = glGetError();
    if (err != GL_NO_ERROR)
        qCCritical(LIBKWINGLUTILS) << "Error status when entering GLRenderTarget::initFBO: " << formatGLError(err);

    glGenFramebuffers(1, &mFramebuffer);

    if ((err = glGetError()) != GL_NO_ERROR) {
        qCCritical(LIBKWINGLUTILS) << "glGenFramebuffers failed: " << formatGLError(err);

    glBindFramebuffer(GL_FRAMEBUFFER, mFramebuffer);

    if ((err = glGetError()) != GL_NO_ERROR) {
        qCCritical(LIBKWINGLUTILS) << "glBindFramebuffer failed: " << formatGLError(err);
        glDeleteFramebuffers(1, &mFramebuffer);

    // NOTE(review): the beginning of this call is not visible; judging by the
    // error message below it is glFramebufferTexture2D - confirm.
                           mTexture.target(), mTexture.texture(), 0);

    if ((err = glGetError()) != GL_NO_ERROR) {
        qCCritical(LIBKWINGLUTILS) << "glFramebufferTexture2D failed: " << formatGLError(err);
        glBindFramebuffer(GL_FRAMEBUFFER, 0);
        glDeleteFramebuffers(1, &mFramebuffer);

    // Query completeness while our framebuffer is still bound, then unbind.
    const GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);

    glBindFramebuffer(GL_FRAMEBUFFER, 0);

    if (status != GL_FRAMEBUFFER_COMPLETE) {
        // We have an incomplete framebuffer, consider it invalid
        // A status of 0 is reported as a failure of the status query itself.
        if (status == 0)
            qCCritical(LIBKWINGLUTILS) << "glCheckFramebufferStatus failed: " << formatGLError(glGetError());
            qCCritical(LIBKWINGLUTILS) << "Invalid framebuffer status: " << formatFramebufferStatus(status);
        glDeleteFramebuffers(1, &mFramebuffer);

    mValid = true;

// Copies a rectangle of framebuffer 0 into this render target using
// glBlitFramebuffer.
//   source      - rectangle to read; a null rect selects s_virtualScreenGeometry.
//   destination - rectangle to write; a null rect selects the whole texture.
//   filter      - passed through to glBlitFramebuffer.
// The source rectangle is made relative to s_virtualScreenGeometry and
// multiplied by s_virtualScreenScale. Both rectangles have their y axis
// flipped (height - y - h) before being handed to GL.
// NOTE(review): the bodies of the two guard conditions are not visible here,
// and no statement restoring the previous framebuffer bindings is visible
// after the blit - confirm.
void GLRenderTarget::blitFromFramebuffer(const QRect &source, const QRect &destination, GLenum filter)
    if (!GLRenderTarget::blitSupported()) {

    if (!mValid) {

    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, mFramebuffer);
    glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
    const QRect s = source.isNull() ? s_virtualScreenGeometry : source;
    const QRect d = destination.isNull() ? QRect(0, 0, mTexture.width(), mTexture.height()) : destination;

    glBlitFramebuffer((s.x() - s_virtualScreenGeometry.x()) * s_virtualScreenScale,
                      (s_virtualScreenGeometry.height() - (s.y() - s_virtualScreenGeometry.y() + s.height())) * s_virtualScreenScale,
                      (s.x() - s_virtualScreenGeometry.x() + s.width()) * s_virtualScreenScale,
                      (s_virtualScreenGeometry.height() - (s.y() - s_virtualScreenGeometry.y())) * s_virtualScreenScale,
                      d.x(), mTexture.height() - d.y() - d.height(), d.x() + d.width(), mTexture.height() - d.y(),
                      GL_COLOR_BUFFER_BIT, filter);

// Replaces the texture stored in mTexture with the given one.
// The function first checks mValid and whether the given texture has the
// same GL texture name as the current one.
// NOTE(review): the bodies of both checks and the beginning of the GL call
// that consumes the last line are not visible here - presumably early
// returns and a glFramebufferTexture2D call on the bound framebuffer; confirm.
void GLRenderTarget::attachTexture(const GLTexture& target)
    if (!mValid) {

    if (mTexture.texture() == target.texture()) {


    mTexture = target;
                           mTexture.target(), mTexture.texture(), 0);


// Detaches the current texture; the visible call arguments pass texture
// name 0 for mTexture's target.
// NOTE(review): the body of the isNull() check and the beginning of the GL
// call are not visible here, and no statement resetting mTexture is visible
// either - confirm.
void GLRenderTarget::detachTexture()
    if (mTexture.isNull()) {


                           mTexture.target(), 0, 0);


// ------------------------------------------------------------------

// Rounds value up to the next multiple of bytes and returns it as T.
// The result is computed with a bit mask, so it is only a multiple of bytes
// when bytes is a power of two.
template <typename T>
T align(T value, int bytes)
    return (value + bytes - 1) & ~T(bytes - 1);

// Wrapper around a GL element array buffer that holds 16 bit indices for
// drawing quads as triangles (six indices per quad).
class IndexBuffer

    // Grows the buffer so that it holds indices for at least count quads.
    void accomodate(int count);
    // Binds the buffer to GL_ELEMENT_ARRAY_BUFFER.
    void bind();

    GLuint m_buffer; // GL name of the buffer object
    size_t m_size;   // size of the buffer's data store in bytes
    int m_count;     // number of quads the buffer currently has indices for

    // The maximum number of quads we can render with 16 bit indices is 16,384.
    // But we start with 512 and grow the buffer as needed.
    // NOTE(review): the signature and the code that fills the local "indices"
    // array are not visible here - presumably this is the constructor; confirm.
    // m_count is derived from the byte size: six 16 bit indices per quad.
    m_size = sizeof(indices);
    m_count = m_size / (6 * sizeof(uint16_t));

    // Create the buffer object and upload the initial index data.
    glGenBuffers(1, &m_buffer);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_buffer);
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);

    // Delete the GL buffer object named by m_buffer.
    // NOTE(review): the signature is not visible here - presumably the
    // destructor; confirm.
    glDeleteBuffers(1, &m_buffer);

// Makes sure the index buffer holds indices for at least count quads.
// When it has to grow, a new buffer object is created, the existing indices
// are copied over with glCopyBufferSubData, and the indices for the
// additional quads are written through a mapping of the new range.
// NOTE(review): count is not limited to the 16,384 quads that fit into
// 16 bit indices; for larger values "i * 4 + index[j]" no longer fits into
// uint16_t - confirm callers never exceed that.
void IndexBuffer::accomodate(int count)
    // Check if we need to grow the buffer.
    // NOTE(review): the statement executed when no growth is needed is not
    // visible here - presumably an early return; confirm.
    if (count <= m_count)

    // Grow in steps of 128 quads; each quad needs six 16 bit indices.
    count = align(count, 128);
    size_t size = 6 * sizeof(uint16_t) * count;

    // Create a new buffer object
    GLuint buffer;
    glGenBuffers(1, &buffer);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer);
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, size, nullptr, GL_STATIC_DRAW);

    // Use the GPU to copy the data from the old object to the new object,
    glBindBuffer(GL_COPY_READ_BUFFER, m_buffer);
    glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_ELEMENT_ARRAY_BUFFER, 0, 0, m_size);
    glDeleteBuffers(1, &m_buffer);
    glFlush(); // Needed to work around what appears to be a CP DMA issue in r600g

    // Map the new object and fill in the uninitialized section
    // NOTE(review): the declaration of "access" is not visible here, the
    // returned pointer is not checked for nullptr, and no unmap call is
    // visible after the loop - confirm.
    uint16_t *map = (uint16_t *) glMapBufferRange(GL_ELEMENT_ARRAY_BUFFER, m_size, size - m_size, access);

    // Vertex order of the two triangles that make up one quad.
    const uint16_t index[] = { 1, 0, 3, 3, 2, 1 };
    for (int i = m_count; i < count; i++) {
        for (int j = 0; j < 6; j++)
            *(map++) = i * 4 + index[j];

    m_buffer = buffer;
    m_count = count;
    m_size = size;

// Binds the index buffer object to GL_ELEMENT_ARRAY_BUFFER.
void IndexBuffer::bind()
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_buffer);

// ------------------------------------------------------------------

// Proxy that refers to a single bit of a 32 bit field owned by someone else.
// Assigning a bool sets or clears that bit; converting to bool reads it.
// The referenced bitfield must outlive the BitRef.
class BitRef
{
public:
    // bitfield - the word that holds the bit
    // bit      - index of the bit, 0..31
    // The mask is built from an unsigned one so that bit 31 does not overflow
    // a signed int.
    BitRef(uint32_t &bitfield, int bit) : m_bitfield(bitfield), m_mask(uint32_t(1) << bit) {}

    // Sets the bit when val is true and clears it otherwise.
    void operator = (bool val) {
        if (val)
            m_bitfield |= m_mask;
        else
            m_bitfield &= ~m_mask;
    }

    // True when the referenced bit is set.
    operator bool () const { return (m_bitfield & m_mask) != 0; }

private:
    uint32_t &m_bitfield;
    uint32_t const m_mask;
};

// ------------------------------------------------------------------

// Small value type wrapping a 32 bit set of flags, addressed by bit index
// (0..31). Converts implicitly from and to uint32_t.
class Bitfield
{
public:
    Bitfield() : m_bitfield(0) {}
    Bitfield(uint32_t bits) : m_bitfield(bits) {}

    // Set / clear bit i. The shifts are done on an unsigned one so that
    // bit 31 does not overflow a signed int.
    void set(int i) { m_bitfield |= (uint32_t(1) << i); }
    void clear(int i) { m_bitfield &= ~(uint32_t(1) << i); }

    // Returns a proxy through which bit i can be read or assigned.
    BitRef operator [] (int i) { return BitRef(m_bitfield, i); }
    operator uint32_t () const { return m_bitfield; }

private:
    uint32_t m_bitfield;
};

// ------------------------------------------------------------------

// Iterates over the indices of the set bits of a 32 bit field, from the
// lowest bit to the highest. The iterator works on its own copy of the field.
class BitfieldIterator
{
public:
    BitfieldIterator(uint32_t bitfield) : m_bitfield(bitfield) {}

    // True while there are set bits that have not been returned yet.
    bool hasNext() const { return m_bitfield != 0; }

    // Returns the index of the lowest remaining set bit and removes it from
    // the iteration. Returns -1 when no bits are left; this avoids the shift
    // by a negative amount that ffs(0) - 1 would otherwise lead to.
    int next() {
        if (m_bitfield == 0)
            return -1;

        const int bit = ffs(int(m_bitfield)) - 1;
        // Clear the lowest set bit without a signed shift.
        m_bitfield &= m_bitfield - 1;
        return bit;
    }

private:
    uint32_t m_bitfield;
};

// ------------------------------------------------------------------

// Layout of one vertex attribute inside the vertex buffer. The values are
// passed to glVertexAttribPointer when the arrays are bound.
struct VertexAttrib
    int size;    // size argument for glVertexAttribPointer
    GLenum type; // type argument for glVertexAttribPointer
    int offset;  // offset that is added to the buffer's base address

// ------------------------------------------------------------------

// A GL sync object together with the buffer end offset that becomes valid
// once the fence has been passed (see the uses of nextEnd as the new
// bufferEnd value).
struct BufferFence
    GLsync sync;
    intptr_t nextEnd;

    // Queries GL_SYNC_STATUS of the sync object and returns true when it
    // reads GL_SIGNALED.
    // NOTE(review): value is not initialised before the query; if
    // glGetSynciv fails it may be left unset - confirm.
    bool signaled() const
        GLint value;
        glGetSynciv(sync, GL_SYNC_STATUS, 1, nullptr, &value);
        return value == GL_SIGNALED;

// Walks over all fences in the given queue.
// NOTE(review): the loop body and anything following it are not visible
// here - going by the name, presumably each sync object is deleted and the
// queue is emptied; confirm.
static void deleteAll(std::deque<BufferFence> &fences)
    for (const BufferFence &fence : fences)


// ------------------------------------------------------------------

// Ring buffer that remembers the last Count frame sizes and reports their
// average. Slots that have not been written yet count as zero.
template <size_t Count>
struct FrameSizesArray
{
    static_assert(Count > 0, "FrameSizesArray needs at least one slot");

    // Start with all slots zeroed so that average() never reads
    // indeterminate values.
    FrameSizesArray() {
        m_array.fill(0);
    }

    // Stores size in the current slot and advances to the next one,
    // overwriting the oldest entry once the buffer has wrapped around.
    void push(size_t size) {
        m_array[m_index] = size;
        m_index = (m_index + 1) % Count;
    }

    // Returns the sum of all slots divided by Count (integer division).
    size_t average() const {
        size_t sum = 0;
        for (size_t size : m_array)
            sum += size;
        return sum / Count;
    }

    std::array<size_t, Count> m_array;
    size_t m_index = 0;
};

// GLVertexBufferPrivate
// Private data and helpers of GLVertexBuffer: the GL buffer object, the
// attribute layout, the bookkeeping for mapped and persistently mapped
// buffers, and the capability flags shared by all vertex buffers.
class GLVertexBufferPrivate
    // Generates the GL buffer object and derives the GL usage value from the
    // given usage hint.
    GLVertexBufferPrivate(GLVertexBuffer::UsageHint usageHint)
        : vertexCount(0)
        , persistent(false)
        , useColor(false)
        , color(0, 0, 0, 255)
        , bufferSize(0)
        , bufferEnd(0)
        , mappedSize(0)
        , frameSize(0)
        , nextOffset(0)
        , baseAddress(0)
        , map(nullptr)
        glGenBuffers(1, &buffer);

        // NOTE(review): no break statements or default label are visible in
        // this switch. As written, Dynamic and Static both end up with
        // GL_STREAM_DRAW and any other hint leaves usage unset - confirm.
        switch(usageHint) {
        case GLVertexBuffer::Dynamic:
            usage = GL_DYNAMIC_DRAW;
        case GLVertexBuffer::Static:
            usage = GL_STATIC_DRAW;
            usage = GL_STREAM_DRAW;

    // Deletes the GL buffer object if there is one and clears the mapping
    // pointer.
    // NOTE(review): no clean-up of the fences queue is visible here - confirm.
    ~GLVertexBufferPrivate() {

        if (buffer != 0) {
            glDeleteBuffers(1, &buffer);
            map = nullptr;

    void interleaveArrays(float *array, int dim, const float *vertices, const float *texcoords, int count);
    void bindArrays();
    void unbindArrays();
    void reallocateBuffer(size_t size);
    GLvoid *mapNextFreeRange(size_t size);
    void reallocatePersistentBuffer(size_t size);
    bool awaitFence(intptr_t offset);
    GLvoid *getIdleRange(size_t size);

    GLuint buffer;                            // GL name of the buffer object, 0 when there is none
    GLenum usage;                             // usage value passed to glBufferData
    int stride;                               // stride passed to glVertexAttribPointer
    int vertexCount;                          // vertex count used by GLVertexBuffer::render()
    static GLVertexBuffer *streamingBuffer;   // buffer returned by GLVertexBuffer::streamingBuffer()
    static bool haveBufferStorage;            // capability flags, set in GLVertexBuffer::initStatic()
    static bool haveSyncFences;
    static bool hasMapBufferRange;
    static bool supportsIndexedQuads;
    QByteArray dataStore;                     // client side staging memory used when the buffer is not mapped
    bool persistent;                          // true when the persistently mapped code paths are used
    bool useColor;                            // whether bindArrays() sets the color uniform
    QVector4D color;                          // value for the GLShader::Color uniform
    size_t bufferSize;                        // size of the buffer's data store in bytes
    intptr_t bufferEnd;                       // end of the range getIdleRange() may hand out without waiting
    size_t mappedSize;                        // size requested by the last map() call
    size_t frameSize;                         // bytes requested through map() since the counter was last reset
    intptr_t nextOffset;                      // offset at which the next range starts
    intptr_t baseAddress;                     // offset of the most recently uploaded data
    uint8_t *map;                             // address of the persistent mapping
    std::deque<BufferFence> fences;           // fences guarding ranges of the persistent buffer
    FrameSizesArray<4> frameSizes;            // recent frame sizes, used to size the persistent buffer
    VertexAttrib attrib[VertexAttributeCount];// layout per attribute index
    Bitfield enabledArrays;                   // one bit per attribute index that is in use
    static IndexBuffer *s_indexBuffer;        // index buffer used for drawing GL_QUADS, created on demand

// Definitions of the static members. All capabilities start out disabled and
// the shared objects start out as null pointers.
bool GLVertexBufferPrivate::hasMapBufferRange = false;
bool GLVertexBufferPrivate::supportsIndexedQuads = false;
GLVertexBuffer *GLVertexBufferPrivate::streamingBuffer = nullptr;
bool GLVertexBufferPrivate::haveBufferStorage = false;
bool GLVertexBufferPrivate::haveSyncFences = false;
IndexBuffer *GLVertexBufferPrivate::s_indexBuffer = nullptr;

// Writes count vertices into dst. Each output vertex consists of dim
// position floats taken from vertices, followed by two floats taken from
// texcoords. Without texcoords the positions are copied as one block.
//   dst       - destination; must have room for count * (dim + 2) floats
//               (count * dim floats when texcoords is null)
//   dim       - number of position components per vertex
//   vertices  - count * dim floats
//   texcoords - count * 2 floats, or null
// NOTE(review): no return is visible after the memcpy and no break/default
// labels are visible in the switch, so the control flow between the three
// loops cannot be verified from here - confirm.
void GLVertexBufferPrivate::interleaveArrays(float *dst, int dim,
                                             const float *vertices, const float *texcoords,
                                             int count)
    if (!texcoords) {
        memcpy((void *) dst, vertices, dim * sizeof(float) * count);

    switch (dim)
    case 2:
        // Unrolled variant for two position components
        for (int i = 0; i < count; i++) {
            *(dst++) = *(vertices++);
            *(dst++) = *(vertices++);
            *(dst++) = *(texcoords++);
            *(dst++) = *(texcoords++);

    case 3:
        // Unrolled variant for three position components
        for (int i = 0; i < count; i++) {
            *(dst++) = *(vertices++);
            *(dst++) = *(vertices++);
            *(dst++) = *(vertices++);
            *(dst++) = *(texcoords++);
            *(dst++) = *(texcoords++);

        // Generic variant for any number of position components
        for (int i = 0; i < count; i++) {
            for (int j = 0; j < dim; j++)
                *(dst++) = *(vertices++);

            *(dst++) = *(texcoords++);
            *(dst++) = *(texcoords++);

// Binds the buffer to GL_ARRAY_BUFFER and sets up the attribute pointer for
// every attribute index that is set in enabledArrays. When useColor is set,
// the color is first uploaded to the GLShader::Color uniform of the bound
// shader.
// NOTE(review): the shader pointer is dereferenced without a visible null
// check, and no call enabling the vertex attribute arrays is visible in the
// loop - confirm.
void GLVertexBufferPrivate::bindArrays()
    if (useColor) {
        GLShader *shader = ShaderManager::instance()->getBoundShader();
        shader->setUniform(GLShader::Color, color);

    glBindBuffer(GL_ARRAY_BUFFER, buffer);

    BitfieldIterator it(enabledArrays);
    while (it.hasNext()) {
        const int index = it.next();
        // The pointer argument is a byte offset into the bound buffer:
        // the start of the last upload plus the attribute's own offset.
        glVertexAttribPointer(index, attrib[index].size, attrib[index].type, GL_FALSE, stride,
                                (const GLvoid *) (baseAddress + attrib[index].offset));

// Walks over every attribute index that is set in enabledArrays.
// NOTE(review): the loop body is not visible here - presumably each
// attribute array is disabled again; confirm.
void GLVertexBufferPrivate::unbindArrays()
    BitfieldIterator it(enabledArrays);
    while (it.hasNext())

// Replaces the buffer object with a new immutable-storage buffer that is at
// least size bytes large and maps the whole buffer into map.
// Afterwards nextOffset is 0 and bufferEnd equals the new buffer size.
// NOTE(review): the declaration of "access" is not visible here, the result
// of glMapBufferRange is not checked, and no handling of the existing fences
// is visible - confirm.
void GLVertexBufferPrivate::reallocatePersistentBuffer(size_t size)
    if (buffer != 0) {
        // This also unmaps and unbinds the buffer
        glDeleteBuffers(1, &buffer);
        buffer = 0;


    if (buffer == 0)
        glGenBuffers(1, &buffer);

    // Round the size up to 64 kb
    // The buffer is at least three times the average recent frame size and
    // never smaller than 128 KiB.
    size_t minSize = qMax<size_t>(frameSizes.average() * 3, 128 * 1024);
    bufferSize = align(qMax(size, minSize), 64 * 1024);

    const GLbitfield storage = GL_DYNAMIC_STORAGE_BIT;

    glBindBuffer(GL_ARRAY_BUFFER, buffer);
    glBufferStorage(GL_ARRAY_BUFFER, bufferSize, nullptr, storage | access);

    map = (uint8_t *) glMapBufferRange(GL_ARRAY_BUFFER, 0, bufferSize, access);

    nextOffset = 0;
    bufferEnd = bufferSize;

// Waits until the buffer range up to the given end offset may be reused.
// On success bufferEnd is moved to the nextEnd value of the fence that was
// waited on and true is returned. Returns false when the wait times out
// (the timeout passed is 1,000,000,000 ns) or fails.
// NOTE(review): the body of the skip loop is not visible here, and
// fences.front() is used after the loop without a visible check that the
// queue is not empty - confirm.
bool GLVertexBufferPrivate::awaitFence(intptr_t end)
    // Skip fences until we reach the end offset
    while (!fences.empty() && fences.front().nextEnd < end) {


    // Wait on the next fence
    const BufferFence &fence = fences.front();

    if (!fence.signaled()) {
        qCDebug(LIBKWINGLUTILS) << "Stalling on VBO fence";
        const GLenum ret = glClientWaitSync(fence.sync, GL_SYNC_FLUSH_COMMANDS_BIT, 1000000000);

        if (ret == GL_TIMEOUT_EXPIRED || ret == GL_WAIT_FAILED) {
            qCCritical(LIBKWINGLUTILS) << "Wait failed";
            return false;


    // Update the end pointer
    bufferEnd = fence.nextEnd;

    return true;

// Returns the address of size bytes inside the persistent mapping, starting
// at nextOffset, or nullptr when waiting for the required fence failed.
// The buffer is reallocated (to twice the requested size) when the request
// does not fit at all, and nextOffset wraps around to 0 when the request
// does not fit behind the current offset.
// NOTE(review): the fence created at wrap-around is not visibly stored in
// the fences queue, and map is used without a nullptr check - confirm.
GLvoid *GLVertexBufferPrivate::getIdleRange(size_t size)
    if (unlikely(size > bufferSize))
        reallocatePersistentBuffer(size * 2);

    // Handle wrap-around
    if (unlikely(nextOffset + size > bufferSize)) {
        nextOffset = 0;
        // Offsets are kept relative to the start of the current pass over
        // the buffer, so the end offsets move back by one buffer size.
        bufferEnd -= bufferSize;

        for (BufferFence &fence : fences)
            fence.nextEnd -= bufferSize;

        // Emit a fence now
        BufferFence fence;
        fence.sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
        fence.nextEnd = bufferSize;

    // The requested range reaches past the part known to be idle: wait.
    if (unlikely(nextOffset + intptr_t(size) > bufferEnd)) {
        if (!awaitFence(nextOffset + size))
            return nullptr;

    return map + nextOffset;

// Allocates a new, uninitialised data store for the buffer currently bound
// to GL_ARRAY_BUFFER and records its size in bufferSize.
// Buffers whose usage is GL_STATIC_DRAW get exactly size bytes; all others
// get at least 32768 bytes, rounded up to a multiple of 4096.
void GLVertexBufferPrivate::reallocateBuffer(size_t size)
    // Round the size up to 4 Kb for streaming/dynamic buffers.
    const size_t minSize = 32768; // Minimum size for streaming buffers
    const size_t alloc = usage != GL_STATIC_DRAW ? align(qMax(size, minSize), 4096) : size;

    glBufferData(GL_ARRAY_BUFFER, alloc, 0, usage);

    bufferSize = alloc;

// Maps size bytes of the buffer bound to GL_ARRAY_BUFFER, starting at
// nextOffset, and returns the mapped address. When the range does not fit
// behind nextOffset, mapping restarts at offset 0.
// NOTE(review): the declaration of "access" and the body of the
// "size > bufferSize" branch are not visible here - confirm. The "^=" below
// toggles GL_MAP_UNSYNCHRONIZED_BIT, so it only clears the bit when the
// bit was set before.
GLvoid *GLVertexBufferPrivate::mapNextFreeRange(size_t size)

    if ((nextOffset + size) > bufferSize) {
        // Reallocate the data store if it's too small.
        if (size > bufferSize) {
        } else {
            access |= GL_MAP_INVALIDATE_BUFFER_BIT;
            access ^= GL_MAP_UNSYNCHRONIZED_BIT;

        nextOffset = 0;

    return glMapBufferRange(GL_ARRAY_BUFFER, nextOffset, size, access);

// GLVertexBuffer
// Geometry and scale of the virtual screen; draw() uses them to convert
// clip rectangles into scissor rectangles.
QRect GLVertexBuffer::s_virtualScreenGeometry;
qreal GLVertexBuffer::s_virtualScreenScale;

// Creates a vertex buffer; the usage hint is forwarded to the newly
// allocated private object.
GLVertexBuffer::GLVertexBuffer(UsageHint hint)
    : d(new GLVertexBufferPrivate(hint))

    // Destroy the private object allocated in the constructor.
    // NOTE(review): the signature is not visible here - presumably the
    // destructor; confirm.
    delete d;

// Copies size bytes from data into a range obtained from map().
// NOTE(review): on the persistent path map() can return nullptr (see
// getIdleRange()), which is passed to memcpy unchecked; no matching unmap()
// call is visible here either - confirm.
void GLVertexBuffer::setData(const void *data, size_t size)
    GLvoid *ptr = map(size);
    memcpy(ptr, data, size);

// Uploads vertexCount vertices with dim position floats each and, when
// texcoords is not null, two texture coordinate floats each.
// Sets up the attribute layout (VA_Position at offset 0, VA_TexCoord right
// behind the position) and writes the interleaved data into a range
// obtained from map().
// NOTE(review): the pointer returned by map() is used unchecked, and no
// unmap() or vertex count update is visible here - confirm.
void GLVertexBuffer::setData(int vertexCount, int dim, const float* vertices, const float* texcoords)
    const GLVertexAttrib layout[] = {
        { VA_Position, dim, GL_FLOAT, 0                        },
        { VA_TexCoord, 2,   GL_FLOAT, int(dim * sizeof(float)) }

    // Without texture coordinates only the first layout entry is used.
    int stride       = (texcoords ? dim + 2 : dim) * sizeof(float);
    int attribCount  = texcoords ? 2 : 1;

    setAttribLayout(layout, attribCount, stride);

    GLvoid *ptr = map(vertexCount * stride);
    d->interleaveArrays((float *) ptr, dim, vertices, texcoords, vertexCount);

// Returns memory for size bytes of vertex data and records the request in
// mappedSize and frameSize. Three paths exist:
//  - persistent buffers return a range of the persistent mapping,
//  - otherwise, if buffer mapping is available and the platform does not
//    prefer glBufferSubData, a range of the buffer is mapped,
//  - otherwise a pointer into the client side dataStore is returned.
// May return nullptr on the persistent path (see getIdleRange()).
GLvoid *GLVertexBuffer::map(size_t size)
    d->mappedSize = size;
    d->frameSize += size;

    if (d->persistent)
        return d->getIdleRange(size);

    glBindBuffer(GL_ARRAY_BUFFER, d->buffer);

    bool preferBufferSubData = GLPlatform::instance()->preferBufferSubData();

    if (GLVertexBufferPrivate::hasMapBufferRange && !preferBufferSubData)
        return (GLvoid *) d->mapNextFreeRange(size);

    // If we can't map the buffer we allocate local memory to hold the
    // buffer data and return a pointer to it.  The data will be submitted
    // to the actual buffer object when the user calls unmap().
    // NOTE(review): the statement that grows dataStore is not visible here.
    if (size_t(d->dataStore.size()) < size)

    return (GLvoid *) d->dataStore.data();

// Finishes an upload started with map(). baseAddress is set to the offset
// the data was written to and nextOffset is advanced past it (aligned to 16
// bytes) on every path except the plain glBufferData one, which always
// places the data at offset 0.
// NOTE(review): no return is visible at the end of the persistent branch,
// no glUnmapBuffer call is visible in the mapped branch, and the body of
// the wrap-around check in the glBufferSubData branch is incomplete -
// confirm.
void GLVertexBuffer::unmap()
    if (d->persistent) {
        d->baseAddress = d->nextOffset;
        d->nextOffset += align(d->mappedSize, 16); // Align to 16 bytes for SSE
        d->mappedSize = 0;

    bool preferBufferSubData = GLPlatform::instance()->preferBufferSubData();

    if (GLVertexBufferPrivate::hasMapBufferRange && !preferBufferSubData) {

        d->baseAddress = d->nextOffset;
        d->nextOffset += align(d->mappedSize, 16); // Align to 16 bytes for SSE
    } else {
        // Upload the data from local memory to the buffer object
        if (preferBufferSubData) {
            // Start again at the beginning when the data does not fit
            // behind the current offset.
            if ((d->nextOffset + d->mappedSize) > d->bufferSize) {
                d->nextOffset = 0;

            glBufferSubData(GL_ARRAY_BUFFER, d->nextOffset, d->mappedSize, d->dataStore.constData());

            d->baseAddress = d->nextOffset;
            d->nextOffset += align(d->mappedSize, 16); // Align to 16 bytes for SSE
        } else {
            glBufferData(GL_ARRAY_BUFFER, d->mappedSize, d->dataStore.data(), d->usage);
            d->baseAddress = 0;

        // Free the local memory buffer if it's unlikely to be used again
        if (d->usage == GL_STATIC_DRAW)
            d->dataStore = QByteArray();


    d->mappedSize = 0;

// Sets the number of vertices that render() draws.
void GLVertexBuffer::setVertexCount(int count)
    d->vertexCount = count;

// Replaces the vertex attribute layout.
//   attribs - array of count attribute descriptions
//   count   - number of entries in attribs
//   stride  - stride that is later passed to glVertexAttribPointer
// Every attribute index must be in the range [0, VertexAttributeCount);
// this is only checked with an assert.
void GLVertexBuffer::setAttribLayout(const GLVertexAttrib *attribs, int count, int stride)
    // Start by disabling all arrays
    d->enabledArrays = 0;

    for (int i = 0; i < count; i++) {
        const int index = attribs[i].index;

        assert(index >= 0 && index < VertexAttributeCount);

        d->attrib[index].size   = attribs[i].size;
        d->attrib[index].type   = attribs[i].type;
        d->attrib[index].offset = attribs[i].relativeOffset;

        // Mark this attribute index as in use.
        d->enabledArrays[index] = true;

    d->stride = stride;

// Renders all vertices with the given primitive mode, using infiniteRegion()
// as the region and hardware clipping turned off.
void GLVertexBuffer::render(GLenum primitiveMode)
    render(infiniteRegion(), primitiveMode, false);

// Renders vertices 0..vertexCount-1 by forwarding to draw().
// NOTE(review): no bindArrays()/unbindArrays() calls are visible around the
// draw() call here - confirm.
void GLVertexBuffer::render(const QRegion& region, GLenum primitiveMode, bool hardwareClipping)
    draw(region, primitiveMode, 0, d->vertexCount, hardwareClipping);

// NOTE(review): the body is not visible here - presumably it forwards to
// GLVertexBufferPrivate::bindArrays(); confirm.
void GLVertexBuffer::bindArrays()

// NOTE(review): the body is not visible here - presumably it forwards to
// GLVertexBufferPrivate::unbindArrays(); confirm.
void GLVertexBuffer::unbindArrays()

// Draws count vertices starting at first, using infiniteRegion() as the
// region and hardware clipping turned off.
void GLVertexBuffer::draw(GLenum primitiveMode, int first, int count)
    draw(infiniteRegion(), primitiveMode, first, count, false);

// Draws count vertices starting at first.
// GL_QUADS is drawn as indexed triangles through the shared index buffer;
// every other primitive mode goes to glDrawArrays. With hardwareClipping
// set, the geometry is drawn once per rectangle of region with the scissor
// box set to that rectangle, converted to GL window coordinates (y flipped,
// scaled by s_virtualScreenScale).
// NOTE(review): no bind() call on the index buffer and no return at the end
// of the GL_QUADS branch are visible here; as written the glDrawArrays path
// below would run as well - confirm.
void GLVertexBuffer::draw(const QRegion &region, GLenum primitiveMode, int first, int count, bool hardwareClipping)
    if (primitiveMode == GL_QUADS) {
        IndexBuffer *&indexBuffer = GLVertexBufferPrivate::s_indexBuffer;

        // The index buffer is created on first use.
        if (!indexBuffer)
            indexBuffer = new IndexBuffer;

        // Four vertices per quad.
        indexBuffer->accomodate(count / 4);

        // Six indices are needed for every four vertices.
        count = count * 6 / 4;

        if (!hardwareClipping) {
            glDrawElementsBaseVertex(GL_TRIANGLES, count, GL_UNSIGNED_SHORT, nullptr, first);
        } else {
            // Clip using scissoring
            for (const QRect &r : region) {
                glScissor((r.x() - s_virtualScreenGeometry.x()) * s_virtualScreenScale,
                (s_virtualScreenGeometry.height() + s_virtualScreenGeometry.y() - r.y() - r.height()) * s_virtualScreenScale,
                r.width() * s_virtualScreenScale,
                r.height() * s_virtualScreenScale);
                glDrawElementsBaseVertex(GL_TRIANGLES, count, GL_UNSIGNED_SHORT, nullptr, first);

    if (!hardwareClipping) {
        glDrawArrays(primitiveMode, first, count);
    } else {
        // Clip using scissoring
        for (const QRect &r : region) {
            glScissor((r.x() - s_virtualScreenGeometry.x()) * s_virtualScreenScale,
                      (s_virtualScreenGeometry.height()  + s_virtualScreenGeometry.y() - r.y() - r.height()) * s_virtualScreenScale,
                      r.width() * s_virtualScreenScale,
                      r.height() * s_virtualScreenScale);
            glDrawArrays(primitiveMode, first, count);

// Returns the capability flag computed in initStatic().
bool GLVertexBuffer::supportsIndexedQuads()
    return GLVertexBufferPrivate::supportsIndexedQuads;

// Returns whether the color uniform is set when the arrays are bound.
bool GLVertexBuffer::isUseColor() const
    return d->useColor;

// Enables or disables setting the color uniform when the arrays are bound.
void GLVertexBuffer::setUseColor(bool enable)
    d->useColor = enable;

// Stores the color as floating point RGBA components and sets the useColor
// flag to enable.
void GLVertexBuffer::setColor(const QColor& color, bool enable)
    d->useColor = enable;
    d->color = QVector4D(color.redF(), color.greenF(), color.blueF(), color.alphaF());

// Resets the per-draw state: color use is turned off, the color becomes
// (0, 0, 0, 1) and the vertex count becomes 0. The buffer contents and the
// attribute layout are not touched here.
void GLVertexBuffer::reset()
    d->useColor       = false;
    d->color          = QVector4D(0, 0, 0, 1);
    d->vertexCount    = 0;

// Per-frame bookkeeping for persistently mapped buffers. If data was
// uploaded since the counter was last reset, either the buffer is dropped
// (so that it gets reallocated) or a fence is created for the current frame.
// NOTE(review): the statement guarded by "!d->persistent" (presumably an
// early return), the recording of the frame size in frameSizes, and the
// storing of the new fence in d->fences are not visible here - confirm.
void GLVertexBuffer::endOfFrame()
    if (!d->persistent)

    // Emit a fence if we have uploaded data
    if (d->frameSize > 0) {
        d->frameSize = 0;

        // Force the buffer to be reallocated at the beginning of the next frame
        // if the average frame size is greater than half the size of the buffer
        if (unlikely(d->frameSizes.average() > d->bufferSize / 2)) {
            glDeleteBuffers(1, &d->buffer);

            // With bufferSize at 0 the next getIdleRange() call takes its
            // reallocation path.
            d->buffer = 0;
            d->bufferSize = 0;
            d->nextOffset = 0;
            d->map = nullptr;
        } else {
            BufferFence fence;
            fence.sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
            // Once this fence has been passed, everything up to the current
            // offset in the next pass over the buffer may be reused.
            fence.nextEnd = d->nextOffset + d->bufferSize;


// Moves bufferEnd forward for every fence at the front of the queue that
// has already been signaled. The loop condition always leaves at least one
// fence in the queue.
// NOTE(review): the statement guarded by "!d->persistent" and the removal
// of the processed fence from the queue are not visible here - confirm.
void GLVertexBuffer::framePosted()
    if (!d->persistent)

    // Remove finished fences from the list and update the bufferEnd offset
    while (d->fences.size() > 1 && d->fences.front().signaled()) {
        const BufferFence &fence = d->fences.front();

        d->bufferEnd = fence.nextEnd;

// Detects the buffer related capabilities of the current GL context and
// creates the shared streaming buffer.
// Indexed quads need base-vertex drawing, buffer copies and buffer range
// mapping. The streaming buffer is switched to the persistent code paths
// when buffer storage and sync fences are both available, unless the
// environment variable KWIN_PERSISTENT_VBO is set to "0".
void GLVertexBuffer::initStatic()
    if (GLPlatform::instance()->isGLES()) {
        // OpenGL ES: features come from ES 3.0 or from OES/EXT extensions.
        bool haveBaseVertex     = hasGLExtension(QByteArrayLiteral("GL_OES_draw_elements_base_vertex"));
        bool haveCopyBuffer     = hasGLVersion(3, 0);
        bool haveMapBufferRange = hasGLExtension(QByteArrayLiteral("GL_EXT_map_buffer_range"));

        GLVertexBufferPrivate::hasMapBufferRange = haveMapBufferRange;
        GLVertexBufferPrivate::supportsIndexedQuads = haveBaseVertex && haveCopyBuffer && haveMapBufferRange;
        GLVertexBufferPrivate::haveBufferStorage = hasGLExtension("GL_EXT_buffer_storage");
        GLVertexBufferPrivate::haveSyncFences = hasGLVersion(3, 0);
    } else {
        // Desktop OpenGL: each feature is accepted either through the GL
        // version checked here or through the corresponding ARB extension.
        bool haveBaseVertex     = hasGLVersion(3, 2) || hasGLExtension(QByteArrayLiteral("GL_ARB_draw_elements_base_vertex"));
        bool haveCopyBuffer     = hasGLVersion(3, 1) || hasGLExtension(QByteArrayLiteral("GL_ARB_copy_buffer"));
        bool haveMapBufferRange = hasGLVersion(3, 0) || hasGLExtension(QByteArrayLiteral("GL_ARB_map_buffer_range"));

        GLVertexBufferPrivate::hasMapBufferRange = haveMapBufferRange;
        GLVertexBufferPrivate::supportsIndexedQuads = haveBaseVertex && haveCopyBuffer && haveMapBufferRange;
        GLVertexBufferPrivate::haveBufferStorage = hasGLVersion(4, 4) || hasGLExtension("GL_ARB_buffer_storage");
        GLVertexBufferPrivate::haveSyncFences = hasGLVersion(3, 2) || hasGLExtension("GL_ARB_sync");
    // The index buffer is created lazily by draw().
    GLVertexBufferPrivate::s_indexBuffer = nullptr;
    GLVertexBufferPrivate::streamingBuffer = new GLVertexBuffer(GLVertexBuffer::Stream);

    if (GLVertexBufferPrivate::haveBufferStorage && GLVertexBufferPrivate::haveSyncFences) {
        if (qgetenv("KWIN_PERSISTENT_VBO") != QByteArrayLiteral("0")) {
            GLVertexBufferPrivate::streamingBuffer->d->persistent = true;

// Destroys the shared index buffer and the streaming buffer and resets the
// hasMapBufferRange and supportsIndexedQuads flags.
// NOTE(review): haveBufferStorage and haveSyncFences are not reset here;
// they are overwritten by the next initStatic() call - confirm that nothing
// reads them in between.
void GLVertexBuffer::cleanup()
    delete GLVertexBufferPrivate::s_indexBuffer;
    GLVertexBufferPrivate::s_indexBuffer = nullptr;
    GLVertexBufferPrivate::hasMapBufferRange = false;
    GLVertexBufferPrivate::supportsIndexedQuads = false;
    delete GLVertexBufferPrivate::streamingBuffer;
    GLVertexBufferPrivate::streamingBuffer = nullptr;

// Returns the shared streaming buffer created in initStatic(); this is a
// null pointer before initStatic() and after cleanup().
GLVertexBuffer *GLVertexBuffer::streamingBuffer()
    return GLVertexBufferPrivate::streamingBuffer;

} // namespace