kwin/effects/blur/blurshader.cpp
Fredrik Höglund 62b9c59da6 Rewrite the fragment program to avoid exceeding the texture indirection
limit on some GPU's.

Thanks to Mikael Gerdin, Christoph Feck and Iori Yagami for testing.

svn path=/trunk/KDE/kdebase/workspace/; revision=1138004
2010-06-14 22:13:48 +00:00

481 lines
13 KiB
C++

/*
* Copyright © 2010 Fredrik Höglund <fredrik@kde.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#include "blurshader.h"
#include <QByteArray>
#include <QTextStream>
#include <KDebug>
#include <cmath>
using namespace KWin;
// Constructs a blur shader with a radius of 0 that is marked as not valid.
// No shader program is generated here; that happens when setRadius()
// changes the radius and calls init().
BlurShader::BlurShader()
    : mRadius(0), mValid(false)
{
    // Give the blur direction a defined starting value. Without this the
    // member stays indeterminate until setDirection() is called, and code that
    // consults the direction before then (the setPixelDistance()
    // implementations do so through direction()) would read garbage.
    mDirection = Qt::Horizontal;
}
// Destroys the base object. There is nothing to release here; the
// GLSL and ARB subclasses free their programs from their own destructors
// by calling reset().
BlurShader::~BlurShader()
{
}
// Factory: returns a newly allocated GLSL based blur shader when
// GLSLBlurShader::supported() reports that it can be used, and an ARB
// fragment program based one otherwise. The object is created with new
// and handed to the caller.
BlurShader *BlurShader::create()
{
    BlurShader *shader = 0;

    if (GLSLBlurShader::supported())
        shader = new GLSLBlurShader();
    else
        shader = new ARBBlurShader();

    return shader;
}
// Sets the blur radius. Values below 2 are raised to 2. When the
// effective radius differs from the current one, the existing program
// is discarded with reset() and a new one is generated with init().
void BlurShader::setRadius(int radius)
{
    const int effective = (radius < 2) ? 2 : radius;

    // Nothing to rebuild if the radius did not actually change
    if (effective == mRadius)
        return;

    mRadius = effective;
    reset();
    init();
}
// Stores the orientation along which the blur is applied. The shader is
// not rebuilt; the stored value is only remembered for later use.
void BlurShader::setDirection(Qt::Orientation direction)
{
    this->mDirection = direction;
}
// Evaluates the gaussian (normal distribution) density with a mean of 0
// and a standard deviation of sigma at position x:
//
//     1 / (sqrt(2 * pi) * sigma) * exp(-x^2 / (2 * sigma^2))
//
// sigma must be non-zero.
//
// Note: the normalization factor used to be written as
// (1.0 / sqrt(2 * pi) * sigma), which multiplies by sigma instead of
// dividing by it. gaussianKernel() normalizes its result, so the kernel
// weights only change by rounding, but the function itself was wrong.
float BlurShader::gaussian(float x, float sigma) const
{
    return (1.0 / (std::sqrt(2.0 * M_PI) * sigma))
           * std::exp(-((x * x) / (2.0 * sigma * sigma)));
}
// Builds the normalized, symmetric blur kernel for the current radius.
//
// The kernel always has an odd number of entries and never more than
// maxKernelSize(). The entry in the middle is the weight of the sample
// taken at the center; the entries on either side are evaluated at
// distances of 1.5, 3.5, 5.5, ... from the center, which are the same
// distances the generated shaders sample at. The returned weights add
// up to 1.
QVector<float> BlurShader::gaussianKernel() const
{
    int size = qMin(mRadius | 1, maxKernelSize());
    if (!(size & 0x1))
        size -= 1;

    // Degenerate case: with fewer than three taps sigma would be zero
    // (or the size would not even be positive), which results in NaN
    // weights or an invalid vector size. Fall back to a kernel that only
    // contains the center sample with full weight.
    if (size < 3)
        return QVector<float>(1, 1.0f);

    QVector<float> kernel(size);
    const int center = size / 2;
    const qreal sigma = (size - 1) / 2.5;

    // Generate the gaussian kernel. The center weight is halved relative
    // to the plain gaussian value.
    kernel[center] = gaussian(0, sigma) * .5;
    for (int i = 1; i <= center; i++) {
        const float val = gaussian(1.5 + (i - 1) * 2.0, sigma);
        kernel[center + i] = val;
        kernel[center - i] = val;
    }

    // Normalize the kernel
    qreal total = 0;
    for (int i = 0; i < size; i++)
        total += kernel[i];
    for (int i = 0; i < size; i++)
        kernel[i] /= total;

    return kernel;
}
// ----------------------------------------------------------------------------
// Constructs the GLSL variant without a program object; the program is
// created later by init().
GLSLBlurShader::GLSLBlurShader()
    : program(0)
{
}
// Releases the GLSL program (if any) by way of reset().
GLSLBlurShader::~GLSLBlurShader()
{
    this->reset();
}
// Deletes the current program object, if there is one, and marks the
// shader as not valid.
void GLSLBlurShader::reset()
{
    if (program != 0) {
        glDeleteProgram(program);
        program = 0;
    }

    setIsValid(false);
}
bool GLSLBlurShader::supported()
{
if (!GLShader::fragmentShaderSupported() || !GLShader::vertexShaderSupported())
return false;
(void) glGetError(); // Clear the error state
// These are the minimum values the implementation is required to support
int value = 0;
glGetIntegerv(GL_MAX_VARYING_FLOATS, &value);
if (value < 32)
return false;
glGetIntegerv(GL_MAX_FRAGMENT_UNIFORM_COMPONENTS, &value);
if (value < 64)
return false;
glGetIntegerv(GL_MAX_VERTEX_UNIFORM_COMPONENTS, &value);
if (value < 512)
return false;
if (glGetError() != GL_NO_ERROR)
return false;
return true;
}
// Uploads the distance between two samples to the pixelSize uniform.
// The value is placed in the x component for a horizontal blur and in
// the y component otherwise; the other component is zero. Does nothing
// when the shader is not valid.
void GLSLBlurShader::setPixelDistance(float val)
{
    if (!isValid())
        return;

    const bool horizontal = (direction() == Qt::Horizontal);
    const float pixelSize[2] = {
        horizontal ? val : 0.0f,
        horizontal ? 0.0f : val
    };

    glUniform2fv(uPixelSize, 1, pixelSize);
}
// Makes the blur program current and points its texUnit sampler at
// texture unit 0. Does nothing when the shader is not valid.
void GLSLBlurShader::bind()
{
    if (isValid()) {
        glUseProgram(program);
        glUniform1i(uTexUnit, 0);
    }
}
void GLSLBlurShader::unbind()
{
glUseProgram(0);
}
// Returns the largest kernel size the GLSL implementation is assumed to
// handle. The generated shaders use one varying per sample, so the size
// is derived from the number of varying floats the implementation
// reports.
int GLSLBlurShader::maxKernelSize() const
{
    // glGetIntegerv() leaves the variable untouched when the query fails,
    // so start from 32, the minimum supported() insists on, instead of
    // reading an uninitialized value.
    int value = 32;
    glGetIntegerv(GL_MAX_VARYING_FLOATS, &value);

    // Note: In theory the driver could pack two vec2's in one vec4,
    // but we'll assume it doesn't do that
    return value / 4; // Max number of vec4 varyings
}
// Creates a shader object of the given type and compiles source into it.
// Returns the shader handle on success. On failure the info log is
// written to kError(), the shader object is deleted and 0 is returned.
GLuint GLSLBlurShader::compile(GLenum type, const QByteArray &source)
{
    const char *text = source.constData();

    const GLuint handle = glCreateShader(type);
    glShaderSource(handle, 1, &text, 0);
    glCompileShader(handle);

    int compiled;
    glGetShaderiv(handle, GL_COMPILE_STATUS, &compiled);
    if (compiled != GL_FALSE)
        return handle;

    // Compilation failed: fetch and report the info log, then clean up
    GLsizei capacity, written;
    glGetShaderiv(handle, GL_INFO_LOG_LENGTH, &capacity);

    QByteArray infoLog(capacity, 0);
    glGetShaderInfoLog(handle, capacity, &written, infoLog.data());
    kError() << "Failed to compile shader: " << infoLog;

    glDeleteShader(handle);
    return 0;
}
// Creates a program object, attaches both shaders and links it.
// Returns the program handle on success. On failure the info log is
// written to kError(), the program object is deleted and 0 is returned.
GLuint GLSLBlurShader::link(GLuint vertexShader, GLuint fragmentShader)
{
    const GLuint handle = glCreateProgram();
    glAttachShader(handle, vertexShader);
    glAttachShader(handle, fragmentShader);
    glLinkProgram(handle);

    int linked;
    glGetProgramiv(handle, GL_LINK_STATUS, &linked);
    if (linked != GL_FALSE)
        return handle;

    // Linking failed: fetch and report the info log, then clean up
    GLsizei capacity, written;
    glGetProgramiv(handle, GL_INFO_LOG_LENGTH, &capacity);

    QByteArray infoLog(capacity, 0);
    glGetProgramInfoLog(handle, capacity, &written, infoLog.data());
    kError() << "Failed to link shader: " << infoLog;

    glDeleteProgram(handle);
    return 0;
}
void GLSLBlurShader::init()
{
QVector<float> kernel = gaussianKernel();
const int size = kernel.size();
const int center = size / 2;
QByteArray vertexSource;
QByteArray fragmentSource;
// Vertex shader
// ===================================================================
QTextStream stream(&vertexSource);
stream << "uniform vec2 pixelSize;\n\n";
for (int i = 0; i < size; i++)
stream << "varying vec2 samplePos" << i << ";\n";
stream << "\n";
stream << "void main(void)\n";
stream << "{\n";
stream << " gl_TexCoord[0] = gl_TextureMatrix[0] * gl_MultiTexCoord0;\n\n";
for (int i = 0; i < center; i++)
stream << " samplePos" << i << " = gl_TexCoord[0].st + pixelSize * vec2("
<< -(1.5 + (center - i - 1) * 2.0) << ");\n";
stream << " samplePos" << center << " = gl_TexCoord[0].st;\n";
for (int i = center + 1; i < size; i++)
stream << " samplePos" << i << " = gl_TexCoord[0].st + pixelSize * vec2("
<< 1.5 + (i - center - 1) * 2.0 << ");\n";
stream << "\n";
stream << " gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;\n";
stream << "}\n";
stream.flush();
// Fragment shader
// ===================================================================
QTextStream stream2(&fragmentSource);
stream2 << "uniform sampler2D texUnit;\n\n";
for (int i = 0; i < size; i++)
stream2 << "varying vec2 samplePos" << i << ";\n";
stream2 << "\n";
for (int i = 0; i <= center; i++)
stream2 << "const vec4 kernel" << i << " = vec4(" << kernel[i] << ");\n";
stream2 << "\n";
stream2 << "void main(void)\n";
stream2 << "{\n";
stream2 << " vec4 sum = texture2D(texUnit, samplePos0) * kernel0;\n";
for (int i = 1; i < size; i++)
stream2 << " sum = sum + texture2D(texUnit, samplePos" << i << ") * kernel"
<< (i > center ? size - i - 1 : i) << ";\n";
stream2 << " gl_FragColor = sum;\n";
stream2 << "}\n";
stream2.flush();
GLuint vertexShader = compile(GL_VERTEX_SHADER, vertexSource);
GLuint fragmentShader = compile(GL_FRAGMENT_SHADER, fragmentSource);
if (vertexShader && fragmentShader)
program = link(vertexShader, fragmentShader);
if (vertexShader)
glDeleteShader(vertexShader);
if (fragmentShader)
glDeleteShader(fragmentShader);
if (program) {
uTexUnit = glGetUniformLocation(program, "texUnit");
uPixelSize = glGetUniformLocation(program, "pixelSize");
}
setIsValid(program != 0);
}
// ----------------------------------------------------------------------------
// Constructs the ARB fragment program variant without a program object;
// the program is created later by init().
ARBBlurShader::ARBBlurShader()
    : program(0)
{
}
// Releases the fragment program (if any) by way of reset().
ARBBlurShader::~ARBBlurShader()
{
    this->reset();
}
// Deletes the current fragment program, if there is one, and marks the
// shader as not valid.
void ARBBlurShader::reset()
{
    const bool haveProgram = (program != 0);

    if (haveProgram) {
        glDeleteProgramsARB(1, &program);
        program = 0;
    }

    setIsValid(false);
}
bool ARBBlurShader::supported()
{
if (!hasGLExtension("GL_ARB_fragment_program"))
return false;
(void) glGetError(); // Clear the error state
// These are the minimum values the implementation is required to support
int value = 0;
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_PARAMETERS_ARB, &value);
if (value < 24)
return false;
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_TEMPORARIES_ARB, &value);
if (value < 16)
return false;
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_INSTRUCTIONS_ARB, &value);
if (value < 72)
return false;
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_TEX_INSTRUCTIONS_ARB, &value);
if (value < 24)
return false;
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_TEX_INDIRECTIONS_ARB, &value);
if (value < 4)
return false;
if (glGetError() != GL_NO_ERROR)
return false;
return true;
}
void ARBBlurShader::setPixelDistance(float val)
{
float firstStep = val * 1.5;
float nextStep = val * 2.0;
if (direction() == Qt::Horizontal) {
glProgramLocalParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, 0, firstStep, 0, 0, 0);
glProgramLocalParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, 1, nextStep, 0, 0, 0);
} else {
glProgramLocalParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, 0, 0, firstStep, 0, 0);
glProgramLocalParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, 1, 0, nextStep, 0, 0);
}
}
// Enables fragment programs and binds the blur program. Does nothing
// when the shader is not valid.
void ARBBlurShader::bind()
{
    if (isValid()) {
        glEnable(GL_FRAGMENT_PROGRAM_ARB);
        glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, program);
    }
}
void ARBBlurShader::unbind()
{
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0);
glDisable(GL_FRAGMENT_PROGRAM_ARB);
}
int ARBBlurShader::maxKernelSize() const
{
int value;
int result;
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_PARAMETERS_ARB, &value);
result = (value - 1) * 2; // We only need to store half the kernel, since it's symmetrical
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_INSTRUCTIONS_ARB, &value);
result = qMin(result, value / 3); // We need 3 instructions / sample
return result;
}
void ARBBlurShader::init()
{
QVector<float> kernel = gaussianKernel();
const int size = kernel.size();
const int center = size / 2;
QByteArray text;
QTextStream stream(&text);
stream << "!!ARBfp1.0\n";
// The kernel values are hardcoded into the program
for (int i = 0; i <= center; i++)
stream << "PARAM kernel" << i << " = " << kernel[center + i] << ";\n";
stream << "PARAM firstSample = program.local[0];\n"; // Distance from gl_TexCoord[0] to the next sample
stream << "PARAM nextSample = program.local[1];\n"; // Distance to the subsequent sample
// Temporary variables to hold coordinates and texture samples
for (int i = 0; i < size; i++)
stream << "TEMP temp" << i << ";\n";
// Compute the texture coordinates
stream << "ADD temp1, fragment.texcoord[0], firstSample;\n"; // temp1 = gl_TexCoord[0] + firstSample
stream << "SUB temp2, fragment.texcoord[0], firstSample;\n"; // temp2 = gl_TexCoord[0] - firstSample
for (int i = 1, j = 3; i < center; i++, j += 2) {
stream << "ADD temp" << j + 0 << ", temp" << j - 2 << ", nextSample;\n";
stream << "SUB temp" << j + 1 << ", temp" << j - 1 << ", nextSample;\n";
}
// Sample the texture coordinates
stream << "TEX temp0, fragment.texcoord[0], texture[0], 2D;\n";
for (int i = 1; i < size; i++)
stream << "TEX temp" << i << ", temp" << i << ", texture[0], 2D;\n";
// Multiply the samples with the kernel values and compute the sum
stream << "MUL temp0, temp0, kernel0;\n";
for (int i = 0, j = 1; i < center; i++) {
stream << "MAD temp0, temp" << j++ << ", kernel" << i + 1 << ", temp0;\n";
stream << "MAD temp0, temp" << j++ << ", kernel" << i + 1 << ", temp0;\n";
}
stream << "MOV result.color, temp0;\n"; // gl_FragColor = temp0
stream << "END\n";
stream.flush();
glGenProgramsARB(1, &program);
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, program);
glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, text.length(), text.constData());
if (glGetError()) {
const char *error = (const char*)glGetString(GL_PROGRAM_ERROR_STRING_ARB);
kError() << "Failed to compile fragment program:" << error;
setIsValid(false);
} else
setIsValid(true);
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, 0);
}