Make the Lanczos shader use a fixed number of iterations in the loop.

This makes it possible for the GLSL compiler to unroll the loop, which
also avoids the need to use relative addressing. With this change the
shader should hopefully work with the R300G driver.

The unused kernel weights are set to zero so they don't contribute
to the end result.

Thanks to Tom Stellard and Marek Olšák for their suggestions on how
to solve this problem.

CCBUG: 243191

svn path=/trunk/KDE/kdebase/workspace/; revision=1175021
This commit is contained in:
Fredrik Höglund 2010-09-13 22:03:21 +00:00
parent d4d985b30d
commit 119c06e403
3 changed files with 22 additions and 24 deletions

View file

@ -1,12 +1,11 @@
uniform sampler2D texUnit;
uniform vec2 offsets[25];
uniform vec4 kernel[25];
uniform int kernelSize;
void main(void)
{
vec4 sum = texture2D(texUnit, gl_TexCoord[0].st) * kernel[0];
for (int i = 1; i < kernelSize; i++) {
for (int i = 1; i < 25; i++) {
sum += texture2D(texUnit, gl_TexCoord[0].st - offsets[i]) * kernel[i];
sum += texture2D(texUnit, gl_TexCoord[0].st + offsets[i]) * kernel[i];
}

View file

@ -78,7 +78,6 @@ void LanczosFilter::init()
{
m_shader->bind();
m_uTexUnit = m_shader->uniformLocation("texUnit");
m_uKernelSize = m_shader->uniformLocation("kernelSize");
m_uKernel = m_shader->uniformLocation("kernel");
m_uOffsets = m_shader->uniformLocation("offsets");
m_shader->unbind();
@ -134,7 +133,7 @@ static float lanczos( float x, float a )
return sinc( x ) * sinc( x / a );
}
QVector<QVector4D> LanczosFilter::createKernel( float delta )
void LanczosFilter::createKernel( float delta, int *size )
{
const float a = 2.0;
@ -146,7 +145,6 @@ QVector<QVector4D> LanczosFilter::createKernel( float delta )
const float factor = 1.0 / delta;
QVector<float> values( kernelSize );
QVector<QVector4D> kernel( kernelSize );
float sum = 0;
for ( int i = 0; i < kernelSize; i++ ) {
@ -155,23 +153,24 @@ QVector<QVector4D> LanczosFilter::createKernel( float delta )
values[i] = val;
}
memset(m_kernel, 0, 25 * sizeof(QVector4D));
// Normalize the kernel
for ( int i = 0; i < kernelSize; i++ ) {
const float val = values[i] / sum;
kernel[i] = QVector4D( val, val, val, val );
m_kernel[i] = QVector4D( val, val, val, val );
}
return kernel;
*size = kernelSize;
}
QVector<QVector2D> LanczosFilter::createOffsets( int count, float width, Qt::Orientation direction )
void LanczosFilter::createOffsets( int count, float width, Qt::Orientation direction )
{
QVector<QVector2D> offsets( count );
memset(m_offsets, 0, 25 * sizeof(QVector2D));
for ( int i = 0; i < count; i++ ) {
offsets[i] = ( direction == Qt::Horizontal ) ?
m_offsets[i] = ( direction == Qt::Horizontal ) ?
QVector2D( i / width, 0 ) : QVector2D( 0, i / width );
}
return offsets;
}
void LanczosFilter::performPaint( EffectWindowImpl* w, int mask, QRegion region, WindowPaintData& data )
@ -240,14 +239,14 @@ void LanczosFilter::performPaint( EffectWindowImpl* w, int mask, QRegion region,
// Set up the shader for horizontal scaling
float dx = sw / float(tw);
QVector<QVector4D> kernel = createKernel( dx );
QVector<QVector2D> offsets = createOffsets( kernel.size(), sw, Qt::Horizontal );
int kernelSize;
createKernel( dx, &kernelSize );
createOffsets( kernelSize, sw, Qt::Horizontal );
m_shader->bind();
glUniform1i( m_uTexUnit, 0 );
glUniform1i( m_uKernelSize, kernel.size() );
glUniform2fv( m_uOffsets, offsets.size(), (const GLfloat*)offsets.constData() );
glUniform4fv( m_uKernel, kernel.size(), (const GLfloat*)kernel.constData() );
glUniform2fv( m_uOffsets, 25, (const GLfloat*)m_offsets );
glUniform4fv( m_uKernel, 25, (const GLfloat*)m_kernel );
// Draw the window back into the FBO, this time scaled horizontally
glClear( GL_COLOR_BUFFER_BIT );
@ -268,12 +267,11 @@ void LanczosFilter::performPaint( EffectWindowImpl* w, int mask, QRegion region,
// Set up the shader for vertical scaling
float dy = sh / float(th);
kernel = createKernel( dy );
offsets = createOffsets( kernel.size(), m_offscreenTex->height(), Qt::Vertical );
createKernel( dy, &kernelSize );
createOffsets( kernelSize, m_offscreenTex->height(), Qt::Vertical );
glUniform1i( m_uKernelSize, kernel.size() );
glUniform2fv( m_uOffsets, offsets.size(), (const GLfloat*)offsets.constData() );
glUniform4fv( m_uKernel, kernel.size(), (const GLfloat*)kernel.constData() );
glUniform2fv( m_uOffsets, 25, (const GLfloat*)m_offsets );
glUniform4fv( m_uKernel, 25, (const GLfloat*)m_kernel );
float sx2 = tw / float(m_offscreenTex->width());
float sy2 = 1 - (sh / float(m_offscreenTex->height()));

View file

@ -53,19 +53,20 @@ class LanczosFilter
private:
void init();
void updateOffscreenSurfaces();
QVector<QVector4D> createKernel(float delta);
QVector<QVector2D> createOffsets(int count, float width, Qt::Orientation direction);
void createKernel(float delta, int *kernelSize);
void createOffsets(int count, float width, Qt::Orientation direction);
#ifdef KWIN_HAVE_OPENGL_COMPOSITING
GLTexture *m_offscreenTex;
GLRenderTarget *m_offscreenTarget;
GLShader *m_shader;
#endif
QBasicTimer m_timer;
QVector2D m_offsets[25];
QVector4D m_kernel[25];
bool m_inited;
int m_uTexUnit;
int m_uOffsets;
int m_uKernel;
int m_uKernelSize;
};
} // namespace