Rewrite the ARB Lanczos shader to work within the Radeon R300 limitations.

svn path=/branches/KDE/4.6/kdebase/workspace/; revision=1215505
This commit is contained in:
Fredrik Höglund 2011-01-18 22:43:08 +00:00
parent b51ef59c7c
commit 35dcccb847
3 changed files with 44 additions and 29 deletions

View file

@ -1,11 +1,11 @@
uniform sampler2D texUnit; uniform sampler2D texUnit;
uniform vec2 offsets[25]; uniform vec2 offsets[16];
uniform vec4 kernel[25]; uniform vec4 kernel[16];
void main(void) void main(void)
{ {
vec4 sum = texture2D(texUnit, gl_TexCoord[0].st) * kernel[0]; vec4 sum = texture2D(texUnit, gl_TexCoord[0].st) * kernel[0];
for (int i = 1; i < 25; i++) { for (int i = 1; i < 16; i++) {
sum += texture2D(texUnit, gl_TexCoord[0].st - offsets[i]) * kernel[i]; sum += texture2D(texUnit, gl_TexCoord[0].st - offsets[i]) * kernel[i];
sum += texture2D(texUnit, gl_TexCoord[0].st + offsets[i]) * kernel[i]; sum += texture2D(texUnit, gl_TexCoord[0].st + offsets[i]) * kernel[i];
} }

View file

@ -138,7 +138,7 @@ void LanczosShader::createKernel( float delta, int *size )
// The two outermost samples always fall at points where the lanczos // The two outermost samples always fall at points where the lanczos
// function returns 0, so we'll skip them. // function returns 0, so we'll skip them.
const int sampleCount = qBound( 3, qCeil(delta * a) * 2 + 1 - 2, 49 ); const int sampleCount = qBound( 3, qCeil(delta * a) * 2 + 1 - 2, 29 );
const int center = sampleCount / 2; const int center = sampleCount / 2;
const int kernelSize = center + 1; const int kernelSize = center + 1;
const float factor = 1.0 / delta; const float factor = 1.0 / delta;
@ -152,7 +152,7 @@ void LanczosShader::createKernel( float delta, int *size )
values[i] = val; values[i] = val;
} }
memset(m_kernel, 0, 25 * sizeof(QVector4D)); memset(m_kernel, 0, 16 * sizeof(QVector4D));
// Normalize the kernel // Normalize the kernel
for ( int i = 0; i < kernelSize; i++ ) { for ( int i = 0; i < kernelSize; i++ ) {
@ -165,7 +165,7 @@ void LanczosShader::createKernel( float delta, int *size )
void LanczosShader::createOffsets( int count, float width, Qt::Orientation direction ) void LanczosShader::createOffsets( int count, float width, Qt::Orientation direction )
{ {
memset(m_offsets, 0, 25 * sizeof(QVector2D)); memset(m_offsets, 0, 16 * sizeof(QVector2D));
for ( int i = 0; i < count; i++ ) { for ( int i = 0; i < count; i++ ) {
m_offsets[i] = ( direction == Qt::Horizontal ) ? m_offsets[i] = ( direction == Qt::Horizontal ) ?
QVector2D( i / width, 0 ) : QVector2D( 0, i / width ); QVector2D( i / width, 0 ) : QVector2D( 0, i / width );
@ -561,27 +561,29 @@ void LanczosShader::setUniforms()
if( m_shader ) if( m_shader )
{ {
glUniform1i( m_uTexUnit, 0 ); glUniform1i( m_uTexUnit, 0 );
glUniform2fv( m_uOffsets, 25, (const GLfloat*)m_offsets ); glUniform2fv( m_uOffsets, 16, (const GLfloat*)m_offsets );
glUniform4fv( m_uKernel, 25, (const GLfloat*)m_kernel ); glUniform4fv( m_uKernel, 16, (const GLfloat*)m_kernel );
} }
else else
{ {
for( int i=0; i<25; ++i ) for( int i=0; i<16; ++i )
{ {
glProgramLocalParameter4fARB( GL_FRAGMENT_PROGRAM_ARB, i, m_offsets[i].x(), m_offsets[i].y(), 0, 0 ); glProgramLocalParameter4fARB( GL_FRAGMENT_PROGRAM_ARB, i, m_offsets[i].x(), m_offsets[i].y(), 0, 0 );
} }
for( int i=0; i<25; ++i ) for( int i=0; i<16; ++i )
{ {
glProgramLocalParameter4fARB( GL_FRAGMENT_PROGRAM_ARB, i+25, m_kernel[i].x(), m_kernel[i].y(), m_kernel[i].z(), m_kernel[i].w() ); glProgramLocalParameter4fARB( GL_FRAGMENT_PROGRAM_ARB, i+16, m_kernel[i].x(), m_kernel[i].y(), m_kernel[i].z(), m_kernel[i].w() );
} }
} }
} }
bool LanczosShader::init() bool LanczosShader::init()
{ {
GLPlatform *gl = GLPlatform::instance();
if ( GLShader::fragmentShaderSupported() && if ( GLShader::fragmentShaderSupported() &&
GLShader::vertexShaderSupported() && GLShader::vertexShaderSupported() &&
GLRenderTarget::supported() ) GLRenderTarget::supported() &&
!(gl->isRadeon() && gl->chipClass() < R600))
{ {
m_shader = new GLShader(":/resources/lanczos-vertex.glsl", ":/resources/lanczos-fragment.glsl"); m_shader = new GLShader(":/resources/lanczos-vertex.glsl", ":/resources/lanczos-fragment.glsl");
if (m_shader->isValid()) if (m_shader->isValid())
@ -608,22 +610,35 @@ bool LanczosShader::init()
QByteArray text; QByteArray text;
QTextStream stream(&text); QTextStream stream(&text);
// Note: This program uses 31 temporaries, 62 ALU instructions, 31 texture
// fetches, 3 texture indirections and 93 instructions.
// The R300 limitations are 32, 64, 32, 4 and 96 respectively.
stream << "!!ARBfp1.0\n"; stream << "!!ARBfp1.0\n";
stream << "TEMP coord;\n"; // temporary variable to store texcoord stream << "TEMP sum;\n";
stream << "TEMP color;\n"; // temporary variable to store fetched texture colors
stream << "TEMP sum;\n"; // variable to render the final result // Declare 30 temporaries for holding texcoords and TEX results
stream << "TEX sum, fragment.texcoord, texture[0], 2D;\n"; // sum = texture2D(texUnit, gl_TexCoord[0].st) for (int i = 0; i < 30; i++)
stream << "MUL sum, sum, program.local[25];\n"; // sum = sum * kernel[0] stream << "TEMP temp" << i << ";\n";
for( int i=1; i<25; ++i )
{ // Compute the texture coordinates
stream << "ADD coord, fragment.texcoord, program.local[" << i << "];\n"; // coord = gl_TexCoord[0] + offset[i] for (int i = 0, j = 0; i < 30 / 2; i++) {
stream << "TEX color, coord, texture[0], 2D;\n"; // color = texture2D(texUnit, coord) stream << "ADD temp" << j++ << ", fragment.texcoord, program.local[" << i+1 << "];\n";
stream << "MAD sum, color, program.local[" << (25+i) << "], sum;\n"; // sum += color * kernel[i] stream << "SUB temp" << j++ << ", fragment.texcoord, program.local[" << i+1 << "];\n";
stream << "SUB coord, fragment.texcoord, program.local[" << i << "];\n"; // coord = gl_TexCoord[0] - offset[i]
stream << "TEX color, coord, texture[0], 2D;\n"; // color = texture2D(texUnit, coord)
stream << "MAD sum, color, program.local[" << (25+i) << "], sum;\n"; // sum += color * kernel[i]
} }
stream << "MOV result.color, sum;\n"; // gl_FragColor = sum
// Sample the texture coordinates
stream << "TEX sum, fragment.texcoord, texture[0], 2D;\n";
for (int i = 0; i < 30; i++)
stream << "TEX temp" << i << ", temp" << i << ", texture[0], 2D;\n";
// Process the results
stream << "MUL sum, sum, program.local[16];\n"; // sum = sum * kernel[0]
for (int i = 0, j = 0; i < 30 / 2; i++) {
stream << "MAD sum, temp" << j++ << ", program.local[" << (17+i) << "], sum;\n";
stream << "MAD sum, temp" << j++ << ", program.local[" << (17+i) << "], sum;\n";
}
stream << "MOV result.color, sum;\n";
stream << "END\n"; stream << "END\n";
stream.flush(); stream.flush();

View file

@ -90,8 +90,8 @@ class LanczosShader
int m_uTexUnit; int m_uTexUnit;
int m_uOffsets; int m_uOffsets;
int m_uKernel; int m_uKernel;
QVector2D m_offsets[25]; QVector2D m_offsets[16];
QVector4D m_kernel[25]; QVector4D m_kernel[16];
uint m_arbProgram; // TODO: GLuint uint m_arbProgram; // TODO: GLuint
}; };
#endif #endif