Rewrite the ARB lanczos shader to work with the Radeon R300 limitations.
svn path=/branches/KDE/4.6/kdebase/workspace/; revision=1215505
This commit is contained in:
parent
b51ef59c7c
commit
35dcccb847
3 changed files with 44 additions and 29 deletions
|
@ -1,11 +1,11 @@
|
||||||
uniform sampler2D texUnit;
|
uniform sampler2D texUnit;
|
||||||
uniform vec2 offsets[25];
|
uniform vec2 offsets[16];
|
||||||
uniform vec4 kernel[25];
|
uniform vec4 kernel[16];
|
||||||
|
|
||||||
void main(void)
|
void main(void)
|
||||||
{
|
{
|
||||||
vec4 sum = texture2D(texUnit, gl_TexCoord[0].st) * kernel[0];
|
vec4 sum = texture2D(texUnit, gl_TexCoord[0].st) * kernel[0];
|
||||||
for (int i = 1; i < 25; i++) {
|
for (int i = 1; i < 16; i++) {
|
||||||
sum += texture2D(texUnit, gl_TexCoord[0].st - offsets[i]) * kernel[i];
|
sum += texture2D(texUnit, gl_TexCoord[0].st - offsets[i]) * kernel[i];
|
||||||
sum += texture2D(texUnit, gl_TexCoord[0].st + offsets[i]) * kernel[i];
|
sum += texture2D(texUnit, gl_TexCoord[0].st + offsets[i]) * kernel[i];
|
||||||
}
|
}
|
||||||
|
|
|
@ -138,7 +138,7 @@ void LanczosShader::createKernel( float delta, int *size )
|
||||||
|
|
||||||
// The two outermost samples always fall at points where the lanczos
|
// The two outermost samples always fall at points where the lanczos
|
||||||
// function returns 0, so we'll skip them.
|
// function returns 0, so we'll skip them.
|
||||||
const int sampleCount = qBound( 3, qCeil(delta * a) * 2 + 1 - 2, 49 );
|
const int sampleCount = qBound( 3, qCeil(delta * a) * 2 + 1 - 2, 29 );
|
||||||
const int center = sampleCount / 2;
|
const int center = sampleCount / 2;
|
||||||
const int kernelSize = center + 1;
|
const int kernelSize = center + 1;
|
||||||
const float factor = 1.0 / delta;
|
const float factor = 1.0 / delta;
|
||||||
|
@ -152,7 +152,7 @@ void LanczosShader::createKernel( float delta, int *size )
|
||||||
values[i] = val;
|
values[i] = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
memset(m_kernel, 0, 25 * sizeof(QVector4D));
|
memset(m_kernel, 0, 16 * sizeof(QVector4D));
|
||||||
|
|
||||||
// Normalize the kernel
|
// Normalize the kernel
|
||||||
for ( int i = 0; i < kernelSize; i++ ) {
|
for ( int i = 0; i < kernelSize; i++ ) {
|
||||||
|
@ -165,7 +165,7 @@ void LanczosShader::createKernel( float delta, int *size )
|
||||||
|
|
||||||
void LanczosShader::createOffsets( int count, float width, Qt::Orientation direction )
|
void LanczosShader::createOffsets( int count, float width, Qt::Orientation direction )
|
||||||
{
|
{
|
||||||
memset(m_offsets, 0, 25 * sizeof(QVector2D));
|
memset(m_offsets, 0, 16 * sizeof(QVector2D));
|
||||||
for ( int i = 0; i < count; i++ ) {
|
for ( int i = 0; i < count; i++ ) {
|
||||||
m_offsets[i] = ( direction == Qt::Horizontal ) ?
|
m_offsets[i] = ( direction == Qt::Horizontal ) ?
|
||||||
QVector2D( i / width, 0 ) : QVector2D( 0, i / width );
|
QVector2D( i / width, 0 ) : QVector2D( 0, i / width );
|
||||||
|
@ -561,27 +561,29 @@ void LanczosShader::setUniforms()
|
||||||
if( m_shader )
|
if( m_shader )
|
||||||
{
|
{
|
||||||
glUniform1i( m_uTexUnit, 0 );
|
glUniform1i( m_uTexUnit, 0 );
|
||||||
glUniform2fv( m_uOffsets, 25, (const GLfloat*)m_offsets );
|
glUniform2fv( m_uOffsets, 16, (const GLfloat*)m_offsets );
|
||||||
glUniform4fv( m_uKernel, 25, (const GLfloat*)m_kernel );
|
glUniform4fv( m_uKernel, 16, (const GLfloat*)m_kernel );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for( int i=0; i<25; ++i )
|
for( int i=0; i<16; ++i )
|
||||||
{
|
{
|
||||||
glProgramLocalParameter4fARB( GL_FRAGMENT_PROGRAM_ARB, i, m_offsets[i].x(), m_offsets[i].y(), 0, 0 );
|
glProgramLocalParameter4fARB( GL_FRAGMENT_PROGRAM_ARB, i, m_offsets[i].x(), m_offsets[i].y(), 0, 0 );
|
||||||
}
|
}
|
||||||
for( int i=0; i<25; ++i )
|
for( int i=0; i<16; ++i )
|
||||||
{
|
{
|
||||||
glProgramLocalParameter4fARB( GL_FRAGMENT_PROGRAM_ARB, i+25, m_kernel[i].x(), m_kernel[i].y(), m_kernel[i].z(), m_kernel[i].w() );
|
glProgramLocalParameter4fARB( GL_FRAGMENT_PROGRAM_ARB, i+16, m_kernel[i].x(), m_kernel[i].y(), m_kernel[i].z(), m_kernel[i].w() );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LanczosShader::init()
|
bool LanczosShader::init()
|
||||||
{
|
{
|
||||||
|
GLPlatform *gl = GLPlatform::instance();
|
||||||
if ( GLShader::fragmentShaderSupported() &&
|
if ( GLShader::fragmentShaderSupported() &&
|
||||||
GLShader::vertexShaderSupported() &&
|
GLShader::vertexShaderSupported() &&
|
||||||
GLRenderTarget::supported() )
|
GLRenderTarget::supported() &&
|
||||||
|
!(gl->isRadeon() && gl->chipClass() < R600))
|
||||||
{
|
{
|
||||||
m_shader = new GLShader(":/resources/lanczos-vertex.glsl", ":/resources/lanczos-fragment.glsl");
|
m_shader = new GLShader(":/resources/lanczos-vertex.glsl", ":/resources/lanczos-fragment.glsl");
|
||||||
if (m_shader->isValid())
|
if (m_shader->isValid())
|
||||||
|
@ -608,22 +610,35 @@ bool LanczosShader::init()
|
||||||
QByteArray text;
|
QByteArray text;
|
||||||
QTextStream stream(&text);
|
QTextStream stream(&text);
|
||||||
|
|
||||||
|
// Note: This program uses 31 temporaries, 62 ALU instructions, 31 texture
|
||||||
|
// fetches, 3 texture indirections and 93 instructions.
|
||||||
|
// The R300 limitations are 32, 64, 32, 4 and 96 respectively.
|
||||||
stream << "!!ARBfp1.0\n";
|
stream << "!!ARBfp1.0\n";
|
||||||
stream << "TEMP coord;\n"; // temporary variable to store texcoord
|
stream << "TEMP sum;\n";
|
||||||
stream << "TEMP color;\n"; // temporary variable to store fetched texture colors
|
|
||||||
stream << "TEMP sum;\n"; // variable to render the final result
|
// Declare 30 temporaries for holding texcoords and TEX results
|
||||||
stream << "TEX sum, fragment.texcoord, texture[0], 2D;\n"; // sum = texture2D(texUnit, gl_TexCoord[0].st)
|
for (int i = 0; i < 30; i++)
|
||||||
stream << "MUL sum, sum, program.local[25];\n"; // sum = sum * kernel[0]
|
stream << "TEMP temp" << i << ";\n";
|
||||||
for( int i=1; i<25; ++i )
|
|
||||||
{
|
// Compute the texture coordinates
|
||||||
stream << "ADD coord, fragment.texcoord, program.local[" << i << "];\n"; // coord = gl_TexCoord[0] + offset[i]
|
for (int i = 0, j = 0; i < 30 / 2; i++) {
|
||||||
stream << "TEX color, coord, texture[0], 2D;\n"; // color = texture2D(texUnit, coord)
|
stream << "ADD temp" << j++ << ", fragment.texcoord, program.local[" << i+1 << "];\n";
|
||||||
stream << "MAD sum, color, program.local[" << (25+i) << "], sum;\n"; // sum += color * kernel[i]
|
stream << "SUB temp" << j++ << ", fragment.texcoord, program.local[" << i+1 << "];\n";
|
||||||
stream << "SUB coord, fragment.texcoord, program.local[" << i << "];\n"; // coord = gl_TexCoord[0] - offset[i]
|
|
||||||
stream << "TEX color, coord, texture[0], 2D;\n"; // color = texture2D(texUnit, coord)
|
|
||||||
stream << "MAD sum, color, program.local[" << (25+i) << "], sum;\n"; // sum += color * kernel[i]
|
|
||||||
}
|
}
|
||||||
stream << "MOV result.color, sum;\n"; // gl_FragColor = sum
|
|
||||||
|
// Sample the texture at the computed coordinates
|
||||||
|
stream << "TEX sum, fragment.texcoord, texture[0], 2D;\n";
|
||||||
|
for (int i = 0; i < 30; i++)
|
||||||
|
stream << "TEX temp" << i << ", temp" << i << ", texture[0], 2D;\n";
|
||||||
|
|
||||||
|
// Process the results
|
||||||
|
stream << "MUL sum, sum, program.local[16];\n"; // sum = sum * kernel[0]
|
||||||
|
for (int i = 0, j = 0; i < 30 / 2; i++) {
|
||||||
|
stream << "MAD sum, temp" << j++ << ", program.local[" << (17+i) << "], sum;\n";
|
||||||
|
stream << "MAD sum, temp" << j++ << ", program.local[" << (17+i) << "], sum;\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
stream << "MOV result.color, sum;\n";
|
||||||
stream << "END\n";
|
stream << "END\n";
|
||||||
stream.flush();
|
stream.flush();
|
||||||
|
|
||||||
|
|
|
@ -90,8 +90,8 @@ class LanczosShader
|
||||||
int m_uTexUnit;
|
int m_uTexUnit;
|
||||||
int m_uOffsets;
|
int m_uOffsets;
|
||||||
int m_uKernel;
|
int m_uKernel;
|
||||||
QVector2D m_offsets[25];
|
QVector2D m_offsets[16];
|
||||||
QVector4D m_kernel[25];
|
QVector4D m_kernel[16];
|
||||||
uint m_arbProgram; // TODO: GLuint
|
uint m_arbProgram; // TODO: GLuint
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in a new issue