Drop custom SSE code from WindowQuads::makeInterleavedArrays
Both GCC and Clang do auto vecrization these days and testing shows that that actually produces faster code, so dropping the SSE stuff makes things both simpler and faster.
This commit is contained in:
parent
0c8d87f9d4
commit
a95b556868
1 changed files with 25 additions and 94 deletions
|
@ -25,10 +25,6 @@
|
|||
#include <kconfiggroup.h>
|
||||
#include <ksharedconfig.h>
|
||||
|
||||
#if defined(__SSE2__)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#include <optional>
|
||||
|
||||
namespace KWin
|
||||
|
@ -1055,106 +1051,41 @@ void WindowQuadList::makeInterleavedArrays(unsigned int type, GLVertex2D *vertic
|
|||
Q_ASSERT(type == GL_QUADS || type == GL_TRIANGLES);
|
||||
|
||||
switch (type) {
|
||||
case GL_QUADS:
|
||||
#if defined(__SSE2__)
|
||||
if (!(intptr_t(vertex) & 0xf)) {
|
||||
for (const WindowQuad &quad : *this) {
|
||||
alignas(16) GLVertex2D v[4];
|
||||
case GL_QUADS: {
|
||||
for (const WindowQuad &quad : *this) {
|
||||
for (int j = 0; j < 4; j++) {
|
||||
const WindowVertex &wv = quad[j];
|
||||
|
||||
for (int j = 0; j < 4; j++) {
|
||||
const WindowVertex &wv = quad[j];
|
||||
GLVertex2D v;
|
||||
v.position = QVector2D(wv.x(), wv.y());
|
||||
v.texcoord = QVector2D(wv.u(), wv.v()) * coeff + offset;
|
||||
|
||||
v[j].position = QVector2D(wv.x(), wv.y());
|
||||
v[j].texcoord = QVector2D(wv.u(), wv.v()) * coeff + offset;
|
||||
}
|
||||
|
||||
const __m128i *srcP = reinterpret_cast<const __m128i *>(&v);
|
||||
__m128i *dstP = reinterpret_cast<__m128i *>(vertex);
|
||||
|
||||
_mm_stream_si128(&dstP[0], _mm_load_si128(&srcP[0])); // Top-left
|
||||
_mm_stream_si128(&dstP[1], _mm_load_si128(&srcP[1])); // Top-right
|
||||
_mm_stream_si128(&dstP[2], _mm_load_si128(&srcP[2])); // Bottom-right
|
||||
_mm_stream_si128(&dstP[3], _mm_load_si128(&srcP[3])); // Bottom-left
|
||||
|
||||
vertex += 4;
|
||||
}
|
||||
} else
|
||||
#endif // __SSE2__
|
||||
{
|
||||
for (const WindowQuad &quad : *this) {
|
||||
for (int j = 0; j < 4; j++) {
|
||||
const WindowVertex &wv = quad[j];
|
||||
|
||||
GLVertex2D v;
|
||||
v.position = QVector2D(wv.x(), wv.y());
|
||||
v.texcoord = QVector2D(wv.u(), wv.v()) * coeff + offset;
|
||||
|
||||
*(vertex++) = v;
|
||||
}
|
||||
*(vertex++) = v;
|
||||
}
|
||||
}
|
||||
break;
|
||||
} break;
|
||||
case GL_TRIANGLES: {
|
||||
for (const WindowQuad &quad : *this) {
|
||||
GLVertex2D v[4]; // Four unique vertices / quad
|
||||
|
||||
case GL_TRIANGLES:
|
||||
#if defined(__SSE2__)
|
||||
if (!(intptr_t(vertex) & 0xf)) {
|
||||
for (const WindowQuad &quad : *this) {
|
||||
alignas(16) GLVertex2D v[4];
|
||||
for (int j = 0; j < 4; j++) {
|
||||
const WindowVertex &wv = quad[j];
|
||||
|
||||
for (int j = 0; j < 4; j++) {
|
||||
const WindowVertex &wv = quad[j];
|
||||
|
||||
v[j].position = QVector2D(wv.x(), wv.y());
|
||||
v[j].texcoord = QVector2D(wv.u(), wv.v()) * coeff + offset;
|
||||
}
|
||||
|
||||
const __m128i *srcP = reinterpret_cast<const __m128i *>(&v);
|
||||
__m128i *dstP = reinterpret_cast<__m128i *>(vertex);
|
||||
|
||||
__m128i src[4];
|
||||
src[0] = _mm_load_si128(&srcP[0]); // Top-left
|
||||
src[1] = _mm_load_si128(&srcP[1]); // Top-right
|
||||
src[2] = _mm_load_si128(&srcP[2]); // Bottom-right
|
||||
src[3] = _mm_load_si128(&srcP[3]); // Bottom-left
|
||||
|
||||
// First triangle
|
||||
_mm_stream_si128(&dstP[0], src[1]); // Top-right
|
||||
_mm_stream_si128(&dstP[1], src[0]); // Top-left
|
||||
_mm_stream_si128(&dstP[2], src[3]); // Bottom-left
|
||||
|
||||
// Second triangle
|
||||
_mm_stream_si128(&dstP[3], src[3]); // Bottom-left
|
||||
_mm_stream_si128(&dstP[4], src[2]); // Bottom-right
|
||||
_mm_stream_si128(&dstP[5], src[1]); // Top-right
|
||||
|
||||
vertex += 6;
|
||||
v[j].position = QVector2D(wv.x(), wv.y());
|
||||
v[j].texcoord = QVector2D(wv.u(), wv.v()) * coeff + offset;
|
||||
}
|
||||
} else
|
||||
#endif // __SSE2__
|
||||
{
|
||||
for (const WindowQuad &quad : *this) {
|
||||
GLVertex2D v[4]; // Four unique vertices / quad
|
||||
|
||||
for (int j = 0; j < 4; j++) {
|
||||
const WindowVertex &wv = quad[j];
|
||||
// First triangle
|
||||
*(vertex++) = v[1]; // Top-right
|
||||
*(vertex++) = v[0]; // Top-left
|
||||
*(vertex++) = v[3]; // Bottom-left
|
||||
|
||||
v[j].position = QVector2D(wv.x(), wv.y());
|
||||
v[j].texcoord = QVector2D(wv.u(), wv.v()) * coeff + offset;
|
||||
}
|
||||
|
||||
// First triangle
|
||||
*(vertex++) = v[1]; // Top-right
|
||||
*(vertex++) = v[0]; // Top-left
|
||||
*(vertex++) = v[3]; // Bottom-left
|
||||
|
||||
// Second triangle
|
||||
*(vertex++) = v[3]; // Bottom-left
|
||||
*(vertex++) = v[2]; // Bottom-right
|
||||
*(vertex++) = v[1]; // Top-right
|
||||
}
|
||||
// Second triangle
|
||||
*(vertex++) = v[3]; // Bottom-left
|
||||
*(vertex++) = v[2]; // Bottom-right
|
||||
*(vertex++) = v[1]; // Top-right
|
||||
}
|
||||
break;
|
||||
|
||||
} break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue