backends/drm: combine color operations more aggressively than ColorPipeline does

With programmable LUTs, consecutive transfer functions, inverse transfer functions and
multipliers can all be combined into one LUT. This allows offloading operations in more
situations and makes the operations more efficient too, as potentially fewer LUTs have
to be programmed
This commit is contained in:
Xaver Hugl 2024-07-15 20:35:46 +02:00
parent 3f2f3cb020
commit e2c8f25d31
2 changed files with 60 additions and 80 deletions

View file

@ -82,29 +82,28 @@ bool DrmAbstractColorOp::matchPipeline(DrmAtomicCommit *commit, const ColorPipel
currentOp->bypass(m_cache.get()); currentOp->bypass(m_cache.get());
currentOp = currentOp->next(); currentOp = currentOp->next();
} }
currentOp->program(m_cache.get(), *initialOp, 1, 1); currentOp->program(m_cache.get(), std::span(&*initialOp, 1), 1, 1);
currentOp = currentOp->next(); currentOp = currentOp->next();
} }
for (auto it = pipeline.ops.begin(); it != pipeline.ops.end(); it++) { for (auto it = pipeline.ops.begin(); it != pipeline.ops.end();) {
while (!currentOp->canBeUsedFor(*it)) { while (!currentOp->canBeUsedFor(*it)) {
currentOp->bypass(m_cache.get()); currentOp->bypass(m_cache.get());
currentOp = currentOp->next(); currentOp = currentOp->next();
} }
if (it == pipeline.ops.end() - 1) { auto firstIt = it;
// this is the last op, we need to un-do the factor it++;
// this assumes that the output is always limited range // combine as many operations into one hardware operation as possible
currentOp->program(m_cache.get(), *it, valueScaling, 1.0); while (it != pipeline.ops.end() && currentOp->canBeUsedFor(*it)) {
valueScaling = 1.0; it++;
} else if (needsLimitedRange(*it) || needsLimitedRange(*(it + 1))) {
// this op can only output limited range or the next op needs a limited range input,
// adjust the factor to make it happen
currentOp->program(m_cache.get(), *it, valueScaling, 1.0 / it->output.max);
valueScaling = 1.0 / it->output.max;
} else {
// this and the next op are both fine with extended range, set the factor to 1.0 to use all the resolution we can get
currentOp->program(m_cache.get(), *it, valueScaling, 1.0);
valueScaling = 1.0;
} }
std::span operations(firstIt, it);
double outputScaling = 1.0;
if (it != pipeline.ops.end() && (needsLimitedRange(operations.front()) || needsLimitedRange(operations.back()) || needsLimitedRange(*it))) {
// if this or the next operation needs a limited range, or we need limited range for the output, make it happen
outputScaling = 1.0 / operations.back().output.max;
}
currentOp->program(m_cache.get(), operations, valueScaling, outputScaling);
valueScaling = outputScaling;
currentOp = currentOp->next(); currentOp = currentOp->next();
} }
while (currentOp) { while (currentOp) {
@ -136,45 +135,31 @@ bool LegacyLutColorOp::canBeUsedFor(const ColorOp &op)
return false; return false;
} }
void LegacyLutColorOp::program(DrmAtomicCommit *commit, const ColorOp &op, double inputScale, double outputScale) void LegacyLutColorOp::program(DrmAtomicCommit *commit, std::span<const ColorOp> operations, double inputScale, double outputScale)
{ {
if (auto tf = std::get_if<ColorTransferFunction>(&op.operation)) { for (uint32_t i = 0; i < m_maxSize; i++) {
for (uint32_t i = 0; i < m_maxSize; i++) { const double input = i / double(m_maxSize - 1);
const double nits = tf->tf.encodedToNits(i / double(m_maxSize - 1) / inputScale, tf->referenceLuminance); const double scaledInput = input / inputScale;
const uint16_t output = std::round(std::clamp(nits * outputScale, 0.0, 1.0) * std::numeric_limits<uint16_t>::max()); QVector3D output(scaledInput, scaledInput, scaledInput);
m_components[i] = { for (const auto &op : operations) {
.red = output, if (auto tf = std::get_if<ColorTransferFunction>(&op.operation)) {
.green = output, output = tf->tf.encodedToNits(output, tf->referenceLuminance);
.blue = output, } else if (auto tf = std::get_if<InverseColorTransferFunction>(&op.operation)) {
.reserved = 0, output = tf->tf.nitsToEncoded(output, tf->referenceLuminance);
}; } else if (auto mult = std::get_if<ColorMultiplier>(&op.operation)) {
output *= mult->factors;
} else {
Q_UNREACHABLE();
}
} }
commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), m_components.data(), sizeof(drm_color_lut) * m_components.size())); m_components[i] = {
} else if (auto tf = std::get_if<InverseColorTransferFunction>(&op.operation)) { .red = uint16_t(std::round(std::clamp(output.x() * outputScale, 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
for (uint32_t i = 0; i < m_maxSize; i++) { .green = uint16_t(std::round(std::clamp(output.y() * outputScale, 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
const double nits = tf->tf.nitsToEncoded(i / double(m_maxSize - 1) / inputScale, tf->referenceLuminance); .blue = uint16_t(std::round(std::clamp(output.z() * outputScale, 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
const uint16_t output = std::round(std::clamp(nits * outputScale, 0.0, 1.0) * std::numeric_limits<uint16_t>::max()); .reserved = 0,
m_components[i] = { };
.red = output,
.green = output,
.blue = output,
.reserved = 0,
};
}
commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), m_components.data(), sizeof(drm_color_lut) * m_components.size()));
} else if (auto mult = std::get_if<ColorMultiplier>(&op.operation)) {
for (uint32_t i = 0; i < m_maxSize; i++) {
m_components[i] = {
.red = uint16_t(std::round(std::clamp(mult->factors.x() * outputScale / inputScale * i / double(m_maxSize - 1), 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
.green = uint16_t(std::round(std::clamp(mult->factors.y() * outputScale / inputScale * i / double(m_maxSize - 1), 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
.blue = uint16_t(std::round(std::clamp(mult->factors.z() * outputScale / inputScale * i / double(m_maxSize - 1), 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
.reserved = 0,
};
}
commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), m_components.data(), sizeof(drm_color_lut) * m_maxSize));
} else {
Q_ASSERT(false);
} }
commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), m_components.data(), sizeof(drm_color_lut) * m_maxSize));
} }
void LegacyLutColorOp::bypass(DrmAtomicCommit *commit) void LegacyLutColorOp::bypass(DrmAtomicCommit *commit)
@ -215,33 +200,27 @@ static uint64_t doubleToFixed(double value)
return ret; return ret;
} }
void LegacyMatrixColorOp::program(DrmAtomicCommit *commit, const ColorOp &op, double inputScale, double outputScale) void LegacyMatrixColorOp::program(DrmAtomicCommit *commit, std::span<const ColorOp> operations, double inputScale, double outputScale)
{ {
if (auto matrix = std::get_if<ColorMatrix>(&op.operation)) { QMatrix4x4 result;
QMatrix4x4 scaled = matrix->mat; result.scale(1.0 / inputScale);
scaled.scale(outputScale / inputScale); for (const auto &op : operations) {
drm_color_ctm data = { if (auto matrix = std::get_if<ColorMatrix>(&op.operation)) {
.matrix = { result *= matrix->mat;
doubleToFixed(scaled(0, 0)), doubleToFixed(scaled(0, 1)), doubleToFixed(scaled(0, 2)), // } else if (auto mult = std::get_if<ColorMultiplier>(&op.operation)) {
doubleToFixed(scaled(1, 0)), doubleToFixed(scaled(1, 1)), doubleToFixed(scaled(1, 2)), // result.scale(mult->factors.x(), mult->factors.y(), mult->factors.z());
doubleToFixed(scaled(2, 0)), doubleToFixed(scaled(2, 1)), doubleToFixed(scaled(2, 2)), // } else {
}, Q_UNREACHABLE();
}; }
commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), &data, sizeof(data)));
} else if (auto mult = std::get_if<ColorMultiplier>(&op.operation)) {
QVector3D scaled = mult->factors;
scaled *= outputScale / inputScale;
drm_color_ctm data = {
.matrix = {
doubleToFixed(scaled.x()), doubleToFixed(0), doubleToFixed(0), //
doubleToFixed(0), doubleToFixed(scaled.y()), doubleToFixed(0), //
doubleToFixed(0), doubleToFixed(0), doubleToFixed(scaled.z()), //
},
};
commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), &data, sizeof(data)));
} else {
Q_ASSERT(false);
} }
drm_color_ctm data = {
.matrix = {
doubleToFixed(result(0, 0)), doubleToFixed(result(0, 1)), doubleToFixed(result(0, 2)), //
doubleToFixed(result(1, 0)), doubleToFixed(result(1, 1)), doubleToFixed(result(1, 2)), //
doubleToFixed(result(2, 0)), doubleToFixed(result(2, 1)), doubleToFixed(result(2, 2)), //
},
};
commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), &data, sizeof(data)));
} }
void LegacyMatrixColorOp::bypass(DrmAtomicCommit *commit) void LegacyMatrixColorOp::bypass(DrmAtomicCommit *commit)

View file

@ -11,6 +11,7 @@
#include <drm.h> #include <drm.h>
#include <memory> #include <memory>
#include <span>
namespace KWin namespace KWin
{ {
@ -27,7 +28,7 @@ public:
bool matchPipeline(DrmAtomicCommit *commit, const ColorPipeline &pipeline); bool matchPipeline(DrmAtomicCommit *commit, const ColorPipeline &pipeline);
virtual bool canBeUsedFor(const ColorOp &op) = 0; virtual bool canBeUsedFor(const ColorOp &op) = 0;
virtual void program(DrmAtomicCommit *commit, const ColorOp &op, double inputScale, double outputScale) = 0; virtual void program(DrmAtomicCommit *commit, std::span<const ColorOp> operations, double inputScale, double outputScale) = 0;
virtual void bypass(DrmAtomicCommit *commit) = 0; virtual void bypass(DrmAtomicCommit *commit) = 0;
DrmAbstractColorOp *next() const; DrmAbstractColorOp *next() const;
@ -45,7 +46,7 @@ public:
explicit LegacyLutColorOp(DrmAbstractColorOp *next, DrmProperty *prop, uint32_t maxSize); explicit LegacyLutColorOp(DrmAbstractColorOp *next, DrmProperty *prop, uint32_t maxSize);
bool canBeUsedFor(const ColorOp &op) override; bool canBeUsedFor(const ColorOp &op) override;
void program(DrmAtomicCommit *commit, const ColorOp &op, double inputScale, double outputScale) override; void program(DrmAtomicCommit *commit, std::span<const ColorOp> operations, double inputScale, double outputScale) override;
void bypass(DrmAtomicCommit *commit) override; void bypass(DrmAtomicCommit *commit) override;
private: private:
@ -60,7 +61,7 @@ public:
explicit LegacyMatrixColorOp(DrmAbstractColorOp *next, DrmProperty *prop); explicit LegacyMatrixColorOp(DrmAbstractColorOp *next, DrmProperty *prop);
bool canBeUsedFor(const ColorOp &op) override; bool canBeUsedFor(const ColorOp &op) override;
void program(DrmAtomicCommit *commit, const ColorOp &op, double inputScale, double outputScale) override; void program(DrmAtomicCommit *commit, std::span<const ColorOp> operations, double inputScale, double outputScale) override;
void bypass(DrmAtomicCommit *commit) override; void bypass(DrmAtomicCommit *commit) override;
private: private: