backends/drm: combine color operations more aggressively than ColorPipeline does

With programmable LUTs, consecutive transfer functions, inverse transfer functions and
multipliers can all be combined into a single LUT. This allows offloading operations in more
situations and also makes them more efficient, as potentially fewer LUTs have
to be programmed.
This commit is contained in:
Xaver Hugl 2024-07-15 20:35:46 +02:00
parent 3f2f3cb020
commit e2c8f25d31
2 changed files with 60 additions and 80 deletions

View file

@ -82,29 +82,28 @@ bool DrmAbstractColorOp::matchPipeline(DrmAtomicCommit *commit, const ColorPipel
currentOp->bypass(m_cache.get());
currentOp = currentOp->next();
}
currentOp->program(m_cache.get(), *initialOp, 1, 1);
currentOp->program(m_cache.get(), std::span(&*initialOp, 1), 1, 1);
currentOp = currentOp->next();
}
for (auto it = pipeline.ops.begin(); it != pipeline.ops.end(); it++) {
for (auto it = pipeline.ops.begin(); it != pipeline.ops.end();) {
while (!currentOp->canBeUsedFor(*it)) {
currentOp->bypass(m_cache.get());
currentOp = currentOp->next();
}
if (it == pipeline.ops.end() - 1) {
// this is the last op, we need to un-do the factor
// this assumes that the output is always limited range
currentOp->program(m_cache.get(), *it, valueScaling, 1.0);
valueScaling = 1.0;
} else if (needsLimitedRange(*it) || needsLimitedRange(*(it + 1))) {
// this op can only output limited range or the next op needs a limited range input,
// adjust the factor to make it happen
currentOp->program(m_cache.get(), *it, valueScaling, 1.0 / it->output.max);
valueScaling = 1.0 / it->output.max;
} else {
// this and the next op are both fine with extended range, set the factor to 1.0 to use all the resolution we can get
currentOp->program(m_cache.get(), *it, valueScaling, 1.0);
valueScaling = 1.0;
auto firstIt = it;
it++;
// combine as many operations into one hardware operation as possible
while (it != pipeline.ops.end() && currentOp->canBeUsedFor(*it)) {
it++;
}
std::span operations(firstIt, it);
double outputScaling = 1.0;
if (it != pipeline.ops.end() && (needsLimitedRange(operations.front()) || needsLimitedRange(operations.back()) || needsLimitedRange(*it))) {
// if this or the next operation needs a limited range, or we need limited range for the output, make it happen
outputScaling = 1.0 / operations.back().output.max;
}
currentOp->program(m_cache.get(), operations, valueScaling, outputScaling);
valueScaling = outputScaling;
currentOp = currentOp->next();
}
while (currentOp) {
@ -136,45 +135,31 @@ bool LegacyLutColorOp::canBeUsedFor(const ColorOp &op)
return false;
}
// Diff hunk for LegacyLutColorOp::program (removed and added lines are interleaved here).
// The signature taking a single `const ColorOp &op` and the three per-variant branches belong
// to the version being replaced; the signature taking `std::span<const ColorOp> operations`
// and the single loop over all LUT entries belong to the replacement.
void LegacyLutColorOp::program(DrmAtomicCommit *commit, const ColorOp &op, double inputScale, double outputScale)
void LegacyLutColorOp::program(DrmAtomicCommit *commit, std::span<const ColorOp> operations, double inputScale, double outputScale)
{
if (auto tf = std::get_if<ColorTransferFunction>(&op.operation)) {
for (uint32_t i = 0; i < m_maxSize; i++) {
const double nits = tf->tf.encodedToNits(i / double(m_maxSize - 1) / inputScale, tf->referenceLuminance);
const uint16_t output = std::round(std::clamp(nits * outputScale, 0.0, 1.0) * std::numeric_limits<uint16_t>::max());
m_components[i] = {
.red = output,
.green = output,
.blue = output,
.reserved = 0,
};
// Replacement body: for each of the m_maxSize LUT entries, normalize the index to [0, 1],
// divide by inputScale, and run the value through every operation of the span in order.
for (uint32_t i = 0; i < m_maxSize; i++) {
const double input = i / double(m_maxSize - 1);
const double scaledInput = input / inputScale;
QVector3D output(scaledInput, scaledInput, scaledInput);
for (const auto &op : operations) {
if (auto tf = std::get_if<ColorTransferFunction>(&op.operation)) {
output = tf->tf.encodedToNits(output, tf->referenceLuminance);
} else if (auto tf = std::get_if<InverseColorTransferFunction>(&op.operation)) {
output = tf->tf.nitsToEncoded(output, tf->referenceLuminance);
} else if (auto mult = std::get_if<ColorMultiplier>(&op.operation)) {
output *= mult->factors;
} else {
Q_UNREACHABLE();
}
}
commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), m_components.data(), sizeof(drm_color_lut) * m_components.size()));
} else if (auto tf = std::get_if<InverseColorTransferFunction>(&op.operation)) {
for (uint32_t i = 0; i < m_maxSize; i++) {
const double nits = tf->tf.nitsToEncoded(i / double(m_maxSize - 1) / inputScale, tf->referenceLuminance);
const uint16_t output = std::round(std::clamp(nits * outputScale, 0.0, 1.0) * std::numeric_limits<uint16_t>::max());
m_components[i] = {
.red = output,
.green = output,
.blue = output,
.reserved = 0,
};
}
commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), m_components.data(), sizeof(drm_color_lut) * m_components.size()));
} else if (auto mult = std::get_if<ColorMultiplier>(&op.operation)) {
for (uint32_t i = 0; i < m_maxSize; i++) {
m_components[i] = {
.red = uint16_t(std::round(std::clamp(mult->factors.x() * outputScale / inputScale * i / double(m_maxSize - 1), 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
.green = uint16_t(std::round(std::clamp(mult->factors.y() * outputScale / inputScale * i / double(m_maxSize - 1), 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
.blue = uint16_t(std::round(std::clamp(mult->factors.z() * outputScale / inputScale * i / double(m_maxSize - 1), 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
.reserved = 0,
};
}
commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), m_components.data(), sizeof(drm_color_lut) * m_maxSize));
} else {
Q_ASSERT(false);
// Replacement body: each channel is multiplied by outputScale, clamped to [0, 1] and
// quantized to the full uint16_t range before being stored in the LUT entry.
m_components[i] = {
.red = uint16_t(std::round(std::clamp(output.x() * outputScale, 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
.green = uint16_t(std::round(std::clamp(output.y() * outputScale, 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
.blue = uint16_t(std::round(std::clamp(output.z() * outputScale, 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
.reserved = 0,
};
}
// The finished table (m_maxSize entries of drm_color_lut) is attached to the commit as a blob for m_prop.
commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), m_components.data(), sizeof(drm_color_lut) * m_maxSize));
}
void LegacyLutColorOp::bypass(DrmAtomicCommit *commit)
@ -215,33 +200,27 @@ static uint64_t doubleToFixed(double value)
return ret;
}
// Diff hunk for LegacyMatrixColorOp::program (removed and added lines are interleaved here).
// The signature taking a single `const ColorOp &op` and the two `drm_color_ctm` branches built
// from `scaled` belong to the version being replaced; the signature taking
// `std::span<const ColorOp> operations` and the code building `result` belong to the replacement.
void LegacyMatrixColorOp::program(DrmAtomicCommit *commit, const ColorOp &op, double inputScale, double outputScale)
void LegacyMatrixColorOp::program(DrmAtomicCommit *commit, std::span<const ColorOp> operations, double inputScale, double outputScale)
{
if (auto matrix = std::get_if<ColorMatrix>(&op.operation)) {
QMatrix4x4 scaled = matrix->mat;
scaled.scale(outputScale / inputScale);
drm_color_ctm data = {
.matrix = {
doubleToFixed(scaled(0, 0)), doubleToFixed(scaled(0, 1)), doubleToFixed(scaled(0, 2)), //
doubleToFixed(scaled(1, 0)), doubleToFixed(scaled(1, 1)), doubleToFixed(scaled(1, 2)), //
doubleToFixed(scaled(2, 0)), doubleToFixed(scaled(2, 1)), doubleToFixed(scaled(2, 2)), //
},
};
commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), &data, sizeof(data)));
} else if (auto mult = std::get_if<ColorMultiplier>(&op.operation)) {
QVector3D scaled = mult->factors;
scaled *= outputScale / inputScale;
drm_color_ctm data = {
.matrix = {
doubleToFixed(scaled.x()), doubleToFixed(0), doubleToFixed(0), //
doubleToFixed(0), doubleToFixed(scaled.y()), doubleToFixed(0), //
doubleToFixed(0), doubleToFixed(0), doubleToFixed(scaled.z()), //
},
};
commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), &data, sizeof(data)));
} else {
Q_ASSERT(false);
// Replacement body: accumulate all matrix and multiplier operations of the span into one matrix.
// NOTE(review): outputScale is never read in the replacement body, whereas the replaced code
// scaled by outputScale / inputScale — the requested output scaling appears to be dropped; confirm
// and apply it to `result`.
// NOTE(review): `result *= ...` and `result.scale(...)` presumably post-multiply, which would make
// the last operation of the span act on the input first; verify the composition order for spans
// with more than one operation.
QMatrix4x4 result;
result.scale(1.0 / inputScale);
for (const auto &op : operations) {
if (auto matrix = std::get_if<ColorMatrix>(&op.operation)) {
result *= matrix->mat;
} else if (auto mult = std::get_if<ColorMultiplier>(&op.operation)) {
result.scale(mult->factors.x(), mult->factors.y(), mult->factors.z());
} else {
Q_UNREACHABLE();
}
}
// Only the upper-left 3x3 part of the accumulated matrix is converted with doubleToFixed and
// written into the drm_color_ctm blob.
drm_color_ctm data = {
.matrix = {
doubleToFixed(result(0, 0)), doubleToFixed(result(0, 1)), doubleToFixed(result(0, 2)), //
doubleToFixed(result(1, 0)), doubleToFixed(result(1, 1)), doubleToFixed(result(1, 2)), //
doubleToFixed(result(2, 0)), doubleToFixed(result(2, 1)), doubleToFixed(result(2, 2)), //
},
};
commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), &data, sizeof(data)));
}
void LegacyMatrixColorOp::bypass(DrmAtomicCommit *commit)

View file

@ -11,6 +11,7 @@
#include <drm.h>
#include <memory>
#include <span>
namespace KWin
{
@ -27,7 +28,7 @@ public:
bool matchPipeline(DrmAtomicCommit *commit, const ColorPipeline &pipeline);
virtual bool canBeUsedFor(const ColorOp &op) = 0;
virtual void program(DrmAtomicCommit *commit, const ColorOp &op, double inputScale, double outputScale) = 0;
virtual void program(DrmAtomicCommit *commit, std::span<const ColorOp> operations, double inputScale, double outputScale) = 0;
virtual void bypass(DrmAtomicCommit *commit) = 0;
DrmAbstractColorOp *next() const;
@ -45,7 +46,7 @@ public:
explicit LegacyLutColorOp(DrmAbstractColorOp *next, DrmProperty *prop, uint32_t maxSize);
bool canBeUsedFor(const ColorOp &op) override;
void program(DrmAtomicCommit *commit, const ColorOp &op, double inputScale, double outputScale) override;
void program(DrmAtomicCommit *commit, std::span<const ColorOp> operations, double inputScale, double outputScale) override;
void bypass(DrmAtomicCommit *commit) override;
private:
@ -60,7 +61,7 @@ public:
explicit LegacyMatrixColorOp(DrmAbstractColorOp *next, DrmProperty *prop);
bool canBeUsedFor(const ColorOp &op) override;
void program(DrmAtomicCommit *commit, const ColorOp &op, double inputScale, double outputScale) override;
void program(DrmAtomicCommit *commit, std::span<const ColorOp> operations, double inputScale, double outputScale) override;
void bypass(DrmAtomicCommit *commit) override;
private: