backends/drm: combine color operations more aggressively than ColorPipeline does

With programmable LUTs, consecutive transfer functions, inverse transfer functions and multipliers can all be combined into a single LUT. This allows operations to be offloaded in more situations and also makes them more efficient, as potentially fewer LUTs have to be programmed.
2024-07-15 20:35:46 +02:00 · 2024-07-15 20:35:46 +02:00 · e2c8f25d31
commit e2c8f25d31
parent 3f2f3cb020
2 changed files with 60 additions and 80 deletions
--- a/src/backends/drm/drm_colorop.cpp
+++ b/src/backends/drm/drm_colorop.cpp
@ -82,29 +82,28 @@ bool DrmAbstractColorOp::matchPipeline(DrmAtomicCommit *commit, const ColorPipel
            currentOp->bypass(m_cache.get());
            currentOp = currentOp->next();
        }
-        currentOp->program(m_cache.get(), *initialOp, 1, 1);
+        currentOp->program(m_cache.get(), std::span(&*initialOp, 1), 1, 1);
        currentOp = currentOp->next();
    }
-    for (auto it = pipeline.ops.begin(); it != pipeline.ops.end(); it++) {
+    for (auto it = pipeline.ops.begin(); it != pipeline.ops.end();) {
        while (!currentOp->canBeUsedFor(*it)) {
            currentOp->bypass(m_cache.get());
            currentOp = currentOp->next();
        }
-        if (it == pipeline.ops.end() - 1) {
+        auto firstIt = it;
-            // this is the last op, we need to un-do the factor
+        it++;
-            // this assumes that the output is always limited range
+        // combine as many operations into one hardware operation as possible
-            currentOp->program(m_cache.get(), *it, valueScaling, 1.0);
+        while (it != pipeline.ops.end() && currentOp->canBeUsedFor(*it)) {
-            valueScaling = 1.0;
+            it++;
        } else if (needsLimitedRange(*it) || needsLimitedRange(*(it + 1))) {
            // this op can only output limited range or the next op needs a limited range input,
            // adjust the factor to make it happen
            currentOp->program(m_cache.get(), *it, valueScaling, 1.0 / it->output.max);
            valueScaling = 1.0 / it->output.max;
        } else {
            // this and the next op are both fine with extended range, set the factor to 1.0 to use all the resolution we can get
            currentOp->program(m_cache.get(), *it, valueScaling, 1.0);
            valueScaling = 1.0;
        }
        std::span operations(firstIt, it);
        double outputScaling = 1.0;
        if (it != pipeline.ops.end() && (needsLimitedRange(operations.front()) || needsLimitedRange(operations.back()) || needsLimitedRange(*it))) {
            // if this or the next operation needs a limited range, or we need limited range for the output, make it happen
            outputScaling = 1.0 / operations.back().output.max;
        }
        currentOp->program(m_cache.get(), operations, valueScaling, outputScaling);
        valueScaling = outputScaling;
        currentOp = currentOp->next();
    }
    while (currentOp) {
@ -136,45 +135,31 @@ bool LegacyLutColorOp::canBeUsedFor(const ColorOp &op)
    return false;
 }
-void LegacyLutColorOp::program(DrmAtomicCommit *commit, const ColorOp &op, double inputScale, double outputScale)
+void LegacyLutColorOp::program(DrmAtomicCommit *commit, std::span<const ColorOp> operations, double inputScale, double outputScale)
 {
-    if (auto tf = std::get_if<ColorTransferFunction>(&op.operation)) {
+    for (uint32_t i = 0; i < m_maxSize; i++) {
-        for (uint32_t i = 0; i < m_maxSize; i++) {
+        const double input = i / double(m_maxSize - 1);
-            const double nits = tf->tf.encodedToNits(i / double(m_maxSize - 1) / inputScale, tf->referenceLuminance);
+        const double scaledInput = input / inputScale;
-            const uint16_t output = std::round(std::clamp(nits * outputScale, 0.0, 1.0) * std::numeric_limits<uint16_t>::max());
+        QVector3D output(scaledInput, scaledInput, scaledInput);
-            m_components[i] = {
+        for (const auto &op : operations) {
-                .red = output,
+            if (auto tf = std::get_if<ColorTransferFunction>(&op.operation)) {
-                .green = output,
+                output = tf->tf.encodedToNits(output, tf->referenceLuminance);
-                .blue = output,
+            } else if (auto tf = std::get_if<InverseColorTransferFunction>(&op.operation)) {
-                .reserved = 0,
+                output = tf->tf.nitsToEncoded(output, tf->referenceLuminance);
-            };
+            } else if (auto mult = std::get_if<ColorMultiplier>(&op.operation)) {
                output *= mult->factors;
            } else {
                Q_UNREACHABLE();
            }
        }
-        commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), m_components.data(), sizeof(drm_color_lut) * m_components.size()));
+        m_components[i] = {
-    } else if (auto tf = std::get_if<InverseColorTransferFunction>(&op.operation)) {
+            .red = uint16_t(std::round(std::clamp(output.x() * outputScale, 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
-        for (uint32_t i = 0; i < m_maxSize; i++) {
+            .green = uint16_t(std::round(std::clamp(output.y() * outputScale, 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
-            const double nits = tf->tf.nitsToEncoded(i / double(m_maxSize - 1) / inputScale, tf->referenceLuminance);
+            .blue = uint16_t(std::round(std::clamp(output.z() * outputScale, 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
-            const uint16_t output = std::round(std::clamp(nits * outputScale, 0.0, 1.0) * std::numeric_limits<uint16_t>::max());
+            .reserved = 0,
-            m_components[i] = {
+        };
                .red = output,
                .green = output,
                .blue = output,
                .reserved = 0,
            };
        }
        commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), m_components.data(), sizeof(drm_color_lut) * m_components.size()));
    } else if (auto mult = std::get_if<ColorMultiplier>(&op.operation)) {
        for (uint32_t i = 0; i < m_maxSize; i++) {
            m_components[i] = {
                .red = uint16_t(std::round(std::clamp(mult->factors.x() * outputScale / inputScale * i / double(m_maxSize - 1), 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
                .green = uint16_t(std::round(std::clamp(mult->factors.y() * outputScale / inputScale * i / double(m_maxSize - 1), 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
                .blue = uint16_t(std::round(std::clamp(mult->factors.z() * outputScale / inputScale * i / double(m_maxSize - 1), 0.0, 1.0) * std::numeric_limits<uint16_t>::max())),
                .reserved = 0,
            };
        }
        commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), m_components.data(), sizeof(drm_color_lut) * m_maxSize));
    } else {
        Q_ASSERT(false);
    }
    commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), m_components.data(), sizeof(drm_color_lut) * m_maxSize));
 }
 void LegacyLutColorOp::bypass(DrmAtomicCommit *commit)
@ -215,33 +200,27 @@ static uint64_t doubleToFixed(double value)
    return ret;
 }
-void LegacyMatrixColorOp::program(DrmAtomicCommit *commit, const ColorOp &op, double inputScale, double outputScale)
+void LegacyMatrixColorOp::program(DrmAtomicCommit *commit, std::span<const ColorOp> operations, double inputScale, double outputScale)
 {
-    if (auto matrix = std::get_if<ColorMatrix>(&op.operation)) {
+    QMatrix4x4 result;
-        QMatrix4x4 scaled = matrix->mat;
+    result.scale(1.0 / inputScale);
-        scaled.scale(outputScale / inputScale);
+    for (const auto &op : operations) {
-        drm_color_ctm data = {
+        if (auto matrix = std::get_if<ColorMatrix>(&op.operation)) {
-            .matrix = {
+            result *= matrix->mat;
-                doubleToFixed(scaled(0, 0)), doubleToFixed(scaled(0, 1)), doubleToFixed(scaled(0, 2)), //
+        } else if (auto mult = std::get_if<ColorMultiplier>(&op.operation)) {
-                doubleToFixed(scaled(1, 0)), doubleToFixed(scaled(1, 1)), doubleToFixed(scaled(1, 2)), //
+            result.scale(mult->factors.x(), mult->factors.y(), mult->factors.z());
-                doubleToFixed(scaled(2, 0)), doubleToFixed(scaled(2, 1)), doubleToFixed(scaled(2, 2)), //
+        } else {
-            },
+            Q_UNREACHABLE();
-        };
+        }
        commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), &data, sizeof(data)));
    } else if (auto mult = std::get_if<ColorMultiplier>(&op.operation)) {
        QVector3D scaled = mult->factors;
        scaled *= outputScale / inputScale;
        drm_color_ctm data = {
            .matrix = {
                doubleToFixed(scaled.x()), doubleToFixed(0), doubleToFixed(0), //
                doubleToFixed(0), doubleToFixed(scaled.y()), doubleToFixed(0), //
                doubleToFixed(0), doubleToFixed(0), doubleToFixed(scaled.z()), //
            },
        };
        commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), &data, sizeof(data)));
    } else {
        Q_ASSERT(false);
    }
    drm_color_ctm data = {
        .matrix = {
            doubleToFixed(result(0, 0)), doubleToFixed(result(0, 1)), doubleToFixed(result(0, 2)), //
            doubleToFixed(result(1, 0)), doubleToFixed(result(1, 1)), doubleToFixed(result(1, 2)), //
            doubleToFixed(result(2, 0)), doubleToFixed(result(2, 1)), doubleToFixed(result(2, 2)), //
        },
    };
    commit->addBlob(*m_prop, DrmBlob::create(m_prop->drmObject()->gpu(), &data, sizeof(data)));
 }
 void LegacyMatrixColorOp::bypass(DrmAtomicCommit *commit)
--- a/src/backends/drm/drm_colorop.h
+++ b/src/backends/drm/drm_colorop.h
@ -11,6 +11,7 @@
 #include <drm.h>
 #include <memory>
 #include <span>
 namespace KWin
 {
@ -27,7 +28,7 @@ public:
    bool matchPipeline(DrmAtomicCommit *commit, const ColorPipeline &pipeline);
    virtual bool canBeUsedFor(const ColorOp &op) = 0;
-    virtual void program(DrmAtomicCommit *commit, const ColorOp &op, double inputScale, double outputScale) = 0;
+    virtual void program(DrmAtomicCommit *commit, std::span<const ColorOp> operations, double inputScale, double outputScale) = 0;
    virtual void bypass(DrmAtomicCommit *commit) = 0;
    DrmAbstractColorOp *next() const;
@ -45,7 +46,7 @@ public:
    explicit LegacyLutColorOp(DrmAbstractColorOp *next, DrmProperty *prop, uint32_t maxSize);
    bool canBeUsedFor(const ColorOp &op) override;
-    void program(DrmAtomicCommit *commit, const ColorOp &op, double inputScale, double outputScale) override;
+    void program(DrmAtomicCommit *commit, std::span<const ColorOp> operations, double inputScale, double outputScale) override;
    void bypass(DrmAtomicCommit *commit) override;
 private:
@ -60,7 +61,7 @@ public:
    explicit LegacyMatrixColorOp(DrmAbstractColorOp *next, DrmProperty *prop);
    bool canBeUsedFor(const ColorOp &op) override;
-    void program(DrmAtomicCommit *commit, const ColorOp &op, double inputScale, double outputScale) override;
+    void program(DrmAtomicCommit *commit, std::span<const ColorOp> operations, double inputScale, double outputScale) override;
    void bypass(DrmAtomicCommit *commit) override;
 private: