From c3c3f56e982c7586afbcc16a152177c0547f8493 Mon Sep 17 00:00:00 2001
From: Xaver Hugl <xaver.hugl@gmail.com>
Date: Wed, 14 Aug 2024 00:15:15 +0200
Subject: [PATCH] implement a proper tone mapping algorithm

Previously, HDR content brighter than the maximum luminance the screen can show was simply
clipped. When HDR metadata indicates that this could happen, KWin now
- converts the rgb colors to ICtCp, to split luminance and color
- applies a tone mapping curve that maps the intensity component from
 - [0, reference] to [0, newReference] linearly
 - [reference, max content luminance] to [newReference, max display luminance] nonlinearly
- converts the resulting ICtCp color back to rgb

The result is that HDR content looks much, much better on SDR displays, at least when decent
HDR metadata is provided.
As wrong metadata could cause this tone mapping to kick in when it shouldn't, for example in games,
the environment variable KWIN_DISABLE_TONEMAPPING can be set to 1 to disable tone mapping and fall
back to clipping instead.
---
 src/backends/drm/drm_colorop.cpp |  5 ++-
 src/core/colorpipeline.cpp       | 65 ++++++++++++++++++++++++++++++
 src/core/colorpipeline.h         | 20 +++++++++-
 src/core/colorspace.cpp          | 26 ++++++++++++
 src/core/colorspace.h            |  3 ++
 src/opengl/colormanagement.glsl  | 68 ++++++++++++++++++++++++++++++--
 src/opengl/glshader.cpp          | 12 ++++++
 src/opengl/glshader.h            |  3 ++
 8 files changed, 196 insertions(+), 6 deletions(-)
diff --git a/src/backends/drm/drm_colorop.cpp b/src/backends/drm/drm_colorop.cpp
index 955ad4a317..50f3065439 100644
--- a/src/backends/drm/drm_colorop.cpp
+++ b/src/backends/drm/drm_colorop.cpp
@@ -127,7 +127,8 @@ LegacyLutColorOp::LegacyLutColorOp(DrmAbstractColorOp *next, DrmProperty *prop,
 
 bool LegacyLutColorOp::canBeUsedFor(const ColorOp &op)
 {
-    if (std::holds_alternative<ColorTransferFunction>(op.operation) || std::holds_alternative<InverseColorTransferFunction>(op.operation)) {
+    if (std::holds_alternative<ColorTransferFunction>(op.operation) || std::holds_alternative<InverseColorTransferFunction>(op.operation)
+        || std::holds_alternative<ColorTonemapper>(op.operation)) {
         // the required resolution depends heavily on the function and on the input and output ranges / multipliers
         // but this is good enough for now
         return m_maxSize >= 1024;
@@ -150,6 +151,8 @@ void LegacyLutColorOp::program(DrmAtomicCommit *commit, std::span<const ColorOp>
                 output = tf->tf.nitsToEncoded(output);
             } else if (auto mult = std::get_if<ColorMultiplier>(&op.operation)) {
                 output *= mult->factors;
+            } else if (auto tonemap = std::get_if<ColorTonemapper>(&op.operation)) {
+                output.setX(tonemap->map(output.x()));
             } else {
                 Q_UNREACHABLE();
             }
diff --git a/src/core/colorpipeline.cpp b/src/core/colorpipeline.cpp
index ff261e1cac..53032382f9 100644
--- a/src/core/colorpipeline.cpp
+++ b/src/core/colorpipeline.cpp
@@ -8,6 +8,8 @@
 */
 #include "colorpipeline.h"
 
+#include <numbers>
+
 namespace KWin
 {
 
@@ -19,9 +21,12 @@ ValueRange ValueRange::operator*(double mult) const
     };
 }
 
+static const bool s_disableTonemapping = qEnvironmentVariableIntValue("KWIN_DISABLE_TONEMAPPING") == 1; // evaluated once; only the value 1 disables tone mapping
+
 ColorPipeline ColorPipeline::create(const ColorDescription &from, const ColorDescription &to, RenderingIntent intent)
 {
     const auto range1 = ValueRange(from.minLuminance(), from.maxHdrLuminance().value_or(from.referenceLuminance()));
+    const double maxOutputLuminance = to.maxHdrLuminance().value_or(to.referenceLuminance());
     ColorPipeline ret(ValueRange{
         .min = from.transferFunction().nitsToEncoded(range1.min),
         .max = from.transferFunction().nitsToEncoded(range1.max),
@@ -31,6 +36,9 @@ ColorPipeline ColorPipeline::create(const ColorDescription &from, const ColorDes
     // FIXME this assumes that the range stays the same with matrix multiplication
     // that's not necessarily true, and figuring out the actual range could be complicated..
     ret.addMatrix(from.toOther(to, intent), ret.currentOutputRange() * (to.referenceLuminance() / from.referenceLuminance()));
+    if (!s_disableTonemapping && ret.currentOutputRange().max > maxOutputLuminance * 1.01 && intent == RenderingIntent::Perceptual) {
+        ret.addTonemapper(to.containerColorimetry(), to.referenceLuminance(), ret.currentOutputRange().max, maxOutputLuminance, 1.5);
+    }
 
     ret.addInverseTransferFunction(to.transferFunction());
     return ret;
@@ -225,6 +233,34 @@ void ColorPipeline::addMatrix(const QMatrix4x4 &mat, const ValueRange &output)
     });
 }
 
+static const QMatrix4x4 s_toICtCp = QMatrix4x4(
+    2048.0 / 4096.0,   2048.0 / 4096.0,   0.0,             0.0,
+    6610.0 / 4096.0,  -13613.0 / 4096.0,  7003.0 / 4096.0, 0.0,
+    17933.0 / 4096.0, -17390.0 / 4096.0, -543.0 / 4096.0,  0.0,
+    0.0,               0.0,               0.0,             1.0); // rows are I, Ct, Cp; QMatrix4x4 takes its 16 values in row-major order, so no transpose
+static const QMatrix4x4 s_fromICtCp = s_toICtCp.inverted(); // ICtCp -> PQ-encoded LMS
+
+void ColorPipeline::addTonemapper(const Colorimetry &containerColorimetry, double referenceLuminance, double maxInputLuminance, double maxOutputLuminance, double maxAddedHeadroom)
+{
+    // rgb (nits) -> LMS (nits) -> PQ-encoded LMS -> ICtCp; the *inverse* transfer function op is the one that encodes nits
+    addMatrix(containerColorimetry.toLMS(), currentOutputRange());
+    addInverseTransferFunction(TransferFunction(TransferFunction::PerceptualQuantizer));
+    addMatrix(s_toICtCp, currentOutputRange());
+    // tone map the intensity component; ColorTonemapper::map takes and returns PQ-encoded values, so the range is PQ-encoded too
+    ops.push_back(ColorOp{
+        .input = currentOutputRange(),
+        .operation = ColorTonemapper(referenceLuminance, maxInputLuminance, maxOutputLuminance, maxAddedHeadroom),
+        .output = ValueRange {
+            .min = currentOutputRange().min,
+            .max = TransferFunction(TransferFunction::PerceptualQuantizer).nitsToEncoded(maxOutputLuminance),
+        },
+    });
+    // ICtCp -> PQ-encoded LMS -> LMS (nits) -> rgb (nits)
+    addMatrix(s_fromICtCp, currentOutputRange());
+    addTransferFunction(TransferFunction(TransferFunction::PerceptualQuantizer));
+    addMatrix(containerColorimetry.fromLMS(), currentOutputRange());
+}
+
 bool ColorPipeline::isIdentity() const
 {
     return ops.empty();
@@ -240,6 +276,8 @@ void ColorPipeline::add(const ColorOp &op)
         addTransferFunction(tf->tf);
     } else if (const auto tf = std::get_if<InverseColorTransferFunction>(&op.operation)) {
         addInverseTransferFunction(tf->tf);
+    } else {
+        ops.push_back(op);
     }
 }
 
@@ -265,6 +303,8 @@ QVector3D ColorPipeline::evaluate(const QVector3D &input) const
             ret = tf->tf.encodedToNits(ret);
         } else if (const auto tf = std::get_if<InverseColorTransferFunction>(&op.operation)) {
             ret = tf->tf.nitsToEncoded(ret);
+        } else if (const auto tonemap = std::get_if<ColorTonemapper>(&op.operation)) {
+            ret.setX(tonemap->map(ret.x()));
         }
     }
     return ret;
@@ -294,6 +334,29 @@ ColorMultiplier::ColorMultiplier(double factor)
     : factors(factor, factor, factor)
 {
 }
+
+ColorTonemapper::ColorTonemapper(double referenceLuminance, double maxInputLuminance, double maxOutputLuminance, double maxAddedHeadroom)
+    : m_inputReferenceLuminance(referenceLuminance)
+    , m_maxInputLuminance(maxInputLuminance)
+    , m_maxOutputLuminance(maxOutputLuminance)
+    , m_inputRange(maxInputLuminance / referenceLuminance)
+{
+    const double displayRange = maxOutputLuminance / referenceLuminance;
+    // factor by which the reference luminance is lowered to gain dynamic range, limited to [1, maxAddedHeadroom]
+    m_addedRange = std::clamp(m_inputRange / displayRange, 1.0, maxAddedHeadroom);
+    m_outputReferenceLuminance = referenceLuminance / m_addedRange;
+}
+
+double ColorTonemapper::map(double pqEncodedLuminance) const
+{
+    const double luminance = TransferFunction(TransferFunction::PerceptualQuantizer).encodedToNits(pqEncodedLuminance);
+    // input and return value are PQ-encoded; the curve itself works in nits and stays linear up to the reference luminance
+    const double low = std::min(luminance / m_addedRange, m_outputReferenceLuminance);
+    // nonlinear curve above the reference; without input headroom (m_inputRange <= 1) the normalization would divide by zero or flip sign, so use a step
+    const double relativeHighlight = m_inputRange > 1.0 ? std::clamp((luminance / m_inputReferenceLuminance - 1.0) / (m_inputRange - 1.0), 0.0, 1.0) : (luminance > m_inputReferenceLuminance ? 1.0 : 0.0);
+    const double high = std::log(relativeHighlight * (std::numbers::e - 1) + 1) * (m_maxOutputLuminance - m_outputReferenceLuminance);
+    return TransferFunction(TransferFunction::PerceptualQuantizer).nitsToEncoded(low + high);
+}
 }
 
 QDebug operator<<(QDebug debug, const KWin::ColorPipeline &pipeline)
@@ -308,6 +371,8 @@ QDebug operator<<(QDebug debug, const KWin::ColorPipeline &pipeline)
             debug << mat->mat;
         } else if (auto mult = std::get_if<KWin::ColorMultiplier>(&op.operation)) {
             debug << mult->factors;
+        } else if (auto tonemap = std::get_if<KWin::ColorTonemapper>(&op.operation)) {
+            debug << "tonemapper(" << tonemap->m_inputReferenceLuminance << tonemap->m_maxInputLuminance << tonemap->m_maxOutputLuminance << ")";
         }
     }
     debug << ")";
diff --git a/src/core/colorpipeline.h b/src/core/colorpipeline.h
index c3ffb2d3fd..6c6e1d3878 100644
--- a/src/core/colorpipeline.h
+++ b/src/core/colorpipeline.h
@@ -64,11 +64,28 @@ public:
     QVector3D factors;
 };
 
+class KWIN_EXPORT ColorTonemapper // color operation that applies a tone mapping curve to a PQ-encoded luminance value
+{
+public:
+    explicit ColorTonemapper(double referenceLuminance, double maxInputLuminance, double maxOutputLuminance, double maxAddedHeadroom);
+
+    double map(double pqEncodedLuminance) const; // takes and returns a PQ-encoded value
+    bool operator==(const ColorTonemapper &) const = default;
+
+    double m_inputReferenceLuminance; // the constructor's referenceLuminance argument
+    double m_maxInputLuminance;
+    double m_maxOutputLuminance;
+private:
+    double m_inputRange; // maxInputLuminance / referenceLuminance
+    double m_addedRange; // factor the reference luminance is divided by, clamped to [1, maxAddedHeadroom]
+    double m_outputReferenceLuminance; // referenceLuminance / m_addedRange
+};
+
 class KWIN_EXPORT ColorOp
 {
 public:
     ValueRange input;
-    std::variant<ColorTransferFunction, InverseColorTransferFunction, ColorMatrix, ColorMultiplier> operation;
+    std::variant<ColorTransferFunction, InverseColorTransferFunction, ColorMatrix, ColorMultiplier, ColorTonemapper> operation;
     ValueRange output;
 
     bool operator==(const ColorOp &) const = default;
@@ -101,6 +118,7 @@ public:
     void addTransferFunction(TransferFunction tf);
     void addInverseTransferFunction(TransferFunction tf);
     void addMatrix(const QMatrix4x4 &mat, const ValueRange &output);
+    void addTonemapper(const Colorimetry &containerColorimetry, double referenceLuminance, double maxInputLuminance, double maxOutputLuminance, double maxAddedHeadroom);
     void add(const ColorOp &op);
 
     ValueRange inputRange;
diff --git a/src/core/colorspace.cpp b/src/core/colorspace.cpp
index 886eb24302..e78ce890eb 100644
--- a/src/core/colorspace.cpp
+++ b/src/core/colorspace.cpp
@@ -127,6 +127,32 @@ const QMatrix4x4 &Colorimetry::fromXYZ() const
     return m_fromXYZ;
 }
 
+// XYZ -> LMS matrix suitable for the ICtCp conversion. Each row yields one LMS component:
+//   L =  0.3593 X + 0.6976 Y - 0.0359 Z
+//   M = -0.1921 X + 1.1005 Y + 0.0754 Z
+//   S =  0.0071 X + 0.0748 Y + 0.8433 Z
+// The 16-value QMatrix4x4 constructor takes its elements in row-major order; the
+// fourth row and column are those of the identity matrix, so the w component is
+// passed through unchanged.
+static const QMatrix4x4 s_xyzToDolbyLMS = QMatrix4x4(
+    0.3593,  0.6976, -0.0359, 0.0,
+    -0.1921, 1.1005, 0.0754,  0.0,
+    0.0071,  0.0748, 0.8433,  0.0,
+    0.0,     0.0,    0.0,     1.0);
+// inverse of the matrix above, i.e. LMS -> XYZ;
+// computed once during static initialization
+static const QMatrix4x4 s_inverseDolbyLMS = s_xyzToDolbyLMS.inverted();
+
+QMatrix4x4 Colorimetry::toLMS() const
+{
+    return s_xyzToDolbyLMS * m_toXYZ; // m_toXYZ is applied first (presumably linear rgb -> XYZ), then XYZ -> LMS
+}
+
+QMatrix4x4 Colorimetry::fromLMS() const
+{
+    return m_fromXYZ * s_inverseDolbyLMS; // LMS -> XYZ is applied first, then m_fromXYZ (XYZ -> linear rgb of this colorimetry)
+}
+
 Colorimetry Colorimetry::adaptedTo(QVector2D newWhitepoint) const
 {
     const auto mat = chromaticAdaptationMatrix(this->white(), newWhitepoint);
diff --git a/src/core/colorspace.h b/src/core/colorspace.h
index e02de1fb7a..ba0e24a49c 100644
--- a/src/core/colorspace.h
+++ b/src/core/colorspace.h
@@ -78,6 +78,9 @@ public:
      * @returns a matrix that transforms from the XYZ representation to the linear RGB representation of colors in this colorimetry
      */
     const QMatrix4x4 &fromXYZ() const;
+    QMatrix4x4 toLMS() const;
+    QMatrix4x4 fromLMS() const;
+
     bool operator==(const Colorimetry &other) const;
     bool operator==(NamedColorimetry name) const;
     /**
diff --git a/src/opengl/colormanagement.glsl b/src/opengl/colormanagement.glsl
index 26f05f62ab..08303462cc 100644
--- a/src/opengl/colormanagement.glsl
+++ b/src/opengl/colormanagement.glsl
@@ -21,9 +21,13 @@ uniform vec2 destinationTransferFunctionParams;
 
 // in nits
 uniform float sourceReferenceLuminance;
+uniform float maxTonemappingLuminance;
 uniform float destinationReferenceLuminance;
 uniform float maxDestinationLuminance;
 
+uniform mat4 destinationToLMS;
+uniform mat4 lmsToDestination;
+
 vec3 linearToPq(vec3 linear) {
     const float c1 = 0.8359375;
     const float c2 = 18.8515625;
@@ -46,6 +50,28 @@ vec3 pqToLinear(vec3 pq) {
     vec3 den = c2 - c3 * powed;
     return pow(num / den, vec3(m1_inv));
 }
+float singleLinearToPq(float linear) {
+    // single-component PQ encoding; the input is clamped to [0, 1] first
+    const float m1 = 0.1593017578125;
+    const float m2 = 78.84375;
+    const float c1 = 0.8359375;
+    const float c2 = 18.8515625;
+    const float c3 = 18.6875;
+    float y = pow(clamp(linear, 0.0, 1.0), m1);
+    float ratio = (c1 + c2 * y) / (1.0 + c3 * y);
+    return pow(ratio, m2);
+}
+float singlePqToLinear(float pq) {
+    // single-component PQ decoding; the input is clamped to [0, 1] first
+    const float m1_inv = 1.0 / 0.1593017578125;
+    const float m2_inv = 1.0 / 78.84375;
+    const float c1 = 0.8359375;
+    const float c2 = 18.8515625;
+    const float c3 = 18.6875;
+    float root = pow(clamp(pq, 0.0, 1.0), m2_inv);
+    float ratio = max(root - c1, 0.0) / (c2 - c3 * root);
+    return pow(ratio, m1_inv);
+}
 vec3 srgbToLinear(vec3 color) {
     bvec3 isLow = lessThanEqual(color, vec3(0.04045f));
     vec3 loPart = color / 12.92f;
@@ -68,9 +94,43 @@ vec3 linearToSrgb(vec3 color) {
 #endif
 }
 
-vec3 doTonemapping(vec3 color, float maxBrightness) {
-    // TODO do something better here
-    return clamp(color.rgb, vec3(0.0), vec3(maxBrightness));
+const mat3 toICtCp = transpose(mat3( // mat3() fills column by column, so the row-wise values below are transposed
+    2048.0 / 4096.0,   2048.0 / 4096.0,   0.0,
+    6610.0 / 4096.0,  -13613.0 / 4096.0,  7003.0 / 4096.0,
+    17933.0 / 4096.0, -17390.0 / 4096.0, -543.0 / 4096.0
+));
+const mat3 fromICtCp = inverse(toICtCp); // ICtCp -> PQ-encoded LMS
+
+vec3 doTonemapping(vec3 color) {
+    if (maxTonemappingLuminance < maxDestinationLuminance * 1.01) {
+        // clipping is enough
+        return clamp(color.rgb, vec3(0.0), vec3(maxDestinationLuminance));
+    }
+
+    // first, convert to ICtCp, to properly split luminance and color
+    // intensity is PQ-encoded luminance
+    vec3 lms = (destinationToLMS * vec4(color, 1.0)).rgb;
+    vec3 lms_PQ = linearToPq(lms / 10000.0);
+    vec3 ICtCp = toICtCp * lms_PQ;
+    float luminance = singlePqToLinear(ICtCp.r) * 10000.0;
+
+    // if the reference is too close to the maximum luminance, reduce it to get up to 50% headroom
+    float inputRange = maxTonemappingLuminance / destinationReferenceLuminance;
+    float outputRange = maxDestinationLuminance / destinationReferenceLuminance;
+    float addedRange = min(inputRange / outputRange, 1.5);
+    float outputReferenceLuminance = destinationReferenceLuminance / addedRange;
+
+    // keep it linear up to the reference luminance
+    float low = min(luminance / addedRange, outputReferenceLuminance);
+    // nonlinear curve above the reference; without input headroom (inputRange <= 1) the normalization would divide by zero or flip sign, so use a step
+    float relativeHighlight = inputRange > 1.0 ? clamp((luminance / destinationReferenceLuminance - 1.0) / (inputRange - 1.0), 0.0, 1.0) : (luminance > destinationReferenceLuminance ? 1.0 : 0.0);
+    const float e = 2.718281828459045;
+    float high = log(relativeHighlight * (e - 1.0) + 1.0) * (maxDestinationLuminance - outputReferenceLuminance);
+    luminance = low + high;
+
+    // last, convert back to rgb
+    ICtCp.r = singleLinearToPq(luminance / 10000.0);
+    return (lmsToDestination * vec4(pqToLinear(fromICtCp * ICtCp), 1.0)).rgb * 10000.0;
 }
 
 vec4 encodingToNits(vec4 color, int sourceTransferFunction, float luminanceOffset, float luminanceScale) {
@@ -95,7 +155,7 @@ vec4 encodingToNits(vec4 color, int sourceTransferFunction, float luminanceOffse
 vec4 sourceEncodingToNitsInDestinationColorspace(vec4 color) {
     color = encodingToNits(color, sourceNamedTransferFunction, sourceTransferFunctionParams.x, sourceTransferFunctionParams.y);
     color.rgb = (colorimetryTransform * vec4(color.rgb, 1.0)).rgb;
-    return vec4(doTonemapping(color.rgb, maxDestinationLuminance), color.a);
+    return vec4(doTonemapping(color.rgb), color.a);
 }
 
 vec4 nitsToEncoding(vec4 color, int destinationTransferFunction, float luminanceOffset, float luminanceScale) {
diff --git a/src/opengl/glshader.cpp b/src/opengl/glshader.cpp
index e4234e6baa..ee4a4f0771 100644
--- a/src/opengl/glshader.cpp
+++ b/src/opengl/glshader.cpp
@@ -217,6 +217,8 @@ void GLShader::resolveLocations()
     m_matrix4Locations[Mat4Uniform::WindowTransformation] = uniformLocation("windowTransformation");
     m_matrix4Locations[Mat4Uniform::ScreenTransformation] = uniformLocation("screenTransformation");
     m_matrix4Locations[Mat4Uniform::ColorimetryTransformation] = uniformLocation("colorimetryTransform");
+    m_matrix4Locations[Mat4Uniform::DestinationToLMS] = uniformLocation("destinationToLMS");
+    m_matrix4Locations[Mat4Uniform::LMSToDestination] = uniformLocation("lmsToDestination");
 
     m_vec2Locations[Vec2Uniform::Offset] = uniformLocation("offset");
     m_vec2Locations[Vec2Uniform::SourceTransferFunctionParams] = uniformLocation("sourceTransferFunctionParams");
@@ -230,6 +232,7 @@ void GLShader::resolveLocations()
     m_floatLocations[FloatUniform::MaxDestinationLuminance] = uniformLocation("maxDestinationLuminance");
     m_floatLocations[FloatUniform::SourceReferenceLuminance] = uniformLocation("sourceReferenceLuminance");
     m_floatLocations[FloatUniform::DestinationReferenceLuminance] = uniformLocation("destinationReferenceLuminance");
+    m_floatLocations[FloatUniform::MaxTonemappingLuminance] = uniformLocation("maxTonemappingLuminance");
 
     m_colorLocations[ColorUniform::Color] = uniformLocation("geometryColor");
 
@@ -469,6 +472,8 @@ QMatrix4x4 GLShader::getUniformMatrix4x4(const char *name)
     }
 }
 
+static const bool s_disableTonemapping = qEnvironmentVariableIntValue("KWIN_DISABLE_TONEMAPPING") == 1; // evaluated once; only the value 1 disables tone mapping
+
 void GLShader::setColorspaceUniforms(const ColorDescription &src, const ColorDescription &dst, RenderingIntent intent)
 {
     setUniform(Mat4Uniform::ColorimetryTransformation, src.toOther(dst, intent));
@@ -479,5 +484,12 @@ void GLShader::setColorspaceUniforms(const ColorDescription &src, const ColorDes
     setUniform(Vec2Uniform::DestinationTransferFunctionParams, QVector2D(dst.transferFunction().minLuminance, dst.transferFunction().maxLuminance - dst.transferFunction().minLuminance));
     setUniform(FloatUniform::DestinationReferenceLuminance, dst.referenceLuminance());
     setUniform(FloatUniform::MaxDestinationLuminance, dst.maxHdrLuminance().value_or(10'000));
+    if (!s_disableTonemapping && intent == RenderingIntent::Perceptual) {
+        setUniform(FloatUniform::MaxTonemappingLuminance, src.maxHdrLuminance().value_or(src.referenceLuminance()) * dst.referenceLuminance() / src.referenceLuminance());
+    } else {
+        setUniform(FloatUniform::MaxTonemappingLuminance, dst.referenceLuminance());
+    }
+    setUniform(Mat4Uniform::DestinationToLMS, dst.containerColorimetry().toLMS());
+    setUniform(Mat4Uniform::LMSToDestination, dst.containerColorimetry().fromLMS());
 }
 }
diff --git a/src/opengl/glshader.h b/src/opengl/glshader.h
index 8e35087448..8d38925166 100644
--- a/src/opengl/glshader.h
+++ b/src/opengl/glshader.h
@@ -85,6 +85,8 @@ public:
         WindowTransformation,
         ScreenTransformation,
         ColorimetryTransformation,
+        DestinationToLMS,
+        LMSToDestination,
         MatrixCount
     };
 
@@ -109,6 +111,7 @@ public:
         MaxDestinationLuminance,
         SourceReferenceLuminance,
         DestinationReferenceLuminance,
+        MaxTonemappingLuminance,
         FloatUniformCount
     };