From 1c8bd1be626d2c2453f53e8c67d5f15489c75835 Mon Sep 17 00:00:00 2001 From: Xaver Hugl Date: Mon, 5 Feb 2024 22:25:22 +0100 Subject: [PATCH] backends/drm: use explicit sync where possible Instead of calling glFinish, which blocks until it's done and has high CPU usage on NVidia, use EGL_ANDROID_native_fence_fd to get an explicit sync fd, which the commit thread automatically waits on before committing the buffer to KMS. CCBUG: 452219 --- src/backends/drm/drm_buffer.cpp | 17 ++++++----- src/backends/drm/drm_buffer.h | 2 +- src/backends/drm/drm_egl_layer.cpp | 2 +- src/backends/drm/drm_egl_layer_surface.cpp | 34 +++++++++++----------- src/backends/drm/drm_egl_layer_surface.h | 5 ++-- src/backends/drm/drm_gpu.cpp | 4 +-- src/backends/drm/drm_gpu.h | 2 +- src/backends/drm/drm_qpainter_layer.cpp | 6 ++-- 8 files changed, 38 insertions(+), 34 deletions(-) diff --git a/src/backends/drm/drm_buffer.cpp b/src/backends/drm/drm_buffer.cpp index 1f246463db..21dd84a5c5 100644 --- a/src/backends/drm/drm_buffer.cpp +++ b/src/backends/drm/drm_buffer.cpp @@ -38,7 +38,7 @@ namespace KWin static bool s_envIsSet = false; static bool s_disableBufferWait = qEnvironmentVariableIntValue("KWIN_DRM_DISABLE_BUFFER_READABILITY_CHECKS", &s_envIsSet) && s_envIsSet; -DrmFramebuffer::DrmFramebuffer(DrmGpu *gpu, uint32_t fbId, GraphicsBuffer *buffer) +DrmFramebuffer::DrmFramebuffer(DrmGpu *gpu, uint32_t fbId, GraphicsBuffer *buffer, FileDescriptor &&readFence) : m_framebufferId(fbId) , m_gpu(gpu) , m_bufferRef(buffer) @@ -48,13 +48,16 @@ DrmFramebuffer::DrmFramebuffer(DrmGpu *gpu, uint32_t fbId, GraphicsBuffer *buffe // See https://gitlab.freedesktop.org/drm/intel/-/issues/9415 m_readable = true; } + m_syncFd = std::move(readFence); #ifdef DMA_BUF_IOCTL_EXPORT_SYNC_FILE - dma_buf_export_sync_file req{ - .flags = DMA_BUF_SYNC_READ, - .fd = -1, - }; - if (drmIoctl(buffer->dmabufAttributes()->fd[0].get(), DMA_BUF_IOCTL_EXPORT_SYNC_FILE, &req) == 0) { - m_syncFd = FileDescriptor{req.fd}; + if (!m_syncFd.isValid()) { + dma_buf_export_sync_file req{ + .flags = DMA_BUF_SYNC_READ, + .fd = -1, + }; + if (drmIoctl(buffer->dmabufAttributes()->fd[0].get(), DMA_BUF_IOCTL_EXPORT_SYNC_FILE, &req) == 0) { + m_syncFd = FileDescriptor{req.fd}; + } } #endif } diff --git a/src/backends/drm/drm_buffer.h b/src/backends/drm/drm_buffer.h index 9bec7db777..8166428042 100644 --- a/src/backends/drm/drm_buffer.h +++ b/src/backends/drm/drm_buffer.h @@ -23,7 +23,7 @@ class DrmFramebuffer; class DrmFramebuffer { public: - DrmFramebuffer(DrmGpu *gpu, uint32_t fbId, GraphicsBuffer *buffer); + DrmFramebuffer(DrmGpu *gpu, uint32_t fbId, GraphicsBuffer *buffer, FileDescriptor &&readFence); ~DrmFramebuffer(); uint32_t framebufferId() const; diff --git a/src/backends/drm/drm_egl_layer.cpp b/src/backends/drm/drm_egl_layer.cpp index 105e440fb3..4c7ff4db68 100644 --- a/src/backends/drm/drm_egl_layer.cpp +++ b/src/backends/drm/drm_egl_layer.cpp @@ -139,7 +139,7 @@ bool EglGbmLayer::scanout(SurfaceItem *surfaceItem) if (!formats[dmabufAttributes->format].contains(dmabufAttributes->modifier)) { return false; } - m_scanoutBuffer = m_pipeline->gpu()->importBuffer(buffer); + m_scanoutBuffer = m_pipeline->gpu()->importBuffer(buffer, FileDescriptor{}); if (m_scanoutBuffer && m_pipeline->testScanout()) { m_dmabufFeedback.scanoutSuccessful(surface); m_currentDamage = surfaceItem->mapFromBuffer(surfaceItem->damage()); diff --git a/src/backends/drm/drm_egl_layer_surface.cpp b/src/backends/drm/drm_egl_layer_surface.cpp index 7624105d63..6cae292c23 100644 --- a/src/backends/drm/drm_egl_layer_surface.cpp +++ b/src/backends/drm/drm_egl_layer_surface.cpp @@ -179,12 +179,13 @@ bool EglGbmLayerSurface::endRendering(const QRegion &damagedRegion) m_surface->gbmSwapchain->release(m_surface->currentSlot); m_surface->timeQuery->end(); glFlush(); - if (m_eglBackend->contextObject()->isSoftwareRenderer() || m_eglBackend->gpu()->isNVidia()) { + EGLNativeFence sourceFence(m_eglBackend->eglDisplayObject()); + if (!sourceFence.isValid()) { // llvmpipe doesn't do synchronization properly: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9375 // and NVidia doesn't support implicit sync glFinish(); } - const auto buffer = importBuffer(m_surface.get(), m_surface->currentSlot.get()); + const auto buffer = importBuffer(m_surface.get(), m_surface->currentSlot.get(), sourceFence.fileDescriptor()); m_surface->renderEnd = std::chrono::steady_clock::now(); if (buffer) { m_surface->currentFramebuffer = buffer; @@ -486,7 +487,7 @@ std::shared_ptr EglGbmLayerSurface::doRenderTestBuffer(Surface * if (!slot) { return nullptr; } - if (const auto ret = importBuffer(surface, slot.get())) { + if (const auto ret = importBuffer(surface, slot.get(), FileDescriptor{})) { surface->currentSlot = slot; surface->currentFramebuffer = ret; return ret; @@ -495,14 +496,14 @@ std::shared_ptr EglGbmLayerSurface::doRenderTestBuffer(Surface * } } -std::shared_ptr EglGbmLayerSurface::importBuffer(Surface *surface, EglSwapchainSlot *slot) const +std::shared_ptr EglGbmLayerSurface::importBuffer(Surface *surface, EglSwapchainSlot *slot, const FileDescriptor &readFence) const { if (surface->bufferTarget == BufferTarget::Dumb || surface->importMode == MultiGpuImportMode::DumbBuffer) { return importWithCpu(surface, slot); } else if (surface->importMode == MultiGpuImportMode::Egl) { - return importWithEgl(surface, slot->buffer()); + return importWithEgl(surface, slot->buffer(), readFence); } else { - const auto ret = m_gpu->importBuffer(slot->buffer()); + const auto ret = m_gpu->importBuffer(slot->buffer(), readFence.duplicate()); if (!ret) { qCWarning(KWIN_DRM, "Failed to create framebuffer: %s", strerror(errno)); } @@ -510,24 +511,23 @@ std::shared_ptr EglGbmLayerSurface::importBuffer(Surface *surfac } } -std::shared_ptr EglGbmLayerSurface::importWithEgl(Surface *surface, GraphicsBuffer *sourceBuffer) const +std::shared_ptr EglGbmLayerSurface::importWithEgl(Surface *surface, GraphicsBuffer *sourceBuffer, const FileDescriptor &readFence) const { Q_ASSERT(surface->importGbmSwapchain); - EGLNativeFence sourceFence(m_eglBackend->eglDisplayObject()); - const auto display = m_eglBackend->displayForGpu(m_gpu); - // the NVidia proprietary driver supports neither implicit sync nor EGL_ANDROID_native_fence_sync - if (!sourceFence.isValid() || !display->supportsNativeFence()) { + // older versions of the NVidia proprietary driver support neither implicit sync nor EGL_ANDROID_native_fence_sync + if (!readFence.isValid() || !display->supportsNativeFence()) { glFinish(); } + if (!surface->importContext->makeCurrent()) { return nullptr; } surface->importTimeQuery->begin(); - if (sourceFence.isValid()) { - const auto destinationFence = EGLNativeFence::importFence(surface->importContext->displayObject(), sourceFence.fileDescriptor().duplicate()); + if (readFence.isValid()) { + const auto destinationFence = EGLNativeFence::importFence(surface->importContext->displayObject(), readFence.duplicate()); destinationFence.waitSync(); } @@ -563,8 +563,8 @@ std::shared_ptr EglGbmLayerSurface::importWithEgl(Surface *surfa surface->importContext->shaderManager()->popShader(); glFlush(); - if (m_gpu->isNVidia()) { - // the proprietary NVidia driver desn't support implicit sync + EGLNativeFence endFence(display); + if (!endFence.isValid()) { glFinish(); } surface->importGbmSwapchain->release(slot); @@ -572,7 +572,7 @@ std::shared_ptr EglGbmLayerSurface::importWithEgl(Surface *surfa // restore the old context m_eglBackend->makeCurrent(); - return m_gpu->importBuffer(slot->buffer()); + return m_gpu->importBuffer(slot->buffer(), endFence.fileDescriptor().duplicate()); } std::shared_ptr EglGbmLayerSurface::importWithCpu(Surface *surface, EglSwapchainSlot *source) const @@ -601,7 +601,7 @@ std::shared_ptr EglGbmLayerSurface::importWithCpu(Surface *surfa } GLFramebuffer::popFramebuffer(); - const auto ret = m_gpu->importBuffer(slot->buffer()); + const auto ret = m_gpu->importBuffer(slot->buffer(), FileDescriptor{}); if (!ret) { qCWarning(KWIN_DRM, "Failed to create a framebuffer: %s", strerror(errno)); } diff --git a/src/backends/drm/drm_egl_layer_surface.h b/src/backends/drm/drm_egl_layer_surface.h index 1ea70e0cb2..258e65052a 100644 --- a/src/backends/drm/drm_egl_layer_surface.h +++ b/src/backends/drm/drm_egl_layer_surface.h @@ -19,6 +19,7 @@ #include "drm_plane.h" #include "opengl/gltexture.h" #include "utils/damagejournal.h" +#include "utils/filedescriptor.h" struct gbm_bo; @@ -120,8 +121,8 @@ private: std::shared_ptr createGbmSwapchain(DrmGpu *gpu, EglContext *context, const QSize &size, uint32_t format, const QList &modifiers, bool forceLinear) const; std::shared_ptr doRenderTestBuffer(Surface *surface) const; - std::shared_ptr importBuffer(Surface *surface, EglSwapchainSlot *source) const; - std::shared_ptr importWithEgl(Surface *surface, GraphicsBuffer *sourceBuffer) const; + std::shared_ptr importBuffer(Surface *surface, EglSwapchainSlot *source, const FileDescriptor &readFence) const; + std::shared_ptr importWithEgl(Surface *surface, GraphicsBuffer *sourceBuffer, const FileDescriptor &readFence) const; std::shared_ptr importWithCpu(Surface *surface, EglSwapchainSlot *source) const; std::unique_ptr m_surface; diff --git a/src/backends/drm/drm_gpu.cpp b/src/backends/drm/drm_gpu.cpp index c1f5a91dd5..4137351103 100644 --- a/src/backends/drm/drm_gpu.cpp +++ b/src/backends/drm/drm_gpu.cpp @@ -836,7 +836,7 @@ GraphicsBufferAllocator *DrmGpu::graphicsBufferAllocator() const return m_allocator.get(); } -std::shared_ptr DrmGpu::importBuffer(GraphicsBuffer *buffer) +std::shared_ptr DrmGpu::importBuffer(GraphicsBuffer *buffer, FileDescriptor &&readFence) { const DmaBufAttributes *attributes = buffer->dmabufAttributes(); if (Q_UNLIKELY(!attributes)) { @@ -911,7 +911,7 @@ std::shared_ptr DrmGpu::importBuffer(GraphicsBuffer *buffer) return nullptr; } - return std::make_shared(this, framebufferId, buffer); + return std::make_shared(this, framebufferId, buffer, std::move(readFence)); } DrmLease::DrmLease(DrmGpu *gpu, FileDescriptor &&fd, uint32_t lesseeId, const QList &outputs) diff --git a/src/backends/drm/drm_gpu.h b/src/backends/drm/drm_gpu.h index f870140167..7caa841b9e 100644 --- a/src/backends/drm/drm_gpu.h +++ b/src/backends/drm/drm_gpu.h @@ -106,7 +106,7 @@ public: bool maybeModeset(); GraphicsBufferAllocator *graphicsBufferAllocator() const; - std::shared_ptr importBuffer(GraphicsBuffer *buffer); + std::shared_ptr importBuffer(GraphicsBuffer *buffer, FileDescriptor &&explicitFence); void releaseBuffers(); void recreateSurfaces(); diff --git a/src/backends/drm/drm_qpainter_layer.cpp b/src/backends/drm/drm_qpainter_layer.cpp index aab6731e1c..1c68f8bb68 100644 --- a/src/backends/drm/drm_qpainter_layer.cpp +++ b/src/backends/drm/drm_qpainter_layer.cpp @@ -49,7 +49,7 @@ std::optional DrmQPainterLayer::beginFrame() bool DrmQPainterLayer::endFrame(const QRegion &renderedRegion, const QRegion &damagedRegion) { m_renderTime = std::chrono::steady_clock::now() - m_renderStart; - m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer()); + m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer(), FileDescriptor{}); m_damageJournal.add(damagedRegion); m_swapchain->release(m_currentBuffer); if (!m_currentFramebuffer) { @@ -64,7 +64,7 @@ bool DrmQPainterLayer::checkTestBuffer() m_swapchain = std::make_shared(m_pipeline->gpu()->graphicsBufferAllocator(), m_pipeline->mode()->size(), DRM_FORMAT_XRGB8888); m_currentBuffer = m_swapchain->acquire(); if (m_currentBuffer) { - m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer()); + m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer(), FileDescriptor{}); m_swapchain->release(m_currentBuffer); if (!m_currentFramebuffer) { qCWarning(KWIN_DRM, "Failed to create dumb framebuffer: %s", strerror(errno)); @@ -125,7 +125,7 @@ std::optional DrmCursorQPainterLayer::beginFrame() bool DrmCursorQPainterLayer::endFrame(const QRegion &renderedRegion, const QRegion &damagedRegion) { m_renderTime = std::chrono::steady_clock::now() - m_renderStart; - m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer()); + m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer(), FileDescriptor{}); m_swapchain->release(m_currentBuffer); if (!m_currentFramebuffer) { qCWarning(KWIN_DRM, "Failed to create dumb framebuffer for the cursor: %s", strerror(errno));