backends/drm: use explicit sync where possible

Instead of calling glFinish, which blocks until rendering has finished and
causes high CPU usage on NVidia, use EGL_ANDROID_native_fence_sync to get an
explicit sync fd, which the commit thread automatically waits on before
committing the buffer to KMS.

CCBUG: 452219
This commit is contained in:
Xaver Hugl 2024-02-05 22:25:22 +01:00
parent 3118ba5982
commit 1c8bd1be62
8 changed files with 38 additions and 34 deletions

View file

@ -38,7 +38,7 @@ namespace KWin
static bool s_envIsSet = false;
static bool s_disableBufferWait = qEnvironmentVariableIntValue("KWIN_DRM_DISABLE_BUFFER_READABILITY_CHECKS", &s_envIsSet) && s_envIsSet;
DrmFramebuffer::DrmFramebuffer(DrmGpu *gpu, uint32_t fbId, GraphicsBuffer *buffer)
DrmFramebuffer::DrmFramebuffer(DrmGpu *gpu, uint32_t fbId, GraphicsBuffer *buffer, FileDescriptor &&readFence)
: m_framebufferId(fbId)
, m_gpu(gpu)
, m_bufferRef(buffer)
@ -48,13 +48,16 @@ DrmFramebuffer::DrmFramebuffer(DrmGpu *gpu, uint32_t fbId, GraphicsBuffer *buffe
// See https://gitlab.freedesktop.org/drm/intel/-/issues/9415
m_readable = true;
}
m_syncFd = std::move(readFence);
#ifdef DMA_BUF_IOCTL_EXPORT_SYNC_FILE
dma_buf_export_sync_file req{
.flags = DMA_BUF_SYNC_READ,
.fd = -1,
};
if (drmIoctl(buffer->dmabufAttributes()->fd[0].get(), DMA_BUF_IOCTL_EXPORT_SYNC_FILE, &req) == 0) {
m_syncFd = FileDescriptor{req.fd};
if (!m_syncFd.isValid()) {
dma_buf_export_sync_file req{
.flags = DMA_BUF_SYNC_READ,
.fd = -1,
};
if (drmIoctl(buffer->dmabufAttributes()->fd[0].get(), DMA_BUF_IOCTL_EXPORT_SYNC_FILE, &req) == 0) {
m_syncFd = FileDescriptor{req.fd};
}
}
#endif
}

View file

@ -23,7 +23,7 @@ class DrmFramebuffer;
class DrmFramebuffer
{
public:
DrmFramebuffer(DrmGpu *gpu, uint32_t fbId, GraphicsBuffer *buffer);
DrmFramebuffer(DrmGpu *gpu, uint32_t fbId, GraphicsBuffer *buffer, FileDescriptor &&readFence);
~DrmFramebuffer();
uint32_t framebufferId() const;

View file

@ -139,7 +139,7 @@ bool EglGbmLayer::scanout(SurfaceItem *surfaceItem)
if (!formats[dmabufAttributes->format].contains(dmabufAttributes->modifier)) {
return false;
}
m_scanoutBuffer = m_pipeline->gpu()->importBuffer(buffer);
m_scanoutBuffer = m_pipeline->gpu()->importBuffer(buffer, FileDescriptor{});
if (m_scanoutBuffer && m_pipeline->testScanout()) {
m_dmabufFeedback.scanoutSuccessful(surface);
m_currentDamage = surfaceItem->mapFromBuffer(surfaceItem->damage());

View file

@ -179,12 +179,13 @@ bool EglGbmLayerSurface::endRendering(const QRegion &damagedRegion)
m_surface->gbmSwapchain->release(m_surface->currentSlot);
m_surface->timeQuery->end();
glFlush();
if (m_eglBackend->contextObject()->isSoftwareRenderer() || m_eglBackend->gpu()->isNVidia()) {
EGLNativeFence sourceFence(m_eglBackend->eglDisplayObject());
if (!sourceFence.isValid()) {
// llvmpipe doesn't do synchronization properly: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9375
// and NVidia doesn't support implicit sync
glFinish();
}
const auto buffer = importBuffer(m_surface.get(), m_surface->currentSlot.get());
const auto buffer = importBuffer(m_surface.get(), m_surface->currentSlot.get(), sourceFence.fileDescriptor());
m_surface->renderEnd = std::chrono::steady_clock::now();
if (buffer) {
m_surface->currentFramebuffer = buffer;
@ -486,7 +487,7 @@ std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::doRenderTestBuffer(Surface *
if (!slot) {
return nullptr;
}
if (const auto ret = importBuffer(surface, slot.get())) {
if (const auto ret = importBuffer(surface, slot.get(), FileDescriptor{})) {
surface->currentSlot = slot;
surface->currentFramebuffer = ret;
return ret;
@ -495,14 +496,14 @@ std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::doRenderTestBuffer(Surface *
}
}
std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importBuffer(Surface *surface, EglSwapchainSlot *slot) const
std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importBuffer(Surface *surface, EglSwapchainSlot *slot, const FileDescriptor &readFence) const
{
if (surface->bufferTarget == BufferTarget::Dumb || surface->importMode == MultiGpuImportMode::DumbBuffer) {
return importWithCpu(surface, slot);
} else if (surface->importMode == MultiGpuImportMode::Egl) {
return importWithEgl(surface, slot->buffer());
return importWithEgl(surface, slot->buffer(), readFence);
} else {
const auto ret = m_gpu->importBuffer(slot->buffer());
const auto ret = m_gpu->importBuffer(slot->buffer(), readFence.duplicate());
if (!ret) {
qCWarning(KWIN_DRM, "Failed to create framebuffer: %s", strerror(errno));
}
@ -510,24 +511,23 @@ std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importBuffer(Surface *surfac
}
}
std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importWithEgl(Surface *surface, GraphicsBuffer *sourceBuffer) const
std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importWithEgl(Surface *surface, GraphicsBuffer *sourceBuffer, const FileDescriptor &readFence) const
{
Q_ASSERT(surface->importGbmSwapchain);
EGLNativeFence sourceFence(m_eglBackend->eglDisplayObject());
const auto display = m_eglBackend->displayForGpu(m_gpu);
// the NVidia proprietary driver supports neither implicit sync nor EGL_ANDROID_native_fence_sync
if (!sourceFence.isValid() || !display->supportsNativeFence()) {
// older versions of the NVidia proprietary driver support neither implicit sync nor EGL_ANDROID_native_fence_sync
if (!readFence.isValid() || !display->supportsNativeFence()) {
glFinish();
}
if (!surface->importContext->makeCurrent()) {
return nullptr;
}
surface->importTimeQuery->begin();
if (sourceFence.isValid()) {
const auto destinationFence = EGLNativeFence::importFence(surface->importContext->displayObject(), sourceFence.fileDescriptor().duplicate());
if (readFence.isValid()) {
const auto destinationFence = EGLNativeFence::importFence(surface->importContext->displayObject(), readFence.duplicate());
destinationFence.waitSync();
}
@ -563,8 +563,8 @@ std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importWithEgl(Surface *surfa
surface->importContext->shaderManager()->popShader();
glFlush();
if (m_gpu->isNVidia()) {
// the proprietary NVidia driver doesn't support implicit sync
EGLNativeFence endFence(display);
if (!endFence.isValid()) {
glFinish();
}
surface->importGbmSwapchain->release(slot);
@ -572,7 +572,7 @@ std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importWithEgl(Surface *surfa
// restore the old context
m_eglBackend->makeCurrent();
return m_gpu->importBuffer(slot->buffer());
return m_gpu->importBuffer(slot->buffer(), endFence.fileDescriptor().duplicate());
}
std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importWithCpu(Surface *surface, EglSwapchainSlot *source) const
@ -601,7 +601,7 @@ std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importWithCpu(Surface *surfa
}
GLFramebuffer::popFramebuffer();
const auto ret = m_gpu->importBuffer(slot->buffer());
const auto ret = m_gpu->importBuffer(slot->buffer(), FileDescriptor{});
if (!ret) {
qCWarning(KWIN_DRM, "Failed to create a framebuffer: %s", strerror(errno));
}

View file

@ -19,6 +19,7 @@
#include "drm_plane.h"
#include "opengl/gltexture.h"
#include "utils/damagejournal.h"
#include "utils/filedescriptor.h"
struct gbm_bo;
@ -120,8 +121,8 @@ private:
std::shared_ptr<EglSwapchain> createGbmSwapchain(DrmGpu *gpu, EglContext *context, const QSize &size, uint32_t format, const QList<uint64_t> &modifiers, bool forceLinear) const;
std::shared_ptr<DrmFramebuffer> doRenderTestBuffer(Surface *surface) const;
std::shared_ptr<DrmFramebuffer> importBuffer(Surface *surface, EglSwapchainSlot *source) const;
std::shared_ptr<DrmFramebuffer> importWithEgl(Surface *surface, GraphicsBuffer *sourceBuffer) const;
std::shared_ptr<DrmFramebuffer> importBuffer(Surface *surface, EglSwapchainSlot *source, const FileDescriptor &readFence) const;
std::shared_ptr<DrmFramebuffer> importWithEgl(Surface *surface, GraphicsBuffer *sourceBuffer, const FileDescriptor &readFence) const;
std::shared_ptr<DrmFramebuffer> importWithCpu(Surface *surface, EglSwapchainSlot *source) const;
std::unique_ptr<Surface> m_surface;

View file

@ -836,7 +836,7 @@ GraphicsBufferAllocator *DrmGpu::graphicsBufferAllocator() const
return m_allocator.get();
}
std::shared_ptr<DrmFramebuffer> DrmGpu::importBuffer(GraphicsBuffer *buffer)
std::shared_ptr<DrmFramebuffer> DrmGpu::importBuffer(GraphicsBuffer *buffer, FileDescriptor &&readFence)
{
const DmaBufAttributes *attributes = buffer->dmabufAttributes();
if (Q_UNLIKELY(!attributes)) {
@ -911,7 +911,7 @@ std::shared_ptr<DrmFramebuffer> DrmGpu::importBuffer(GraphicsBuffer *buffer)
return nullptr;
}
return std::make_shared<DrmFramebuffer>(this, framebufferId, buffer);
return std::make_shared<DrmFramebuffer>(this, framebufferId, buffer, std::move(readFence));
}
DrmLease::DrmLease(DrmGpu *gpu, FileDescriptor &&fd, uint32_t lesseeId, const QList<DrmOutput *> &outputs)

View file

@ -106,7 +106,7 @@ public:
bool maybeModeset();
GraphicsBufferAllocator *graphicsBufferAllocator() const;
std::shared_ptr<DrmFramebuffer> importBuffer(GraphicsBuffer *buffer);
std::shared_ptr<DrmFramebuffer> importBuffer(GraphicsBuffer *buffer, FileDescriptor &&explicitFence);
void releaseBuffers();
void recreateSurfaces();

View file

@ -49,7 +49,7 @@ std::optional<OutputLayerBeginFrameInfo> DrmQPainterLayer::beginFrame()
bool DrmQPainterLayer::endFrame(const QRegion &renderedRegion, const QRegion &damagedRegion)
{
m_renderTime = std::chrono::steady_clock::now() - m_renderStart;
m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer());
m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer(), FileDescriptor{});
m_damageJournal.add(damagedRegion);
m_swapchain->release(m_currentBuffer);
if (!m_currentFramebuffer) {
@ -64,7 +64,7 @@ bool DrmQPainterLayer::checkTestBuffer()
m_swapchain = std::make_shared<QPainterSwapchain>(m_pipeline->gpu()->graphicsBufferAllocator(), m_pipeline->mode()->size(), DRM_FORMAT_XRGB8888);
m_currentBuffer = m_swapchain->acquire();
if (m_currentBuffer) {
m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer());
m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer(), FileDescriptor{});
m_swapchain->release(m_currentBuffer);
if (!m_currentFramebuffer) {
qCWarning(KWIN_DRM, "Failed to create dumb framebuffer: %s", strerror(errno));
@ -125,7 +125,7 @@ std::optional<OutputLayerBeginFrameInfo> DrmCursorQPainterLayer::beginFrame()
bool DrmCursorQPainterLayer::endFrame(const QRegion &renderedRegion, const QRegion &damagedRegion)
{
m_renderTime = std::chrono::steady_clock::now() - m_renderStart;
m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer());
m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer(), FileDescriptor{});
m_swapchain->release(m_currentBuffer);
if (!m_currentFramebuffer) {
qCWarning(KWIN_DRM, "Failed to create dumb framebuffer for the cursor: %s", strerror(errno));