backends/drm: use explicit sync where possible
Instead of calling glFinish, which blocks until it's done and has high CPU usage on NVidia, use EGL_ANDROID_native_fence_fd to get an explicit sync fd, which the commit thread automatically waits on before committing the buffer to KMS. CCBUG: 452219
This commit is contained in:
parent
3118ba5982
commit
1c8bd1be62
8 changed files with 38 additions and 34 deletions
|
@ -38,7 +38,7 @@ namespace KWin
|
|||
static bool s_envIsSet = false;
|
||||
static bool s_disableBufferWait = qEnvironmentVariableIntValue("KWIN_DRM_DISABLE_BUFFER_READABILITY_CHECKS", &s_envIsSet) && s_envIsSet;
|
||||
|
||||
DrmFramebuffer::DrmFramebuffer(DrmGpu *gpu, uint32_t fbId, GraphicsBuffer *buffer)
|
||||
DrmFramebuffer::DrmFramebuffer(DrmGpu *gpu, uint32_t fbId, GraphicsBuffer *buffer, FileDescriptor &&readFence)
|
||||
: m_framebufferId(fbId)
|
||||
, m_gpu(gpu)
|
||||
, m_bufferRef(buffer)
|
||||
|
@ -48,7 +48,9 @@ DrmFramebuffer::DrmFramebuffer(DrmGpu *gpu, uint32_t fbId, GraphicsBuffer *buffe
|
|||
// See https://gitlab.freedesktop.org/drm/intel/-/issues/9415
|
||||
m_readable = true;
|
||||
}
|
||||
m_syncFd = std::move(readFence);
|
||||
#ifdef DMA_BUF_IOCTL_EXPORT_SYNC_FILE
|
||||
if (!m_syncFd.isValid()) {
|
||||
dma_buf_export_sync_file req{
|
||||
.flags = DMA_BUF_SYNC_READ,
|
||||
.fd = -1,
|
||||
|
@ -56,6 +58,7 @@ DrmFramebuffer::DrmFramebuffer(DrmGpu *gpu, uint32_t fbId, GraphicsBuffer *buffe
|
|||
if (drmIoctl(buffer->dmabufAttributes()->fd[0].get(), DMA_BUF_IOCTL_EXPORT_SYNC_FILE, &req) == 0) {
|
||||
m_syncFd = FileDescriptor{req.fd};
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ class DrmFramebuffer;
|
|||
class DrmFramebuffer
|
||||
{
|
||||
public:
|
||||
DrmFramebuffer(DrmGpu *gpu, uint32_t fbId, GraphicsBuffer *buffer);
|
||||
DrmFramebuffer(DrmGpu *gpu, uint32_t fbId, GraphicsBuffer *buffer, FileDescriptor &&readFence);
|
||||
~DrmFramebuffer();
|
||||
|
||||
uint32_t framebufferId() const;
|
||||
|
|
|
@ -139,7 +139,7 @@ bool EglGbmLayer::scanout(SurfaceItem *surfaceItem)
|
|||
if (!formats[dmabufAttributes->format].contains(dmabufAttributes->modifier)) {
|
||||
return false;
|
||||
}
|
||||
m_scanoutBuffer = m_pipeline->gpu()->importBuffer(buffer);
|
||||
m_scanoutBuffer = m_pipeline->gpu()->importBuffer(buffer, FileDescriptor{});
|
||||
if (m_scanoutBuffer && m_pipeline->testScanout()) {
|
||||
m_dmabufFeedback.scanoutSuccessful(surface);
|
||||
m_currentDamage = surfaceItem->mapFromBuffer(surfaceItem->damage());
|
||||
|
|
|
@ -179,12 +179,13 @@ bool EglGbmLayerSurface::endRendering(const QRegion &damagedRegion)
|
|||
m_surface->gbmSwapchain->release(m_surface->currentSlot);
|
||||
m_surface->timeQuery->end();
|
||||
glFlush();
|
||||
if (m_eglBackend->contextObject()->isSoftwareRenderer() || m_eglBackend->gpu()->isNVidia()) {
|
||||
EGLNativeFence sourceFence(m_eglBackend->eglDisplayObject());
|
||||
if (!sourceFence.isValid()) {
|
||||
// llvmpipe doesn't do synchronization properly: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9375
|
||||
// and NVidia doesn't support implicit sync
|
||||
glFinish();
|
||||
}
|
||||
const auto buffer = importBuffer(m_surface.get(), m_surface->currentSlot.get());
|
||||
const auto buffer = importBuffer(m_surface.get(), m_surface->currentSlot.get(), sourceFence.fileDescriptor());
|
||||
m_surface->renderEnd = std::chrono::steady_clock::now();
|
||||
if (buffer) {
|
||||
m_surface->currentFramebuffer = buffer;
|
||||
|
@ -486,7 +487,7 @@ std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::doRenderTestBuffer(Surface *
|
|||
if (!slot) {
|
||||
return nullptr;
|
||||
}
|
||||
if (const auto ret = importBuffer(surface, slot.get())) {
|
||||
if (const auto ret = importBuffer(surface, slot.get(), FileDescriptor{})) {
|
||||
surface->currentSlot = slot;
|
||||
surface->currentFramebuffer = ret;
|
||||
return ret;
|
||||
|
@ -495,14 +496,14 @@ std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::doRenderTestBuffer(Surface *
|
|||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importBuffer(Surface *surface, EglSwapchainSlot *slot) const
|
||||
std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importBuffer(Surface *surface, EglSwapchainSlot *slot, const FileDescriptor &readFence) const
|
||||
{
|
||||
if (surface->bufferTarget == BufferTarget::Dumb || surface->importMode == MultiGpuImportMode::DumbBuffer) {
|
||||
return importWithCpu(surface, slot);
|
||||
} else if (surface->importMode == MultiGpuImportMode::Egl) {
|
||||
return importWithEgl(surface, slot->buffer());
|
||||
return importWithEgl(surface, slot->buffer(), readFence);
|
||||
} else {
|
||||
const auto ret = m_gpu->importBuffer(slot->buffer());
|
||||
const auto ret = m_gpu->importBuffer(slot->buffer(), readFence.duplicate());
|
||||
if (!ret) {
|
||||
qCWarning(KWIN_DRM, "Failed to create framebuffer: %s", strerror(errno));
|
||||
}
|
||||
|
@ -510,24 +511,23 @@ std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importBuffer(Surface *surfac
|
|||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importWithEgl(Surface *surface, GraphicsBuffer *sourceBuffer) const
|
||||
std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importWithEgl(Surface *surface, GraphicsBuffer *sourceBuffer, const FileDescriptor &readFence) const
|
||||
{
|
||||
Q_ASSERT(surface->importGbmSwapchain);
|
||||
|
||||
EGLNativeFence sourceFence(m_eglBackend->eglDisplayObject());
|
||||
|
||||
const auto display = m_eglBackend->displayForGpu(m_gpu);
|
||||
// the NVidia proprietary driver supports neither implicit sync nor EGL_ANDROID_native_fence_sync
|
||||
if (!sourceFence.isValid() || !display->supportsNativeFence()) {
|
||||
// older versions of the NVidia proprietary driver support neither implicit sync nor EGL_ANDROID_native_fence_sync
|
||||
if (!readFence.isValid() || !display->supportsNativeFence()) {
|
||||
glFinish();
|
||||
}
|
||||
|
||||
if (!surface->importContext->makeCurrent()) {
|
||||
return nullptr;
|
||||
}
|
||||
surface->importTimeQuery->begin();
|
||||
|
||||
if (sourceFence.isValid()) {
|
||||
const auto destinationFence = EGLNativeFence::importFence(surface->importContext->displayObject(), sourceFence.fileDescriptor().duplicate());
|
||||
if (readFence.isValid()) {
|
||||
const auto destinationFence = EGLNativeFence::importFence(surface->importContext->displayObject(), readFence.duplicate());
|
||||
destinationFence.waitSync();
|
||||
}
|
||||
|
||||
|
@ -563,8 +563,8 @@ std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importWithEgl(Surface *surfa
|
|||
|
||||
surface->importContext->shaderManager()->popShader();
|
||||
glFlush();
|
||||
if (m_gpu->isNVidia()) {
|
||||
// the proprietary NVidia driver desn't support implicit sync
|
||||
EGLNativeFence endFence(display);
|
||||
if (!endFence.isValid()) {
|
||||
glFinish();
|
||||
}
|
||||
surface->importGbmSwapchain->release(slot);
|
||||
|
@ -572,7 +572,7 @@ std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importWithEgl(Surface *surfa
|
|||
|
||||
// restore the old context
|
||||
m_eglBackend->makeCurrent();
|
||||
return m_gpu->importBuffer(slot->buffer());
|
||||
return m_gpu->importBuffer(slot->buffer(), endFence.fileDescriptor().duplicate());
|
||||
}
|
||||
|
||||
std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importWithCpu(Surface *surface, EglSwapchainSlot *source) const
|
||||
|
@ -601,7 +601,7 @@ std::shared_ptr<DrmFramebuffer> EglGbmLayerSurface::importWithCpu(Surface *surfa
|
|||
}
|
||||
GLFramebuffer::popFramebuffer();
|
||||
|
||||
const auto ret = m_gpu->importBuffer(slot->buffer());
|
||||
const auto ret = m_gpu->importBuffer(slot->buffer(), FileDescriptor{});
|
||||
if (!ret) {
|
||||
qCWarning(KWIN_DRM, "Failed to create a framebuffer: %s", strerror(errno));
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "drm_plane.h"
|
||||
#include "opengl/gltexture.h"
|
||||
#include "utils/damagejournal.h"
|
||||
#include "utils/filedescriptor.h"
|
||||
|
||||
struct gbm_bo;
|
||||
|
||||
|
@ -120,8 +121,8 @@ private:
|
|||
std::shared_ptr<EglSwapchain> createGbmSwapchain(DrmGpu *gpu, EglContext *context, const QSize &size, uint32_t format, const QList<uint64_t> &modifiers, bool forceLinear) const;
|
||||
|
||||
std::shared_ptr<DrmFramebuffer> doRenderTestBuffer(Surface *surface) const;
|
||||
std::shared_ptr<DrmFramebuffer> importBuffer(Surface *surface, EglSwapchainSlot *source) const;
|
||||
std::shared_ptr<DrmFramebuffer> importWithEgl(Surface *surface, GraphicsBuffer *sourceBuffer) const;
|
||||
std::shared_ptr<DrmFramebuffer> importBuffer(Surface *surface, EglSwapchainSlot *source, const FileDescriptor &readFence) const;
|
||||
std::shared_ptr<DrmFramebuffer> importWithEgl(Surface *surface, GraphicsBuffer *sourceBuffer, const FileDescriptor &readFence) const;
|
||||
std::shared_ptr<DrmFramebuffer> importWithCpu(Surface *surface, EglSwapchainSlot *source) const;
|
||||
|
||||
std::unique_ptr<Surface> m_surface;
|
||||
|
|
|
@ -836,7 +836,7 @@ GraphicsBufferAllocator *DrmGpu::graphicsBufferAllocator() const
|
|||
return m_allocator.get();
|
||||
}
|
||||
|
||||
std::shared_ptr<DrmFramebuffer> DrmGpu::importBuffer(GraphicsBuffer *buffer)
|
||||
std::shared_ptr<DrmFramebuffer> DrmGpu::importBuffer(GraphicsBuffer *buffer, FileDescriptor &&readFence)
|
||||
{
|
||||
const DmaBufAttributes *attributes = buffer->dmabufAttributes();
|
||||
if (Q_UNLIKELY(!attributes)) {
|
||||
|
@ -911,7 +911,7 @@ std::shared_ptr<DrmFramebuffer> DrmGpu::importBuffer(GraphicsBuffer *buffer)
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
return std::make_shared<DrmFramebuffer>(this, framebufferId, buffer);
|
||||
return std::make_shared<DrmFramebuffer>(this, framebufferId, buffer, std::move(readFence));
|
||||
}
|
||||
|
||||
DrmLease::DrmLease(DrmGpu *gpu, FileDescriptor &&fd, uint32_t lesseeId, const QList<DrmOutput *> &outputs)
|
||||
|
|
|
@ -106,7 +106,7 @@ public:
|
|||
bool maybeModeset();
|
||||
|
||||
GraphicsBufferAllocator *graphicsBufferAllocator() const;
|
||||
std::shared_ptr<DrmFramebuffer> importBuffer(GraphicsBuffer *buffer);
|
||||
std::shared_ptr<DrmFramebuffer> importBuffer(GraphicsBuffer *buffer, FileDescriptor &&explicitFence);
|
||||
void releaseBuffers();
|
||||
void recreateSurfaces();
|
||||
|
||||
|
|
|
@ -49,7 +49,7 @@ std::optional<OutputLayerBeginFrameInfo> DrmQPainterLayer::beginFrame()
|
|||
bool DrmQPainterLayer::endFrame(const QRegion &renderedRegion, const QRegion &damagedRegion)
|
||||
{
|
||||
m_renderTime = std::chrono::steady_clock::now() - m_renderStart;
|
||||
m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer());
|
||||
m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer(), FileDescriptor{});
|
||||
m_damageJournal.add(damagedRegion);
|
||||
m_swapchain->release(m_currentBuffer);
|
||||
if (!m_currentFramebuffer) {
|
||||
|
@ -64,7 +64,7 @@ bool DrmQPainterLayer::checkTestBuffer()
|
|||
m_swapchain = std::make_shared<QPainterSwapchain>(m_pipeline->gpu()->graphicsBufferAllocator(), m_pipeline->mode()->size(), DRM_FORMAT_XRGB8888);
|
||||
m_currentBuffer = m_swapchain->acquire();
|
||||
if (m_currentBuffer) {
|
||||
m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer());
|
||||
m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer(), FileDescriptor{});
|
||||
m_swapchain->release(m_currentBuffer);
|
||||
if (!m_currentFramebuffer) {
|
||||
qCWarning(KWIN_DRM, "Failed to create dumb framebuffer: %s", strerror(errno));
|
||||
|
@ -125,7 +125,7 @@ std::optional<OutputLayerBeginFrameInfo> DrmCursorQPainterLayer::beginFrame()
|
|||
bool DrmCursorQPainterLayer::endFrame(const QRegion &renderedRegion, const QRegion &damagedRegion)
|
||||
{
|
||||
m_renderTime = std::chrono::steady_clock::now() - m_renderStart;
|
||||
m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer());
|
||||
m_currentFramebuffer = m_pipeline->gpu()->importBuffer(m_currentBuffer->buffer(), FileDescriptor{});
|
||||
m_swapchain->release(m_currentBuffer);
|
||||
if (!m_currentFramebuffer) {
|
||||
qCWarning(KWIN_DRM, "Failed to create dumb framebuffer for the cursor: %s", strerror(errno));
|
||||
|
|
Loading…
Reference in a new issue