From 5d7e9edba1876523f75c74362242aaa56629fba5 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 2 Mar 2018 14:46:26 +0000 Subject: [PATCH] i965: enable INTEL_blackhole_render v2: condition the extension on context isolation support from the kernel (Chris) v3: (Lionel) The initial version of this change used a feature of the Gen7+ command parser to turn the primitive instructions into no-ops. Unfortunately this doesn't play well with how we're using the hardware outside of the user submitted commands. For example resolves are implicit operations which should not be turned into no-ops as part of the previously submitted commands (before blackhole_render is enabled) might not be disabled. For example this sequence : glClear(); glEnable(GL_BLACKHOLE_RENDER_INTEL); glDrawArrays(...); glReadPixels(...); glDisable(GL_BLACKHOLE_RENDER_INTEL); While clear has been emitted outside the blackhole render, it should still be resolved properly in the read pixels. Hence we need to be more selective and only disable user submitted commands. This v3 manually turns primitives into MI_NOOP if blackhole render is enabled. This lets us enable this feature on any platform. v4: Limit support to gen7.5+ (Lionel) v5: Enable Gen7.5 support again, requires a kernel update of the command parser (Lionel) v6: Disable Gen7.5 again... Kernel devs want these patches landed before they accept the kernel patches to whitelist INSTPM (Lionel) v7: Simplify change by never holding noop (there was a shortcoming in the test not considering fast clears) Only program register using MI_LRI (Lionel) v8: Switch to software managed blackhole (BDW hangs on compute batches...) 
v9: Simplify the noop state tracking (Lionel) v10: Don't modify flush function (Ken) Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke (v8) Part-of: --- src/mesa/drivers/dri/i965/brw_context.c | 26 +++++++++++++++++++ src/mesa/drivers/dri/i965/brw_context.h | 3 +++ src/mesa/drivers/dri/i965/intel_batchbuffer.c | 13 ++++++++++ src/mesa/drivers/dri/i965/intel_batchbuffer.h | 1 + src/mesa/drivers/dri/i965/intel_extensions.c | 6 +++++ 5 files changed, 49 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 92b27cc6864..21566893ec8 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -289,6 +289,31 @@ intel_glFlush(struct gl_context *ctx) brw->need_flush_throttle = true; } +static void +intel_glEnable(struct gl_context *ctx, GLenum cap, GLboolean state) +{ + struct brw_context *brw = brw_context(ctx); + + switch (cap) { + case GL_BLACKHOLE_RENDER_INTEL: + brw->frontend_noop = state; + intel_batchbuffer_flush(brw); + intel_batchbuffer_maybe_noop(brw); + /* Because we started previous batches with a potential + * MI_BATCH_BUFFER_END if NOOP was enabled, that means that anything + * that was ever emitted after that never made it to the HW. So when the + * blackhole state changes from NOOP->!NOOP reupload the entire state. 
+ */ + if (!brw->frontend_noop) { + brw->NewGLState = ~0u; + brw->ctx.NewDriverState = ~0ull; + } + break; + default: + break; + } +} + static void intel_finish(struct gl_context * ctx) { @@ -318,6 +343,7 @@ brw_init_driver_functions(struct brw_context *brw, if (!brw->driContext->driScreenPriv->dri2.useInvalidate) functions->Viewport = intel_viewport; + functions->Enable = intel_glEnable; functions->Flush = intel_glFlush; functions->Finish = intel_finish; functions->GetString = intel_get_string; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index f44a392d79c..754d651d190 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -853,6 +853,9 @@ struct brw_context /* The last PMA stall bits programmed. */ uint32_t pma_stall_bits; + /* Whether INTEL_blackhole_render is active. */ + bool frontend_noop; + struct { struct { /** diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index d5676e9cb9f..f1465ed3556 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -573,6 +573,8 @@ brw_new_batch(struct brw_context *brw) */ if (INTEL_DEBUG & DEBUG_SHADER_TIME) brw_collect_and_report_shader_time(brw); + + intel_batchbuffer_maybe_noop(brw); } /** @@ -891,6 +893,17 @@ _intel_batchbuffer_flush_fence(struct brw_context *brw, return ret; } +void +intel_batchbuffer_maybe_noop(struct brw_context *brw) +{ + if (!brw->frontend_noop || USED_BATCH(brw->batch) != 0) + return; + + BEGIN_BATCH(1); + OUT_BATCH(MI_BATCH_BUFFER_END); + ADVANCE_BATCH(); +} + bool brw_batch_references(struct intel_batchbuffer *batch, struct brw_bo *bo) { diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index 91720dad5b4..749fb04c88f 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -30,6
+30,7 @@ void intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz); int _intel_batchbuffer_flush_fence(struct brw_context *brw, int in_fence_fd, int *out_fence_fd, const char *file, int line); +void intel_batchbuffer_maybe_noop(struct brw_context *brw); #define intel_batchbuffer_flush(brw) \ _intel_batchbuffer_flush_fence((brw), -1, NULL, __FILE__, __LINE__) diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 01dacbec8d7..9d511e0765c 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -315,6 +315,12 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.OES_copy_image = true; } + /* Gen < 6 still uses the blitter. It's somewhat annoying to add support + * for blackhole there... Does anybody actually care anymore anyway? + */ + if (devinfo->gen >= 6) + ctx->Extensions.INTEL_blackhole_render = true; + if (devinfo->gen >= 8) { ctx->Extensions.ARB_gpu_shader_int64 = true; /* requires ARB_gpu_shader_int64 */ -- 2.30.2