From a347a5a12c2ed98c5959ab2da9ec4c0fcd365aeb Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 11 May 2018 18:16:48 -0700 Subject: [PATCH] i965: Remove ring switching entirely Reviewed-by: Topi Pohjolainen Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_compute.c | 2 +- src/mesa/drivers/dri/i965/brw_context.h | 7 -- src/mesa/drivers/dri/i965/brw_draw.c | 2 +- src/mesa/drivers/dri/i965/brw_misc_state.c | 2 +- src/mesa/drivers/dri/i965/brw_pipe_control.c | 32 ++----- src/mesa/drivers/dri/i965/brw_urb.c | 2 +- src/mesa/drivers/dri/i965/genX_blorp_exec.c | 4 +- src/mesa/drivers/dri/i965/genX_state_upload.c | 2 +- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 92 +++++++------------ src/mesa/drivers/dri/i965/intel_batchbuffer.h | 15 ++- src/mesa/drivers/dri/i965/intel_blit.c | 6 +- 11 files changed, 61 insertions(+), 105 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c index 5ce899bcbcc..de08fc3ac16 100644 --- a/src/mesa/drivers/dri/i965/brw_compute.c +++ b/src/mesa/drivers/dri/i965/brw_compute.c @@ -182,7 +182,7 @@ brw_dispatch_compute_common(struct gl_context *ctx) /* Flush the batch if the batch/state buffers are nearly full. We can * grow them if needed, but this is not free, so we'd like to avoid it. */ - intel_batchbuffer_require_space(brw, 600, RENDER_RING); + intel_batchbuffer_require_space(brw, 600); brw_require_statebuffer_space(brw, 2500); intel_batchbuffer_save_state(brw); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 773f104824d..2613b9fda22 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -461,12 +461,6 @@ struct brw_query_object { bool flushed; }; -enum brw_gpu_ring { - UNKNOWN_RING, - RENDER_RING, - BLT_RING, -}; - struct brw_reloc_list { struct drm_i915_gem_relocation_entry *relocs; int reloc_count; @@ -497,7 +491,6 @@ struct intel_batchbuffer { uint32_t *map_next; uint32_t state_used; - enum brw_gpu_ring ring; bool use_shadow_copy; bool use_batch_first; bool needs_sol_reset; diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index ae3b7be2ddd..18aa12feaef 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -798,7 +798,7 @@ brw_draw_single_prim(struct gl_context *ctx, /* Flush the batch if the batch/state buffers are nearly full. We can * grow them if needed, but this is not free, so we'd like to avoid it. */ - intel_batchbuffer_require_space(brw, 1500, RENDER_RING); + intel_batchbuffer_require_space(brw, 1500); brw_require_statebuffer_space(brw, 2400); intel_batchbuffer_save_state(brw); diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 6d7ab92cf61..9a663b1d61c 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -348,7 +348,7 @@ brw_emit_depthbuffer(struct brw_context *brw) brw_emit_depth_stall_flushes(brw); const unsigned ds_dwords = brw->isl_dev.ds.size / 4; - intel_batchbuffer_begin(brw, ds_dwords, RENDER_RING); + intel_batchbuffer_begin(brw, ds_dwords); uint32_t *ds_map = brw->batch.map_next; const uint32_t ds_offset = (char *)ds_map - (char *)brw->batch.batch.map; diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c index cbd2853f58c..122ac260703 100644 --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c @@ -544,29 +544,17 @@ brw_emit_mi_flush(struct brw_context *brw) { const struct gen_device_info *devinfo = &brw->screen->devinfo; - if (brw->batch.ring == BLT_RING && devinfo->gen >= 6) { - const unsigned n_dwords = devinfo->gen >= 8 ? 5 : 4; - BEGIN_BATCH_BLT(n_dwords); - OUT_BATCH(MI_FLUSH_DW | (n_dwords - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - if (n_dwords == 5) - OUT_BATCH(0); - ADVANCE_BATCH(); - } else { - int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH; - if (devinfo->gen >= 6) { - flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE | - PIPE_CONTROL_CONST_CACHE_INVALIDATE | - PIPE_CONTROL_DATA_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_VF_CACHE_INVALIDATE | - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_CS_STALL; - } - brw_emit_pipe_control_flush(brw, flags); + int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH; + if (devinfo->gen >= 6) { + flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE | + PIPE_CONTROL_CONST_CACHE_INVALIDATE | + PIPE_CONTROL_DATA_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_VF_CACHE_INVALIDATE | + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | + PIPE_CONTROL_CS_STALL; } + brw_emit_pipe_control_flush(brw, flags); } int diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index a86fa78acaf..d34240ee1b7 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -264,5 +264,5 @@ void brw_upload_urb_fence(struct brw_context *brw) while (--pad); } - intel_batchbuffer_data(brw, &uf, sizeof(uf), RENDER_RING); + intel_batchbuffer_data(brw, &uf, sizeof(uf)); } diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c index 581438966e5..808bff0db85 100644 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c @@ -44,7 +44,7 @@ blorp_emit_dwords(struct blorp_batch *batch, unsigned n) assert(batch->blorp->driver_ctx == batch->driver_batch); struct brw_context *brw = batch->driver_batch; - intel_batchbuffer_begin(brw, n, RENDER_RING); + intel_batchbuffer_begin(brw, n); uint32_t *map = brw->batch.map_next; brw->batch.map_next += n; intel_batchbuffer_advance(brw); @@ -277,7 +277,7 @@ genX(blorp_exec)(struct blorp_batch *batch, brw_select_pipeline(brw, BRW_RENDER_PIPELINE); retry: - intel_batchbuffer_require_space(brw, 1400, RENDER_RING); + intel_batchbuffer_require_space(brw, 1400); brw_require_statebuffer_space(brw, 600); intel_batchbuffer_save_state(brw); brw->batch.no_wrap = true; diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 4f44b9965e6..b485e2cf811 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -59,7 +59,7 @@ UNUSED static void * emit_dwords(struct brw_context *brw, unsigned n) { - intel_batchbuffer_begin(brw, n, RENDER_RING); + intel_batchbuffer_begin(brw, n); uint32_t *map = brw->batch.map_next; brw->batch.map_next += n; intel_batchbuffer_advance(brw); diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 4f78d8d0508..8f47e613df8 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -274,11 +274,6 @@ intel_batchbuffer_reset(struct brw_context *brw) batch->needs_sol_reset = false; batch->state_base_address_emitted = false; - /* We don't know what ring the new batch will be sent to until we see the - * first BEGIN_BATCH or BEGIN_BATCH_BLT. Mark it as unknown. - */ - batch->ring = UNKNOWN_RING; - if (batch->state_batch_sizes) _mesa_hash_table_clear(batch->state_batch_sizes, NULL); } @@ -311,8 +306,6 @@ intel_batchbuffer_reset_to_saved(struct brw_context *brw) brw->batch.exec_count = brw->batch.saved.exec_count; brw->batch.map_next = brw->batch.saved.map_next; - if (USED_BATCH(brw->batch) == 0) - brw->batch.ring = UNKNOWN_RING; } void @@ -507,18 +500,10 @@ grow_buffer(struct brw_context *brw, } void -intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz, - enum brw_gpu_ring ring) +intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz) { - const struct gen_device_info *devinfo = &brw->screen->devinfo; struct intel_batchbuffer *batch = &brw->batch; - /* If we're switching rings, implicitly flush the batch. */ - if (unlikely(ring != brw->batch.ring) && brw->batch.ring != UNKNOWN_RING && - devinfo->gen >= 6) { - intel_batchbuffer_flush(brw); - } - const unsigned batch_used = USED_BATCH(*batch) * 4; if (batch_used + sz >= BATCH_SZ && !batch->no_wrap) { intel_batchbuffer_flush(brw); @@ -530,11 +515,6 @@ intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz, batch->map_next = (void *) batch->batch.map + batch_used; assert(batch_used + sz < batch->batch.bo->size); } - - /* The intel_batchbuffer_flush() calls above might have changed - * brw->batch.ring to UNKNOWN_RING, so we need to set it here at the end. - */ - brw->batch.ring = ring; } /** @@ -601,46 +581,44 @@ brw_finish_batch(struct brw_context *brw) */ brw_emit_query_end(brw); - if (brw->batch.ring == RENDER_RING) { - /* Work around L3 state leaks into contexts set MI_RESTORE_INHIBIT which - * assume that the L3 cache is configured according to the hardware - * defaults. On Kernel 4.16+, we no longer need to do this. - */ - if (devinfo->gen >= 7 && - !(brw->screen->kernel_features & KERNEL_ALLOWS_CONTEXT_ISOLATION)) - gen7_restore_default_l3_config(brw); - - if (devinfo->is_haswell) { - /* From the Haswell PRM, Volume 2b, Command Reference: Instructions, - * 3DSTATE_CC_STATE_POINTERS > "Note": - * - * "SW must program 3DSTATE_CC_STATE_POINTERS command at the end of every - * 3D batch buffer followed by a PIPE_CONTROL with RC flush and CS stall." - * - * From the example in the docs, it seems to expect a regular pipe control - * flush here as well. We may have done it already, but meh. - * - * See also WaAvoidRCZCounterRollover. - */ - brw_emit_mi_flush(brw); - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2)); - OUT_BATCH(brw->cc.state_offset | 1); - ADVANCE_BATCH(); - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_CS_STALL); - } + /* Work around L3 state leaks into contexts set MI_RESTORE_INHIBIT which + * assume that the L3 cache is configured according to the hardware + * defaults. On Kernel 4.16+, we no longer need to do this. + */ + if (devinfo->gen >= 7 && + !(brw->screen->kernel_features & KERNEL_ALLOWS_CONTEXT_ISOLATION)) + gen7_restore_default_l3_config(brw); - /* Do not restore push constant packets during context restore. */ - if (devinfo->gen >= 7) - gen10_emit_isp_disable(brw); + if (devinfo->is_haswell) { + /* From the Haswell PRM, Volume 2b, Command Reference: Instructions, + * 3DSTATE_CC_STATE_POINTERS > "Note": + * + * "SW must program 3DSTATE_CC_STATE_POINTERS command at the end of every + * 3D batch buffer followed by a PIPE_CONTROL with RC flush and CS stall." + * + * From the example in the docs, it seems to expect a regular pipe control + * flush here as well. We may have done it already, but meh. + * + * See also WaAvoidRCZCounterRollover. + */ + brw_emit_mi_flush(brw); + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2)); + OUT_BATCH(brw->cc.state_offset | 1); + ADVANCE_BATCH(); + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_CS_STALL); } + /* Do not restore push constant packets during context restore. */ + if (devinfo->gen >= 7) + gen10_emit_isp_disable(brw); + /* Emit MI_BATCH_BUFFER_END to finish our batch. Note that execbuf2 * requires our batch size to be QWord aligned, so we pad it out if * necessary by emitting an extra MI_NOOP after the end. */ - intel_batchbuffer_require_space(brw, 8, brw->batch.ring); + intel_batchbuffer_require_space(brw, 8); *brw->batch.map_next++ = MI_BATCH_BUFFER_END; if (USED_BATCH(brw->batch) & 1) { *brw->batch.map_next++ = MI_NOOP; @@ -747,7 +725,6 @@ execbuffer(int fd, static int submit_batch(struct brw_context *brw, int in_fence_fd, int *out_fence_fd) { - const struct gen_device_info *devinfo = &brw->screen->devinfo; __DRIscreen *dri_screen = brw->screen->driScrnPriv; struct intel_batchbuffer *batch = &brw->batch; int ret = 0; @@ -776,7 +753,6 @@ submit_batch(struct brw_context *brw, int in_fence_fd, int *out_fence_fd) * To avoid stalling, execobject.offset should match the current * address of that object within the active context. */ - assert(devinfo->gen < 6 || batch->ring == RENDER_RING); int flags = I915_EXEC_NO_RELOC | I915_EXEC_RENDER; if (batch->needs_sol_reset) @@ -1045,10 +1021,10 @@ brw_state_batch(struct brw_context *brw, void intel_batchbuffer_data(struct brw_context *brw, - const void *data, GLuint bytes, enum brw_gpu_ring ring) + const void *data, GLuint bytes) { assert((bytes & 3) == 0); - intel_batchbuffer_require_space(brw, bytes, ring); + intel_batchbuffer_require_space(brw, bytes); memcpy(brw->batch.map_next, data, bytes); brw->batch.map_next += bytes >> 2; } diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index 7be5b10f3ab..bd07bef9deb 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -25,8 +25,7 @@ void intel_batchbuffer_init(struct brw_context *brw); void intel_batchbuffer_free(struct intel_batchbuffer *batch); void intel_batchbuffer_save_state(struct brw_context *brw); void intel_batchbuffer_reset_to_saved(struct brw_context *brw); -void intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz, - enum brw_gpu_ring ring); +void intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz); int _intel_batchbuffer_flush_fence(struct brw_context *brw, int in_fence_fd, int *out_fence_fd, const char *file, int line); @@ -43,8 +42,7 @@ int _intel_batchbuffer_flush_fence(struct brw_context *brw, * intel_buffer_dword() calls. */ void intel_batchbuffer_data(struct brw_context *brw, - const void *data, GLuint bytes, - enum brw_gpu_ring ring); + const void *data, GLuint bytes); bool brw_batch_has_aperture_space(struct brw_context *brw, unsigned extra_space_in_bytes); @@ -81,9 +79,9 @@ static inline uint32_t float_as_int(float f) } static inline void -intel_batchbuffer_begin(struct brw_context *brw, int n, enum brw_gpu_ring ring) +intel_batchbuffer_begin(struct brw_context *brw, int n) { - intel_batchbuffer_require_space(brw, n * 4, ring); + intel_batchbuffer_require_space(brw, n * 4); #ifdef DEBUG brw->batch.emit = USED_BATCH(brw->batch); @@ -117,12 +115,13 @@ brw_ptr_in_state_buffer(struct intel_batchbuffer *batch, void *p) } #define BEGIN_BATCH(n) do { \ - intel_batchbuffer_begin(brw, (n), RENDER_RING); \ + intel_batchbuffer_begin(brw, (n)); \ uint32_t *__map = brw->batch.map_next; \ brw->batch.map_next += (n) #define BEGIN_BATCH_BLT(n) do { \ - intel_batchbuffer_begin(brw, (n), BLT_RING); \ + assert(brw->screen->devinfo.gen < 6); \ + intel_batchbuffer_begin(brw, (n)); \ uint32_t *__map = brw->batch.map_next; \ brw->batch.map_next += (n) diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 5ef78584cab..90784c5b195 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -288,7 +288,7 @@ emit_copy_blit(struct brw_context *brw, unsigned length = devinfo->gen >= 8 ? 10 : 8; - intel_batchbuffer_require_space(brw, length * 4, BLT_RING); + intel_batchbuffer_require_space(brw, length * 4); DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __func__, src_buffer, src_pitch, src_offset, src_x, src_y, @@ -661,7 +661,7 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw, unsigned xy_setup_blt_length = devinfo->gen >= 8 ? 10 : 8; intel_batchbuffer_require_space(brw, (xy_setup_blt_length * 4) + - (3 * 4) + dwords * 4, BLT_RING); + (3 * 4) + dwords * 4); opcode = XY_SETUP_BLT_CMD; if (cpp == 4) @@ -699,7 +699,7 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw, OUT_BATCH(SET_FIELD(y + h, BLT_Y) | SET_FIELD(x + w, BLT_X)); ADVANCE_BATCH(); - intel_batchbuffer_data(brw, src_bits, dwords * 4, BLT_RING); + intel_batchbuffer_data(brw, src_bits, dwords * 4); brw_emit_mi_flush(brw); -- 2.30.2