From: Kenneth Graunke Date: Wed, 27 Apr 2016 16:35:03 +0000 (-0700) Subject: i965: Send the minimal number of STATE_BASE_ADDRESS packets. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b6f250d7f2f704c8681aaa2a158d1a39851b8494;p=mesa.git i965: Send the minimal number of STATE_BASE_ADDRESS packets. STATE_BASE_ADDRESS stalls the whole pipeline, and the documentation cautions us to emit it as little as possible for better performance. We recently put some hacks in BLORP to try to avoid emitting it if it was already set correctly. However, this wasn't quite minimal: if BLORP is the first operation (e.g. glClear()), then it would emit it, and subsequent draw calls would emit it again. This caused a small drop in performance in GPUTest Triangle when switching from Meta to BLORP. Unlike most packets, STATE_BASE_ADDRESS isn't influenced by GL state: it needs to be emitted once per batch, before most other commands, or whenever we change the program cache BO. It's also valid in both the 3D and compute pipelines, which makes it even more unusual. This patch removes it from the atom mechanism and instead calls brw_upload_state_base_address() directly as part of every draw, compute dispatch, or BLORP operation. We introduce a new flag indicating that STATE_BASE_ADDRESS has already been emitted in this batch; if so, we skip emitting it again. When we make a new program cache BO, we simply reset the flag, so the next operation will emit it again. When we flush/reset the batch, we reset the flag. This guarantees that we'll emit STATE_BASE_ADDRESS only when we have to. It's also less code than the old atom mechanism. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index e8c7f0245be..9a4dd31c4bb 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -619,6 +619,7 @@ struct intel_batchbuffer { uint32_t state_batch_offset; enum brw_gpu_ring ring; bool needs_sol_reset; + bool state_base_address_emitted; struct { uint32_t *map_next; diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index c72f607785a..5510d2c36f4 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -1066,6 +1066,9 @@ const struct brw_tracked_state brw_invariant_state = { void brw_upload_state_base_address(struct brw_context *brw) { + if (brw->batch.state_base_address_emitted) + return; + /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be * programmed prior to STATE_BASE_ADDRESS. 
@@ -1201,13 +1204,5 @@ brw_upload_state_base_address(struct brw_context *brw) */ brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS; + brw->batch.state_base_address_emitted = true; } - -const struct brw_tracked_state brw_state_base_address = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_PROGRAM_CACHE, - }, - .emit = brw_upload_state_base_address -}; diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 880f44e553b..70b17fd3f61 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -66,7 +66,6 @@ extern const struct brw_tracked_state brw_polygon_stipple; extern const struct brw_tracked_state brw_recalculate_urb_fence; extern const struct brw_tracked_state brw_sf_unit; extern const struct brw_tracked_state brw_sf_vp; -extern const struct brw_tracked_state brw_state_base_address; extern const struct brw_tracked_state brw_vs_samplers; extern const struct brw_tracked_state brw_tcs_samplers; extern const struct brw_tracked_state brw_tes_samplers; @@ -195,7 +194,6 @@ brw_depthbuffer_format(struct brw_context *brw); void brw_upload_state_base_address(struct brw_context *brw); - /* gen8_depth_state.c */ void gen8_write_pma_stall_bits(struct brw_context *brw, uint32_t pma_stall_bits); diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index c6aa1344270..0e98e654c1c 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -199,6 +199,7 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size) * that depend on it (state base address on gen5+, or unit state before). 
*/ brw->ctx.NewDriverState |= BRW_NEW_PROGRAM_CACHE; + brw->batch.state_base_address_emitted = false; } /** diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index ed92a85f923..0b47ebe3a14 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -79,7 +79,6 @@ static const struct brw_tracked_state *gen4_atoms[] = /* Command packets: */ &brw_invariant_state, - &brw_state_base_address, &brw_binding_table_pointers, &brw_blend_constant_color, @@ -109,9 +108,6 @@ static const struct brw_tracked_state *gen6_atoms[] = /* Command packets: */ - /* must do before binding table pointers, cc state ptrs */ - &brw_state_base_address, - &brw_cc_vp, &gen6_viewport_state, /* must do after *_vp stages */ @@ -175,9 +171,6 @@ static const struct brw_tracked_state *gen7_render_atoms[] = { /* Command packets: */ - /* must do before binding table pointers, cc state ptrs */ - &brw_state_base_address, - &brw_cc_vp, &gen7_sf_clip_viewport, @@ -268,7 +261,6 @@ static const struct brw_tracked_state *gen7_render_atoms[] = static const struct brw_tracked_state *gen7_compute_atoms[] = { - &brw_state_base_address, &gen7_l3_state, &brw_cs_image_surfaces, &gen7_cs_push_constants, @@ -283,9 +275,6 @@ static const struct brw_tracked_state *gen7_compute_atoms[] = static const struct brw_tracked_state *gen8_render_atoms[] = { - /* Command packets: */ - &brw_state_base_address, - &brw_cc_vp, &gen8_sf_clip_viewport, @@ -383,7 +372,6 @@ static const struct brw_tracked_state *gen8_render_atoms[] = static const struct brw_tracked_state *gen8_compute_atoms[] = { - &brw_state_base_address, &gen7_l3_state, &brw_cs_image_surfaces, &gen7_cs_push_constants, @@ -847,6 +835,8 @@ brw_upload_pipeline_state(struct brw_context *brw, brw_upload_programs(brw, pipeline); merge_ctx_state(brw, &state); + brw_upload_state_base_address(brw); + const struct brw_tracked_state *atoms = brw_get_pipeline_atoms(brw, 
pipeline); const int num_atoms = brw->num_atoms[pipeline]; diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.c b/src/mesa/drivers/dri/i965/gen6_blorp.c index 9d72745d31e..5f84ab09e10 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.c +++ b/src/mesa/drivers/dri/i965/gen6_blorp.c @@ -996,8 +996,7 @@ gen6_blorp_exec(struct brw_context *brw, /* Emit workaround flushes when we switch from drawing to blorping. */ brw_emit_post_sync_nonzero_flush(brw); - if (brw_state_base_address.dirty.brw & brw->ctx.NewDriverState) - brw_upload_state_base_address(brw); + brw_upload_state_base_address(brw); gen6_emit_3dstate_multisample(brw, params->dst.num_samples); gen6_emit_3dstate_sample_mask(brw, diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.c b/src/mesa/drivers/dri/i965/gen7_blorp.c index 494ed46dc70..3584864417a 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.c +++ b/src/mesa/drivers/dri/i965/gen7_blorp.c @@ -817,8 +817,7 @@ gen7_blorp_exec(struct brw_context *brw, uint32_t wm_push_const_offset = 0; uint32_t wm_bind_bo_offset = 0; - if (brw_state_base_address.dirty.brw & brw->ctx.NewDriverState) - brw_upload_state_base_address(brw); + brw_upload_state_base_address(brw); gen6_emit_3dstate_multisample(brw, params->dst.num_samples); gen6_emit_3dstate_sample_mask(brw, diff --git a/src/mesa/drivers/dri/i965/gen8_blorp.c b/src/mesa/drivers/dri/i965/gen8_blorp.c index 6a783a75df9..a9a400d9066 100644 --- a/src/mesa/drivers/dri/i965/gen8_blorp.c +++ b/src/mesa/drivers/dri/i965/gen8_blorp.c @@ -653,8 +653,7 @@ gen8_blorp_exec(struct brw_context *brw, const struct brw_blorp_params *params) { uint32_t wm_bind_bo_offset = 0; - if (brw_state_base_address.dirty.brw & brw->ctx.NewDriverState) - brw_upload_state_base_address(brw); + brw_upload_state_base_address(brw); gen7_blorp_emit_cc_viewport(brw); gen7_l3_state.emit(brw); diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index f50b2b473c9..f220311842a 100644 --- 
a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -73,6 +73,7 @@ intel_batchbuffer_reset(struct brw_context *brw) brw->batch.reserved_space = BATCH_RESERVED; brw->batch.state_batch_offset = brw->batch.bo->size; brw->batch.needs_sol_reset = false; + brw->batch.state_base_address_emitted = false; /* We don't know what ring the new batch will be sent to until we see the * first BEGIN_BATCH or BEGIN_BATCH_BLT. Mark it as unknown.