From f8328c998b4d68c62ba939165390c2c22c5b5740 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Mon, 19 Dec 2011 12:59:04 -0800 Subject: [PATCH] i965 gen6: Ensure correct transform feedback indices on new batch. We don't currently have kernel support for saving GPU registers on a context switch, so if multiple processes are performing transform feedback at the same time, their SVBI registers will interfere with each other. To avoid this situation, we keep a software shadow of the state of the SVBI 0 register (which is the only register we use), and re-upload it on every new batch. The function that updates the shadow state of SVBI 0 is called brw_update_primitive_count, since it will also be used to update the counters for the PRIMITIVES_GENERATED and TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN queries. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.h | 7 ++++ src/mesa/drivers/dri/i965/brw_draw.c | 33 +++++++++++++++++ src/mesa/drivers/dri/i965/brw_state.h | 1 + src/mesa/drivers/dri/i965/brw_state_upload.c | 1 + src/mesa/drivers/dri/i965/gen6_sol.c | 38 +++++++++++++++----- 5 files changed, 72 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index e8ebea80d17..8840a83d497 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -145,6 +145,7 @@ enum brw_state_id { BRW_STATE_PROGRAM_CACHE, BRW_STATE_STATE_BASE_ADDRESS, BRW_STATE_HIZ, + BRW_STATE_SOL_INDICES, }; #define BRW_NEW_URB_FENCE (1 << BRW_STATE_URB_FENCE) @@ -174,6 +175,7 @@ enum brw_state_id { #define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE) #define BRW_NEW_STATE_BASE_ADDRESS (1 << BRW_STATE_STATE_BASE_ADDRESS) #define BRW_NEW_HIZ (1 << BRW_STATE_HIZ) +#define BRW_NEW_SOL_INDICES (1 << BRW_STATE_SOL_INDICES) struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -983,6 +985,11 @@ struct brw_context struct gl_renderbuffer *depth_rb; } 
hiz; + struct brw_sol_state { + uint32_t svbi_0_starting_index; + uint32_t svbi_0_max_index; + } sol; + uint32_t render_target_format[MESA_FORMAT_COUNT]; bool format_supported_as_render_target[MESA_FORMAT_COUNT]; }; diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 6627a484a42..774a5ca46ee 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -33,10 +33,12 @@ #include "main/samplerobj.h" #include "main/state.h" #include "main/enums.h" +#include "main/macros.h" #include "tnl/tnl.h" #include "vbo/vbo_context.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" +#include "drivers/common/meta.h" #include "brw_draw.h" #include "brw_defines.h" @@ -377,6 +379,34 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) } } +/** + * Update internal counters based on the drawing operation described in + * prim. + */ +static void +brw_update_primitive_count(struct brw_context *brw, + const struct _mesa_prim *prim) +{ + uint32_t count = count_tessellated_primitives(prim); + if (brw->intel.ctx.TransformFeedback.CurrentObject->Active) { + /* Update brw->sol.svbi_0_starting_index to reflect the amount by which the + * hardware is going to increment SVBI 0 when this drawing operation + * occurs. This is necessary because the kernel does not (yet) save and + * restore GPU registers when context switching, so we'll need to be + * able to reload SVBI 0 with the correct value in case we have to start + * a new batch buffer. 
+ */ + unsigned svbi_postincrement_value = + brw->gs.prog_data->svbi_postincrement_value; + uint32_t space_avail = + (brw->sol.svbi_0_max_index - brw->sol.svbi_0_starting_index) + / svbi_postincrement_value; + uint32_t primitives_written = MIN2 (space_avail, count); + brw->sol.svbi_0_starting_index += + svbi_postincrement_value * primitives_written; + } +} + /* May fail if out of video memory for texture or vbo upload, or on * fallback conditions. */ @@ -498,6 +528,9 @@ retry: } } } + + if (!_mesa_meta_in_progress(ctx)) + brw_update_primitive_count(brw, &prim[i]); } if (intel->always_flush_batch) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index a3a470fee6b..d2715697c72 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -92,6 +92,7 @@ extern const struct brw_tracked_state gen6_gs_state; extern const struct brw_tracked_state gen6_renderbuffer_surfaces; extern const struct brw_tracked_state gen6_sampler_state; extern const struct brw_tracked_state gen6_scissor_state; +extern const struct brw_tracked_state gen6_sol_indices; extern const struct brw_tracked_state gen6_sol_surface; extern const struct brw_tracked_state gen6_sf_state; extern const struct brw_tracked_state gen6_sf_vp; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 463689224df..74d01d8b5f2 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -171,6 +171,7 @@ static const struct brw_tracked_state *gen6_atoms[] = &brw_drawing_rect, + &gen6_sol_indices, &brw_indices, &brw_index_buffer, &brw_vertices, diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c index 2f2051bc613..5d11481cb03 100644 --- a/src/mesa/drivers/dri/i965/gen6_sol.c +++ b/src/mesa/drivers/dri/i965/gen6_sol.c @@ -72,11 +72,34 @@ const struct brw_tracked_state gen6_sol_surface = { .emit = 
gen6_update_sol_surfaces, }; +static void +gen6_update_sol_indices(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); + OUT_BATCH(brw->sol.svbi_0_starting_index); /* BRW_NEW_SOL_INDICES */ + OUT_BATCH(0); + OUT_BATCH(brw->sol.svbi_0_max_index); /* BRW_NEW_SOL_INDICES */ + ADVANCE_BATCH(); +} + +const struct brw_tracked_state gen6_sol_indices = { + .dirty = { + .mesa = 0, + .brw = (BRW_NEW_BATCH | + BRW_NEW_SOL_INDICES), + .cache = 0 + }, + .emit = gen6_update_sol_indices, +}; + void brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, struct gl_transform_feedback_object *obj) { - struct intel_context *intel = intel_context(ctx); + struct brw_context *brw = brw_context(ctx); const struct gl_shader_program *vs_prog = ctx->Shader.CurrentVertexProgram; const struct gl_transform_feedback_info *linked_xfb_info = @@ -100,13 +123,12 @@ brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, max_index = MIN2(max_index, max_for_this_buffer); } - /* Initialize the SVBI 0 register to zero and set the maximum index. */ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); - OUT_BATCH(0); /* SVBI 0 */ - OUT_BATCH(0); - OUT_BATCH(max_index); - ADVANCE_BATCH(); + /* Initialize the SVBI 0 register to zero and set the maximum index. + * These values will be sent to the hardware on the next draw. + */ + brw->state.dirty.brw |= BRW_NEW_SOL_INDICES; + brw->sol.svbi_0_starting_index = 0; + brw->sol.svbi_0_max_index = max_index; } void -- 2.30.2