From 07db95c24d00d0d06d9dbdf678006a1636e857cd Mon Sep 17 00:00:00 2001 From: Topi Pohjolainen Date: Wed, 18 May 2016 07:30:00 +0300 Subject: [PATCH] i965/blorp: Fix the size requirement for vertex elements v2: Rebased as this is needed before flat inputs are enabled Signed-off-by: Topi Pohjolainen Reviewed-by: Jason Ekstrand Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_blorp.h | 3 +- src/mesa/drivers/dri/i965/gen7_blorp.c | 40 +++++++++++++++++--------- src/mesa/drivers/dri/i965/gen8_blorp.c | 2 +- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index 3f77ca4a37f..ccfebc5395e 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -462,7 +462,8 @@ gen6_blorp_emit_sampler_state(struct brw_context *brw, unsigned tex_filter, unsigned max_lod, bool non_normalized_coords); void -gen7_blorp_emit_urb_config(struct brw_context *brw); +gen7_blorp_emit_urb_config(struct brw_context *brw, + const struct brw_blorp_params *params); void gen7_blorp_emit_blend_state_pointer(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.c b/src/mesa/drivers/dri/i965/gen7_blorp.c index 42157754d96..7c2da8a39a9 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.c +++ b/src/mesa/drivers/dri/i965/gen7_blorp.c @@ -31,16 +31,26 @@ #include "brw_blorp.h" -static bool -gen7_blorp_skip_urb_config(const struct brw_context *brw) +/* Once the vertex fetcher has written full VUE entries with a complete + * header, the space requirement is as follows per vertex (in bytes): + * + * Header Position Program constants + * +--------+------------+-------------------+ + * | 16 | 16 | n x 16 | + * +--------+------------+-------------------+ + * + * where 'n' stands for the number of varying inputs expressed as vec4s. + * + * The URB size is in turn expressed in units of 64 bytes (512 bits). 
+ */ +static unsigned +gen7_blorp_get_vs_entry_size(const struct brw_blorp_params *params) { - if (brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) - return false; + const unsigned num_varyings = + params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; + const unsigned total_needed = 16 + 16 + num_varyings * 16; - /* Vertex buffer takes 24 bytes. As the size is expressed in 64 bytes, - * one will suffice, otherwise the setup can be any valid configuration. - */ - return brw->urb.vsize > 0; + return DIV_ROUND_UP(total_needed, 64); } /* 3DSTATE_URB_VS @@ -56,7 +66,8 @@ gen7_blorp_skip_urb_config(const struct brw_context *brw) * valid. */ void -gen7_blorp_emit_urb_config(struct brw_context *brw) +gen7_blorp_emit_urb_config(struct brw_context *brw, + const struct brw_blorp_params *params) { /* URB allocations must be done in 8k chunks. */ const unsigned chunk_size_bytes = 8192; @@ -65,13 +76,14 @@ gen7_blorp_emit_urb_config(struct brw_context *brw) const unsigned push_constant_bytes = 1024 * urb_size; const unsigned push_constant_chunks = push_constant_bytes / chunk_size_bytes; - const unsigned vs_size = 1; + const unsigned vs_entry_size = gen7_blorp_get_vs_entry_size(params); const unsigned vs_start = push_constant_chunks; const unsigned min_vs_entries = ALIGN(brw->urb.min_vs_entries, 8); const unsigned vs_chunks = - DIV_ROUND_UP(min_vs_entries * vs_size * 64, chunk_size_bytes); + DIV_ROUND_UP(min_vs_entries * vs_entry_size * 64, chunk_size_bytes); - if (gen7_blorp_skip_urb_config(brw)) + if (!(brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) && + brw->urb.vsize >= vs_entry_size) return; brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE; @@ -85,7 +97,7 @@ gen7_blorp_emit_urb_config(struct brw_context *brw) gen7_emit_urb_state(brw, min_vs_entries /* num_vs_entries */, - vs_size, + vs_entry_size, vs_start, 0 /* num_hs_entries */, 1 /* hs_size */, @@ -843,7 +855,7 @@ gen7_blorp_exec(struct brw_context *brw, params->dst.num_samples > 
1 ? (1 << params->dst.num_samples) - 1 : 1); gen6_blorp_emit_vertices(brw, params); - gen7_blorp_emit_urb_config(brw); + gen7_blorp_emit_urb_config(brw, params); if (params->wm_prog_data) { cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params); cc_state_offset = gen6_blorp_emit_cc_state(brw); diff --git a/src/mesa/drivers/dri/i965/gen8_blorp.c b/src/mesa/drivers/dri/i965/gen8_blorp.c index 553c6373153..d8ea831f279 100644 --- a/src/mesa/drivers/dri/i965/gen8_blorp.c +++ b/src/mesa/drivers/dri/i965/gen8_blorp.c @@ -690,7 +690,7 @@ gen8_blorp_exec(struct brw_context *brw, const struct brw_blorp_params *params) gen7_blorp_emit_cc_viewport(brw); gen7_l3_state.emit(brw); - gen7_blorp_emit_urb_config(brw); + gen7_blorp_emit_urb_config(brw, params); const uint32_t cc_blend_state_offset = gen8_blorp_emit_blend_state(brw, params); -- 2.30.2