From f92fbd554f2e9e702a2bd650c9b2571a3f4f1ab8 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 2 Sep 2014 11:38:29 -0700 Subject: [PATCH] i965: Move curb_read_length/total_scratch to brw_stage_prog_data. All shader stages have these fields, so it makes sense to store them in the common base structure, rather than duplicating them in each. Signed-off-by: Kenneth Graunke Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_context.h | 7 +++---- src/mesa/drivers/dri/i965/brw_fs.cpp | 6 +++--- src/mesa/drivers/dri/i965/brw_vec4.cpp | 3 ++- src/mesa/drivers/dri/i965/brw_vec4_gs.c | 5 +++-- src/mesa/drivers/dri/i965/brw_vs.c | 5 +++-- src/mesa/drivers/dri/i965/brw_vs_state.c | 8 ++++---- src/mesa/drivers/dri/i965/brw_wm.c | 4 ++-- src/mesa/drivers/dri/i965/brw_wm_state.c | 8 ++++---- src/mesa/drivers/dri/i965/gen6_vs_state.c | 4 ++-- src/mesa/drivers/dri/i965/gen6_wm_state.c | 4 ++-- src/mesa/drivers/dri/i965/gen7_gs_state.c | 4 ++-- src/mesa/drivers/dri/i965/gen7_vs_state.c | 4 ++-- src/mesa/drivers/dri/i965/gen7_wm_state.c | 4 ++-- src/mesa/drivers/dri/i965/gen8_gs_state.c | 4 ++-- src/mesa/drivers/dri/i965/gen8_ps_state.c | 4 ++-- src/mesa/drivers/dri/i965/gen8_vs_state.c | 4 ++-- 16 files changed, 40 insertions(+), 38 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index ef68c53fd34..d1ec2ea447e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -359,6 +359,9 @@ struct brw_stage_prog_data { GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; + unsigned curb_read_length; + unsigned total_scratch; + /** * Register where the thread expects to find input data from the URB * (typically uniforms, followed by vertex or fragment attributes). @@ -386,13 +389,11 @@ struct brw_stage_prog_data { struct brw_wm_prog_data { struct brw_stage_prog_data base; - GLuint curb_read_length; GLuint num_varying_inputs; GLuint dispatch_grf_start_reg_16; GLuint reg_blocks; GLuint reg_blocks_16; - GLuint total_scratch; struct { /** @{ @@ -592,10 +593,8 @@ struct brw_vec4_prog_data { struct brw_stage_prog_data base; struct brw_vue_map vue_map; - GLuint curb_read_length; GLuint urb_read_length; GLuint total_grf; - GLuint total_scratch; /* Used for calculating urb partitions. In the VS, this is the size of the * URB entry used for both input and output to the thread. In the GS, this diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index ac6b1060ecf..5f98287e965 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1463,7 +1463,7 @@ fs_visitor::assign_curb_setup() prog_data->dispatch_grf_start_reg_16 = payload.num_regs; } - prog_data->curb_read_length = ALIGN(stage_prog_data->nr_params, 8) / 8; + prog_data->base.curb_read_length = ALIGN(stage_prog_data->nr_params, 8) / 8; /* Map the offsets in the UNIFORM file to fixed HW regs. */ foreach_in_list(fs_inst, inst, &instructions) { @@ -1583,7 +1583,7 @@ fs_visitor::calculate_urb_setup() void fs_visitor::assign_urb_setup() { - int urb_start = payload.num_regs + prog_data->curb_read_length; + int urb_start = payload.num_regs + prog_data->base.curb_read_length; /* Offset all the urb_setup[] index by the actual position of the * setup regs, now that the location of the constants has been chosen. @@ -3345,7 +3345,7 @@ fs_visitor::run() schedule_instructions(SCHEDULE_POST); if (last_scratch > 0) { - prog_data->total_scratch = brw_get_scratch_size(last_scratch); + prog_data->base.total_scratch = brw_get_scratch_size(last_scratch); } if (brw->use_rep_send) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index b247bf7532b..5d8f711a100 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1606,7 +1606,8 @@ vec4_visitor::setup_uniforms(int reg) stage_prog_data->nr_params = this->uniforms * 4; - prog_data->curb_read_length = reg - prog_data->base.dispatch_grf_start_reg; + prog_data->base.curb_read_length = + reg - prog_data->base.dispatch_grf_start_reg; return reg; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c b/src/mesa/drivers/dri/i965/brw_vec4_gs.c index d99502de613..210e20ea2ec 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c @@ -242,11 +242,12 @@ do_gs_prog(struct brw_context *brw, "Try reducing the number of live vec4 values to " "improve performance.\n"); - c.prog_data.base.total_scratch + c.prog_data.base.base.total_scratch = brw_get_scratch_size(c.base.last_scratch*REG_SIZE); brw_get_scratch_bo(brw, &stage_state->scratch_bo, - c.prog_data.base.total_scratch * brw->max_gs_threads); + c.prog_data.base.base.total_scratch * + brw->max_gs_threads); } brw_upload_cache(&brw->cache, BRW_GS_PROG, diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 8e8b04493b9..4730635fe47 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -304,11 +304,12 @@ do_vs_prog(struct brw_context *brw, "Try reducing the number of live vec4 values to " "improve performance.\n"); - prog_data.base.total_scratch + prog_data.base.base.total_scratch = brw_get_scratch_size(c.base.last_scratch*REG_SIZE); brw_get_scratch_bo(brw, &brw->vs.base.scratch_bo, - prog_data.base.total_scratch * brw->max_vs_threads); + prog_data.base.base.total_scratch * + brw->max_vs_threads); } brw_upload_cache(&brw->cache, BRW_VS_PROG, diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index e05d3f9af36..53ac335752b 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -82,11 +82,11 @@ brw_upload_vs_unit(struct brw_context *brw) vs->thread1.binding_table_entry_count = brw->vs.prog_data->base.base.binding_table.size_bytes / 4; - if (brw->vs.prog_data->base.total_scratch != 0) { + if (brw->vs.prog_data->base.base.total_scratch != 0) { vs->thread2.scratch_space_base_pointer = stage_state->scratch_bo->offset64 >> 10; /* reloc */ vs->thread2.per_thread_scratch_space = - ffs(brw->vs.prog_data->base.total_scratch) - 11; + ffs(brw->vs.prog_data->base.base.total_scratch) - 11; } else { vs->thread2.scratch_space_base_pointer = 0; vs->thread2.per_thread_scratch_space = 0; @@ -94,7 +94,7 @@ brw_upload_vs_unit(struct brw_context *brw) vs->thread3.urb_entry_read_length = brw->vs.prog_data->base.urb_read_length; vs->thread3.const_urb_entry_read_length - = brw->vs.prog_data->base.curb_read_length; + = brw->vs.prog_data->base.base.curb_read_length; vs->thread3.dispatch_grf_start_reg = brw->vs.prog_data->base.base.dispatch_grf_start_reg; vs->thread3.urb_entry_read_offset = 0; @@ -172,7 +172,7 @@ brw_upload_vs_unit(struct brw_context *brw) } /* Emit scratch space relocation */ - if (brw->vs.prog_data->base.total_scratch != 0) { + if (brw->vs.prog_data->base.base.total_scratch != 0) { drm_intel_bo_emit_reloc(brw->batch.bo, stage_state->state_offset + offsetof(struct brw_vs_unit_state, thread2), diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 2e3cd4bb2cc..6834534c54a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -187,9 +187,9 @@ bool do_wm_prog(struct brw_context *brw, return false; } - if (prog_data.total_scratch) { + if (prog_data.base.total_scratch) { brw_get_scratch_bo(brw, &brw->wm.base.scratch_bo, - prog_data.total_scratch * brw->max_wm_threads); + prog_data.base.total_scratch * brw->max_wm_threads); } if (unlikely(INTEL_DEBUG & DEBUG_WM)) diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 3fd88218324..58f2cf86062 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -124,11 +124,11 @@ brw_upload_wm_unit(struct brw_context *brw) wm->thread1.binding_table_entry_count = brw->wm.prog_data->base.binding_table.size_bytes / 4; - if (brw->wm.prog_data->total_scratch != 0) { + if (brw->wm.prog_data->base.total_scratch != 0) { wm->thread2.scratch_space_base_pointer = brw->wm.base.scratch_bo->offset64 >> 10; /* reloc */ wm->thread2.per_thread_scratch_space = - ffs(brw->wm.prog_data->total_scratch) - 11; + ffs(brw->wm.prog_data->base.total_scratch) - 11; } else { wm->thread2.scratch_space_base_pointer = 0; wm->thread2.per_thread_scratch_space = 0; @@ -140,7 +140,7 @@ brw_upload_wm_unit(struct brw_context *brw) brw->wm.prog_data->num_varying_inputs * 2; wm->thread3.urb_entry_read_offset = 0; wm->thread3.const_urb_entry_read_length = - brw->wm.prog_data->curb_read_length; + brw->wm.prog_data->base.curb_read_length; /* BRW_NEW_CURBE_OFFSETS */ wm->thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; @@ -219,7 +219,7 @@ brw_upload_wm_unit(struct brw_context *brw) wm->wm4.stats_enable = 1; /* Emit scratch space relocation */ - if (brw->wm.prog_data->total_scratch != 0) { + if (brw->wm.prog_data->base.total_scratch != 0) { drm_intel_bo_emit_reloc(brw->batch.bo, brw->wm.base.state_offset + offsetof(struct brw_wm_unit_state, thread2), diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 77f566cd356..2427407c18e 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -215,10 +215,10 @@ upload_vs_state(struct brw_context *brw) ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - if (brw->vs.prog_data->base.total_scratch) { + if (brw->vs.prog_data->base.base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(brw->vs.prog_data->base.total_scratch) - 11); + ffs(brw->vs.prog_data->base.base.total_scratch) - 11); } else { OUT_BATCH(0); } diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index de95db8b8ab..930f9ae4670 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -284,10 +284,10 @@ upload_wm_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); OUT_BATCH(ksp0); OUT_BATCH(dw2); - if (brw->wm.prog_data->total_scratch) { + if (brw->wm.prog_data->base.total_scratch) { OUT_RELOC(brw->wm.base.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(brw->wm.prog_data->total_scratch) - 11); + ffs(brw->wm.prog_data->base.total_scratch) - 11); } else { OUT_BATCH(0); } diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c index 93f48f6557b..6b0fb97b64c 100644 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c @@ -94,10 +94,10 @@ upload_gs_state(struct brw_context *brw) ((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - if (brw->gs.prog_data->base.total_scratch) { + if (brw->gs.prog_data->base.base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(brw->gs.prog_data->base.total_scratch) - 11); + ffs(brw->gs.prog_data->base.base.total_scratch) - 11); } else { OUT_BATCH(0); } diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index fd8a78f1872..7534de28c2e 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -90,10 +90,10 @@ upload_vs_state(struct brw_context *brw) ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - if (brw->vs.prog_data->base.total_scratch) { + if (brw->vs.prog_data->base.base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(brw->vs.prog_data->base.total_scratch) - 11); + ffs(brw->vs.prog_data->base.base.total_scratch) - 11); } else { OUT_BATCH(0); } diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 278cf17964c..809a2499b60 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -252,10 +252,10 @@ upload_ps_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); OUT_BATCH(ksp0); OUT_BATCH(dw2); - if (brw->wm.prog_data->total_scratch) { + if (brw->wm.prog_data->base.total_scratch) { OUT_RELOC(brw->wm.base.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(brw->wm.prog_data->total_scratch) - 11); + ffs(brw->wm.prog_data->base.total_scratch) - 11); } else { OUT_BATCH(0); } diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index 446edec2179..5cb5be970bd 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -55,10 +55,10 @@ gen8_upload_gs_state(struct brw_context *brw) ((prog_data->base.binding_table.size_bytes / 4) << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - if (brw->gs.prog_data->base.total_scratch) { + if (brw->gs.prog_data->base.base.total_scratch) { OUT_RELOC64(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(brw->gs.prog_data->base.total_scratch) - 11); + ffs(brw->gs.prog_data->base.base.total_scratch) - 11); WARN_ONCE(true, "May need to implement a temporary workaround: GS Number of " "URB Entries must be less than or equal to the GS Maximum " diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 5e313bfd226..3d3df19916a 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -225,10 +225,10 @@ upload_ps_state(struct brw_context *brw) OUT_BATCH(ksp0); OUT_BATCH(0); OUT_BATCH(dw3); - if (brw->wm.prog_data->total_scratch) { + if (brw->wm.prog_data->base.total_scratch) { OUT_RELOC64(brw->wm.base.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(brw->wm.prog_data->total_scratch) - 11); + ffs(brw->wm.prog_data->base.total_scratch) - 11); } else { OUT_BATCH(0); OUT_BATCH(0); diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c index f5e8dd3e4e8..0cc4f0cdc5f 100644 --- a/src/mesa/drivers/dri/i965/gen8_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c @@ -55,10 +55,10 @@ upload_vs_state(struct brw_context *brw) ((prog_data->base.binding_table.size_bytes / 4) << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - if (prog_data->total_scratch) { + if (prog_data->base.total_scratch) { OUT_RELOC64(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(prog_data->total_scratch) - 11); + ffs(prog_data->base.total_scratch) - 11); } else { OUT_BATCH(0); OUT_BATCH(0); -- 2.30.2