From 5fff3752c88255ea3f4eb26cddb2c996694b33b1 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Sun, 17 Feb 2013 07:48:21 -0800 Subject: [PATCH] i965/vs: split brw_vs_prog_data into generic and VS-specific parts. This will allow the generic parts to be re-used for geometry shaders. Reviewed-by: Jordan Justen v2: Put urb_read_length and urb_entry_size in the generic struct. Reviewed-by: Eric Anholt Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.h | 29 +++++--- src/mesa/drivers/dri/i965/brw_curbe.c | 6 +- src/mesa/drivers/dri/i965/brw_gs.c | 4 +- src/mesa/drivers/dri/i965/brw_urb.c | 2 +- src/mesa/drivers/dri/i965/brw_vec4.cpp | 34 +++++----- .../dri/i965/brw_vec4_reg_allocate.cpp | 10 +-- .../drivers/dri/i965/brw_vec4_visitor.cpp | 35 +++++----- src/mesa/drivers/dri/i965/brw_vec4_vp.cpp | 12 ++-- src/mesa/drivers/dri/i965/brw_vs.c | 67 +++++++++++++------ src/mesa/drivers/dri/i965/brw_vs.h | 3 + src/mesa/drivers/dri/i965/brw_vs_state.c | 14 ++-- .../drivers/dri/i965/brw_vs_surface_state.c | 18 ++--- src/mesa/drivers/dri/i965/gen6_urb.c | 2 +- src/mesa/drivers/dri/i965/gen6_vs_state.c | 16 ++--- src/mesa/drivers/dri/i965/gen7_urb.c | 2 +- src/mesa/drivers/dri/i965/gen7_vs_state.c | 6 +- 16 files changed, 155 insertions(+), 105 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 10e738d3090..114c369f821 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -435,10 +435,11 @@ struct brw_gs_prog_data { unsigned svbi_postincrement_value; }; -/* Note: brw_vs_prog_data_compare() must be updated when adding fields to this - * struct! + +/* Note: brw_vec4_prog_data_compare() must be updated when adding fields to + * this struct! 
*/ -struct brw_vs_prog_data { +struct brw_vec4_prog_data { struct brw_vue_map vue_map; GLuint curb_read_length; @@ -448,21 +449,31 @@ struct brw_vs_prog_data { GLuint nr_pull_params; /**< number of dwords referenced by pull_param[] */ GLuint total_scratch; - GLbitfield64 inputs_read; - - /* Used for calculating urb partitions: + /* Used for calculating urb partitions. In the VS, this is the size of the + * URB entry used for both input and output to the thread. In the GS, this + * is the size of the URB entry used for output. */ GLuint urb_entry_size; - bool uses_vertexid; - int num_surfaces; - /* These pointers must appear last. See brw_vs_prog_data_compare(). */ + /* These pointers must appear last. See brw_vec4_prog_data_compare(). */ const float **param; const float **pull_param; }; + +/* Note: brw_vs_prog_data_compare() must be updated when adding fields to this + * struct! + */ +struct brw_vs_prog_data { + struct brw_vec4_prog_data base; + + GLbitfield64 inputs_read; + + bool uses_vertexid; +}; + /** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 16 diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index b332f1960ab..3abd22b5629 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -60,7 +60,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; /* BRW_NEW_VERTEX_PROGRAM */ - const GLuint nr_vp_regs = (brw->vs.prog_data->nr_params + 15) / 16; + const GLuint nr_vp_regs = (brw->vs.prog_data->base.nr_params + 15) / 16; GLuint nr_clip_regs = 0; GLuint total_regs; @@ -240,8 +240,8 @@ brw_upload_constant_buffer(struct brw_context *brw) if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; - for (i = 0; i < brw->vs.prog_data->nr_params; i++) { - buf[offset + i] = *brw->vs.prog_data->param[i]; + for (i = 0; i < brw->vs.prog_data->base.nr_params; i++) { + buf[offset + i] = 
*brw->vs.prog_data->base.param[i]; } } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 00a2a5d99e0..caa3b3efdd6 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -57,7 +57,7 @@ static void compile_gs_prog( struct brw_context *brw, memset(&c, 0, sizeof(c)); c.key = *key; - c.vue_map = brw->vs.prog_data->vue_map; + c.vue_map = brw->vs.prog_data->base.vue_map; c.nr_regs = (c.vue_map.num_slots + 1)/2; mem_ctx = ralloc_context(NULL); @@ -167,7 +167,7 @@ static void populate_key( struct brw_context *brw, memset(key, 0, sizeof(*key)); /* CACHE_NEW_VS_PROG (part of VUE map) */ - key->attrs = brw->vs.prog_data->vue_map.slots_valid; + key->attrs = brw->vs.prog_data->base.vue_map.slots_valid; /* BRW_NEW_PRIMITIVE */ key->primitive = brw->primitive; diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index b1126b5ff5a..3f42ba82ba5 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -116,7 +116,7 @@ static void recalculate_urb_fence( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; GLuint csize = brw->curbe.total_size; - GLuint vsize = brw->vs.prog_data->urb_entry_size; + GLuint vsize = brw->vs.prog_data->base.urb_entry_size; GLuint sfsize = brw->sf.prog_data->urb_entry_size; if (csize < limits[CS].min_entry_size) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 9a4bbd892e7..279637ec108 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -423,8 +423,8 @@ vec4_visitor::pack_uniform_registers() /* Move the references to the data */ for (int j = 0; j < size; j++) { - prog_data->param[dst * 4 + new_chan[src] + j] = - prog_data->param[src * 4 + j]; + prog_data->base.param[dst * 4 + new_chan[src] + j] = + prog_data->base.param[src * 4 + j]; } this->uniform_vector_size[dst] += size; @@ -575,16 +575,16 @@ 
vec4_visitor::move_push_constants_to_pull_constants() pull_constant_loc[i / 4] = -1; if (i >= max_uniform_components) { - const float **values = &prog_data->param[i]; + const float **values = &prog_data->base.param[i]; /* Try to find an existing copy of this uniform in the pull * constants if it was part of an array access already. */ - for (unsigned int j = 0; j < prog_data->nr_pull_params; j += 4) { + for (unsigned int j = 0; j < prog_data->base.nr_pull_params; j += 4) { int matches; for (matches = 0; matches < 4; matches++) { - if (prog_data->pull_param[j + matches] != values[matches]) + if (prog_data->base.pull_param[j + matches] != values[matches]) break; } @@ -595,11 +595,11 @@ vec4_visitor::move_push_constants_to_pull_constants() } if (pull_constant_loc[i / 4] == -1) { - assert(prog_data->nr_pull_params % 4 == 0); - pull_constant_loc[i / 4] = prog_data->nr_pull_params / 4; + assert(prog_data->base.nr_pull_params % 4 == 0); + pull_constant_loc[i / 4] = prog_data->base.nr_pull_params / 4; for (int j = 0; j < 4; j++) { - prog_data->pull_param[prog_data->nr_pull_params++] = values[j]; + prog_data->base.pull_param[prog_data->base.nr_pull_params++] = values[j]; } } } @@ -659,7 +659,8 @@ vec4_visitor::opt_set_dependency_control() cfg_t cfg(this); - assert(prog_data->total_grf || !"Must be called after register allocation"); + assert(prog_data->base.total_grf || + !"Must be called after register allocation"); for (int i = 0; i < cfg.num_blocks; i++) { bblock_t *bblock = cfg.blocks[i]; @@ -1246,14 +1247,15 @@ vec4_visitor::setup_attributes(int payload_reg) if (nr_attributes == 0) nr_attributes = 1; - prog_data->urb_read_length = (nr_attributes + 1) / 2; + prog_data->base.urb_read_length = (nr_attributes + 1) / 2; - unsigned vue_entries = MAX2(nr_attributes, prog_data->vue_map.num_slots); + unsigned vue_entries = + MAX2(nr_attributes, prog_data->base.vue_map.num_slots); if (intel->gen == 6) - prog_data->urb_entry_size = ALIGN(vue_entries, 8) / 8; + 
prog_data->base.urb_entry_size = ALIGN(vue_entries, 8) / 8; else - prog_data->urb_entry_size = ALIGN(vue_entries, 4) / 4; + prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4; return payload_reg + nr_attributes; } @@ -1270,7 +1272,7 @@ vec4_visitor::setup_uniforms(int reg) for (unsigned int i = 0; i < 4; i++) { unsigned int slot = this->uniforms * 4 + i; static float zero = 0.0; - prog_data->param[slot] = &zero; + prog_data->base.param[slot] = &zero; } this->uniforms++; @@ -1279,9 +1281,9 @@ vec4_visitor::setup_uniforms(int reg) reg += ALIGN(uniforms, 2) / 2; } - prog_data->nr_params = this->uniforms * 4; + prog_data->base.nr_params = this->uniforms * 4; - prog_data->curb_read_length = reg - 1; + prog_data->base.curb_read_length = reg - 1; return reg; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 0853c0a0209..f9ebc3cba2b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -76,7 +76,7 @@ vec4_visitor::reg_allocate_trivial() next += this->virtual_grf_sizes[i]; } } - prog_data->total_grf = next; + prog_data->base.total_grf = next; foreach_iter(exec_list_iterator, iter, this->instructions) { vec4_instruction *inst = (vec4_instruction *)iter.get(); @@ -87,9 +87,9 @@ vec4_visitor::reg_allocate_trivial() assign(hw_reg_mapping, &inst->src[2]); } - if (prog_data->total_grf > max_grf) { + if (prog_data->base.total_grf > max_grf) { fail("Ran out of regs on trivial allocator (%d/%d)\n", - prog_data->total_grf, max_grf); + prog_data->base.total_grf, max_grf); return false; } @@ -221,12 +221,12 @@ vec4_visitor::reg_allocate() * regs in the register classes back down to real hardware reg * numbers. 
*/ - prog_data->total_grf = first_assigned_grf; + prog_data->base.total_grf = first_assigned_grf; for (int i = 0; i < virtual_grf_count; i++) { int reg = ra_get_node_reg(g, i); hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg]; - prog_data->total_grf = MAX2(prog_data->total_grf, + prog_data->base.total_grf = MAX2(prog_data->base.total_grf, hw_reg_mapping[i] + virtual_grf_sizes[i]); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 250c3741458..12f7f02d8b8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -605,12 +605,12 @@ vec4_visitor::setup_uniform_values(ir_variable *ir) int i; for (i = 0; i < uniform_vector_size[uniforms]; i++) { - prog_data->param[uniforms * 4 + i] = &components->f; + prog_data->base.param[uniforms * 4 + i] = &components->f; components++; } for (; i < 4; i++) { static float zero = 0; - prog_data->param[uniforms * 4 + i] = &zero; + prog_data->base.param[uniforms * 4 + i] = &zero; } uniforms++; @@ -639,7 +639,7 @@ vec4_visitor::setup_uniform_clipplane_values() this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms); this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F; for (int j = 0; j < 4; ++j) { - prog_data->param[this->uniforms * 4 + j] = &clip_planes[i][j]; + prog_data->base.param[this->uniforms * 4 + j] = &clip_planes[i][j]; } ++compacted_clipplane_index; ++this->uniforms; @@ -653,7 +653,7 @@ vec4_visitor::setup_uniform_clipplane_values() this->userplane[i] = dst_reg(UNIFORM, this->uniforms); this->userplane[i].type = BRW_REGISTER_TYPE_F; for (int j = 0; j < 4; ++j) { - prog_data->param[this->uniforms * 4 + j] = &clip_planes[i][j]; + prog_data->base.param[this->uniforms * 4 + j] = &clip_planes[i][j]; } ++this->uniforms; } @@ -689,7 +689,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) int swiz = GET_SWZ(slots[i].swizzle, j); last_swiz = 
swiz; - prog_data->param[this->uniforms * 4 + j] = &values[swiz]; + prog_data->base.param[this->uniforms * 4 + j] = &values[swiz]; if (swiz <= last_swiz) this->uniform_vector_size[this->uniforms]++; } @@ -2408,7 +2408,7 @@ void vec4_visitor::emit_psiz_and_flags(struct brw_reg reg) { if (intel->gen < 6 && - ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) || + ((prog_data->base.vue_map.slots_valid & VARYING_BIT_PSIZ) || c->key.base.userclip_active || brw->has_negative_rhw_bug)) { dst_reg header1 = dst_reg(this, glsl_type::uvec4_type); dst_reg header1_w = header1; @@ -2417,7 +2417,7 @@ vec4_visitor::emit_psiz_and_flags(struct brw_reg reg) emit(MOV(header1, 0u)); - if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) { + if (prog_data->base.vue_map.slots_valid & VARYING_BIT_PSIZ) { src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]); current_annotation = "Point size"; @@ -2462,7 +2462,7 @@ vec4_visitor::emit_psiz_and_flags(struct brw_reg reg) emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u)); } else { emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0))); - if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) { + if (prog_data->base.vue_map.slots_valid & VARYING_BIT_PSIZ) { emit(MOV(brw_writemask(reg, WRITEMASK_W), src_reg(output_reg[VARYING_SLOT_PSIZ]))); } @@ -2493,7 +2493,7 @@ vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset) * if the user wrote to it; otherwise we use gl_Position. 
*/ gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX; - if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) { + if (!(prog_data->base.vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) { clip_vertex = VARYING_SLOT_POS; } @@ -2632,8 +2632,8 @@ vec4_visitor::emit_urb_writes() /* Set up the VUE data for the first URB write */ int slot; - for (slot = 0; slot < prog_data->vue_map.num_slots; ++slot) { - emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]); + for (slot = 0; slot < prog_data->base.vue_map.num_slots; ++slot) { + emit_urb_slot(mrf++, prog_data->base.vue_map.slot_to_varying[slot]); /* If this was max_usable_mrf, we can't fit anything more into this URB * WRITE. @@ -2644,7 +2644,7 @@ vec4_visitor::emit_urb_writes() } } - bool eot = slot >= prog_data->vue_map.num_slots; + bool eot = slot >= prog_data->base.vue_map.num_slots; if (eot) { if (INTEL_DEBUG & DEBUG_SHADER_TIME) emit_shader_time_end(); @@ -2659,10 +2659,10 @@ vec4_visitor::emit_urb_writes() if (!inst->eot) { mrf = base_mrf + 1; - for (; slot < prog_data->vue_map.num_slots; ++slot) { + for (; slot < prog_data->base.vue_map.num_slots; ++slot) { assert(mrf < max_usable_mrf); - emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]); + emit_urb_slot(mrf++, prog_data->base.vue_map.slot_to_varying[slot]); } if (INTEL_DEBUG & DEBUG_SHADER_TIME) @@ -2938,12 +2938,13 @@ vec4_visitor::move_uniform_array_access_to_pull_constants() * add it. 
*/ if (pull_constant_loc[uniform] == -1) { - const float **values = &prog_data->param[uniform * 4]; + const float **values = &prog_data->base.param[uniform * 4]; - pull_constant_loc[uniform] = prog_data->nr_pull_params / 4; + pull_constant_loc[uniform] = prog_data->base.nr_pull_params / 4; for (int j = 0; j < uniform_size[uniform] * 4; j++) { - prog_data->pull_param[prog_data->nr_pull_params++] = values[j]; + prog_data->base.pull_param[prog_data->base.nr_pull_params++] + = values[j]; } } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp index bf6d03c6979..13156dda07b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp @@ -413,9 +413,9 @@ vec4_visitor::emit_vertex_program_code() const struct gl_program_parameter_list *params = c->vp->program.Base.Parameters; unsigned i; for (i = 0; i < params->NumParameters * 4; i++) { - prog_data->pull_param[i] = &params->ParameterValues[i / 4][i % 4].f; + prog_data->base.pull_param[i] = &params->ParameterValues[i / 4][i % 4].f; } - prog_data->nr_pull_params = i; + prog_data->base.nr_pull_params = i; } } @@ -442,15 +442,15 @@ vec4_visitor::setup_vp_regs() this->uniform_size[this->uniforms] = 1; /* 1 vec4 */ this->uniform_vector_size[this->uniforms] = components; for (unsigned i = 0; i < 4; i++) { - prog_data->param[this->uniforms * 4 + i] = i >= components ? 0 : - &plist->ParameterValues[p][i].f; + prog_data->base.param[this->uniforms * 4 + i] = i >= components + ?
0 : &plist->ParameterValues[p][i].f; } this->uniforms++; /* counted in vec4 units */ } /* PROGRAM_OUTPUT */ - for (int slot = 0; slot < prog_data->vue_map.num_slots; slot++) { - int varying = prog_data->vue_map.slot_to_varying[slot]; + for (int slot = 0; slot < prog_data->base.vue_map.num_slots; slot++) { + int varying = prog_data->base.vue_map.slot_to_varying[slot]; if (varying == VARYING_SLOT_PSIZ) output_reg[varying] = dst_reg(this, glsl_type::float_type); else diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 2d0849a3700..998edb079cb 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -197,15 +197,13 @@ gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx) } } + bool -brw_vs_prog_data_compare(const void *in_a, const void *in_b, - int aux_size, const void *in_key) +brw_vec4_prog_data_compare(const struct brw_vec4_prog_data *a, + const struct brw_vec4_prog_data *b) { - const struct brw_vs_prog_data *a = in_a; - const struct brw_vs_prog_data *b = in_b; - /* Compare all the struct up to the pointers. */ - if (memcmp(a, b, offsetof(struct brw_vs_prog_data, param))) + if (memcmp(a, b, offsetof(struct brw_vec4_prog_data, param))) return false; if (memcmp(a->param, b->param, a->nr_params * sizeof(void *))) @@ -217,6 +215,28 @@ brw_vs_prog_data_compare(const void *in_a, const void *in_b, return true; } + +bool +brw_vs_prog_data_compare(const void *in_a, const void *in_b, + int aux_size, const void *in_key) +{ + const struct brw_vs_prog_data *a = in_a; + const struct brw_vs_prog_data *b = in_b; + + /* Compare the base vec4 structure. */ + if (!brw_vec4_prog_data_compare(&a->base, &b->base)) + return false; + + /* Compare the fields after base; a and b already point at the structs.
*/ + const unsigned offset = sizeof(struct brw_vec4_prog_data); + if (memcmp(((const char *) a) + offset, ((const char *) b) + offset, + sizeof(struct brw_vs_prog_data) - offset)) { + return false; + } + + return true; +} + static bool do_vs_prog(struct brw_context *brw, struct gl_shader_program *prog, @@ -261,8 +281,8 @@ do_vs_prog(struct brw_context *brw, /* We also upload clip plane data as uniforms */ param_count += MAX_CLIP_PLANES * 4; - prog_data.param = rzalloc_array(NULL, const float *, param_count); - prog_data.pull_param = rzalloc_array(NULL, const float *, param_count); + prog_data.base.param = rzalloc_array(NULL, const float *, param_count); + prog_data.base.pull_param = rzalloc_array(NULL, const float *, param_count); GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; prog_data.inputs_read = vp->program.Base.InputsRead; @@ -285,7 +305,7 @@ do_vs_prog(struct brw_context *brw, } } - brw_compute_vue_map(brw, &prog_data.vue_map, outputs_written, + brw_compute_vue_map(brw, &prog_data.base.vue_map, outputs_written, c.key.base.userclip_active); if (0) { @@ -301,13 +321,13 @@ do_vs_prog(struct brw_context *brw, return false; } - if (prog_data.nr_pull_params) - prog_data.num_surfaces = 1; + if (prog_data.base.nr_pull_params) + prog_data.base.num_surfaces = 1; if (c.vp->program.Base.SamplersUsed) - prog_data.num_surfaces = SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT); + prog_data.base.num_surfaces = SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT); if (prog && prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks) { - prog_data.num_surfaces = + prog_data.base.num_surfaces = SURF_INDEX_VS_UBO(prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks); } @@ -317,10 +337,11 @@ do_vs_prog(struct brw_context *brw, "Try reducing the number of live vec4 values to " "improve performance.\n"); - prog_data.total_scratch = brw_get_scratch_size(c.base.last_scratch*REG_SIZE); + prog_data.base.total_scratch + = brw_get_scratch_size(c.base.last_scratch*REG_SIZE);
brw_get_scratch_bo(intel, &brw->vs.scratch_bo, - prog_data.total_scratch * brw->max_vs_threads); + prog_data.base.total_scratch * brw->max_vs_threads); } brw_upload_cache(&brw->cache, BRW_VS_PROG, @@ -503,9 +524,9 @@ static void brw_upload_vs_prog(struct brw_context *brw) assert(success); } - if (memcmp(&brw->vs.prog_data->vue_map, &brw->vue_map_geom_out, + if (memcmp(&brw->vs.prog_data->base.vue_map, &brw->vue_map_geom_out, sizeof(brw->vue_map_geom_out)) != 0) { - brw->vue_map_geom_out = brw->vs.prog_data->vue_map; + brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map; brw->state.dirty.brw |= BRW_NEW_VUE_MAP_GEOM_OUT; } } @@ -564,11 +585,19 @@ brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) return success; } + +void +brw_vec4_prog_data_free(const struct brw_vec4_prog_data *prog_data) +{ + ralloc_free((void *)prog_data->param); + ralloc_free((void *)prog_data->pull_param); +} + + void brw_vs_prog_data_free(const void *in_prog_data) { const struct brw_vs_prog_data *prog_data = in_prog_data; - ralloc_free((void *)prog_data->param); - ralloc_free((void *)prog_data->pull_param); + brw_vec4_prog_data_free(&prog_data->base); } diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index e1c6ed0bba2..ba83f6d8153 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -130,8 +130,11 @@ bool brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog); void brw_vs_debug_recompile(struct brw_context *brw, struct gl_shader_program *prog, const struct brw_vs_prog_key *key); +bool brw_vec4_prog_data_compare(const struct brw_vec4_prog_data *a, + const struct brw_vec4_prog_data *b); bool brw_vs_prog_data_compare(const void *a, const void *b, int aux_size, const void *key); +void brw_vec4_prog_data_free(const struct brw_vec4_prog_data *prog_data); void brw_vs_prog_data_free(const void *in_prog_data); #endif diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c 
b/src/mesa/drivers/dri/i965/brw_vs_state.c index a72a283982f..bb42bd002df 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -47,7 +47,8 @@ brw_upload_vs_unit(struct brw_context *brw) memset(vs, 0, sizeof(*vs)); /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_VS_PROG */ - vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; + vs->thread0.grf_reg_count = + ALIGN(brw->vs.prog_data->base.total_grf, 16) / 16 - 1; vs->thread0.kernel_start_pointer = brw_program_reloc(brw, brw->vs.state_offset + @@ -72,18 +73,19 @@ brw_upload_vs_unit(struct brw_context *brw) vs->thread1.binding_table_entry_count = 0; - if (brw->vs.prog_data->total_scratch != 0) { + if (brw->vs.prog_data->base.total_scratch != 0) { vs->thread2.scratch_space_base_pointer = brw->vs.scratch_bo->offset >> 10; /* reloc */ vs->thread2.per_thread_scratch_space = - ffs(brw->vs.prog_data->total_scratch) - 11; + ffs(brw->vs.prog_data->base.total_scratch) - 11; } else { vs->thread2.scratch_space_base_pointer = 0; vs->thread2.per_thread_scratch_space = 0; } - vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; - vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; + vs->thread3.urb_entry_read_length = brw->vs.prog_data->base.urb_read_length; + vs->thread3.const_urb_entry_read_length + = brw->vs.prog_data->base.curb_read_length; vs->thread3.dispatch_grf_start_reg = 1; vs->thread3.urb_entry_read_offset = 0; @@ -144,7 +146,7 @@ brw_upload_vs_unit(struct brw_context *brw) vs->vs6.vs_enable = 1; /* Emit scratch space relocation */ - if (brw->vs.prog_data->total_scratch != 0) { + if (brw->vs.prog_data->base.total_scratch != 0) { drm_intel_bo_emit_reloc(intel->batch.bo, brw->vs.state_offset + offsetof(struct brw_vs_unit_state, thread2), diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 675a84ccf24..968cc0336f7 100644 --- 
a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -56,7 +56,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw) _mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters); /* CACHE_NEW_VS_PROG */ - if (!brw->vs.prog_data->nr_pull_params) { + if (!brw->vs.prog_data->base.nr_pull_params) { if (brw->vs.const_bo) { drm_intel_bo_unreference(brw->vs.const_bo); brw->vs.const_bo = NULL; @@ -68,19 +68,20 @@ brw_upload_vs_pull_constants(struct brw_context *brw) /* _NEW_PROGRAM_CONSTANTS */ drm_intel_bo_unreference(brw->vs.const_bo); - uint32_t size = brw->vs.prog_data->nr_pull_params * 4; + uint32_t size = brw->vs.prog_data->base.nr_pull_params * 4; brw->vs.const_bo = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64); drm_intel_gem_bo_map_gtt(brw->vs.const_bo); - for (i = 0; i < brw->vs.prog_data->nr_pull_params; i++) { + for (i = 0; i < brw->vs.prog_data->base.nr_pull_params; i++) { memcpy(brw->vs.const_bo->virtual + i * 4, - brw->vs.prog_data->pull_param[i], + brw->vs.prog_data->base.pull_param[i], 4); } if (0) { - for (i = 0; i < ALIGN(brw->vs.prog_data->nr_pull_params, 4) / 4; i++) { + for (i = 0; i < ALIGN(brw->vs.prog_data->base.nr_pull_params, 4) / 4; + i++) { float *row = (float *)brw->vs.const_bo->virtual + i * 4; printf("vs const surface %3d: %4.3f %4.3f %4.3f %4.3f\n", i, row[0], row[1], row[2], row[3]); @@ -142,14 +143,15 @@ brw_vs_upload_binding_table(struct brw_context *brw) if (INTEL_DEBUG & DEBUG_SHADER_TIME) { gen7_create_shader_time_surface(brw, &brw->vs.surf_offset[SURF_INDEX_VS_SHADER_TIME]); - assert(brw->vs.prog_data->num_surfaces <= SURF_INDEX_VS_SHADER_TIME); - brw->vs.prog_data->num_surfaces = SURF_INDEX_VS_SHADER_TIME; + assert(brw->vs.prog_data->base.num_surfaces + <= SURF_INDEX_VS_SHADER_TIME); + brw->vs.prog_data->base.num_surfaces = SURF_INDEX_VS_SHADER_TIME; } /* CACHE_NEW_VS_PROG: Skip making a binding table if we don't use textures or * pull 
constants. */ - if (brw->vs.prog_data->num_surfaces == 0) { + if (brw->vs.prog_data->base.num_surfaces == 0) { if (brw->vs.bind_bo_offset != 0) { brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE; brw->vs.bind_bo_offset = 0; diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c index aa985de920d..d47bf9ea466 100644 --- a/src/mesa/drivers/dri/i965/gen6_urb.c +++ b/src/mesa/drivers/dri/i965/gen6_urb.c @@ -54,7 +54,7 @@ gen6_upload_urb( struct brw_context *brw ) int total_urb_size = brw->urb.size * 1024; /* in bytes */ /* CACHE_NEW_VS_PROG */ - unsigned vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1); + unsigned vs_size = MAX2(brw->vs.prog_data->base.urb_entry_size, 1); /* We use the same VUE layout for VS outputs and GS outputs (as it's what * the SF and Clipper expect), so we can simply make the GS URB entry size diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index fb05354ddd1..ae1a8412a65 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -49,7 +49,7 @@ gen6_upload_vs_push_constants(struct brw_context *brw) _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); /* CACHE_NEW_VS_PROG */ - if (brw->vs.prog_data->nr_params == 0) { + if (brw->vs.prog_data->base.nr_params == 0) { brw->vs.push_const_size = 0; } else { int params_uploaded; @@ -57,7 +57,7 @@ gen6_upload_vs_push_constants(struct brw_context *brw) int i; param = brw_state_batch(brw, AUB_TRACE_VS_CONSTANTS, - brw->vs.prog_data->nr_params * sizeof(float), + brw->vs.prog_data->base.nr_params * sizeof(float), 32, &brw->vs.push_const_offset); /* _NEW_PROGRAM_CONSTANTS @@ -66,10 +66,10 @@ gen6_upload_vs_push_constants(struct brw_context *brw) * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS * wouldn't be set for them. 
*/ - for (i = 0; i < brw->vs.prog_data->nr_params; i++) { - param[i] = *brw->vs.prog_data->param[i]; + for (i = 0; i < brw->vs.prog_data->base.nr_params; i++) { + param[i] = *brw->vs.prog_data->base.param[i]; } - params_uploaded = brw->vs.prog_data->nr_params / 4; + params_uploaded = brw->vs.prog_data->base.nr_params / 4; if (0) { printf("VS constant buffer:\n"); @@ -148,16 +148,16 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH(floating_point_mode | ((ALIGN(brw->sampler.count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT)); - if (brw->vs.prog_data->total_scratch) { + if (brw->vs.prog_data->base.total_scratch) { OUT_RELOC(brw->vs.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(brw->vs.prog_data->total_scratch) - 11); + ffs(brw->vs.prog_data->base.total_scratch) - 11); } else { OUT_BATCH(0); } OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | - (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | + (brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); OUT_BATCH(((brw->max_vs_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT) | diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c index 5ac388589b4..bdcf1648939 100644 --- a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -83,7 +83,7 @@ gen7_upload_urb(struct brw_context *brw) int handle_region_size = (brw->urb.size - push_size_kB) * 1024; /* bytes */ /* CACHE_NEW_VS_PROG */ - unsigned vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1); + unsigned vs_size = MAX2(brw->vs.prog_data->base.urb_entry_size, 1); int nr_vs_entries = handle_region_size / (vs_size * 64); if (nr_vs_entries > brw->urb.max_vs_entries) diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index e8be4f28823..1b97e8c0783 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -89,16 +89,16 
@@ upload_vs_state(struct brw_context *brw) OUT_BATCH(floating_point_mode | ((ALIGN(brw->sampler.count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT)); - if (brw->vs.prog_data->total_scratch) { + if (brw->vs.prog_data->base.total_scratch) { OUT_RELOC(brw->vs.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(brw->vs.prog_data->total_scratch) - 11); + ffs(brw->vs.prog_data->base.total_scratch) - 11); } else { OUT_BATCH(0); } OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | - (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | + (brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); OUT_BATCH(((brw->max_vs_threads - 1) << max_threads_shift) | -- 2.30.2