From: Kenneth Graunke Date: Fri, 14 Aug 2015 23:01:33 +0000 (-0700) Subject: i965/vs: Simplify fs_visitor's ATTR file. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=bd198b9f0a292a9ff4ffffec3a29bad23d62caba;p=mesa.git i965/vs: Simplify fs_visitor's ATTR file. Previously, ATTR was indexed by VERT_ATTRIB_* slots; at the end of compilation, assign_vs_urb_setup() translated those into GRF units, and converted ATTR to HW_REGs. This patch moves the translation earlier, making ATTR work in terms of GRF units from the beginning. assign_vs_urb_setup() simply has to add the number of payload registers and push constants to obtain the final hardware GRF number. (We can't do this earlier as those values aren't known.) ATTR still supports reg_offset; however, it's simply added to reg. It's not clear whether this is valuable or not. Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 65f2e68e621..d000f16f49a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1508,9 +1508,11 @@ void fs_visitor::assign_vs_urb_setup() { brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data; - int grf, slot, channel, attr; assert(stage == MESA_SHADER_VERTEX); + int count = _mesa_bitcount_64(vs_prog_data->inputs_read); + if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) + count++; /* Each attribute is 4 regs. */ this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes; @@ -1521,25 +1523,10 @@ fs_visitor::assign_vs_urb_setup() foreach_block_and_inst(block, fs_inst, inst, cfg) { for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == ATTR) { - - if (inst->src[i].reg == VERT_ATTRIB_MAX) { - slot = vs_prog_data->nr_attributes - 1; - } else { - /* Attributes come in in a contiguous block, ordered by their - * gl_vert_attrib value. 
That means we can compute the slot - * number for an attribute by masking out the enabled - * attributes before it and counting the bits. - */ - attr = inst->src[i].reg + inst->src[i].reg_offset / 4; - slot = _mesa_bitcount_64(vs_prog_data->inputs_read & - BITFIELD64_MASK(attr)); - } - - channel = inst->src[i].reg_offset & 3; - - grf = payload.num_regs + - prog_data->curb_read_length + - slot * 4 + channel; + int grf = payload.num_regs + + prog_data->curb_read_length + + inst->src[i].reg + + inst->src[i].reg_offset; inst->src[i].file = HW_REG; inst->src[i].fixed_hw_reg = diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index df1a7ed9b59..8aee2c087f7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -53,7 +53,8 @@ fs_reg * fs_visitor::emit_vs_system_value(int location) { fs_reg *reg = new(this->mem_ctx) - fs_reg(ATTR, VERT_ATTRIB_MAX, BRW_REGISTER_TYPE_D); + fs_reg(ATTR, 4 * _mesa_bitcount_64(nir->info.inputs_read), + BRW_REGISTER_TYPE_D); brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data; switch (location) { diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 15c1b1984a1..4f35d81fc7e 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -27,6 +27,34 @@ #include "glsl/nir/glsl_to_nir.h" #include "program/prog_to_nir.h" +static bool +remap_vs_attrs(nir_block *block, void *closure) +{ + GLbitfield64 inputs_read = *((GLbitfield64 *) closure); + + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + /* We set EmitNoIndirect for VS inputs, so there are no indirects. 
*/ + assert(intrin->intrinsic != nir_intrinsic_load_input_indirect); + + if (intrin->intrinsic == nir_intrinsic_load_input) { + /* Attributes come in a contiguous block, ordered by their + * gl_vert_attrib value. That means we can compute the slot + * number for an attribute by masking out the enabled attributes + * before it and counting the bits. + */ + int attr = intrin->const_index[0]; + int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr)); + intrin->const_index[0] = 4 * slot; + } + } + return true; +} + static void brw_nir_lower_inputs(nir_shader *nir, bool is_scalar) { @@ -49,6 +77,18 @@ brw_nir_lower_inputs(nir_shader *nir, bool is_scalar) * type_size_vec4 here. */ nir_lower_io(nir, nir_var_shader_in, type_size_vec4); + + /* Finally, translate VERT_ATTRIB_* values into the actual registers. + * + * Note that we can use nir->info.inputs_read instead of key->inputs_read + * since the two are identical aside from Gen4-5 edge flag differences. + */ + GLbitfield64 inputs_read = nir->info.inputs_read; + nir_foreach_overload(nir, overload) { + if (overload->impl) { + nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read); + } + } break; case MESA_SHADER_GEOMETRY: foreach_list_typed(nir_variable, var, node, &nir->inputs) {