From 2e9916ea0410385534dc3ff45cd476143a4e2041 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 4 May 2017 16:36:26 -0700 Subject: [PATCH] i965/vec4: Use NIR to do GS input remapping MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit We're already doing this in the FS back-end. This just does the same thing in the vec4 back-end. Reviewed-by: Alejandro Piñeiro Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_nir.c | 5 +- src/intel/compiler/brw_nir.h | 2 +- src/intel/compiler/brw_vec4.cpp | 60 ------------------ src/intel/compiler/brw_vec4.h | 2 - src/intel/compiler/brw_vec4_gs_nir.cpp | 10 ++- src/intel/compiler/brw_vec4_gs_visitor.cpp | 72 +++++++++++++++------- src/intel/compiler/brw_vec4_gs_visitor.h | 3 +- src/intel/compiler/brw_vec4_tcs.cpp | 2 +- src/intel/compiler/gen6_gs_visitor.cpp | 4 +- 9 files changed, 59 insertions(+), 101 deletions(-) diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 88a7430168b..de8f519b4e1 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -334,7 +334,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir, } void -brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar, +brw_nir_lower_vue_inputs(nir_shader *nir, const struct brw_vue_map *vue_map) { foreach_list_typed(nir_variable, var, node, &nir->inputs) { @@ -344,9 +344,6 @@ brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar, /* Inputs are stored in vec4 slots, so use type_size_vec4(). */ nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0); - if (nir->stage == MESA_SHADER_GEOMETRY && !is_scalar) - return; - /* This pass needs actual constants */ nir_opt_constant_folding(nir); diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index 6d9d86f98c6..5d866b86ac8 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -100,7 +100,7 @@ bool brw_nir_lower_intrinsics(nir_shader *nir, void brw_nir_lower_vs_inputs(nir_shader *nir, bool use_legacy_snorm_formula, const uint8_t *vs_attrib_wa_flags); -void brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar, +void brw_nir_lower_vue_inputs(nir_shader *nir, const struct brw_vue_map *vue_map); void brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue); void brw_nir_lower_fs_inputs(nir_shader *nir, diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index f936429bbfc..5c032c0c822 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -1677,66 +1677,6 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) } -static inline struct brw_reg -attribute_to_hw_reg(int attr, brw_reg_type type, bool interleaved) -{ - struct brw_reg reg; - - unsigned width = REG_SIZE / 2 / MAX2(4, type_sz(type)); - if (interleaved) { - reg = stride(brw_vecn_grf(width, attr / 2, (attr % 2) * 4), 0, width, 1); - } else { - reg = brw_vecn_grf(width, attr, 0); - } - - reg.type = type; - return reg; -} - - -/** - * Replace each register of type ATTR in this->instructions with a reference - * to a fixed HW register. - * - * If interleaved is true, then each attribute takes up half a register, with - * register N containing attribute 2*N in its first half and attribute 2*N+1 - * in its second half (this corresponds to the payload setup used by geometry - * shaders in "single" or "dual instanced" dispatch mode). If interleaved is - * false, then each attribute takes up a whole register, with register N - * containing attribute N (this corresponds to the payload setup used by - * vertex shaders, and by geometry shaders in "dual object" dispatch mode). - */ -void -vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map, - bool interleaved) -{ - foreach_block_and_inst(block, vec4_instruction, inst, cfg) { - for (int i = 0; i < 3; i++) { - if (inst->src[i].file != ATTR) - continue; - - int grf = attribute_map[inst->src[i].nr + - inst->src[i].offset / REG_SIZE]; - assert(inst->src[i].offset % REG_SIZE == 0); - - /* All attributes used in the shader need to have been assigned a - * hardware register by the caller - */ - assert(grf != 0); - - struct brw_reg reg = - attribute_to_hw_reg(grf, inst->src[i].type, interleaved); - reg.swizzle = inst->src[i].swizzle; - if (inst->src[i].abs) - reg = brw_abs(reg); - if (inst->src[i].negate) - reg = negate(reg); - - inst->src[i] = reg; - } - } -} - int vec4_vs_visitor::setup_attributes(int payload_reg) { diff --git a/src/intel/compiler/brw_vec4.h b/src/intel/compiler/brw_vec4.h index 89adfaa7b79..0f92f343ce5 100644 --- a/src/intel/compiler/brw_vec4.h +++ b/src/intel/compiler/brw_vec4.h @@ -367,8 +367,6 @@ public: protected: void emit_vertex(); - void lower_attributes_to_hw_regs(const int *attribute_map, - bool interleaved); void setup_payload_interference(struct ra_graph *g, int first_payload_node, int reg_node_count); virtual void setup_payload() = 0; diff --git a/src/intel/compiler/brw_vec4_gs_nir.cpp b/src/intel/compiler/brw_vec4_gs_nir.cpp index ed8c03b0594..577f587f9b1 100644 --- a/src/intel/compiler/brw_vec4_gs_nir.cpp +++ b/src/intel/compiler/brw_vec4_gs_nir.cpp @@ -66,8 +66,10 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) nir_const_value *vertex = nir_src_as_const_value(instr->src[0]); nir_const_value *offset_reg = nir_src_as_const_value(instr->src[1]); + const unsigned input_array_stride = prog_data->urb_read_length * 2; + if (nir_dest_bit_size(instr->dest) == 64) { - src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] + + src = src_reg(ATTR, input_array_stride * vertex->u32[0] + instr->const_index[0] + offset_reg->u32[0], glsl_type::dvec4_type); @@ -85,15 +87,11 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) /* Make up a type...we have no way of knowing... */ const glsl_type *const type = glsl_type::ivec(instr->num_components); - src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] + + src = src_reg(ATTR, input_array_stride * vertex->u32[0] + instr->const_index[0] + offset_reg->u32[0], type); src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr)); - /* gl_PointSize is passed in the .w component of the VUE header */ - if (instr->const_index[0] == VARYING_SLOT_PSIZ) - src.swizzle = BRW_SWIZZLE_WWWW; - dest = get_nir_dest(instr->dest, src.type); dest.writemask = brw_writemask_for_size(instr->num_components); emit(MOV(dest, src)); diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp index 9793ef50125..b281bcf49f4 100644 --- a/src/intel/compiler/brw_vec4_gs_visitor.cpp +++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp @@ -29,6 +29,7 @@ #include "brw_vec4_gs_visitor.h" #include "gen6_gs_visitor.h" +#include "brw_cfg.h" #include "brw_fs.h" #include "brw_nir.h" #include "common/gen_debug.h" @@ -72,9 +73,36 @@ vec4_gs_visitor::make_reg_for_system_value(int location) return reg; } +static inline struct brw_reg +attribute_to_hw_reg(int attr, brw_reg_type type, bool interleaved) +{ + struct brw_reg reg; + unsigned width = REG_SIZE / 2 / MAX2(4, type_sz(type)); + if (interleaved) { + reg = stride(brw_vecn_grf(width, attr / 2, (attr % 2) * 4), 0, width, 1); + } else { + reg = brw_vecn_grf(width, attr, 0); + } + + reg.type = type; + return reg; +} + +/** + * Replace each register of type ATTR in this->instructions with a reference + * to a fixed HW register. + * + * If interleaved is true, then each attribute takes up half a register, with + * register N containing attribute 2*N in its first half and attribute 2*N+1 + * in its second half (this corresponds to the payload setup used by geometry + * shaders in "single" or "dual instanced" dispatch mode). If interleaved is + * false, then each attribute takes up a whole register, with register N + * containing attribute N (this corresponds to the payload setup used by + * vertex shaders, and by geometry shaders in "dual object" dispatch mode). + */ int -vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map, +vec4_gs_visitor::setup_varying_inputs(int payload_reg, int attributes_per_reg) { /* For geometry shaders there are N copies of the input attributes, where N @@ -89,12 +117,24 @@ vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map, assert(num_input_vertices <= MAX_GS_INPUT_VERTICES); unsigned input_array_stride = prog_data->urb_read_length * 2; - for (int slot = 0; slot < c->input_vue_map.num_slots; slot++) { - int varying = c->input_vue_map.slot_to_varying[slot]; - for (unsigned vertex = 0; vertex < num_input_vertices; vertex++) { - attribute_map[BRW_VARYING_SLOT_COUNT * vertex + varying] = - attributes_per_reg * payload_reg + input_array_stride * vertex + - slot; + foreach_block_and_inst(block, vec4_instruction, inst, cfg) { + for (int i = 0; i < 3; i++) { + if (inst->src[i].file != ATTR) + continue; + + assert(inst->src[i].offset % REG_SIZE == 0); + int grf = payload_reg * attributes_per_reg + + inst->src[i].nr + inst->src[i].offset / REG_SIZE; + + struct brw_reg reg = + attribute_to_hw_reg(grf, inst->src[i].type, attributes_per_reg > 1); + reg.swizzle = inst->src[i].swizzle; + if (inst->src[i].abs) + reg = brw_abs(reg); + if (inst->src[i].negate) + reg = negate(reg); + + inst->src[i] = reg; } } @@ -103,25 +143,15 @@ vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map, return payload_reg + regs_used; } - void vec4_gs_visitor::setup_payload() { - int attribute_map[BRW_VARYING_SLOT_COUNT * MAX_GS_INPUT_VERTICES]; - /* If we are in dual instanced or single mode, then attributes are going * to be interleaved, so one register contains two attribute slots. */ int attributes_per_reg = prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2; - /* If a geometry shader tries to read from an input that wasn't written by - * the vertex shader, that produces undefined results, but it shouldn't - * crash anything. So initialize attribute_map to zeros--that ensures that - * these undefined results are read from r0. - */ - memset(attribute_map, 0, sizeof(attribute_map)); - int reg = 0; /* The payload always contains important data in r0, which contains @@ -132,13 +162,11 @@ vec4_gs_visitor::setup_payload() /* If the shader uses gl_PrimitiveIDIn, that goes in r1. */ if (gs_prog_data->include_primitive_id) - attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg++; + reg++; reg = setup_uniforms(reg); - reg = setup_varying_inputs(reg, attribute_map, attributes_per_reg); - - lower_attributes_to_hw_regs(attribute_map, attributes_per_reg > 1); + reg = setup_varying_inputs(reg, attributes_per_reg); this->first_non_payload_grf = reg; } @@ -634,7 +662,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, shader->info.separate_shader); shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, is_scalar); - brw_nir_lower_vue_inputs(shader, is_scalar, &c.input_vue_map); + brw_nir_lower_vue_inputs(shader, &c.input_vue_map); brw_nir_lower_vue_outputs(shader, is_scalar); shader = brw_postprocess_nir(shader, compiler, is_scalar); diff --git a/src/intel/compiler/brw_vec4_gs_visitor.h b/src/intel/compiler/brw_vec4_gs_visitor.h index 09221f928d1..f57cdba8cf9 100644 --- a/src/intel/compiler/brw_vec4_gs_visitor.h +++ b/src/intel/compiler/brw_vec4_gs_visitor.h @@ -64,8 +64,7 @@ protected: virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr); protected: - int setup_varying_inputs(int payload_reg, int *attribute_map, - int attributes_per_reg); + int setup_varying_inputs(int payload_reg, int attributes_per_reg); void emit_control_data_bits(); void set_stream_control_data_bits(unsigned stream_id); diff --git a/src/intel/compiler/brw_vec4_tcs.cpp b/src/intel/compiler/brw_vec4_tcs.cpp index c362a0a5f14..733f152acc6 100644 --- a/src/intel/compiler/brw_vec4_tcs.cpp +++ b/src/intel/compiler/brw_vec4_tcs.cpp @@ -413,7 +413,7 @@ brw_compile_tcs(const struct brw_compiler *compiler, nir->info.patch_outputs_written); nir = brw_nir_apply_sampler_key(nir, compiler, &key->tex, is_scalar); - brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map); + brw_nir_lower_vue_inputs(nir, &input_vue_map); brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map, key->tes_primitive_mode); if (key->quads_workaround) diff --git a/src/intel/compiler/gen6_gs_visitor.cpp b/src/intel/compiler/gen6_gs_visitor.cpp index f76cdf02556..fe9f834f0ce 100644 --- a/src/intel/compiler/gen6_gs_visitor.cpp +++ b/src/intel/compiler/gen6_gs_visitor.cpp @@ -516,9 +516,7 @@ gen6_gs_visitor::setup_payload() reg = setup_uniforms(reg); - reg = setup_varying_inputs(reg, attribute_map, attributes_per_reg); - - lower_attributes_to_hw_regs(attribute_map, true); + reg = setup_varying_inputs(reg, attributes_per_reg); this->first_non_payload_grf = reg; } -- 2.30.2