From 7dac8820730777756c00d7024330517848dc3b9f Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 21 Jul 2016 21:26:20 -0700 Subject: [PATCH] i965/fs: Rework representation of fragment output locations in NIR. The problem with the current approach is that driver output locations are represented as a linear offset within the nir_outputs array, which makes it rather difficult for the back-end to figure out what color output and index some nir_intrinsic_load/store_output was meant for, because the offset of a given output within the nir_output array is dependent on the type and size of all previously allocated outputs. Instead this defines the driver location of an output to be the pair formed by its GLSL-assigned location and index (I've borrowed the bitfield macros from brw_defines.h in order to represent the pair of integers as a single scalar value that can be assigned to nir_variable_data::driver_location). nir_assign_var_locations is no longer useful for fragment outputs. Because fragment outputs are now allocated independently rather than within the nir_outputs array, the get_frag_output() helper becomes necessary in order to obtain the right temporary register for a given location-index pair. The type_size helper passed to nir_lower_io is now type_size_dvec4 rather than type_size_vec4_times_4 so that output array offsets are provided in terms of whole array elements rather than in terms of scalar components (dvec4 is the largest vector type supported by the GLSL so this will cause all individual fragment outputs to have a size of one regardless of the type). Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 50 ++++++++++++++++++++---- src/mesa/drivers/dri/i965/brw_nir.c | 10 +++-- src/mesa/drivers/dri/i965/brw_nir.h | 5 +++ 3 files changed, 55 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index c8b6348753d..5910d8c1a19 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -89,17 +89,19 @@ fs_visitor::nir_setup_outputs() nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs); nir_foreach_variable(var, &nir->outputs) { - fs_reg reg = offset(nir_outputs, bld, var->data.driver_location); - switch (stage) { case MESA_SHADER_VERTEX: case MESA_SHADER_TESS_EVAL: case MESA_SHADER_GEOMETRY: { + fs_reg reg = offset(nir_outputs, bld, var->data.driver_location); unsigned location = var->data.location; nir_setup_single_output_varying(®, var->type, &location); break; } - case MESA_SHADER_FRAGMENT: + case MESA_SHADER_FRAGMENT: { + const fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F, + type_size_vec4_times_4(var->type)); + if (key->force_dual_color_blend && var->data.location == FRAG_RESULT_DATA1) { this->dual_src_output = reg; @@ -130,6 +132,7 @@ fs_visitor::nir_setup_outputs() } } break; + } default: unreachable("unhandled shader stage"); } @@ -3247,6 +3250,38 @@ fs_visitor::emit_non_coherent_fb_read(const fs_builder &bld, const fs_reg &dst, return inst; } +static fs_reg +get_frag_output(const fs_visitor *v, unsigned location) +{ + assert(v->stage == MESA_SHADER_FRAGMENT); + const brw_wm_prog_key *const key = + reinterpret_cast(v->key); + const unsigned l = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_LOCATION); + const unsigned i = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_INDEX); + + if (i > 0 || (key->force_dual_color_blend && l == FRAG_RESULT_DATA1)) + return v->dual_src_output; + + else if (l == FRAG_RESULT_COLOR) + return v->outputs[0]; + + else if (l == FRAG_RESULT_DEPTH) + return v->frag_depth; + + else if (l == FRAG_RESULT_STENCIL) + return v->frag_stencil; + + else if (l == FRAG_RESULT_SAMPLE_MASK) + return v->sample_mask; + + else if (l >= FRAG_RESULT_DATA0 && + l < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS) + return v->outputs[l - FRAG_RESULT_DATA0]; + + else + unreachable("Invalid location"); +} + void fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) @@ -3285,11 +3320,12 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, case nir_intrinsic_store_output: { const fs_reg src = get_nir_src(instr->src[0]); - nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + const nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); assert(const_offset && "Indirect output stores not allowed"); - const fs_reg new_dest = offset(retype(nir_outputs, src.type), bld, - nir_intrinsic_base(instr) + - const_offset->u32[0]); + const unsigned location = nir_intrinsic_base(instr) + + SET_FIELD(const_offset->u32[0], BRW_NIR_FRAG_OUTPUT_LOCATION); + const fs_reg new_dest = retype(get_frag_output(this, location), + src.type); for (unsigned j = 0; j < instr->num_components; j++) bld.MOV(offset(new_dest, bld, nir_intrinsic_component(instr) + j), diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index d12a946f0ff..24a64cb09f0 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -339,9 +339,13 @@ brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map) void brw_nir_lower_fs_outputs(nir_shader *nir) { - nir_assign_var_locations(&nir->outputs, &nir->num_outputs, - FRAG_RESULT_DATA0, type_size_vec4_times_4); - nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4); + nir_foreach_variable(var, &nir->outputs) { + var->data.driver_location = + SET_FIELD(var->data.index, BRW_NIR_FRAG_OUTPUT_INDEX) | + SET_FIELD(var->data.location, BRW_NIR_FRAG_OUTPUT_LOCATION); + } + + nir_lower_io(nir, nir_var_shader_out, type_size_dvec4); } void diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index 61853109653..dc03c2d9381 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -139,6 +139,11 @@ void brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog, bool brw_nir_opt_peephole_ffma(nir_shader *shader); +#define BRW_NIR_FRAG_OUTPUT_INDEX_SHIFT 0 +#define BRW_NIR_FRAG_OUTPUT_INDEX_MASK INTEL_MASK(0, 0) +#define BRW_NIR_FRAG_OUTPUT_LOCATION_SHIFT 1 +#define BRW_NIR_FRAG_OUTPUT_LOCATION_MASK INTEL_MASK(31, 1) + #ifdef __cplusplus } #endif -- 2.30.2