From b00a236d6a6212323f77248ba923c65eeb02592b Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Thu, 21 Jul 2016 21:47:45 -0700 Subject: [PATCH] i965/fs: Allocate fragment output temporaries on demand. This gets rid of the duplication of logic between nir_setup_outputs() and get_frag_output() by allocating fragment output temporaries lazily whenever get_frag_output() is called. This makes nir_setup_outputs() a no-op for the fragment shader stage. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 73 +++++++++--------------- 1 file changed, 27 insertions(+), 46 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 5910d8c1a19..07db5d3d374 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -81,11 +81,9 @@ fs_visitor::nir_setup_single_output_varying(fs_reg *reg, void fs_visitor::nir_setup_outputs() { - if (stage == MESA_SHADER_TESS_CTRL) + if (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT) return; - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs); nir_foreach_variable(var, &nir->outputs) { @@ -98,41 +96,6 @@ fs_visitor::nir_setup_outputs() nir_setup_single_output_varying(&reg, var->type, &location); break; } - case MESA_SHADER_FRAGMENT: { - const fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F, - type_size_vec4_times_4(var->type)); - - if (key->force_dual_color_blend && - var->data.location == FRAG_RESULT_DATA1) { - this->dual_src_output = reg; - } else if (var->data.index > 0) { - assert(var->data.location == FRAG_RESULT_DATA0); - assert(var->data.index == 1); - this->dual_src_output = reg; - } else if (var->data.location == FRAG_RESULT_COLOR) { - /* Writing gl_FragColor outputs to all color regions. 
*/ - for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) { - this->outputs[i] = reg; - } - } else if (var->data.location == FRAG_RESULT_DEPTH) { - this->frag_depth = reg; - } else if (var->data.location == FRAG_RESULT_STENCIL) { - this->frag_stencil = reg; - } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) { - this->sample_mask = reg; - } else { - /* gl_FragData or a user-defined FS output */ - assert(var->data.location >= FRAG_RESULT_DATA0 && - var->data.location < FRAG_RESULT_DATA0+BRW_MAX_DRAW_BUFFERS); - - /* General color output. */ - for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) { - int output = var->data.location - FRAG_RESULT_DATA0 + i; - this->outputs[output] = offset(reg, bld, 4 * i); - } - } - break; - } default: unreachable("unhandled shader stage"); } @@ -3251,7 +3214,23 @@ fs_visitor::emit_non_coherent_fb_read(const fs_builder &bld, const fs_reg &dst, } static fs_reg -get_frag_output(const fs_visitor *v, unsigned location) +alloc_temporary(const fs_builder &bld, unsigned size, fs_reg *regs, unsigned n) +{ + if (n && regs[0].file != BAD_FILE) { + return regs[0]; + + } else { + const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, size); + + for (unsigned i = 0; i < n; i++) + regs[i] = tmp; + + return tmp; + } +} + +static fs_reg +alloc_frag_output(fs_visitor *v, unsigned location) { assert(v->stage == MESA_SHADER_FRAGMENT); const brw_wm_prog_key *const key = @@ -3260,23 +3239,25 @@ get_frag_output(const fs_visitor *v, unsigned location) const unsigned i = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_INDEX); if (i > 0 || (key->force_dual_color_blend && l == FRAG_RESULT_DATA1)) - return v->dual_src_output; + return alloc_temporary(v->bld, 4, &v->dual_src_output, 1); else if (l == FRAG_RESULT_COLOR) - return v->outputs[0]; + return alloc_temporary(v->bld, 4, v->outputs, + MAX2(key->nr_color_regions, 1)); else if (l == FRAG_RESULT_DEPTH) - return v->frag_depth; + return alloc_temporary(v->bld, 1, &v->frag_depth, 1); else if (l 
== FRAG_RESULT_STENCIL) - return v->frag_stencil; + return alloc_temporary(v->bld, 1, &v->frag_stencil, 1); else if (l == FRAG_RESULT_SAMPLE_MASK) - return v->sample_mask; + return alloc_temporary(v->bld, 1, &v->sample_mask, 1); else if (l >= FRAG_RESULT_DATA0 && l < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS) - return v->outputs[l - FRAG_RESULT_DATA0]; + return alloc_temporary(v->bld, 4, + &v->outputs[l - FRAG_RESULT_DATA0], 1); else unreachable("Invalid location"); @@ -3324,7 +3305,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, assert(const_offset && "Indirect output stores not allowed"); const unsigned location = nir_intrinsic_base(instr) + SET_FIELD(const_offset->u32[0], BRW_NIR_FRAG_OUTPUT_LOCATION); - const fs_reg new_dest = retype(get_frag_output(this, location), + const fs_reg new_dest = retype(alloc_frag_output(this, location), src.type); for (unsigned j = 0; j < instr->num_components; j++) -- 2.30.2