From 5adee83806f764d60c629027dc0ee38cef3cb87a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 4 Aug 2016 17:31:02 -0700 Subject: [PATCH] vc4: Switch store_output to using nir_lower_io_to_scalar / component. --- src/gallium/drivers/vc4/vc4_nir_lower_io.c | 42 ++-------------------- src/gallium/drivers/vc4/vc4_program.c | 18 +++++++--- 2 files changed, 16 insertions(+), 44 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c index ad96ef5ad82..3d08b648125 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c @@ -29,9 +29,9 @@ * Walks the NIR generated by TGSI-to-NIR to lower its io intrinsics into * something amenable to the VC4 architecture. * - * Currently, it splits outputs, VS inputs, and uniforms into scalars, drops - * any non-position outputs in coordinate shaders, and fixes up the addressing - * on indirect uniform loads. FS input scalarization is handled by + * Currently, it splits VS inputs and uniforms into scalars, drops any + * non-position outputs in coordinate shaders, and fixes up the addressing on + * indirect uniform loads. FS input and VS output scalarization is handled by * nir_lower_io_to_scalar(). */ @@ -319,42 +319,6 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b, nir_instr_remove(&intr->instr); return; } - - /* Color output is lowered by vc4_nir_lower_blend(). */ - if (c->stage == QSTAGE_FRAG && - (output_var->data.location == FRAG_RESULT_COLOR || - output_var->data.location == FRAG_RESULT_DATA0 || - output_var->data.location == FRAG_RESULT_SAMPLE_MASK)) { - nir_intrinsic_set_base(intr, nir_intrinsic_base(intr) * 4); - return; - } - - /* All TGSI-to-NIR outputs are VEC4. */ - assert(intr->num_components == 4); - - /* We only accept direct outputs and TGSI only ever gives them to us - * with an offset value of 0. - */ - assert(nir_src_as_const_value(intr->src[1]) && - nir_src_as_const_value(intr->src[1])->u32[0] == 0); - - b->cursor = nir_before_instr(&intr->instr); - - for (unsigned i = 0; i < intr->num_components; i++) { - nir_intrinsic_instr *intr_comp = - nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output); - intr_comp->num_components = 1; - nir_intrinsic_set_base(intr_comp, - nir_intrinsic_base(intr) * 4 + i); - - assert(intr->src[0].is_ssa); - intr_comp->src[0] = - nir_src_for_ssa(nir_channel(b, intr->src[0].ssa, i)); - intr_comp->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); - nir_builder_instr_insert(b, &intr_comp->instr); - } - - nir_instr_remove(&intr->instr); } static void diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 454202753a5..5acb7a75919 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1652,6 +1652,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) i)); } } else { + offset = offset * 4 + nir_intrinsic_component(instr); assert(instr->num_components == 1); c->outputs[offset] = qir_MOV(c, ntq_get_src(c, instr->src[0], 0)); @@ -2063,17 +2064,24 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage, if (c->vs_key && c->vs_key->clamp_color) NIR_PASS_V(c->s, nir_lower_clamp_color_outputs); - if (stage == QSTAGE_FRAG) { - NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables); - } else { - NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables); + if (c->key->ucp_enables) { + if (stage == QSTAGE_FRAG) { + NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables); + } else { + NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables); + NIR_PASS_V(c->s, nir_lower_io_to_scalar, + nir_var_shader_out); + } } /* FS input scalarizing must happen after nir_lower_two_sided_color, - * which only handles a vec4 at a time. + * which only handles a vec4 at a time. Similarly, VS output + * scalarizing must happen after nir_lower_clip_vs. */ if (c->stage == QSTAGE_FRAG) NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in); + else + NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out); NIR_PASS_V(c->s, vc4_nir_lower_io, c); NIR_PASS_V(c->s, vc4_nir_lower_txf_ms, c); -- 2.30.2