From cbf8c1941033fd543aa81cffd02672b5ca81a788 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Thu, 4 Aug 2016 15:00:37 -0700
Subject: [PATCH] vc4: Convert to using nir_lower_io_to_scalar for FS inputs.

Scalarizing FS inputs can be done in a driver-independent manner, so
extract it out of the driver.
---
 src/gallium/drivers/vc4/vc4_nir_lower_io.c | 86 +++++++++++-----------
 src/gallium/drivers/vc4/vc4_program.c      | 20 ++++-
 2 files changed, 62 insertions(+), 44 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index 85f31f34131..d2b27bd7c8a 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -29,9 +29,10 @@
  * Walks the NIR generated by TGSI-to-NIR to lower its io intrinsics into
  * something amenable to the VC4 architecture.
  *
- * Currently, it split inputs, outputs, and uniforms into scalars, drops any
- * non-position outputs in coordinate shaders, and fixes up the addressing on
- * indirect uniform loads.
+ * Currently, it splits outputs, VS inputs, and uniforms into scalars, drops
+ * any non-position outputs in coordinate shaders, and fixes up the addressing
+ * on indirect uniform loads.  FS input scalarization is handled by
+ * nir_lower_io_to_scalar().
  */
 
 static void
@@ -228,11 +229,22 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b,
         replace_intrinsic_with_vec4(b, intr, dests);
 }
 
+static bool
+is_point_sprite(struct vc4_compile *c, nir_variable *var)
+{
+        if (var->data.location < VARYING_SLOT_VAR0 ||
+            var->data.location > VARYING_SLOT_VAR31)
+                return false;
+
+        return (c->fs_key->point_sprite_mask &
+                (1 << (var->data.location - VARYING_SLOT_VAR0)));
+}
+
 static void
 vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
                        nir_intrinsic_instr *intr)
 {
-        b->cursor = nir_before_instr(&intr->instr);
+        b->cursor = nir_after_instr(&intr->instr);
 
         if (nir_intrinsic_base(intr) >= VC4_NIR_TLB_COLOR_READ_INPUT &&
             nir_intrinsic_base(intr) < (VC4_NIR_TLB_COLOR_READ_INPUT +
@@ -250,50 +262,42 @@ vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
         }
         assert(input_var);
 
-        /* All TGSI-to-NIR inputs are vec4. */
-        assert(intr->num_components == 4);
+        int comp = nir_intrinsic_component(intr);
 
-        /* We only accept direct inputs and TGSI only ever gives them to us
-         * with an offset value of 0.
-         */
-        assert(nir_src_as_const_value(intr->src[0]) &&
-               nir_src_as_const_value(intr->src[0])->u32[0] == 0);
+        /* Lower away point coordinates, and fix up PNTC. */
+        if (is_point_sprite(c, input_var) ||
+            input_var->data.location == VARYING_SLOT_PNTC) {
+                assert(intr->num_components == 1);
 
-        /* Generate scalar loads equivalent to the original VEC4. */
-        nir_ssa_def *dests[4];
-        for (unsigned i = 0; i < intr->num_components; i++) {
-                nir_intrinsic_instr *intr_comp =
-                        nir_intrinsic_instr_create(c->s, nir_intrinsic_load_input);
-                intr_comp->num_components = 1;
-                nir_intrinsic_set_base(intr_comp,
-                                       nir_intrinsic_base(intr) * 4 + i);
-                intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
+                nir_ssa_def *result = &intr->dest.ssa;
 
-                nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL);
-                nir_builder_instr_insert(b, &intr_comp->instr);
+                switch (comp) {
+                case 0:
+                case 1:
+                        /* If we're not rendering points, we need to set a
+                         * defined value for the input that would come from
+                         * PNTC.
+                         */
+                        if (!c->fs_key->is_points)
+                                result = nir_imm_float(b, 0.0);
+                        break;
+                case 2:
+                        result = nir_imm_float(b, 0.0);
+                        break;
+                case 3:
+                        result = nir_imm_float(b, 1.0);
+                        break;
+                }
 
-                dests[i] = &intr_comp->dest.ssa;
-        }
+                if (c->fs_key->point_coord_upper_left && comp == 1)
+                        result = nir_fsub(b, nir_imm_float(b, 1.0), result);
 
-        if (input_var->data.location >= VARYING_SLOT_VAR0) {
-                if (c->fs_key->point_sprite_mask &
-                    (1 << (input_var->data.location -
-                           VARYING_SLOT_VAR0))) {
-                        if (!c->fs_key->is_points) {
-                                dests[0] = nir_imm_float(b, 0.0);
-                                dests[1] = nir_imm_float(b, 0.0);
-                        }
-                        if (c->fs_key->point_coord_upper_left) {
-                                dests[1] = nir_fsub(b,
-                                                    nir_imm_float(b, 1.0),
-                                                    dests[1]);
-                        }
-                        dests[2] = nir_imm_float(b, 0.0);
-                        dests[3] = nir_imm_float(b, 1.0);
+                if (result != &intr->dest.ssa) {
+                        nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
+                                                       nir_src_for_ssa(result),
+                                                       result->parent_instr);
                 }
         }
-
-        replace_intrinsic_with_vec4(b, intr, dests);
 }
 
 static void
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 8c9298116f4..4d1d6237a0c 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1611,7 +1611,8 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
                 assert(instr->num_components == 1);
                 const_offset = nir_src_as_const_value(instr->src[0]);
                 assert(const_offset && "vc4 doesn't support indirect inputs");
-                if (nir_intrinsic_base(instr) >= VC4_NIR_TLB_COLOR_READ_INPUT) {
+                if (c->stage == QSTAGE_FRAG &&
+                    nir_intrinsic_base(instr) >= VC4_NIR_TLB_COLOR_READ_INPUT) {
                         assert(const_offset->u32[0] == 0);
                         /* Reads of the per-sample color need to be done in
                          * order.
@@ -1626,6 +1627,11 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
                         }
                         ntq_store_dest(c, &instr->dest, 0,
                                        c->color_reads[sample_index]);
+                } else if (c->stage == QSTAGE_FRAG) {
+                        offset = nir_intrinsic_base(instr) + const_offset->u32[0];
+                        int comp = nir_intrinsic_component(instr);
+                        ntq_store_dest(c, &instr->dest, 0,
+                                       c->inputs[offset * 4 + comp]);
                 } else {
                         offset = nir_intrinsic_base(instr) + const_offset->u32[0];
                         ntq_store_dest(c, &instr->dest, 0,
@@ -2061,10 +2067,17 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
         if (c->vs_key && c->vs_key->clamp_color)
                 NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);
 
-        if (stage == QSTAGE_FRAG)
+        if (stage == QSTAGE_FRAG) {
                 NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables);
-        else
+        } else {
                 NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables);
+        }
+
+        /* FS input scalarizing must happen after nir_lower_two_sided_color,
+         * which only handles a vec4 at a time.
+         */
+        if (c->stage == QSTAGE_FRAG)
+                NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);
 
         NIR_PASS_V(c->s, vc4_nir_lower_io, c);
         NIR_PASS_V(c->s, vc4_nir_lower_txf_ms, c);
@@ -2168,6 +2181,7 @@ vc4_shader_state_create(struct pipe_context *pctx,
         NIR_PASS_V(s, nir_opt_global_to_local);
         NIR_PASS_V(s, nir_convert_to_ssa);
         NIR_PASS_V(s, nir_normalize_cubemap_coords);
+        NIR_PASS_V(s, nir_lower_load_const_to_scalar);
 
         vc4_optimize_nir(s);
 
-- 
2.30.2
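
For reference, the shared pass this converts to works by splitting each
multi-component load_input into one single-component load per channel,
carrying the channel in nir_intrinsic_component() instead of folding it
into the base the way the old vc4-local loop did.  Below is a minimal
sketch of that transform, written against the same era of NIR helpers the
patch uses; the function name scalarize_load_input is made up for
illustration and this is not the actual nir_lower_io_to_scalar
implementation.

#include "nir_builder.h"

/* Sketch only: split a vec4 load_input into per-channel scalar loads and
 * return a vec4 re-assembled from them for any remaining vector users.
 */
static nir_ssa_def *
scalarize_load_input(nir_builder *b, nir_intrinsic_instr *vec4_load)
{
        nir_ssa_def *comps[4];

        for (unsigned i = 0; i < vec4_load->num_components; i++) {
                nir_intrinsic_instr *chan =
                        nir_intrinsic_instr_create(b->shader,
                                                   nir_intrinsic_load_input);
                chan->num_components = 1;
                /* Same base and (constant zero) offset as the original
                 * load; the channel lives in the component index, which is
                 * what lets the driver read c->inputs[offset * 4 + comp]
                 * in ntq_emit_intrinsic() above.
                 */
                nir_intrinsic_set_base(chan, nir_intrinsic_base(vec4_load));
                nir_intrinsic_set_component(chan, i);
                chan->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
                nir_ssa_dest_init(&chan->instr, &chan->dest, 1, 32, NULL);
                nir_builder_instr_insert(b, &chan->instr);
                comps[i] = &chan->dest.ssa;
        }

        return nir_vec(b, comps, vec4_load->num_components);
}

A real pass would then rewrite uses of the original vec4 destination to the
re-assembled value and remove the old instruction, much like the
nir_ssa_def_rewrite_uses_after() call in the PNTC fixup above.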