broadcom/vc5: Fix shader input/outputs for gallium's new NIR linking.
[mesa.git] / src / broadcom / compiler / nir_to_vir.c
index 675cad2c32f47b4b11418c74666b45f6b3e3a6b7..6ec5db58a261cff2319315256213014ada99364d 100644 (file)
@@ -380,9 +380,11 @@ ntq_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
                         p0_unpacked.bias_supplied = true;
                         break;
                 case nir_tex_src_lod:
-                        /* XXX: Needs base level addition */
                         coords[next_coord++] =
-                                ntq_get_src(c, instr->src[i].src, 0);
+                                vir_FADD(c,
+                                         ntq_get_src(c, instr->src[i].src, 0),
+                                         vir_uniform(c, QUNIFORM_TEXTURE_FIRST_LEVEL,
+                                                     unit));
 
                         if (instr->op != nir_texop_txf &&
                             instr->op != nir_texop_tg4) {
@@ -477,14 +479,35 @@ ntq_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
                         STATIC_ASSERT(PIPE_SWIZZLE_X == 0);
                         chan = return_values[i / 2];
 
-                        enum v3d_qpu_input_unpack unpack;
-                        if (i & 1)
-                                unpack = V3D_QPU_UNPACK_H;
-                        else
-                                unpack = V3D_QPU_UNPACK_L;
+                        if (nir_alu_type_get_base_type(instr->dest_type) ==
+                            nir_type_float) {
+                                enum v3d_qpu_input_unpack unpack;
+                                if (i & 1)
+                                        unpack = V3D_QPU_UNPACK_H;
+                                else
+                                        unpack = V3D_QPU_UNPACK_L;
+
+                                chan = vir_FMOV(c, chan);
+                                vir_set_unpack(c->defs[chan.index], 0, unpack);
+                        } else {
+                                /* If we're unpacking the low field, shift it
+                                 * up to the top first.
+                                 */
+                                if ((i & 1) == 0) {
+                                        chan = vir_SHL(c, chan,
+                                                       vir_uniform_ui(c, 16));
+                                }
 
-                        chan = vir_FMOV(c, chan);
-                        vir_set_unpack(c->defs[chan.index], 0, unpack);
+                                /* Do proper sign extension to a 32-bit int. */
+                                if (nir_alu_type_get_base_type(instr->dest_type) ==
+                                    nir_type_int) {
+                                        chan = vir_ASR(c, chan,
+                                                       vir_uniform_ui(c, 16));
+                                } else {
+                                        chan = vir_SHR(c, chan,
+                                                       vir_uniform_ui(c, 16));
+                                }
+                        }
                 } else {
                         chan = vir_MOV(c, return_values[i]);
                 }
@@ -604,8 +627,9 @@ static void
 emit_fragment_input(struct v3d_compile *c, int attr, nir_variable *var)
 {
         for (int i = 0; i < glsl_get_vector_elements(var->type); i++) {
-                c->inputs[attr * 4 + i] =
-                        emit_fragment_varying(c, var, i);
+                int chan = var->data.location_frac + i;
+                c->inputs[attr * 4 + chan] =
+                        emit_fragment_varying(c, var, chan);
         }
 }
 
@@ -806,6 +830,14 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
         case nir_op_imov:
                 result = vir_MOV(c, src[0]);
                 break;
+
+        case nir_op_fneg:
+                result = vir_XOR(c, src[0], vir_uniform_ui(c, 1 << 31));
+                break;
+        case nir_op_ineg:
+                result = vir_NEG(c, src[0]);
+                break;
+
         case nir_op_fmul:
                 result = vir_FMUL(c, src[0], src[1]);
                 break;
@@ -1030,34 +1062,42 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
 static void
 emit_frag_end(struct v3d_compile *c)
 {
-        uint32_t discard_cond = V3D_QPU_COND_NONE;
-        if (c->s->info.fs.uses_discard) {
-                vir_PF(c, vir_MOV(c, c->discard), V3D_QPU_PF_PUSHZ);
-                discard_cond = V3D_QPU_COND_IFA;
-        }
-
         /* XXX
         if (c->output_sample_mask_index != -1) {
                 vir_MS_MASK(c, c->outputs[c->output_sample_mask_index]);
         }
         */
 
+        bool has_any_tlb_color_write = false;
+        for (int rt = 0; rt < c->fs_key->nr_cbufs; rt++) {
+                if (c->output_color_var[rt])
+                        has_any_tlb_color_write = true;
+        }
+
         if (c->output_position_index != -1) {
                 struct qinst *inst = vir_MOV_dest(c,
                                                   vir_reg(QFILE_TLBU, 0),
                                                   c->outputs[c->output_position_index]);
-                vir_set_cond(inst, discard_cond);
 
                 inst->src[vir_get_implicit_uniform_src(inst)] =
                         vir_uniform_ui(c,
                                        TLB_TYPE_DEPTH |
                                        TLB_DEPTH_TYPE_PER_PIXEL |
                                        0xffffff00);
-        } else if (c->s->info.fs.uses_discard) {
+        } else if (c->s->info.fs.uses_discard || !has_any_tlb_color_write) {
+                /* Emit passthrough Z if it needed to be delayed until shader
+                 * end due to potential discards.
+                 *
+                 * Since (single-threaded) fragment shaders always need a TLB
+                 * write, emit passthrouh Z if we didn't have any color
+                 * buffers and flag us as potentially discarding, so that we
+                 * can use Z as the TLB write.
+                 */
+                c->s->info.fs.uses_discard = true;
+
                 struct qinst *inst = vir_MOV_dest(c,
                                                   vir_reg(QFILE_TLBU, 0),
                                                   vir_reg(QFILE_NULL, 0));
-                vir_set_cond(inst, discard_cond);
 
                 inst->src[vir_get_implicit_uniform_src(inst)] =
                         vir_uniform_ui(c,
@@ -1092,14 +1132,12 @@ emit_frag_end(struct v3d_compile *c)
                                  TLB_VEC_SIZE_MINUS_1_SHIFT);
 
                         inst = vir_MOV_dest(c, vir_reg(QFILE_TLBU, 0), color[0]);
-                        vir_set_cond(inst, discard_cond);
                         inst->src[vir_get_implicit_uniform_src(inst)] =
                                 vir_uniform_ui(c, conf);
 
                         for (int i = 1; i < num_components; i++) {
                                 inst = vir_MOV_dest(c, vir_reg(QFILE_TLB, 0),
                                                     color[i]);
-                                vir_set_cond(inst, discard_cond);
                         }
                         break;
 
@@ -1129,14 +1167,12 @@ emit_frag_end(struct v3d_compile *c)
 
                         if (c->fs_key->f32_color_rb & (1 << rt)) {
                                 inst = vir_MOV_dest(c, vir_reg(QFILE_TLBU, 0), color[0]);
-                                vir_set_cond(inst, discard_cond);
                                 inst->src[vir_get_implicit_uniform_src(inst)] =
                                         vir_uniform_ui(c, conf);
 
                                 for (int i = 1; i < num_components; i++) {
                                         inst = vir_MOV_dest(c, vir_reg(QFILE_TLB, 0),
                                                             color[i]);
-                                        vir_set_cond(inst, discard_cond);
                                 }
                         } else {
                                 inst = vir_VFPACK_dest(c, vir_reg(QFILE_TLB, 0), r, g);
@@ -1145,10 +1181,8 @@ emit_frag_end(struct v3d_compile *c)
                                         inst->src[vir_get_implicit_uniform_src(inst)] =
                                                 vir_uniform_ui(c, conf);
                                 }
-                                vir_set_cond(inst, discard_cond);
 
                                 inst = vir_VFPACK_dest(c, vir_reg(QFILE_TLB, 0), b, a);
-                                vir_set_cond(inst, discard_cond);
                         }
                         break;
                 }
@@ -1375,7 +1409,7 @@ ntq_setup_inputs(struct v3d_compile *c)
         qsort(&vars, num_entries, sizeof(*vars), driver_location_compare);
 
         uint32_t vpm_components_queued = 0;
-        if (c->s->stage == MESA_SHADER_VERTEX) {
+        if (c->s->info.stage == MESA_SHADER_VERTEX) {
                 bool uses_iid = c->s->info.system_values_read &
                         (1ull << SYSTEM_VALUE_INSTANCE_ID);
                 bool uses_vid = c->s->info.system_values_read &
@@ -1405,7 +1439,7 @@ ntq_setup_inputs(struct v3d_compile *c)
                 resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
                                   (loc + 1) * 4);
 
-                if (c->s->stage == MESA_SHADER_FRAGMENT) {
+                if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
                         if (var->data.location == VARYING_SLOT_POS) {
                                 emit_fragcoord_input(c, loc);
                         } else if (var->data.location == VARYING_SLOT_PNTC ||
@@ -1433,7 +1467,7 @@ ntq_setup_inputs(struct v3d_compile *c)
                 }
         }
 
-        if (c->s->stage == MESA_SHADER_VERTEX) {
+        if (c->s->info.stage == MESA_SHADER_VERTEX) {
                 assert(vpm_components_queued == 0);
                 assert(num_components == 0);
         }
@@ -1449,10 +1483,13 @@ ntq_setup_outputs(struct v3d_compile *c)
                 assert(array_len == 1);
                 (void)array_len;
 
-                for (int i = 0; i < 4; i++)
-                        add_output(c, loc + i, var->data.location, i);
+                for (int i = 0; i < glsl_get_vector_elements(var->type); i++) {
+                        add_output(c, loc + var->data.location_frac + i,
+                                   var->data.location,
+                                   var->data.location_frac + i);
+                }
 
-                if (c->s->stage == MESA_SHADER_FRAGMENT) {
+                if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
                         switch (var->data.location) {
                         case FRAG_RESULT_COLOR:
                                 c->output_color_var[0] = var;
@@ -1665,10 +1702,12 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
         case nir_intrinsic_discard:
                 if (c->execute.file != QFILE_NULL) {
                         vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
-                        vir_MOV_cond(c, V3D_QPU_COND_IFA, c->discard,
-                                     vir_uniform_ui(c, ~0));
+                        vir_set_cond(vir_SETMSF_dest(c, vir_reg(QFILE_NULL, 0),
+                                                     vir_uniform_ui(c, 0)),
+                                V3D_QPU_COND_IFA);
                 } else {
-                        vir_MOV_dest(c, c->discard, vir_uniform_ui(c, ~0));
+                        vir_SETMSF_dest(c, vir_reg(QFILE_NULL, 0),
+                                        vir_uniform_ui(c, 0));
                 }
                 break;
 
@@ -1683,9 +1722,14 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                          */
                         vir_PF(c, vir_AND(c, c->execute, vir_NOT(c, cond)),
                                V3D_QPU_PF_PUSHZ);
-                        vir_MOV_cond(c, V3D_QPU_COND_IFA, c->discard, cond);
+                        vir_set_cond(vir_SETMSF_dest(c, vir_reg(QFILE_NULL, 0),
+                                                     vir_uniform_ui(c, 0)),
+                                     V3D_QPU_COND_IFA);
                 } else {
-                        vir_OR_dest(c, c->discard, c->discard, cond);
+                        vir_PF(c, cond, V3D_QPU_PF_PUSHZ);
+                        vir_set_cond(vir_SETMSF_dest(c, vir_reg(QFILE_NULL, 0),
+                                                     vir_uniform_ui(c, 0)),
+                                     V3D_QPU_COND_IFNA);
                 }
 
                 break;
@@ -1948,14 +1992,11 @@ ntq_emit_impl(struct v3d_compile *c, nir_function_impl *impl)
 static void
 nir_to_vir(struct v3d_compile *c)
 {
-        if (c->s->stage == MESA_SHADER_FRAGMENT) {
+        if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
                 c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 0));
                 c->payload_w_centroid = vir_MOV(c, vir_reg(QFILE_REG, 1));
                 c->payload_z = vir_MOV(c, vir_reg(QFILE_REG, 2));
 
-                if (c->s->info.fs.uses_discard)
-                        c->discard = vir_MOV(c, vir_uniform_ui(c, 0));
-
                 if (c->fs_key->is_points) {
                         c->point_x = emit_fragment_varying(c, NULL, 0);
                         c->point_y = emit_fragment_varying(c, NULL, 0);
@@ -1982,12 +2023,18 @@ const nir_shader_compiler_options v3d_nir_options = {
         .lower_extract_word = true,
         .lower_bitfield_insert = true,
         .lower_bitfield_extract = true,
+        .lower_pack_unorm_2x16 = true,
+        .lower_pack_snorm_2x16 = true,
+        .lower_pack_unorm_4x8 = true,
+        .lower_pack_snorm_4x8 = true,
+        .lower_unpack_unorm_4x8 = true,
+        .lower_unpack_snorm_4x8 = true,
+        .lower_fdiv = true,
         .lower_ffma = true,
         .lower_flrp32 = true,
         .lower_fpow = true,
         .lower_fsat = true,
         .lower_fsqrt = true,
-        .lower_negate = true,
         .native_integers = true,
 };
 
@@ -2013,7 +2060,7 @@ void
 v3d_nir_to_vir(struct v3d_compile *c)
 {
         if (V3D_DEBUG & (V3D_DEBUG_NIR |
-                         v3d_debug_flag_for_shader_stage(c->s->stage))) {
+                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
                 fprintf(stderr, "%s prog %d/%d NIR:\n",
                         vir_get_stage_name(c),
                         c->program_id, c->variant_id);
@@ -2022,7 +2069,7 @@ v3d_nir_to_vir(struct v3d_compile *c)
 
         nir_to_vir(c);
 
-        switch (c->s->stage) {
+        switch (c->s->info.stage) {
         case MESA_SHADER_FRAGMENT:
                 emit_frag_end(c);
                 break;
@@ -2034,7 +2081,7 @@ v3d_nir_to_vir(struct v3d_compile *c)
         }
 
         if (V3D_DEBUG & (V3D_DEBUG_VIR |
-                         v3d_debug_flag_for_shader_stage(c->s->stage))) {
+                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
                 fprintf(stderr, "%s prog %d/%d pre-opt VIR:\n",
                         vir_get_stage_name(c),
                         c->program_id, c->variant_id);
@@ -2048,7 +2095,7 @@ v3d_nir_to_vir(struct v3d_compile *c)
         /* XXX: vir_schedule_instructions(c); */
 
         if (V3D_DEBUG & (V3D_DEBUG_VIR |
-                         v3d_debug_flag_for_shader_stage(c->s->stage))) {
+                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
                 fprintf(stderr, "%s prog %d/%d VIR:\n",
                         vir_get_stage_name(c),
                         c->program_id, c->variant_id);