broadcom/vc5: Fix shader input/outputs for gallium's new NIR linking.
[mesa.git] / src / broadcom / compiler / nir_to_vir.c
index acfeba4cb715edd4041cdf155ed0492c720eb099..6ec5db58a261cff2319315256213014ada99364d 100644 (file)
@@ -380,9 +380,11 @@ ntq_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
                         p0_unpacked.bias_supplied = true;
                         break;
                 case nir_tex_src_lod:
-                        /* XXX: Needs base level addition */
                         coords[next_coord++] =
-                                ntq_get_src(c, instr->src[i].src, 0);
+                                vir_FADD(c,
+                                         ntq_get_src(c, instr->src[i].src, 0),
+                                         vir_uniform(c, QUNIFORM_TEXTURE_FIRST_LEVEL,
+                                                     unit));
 
                         if (instr->op != nir_texop_txf &&
                             instr->op != nir_texop_tg4) {
@@ -477,14 +479,35 @@ ntq_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
                         STATIC_ASSERT(PIPE_SWIZZLE_X == 0);
                         chan = return_values[i / 2];
 
-                        enum v3d_qpu_input_unpack unpack;
-                        if (i & 1)
-                                unpack = V3D_QPU_UNPACK_H;
-                        else
-                                unpack = V3D_QPU_UNPACK_L;
+                        if (nir_alu_type_get_base_type(instr->dest_type) ==
+                            nir_type_float) {
+                                enum v3d_qpu_input_unpack unpack;
+                                if (i & 1)
+                                        unpack = V3D_QPU_UNPACK_H;
+                                else
+                                        unpack = V3D_QPU_UNPACK_L;
 
-                        chan = vir_FMOV(c, chan);
-                        vir_set_unpack(c->defs[chan.index], 0, unpack);
+                                chan = vir_FMOV(c, chan);
+                                vir_set_unpack(c->defs[chan.index], 0, unpack);
+                        } else {
+                                /* If we're unpacking the low field, shift it
+                                 * up to the top first.
+                                 */
+                                if ((i & 1) == 0) {
+                                        chan = vir_SHL(c, chan,
+                                                       vir_uniform_ui(c, 16));
+                                }
+
+                                /* Do proper sign extension to a 32-bit int. */
+                                if (nir_alu_type_get_base_type(instr->dest_type) ==
+                                    nir_type_int) {
+                                        chan = vir_ASR(c, chan,
+                                                       vir_uniform_ui(c, 16));
+                                } else {
+                                        chan = vir_SHR(c, chan,
+                                                       vir_uniform_ui(c, 16));
+                                }
+                        }
                 } else {
                         chan = vir_MOV(c, return_values[i]);
                 }
@@ -604,8 +627,9 @@ static void
 emit_fragment_input(struct v3d_compile *c, int attr, nir_variable *var)
 {
         for (int i = 0; i < glsl_get_vector_elements(var->type); i++) {
-                c->inputs[attr * 4 + i] =
-                        emit_fragment_varying(c, var, i);
+                int chan = var->data.location_frac + i;
+                c->inputs[attr * 4 + chan] =
+                        emit_fragment_varying(c, var, chan);
         }
 }
 
@@ -806,6 +830,14 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
         case nir_op_imov:
                 result = vir_MOV(c, src[0]);
                 break;
+
+        case nir_op_fneg:
+                result = vir_XOR(c, src[0], vir_uniform_ui(c, 1 << 31));
+                break;
+        case nir_op_ineg:
+                result = vir_NEG(c, src[0]);
+                break;
+
         case nir_op_fmul:
                 result = vir_FMUL(c, src[0], src[1]);
                 break;
@@ -1030,34 +1062,42 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
 static void
 emit_frag_end(struct v3d_compile *c)
 {
-        uint32_t discard_cond = V3D_QPU_COND_NONE;
-        if (c->s->info.fs.uses_discard) {
-                vir_PF(c, vir_MOV(c, c->discard), V3D_QPU_PF_PUSHZ);
-                discard_cond = V3D_QPU_COND_IFA;
-        }
-
         /* XXX
         if (c->output_sample_mask_index != -1) {
                 vir_MS_MASK(c, c->outputs[c->output_sample_mask_index]);
         }
         */
 
+        bool has_any_tlb_color_write = false;
+        for (int rt = 0; rt < c->fs_key->nr_cbufs; rt++) {
+                if (c->output_color_var[rt])
+                        has_any_tlb_color_write = true;
+        }
+
         if (c->output_position_index != -1) {
                 struct qinst *inst = vir_MOV_dest(c,
                                                   vir_reg(QFILE_TLBU, 0),
                                                   c->outputs[c->output_position_index]);
-                vir_set_cond(inst, discard_cond);
 
                 inst->src[vir_get_implicit_uniform_src(inst)] =
                         vir_uniform_ui(c,
                                        TLB_TYPE_DEPTH |
                                        TLB_DEPTH_TYPE_PER_PIXEL |
                                        0xffffff00);
-        } else if (c->s->info.fs.uses_discard) {
+        } else if (c->s->info.fs.uses_discard || !has_any_tlb_color_write) {
+                /* Emit passthrough Z if it needed to be delayed until shader
+                 * end due to potential discards.
+                 *
+                 * Since (single-threaded) fragment shaders always need a TLB
+                 * write, emit passthrough Z if we didn't have any color
+                 * buffers and flag us as potentially discarding, so that we
+                 * can use Z as the TLB write.
+                 */
+                c->s->info.fs.uses_discard = true;
+
                 struct qinst *inst = vir_MOV_dest(c,
                                                   vir_reg(QFILE_TLBU, 0),
                                                   vir_reg(QFILE_NULL, 0));
-                vir_set_cond(inst, discard_cond);
 
                 inst->src[vir_get_implicit_uniform_src(inst)] =
                         vir_uniform_ui(c,
@@ -1070,34 +1110,34 @@ emit_frag_end(struct v3d_compile *c)
          * uniform setup
          */
 
-        if (c->output_color_var) {
-                nir_variable *var = c->output_color_var;
+        for (int rt = 0; rt < c->fs_key->nr_cbufs; rt++) {
+                if (!c->output_color_var[rt])
+                        continue;
+
+                nir_variable *var = c->output_color_var[rt];
                 struct qreg *color = &c->outputs[var->data.driver_location * 4];
                 int num_components = glsl_get_vector_elements(var->type);
-                uint32_t conf = ~0;
+                uint32_t conf = 0xffffff00;
                 struct qinst *inst;
 
+                conf |= TLB_SAMPLE_MODE_PER_PIXEL;
+                conf |= (7 - rt) << TLB_RENDER_TARGET_SHIFT;
+
                 assert(num_components != 0);
                 switch (glsl_get_base_type(var->type)) {
                 case GLSL_TYPE_UINT:
                 case GLSL_TYPE_INT:
-                        conf = (TLB_TYPE_I32_COLOR |
-                                TLB_SAMPLE_MODE_PER_PIXEL |
-                                ((7 - 0) << TLB_RENDER_TARGET_SHIFT) |
-                                ((num_components - 1) <<
-                                 TLB_VEC_SIZE_MINUS_1_SHIFT) |
-                                0xffffff00);
-
+                        conf |= TLB_TYPE_I32_COLOR;
+                        conf |= ((num_components - 1) <<
+                                 TLB_VEC_SIZE_MINUS_1_SHIFT);
 
                         inst = vir_MOV_dest(c, vir_reg(QFILE_TLBU, 0), color[0]);
-                        vir_set_cond(inst, discard_cond);
                         inst->src[vir_get_implicit_uniform_src(inst)] =
                                 vir_uniform_ui(c, conf);
 
                         for (int i = 1; i < num_components; i++) {
                                 inst = vir_MOV_dest(c, vir_reg(QFILE_TLB, 0),
                                                     color[i]);
-                                vir_set_cond(inst, discard_cond);
                         }
                         break;
 
@@ -1107,15 +1147,43 @@ emit_frag_end(struct v3d_compile *c)
                         struct qreg b = color[2];
                         struct qreg a = color[3];
 
-                        if (c->fs_key->swap_color_rb)  {
+                        if (c->fs_key->f32_color_rb) {
+                                conf |= TLB_TYPE_F32_COLOR;
+                                conf |= ((num_components - 1) <<
+                                         TLB_VEC_SIZE_MINUS_1_SHIFT);
+                        } else {
+                                conf |= TLB_TYPE_F16_COLOR;
+                                conf |= TLB_F16_SWAP_HI_LO;
+                                if (num_components >= 3)
+                                        conf |= TLB_VEC_SIZE_4_F16;
+                                else
+                                        conf |= TLB_VEC_SIZE_2_F16;
+                        }
+
+                        if (c->fs_key->swap_color_rb & (1 << rt))  {
                                 r = color[2];
                                 b = color[0];
                         }
 
-                        inst = vir_VFPACK_dest(c, vir_reg(QFILE_TLB, 0), r, g);
-                        vir_set_cond(inst, discard_cond);
-                        inst = vir_VFPACK_dest(c, vir_reg(QFILE_TLB, 0), b, a);
-                        vir_set_cond(inst, discard_cond);
+                        if (c->fs_key->f32_color_rb & (1 << rt)) {
+                                inst = vir_MOV_dest(c, vir_reg(QFILE_TLBU, 0), color[0]);
+                                inst->src[vir_get_implicit_uniform_src(inst)] =
+                                        vir_uniform_ui(c, conf);
+
+                                for (int i = 1; i < num_components; i++) {
+                                        inst = vir_MOV_dest(c, vir_reg(QFILE_TLB, 0),
+                                                            color[i]);
+                                }
+                        } else {
+                                inst = vir_VFPACK_dest(c, vir_reg(QFILE_TLB, 0), r, g);
+                                if (conf != ~0) {
+                                        inst->dst.file = QFILE_TLBU;
+                                        inst->src[vir_get_implicit_uniform_src(inst)] =
+                                                vir_uniform_ui(c, conf);
+                                }
+
+                                inst = vir_VFPACK_dest(c, vir_reg(QFILE_TLB, 0), b, a);
+                        }
                         break;
                 }
                 }
@@ -1341,7 +1409,7 @@ ntq_setup_inputs(struct v3d_compile *c)
         qsort(&vars, num_entries, sizeof(*vars), driver_location_compare);
 
         uint32_t vpm_components_queued = 0;
-        if (c->s->stage == MESA_SHADER_VERTEX) {
+        if (c->s->info.stage == MESA_SHADER_VERTEX) {
                 bool uses_iid = c->s->info.system_values_read &
                         (1ull << SYSTEM_VALUE_INSTANCE_ID);
                 bool uses_vid = c->s->info.system_values_read &
@@ -1371,7 +1439,7 @@ ntq_setup_inputs(struct v3d_compile *c)
                 resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
                                   (loc + 1) * 4);
 
-                if (c->s->stage == MESA_SHADER_FRAGMENT) {
+                if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
                         if (var->data.location == VARYING_SLOT_POS) {
                                 emit_fragcoord_input(c, loc);
                         } else if (var->data.location == VARYING_SLOT_PNTC ||
@@ -1399,7 +1467,7 @@ ntq_setup_inputs(struct v3d_compile *c)
                 }
         }
 
-        if (c->s->stage == MESA_SHADER_VERTEX) {
+        if (c->s->info.stage == MESA_SHADER_VERTEX) {
                 assert(vpm_components_queued == 0);
                 assert(num_components == 0);
         }
@@ -1415,14 +1483,26 @@ ntq_setup_outputs(struct v3d_compile *c)
                 assert(array_len == 1);
                 (void)array_len;
 
-                for (int i = 0; i < 4; i++)
-                        add_output(c, loc + i, var->data.location, i);
+                for (int i = 0; i < glsl_get_vector_elements(var->type); i++) {
+                        add_output(c, loc + var->data.location_frac + i,
+                                   var->data.location,
+                                   var->data.location_frac + i);
+                }
 
-                if (c->s->stage == MESA_SHADER_FRAGMENT) {
+                if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
                         switch (var->data.location) {
                         case FRAG_RESULT_COLOR:
+                                c->output_color_var[0] = var;
+                                c->output_color_var[1] = var;
+                                c->output_color_var[2] = var;
+                                c->output_color_var[3] = var;
+                                break;
                         case FRAG_RESULT_DATA0:
-                                c->output_color_var = var;
+                        case FRAG_RESULT_DATA1:
+                        case FRAG_RESULT_DATA2:
+                        case FRAG_RESULT_DATA3:
+                                c->output_color_var[var->data.location -
+                                                    FRAG_RESULT_DATA0] = var;
                                 break;
                         case FRAG_RESULT_DEPTH:
                                 c->output_position_index = loc;
@@ -1622,10 +1702,12 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
         case nir_intrinsic_discard:
                 if (c->execute.file != QFILE_NULL) {
                         vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
-                        vir_MOV_cond(c, V3D_QPU_COND_IFA, c->discard,
-                                     vir_uniform_ui(c, ~0));
+                        vir_set_cond(vir_SETMSF_dest(c, vir_reg(QFILE_NULL, 0),
+                                                     vir_uniform_ui(c, 0)),
+                                V3D_QPU_COND_IFA);
                 } else {
-                        vir_MOV_dest(c, c->discard, vir_uniform_ui(c, ~0));
+                        vir_SETMSF_dest(c, vir_reg(QFILE_NULL, 0),
+                                        vir_uniform_ui(c, 0));
                 }
                 break;
 
@@ -1640,9 +1722,14 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                          */
                         vir_PF(c, vir_AND(c, c->execute, vir_NOT(c, cond)),
                                V3D_QPU_PF_PUSHZ);
-                        vir_MOV_cond(c, V3D_QPU_COND_IFA, c->discard, cond);
+                        vir_set_cond(vir_SETMSF_dest(c, vir_reg(QFILE_NULL, 0),
+                                                     vir_uniform_ui(c, 0)),
+                                     V3D_QPU_COND_IFA);
                 } else {
-                        vir_OR_dest(c, c->discard, c->discard, cond);
+                        vir_PF(c, cond, V3D_QPU_PF_PUSHZ);
+                        vir_set_cond(vir_SETMSF_dest(c, vir_reg(QFILE_NULL, 0),
+                                                     vir_uniform_ui(c, 0)),
+                                     V3D_QPU_COND_IFNA);
                 }
 
                 break;
@@ -1905,14 +1992,11 @@ ntq_emit_impl(struct v3d_compile *c, nir_function_impl *impl)
 static void
 nir_to_vir(struct v3d_compile *c)
 {
-        if (c->s->stage == MESA_SHADER_FRAGMENT) {
+        if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
                 c->payload_w = vir_MOV(c, vir_reg(QFILE_REG, 0));
                 c->payload_w_centroid = vir_MOV(c, vir_reg(QFILE_REG, 1));
                 c->payload_z = vir_MOV(c, vir_reg(QFILE_REG, 2));
 
-                if (c->s->info.fs.uses_discard)
-                        c->discard = vir_MOV(c, vir_uniform_ui(c, 0));
-
                 if (c->fs_key->is_points) {
                         c->point_x = emit_fragment_varying(c, NULL, 0);
                         c->point_y = emit_fragment_varying(c, NULL, 0);
@@ -1939,12 +2023,18 @@ const nir_shader_compiler_options v3d_nir_options = {
         .lower_extract_word = true,
         .lower_bitfield_insert = true,
         .lower_bitfield_extract = true,
+        .lower_pack_unorm_2x16 = true,
+        .lower_pack_snorm_2x16 = true,
+        .lower_pack_unorm_4x8 = true,
+        .lower_pack_snorm_4x8 = true,
+        .lower_unpack_unorm_4x8 = true,
+        .lower_unpack_snorm_4x8 = true,
+        .lower_fdiv = true,
         .lower_ffma = true,
         .lower_flrp32 = true,
         .lower_fpow = true,
         .lower_fsat = true,
         .lower_fsqrt = true,
-        .lower_negate = true,
         .native_integers = true,
 };
 
@@ -1970,7 +2060,7 @@ void
 v3d_nir_to_vir(struct v3d_compile *c)
 {
         if (V3D_DEBUG & (V3D_DEBUG_NIR |
-                         v3d_debug_flag_for_shader_stage(c->s->stage))) {
+                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
                 fprintf(stderr, "%s prog %d/%d NIR:\n",
                         vir_get_stage_name(c),
                         c->program_id, c->variant_id);
@@ -1979,7 +2069,7 @@ v3d_nir_to_vir(struct v3d_compile *c)
 
         nir_to_vir(c);
 
-        switch (c->s->stage) {
+        switch (c->s->info.stage) {
         case MESA_SHADER_FRAGMENT:
                 emit_frag_end(c);
                 break;
@@ -1991,7 +2081,7 @@ v3d_nir_to_vir(struct v3d_compile *c)
         }
 
         if (V3D_DEBUG & (V3D_DEBUG_VIR |
-                         v3d_debug_flag_for_shader_stage(c->s->stage))) {
+                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
                 fprintf(stderr, "%s prog %d/%d pre-opt VIR:\n",
                         vir_get_stage_name(c),
                         c->program_id, c->variant_id);
@@ -2005,7 +2095,7 @@ v3d_nir_to_vir(struct v3d_compile *c)
         /* XXX: vir_schedule_instructions(c); */
 
         if (V3D_DEBUG & (V3D_DEBUG_VIR |
-                         v3d_debug_flag_for_shader_stage(c->s->stage))) {
+                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
                 fprintf(stderr, "%s prog %d/%d VIR:\n",
                         vir_get_stage_name(c),
                         c->program_id, c->variant_id);