vc4: Add missing scheduling dependency for MS color writes.
[mesa.git] / src / gallium / drivers / vc4 / vc4_program.c
index 3e402d048baf8016662151085ecd8c7dd54c4578..d99862ad5ba843eef7cdb08b51731a17e59eda15 100644 (file)
 #include "util/ralloc.h"
 #include "util/hash_table.h"
 #include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_lowering.h"
 #include "tgsi/tgsi_parse.h"
-#include "glsl/nir/nir.h"
-#include "glsl/nir/nir_builder.h"
+#include "compiler/nir/nir.h"
+#include "compiler/nir/nir_builder.h"
 #include "nir/tgsi_to_nir.h"
 #include "vc4_context.h"
 #include "vc4_qpu.h"
@@ -118,7 +117,7 @@ nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
         intr->const_index[0] = (VC4_NIR_STATE_UNIFORM_OFFSET + contents) * 4;
         intr->num_components = 1;
         intr->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
-        nir_ssa_dest_init(&intr->instr, &intr->dest, 1, NULL);
+        nir_ssa_dest_init(&intr->instr, &intr->dest, 1, 32, NULL);
         nir_builder_instr_insert(b, &intr->instr);
         return &intr->dest.ssa;
 }
@@ -296,7 +295,7 @@ ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr)
         uint32_t tile_size = (tile_height * tile_width *
                               VC4_MAX_SAMPLES * sizeof(uint32_t));
 
-        unsigned unit = instr->sampler_index;
+        unsigned unit = instr->texture_index;
         uint32_t w = align(c->key->tex[unit].msaa_width, tile_width);
         uint32_t w_tiles = w / tile_width;
         uint32_t h = align(c->key->tex[unit].msaa_height, tile_height);
@@ -339,7 +338,7 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
 {
         struct qreg s, t, r, lod, proj, compare;
         bool is_txb = false, is_txl = false, has_proj = false;
-        unsigned unit = instr->sampler_index;
+        unsigned unit = instr->texture_index;
 
         if (instr->op == nir_texop_txf) {
                 ntq_emit_txf(c, instr);
@@ -885,7 +884,9 @@ ntq_emit_comparison(struct vc4_compile *c, struct qreg *dest,
         struct qreg src0 = ntq_get_alu_src(c, compare_instr, 0);
         struct qreg src1 = ntq_get_alu_src(c, compare_instr, 1);
 
-        if (nir_op_infos[compare_instr->op].input_types[0] == nir_type_float)
+        unsigned unsized_type =
+                nir_alu_type_get_base_type(nir_op_infos[compare_instr->op].input_types[0]);
+        if (unsized_type == nir_type_float)
                 qir_SF(c, qir_FSUB(c, src0, src1));
         else
                 qir_SF(c, qir_SUB(c, src0, src1));
@@ -1184,8 +1185,11 @@ emit_frag_end(struct vc4_compile *c)
                 color = qir_uniform_ui(c, 0);
         }
 
-        if (c->discard.file != QFILE_NULL)
-                qir_TLB_DISCARD_SETUP(c, c->discard);
+        uint32_t discard_cond = QPU_COND_ALWAYS;
+        if (c->discard.file != QFILE_NULL) {
+                qir_SF(c, c->discard);
+                discard_cond = QPU_COND_ZS;
+        }
 
         if (c->fs_key->stencil_enabled) {
                 qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 0));
@@ -1209,14 +1213,18 @@ emit_frag_end(struct vc4_compile *c)
                 } else {
                         z = qir_FRAG_Z(c);
                 }
-                qir_TLB_Z_WRITE(c, z);
+                struct qinst *inst = qir_TLB_Z_WRITE(c, z);
+                inst->cond = discard_cond;
         }
 
         if (!c->msaa_per_sample_output) {
-                qir_TLB_COLOR_WRITE(c, color);
+                struct qinst *inst = qir_TLB_COLOR_WRITE(c, color);
+                inst->cond = discard_cond;
         } else {
-                for (int i = 0; i < VC4_MAX_SAMPLES; i++)
-                        qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]);
+                for (int i = 0; i < VC4_MAX_SAMPLES; i++) {
+                        struct qinst *inst = qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]);
+                        inst->cond = discard_cond;
+                }
         }
 }
 
@@ -1362,16 +1370,16 @@ vc4_optimize_nir(struct nir_shader *s)
         do {
                 progress = false;
 
-                nir_lower_vars_to_ssa(s);
-                nir_lower_alu_to_scalar(s);
+                NIR_PASS_V(s, nir_lower_vars_to_ssa);
+                NIR_PASS_V(s, nir_lower_alu_to_scalar);
 
-                progress = nir_copy_prop(s) || progress;
-                progress = nir_opt_dce(s) || progress;
-                progress = nir_opt_cse(s) || progress;
-                progress = nir_opt_peephole_select(s) || progress;
-                progress = nir_opt_algebraic(s) || progress;
-                progress = nir_opt_constant_folding(s) || progress;
-                progress = nir_opt_undef(s) || progress;
+                NIR_PASS(progress, s, nir_copy_prop);
+                NIR_PASS(progress, s, nir_opt_dce);
+                NIR_PASS(progress, s, nir_opt_cse);
+                NIR_PASS(progress, s, nir_opt_peephole_select);
+                NIR_PASS(progress, s, nir_opt_algebraic);
+                NIR_PASS(progress, s, nir_opt_constant_folding);
+                NIR_PASS(progress, s, nir_opt_undef);
         } while (progress);
 }
 
@@ -1512,7 +1520,7 @@ ntq_emit_load_const(struct vc4_compile *c, nir_load_const_instr *instr)
 {
         struct qreg *qregs = ntq_init_ssa_def(c, &instr->def);
         for (int i = 0; i < instr->def.num_components; i++)
-                qregs[i] = qir_uniform_ui(c, instr->value.u[i]);
+                qregs[i] = qir_uniform_ui(c, instr->value.u32[i]);
 
         _mesa_hash_table_insert(c->def_ht, &instr->def, qregs);
 }
@@ -1546,7 +1554,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
                 assert(instr->num_components == 1);
                 const_offset = nir_src_as_const_value(instr->src[0]);
                 if (const_offset) {
-                        offset = instr->const_index[0] + const_offset->u[0];
+                        offset = instr->const_index[0] + const_offset->u32[0];
                         assert(offset % 4 == 0);
                         /* We need dwords */
                         offset = offset / 4;
@@ -1564,8 +1572,10 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
                 break;
 
         case nir_intrinsic_load_user_clip_plane:
-                *dest = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
-                                    instr->const_index[0]);
+                for (int i = 0; i < instr->num_components; i++) {
+                        dest[i] = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
+                                              instr->const_index[0] * 4 + i);
+                }
                 break;
 
         case nir_intrinsic_load_sample_mask_in:
@@ -1577,7 +1587,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
                 const_offset = nir_src_as_const_value(instr->src[0]);
                 assert(const_offset && "vc4 doesn't support indirect inputs");
                 if (instr->const_index[0] >= VC4_NIR_TLB_COLOR_READ_INPUT) {
-                        assert(const_offset->u[0] == 0);
+                        assert(const_offset->u32[0] == 0);
                         /* Reads of the per-sample color need to be done in
                          * order.
                          */
@@ -1591,7 +1601,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
                         }
                         *dest = c->color_reads[sample_index];
                 } else {
-                        offset = instr->const_index[0] + const_offset->u[0];
+                        offset = instr->const_index[0] + const_offset->u32[0];
                         *dest = c->inputs[offset];
                 }
                 break;
@@ -1599,7 +1609,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
         case nir_intrinsic_store_output:
                 const_offset = nir_src_as_const_value(instr->src[1]);
                 assert(const_offset && "vc4 doesn't support indirect outputs");
-                offset = instr->const_index[0] + const_offset->u[0];
+                offset = instr->const_index[0] + const_offset->u32[0];
 
                 /* MSAA color outputs are the only case where we have an
                  * output that's not lowered to being a store of a single 32
@@ -1685,12 +1695,27 @@ ntq_emit_block(struct vc4_compile *c, nir_block *block)
         }
 }
 
+static void ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list);
+
+static void
+ntq_emit_loop(struct vc4_compile *c, nir_loop *nloop)
+{       /* Partial loop support: warn on stderr, then emit the loop body's CF list one time. */
+        fprintf(stderr, "LOOPS not fully handled. Rendering errors likely.\n");
+        ntq_emit_cf_list(c, &nloop->body); /* single pass over the body; nothing else loop-specific is emitted in this function */
+}
+
+static void
+ntq_emit_function(struct vc4_compile *c, nir_function_impl *func)
+{       /* Function CF nodes are not supported: report on stderr and abort the process. */
+        fprintf(stderr, "FUNCTIONS not handled.\n");
+        abort(); /* does not return; neither c nor func is used */
+}
+
 static void
 ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list)
 {
         foreach_list_typed(nir_cf_node, node, node, list) {
                 switch (node->type) {
-                        /* case nir_cf_node_loop: */
                 case nir_cf_node_block:
                         ntq_emit_block(c, nir_cf_node_as_block(node));
                         break;
@@ -1699,8 +1724,17 @@ ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list)
                         ntq_emit_if(c, nir_cf_node_as_if(node));
                         break;
 
+                case nir_cf_node_loop:
+                        ntq_emit_loop(c, nir_cf_node_as_loop(node));
+                        break;
+
+                case nir_cf_node_function:
+                        ntq_emit_function(c, nir_cf_node_as_function(node));
+                        break;
+
                 default:
-                        assert(0);
+                        fprintf(stderr, "Unknown NIR node type\n");
+                        abort();
                 }
         }
 }
@@ -1729,6 +1763,8 @@ nir_to_qir(struct vc4_compile *c)
 }
 
 static const nir_shader_compiler_options nir_options = {
+        .lower_extract_byte = true,
+        .lower_extract_word = true,
         .lower_ffma = true,
         .lower_flrp = true,
         .lower_fpow = true,
@@ -1799,11 +1835,11 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
         }
 
         c->s = tgsi_to_nir(tokens, &nir_options);
-        nir_opt_global_to_local(c->s);
-        nir_convert_to_ssa(c->s);
+        NIR_PASS_V(c->s, nir_opt_global_to_local);
+        NIR_PASS_V(c->s, nir_convert_to_ssa);
 
         if (stage == QSTAGE_FRAG)
-                vc4_nir_lower_blend(c);
+                NIR_PASS_V(c->s, vc4_nir_lower_blend, c);
 
         struct nir_lower_tex_options tex_options = {
                 /* We would need to implement txs, but we don't want the
@@ -1853,26 +1889,25 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
                 }
         }
 
-        nir_lower_tex(c->s, &tex_options);
+        NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
 
         if (c->fs_key && c->fs_key->light_twoside)
-                nir_lower_two_sided_color(c->s);
+                NIR_PASS_V(c->s, nir_lower_two_sided_color);
 
         if (stage == QSTAGE_FRAG)
-                nir_lower_clip_fs(c->s, c->key->ucp_enables);
+                NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables);
         else
-                nir_lower_clip_vs(c->s, c->key->ucp_enables);
+                NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables);
 
-        vc4_nir_lower_io(c);
-        vc4_nir_lower_txf_ms(c);
-        nir_lower_idiv(c->s);
-        nir_lower_load_const_to_scalar(c->s);
+        NIR_PASS_V(c->s, vc4_nir_lower_io, c);
+        NIR_PASS_V(c->s, vc4_nir_lower_txf_ms, c);
+        NIR_PASS_V(c->s, nir_lower_idiv);
+        NIR_PASS_V(c->s, nir_lower_load_const_to_scalar);
 
         vc4_optimize_nir(c->s);
 
-        nir_remove_dead_variables(c->s);
-
-        nir_convert_from_ssa(c->s, true);
+        NIR_PASS_V(c->s, nir_remove_dead_variables);
+        NIR_PASS_V(c->s, nir_convert_from_ssa, true);
 
         if (vc4_debug & VC4_DEBUG_SHADERDB) {
                 fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d NIR instructions\n",