vc4: Add missing scheduling dependency for MS color writes.

[mesa.git] / src / gallium / drivers / vc4 / vc4_program.c
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c

index 3e402d048baf8016662151085ecd8c7dd54c4578..d99862ad5ba843eef7cdb08b51731a17e59eda15 100644 (file)
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -30,10 +30,9 @@
  #include "util/ralloc.h"
  #include "util/hash_table.h"
  #include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_lowering.h"
  #include "tgsi/tgsi_parse.h"
-#include "glsl/nir/nir.h"
-#include "glsl/nir/nir_builder.h"
+#include "compiler/nir/nir.h"
+#include "compiler/nir/nir_builder.h"
  #include "nir/tgsi_to_nir.h"
  #include "vc4_context.h"
  #include "vc4_qpu.h"
@@ -118,7 +117,7 @@ nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
          intr->const_index[0] = (VC4_NIR_STATE_UNIFORM_OFFSET + contents) * 4;
          intr->num_components = 1;
          intr->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
-        nir_ssa_dest_init(&intr->instr, &intr->dest, 1, NULL);
+        nir_ssa_dest_init(&intr->instr, &intr->dest, 1, 32, NULL);
          nir_builder_instr_insert(b, &intr->instr);
          return &intr->dest.ssa;
  }
@@ -296,7 +295,7 @@ ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr)
          uint32_t tile_size = (tile_height * tile_width *
                                VC4_MAX_SAMPLES * sizeof(uint32_t));
  
-        unsigned unit = instr->sampler_index;
+        unsigned unit = instr->texture_index;
          uint32_t w = align(c->key->tex[unit].msaa_width, tile_width);
          uint32_t w_tiles = w / tile_width;
          uint32_t h = align(c->key->tex[unit].msaa_height, tile_height);
@@ -339,7 +338,7 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
  {
          struct qreg s, t, r, lod, proj, compare;
          bool is_txb = false, is_txl = false, has_proj = false;
-        unsigned unit = instr->sampler_index;
+        unsigned unit = instr->texture_index;
  
          if (instr->op == nir_texop_txf) {
                  ntq_emit_txf(c, instr);
@@ -885,7 +884,9 @@ ntq_emit_comparison(struct vc4_compile *c, struct qreg *dest,
          struct qreg src0 = ntq_get_alu_src(c, compare_instr, 0);
          struct qreg src1 = ntq_get_alu_src(c, compare_instr, 1);
  
-        if (nir_op_infos[compare_instr->op].input_types[0] == nir_type_float)
+        unsigned unsized_type =
+                nir_alu_type_get_base_type(nir_op_infos[compare_instr->op].input_types[0]);
+        if (unsized_type == nir_type_float)
                  qir_SF(c, qir_FSUB(c, src0, src1));
          else
                  qir_SF(c, qir_SUB(c, src0, src1));
@@ -1184,8 +1185,11 @@ emit_frag_end(struct vc4_compile *c)
                  color = qir_uniform_ui(c, 0);
          }
  
-        if (c->discard.file != QFILE_NULL)
-                qir_TLB_DISCARD_SETUP(c, c->discard);
+        uint32_t discard_cond = QPU_COND_ALWAYS;
+        if (c->discard.file != QFILE_NULL) {
+                qir_SF(c, c->discard);
+                discard_cond = QPU_COND_ZS;
+        }
  
          if (c->fs_key->stencil_enabled) {
                  qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 0));
@@ -1209,14 +1213,18 @@ emit_frag_end(struct vc4_compile *c)
                  } else {
                          z = qir_FRAG_Z(c);
                  }
-                qir_TLB_Z_WRITE(c, z);
+                struct qinst *inst = qir_TLB_Z_WRITE(c, z);
+                inst->cond = discard_cond;
          }
  
          if (!c->msaa_per_sample_output) {
-                qir_TLB_COLOR_WRITE(c, color);
+                struct qinst *inst = qir_TLB_COLOR_WRITE(c, color);
+                inst->cond = discard_cond;
          } else {
-                for (int i = 0; i < VC4_MAX_SAMPLES; i++)
-                        qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]);
+                for (int i = 0; i < VC4_MAX_SAMPLES; i++) {
+                        struct qinst *inst = qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]);
+                        inst->cond = discard_cond;
+                }
          }
  }
  
@@ -1362,16 +1370,16 @@ vc4_optimize_nir(struct nir_shader *s)
          do {
                  progress = false;
  
-                nir_lower_vars_to_ssa(s);
-                nir_lower_alu_to_scalar(s);
+                NIR_PASS_V(s, nir_lower_vars_to_ssa);
+                NIR_PASS_V(s, nir_lower_alu_to_scalar);
  
-                progress = nir_copy_prop(s) || progress;
-                progress = nir_opt_dce(s) || progress;
-                progress = nir_opt_cse(s) || progress;
-                progress = nir_opt_peephole_select(s) || progress;
-                progress = nir_opt_algebraic(s) || progress;
-                progress = nir_opt_constant_folding(s) || progress;
-                progress = nir_opt_undef(s) || progress;
+                NIR_PASS(progress, s, nir_copy_prop);
+                NIR_PASS(progress, s, nir_opt_dce);
+                NIR_PASS(progress, s, nir_opt_cse);
+                NIR_PASS(progress, s, nir_opt_peephole_select);
+                NIR_PASS(progress, s, nir_opt_algebraic);
+                NIR_PASS(progress, s, nir_opt_constant_folding);
+                NIR_PASS(progress, s, nir_opt_undef);
          } while (progress);
  }
  
@@ -1512,7 +1520,7 @@ ntq_emit_load_const(struct vc4_compile *c, nir_load_const_instr *instr)
  {
          struct qreg *qregs = ntq_init_ssa_def(c, &instr->def);
          for (int i = 0; i < instr->def.num_components; i++)
-                qregs[i] = qir_uniform_ui(c, instr->value.u[i]);
+                qregs[i] = qir_uniform_ui(c, instr->value.u32[i]);
  
          _mesa_hash_table_insert(c->def_ht, &instr->def, qregs);
  }
@@ -1546,7 +1554,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
                  assert(instr->num_components == 1);
                  const_offset = nir_src_as_const_value(instr->src[0]);
                  if (const_offset) {
-                        offset = instr->const_index[0] + const_offset->u[0];
+                        offset = instr->const_index[0] + const_offset->u32[0];
                          assert(offset % 4 == 0);
                          /* We need dwords */
                          offset = offset / 4;
@@ -1564,8 +1572,10 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
                  break;
  
          case nir_intrinsic_load_user_clip_plane:
-                *dest = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
-                                    instr->const_index[0]);
+                for (int i = 0; i < instr->num_components; i++) {
+                        dest[i] = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
+                                              instr->const_index[0] * 4 + i);
+                }
                  break;
  
          case nir_intrinsic_load_sample_mask_in:
@@ -1577,7 +1587,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
                  const_offset = nir_src_as_const_value(instr->src[0]);
                  assert(const_offset && "vc4 doesn't support indirect inputs");
                  if (instr->const_index[0] >= VC4_NIR_TLB_COLOR_READ_INPUT) {
-                        assert(const_offset->u[0] == 0);
+                        assert(const_offset->u32[0] == 0);
                          /* Reads of the per-sample color need to be done in
                           * order.
                           */
@@ -1591,7 +1601,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
                          }
                          *dest = c->color_reads[sample_index];
                  } else {
-                        offset = instr->const_index[0] + const_offset->u[0];
+                        offset = instr->const_index[0] + const_offset->u32[0];
                          *dest = c->inputs[offset];
                  }
                  break;
@@ -1599,7 +1609,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
          case nir_intrinsic_store_output:
                  const_offset = nir_src_as_const_value(instr->src[1]);
                  assert(const_offset && "vc4 doesn't support indirect outputs");
-                offset = instr->const_index[0] + const_offset->u[0];
+                offset = instr->const_index[0] + const_offset->u32[0];
  
                  /* MSAA color outputs are the only case where we have an
                   * output that's not lowered to being a store of a single 32
@@ -1685,12 +1695,27 @@ ntq_emit_block(struct vc4_compile *c, nir_block *block)
          }
  }
  
+static void ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list);
+
+static void
+ntq_emit_loop(struct vc4_compile *c, nir_loop *nloop)
+{
+        fprintf(stderr, "LOOPS not fully handled. Rendering errors likely.\n");
+        ntq_emit_cf_list(c, &nloop->body);
+}
+
+static void
+ntq_emit_function(struct vc4_compile *c, nir_function_impl *func)
+{
+        fprintf(stderr, "FUNCTIONS not handled.\n");
+        abort();
+}
+
  static void
  ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list)
  {
          foreach_list_typed(nir_cf_node, node, node, list) {
                  switch (node->type) {
-                        /* case nir_cf_node_loop: */
                  case nir_cf_node_block:
                          ntq_emit_block(c, nir_cf_node_as_block(node));
                          break;
@@ -1699,8 +1724,17 @@ ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list)
                          ntq_emit_if(c, nir_cf_node_as_if(node));
                          break;
  
+                case nir_cf_node_loop:
+                        ntq_emit_loop(c, nir_cf_node_as_loop(node));
+                        break;
+
+                case nir_cf_node_function:
+                        ntq_emit_function(c, nir_cf_node_as_function(node));
+                        break;
+
                  default:
-                        assert(0);
+                        fprintf(stderr, "Unknown NIR node type\n");
+                        abort();
                  }
          }
  }
@@ -1729,6 +1763,8 @@ nir_to_qir(struct vc4_compile *c)
  }
  
  static const nir_shader_compiler_options nir_options = {
+        .lower_extract_byte = true,
+        .lower_extract_word = true,
          .lower_ffma = true,
          .lower_flrp = true,
          .lower_fpow = true,
@@ -1799,11 +1835,11 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
          }
  
          c->s = tgsi_to_nir(tokens, &nir_options);
-        nir_opt_global_to_local(c->s);
-        nir_convert_to_ssa(c->s);
+        NIR_PASS_V(c->s, nir_opt_global_to_local);
+        NIR_PASS_V(c->s, nir_convert_to_ssa);
  
          if (stage == QSTAGE_FRAG)
-                vc4_nir_lower_blend(c);
+                NIR_PASS_V(c->s, vc4_nir_lower_blend, c);
  
          struct nir_lower_tex_options tex_options = {
                  /* We would need to implement txs, but we don't want the
@@ -1853,26 +1889,25 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
                  }
          }
  
-        nir_lower_tex(c->s, &tex_options);
+        NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
  
          if (c->fs_key && c->fs_key->light_twoside)
-                nir_lower_two_sided_color(c->s);
+                NIR_PASS_V(c->s, nir_lower_two_sided_color);
  
          if (stage == QSTAGE_FRAG)
-                nir_lower_clip_fs(c->s, c->key->ucp_enables);
+                NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables);
          else
-                nir_lower_clip_vs(c->s, c->key->ucp_enables);
+                NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables);
  
-        vc4_nir_lower_io(c);
-        vc4_nir_lower_txf_ms(c);
-        nir_lower_idiv(c->s);
-        nir_lower_load_const_to_scalar(c->s);
+        NIR_PASS_V(c->s, vc4_nir_lower_io, c);
+        NIR_PASS_V(c->s, vc4_nir_lower_txf_ms, c);
+        NIR_PASS_V(c->s, nir_lower_idiv);
+        NIR_PASS_V(c->s, nir_lower_load_const_to_scalar);
  
          vc4_optimize_nir(c->s);
  
-        nir_remove_dead_variables(c->s);
-
-        nir_convert_from_ssa(c->s, true);
+        NIR_PASS_V(c->s, nir_remove_dead_variables);
+        NIR_PASS_V(c->s, nir_convert_from_ssa, true);
  
          if (vc4_debug & VC4_DEBUG_SHADERDB) {
                  fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d NIR instructions\n",