freedreno/ir3: Initialize the unused dwords of the immediates consts.

[mesa.git] / src / freedreno / ir3 / ir3_cp.c
diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c

index fa25c9ca8a1248e3fd4e4ef8f64ed5bc8a1ada3a..cb5f88630b1bcef3f68d3fd608a92db7b97295cb 100644 (file)
--- a/src/freedreno/ir3/ir3_cp.c
+++ b/src/freedreno/ir3/ir3_cp.c
@@ -85,10 +85,10 @@ static bool is_eligible_mov(struct ir3_instruction *instr,
                  * We could possibly do a bit better, and copy-propagation if
                  * we can CP all components that are being fanned out.
                  */
-               if (src_instr->opc == OPC_META_FO) {
+               if (src_instr->opc == OPC_META_SPLIT) {
                         if (!dst_instr)
                                 return false;
-                       if (dst_instr->opc == OPC_META_FI)
+                       if (dst_instr->opc == OPC_META_COLLECT)
                                 return false;
                         if (dst_instr->cp.left || dst_instr->cp.right)
                                 return false;
@@ -229,6 +229,9 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n,
                         if (instr->opc == OPC_STLW && n == 0)
                                 return false;
  
+                       if (instr->opc == OPC_LDLW && n == 0)
+                               return false;
+
                         /* disallow CP into anything but the SSBO slot argument for
                          * atomics:
                          */
@@ -238,10 +241,13 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n,
                         if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
                                 return false;
  
-                       /* as with atomics, ldib on a6xx can only have immediate for
-                        * SSBO slot argument
+                       if (instr->opc == OPC_STG && (instr->flags & IR3_INSTR_G) && (n != 2))
+                               return false;
+
+                       /* as with atomics, ldib and ldc on a6xx can only have immediate
+                        * for SSBO slot argument
                          */
-                       if ((instr->opc == OPC_LDIB) && (n != 0))
+                       if ((instr->opc == OPC_LDIB || instr->opc == OPC_LDC) && (n != 0))
                                 return false;
                 }
  
@@ -302,6 +308,12 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags
  
         reg = ir3_reg_clone(ctx->shader, reg);
  
+       /* Half constant registers seem to handle only 32-bit values
+        * within floating-point opcodes. So convert back to 32-bit values.
+        */
+       if (f_opcode && (new_flags & IR3_REG_HALF))
+               reg->uim_val = fui(_mesa_half_to_float(reg->uim_val));
+
         /* in some cases, there are restrictions on (abs)/(neg) plus const..
          * so just evaluate those and clear the flags:
          */
@@ -331,6 +343,9 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags
                 const_state->immediates_size += 4;
                 const_state->immediates = realloc (const_state->immediates,
                         const_state->immediates_size * sizeof(const_state->immediates[0]));
+
+               for (int i = const_state->immediate_idx; i < const_state->immediates_size * 4; i++)
+                       const_state->immediates[i / 4].val[i % 4] = 0xd0d0d0d0;
         }
  
         for (i = 0; i < const_state->immediate_idx; i++) {
@@ -347,12 +362,6 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags
                 swiz = i % 4;
                 idx  = i / 4;
  
-               /* Half constant registers seems to handle only 32-bit values
-                * within floating-point opcodes. So convert back to 32-bit values. */
-               if (f_opcode && (new_flags & IR3_REG_HALF)) {
-                       reg->uim_val = fui(_mesa_half_to_float(reg->uim_val));
-               }
-
                 const_state->immediates[idx].val[swiz] = reg->uim_val;
                 const_state->immediates_count = idx + 1;
                 const_state->immediate_idx++;
@@ -460,7 +469,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
  
                         return true;
                 }
-       } else if (is_same_type_mov(src) &&
+       } else if ((is_same_type_mov(src) || is_const_mov(src)) &&
                         /* cannot collapse const/immed/etc into meta instrs: */
                         !is_meta(instr)) {
                 /* immed/const/etc cases, which require some special handling: */
@@ -472,8 +481,8 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
                 if (!valid_flags(instr, n, new_flags)) {
                         /* See if lowering an immediate to const would help. */
                         if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
-                               bool f_opcode = (ir3_cat2_float(instr->opc) ||
-                                               ir3_cat3_float(instr->opc)) ? true : false;
+                               bool f_opcode = (is_cat2_float(instr->opc) ||
+                                               is_cat3_float(instr->opc)) ? true : false;
  
                                 debug_assert(new_flags & IR3_REG_IMMED);
  
@@ -520,6 +529,17 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
                                         (src_reg->array.offset == 0))
                                 return false;
  
+                       /* When narrowing constant from 32b to 16b, it seems
+                        * to work only for float. So we should do this only with
+                        * float opcodes.
+                        */
+                       if (src->cat1.dst_type == TYPE_F16) {
+                               if (instr->opc == OPC_MOV && !type_float(instr->cat1.src_type))
+                                       return false;
+                               if (!is_cat2_float(instr->opc) && !is_cat3_float(instr->opc))
+                                       return false;
+                       }
+
                         src_reg = ir3_reg_clone(instr->block->shader, src_reg);
                         src_reg->flags = new_flags;
                         instr->regs[n+1] = src_reg;
@@ -577,8 +597,8 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
  
                                 return true;
                         } else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
-                               bool f_opcode = (ir3_cat2_float(instr->opc) ||
-                                               ir3_cat3_float(instr->opc)) ? true : false;
+                               bool f_opcode = (is_cat2_float(instr->opc) ||
+                                               is_cat3_float(instr->opc)) ? true : false;
  
                                 /* See if lowering an immediate to const would help. */
                                 instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags, f_opcode);
@@ -629,7 +649,7 @@ instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr)
         bool progress;
         do {
                 progress = false;
-               foreach_src_n(reg, n, instr) {
+               foreach_src_n (reg, n, instr) {
                         struct ir3_instruction *src = ssa(reg);
  
                         if (!src)
@@ -696,19 +716,21 @@ instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr)
                 }
         }
  
-       /* Handle converting a sam.s2en (taking samp/tex idx params via
-        * register) into a normal sam (encoding immediate samp/tex idx)
-        * if they are immediate.  This saves some instructions and regs
-        * in the common case where we know samp/tex at compile time:
+       /* Handle converting a sam.s2en (taking samp/tex idx params via register)
+        * into a normal sam (encoding immediate samp/tex idx) if they are
+        * immediate. This saves some instructions and regs in the common case
+        * where we know samp/tex at compile time. This needs to be done in the
+        * frontend for bindless tex, though, so don't replicate it here.
          */
         if (is_tex(instr) && (instr->flags & IR3_INSTR_S2EN) &&
+                       !(instr->flags & IR3_INSTR_B) &&
                         !(ir3_shader_debug & IR3_DBG_FORCES2EN)) {
-               /* The first src will be a fan-in (collect), if both of it's
+               /* The first src will be a collect, if both of its
                  * two sources are mov from imm, then we can
                  */
                 struct ir3_instruction *samp_tex = ssa(instr->regs[1]);
  
-               debug_assert(samp_tex->opc == OPC_META_FI);
+               debug_assert(samp_tex->opc == OPC_META_COLLECT);
  
                 struct ir3_instruction *samp = ssa(samp_tex->regs[1]);
                 struct ir3_instruction *tex  = ssa(samp_tex->regs[2]);
@@ -720,7 +742,12 @@ instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr)
                         instr->flags &= ~IR3_INSTR_S2EN;
                         instr->cat5.samp = samp->regs[1]->iim_val;
                         instr->cat5.tex  = tex->regs[1]->iim_val;
-                       instr->regs[1]->instr = NULL;
+
+                       /* shuffle around the regs to remove the first src: */
+                       instr->regs_count--;
+                       for (unsigned i = 1; i < instr->regs_count; i++) {
+                               instr->regs[i] = instr->regs[i + 1];
+                       }
                 }
         }
  }
@@ -739,8 +766,8 @@ ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so)
          * a mov, so we need to do a pass to first count consumers of a
          * mov.
          */
-       list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
-               list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+       foreach_block (block, &ir->block_list) {
+               foreach_instr (instr, &block->instr_list) {
                         struct ir3_instruction *src;
  
                         /* by the way, we don't account for false-dep's, so the CP
@@ -748,7 +775,7 @@ ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so)
                          */
                         debug_assert(instr->deps_count == 0);
  
-                       foreach_ssa_src(src, instr) {
+                       foreach_ssa_src (src, instr) {
                                 src->use_count++;
                         }
                 }
@@ -756,14 +783,13 @@ ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so)
  
         ir3_clear_mark(ir);
  
-       for (unsigned i = 0; i < ir->noutputs; i++) {
-               if (ir->outputs[i]) {
-                       instr_cp(&ctx, ir->outputs[i]);
-                       ir->outputs[i] = eliminate_output_mov(ir->outputs[i]);
-               }
+       struct ir3_instruction *out;
+       foreach_output_n (out, n, ir) {
+               instr_cp(&ctx, out);
+               ir->outputs[n] = eliminate_output_mov(out);
         }
  
-       list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+       foreach_block (block, &ir->block_list) {
                 if (block->condition) {
                         instr_cp(&ctx, block->condition);
                         block->condition = eliminate_output_mov(block->condition);