freedreno/ir3: convert back to 32-bit values for half constant registers.

author Hyunjun Ko <zzoon@igalia.com>

Thu, 21 Mar 2019 08:30:11 +0000 (17:30 +0900)

committer Rob Clark <robdclark@chromium.org>

Mon, 3 Jun 2019 19:44:03 +0000 (12:44 -0700)
author Hyunjun Ko <zzoon@igalia.com>
Thu, 21 Mar 2019 08:30:11 +0000 (17:30 +0900)
committer Rob Clark <robdclark@chromium.org>
Mon, 3 Jun 2019 19:44:03 +0000 (12:44 -0700)
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h

index 05245cd54bb370d0cbf8d51283d3fdbbe07bd92d..4d1a44b1deced126bfccc192521ab6ccb2f350d8 100644 (file)
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -877,6 +877,41 @@ static inline bool ir3_cat2_int(opc_t opc)
         }
  }
  
+/*
+ * Test whether a cat2 opcode is one of the float (_F suffixed) variants.
+ *
+ * Returns true only for the opcodes enumerated in the switch below; every
+ * other opcode value yields false.
+ */
+static inline bool ir3_cat2_float(opc_t opc)
+{
+       bool is_float;
+
+       switch (opc) {
+       case OPC_ADD_F:
+       case OPC_MUL_F:
+       case OPC_MIN_F:
+       case OPC_MAX_F:
+       case OPC_CMPS_F:
+       case OPC_CMPV_F:
+       case OPC_SIGN_F:
+       case OPC_ABSNEG_F:
+       case OPC_FLOOR_F:
+       case OPC_CEIL_F:
+       case OPC_TRUNC_F:
+       case OPC_RNDNE_F:
+       case OPC_RNDAZ_F:
+               is_float = true;
+               break;
+       default:
+               /* anything not listed above is not a cat2 float op: */
+               is_float = false;
+               break;
+       }
+
+       return is_float;
+}
+
+/*
+ * Test whether a cat3 opcode is one of the float variants.
+ *
+ * Returns true for the _F16/_F32 forms of MAD and SEL, false for any
+ * other opcode value.
+ */
+static inline bool ir3_cat3_float(opc_t opc)
+{
+       return opc == OPC_MAD_F16 || opc == OPC_MAD_F32 ||
+              opc == OPC_SEL_F16 || opc == OPC_SEL_F32;
+}
  
  /* map cat2 instruction to valid abs/neg flags: */
  static inline unsigned ir3_cat2_absneg(opc_t opc)
diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c

index dedbd8dbb1d3c6845399e1c51f38fd4d3c71f55a..9bd97b690e34fa095441f17009370ec0b7024e12 100644 (file)
--- a/src/freedreno/ir3/ir3_cp.c
+++ b/src/freedreno/ir3/ir3_cp.c
@@ -25,6 +25,8 @@
   */
  
  #include <math.h>
+#include "util/half_float.h"
+#include "util/u_math.h"
  
  #include "ir3.h"
  #include "ir3_compiler.h"
@@ -268,7 +270,7 @@ static void combine_flags(unsigned *dstflags, struct ir3_instruction *src)
  }
  
  static struct ir3_register *
-lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags)
+lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags, bool f_opcode)
  {
         unsigned swiz, idx, i;
  
@@ -318,6 +320,13 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags
                 /* need to generate a new immediate: */
                 swiz = i % 4;
                 idx  = i / 4;
+
+               /* Half constant registers seem to handle only 32-bit values
+                * within floating-point opcodes, so convert back to 32-bit values. */
+               if (f_opcode && (new_flags & IR3_REG_HALF)) {
+                       reg->uim_val = fui(_mesa_half_to_float(reg->uim_val));
+               }
+
                 const_state->immediates[idx].val[swiz] = reg->uim_val;
                 const_state->immediates_count = idx + 1;
                 const_state->immediate_idx++;
@@ -398,8 +407,12 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
                 if (!valid_flags(instr, n, new_flags)) {
                         /* See if lowering an immediate to const would help. */
                         if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
+                               bool f_opcode = (ir3_cat2_float(instr->opc) ||
+                                               ir3_cat3_float(instr->opc)) ? true : false;
+
                                 debug_assert(new_flags & IR3_REG_IMMED);
-                               instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags);
+
+                               instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags, f_opcode);
                                 return;
                         }
  
@@ -504,10 +517,12 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
                                 src_reg->iim_val = iim_val;
                                 instr->regs[n+1] = src_reg;
                         } else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
+                               bool f_opcode = (ir3_cat2_float(instr->opc) ||
+                                               ir3_cat3_float(instr->opc)) ? true : false;
+
                                 /* See if lowering an immediate to const would help. */
-                               instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags);
+                               instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags, f_opcode);
                         }
-
                         return;
                 }
         }
author	Hyunjun Ko <zzoon@igalia.com>
	Thu, 21 Mar 2019 08:30:11 +0000 (17:30 +0900)
committer	Rob Clark <robdclark@chromium.org>
	Mon, 3 Jun 2019 19:44:03 +0000 (12:44 -0700)
src/freedreno/ir3/ir3.h		patch \| blob \| history
src/freedreno/ir3/ir3_cp.c		patch \| blob \| history