freedreno/ir3: Add new ir3 pass to fold out fp16 conversions
author Hyunjun Ko <zzoon@igalia.com>
Fri, 11 Oct 2019 08:38:36 +0000 (08:38 +0000)
committer Marge Bot <eric+marge@anholt.net>
Mon, 24 Feb 2020 17:24:13 +0000 (17:24 +0000)
This pass tries to fold f2f16 conversion into alu instructions.
This will be useful to help reduce the number of instructions once
mesa starts supporting precision lowering.  For example:

  add.f r0.w, r0.w, c0.x
  cov.f32f16 hr2.x, r0.w

to

  add.f hr2.x, r0.w, c0.x

Additionally this pass also tries to fold f2f16 conversion into load_input
instruction:

  bary.f r0.x, 3, r0.w
  cov.f32f16 hr0.x, r0.x

to

  bary.f hr1.x, 3, r0.x

v2: Edit to not fold OPC_MAX_F and OPC_MIN_F, since that's not valid.

v3: Add OPC_ABSNEG_F to the blacklist as well.

v4: Don't remove dead cov instructions, DCE will do that later; don't
iterate through sources when a cov only has one; remove special
handling of IR3_REG_ARRAY and IR3_REG_RELATIV.

v5: Handle folding into u32.u32 movs of floats correctly, don't bail
out on IR3_REG_RELATIV or IR3_REG_ARRAY movs.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3822>

src/freedreno/ir3/ir3.h
src/freedreno/ir3/ir3_cf.c [new file with mode: 0644]
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/meson.build

index c682c6e0db02bf843ad23e0f2dfe6ea8afbfeb97..fc82932ba22e5da86737a1d83ea5830dfb657754 100644 (file)
@@ -1149,6 +1149,9 @@ struct ir3_shader_variant;
 void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list);
 void ir3_depth(struct ir3 *ir, struct ir3_shader_variant *so);
 
+/* fp16 conversion folding (fold f32->f16 cov into its producer): */
+void ir3_cf(struct ir3 *ir);
+
 /* copy-propagate: */
 void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so);
 
diff --git a/src/freedreno/ir3/ir3_cf.c b/src/freedreno/ir3/ir3_cf.c
new file mode 100644 (file)
index 0000000..4e29265
--- /dev/null
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2019 Google.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ir3.h"
+
+/* Returns true when instr is a mov whose cat1 source type is F32 and whose
+ * cat1 destination type is F16, i.e. a cov.f32f16.
+ */
+static bool
+is_fp16_conv(struct ir3_instruction *instr)
+{
+       return instr->opc == OPC_MOV &&
+                       instr->cat1.src_type == TYPE_F32 &&
+                       instr->cat1.dst_type == TYPE_F16;
+}
+
+/* Walks every instruction of every block in ir and looks at the ones that
+ * read conv_src as an SSA source.  Returns false as soon as one such reader
+ * is not an f32->f16 conversion; returns true otherwise (including when
+ * conv_src has no readers at all).
+ */
+static bool
+all_uses_fp16_conv(struct ir3 *ir, struct ir3_instruction *conv_src)
+{
+       foreach_block (blk, &ir->block_list) {
+               foreach_instr (user, &blk->instr_list) {
+                       struct ir3_instruction *operand;
+                       /* is_fp16_conv() only reads fields of the instruction, so
+                        * it can be evaluated once per reader.
+                        */
+                       bool user_is_conv = is_fp16_conv(user);
+
+                       foreach_ssa_src (operand, user) {
+                               if (!user_is_conv && operand == conv_src)
+                                       return false;
+                       }
+               }
+       }
+
+       return true;
+}
+
+/* Redirects every SSA source in ir that currently refers to conv so that it
+ * refers to replace instead.
+ */
+static void
+rewrite_uses(struct ir3 *ir, struct ir3_instruction *conv,
+                        struct ir3_instruction *replace)
+{
+       foreach_block (blk, &ir->block_list) {
+               foreach_instr (user, &blk->instr_list) {
+                       struct ir3_instruction *operand;
+
+                       foreach_ssa_src_n (operand, idx, user) {
+                               if (operand == conv) {
+                                       /* NOTE(review): assumes idx is a valid index into
+                                        * user->regs for every source the macro yields --
+                                        * confirm against foreach_ssa_src_n.
+                                        */
+                                       user->regs[idx]->instr = replace;
+                               }
+                       }
+               }
+       }
+}
+
+/* Attempts to fold the f32->f16 conversion conv into the instruction that
+ * produces its source: the producer is switched to a half destination and
+ * every reader of conv is redirected to the producer.  Does nothing when
+ * conv is not an f32->f16 conversion, when the producer is not an ALU
+ * instruction or is one of the excluded opcodes, or when the producer has
+ * readers other than f32->f16 conversions.  conv itself is not removed here.
+ */
+static void
+try_conversion_folding(struct ir3 *ir, struct ir3_instruction *conv)
+{
+       struct ir3_instruction *src;
+
+       if (!is_fp16_conv(conv))
+               return;
+
+       /* Guard against a source without an SSA producer before is_alu()
+        * inspects it (ssa() presumably yields NULL in that case -- confirm
+        * against ir3.h).
+        */
+       src = ssa(conv->regs[1]);
+       if (!src || !is_alu(src))
+               return;
+
+       /* Producers the conversion must not be folded into: */
+       switch (src->opc) {
+       case OPC_SEL_B32:
+       case OPC_MAX_F:
+       case OPC_MIN_F:
+       case OPC_ABSNEG_F:
+               return;
+       default:
+               break;
+       }
+
+       /* The producer's result type is about to change, so every reader of
+        * it must be a conversion that gets replaced below.
+        */
+       if (!all_uses_fp16_conv(ir, src))
+               return;
+
+       if (src->opc == OPC_MOV) {
+               if (src->cat1.dst_type == src->cat1.src_type) {
+                       /* If we're folding a conversion into a bitwise move, we need to
+                        * turn the move itself into an f32->f16 conversion (src type
+                        * F32, dst type F16) to get the right behavior, since we could
+                        * be moving a float with a u32.u32 move.
+                        */
+                       src->cat1.dst_type = TYPE_F16;
+                       src->cat1.src_type = TYPE_F32;
+               } else {
+                       /* Otherwise, for typechanging movs, we can just change the dst
+                        * type to F16 to collapse the two conversions.  For example
+                        * cov.s32f32 followed by cov.f32f16 becomes cov.s32f16.
+                        */
+                       src->cat1.dst_type = TYPE_F16;
+               }
+       }
+
+       /* Flag the producer's regs[0] as half. */
+       src->regs[0]->flags |= IR3_REG_HALF;
+
+       /* Point every reader of the conversion at the producer instead. */
+       rewrite_uses(ir, conv, src);
+}
+
+/* Pass entry point: hands every instruction of every block in ir to
+ * try_conversion_folding().
+ */
+void
+ir3_cf(struct ir3 *ir)
+{
+       foreach_block_safe (blk, &ir->block_list) {
+               foreach_instr_safe (candidate, &blk->instr_list) {
+                       try_conversion_folding(ir, candidate);
+               }
+       }
+}
index 747b66ac58aeb14f60e0969bc1a8daa1b78f6c53..00f914227dc5a091668372c2c83a71ad50c1089c 100644 (file)
@@ -3345,6 +3345,10 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
        if (so->binning_pass && (ctx->compiler->gpu_id < 600))
                fixup_binning_pass(ctx);
 
+       ir3_debug_print(ir, "BEFORE CF");
+
+       ir3_cf(ir);
+
        ir3_debug_print(ir, "BEFORE CP");
 
        ir3_cp(ir, so);
index 89ec273034e75809cab66f127d0ed1049f646d3c..1e64fcda1f9e7dc08a780572e534cd06907d1326 100644 (file)
@@ -53,6 +53,7 @@ libfreedreno_ir3_files = files(
   'ir3_compiler.h',
   'ir3_context.c',
   'ir3_context.h',
+  'ir3_cf.c',
   'ir3_cp.c',
   'ir3_delay.c',
   'ir3_depth.c',