Revert "i965/fs: Merge CMP and SEL into CSEL on Gen8+"
authorJason Ekstrand <jason@jlekstrand.net>
Fri, 1 Nov 2019 19:22:35 +0000 (14:22 -0500)
committerJason Ekstrand <jason@jlekstrand.net>
Wed, 20 Nov 2019 20:47:32 +0000 (20:47 +0000)
This reverts commit 52c7df1643ec9af119fd66f916f7fbdbcc798d2d.  The pass,
while clearly useful for some shaders, has at least three bugs that I
was able to find fairly quickly:

 1. It doesn't work for type-converting MOVs because f > 0 is not the
    same as f2i(f) > 0

 2. CSEL is a 3src instruction and only supports one source type; it
    doesn't take this into account and tries to create instructions
    which do a F compare and a D select.  This is especially nasty to
    debug because you don't see that in the dumped assembly because we
    don't properly assert that types are the same in codegen.

 3. While you can handle 2, in theory, by reinterpreting types, you
    can't do that in the presence of source modifiers.  This pass
    doesn't even attempt to detect that.

Those are just the ones I found with the one almost trival shader I was
debugging.  There very likely may be more and.  Best thing to do for now
is just shut it off until someone has the time to figure out how to do
this properly and write tests to ensure it's correct.

Fixes: 3cb085e6d61a "i965/fs: Merge CMP and SEL into CSEL on Gen8+"
Reviewed-by: Brian Paul <brianp@vmware.com>
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs.h

index 0d3d6137058f66dc4fb5e4a23e154f9a07dd40f0..52a54fecdaaf0aa57ed50d0f0064acc21cd4b2f4 100644 (file)
@@ -3142,107 +3142,6 @@ mask_relative_to(const fs_reg &r, const fs_reg &s, unsigned ds)
    return ((1 << n) - 1) << shift;
 }
 
-bool
-fs_visitor::opt_peephole_csel()
-{
-   if (devinfo->gen < 8)
-      return false;
-
-   bool progress = false;
-
-   foreach_block_reverse(block, cfg) {
-      int ip = block->end_ip + 1;
-
-      foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
-         ip--;
-
-         if (inst->opcode != BRW_OPCODE_SEL ||
-             inst->predicate != BRW_PREDICATE_NORMAL ||
-             (inst->dst.type != BRW_REGISTER_TYPE_F &&
-              inst->dst.type != BRW_REGISTER_TYPE_D &&
-              inst->dst.type != BRW_REGISTER_TYPE_UD))
-            continue;
-
-         /* Because it is a 3-src instruction, CSEL cannot have an immediate
-          * value as a source, but we can sometimes handle zero.
-          */
-         if ((inst->src[0].file != VGRF && inst->src[0].file != ATTR &&
-              inst->src[0].file != UNIFORM) ||
-             (inst->src[1].file != VGRF && inst->src[1].file != ATTR &&
-              inst->src[1].file != UNIFORM && !inst->src[1].is_zero()))
-            continue;
-
-         foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
-            if (!scan_inst->flags_written())
-               continue;
-
-            if ((scan_inst->opcode != BRW_OPCODE_CMP &&
-                 scan_inst->opcode != BRW_OPCODE_MOV) ||
-                scan_inst->predicate != BRW_PREDICATE_NONE ||
-                (scan_inst->src[0].file != VGRF &&
-                 scan_inst->src[0].file != ATTR &&
-                 scan_inst->src[0].file != UNIFORM) ||
-                scan_inst->src[0].type != BRW_REGISTER_TYPE_F)
-               break;
-
-            if (scan_inst->opcode == BRW_OPCODE_CMP && !scan_inst->src[1].is_zero())
-               break;
-
-            const brw::fs_builder ibld(this, block, inst);
-
-            const enum brw_conditional_mod cond =
-               inst->predicate_inverse
-               ? brw_negate_cmod(scan_inst->conditional_mod)
-               : scan_inst->conditional_mod;
-
-            fs_inst *csel_inst = NULL;
-
-            if (inst->src[1].file != IMM) {
-               csel_inst = ibld.CSEL(inst->dst,
-                                     inst->src[0],
-                                     inst->src[1],
-                                     scan_inst->src[0],
-                                     cond);
-            } else if (cond == BRW_CONDITIONAL_NZ) {
-               /* Consider the sequence
-                *
-                * cmp.nz.f0  null<1>F   g3<8,8,1>F   0F
-                * (+f0) sel  g124<1>UD  g2<8,8,1>UD  0x00000000UD
-                *
-                * The sel will pick the immediate value 0 if r0 is ±0.0.
-                * Therefore, this sequence is equivalent:
-                *
-                * cmp.nz.f0  null<1>F   g3<8,8,1>F   0F
-                * (+f0) sel  g124<1>F   g2<8,8,1>F   (abs)g3<8,8,1>F
-                *
-                * The abs is ensures that the result is 0UD when g3 is -0.0F.
-                * By normal cmp-sel merging, this is also equivalent:
-                *
-                * csel.nz    g124<1>F   g2<4,4,1>F   (abs)g3<4,4,1>F  g3<4,4,1>F
-                */
-               csel_inst = ibld.CSEL(inst->dst,
-                                     inst->src[0],
-                                     scan_inst->src[0],
-                                     scan_inst->src[0],
-                                     cond);
-
-               csel_inst->src[1].abs = true;
-            }
-
-            if (csel_inst != NULL) {
-               progress = true;
-               csel_inst->saturate = inst->saturate;
-               inst->remove(block);
-            }
-
-            break;
-         }
-      }
-   }
-
-   return progress;
-}
-
 bool
 fs_visitor::compute_to_mrf()
 {
@@ -7396,12 +7295,6 @@ fs_visitor::optimize()
       OPT(compact_virtual_grfs);
    } while (progress);
 
-   /* Do this after cmod propagation has had every possible opportunity to
-    * propagate results into SEL instructions.
-    */
-   if (OPT(opt_peephole_csel))
-      OPT(dead_code_eliminate);
-
    progress = false;
    pass_num = 0;
 
index fd5a1a9071ad4c8f31b11e64d1a5e11ac8339840..680bdc535ac1900b527eb1f36da8df6864911208 100644 (file)
@@ -190,7 +190,6 @@ public:
                    fs_reg result, fs_reg *op, unsigned fsign_src);
    void emit_shader_float_controls_execution_mode();
    bool opt_peephole_sel();
-   bool opt_peephole_csel();
    bool opt_peephole_predicated_break();
    bool opt_saturate_propagation();
    bool opt_cmod_propagation();