}
}
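+/* MIN(a, a) == MAX(a, a) == a, so we can forward the source directly. */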
+static void
+handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi)
+{
+ struct nv_value *src0 = nvi->src[0]->value;
+ struct nv_value *src1 = nvi->src[1]->value;
+
+ if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod))
+ return;
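+ /* Only forward GPR values; an immediate or memory source might not be
+ * permitted in every instruction that consumes the result.
+ */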
+ if (src0->reg.file != NV_FILE_GPR)
+ return;
+ nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0);
+ nvc0_insn_delete(nvi);
+}
+
+/* Check if we can combine a MUL and an ADD into a single MAD/FMA. */
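+/* E.g.: t = MUL(a, b); d = ADD(t, c) ==> d = MAD(a, b, c), if t has no other uses. */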
+static void
+handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi)
+{
+ struct nv_value *src0 = nvi->src[0]->value;
+ struct nv_value *src1 = nvi->src[1]->value;
+ struct nv_value *src;
+ int s;
+ uint8_t mod[4];
+
+ if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0;
+ else
+ if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1;
+ else
+ return;
+
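+ /* Both source definitions must live in the same block as the ADD. */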
+ if ((src0->insn && src0->insn->bb != nvi->bb) ||
+ (src1->insn && src1->insn->bb != nvi->bb))
+ return;
+
+ /* Sources may have become immediates through the constant folding above. */
+ if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
+ return;
+ src = nvi->src[s]->value;
+
+ mod[0] = nvi->src[0]->mod;
+ mod[1] = nvi->src[1]->mod;
+ mod[2] = src->insn->src[0]->mod;
+ mod[3] = src->insn->src[1]->mod;
+
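+ /* Only negation modifiers can be carried over into the MAD sources. */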
+ if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
+ return;
+
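+ /* Rewrite the ADD into a MAD: the remaining ADD operand becomes the
+ * addend in src[2], and the MUL's operands become src[0] and src[1].
+ */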
+ nvi->opcode = NV_OP_MAD_F32;
+
+ nv_reference(ctx->pc, nvi, s, NULL);
+ nvi->src[2] = nvi->src[!s];
+ nvi->src[!s] = NULL;
+
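+ /* A NEG on the MUL result is pushed into the MUL's first operand;
+ * the XOR makes two negations cancel.
+ */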
+ nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value);
+ nvi->src[0]->mod = mod[2] ^ mod[s];
+ nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value);
+ nvi->src[1]->mod = mod[3];
+}
+
static int
-nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
+nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b)
{
struct nv_instruction *nvi, *next;
int j;
for (nvi = b->entry; nvi; nvi = next) {
- struct nv_value *src0, *src1, *src;
- int s;
- uint8_t mod[4];
+ struct nv_value *src0, *src1;
+ uint baseop = NV_BASEOP(nvi->opcode);
next = nvi->next;
src0 = nvc0_pc_find_immediate(nvi->src[0]);
src1 = nvc0_pc_find_immediate(nvi->src[1]);
- if (src0 && src1)
+ if (src0 && src1) {
constant_expression(ctx->pc, nvi, src0, src1);
- else {
+ } else {
if (src0)
constant_operand(ctx->pc, nvi, src0, 0);
else
if (src1)
constant_operand(ctx->pc, nvi, src1, 1);
}
- /* check if we can MUL + ADD -> MAD/FMA */
- if (nvi->opcode != NV_OP_ADD)
- continue;
-
- src0 = nvi->src[0]->value;
- src1 = nvi->src[1]->value;
-
- if (SRC_IS_MUL(src0) && src0->refc == 1)
- src = src0;
- else
- if (SRC_IS_MUL(src1) && src1->refc == 1)
- src = src1;
+ if (baseop == NV_OP_MIN || baseop == NV_OP_MAX)
+ handle_min_max(ctx, nvi);
else
- continue;
-
- /* could have an immediate from above constant_* */
- if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
- continue;
- s = (src == src0) ? 0 : 1;
-
- mod[0] = nvi->src[0]->mod;
- mod[1] = nvi->src[1]->mod;
- mod[2] = src->insn->src[0]->mod;
- mod[3] = src->insn->src[0]->mod;
-
- if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
- continue;
-
- nvi->opcode = NV_OP_MAD;
- nv_reference(ctx->pc, nvi, s, NULL);
- nvi->src[2] = nvi->src[!s];
-
- nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
- nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
- nvi->src[0]->mod = mod[2] ^ mod[s];
- nvi->src[1]->mod = mod[3];
+ if (nvi->opcode == NV_OP_ADD_F32)
+ handle_add_mul(ctx, nvi);
}
- DESCEND_ARBITRARY(j, nv_pass_lower_arith);
+ DESCEND_ARBITRARY(j, nv_pass_algebraic_opt);
return 0;
}
pass.n = 0;
pass.pc = pc;
+ /* Do CSE so we can just compare values by pointer in subsequent passes. */
+ pc->pass_seq++;
+ ret = nv_pass_cse(&pass, root);
+ if (ret)
+ return ret;
+
/* Do this first, so we don't have to pay attention
* to whether sources are supported memory loads.
*/
pc->pass_seq++;
- ret = nv_pass_lower_arith(&pass, root);
+ ret = nv_pass_algebraic_opt(&pass, root);
if (ret)
return ret;
reldelim->pc = pc;
}
- pc->pass_seq++;
- ret = nv_pass_cse(&pass, root);
- if (ret)
- return ret;
-
+ /* We may run DCE before load-combining, since the latter cleans up
+ * after itself.
+ */
dce.pc = pc;
do {
dce.removed = 0;