From 54c48a95e6e0573886433f94ac83293876ffe03d Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Thu, 10 Feb 2011 15:48:27 -0800
Subject: [PATCH] mesa: Add partial constant propagation pass for Mesa IR

This cleans up some code generated by the IR-to-Mesa pass for i915.
In particular, some shaders involving arrays of constant matrices
result in really bad code.

v2: Silence several warnings from merging the gl_constant_value work.
Fix DP[23] folding.  Add support for a bunch more opcodes that appear
in piglit runs on i915.

Reviewed-by: Eric Anholt <eric@anholt.net>
---
 src/mesa/SConscript                       |   1 +
 src/mesa/program/prog_opt_constant_fold.c | 451 ++++++++++++++++++++++
 src/mesa/program/prog_optimize.c          |   2 +
 src/mesa/program/prog_optimize.h          |   3 +
 src/mesa/sources.mak                      |   1 +
 5 files changed, 458 insertions(+)
 create mode 100644 src/mesa/program/prog_opt_constant_fold.c

diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index 05aa0e8010e..b0c3334fa48 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -293,6 +293,7 @@ program_sources = [
     'program/prog_instruction.c',
     'program/prog_noise.c',
     'program/prog_optimize.c',
+    'program/prog_opt_constant_fold.c',
     'program/prog_parameter.c',
     'program/prog_parameter_layout.c',
     'program/prog_print.c',
diff --git a/src/mesa/program/prog_opt_constant_fold.c b/src/mesa/program/prog_opt_constant_fold.c
new file mode 100644
index 00000000000..e2418b55451
--- /dev/null
+++ b/src/mesa/program/prog_opt_constant_fold.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright Â© 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "program.h"
+#include "prog_instruction.h"
+#include "prog_optimize.h"
+#include "prog_parameter.h"
+#include <stdbool.h>
+
+static bool
+src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
+{
+   unsigned i;
+
+   for (i = 0; i < num_srcs; i++) {
+      if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
+	 return false;
+   }
+
+   return true;
+}
+
+static struct prog_src_register
+src_reg_for_float(struct gl_program *prog, float val)
+{
+   struct prog_src_register src;
+   unsigned swiz;
+
+   memset(&src, 0, sizeof(src));
+
+   src.File = PROGRAM_CONSTANT;
+   src.Index = _mesa_add_unnamed_constant(prog->Parameters,
+					  (gl_constant_value *) &val, 1, &swiz);
+   src.Swizzle = swiz;
+   return src;
+}
+
+static struct prog_src_register
+src_reg_for_vec4(struct gl_program *prog, const float *val)
+{
+   struct prog_src_register src;
+   unsigned swiz;
+
+   memset(&src, 0, sizeof(src));
+
+   src.File = PROGRAM_CONSTANT;
+   src.Index = _mesa_add_unnamed_constant(prog->Parameters,
+					  (gl_constant_value *) val, 4, &swiz);
+   src.Swizzle = swiz;
+   return src;
+}
+
+static bool
+src_regs_are_same(const struct prog_src_register *a,
+		  const struct prog_src_register *b)
+{
+   return (a->File == b->File)
+      && (a->Index == b->Index)
+      && (a->Swizzle == b->Swizzle)
+      && (a->Abs == b->Abs)
+      && (a->Negate == b->Negate)
+      && (a->RelAddr == 0)
+      && (b->RelAddr == 0);
+}
+
+static void
+get_value(struct gl_program *prog, struct prog_src_register *r, float *data)
+{
+   const gl_constant_value *const value =
+      prog->Parameters->ParameterValues[r->Index];
+
+   data[0] = value[GET_SWZ(r->Swizzle, 0)].f;
+   data[1] = value[GET_SWZ(r->Swizzle, 1)].f;
+   data[2] = value[GET_SWZ(r->Swizzle, 2)].f;
+   data[3] = value[GET_SWZ(r->Swizzle, 3)].f;
+
+   if (r->Abs) {
+      data[0] = fabsf(data[0]);
+      data[1] = fabsf(data[1]);
+      data[2] = fabsf(data[2]);
+      data[3] = fabsf(data[3]);
+   }
+
+   if (r->Negate & 0x01) {
+      data[0] = -data[0];
+   }
+
+   if (r->Negate & 0x02) {
+      data[1] = -data[1];
+   }
+
+   if (r->Negate & 0x04) {
+      data[2] = -data[2];
+   }
+
+   if (r->Negate & 0x08) {
+      data[3] = -data[3];
+   }
+}
+
+/**
+ * Try to replace instructions that produce a constant result with simple moves
+ *
+ * The hope is that a following copy propagation pass will eliminate the
+ * unnecessary move instructions.
+ */
+GLboolean
+_mesa_constant_fold(struct gl_program *prog)
+{
+   bool progress = false;
+   unsigned i;
+
+   for (i = 0; i < prog->NumInstructions; i++) {
+      struct prog_instruction *const inst = &prog->Instructions[i];
+
+      switch (inst->Opcode) {
+      case OPCODE_ADD:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = a[0] + b[0];
+	    result[1] = a[1] + b[1];
+	    result[2] = a[2] + b[2];
+	    result[3] = a[3] + b[3];
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_CMP:
+	 /* FINISHME: We could also optimize CMP instructions where the first
+	  * FINISHME: source is a constant that is either all < 0.0 or all
+	  * FINISHME: >= 0.0.
+	  */
+	 if (src_regs_are_constant(inst, 3)) {
+	    float a[4];
+	    float b[4];
+	    float c[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+	    get_value(prog, &inst->SrcReg[2], c);
+
+            result[0] = a[0] < 0.0f ? b[0] : c[0];
+            result[1] = a[1] < 0.0f ? b[1] : c[1];
+            result[2] = a[2] < 0.0f ? b[2] : c[2];
+            result[3] = a[3] < 0.0f ? b[3] : c[3];
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+	    inst->SrcReg[2].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_DP2:
+      case OPCODE_DP3:
+      case OPCODE_DP4:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result;
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    /* It seems like a loop could be used here, but we cleverly put
+	     * DP2A between DP2 and DP3.  Subtracting DP2 (or similar) from
+	     * the opcode results in various failures of the loop control.
+	     */
+	    result = (a[0] * b[0]) + (a[1] * b[1]);
+
+	    if (inst->Opcode >= OPCODE_DP3)
+	       result += a[2] * b[2];
+
+	    if (inst->Opcode == OPCODE_DP4)
+	       result += a[3] * b[3];
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_MUL:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = a[0] * b[0];
+	    result[1] = a[1] * b[1];
+	    result[2] = a[2] * b[2];
+	    result[3] = a[3] * b[3];
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SEQ:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] == b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] == b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] == b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] == b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SGE:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SGT:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] > b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] > b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] > b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] > b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SLE:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SLT:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] < b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] < b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] < b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] < b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      case OPCODE_SNE:
+	 if (src_regs_are_constant(inst, 2)) {
+	    float a[4];
+	    float b[4];
+	    float result[4];
+
+	    get_value(prog, &inst->SrcReg[0], a);
+	    get_value(prog, &inst->SrcReg[1], b);
+
+	    result[0] = (a[0] != b[0]) ? 1.0f : 0.0f;
+	    result[1] = (a[1] != b[1]) ? 1.0f : 0.0f;
+	    result[2] = (a[2] != b[2]) ? 1.0f : 0.0f;
+	    result[3] = (a[3] != b[3]) ? 1.0f : 0.0f;
+
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
+	    inst->Opcode = OPCODE_MOV;
+	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
+
+	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+
+	    progress = true;
+	 }
+	 break;
+
+      default:
+	 break;
+      }
+   }
+
+   return progress;
+}
diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c
index 3340ce0498b..25d9684b137 100644
--- a/src/mesa/program/prog_optimize.c
+++ b/src/mesa/program/prog_optimize.c
@@ -1358,6 +1358,8 @@ _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program)
          any_change = GL_TRUE;
       if (_mesa_remove_dead_code_local(program))
          any_change = GL_TRUE;
+
+      any_change = _mesa_constant_fold(program) || any_change;
       _mesa_reallocate_registers(program);
    } while (any_change);
 }
diff --git a/src/mesa/program/prog_optimize.h b/src/mesa/program/prog_optimize.h
index 463f5fc51c4..9854fb7a491 100644
--- a/src/mesa/program/prog_optimize.h
+++ b/src/mesa/program/prog_optimize.h
@@ -44,4 +44,7 @@ _mesa_find_temp_intervals(const struct prog_instruction *instructions,
 extern void
 _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program);
 
+extern GLboolean
+_mesa_constant_fold(struct gl_program *prog);
+
 #endif
diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak
index ed008f8813e..5e77e0f5919 100644
--- a/src/mesa/sources.mak
+++ b/src/mesa/sources.mak
@@ -251,6 +251,7 @@ PROGRAM_SOURCES = \
 	program/prog_instruction.c \
 	program/prog_noise.c \
 	program/prog_optimize.c \
+	program/prog_opt_constant_fold.c \
 	program/prog_parameter.c \
 	program/prog_parameter_layout.c \
 	program/prog_print.c \
-- 
2.30.2