i965/fs: Split generate_math into gen4/gen6 and 1/2 operand variants.
author Kenneth Graunke <kenneth@whitecape.org>
Thu, 18 Aug 2011 18:55:42 +0000 (11:55 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Mon, 26 Sep 2011 23:30:07 +0000 (16:30 -0700)
This mirrors the structure Eric used in the new VS backend, and seems
simpler.  In particular, the math1/math2 split will avoid having to
figure out how many operands there are, as this is already known by the
caller.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_emit.cpp

index 0bd518f77020f265bd71f567fe1cc8a12c35e18b..f6a57bacb33ccc94a32916fabf76e9313e3e74c5 100644 (file)
@@ -487,7 +487,16 @@ public:
    void generate_linterp(fs_inst *inst, struct brw_reg dst,
                         struct brw_reg *src);
    void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
-   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
+   void generate_math1_gen6(fs_inst *inst,
+                           struct brw_reg dst,
+                           struct brw_reg src);
+   void generate_math2_gen6(fs_inst *inst,
+                           struct brw_reg dst,
+                           struct brw_reg src0,
+                           struct brw_reg src1);
+   void generate_math_gen4(fs_inst *inst,
+                          struct brw_reg dst,
+                          struct brw_reg src);
    void generate_discard(fs_inst *inst);
    void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
    void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
index f742e84e1c6f204b7a7a650695c6bfbc56fdbb52..8176a76a85e491dc1eb623c6b528eee31409eef0 100644 (file)
@@ -143,69 +143,85 @@ fs_visitor::generate_linterp(fs_inst *inst,
 }
 
 void
-fs_visitor::generate_math(fs_inst *inst,
-                         struct brw_reg dst, struct brw_reg *src)
+fs_visitor::generate_math1_gen6(fs_inst *inst,
+                               struct brw_reg dst,
+                               struct brw_reg src0)
 {
    int op = brw_math_function(inst->opcode);
 
-   if (intel->gen >= 6) {
-      assert(inst->mlen == 0);
-
-      if (inst->opcode == SHADER_OPCODE_POW) {
-        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-        brw_math2(p, dst, op, src[0], src[1]);
+   assert(inst->mlen == 0);
 
-        if (c->dispatch_width == 16) {
-           brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
-           brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1]));
-           brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
-        }
-      } else {
-        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-        brw_math(p, dst,
-                 op,
-                 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
-                 BRW_MATH_SATURATE_NONE,
-                 0, src[0],
-                 BRW_MATH_DATA_VECTOR,
-                 BRW_MATH_PRECISION_FULL);
-
-        if (c->dispatch_width == 16) {
-           brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
-           brw_math(p, sechalf(dst),
-                    op,
-                    inst->saturate ? BRW_MATH_SATURATE_SATURATE :
-                    BRW_MATH_SATURATE_NONE,
-                    0, sechalf(src[0]),
-                    BRW_MATH_DATA_VECTOR,
-                    BRW_MATH_PRECISION_FULL);
-           brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
-        }
-      }
-   } else /* gen <= 5 */{
-      assert(inst->mlen >= 1);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_math(p, dst,
+           op,
+           inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+           BRW_MATH_SATURATE_NONE,
+           0, src0,
+           BRW_MATH_DATA_VECTOR,
+           BRW_MATH_PRECISION_FULL);
 
-      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-      brw_math(p, dst,
+   if (c->dispatch_width == 16) {
+      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+      brw_math(p, sechalf(dst),
               op,
               inst->saturate ? BRW_MATH_SATURATE_SATURATE :
               BRW_MATH_SATURATE_NONE,
-              inst->base_mrf, src[0],
+              0, sechalf(src0),
               BRW_MATH_DATA_VECTOR,
               BRW_MATH_PRECISION_FULL);
+      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+   }
+}
 
-      if (c->dispatch_width == 16) {
-        brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
-        brw_math(p, sechalf(dst),
-                 op,
-                 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
-                 BRW_MATH_SATURATE_NONE,
-                 inst->base_mrf + 1, sechalf(src[0]),
-                 BRW_MATH_DATA_VECTOR,
-                 BRW_MATH_PRECISION_FULL);
+void
+fs_visitor::generate_math2_gen6(fs_inst *inst,
+                               struct brw_reg dst,
+                               struct brw_reg src0,
+                               struct brw_reg src1)
+{
+   int op = brw_math_function(inst->opcode);
 
-        brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
-      }
+   assert(inst->mlen == 0);
+
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_math2(p, dst, op, src0, src1);
+
+   if (c->dispatch_width == 16) {
+      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+      brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1));
+      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+   }
+}
+
+void
+fs_visitor::generate_math_gen4(fs_inst *inst,
+                              struct brw_reg dst,
+                              struct brw_reg src)
+{
+   int op = brw_math_function(inst->opcode);
+
+   assert(inst->mlen >= 1);
+
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_math(p, dst,
+           op,
+           inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+           BRW_MATH_SATURATE_NONE,
+           inst->base_mrf, src,
+           BRW_MATH_DATA_VECTOR,
+           BRW_MATH_PRECISION_FULL);
+
+   if (c->dispatch_width == 16) {
+      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+      brw_math(p, sechalf(dst),
+              op,
+              inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+              BRW_MATH_SATURATE_NONE,
+              inst->base_mrf + 1, sechalf(src),
+              BRW_MATH_DATA_VECTOR,
+              BRW_MATH_PRECISION_FULL);
+
+      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
    }
 }
 
@@ -770,10 +786,20 @@ fs_visitor::generate_code()
       case SHADER_OPCODE_SQRT:
       case SHADER_OPCODE_EXP2:
       case SHADER_OPCODE_LOG2:
-      case SHADER_OPCODE_POW:
       case SHADER_OPCODE_SIN:
       case SHADER_OPCODE_COS:
-        generate_math(inst, dst, src);
+        if (intel->gen >= 6) {
+           generate_math1_gen6(inst, dst, src[0]);
+        } else {
+           generate_math_gen4(inst, dst, src[0]);
+        }
+        break;
+      case SHADER_OPCODE_POW:
+        if (intel->gen >= 6) {
+           generate_math2_gen6(inst, dst, src[0], src[1]);
+        } else {
+           generate_math_gen4(inst, dst, src[0]);
+        }
         break;
       case FS_OPCODE_PIXEL_X:
         generate_pixel_xy(dst, true);