i965/fs: Increase and document MAD latency on Gen7.

author Matt Turner <mattst88@gmail.com>

Thu, 28 Mar 2013 18:15:20 +0000 (11:15 -0700)

committer Matt Turner <mattst88@gmail.com>

Fri, 29 Mar 2013 17:13:27 +0000 (10:13 -0700)
author Matt Turner <mattst88@gmail.com>
Thu, 28 Mar 2013 18:15:20 +0000 (11:15 -0700)
committer Matt Turner <mattst88@gmail.com>
Fri, 29 Mar 2013 17:13:27 +0000 (10:13 -0700)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp

index ec558e385d6813500537ff13e95f84af4463223d..f0ef4701e3144fe1032f3782423b5115c10e66ad 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -129,15 +129,29 @@ schedule_node::set_latency_gen7(bool is_haswell)
  {
     switch (inst->opcode) {
     case BRW_OPCODE_MAD:
-      /* 3 cycles (this is said to be 4 cycles sometimes depending on the
-       * register numbers in the sources):
+      /* 2 cycles
+       *  (since the last two src operands are in different register banks):
+       * mad(8) g4<1>F g2.2<4,1,1>F.x  g2<4,1,1>F.x g3.1<4,1,1>F.x { align16 WE_normal 1Q };
+       *
+       * 3 cycles on IVB, 4 on HSW
+       *  (since the last two src operands are in the same register bank):
         * mad(8) g4<1>F g2.2<4,1,1>F.x  g2<4,1,1>F.x g2.1<4,1,1>F.x { align16 WE_normal 1Q };
         *
-       * 20 cycles:
+       * 18 cycles on IVB, 16 on HSW
+       *  (since the last two src operands are in different register banks):
+       * mad(8) g4<1>F g2.2<4,1,1>F.x  g2<4,1,1>F.x g3.1<4,1,1>F.x { align16 WE_normal 1Q };
+       * mov(8) null   g4<4,4,1>F                     { align16 WE_normal 1Q };
+       *
+       * 20 cycles on IVB, 18 on HSW
+       *  (since the last two src operands are in the same register bank):
         * mad(8) g4<1>F g2.2<4,1,1>F.x  g2<4,1,1>F.x g2.1<4,1,1>F.x { align16 WE_normal 1Q };
         * mov(8) null   g4<4,4,1>F                     { align16 WE_normal 1Q };
         */
-      latency = is_haswell ? 16 : 17;
+
+      /* Our register allocator doesn't know about register banks, so use the
+       * higher latency.
+       */
+      latency = is_haswell ? 16 : 18;
        break;
  
     case BRW_OPCODE_LRP:
author	Matt Turner <mattst88@gmail.com>
	Thu, 28 Mar 2013 18:15:20 +0000 (11:15 -0700)
committer	Matt Turner <mattst88@gmail.com>
	Fri, 29 Mar 2013 17:13:27 +0000 (10:13 -0700)