From f085b21b25f76157dd91e57a022e5f5465dc86f9 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 28 Mar 2013 11:15:20 -0700 Subject: [PATCH] i965/fs: Increase and document MAD latency on Gen7. 58% of mad(8) generated in shader-db are reading registers from the same bank. Reviewed-by: Eric Anholt --- .../dri/i965/brw_fs_schedule_instructions.cpp | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index ec558e385d6..f0ef4701e31 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -129,15 +129,29 @@ schedule_node::set_latency_gen7(bool is_haswell) { switch (inst->opcode) { case BRW_OPCODE_MAD: - /* 3 cycles (this is said to be 4 cycles sometimes depending on the - * register numbers in the sources): + /* 2 cycles + * (since the last two src operands are in different register banks): + * mad(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g3.1<4,1,1>F.x { align16 WE_normal 1Q }; + * + * 3 cycles on IVB, 4 on HSW + * (since the last two src operands are in the same register bank): * mad(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g2.1<4,1,1>F.x { align16 WE_normal 1Q }; * - * 20 cycles: + * 18 cycles on IVB, 16 on HSW + * (since the last two src operands are in different register banks): + * mad(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g3.1<4,1,1>F.x { align16 WE_normal 1Q }; + * mov(8) null g4<4,5,1>F { align16 WE_normal 1Q }; + * + * 20 cycles on IVB, 18 on HSW + * (since the last two src operands are in the same register bank): * mad(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g2.1<4,1,1>F.x { align16 WE_normal 1Q }; * mov(8) null g4<4,4,1>F { align16 WE_normal 1Q }; */ - latency = is_haswell ? 16 : 17; + + /* Our register allocator doesn't know about register banks, so use the + * higher latency. + */ + latency = is_haswell ? 16 : 18; break; case BRW_OPCODE_LRP: -- 2.30.2