From 90537d6a891df2cefa99b0fdcfec19dbeb8c5a78 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 20 Dec 2016 00:11:33 +0100 Subject: [PATCH] nv50/ir: use sched control codes for gm107 builtins Yes, IMUL/IMAD require dependency barriers and we should definitely replace these instructions by XMAD but the different flags need to be figured out. Note that XMAD only supports 16-bits integers. Signed-off-by: Samuel Pitoiset Reviewed-by: Pierre Moreau --- .../drivers/nouveau/codegen/lib/gm107.asm | 40 +++++++++---------- .../drivers/nouveau/codegen/lib/gm107.asm.h | 40 +++++++++---------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm index 67b98dac39c..90741b6c59f 100644 --- a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm +++ b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm @@ -11,39 +11,39 @@ // SIZE: 22 / 14 * 8 bytes // gm107_div_u32: - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0xd wr 0x0 wt 0x3f) (st 0x1 wt 0x1) (st 0x6) flo u32 $r2 $r1 lop xor 1 $r2 $r2 0x1f mov $r3 0x1 0xf - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0x1) (st 0xf wr 0x0) (st 0x6 wr 0x0 wt 0x1) shl $r2 $r3 $r2 i2i u32 u32 $r1 neg $r1 imul u32 u32 $r3 $r1 $r2 - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1) imad u32 u32 hi $r2 $r2 $r3 $r2 imul u32 u32 $r3 $r1 $r2 imad u32 u32 hi $r2 $r2 $r3 $r2 - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1) imul u32 u32 $r3 $r1 $r2 imad u32 u32 hi $r2 $r2 $r3 $r2 imul u32 u32 $r3 $r1 $r2 - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1) (st 0x6 wr 0x0 wt 0x1) imad u32 u32 hi $r2 $r2 $r3 $r2 imul u32 u32 $r3 $r1 $r2 imad u32 u32 hi $r2 $r2 $r3 $r2 - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0x6) (st 0x6 wr 0x0 rd 0x1 wt 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x2) mov $r3 $r0 0xf imul u32 u32 hi $r0 $r0 $r2 i2i u32 u32 $r2 neg $r1 - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0x6 wr 0x0 wt 0x3) (st 0xd wt 0x1) (st 0x1) imad u32 u32 $r1 $r1 $r0 $r3 isetp ge u32 and $p0 1 $r1 $r2 1 $p0 iadd $r1 $r1 neg $r2 - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0x5) (st 0xd) (st 0x1) $p0 iadd $r0 $r0 0x1 $p0 isetp ge u32 and $p0 1 $r1 $r2 1 $p0 iadd $r1 $r1 neg $r2 - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0x1) (st 0xf) (st 0xf) $p0 iadd $r0 $r0 0x1 ret nop 0 @@ -55,47 +55,47 @@ gm107_div_u32: // CLOBBER: $r2 - $r3, $p0 - $p3 // gm107_div_s32: - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0xd wt 0x3f) (st 0x1) (st 0x1 wr 0x0) isetp lt and $p2 0x1 $r0 0 1 isetp lt xor $p3 1 $r1 0 $p2 i2i s32 s32 $r0 abs $r0 - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0xf wr 0x1) (st 0xd wr 0x1 wt 0x2) (st 0x1 wt 0x2) i2i s32 s32 $r1 abs $r1 flo u32 $r2 $r1 lop xor 1 $r2 $r2 0x1f - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0x6) (st 0x1) (st 0xf wr 0x1) mov $r3 0x1 0xf shl $r2 $r3 $r2 i2i u32 u32 $r1 neg $r1 - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2) imul u32 u32 $r3 $r1 $r2 imad u32 u32 hi $r2 $r2 $r3 $r2 imul u32 u32 $r3 $r1 $r2 - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2) imad u32 u32 hi $r2 $r2 $r3 $r2 imul u32 u32 $r3 $r1 $r2 imad u32 u32 hi $r2 $r2 $r3 $r2 - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2) (st 0x6 wr 0x1 wt 0x2) imul u32 u32 $r3 $r1 $r2 imad u32 u32 hi $r2 $r2 $r3 $r2 imul u32 u32 $r3 $r1 $r2 - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0x6 wr 0x1 rd 0x2 wt 0x2) (st 0x2 wt 0x5) (st 0x6 wr 0x0 rd 0x1 wt 0x2) imad u32 u32 hi $r2 $r2 $r3 $r2 mov $r3 $r0 0xf imul u32 u32 hi $r0 $r0 $r2 - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0xf wr 0x1 rd 0x2 wt 0x2) (st 0x6 wr 0x0 wt 0x5) (st 0xd wt 0x3) i2i u32 u32 $r2 neg $r1 imad u32 u32 $r1 $r1 $r0 $r3 isetp ge u32 and $p0 1 $r1 $r2 1 - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0x1) (st 0x5) (st 0xd) $p0 iadd $r1 $r1 neg $r2 $p0 iadd $r0 $r0 0x1 $p0 isetp ge u32 and $p0 1 $r1 $r2 1 - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0x1) (st 0x2) (st 0xf wr 0x0) $p0 iadd $r1 $r1 neg $r2 $p0 iadd $r0 $r0 0x1 $p3 i2i s32 s32 $r0 neg $r0 - sched (st 0x0) (st 0x0) (st 0x0) + sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf) $p2 i2i s32 s32 $r1 neg $r1 ret nop 0 diff --git a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h index 7be25da5532..8708a94b0a6 100644 --- a/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h +++ b/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h @@ -1,83 +1,83 @@ uint64_t gm107_builtin_code[] = { /* 0x0000: gm107_div_u32 */ - 0x001f8000fc0007e0, + 0x001f9801fc21ff0d, 0x5c30000000170002, 0x3847040001f70202, 0x3898078000170003, - 0x001f8000fc0007e0, + 0x003c1800e1e007e1, 0x5c48000000270302, 0x5ce0200000170a01, 0x5c38000000270103, - 0x001f8000fc0007e0, + 0x003c1801e0c00f06, 0x5a40010000370202, 0x5c38000000270103, 0x5a40010000370202, - 0x001f8000fc0007e0, + 0x003c1801e0c00f06, 0x5c38000000270103, 0x5a40010000370202, 0x5c38000000270103, - 0x001f8000fc0007e0, + 0x003c1801e0c00f06, 0x5a40010000370202, 0x5c38000000270103, 0x5a40010000370202, - 0x001f8000fc0007e0, + 0x00443c0120c007e6, 0x5c98078000070003, 0x5c38008000270000, 0x5ce0200000170a02, - 0x001f8000fc0007e0, + 0x001f8401fda01f06, 0x5a00018000070101, 0x5b6c038000270107, 0x5c11000000200101, - 0x001f8000fc0007e0, + 0x001f8400fda007e5, 0x3810000000100000, 0x5b6c038000200107, 0x5c11000000200101, - 0x001f8000fc0007e0, + 0x001fbc00fde007e1, 0x3810000000100000, 0xe32000000007000f, 0x50b0000000070f00, /* 0x0120: gm107_div_s32 */ - 0x001f8000fc0007e0, + 0x001c0400fc21ffed, 0x5b6303800ff70017, 0x5b6341000ff7011f, 0x5ce2000000073a00, - 0x001f8000fc0007e0, + 0x005f8402e5a0072f, 0x5ce2000000173a01, 0x5c30000000170002, 0x3847040001f70202, - 0x001f8000fc0007e0, + 0x001cbc00fc2007e6, 0x3898078000170003, 0x5c48000000270302, 0x5ce0200000170a01, - 0x001f8000fc0007e0, + 0x005c9802e4c01726, 0x5c38000000270103, 0x5a40010000370202, 0x5c38000000270103, - 0x001f8000fc0007e0, + 0x005c9802e4c01726, 0x5a40010000370202, 0x5c38000000270103, 0x5a40010000370202, - 0x001f8000fc0007e0, + 0x005c9802e4c01726, 0x5c38000000270103, 0x5a40010000370202, 0x5c38000000270103, - 0x001f8000fc0007e0, + 0x00441805fc401226, 0x5a40010000370202, 0x5c98078000070003, 0x5c38008000270000, - 0x001f8000fc0007e0, + 0x007fb405e0c0122f, 0x5ce0200000170a02, 0x5a00018000070101, 0x5b6c038000270107, - 0x001f8000fc0007e0, + 0x001fb400fca007e1, 0x5c11000000200101, 0x3810000000100000, 0x5b6c038000200107, - 0x001f8000fc0007e0, + 0x001c3c00fc4007e1, 0x5c11000000200101, 0x3810000000100000, 0x5ce0200000033a00, - 0x001f8000fc0007e0, + 0x001fbc03fde0072f, 0x5ce0200000123a01, 0xe32000000007000f, 0x50b0000000070f00, -- 2.30.2