From 71877143b62ce3f10ef4716d19361fa95a53ff42 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Sat, 15 Sep 2012 23:25:34 -0400 Subject: [PATCH] r300/compiler: Avoid generating MOV instructions for invalid IMM swizzles v2 If an instruction reads from a constant register that contains immediates using an invalid swizzle, we can avoid generating MOV instructions to fix up the swizzle by loading the immediates into a different constant register that can be read using a valid swizzle. This only affects r300 and r400 cards. For example: CONST[1] = { -3.5000 3.5000 2.5000 1.5000 } MAD temp[4].xy, const[0].xy__, const[1].xz__, input[0].xy__; ========== Before this change would be lowered to: ========= CONST[1] = { -3.5000 3.5000 2.5000 1.5000 } MOV temp[0].x, const[1].x___; MOV temp[0].y, const[1]._z__; MAD temp[4].xy, const[0].xy__, temp[0].xy__, input[0].xy__; ========== After this change is lowered to: =============== CONST[1] = { -3.5000 3.5000 2.5000 1.5000 } CONST[2] = { 0.0000 -3.5000 2.5000 0.0000 } MAD temp[4].xy, const[0].xy__, const[2].yz__, input[0].xy__; ============================================================ This change reduces one of the Lightsmark shaders from 133 to 91 instructions. v2: - Fix crash caused by swizzles with only inline constants. --- .../r300/compiler/radeon_dataflow_swizzles.c | 353 +++++++++++++++++- 1 file changed, 349 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c index 133a9f72ec7..7c745968afc 100644 --- a/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c +++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2012 Advanced Micro Devices, Inc. * * All Rights Reserved. * @@ -23,11 +24,16 @@ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * + * Authors: + * Nicolai Haehnle + * Tom Stellard */ #include "radeon_dataflow.h" +#include "radeon_code.h" #include "radeon_compiler.h" +#include "radeon_compiler_util.h" #include "radeon_swizzle.h" @@ -87,17 +93,356 @@ static void rewrite_source(struct radeon_compiler * c, } } +/** + * This function will attempt to rewrite non-native swizzles that read from + * immediate registers by rearranging the immediates to allow the + * instruction to use native swizzles. + */ +static unsigned try_rewrite_constant(struct radeon_compiler *c, + struct rc_src_register *reg) +{ + unsigned new_swizzle, chan, swz0, swz1, swz2, swz3, found_swizzle, swz; + unsigned all_inline = 0; + float imms[4] = {0.0f, 0.0f, 0.0f, 0.0f}; + + if (!rc_src_reg_is_immediate(c, reg->File, reg->Index)) { + /* The register does not contain immediates, but if all + * the swizzles are inline constants, we can still rewrite + * it. */ + + new_swizzle = RC_SWIZZLE_XYZW; + for (chan = 0 ; chan < 4; chan++) { + unsigned swz = GET_SWZ(reg->Swizzle, chan); + if (swz <= RC_SWIZZLE_W) { + return 0; + } + if (swz == RC_SWIZZLE_UNUSED) { + SET_SWZ(new_swizzle, chan, RC_SWIZZLE_UNUSED); + } + } + all_inline = 1; + } else { + new_swizzle = reg->Swizzle; + } + + swz = RC_SWIZZLE_UNUSED; + found_swizzle = 1; + /* Check if all channels have the same swizzle. If they do we can skip + * the search for a native swizzle. We only need to check the first + * three channels, because any swizzle is legal in the fourth channel. + */ + for (chan = 0; chan < 3; chan++) { + unsigned chan_swz = GET_SWZ(reg->Swizzle, chan); + if (chan_swz == RC_SWIZZLE_UNUSED) { + continue; + } + if (swz == RC_SWIZZLE_UNUSED) { + swz = chan_swz; + } else if (swz != chan_swz) { + found_swizzle = 0; + break; + } + } + + /* Find a legal swizzle */ + + /* This loop attempts to find a native swizzle where all the + * channels are different. */ + while (!found_swizzle && !all_inline) { + swz0 = GET_SWZ(new_swizzle, 0); + swz1 = GET_SWZ(new_swizzle, 1); + swz2 = GET_SWZ(new_swizzle, 2); + + /* Swizzle .W. is never legal. */ + if (swz1 == RC_SWIZZLE_W || + swz1 == RC_SWIZZLE_UNUSED || + swz1 == RC_SWIZZLE_ZERO || + swz1 == RC_SWIZZLE_HALF || + swz1 == RC_SWIZZLE_ONE) { + /* We chose Z, because there are two non-repeating + * swizzle combinations of the form .Z. There are + * only one combination each for .X. and .Y. */ + SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z); + continue; + } + + if (swz2 == RC_SWIZZLE_UNUSED) { + /* We choose Y, because there are two non-repeating + * swizzle combinations of the form ..Y */ + SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y); + continue; + } + + switch (swz0) { + /* X.. */ + case RC_SWIZZLE_X: + /* Legal swizzles that start with X: XYZ, XXX */ + switch (swz1) { + /* XX. */ + case RC_SWIZZLE_X: + /* The new swizzle will be: + * ZXY (XX. => ZX. => ZXY) */ + SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z); + break; + /* XY. */ + case RC_SWIZZLE_Y: + /* The new swizzle is XYZ */ + SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Z); + found_swizzle = 1; + break; + /* XZ. */ + case RC_SWIZZLE_Z: + /* XZZ */ + if (swz2 == RC_SWIZZLE_Z) { + /* The new swizzle is XYZ */ + SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Y); + found_swizzle = 1; + } else { /* XZ[^Z] */ + /* The new swizzle will be: + * YZX (XZ. => YZ. => YZX) */ + SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Y); + } + break; + /* XW. Should have already been handled. */ + case RC_SWIZZLE_W: + assert(0); + break; + } + break; + /* Y.. */ + case RC_SWIZZLE_Y: + /* Legal swizzles that start with Y: YYY, YZX */ + switch (swz1) { + /* YY. */ + case RC_SWIZZLE_Y: + /* The new swizzle will be: + * XYZ (YY. => XY. => XYZ) */ + SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); + break; + /* YZ. */ + case RC_SWIZZLE_Z: + /* The new swizzle is YZX */ + SET_SWZ(new_swizzle, 2, RC_SWIZZLE_X); + found_swizzle = 1; + break; + /* YX. */ + case RC_SWIZZLE_X: + /* YXX */ + if (swz2 == RC_SWIZZLE_X) { + /*The new swizzle is YZX */ + SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z); + found_swizzle = 1; + } else { /* YX[^X] */ + /* The new swizzle will be: + * ZXY (YX. => ZX. -> ZXY) */ + SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z); + } + break; + /* YW. Should have already been handled. */ + case RC_SWIZZLE_W: + assert(0); + break; + } + break; + /* Z.. */ + case RC_SWIZZLE_Z: + /* Legal swizzles that start with Z: ZZZ, ZXY */ + switch (swz1) { + /* ZZ. */ + case RC_SWIZZLE_Z: + /* The new swizzle will be: + * WZY (ZZ. => WZ. => WZY) */ + SET_SWZ(new_swizzle, 0, RC_SWIZZLE_W); + break; + /* ZX. */ + case RC_SWIZZLE_X: + /* The new swizzle is ZXY */ + SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y); + found_swizzle = 1; + break; + /* ZY. */ + case RC_SWIZZLE_Y: + /* ZYY */ + if (swz2 == RC_SWIZZLE_Y) { + /* The new swizzle is ZXY */ + SET_SWZ(new_swizzle, 1, RC_SWIZZLE_X); + found_swizzle = 1; + } else { /* ZY[^Y] */ + /* The new swizzle will be: + * XYZ (ZY. => XY. => XYZ) */ + SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); + } + break; + /* ZW. Should have already been handled. */ + case RC_SWIZZLE_W: + assert(0); + break; + } + break; + + /* W.. */ + case RC_SWIZZLE_W: + /* Legal swizzles that start with X: WWW, WZY */ + switch (swz1) { + /* WW. Should have already been handled. */ + case RC_SWIZZLE_W: + assert(0); + break; + /* WZ. */ + case RC_SWIZZLE_Z: + /* The new swizzle will be WZY */ + SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y); + found_swizzle = 1; + break; + /* WX. */ + case RC_SWIZZLE_X: + /* WY. */ + case RC_SWIZZLE_Y: + /* W[XY]Y */ + if (swz2 == RC_SWIZZLE_Y) { + /* The new swizzle will be WZY */ + SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z); + found_swizzle = 1; + } else { /* W[XY][^Y] */ + /* The new swizzle will be: + * ZXY (WX. => XX. => ZX. => ZXY) or + * XYZ (WY. => XY. => XYZ) + */ + SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); + } + break; + } + break; + /* U.. 0.. 1.. H..*/ + case RC_SWIZZLE_UNUSED: + case RC_SWIZZLE_ZERO: + case RC_SWIZZLE_ONE: + case RC_SWIZZLE_HALF: + SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); + break; + } + } + + /* Handle the swizzle in the w channel. */ + swz3 = GET_SWZ(reg->Swizzle, 3); + + /* We can skip this if the swizzle in channel w is an inline constant. */ + if (swz3 <= RC_SWIZZLE_W) { + for (chan = 0; chan < 3; chan++) { + unsigned old_swz = GET_SWZ(reg->Swizzle, chan); + unsigned new_swz = GET_SWZ(new_swizzle, chan); + /* If the swizzle in the w channel is the same as the + * swizzle in any other channels, we need to rewrite it. + * For example: + * reg->Swizzle == XWZW + * new_swizzle == XYZX + * Since the swizzle in the y channel is being + * rewritten from W -> Y we need to change the swizzle + * in the w channel from W -> Y as well. + */ + if (old_swz == swz3) { + SET_SWZ(new_swizzle, 3, + GET_SWZ(new_swizzle, chan)); + break; + } + + /* The swizzle in channel w will be overwritten by one + * of the new swizzles. */ + if (new_swz == swz3) { + /* Find an unused swizzle */ + unsigned i; + unsigned used = 0; + for (i = 0; i < 3; i++) { + used |= 1 << GET_SWZ(new_swizzle, i); + } + for (i = 0; i < 4; i++) { + if (used & (1 << i)) { + continue; + } + SET_SWZ(new_swizzle, 3, i); + } + } + } + } + + for (chan = 0; chan < 4; chan++) { + unsigned old_swz = GET_SWZ(reg->Swizzle, chan); + unsigned new_swz = GET_SWZ(new_swizzle, chan); + + if (old_swz == RC_SWIZZLE_UNUSED) { + continue; + } + + /* We don't need to change the swizzle in channel w if it is + * an inline constant. These are always legal in the w channel. + * + * Swizzles with a value > RC_SWIZZLE_W are inline constants. + */ + if (chan == 3 && old_swz > RC_SWIZZLE_W) { + continue; + } + + assert(new_swz <= RC_SWIZZLE_W); + + switch (old_swz) { + case RC_SWIZZLE_ZERO: + imms[new_swz] = 0.0f; + break; + case RC_SWIZZLE_HALF: + if (reg->Negate & (1 << chan)) { + imms[new_swz] = -0.5f; + } else { + imms[new_swz] = 0.5f; + } + break; + case RC_SWIZZLE_ONE: + if (reg->Negate & (1 << chan)) { + imms[new_swz] = -1.0f; + } else { + imms[new_swz] = 1.0f; + } + break; + default: + imms[new_swz] = rc_get_constant_value(c, reg->Index, + reg->Swizzle, reg->Negate, chan); + } + SET_SWZ(reg->Swizzle, chan, new_swz); + } + reg->Index = rc_constants_add_immediate_vec4(&c->Program.Constants, + imms); + /* We need to set the register file to CONSTANT in case we are + * converting a non-constant register with constant swizzles (e.g. + * ONE, ZERO, HALF). + */ + reg->File = RC_FILE_CONSTANT; + reg->Negate = 0; + return 1; +} + void rc_dataflow_swizzles(struct radeon_compiler * c, void *user) { struct rc_instruction * inst; - for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + const struct rc_opcode_info * opcode = + rc_get_opcode_info(inst->U.I.Opcode); unsigned int src; for(src = 0; src < opcode->NumSrcRegs; ++src) { - if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) - rewrite_source(c, inst, src); + struct rc_src_register *reg = &inst->U.I.SrcReg[src]; + if (c->SwizzleCaps->IsNative(inst->U.I.Opcode, *reg)) { + continue; + } + if (!c->is_r500 && + c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS && + try_rewrite_constant(c, reg)) { + continue; + } + rewrite_source(c, inst, src); } } + if (c->Debug & RC_DBG_LOG) + rc_constants_print(&c->Program.Constants); } -- 2.30.2