From e6b137dcce58ca074458b184304573613917553f Mon Sep 17 00:00:00 2001
From: =?utf8?q?Nicolai=20H=C3=A4hnle?=
Date: Sun, 4 Oct 2009 11:13:09 +0200
Subject: [PATCH] r300/compiler: Introduce aluresult register for branch operation support
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: Nicolai Hähnle
---
Reviewer notes (text between the "---" line and the diffstat is not part of
the commit message):
 - Line structure restored from a whitespace-collapsed copy. Tabs, blank
   context lines and column alignment were reconstructed from the hunk
   counts and may not match the tree byte-for-byte; apply with
   "git apply --ignore-whitespace" if context fails to match.
 - rc_dataflow_deadcode(): dropped a stray trailing backslash after the
   rc_get_opcode_info() statement; that line is now unchanged context.
 - r500_transform_IF(): expanded the doc comment.
 - struct rc_sub_instruction: added \name to the ALU result member group,
   matching the "\name Extra fields for TEX..." group that follows it.
 - Diffstat and the r500_fragprog.c hunk header are updated for the above;
   the blob ids on the "index" lines still describe the original revision.
 - Author and sign-off e-mail addresses were missing from the copy and are
   not reconstructed.

 .../drivers/dri/r300/compiler/r3xx_fragprog.c |  3 +-
 .../drivers/dri/r300/compiler/r500_fragprog.c | 36 +++++++++++
 .../drivers/dri/r300/compiler/r500_fragprog.h |  5 ++
 .../r300/compiler/radeon_dataflow_deadcode.c  | 64 +++++++++++++++----
 .../dri/r300/compiler/radeon_program.h        |  8 +++
 .../r300/compiler/radeon_program_constants.h  | 21 +++++-
 .../dri/r300/compiler/radeon_program_print.c  | 38 ++++++++++-
 7 files changed, 161 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 590201a9bab..614c2e3d24d 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -88,11 +88,12 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
 	if (c->is_r500) {
 		struct radeon_program_transformation transformations[] = {
 			{ &r500_transform_TEX, c },
+			{ &r500_transform_IF, 0 },
 			{ &radeonTransformALU, 0 },
 			{ &radeonTransformDeriv, 0 },
 			{ &radeonTransformTrigScale, 0 }
 		};
-		radeonLocalTransform(&c->Base, 4, transformations);
+		radeonLocalTransform(&c->Base, 5, transformations);
 
 		c->Base.SwizzleCaps = &r500_swizzle_caps;
 	} else {
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index 971465e3591..39f2445bd47 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -169,6 +169,42 @@ int r500_transform_TEX(
 	return 1;
 }
 
+/**
+ * Rewrite IF instructions to use the ALU result special register.
+ *
+ * A MOV with an empty destination write mask is added through
+ * rc_insert_new_instruction(c, inst->Prev). It copies the IF's source
+ * operand (swizzle recombined via combine_swizzles4()) and is flagged to
+ * write the ALU result from its W channel using a "not equal" comparison.
+ * The IF is then changed to read RC_SPECIAL_ALU_RESULT from RC_FILE_SPECIAL.
+ *
+ * \return 1 if \p inst is an IF and was rewritten, 0 for any other opcode.
+ */
+int r500_transform_IF(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void* data)
+{
+	if (inst->I.Opcode != RC_OPCODE_IF)
+		return 0;
+
+	struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+	inst_mov->I.Opcode = RC_OPCODE_MOV;
+	inst_mov->I.DstReg.WriteMask = 0;
+	inst_mov->I.WriteALUResult = RC_ALURESULT_W;
+	inst_mov->I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
+	inst_mov->I.SrcReg[0] = inst->I.SrcReg[0];
+	inst_mov->I.SrcReg[0].Swizzle = combine_swizzles4(inst_mov->I.SrcReg[0].Swizzle,
+		RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_X);
+
+	inst->I.SrcReg[0].File = RC_FILE_SPECIAL;
+	inst->I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
+	inst->I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+	inst->I.SrcReg[0].Negate = 0;
+
+	return 1;
+}
+
 static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
 {
 	unsigned int relevant;
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
index 92ac75d5fd4..0918cdf518b 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -47,4 +47,9 @@ extern int r500_transform_TEX(
 	struct rc_instruction * inst,
 	void* data);
 
+extern int r500_transform_IF(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void* data);
+
 #endif
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
index 95af6fd411f..2ae3c566890 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -34,10 +34,12 @@ struct updatemask_state {
 	unsigned char Output[RC_REGISTER_MAX_INDEX];
 	unsigned char Temporary[RC_REGISTER_MAX_INDEX];
 	unsigned char Address;
+	unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
 };
 
 struct instruction_state {
-	unsigned char WriteMask;
+	unsigned char WriteMask:4;
+	unsigned char WriteALUResult:1;
 	unsigned char SrcReg[3];
 };
 
@@ -70,6 +72,9 @@ static void or_updatemasks(
 		dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
 	}
 
+	for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
+		dst->Special[i] = a->Special[i] | b->Special[i];
+
 	dst->Address = a->Address | b->Address;
 }
 
@@ -108,6 +113,13 @@ static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file f
 		return &s->R.Temporary[index];
 	} else if (file == RC_FILE_ADDRESS) {
 		return &s->R.Address;
+	} else if (file == RC_FILE_SPECIAL) {
+		if (index >= RC_NUM_SPECIAL_REGISTERS) {
+			rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
+			return 0;
+		}
+
+		return &s->R.Special[index];
 	}
 
 	return 0;
@@ -136,6 +148,19 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction
 
 	insts->WriteMask |= usedmask;
 
+	if (inst->I.WriteALUResult) {
+		unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
+		if (pused && *pused) {
+			if (inst->I.WriteALUResult == RC_ALURESULT_X)
+				usedmask |= RC_MASK_X;
+			else if (inst->I.WriteALUResult == RC_ALURESULT_W)
+				usedmask |= RC_MASK_W;
+
+			*pused = 0;
+			insts->WriteALUResult = 1;
+		}
+	}
+
 	unsigned int srcmasks[3];
 	rc_compute_sources_for_writemask(opcode, usedmask, srcmasks);
 
@@ -225,21 +250,38 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
 	for(struct rc_instruction * inst = c->Program.Instructions.Next;
 	    inst != &c->Program.Instructions;
 	    inst = inst->Next, ++ip) {
 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode);
+		int dead = 1;
+
+		if (!opcode->HasDstReg) {
+			dead = 0;
+		} else {
+			inst->I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
+			if (s.Instructions[ip].WriteMask)
+				dead = 0;
+
+			if (s.Instructions[ip].WriteALUResult)
+				dead = 0;
+			else
+				inst->I.WriteALUResult = RC_ALURESULT_NONE;
+		}
 
-		if (opcode->HasDstReg) {
-			if (s.Instructions[ip].WriteMask) {
-				inst->I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
-			} else {
-				struct rc_instruction * todelete = inst;
-				inst = inst->Prev;
-				rc_remove_instruction(todelete);
-				continue;
-			}
+		if (dead) {
+			struct rc_instruction * todelete = inst;
+			inst = inst->Prev;
+			rc_remove_instruction(todelete);
+			continue;
 		}
 
 		unsigned int srcmasks[3];
-		rc_compute_sources_for_writemask(opcode, s.Instructions[ip].WriteMask, srcmasks);
+		unsigned int usemask = s.Instructions[ip].WriteMask;
+
+		if (inst->I.WriteALUResult == RC_ALURESULT_X)
+			usemask |= RC_MASK_X;
+		else if (inst->I.WriteALUResult == RC_ALURESULT_W)
+			usemask |= RC_MASK_W;
+
+		rc_compute_sources_for_writemask(opcode, usemask, srcmasks);
 
 		for(unsigned int src = 0; src < 3; ++src) {
 			for(unsigned int chan = 0; chan < 4; ++chan) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index efa2b0dfe3e..071b0a0ca9f 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -86,6 +86,14 @@ struct rc_sub_instruction {
 	 */
 	rc_saturate_mode SaturateMode:2;
 
+	/**
+	 * \name Writing to the special register RC_SPECIAL_ALU_RESULT
+	 */
+	/*@{*/
+	rc_write_aluresult WriteALUResult:2;
+	rc_compare_func ALUResultCompare:3;
+	/*@}*/
+
 	/**
 	 * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions.
 	 */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
index 69994f9880f..7c0d6720b11 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
@@ -74,9 +74,22 @@ typedef enum {
 	/**
 	 * Indicates a constant from the \ref rc_constant_list .
 	 */
-	RC_FILE_CONSTANT
+	RC_FILE_CONSTANT,
+
+	/**
+	 * Indicates a special register, see RC_SPECIAL_xxx.
+	 */
+	RC_FILE_SPECIAL
 } rc_register_file;
 
+enum {
+	/** R500 fragment program ALU result "register" */
+	RC_SPECIAL_ALU_RESULT = 0,
+
+	/** Must be last */
+	RC_NUM_SPECIAL_REGISTERS
+};
+
 #define RC_REGISTER_INDEX_BITS 10
 #define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS)
 
@@ -125,4 +138,10 @@ typedef enum {
 #define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W)
 /*@}*/
 
+typedef enum {
+	RC_ALURESULT_NONE = 0,
+	RC_ALURESULT_X,
+	RC_ALURESULT_W
+} rc_write_aluresult;
+
 #endif /* RADEON_PROGRAM_CONSTANTS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
index 04852864516..6645d7cacb7 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
@@ -38,10 +38,36 @@ static const char * textarget_to_string(rc_texture_target target)
 	}
 }
 
+static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
+{
+	if (func == RC_COMPARE_FUNC_NEVER) {
+		fprintf(f, "false");
+	} else if (func == RC_COMPARE_FUNC_ALWAYS) {
+		fprintf(f, "true");
+	} else {
+		const char * op;
+		switch(func) {
+		case RC_COMPARE_FUNC_LESS: op = "<"; break;
+		case RC_COMPARE_FUNC_EQUAL: op = "=="; break;
+		case RC_COMPARE_FUNC_LEQUAL: op = "<="; break;
+		case RC_COMPARE_FUNC_GREATER: op = ">"; break;
+		case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break;
+		case RC_COMPARE_FUNC_GEQUAL: op = ">="; break;
+		default: op = "???"; break;
+		}
+		fprintf(f, "%s %s %s", lhs, op, rhs);
+	}
+}
+
 static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
 {
 	if (file == RC_FILE_NONE) {
 		fprintf(f, "none");
+	} else if (file == RC_FILE_SPECIAL) {
+		switch(index) {
+		case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break;
+		default: fprintf(f, "special[%i]", index); break;
+		}
 	} else {
 		const char * filename;
 		switch(file) {
@@ -151,7 +177,17 @@ static void rc_print_instruction(FILE * f, struct rc_instruction * inst)
 			inst->I.TexSrcUnit);
 	}
 
-	fprintf(f, ";\n");
+	fprintf(f, ";");
+
+	if (inst->I.WriteALUResult) {
+		fprintf(f, " [aluresult = (");
+		rc_print_comparefunc(f,
+			(inst->I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w",
+			inst->I.ALUResultCompare, "0");
+		fprintf(f, ")]");
+	}
+
+	fprintf(f, "\n");
 }
 
 /**
-- 
2.30.2