From 6d539579add0a9d3017f441d9fad5d4cd3ae7bb9 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Sat, 14 May 2011 21:47:26 -0700 Subject: [PATCH] r300/compiler: Use ALU Result for IF conditionals This saves one instruction per IF. --- .../drivers/dri/r300/compiler/r3xx_fragprog.c | 7 +- .../drivers/dri/r300/compiler/r500_fragprog.c | 152 ++++++++++++++++-- .../drivers/dri/r300/compiler/r500_fragprog.h | 2 +- .../dri/r300/compiler/radeon_optimize.c | 5 +- .../dri/r300/compiler/radeon_variable.c | 61 +++++++ .../dri/r300/compiler/radeon_variable.h | 5 + 6 files changed, 212 insertions(+), 20 deletions(-) diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index e2441e97d87..bb6c010e8e3 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -109,8 +109,12 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { 0, 0 } }; - struct radeon_program_transformation native_rewrite_r500[] = { + struct radeon_program_transformation rewrite_if[] = { { &r500_transform_IF, 0 }, + {0, 0} + }; + + struct radeon_program_transformation native_rewrite_r500[] = { { &radeonTransformALU, 0 }, { &radeonTransformDeriv, 0 }, { &radeonTransformTrigScale, 0 }, @@ -135,6 +139,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, {"saturate output writes", 1, sat_out, rc_local_transform, saturate_output}, {"transform TEX", 1, 1, rc_local_transform, rewrite_tex}, + {"transform IF", 1, is_r500, rc_local_transform, rewrite_if}, {"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500}, {"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300}, {"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_use}, diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index 5e0be6b8881..cf99f5e4538 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -30,6 +30,8 @@ #include #include "radeon_compiler_util.h" +#include "radeon_list.h" +#include "radeon_variable.h" #include "../r300_reg.h" /** @@ -37,27 +39,143 @@ */ int r500_transform_IF( struct radeon_compiler * c, - struct rc_instruction * inst, - void* data) + struct rc_instruction * inst_if, + void *data) { - struct rc_instruction * inst_mov; + struct rc_variable * writer; + struct rc_list * writer_list, * list_ptr; + struct rc_list * var_list = rc_get_variables(c); + unsigned int generic_if = 0; + unsigned int alu_chan; - if (inst->U.I.Opcode != RC_OPCODE_IF) + if (inst_if->U.I.Opcode != RC_OPCODE_IF) { return 0; + } + + writer_list = rc_variable_list_get_writers( + var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]); + if (!writer_list) { + generic_if = 1; + } else { + + /* Make sure it is safe for the writers to write to + * ALU Result */ + for (list_ptr = writer_list; list_ptr; + list_ptr = list_ptr->Next) { + struct rc_instruction * inst; + writer = list_ptr->Item; + /* We are going to modify the destination register + * of writer, so if it has a reader other than + * inst_if (aka ReaderCount > 1) we must fall back to + * our generic IF. + * If the writer has a lower IP than inst_if, this + * means that inst_if is above the writer in a loop. + * I'm not sure why this would ever happen, but + * if it does we want to make sure we fall back + * to our generic IF. */ + if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) { + generic_if = 1; + break; + } + + /* The ALU Result is not preserved across IF + * instructions, so if there is another IF + * instruction between writer and inst_if, then + * we need to fall back to generic IF. */ + for (inst = writer->Inst; inst != inst_if; inst = inst->Next) { + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + if (info->IsFlowControl) { + generic_if = 1; + break; + } + } + if (generic_if) { + break; + } + } + } + + if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) { + alu_chan = RC_ALURESULT_X; + } else { + alu_chan = RC_ALURESULT_W; + } + if (generic_if) { + struct rc_instruction * inst_mov = + rc_insert_new_instruction(c, inst_if->Prev); + + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.WriteMask = 0; + inst_mov->U.I.DstReg.File = RC_FILE_NONE; + inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL; + inst_mov->U.I.WriteALUResult = alu_chan; + inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; + if (alu_chan == RC_ALURESULT_X) { + inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( + inst_mov->U.I.SrcReg[0].Swizzle, + RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); + } else { + inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4( + inst_mov->U.I.SrcReg[0].Swizzle, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z); + } + } else { + rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER; + unsigned int reverse_srcs = 0; + unsigned int preserve_opcode = 0; + for (list_ptr = writer_list; list_ptr; + list_ptr = list_ptr->Next) { + writer = list_ptr->Item; + switch(writer->Inst->U.I.Opcode) { + case RC_OPCODE_SEQ: + compare_func = RC_COMPARE_FUNC_EQUAL; + break; + case RC_OPCODE_SNE: + compare_func = RC_COMPARE_FUNC_NOTEQUAL; + break; + case RC_OPCODE_SLE: + reverse_srcs = 1; + /* Fall through */ + case RC_OPCODE_SGE: + compare_func = RC_COMPARE_FUNC_GEQUAL; + break; + case RC_OPCODE_SGT: + reverse_srcs = 1; + /* Fall through */ + case RC_OPCODE_SLT: + compare_func = RC_COMPARE_FUNC_LESS; + break; + default: + compare_func = RC_COMPARE_FUNC_NOTEQUAL; + preserve_opcode = 1; + break; + } + if (!preserve_opcode) { + writer->Inst->U.I.Opcode = RC_OPCODE_SUB; + } + writer->Inst->U.I.DstReg.WriteMask = 0; + writer->Inst->U.I.DstReg.File = RC_FILE_NONE; + writer->Inst->U.I.WriteALUResult = alu_chan; + writer->Inst->U.I.ALUResultCompare = compare_func; + if (reverse_srcs) { + struct rc_src_register temp_src; + temp_src = writer->Inst->U.I.SrcReg[0]; + writer->Inst->U.I.SrcReg[0] = + writer->Inst->U.I.SrcReg[1]; + writer->Inst->U.I.SrcReg[1] = temp_src; + } + } + } - inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->U.I.Opcode = RC_OPCODE_MOV; - inst_mov->U.I.DstReg.WriteMask = 0; - inst_mov->U.I.WriteALUResult = RC_ALURESULT_W; - inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL; - inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle, - RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_X); - - inst->U.I.SrcReg[0].File = RC_FILE_SPECIAL; - inst->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT; - inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; - inst->U.I.SrcReg[0].Negate = 0; + inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL; + inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT; + inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE( + RC_SWIZZLE_X, RC_SWIZZLE_UNUSED, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED); + inst_if->U.I.SrcReg[0].Negate = 0; return 1; } diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h index 1e665e27641..6aa448cc6f7 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -44,7 +44,7 @@ extern struct rc_swizzle_caps r500_swizzle_caps; extern int r500_transform_IF( struct radeon_compiler * c, - struct rc_instruction * inst, + struct rc_instruction * inst_if, void* data); #endif diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 53ab5fbbbd9..ac73608839e 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -517,8 +517,11 @@ static int is_presub_candidate( assert(inst->U.I.Opcode == RC_OPCODE_ADD); - if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode) + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE + || inst->U.I.SaturateMode + || inst->U.I.WriteALUResult) { return 0; + } /* If both sources use a constant swizzle, then we can't convert it to * a presubtract operation. In fact for the ADD and SUB presubtract diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.c b/src/mesa/drivers/dri/r300/compiler/radeon_variable.c index 5b2295dc7ce..33181bdcc88 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_variable.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_variable.c @@ -469,6 +469,67 @@ struct rc_list * rc_variable_readers_union(struct rc_variable * var) return list; } +static unsigned int reader_equals_src( + struct rc_reader reader, + unsigned int src_type, + void * src) +{ + if (reader.Inst->Type != src_type) { + return 0; + } + if (src_type == RC_INSTRUCTION_NORMAL) { + return reader.U.I.Src == src; + } else { + return reader.U.P.Src == src; + } +} + +static unsigned int variable_writes_src( + struct rc_variable * var, + unsigned int src_type, + void * src) +{ + unsigned int i; + for (i = 0; i < var->ReaderCount; i++) { + if (reader_equals_src(var->Readers[i], src_type, src)) { + return 1; + } + } + return 0; +} + + +struct rc_list * rc_variable_list_get_writers( + struct rc_list * var_list, + unsigned int src_type, + void * src) +{ + struct rc_list * list_ptr; + struct rc_list * writer_list = NULL; + for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) { + struct rc_variable * var = list_ptr->Item; + if (variable_writes_src(var, src_type, src)) { + struct rc_variable * friend; + rc_list_add(&writer_list, rc_list(&var->C->Pool, var)); + for (friend = var->Friend; friend; + friend = friend->Friend) { + if (variable_writes_src(friend, src_type, src)) { + rc_list_add(&writer_list, + rc_list(&var->C->Pool, friend)); + } + } + /* Once we have indentifed the variable and its + * friends that write this source, we can stop + * stop searching, because we know know of the + * other variables in the list will write this source. + * If they did they would be friends of var. + */ + break; + } + } + return writer_list; +} + void rc_variable_print(struct rc_variable * var) { unsigned int i; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.h b/src/mesa/drivers/dri/r300/compiler/radeon_variable.h index b8fbcaa4029..9427bee18a7 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_variable.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_variable.h @@ -79,6 +79,11 @@ unsigned int rc_variable_writemask_sum(struct rc_variable * var); struct rc_list * rc_variable_readers_union(struct rc_variable * var); +struct rc_list * rc_variable_list_get_writers( + struct rc_list * var_list, + unsigned int src_type, + void * src); + void rc_variable_print(struct rc_variable * var); #endif /* RADEON_VARIABLE_H */ -- 2.30.2