X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fr300%2Fcompiler%2Fradeon_optimize.c;h=3bbfe898cc2ad599cc43da9ea73a7e61357b5d9a;hb=e2c3640540dbe423d2c75d89615854aeb9f560f3;hp=39dcb21d4f4b6eca191678aab95fc5a6b64d305d;hpb=1c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6;p=mesa.git diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c index 39dcb21d4f4..3bbfe898cc2 100644 --- a/src/gallium/drivers/r300/compiler/radeon_optimize.c +++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c @@ -30,7 +30,9 @@ #include "radeon_compiler.h" #include "radeon_compiler_util.h" +#include "radeon_list.h" #include "radeon_swizzle.h" +#include "radeon_variable.h" struct src_clobbered_reads_cb_data { rc_register_file File; @@ -141,8 +143,7 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i unsigned int i; if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || - inst_mov->U.I.WriteALUResult || - inst_mov->U.I.SaturateMode) + inst_mov->U.I.WriteALUResult) return; /* Get a list of all the readers of this MOV instruction. */ @@ -154,6 +155,22 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i if (reader_data.Abort || reader_data.ReaderCount == 0) return; + /* We can propagate SaturateMode if all the readers are MOV instructions + * without a presubtract operation, source negation and absolute. + * In that case, we just move SaturateMode to all readers. */ + if (inst_mov->U.I.SaturateMode) { + for (i = 0; i < reader_data.ReaderCount; i++) { + struct rc_instruction * inst = reader_data.Readers[i].Inst; + + if (inst->U.I.Opcode != RC_OPCODE_MOV || + inst->U.I.SrcReg[0].File == RC_FILE_PRESUB || + inst->U.I.SrcReg[0].Abs || + inst->U.I.SrcReg[0].Negate) { + return; + } + } + } + /* Propagate the MOV instruction. */ for (i = 0; i < reader_data.ReaderCount; i++) { struct rc_instruction * inst = reader_data.Readers[i].Inst; @@ -161,6 +178,8 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) inst->U.I.PreSub = inst_mov->U.I.PreSub; + if (!inst->U.I.SaturateMode) + inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode; } /* Finally, remove the original MOV instruction */ @@ -519,7 +538,8 @@ static int is_presub_candidate( if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode - || inst->U.I.WriteALUResult) { + || inst->U.I.WriteALUResult + || inst->U.I.Omod) { return 0; } @@ -658,6 +678,179 @@ static int peephole_add_presub_inv( return 0; } +struct peephole_mul_cb_data { + struct rc_dst_register * Writer; + unsigned int Clobbered; +}; + +static void omod_filter_reader_cb( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct peephole_mul_cb_data * d = userdata; + if (rc_src_reads_dst_mask(file, mask, index, + d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) { + + d->Clobbered = 1; + } +} + +static void omod_filter_writer_cb( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct peephole_mul_cb_data * d = userdata; + if (file == d->Writer->File && index == d->Writer->Index && + (mask & d->Writer->WriteMask)) { + d->Clobbered = 1; + } +} + +static int peephole_mul_omod( + struct radeon_compiler * c, + struct rc_instruction * inst_mul, + struct rc_list * var_list) +{ + unsigned int chan = 0, swz, i; + int const_index = -1; + int temp_index = -1; + float const_value; + rc_omod_op omod_op = RC_OMOD_DISABLE; + struct rc_list * writer_list; + struct rc_variable * var; + struct peephole_mul_cb_data cb_data; + unsigned writemask_sum; + + for (i = 0; i < 2; i++) { + unsigned int j; + if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT + && inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) { + return 0; + } + if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + if (temp_index != -1) { + /* The instruction has two temp sources */ + return 0; + } else { + temp_index = i; + continue; + } + } + /* If we get this far Src[i] must be a constant src */ + if (inst_mul->U.I.SrcReg[i].Negate) { + return 0; + } + /* The constant src needs to read from the same swizzle */ + swz = RC_SWIZZLE_UNUSED; + chan = 0; + for (j = 0; j < 4; j++) { + unsigned int j_swz = + GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j); + if (j_swz == RC_SWIZZLE_UNUSED) { + continue; + } + if (swz == RC_SWIZZLE_UNUSED) { + swz = j_swz; + chan = j; + } else if (j_swz != swz) { + return 0; + } + } + + if (const_index != -1) { + /* The instruction has two constant sources */ + return 0; + } else { + const_index = i; + } + } + + if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File, + inst_mul->U.I.SrcReg[const_index].Index)) { + return 0; + } + const_value = rc_get_constant_value(c, + inst_mul->U.I.SrcReg[const_index].Index, + inst_mul->U.I.SrcReg[const_index].Swizzle, + inst_mul->U.I.SrcReg[const_index].Negate, + chan); + + if (const_value == 2.0f) { + omod_op = RC_OMOD_MUL_2; + } else if (const_value == 4.0f) { + omod_op = RC_OMOD_MUL_4; + } else if (const_value == 8.0f) { + omod_op = RC_OMOD_MUL_8; + } else if (const_value == (1.0f / 2.0f)) { + omod_op = RC_OMOD_DIV_2; + } else if (const_value == (1.0f / 4.0f)) { + omod_op = RC_OMOD_DIV_4; + } else if (const_value == (1.0f / 8.0f)) { + omod_op = RC_OMOD_DIV_8; + } else { + return 0; + } + + writer_list = rc_variable_list_get_writers_one_reader(var_list, + RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]); + + if (!writer_list) { + return 0; + } + + cb_data.Clobbered = 0; + cb_data.Writer = &inst_mul->U.I.DstReg; + for (var = writer_list->Item; var; var = var->Friend) { + struct rc_instruction * inst; + const struct rc_opcode_info * info = rc_get_opcode_info( + var->Inst->U.I.Opcode); + if (info->HasTexture) { + return 0; + } + if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) { + return 0; + } + for (inst = inst_mul->Prev; inst != var->Inst; + inst = inst->Prev) { + rc_for_all_reads_mask(inst, omod_filter_reader_cb, + &cb_data); + rc_for_all_writes_mask(inst, omod_filter_writer_cb, + &cb_data); + if (cb_data.Clobbered) { + break; + } + } + } + + if (cb_data.Clobbered) { + return 0; + } + + /* Rewrite the instructions */ + writemask_sum = rc_variable_writemask_sum(writer_list->Item); + for (var = writer_list->Item; var; var = var->Friend) { + struct rc_variable * writer = var; + unsigned conversion_swizzle = rc_make_conversion_swizzle( + writemask_sum, + inst_mul->U.I.DstReg.WriteMask); + writer->Inst->U.I.Omod = omod_op; + writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File; + writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index; + rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle); + writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode; + } + + rc_remove_instruction(inst_mul); + + return 1; +} + /** * @return * 0 if inst is still part of the program. @@ -683,6 +876,7 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) void rc_optimize(struct radeon_compiler * c, void *user) { struct rc_instruction * inst = c->Program.Instructions.Next; + struct rc_list * var_list; while(inst != &c->Program.Instructions) { struct rc_instruction * cur = inst; inst = inst->Next; @@ -697,4 +891,18 @@ void rc_optimize(struct radeon_compiler * c, void *user) /* cur may no longer be part of the program */ } } + + if (!c->has_omod) { + return; + } + + inst = c->Program.Instructions.Next; + while(inst != &c->Program.Instructions) { + struct rc_instruction * cur = inst; + inst = inst->Next; + if (cur->U.I.Opcode == RC_OPCODE_MUL) { + var_list = rc_get_variables(c); + peephole_mul_omod(c, cur, var_list); + } + } }