From: Tom Stellard Date: Mon, 30 Aug 2010 15:59:30 +0000 (-0700) Subject: r300/compiler: Add peephole optimization for the 'add' presubtract operation X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a64b4a05af362fff52c9e52eb51cd92fe164afcc;p=mesa.git r300/compiler: Add peephole optimization for the 'add' presubtract operation --- diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index e01ba85aa56..3ff07d60396 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -38,6 +38,10 @@ struct peephole_state { unsigned int WriteMask; }; +typedef void (*rc_presub_replace_fn)(struct peephole_state *, + struct rc_instruction *, + unsigned int); + static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) { struct rc_src_register combine; @@ -516,68 +520,26 @@ static void peephole_scan_write(void * data, struct rc_instruction * inst, } } -/** - * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] - * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source - * of the add instruction must have the constatnt 1 swizzle. This function - * does not check const registers to see if their value is 1.0, so it should - * be called after the constant_folding optimization. - * @return - * 0 if the ADD instruction is still part of the program. - * 1 if the ADD instruction is no longer part of the program. 
- */ -static int peephole_add_presub_inv( +static int presub_helper( struct radeon_compiler * c, - struct rc_instruction * inst_add) + struct peephole_state * s, + rc_presubtract_op presub_opcode, + rc_presub_replace_fn presub_replace) { - unsigned int i, swz, mask; + struct rc_instruction * inst; unsigned int can_remove = 0; unsigned int cant_sub = 0; - struct rc_instruction * inst; - struct peephole_state s; - - if (inst_add->U.I.SaturateMode) - return 0; - - mask = inst_add->U.I.DstReg.WriteMask; - - /* Check if src0 is 1. */ - /* XXX It would be nice to use is_src_uniform_constant here, but that - * function only works if the register's file is RC_FILE_NONE */ - for(i = 0; i < 4; i++ ) { - swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); - if(((1 << i) & inst_add->U.I.DstReg.WriteMask) - && swz != RC_SWIZZLE_ONE) { - return 0; - } - } - /* Check src1. */ - if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != - inst_add->U.I.DstReg.WriteMask - || inst_add->U.I.SrcReg[1].Abs - || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY - && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) - || src_has_const_swz(inst_add->U.I.SrcReg[1])) { - - return 0; - } - - /* Setup the peephole_state information. */ - s.Inst = inst_add; - s.WriteMask = inst_add->U.I.DstReg.WriteMask; - - /* For all instructions that read inst_add->U.I.DstReg before it is - * written again, use the 1 - src0 presubtact instead. 
*/ - for(inst = inst_add->Next; inst != &c->Program.Instructions; + for(inst = s->Inst->Next; inst != &c->Program.Instructions; inst = inst->Next) { + unsigned int i; const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); for(i = 0; i < info->NumSrcRegs; i++) { - if(inst_add->U.I.DstReg.WriteMask != + if(s->Inst->U.I.DstReg.WriteMask != src_reads_dst_mask(inst->U.I.SrcReg[i], - inst_add->U.I.DstReg)) { + s->Inst->U.I.DstReg)) { continue; } if (cant_sub) { @@ -601,47 +563,173 @@ static int peephole_add_presub_inv( * instruction, unless the two prsubtract operations * are the same and read from the same registers. */ if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { - if (inst->U.I.PreSub.Opcode != RC_PRESUB_INV + if (inst->U.I.PreSub.Opcode != presub_opcode || inst->U.I.PreSub.SrcReg[0].File != - inst_add->U.I.SrcReg[1].File + s->Inst->U.I.SrcReg[1].File || inst->U.I.PreSub.SrcReg[0].Index != - inst_add->U.I.SrcReg[1].Index) { + s->Inst->U.I.SrcReg[1].Index) { can_remove = 0; break; } } - /* We must be careful not to modify inst_add, since it - * is possible it will remain part of the program. */ - inst->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; - inst->U.I.PreSub.SrcReg[0].Negate = 0; - inst->U.I.PreSub.Opcode = RC_PRESUB_INV; - inst->U.I.SrcReg[i] = chain_srcregs(inst->U.I.SrcReg[i], - inst->U.I.PreSub.SrcReg[0]); - - inst->U.I.SrcReg[i].File = RC_FILE_PRESUB; - inst->U.I.SrcReg[i].Index = RC_PRESUB_INV; + presub_replace(s, inst, i); can_remove = 1; } if(!can_remove) break; - rc_for_all_writes_mask(inst, peephole_scan_write, &s); + rc_for_all_writes_mask(inst, peephole_scan_write, s); /* If all components of inst_add's destination register have * been written to by subsequent instructions, the original * value of the destination register is no longer valid and * we can't keep doing substitutions. */ - if (!s.WriteMask){ + if (!s->WriteMask){ break; } /* Make this instruction doesn't write to the presubtract source. 
*/ if (inst->U.I.DstReg.WriteMask & - src_reads_dst_mask(inst_add->U.I.SrcReg[1], + src_reads_dst_mask(s->Inst->U.I.SrcReg[1], inst->U.I.DstReg) || info->IsFlowControl) { cant_sub = 1; } } - if(can_remove) { + return can_remove; +} + +static void presub_replace_add(struct peephole_state *s, + struct rc_instruction * inst, + unsigned int src_index) +{ + inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0]; + inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1]; + inst->U.I.PreSub.SrcReg[0].Negate = 0; + inst->U.I.PreSub.SrcReg[1].Negate = 0; + inst->U.I.PreSub.Opcode = RC_PRESUB_ADD; + inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index], + inst->U.I.PreSub.SrcReg[0]); + inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst->U.I.SrcReg[src_index].Index = RC_PRESUB_ADD; +} + +static int peephole_add_presub_add( + struct radeon_compiler * c, + struct rc_instruction * inst_add) +{ + struct rc_src_register * src0 = NULL; + struct rc_src_register * src1 = NULL; + unsigned int i; + struct peephole_state s; + + if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE) + return 0; + + if (inst_add->U.I.SaturateMode) + return 0; + + if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) + return 0; + + /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */ + for (i = 0; i < 2; i++) { + if (inst_add->U.I.SrcReg[i].Abs) + return 0; + if ((inst_add->U.I.SrcReg[i].Negate + & inst_add->U.I.DstReg.WriteMask) == + inst_add->U.I.DstReg.WriteMask) { + src0 = &inst_add->U.I.SrcReg[i]; + } else if (!src1) { + src1 = &inst_add->U.I.SrcReg[i]; + } else { + src0 = &inst_add->U.I.SrcReg[i]; + } + } + + if (!src1) + return 0; + + /* XXX Only do add for now. 
*/ + if (src0->Negate) + return 0; + + s.Inst = inst_add; + s.WriteMask = inst_add->U.I.DstReg.WriteMask; + if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) { + rc_remove_instruction(inst_add); + return 1; + } + return 0; +} + +static void presub_replace_inv(struct peephole_state * s, + struct rc_instruction * inst, + unsigned int src_index) +{ + /* We must be careful not to modify s->Inst, since it + * is possible it will remain part of the program. + * XXX Maybe pass a struct instead of a pointer for s->Inst.*/ + inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1]; + inst->U.I.PreSub.SrcReg[0].Negate = 0; + inst->U.I.PreSub.Opcode = RC_PRESUB_INV; + inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index], + inst->U.I.PreSub.SrcReg[0]); + + inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; +} + +/** + * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] + * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source + * of the add instruction must have the constant 1 swizzle. This function + * does not check const registers to see if their value is 1.0, so it should + * be called after the constant_folding optimization. + * @return + * 0 if the ADD instruction is still part of the program. + * 1 if the ADD instruction is no longer part of the program. + */ +static int peephole_add_presub_inv( + struct radeon_compiler * c, + struct rc_instruction * inst_add) +{ + unsigned int i, swz, mask; + struct peephole_state s; + + if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE) + return 0; + + if (inst_add->U.I.SaturateMode) + return 0; + + mask = inst_add->U.I.DstReg.WriteMask; + + /* Check if src0 is 1. 
*/ + /* XXX It would be nice to use is_src_uniform_constant here, but that + * function only works if the register's file is RC_FILE_NONE */ + for(i = 0; i < 4; i++ ) { + swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); + if(((1 << i) & inst_add->U.I.DstReg.WriteMask) + && swz != RC_SWIZZLE_ONE) { + return 0; + } + } + + /* Check src1. */ + if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != + inst_add->U.I.DstReg.WriteMask + || inst_add->U.I.SrcReg[1].Abs + || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY + && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) + || src_has_const_swz(inst_add->U.I.SrcReg[1])) { + + return 0; + } + + /* Setup the peephole_state information. */ + s.Inst = inst_add; + s.WriteMask = inst_add->U.I.DstReg.WriteMask; + + if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) { rc_remove_instruction(inst_add); return 1; } @@ -660,6 +748,8 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) if (c->has_presub) { if(peephole_add_presub_inv(c, inst)) return 1; + if(peephole_add_presub_add(c, inst)) + return 1; } break; default: diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index 32c54fd74bc..5269d659851 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -300,6 +300,7 @@ static int destructive_merge_instructions( for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { unsigned int arg; int free_source; + unsigned int one_way = 0; struct radeon_pair_instruction_source srcp = alpha->RGB.Src[srcp_src]; struct radeon_pair_instruction_source temp; @@ -307,14 +308,27 @@ static int destructive_merge_instructions( * 3rd arg of 0 means this is not an alpha source. 
*/ free_source = rc_pair_alloc_source(rgb, 1, 0, srcp.File, srcp.Index); - /* If free_source == srcp_src, then either the - * presubtract source is already in the correct place. */ - if (free_source == srcp_src) - continue; /* If free_source < 0 then there are no free source * slots. */ if (free_source < 0) return 0; + + temp = rgb->RGB.Src[srcp_src]; + rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source]; + /* srcp needs src0 and src1 to be the same */ + if (free_source < srcp_src) { + if (!temp.Used) + continue; + free_source = rc_pair_alloc_source(rgb, 1, 0, + srcp.File, srcp.Index); + one_way = 1; + } else { + rgb->RGB.Src[free_source] = temp; + } + /* If free_source == srcp_src, then the presubtract + * source is already in the correct place. */ + if (free_source == srcp_src) + continue; /* Shuffle the sources, so we can put the * presubtract source in the correct place. */ for (arg = 0; arg < rgb_info->NumSrcRegs; arg++) { @@ -331,12 +345,11 @@ static int destructive_merge_instructions( /* We need to do this just in case register * is one of the sources already, but in the * wrong spot. */ - else if(rgb->RGB.Arg[arg].Source == free_source) + else if(rgb->RGB.Arg[arg].Source == free_source + && !one_way) { rgb->RGB.Arg[arg].Source = srcp_src; + } } - temp = rgb->RGB.Src[srcp_src]; - rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source]; - rgb->RGB.Src[free_source] = temp; } } @@ -352,6 +365,7 @@ static int destructive_merge_instructions( for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) { unsigned int arg; int free_source; + unsigned int one_way = 0; struct radeon_pair_instruction_source srcp = alpha->Alpha.Src[srcp_src]; struct radeon_pair_instruction_source temp; @@ -359,14 +373,27 @@ static int destructive_merge_instructions( * 3rd arg of 1 means this is an alpha source. */ free_source = rc_pair_alloc_source(rgb, 0, 1, srcp.File, srcp.Index); - /* If free_source == srcp_src, then either the - * presubtract source is already in the correct place. 
*/ - if (free_source == srcp_src) - continue; /* If free_source < 0 then there are no free source * slots. */ if (free_source < 0) return 0; + + temp = rgb->Alpha.Src[srcp_src]; + rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source]; + /* srcp needs src0 and src1 to be the same. */ + if (free_source < srcp_src) { + if (!temp.Used) + continue; + free_source = rc_pair_alloc_source(rgb, 0, 1, + temp.File, temp.Index); + one_way = 1; + } else { + rgb->Alpha.Src[free_source] = temp; + } + /* If free_source == srcp_src, then the presubtract + * source is already in the correct place. */ + if (free_source == srcp_src) + continue; /* Shuffle the sources, so we can put the * presubtract source in the correct place. */ for(arg = 0; arg < rgb_info->NumSrcRegs; arg++) { @@ -380,12 +407,11 @@ static int destructive_merge_instructions( } if (rgb->RGB.Arg[arg].Source == srcp_src) rgb->RGB.Arg[arg].Source = free_source; - else if (rgb->RGB.Arg[arg].Source == free_source) + else if (rgb->RGB.Arg[arg].Source == free_source + && !one_way) { rgb->RGB.Arg[arg].Source = srcp_src; + } } - temp = rgb->Alpha.Src[srcp_src]; - rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source]; - rgb->Alpha.Src[free_source] = temp; } }