X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fr300%2Fcompiler%2Fradeon_optimize.c;h=4d9120ffd09f8104120c152b9fb26e0cd423f98c;hb=cd6a31cd4a9ea6deef4778c2eaef2d47240c3a6e;hp=2bed2dd9c5f293dd2e9afa94b6f8d4ecaab4d8e3;hpb=564653b9f196b9bf91fe772fd1ca1e131ff33774;p=mesa.git diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 2bed2dd9c5f..4d9120ffd09 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2010 Tom Stellard * * All Rights Reserved. * @@ -28,6 +29,7 @@ #include "radeon_dataflow.h" #include "radeon_compiler.h" +#include "radeon_compiler_util.h" #include "radeon_swizzle.h" struct peephole_state { @@ -86,84 +88,65 @@ struct copy_propagate_state { int BranchDepth; }; -/** - * This is a callback function that is meant to be passed to - * rc_for_all_reads_mask. This function will be called once for each source - * register in inst. - * @param inst The instruction that the source register belongs to. - * @param file The register file of the source register. - * @param index The index of the source register. - * @param mask The components of the source register that are being read from. - */ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask) + struct rc_src_register * src) { - struct copy_propagate_state * s = data; + rc_register_file file = src->File; + struct rc_reader_data * reader_data = data; + const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); + + /* It is possible to do copy propigation in this situation, + * just not right now, see peephole_add_presub_inv() */ + if (reader_data->Writer->U.I.PreSub.Opcode != RC_PRESUB_NONE && + (info->NumSrcRegs > 2 || info->HasTexture)) { + reader_data->Abort = 1; + return; + } /* XXX This could probably be handled better. */ if (file == RC_FILE_ADDRESS) { - s->Conflict = 1; + reader_data->Abort = 1; return; } - if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index) - return; - /* These instructions cannot read from the constants file. * see radeonTransformTEX() */ - if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && - s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT && + if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && + reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && (inst->U.I.Opcode == RC_OPCODE_TEX || inst->U.I.Opcode == RC_OPCODE_TXB || inst->U.I.Opcode == RC_OPCODE_TXP || inst->U.I.Opcode == RC_OPCODE_KIL)){ - s->Conflict = 1; + reader_data->Abort = 1; return; } - if ((mask & s->MovMask) == mask) { - if (s->SourceClobbered) { - s->Conflict = 1; - } - } else if ((mask & s->DefinedMask) == mask) { - /* read from something entirely written by other instruction: this is okay */ - } else { - /* read from component combination that is not well-defined without - * the MOV: cannot remove it */ - s->Conflict = 1; - } } static void copy_propagate_scan_write(void * data, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int mask) { - struct copy_propagate_state * s = data; + struct rc_reader_data * reader_data = data; + struct copy_propagate_state * s = reader_data->CbData; - if (s->BranchDepth < 0) - return; - - if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) { - s->MovMask &= ~mask; - if (s->BranchDepth == 0) - s->DefinedMask |= mask; - else - s->DefinedMask &= ~mask; - } - if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) { + if (file == reader_data->Writer->U.I.SrcReg[0].File && index == reader_data->Writer->U.I.SrcReg[0].Index) { if (mask & s->SourcedMask) - s->SourceClobbered = 1; + reader_data->AbortOnRead = 1; } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) { - s->SourceClobbered = 1; + reader_data->AbortOnRead = 1; } } static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) { struct copy_propagate_state s; + struct rc_reader_data reader_data; + unsigned int i; if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.DstReg.RelAddr || - inst_mov->U.I.WriteALUResult) + inst_mov->U.I.WriteALUResult || + inst_mov->U.I.SaturateMode) return; memset(&s, 0, sizeof(s)); @@ -172,95 +155,27 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i s.MovMask = inst_mov->U.I.DstReg.WriteMask; s.DefinedMask = RC_MASK_XYZW & ~s.MovMask; + reader_data.CbData = &s; + for(unsigned int chan = 0; chan < 4; ++chan) { unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan); s.SourcedMask |= (1 << swz) & RC_MASK_XYZW; } - /* 1st pass: Check whether all subsequent readers can be changed */ - for(struct rc_instruction * inst = inst_mov->Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); - /* XXX In the future we might be able to make the optimizer - * smart enough to handle loops. */ - if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP - || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){ - return; - } - - /* It is possible to do copy propigation in this situation, - * just not right now, see peephole_add_presub_inv() */ - if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE && - info->NumSrcRegs > 2) { - return; - } - - rc_for_all_reads_mask(inst, copy_propagate_scan_read, &s); - rc_for_all_writes_mask(inst, copy_propagate_scan_write, &s); - if (s.Conflict) - return; - - if (s.BranchDepth >= 0) { - if (inst->U.I.Opcode == RC_OPCODE_IF) { - s.BranchDepth++; - } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF - || inst->U.I.Opcode == RC_OPCODE_ELSE) { - s.BranchDepth--; - if (s.BranchDepth < 0) { - s.DefinedMask &= ~s.MovMask; - s.MovMask = 0; - } - } - } - } + /* Get a list of all the readers of this MOV instruction. */ + rc_get_readers_normal(c, inst_mov, &reader_data, + copy_propagate_scan_read, copy_propagate_scan_write); - if (s.Conflict) + if (reader_data.Abort || reader_data.ReaderCount == 0) return; - /* 2nd pass: We can satisfy all readers, so switch them over all at once */ - s.MovMask = inst_mov->U.I.DstReg.WriteMask; - s.BranchDepth = 0; - - for(struct rc_instruction * inst = inst_mov->Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { - if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY && - inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) { - unsigned int refmask = 0; - - for(unsigned int chan = 0; chan < 4; ++chan) { - unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); - refmask |= (1 << swz) & RC_MASK_XYZW; - } - - if ((refmask & s.MovMask) == refmask) { - inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]); - if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) - inst->U.I.PreSub = s.Mov->U.I.PreSub; - } - } - } - - if (opcode->HasDstReg) { - if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY && - inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) { - s.MovMask &= ~inst->U.I.DstReg.WriteMask; - } - } + /* Propagate the MOV instruction. */ + for (i = 0; i < reader_data.ReaderCount; i++) { + struct rc_instruction * inst = reader_data.Readers[i].Inst; + *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, s.Mov->U.I.SrcReg[0]); - if (s.BranchDepth >= 0) { - if (inst->U.I.Opcode == RC_OPCODE_IF) { - s.BranchDepth++; - } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF - || inst->U.I.Opcode == RC_OPCODE_ELSE) { - s.BranchDepth--; - if (s.BranchDepth < 0) - break; /* no more readers after this point */ - } - } + if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) + inst->U.I.PreSub = s.Mov->U.I.PreSub; } /* Finally, remove the original MOV instruction */ @@ -407,30 +322,38 @@ static void constant_folding_add(struct rc_instruction * inst) static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int i; /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + struct rc_constant * constant; + struct rc_src_register newsrc; + int have_real_reference; + if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || inst->U.I.SrcReg[src].RelAddr || inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) continue; - struct rc_constant * constant = + constant = &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; if (constant->Type != RC_CONSTANT_IMMEDIATE) continue; - struct rc_src_register newsrc = inst->U.I.SrcReg[src]; - int have_real_reference = 0; + newsrc = inst->U.I.SrcReg[src]; + have_real_reference = 0; for(unsigned int chan = 0; chan < 4; ++chan) { unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); + unsigned int newswz; + float imm; + float baseimm; + if (swz >= 4) continue; - unsigned int newswz; - float imm = constant->u.Immediate[swz]; - float baseimm = imm; + imm = constant->u.Immediate[swz]; + baseimm = imm; if (imm < 0.0) baseimm = -baseimm; @@ -472,27 +395,26 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction * constant_folding_mul(inst); else if (inst->U.I.Opcode == RC_OPCODE_ADD) constant_folding_add(inst); + + /* In case this instruction has been converted, make sure all of the + * registers that are no longer used are empty. */ + opcode = rc_get_opcode_info(inst->U.I.Opcode); + for(i = opcode->NumSrcRegs; i < 3; i++) { + memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); + } } /** - * This function returns a writemask that indicates wich components are - * read by src and also written by dst. + * If src and dst use the same register, this function returns a writemask that + * indicates wich components are read by src. Otherwise zero is returned. */ static unsigned int src_reads_dst_mask(struct rc_src_register src, struct rc_dst_register dst) { - unsigned int mask = 0; - unsigned int i; if (dst.File != src.File || dst.Index != src.Index) { return 0; } - - for(i = 0; i < 4; i++) { - mask |= 1 << GET_SWZ(src.Swizzle, i); - } - mask &= RC_MASK_XYZW; - - return mask; + return rc_swizzle_to_writemask(src.Swizzle); } /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) @@ -533,46 +455,53 @@ static int presub_helper( for(inst = s->Inst->Next; inst != &c->Program.Instructions; inst = inst->Next) { unsigned int i; + unsigned char can_use_presub = 1; const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); + /* XXX: There are some situations where instructions + * with more than 2 src registers can use the + * presubtract select, but to keep things simple we + * will disable presubtract on these instructions for + * now. */ + if (info->NumSrcRegs > 2 || info->HasTexture) { + can_use_presub = 0; + } - for(i = 0; i < info->NumSrcRegs; i++) { - if(s->Inst->U.I.DstReg.WriteMask != - src_reads_dst_mask(inst->U.I.SrcReg[i], - s->Inst->U.I.DstReg)) { - continue; - } - if (cant_sub) { - can_remove = 0; - break; + /* We can't use more than one presubtract value in an + * instruction, unless the two prsubtract operations + * are the same and read from the same registers. */ + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + if (inst->U.I.PreSub.Opcode != presub_opcode + || inst->U.I.PreSub.SrcReg[0].File != + s->Inst->U.I.SrcReg[1].File + || inst->U.I.PreSub.SrcReg[0].Index != + s->Inst->U.I.SrcReg[1].Index) { + can_use_presub = 0; } - /* XXX: There are some situations where instructions - * with more than 2 src registers can use the - * presubtract select, but to keep things simple we - * will disable presubtract on these instructions for - * now. Note: This if statement should not be pulled - * outside of the loop, because it only applies to - * instructions that could potentially use the - * presubtract source. */ - if (info->NumSrcRegs > 2) { - can_remove = 0; - break; - } - - /* We can't use more than one presubtract value in an - * instruction, unless the two prsubtract operations - * are the same and read from the same registers. */ - if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { - if (inst->U.I.PreSub.Opcode != presub_opcode - || inst->U.I.PreSub.SrcReg[0].File != - s->Inst->U.I.SrcReg[1].File - || inst->U.I.PreSub.SrcReg[0].Index != - s->Inst->U.I.SrcReg[1].Index) { + } + /* Even if the instruction can't use a presubtract operation + * we still need to check if the instruction reads from + * s->Inst->U.I.DstReg, because if it does we must not + * remove s->Inst. */ + for(i = 0; i < info->NumSrcRegs; i++) { + unsigned int mask = src_reads_dst_mask( + inst->U.I.SrcReg[i], s->Inst->U.I.DstReg); + /* XXX We could be more aggressive here using + * presubtract. It is okay if SrcReg[i] only reads + * from some of the mask components. */ + if(s->Inst->U.I.DstReg.WriteMask != mask) { + if (s->Inst->U.I.DstReg.WriteMask & mask) { can_remove = 0; break; + } else { + continue; } } + if (cant_sub || !can_use_presub) { + can_remove = 0; + break; + } presub_replace(s, inst, i); can_remove = 1; } @@ -590,6 +519,8 @@ static int presub_helper( if (inst->U.I.DstReg.WriteMask & src_reads_dst_mask(s->Inst->U.I.SrcReg[1], inst->U.I.DstReg) + || src_reads_dst_mask(s->Inst->U.I.SrcReg[0], + inst->U.I.DstReg) || info->IsFlowControl) { cant_sub = 1; } @@ -625,6 +556,21 @@ static void presub_replace_add(struct peephole_state *s, inst->U.I.SrcReg[src_index].Index = presub_opcode; } +static int is_presub_candidate(struct rc_instruction * inst) +{ + const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int i; + + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode) + return 0; + + for(i = 0; i < info->NumSrcRegs; i++) { + if (src_reads_dst_mask(inst->U.I.SrcReg[i], inst->U.I.DstReg)) + return 0; + } + return 1; +} + static int peephole_add_presub_add( struct radeon_compiler * c, struct rc_instruction * inst_add) @@ -634,10 +580,7 @@ static int peephole_add_presub_add( unsigned int i; struct peephole_state s; - if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE) - return 0; - - if (inst_add->U.I.SaturateMode) + if (!is_presub_candidate(inst_add)) return 0; if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) @@ -704,10 +647,7 @@ static int peephole_add_presub_inv( unsigned int i, swz, mask; struct peephole_state s; - if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE) - return 0; - - if (inst_add->U.I.SaturateMode) + if (!is_presub_candidate(inst_add)) return 0; mask = inst_add->U.I.DstReg.WriteMask;