r300/compiler: Enable presubtract sources
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_optimize.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_dataflow.h"
29
30 #include "radeon_compiler.h"
31 #include "radeon_swizzle.h"
32
/** Bookkeeping shared with the write-scan callback of the peephole pass. */
struct peephole_state {
	/** Instruction whose destination register is being tracked. */
	struct rc_instruction *Inst;

	/**
	 * Bitmask of the destination components that are still "alive",
	 * i.e. that have not been written to since Inst was executed.
	 */
	unsigned WriteMask;
};
40
41 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
42 {
43 struct rc_src_register combine;
44 combine.File = inner.File;
45 combine.Index = inner.Index;
46 combine.RelAddr = inner.RelAddr;
47 if (outer.Abs) {
48 combine.Abs = 1;
49 combine.Negate = outer.Negate;
50 } else {
51 combine.Abs = inner.Abs;
52 combine.Negate = 0;
53 for(unsigned int chan = 0; chan < 4; ++chan) {
54 unsigned int swz = GET_SWZ(outer.Swizzle, chan);
55 if (swz < 4)
56 combine.Negate |= GET_BIT(inner.Negate, swz) << chan;
57 }
58 combine.Negate ^= outer.Negate;
59 }
60 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
61 return combine;
62 }
63
/** Scan state used while deciding whether a MOV can be copy-propagated. */
struct copy_propagate_state {
	struct radeon_compiler *C;

	/** The MOV instruction under consideration. */
	struct rc_instruction *Mov;

	/** Set as soon as some later instruction makes removing Mov unsafe. */
	unsigned Conflict : 1;

	/** Whether Mov's source has been clobbered */
	unsigned SourceClobbered : 1;

	/** Which components of Mov's destination register are still from that Mov? */
	unsigned MovMask : 4;

	/** Which components of Mov's destination register are clearly *not* from that Mov */
	unsigned DefinedMask : 4;

	/** Which components of Mov's source register are sourced */
	unsigned SourcedMask : 4;

	/** Branch depth beyond Mov; negative value indicates we left the Mov's block */
	int BranchDepth;
};
84
85 /**
86 * This is a callback function that is meant to be passed to
87 * rc_for_all_reads_mask. This function will be called once for each source
88 * register in inst.
89 * @param inst The instruction that the source register belongs to.
90 * @param file The register file of the source register.
91 * @param index The index of the source register.
92 * @param mask The components of the source register that are being read from.
93 */
94 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
95 rc_register_file file, unsigned int index, unsigned int mask)
96 {
97 struct copy_propagate_state * s = data;
98
99 /* XXX This could probably be handled better. */
100 if (file == RC_FILE_ADDRESS) {
101 s->Conflict = 1;
102 return;
103 }
104
105 if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index)
106 return;
107
108 /* These instructions cannot read from the constants file.
109 * see radeonTransformTEX()
110 */
111 if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
112 s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT &&
113 (inst->U.I.Opcode == RC_OPCODE_TEX ||
114 inst->U.I.Opcode == RC_OPCODE_TXB ||
115 inst->U.I.Opcode == RC_OPCODE_TXP ||
116 inst->U.I.Opcode == RC_OPCODE_KIL)){
117 s->Conflict = 1;
118 return;
119 }
120 if ((mask & s->MovMask) == mask) {
121 if (s->SourceClobbered) {
122 s->Conflict = 1;
123 }
124 } else if ((mask & s->DefinedMask) == mask) {
125 /* read from something entirely written by other instruction: this is okay */
126 } else {
127 /* read from component combination that is not well-defined without
128 * the MOV: cannot remove it */
129 s->Conflict = 1;
130 }
131 }
132
133 static void copy_propagate_scan_write(void * data, struct rc_instruction * inst,
134 rc_register_file file, unsigned int index, unsigned int mask)
135 {
136 struct copy_propagate_state * s = data;
137
138 if (s->BranchDepth < 0)
139 return;
140
141 if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) {
142 s->MovMask &= ~mask;
143 if (s->BranchDepth == 0)
144 s->DefinedMask |= mask;
145 else
146 s->DefinedMask &= ~mask;
147 }
148 if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) {
149 if (mask & s->SourcedMask)
150 s->SourceClobbered = 1;
151 } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
152 s->SourceClobbered = 1;
153 }
154 }
155
156 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
157 {
158 struct copy_propagate_state s;
159
160 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
161 inst_mov->U.I.DstReg.RelAddr ||
162 inst_mov->U.I.WriteALUResult)
163 return;
164
165 memset(&s, 0, sizeof(s));
166 s.C = c;
167 s.Mov = inst_mov;
168 s.MovMask = inst_mov->U.I.DstReg.WriteMask;
169 s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
170
171 for(unsigned int chan = 0; chan < 4; ++chan) {
172 unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
173 s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
174 }
175
176 /* 1st pass: Check whether all subsequent readers can be changed */
177 for(struct rc_instruction * inst = inst_mov->Next;
178 inst != &c->Program.Instructions;
179 inst = inst->Next) {
180 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
181 /* XXX In the future we might be able to make the optimizer
182 * smart enough to handle loops. */
183 if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP
184 || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){
185 return;
186 }
187
188 /* It is possible to do copy propigation in this situation,
189 * just not right now, see peephole_add_presub_inv() */
190 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
191 info->NumSrcRegs > 2) {
192 return;
193 }
194
195 rc_for_all_reads_mask(inst, copy_propagate_scan_read, &s);
196 rc_for_all_writes_mask(inst, copy_propagate_scan_write, &s);
197 if (s.Conflict)
198 return;
199
200 if (s.BranchDepth >= 0) {
201 if (inst->U.I.Opcode == RC_OPCODE_IF) {
202 s.BranchDepth++;
203 } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF
204 || inst->U.I.Opcode == RC_OPCODE_ELSE) {
205 s.BranchDepth--;
206 if (s.BranchDepth < 0) {
207 s.DefinedMask &= ~s.MovMask;
208 s.MovMask = 0;
209 }
210 }
211 }
212 }
213
214 if (s.Conflict)
215 return;
216
217 /* 2nd pass: We can satisfy all readers, so switch them over all at once */
218 s.MovMask = inst_mov->U.I.DstReg.WriteMask;
219 s.BranchDepth = 0;
220
221 for(struct rc_instruction * inst = inst_mov->Next;
222 inst != &c->Program.Instructions;
223 inst = inst->Next) {
224 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
225 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
226 if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
227 inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
228 unsigned int refmask = 0;
229
230 for(unsigned int chan = 0; chan < 4; ++chan) {
231 unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
232 refmask |= (1 << swz) & RC_MASK_XYZW;
233 }
234
235 if ((refmask & s.MovMask) == refmask) {
236 inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
237 if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
238 inst->U.I.PreSub = s.Mov->U.I.PreSub;
239 }
240 }
241 }
242
243 if (opcode->HasDstReg) {
244 if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
245 inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) {
246 s.MovMask &= ~inst->U.I.DstReg.WriteMask;
247 }
248 }
249
250 if (s.BranchDepth >= 0) {
251 if (inst->U.I.Opcode == RC_OPCODE_IF) {
252 s.BranchDepth++;
253 } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF
254 || inst->U.I.Opcode == RC_OPCODE_ELSE) {
255 s.BranchDepth--;
256 if (s.BranchDepth < 0)
257 break; /* no more readers after this point */
258 }
259 }
260 }
261
262 /* Finally, remove the original MOV instruction */
263 rc_remove_instruction(inst_mov);
264 }
265
266 /**
267 * Check if a source register is actually always the same
268 * swizzle constant.
269 */
270 static int is_src_uniform_constant(struct rc_src_register src,
271 rc_swizzle * pswz, unsigned int * pnegate)
272 {
273 int have_used = 0;
274
275 if (src.File != RC_FILE_NONE) {
276 *pswz = 0;
277 return 0;
278 }
279
280 for(unsigned int chan = 0; chan < 4; ++chan) {
281 unsigned int swz = GET_SWZ(src.Swizzle, chan);
282 if (swz < 4) {
283 *pswz = 0;
284 return 0;
285 }
286 if (swz == RC_SWIZZLE_UNUSED)
287 continue;
288
289 if (!have_used) {
290 *pswz = swz;
291 *pnegate = GET_BIT(src.Negate, chan);
292 have_used = 1;
293 } else {
294 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
295 *pswz = 0;
296 return 0;
297 }
298 }
299 }
300
301 return 1;
302 }
303
304 static void constant_folding_mad(struct rc_instruction * inst)
305 {
306 rc_swizzle swz;
307 unsigned int negate;
308
309 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
310 if (swz == RC_SWIZZLE_ZERO) {
311 inst->U.I.Opcode = RC_OPCODE_MUL;
312 return;
313 }
314 }
315
316 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
317 if (swz == RC_SWIZZLE_ONE) {
318 inst->U.I.Opcode = RC_OPCODE_ADD;
319 if (negate)
320 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
321 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
322 return;
323 } else if (swz == RC_SWIZZLE_ZERO) {
324 inst->U.I.Opcode = RC_OPCODE_MOV;
325 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
326 return;
327 }
328 }
329
330 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
331 if (swz == RC_SWIZZLE_ONE) {
332 inst->U.I.Opcode = RC_OPCODE_ADD;
333 if (negate)
334 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
335 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
336 return;
337 } else if (swz == RC_SWIZZLE_ZERO) {
338 inst->U.I.Opcode = RC_OPCODE_MOV;
339 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
340 return;
341 }
342 }
343 }
344
345 static void constant_folding_mul(struct rc_instruction * inst)
346 {
347 rc_swizzle swz;
348 unsigned int negate;
349
350 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
351 if (swz == RC_SWIZZLE_ONE) {
352 inst->U.I.Opcode = RC_OPCODE_MOV;
353 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
354 if (negate)
355 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
356 return;
357 } else if (swz == RC_SWIZZLE_ZERO) {
358 inst->U.I.Opcode = RC_OPCODE_MOV;
359 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
360 return;
361 }
362 }
363
364 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
365 if (swz == RC_SWIZZLE_ONE) {
366 inst->U.I.Opcode = RC_OPCODE_MOV;
367 if (negate)
368 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
369 return;
370 } else if (swz == RC_SWIZZLE_ZERO) {
371 inst->U.I.Opcode = RC_OPCODE_MOV;
372 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
373 return;
374 }
375 }
376 }
377
378 static void constant_folding_add(struct rc_instruction * inst)
379 {
380 rc_swizzle swz;
381 unsigned int negate;
382
383 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
384 if (swz == RC_SWIZZLE_ZERO) {
385 inst->U.I.Opcode = RC_OPCODE_MOV;
386 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
387 return;
388 }
389 }
390
391 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
392 if (swz == RC_SWIZZLE_ZERO) {
393 inst->U.I.Opcode = RC_OPCODE_MOV;
394 return;
395 }
396 }
397 }
398
399 /**
400 * Replace 0.0, 1.0 and 0.5 immediate constants by their
401 * respective swizzles. Simplify instructions like ADD dst, src, 0;
402 */
403 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
404 {
405 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
406
407 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
408 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
409 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
410 inst->U.I.SrcReg[src].RelAddr ||
411 inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
412 continue;
413
414 struct rc_constant * constant =
415 &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
416
417 if (constant->Type != RC_CONSTANT_IMMEDIATE)
418 continue;
419
420 struct rc_src_register newsrc = inst->U.I.SrcReg[src];
421 int have_real_reference = 0;
422 for(unsigned int chan = 0; chan < 4; ++chan) {
423 unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
424 if (swz >= 4)
425 continue;
426
427 unsigned int newswz;
428 float imm = constant->u.Immediate[swz];
429 float baseimm = imm;
430 if (imm < 0.0)
431 baseimm = -baseimm;
432
433 if (baseimm == 0.0) {
434 newswz = RC_SWIZZLE_ZERO;
435 } else if (baseimm == 1.0) {
436 newswz = RC_SWIZZLE_ONE;
437 } else if (baseimm == 0.5 && c->has_half_swizzles) {
438 newswz = RC_SWIZZLE_HALF;
439 } else {
440 have_real_reference = 1;
441 continue;
442 }
443
444 SET_SWZ(newsrc.Swizzle, chan, newswz);
445 if (imm < 0.0 && !newsrc.Abs)
446 newsrc.Negate ^= 1 << chan;
447 }
448
449 if (!have_real_reference) {
450 newsrc.File = RC_FILE_NONE;
451 newsrc.Index = 0;
452 }
453
454 /* don't make the swizzle worse */
455 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
456 c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
457 continue;
458
459 inst->U.I.SrcReg[src] = newsrc;
460 }
461
462 /* Simplify instructions based on constants */
463 if (inst->U.I.Opcode == RC_OPCODE_MAD)
464 constant_folding_mad(inst);
465
466 /* note: MAD can simplify to MUL or ADD */
467 if (inst->U.I.Opcode == RC_OPCODE_MUL)
468 constant_folding_mul(inst);
469 else if (inst->U.I.Opcode == RC_OPCODE_ADD)
470 constant_folding_add(inst);
471 }
472
473 /**
474 * This function returns a writemask that indicates wich components are
475 * read by src and also written by dst.
476 */
477 static unsigned int src_reads_dst_mask(struct rc_src_register src,
478 struct rc_dst_register dst)
479 {
480 unsigned int mask = 0;
481 unsigned int i;
482 if (dst.File != src.File || dst.Index != src.Index) {
483 return 0;
484 }
485
486 for(i = 0; i < 4; i++) {
487 mask |= 1 << GET_SWZ(src.Swizzle, i);
488 }
489 mask &= RC_MASK_XYZW;
490
491 return mask;
492 }
493
494 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
495 * in any of its channels. Return 0 otherwise. */
496 static int src_has_const_swz(struct rc_src_register src) {
497 int chan;
498 for(chan = 0; chan < 4; chan++) {
499 unsigned int swz = GET_SWZ(src.Swizzle, chan);
500 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
501 || swz == RC_SWIZZLE_ONE) {
502 return 1;
503 }
504 }
505 return 0;
506 }
507
508 static void peephole_scan_write(void * data, struct rc_instruction * inst,
509 rc_register_file file, unsigned int index, unsigned int mask)
510 {
511 struct peephole_state * s = data;
512 if(s->Inst->U.I.DstReg.File == file
513 && s->Inst->U.I.DstReg.Index == index) {
514 unsigned int common_mask = s->WriteMask & mask;
515 s->WriteMask &= ~common_mask;
516 }
517 }
518
519 /**
520 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
521 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
522 * of the add instruction must have the constatnt 1 swizzle. This function
523 * does not check const registers to see if their value is 1.0, so it should
524 * be called after the constant_folding optimization.
525 * @return
526 * 0 if the ADD instruction is still part of the program.
527 * 1 if the ADD instruction is no longer part of the program.
528 */
529 static int peephole_add_presub_inv(
530 struct radeon_compiler * c,
531 struct rc_instruction * inst_add)
532 {
533 unsigned int i, swz, mask;
534 unsigned int can_remove = 0;
535 unsigned int cant_sub = 0;
536 struct rc_instruction * inst;
537 struct peephole_state s;
538
539 if (inst_add->U.I.SaturateMode)
540 return 0;
541
542 mask = inst_add->U.I.DstReg.WriteMask;
543
544 /* Check if src0 is 1. */
545 /* XXX It would be nice to use is_src_uniform_constant here, but that
546 * function only works if the register's file is RC_FILE_NONE */
547 for(i = 0; i < 4; i++ ) {
548 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
549 if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
550 && swz != RC_SWIZZLE_ONE) {
551 return 0;
552 }
553 }
554
555 /* Check src1. */
556 if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
557 inst_add->U.I.DstReg.WriteMask
558 || inst_add->U.I.SrcReg[1].Abs
559 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
560 && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
561 || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
562
563 return 0;
564 }
565
566 /* Setup the peephole_state information. */
567 s.Inst = inst_add;
568 s.WriteMask = inst_add->U.I.DstReg.WriteMask;
569
570 /* For all instructions that read inst_add->U.I.DstReg before it is
571 * written again, use the 1 - src0 presubtact instead. */
572 for(inst = inst_add->Next; inst != &c->Program.Instructions;
573 inst = inst->Next) {
574 const struct rc_opcode_info * info =
575 rc_get_opcode_info(inst->U.I.Opcode);
576
577 for(i = 0; i < info->NumSrcRegs; i++) {
578 if(inst_add->U.I.DstReg.WriteMask !=
579 src_reads_dst_mask(inst->U.I.SrcReg[i],
580 inst_add->U.I.DstReg)) {
581 continue;
582 }
583 if (cant_sub) {
584 can_remove = 0;
585 break;
586 }
587 /* XXX: There are some situations where instructions
588 * with more than 2 src registers can use the
589 * presubtract select, but to keep things simple we
590 * will disable presubtract on these instructions for
591 * now. Note: This if statement should not be pulled
592 * outside of the loop, because it only applies to
593 * instructions that could potentially use the
594 * presubtract source. */
595 if (info->NumSrcRegs > 2) {
596 can_remove = 0;
597 break;
598 }
599
600 /* We can't use more than one presubtract value in an
601 * instruction, unless the two prsubtract operations
602 * are the same and read from the same registers. */
603 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
604 if (inst->U.I.PreSub.Opcode != RC_PRESUB_INV
605 || inst->U.I.PreSub.SrcReg[0].File !=
606 inst_add->U.I.SrcReg[1].File
607 || inst->U.I.PreSub.SrcReg[0].Index !=
608 inst_add->U.I.SrcReg[1].Index) {
609
610 can_remove = 0;
611 break;
612 }
613 }
614 /* We must be careful not to modify inst_add, since it
615 * is possible it will remain part of the program. */
616 inst->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
617 inst->U.I.PreSub.SrcReg[0].Negate = 0;
618 inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
619 inst->U.I.SrcReg[i] = chain_srcregs(inst->U.I.SrcReg[i],
620 inst->U.I.PreSub.SrcReg[0]);
621
622 inst->U.I.SrcReg[i].File = RC_FILE_PRESUB;
623 inst->U.I.SrcReg[i].Index = RC_PRESUB_INV;
624 can_remove = 1;
625 }
626 if(!can_remove)
627 break;
628 rc_for_all_writes_mask(inst, peephole_scan_write, &s);
629 /* If all components of inst_add's destination register have
630 * been written to by subsequent instructions, the original
631 * value of the destination register is no longer valid and
632 * we can't keep doing substitutions. */
633 if (!s.WriteMask){
634 break;
635 }
636 /* Make this instruction doesn't write to the presubtract source. */
637 if (inst->U.I.DstReg.WriteMask &
638 src_reads_dst_mask(inst_add->U.I.SrcReg[1],
639 inst->U.I.DstReg)
640 || info->IsFlowControl) {
641 cant_sub = 1;
642 }
643 }
644 if(can_remove) {
645 rc_remove_instruction(inst_add);
646 return 1;
647 }
648 return 0;
649 }
650
651 /**
652 * @return
653 * 0 if inst is still part of the program.
654 * 1 if inst is no longer part of the program.
655 */
656 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
657 {
658 switch(inst->U.I.Opcode){
659 case RC_OPCODE_ADD:
660 if (c->has_presub) {
661 if(peephole_add_presub_inv(c, inst))
662 return 1;
663 }
664 break;
665 default:
666 break;
667 }
668 return 0;
669 }
670
671 void rc_optimize(struct radeon_compiler * c, void *user)
672 {
673 struct rc_instruction * inst = c->Program.Instructions.Next;
674 while(inst != &c->Program.Instructions) {
675 struct rc_instruction * cur = inst;
676 inst = inst->Next;
677
678 constant_folding(c, cur);
679
680 if(peephole(c, cur))
681 continue;
682
683 if (cur->U.I.Opcode == RC_OPCODE_MOV) {
684 copy_propagate(c, cur);
685 /* cur may no longer be part of the program */
686 }
687 }
688 }