r300/compiler: Fix dataflow bug in presub_helper()
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_optimize.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_dataflow.h"
29
30 #include "radeon_compiler.h"
31 #include "radeon_swizzle.h"
32
/**
 * State shared between a presubtract peephole and the helpers that walk the
 * instructions following Inst.
 */
struct peephole_state {
	/** The instruction whose result the peephole is trying to substitute. */
	struct rc_instruction * Inst;
	/** Stores a bitmask of the components that are still "alive" (i.e.
	 * they have not been written to since Inst was executed.)
	 */
	unsigned int WriteMask;
};
40
/**
 * Callback invoked by presub_helper() to rewrite one source operand of a
 * reader instruction. The arguments are the peephole state, the instruction
 * being rewritten and the index of the source register to replace.
 */
typedef void (*rc_presub_replace_fn)(struct peephole_state *,
						struct rc_instruction *,
						unsigned int);
44
45 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
46 {
47 struct rc_src_register combine;
48 combine.File = inner.File;
49 combine.Index = inner.Index;
50 combine.RelAddr = inner.RelAddr;
51 if (outer.Abs) {
52 combine.Abs = 1;
53 combine.Negate = outer.Negate;
54 } else {
55 combine.Abs = inner.Abs;
56 combine.Negate = 0;
57 for(unsigned int chan = 0; chan < 4; ++chan) {
58 unsigned int swz = GET_SWZ(outer.Swizzle, chan);
59 if (swz < 4)
60 combine.Negate |= GET_BIT(inner.Negate, swz) << chan;
61 }
62 combine.Negate ^= outer.Negate;
63 }
64 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
65 return combine;
66 }
67
/**
 * Bookkeeping for copy_propagate() and its read/write scan callbacks.
 */
struct copy_propagate_state {
	/** The compiler instance the MOV belongs to. */
	struct radeon_compiler * C;
	/** The MOV instruction that copy_propagate() is trying to eliminate. */
	struct rc_instruction * Mov;
	/** Set by the scan callbacks when a reader cannot be rewritten;
	 * copy_propagate() gives up as soon as it sees this flag. */
	unsigned int Conflict:1;

	/** Whether Mov's source has been clobbered */
	unsigned int SourceClobbered:1;

	/** Which components of Mov's destination register are still from that Mov? */
	unsigned int MovMask:4;

	/** Which components of Mov's destination register are clearly *not* from that Mov */
	unsigned int DefinedMask:4;

	/** Which components of Mov's source register are sourced */
	unsigned int SourcedMask:4;

	/** Branch depth beyond Mov; negative value indicates we left the Mov's block */
	int BranchDepth;
};
88
89 /**
90 * This is a callback function that is meant to be passed to
91 * rc_for_all_reads_mask. This function will be called once for each source
92 * register in inst.
93 * @param inst The instruction that the source register belongs to.
94 * @param file The register file of the source register.
95 * @param index The index of the source register.
96 * @param mask The components of the source register that are being read from.
97 */
98 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
99 rc_register_file file, unsigned int index, unsigned int mask)
100 {
101 struct copy_propagate_state * s = data;
102
103 /* XXX This could probably be handled better. */
104 if (file == RC_FILE_ADDRESS) {
105 s->Conflict = 1;
106 return;
107 }
108
109 if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index)
110 return;
111
112 /* These instructions cannot read from the constants file.
113 * see radeonTransformTEX()
114 */
115 if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
116 s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT &&
117 (inst->U.I.Opcode == RC_OPCODE_TEX ||
118 inst->U.I.Opcode == RC_OPCODE_TXB ||
119 inst->U.I.Opcode == RC_OPCODE_TXP ||
120 inst->U.I.Opcode == RC_OPCODE_KIL)){
121 s->Conflict = 1;
122 return;
123 }
124 if ((mask & s->MovMask) == mask) {
125 if (s->SourceClobbered) {
126 s->Conflict = 1;
127 }
128 } else if ((mask & s->DefinedMask) == mask) {
129 /* read from something entirely written by other instruction: this is okay */
130 } else {
131 /* read from component combination that is not well-defined without
132 * the MOV: cannot remove it */
133 s->Conflict = 1;
134 }
135 }
136
137 static void copy_propagate_scan_write(void * data, struct rc_instruction * inst,
138 rc_register_file file, unsigned int index, unsigned int mask)
139 {
140 struct copy_propagate_state * s = data;
141
142 if (s->BranchDepth < 0)
143 return;
144
145 if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) {
146 s->MovMask &= ~mask;
147 if (s->BranchDepth == 0)
148 s->DefinedMask |= mask;
149 else
150 s->DefinedMask &= ~mask;
151 }
152 if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) {
153 if (mask & s->SourcedMask)
154 s->SourceClobbered = 1;
155 } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
156 s->SourceClobbered = 1;
157 }
158 }
159
160 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
161 {
162 struct copy_propagate_state s;
163
164 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
165 inst_mov->U.I.DstReg.RelAddr ||
166 inst_mov->U.I.WriteALUResult)
167 return;
168
169 memset(&s, 0, sizeof(s));
170 s.C = c;
171 s.Mov = inst_mov;
172 s.MovMask = inst_mov->U.I.DstReg.WriteMask;
173 s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
174
175 for(unsigned int chan = 0; chan < 4; ++chan) {
176 unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
177 s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
178 }
179
180 /* 1st pass: Check whether all subsequent readers can be changed */
181 for(struct rc_instruction * inst = inst_mov->Next;
182 inst != &c->Program.Instructions;
183 inst = inst->Next) {
184 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
185 /* XXX In the future we might be able to make the optimizer
186 * smart enough to handle loops. */
187 if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP
188 || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){
189 return;
190 }
191
192 /* It is possible to do copy propigation in this situation,
193 * just not right now, see peephole_add_presub_inv() */
194 if (inst_mov->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
195 info->NumSrcRegs > 2) {
196 return;
197 }
198
199 rc_for_all_reads_mask(inst, copy_propagate_scan_read, &s);
200 rc_for_all_writes_mask(inst, copy_propagate_scan_write, &s);
201 if (s.Conflict)
202 return;
203
204 if (s.BranchDepth >= 0) {
205 if (inst->U.I.Opcode == RC_OPCODE_IF) {
206 s.BranchDepth++;
207 } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF
208 || inst->U.I.Opcode == RC_OPCODE_ELSE) {
209 s.BranchDepth--;
210 if (s.BranchDepth < 0) {
211 s.DefinedMask &= ~s.MovMask;
212 s.MovMask = 0;
213 }
214 }
215 }
216 }
217
218 if (s.Conflict)
219 return;
220
221 /* 2nd pass: We can satisfy all readers, so switch them over all at once */
222 s.MovMask = inst_mov->U.I.DstReg.WriteMask;
223 s.BranchDepth = 0;
224
225 for(struct rc_instruction * inst = inst_mov->Next;
226 inst != &c->Program.Instructions;
227 inst = inst->Next) {
228 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
229 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
230 if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
231 inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
232 unsigned int refmask = 0;
233
234 for(unsigned int chan = 0; chan < 4; ++chan) {
235 unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
236 refmask |= (1 << swz) & RC_MASK_XYZW;
237 }
238
239 if ((refmask & s.MovMask) == refmask) {
240 inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
241 if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
242 inst->U.I.PreSub = s.Mov->U.I.PreSub;
243 }
244 }
245 }
246
247 if (opcode->HasDstReg) {
248 if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
249 inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) {
250 s.MovMask &= ~inst->U.I.DstReg.WriteMask;
251 }
252 }
253
254 if (s.BranchDepth >= 0) {
255 if (inst->U.I.Opcode == RC_OPCODE_IF) {
256 s.BranchDepth++;
257 } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF
258 || inst->U.I.Opcode == RC_OPCODE_ELSE) {
259 s.BranchDepth--;
260 if (s.BranchDepth < 0)
261 break; /* no more readers after this point */
262 }
263 }
264 }
265
266 /* Finally, remove the original MOV instruction */
267 rc_remove_instruction(inst_mov);
268 }
269
270 /**
271 * Check if a source register is actually always the same
272 * swizzle constant.
273 */
274 static int is_src_uniform_constant(struct rc_src_register src,
275 rc_swizzle * pswz, unsigned int * pnegate)
276 {
277 int have_used = 0;
278
279 if (src.File != RC_FILE_NONE) {
280 *pswz = 0;
281 return 0;
282 }
283
284 for(unsigned int chan = 0; chan < 4; ++chan) {
285 unsigned int swz = GET_SWZ(src.Swizzle, chan);
286 if (swz < 4) {
287 *pswz = 0;
288 return 0;
289 }
290 if (swz == RC_SWIZZLE_UNUSED)
291 continue;
292
293 if (!have_used) {
294 *pswz = swz;
295 *pnegate = GET_BIT(src.Negate, chan);
296 have_used = 1;
297 } else {
298 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
299 *pswz = 0;
300 return 0;
301 }
302 }
303 }
304
305 return 1;
306 }
307
308 static void constant_folding_mad(struct rc_instruction * inst)
309 {
310 rc_swizzle swz;
311 unsigned int negate;
312
313 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
314 if (swz == RC_SWIZZLE_ZERO) {
315 inst->U.I.Opcode = RC_OPCODE_MUL;
316 return;
317 }
318 }
319
320 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
321 if (swz == RC_SWIZZLE_ONE) {
322 inst->U.I.Opcode = RC_OPCODE_ADD;
323 if (negate)
324 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
325 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
326 return;
327 } else if (swz == RC_SWIZZLE_ZERO) {
328 inst->U.I.Opcode = RC_OPCODE_MOV;
329 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
330 return;
331 }
332 }
333
334 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
335 if (swz == RC_SWIZZLE_ONE) {
336 inst->U.I.Opcode = RC_OPCODE_ADD;
337 if (negate)
338 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
339 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
340 return;
341 } else if (swz == RC_SWIZZLE_ZERO) {
342 inst->U.I.Opcode = RC_OPCODE_MOV;
343 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
344 return;
345 }
346 }
347 }
348
349 static void constant_folding_mul(struct rc_instruction * inst)
350 {
351 rc_swizzle swz;
352 unsigned int negate;
353
354 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
355 if (swz == RC_SWIZZLE_ONE) {
356 inst->U.I.Opcode = RC_OPCODE_MOV;
357 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
358 if (negate)
359 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
360 return;
361 } else if (swz == RC_SWIZZLE_ZERO) {
362 inst->U.I.Opcode = RC_OPCODE_MOV;
363 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
364 return;
365 }
366 }
367
368 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
369 if (swz == RC_SWIZZLE_ONE) {
370 inst->U.I.Opcode = RC_OPCODE_MOV;
371 if (negate)
372 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
373 return;
374 } else if (swz == RC_SWIZZLE_ZERO) {
375 inst->U.I.Opcode = RC_OPCODE_MOV;
376 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
377 return;
378 }
379 }
380 }
381
382 static void constant_folding_add(struct rc_instruction * inst)
383 {
384 rc_swizzle swz;
385 unsigned int negate;
386
387 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
388 if (swz == RC_SWIZZLE_ZERO) {
389 inst->U.I.Opcode = RC_OPCODE_MOV;
390 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
391 return;
392 }
393 }
394
395 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
396 if (swz == RC_SWIZZLE_ZERO) {
397 inst->U.I.Opcode = RC_OPCODE_MOV;
398 return;
399 }
400 }
401 }
402
403 /**
404 * Replace 0.0, 1.0 and 0.5 immediate constants by their
405 * respective swizzles. Simplify instructions like ADD dst, src, 0;
406 */
407 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
408 {
409 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
410
411 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
412 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
413 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
414 inst->U.I.SrcReg[src].RelAddr ||
415 inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
416 continue;
417
418 struct rc_constant * constant =
419 &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
420
421 if (constant->Type != RC_CONSTANT_IMMEDIATE)
422 continue;
423
424 struct rc_src_register newsrc = inst->U.I.SrcReg[src];
425 int have_real_reference = 0;
426 for(unsigned int chan = 0; chan < 4; ++chan) {
427 unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
428 if (swz >= 4)
429 continue;
430
431 unsigned int newswz;
432 float imm = constant->u.Immediate[swz];
433 float baseimm = imm;
434 if (imm < 0.0)
435 baseimm = -baseimm;
436
437 if (baseimm == 0.0) {
438 newswz = RC_SWIZZLE_ZERO;
439 } else if (baseimm == 1.0) {
440 newswz = RC_SWIZZLE_ONE;
441 } else if (baseimm == 0.5 && c->has_half_swizzles) {
442 newswz = RC_SWIZZLE_HALF;
443 } else {
444 have_real_reference = 1;
445 continue;
446 }
447
448 SET_SWZ(newsrc.Swizzle, chan, newswz);
449 if (imm < 0.0 && !newsrc.Abs)
450 newsrc.Negate ^= 1 << chan;
451 }
452
453 if (!have_real_reference) {
454 newsrc.File = RC_FILE_NONE;
455 newsrc.Index = 0;
456 }
457
458 /* don't make the swizzle worse */
459 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
460 c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
461 continue;
462
463 inst->U.I.SrcReg[src] = newsrc;
464 }
465
466 /* Simplify instructions based on constants */
467 if (inst->U.I.Opcode == RC_OPCODE_MAD)
468 constant_folding_mad(inst);
469
470 /* note: MAD can simplify to MUL or ADD */
471 if (inst->U.I.Opcode == RC_OPCODE_MUL)
472 constant_folding_mul(inst);
473 else if (inst->U.I.Opcode == RC_OPCODE_ADD)
474 constant_folding_add(inst);
475 }
476
477 /**
478 * This function returns a writemask that indicates wich components are
479 * read by src and also written by dst.
480 */
481 static unsigned int src_reads_dst_mask(struct rc_src_register src,
482 struct rc_dst_register dst)
483 {
484 unsigned int mask = 0;
485 unsigned int i;
486 if (dst.File != src.File || dst.Index != src.Index) {
487 return 0;
488 }
489
490 for(i = 0; i < 4; i++) {
491 mask |= 1 << GET_SWZ(src.Swizzle, i);
492 }
493 mask &= RC_MASK_XYZW;
494
495 return mask;
496 }
497
498 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
499 * in any of its channels. Return 0 otherwise. */
500 static int src_has_const_swz(struct rc_src_register src) {
501 int chan;
502 for(chan = 0; chan < 4; chan++) {
503 unsigned int swz = GET_SWZ(src.Swizzle, chan);
504 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
505 || swz == RC_SWIZZLE_ONE) {
506 return 1;
507 }
508 }
509 return 0;
510 }
511
512 static void peephole_scan_write(void * data, struct rc_instruction * inst,
513 rc_register_file file, unsigned int index, unsigned int mask)
514 {
515 struct peephole_state * s = data;
516 if(s->Inst->U.I.DstReg.File == file
517 && s->Inst->U.I.DstReg.Index == index) {
518 unsigned int common_mask = s->WriteMask & mask;
519 s->WriteMask &= ~common_mask;
520 }
521 }
522
523 static int presub_helper(
524 struct radeon_compiler * c,
525 struct peephole_state * s,
526 rc_presubtract_op presub_opcode,
527 rc_presub_replace_fn presub_replace)
528 {
529 struct rc_instruction * inst;
530 unsigned int can_remove = 0;
531 unsigned int cant_sub = 0;
532
533 for(inst = s->Inst->Next; inst != &c->Program.Instructions;
534 inst = inst->Next) {
535 unsigned int i;
536 const struct rc_opcode_info * info =
537 rc_get_opcode_info(inst->U.I.Opcode);
538
539 for(i = 0; i < info->NumSrcRegs; i++) {
540 if(s->Inst->U.I.DstReg.WriteMask !=
541 src_reads_dst_mask(inst->U.I.SrcReg[i],
542 s->Inst->U.I.DstReg)) {
543 continue;
544 }
545 if (cant_sub) {
546 can_remove = 0;
547 break;
548 }
549 /* XXX: There are some situations where instructions
550 * with more than 2 src registers can use the
551 * presubtract select, but to keep things simple we
552 * will disable presubtract on these instructions for
553 * now. Note: This if statement should not be pulled
554 * outside of the loop, because it only applies to
555 * instructions that could potentially use the
556 * presubtract source. */
557 if (info->NumSrcRegs > 2) {
558 can_remove = 0;
559 break;
560 }
561
562 /* We can't use more than one presubtract value in an
563 * instruction, unless the two prsubtract operations
564 * are the same and read from the same registers. */
565 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
566 if (inst->U.I.PreSub.Opcode != presub_opcode
567 || inst->U.I.PreSub.SrcReg[0].File !=
568 s->Inst->U.I.SrcReg[1].File
569 || inst->U.I.PreSub.SrcReg[0].Index !=
570 s->Inst->U.I.SrcReg[1].Index) {
571
572 can_remove = 0;
573 break;
574 }
575 }
576 presub_replace(s, inst, i);
577 can_remove = 1;
578 }
579 if(!can_remove)
580 break;
581 rc_for_all_writes_mask(inst, peephole_scan_write, s);
582 /* If all components of inst_add's destination register have
583 * been written to by subsequent instructions, the original
584 * value of the destination register is no longer valid and
585 * we can't keep doing substitutions. */
586 if (!s->WriteMask){
587 break;
588 }
589 /* Make this instruction doesn't write to the presubtract source. */
590 if (inst->U.I.DstReg.WriteMask &
591 src_reads_dst_mask(s->Inst->U.I.SrcReg[1],
592 inst->U.I.DstReg)
593 || src_reads_dst_mask(s->Inst->U.I.SrcReg[0],
594 inst->U.I.DstReg)
595 || info->IsFlowControl) {
596 cant_sub = 1;
597 }
598 }
599 return can_remove;
600 }
601
602 /* This function assumes that s->Inst->U.I.SrcReg[0] and
603 * s->Inst->U.I.SrcReg[1] aren't both negative. */
604 static void presub_replace_add(struct peephole_state *s,
605 struct rc_instruction * inst,
606 unsigned int src_index)
607 {
608 rc_presubtract_op presub_opcode;
609 if (s->Inst->U.I.SrcReg[1].Negate || s->Inst->U.I.SrcReg[0].Negate)
610 presub_opcode = RC_PRESUB_SUB;
611 else
612 presub_opcode = RC_PRESUB_ADD;
613
614 if (s->Inst->U.I.SrcReg[1].Negate) {
615 inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1];
616 inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[0];
617 } else {
618 inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0];
619 inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1];
620 }
621 inst->U.I.PreSub.SrcReg[0].Negate = 0;
622 inst->U.I.PreSub.SrcReg[1].Negate = 0;
623 inst->U.I.PreSub.Opcode = presub_opcode;
624 inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
625 inst->U.I.PreSub.SrcReg[0]);
626 inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
627 inst->U.I.SrcReg[src_index].Index = presub_opcode;
628 }
629
630 static int peephole_add_presub_add(
631 struct radeon_compiler * c,
632 struct rc_instruction * inst_add)
633 {
634 struct rc_src_register * src0 = NULL;
635 struct rc_src_register * src1 = NULL;
636 unsigned int i;
637 struct peephole_state s;
638
639 if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE)
640 return 0;
641
642 if (inst_add->U.I.SaturateMode)
643 return 0;
644
645 if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
646 return 0;
647
648 /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */
649 for (i = 0; i < 2; i++) {
650 if (inst_add->U.I.SrcReg[i].Abs)
651 return 0;
652 if ((inst_add->U.I.SrcReg[i].Negate
653 & inst_add->U.I.DstReg.WriteMask) ==
654 inst_add->U.I.DstReg.WriteMask) {
655 src0 = &inst_add->U.I.SrcReg[i];
656 } else if (!src1) {
657 src1 = &inst_add->U.I.SrcReg[i];
658 } else {
659 src0 = &inst_add->U.I.SrcReg[i];
660 }
661 }
662
663 if (!src1)
664 return 0;
665
666 s.Inst = inst_add;
667 s.WriteMask = inst_add->U.I.DstReg.WriteMask;
668 if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) {
669 rc_remove_instruction(inst_add);
670 return 1;
671 }
672 return 0;
673 }
674
675 static void presub_replace_inv(struct peephole_state * s,
676 struct rc_instruction * inst,
677 unsigned int src_index)
678 {
679 /* We must be careful not to modify s->Inst, since it
680 * is possible it will remain part of the program.
681 * XXX Maybe pass a struct instead of a pointer for s->Inst.*/
682 inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1];
683 inst->U.I.PreSub.SrcReg[0].Negate = 0;
684 inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
685 inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
686 inst->U.I.PreSub.SrcReg[0]);
687
688 inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
689 inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
690 }
691
692 /**
693 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
694 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
695 * of the add instruction must have the constatnt 1 swizzle. This function
696 * does not check const registers to see if their value is 1.0, so it should
697 * be called after the constant_folding optimization.
698 * @return
699 * 0 if the ADD instruction is still part of the program.
700 * 1 if the ADD instruction is no longer part of the program.
701 */
702 static int peephole_add_presub_inv(
703 struct radeon_compiler * c,
704 struct rc_instruction * inst_add)
705 {
706 unsigned int i, swz, mask;
707 struct peephole_state s;
708
709 if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE)
710 return 0;
711
712 if (inst_add->U.I.SaturateMode)
713 return 0;
714
715 mask = inst_add->U.I.DstReg.WriteMask;
716
717 /* Check if src0 is 1. */
718 /* XXX It would be nice to use is_src_uniform_constant here, but that
719 * function only works if the register's file is RC_FILE_NONE */
720 for(i = 0; i < 4; i++ ) {
721 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
722 if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
723 && swz != RC_SWIZZLE_ONE) {
724 return 0;
725 }
726 }
727
728 /* Check src1. */
729 if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
730 inst_add->U.I.DstReg.WriteMask
731 || inst_add->U.I.SrcReg[1].Abs
732 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
733 && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
734 || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
735
736 return 0;
737 }
738
739 /* Setup the peephole_state information. */
740 s.Inst = inst_add;
741 s.WriteMask = inst_add->U.I.DstReg.WriteMask;
742
743 if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) {
744 rc_remove_instruction(inst_add);
745 return 1;
746 }
747 return 0;
748 }
749
750 /**
751 * @return
752 * 0 if inst is still part of the program.
753 * 1 if inst is no longer part of the program.
754 */
755 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
756 {
757 switch(inst->U.I.Opcode){
758 case RC_OPCODE_ADD:
759 if (c->has_presub) {
760 if(peephole_add_presub_inv(c, inst))
761 return 1;
762 if(peephole_add_presub_add(c, inst))
763 return 1;
764 }
765 break;
766 default:
767 break;
768 }
769 return 0;
770 }
771
772 void rc_optimize(struct radeon_compiler * c, void *user)
773 {
774 struct rc_instruction * inst = c->Program.Instructions.Next;
775 while(inst != &c->Program.Instructions) {
776 struct rc_instruction * cur = inst;
777 inst = inst->Next;
778
779 constant_folding(c, cur);
780
781 if(peephole(c, cur))
782 continue;
783
784 if (cur->U.I.Opcode == RC_OPCODE_MOV) {
785 copy_propagate(c, cur);
786 /* cur may no longer be part of the program */
787 }
788 }
789 }