r300/compiler: Fix two mistakes in the presubtract optimization pass.
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_optimize.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_dataflow.h"
29
30 #include "radeon_compiler.h"
31 #include "radeon_swizzle.h"
32
/**
 * Bookkeeping used by the presubtract peephole passes while they walk the
 * instructions that follow a candidate instruction.
 */
struct peephole_state {
	/** The candidate instruction whose readers are being rewritten. */
	struct rc_instruction * Inst;

	/**
	 * Bitmask of the components of Inst's destination that are still
	 * "alive", i.e. that have not been written to since Inst was
	 * executed.
	 */
	unsigned int WriteMask;
};
40
/**
 * Callback used by presub_helper() to rewrite one source operand.
 * It receives the peephole state, the instruction being rewritten and the
 * index of the source register within that instruction.
 */
typedef void (*rc_presub_replace_fn)(struct peephole_state * s,
					struct rc_instruction * inst,
					unsigned int src_index);
44
45 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
46 {
47 struct rc_src_register combine;
48 combine.File = inner.File;
49 combine.Index = inner.Index;
50 combine.RelAddr = inner.RelAddr;
51 if (outer.Abs) {
52 combine.Abs = 1;
53 combine.Negate = outer.Negate;
54 } else {
55 combine.Abs = inner.Abs;
56 combine.Negate = 0;
57 for(unsigned int chan = 0; chan < 4; ++chan) {
58 unsigned int swz = GET_SWZ(outer.Swizzle, chan);
59 if (swz < 4)
60 combine.Negate |= GET_BIT(inner.Negate, swz) << chan;
61 }
62 combine.Negate ^= outer.Negate;
63 }
64 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
65 return combine;
66 }
67
/** State carried through the instruction scan performed by copy_propagate(). */
struct copy_propagate_state {
	struct radeon_compiler * C;

	/** The MOV instruction that copy_propagate() is trying to eliminate. */
	struct rc_instruction * Mov;

	/** Set as soon as a reader is found that cannot be switched over. */
	unsigned int Conflict:1;

	/** Set once the source of Mov has been overwritten. */
	unsigned int SourceClobbered:1;

	/** Components of Mov's destination register that still hold Mov's result. */
	unsigned int MovMask:4;

	/** Components of Mov's destination register that clearly do *not* come from Mov. */
	unsigned int DefinedMask:4;

	/** Components of Mov's source register that Mov actually reads. */
	unsigned int SourcedMask:4;

	/** Branch depth relative to Mov; a negative value means we left Mov's block. */
	int BranchDepth;
};
88
89 /**
90 * This is a callback function that is meant to be passed to
91 * rc_for_all_reads_mask. This function will be called once for each source
92 * register in inst.
93 * @param inst The instruction that the source register belongs to.
94 * @param file The register file of the source register.
95 * @param index The index of the source register.
96 * @param mask The components of the source register that are being read from.
97 */
98 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
99 rc_register_file file, unsigned int index, unsigned int mask)
100 {
101 struct copy_propagate_state * s = data;
102
103 /* XXX This could probably be handled better. */
104 if (file == RC_FILE_ADDRESS) {
105 s->Conflict = 1;
106 return;
107 }
108
109 if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index)
110 return;
111
112 /* These instructions cannot read from the constants file.
113 * see radeonTransformTEX()
114 */
115 if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
116 s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT &&
117 (inst->U.I.Opcode == RC_OPCODE_TEX ||
118 inst->U.I.Opcode == RC_OPCODE_TXB ||
119 inst->U.I.Opcode == RC_OPCODE_TXP ||
120 inst->U.I.Opcode == RC_OPCODE_KIL)){
121 s->Conflict = 1;
122 return;
123 }
124 if ((mask & s->MovMask) == mask) {
125 if (s->SourceClobbered) {
126 s->Conflict = 1;
127 }
128 } else if ((mask & s->DefinedMask) == mask) {
129 /* read from something entirely written by other instruction: this is okay */
130 } else {
131 /* read from component combination that is not well-defined without
132 * the MOV: cannot remove it */
133 s->Conflict = 1;
134 }
135 }
136
137 static void copy_propagate_scan_write(void * data, struct rc_instruction * inst,
138 rc_register_file file, unsigned int index, unsigned int mask)
139 {
140 struct copy_propagate_state * s = data;
141
142 if (s->BranchDepth < 0)
143 return;
144
145 if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) {
146 s->MovMask &= ~mask;
147 if (s->BranchDepth == 0)
148 s->DefinedMask |= mask;
149 else
150 s->DefinedMask &= ~mask;
151 }
152 if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) {
153 if (mask & s->SourcedMask)
154 s->SourceClobbered = 1;
155 } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
156 s->SourceClobbered = 1;
157 }
158 }
159
160 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
161 {
162 struct copy_propagate_state s;
163
164 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
165 inst_mov->U.I.DstReg.RelAddr ||
166 inst_mov->U.I.WriteALUResult)
167 return;
168
169 memset(&s, 0, sizeof(s));
170 s.C = c;
171 s.Mov = inst_mov;
172 s.MovMask = inst_mov->U.I.DstReg.WriteMask;
173 s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
174
175 for(unsigned int chan = 0; chan < 4; ++chan) {
176 unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
177 s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
178 }
179
180 /* 1st pass: Check whether all subsequent readers can be changed */
181 for(struct rc_instruction * inst = inst_mov->Next;
182 inst != &c->Program.Instructions;
183 inst = inst->Next) {
184 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
185 /* XXX In the future we might be able to make the optimizer
186 * smart enough to handle loops. */
187 if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP
188 || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){
189 return;
190 }
191
192 /* It is possible to do copy propigation in this situation,
193 * just not right now, see peephole_add_presub_inv() */
194 if (inst_mov->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
195 (info->NumSrcRegs > 2 || info->HasTexture)) {
196 return;
197 }
198
199 rc_for_all_reads_mask(inst, copy_propagate_scan_read, &s);
200 rc_for_all_writes_mask(inst, copy_propagate_scan_write, &s);
201 if (s.Conflict)
202 return;
203
204 if (s.BranchDepth >= 0) {
205 if (inst->U.I.Opcode == RC_OPCODE_IF) {
206 s.BranchDepth++;
207 } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF
208 || inst->U.I.Opcode == RC_OPCODE_ELSE) {
209 s.BranchDepth--;
210 if (s.BranchDepth < 0) {
211 s.DefinedMask &= ~s.MovMask;
212 s.MovMask = 0;
213 }
214 }
215 }
216 }
217
218 if (s.Conflict)
219 return;
220
221 /* 2nd pass: We can satisfy all readers, so switch them over all at once */
222 s.MovMask = inst_mov->U.I.DstReg.WriteMask;
223 s.BranchDepth = 0;
224
225 for(struct rc_instruction * inst = inst_mov->Next;
226 inst != &c->Program.Instructions;
227 inst = inst->Next) {
228 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
229 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
230 if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
231 inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
232 unsigned int refmask = 0;
233
234 for(unsigned int chan = 0; chan < 4; ++chan) {
235 unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
236 refmask |= (1 << swz) & RC_MASK_XYZW;
237 }
238
239 if ((refmask & s.MovMask) == refmask) {
240 inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
241 if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
242 inst->U.I.PreSub = s.Mov->U.I.PreSub;
243 }
244 }
245 }
246
247 if (opcode->HasDstReg) {
248 if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
249 inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) {
250 s.MovMask &= ~inst->U.I.DstReg.WriteMask;
251 }
252 }
253
254 if (s.BranchDepth >= 0) {
255 if (inst->U.I.Opcode == RC_OPCODE_IF) {
256 s.BranchDepth++;
257 } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF
258 || inst->U.I.Opcode == RC_OPCODE_ELSE) {
259 s.BranchDepth--;
260 if (s.BranchDepth < 0)
261 break; /* no more readers after this point */
262 }
263 }
264 }
265
266 /* Finally, remove the original MOV instruction */
267 rc_remove_instruction(inst_mov);
268 }
269
270 /**
271 * Check if a source register is actually always the same
272 * swizzle constant.
273 */
274 static int is_src_uniform_constant(struct rc_src_register src,
275 rc_swizzle * pswz, unsigned int * pnegate)
276 {
277 int have_used = 0;
278
279 if (src.File != RC_FILE_NONE) {
280 *pswz = 0;
281 return 0;
282 }
283
284 for(unsigned int chan = 0; chan < 4; ++chan) {
285 unsigned int swz = GET_SWZ(src.Swizzle, chan);
286 if (swz < 4) {
287 *pswz = 0;
288 return 0;
289 }
290 if (swz == RC_SWIZZLE_UNUSED)
291 continue;
292
293 if (!have_used) {
294 *pswz = swz;
295 *pnegate = GET_BIT(src.Negate, chan);
296 have_used = 1;
297 } else {
298 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
299 *pswz = 0;
300 return 0;
301 }
302 }
303 }
304
305 return 1;
306 }
307
308 static void constant_folding_mad(struct rc_instruction * inst)
309 {
310 rc_swizzle swz;
311 unsigned int negate;
312
313 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
314 if (swz == RC_SWIZZLE_ZERO) {
315 inst->U.I.Opcode = RC_OPCODE_MUL;
316 return;
317 }
318 }
319
320 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
321 if (swz == RC_SWIZZLE_ONE) {
322 inst->U.I.Opcode = RC_OPCODE_ADD;
323 if (negate)
324 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
325 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
326 return;
327 } else if (swz == RC_SWIZZLE_ZERO) {
328 inst->U.I.Opcode = RC_OPCODE_MOV;
329 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
330 return;
331 }
332 }
333
334 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
335 if (swz == RC_SWIZZLE_ONE) {
336 inst->U.I.Opcode = RC_OPCODE_ADD;
337 if (negate)
338 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
339 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
340 return;
341 } else if (swz == RC_SWIZZLE_ZERO) {
342 inst->U.I.Opcode = RC_OPCODE_MOV;
343 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
344 return;
345 }
346 }
347 }
348
349 static void constant_folding_mul(struct rc_instruction * inst)
350 {
351 rc_swizzle swz;
352 unsigned int negate;
353
354 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
355 if (swz == RC_SWIZZLE_ONE) {
356 inst->U.I.Opcode = RC_OPCODE_MOV;
357 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
358 if (negate)
359 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
360 return;
361 } else if (swz == RC_SWIZZLE_ZERO) {
362 inst->U.I.Opcode = RC_OPCODE_MOV;
363 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
364 return;
365 }
366 }
367
368 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
369 if (swz == RC_SWIZZLE_ONE) {
370 inst->U.I.Opcode = RC_OPCODE_MOV;
371 if (negate)
372 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
373 return;
374 } else if (swz == RC_SWIZZLE_ZERO) {
375 inst->U.I.Opcode = RC_OPCODE_MOV;
376 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
377 return;
378 }
379 }
380 }
381
382 static void constant_folding_add(struct rc_instruction * inst)
383 {
384 rc_swizzle swz;
385 unsigned int negate;
386
387 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
388 if (swz == RC_SWIZZLE_ZERO) {
389 inst->U.I.Opcode = RC_OPCODE_MOV;
390 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
391 return;
392 }
393 }
394
395 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
396 if (swz == RC_SWIZZLE_ZERO) {
397 inst->U.I.Opcode = RC_OPCODE_MOV;
398 return;
399 }
400 }
401 }
402
403 /**
404 * Replace 0.0, 1.0 and 0.5 immediate constants by their
405 * respective swizzles. Simplify instructions like ADD dst, src, 0;
406 */
407 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
408 {
409 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
410
411 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
412 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
413 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
414 inst->U.I.SrcReg[src].RelAddr ||
415 inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
416 continue;
417
418 struct rc_constant * constant =
419 &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
420
421 if (constant->Type != RC_CONSTANT_IMMEDIATE)
422 continue;
423
424 struct rc_src_register newsrc = inst->U.I.SrcReg[src];
425 int have_real_reference = 0;
426 for(unsigned int chan = 0; chan < 4; ++chan) {
427 unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
428 if (swz >= 4)
429 continue;
430
431 unsigned int newswz;
432 float imm = constant->u.Immediate[swz];
433 float baseimm = imm;
434 if (imm < 0.0)
435 baseimm = -baseimm;
436
437 if (baseimm == 0.0) {
438 newswz = RC_SWIZZLE_ZERO;
439 } else if (baseimm == 1.0) {
440 newswz = RC_SWIZZLE_ONE;
441 } else if (baseimm == 0.5 && c->has_half_swizzles) {
442 newswz = RC_SWIZZLE_HALF;
443 } else {
444 have_real_reference = 1;
445 continue;
446 }
447
448 SET_SWZ(newsrc.Swizzle, chan, newswz);
449 if (imm < 0.0 && !newsrc.Abs)
450 newsrc.Negate ^= 1 << chan;
451 }
452
453 if (!have_real_reference) {
454 newsrc.File = RC_FILE_NONE;
455 newsrc.Index = 0;
456 }
457
458 /* don't make the swizzle worse */
459 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
460 c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
461 continue;
462
463 inst->U.I.SrcReg[src] = newsrc;
464 }
465
466 /* Simplify instructions based on constants */
467 if (inst->U.I.Opcode == RC_OPCODE_MAD)
468 constant_folding_mad(inst);
469
470 /* note: MAD can simplify to MUL or ADD */
471 if (inst->U.I.Opcode == RC_OPCODE_MUL)
472 constant_folding_mul(inst);
473 else if (inst->U.I.Opcode == RC_OPCODE_ADD)
474 constant_folding_add(inst);
475 }
476
477 /**
478 * If src and dst use the same register, this function returns a writemask that
479 * indicates wich components are read by src. Otherwise zero is returned.
480 */
481 static unsigned int src_reads_dst_mask(struct rc_src_register src,
482 struct rc_dst_register dst)
483 {
484 unsigned int mask = 0;
485 unsigned int i;
486 if (dst.File != src.File || dst.Index != src.Index) {
487 return 0;
488 }
489
490 for(i = 0; i < 4; i++) {
491 mask |= 1 << GET_SWZ(src.Swizzle, i);
492 }
493 mask &= RC_MASK_XYZW;
494
495 return mask;
496 }
497
498 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
499 * in any of its channels. Return 0 otherwise. */
500 static int src_has_const_swz(struct rc_src_register src) {
501 int chan;
502 for(chan = 0; chan < 4; chan++) {
503 unsigned int swz = GET_SWZ(src.Swizzle, chan);
504 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
505 || swz == RC_SWIZZLE_ONE) {
506 return 1;
507 }
508 }
509 return 0;
510 }
511
512 static void peephole_scan_write(void * data, struct rc_instruction * inst,
513 rc_register_file file, unsigned int index, unsigned int mask)
514 {
515 struct peephole_state * s = data;
516 if(s->Inst->U.I.DstReg.File == file
517 && s->Inst->U.I.DstReg.Index == index) {
518 unsigned int common_mask = s->WriteMask & mask;
519 s->WriteMask &= ~common_mask;
520 }
521 }
522
523 static int presub_helper(
524 struct radeon_compiler * c,
525 struct peephole_state * s,
526 rc_presubtract_op presub_opcode,
527 rc_presub_replace_fn presub_replace)
528 {
529 struct rc_instruction * inst;
530 unsigned int can_remove = 0;
531 unsigned int cant_sub = 0;
532
533 for(inst = s->Inst->Next; inst != &c->Program.Instructions;
534 inst = inst->Next) {
535 unsigned int i;
536 unsigned char can_use_presub = 1;
537 const struct rc_opcode_info * info =
538 rc_get_opcode_info(inst->U.I.Opcode);
539 /* XXX: There are some situations where instructions
540 * with more than 2 src registers can use the
541 * presubtract select, but to keep things simple we
542 * will disable presubtract on these instructions for
543 * now. */
544 if (info->NumSrcRegs > 2 || info->HasTexture) {
545 can_use_presub = 0;
546 }
547
548 /* We can't use more than one presubtract value in an
549 * instruction, unless the two prsubtract operations
550 * are the same and read from the same registers. */
551 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
552 if (inst->U.I.PreSub.Opcode != presub_opcode
553 || inst->U.I.PreSub.SrcReg[0].File !=
554 s->Inst->U.I.SrcReg[1].File
555 || inst->U.I.PreSub.SrcReg[0].Index !=
556 s->Inst->U.I.SrcReg[1].Index) {
557 can_use_presub = 0;
558 }
559 }
560
561 /* Even if the instruction can't use a presubtract operation
562 * we still need to check if the instruction reads from
563 * s->Inst->U.I.DstReg, because if it does we must not
564 * remove s->Inst. */
565 for(i = 0; i < info->NumSrcRegs; i++) {
566 unsigned int mask = src_reads_dst_mask(
567 inst->U.I.SrcReg[i], s->Inst->U.I.DstReg);
568 /* XXX We could be more aggressive here using
569 * presubtract. It is okay if SrcReg[i] only reads
570 * from some of the mask components. */
571 if(s->Inst->U.I.DstReg.WriteMask != mask) {
572 if (s->Inst->U.I.DstReg.WriteMask & mask) {
573 can_remove = 0;
574 break;
575 } else {
576 continue;
577 }
578 }
579 if (cant_sub || !can_use_presub) {
580 can_remove = 0;
581 break;
582 }
583 presub_replace(s, inst, i);
584 can_remove = 1;
585 }
586 if(!can_remove)
587 break;
588 rc_for_all_writes_mask(inst, peephole_scan_write, s);
589 /* If all components of inst_add's destination register have
590 * been written to by subsequent instructions, the original
591 * value of the destination register is no longer valid and
592 * we can't keep doing substitutions. */
593 if (!s->WriteMask){
594 break;
595 }
596 /* Make this instruction doesn't write to the presubtract source. */
597 if (inst->U.I.DstReg.WriteMask &
598 src_reads_dst_mask(s->Inst->U.I.SrcReg[1],
599 inst->U.I.DstReg)
600 || src_reads_dst_mask(s->Inst->U.I.SrcReg[0],
601 inst->U.I.DstReg)
602 || info->IsFlowControl) {
603 cant_sub = 1;
604 }
605 }
606 return can_remove;
607 }
608
609 /* This function assumes that s->Inst->U.I.SrcReg[0] and
610 * s->Inst->U.I.SrcReg[1] aren't both negative. */
611 static void presub_replace_add(struct peephole_state *s,
612 struct rc_instruction * inst,
613 unsigned int src_index)
614 {
615 rc_presubtract_op presub_opcode;
616 if (s->Inst->U.I.SrcReg[1].Negate || s->Inst->U.I.SrcReg[0].Negate)
617 presub_opcode = RC_PRESUB_SUB;
618 else
619 presub_opcode = RC_PRESUB_ADD;
620
621 if (s->Inst->U.I.SrcReg[1].Negate) {
622 inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1];
623 inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[0];
624 } else {
625 inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0];
626 inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1];
627 }
628 inst->U.I.PreSub.SrcReg[0].Negate = 0;
629 inst->U.I.PreSub.SrcReg[1].Negate = 0;
630 inst->U.I.PreSub.Opcode = presub_opcode;
631 inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
632 inst->U.I.PreSub.SrcReg[0]);
633 inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
634 inst->U.I.SrcReg[src_index].Index = presub_opcode;
635 }
636
637 static int is_presub_candidate(struct rc_instruction * inst)
638 {
639 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
640 unsigned int i;
641
642 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode)
643 return 0;
644
645 for(i = 0; i < info->NumSrcRegs; i++) {
646 if (src_reads_dst_mask(inst->U.I.SrcReg[i], inst->U.I.DstReg))
647 return 0;
648 }
649 return 1;
650 }
651
652 static int peephole_add_presub_add(
653 struct radeon_compiler * c,
654 struct rc_instruction * inst_add)
655 {
656 struct rc_src_register * src0 = NULL;
657 struct rc_src_register * src1 = NULL;
658 unsigned int i;
659 struct peephole_state s;
660
661 if (!is_presub_candidate(inst_add))
662 return 0;
663
664 if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
665 return 0;
666
667 /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */
668 for (i = 0; i < 2; i++) {
669 if (inst_add->U.I.SrcReg[i].Abs)
670 return 0;
671 if ((inst_add->U.I.SrcReg[i].Negate
672 & inst_add->U.I.DstReg.WriteMask) ==
673 inst_add->U.I.DstReg.WriteMask) {
674 src0 = &inst_add->U.I.SrcReg[i];
675 } else if (!src1) {
676 src1 = &inst_add->U.I.SrcReg[i];
677 } else {
678 src0 = &inst_add->U.I.SrcReg[i];
679 }
680 }
681
682 if (!src1)
683 return 0;
684
685 s.Inst = inst_add;
686 s.WriteMask = inst_add->U.I.DstReg.WriteMask;
687 if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) {
688 rc_remove_instruction(inst_add);
689 return 1;
690 }
691 return 0;
692 }
693
694 static void presub_replace_inv(struct peephole_state * s,
695 struct rc_instruction * inst,
696 unsigned int src_index)
697 {
698 /* We must be careful not to modify s->Inst, since it
699 * is possible it will remain part of the program.
700 * XXX Maybe pass a struct instead of a pointer for s->Inst.*/
701 inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1];
702 inst->U.I.PreSub.SrcReg[0].Negate = 0;
703 inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
704 inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
705 inst->U.I.PreSub.SrcReg[0]);
706
707 inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
708 inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
709 }
710
711 /**
712 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
713 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
714 * of the add instruction must have the constatnt 1 swizzle. This function
715 * does not check const registers to see if their value is 1.0, so it should
716 * be called after the constant_folding optimization.
717 * @return
718 * 0 if the ADD instruction is still part of the program.
719 * 1 if the ADD instruction is no longer part of the program.
720 */
721 static int peephole_add_presub_inv(
722 struct radeon_compiler * c,
723 struct rc_instruction * inst_add)
724 {
725 unsigned int i, swz, mask;
726 struct peephole_state s;
727
728 if (!is_presub_candidate(inst_add))
729 return 0;
730
731 mask = inst_add->U.I.DstReg.WriteMask;
732
733 /* Check if src0 is 1. */
734 /* XXX It would be nice to use is_src_uniform_constant here, but that
735 * function only works if the register's file is RC_FILE_NONE */
736 for(i = 0; i < 4; i++ ) {
737 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
738 if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
739 && swz != RC_SWIZZLE_ONE) {
740 return 0;
741 }
742 }
743
744 /* Check src1. */
745 if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
746 inst_add->U.I.DstReg.WriteMask
747 || inst_add->U.I.SrcReg[1].Abs
748 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
749 && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
750 || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
751
752 return 0;
753 }
754
755 /* Setup the peephole_state information. */
756 s.Inst = inst_add;
757 s.WriteMask = inst_add->U.I.DstReg.WriteMask;
758
759 if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) {
760 rc_remove_instruction(inst_add);
761 return 1;
762 }
763 return 0;
764 }
765
766 /**
767 * @return
768 * 0 if inst is still part of the program.
769 * 1 if inst is no longer part of the program.
770 */
771 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
772 {
773 switch(inst->U.I.Opcode){
774 case RC_OPCODE_ADD:
775 if (c->has_presub) {
776 if(peephole_add_presub_inv(c, inst))
777 return 1;
778 if(peephole_add_presub_add(c, inst))
779 return 1;
780 }
781 break;
782 default:
783 break;
784 }
785 return 0;
786 }
787
788 void rc_optimize(struct radeon_compiler * c, void *user)
789 {
790 struct rc_instruction * inst = c->Program.Instructions.Next;
791 while(inst != &c->Program.Instructions) {
792 struct rc_instruction * cur = inst;
793 inst = inst->Next;
794
795 constant_folding(c, cur);
796
797 if(peephole(c, cur))
798 continue;
799
800 if (cur->U.I.Opcode == RC_OPCODE_MOV) {
801 copy_propagate(c, cur);
802 /* cur may no longer be part of the program */
803 }
804 }
805 }