r300/compiler: Add a more efficient version of rc_find_free_temporary()
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_optimize.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29 #include "radeon_dataflow.h"
30
31 #include "radeon_compiler.h"
32 #include "radeon_compiler_util.h"
33 #include "radeon_swizzle.h"
34
35 struct src_clobbered_reads_cb_data {
36 rc_register_file File;
37 unsigned int Index;
38 unsigned int Mask;
39 struct rc_reader_data * ReaderData;
40 };
41
/**
 * Callback used by presub_helper() to rewrite one source operand of a reader.
 *
 * inst_add    - the ADD instruction being replaced by a presubtract operation
 * inst_reader - the instruction that reads the ADD's result
 * src_index   - index into inst_reader->U.I.SrcReg of the operand to rewrite
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *inst_add,
				     struct rc_instruction *inst_reader,
				     unsigned int src_index);
45
46 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
47 {
48 struct rc_src_register combine;
49 combine.File = inner.File;
50 combine.Index = inner.Index;
51 combine.RelAddr = inner.RelAddr;
52 if (outer.Abs) {
53 combine.Abs = 1;
54 combine.Negate = outer.Negate;
55 } else {
56 combine.Abs = inner.Abs;
57 combine.Negate = 0;
58 for(unsigned int chan = 0; chan < 4; ++chan) {
59 unsigned int swz = GET_SWZ(outer.Swizzle, chan);
60 if (swz < 4)
61 combine.Negate |= GET_BIT(inner.Negate, swz) << chan;
62 }
63 combine.Negate ^= outer.Negate;
64 }
65 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
66 return combine;
67 }
68
69 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
70 struct rc_src_register * src)
71 {
72 rc_register_file file = src->File;
73 struct rc_reader_data * reader_data = data;
74
75 if(!rc_inst_can_use_presub(inst,
76 reader_data->Writer->U.I.PreSub.Opcode,
77 rc_swizzle_to_writemask(src->Swizzle),
78 *src,
79 reader_data->Writer->U.I.PreSub.SrcReg[0],
80 reader_data->Writer->U.I.PreSub.SrcReg[1])) {
81 reader_data->Abort = 1;
82 return;
83 }
84
85 /* XXX This could probably be handled better. */
86 if (file == RC_FILE_ADDRESS) {
87 reader_data->Abort = 1;
88 return;
89 }
90
91 /* These instructions cannot read from the constants file.
92 * see radeonTransformTEX()
93 */
94 if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
95 reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
96 (inst->U.I.Opcode == RC_OPCODE_TEX ||
97 inst->U.I.Opcode == RC_OPCODE_TXB ||
98 inst->U.I.Opcode == RC_OPCODE_TXP ||
99 inst->U.I.Opcode == RC_OPCODE_KIL)){
100 reader_data->Abort = 1;
101 return;
102 }
103 }
104
105 static void src_clobbered_reads_cb(
106 void * data,
107 struct rc_instruction * inst,
108 struct rc_src_register * src)
109 {
110 struct src_clobbered_reads_cb_data * sc_data = data;
111
112 if (src->File == sc_data->File
113 && src->Index == sc_data->Index
114 && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
115
116 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
117 }
118
119 if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
120 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
121 }
122 }
123
124 static void is_src_clobbered_scan_write(
125 void * data,
126 struct rc_instruction * inst,
127 rc_register_file file,
128 unsigned int index,
129 unsigned int mask)
130 {
131 struct src_clobbered_reads_cb_data sc_data;
132 struct rc_reader_data * reader_data = data;
133 sc_data.File = file;
134 sc_data.Index = index;
135 sc_data.Mask = mask;
136 sc_data.ReaderData = reader_data;
137 rc_for_all_reads_src(reader_data->Writer,
138 src_clobbered_reads_cb, &sc_data);
139 }
140
141 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
142 {
143 struct rc_reader_data reader_data;
144 unsigned int i;
145
146 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
147 inst_mov->U.I.DstReg.RelAddr ||
148 inst_mov->U.I.WriteALUResult ||
149 inst_mov->U.I.SaturateMode)
150 return;
151
152 /* Get a list of all the readers of this MOV instruction. */
153 rc_get_readers(c, inst_mov, &reader_data,
154 copy_propagate_scan_read, NULL,
155 is_src_clobbered_scan_write);
156
157 if (reader_data.Abort || reader_data.ReaderCount == 0)
158 return;
159
160 /* Propagate the MOV instruction. */
161 for (i = 0; i < reader_data.ReaderCount; i++) {
162 struct rc_instruction * inst = reader_data.Readers[i].Inst;
163 *reader_data.Readers[i].U.Src = chain_srcregs(*reader_data.Readers[i].U.Src, inst_mov->U.I.SrcReg[0]);
164
165 if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
166 inst->U.I.PreSub = inst_mov->U.I.PreSub;
167 }
168
169 /* Finally, remove the original MOV instruction */
170 rc_remove_instruction(inst_mov);
171 }
172
173 /**
174 * Check if a source register is actually always the same
175 * swizzle constant.
176 */
177 static int is_src_uniform_constant(struct rc_src_register src,
178 rc_swizzle * pswz, unsigned int * pnegate)
179 {
180 int have_used = 0;
181
182 if (src.File != RC_FILE_NONE) {
183 *pswz = 0;
184 return 0;
185 }
186
187 for(unsigned int chan = 0; chan < 4; ++chan) {
188 unsigned int swz = GET_SWZ(src.Swizzle, chan);
189 if (swz < 4) {
190 *pswz = 0;
191 return 0;
192 }
193 if (swz == RC_SWIZZLE_UNUSED)
194 continue;
195
196 if (!have_used) {
197 *pswz = swz;
198 *pnegate = GET_BIT(src.Negate, chan);
199 have_used = 1;
200 } else {
201 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
202 *pswz = 0;
203 return 0;
204 }
205 }
206 }
207
208 return 1;
209 }
210
211 static void constant_folding_mad(struct rc_instruction * inst)
212 {
213 rc_swizzle swz = 0;
214 unsigned int negate= 0;
215
216 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
217 if (swz == RC_SWIZZLE_ZERO) {
218 inst->U.I.Opcode = RC_OPCODE_MUL;
219 return;
220 }
221 }
222
223 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
224 if (swz == RC_SWIZZLE_ONE) {
225 inst->U.I.Opcode = RC_OPCODE_ADD;
226 if (negate)
227 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
228 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
229 return;
230 } else if (swz == RC_SWIZZLE_ZERO) {
231 inst->U.I.Opcode = RC_OPCODE_MOV;
232 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
233 return;
234 }
235 }
236
237 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
238 if (swz == RC_SWIZZLE_ONE) {
239 inst->U.I.Opcode = RC_OPCODE_ADD;
240 if (negate)
241 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
242 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
243 return;
244 } else if (swz == RC_SWIZZLE_ZERO) {
245 inst->U.I.Opcode = RC_OPCODE_MOV;
246 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
247 return;
248 }
249 }
250 }
251
252 static void constant_folding_mul(struct rc_instruction * inst)
253 {
254 rc_swizzle swz = 0;
255 unsigned int negate = 0;
256
257 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
258 if (swz == RC_SWIZZLE_ONE) {
259 inst->U.I.Opcode = RC_OPCODE_MOV;
260 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
261 if (negate)
262 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
263 return;
264 } else if (swz == RC_SWIZZLE_ZERO) {
265 inst->U.I.Opcode = RC_OPCODE_MOV;
266 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
267 return;
268 }
269 }
270
271 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
272 if (swz == RC_SWIZZLE_ONE) {
273 inst->U.I.Opcode = RC_OPCODE_MOV;
274 if (negate)
275 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
276 return;
277 } else if (swz == RC_SWIZZLE_ZERO) {
278 inst->U.I.Opcode = RC_OPCODE_MOV;
279 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
280 return;
281 }
282 }
283 }
284
285 static void constant_folding_add(struct rc_instruction * inst)
286 {
287 rc_swizzle swz = 0;
288 unsigned int negate = 0;
289
290 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
291 if (swz == RC_SWIZZLE_ZERO) {
292 inst->U.I.Opcode = RC_OPCODE_MOV;
293 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
294 return;
295 }
296 }
297
298 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
299 if (swz == RC_SWIZZLE_ZERO) {
300 inst->U.I.Opcode = RC_OPCODE_MOV;
301 return;
302 }
303 }
304 }
305
306 /**
307 * Replace 0.0, 1.0 and 0.5 immediate constants by their
308 * respective swizzles. Simplify instructions like ADD dst, src, 0;
309 */
310 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
311 {
312 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
313 unsigned int i;
314
315 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
316 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
317 struct rc_constant * constant;
318 struct rc_src_register newsrc;
319 int have_real_reference;
320
321 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
322 inst->U.I.SrcReg[src].RelAddr ||
323 inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
324 continue;
325
326 constant =
327 &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
328
329 if (constant->Type != RC_CONSTANT_IMMEDIATE)
330 continue;
331
332 newsrc = inst->U.I.SrcReg[src];
333 have_real_reference = 0;
334 for(unsigned int chan = 0; chan < 4; ++chan) {
335 unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
336 unsigned int newswz;
337 float imm;
338 float baseimm;
339
340 if (swz >= 4)
341 continue;
342
343 imm = constant->u.Immediate[swz];
344 baseimm = imm;
345 if (imm < 0.0)
346 baseimm = -baseimm;
347
348 if (baseimm == 0.0) {
349 newswz = RC_SWIZZLE_ZERO;
350 } else if (baseimm == 1.0) {
351 newswz = RC_SWIZZLE_ONE;
352 } else if (baseimm == 0.5 && c->has_half_swizzles) {
353 newswz = RC_SWIZZLE_HALF;
354 } else {
355 have_real_reference = 1;
356 continue;
357 }
358
359 SET_SWZ(newsrc.Swizzle, chan, newswz);
360 if (imm < 0.0 && !newsrc.Abs)
361 newsrc.Negate ^= 1 << chan;
362 }
363
364 if (!have_real_reference) {
365 newsrc.File = RC_FILE_NONE;
366 newsrc.Index = 0;
367 }
368
369 /* don't make the swizzle worse */
370 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
371 c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
372 continue;
373
374 inst->U.I.SrcReg[src] = newsrc;
375 }
376
377 /* Simplify instructions based on constants */
378 if (inst->U.I.Opcode == RC_OPCODE_MAD)
379 constant_folding_mad(inst);
380
381 /* note: MAD can simplify to MUL or ADD */
382 if (inst->U.I.Opcode == RC_OPCODE_MUL)
383 constant_folding_mul(inst);
384 else if (inst->U.I.Opcode == RC_OPCODE_ADD)
385 constant_folding_add(inst);
386
387 /* In case this instruction has been converted, make sure all of the
388 * registers that are no longer used are empty. */
389 opcode = rc_get_opcode_info(inst->U.I.Opcode);
390 for(i = opcode->NumSrcRegs; i < 3; i++) {
391 memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
392 }
393 }
394
395 /**
396 * If src and dst use the same register, this function returns a writemask that
397 * indicates wich components are read by src. Otherwise zero is returned.
398 */
399 static unsigned int src_reads_dst_mask(struct rc_src_register src,
400 struct rc_dst_register dst)
401 {
402 if (dst.File != src.File || dst.Index != src.Index) {
403 return 0;
404 }
405 return rc_swizzle_to_writemask(src.Swizzle);
406 }
407
408 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
409 * in any of its channels. Return 0 otherwise. */
410 static int src_has_const_swz(struct rc_src_register src) {
411 int chan;
412 for(chan = 0; chan < 4; chan++) {
413 unsigned int swz = GET_SWZ(src.Swizzle, chan);
414 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
415 || swz == RC_SWIZZLE_ONE) {
416 return 1;
417 }
418 }
419 return 0;
420 }
421
422 static void presub_scan_read(
423 void * data,
424 struct rc_instruction * inst,
425 struct rc_src_register * src)
426 {
427 struct rc_reader_data * reader_data = data;
428 rc_presubtract_op * presub_opcode = reader_data->CbData;
429
430 if (!rc_inst_can_use_presub(inst, *presub_opcode,
431 reader_data->Writer->U.I.DstReg.WriteMask,
432 *src,
433 reader_data->Writer->U.I.SrcReg[0],
434 reader_data->Writer->U.I.SrcReg[1])) {
435 reader_data->Abort = 1;
436 return;
437 }
438 }
439
440 static int presub_helper(
441 struct radeon_compiler * c,
442 struct rc_instruction * inst_add,
443 rc_presubtract_op presub_opcode,
444 rc_presub_replace_fn presub_replace)
445 {
446 struct rc_reader_data reader_data;
447 unsigned int i;
448 rc_presubtract_op cb_op = presub_opcode;
449
450 reader_data.CbData = &cb_op;
451 rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
452 is_src_clobbered_scan_write);
453
454 if (reader_data.Abort || reader_data.ReaderCount == 0)
455 return 0;
456
457 for(i = 0; i < reader_data.ReaderCount; i++) {
458 unsigned int src_index;
459 struct rc_reader reader = reader_data.Readers[i];
460 const struct rc_opcode_info * info =
461 rc_get_opcode_info(reader.Inst->U.I.Opcode);
462
463 for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
464 if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.Src)
465 presub_replace(inst_add, reader.Inst, src_index);
466 }
467 }
468 return 1;
469 }
470
471 /* This function assumes that inst_add->U.I.SrcReg[0] and
472 * inst_add->U.I.SrcReg[1] aren't both negative. */
473 static void presub_replace_add(
474 struct rc_instruction * inst_add,
475 struct rc_instruction * inst_reader,
476 unsigned int src_index)
477 {
478 rc_presubtract_op presub_opcode;
479 if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
480 presub_opcode = RC_PRESUB_SUB;
481 else
482 presub_opcode = RC_PRESUB_ADD;
483
484 if (inst_add->U.I.SrcReg[1].Negate) {
485 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
486 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
487 } else {
488 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
489 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
490 }
491 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
492 inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
493 inst_reader->U.I.PreSub.Opcode = presub_opcode;
494 inst_reader->U.I.SrcReg[src_index] =
495 chain_srcregs(inst_reader->U.I.SrcReg[src_index],
496 inst_reader->U.I.PreSub.SrcReg[0]);
497 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
498 inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
499 }
500
501 static int is_presub_candidate(
502 struct radeon_compiler * c,
503 struct rc_instruction * inst)
504 {
505 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
506 unsigned int i;
507
508 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode)
509 return 0;
510
511 for(i = 0; i < info->NumSrcRegs; i++) {
512 struct rc_src_register src = inst->U.I.SrcReg[i];
513 if (src_reads_dst_mask(src, inst->U.I.DstReg))
514 return 0;
515
516 src.File = RC_FILE_PRESUB;
517 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
518 return 0;
519 }
520 return 1;
521 }
522
523 static int peephole_add_presub_add(
524 struct radeon_compiler * c,
525 struct rc_instruction * inst_add)
526 {
527 struct rc_src_register * src0 = NULL;
528 struct rc_src_register * src1 = NULL;
529 unsigned int i;
530
531 if (!is_presub_candidate(c, inst_add))
532 return 0;
533
534 if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
535 return 0;
536
537 /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */
538 for (i = 0; i < 2; i++) {
539 if (inst_add->U.I.SrcReg[i].Abs)
540 return 0;
541 if ((inst_add->U.I.SrcReg[i].Negate
542 & inst_add->U.I.DstReg.WriteMask) ==
543 inst_add->U.I.DstReg.WriteMask) {
544 src0 = &inst_add->U.I.SrcReg[i];
545 } else if (!src1) {
546 src1 = &inst_add->U.I.SrcReg[i];
547 } else {
548 src0 = &inst_add->U.I.SrcReg[i];
549 }
550 }
551
552 if (!src1)
553 return 0;
554
555 if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
556 rc_remove_instruction(inst_add);
557 return 1;
558 }
559 return 0;
560 }
561
562 static void presub_replace_inv(
563 struct rc_instruction * inst_add,
564 struct rc_instruction * inst_reader,
565 unsigned int src_index)
566 {
567 /* We must be careful not to modify inst_add, since it
568 * is possible it will remain part of the program.*/
569 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
570 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
571 inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
572 inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
573 inst_reader->U.I.PreSub.SrcReg[0]);
574
575 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
576 inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
577 }
578
579 /**
580 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
581 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
582 * of the add instruction must have the constatnt 1 swizzle. This function
583 * does not check const registers to see if their value is 1.0, so it should
584 * be called after the constant_folding optimization.
585 * @return
586 * 0 if the ADD instruction is still part of the program.
587 * 1 if the ADD instruction is no longer part of the program.
588 */
589 static int peephole_add_presub_inv(
590 struct radeon_compiler * c,
591 struct rc_instruction * inst_add)
592 {
593 unsigned int i, swz, mask;
594
595 if (!is_presub_candidate(c, inst_add))
596 return 0;
597
598 mask = inst_add->U.I.DstReg.WriteMask;
599
600 /* Check if src0 is 1. */
601 /* XXX It would be nice to use is_src_uniform_constant here, but that
602 * function only works if the register's file is RC_FILE_NONE */
603 for(i = 0; i < 4; i++ ) {
604 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
605 if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
606 && swz != RC_SWIZZLE_ONE) {
607 return 0;
608 }
609 }
610
611 /* Check src1. */
612 if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
613 inst_add->U.I.DstReg.WriteMask
614 || inst_add->U.I.SrcReg[1].Abs
615 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
616 && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
617 || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
618
619 return 0;
620 }
621
622 if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
623 rc_remove_instruction(inst_add);
624 return 1;
625 }
626 return 0;
627 }
628
629 /**
630 * @return
631 * 0 if inst is still part of the program.
632 * 1 if inst is no longer part of the program.
633 */
634 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
635 {
636 switch(inst->U.I.Opcode){
637 case RC_OPCODE_ADD:
638 if (c->has_presub) {
639 if(peephole_add_presub_inv(c, inst))
640 return 1;
641 if(peephole_add_presub_add(c, inst))
642 return 1;
643 }
644 break;
645 default:
646 break;
647 }
648 return 0;
649 }
650
651 void rc_optimize(struct radeon_compiler * c, void *user)
652 {
653 struct rc_instruction * inst = c->Program.Instructions.Next;
654 while(inst != &c->Program.Instructions) {
655 struct rc_instruction * cur = inst;
656 inst = inst->Next;
657
658 constant_folding(c, cur);
659
660 if(peephole(c, cur))
661 continue;
662
663 if (cur->U.I.Opcode == RC_OPCODE_MOV) {
664 copy_propagate(c, cur);
665 /* cur may no longer be part of the program */
666 }
667 }
668 }