r300/compiler: make lowering passes possibly use up to two less temps
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_optimize.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29 #include "radeon_dataflow.h"
30
31 #include "radeon_compiler.h"
32 #include "radeon_compiler_util.h"
33 #include "radeon_swizzle.h"
34
35 struct src_clobbered_reads_cb_data {
36 rc_register_file File;
37 unsigned int Index;
38 unsigned int Mask;
39 struct rc_reader_data * ReaderData;
40 };
41
/**
 * Callback used by presub_helper() to rewrite a single source operand of a
 * reader so that it reads from the presubtract unit.
 *
 * Arguments, in order: the ADD instruction being folded, the instruction
 * that reads the ADD's result, and the index of the reader's source operand
 * that has to be rewritten.
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *inst_add,
				     struct rc_instruction *inst_reader,
				     unsigned int src_index);
45
46 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
47 {
48 struct rc_src_register combine;
49 combine.File = inner.File;
50 combine.Index = inner.Index;
51 combine.RelAddr = inner.RelAddr;
52 if (outer.Abs) {
53 combine.Abs = 1;
54 combine.Negate = outer.Negate;
55 } else {
56 combine.Abs = inner.Abs;
57 combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
58 combine.Negate ^= outer.Negate;
59 }
60 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
61 return combine;
62 }
63
64 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
65 struct rc_src_register * src)
66 {
67 rc_register_file file = src->File;
68 struct rc_reader_data * reader_data = data;
69
70 if(!rc_inst_can_use_presub(inst,
71 reader_data->Writer->U.I.PreSub.Opcode,
72 rc_swizzle_to_writemask(src->Swizzle),
73 *src,
74 reader_data->Writer->U.I.PreSub.SrcReg[0],
75 reader_data->Writer->U.I.PreSub.SrcReg[1])) {
76 reader_data->Abort = 1;
77 return;
78 }
79
80 /* XXX This could probably be handled better. */
81 if (file == RC_FILE_ADDRESS) {
82 reader_data->Abort = 1;
83 return;
84 }
85
86 /* These instructions cannot read from the constants file.
87 * see radeonTransformTEX()
88 */
89 if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
90 reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
91 (inst->U.I.Opcode == RC_OPCODE_TEX ||
92 inst->U.I.Opcode == RC_OPCODE_TXB ||
93 inst->U.I.Opcode == RC_OPCODE_TXP ||
94 inst->U.I.Opcode == RC_OPCODE_KIL)){
95 reader_data->Abort = 1;
96 return;
97 }
98 }
99
100 static void src_clobbered_reads_cb(
101 void * data,
102 struct rc_instruction * inst,
103 struct rc_src_register * src)
104 {
105 struct src_clobbered_reads_cb_data * sc_data = data;
106
107 if (src->File == sc_data->File
108 && src->Index == sc_data->Index
109 && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
110
111 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
112 }
113
114 if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
115 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
116 }
117 }
118
119 static void is_src_clobbered_scan_write(
120 void * data,
121 struct rc_instruction * inst,
122 rc_register_file file,
123 unsigned int index,
124 unsigned int mask)
125 {
126 struct src_clobbered_reads_cb_data sc_data;
127 struct rc_reader_data * reader_data = data;
128 sc_data.File = file;
129 sc_data.Index = index;
130 sc_data.Mask = mask;
131 sc_data.ReaderData = reader_data;
132 rc_for_all_reads_src(reader_data->Writer,
133 src_clobbered_reads_cb, &sc_data);
134 }
135
136 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
137 {
138 struct rc_reader_data reader_data;
139 unsigned int i;
140
141 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
142 inst_mov->U.I.DstReg.RelAddr ||
143 inst_mov->U.I.WriteALUResult ||
144 inst_mov->U.I.SaturateMode)
145 return;
146
147 /* Get a list of all the readers of this MOV instruction. */
148 rc_get_readers(c, inst_mov, &reader_data,
149 copy_propagate_scan_read, NULL,
150 is_src_clobbered_scan_write);
151
152 if (reader_data.Abort || reader_data.ReaderCount == 0)
153 return;
154
155 /* Propagate the MOV instruction. */
156 for (i = 0; i < reader_data.ReaderCount; i++) {
157 struct rc_instruction * inst = reader_data.Readers[i].Inst;
158 *reader_data.Readers[i].U.Src = chain_srcregs(*reader_data.Readers[i].U.Src, inst_mov->U.I.SrcReg[0]);
159
160 if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
161 inst->U.I.PreSub = inst_mov->U.I.PreSub;
162 }
163
164 /* Finally, remove the original MOV instruction */
165 rc_remove_instruction(inst_mov);
166 }
167
168 /**
169 * Check if a source register is actually always the same
170 * swizzle constant.
171 */
172 static int is_src_uniform_constant(struct rc_src_register src,
173 rc_swizzle * pswz, unsigned int * pnegate)
174 {
175 int have_used = 0;
176
177 if (src.File != RC_FILE_NONE) {
178 *pswz = 0;
179 return 0;
180 }
181
182 for(unsigned int chan = 0; chan < 4; ++chan) {
183 unsigned int swz = GET_SWZ(src.Swizzle, chan);
184 if (swz < 4) {
185 *pswz = 0;
186 return 0;
187 }
188 if (swz == RC_SWIZZLE_UNUSED)
189 continue;
190
191 if (!have_used) {
192 *pswz = swz;
193 *pnegate = GET_BIT(src.Negate, chan);
194 have_used = 1;
195 } else {
196 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
197 *pswz = 0;
198 return 0;
199 }
200 }
201 }
202
203 return 1;
204 }
205
206 static void constant_folding_mad(struct rc_instruction * inst)
207 {
208 rc_swizzle swz = 0;
209 unsigned int negate= 0;
210
211 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
212 if (swz == RC_SWIZZLE_ZERO) {
213 inst->U.I.Opcode = RC_OPCODE_MUL;
214 return;
215 }
216 }
217
218 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
219 if (swz == RC_SWIZZLE_ONE) {
220 inst->U.I.Opcode = RC_OPCODE_ADD;
221 if (negate)
222 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
223 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
224 return;
225 } else if (swz == RC_SWIZZLE_ZERO) {
226 inst->U.I.Opcode = RC_OPCODE_MOV;
227 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
228 return;
229 }
230 }
231
232 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
233 if (swz == RC_SWIZZLE_ONE) {
234 inst->U.I.Opcode = RC_OPCODE_ADD;
235 if (negate)
236 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
237 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
238 return;
239 } else if (swz == RC_SWIZZLE_ZERO) {
240 inst->U.I.Opcode = RC_OPCODE_MOV;
241 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
242 return;
243 }
244 }
245 }
246
247 static void constant_folding_mul(struct rc_instruction * inst)
248 {
249 rc_swizzle swz = 0;
250 unsigned int negate = 0;
251
252 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
253 if (swz == RC_SWIZZLE_ONE) {
254 inst->U.I.Opcode = RC_OPCODE_MOV;
255 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
256 if (negate)
257 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
258 return;
259 } else if (swz == RC_SWIZZLE_ZERO) {
260 inst->U.I.Opcode = RC_OPCODE_MOV;
261 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
262 return;
263 }
264 }
265
266 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
267 if (swz == RC_SWIZZLE_ONE) {
268 inst->U.I.Opcode = RC_OPCODE_MOV;
269 if (negate)
270 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
271 return;
272 } else if (swz == RC_SWIZZLE_ZERO) {
273 inst->U.I.Opcode = RC_OPCODE_MOV;
274 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
275 return;
276 }
277 }
278 }
279
280 static void constant_folding_add(struct rc_instruction * inst)
281 {
282 rc_swizzle swz = 0;
283 unsigned int negate = 0;
284
285 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
286 if (swz == RC_SWIZZLE_ZERO) {
287 inst->U.I.Opcode = RC_OPCODE_MOV;
288 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
289 return;
290 }
291 }
292
293 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
294 if (swz == RC_SWIZZLE_ZERO) {
295 inst->U.I.Opcode = RC_OPCODE_MOV;
296 return;
297 }
298 }
299 }
300
301 /**
302 * Replace 0.0, 1.0 and 0.5 immediate constants by their
303 * respective swizzles. Simplify instructions like ADD dst, src, 0;
304 */
305 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
306 {
307 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
308 unsigned int i;
309
310 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
311 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
312 struct rc_constant * constant;
313 struct rc_src_register newsrc;
314 int have_real_reference;
315
316 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
317 inst->U.I.SrcReg[src].RelAddr ||
318 inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
319 continue;
320
321 constant =
322 &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
323
324 if (constant->Type != RC_CONSTANT_IMMEDIATE)
325 continue;
326
327 newsrc = inst->U.I.SrcReg[src];
328 have_real_reference = 0;
329 for(unsigned int chan = 0; chan < 4; ++chan) {
330 unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
331 unsigned int newswz;
332 float imm;
333 float baseimm;
334
335 if (swz >= 4)
336 continue;
337
338 imm = constant->u.Immediate[swz];
339 baseimm = imm;
340 if (imm < 0.0)
341 baseimm = -baseimm;
342
343 if (baseimm == 0.0) {
344 newswz = RC_SWIZZLE_ZERO;
345 } else if (baseimm == 1.0) {
346 newswz = RC_SWIZZLE_ONE;
347 } else if (baseimm == 0.5 && c->has_half_swizzles) {
348 newswz = RC_SWIZZLE_HALF;
349 } else {
350 have_real_reference = 1;
351 continue;
352 }
353
354 SET_SWZ(newsrc.Swizzle, chan, newswz);
355 if (imm < 0.0 && !newsrc.Abs)
356 newsrc.Negate ^= 1 << chan;
357 }
358
359 if (!have_real_reference) {
360 newsrc.File = RC_FILE_NONE;
361 newsrc.Index = 0;
362 }
363
364 /* don't make the swizzle worse */
365 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
366 c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
367 continue;
368
369 inst->U.I.SrcReg[src] = newsrc;
370 }
371
372 /* Simplify instructions based on constants */
373 if (inst->U.I.Opcode == RC_OPCODE_MAD)
374 constant_folding_mad(inst);
375
376 /* note: MAD can simplify to MUL or ADD */
377 if (inst->U.I.Opcode == RC_OPCODE_MUL)
378 constant_folding_mul(inst);
379 else if (inst->U.I.Opcode == RC_OPCODE_ADD)
380 constant_folding_add(inst);
381
382 /* In case this instruction has been converted, make sure all of the
383 * registers that are no longer used are empty. */
384 opcode = rc_get_opcode_info(inst->U.I.Opcode);
385 for(i = opcode->NumSrcRegs; i < 3; i++) {
386 memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
387 }
388 }
389
390 /**
391 * If src and dst use the same register, this function returns a writemask that
392 * indicates wich components are read by src. Otherwise zero is returned.
393 */
394 static unsigned int src_reads_dst_mask(struct rc_src_register src,
395 struct rc_dst_register dst)
396 {
397 if (dst.File != src.File || dst.Index != src.Index) {
398 return 0;
399 }
400 return rc_swizzle_to_writemask(src.Swizzle);
401 }
402
403 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
404 * in any of its channels. Return 0 otherwise. */
405 static int src_has_const_swz(struct rc_src_register src) {
406 int chan;
407 for(chan = 0; chan < 4; chan++) {
408 unsigned int swz = GET_SWZ(src.Swizzle, chan);
409 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
410 || swz == RC_SWIZZLE_ONE) {
411 return 1;
412 }
413 }
414 return 0;
415 }
416
417 static void presub_scan_read(
418 void * data,
419 struct rc_instruction * inst,
420 struct rc_src_register * src)
421 {
422 struct rc_reader_data * reader_data = data;
423 rc_presubtract_op * presub_opcode = reader_data->CbData;
424
425 if (!rc_inst_can_use_presub(inst, *presub_opcode,
426 reader_data->Writer->U.I.DstReg.WriteMask,
427 *src,
428 reader_data->Writer->U.I.SrcReg[0],
429 reader_data->Writer->U.I.SrcReg[1])) {
430 reader_data->Abort = 1;
431 return;
432 }
433 }
434
435 static int presub_helper(
436 struct radeon_compiler * c,
437 struct rc_instruction * inst_add,
438 rc_presubtract_op presub_opcode,
439 rc_presub_replace_fn presub_replace)
440 {
441 struct rc_reader_data reader_data;
442 unsigned int i;
443 rc_presubtract_op cb_op = presub_opcode;
444
445 reader_data.CbData = &cb_op;
446 rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
447 is_src_clobbered_scan_write);
448
449 if (reader_data.Abort || reader_data.ReaderCount == 0)
450 return 0;
451
452 for(i = 0; i < reader_data.ReaderCount; i++) {
453 unsigned int src_index;
454 struct rc_reader reader = reader_data.Readers[i];
455 const struct rc_opcode_info * info =
456 rc_get_opcode_info(reader.Inst->U.I.Opcode);
457
458 for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
459 if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.Src)
460 presub_replace(inst_add, reader.Inst, src_index);
461 }
462 }
463 return 1;
464 }
465
466 /* This function assumes that inst_add->U.I.SrcReg[0] and
467 * inst_add->U.I.SrcReg[1] aren't both negative. */
468 static void presub_replace_add(
469 struct rc_instruction * inst_add,
470 struct rc_instruction * inst_reader,
471 unsigned int src_index)
472 {
473 rc_presubtract_op presub_opcode;
474 if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
475 presub_opcode = RC_PRESUB_SUB;
476 else
477 presub_opcode = RC_PRESUB_ADD;
478
479 if (inst_add->U.I.SrcReg[1].Negate) {
480 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
481 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
482 } else {
483 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
484 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
485 }
486 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
487 inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
488 inst_reader->U.I.PreSub.Opcode = presub_opcode;
489 inst_reader->U.I.SrcReg[src_index] =
490 chain_srcregs(inst_reader->U.I.SrcReg[src_index],
491 inst_reader->U.I.PreSub.SrcReg[0]);
492 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
493 inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
494 }
495
496 static int is_presub_candidate(
497 struct radeon_compiler * c,
498 struct rc_instruction * inst)
499 {
500 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
501 unsigned int i;
502
503 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode)
504 return 0;
505
506 for(i = 0; i < info->NumSrcRegs; i++) {
507 struct rc_src_register src = inst->U.I.SrcReg[i];
508 if (src_reads_dst_mask(src, inst->U.I.DstReg))
509 return 0;
510
511 src.File = RC_FILE_PRESUB;
512 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
513 return 0;
514 }
515 return 1;
516 }
517
518 static int peephole_add_presub_add(
519 struct radeon_compiler * c,
520 struct rc_instruction * inst_add)
521 {
522 struct rc_src_register * src0 = NULL;
523 struct rc_src_register * src1 = NULL;
524 unsigned int i;
525
526 if (!is_presub_candidate(c, inst_add))
527 return 0;
528
529 if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
530 return 0;
531
532 /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */
533 for (i = 0; i < 2; i++) {
534 if (inst_add->U.I.SrcReg[i].Abs)
535 return 0;
536 if ((inst_add->U.I.SrcReg[i].Negate
537 & inst_add->U.I.DstReg.WriteMask) ==
538 inst_add->U.I.DstReg.WriteMask) {
539 src0 = &inst_add->U.I.SrcReg[i];
540 } else if (!src1) {
541 src1 = &inst_add->U.I.SrcReg[i];
542 } else {
543 src0 = &inst_add->U.I.SrcReg[i];
544 }
545 }
546
547 if (!src1)
548 return 0;
549
550 if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
551 rc_remove_instruction(inst_add);
552 return 1;
553 }
554 return 0;
555 }
556
557 static void presub_replace_inv(
558 struct rc_instruction * inst_add,
559 struct rc_instruction * inst_reader,
560 unsigned int src_index)
561 {
562 /* We must be careful not to modify inst_add, since it
563 * is possible it will remain part of the program.*/
564 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
565 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
566 inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
567 inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
568 inst_reader->U.I.PreSub.SrcReg[0]);
569
570 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
571 inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
572 }
573
574 /**
575 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
576 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
577 * of the add instruction must have the constatnt 1 swizzle. This function
578 * does not check const registers to see if their value is 1.0, so it should
579 * be called after the constant_folding optimization.
580 * @return
581 * 0 if the ADD instruction is still part of the program.
582 * 1 if the ADD instruction is no longer part of the program.
583 */
584 static int peephole_add_presub_inv(
585 struct radeon_compiler * c,
586 struct rc_instruction * inst_add)
587 {
588 unsigned int i, swz, mask;
589
590 if (!is_presub_candidate(c, inst_add))
591 return 0;
592
593 mask = inst_add->U.I.DstReg.WriteMask;
594
595 /* Check if src0 is 1. */
596 /* XXX It would be nice to use is_src_uniform_constant here, but that
597 * function only works if the register's file is RC_FILE_NONE */
598 for(i = 0; i < 4; i++ ) {
599 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
600 if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
601 && swz != RC_SWIZZLE_ONE) {
602 return 0;
603 }
604 }
605
606 /* Check src1. */
607 if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
608 inst_add->U.I.DstReg.WriteMask
609 || inst_add->U.I.SrcReg[1].Abs
610 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
611 && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
612 || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
613
614 return 0;
615 }
616
617 if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
618 rc_remove_instruction(inst_add);
619 return 1;
620 }
621 return 0;
622 }
623
624 /**
625 * @return
626 * 0 if inst is still part of the program.
627 * 1 if inst is no longer part of the program.
628 */
629 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
630 {
631 switch(inst->U.I.Opcode){
632 case RC_OPCODE_ADD:
633 if (c->has_presub) {
634 if(peephole_add_presub_inv(c, inst))
635 return 1;
636 if(peephole_add_presub_add(c, inst))
637 return 1;
638 }
639 break;
640 default:
641 break;
642 }
643 return 0;
644 }
645
646 void rc_optimize(struct radeon_compiler * c, void *user)
647 {
648 struct rc_instruction * inst = c->Program.Instructions.Next;
649 while(inst != &c->Program.Instructions) {
650 struct rc_instruction * cur = inst;
651 inst = inst->Next;
652
653 constant_folding(c, cur);
654
655 if(peephole(c, cur))
656 continue;
657
658 if (cur->U.I.Opcode == RC_OPCODE_MOV) {
659 copy_propagate(c, cur);
660 /* cur may no longer be part of the program */
661 }
662 }
663 }