r300/compiler: Fix incorrect presubtract conversion
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_optimize.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29 #include "radeon_dataflow.h"
30
31 #include "radeon_compiler.h"
32 #include "radeon_compiler_util.h"
33 #include "radeon_swizzle.h"
34
35 struct src_clobbered_reads_cb_data {
36 rc_register_file File;
37 unsigned int Index;
38 unsigned int Mask;
39 struct rc_reader_data * ReaderData;
40 };
41
/**
 * Callback used by presub_helper() to rewrite one source of a reader.
 * It is invoked as presub_replace(inst_add, reader_inst, src_index), where
 * src_index selects the entry of reader_inst->U.I.SrcReg to rewrite.
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction * inst_add,
					struct rc_instruction * inst_reader,
					unsigned int src_index);
45
46 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
47 {
48 struct rc_src_register combine;
49 combine.File = inner.File;
50 combine.Index = inner.Index;
51 combine.RelAddr = inner.RelAddr;
52 if (outer.Abs) {
53 combine.Abs = 1;
54 combine.Negate = outer.Negate;
55 } else {
56 combine.Abs = inner.Abs;
57 combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
58 combine.Negate ^= outer.Negate;
59 }
60 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
61 return combine;
62 }
63
64 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
65 struct rc_src_register * src)
66 {
67 rc_register_file file = src->File;
68 struct rc_reader_data * reader_data = data;
69
70 if(!rc_inst_can_use_presub(inst,
71 reader_data->Writer->U.I.PreSub.Opcode,
72 rc_swizzle_to_writemask(src->Swizzle),
73 *src,
74 reader_data->Writer->U.I.PreSub.SrcReg[0],
75 reader_data->Writer->U.I.PreSub.SrcReg[1])) {
76 reader_data->Abort = 1;
77 return;
78 }
79
80 /* XXX This could probably be handled better. */
81 if (file == RC_FILE_ADDRESS) {
82 reader_data->Abort = 1;
83 return;
84 }
85
86 /* These instructions cannot read from the constants file.
87 * see radeonTransformTEX()
88 */
89 if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
90 reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
91 (inst->U.I.Opcode == RC_OPCODE_TEX ||
92 inst->U.I.Opcode == RC_OPCODE_TXB ||
93 inst->U.I.Opcode == RC_OPCODE_TXP ||
94 inst->U.I.Opcode == RC_OPCODE_KIL)){
95 reader_data->Abort = 1;
96 return;
97 }
98 }
99
100 static void src_clobbered_reads_cb(
101 void * data,
102 struct rc_instruction * inst,
103 struct rc_src_register * src)
104 {
105 struct src_clobbered_reads_cb_data * sc_data = data;
106
107 if (src->File == sc_data->File
108 && src->Index == sc_data->Index
109 && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
110
111 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
112 }
113
114 if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
115 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
116 }
117 }
118
119 static void is_src_clobbered_scan_write(
120 void * data,
121 struct rc_instruction * inst,
122 rc_register_file file,
123 unsigned int index,
124 unsigned int mask)
125 {
126 struct src_clobbered_reads_cb_data sc_data;
127 struct rc_reader_data * reader_data = data;
128 sc_data.File = file;
129 sc_data.Index = index;
130 sc_data.Mask = mask;
131 sc_data.ReaderData = reader_data;
132 rc_for_all_reads_src(reader_data->Writer,
133 src_clobbered_reads_cb, &sc_data);
134 }
135
136 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
137 {
138 struct rc_reader_data reader_data;
139 unsigned int i;
140
141 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
142 inst_mov->U.I.WriteALUResult ||
143 inst_mov->U.I.SaturateMode)
144 return;
145
146 /* Get a list of all the readers of this MOV instruction. */
147 rc_get_readers(c, inst_mov, &reader_data,
148 copy_propagate_scan_read, NULL,
149 is_src_clobbered_scan_write);
150
151 if (reader_data.Abort || reader_data.ReaderCount == 0)
152 return;
153
154 /* Propagate the MOV instruction. */
155 for (i = 0; i < reader_data.ReaderCount; i++) {
156 struct rc_instruction * inst = reader_data.Readers[i].Inst;
157 *reader_data.Readers[i].U.Src = chain_srcregs(*reader_data.Readers[i].U.Src, inst_mov->U.I.SrcReg[0]);
158
159 if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
160 inst->U.I.PreSub = inst_mov->U.I.PreSub;
161 }
162
163 /* Finally, remove the original MOV instruction */
164 rc_remove_instruction(inst_mov);
165 }
166
167 /**
168 * Check if a source register is actually always the same
169 * swizzle constant.
170 */
171 static int is_src_uniform_constant(struct rc_src_register src,
172 rc_swizzle * pswz, unsigned int * pnegate)
173 {
174 int have_used = 0;
175
176 if (src.File != RC_FILE_NONE) {
177 *pswz = 0;
178 return 0;
179 }
180
181 for(unsigned int chan = 0; chan < 4; ++chan) {
182 unsigned int swz = GET_SWZ(src.Swizzle, chan);
183 if (swz < 4) {
184 *pswz = 0;
185 return 0;
186 }
187 if (swz == RC_SWIZZLE_UNUSED)
188 continue;
189
190 if (!have_used) {
191 *pswz = swz;
192 *pnegate = GET_BIT(src.Negate, chan);
193 have_used = 1;
194 } else {
195 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
196 *pswz = 0;
197 return 0;
198 }
199 }
200 }
201
202 return 1;
203 }
204
205 static void constant_folding_mad(struct rc_instruction * inst)
206 {
207 rc_swizzle swz = 0;
208 unsigned int negate= 0;
209
210 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
211 if (swz == RC_SWIZZLE_ZERO) {
212 inst->U.I.Opcode = RC_OPCODE_MUL;
213 return;
214 }
215 }
216
217 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
218 if (swz == RC_SWIZZLE_ONE) {
219 inst->U.I.Opcode = RC_OPCODE_ADD;
220 if (negate)
221 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
222 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
223 return;
224 } else if (swz == RC_SWIZZLE_ZERO) {
225 inst->U.I.Opcode = RC_OPCODE_MOV;
226 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
227 return;
228 }
229 }
230
231 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
232 if (swz == RC_SWIZZLE_ONE) {
233 inst->U.I.Opcode = RC_OPCODE_ADD;
234 if (negate)
235 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
236 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
237 return;
238 } else if (swz == RC_SWIZZLE_ZERO) {
239 inst->U.I.Opcode = RC_OPCODE_MOV;
240 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
241 return;
242 }
243 }
244 }
245
246 static void constant_folding_mul(struct rc_instruction * inst)
247 {
248 rc_swizzle swz = 0;
249 unsigned int negate = 0;
250
251 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
252 if (swz == RC_SWIZZLE_ONE) {
253 inst->U.I.Opcode = RC_OPCODE_MOV;
254 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
255 if (negate)
256 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
257 return;
258 } else if (swz == RC_SWIZZLE_ZERO) {
259 inst->U.I.Opcode = RC_OPCODE_MOV;
260 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
261 return;
262 }
263 }
264
265 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
266 if (swz == RC_SWIZZLE_ONE) {
267 inst->U.I.Opcode = RC_OPCODE_MOV;
268 if (negate)
269 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
270 return;
271 } else if (swz == RC_SWIZZLE_ZERO) {
272 inst->U.I.Opcode = RC_OPCODE_MOV;
273 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
274 return;
275 }
276 }
277 }
278
279 static void constant_folding_add(struct rc_instruction * inst)
280 {
281 rc_swizzle swz = 0;
282 unsigned int negate = 0;
283
284 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
285 if (swz == RC_SWIZZLE_ZERO) {
286 inst->U.I.Opcode = RC_OPCODE_MOV;
287 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
288 return;
289 }
290 }
291
292 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
293 if (swz == RC_SWIZZLE_ZERO) {
294 inst->U.I.Opcode = RC_OPCODE_MOV;
295 return;
296 }
297 }
298 }
299
300 /**
301 * Replace 0.0, 1.0 and 0.5 immediate constants by their
302 * respective swizzles. Simplify instructions like ADD dst, src, 0;
303 */
304 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
305 {
306 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
307 unsigned int i;
308
309 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
310 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
311 struct rc_constant * constant;
312 struct rc_src_register newsrc;
313 int have_real_reference;
314 unsigned int chan;
315
316 /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
317 for (chan = 0; chan < 4; ++chan)
318 if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
319 break;
320 if (chan == 4) {
321 inst->U.I.SrcReg[src].File = RC_FILE_NONE;
322 continue;
323 }
324
325 /* Convert immediates to swizzles. */
326 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
327 inst->U.I.SrcReg[src].RelAddr ||
328 inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
329 continue;
330
331 constant =
332 &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
333
334 if (constant->Type != RC_CONSTANT_IMMEDIATE)
335 continue;
336
337 newsrc = inst->U.I.SrcReg[src];
338 have_real_reference = 0;
339 for (chan = 0; chan < 4; ++chan) {
340 unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
341 unsigned int newswz;
342 float imm;
343 float baseimm;
344
345 if (swz >= 4)
346 continue;
347
348 imm = constant->u.Immediate[swz];
349 baseimm = imm;
350 if (imm < 0.0)
351 baseimm = -baseimm;
352
353 if (baseimm == 0.0) {
354 newswz = RC_SWIZZLE_ZERO;
355 } else if (baseimm == 1.0) {
356 newswz = RC_SWIZZLE_ONE;
357 } else if (baseimm == 0.5 && c->has_half_swizzles) {
358 newswz = RC_SWIZZLE_HALF;
359 } else {
360 have_real_reference = 1;
361 continue;
362 }
363
364 SET_SWZ(newsrc.Swizzle, chan, newswz);
365 if (imm < 0.0 && !newsrc.Abs)
366 newsrc.Negate ^= 1 << chan;
367 }
368
369 if (!have_real_reference) {
370 newsrc.File = RC_FILE_NONE;
371 newsrc.Index = 0;
372 }
373
374 /* don't make the swizzle worse */
375 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
376 c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
377 continue;
378
379 inst->U.I.SrcReg[src] = newsrc;
380 }
381
382 /* Simplify instructions based on constants */
383 if (inst->U.I.Opcode == RC_OPCODE_MAD)
384 constant_folding_mad(inst);
385
386 /* note: MAD can simplify to MUL or ADD */
387 if (inst->U.I.Opcode == RC_OPCODE_MUL)
388 constant_folding_mul(inst);
389 else if (inst->U.I.Opcode == RC_OPCODE_ADD)
390 constant_folding_add(inst);
391
392 /* In case this instruction has been converted, make sure all of the
393 * registers that are no longer used are empty. */
394 opcode = rc_get_opcode_info(inst->U.I.Opcode);
395 for(i = opcode->NumSrcRegs; i < 3; i++) {
396 memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
397 }
398 }
399
400 /**
401 * If src and dst use the same register, this function returns a writemask that
402 * indicates wich components are read by src. Otherwise zero is returned.
403 */
404 static unsigned int src_reads_dst_mask(struct rc_src_register src,
405 struct rc_dst_register dst)
406 {
407 if (dst.File != src.File || dst.Index != src.Index) {
408 return 0;
409 }
410 return rc_swizzle_to_writemask(src.Swizzle);
411 }
412
413 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
414 * in any of its channels. Return 0 otherwise. */
415 static int src_has_const_swz(struct rc_src_register src) {
416 int chan;
417 for(chan = 0; chan < 4; chan++) {
418 unsigned int swz = GET_SWZ(src.Swizzle, chan);
419 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
420 || swz == RC_SWIZZLE_ONE) {
421 return 1;
422 }
423 }
424 return 0;
425 }
426
427 static void presub_scan_read(
428 void * data,
429 struct rc_instruction * inst,
430 struct rc_src_register * src)
431 {
432 struct rc_reader_data * reader_data = data;
433 rc_presubtract_op * presub_opcode = reader_data->CbData;
434
435 if (!rc_inst_can_use_presub(inst, *presub_opcode,
436 reader_data->Writer->U.I.DstReg.WriteMask,
437 *src,
438 reader_data->Writer->U.I.SrcReg[0],
439 reader_data->Writer->U.I.SrcReg[1])) {
440 reader_data->Abort = 1;
441 return;
442 }
443 }
444
445 static int presub_helper(
446 struct radeon_compiler * c,
447 struct rc_instruction * inst_add,
448 rc_presubtract_op presub_opcode,
449 rc_presub_replace_fn presub_replace)
450 {
451 struct rc_reader_data reader_data;
452 unsigned int i;
453 rc_presubtract_op cb_op = presub_opcode;
454
455 reader_data.CbData = &cb_op;
456 rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
457 is_src_clobbered_scan_write);
458
459 if (reader_data.Abort || reader_data.ReaderCount == 0)
460 return 0;
461
462 for(i = 0; i < reader_data.ReaderCount; i++) {
463 unsigned int src_index;
464 struct rc_reader reader = reader_data.Readers[i];
465 const struct rc_opcode_info * info =
466 rc_get_opcode_info(reader.Inst->U.I.Opcode);
467
468 for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
469 if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.Src)
470 presub_replace(inst_add, reader.Inst, src_index);
471 }
472 }
473 return 1;
474 }
475
476 /* This function assumes that inst_add->U.I.SrcReg[0] and
477 * inst_add->U.I.SrcReg[1] aren't both negative. */
478 static void presub_replace_add(
479 struct rc_instruction * inst_add,
480 struct rc_instruction * inst_reader,
481 unsigned int src_index)
482 {
483 rc_presubtract_op presub_opcode;
484 if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
485 presub_opcode = RC_PRESUB_SUB;
486 else
487 presub_opcode = RC_PRESUB_ADD;
488
489 if (inst_add->U.I.SrcReg[1].Negate) {
490 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
491 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
492 } else {
493 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
494 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
495 }
496 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
497 inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
498 inst_reader->U.I.PreSub.Opcode = presub_opcode;
499 inst_reader->U.I.SrcReg[src_index] =
500 chain_srcregs(inst_reader->U.I.SrcReg[src_index],
501 inst_reader->U.I.PreSub.SrcReg[0]);
502 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
503 inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
504 }
505
506 static int is_presub_candidate(
507 struct radeon_compiler * c,
508 struct rc_instruction * inst)
509 {
510 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
511 unsigned int i;
512 unsigned int is_constant[2] = {0, 0};
513
514 assert(inst->U.I.Opcode == RC_OPCODE_ADD);
515
516 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode)
517 return 0;
518
519 /* If both sources use a constant swizzle, then we can't convert it to
520 * a presubtract operation. In fact for the ADD and SUB presubtract
521 * operations neither source can contain a constant swizzle. This
522 * specific case is checked in peephole_add_presub_add() when
523 * we make sure the swizzles for both sources are equal, so we
524 * don't need to worry about it here. */
525 for (i = 0; i < 2; i++) {
526 int chan;
527 for (chan = 0; chan < 4; chan++) {
528 rc_swizzle swz =
529 get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
530 if (swz == RC_SWIZZLE_ONE
531 || swz == RC_SWIZZLE_ZERO
532 || swz == RC_SWIZZLE_HALF) {
533 is_constant[i] = 1;
534 }
535 }
536 }
537 if (is_constant[0] && is_constant[1])
538 return 0;
539
540 for(i = 0; i < info->NumSrcRegs; i++) {
541 struct rc_src_register src = inst->U.I.SrcReg[i];
542 if (src_reads_dst_mask(src, inst->U.I.DstReg))
543 return 0;
544
545 src.File = RC_FILE_PRESUB;
546 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
547 return 0;
548 }
549 return 1;
550 }
551
552 static int peephole_add_presub_add(
553 struct radeon_compiler * c,
554 struct rc_instruction * inst_add)
555 {
556 struct rc_src_register * src0 = NULL;
557 struct rc_src_register * src1 = NULL;
558 unsigned int i;
559
560 if (!is_presub_candidate(c, inst_add))
561 return 0;
562
563 if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
564 return 0;
565
566 /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */
567 for (i = 0; i < 2; i++) {
568 if (inst_add->U.I.SrcReg[i].Abs)
569 return 0;
570 if ((inst_add->U.I.SrcReg[i].Negate
571 & inst_add->U.I.DstReg.WriteMask) ==
572 inst_add->U.I.DstReg.WriteMask) {
573 src0 = &inst_add->U.I.SrcReg[i];
574 } else if (!src1) {
575 src1 = &inst_add->U.I.SrcReg[i];
576 } else {
577 src0 = &inst_add->U.I.SrcReg[i];
578 }
579 }
580
581 if (!src1)
582 return 0;
583
584 if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
585 rc_remove_instruction(inst_add);
586 return 1;
587 }
588 return 0;
589 }
590
591 static void presub_replace_inv(
592 struct rc_instruction * inst_add,
593 struct rc_instruction * inst_reader,
594 unsigned int src_index)
595 {
596 /* We must be careful not to modify inst_add, since it
597 * is possible it will remain part of the program.*/
598 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
599 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
600 inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
601 inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
602 inst_reader->U.I.PreSub.SrcReg[0]);
603
604 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
605 inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
606 }
607
608 /**
609 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
610 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
611 * of the add instruction must have the constatnt 1 swizzle. This function
612 * does not check const registers to see if their value is 1.0, so it should
613 * be called after the constant_folding optimization.
614 * @return
615 * 0 if the ADD instruction is still part of the program.
616 * 1 if the ADD instruction is no longer part of the program.
617 */
618 static int peephole_add_presub_inv(
619 struct radeon_compiler * c,
620 struct rc_instruction * inst_add)
621 {
622 unsigned int i, swz, mask;
623
624 if (!is_presub_candidate(c, inst_add))
625 return 0;
626
627 mask = inst_add->U.I.DstReg.WriteMask;
628
629 /* Check if src0 is 1. */
630 /* XXX It would be nice to use is_src_uniform_constant here, but that
631 * function only works if the register's file is RC_FILE_NONE */
632 for(i = 0; i < 4; i++ ) {
633 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
634 if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
635 && swz != RC_SWIZZLE_ONE) {
636 return 0;
637 }
638 }
639
640 /* Check src1. */
641 if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
642 inst_add->U.I.DstReg.WriteMask
643 || inst_add->U.I.SrcReg[1].Abs
644 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
645 && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
646 || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
647
648 return 0;
649 }
650
651 if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
652 rc_remove_instruction(inst_add);
653 return 1;
654 }
655 return 0;
656 }
657
658 /**
659 * @return
660 * 0 if inst is still part of the program.
661 * 1 if inst is no longer part of the program.
662 */
663 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
664 {
665 switch(inst->U.I.Opcode){
666 case RC_OPCODE_ADD:
667 if (c->has_presub) {
668 if(peephole_add_presub_inv(c, inst))
669 return 1;
670 if(peephole_add_presub_add(c, inst))
671 return 1;
672 }
673 break;
674 default:
675 break;
676 }
677 return 0;
678 }
679
680 void rc_optimize(struct radeon_compiler * c, void *user)
681 {
682 struct rc_instruction * inst = c->Program.Instructions.Next;
683 while(inst != &c->Program.Instructions) {
684 struct rc_instruction * cur = inst;
685 inst = inst->Next;
686
687 constant_folding(c, cur);
688
689 if(peephole(c, cur))
690 continue;
691
692 if (cur->U.I.Opcode == RC_OPCODE_MOV) {
693 copy_propagate(c, cur);
694 /* cur may no longer be part of the program */
695 }
696 }
697 }