53ab5fbbbd9bb12ba9a9127747ce9441d4b27c7c
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_optimize.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29 #include "radeon_dataflow.h"
30
31 #include "radeon_compiler.h"
32 #include "radeon_compiler_util.h"
33 #include "radeon_swizzle.h"
34
35 struct src_clobbered_reads_cb_data {
36 rc_register_file File;
37 unsigned int Index;
38 unsigned int Mask;
39 struct rc_reader_data * ReaderData;
40 };
41
/**
 * Callback invoked by presub_helper() for every reader operand that has to
 * be rewritten to use a presubtract result.
 *
 * inst_add    - the ADD instruction being folded into its readers
 * inst_reader - the instruction that reads the ADD result
 * src_index   - index of the source operand of inst_reader to rewrite
 */
typedef void (*rc_presub_replace_fn)(
	struct rc_instruction * inst_add,
	struct rc_instruction * inst_reader,
	unsigned int src_index);
45
46 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
47 {
48 struct rc_src_register combine;
49 combine.File = inner.File;
50 combine.Index = inner.Index;
51 combine.RelAddr = inner.RelAddr;
52 if (outer.Abs) {
53 combine.Abs = 1;
54 combine.Negate = outer.Negate;
55 } else {
56 combine.Abs = inner.Abs;
57 combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
58 combine.Negate ^= outer.Negate;
59 }
60 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
61 return combine;
62 }
63
64 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
65 struct rc_src_register * src)
66 {
67 rc_register_file file = src->File;
68 struct rc_reader_data * reader_data = data;
69
70 if(!rc_inst_can_use_presub(inst,
71 reader_data->Writer->U.I.PreSub.Opcode,
72 rc_swizzle_to_writemask(src->Swizzle),
73 src,
74 &reader_data->Writer->U.I.PreSub.SrcReg[0],
75 &reader_data->Writer->U.I.PreSub.SrcReg[1])) {
76 reader_data->Abort = 1;
77 return;
78 }
79
80 /* XXX This could probably be handled better. */
81 if (file == RC_FILE_ADDRESS) {
82 reader_data->Abort = 1;
83 return;
84 }
85
86 /* These instructions cannot read from the constants file.
87 * see radeonTransformTEX()
88 */
89 if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
90 reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
91 (inst->U.I.Opcode == RC_OPCODE_TEX ||
92 inst->U.I.Opcode == RC_OPCODE_TXB ||
93 inst->U.I.Opcode == RC_OPCODE_TXP ||
94 inst->U.I.Opcode == RC_OPCODE_TXD ||
95 inst->U.I.Opcode == RC_OPCODE_TXL ||
96 inst->U.I.Opcode == RC_OPCODE_KIL)){
97 reader_data->Abort = 1;
98 return;
99 }
100 }
101
102 static void src_clobbered_reads_cb(
103 void * data,
104 struct rc_instruction * inst,
105 struct rc_src_register * src)
106 {
107 struct src_clobbered_reads_cb_data * sc_data = data;
108
109 if (src->File == sc_data->File
110 && src->Index == sc_data->Index
111 && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
112
113 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
114 }
115
116 if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
117 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
118 }
119 }
120
121 static void is_src_clobbered_scan_write(
122 void * data,
123 struct rc_instruction * inst,
124 rc_register_file file,
125 unsigned int index,
126 unsigned int mask)
127 {
128 struct src_clobbered_reads_cb_data sc_data;
129 struct rc_reader_data * reader_data = data;
130 sc_data.File = file;
131 sc_data.Index = index;
132 sc_data.Mask = mask;
133 sc_data.ReaderData = reader_data;
134 rc_for_all_reads_src(reader_data->Writer,
135 src_clobbered_reads_cb, &sc_data);
136 }
137
138 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
139 {
140 struct rc_reader_data reader_data;
141 unsigned int i;
142
143 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
144 inst_mov->U.I.WriteALUResult ||
145 inst_mov->U.I.SaturateMode)
146 return;
147
148 /* Get a list of all the readers of this MOV instruction. */
149 reader_data.ExitOnAbort = 1;
150 rc_get_readers(c, inst_mov, &reader_data,
151 copy_propagate_scan_read, NULL,
152 is_src_clobbered_scan_write);
153
154 if (reader_data.Abort || reader_data.ReaderCount == 0)
155 return;
156
157 /* Propagate the MOV instruction. */
158 for (i = 0; i < reader_data.ReaderCount; i++) {
159 struct rc_instruction * inst = reader_data.Readers[i].Inst;
160 *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
161
162 if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
163 inst->U.I.PreSub = inst_mov->U.I.PreSub;
164 }
165
166 /* Finally, remove the original MOV instruction */
167 rc_remove_instruction(inst_mov);
168 }
169
170 /**
171 * Check if a source register is actually always the same
172 * swizzle constant.
173 */
174 static int is_src_uniform_constant(struct rc_src_register src,
175 rc_swizzle * pswz, unsigned int * pnegate)
176 {
177 int have_used = 0;
178
179 if (src.File != RC_FILE_NONE) {
180 *pswz = 0;
181 return 0;
182 }
183
184 for(unsigned int chan = 0; chan < 4; ++chan) {
185 unsigned int swz = GET_SWZ(src.Swizzle, chan);
186 if (swz < 4) {
187 *pswz = 0;
188 return 0;
189 }
190 if (swz == RC_SWIZZLE_UNUSED)
191 continue;
192
193 if (!have_used) {
194 *pswz = swz;
195 *pnegate = GET_BIT(src.Negate, chan);
196 have_used = 1;
197 } else {
198 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
199 *pswz = 0;
200 return 0;
201 }
202 }
203 }
204
205 return 1;
206 }
207
208 static void constant_folding_mad(struct rc_instruction * inst)
209 {
210 rc_swizzle swz = 0;
211 unsigned int negate= 0;
212
213 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
214 if (swz == RC_SWIZZLE_ZERO) {
215 inst->U.I.Opcode = RC_OPCODE_MUL;
216 return;
217 }
218 }
219
220 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
221 if (swz == RC_SWIZZLE_ONE) {
222 inst->U.I.Opcode = RC_OPCODE_ADD;
223 if (negate)
224 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
225 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
226 return;
227 } else if (swz == RC_SWIZZLE_ZERO) {
228 inst->U.I.Opcode = RC_OPCODE_MOV;
229 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
230 return;
231 }
232 }
233
234 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
235 if (swz == RC_SWIZZLE_ONE) {
236 inst->U.I.Opcode = RC_OPCODE_ADD;
237 if (negate)
238 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
239 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
240 return;
241 } else if (swz == RC_SWIZZLE_ZERO) {
242 inst->U.I.Opcode = RC_OPCODE_MOV;
243 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
244 return;
245 }
246 }
247 }
248
249 static void constant_folding_mul(struct rc_instruction * inst)
250 {
251 rc_swizzle swz = 0;
252 unsigned int negate = 0;
253
254 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
255 if (swz == RC_SWIZZLE_ONE) {
256 inst->U.I.Opcode = RC_OPCODE_MOV;
257 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
258 if (negate)
259 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
260 return;
261 } else if (swz == RC_SWIZZLE_ZERO) {
262 inst->U.I.Opcode = RC_OPCODE_MOV;
263 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
264 return;
265 }
266 }
267
268 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
269 if (swz == RC_SWIZZLE_ONE) {
270 inst->U.I.Opcode = RC_OPCODE_MOV;
271 if (negate)
272 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
273 return;
274 } else if (swz == RC_SWIZZLE_ZERO) {
275 inst->U.I.Opcode = RC_OPCODE_MOV;
276 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
277 return;
278 }
279 }
280 }
281
282 static void constant_folding_add(struct rc_instruction * inst)
283 {
284 rc_swizzle swz = 0;
285 unsigned int negate = 0;
286
287 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
288 if (swz == RC_SWIZZLE_ZERO) {
289 inst->U.I.Opcode = RC_OPCODE_MOV;
290 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
291 return;
292 }
293 }
294
295 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
296 if (swz == RC_SWIZZLE_ZERO) {
297 inst->U.I.Opcode = RC_OPCODE_MOV;
298 return;
299 }
300 }
301 }
302
303 /**
304 * Replace 0.0, 1.0 and 0.5 immediate constants by their
305 * respective swizzles. Simplify instructions like ADD dst, src, 0;
306 */
307 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
308 {
309 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
310 unsigned int i;
311
312 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
313 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
314 struct rc_constant * constant;
315 struct rc_src_register newsrc;
316 int have_real_reference;
317 unsigned int chan;
318
319 /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
320 for (chan = 0; chan < 4; ++chan)
321 if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
322 break;
323 if (chan == 4) {
324 inst->U.I.SrcReg[src].File = RC_FILE_NONE;
325 continue;
326 }
327
328 /* Convert immediates to swizzles. */
329 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
330 inst->U.I.SrcReg[src].RelAddr ||
331 inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
332 continue;
333
334 constant =
335 &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
336
337 if (constant->Type != RC_CONSTANT_IMMEDIATE)
338 continue;
339
340 newsrc = inst->U.I.SrcReg[src];
341 have_real_reference = 0;
342 for (chan = 0; chan < 4; ++chan) {
343 unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
344 unsigned int newswz;
345 float imm;
346 float baseimm;
347
348 if (swz >= 4)
349 continue;
350
351 imm = constant->u.Immediate[swz];
352 baseimm = imm;
353 if (imm < 0.0)
354 baseimm = -baseimm;
355
356 if (baseimm == 0.0) {
357 newswz = RC_SWIZZLE_ZERO;
358 } else if (baseimm == 1.0) {
359 newswz = RC_SWIZZLE_ONE;
360 } else if (baseimm == 0.5 && c->has_half_swizzles) {
361 newswz = RC_SWIZZLE_HALF;
362 } else {
363 have_real_reference = 1;
364 continue;
365 }
366
367 SET_SWZ(newsrc.Swizzle, chan, newswz);
368 if (imm < 0.0 && !newsrc.Abs)
369 newsrc.Negate ^= 1 << chan;
370 }
371
372 if (!have_real_reference) {
373 newsrc.File = RC_FILE_NONE;
374 newsrc.Index = 0;
375 }
376
377 /* don't make the swizzle worse */
378 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
379 c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
380 continue;
381
382 inst->U.I.SrcReg[src] = newsrc;
383 }
384
385 /* Simplify instructions based on constants */
386 if (inst->U.I.Opcode == RC_OPCODE_MAD)
387 constant_folding_mad(inst);
388
389 /* note: MAD can simplify to MUL or ADD */
390 if (inst->U.I.Opcode == RC_OPCODE_MUL)
391 constant_folding_mul(inst);
392 else if (inst->U.I.Opcode == RC_OPCODE_ADD)
393 constant_folding_add(inst);
394
395 /* In case this instruction has been converted, make sure all of the
396 * registers that are no longer used are empty. */
397 opcode = rc_get_opcode_info(inst->U.I.Opcode);
398 for(i = opcode->NumSrcRegs; i < 3; i++) {
399 memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
400 }
401 }
402
403 /**
404 * If src and dst use the same register, this function returns a writemask that
405 * indicates wich components are read by src. Otherwise zero is returned.
406 */
407 static unsigned int src_reads_dst_mask(struct rc_src_register src,
408 struct rc_dst_register dst)
409 {
410 if (dst.File != src.File || dst.Index != src.Index) {
411 return 0;
412 }
413 return rc_swizzle_to_writemask(src.Swizzle);
414 }
415
416 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
417 * in any of its channels. Return 0 otherwise. */
418 static int src_has_const_swz(struct rc_src_register src) {
419 int chan;
420 for(chan = 0; chan < 4; chan++) {
421 unsigned int swz = GET_SWZ(src.Swizzle, chan);
422 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
423 || swz == RC_SWIZZLE_ONE) {
424 return 1;
425 }
426 }
427 return 0;
428 }
429
430 static void presub_scan_read(
431 void * data,
432 struct rc_instruction * inst,
433 struct rc_src_register * src)
434 {
435 struct rc_reader_data * reader_data = data;
436 rc_presubtract_op * presub_opcode = reader_data->CbData;
437
438 if (!rc_inst_can_use_presub(inst, *presub_opcode,
439 reader_data->Writer->U.I.DstReg.WriteMask,
440 src,
441 &reader_data->Writer->U.I.SrcReg[0],
442 &reader_data->Writer->U.I.SrcReg[1])) {
443 reader_data->Abort = 1;
444 return;
445 }
446 }
447
448 static int presub_helper(
449 struct radeon_compiler * c,
450 struct rc_instruction * inst_add,
451 rc_presubtract_op presub_opcode,
452 rc_presub_replace_fn presub_replace)
453 {
454 struct rc_reader_data reader_data;
455 unsigned int i;
456 rc_presubtract_op cb_op = presub_opcode;
457
458 reader_data.CbData = &cb_op;
459 reader_data.ExitOnAbort = 1;
460 rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
461 is_src_clobbered_scan_write);
462
463 if (reader_data.Abort || reader_data.ReaderCount == 0)
464 return 0;
465
466 for(i = 0; i < reader_data.ReaderCount; i++) {
467 unsigned int src_index;
468 struct rc_reader reader = reader_data.Readers[i];
469 const struct rc_opcode_info * info =
470 rc_get_opcode_info(reader.Inst->U.I.Opcode);
471
472 for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
473 if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
474 presub_replace(inst_add, reader.Inst, src_index);
475 }
476 }
477 return 1;
478 }
479
480 /* This function assumes that inst_add->U.I.SrcReg[0] and
481 * inst_add->U.I.SrcReg[1] aren't both negative. */
482 static void presub_replace_add(
483 struct rc_instruction * inst_add,
484 struct rc_instruction * inst_reader,
485 unsigned int src_index)
486 {
487 rc_presubtract_op presub_opcode;
488 if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
489 presub_opcode = RC_PRESUB_SUB;
490 else
491 presub_opcode = RC_PRESUB_ADD;
492
493 if (inst_add->U.I.SrcReg[1].Negate) {
494 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
495 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
496 } else {
497 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
498 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
499 }
500 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
501 inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
502 inst_reader->U.I.PreSub.Opcode = presub_opcode;
503 inst_reader->U.I.SrcReg[src_index] =
504 chain_srcregs(inst_reader->U.I.SrcReg[src_index],
505 inst_reader->U.I.PreSub.SrcReg[0]);
506 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
507 inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
508 }
509
510 static int is_presub_candidate(
511 struct radeon_compiler * c,
512 struct rc_instruction * inst)
513 {
514 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
515 unsigned int i;
516 unsigned int is_constant[2] = {0, 0};
517
518 assert(inst->U.I.Opcode == RC_OPCODE_ADD);
519
520 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode)
521 return 0;
522
523 /* If both sources use a constant swizzle, then we can't convert it to
524 * a presubtract operation. In fact for the ADD and SUB presubtract
525 * operations neither source can contain a constant swizzle. This
526 * specific case is checked in peephole_add_presub_add() when
527 * we make sure the swizzles for both sources are equal, so we
528 * don't need to worry about it here. */
529 for (i = 0; i < 2; i++) {
530 int chan;
531 for (chan = 0; chan < 4; chan++) {
532 rc_swizzle swz =
533 get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
534 if (swz == RC_SWIZZLE_ONE
535 || swz == RC_SWIZZLE_ZERO
536 || swz == RC_SWIZZLE_HALF) {
537 is_constant[i] = 1;
538 }
539 }
540 }
541 if (is_constant[0] && is_constant[1])
542 return 0;
543
544 for(i = 0; i < info->NumSrcRegs; i++) {
545 struct rc_src_register src = inst->U.I.SrcReg[i];
546 if (src_reads_dst_mask(src, inst->U.I.DstReg))
547 return 0;
548
549 src.File = RC_FILE_PRESUB;
550 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
551 return 0;
552 }
553 return 1;
554 }
555
556 static int peephole_add_presub_add(
557 struct radeon_compiler * c,
558 struct rc_instruction * inst_add)
559 {
560 struct rc_src_register * src0 = NULL;
561 struct rc_src_register * src1 = NULL;
562 unsigned int i;
563
564 if (!is_presub_candidate(c, inst_add))
565 return 0;
566
567 if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
568 return 0;
569
570 /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */
571 for (i = 0; i < 2; i++) {
572 if (inst_add->U.I.SrcReg[i].Abs)
573 return 0;
574 if ((inst_add->U.I.SrcReg[i].Negate
575 & inst_add->U.I.DstReg.WriteMask) ==
576 inst_add->U.I.DstReg.WriteMask) {
577 src0 = &inst_add->U.I.SrcReg[i];
578 } else if (!src1) {
579 src1 = &inst_add->U.I.SrcReg[i];
580 } else {
581 src0 = &inst_add->U.I.SrcReg[i];
582 }
583 }
584
585 if (!src1)
586 return 0;
587
588 if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
589 rc_remove_instruction(inst_add);
590 return 1;
591 }
592 return 0;
593 }
594
595 static void presub_replace_inv(
596 struct rc_instruction * inst_add,
597 struct rc_instruction * inst_reader,
598 unsigned int src_index)
599 {
600 /* We must be careful not to modify inst_add, since it
601 * is possible it will remain part of the program.*/
602 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
603 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
604 inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
605 inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
606 inst_reader->U.I.PreSub.SrcReg[0]);
607
608 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
609 inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
610 }
611
612 /**
613 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
614 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
615 * of the add instruction must have the constatnt 1 swizzle. This function
616 * does not check const registers to see if their value is 1.0, so it should
617 * be called after the constant_folding optimization.
618 * @return
619 * 0 if the ADD instruction is still part of the program.
620 * 1 if the ADD instruction is no longer part of the program.
621 */
622 static int peephole_add_presub_inv(
623 struct radeon_compiler * c,
624 struct rc_instruction * inst_add)
625 {
626 unsigned int i, swz;
627
628 if (!is_presub_candidate(c, inst_add))
629 return 0;
630
631 /* Check if src0 is 1. */
632 /* XXX It would be nice to use is_src_uniform_constant here, but that
633 * function only works if the register's file is RC_FILE_NONE */
634 for(i = 0; i < 4; i++ ) {
635 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
636 if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
637 && swz != RC_SWIZZLE_ONE) {
638 return 0;
639 }
640 }
641
642 /* Check src1. */
643 if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
644 inst_add->U.I.DstReg.WriteMask
645 || inst_add->U.I.SrcReg[1].Abs
646 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
647 && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
648 || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
649
650 return 0;
651 }
652
653 if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
654 rc_remove_instruction(inst_add);
655 return 1;
656 }
657 return 0;
658 }
659
660 /**
661 * @return
662 * 0 if inst is still part of the program.
663 * 1 if inst is no longer part of the program.
664 */
665 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
666 {
667 switch(inst->U.I.Opcode){
668 case RC_OPCODE_ADD:
669 if (c->has_presub) {
670 if(peephole_add_presub_inv(c, inst))
671 return 1;
672 if(peephole_add_presub_add(c, inst))
673 return 1;
674 }
675 break;
676 default:
677 break;
678 }
679 return 0;
680 }
681
682 void rc_optimize(struct radeon_compiler * c, void *user)
683 {
684 struct rc_instruction * inst = c->Program.Instructions.Next;
685 while(inst != &c->Program.Instructions) {
686 struct rc_instruction * cur = inst;
687 inst = inst->Next;
688
689 constant_folding(c, cur);
690
691 if(peephole(c, cur))
692 continue;
693
694 if (cur->U.I.Opcode == RC_OPCODE_MOV) {
695 copy_propagate(c, cur);
696 /* cur may no longer be part of the program */
697 }
698 }
699 }