r300g: copy the compiler from r300c
[mesa.git] / src / gallium / drivers / r300 / compiler / radeon_optimize.c
1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29 #include "radeon_dataflow.h"
30
31 #include "radeon_compiler.h"
32 #include "radeon_compiler_util.h"
33 #include "radeon_swizzle.h"
34
/* Context passed from is_src_clobbered_scan_write() to
 * src_clobbered_reads_cb(): describes one register write and carries the
 * reader data that gets flagged when that write touches a source of the
 * instruction being propagated. */
struct src_clobbered_reads_cb_data {
	rc_register_file File;			/* register file being written */
	unsigned int Index;			/* register index being written */
	unsigned int Mask;			/* components being written */
	struct rc_reader_data * ReaderData;	/* reader scan state to update */
};
41
/* Callback used by presub_helper() to rewrite one reader. It is invoked as
 * fn(inst_add, reader_inst, src_index), where src_index is the source slot
 * of reader_inst that reads the result of inst_add. */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
					struct rc_instruction *,
					unsigned int);
45
46 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
47 {
48 struct rc_src_register combine;
49 combine.File = inner.File;
50 combine.Index = inner.Index;
51 combine.RelAddr = inner.RelAddr;
52 if (outer.Abs) {
53 combine.Abs = 1;
54 combine.Negate = outer.Negate;
55 } else {
56 combine.Abs = inner.Abs;
57 combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
58 combine.Negate ^= outer.Negate;
59 }
60 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
61 return combine;
62 }
63
64 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
65 struct rc_src_register * src)
66 {
67 rc_register_file file = src->File;
68 struct rc_reader_data * reader_data = data;
69
70 if(!rc_inst_can_use_presub(inst,
71 reader_data->Writer->U.I.PreSub.Opcode,
72 rc_swizzle_to_writemask(src->Swizzle),
73 src,
74 &reader_data->Writer->U.I.PreSub.SrcReg[0],
75 &reader_data->Writer->U.I.PreSub.SrcReg[1])) {
76 reader_data->Abort = 1;
77 return;
78 }
79
80 /* XXX This could probably be handled better. */
81 if (file == RC_FILE_ADDRESS) {
82 reader_data->Abort = 1;
83 return;
84 }
85
86 /* These instructions cannot read from the constants file.
87 * see radeonTransformTEX()
88 */
89 if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
90 reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
91 (inst->U.I.Opcode == RC_OPCODE_TEX ||
92 inst->U.I.Opcode == RC_OPCODE_TXB ||
93 inst->U.I.Opcode == RC_OPCODE_TXP ||
94 inst->U.I.Opcode == RC_OPCODE_TXD ||
95 inst->U.I.Opcode == RC_OPCODE_TXL ||
96 inst->U.I.Opcode == RC_OPCODE_KIL)){
97 reader_data->Abort = 1;
98 return;
99 }
100 }
101
102 static void src_clobbered_reads_cb(
103 void * data,
104 struct rc_instruction * inst,
105 struct rc_src_register * src)
106 {
107 struct src_clobbered_reads_cb_data * sc_data = data;
108
109 if (src->File == sc_data->File
110 && src->Index == sc_data->Index
111 && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
112
113 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
114 }
115
116 if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
117 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
118 }
119 }
120
121 static void is_src_clobbered_scan_write(
122 void * data,
123 struct rc_instruction * inst,
124 rc_register_file file,
125 unsigned int index,
126 unsigned int mask)
127 {
128 struct src_clobbered_reads_cb_data sc_data;
129 struct rc_reader_data * reader_data = data;
130 sc_data.File = file;
131 sc_data.Index = index;
132 sc_data.Mask = mask;
133 sc_data.ReaderData = reader_data;
134 rc_for_all_reads_src(reader_data->Writer,
135 src_clobbered_reads_cb, &sc_data);
136 }
137
138 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
139 {
140 struct rc_reader_data reader_data;
141 unsigned int i;
142
143 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
144 inst_mov->U.I.WriteALUResult ||
145 inst_mov->U.I.SaturateMode)
146 return;
147
148 /* Get a list of all the readers of this MOV instruction. */
149 reader_data.ExitOnAbort = 1;
150 rc_get_readers(c, inst_mov, &reader_data,
151 copy_propagate_scan_read, NULL,
152 is_src_clobbered_scan_write);
153
154 if (reader_data.Abort || reader_data.ReaderCount == 0)
155 return;
156
157 /* Propagate the MOV instruction. */
158 for (i = 0; i < reader_data.ReaderCount; i++) {
159 struct rc_instruction * inst = reader_data.Readers[i].Inst;
160 *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
161
162 if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
163 inst->U.I.PreSub = inst_mov->U.I.PreSub;
164 }
165
166 /* Finally, remove the original MOV instruction */
167 rc_remove_instruction(inst_mov);
168 }
169
170 /**
171 * Check if a source register is actually always the same
172 * swizzle constant.
173 */
174 static int is_src_uniform_constant(struct rc_src_register src,
175 rc_swizzle * pswz, unsigned int * pnegate)
176 {
177 int have_used = 0;
178
179 if (src.File != RC_FILE_NONE) {
180 *pswz = 0;
181 return 0;
182 }
183
184 for(unsigned int chan = 0; chan < 4; ++chan) {
185 unsigned int swz = GET_SWZ(src.Swizzle, chan);
186 if (swz < 4) {
187 *pswz = 0;
188 return 0;
189 }
190 if (swz == RC_SWIZZLE_UNUSED)
191 continue;
192
193 if (!have_used) {
194 *pswz = swz;
195 *pnegate = GET_BIT(src.Negate, chan);
196 have_used = 1;
197 } else {
198 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
199 *pswz = 0;
200 return 0;
201 }
202 }
203 }
204
205 return 1;
206 }
207
208 static void constant_folding_mad(struct rc_instruction * inst)
209 {
210 rc_swizzle swz = 0;
211 unsigned int negate= 0;
212
213 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
214 if (swz == RC_SWIZZLE_ZERO) {
215 inst->U.I.Opcode = RC_OPCODE_MUL;
216 return;
217 }
218 }
219
220 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
221 if (swz == RC_SWIZZLE_ONE) {
222 inst->U.I.Opcode = RC_OPCODE_ADD;
223 if (negate)
224 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
225 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
226 return;
227 } else if (swz == RC_SWIZZLE_ZERO) {
228 inst->U.I.Opcode = RC_OPCODE_MOV;
229 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
230 return;
231 }
232 }
233
234 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
235 if (swz == RC_SWIZZLE_ONE) {
236 inst->U.I.Opcode = RC_OPCODE_ADD;
237 if (negate)
238 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
239 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
240 return;
241 } else if (swz == RC_SWIZZLE_ZERO) {
242 inst->U.I.Opcode = RC_OPCODE_MOV;
243 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
244 return;
245 }
246 }
247 }
248
249 static void constant_folding_mul(struct rc_instruction * inst)
250 {
251 rc_swizzle swz = 0;
252 unsigned int negate = 0;
253
254 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
255 if (swz == RC_SWIZZLE_ONE) {
256 inst->U.I.Opcode = RC_OPCODE_MOV;
257 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
258 if (negate)
259 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
260 return;
261 } else if (swz == RC_SWIZZLE_ZERO) {
262 inst->U.I.Opcode = RC_OPCODE_MOV;
263 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
264 return;
265 }
266 }
267
268 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
269 if (swz == RC_SWIZZLE_ONE) {
270 inst->U.I.Opcode = RC_OPCODE_MOV;
271 if (negate)
272 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
273 return;
274 } else if (swz == RC_SWIZZLE_ZERO) {
275 inst->U.I.Opcode = RC_OPCODE_MOV;
276 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
277 return;
278 }
279 }
280 }
281
282 static void constant_folding_add(struct rc_instruction * inst)
283 {
284 rc_swizzle swz = 0;
285 unsigned int negate = 0;
286
287 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
288 if (swz == RC_SWIZZLE_ZERO) {
289 inst->U.I.Opcode = RC_OPCODE_MOV;
290 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
291 return;
292 }
293 }
294
295 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
296 if (swz == RC_SWIZZLE_ZERO) {
297 inst->U.I.Opcode = RC_OPCODE_MOV;
298 return;
299 }
300 }
301 }
302
/**
 * Replace 0.0, 1.0 and 0.5 immediate constants by their
 * respective swizzles. Simplify instructions like ADD dst, src, 0;
 *
 * Works in three steps:
 *  1. For each source, either mark it as RC_FILE_NONE (when it already uses
 *     only constant/unused swizzles) or, if it reads an immediate constant,
 *     rewrite the channels whose value is +/-0, +/-1 or +/-0.5.
 *  2. Run the opcode-specific folders for MAD, MUL and ADD.
 *  3. Zero any source slots the (possibly changed) opcode no longer uses.
 *
 * @param c    compiler context; supplies the constant table, the swizzle
 *             capability callbacks and the has_half_swizzles flag
 * @param inst instruction to simplify in place
 */
static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
{
	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
	unsigned int i;

	/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
		struct rc_constant * constant;
		struct rc_src_register newsrc;
		int have_real_reference;
		unsigned int chan;

		/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
		for (chan = 0; chan < 4; ++chan)
			if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
				break;
		if (chan == 4) {
			inst->U.I.SrcReg[src].File = RC_FILE_NONE;
			continue;
		}

		/* Convert immediates to swizzles. */
		/* Only directly addressed, in-range entries of the constant
		 * file can be looked up. */
		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
		    inst->U.I.SrcReg[src].RelAddr ||
		    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
			continue;

		constant =
			&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];

		if (constant->Type != RC_CONSTANT_IMMEDIATE)
			continue;

		/* Build the replacement in a copy so it can still be rejected. */
		newsrc = inst->U.I.SrcReg[src];
		have_real_reference = 0;
		for (chan = 0; chan < 4; ++chan) {
			unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
			unsigned int newswz;
			float imm;
			float baseimm;

			/* Channel already uses a constant or unused swizzle. */
			if (swz >= 4)
				continue;

			/* baseimm is the magnitude of the immediate. */
			imm = constant->u.Immediate[swz];
			baseimm = imm;
			if (imm < 0.0)
				baseimm = -baseimm;

			if (baseimm == 0.0) {
				newswz = RC_SWIZZLE_ZERO;
			} else if (baseimm == 1.0) {
				newswz = RC_SWIZZLE_ONE;
			} else if (baseimm == 0.5 && c->has_half_swizzles) {
				newswz = RC_SWIZZLE_HALF;
			} else {
				/* This channel still needs the constant register. */
				have_real_reference = 1;
				continue;
			}

			SET_SWZ(newsrc.Swizzle, chan, newswz);
			/* Express a negative immediate through the negate bit;
			 * skipped when Abs is set on the source. */
			if (imm < 0.0 && !newsrc.Abs)
				newsrc.Negate ^= 1 << chan;
		}

		/* No channel reads the constant register any more. */
		if (!have_real_reference) {
			newsrc.File = RC_FILE_NONE;
			newsrc.Index = 0;
		}

		/* don't make the swizzle worse */
		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
		    c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
			continue;

		inst->U.I.SrcReg[src] = newsrc;
	}

	/* Simplify instructions based on constants */
	if (inst->U.I.Opcode == RC_OPCODE_MAD)
		constant_folding_mad(inst);

	/* note: MAD can simplify to MUL or ADD */
	if (inst->U.I.Opcode == RC_OPCODE_MUL)
		constant_folding_mul(inst);
	else if (inst->U.I.Opcode == RC_OPCODE_ADD)
		constant_folding_add(inst);

	/* In case this instruction has been converted, make sure all of the
	 * registers that are no longer used are empty. */
	opcode = rc_get_opcode_info(inst->U.I.Opcode);
	for(i = opcode->NumSrcRegs; i < 3; i++) {
		memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
	}
}
402
403 /**
404 * If src and dst use the same register, this function returns a writemask that
405 * indicates wich components are read by src. Otherwise zero is returned.
406 */
407 static unsigned int src_reads_dst_mask(struct rc_src_register src,
408 struct rc_dst_register dst)
409 {
410 if (dst.File != src.File || dst.Index != src.Index) {
411 return 0;
412 }
413 return rc_swizzle_to_writemask(src.Swizzle);
414 }
415
416 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
417 * in any of its channels. Return 0 otherwise. */
418 static int src_has_const_swz(struct rc_src_register src) {
419 int chan;
420 for(chan = 0; chan < 4; chan++) {
421 unsigned int swz = GET_SWZ(src.Swizzle, chan);
422 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
423 || swz == RC_SWIZZLE_ONE) {
424 return 1;
425 }
426 }
427 return 0;
428 }
429
430 static void presub_scan_read(
431 void * data,
432 struct rc_instruction * inst,
433 struct rc_src_register * src)
434 {
435 struct rc_reader_data * reader_data = data;
436 rc_presubtract_op * presub_opcode = reader_data->CbData;
437
438 if (!rc_inst_can_use_presub(inst, *presub_opcode,
439 reader_data->Writer->U.I.DstReg.WriteMask,
440 src,
441 &reader_data->Writer->U.I.SrcReg[0],
442 &reader_data->Writer->U.I.SrcReg[1])) {
443 reader_data->Abort = 1;
444 return;
445 }
446 }
447
448 static int presub_helper(
449 struct radeon_compiler * c,
450 struct rc_instruction * inst_add,
451 rc_presubtract_op presub_opcode,
452 rc_presub_replace_fn presub_replace)
453 {
454 struct rc_reader_data reader_data;
455 unsigned int i;
456 rc_presubtract_op cb_op = presub_opcode;
457
458 reader_data.CbData = &cb_op;
459 reader_data.ExitOnAbort = 1;
460 rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
461 is_src_clobbered_scan_write);
462
463 if (reader_data.Abort || reader_data.ReaderCount == 0)
464 return 0;
465
466 for(i = 0; i < reader_data.ReaderCount; i++) {
467 unsigned int src_index;
468 struct rc_reader reader = reader_data.Readers[i];
469 const struct rc_opcode_info * info =
470 rc_get_opcode_info(reader.Inst->U.I.Opcode);
471
472 for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
473 if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
474 presub_replace(inst_add, reader.Inst, src_index);
475 }
476 }
477 return 1;
478 }
479
480 /* This function assumes that inst_add->U.I.SrcReg[0] and
481 * inst_add->U.I.SrcReg[1] aren't both negative. */
482 static void presub_replace_add(
483 struct rc_instruction * inst_add,
484 struct rc_instruction * inst_reader,
485 unsigned int src_index)
486 {
487 rc_presubtract_op presub_opcode;
488 if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
489 presub_opcode = RC_PRESUB_SUB;
490 else
491 presub_opcode = RC_PRESUB_ADD;
492
493 if (inst_add->U.I.SrcReg[1].Negate) {
494 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
495 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
496 } else {
497 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
498 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
499 }
500 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
501 inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
502 inst_reader->U.I.PreSub.Opcode = presub_opcode;
503 inst_reader->U.I.SrcReg[src_index] =
504 chain_srcregs(inst_reader->U.I.SrcReg[src_index],
505 inst_reader->U.I.PreSub.SrcReg[0]);
506 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
507 inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
508 }
509
510 static int is_presub_candidate(
511 struct radeon_compiler * c,
512 struct rc_instruction * inst)
513 {
514 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
515 unsigned int i;
516 unsigned int is_constant[2] = {0, 0};
517
518 assert(inst->U.I.Opcode == RC_OPCODE_ADD);
519
520 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
521 || inst->U.I.SaturateMode
522 || inst->U.I.WriteALUResult) {
523 return 0;
524 }
525
526 /* If both sources use a constant swizzle, then we can't convert it to
527 * a presubtract operation. In fact for the ADD and SUB presubtract
528 * operations neither source can contain a constant swizzle. This
529 * specific case is checked in peephole_add_presub_add() when
530 * we make sure the swizzles for both sources are equal, so we
531 * don't need to worry about it here. */
532 for (i = 0; i < 2; i++) {
533 int chan;
534 for (chan = 0; chan < 4; chan++) {
535 rc_swizzle swz =
536 get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
537 if (swz == RC_SWIZZLE_ONE
538 || swz == RC_SWIZZLE_ZERO
539 || swz == RC_SWIZZLE_HALF) {
540 is_constant[i] = 1;
541 }
542 }
543 }
544 if (is_constant[0] && is_constant[1])
545 return 0;
546
547 for(i = 0; i < info->NumSrcRegs; i++) {
548 struct rc_src_register src = inst->U.I.SrcReg[i];
549 if (src_reads_dst_mask(src, inst->U.I.DstReg))
550 return 0;
551
552 src.File = RC_FILE_PRESUB;
553 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
554 return 0;
555 }
556 return 1;
557 }
558
559 static int peephole_add_presub_add(
560 struct radeon_compiler * c,
561 struct rc_instruction * inst_add)
562 {
563 unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
564 unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
565 unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
566
567 if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
568 return 0;
569
570 /* src0 and src1 can't have absolute values */
571 if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
572 return 0;
573
574 /* presub_replace_add() assumes only one is negative */
575 if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
576 return 0;
577
578 /* if src0 is negative, at least all bits of dstmask have to be set */
579 if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
580 return 0;
581
582 /* if src1 is negative, at least all bits of dstmask have to be set */
583 if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
584 return 0;
585
586 if (!is_presub_candidate(c, inst_add))
587 return 0;
588
589 if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
590 rc_remove_instruction(inst_add);
591 return 1;
592 }
593 return 0;
594 }
595
596 static void presub_replace_inv(
597 struct rc_instruction * inst_add,
598 struct rc_instruction * inst_reader,
599 unsigned int src_index)
600 {
601 /* We must be careful not to modify inst_add, since it
602 * is possible it will remain part of the program.*/
603 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
604 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
605 inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
606 inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
607 inst_reader->U.I.PreSub.SrcReg[0]);
608
609 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
610 inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
611 }
612
613 /**
614 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
615 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
616 * of the add instruction must have the constatnt 1 swizzle. This function
617 * does not check const registers to see if their value is 1.0, so it should
618 * be called after the constant_folding optimization.
619 * @return
620 * 0 if the ADD instruction is still part of the program.
621 * 1 if the ADD instruction is no longer part of the program.
622 */
623 static int peephole_add_presub_inv(
624 struct radeon_compiler * c,
625 struct rc_instruction * inst_add)
626 {
627 unsigned int i, swz;
628
629 if (!is_presub_candidate(c, inst_add))
630 return 0;
631
632 /* Check if src0 is 1. */
633 /* XXX It would be nice to use is_src_uniform_constant here, but that
634 * function only works if the register's file is RC_FILE_NONE */
635 for(i = 0; i < 4; i++ ) {
636 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
637 if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
638 && swz != RC_SWIZZLE_ONE) {
639 return 0;
640 }
641 }
642
643 /* Check src1. */
644 if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
645 inst_add->U.I.DstReg.WriteMask
646 || inst_add->U.I.SrcReg[1].Abs
647 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
648 && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
649 || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
650
651 return 0;
652 }
653
654 if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
655 rc_remove_instruction(inst_add);
656 return 1;
657 }
658 return 0;
659 }
660
661 /**
662 * @return
663 * 0 if inst is still part of the program.
664 * 1 if inst is no longer part of the program.
665 */
666 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
667 {
668 switch(inst->U.I.Opcode){
669 case RC_OPCODE_ADD:
670 if (c->has_presub) {
671 if(peephole_add_presub_inv(c, inst))
672 return 1;
673 if(peephole_add_presub_add(c, inst))
674 return 1;
675 }
676 break;
677 default:
678 break;
679 }
680 return 0;
681 }
682
683 void rc_optimize(struct radeon_compiler * c, void *user)
684 {
685 struct rc_instruction * inst = c->Program.Instructions.Next;
686 while(inst != &c->Program.Instructions) {
687 struct rc_instruction * cur = inst;
688 inst = inst->Next;
689
690 constant_folding(c, cur);
691
692 if(peephole(c, cur))
693 continue;
694
695 if (cur->U.I.Opcode == RC_OPCODE_MOV) {
696 copy_propagate(c, cur);
697 /* cur may no longer be part of the program */
698 }
699 }
700 }