2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 #include "radeon_dataflow.h"
31 #include "radeon_compiler.h"
32 #include "radeon_compiler_util.h"
33 #include "radeon_swizzle.h"
35 struct src_clobbered_reads_cb_data
{
36 rc_register_file File
;
39 struct rc_reader_data
* ReaderData
;
/*
 * Callback type used by presub_helper().  It is invoked as
 * fn(inst_add, reader_inst, src_index) for each reader source operand that
 * should be rewritten to use the presubtract result.
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
				     struct rc_instruction *,
				     unsigned int);
46 static struct rc_src_register
chain_srcregs(struct rc_src_register outer
, struct rc_src_register inner
)
48 struct rc_src_register combine
;
49 combine
.File
= inner
.File
;
50 combine
.Index
= inner
.Index
;
51 combine
.RelAddr
= inner
.RelAddr
;
54 combine
.Negate
= outer
.Negate
;
56 combine
.Abs
= inner
.Abs
;
57 combine
.Negate
= swizzle_mask(outer
.Swizzle
, inner
.Negate
);
58 combine
.Negate
^= outer
.Negate
;
60 combine
.Swizzle
= combine_swizzles(inner
.Swizzle
, outer
.Swizzle
);
64 static void copy_propagate_scan_read(void * data
, struct rc_instruction
* inst
,
65 struct rc_src_register
* src
)
67 rc_register_file file
= src
->File
;
68 struct rc_reader_data
* reader_data
= data
;
70 if(!rc_inst_can_use_presub(inst
,
71 reader_data
->Writer
->U
.I
.PreSub
.Opcode
,
72 rc_swizzle_to_writemask(src
->Swizzle
),
74 reader_data
->Writer
->U
.I
.PreSub
.SrcReg
[0],
75 reader_data
->Writer
->U
.I
.PreSub
.SrcReg
[1])) {
76 reader_data
->Abort
= 1;
80 /* XXX This could probably be handled better. */
81 if (file
== RC_FILE_ADDRESS
) {
82 reader_data
->Abort
= 1;
86 /* These instructions cannot read from the constants file.
87 * see radeonTransformTEX()
89 if(reader_data
->Writer
->U
.I
.SrcReg
[0].File
!= RC_FILE_TEMPORARY
&&
90 reader_data
->Writer
->U
.I
.SrcReg
[0].File
!= RC_FILE_INPUT
&&
91 (inst
->U
.I
.Opcode
== RC_OPCODE_TEX
||
92 inst
->U
.I
.Opcode
== RC_OPCODE_TXB
||
93 inst
->U
.I
.Opcode
== RC_OPCODE_TXP
||
94 inst
->U
.I
.Opcode
== RC_OPCODE_KIL
)){
95 reader_data
->Abort
= 1;
100 static void src_clobbered_reads_cb(
102 struct rc_instruction
* inst
,
103 struct rc_src_register
* src
)
105 struct src_clobbered_reads_cb_data
* sc_data
= data
;
107 if (src
->File
== sc_data
->File
108 && src
->Index
== sc_data
->Index
109 && (rc_swizzle_to_writemask(src
->Swizzle
) & sc_data
->Mask
)) {
111 sc_data
->ReaderData
->AbortOnRead
= RC_MASK_XYZW
;
114 if (src
->RelAddr
&& sc_data
->File
== RC_FILE_ADDRESS
) {
115 sc_data
->ReaderData
->AbortOnRead
= RC_MASK_XYZW
;
119 static void is_src_clobbered_scan_write(
121 struct rc_instruction
* inst
,
122 rc_register_file file
,
126 struct src_clobbered_reads_cb_data sc_data
;
127 struct rc_reader_data
* reader_data
= data
;
129 sc_data
.Index
= index
;
131 sc_data
.ReaderData
= reader_data
;
132 rc_for_all_reads_src(reader_data
->Writer
,
133 src_clobbered_reads_cb
, &sc_data
);
136 static void copy_propagate(struct radeon_compiler
* c
, struct rc_instruction
* inst_mov
)
138 struct rc_reader_data reader_data
;
141 if (inst_mov
->U
.I
.DstReg
.File
!= RC_FILE_TEMPORARY
||
142 inst_mov
->U
.I
.WriteALUResult
||
143 inst_mov
->U
.I
.SaturateMode
)
146 /* Get a list of all the readers of this MOV instruction. */
147 rc_get_readers(c
, inst_mov
, &reader_data
,
148 copy_propagate_scan_read
, NULL
,
149 is_src_clobbered_scan_write
);
151 if (reader_data
.Abort
|| reader_data
.ReaderCount
== 0)
154 /* Propagate the MOV instruction. */
155 for (i
= 0; i
< reader_data
.ReaderCount
; i
++) {
156 struct rc_instruction
* inst
= reader_data
.Readers
[i
].Inst
;
157 *reader_data
.Readers
[i
].U
.Src
= chain_srcregs(*reader_data
.Readers
[i
].U
.Src
, inst_mov
->U
.I
.SrcReg
[0]);
159 if (inst_mov
->U
.I
.SrcReg
[0].File
== RC_FILE_PRESUB
)
160 inst
->U
.I
.PreSub
= inst_mov
->U
.I
.PreSub
;
163 /* Finally, remove the original MOV instruction */
164 rc_remove_instruction(inst_mov
);
168 * Check if a source register is actually always the same
171 static int is_src_uniform_constant(struct rc_src_register src
,
172 rc_swizzle
* pswz
, unsigned int * pnegate
)
176 if (src
.File
!= RC_FILE_NONE
) {
181 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
182 unsigned int swz
= GET_SWZ(src
.Swizzle
, chan
);
187 if (swz
== RC_SWIZZLE_UNUSED
)
192 *pnegate
= GET_BIT(src
.Negate
, chan
);
195 if (swz
!= *pswz
|| *pnegate
!= GET_BIT(src
.Negate
, chan
)) {
205 static void constant_folding_mad(struct rc_instruction
* inst
)
208 unsigned int negate
= 0;
210 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[2], &swz
, &negate
)) {
211 if (swz
== RC_SWIZZLE_ZERO
) {
212 inst
->U
.I
.Opcode
= RC_OPCODE_MUL
;
217 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
218 if (swz
== RC_SWIZZLE_ONE
) {
219 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
221 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
222 inst
->U
.I
.SrcReg
[1] = inst
->U
.I
.SrcReg
[2];
224 } else if (swz
== RC_SWIZZLE_ZERO
) {
225 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
226 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
231 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
232 if (swz
== RC_SWIZZLE_ONE
) {
233 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
235 inst
->U
.I
.SrcReg
[1].Negate
^= RC_MASK_XYZW
;
236 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
238 } else if (swz
== RC_SWIZZLE_ZERO
) {
239 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
240 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
246 static void constant_folding_mul(struct rc_instruction
* inst
)
249 unsigned int negate
= 0;
251 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
252 if (swz
== RC_SWIZZLE_ONE
) {
253 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
254 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
256 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
258 } else if (swz
== RC_SWIZZLE_ZERO
) {
259 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
260 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
265 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
266 if (swz
== RC_SWIZZLE_ONE
) {
267 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
269 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
271 } else if (swz
== RC_SWIZZLE_ZERO
) {
272 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
273 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
279 static void constant_folding_add(struct rc_instruction
* inst
)
282 unsigned int negate
= 0;
284 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
285 if (swz
== RC_SWIZZLE_ZERO
) {
286 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
287 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
292 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
293 if (swz
== RC_SWIZZLE_ZERO
) {
294 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
301 * Replace 0.0, 1.0 and 0.5 immediate constants by their
302 * respective swizzles. Simplify instructions like ADD dst, src, 0;
304 static void constant_folding(struct radeon_compiler
* c
, struct rc_instruction
* inst
)
306 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
309 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
310 for(unsigned int src
= 0; src
< opcode
->NumSrcRegs
; ++src
) {
311 struct rc_constant
* constant
;
312 struct rc_src_register newsrc
;
313 int have_real_reference
;
316 /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
317 for (chan
= 0; chan
< 4; ++chan
)
318 if (GET_SWZ(inst
->U
.I
.SrcReg
[src
].Swizzle
, chan
) <= 3)
321 inst
->U
.I
.SrcReg
[src
].File
= RC_FILE_NONE
;
325 /* Convert immediates to swizzles. */
326 if (inst
->U
.I
.SrcReg
[src
].File
!= RC_FILE_CONSTANT
||
327 inst
->U
.I
.SrcReg
[src
].RelAddr
||
328 inst
->U
.I
.SrcReg
[src
].Index
>= c
->Program
.Constants
.Count
)
332 &c
->Program
.Constants
.Constants
[inst
->U
.I
.SrcReg
[src
].Index
];
334 if (constant
->Type
!= RC_CONSTANT_IMMEDIATE
)
337 newsrc
= inst
->U
.I
.SrcReg
[src
];
338 have_real_reference
= 0;
339 for (chan
= 0; chan
< 4; ++chan
) {
340 unsigned int swz
= GET_SWZ(newsrc
.Swizzle
, chan
);
348 imm
= constant
->u
.Immediate
[swz
];
353 if (baseimm
== 0.0) {
354 newswz
= RC_SWIZZLE_ZERO
;
355 } else if (baseimm
== 1.0) {
356 newswz
= RC_SWIZZLE_ONE
;
357 } else if (baseimm
== 0.5 && c
->has_half_swizzles
) {
358 newswz
= RC_SWIZZLE_HALF
;
360 have_real_reference
= 1;
364 SET_SWZ(newsrc
.Swizzle
, chan
, newswz
);
365 if (imm
< 0.0 && !newsrc
.Abs
)
366 newsrc
.Negate
^= 1 << chan
;
369 if (!have_real_reference
) {
370 newsrc
.File
= RC_FILE_NONE
;
374 /* don't make the swizzle worse */
375 if (!c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, newsrc
) &&
376 c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, inst
->U
.I
.SrcReg
[src
]))
379 inst
->U
.I
.SrcReg
[src
] = newsrc
;
382 /* Simplify instructions based on constants */
383 if (inst
->U
.I
.Opcode
== RC_OPCODE_MAD
)
384 constant_folding_mad(inst
);
386 /* note: MAD can simplify to MUL or ADD */
387 if (inst
->U
.I
.Opcode
== RC_OPCODE_MUL
)
388 constant_folding_mul(inst
);
389 else if (inst
->U
.I
.Opcode
== RC_OPCODE_ADD
)
390 constant_folding_add(inst
);
392 /* In case this instruction has been converted, make sure all of the
393 * registers that are no longer used are empty. */
394 opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
395 for(i
= opcode
->NumSrcRegs
; i
< 3; i
++) {
396 memset(&inst
->U
.I
.SrcReg
[i
], 0, sizeof(struct rc_src_register
));
401 * If src and dst use the same register, this function returns a writemask that
402 * indicates wich components are read by src. Otherwise zero is returned.
404 static unsigned int src_reads_dst_mask(struct rc_src_register src
,
405 struct rc_dst_register dst
)
407 if (dst
.File
!= src
.File
|| dst
.Index
!= src
.Index
) {
410 return rc_swizzle_to_writemask(src
.Swizzle
);
413 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
414 * in any of its channels. Return 0 otherwise. */
415 static int src_has_const_swz(struct rc_src_register src
) {
417 for(chan
= 0; chan
< 4; chan
++) {
418 unsigned int swz
= GET_SWZ(src
.Swizzle
, chan
);
419 if (swz
== RC_SWIZZLE_ZERO
|| swz
== RC_SWIZZLE_HALF
420 || swz
== RC_SWIZZLE_ONE
) {
427 static void presub_scan_read(
429 struct rc_instruction
* inst
,
430 struct rc_src_register
* src
)
432 struct rc_reader_data
* reader_data
= data
;
433 rc_presubtract_op
* presub_opcode
= reader_data
->CbData
;
435 if (!rc_inst_can_use_presub(inst
, *presub_opcode
,
436 reader_data
->Writer
->U
.I
.DstReg
.WriteMask
,
438 reader_data
->Writer
->U
.I
.SrcReg
[0],
439 reader_data
->Writer
->U
.I
.SrcReg
[1])) {
440 reader_data
->Abort
= 1;
445 static int presub_helper(
446 struct radeon_compiler
* c
,
447 struct rc_instruction
* inst_add
,
448 rc_presubtract_op presub_opcode
,
449 rc_presub_replace_fn presub_replace
)
451 struct rc_reader_data reader_data
;
453 rc_presubtract_op cb_op
= presub_opcode
;
455 reader_data
.CbData
= &cb_op
;
456 rc_get_readers(c
, inst_add
, &reader_data
, presub_scan_read
, NULL
,
457 is_src_clobbered_scan_write
);
459 if (reader_data
.Abort
|| reader_data
.ReaderCount
== 0)
462 for(i
= 0; i
< reader_data
.ReaderCount
; i
++) {
463 unsigned int src_index
;
464 struct rc_reader reader
= reader_data
.Readers
[i
];
465 const struct rc_opcode_info
* info
=
466 rc_get_opcode_info(reader
.Inst
->U
.I
.Opcode
);
468 for (src_index
= 0; src_index
< info
->NumSrcRegs
; src_index
++) {
469 if (&reader
.Inst
->U
.I
.SrcReg
[src_index
] == reader
.U
.Src
)
470 presub_replace(inst_add
, reader
.Inst
, src_index
);
476 /* This function assumes that inst_add->U.I.SrcReg[0] and
477 * inst_add->U.I.SrcReg[1] aren't both negative. */
478 static void presub_replace_add(
479 struct rc_instruction
* inst_add
,
480 struct rc_instruction
* inst_reader
,
481 unsigned int src_index
)
483 rc_presubtract_op presub_opcode
;
484 if (inst_add
->U
.I
.SrcReg
[1].Negate
|| inst_add
->U
.I
.SrcReg
[0].Negate
)
485 presub_opcode
= RC_PRESUB_SUB
;
487 presub_opcode
= RC_PRESUB_ADD
;
489 if (inst_add
->U
.I
.SrcReg
[1].Negate
) {
490 inst_reader
->U
.I
.PreSub
.SrcReg
[0] = inst_add
->U
.I
.SrcReg
[1];
491 inst_reader
->U
.I
.PreSub
.SrcReg
[1] = inst_add
->U
.I
.SrcReg
[0];
493 inst_reader
->U
.I
.PreSub
.SrcReg
[0] = inst_add
->U
.I
.SrcReg
[0];
494 inst_reader
->U
.I
.PreSub
.SrcReg
[1] = inst_add
->U
.I
.SrcReg
[1];
496 inst_reader
->U
.I
.PreSub
.SrcReg
[0].Negate
= 0;
497 inst_reader
->U
.I
.PreSub
.SrcReg
[1].Negate
= 0;
498 inst_reader
->U
.I
.PreSub
.Opcode
= presub_opcode
;
499 inst_reader
->U
.I
.SrcReg
[src_index
] =
500 chain_srcregs(inst_reader
->U
.I
.SrcReg
[src_index
],
501 inst_reader
->U
.I
.PreSub
.SrcReg
[0]);
502 inst_reader
->U
.I
.SrcReg
[src_index
].File
= RC_FILE_PRESUB
;
503 inst_reader
->U
.I
.SrcReg
[src_index
].Index
= presub_opcode
;
506 static int is_presub_candidate(
507 struct radeon_compiler
* c
,
508 struct rc_instruction
* inst
)
510 const struct rc_opcode_info
* info
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
512 unsigned int is_constant
[2] = {0, 0};
514 assert(inst
->U
.I
.Opcode
== RC_OPCODE_ADD
);
516 if (inst
->U
.I
.PreSub
.Opcode
!= RC_PRESUB_NONE
|| inst
->U
.I
.SaturateMode
)
519 /* If both sources use a constant swizzle, then we can't convert it to
520 * a presubtract operation. In fact for the ADD and SUB presubtract
521 * operations neither source can contain a constant swizzle. This
522 * specific case is checked in peephole_add_presub_add() when
523 * we make sure the swizzles for both sources are equal, so we
524 * don't need to worry about it here. */
525 for (i
= 0; i
< 2; i
++) {
527 for (chan
= 0; chan
< 4; chan
++) {
529 get_swz(inst
->U
.I
.SrcReg
[i
].Swizzle
, chan
);
530 if (swz
== RC_SWIZZLE_ONE
531 || swz
== RC_SWIZZLE_ZERO
532 || swz
== RC_SWIZZLE_HALF
) {
537 if (is_constant
[0] && is_constant
[1])
540 for(i
= 0; i
< info
->NumSrcRegs
; i
++) {
541 struct rc_src_register src
= inst
->U
.I
.SrcReg
[i
];
542 if (src_reads_dst_mask(src
, inst
->U
.I
.DstReg
))
545 src
.File
= RC_FILE_PRESUB
;
546 if (!c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, src
))
552 static int peephole_add_presub_add(
553 struct radeon_compiler
* c
,
554 struct rc_instruction
* inst_add
)
556 struct rc_src_register
* src0
= NULL
;
557 struct rc_src_register
* src1
= NULL
;
560 if (!is_presub_candidate(c
, inst_add
))
563 if (inst_add
->U
.I
.SrcReg
[0].Swizzle
!= inst_add
->U
.I
.SrcReg
[1].Swizzle
)
566 /* src0 and src1 can't have absolute values; only one can be negative, and each must be all negative or all positive. */
567 for (i
= 0; i
< 2; i
++) {
568 if (inst_add
->U
.I
.SrcReg
[i
].Abs
)
570 if ((inst_add
->U
.I
.SrcReg
[i
].Negate
571 & inst_add
->U
.I
.DstReg
.WriteMask
) ==
572 inst_add
->U
.I
.DstReg
.WriteMask
) {
573 src0
= &inst_add
->U
.I
.SrcReg
[i
];
575 src1
= &inst_add
->U
.I
.SrcReg
[i
];
577 src0
= &inst_add
->U
.I
.SrcReg
[i
];
584 if (presub_helper(c
, inst_add
, RC_PRESUB_ADD
, presub_replace_add
)) {
585 rc_remove_instruction(inst_add
);
591 static void presub_replace_inv(
592 struct rc_instruction
* inst_add
,
593 struct rc_instruction
* inst_reader
,
594 unsigned int src_index
)
596 /* We must be careful not to modify inst_add, since it
597 * is possible it will remain part of the program.*/
598 inst_reader
->U
.I
.PreSub
.SrcReg
[0] = inst_add
->U
.I
.SrcReg
[1];
599 inst_reader
->U
.I
.PreSub
.SrcReg
[0].Negate
= 0;
600 inst_reader
->U
.I
.PreSub
.Opcode
= RC_PRESUB_INV
;
601 inst_reader
->U
.I
.SrcReg
[src_index
] = chain_srcregs(inst_reader
->U
.I
.SrcReg
[src_index
],
602 inst_reader
->U
.I
.PreSub
.SrcReg
[0]);
604 inst_reader
->U
.I
.SrcReg
[src_index
].File
= RC_FILE_PRESUB
;
605 inst_reader
->U
.I
.SrcReg
[src_index
].Index
= RC_PRESUB_INV
;
609 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
610 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
611 * of the add instruction must have the constatnt 1 swizzle. This function
612 * does not check const registers to see if their value is 1.0, so it should
613 * be called after the constant_folding optimization.
615 * 0 if the ADD instruction is still part of the program.
616 * 1 if the ADD instruction is no longer part of the program.
618 static int peephole_add_presub_inv(
619 struct radeon_compiler
* c
,
620 struct rc_instruction
* inst_add
)
622 unsigned int i
, swz
, mask
;
624 if (!is_presub_candidate(c
, inst_add
))
627 mask
= inst_add
->U
.I
.DstReg
.WriteMask
;
629 /* Check if src0 is 1. */
630 /* XXX It would be nice to use is_src_uniform_constant here, but that
631 * function only works if the register's file is RC_FILE_NONE */
632 for(i
= 0; i
< 4; i
++ ) {
633 swz
= GET_SWZ(inst_add
->U
.I
.SrcReg
[0].Swizzle
, i
);
634 if(((1 << i
) & inst_add
->U
.I
.DstReg
.WriteMask
)
635 && swz
!= RC_SWIZZLE_ONE
) {
641 if ((inst_add
->U
.I
.SrcReg
[1].Negate
& inst_add
->U
.I
.DstReg
.WriteMask
) !=
642 inst_add
->U
.I
.DstReg
.WriteMask
643 || inst_add
->U
.I
.SrcReg
[1].Abs
644 || (inst_add
->U
.I
.SrcReg
[1].File
!= RC_FILE_TEMPORARY
645 && inst_add
->U
.I
.SrcReg
[1].File
!= RC_FILE_CONSTANT
)
646 || src_has_const_swz(inst_add
->U
.I
.SrcReg
[1])) {
651 if (presub_helper(c
, inst_add
, RC_PRESUB_INV
, presub_replace_inv
)) {
652 rc_remove_instruction(inst_add
);
660 * 0 if inst is still part of the program.
661 * 1 if inst is no longer part of the program.
663 static int peephole(struct radeon_compiler
* c
, struct rc_instruction
* inst
)
665 switch(inst
->U
.I
.Opcode
){
668 if(peephole_add_presub_inv(c
, inst
))
670 if(peephole_add_presub_add(c
, inst
))
680 void rc_optimize(struct radeon_compiler
* c
, void *user
)
682 struct rc_instruction
* inst
= c
->Program
.Instructions
.Next
;
683 while(inst
!= &c
->Program
.Instructions
) {
684 struct rc_instruction
* cur
= inst
;
687 constant_folding(c
, cur
);
692 if (cur
->U
.I
.Opcode
== RC_OPCODE_MOV
) {
693 copy_propagate(c
, cur
);
694 /* cur may no longer be part of the program */