/*
 * Copyright (C) 2009 Nicolai Haehnle.
 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "radeon_dataflow.h"

#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_swizzle.h"

35 struct src_clobbered_reads_cb_data
{
36 rc_register_file File
;
39 struct rc_reader_data
* ReaderData
;
/**
 * Callback that rewrites one source of a reader instruction so that it uses
 * a presubtract operation. presub_helper() invokes it as
 * presub_replace(inst_add, reader_inst, src_index).
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
						struct rc_instruction *,
						unsigned int);

46 static struct rc_src_register
chain_srcregs(struct rc_src_register outer
, struct rc_src_register inner
)
48 struct rc_src_register combine
;
49 combine
.File
= inner
.File
;
50 combine
.Index
= inner
.Index
;
51 combine
.RelAddr
= inner
.RelAddr
;
54 combine
.Negate
= outer
.Negate
;
56 combine
.Abs
= inner
.Abs
;
57 combine
.Negate
= swizzle_mask(outer
.Swizzle
, inner
.Negate
);
58 combine
.Negate
^= outer
.Negate
;
60 combine
.Swizzle
= combine_swizzles(inner
.Swizzle
, outer
.Swizzle
);
64 static void copy_propagate_scan_read(void * data
, struct rc_instruction
* inst
,
65 struct rc_src_register
* src
)
67 rc_register_file file
= src
->File
;
68 struct rc_reader_data
* reader_data
= data
;
70 if(!rc_inst_can_use_presub(inst
,
71 reader_data
->Writer
->U
.I
.PreSub
.Opcode
,
72 rc_swizzle_to_writemask(src
->Swizzle
),
74 &reader_data
->Writer
->U
.I
.PreSub
.SrcReg
[0],
75 &reader_data
->Writer
->U
.I
.PreSub
.SrcReg
[1])) {
76 reader_data
->Abort
= 1;
80 /* XXX This could probably be handled better. */
81 if (file
== RC_FILE_ADDRESS
) {
82 reader_data
->Abort
= 1;
86 /* These instructions cannot read from the constants file.
87 * see radeonTransformTEX()
89 if(reader_data
->Writer
->U
.I
.SrcReg
[0].File
!= RC_FILE_TEMPORARY
&&
90 reader_data
->Writer
->U
.I
.SrcReg
[0].File
!= RC_FILE_INPUT
&&
91 (inst
->U
.I
.Opcode
== RC_OPCODE_TEX
||
92 inst
->U
.I
.Opcode
== RC_OPCODE_TXB
||
93 inst
->U
.I
.Opcode
== RC_OPCODE_TXP
||
94 inst
->U
.I
.Opcode
== RC_OPCODE_TXD
||
95 inst
->U
.I
.Opcode
== RC_OPCODE_TXL
||
96 inst
->U
.I
.Opcode
== RC_OPCODE_KIL
)){
97 reader_data
->Abort
= 1;
102 static void src_clobbered_reads_cb(
104 struct rc_instruction
* inst
,
105 struct rc_src_register
* src
)
107 struct src_clobbered_reads_cb_data
* sc_data
= data
;
109 if (src
->File
== sc_data
->File
110 && src
->Index
== sc_data
->Index
111 && (rc_swizzle_to_writemask(src
->Swizzle
) & sc_data
->Mask
)) {
113 sc_data
->ReaderData
->AbortOnRead
= RC_MASK_XYZW
;
116 if (src
->RelAddr
&& sc_data
->File
== RC_FILE_ADDRESS
) {
117 sc_data
->ReaderData
->AbortOnRead
= RC_MASK_XYZW
;
121 static void is_src_clobbered_scan_write(
123 struct rc_instruction
* inst
,
124 rc_register_file file
,
128 struct src_clobbered_reads_cb_data sc_data
;
129 struct rc_reader_data
* reader_data
= data
;
131 sc_data
.Index
= index
;
133 sc_data
.ReaderData
= reader_data
;
134 rc_for_all_reads_src(reader_data
->Writer
,
135 src_clobbered_reads_cb
, &sc_data
);
138 static void copy_propagate(struct radeon_compiler
* c
, struct rc_instruction
* inst_mov
)
140 struct rc_reader_data reader_data
;
143 if (inst_mov
->U
.I
.DstReg
.File
!= RC_FILE_TEMPORARY
||
144 inst_mov
->U
.I
.WriteALUResult
||
145 inst_mov
->U
.I
.SaturateMode
)
148 /* Get a list of all the readers of this MOV instruction. */
149 reader_data
.ExitOnAbort
= 1;
150 rc_get_readers(c
, inst_mov
, &reader_data
,
151 copy_propagate_scan_read
, NULL
,
152 is_src_clobbered_scan_write
);
154 if (reader_data
.Abort
|| reader_data
.ReaderCount
== 0)
157 /* Propagate the MOV instruction. */
158 for (i
= 0; i
< reader_data
.ReaderCount
; i
++) {
159 struct rc_instruction
* inst
= reader_data
.Readers
[i
].Inst
;
160 *reader_data
.Readers
[i
].U
.I
.Src
= chain_srcregs(*reader_data
.Readers
[i
].U
.I
.Src
, inst_mov
->U
.I
.SrcReg
[0]);
162 if (inst_mov
->U
.I
.SrcReg
[0].File
== RC_FILE_PRESUB
)
163 inst
->U
.I
.PreSub
= inst_mov
->U
.I
.PreSub
;
166 /* Finally, remove the original MOV instruction */
167 rc_remove_instruction(inst_mov
);
171 * Check if a source register is actually always the same
174 static int is_src_uniform_constant(struct rc_src_register src
,
175 rc_swizzle
* pswz
, unsigned int * pnegate
)
179 if (src
.File
!= RC_FILE_NONE
) {
184 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
185 unsigned int swz
= GET_SWZ(src
.Swizzle
, chan
);
190 if (swz
== RC_SWIZZLE_UNUSED
)
195 *pnegate
= GET_BIT(src
.Negate
, chan
);
198 if (swz
!= *pswz
|| *pnegate
!= GET_BIT(src
.Negate
, chan
)) {
208 static void constant_folding_mad(struct rc_instruction
* inst
)
211 unsigned int negate
= 0;
213 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[2], &swz
, &negate
)) {
214 if (swz
== RC_SWIZZLE_ZERO
) {
215 inst
->U
.I
.Opcode
= RC_OPCODE_MUL
;
220 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
221 if (swz
== RC_SWIZZLE_ONE
) {
222 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
224 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
225 inst
->U
.I
.SrcReg
[1] = inst
->U
.I
.SrcReg
[2];
227 } else if (swz
== RC_SWIZZLE_ZERO
) {
228 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
229 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
234 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
235 if (swz
== RC_SWIZZLE_ONE
) {
236 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
238 inst
->U
.I
.SrcReg
[1].Negate
^= RC_MASK_XYZW
;
239 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
241 } else if (swz
== RC_SWIZZLE_ZERO
) {
242 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
243 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
249 static void constant_folding_mul(struct rc_instruction
* inst
)
252 unsigned int negate
= 0;
254 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
255 if (swz
== RC_SWIZZLE_ONE
) {
256 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
257 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
259 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
261 } else if (swz
== RC_SWIZZLE_ZERO
) {
262 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
263 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
268 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
269 if (swz
== RC_SWIZZLE_ONE
) {
270 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
272 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
274 } else if (swz
== RC_SWIZZLE_ZERO
) {
275 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
276 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
282 static void constant_folding_add(struct rc_instruction
* inst
)
285 unsigned int negate
= 0;
287 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
288 if (swz
== RC_SWIZZLE_ZERO
) {
289 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
290 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
295 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
296 if (swz
== RC_SWIZZLE_ZERO
) {
297 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
304 * Replace 0.0, 1.0 and 0.5 immediate constants by their
305 * respective swizzles. Simplify instructions like ADD dst, src, 0;
307 static void constant_folding(struct radeon_compiler
* c
, struct rc_instruction
* inst
)
309 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
312 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
313 for(unsigned int src
= 0; src
< opcode
->NumSrcRegs
; ++src
) {
314 struct rc_constant
* constant
;
315 struct rc_src_register newsrc
;
316 int have_real_reference
;
319 /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
320 for (chan
= 0; chan
< 4; ++chan
)
321 if (GET_SWZ(inst
->U
.I
.SrcReg
[src
].Swizzle
, chan
) <= 3)
324 inst
->U
.I
.SrcReg
[src
].File
= RC_FILE_NONE
;
328 /* Convert immediates to swizzles. */
329 if (inst
->U
.I
.SrcReg
[src
].File
!= RC_FILE_CONSTANT
||
330 inst
->U
.I
.SrcReg
[src
].RelAddr
||
331 inst
->U
.I
.SrcReg
[src
].Index
>= c
->Program
.Constants
.Count
)
335 &c
->Program
.Constants
.Constants
[inst
->U
.I
.SrcReg
[src
].Index
];
337 if (constant
->Type
!= RC_CONSTANT_IMMEDIATE
)
340 newsrc
= inst
->U
.I
.SrcReg
[src
];
341 have_real_reference
= 0;
342 for (chan
= 0; chan
< 4; ++chan
) {
343 unsigned int swz
= GET_SWZ(newsrc
.Swizzle
, chan
);
351 imm
= constant
->u
.Immediate
[swz
];
356 if (baseimm
== 0.0) {
357 newswz
= RC_SWIZZLE_ZERO
;
358 } else if (baseimm
== 1.0) {
359 newswz
= RC_SWIZZLE_ONE
;
360 } else if (baseimm
== 0.5 && c
->has_half_swizzles
) {
361 newswz
= RC_SWIZZLE_HALF
;
363 have_real_reference
= 1;
367 SET_SWZ(newsrc
.Swizzle
, chan
, newswz
);
368 if (imm
< 0.0 && !newsrc
.Abs
)
369 newsrc
.Negate
^= 1 << chan
;
372 if (!have_real_reference
) {
373 newsrc
.File
= RC_FILE_NONE
;
377 /* don't make the swizzle worse */
378 if (!c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, newsrc
) &&
379 c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, inst
->U
.I
.SrcReg
[src
]))
382 inst
->U
.I
.SrcReg
[src
] = newsrc
;
385 /* Simplify instructions based on constants */
386 if (inst
->U
.I
.Opcode
== RC_OPCODE_MAD
)
387 constant_folding_mad(inst
);
389 /* note: MAD can simplify to MUL or ADD */
390 if (inst
->U
.I
.Opcode
== RC_OPCODE_MUL
)
391 constant_folding_mul(inst
);
392 else if (inst
->U
.I
.Opcode
== RC_OPCODE_ADD
)
393 constant_folding_add(inst
);
395 /* In case this instruction has been converted, make sure all of the
396 * registers that are no longer used are empty. */
397 opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
398 for(i
= opcode
->NumSrcRegs
; i
< 3; i
++) {
399 memset(&inst
->U
.I
.SrcReg
[i
], 0, sizeof(struct rc_src_register
));
404 * If src and dst use the same register, this function returns a writemask that
405 * indicates wich components are read by src. Otherwise zero is returned.
407 static unsigned int src_reads_dst_mask(struct rc_src_register src
,
408 struct rc_dst_register dst
)
410 if (dst
.File
!= src
.File
|| dst
.Index
!= src
.Index
) {
413 return rc_swizzle_to_writemask(src
.Swizzle
);
416 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
417 * in any of its channels. Return 0 otherwise. */
418 static int src_has_const_swz(struct rc_src_register src
) {
420 for(chan
= 0; chan
< 4; chan
++) {
421 unsigned int swz
= GET_SWZ(src
.Swizzle
, chan
);
422 if (swz
== RC_SWIZZLE_ZERO
|| swz
== RC_SWIZZLE_HALF
423 || swz
== RC_SWIZZLE_ONE
) {
430 static void presub_scan_read(
432 struct rc_instruction
* inst
,
433 struct rc_src_register
* src
)
435 struct rc_reader_data
* reader_data
= data
;
436 rc_presubtract_op
* presub_opcode
= reader_data
->CbData
;
438 if (!rc_inst_can_use_presub(inst
, *presub_opcode
,
439 reader_data
->Writer
->U
.I
.DstReg
.WriteMask
,
441 &reader_data
->Writer
->U
.I
.SrcReg
[0],
442 &reader_data
->Writer
->U
.I
.SrcReg
[1])) {
443 reader_data
->Abort
= 1;
448 static int presub_helper(
449 struct radeon_compiler
* c
,
450 struct rc_instruction
* inst_add
,
451 rc_presubtract_op presub_opcode
,
452 rc_presub_replace_fn presub_replace
)
454 struct rc_reader_data reader_data
;
456 rc_presubtract_op cb_op
= presub_opcode
;
458 reader_data
.CbData
= &cb_op
;
459 reader_data
.ExitOnAbort
= 1;
460 rc_get_readers(c
, inst_add
, &reader_data
, presub_scan_read
, NULL
,
461 is_src_clobbered_scan_write
);
463 if (reader_data
.Abort
|| reader_data
.ReaderCount
== 0)
466 for(i
= 0; i
< reader_data
.ReaderCount
; i
++) {
467 unsigned int src_index
;
468 struct rc_reader reader
= reader_data
.Readers
[i
];
469 const struct rc_opcode_info
* info
=
470 rc_get_opcode_info(reader
.Inst
->U
.I
.Opcode
);
472 for (src_index
= 0; src_index
< info
->NumSrcRegs
; src_index
++) {
473 if (&reader
.Inst
->U
.I
.SrcReg
[src_index
] == reader
.U
.I
.Src
)
474 presub_replace(inst_add
, reader
.Inst
, src_index
);
480 /* This function assumes that inst_add->U.I.SrcReg[0] and
481 * inst_add->U.I.SrcReg[1] aren't both negative. */
482 static void presub_replace_add(
483 struct rc_instruction
* inst_add
,
484 struct rc_instruction
* inst_reader
,
485 unsigned int src_index
)
487 rc_presubtract_op presub_opcode
;
488 if (inst_add
->U
.I
.SrcReg
[1].Negate
|| inst_add
->U
.I
.SrcReg
[0].Negate
)
489 presub_opcode
= RC_PRESUB_SUB
;
491 presub_opcode
= RC_PRESUB_ADD
;
493 if (inst_add
->U
.I
.SrcReg
[1].Negate
) {
494 inst_reader
->U
.I
.PreSub
.SrcReg
[0] = inst_add
->U
.I
.SrcReg
[1];
495 inst_reader
->U
.I
.PreSub
.SrcReg
[1] = inst_add
->U
.I
.SrcReg
[0];
497 inst_reader
->U
.I
.PreSub
.SrcReg
[0] = inst_add
->U
.I
.SrcReg
[0];
498 inst_reader
->U
.I
.PreSub
.SrcReg
[1] = inst_add
->U
.I
.SrcReg
[1];
500 inst_reader
->U
.I
.PreSub
.SrcReg
[0].Negate
= 0;
501 inst_reader
->U
.I
.PreSub
.SrcReg
[1].Negate
= 0;
502 inst_reader
->U
.I
.PreSub
.Opcode
= presub_opcode
;
503 inst_reader
->U
.I
.SrcReg
[src_index
] =
504 chain_srcregs(inst_reader
->U
.I
.SrcReg
[src_index
],
505 inst_reader
->U
.I
.PreSub
.SrcReg
[0]);
506 inst_reader
->U
.I
.SrcReg
[src_index
].File
= RC_FILE_PRESUB
;
507 inst_reader
->U
.I
.SrcReg
[src_index
].Index
= presub_opcode
;
510 static int is_presub_candidate(
511 struct radeon_compiler
* c
,
512 struct rc_instruction
* inst
)
514 const struct rc_opcode_info
* info
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
516 unsigned int is_constant
[2] = {0, 0};
518 assert(inst
->U
.I
.Opcode
== RC_OPCODE_ADD
);
520 if (inst
->U
.I
.PreSub
.Opcode
!= RC_PRESUB_NONE
|| inst
->U
.I
.SaturateMode
)
523 /* If both sources use a constant swizzle, then we can't convert it to
524 * a presubtract operation. In fact for the ADD and SUB presubtract
525 * operations neither source can contain a constant swizzle. This
526 * specific case is checked in peephole_add_presub_add() when
527 * we make sure the swizzles for both sources are equal, so we
528 * don't need to worry about it here. */
529 for (i
= 0; i
< 2; i
++) {
531 for (chan
= 0; chan
< 4; chan
++) {
533 get_swz(inst
->U
.I
.SrcReg
[i
].Swizzle
, chan
);
534 if (swz
== RC_SWIZZLE_ONE
535 || swz
== RC_SWIZZLE_ZERO
536 || swz
== RC_SWIZZLE_HALF
) {
541 if (is_constant
[0] && is_constant
[1])
544 for(i
= 0; i
< info
->NumSrcRegs
; i
++) {
545 struct rc_src_register src
= inst
->U
.I
.SrcReg
[i
];
546 if (src_reads_dst_mask(src
, inst
->U
.I
.DstReg
))
549 src
.File
= RC_FILE_PRESUB
;
550 if (!c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, src
))
556 static int peephole_add_presub_add(
557 struct radeon_compiler
* c
,
558 struct rc_instruction
* inst_add
)
560 struct rc_src_register
* src0
= NULL
;
561 struct rc_src_register
* src1
= NULL
;
564 if (!is_presub_candidate(c
, inst_add
))
567 if (inst_add
->U
.I
.SrcReg
[0].Swizzle
!= inst_add
->U
.I
.SrcReg
[1].Swizzle
)
570 /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */
571 for (i
= 0; i
< 2; i
++) {
572 if (inst_add
->U
.I
.SrcReg
[i
].Abs
)
574 if ((inst_add
->U
.I
.SrcReg
[i
].Negate
575 & inst_add
->U
.I
.DstReg
.WriteMask
) ==
576 inst_add
->U
.I
.DstReg
.WriteMask
) {
577 src0
= &inst_add
->U
.I
.SrcReg
[i
];
579 src1
= &inst_add
->U
.I
.SrcReg
[i
];
581 src0
= &inst_add
->U
.I
.SrcReg
[i
];
588 if (presub_helper(c
, inst_add
, RC_PRESUB_ADD
, presub_replace_add
)) {
589 rc_remove_instruction(inst_add
);
595 static void presub_replace_inv(
596 struct rc_instruction
* inst_add
,
597 struct rc_instruction
* inst_reader
,
598 unsigned int src_index
)
600 /* We must be careful not to modify inst_add, since it
601 * is possible it will remain part of the program.*/
602 inst_reader
->U
.I
.PreSub
.SrcReg
[0] = inst_add
->U
.I
.SrcReg
[1];
603 inst_reader
->U
.I
.PreSub
.SrcReg
[0].Negate
= 0;
604 inst_reader
->U
.I
.PreSub
.Opcode
= RC_PRESUB_INV
;
605 inst_reader
->U
.I
.SrcReg
[src_index
] = chain_srcregs(inst_reader
->U
.I
.SrcReg
[src_index
],
606 inst_reader
->U
.I
.PreSub
.SrcReg
[0]);
608 inst_reader
->U
.I
.SrcReg
[src_index
].File
= RC_FILE_PRESUB
;
609 inst_reader
->U
.I
.SrcReg
[src_index
].Index
= RC_PRESUB_INV
;
613 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
614 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
615 * of the add instruction must have the constatnt 1 swizzle. This function
616 * does not check const registers to see if their value is 1.0, so it should
617 * be called after the constant_folding optimization.
619 * 0 if the ADD instruction is still part of the program.
620 * 1 if the ADD instruction is no longer part of the program.
622 static int peephole_add_presub_inv(
623 struct radeon_compiler
* c
,
624 struct rc_instruction
* inst_add
)
628 if (!is_presub_candidate(c
, inst_add
))
631 /* Check if src0 is 1. */
632 /* XXX It would be nice to use is_src_uniform_constant here, but that
633 * function only works if the register's file is RC_FILE_NONE */
634 for(i
= 0; i
< 4; i
++ ) {
635 swz
= GET_SWZ(inst_add
->U
.I
.SrcReg
[0].Swizzle
, i
);
636 if(((1 << i
) & inst_add
->U
.I
.DstReg
.WriteMask
)
637 && swz
!= RC_SWIZZLE_ONE
) {
643 if ((inst_add
->U
.I
.SrcReg
[1].Negate
& inst_add
->U
.I
.DstReg
.WriteMask
) !=
644 inst_add
->U
.I
.DstReg
.WriteMask
645 || inst_add
->U
.I
.SrcReg
[1].Abs
646 || (inst_add
->U
.I
.SrcReg
[1].File
!= RC_FILE_TEMPORARY
647 && inst_add
->U
.I
.SrcReg
[1].File
!= RC_FILE_CONSTANT
)
648 || src_has_const_swz(inst_add
->U
.I
.SrcReg
[1])) {
653 if (presub_helper(c
, inst_add
, RC_PRESUB_INV
, presub_replace_inv
)) {
654 rc_remove_instruction(inst_add
);
662 * 0 if inst is still part of the program.
663 * 1 if inst is no longer part of the program.
665 static int peephole(struct radeon_compiler
* c
, struct rc_instruction
* inst
)
667 switch(inst
->U
.I
.Opcode
){
670 if(peephole_add_presub_inv(c
, inst
))
672 if(peephole_add_presub_add(c
, inst
))
682 void rc_optimize(struct radeon_compiler
* c
, void *user
)
684 struct rc_instruction
* inst
= c
->Program
.Instructions
.Next
;
685 while(inst
!= &c
->Program
.Instructions
) {
686 struct rc_instruction
* cur
= inst
;
689 constant_folding(c
, cur
);
694 if (cur
->U
.I
.Opcode
== RC_OPCODE_MOV
) {
695 copy_propagate(c
, cur
);
696 /* cur may no longer be part of the program */