2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 #include "radeon_dataflow.h"
31 #include "radeon_compiler.h"
32 #include "radeon_compiler_util.h"
33 #include "radeon_swizzle.h"
35 struct peephole_state
{
36 struct rc_instruction
* Inst
;
37 /** Stores a bitmask of the components that are still "alive" (i.e.
38 * they have not been written to since Inst was executed.)
40 unsigned int WriteMask
;
43 typedef void (*rc_presub_replace_fn
)(struct peephole_state
*,
44 struct rc_instruction
*,
47 static struct rc_src_register
chain_srcregs(struct rc_src_register outer
, struct rc_src_register inner
)
49 struct rc_src_register combine
;
50 combine
.File
= inner
.File
;
51 combine
.Index
= inner
.Index
;
52 combine
.RelAddr
= inner
.RelAddr
;
55 combine
.Negate
= outer
.Negate
;
57 combine
.Abs
= inner
.Abs
;
59 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
60 unsigned int swz
= GET_SWZ(outer
.Swizzle
, chan
);
62 combine
.Negate
|= GET_BIT(inner
.Negate
, swz
) << chan
;
64 combine
.Negate
^= outer
.Negate
;
66 combine
.Swizzle
= combine_swizzles(inner
.Swizzle
, outer
.Swizzle
);
70 struct copy_propagate_state
{
71 struct radeon_compiler
* C
;
72 struct rc_instruction
* Mov
;
73 unsigned int Conflict
:1;
75 /** Whether Mov's source has been clobbered */
76 unsigned int SourceClobbered
:1;
78 /** Which components of Mov's destination register are still from that Mov? */
79 unsigned int MovMask
:4;
81 /** Which components of Mov's destination register are clearly *not* from that Mov */
82 unsigned int DefinedMask
:4;
84 /** Which components of Mov's source register are sourced */
85 unsigned int SourcedMask
:4;
87 /** Branch depth beyond Mov; negative value indicates we left the Mov's block */
91 static void copy_propagate_scan_read(void * data
, struct rc_instruction
* inst
,
92 struct rc_src_register
* src
)
94 rc_register_file file
= src
->File
;
95 struct rc_reader_data
* reader_data
= data
;
96 const struct rc_opcode_info
* info
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
98 /* It is possible to do copy propagation in this situation,
99 * just not right now, see peephole_add_presub_inv() */
100 if (reader_data
->Writer
->U
.I
.PreSub
.Opcode
!= RC_PRESUB_NONE
&&
101 (info
->NumSrcRegs
> 2 || info
->HasTexture
)) {
102 reader_data
->Abort
= 1;
106 /* XXX This could probably be handled better. */
107 if (file
== RC_FILE_ADDRESS
) {
108 reader_data
->Abort
= 1;
112 /* These instructions cannot read from the constants file.
113 * see radeonTransformTEX()
115 if(reader_data
->Writer
->U
.I
.SrcReg
[0].File
!= RC_FILE_TEMPORARY
&&
116 reader_data
->Writer
->U
.I
.SrcReg
[0].File
!= RC_FILE_INPUT
&&
117 (inst
->U
.I
.Opcode
== RC_OPCODE_TEX
||
118 inst
->U
.I
.Opcode
== RC_OPCODE_TXB
||
119 inst
->U
.I
.Opcode
== RC_OPCODE_TXP
||
120 inst
->U
.I
.Opcode
== RC_OPCODE_KIL
)){
121 reader_data
->Abort
= 1;
126 static void copy_propagate_scan_write(void * data
, struct rc_instruction
* inst
,
127 rc_register_file file
, unsigned int index
, unsigned int mask
)
129 struct rc_reader_data
* reader_data
= data
;
130 struct copy_propagate_state
* s
= reader_data
->CbData
;
132 if (file
== reader_data
->Writer
->U
.I
.SrcReg
[0].File
&& index
== reader_data
->Writer
->U
.I
.SrcReg
[0].Index
) {
133 if (mask
& s
->SourcedMask
)
134 reader_data
->AbortOnRead
= 1;
135 } else if (s
->Mov
->U
.I
.SrcReg
[0].RelAddr
&& file
== RC_FILE_ADDRESS
) {
136 reader_data
->AbortOnRead
= 1;
140 static void copy_propagate(struct radeon_compiler
* c
, struct rc_instruction
* inst_mov
)
142 struct copy_propagate_state s
;
143 struct rc_reader_data reader_data
;
146 if (inst_mov
->U
.I
.DstReg
.File
!= RC_FILE_TEMPORARY
||
147 inst_mov
->U
.I
.DstReg
.RelAddr
||
148 inst_mov
->U
.I
.WriteALUResult
||
149 inst_mov
->U
.I
.SaturateMode
)
152 memset(&s
, 0, sizeof(s
));
155 s
.MovMask
= inst_mov
->U
.I
.DstReg
.WriteMask
;
156 s
.DefinedMask
= RC_MASK_XYZW
& ~s
.MovMask
;
158 reader_data
.CbData
= &s
;
160 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
161 unsigned int swz
= GET_SWZ(inst_mov
->U
.I
.SrcReg
[0].Swizzle
, chan
);
162 s
.SourcedMask
|= (1 << swz
) & RC_MASK_XYZW
;
165 /* Get a list of all the readers of this MOV instruction. */
166 rc_get_readers_normal(c
, inst_mov
, &reader_data
,
167 copy_propagate_scan_read
, copy_propagate_scan_write
);
169 if (reader_data
.Abort
|| reader_data
.ReaderCount
== 0)
172 /* Propagate the MOV instruction. */
173 for (i
= 0; i
< reader_data
.ReaderCount
; i
++) {
174 struct rc_instruction
* inst
= reader_data
.Readers
[i
].Inst
;
175 *reader_data
.Readers
[i
].Src
= chain_srcregs(*reader_data
.Readers
[i
].Src
, s
.Mov
->U
.I
.SrcReg
[0]);
177 if (s
.Mov
->U
.I
.SrcReg
[0].File
== RC_FILE_PRESUB
)
178 inst
->U
.I
.PreSub
= s
.Mov
->U
.I
.PreSub
;
181 /* Finally, remove the original MOV instruction */
182 rc_remove_instruction(inst_mov
);
186 * Check if a source register is actually always the same
189 static int is_src_uniform_constant(struct rc_src_register src
,
190 rc_swizzle
* pswz
, unsigned int * pnegate
)
194 if (src
.File
!= RC_FILE_NONE
) {
199 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
200 unsigned int swz
= GET_SWZ(src
.Swizzle
, chan
);
205 if (swz
== RC_SWIZZLE_UNUSED
)
210 *pnegate
= GET_BIT(src
.Negate
, chan
);
213 if (swz
!= *pswz
|| *pnegate
!= GET_BIT(src
.Negate
, chan
)) {
223 static void constant_folding_mad(struct rc_instruction
* inst
)
228 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[2], &swz
, &negate
)) {
229 if (swz
== RC_SWIZZLE_ZERO
) {
230 inst
->U
.I
.Opcode
= RC_OPCODE_MUL
;
235 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
236 if (swz
== RC_SWIZZLE_ONE
) {
237 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
239 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
240 inst
->U
.I
.SrcReg
[1] = inst
->U
.I
.SrcReg
[2];
242 } else if (swz
== RC_SWIZZLE_ZERO
) {
243 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
244 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
249 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
250 if (swz
== RC_SWIZZLE_ONE
) {
251 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
253 inst
->U
.I
.SrcReg
[1].Negate
^= RC_MASK_XYZW
;
254 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
256 } else if (swz
== RC_SWIZZLE_ZERO
) {
257 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
258 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
264 static void constant_folding_mul(struct rc_instruction
* inst
)
269 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
270 if (swz
== RC_SWIZZLE_ONE
) {
271 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
272 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
274 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
276 } else if (swz
== RC_SWIZZLE_ZERO
) {
277 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
278 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
283 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
284 if (swz
== RC_SWIZZLE_ONE
) {
285 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
287 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
289 } else if (swz
== RC_SWIZZLE_ZERO
) {
290 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
291 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
297 static void constant_folding_add(struct rc_instruction
* inst
)
302 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
303 if (swz
== RC_SWIZZLE_ZERO
) {
304 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
305 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
310 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
311 if (swz
== RC_SWIZZLE_ZERO
) {
312 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
319 * Replace 0.0, 1.0 and 0.5 immediate constants by their
320 * respective swizzles. Simplify instructions like ADD dst, src, 0;
322 static void constant_folding(struct radeon_compiler
* c
, struct rc_instruction
* inst
)
324 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
327 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
328 for(unsigned int src
= 0; src
< opcode
->NumSrcRegs
; ++src
) {
329 struct rc_constant
* constant
;
330 struct rc_src_register newsrc
;
331 int have_real_reference
;
333 if (inst
->U
.I
.SrcReg
[src
].File
!= RC_FILE_CONSTANT
||
334 inst
->U
.I
.SrcReg
[src
].RelAddr
||
335 inst
->U
.I
.SrcReg
[src
].Index
>= c
->Program
.Constants
.Count
)
339 &c
->Program
.Constants
.Constants
[inst
->U
.I
.SrcReg
[src
].Index
];
341 if (constant
->Type
!= RC_CONSTANT_IMMEDIATE
)
344 newsrc
= inst
->U
.I
.SrcReg
[src
];
345 have_real_reference
= 0;
346 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
347 unsigned int swz
= GET_SWZ(newsrc
.Swizzle
, chan
);
355 imm
= constant
->u
.Immediate
[swz
];
360 if (baseimm
== 0.0) {
361 newswz
= RC_SWIZZLE_ZERO
;
362 } else if (baseimm
== 1.0) {
363 newswz
= RC_SWIZZLE_ONE
;
364 } else if (baseimm
== 0.5 && c
->has_half_swizzles
) {
365 newswz
= RC_SWIZZLE_HALF
;
367 have_real_reference
= 1;
371 SET_SWZ(newsrc
.Swizzle
, chan
, newswz
);
372 if (imm
< 0.0 && !newsrc
.Abs
)
373 newsrc
.Negate
^= 1 << chan
;
376 if (!have_real_reference
) {
377 newsrc
.File
= RC_FILE_NONE
;
381 /* don't make the swizzle worse */
382 if (!c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, newsrc
) &&
383 c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, inst
->U
.I
.SrcReg
[src
]))
386 inst
->U
.I
.SrcReg
[src
] = newsrc
;
389 /* Simplify instructions based on constants */
390 if (inst
->U
.I
.Opcode
== RC_OPCODE_MAD
)
391 constant_folding_mad(inst
);
393 /* note: MAD can simplify to MUL or ADD */
394 if (inst
->U
.I
.Opcode
== RC_OPCODE_MUL
)
395 constant_folding_mul(inst
);
396 else if (inst
->U
.I
.Opcode
== RC_OPCODE_ADD
)
397 constant_folding_add(inst
);
399 /* In case this instruction has been converted, make sure all of the
400 * registers that are no longer used are empty. */
401 opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
402 for(i
= opcode
->NumSrcRegs
; i
< 3; i
++) {
403 memset(&inst
->U
.I
.SrcReg
[i
], 0, sizeof(struct rc_src_register
));
408 * If src and dst use the same register, this function returns a writemask that
409 * indicates which components are read by src. Otherwise zero is returned.
411 static unsigned int src_reads_dst_mask(struct rc_src_register src
,
412 struct rc_dst_register dst
)
414 if (dst
.File
!= src
.File
|| dst
.Index
!= src
.Index
) {
417 return rc_swizzle_to_writemask(src
.Swizzle
);
420 /* Return 1 if the source register has a constant swizzle (e.g. 0, 0.5, 1.0)
421 * in any of its channels. Return 0 otherwise. */
422 static int src_has_const_swz(struct rc_src_register src
) {
424 for(chan
= 0; chan
< 4; chan
++) {
425 unsigned int swz
= GET_SWZ(src
.Swizzle
, chan
);
426 if (swz
== RC_SWIZZLE_ZERO
|| swz
== RC_SWIZZLE_HALF
427 || swz
== RC_SWIZZLE_ONE
) {
434 static void peephole_scan_write(void * data
, struct rc_instruction
* inst
,
435 rc_register_file file
, unsigned int index
, unsigned int mask
)
437 struct peephole_state
* s
= data
;
438 if(s
->Inst
->U
.I
.DstReg
.File
== file
439 && s
->Inst
->U
.I
.DstReg
.Index
== index
) {
440 unsigned int common_mask
= s
->WriteMask
& mask
;
441 s
->WriteMask
&= ~common_mask
;
445 static int presub_helper(
446 struct radeon_compiler
* c
,
447 struct peephole_state
* s
,
448 rc_presubtract_op presub_opcode
,
449 rc_presub_replace_fn presub_replace
)
451 struct rc_instruction
* inst
;
452 unsigned int can_remove
= 0;
453 unsigned int cant_sub
= 0;
455 for(inst
= s
->Inst
->Next
; inst
!= &c
->Program
.Instructions
;
458 unsigned char can_use_presub
= 1;
459 const struct rc_opcode_info
* info
=
460 rc_get_opcode_info(inst
->U
.I
.Opcode
);
461 /* XXX: There are some situations where instructions
462 * with more than 2 src registers can use the
463 * presubtract select, but to keep things simple we
464 * will disable presubtract on these instructions for
466 if (info
->NumSrcRegs
> 2 || info
->HasTexture
) {
470 /* We can't use more than one presubtract value in an
471 * instruction, unless the two presubtract operations
472 * are the same and read from the same registers. */
473 if (inst
->U
.I
.PreSub
.Opcode
!= RC_PRESUB_NONE
) {
474 if (inst
->U
.I
.PreSub
.Opcode
!= presub_opcode
475 || inst
->U
.I
.PreSub
.SrcReg
[0].File
!=
476 s
->Inst
->U
.I
.SrcReg
[1].File
477 || inst
->U
.I
.PreSub
.SrcReg
[0].Index
!=
478 s
->Inst
->U
.I
.SrcReg
[1].Index
) {
483 /* Even if the instruction can't use a presubtract operation
484 * we still need to check if the instruction reads from
485 * s->Inst->U.I.DstReg, because if it does we must not
487 for(i
= 0; i
< info
->NumSrcRegs
; i
++) {
488 unsigned int mask
= src_reads_dst_mask(
489 inst
->U
.I
.SrcReg
[i
], s
->Inst
->U
.I
.DstReg
);
490 /* XXX We could be more aggressive here using
491 * presubtract. It is okay if SrcReg[i] only reads
492 * from some of the mask components. */
493 if(s
->Inst
->U
.I
.DstReg
.WriteMask
!= mask
) {
494 if (s
->Inst
->U
.I
.DstReg
.WriteMask
& mask
) {
501 if (cant_sub
|| !can_use_presub
) {
505 presub_replace(s
, inst
, i
);
510 rc_for_all_writes_mask(inst
, peephole_scan_write
, s
);
511 /* If all components of inst_add's destination register have
512 * been written to by subsequent instructions, the original
513 * value of the destination register is no longer valid and
514 * we can't keep doing substitutions. */
518 /* Make sure this instruction doesn't write to the presubtract source. */
519 if (inst
->U
.I
.DstReg
.WriteMask
&
520 src_reads_dst_mask(s
->Inst
->U
.I
.SrcReg
[1],
522 || src_reads_dst_mask(s
->Inst
->U
.I
.SrcReg
[0],
524 || info
->IsFlowControl
) {
531 /* This function assumes that s->Inst->U.I.SrcReg[0] and
532 * s->Inst->U.I.SrcReg[1] aren't both negative. */
533 static void presub_replace_add(struct peephole_state
*s
,
534 struct rc_instruction
* inst
,
535 unsigned int src_index
)
537 rc_presubtract_op presub_opcode
;
538 if (s
->Inst
->U
.I
.SrcReg
[1].Negate
|| s
->Inst
->U
.I
.SrcReg
[0].Negate
)
539 presub_opcode
= RC_PRESUB_SUB
;
541 presub_opcode
= RC_PRESUB_ADD
;
543 if (s
->Inst
->U
.I
.SrcReg
[1].Negate
) {
544 inst
->U
.I
.PreSub
.SrcReg
[0] = s
->Inst
->U
.I
.SrcReg
[1];
545 inst
->U
.I
.PreSub
.SrcReg
[1] = s
->Inst
->U
.I
.SrcReg
[0];
547 inst
->U
.I
.PreSub
.SrcReg
[0] = s
->Inst
->U
.I
.SrcReg
[0];
548 inst
->U
.I
.PreSub
.SrcReg
[1] = s
->Inst
->U
.I
.SrcReg
[1];
550 inst
->U
.I
.PreSub
.SrcReg
[0].Negate
= 0;
551 inst
->U
.I
.PreSub
.SrcReg
[1].Negate
= 0;
552 inst
->U
.I
.PreSub
.Opcode
= presub_opcode
;
553 inst
->U
.I
.SrcReg
[src_index
] = chain_srcregs(inst
->U
.I
.SrcReg
[src_index
],
554 inst
->U
.I
.PreSub
.SrcReg
[0]);
555 inst
->U
.I
.SrcReg
[src_index
].File
= RC_FILE_PRESUB
;
556 inst
->U
.I
.SrcReg
[src_index
].Index
= presub_opcode
;
559 static int is_presub_candidate(struct rc_instruction
* inst
)
561 const struct rc_opcode_info
* info
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
564 if (inst
->U
.I
.PreSub
.Opcode
!= RC_PRESUB_NONE
|| inst
->U
.I
.SaturateMode
)
567 for(i
= 0; i
< info
->NumSrcRegs
; i
++) {
568 if (src_reads_dst_mask(inst
->U
.I
.SrcReg
[i
], inst
->U
.I
.DstReg
))
574 static int peephole_add_presub_add(
575 struct radeon_compiler
* c
,
576 struct rc_instruction
* inst_add
)
578 struct rc_src_register
* src0
= NULL
;
579 struct rc_src_register
* src1
= NULL
;
581 struct peephole_state s
;
583 if (!is_presub_candidate(inst_add
))
586 if (inst_add
->U
.I
.SrcReg
[0].Swizzle
!= inst_add
->U
.I
.SrcReg
[1].Swizzle
)
589 /* src0 and src1 can't have absolute values; only one of them can be negative, and each must be either all negative or all positive. */
590 for (i
= 0; i
< 2; i
++) {
591 if (inst_add
->U
.I
.SrcReg
[i
].Abs
)
593 if ((inst_add
->U
.I
.SrcReg
[i
].Negate
594 & inst_add
->U
.I
.DstReg
.WriteMask
) ==
595 inst_add
->U
.I
.DstReg
.WriteMask
) {
596 src0
= &inst_add
->U
.I
.SrcReg
[i
];
598 src1
= &inst_add
->U
.I
.SrcReg
[i
];
600 src0
= &inst_add
->U
.I
.SrcReg
[i
];
608 s
.WriteMask
= inst_add
->U
.I
.DstReg
.WriteMask
;
609 if (presub_helper(c
, &s
, RC_PRESUB_ADD
, presub_replace_add
)) {
610 rc_remove_instruction(inst_add
);
616 static void presub_replace_inv(struct peephole_state
* s
,
617 struct rc_instruction
* inst
,
618 unsigned int src_index
)
620 /* We must be careful not to modify s->Inst, since it
621 * is possible it will remain part of the program.
622 * XXX Maybe pass a struct instead of a pointer for s->Inst.*/
623 inst
->U
.I
.PreSub
.SrcReg
[0] = s
->Inst
->U
.I
.SrcReg
[1];
624 inst
->U
.I
.PreSub
.SrcReg
[0].Negate
= 0;
625 inst
->U
.I
.PreSub
.Opcode
= RC_PRESUB_INV
;
626 inst
->U
.I
.SrcReg
[src_index
] = chain_srcregs(inst
->U
.I
.SrcReg
[src_index
],
627 inst
->U
.I
.PreSub
.SrcReg
[0]);
629 inst
->U
.I
.SrcReg
[src_index
].File
= RC_FILE_PRESUB
;
630 inst
->U
.I
.SrcReg
[src_index
].Index
= RC_PRESUB_INV
;
634 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
635 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
636 * of the add instruction must have the constant 1 swizzle. This function
637 * does not check const registers to see if their value is 1.0, so it should
638 * be called after the constant_folding optimization.
640 * 0 if the ADD instruction is still part of the program.
641 * 1 if the ADD instruction is no longer part of the program.
643 static int peephole_add_presub_inv(
644 struct radeon_compiler
* c
,
645 struct rc_instruction
* inst_add
)
647 unsigned int i
, swz
, mask
;
648 struct peephole_state s
;
650 if (!is_presub_candidate(inst_add
))
653 mask
= inst_add
->U
.I
.DstReg
.WriteMask
;
655 /* Check if src0 is 1. */
656 /* XXX It would be nice to use is_src_uniform_constant here, but that
657 * function only works if the register's file is RC_FILE_NONE */
658 for(i
= 0; i
< 4; i
++ ) {
659 swz
= GET_SWZ(inst_add
->U
.I
.SrcReg
[0].Swizzle
, i
);
660 if(((1 << i
) & inst_add
->U
.I
.DstReg
.WriteMask
)
661 && swz
!= RC_SWIZZLE_ONE
) {
667 if ((inst_add
->U
.I
.SrcReg
[1].Negate
& inst_add
->U
.I
.DstReg
.WriteMask
) !=
668 inst_add
->U
.I
.DstReg
.WriteMask
669 || inst_add
->U
.I
.SrcReg
[1].Abs
670 || (inst_add
->U
.I
.SrcReg
[1].File
!= RC_FILE_TEMPORARY
671 && inst_add
->U
.I
.SrcReg
[1].File
!= RC_FILE_CONSTANT
)
672 || src_has_const_swz(inst_add
->U
.I
.SrcReg
[1])) {
677 /* Setup the peephole_state information. */
679 s
.WriteMask
= inst_add
->U
.I
.DstReg
.WriteMask
;
681 if (presub_helper(c
, &s
, RC_PRESUB_INV
, presub_replace_inv
)) {
682 rc_remove_instruction(inst_add
);
690 * 0 if inst is still part of the program.
691 * 1 if inst is no longer part of the program.
693 static int peephole(struct radeon_compiler
* c
, struct rc_instruction
* inst
)
695 switch(inst
->U
.I
.Opcode
){
698 if(peephole_add_presub_inv(c
, inst
))
700 if(peephole_add_presub_add(c
, inst
))
710 void rc_optimize(struct radeon_compiler
* c
, void *user
)
712 struct rc_instruction
* inst
= c
->Program
.Instructions
.Next
;
713 while(inst
!= &c
->Program
.Instructions
) {
714 struct rc_instruction
* cur
= inst
;
717 constant_folding(c
, cur
);
722 if (cur
->U
.I
.Opcode
== RC_OPCODE_MOV
) {
723 copy_propagate(c
, cur
);
724 /* cur may no longer be part of the program */