2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 #include "radeon_dataflow.h"
31 #include "radeon_compiler.h"
32 #include "radeon_compiler_util.h"
33 #include "radeon_list.h"
34 #include "radeon_swizzle.h"
35 #include "radeon_variable.h"
37 struct src_clobbered_reads_cb_data
{
38 rc_register_file File
;
41 struct rc_reader_data
* ReaderData
;
/**
 * Callback used by presub_helper() to rewrite a single reader.
 *
 * Arguments, in order: the ADD instruction being folded into a presubtract
 * operation, the reader instruction, and the index of the reader's source
 * register that reads the ADD's result.
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
						struct rc_instruction *,
						unsigned int);
48 static struct rc_src_register
chain_srcregs(struct rc_src_register outer
, struct rc_src_register inner
)
50 struct rc_src_register combine
;
51 combine
.File
= inner
.File
;
52 combine
.Index
= inner
.Index
;
53 combine
.RelAddr
= inner
.RelAddr
;
56 combine
.Negate
= outer
.Negate
;
58 combine
.Abs
= inner
.Abs
;
59 combine
.Negate
= swizzle_mask(outer
.Swizzle
, inner
.Negate
);
60 combine
.Negate
^= outer
.Negate
;
62 combine
.Swizzle
= combine_swizzles(inner
.Swizzle
, outer
.Swizzle
);
66 static void copy_propagate_scan_read(void * data
, struct rc_instruction
* inst
,
67 struct rc_src_register
* src
)
69 rc_register_file file
= src
->File
;
70 struct rc_reader_data
* reader_data
= data
;
72 if(!rc_inst_can_use_presub(inst
,
73 reader_data
->Writer
->U
.I
.PreSub
.Opcode
,
74 rc_swizzle_to_writemask(src
->Swizzle
),
76 &reader_data
->Writer
->U
.I
.PreSub
.SrcReg
[0],
77 &reader_data
->Writer
->U
.I
.PreSub
.SrcReg
[1])) {
78 reader_data
->Abort
= 1;
82 /* XXX This could probably be handled better. */
83 if (file
== RC_FILE_ADDRESS
) {
84 reader_data
->Abort
= 1;
88 /* These instructions cannot read from the constants file.
89 * see radeonTransformTEX()
91 if(reader_data
->Writer
->U
.I
.SrcReg
[0].File
!= RC_FILE_TEMPORARY
&&
92 reader_data
->Writer
->U
.I
.SrcReg
[0].File
!= RC_FILE_INPUT
&&
93 (inst
->U
.I
.Opcode
== RC_OPCODE_TEX
||
94 inst
->U
.I
.Opcode
== RC_OPCODE_TXB
||
95 inst
->U
.I
.Opcode
== RC_OPCODE_TXP
||
96 inst
->U
.I
.Opcode
== RC_OPCODE_TXD
||
97 inst
->U
.I
.Opcode
== RC_OPCODE_TXL
||
98 inst
->U
.I
.Opcode
== RC_OPCODE_KIL
)){
99 reader_data
->Abort
= 1;
104 static void src_clobbered_reads_cb(
106 struct rc_instruction
* inst
,
107 struct rc_src_register
* src
)
109 struct src_clobbered_reads_cb_data
* sc_data
= data
;
111 if (src
->File
== sc_data
->File
112 && src
->Index
== sc_data
->Index
113 && (rc_swizzle_to_writemask(src
->Swizzle
) & sc_data
->Mask
)) {
115 sc_data
->ReaderData
->AbortOnRead
= RC_MASK_XYZW
;
118 if (src
->RelAddr
&& sc_data
->File
== RC_FILE_ADDRESS
) {
119 sc_data
->ReaderData
->AbortOnRead
= RC_MASK_XYZW
;
123 static void is_src_clobbered_scan_write(
125 struct rc_instruction
* inst
,
126 rc_register_file file
,
130 struct src_clobbered_reads_cb_data sc_data
;
131 struct rc_reader_data
* reader_data
= data
;
133 sc_data
.Index
= index
;
135 sc_data
.ReaderData
= reader_data
;
136 rc_for_all_reads_src(reader_data
->Writer
,
137 src_clobbered_reads_cb
, &sc_data
);
140 static void copy_propagate(struct radeon_compiler
* c
, struct rc_instruction
* inst_mov
)
142 struct rc_reader_data reader_data
;
145 if (inst_mov
->U
.I
.DstReg
.File
!= RC_FILE_TEMPORARY
||
146 inst_mov
->U
.I
.WriteALUResult
)
149 /* Get a list of all the readers of this MOV instruction. */
150 reader_data
.ExitOnAbort
= 1;
151 rc_get_readers(c
, inst_mov
, &reader_data
,
152 copy_propagate_scan_read
, NULL
,
153 is_src_clobbered_scan_write
);
155 if (reader_data
.Abort
|| reader_data
.ReaderCount
== 0)
158 /* We can propagate SaturateMode if all the readers are MOV instructions
159 * without a presubtract operation, source negation and absolute.
160 * In that case, we just move SaturateMode to all readers. */
161 if (inst_mov
->U
.I
.SaturateMode
) {
162 for (i
= 0; i
< reader_data
.ReaderCount
; i
++) {
163 struct rc_instruction
* inst
= reader_data
.Readers
[i
].Inst
;
165 if (inst
->U
.I
.Opcode
!= RC_OPCODE_MOV
||
166 inst
->U
.I
.SrcReg
[0].File
== RC_FILE_PRESUB
||
167 inst
->U
.I
.SrcReg
[0].Abs
||
168 inst
->U
.I
.SrcReg
[0].Negate
) {
174 /* Propagate the MOV instruction. */
175 for (i
= 0; i
< reader_data
.ReaderCount
; i
++) {
176 struct rc_instruction
* inst
= reader_data
.Readers
[i
].Inst
;
177 *reader_data
.Readers
[i
].U
.I
.Src
= chain_srcregs(*reader_data
.Readers
[i
].U
.I
.Src
, inst_mov
->U
.I
.SrcReg
[0]);
179 if (inst_mov
->U
.I
.SrcReg
[0].File
== RC_FILE_PRESUB
)
180 inst
->U
.I
.PreSub
= inst_mov
->U
.I
.PreSub
;
181 if (!inst
->U
.I
.SaturateMode
)
182 inst
->U
.I
.SaturateMode
= inst_mov
->U
.I
.SaturateMode
;
185 /* Finally, remove the original MOV instruction */
186 rc_remove_instruction(inst_mov
);
190 * Check if a source register is actually always the same
193 static int is_src_uniform_constant(struct rc_src_register src
,
194 rc_swizzle
* pswz
, unsigned int * pnegate
)
198 if (src
.File
!= RC_FILE_NONE
) {
203 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
204 unsigned int swz
= GET_SWZ(src
.Swizzle
, chan
);
209 if (swz
== RC_SWIZZLE_UNUSED
)
214 *pnegate
= GET_BIT(src
.Negate
, chan
);
217 if (swz
!= *pswz
|| *pnegate
!= GET_BIT(src
.Negate
, chan
)) {
227 static void constant_folding_mad(struct rc_instruction
* inst
)
230 unsigned int negate
= 0;
232 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[2], &swz
, &negate
)) {
233 if (swz
== RC_SWIZZLE_ZERO
) {
234 inst
->U
.I
.Opcode
= RC_OPCODE_MUL
;
239 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
240 if (swz
== RC_SWIZZLE_ONE
) {
241 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
243 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
244 inst
->U
.I
.SrcReg
[1] = inst
->U
.I
.SrcReg
[2];
246 } else if (swz
== RC_SWIZZLE_ZERO
) {
247 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
248 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
253 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
254 if (swz
== RC_SWIZZLE_ONE
) {
255 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
257 inst
->U
.I
.SrcReg
[1].Negate
^= RC_MASK_XYZW
;
258 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
260 } else if (swz
== RC_SWIZZLE_ZERO
) {
261 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
262 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
268 static void constant_folding_mul(struct rc_instruction
* inst
)
271 unsigned int negate
= 0;
273 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
274 if (swz
== RC_SWIZZLE_ONE
) {
275 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
276 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
278 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
280 } else if (swz
== RC_SWIZZLE_ZERO
) {
281 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
282 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
287 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
288 if (swz
== RC_SWIZZLE_ONE
) {
289 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
291 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
293 } else if (swz
== RC_SWIZZLE_ZERO
) {
294 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
295 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
301 static void constant_folding_add(struct rc_instruction
* inst
)
304 unsigned int negate
= 0;
306 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
307 if (swz
== RC_SWIZZLE_ZERO
) {
308 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
309 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
314 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
315 if (swz
== RC_SWIZZLE_ZERO
) {
316 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
323 * Replace 0.0, 1.0 and 0.5 immediate constants by their
324 * respective swizzles. Simplify instructions like ADD dst, src, 0;
326 static void constant_folding(struct radeon_compiler
* c
, struct rc_instruction
* inst
)
328 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
331 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
332 for(unsigned int src
= 0; src
< opcode
->NumSrcRegs
; ++src
) {
333 struct rc_constant
* constant
;
334 struct rc_src_register newsrc
;
335 int have_real_reference
;
338 /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
339 for (chan
= 0; chan
< 4; ++chan
)
340 if (GET_SWZ(inst
->U
.I
.SrcReg
[src
].Swizzle
, chan
) <= 3)
343 inst
->U
.I
.SrcReg
[src
].File
= RC_FILE_NONE
;
347 /* Convert immediates to swizzles. */
348 if (inst
->U
.I
.SrcReg
[src
].File
!= RC_FILE_CONSTANT
||
349 inst
->U
.I
.SrcReg
[src
].RelAddr
||
350 inst
->U
.I
.SrcReg
[src
].Index
>= c
->Program
.Constants
.Count
)
354 &c
->Program
.Constants
.Constants
[inst
->U
.I
.SrcReg
[src
].Index
];
356 if (constant
->Type
!= RC_CONSTANT_IMMEDIATE
)
359 newsrc
= inst
->U
.I
.SrcReg
[src
];
360 have_real_reference
= 0;
361 for (chan
= 0; chan
< 4; ++chan
) {
362 unsigned int swz
= GET_SWZ(newsrc
.Swizzle
, chan
);
370 imm
= constant
->u
.Immediate
[swz
];
375 if (baseimm
== 0.0) {
376 newswz
= RC_SWIZZLE_ZERO
;
377 } else if (baseimm
== 1.0) {
378 newswz
= RC_SWIZZLE_ONE
;
379 } else if (baseimm
== 0.5 && c
->has_half_swizzles
) {
380 newswz
= RC_SWIZZLE_HALF
;
382 have_real_reference
= 1;
386 SET_SWZ(newsrc
.Swizzle
, chan
, newswz
);
387 if (imm
< 0.0 && !newsrc
.Abs
)
388 newsrc
.Negate
^= 1 << chan
;
391 if (!have_real_reference
) {
392 newsrc
.File
= RC_FILE_NONE
;
396 /* don't make the swizzle worse */
397 if (!c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, newsrc
) &&
398 c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, inst
->U
.I
.SrcReg
[src
]))
401 inst
->U
.I
.SrcReg
[src
] = newsrc
;
404 /* Simplify instructions based on constants */
405 if (inst
->U
.I
.Opcode
== RC_OPCODE_MAD
)
406 constant_folding_mad(inst
);
408 /* note: MAD can simplify to MUL or ADD */
409 if (inst
->U
.I
.Opcode
== RC_OPCODE_MUL
)
410 constant_folding_mul(inst
);
411 else if (inst
->U
.I
.Opcode
== RC_OPCODE_ADD
)
412 constant_folding_add(inst
);
414 /* In case this instruction has been converted, make sure all of the
415 * registers that are no longer used are empty. */
416 opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
417 for(i
= opcode
->NumSrcRegs
; i
< 3; i
++) {
418 memset(&inst
->U
.I
.SrcReg
[i
], 0, sizeof(struct rc_src_register
));
423 * If src and dst use the same register, this function returns a writemask that
424 * indicates wich components are read by src. Otherwise zero is returned.
426 static unsigned int src_reads_dst_mask(struct rc_src_register src
,
427 struct rc_dst_register dst
)
429 if (dst
.File
!= src
.File
|| dst
.Index
!= src
.Index
) {
432 return rc_swizzle_to_writemask(src
.Swizzle
);
435 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
436 * in any of its channels. Return 0 otherwise. */
437 static int src_has_const_swz(struct rc_src_register src
) {
439 for(chan
= 0; chan
< 4; chan
++) {
440 unsigned int swz
= GET_SWZ(src
.Swizzle
, chan
);
441 if (swz
== RC_SWIZZLE_ZERO
|| swz
== RC_SWIZZLE_HALF
442 || swz
== RC_SWIZZLE_ONE
) {
449 static void presub_scan_read(
451 struct rc_instruction
* inst
,
452 struct rc_src_register
* src
)
454 struct rc_reader_data
* reader_data
= data
;
455 rc_presubtract_op
* presub_opcode
= reader_data
->CbData
;
457 if (!rc_inst_can_use_presub(inst
, *presub_opcode
,
458 reader_data
->Writer
->U
.I
.DstReg
.WriteMask
,
460 &reader_data
->Writer
->U
.I
.SrcReg
[0],
461 &reader_data
->Writer
->U
.I
.SrcReg
[1])) {
462 reader_data
->Abort
= 1;
467 static int presub_helper(
468 struct radeon_compiler
* c
,
469 struct rc_instruction
* inst_add
,
470 rc_presubtract_op presub_opcode
,
471 rc_presub_replace_fn presub_replace
)
473 struct rc_reader_data reader_data
;
475 rc_presubtract_op cb_op
= presub_opcode
;
477 reader_data
.CbData
= &cb_op
;
478 reader_data
.ExitOnAbort
= 1;
479 rc_get_readers(c
, inst_add
, &reader_data
, presub_scan_read
, NULL
,
480 is_src_clobbered_scan_write
);
482 if (reader_data
.Abort
|| reader_data
.ReaderCount
== 0)
485 for(i
= 0; i
< reader_data
.ReaderCount
; i
++) {
486 unsigned int src_index
;
487 struct rc_reader reader
= reader_data
.Readers
[i
];
488 const struct rc_opcode_info
* info
=
489 rc_get_opcode_info(reader
.Inst
->U
.I
.Opcode
);
491 for (src_index
= 0; src_index
< info
->NumSrcRegs
; src_index
++) {
492 if (&reader
.Inst
->U
.I
.SrcReg
[src_index
] == reader
.U
.I
.Src
)
493 presub_replace(inst_add
, reader
.Inst
, src_index
);
499 /* This function assumes that inst_add->U.I.SrcReg[0] and
500 * inst_add->U.I.SrcReg[1] aren't both negative. */
501 static void presub_replace_add(
502 struct rc_instruction
* inst_add
,
503 struct rc_instruction
* inst_reader
,
504 unsigned int src_index
)
506 rc_presubtract_op presub_opcode
;
507 if (inst_add
->U
.I
.SrcReg
[1].Negate
|| inst_add
->U
.I
.SrcReg
[0].Negate
)
508 presub_opcode
= RC_PRESUB_SUB
;
510 presub_opcode
= RC_PRESUB_ADD
;
512 if (inst_add
->U
.I
.SrcReg
[1].Negate
) {
513 inst_reader
->U
.I
.PreSub
.SrcReg
[0] = inst_add
->U
.I
.SrcReg
[1];
514 inst_reader
->U
.I
.PreSub
.SrcReg
[1] = inst_add
->U
.I
.SrcReg
[0];
516 inst_reader
->U
.I
.PreSub
.SrcReg
[0] = inst_add
->U
.I
.SrcReg
[0];
517 inst_reader
->U
.I
.PreSub
.SrcReg
[1] = inst_add
->U
.I
.SrcReg
[1];
519 inst_reader
->U
.I
.PreSub
.SrcReg
[0].Negate
= 0;
520 inst_reader
->U
.I
.PreSub
.SrcReg
[1].Negate
= 0;
521 inst_reader
->U
.I
.PreSub
.Opcode
= presub_opcode
;
522 inst_reader
->U
.I
.SrcReg
[src_index
] =
523 chain_srcregs(inst_reader
->U
.I
.SrcReg
[src_index
],
524 inst_reader
->U
.I
.PreSub
.SrcReg
[0]);
525 inst_reader
->U
.I
.SrcReg
[src_index
].File
= RC_FILE_PRESUB
;
526 inst_reader
->U
.I
.SrcReg
[src_index
].Index
= presub_opcode
;
529 static int is_presub_candidate(
530 struct radeon_compiler
* c
,
531 struct rc_instruction
* inst
)
533 const struct rc_opcode_info
* info
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
535 unsigned int is_constant
[2] = {0, 0};
537 assert(inst
->U
.I
.Opcode
== RC_OPCODE_ADD
);
539 if (inst
->U
.I
.PreSub
.Opcode
!= RC_PRESUB_NONE
540 || inst
->U
.I
.SaturateMode
541 || inst
->U
.I
.WriteALUResult
546 /* If both sources use a constant swizzle, then we can't convert it to
547 * a presubtract operation. In fact for the ADD and SUB presubtract
548 * operations neither source can contain a constant swizzle. This
549 * specific case is checked in peephole_add_presub_add() when
550 * we make sure the swizzles for both sources are equal, so we
551 * don't need to worry about it here. */
552 for (i
= 0; i
< 2; i
++) {
554 for (chan
= 0; chan
< 4; chan
++) {
556 get_swz(inst
->U
.I
.SrcReg
[i
].Swizzle
, chan
);
557 if (swz
== RC_SWIZZLE_ONE
558 || swz
== RC_SWIZZLE_ZERO
559 || swz
== RC_SWIZZLE_HALF
) {
564 if (is_constant
[0] && is_constant
[1])
567 for(i
= 0; i
< info
->NumSrcRegs
; i
++) {
568 struct rc_src_register src
= inst
->U
.I
.SrcReg
[i
];
569 if (src_reads_dst_mask(src
, inst
->U
.I
.DstReg
))
572 src
.File
= RC_FILE_PRESUB
;
573 if (!c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, src
))
579 static int peephole_add_presub_add(
580 struct radeon_compiler
* c
,
581 struct rc_instruction
* inst_add
)
583 unsigned dstmask
= inst_add
->U
.I
.DstReg
.WriteMask
;
584 unsigned src0_neg
= inst_add
->U
.I
.SrcReg
[0].Negate
& dstmask
;
585 unsigned src1_neg
= inst_add
->U
.I
.SrcReg
[1].Negate
& dstmask
;
587 if (inst_add
->U
.I
.SrcReg
[0].Swizzle
!= inst_add
->U
.I
.SrcReg
[1].Swizzle
)
590 /* src0 and src1 can't have absolute values */
591 if (inst_add
->U
.I
.SrcReg
[0].Abs
|| inst_add
->U
.I
.SrcReg
[1].Abs
)
594 /* presub_replace_add() assumes only one is negative */
595 if (inst_add
->U
.I
.SrcReg
[0].Negate
&& inst_add
->U
.I
.SrcReg
[1].Negate
)
598 /* if src0 is negative, at least all bits of dstmask have to be set */
599 if (inst_add
->U
.I
.SrcReg
[0].Negate
&& src0_neg
!= dstmask
)
602 /* if src1 is negative, at least all bits of dstmask have to be set */
603 if (inst_add
->U
.I
.SrcReg
[1].Negate
&& src1_neg
!= dstmask
)
606 if (!is_presub_candidate(c
, inst_add
))
609 if (presub_helper(c
, inst_add
, RC_PRESUB_ADD
, presub_replace_add
)) {
610 rc_remove_instruction(inst_add
);
616 static void presub_replace_inv(
617 struct rc_instruction
* inst_add
,
618 struct rc_instruction
* inst_reader
,
619 unsigned int src_index
)
621 /* We must be careful not to modify inst_add, since it
622 * is possible it will remain part of the program.*/
623 inst_reader
->U
.I
.PreSub
.SrcReg
[0] = inst_add
->U
.I
.SrcReg
[1];
624 inst_reader
->U
.I
.PreSub
.SrcReg
[0].Negate
= 0;
625 inst_reader
->U
.I
.PreSub
.Opcode
= RC_PRESUB_INV
;
626 inst_reader
->U
.I
.SrcReg
[src_index
] = chain_srcregs(inst_reader
->U
.I
.SrcReg
[src_index
],
627 inst_reader
->U
.I
.PreSub
.SrcReg
[0]);
629 inst_reader
->U
.I
.SrcReg
[src_index
].File
= RC_FILE_PRESUB
;
630 inst_reader
->U
.I
.SrcReg
[src_index
].Index
= RC_PRESUB_INV
;
634 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
635 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
636 * of the add instruction must have the constatnt 1 swizzle. This function
637 * does not check const registers to see if their value is 1.0, so it should
638 * be called after the constant_folding optimization.
640 * 0 if the ADD instruction is still part of the program.
641 * 1 if the ADD instruction is no longer part of the program.
643 static int peephole_add_presub_inv(
644 struct radeon_compiler
* c
,
645 struct rc_instruction
* inst_add
)
649 if (!is_presub_candidate(c
, inst_add
))
652 /* Check if src0 is 1. */
653 /* XXX It would be nice to use is_src_uniform_constant here, but that
654 * function only works if the register's file is RC_FILE_NONE */
655 for(i
= 0; i
< 4; i
++ ) {
656 swz
= GET_SWZ(inst_add
->U
.I
.SrcReg
[0].Swizzle
, i
);
657 if(((1 << i
) & inst_add
->U
.I
.DstReg
.WriteMask
)
658 && swz
!= RC_SWIZZLE_ONE
) {
664 if ((inst_add
->U
.I
.SrcReg
[1].Negate
& inst_add
->U
.I
.DstReg
.WriteMask
) !=
665 inst_add
->U
.I
.DstReg
.WriteMask
666 || inst_add
->U
.I
.SrcReg
[1].Abs
667 || (inst_add
->U
.I
.SrcReg
[1].File
!= RC_FILE_TEMPORARY
668 && inst_add
->U
.I
.SrcReg
[1].File
!= RC_FILE_CONSTANT
)
669 || src_has_const_swz(inst_add
->U
.I
.SrcReg
[1])) {
674 if (presub_helper(c
, inst_add
, RC_PRESUB_INV
, presub_replace_inv
)) {
675 rc_remove_instruction(inst_add
);
/**
 * User data for omod_filter_reader_cb() and omod_filter_writer_cb().
 *
 * Writer is the destination register being tracked; Clobbered is set to 1 by
 * the callbacks when an instruction reads or writes that register.
 */
struct peephole_mul_cb_data {
	struct rc_dst_register * Writer;
	unsigned int Clobbered;
};
686 static void omod_filter_reader_cb(
688 struct rc_instruction
* inst
,
689 rc_register_file file
,
693 struct peephole_mul_cb_data
* d
= userdata
;
694 if (rc_src_reads_dst_mask(file
, mask
, index
,
695 d
->Writer
->File
, d
->Writer
->Index
, d
->Writer
->WriteMask
)) {
701 static void omod_filter_writer_cb(
703 struct rc_instruction
* inst
,
704 rc_register_file file
,
708 struct peephole_mul_cb_data
* d
= userdata
;
709 if (file
== d
->Writer
->File
&& index
== d
->Writer
->Index
&&
710 (mask
& d
->Writer
->WriteMask
)) {
715 static int peephole_mul_omod(
716 struct radeon_compiler
* c
,
717 struct rc_instruction
* inst_mul
,
718 struct rc_list
* var_list
)
720 unsigned int chan
= 0, swz
, i
;
721 int const_index
= -1;
724 rc_omod_op omod_op
= RC_OMOD_DISABLE
;
725 struct rc_list
* writer_list
;
726 struct rc_variable
* var
;
727 struct peephole_mul_cb_data cb_data
;
728 unsigned writemask_sum
;
730 for (i
= 0; i
< 2; i
++) {
732 if (inst_mul
->U
.I
.SrcReg
[i
].File
!= RC_FILE_CONSTANT
733 && inst_mul
->U
.I
.SrcReg
[i
].File
!= RC_FILE_TEMPORARY
) {
736 if (inst_mul
->U
.I
.SrcReg
[i
].File
== RC_FILE_TEMPORARY
) {
737 if (temp_index
!= -1) {
738 /* The instruction has two temp sources */
745 /* If we get this far Src[i] must be a constant src */
746 if (inst_mul
->U
.I
.SrcReg
[i
].Negate
) {
749 /* The constant src needs to read from the same swizzle */
750 swz
= RC_SWIZZLE_UNUSED
;
752 for (j
= 0; j
< 4; j
++) {
754 GET_SWZ(inst_mul
->U
.I
.SrcReg
[i
].Swizzle
, j
);
755 if (j_swz
== RC_SWIZZLE_UNUSED
) {
758 if (swz
== RC_SWIZZLE_UNUSED
) {
761 } else if (j_swz
!= swz
) {
766 if (const_index
!= -1) {
767 /* The instruction has two constant sources */
774 if (!rc_src_reg_is_immediate(c
, inst_mul
->U
.I
.SrcReg
[const_index
].File
,
775 inst_mul
->U
.I
.SrcReg
[const_index
].Index
)) {
778 const_value
= rc_get_constant_value(c
,
779 inst_mul
->U
.I
.SrcReg
[const_index
].Index
,
780 inst_mul
->U
.I
.SrcReg
[const_index
].Swizzle
,
781 inst_mul
->U
.I
.SrcReg
[const_index
].Negate
,
784 if (const_value
== 2.0f
) {
785 omod_op
= RC_OMOD_MUL_2
;
786 } else if (const_value
== 4.0f
) {
787 omod_op
= RC_OMOD_MUL_4
;
788 } else if (const_value
== 8.0f
) {
789 omod_op
= RC_OMOD_MUL_8
;
790 } else if (const_value
== (1.0f
/ 2.0f
)) {
791 omod_op
= RC_OMOD_DIV_2
;
792 } else if (const_value
== (1.0f
/ 4.0f
)) {
793 omod_op
= RC_OMOD_DIV_4
;
794 } else if (const_value
== (1.0f
/ 8.0f
)) {
795 omod_op
= RC_OMOD_DIV_8
;
800 writer_list
= rc_variable_list_get_writers_one_reader(var_list
,
801 RC_INSTRUCTION_NORMAL
, &inst_mul
->U
.I
.SrcReg
[temp_index
]);
807 cb_data
.Clobbered
= 0;
808 cb_data
.Writer
= &inst_mul
->U
.I
.DstReg
;
809 for (var
= writer_list
->Item
; var
; var
= var
->Friend
) {
810 struct rc_instruction
* inst
;
811 const struct rc_opcode_info
* info
= rc_get_opcode_info(
812 var
->Inst
->U
.I
.Opcode
);
813 if (info
->HasTexture
) {
816 if (var
->Inst
->U
.I
.SaturateMode
!= RC_SATURATE_NONE
) {
819 for (inst
= inst_mul
->Prev
; inst
!= var
->Inst
;
821 rc_for_all_reads_mask(inst
, omod_filter_reader_cb
,
823 rc_for_all_writes_mask(inst
, omod_filter_writer_cb
,
825 if (cb_data
.Clobbered
) {
831 if (cb_data
.Clobbered
) {
835 /* Rewrite the instructions */
836 writemask_sum
= rc_variable_writemask_sum(writer_list
->Item
);
837 for (var
= writer_list
->Item
; var
; var
= var
->Friend
) {
838 struct rc_variable
* writer
= var
;
839 unsigned conversion_swizzle
= rc_make_conversion_swizzle(
841 inst_mul
->U
.I
.DstReg
.WriteMask
);
842 writer
->Inst
->U
.I
.Omod
= omod_op
;
843 writer
->Inst
->U
.I
.DstReg
.File
= inst_mul
->U
.I
.DstReg
.File
;
844 writer
->Inst
->U
.I
.DstReg
.Index
= inst_mul
->U
.I
.DstReg
.Index
;
845 rc_normal_rewrite_writemask(writer
->Inst
, conversion_swizzle
);
846 writer
->Inst
->U
.I
.SaturateMode
= inst_mul
->U
.I
.SaturateMode
;
849 rc_remove_instruction(inst_mul
);
856 * 0 if inst is still part of the program.
857 * 1 if inst is no longer part of the program.
859 static int peephole(struct radeon_compiler
* c
, struct rc_instruction
* inst
)
861 switch(inst
->U
.I
.Opcode
){
864 if(peephole_add_presub_inv(c
, inst
))
866 if(peephole_add_presub_add(c
, inst
))
876 void rc_optimize(struct radeon_compiler
* c
, void *user
)
878 struct rc_instruction
* inst
= c
->Program
.Instructions
.Next
;
879 struct rc_list
* var_list
;
880 while(inst
!= &c
->Program
.Instructions
) {
881 struct rc_instruction
* cur
= inst
;
884 constant_folding(c
, cur
);
889 if (cur
->U
.I
.Opcode
== RC_OPCODE_MOV
) {
890 copy_propagate(c
, cur
);
891 /* cur may no longer be part of the program */
899 inst
= c
->Program
.Instructions
.Next
;
900 while(inst
!= &c
->Program
.Instructions
) {
901 struct rc_instruction
* cur
= inst
;
903 if (cur
->U
.I
.Opcode
== RC_OPCODE_MUL
) {
904 var_list
= rc_get_variables(c
);
905 peephole_mul_omod(c
, cur
, var_list
);