2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 #include "radeon_dataflow.h"
31 #include "radeon_compiler.h"
32 #include "radeon_compiler_util.h"
33 #include "radeon_list.h"
34 #include "radeon_swizzle.h"
35 #include "radeon_variable.h"
37 struct src_clobbered_reads_cb_data
{
38 rc_register_file File
;
41 struct rc_reader_data
* ReaderData
;
44 typedef void (*rc_presub_replace_fn
)(struct rc_instruction
*,
45 struct rc_instruction
*,
48 static struct rc_src_register
chain_srcregs(struct rc_src_register outer
, struct rc_src_register inner
)
50 struct rc_src_register combine
;
51 combine
.File
= inner
.File
;
52 combine
.Index
= inner
.Index
;
53 combine
.RelAddr
= inner
.RelAddr
;
56 combine
.Negate
= outer
.Negate
;
58 combine
.Abs
= inner
.Abs
;
59 combine
.Negate
= swizzle_mask(outer
.Swizzle
, inner
.Negate
);
60 combine
.Negate
^= outer
.Negate
;
62 combine
.Swizzle
= combine_swizzles(inner
.Swizzle
, outer
.Swizzle
);
66 static void copy_propagate_scan_read(void * data
, struct rc_instruction
* inst
,
67 struct rc_src_register
* src
)
69 rc_register_file file
= src
->File
;
70 struct rc_reader_data
* reader_data
= data
;
72 if(!rc_inst_can_use_presub(inst
,
73 reader_data
->Writer
->U
.I
.PreSub
.Opcode
,
74 rc_swizzle_to_writemask(src
->Swizzle
),
76 &reader_data
->Writer
->U
.I
.PreSub
.SrcReg
[0],
77 &reader_data
->Writer
->U
.I
.PreSub
.SrcReg
[1])) {
78 reader_data
->Abort
= 1;
82 /* XXX This could probably be handled better. */
83 if (file
== RC_FILE_ADDRESS
) {
84 reader_data
->Abort
= 1;
88 /* These instructions cannot read from the constants file.
89 * see radeonTransformTEX()
91 if(reader_data
->Writer
->U
.I
.SrcReg
[0].File
!= RC_FILE_TEMPORARY
&&
92 reader_data
->Writer
->U
.I
.SrcReg
[0].File
!= RC_FILE_INPUT
&&
93 (inst
->U
.I
.Opcode
== RC_OPCODE_TEX
||
94 inst
->U
.I
.Opcode
== RC_OPCODE_TXB
||
95 inst
->U
.I
.Opcode
== RC_OPCODE_TXP
||
96 inst
->U
.I
.Opcode
== RC_OPCODE_TXD
||
97 inst
->U
.I
.Opcode
== RC_OPCODE_TXL
||
98 inst
->U
.I
.Opcode
== RC_OPCODE_KIL
)){
99 reader_data
->Abort
= 1;
104 static void src_clobbered_reads_cb(
106 struct rc_instruction
* inst
,
107 struct rc_src_register
* src
)
109 struct src_clobbered_reads_cb_data
* sc_data
= data
;
111 if (src
->File
== sc_data
->File
112 && src
->Index
== sc_data
->Index
113 && (rc_swizzle_to_writemask(src
->Swizzle
) & sc_data
->Mask
)) {
115 sc_data
->ReaderData
->AbortOnRead
= RC_MASK_XYZW
;
118 if (src
->RelAddr
&& sc_data
->File
== RC_FILE_ADDRESS
) {
119 sc_data
->ReaderData
->AbortOnRead
= RC_MASK_XYZW
;
123 static void is_src_clobbered_scan_write(
125 struct rc_instruction
* inst
,
126 rc_register_file file
,
130 struct src_clobbered_reads_cb_data sc_data
;
131 struct rc_reader_data
* reader_data
= data
;
133 sc_data
.Index
= index
;
135 sc_data
.ReaderData
= reader_data
;
136 rc_for_all_reads_src(reader_data
->Writer
,
137 src_clobbered_reads_cb
, &sc_data
);
140 static void copy_propagate(struct radeon_compiler
* c
, struct rc_instruction
* inst_mov
)
142 struct rc_reader_data reader_data
;
145 if (inst_mov
->U
.I
.DstReg
.File
!= RC_FILE_TEMPORARY
||
146 inst_mov
->U
.I
.WriteALUResult
||
147 inst_mov
->U
.I
.SaturateMode
)
150 /* Get a list of all the readers of this MOV instruction. */
151 reader_data
.ExitOnAbort
= 1;
152 rc_get_readers(c
, inst_mov
, &reader_data
,
153 copy_propagate_scan_read
, NULL
,
154 is_src_clobbered_scan_write
);
156 if (reader_data
.Abort
|| reader_data
.ReaderCount
== 0)
159 /* Propagate the MOV instruction. */
160 for (i
= 0; i
< reader_data
.ReaderCount
; i
++) {
161 struct rc_instruction
* inst
= reader_data
.Readers
[i
].Inst
;
162 *reader_data
.Readers
[i
].U
.I
.Src
= chain_srcregs(*reader_data
.Readers
[i
].U
.I
.Src
, inst_mov
->U
.I
.SrcReg
[0]);
164 if (inst_mov
->U
.I
.SrcReg
[0].File
== RC_FILE_PRESUB
)
165 inst
->U
.I
.PreSub
= inst_mov
->U
.I
.PreSub
;
168 /* Finally, remove the original MOV instruction */
169 rc_remove_instruction(inst_mov
);
173 * Check if a source register is actually always the same
176 static int is_src_uniform_constant(struct rc_src_register src
,
177 rc_swizzle
* pswz
, unsigned int * pnegate
)
181 if (src
.File
!= RC_FILE_NONE
) {
186 for(unsigned int chan
= 0; chan
< 4; ++chan
) {
187 unsigned int swz
= GET_SWZ(src
.Swizzle
, chan
);
192 if (swz
== RC_SWIZZLE_UNUSED
)
197 *pnegate
= GET_BIT(src
.Negate
, chan
);
200 if (swz
!= *pswz
|| *pnegate
!= GET_BIT(src
.Negate
, chan
)) {
210 static void constant_folding_mad(struct rc_instruction
* inst
)
213 unsigned int negate
= 0;
215 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[2], &swz
, &negate
)) {
216 if (swz
== RC_SWIZZLE_ZERO
) {
217 inst
->U
.I
.Opcode
= RC_OPCODE_MUL
;
222 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
223 if (swz
== RC_SWIZZLE_ONE
) {
224 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
226 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
227 inst
->U
.I
.SrcReg
[1] = inst
->U
.I
.SrcReg
[2];
229 } else if (swz
== RC_SWIZZLE_ZERO
) {
230 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
231 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
236 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
237 if (swz
== RC_SWIZZLE_ONE
) {
238 inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
240 inst
->U
.I
.SrcReg
[1].Negate
^= RC_MASK_XYZW
;
241 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
243 } else if (swz
== RC_SWIZZLE_ZERO
) {
244 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
245 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[2];
251 static void constant_folding_mul(struct rc_instruction
* inst
)
254 unsigned int negate
= 0;
256 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
257 if (swz
== RC_SWIZZLE_ONE
) {
258 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
259 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
261 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
263 } else if (swz
== RC_SWIZZLE_ZERO
) {
264 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
265 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
270 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
271 if (swz
== RC_SWIZZLE_ONE
) {
272 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
274 inst
->U
.I
.SrcReg
[0].Negate
^= RC_MASK_XYZW
;
276 } else if (swz
== RC_SWIZZLE_ZERO
) {
277 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
278 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
284 static void constant_folding_add(struct rc_instruction
* inst
)
287 unsigned int negate
= 0;
289 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[0], &swz
, &negate
)) {
290 if (swz
== RC_SWIZZLE_ZERO
) {
291 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
292 inst
->U
.I
.SrcReg
[0] = inst
->U
.I
.SrcReg
[1];
297 if (is_src_uniform_constant(inst
->U
.I
.SrcReg
[1], &swz
, &negate
)) {
298 if (swz
== RC_SWIZZLE_ZERO
) {
299 inst
->U
.I
.Opcode
= RC_OPCODE_MOV
;
306 * Replace 0.0, 1.0 and 0.5 immediate constants by their
307 * respective swizzles. Simplify instructions like ADD dst, src, 0;
309 static void constant_folding(struct radeon_compiler
* c
, struct rc_instruction
* inst
)
311 const struct rc_opcode_info
* opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
314 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
315 for(unsigned int src
= 0; src
< opcode
->NumSrcRegs
; ++src
) {
316 struct rc_constant
* constant
;
317 struct rc_src_register newsrc
;
318 int have_real_reference
;
321 /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
322 for (chan
= 0; chan
< 4; ++chan
)
323 if (GET_SWZ(inst
->U
.I
.SrcReg
[src
].Swizzle
, chan
) <= 3)
326 inst
->U
.I
.SrcReg
[src
].File
= RC_FILE_NONE
;
330 /* Convert immediates to swizzles. */
331 if (inst
->U
.I
.SrcReg
[src
].File
!= RC_FILE_CONSTANT
||
332 inst
->U
.I
.SrcReg
[src
].RelAddr
||
333 inst
->U
.I
.SrcReg
[src
].Index
>= c
->Program
.Constants
.Count
)
337 &c
->Program
.Constants
.Constants
[inst
->U
.I
.SrcReg
[src
].Index
];
339 if (constant
->Type
!= RC_CONSTANT_IMMEDIATE
)
342 newsrc
= inst
->U
.I
.SrcReg
[src
];
343 have_real_reference
= 0;
344 for (chan
= 0; chan
< 4; ++chan
) {
345 unsigned int swz
= GET_SWZ(newsrc
.Swizzle
, chan
);
353 imm
= constant
->u
.Immediate
[swz
];
358 if (baseimm
== 0.0) {
359 newswz
= RC_SWIZZLE_ZERO
;
360 } else if (baseimm
== 1.0) {
361 newswz
= RC_SWIZZLE_ONE
;
362 } else if (baseimm
== 0.5 && c
->has_half_swizzles
) {
363 newswz
= RC_SWIZZLE_HALF
;
365 have_real_reference
= 1;
369 SET_SWZ(newsrc
.Swizzle
, chan
, newswz
);
370 if (imm
< 0.0 && !newsrc
.Abs
)
371 newsrc
.Negate
^= 1 << chan
;
374 if (!have_real_reference
) {
375 newsrc
.File
= RC_FILE_NONE
;
379 /* don't make the swizzle worse */
380 if (!c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, newsrc
) &&
381 c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, inst
->U
.I
.SrcReg
[src
]))
384 inst
->U
.I
.SrcReg
[src
] = newsrc
;
387 /* Simplify instructions based on constants */
388 if (inst
->U
.I
.Opcode
== RC_OPCODE_MAD
)
389 constant_folding_mad(inst
);
391 /* note: MAD can simplify to MUL or ADD */
392 if (inst
->U
.I
.Opcode
== RC_OPCODE_MUL
)
393 constant_folding_mul(inst
);
394 else if (inst
->U
.I
.Opcode
== RC_OPCODE_ADD
)
395 constant_folding_add(inst
);
397 /* In case this instruction has been converted, make sure all of the
398 * registers that are no longer used are empty. */
399 opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
400 for(i
= opcode
->NumSrcRegs
; i
< 3; i
++) {
401 memset(&inst
->U
.I
.SrcReg
[i
], 0, sizeof(struct rc_src_register
));
406 * If src and dst use the same register, this function returns a writemask that
407 * indicates which components are read by src. Otherwise zero is returned.
409 static unsigned int src_reads_dst_mask(struct rc_src_register src
,
410 struct rc_dst_register dst
)
412 if (dst
.File
!= src
.File
|| dst
.Index
!= src
.Index
) {
415 return rc_swizzle_to_writemask(src
.Swizzle
);
418 /* Return 1 if the source register has a constant swizzle (e.g. 0, 0.5, 1.0)
419 * in any of its channels. Return 0 otherwise. */
420 static int src_has_const_swz(struct rc_src_register src
) {
422 for(chan
= 0; chan
< 4; chan
++) {
423 unsigned int swz
= GET_SWZ(src
.Swizzle
, chan
);
424 if (swz
== RC_SWIZZLE_ZERO
|| swz
== RC_SWIZZLE_HALF
425 || swz
== RC_SWIZZLE_ONE
) {
432 static void presub_scan_read(
434 struct rc_instruction
* inst
,
435 struct rc_src_register
* src
)
437 struct rc_reader_data
* reader_data
= data
;
438 rc_presubtract_op
* presub_opcode
= reader_data
->CbData
;
440 if (!rc_inst_can_use_presub(inst
, *presub_opcode
,
441 reader_data
->Writer
->U
.I
.DstReg
.WriteMask
,
443 &reader_data
->Writer
->U
.I
.SrcReg
[0],
444 &reader_data
->Writer
->U
.I
.SrcReg
[1])) {
445 reader_data
->Abort
= 1;
450 static int presub_helper(
451 struct radeon_compiler
* c
,
452 struct rc_instruction
* inst_add
,
453 rc_presubtract_op presub_opcode
,
454 rc_presub_replace_fn presub_replace
)
456 struct rc_reader_data reader_data
;
458 rc_presubtract_op cb_op
= presub_opcode
;
460 reader_data
.CbData
= &cb_op
;
461 reader_data
.ExitOnAbort
= 1;
462 rc_get_readers(c
, inst_add
, &reader_data
, presub_scan_read
, NULL
,
463 is_src_clobbered_scan_write
);
465 if (reader_data
.Abort
|| reader_data
.ReaderCount
== 0)
468 for(i
= 0; i
< reader_data
.ReaderCount
; i
++) {
469 unsigned int src_index
;
470 struct rc_reader reader
= reader_data
.Readers
[i
];
471 const struct rc_opcode_info
* info
=
472 rc_get_opcode_info(reader
.Inst
->U
.I
.Opcode
);
474 for (src_index
= 0; src_index
< info
->NumSrcRegs
; src_index
++) {
475 if (&reader
.Inst
->U
.I
.SrcReg
[src_index
] == reader
.U
.I
.Src
)
476 presub_replace(inst_add
, reader
.Inst
, src_index
);
482 /* This function assumes that inst_add->U.I.SrcReg[0] and
483 * inst_add->U.I.SrcReg[1] aren't both negative. */
484 static void presub_replace_add(
485 struct rc_instruction
* inst_add
,
486 struct rc_instruction
* inst_reader
,
487 unsigned int src_index
)
489 rc_presubtract_op presub_opcode
;
490 if (inst_add
->U
.I
.SrcReg
[1].Negate
|| inst_add
->U
.I
.SrcReg
[0].Negate
)
491 presub_opcode
= RC_PRESUB_SUB
;
493 presub_opcode
= RC_PRESUB_ADD
;
495 if (inst_add
->U
.I
.SrcReg
[1].Negate
) {
496 inst_reader
->U
.I
.PreSub
.SrcReg
[0] = inst_add
->U
.I
.SrcReg
[1];
497 inst_reader
->U
.I
.PreSub
.SrcReg
[1] = inst_add
->U
.I
.SrcReg
[0];
499 inst_reader
->U
.I
.PreSub
.SrcReg
[0] = inst_add
->U
.I
.SrcReg
[0];
500 inst_reader
->U
.I
.PreSub
.SrcReg
[1] = inst_add
->U
.I
.SrcReg
[1];
502 inst_reader
->U
.I
.PreSub
.SrcReg
[0].Negate
= 0;
503 inst_reader
->U
.I
.PreSub
.SrcReg
[1].Negate
= 0;
504 inst_reader
->U
.I
.PreSub
.Opcode
= presub_opcode
;
505 inst_reader
->U
.I
.SrcReg
[src_index
] =
506 chain_srcregs(inst_reader
->U
.I
.SrcReg
[src_index
],
507 inst_reader
->U
.I
.PreSub
.SrcReg
[0]);
508 inst_reader
->U
.I
.SrcReg
[src_index
].File
= RC_FILE_PRESUB
;
509 inst_reader
->U
.I
.SrcReg
[src_index
].Index
= presub_opcode
;
512 static int is_presub_candidate(
513 struct radeon_compiler
* c
,
514 struct rc_instruction
* inst
)
516 const struct rc_opcode_info
* info
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
518 unsigned int is_constant
[2] = {0, 0};
520 assert(inst
->U
.I
.Opcode
== RC_OPCODE_ADD
);
522 if (inst
->U
.I
.PreSub
.Opcode
!= RC_PRESUB_NONE
523 || inst
->U
.I
.SaturateMode
524 || inst
->U
.I
.WriteALUResult
529 /* If both sources use a constant swizzle, then we can't convert it to
530 * a presubtract operation. In fact for the ADD and SUB presubtract
531 * operations neither source can contain a constant swizzle. This
532 * specific case is checked in peephole_add_presub_add() when
533 * we make sure the swizzles for both sources are equal, so we
534 * don't need to worry about it here. */
535 for (i
= 0; i
< 2; i
++) {
537 for (chan
= 0; chan
< 4; chan
++) {
539 get_swz(inst
->U
.I
.SrcReg
[i
].Swizzle
, chan
);
540 if (swz
== RC_SWIZZLE_ONE
541 || swz
== RC_SWIZZLE_ZERO
542 || swz
== RC_SWIZZLE_HALF
) {
547 if (is_constant
[0] && is_constant
[1])
550 for(i
= 0; i
< info
->NumSrcRegs
; i
++) {
551 struct rc_src_register src
= inst
->U
.I
.SrcReg
[i
];
552 if (src_reads_dst_mask(src
, inst
->U
.I
.DstReg
))
555 src
.File
= RC_FILE_PRESUB
;
556 if (!c
->SwizzleCaps
->IsNative(inst
->U
.I
.Opcode
, src
))
562 static int peephole_add_presub_add(
563 struct radeon_compiler
* c
,
564 struct rc_instruction
* inst_add
)
566 unsigned dstmask
= inst_add
->U
.I
.DstReg
.WriteMask
;
567 unsigned src0_neg
= inst_add
->U
.I
.SrcReg
[0].Negate
& dstmask
;
568 unsigned src1_neg
= inst_add
->U
.I
.SrcReg
[1].Negate
& dstmask
;
570 if (inst_add
->U
.I
.SrcReg
[0].Swizzle
!= inst_add
->U
.I
.SrcReg
[1].Swizzle
)
573 /* src0 and src1 can't have absolute values */
574 if (inst_add
->U
.I
.SrcReg
[0].Abs
|| inst_add
->U
.I
.SrcReg
[1].Abs
)
577 /* presub_replace_add() assumes only one is negative */
578 if (inst_add
->U
.I
.SrcReg
[0].Negate
&& inst_add
->U
.I
.SrcReg
[1].Negate
)
581 /* if src0 is negative, at least all bits of dstmask have to be set */
582 if (inst_add
->U
.I
.SrcReg
[0].Negate
&& src0_neg
!= dstmask
)
585 /* if src1 is negative, at least all bits of dstmask have to be set */
586 if (inst_add
->U
.I
.SrcReg
[1].Negate
&& src1_neg
!= dstmask
)
589 if (!is_presub_candidate(c
, inst_add
))
592 if (presub_helper(c
, inst_add
, RC_PRESUB_ADD
, presub_replace_add
)) {
593 rc_remove_instruction(inst_add
);
599 static void presub_replace_inv(
600 struct rc_instruction
* inst_add
,
601 struct rc_instruction
* inst_reader
,
602 unsigned int src_index
)
604 /* We must be careful not to modify inst_add, since it
605 * is possible it will remain part of the program.*/
606 inst_reader
->U
.I
.PreSub
.SrcReg
[0] = inst_add
->U
.I
.SrcReg
[1];
607 inst_reader
->U
.I
.PreSub
.SrcReg
[0].Negate
= 0;
608 inst_reader
->U
.I
.PreSub
.Opcode
= RC_PRESUB_INV
;
609 inst_reader
->U
.I
.SrcReg
[src_index
] = chain_srcregs(inst_reader
->U
.I
.SrcReg
[src_index
],
610 inst_reader
->U
.I
.PreSub
.SrcReg
[0]);
612 inst_reader
->U
.I
.SrcReg
[src_index
].File
= RC_FILE_PRESUB
;
613 inst_reader
->U
.I
.SrcReg
[src_index
].Index
= RC_PRESUB_INV
;
617 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
618 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
619 * of the add instruction must have the constant 1 swizzle. This function
620 * does not check const registers to see if their value is 1.0, so it should
621 * be called after the constant_folding optimization.
623 * 0 if the ADD instruction is still part of the program.
624 * 1 if the ADD instruction is no longer part of the program.
626 static int peephole_add_presub_inv(
627 struct radeon_compiler
* c
,
628 struct rc_instruction
* inst_add
)
632 if (!is_presub_candidate(c
, inst_add
))
635 /* Check if src0 is 1. */
636 /* XXX It would be nice to use is_src_uniform_constant here, but that
637 * function only works if the register's file is RC_FILE_NONE */
638 for(i
= 0; i
< 4; i
++ ) {
639 swz
= GET_SWZ(inst_add
->U
.I
.SrcReg
[0].Swizzle
, i
);
640 if(((1 << i
) & inst_add
->U
.I
.DstReg
.WriteMask
)
641 && swz
!= RC_SWIZZLE_ONE
) {
647 if ((inst_add
->U
.I
.SrcReg
[1].Negate
& inst_add
->U
.I
.DstReg
.WriteMask
) !=
648 inst_add
->U
.I
.DstReg
.WriteMask
649 || inst_add
->U
.I
.SrcReg
[1].Abs
650 || (inst_add
->U
.I
.SrcReg
[1].File
!= RC_FILE_TEMPORARY
651 && inst_add
->U
.I
.SrcReg
[1].File
!= RC_FILE_CONSTANT
)
652 || src_has_const_swz(inst_add
->U
.I
.SrcReg
[1])) {
657 if (presub_helper(c
, inst_add
, RC_PRESUB_INV
, presub_replace_inv
)) {
658 rc_remove_instruction(inst_add
);
664 struct peephole_mul_cb_data
{
665 struct rc_dst_register
* Writer
;
666 unsigned int Clobbered
;
669 static void omod_filter_reader_cb(
671 struct rc_instruction
* inst
,
672 rc_register_file file
,
676 struct peephole_mul_cb_data
* d
= userdata
;
677 if (rc_src_reads_dst_mask(file
, mask
, index
,
678 d
->Writer
->File
, d
->Writer
->Index
, d
->Writer
->WriteMask
)) {
684 static void omod_filter_writer_cb(
686 struct rc_instruction
* inst
,
687 rc_register_file file
,
691 struct peephole_mul_cb_data
* d
= userdata
;
692 if (file
== d
->Writer
->File
&& index
== d
->Writer
->Index
&&
693 (mask
& d
->Writer
->WriteMask
)) {
698 static int peephole_mul_omod(
699 struct radeon_compiler
* c
,
700 struct rc_instruction
* inst_mul
,
701 struct rc_list
* var_list
)
703 unsigned int chan
= 0, swz
, i
;
704 int const_index
= -1;
707 rc_omod_op omod_op
= RC_OMOD_DISABLE
;
708 struct rc_list
* writer_list
;
709 struct rc_variable
* var
;
710 struct peephole_mul_cb_data cb_data
;
712 for (i
= 0; i
< 2; i
++) {
714 if (inst_mul
->U
.I
.SrcReg
[i
].File
!= RC_FILE_CONSTANT
715 && inst_mul
->U
.I
.SrcReg
[i
].File
!= RC_FILE_TEMPORARY
) {
718 if (inst_mul
->U
.I
.SrcReg
[i
].File
== RC_FILE_TEMPORARY
) {
719 if (temp_index
!= -1) {
720 /* The instruction has two temp sources */
727 /* If we get this far Src[i] must be a constant src */
728 if (inst_mul
->U
.I
.SrcReg
[i
].Negate
) {
731 /* The constant src needs to read from the same swizzle */
732 swz
= RC_SWIZZLE_UNUSED
;
734 for (j
= 0; j
< 4; j
++) {
736 GET_SWZ(inst_mul
->U
.I
.SrcReg
[i
].Swizzle
, j
);
737 if (j_swz
== RC_SWIZZLE_UNUSED
) {
740 if (swz
== RC_SWIZZLE_UNUSED
) {
743 } else if (j_swz
!= swz
) {
748 if (const_index
!= -1) {
749 /* The instruction has two constant sources */
756 if (!rc_src_reg_is_immediate(c
, inst_mul
->U
.I
.SrcReg
[const_index
].File
,
757 inst_mul
->U
.I
.SrcReg
[const_index
].Index
)) {
760 const_value
= rc_get_constant_value(c
,
761 inst_mul
->U
.I
.SrcReg
[const_index
].Index
,
762 inst_mul
->U
.I
.SrcReg
[const_index
].Swizzle
,
763 inst_mul
->U
.I
.SrcReg
[const_index
].Negate
,
766 if (const_value
== 2.0f
) {
767 omod_op
= RC_OMOD_MUL_2
;
768 } else if (const_value
== 4.0f
) {
769 omod_op
= RC_OMOD_MUL_4
;
770 } else if (const_value
== 8.0f
) {
771 omod_op
= RC_OMOD_MUL_8
;
772 } else if (const_value
== (1.0f
/ 2.0f
)) {
773 omod_op
= RC_OMOD_DIV_2
;
774 } else if (const_value
== (1.0f
/ 4.0f
)) {
775 omod_op
= RC_OMOD_DIV_4
;
776 } else if (const_value
== (1.0f
/ 8.0f
)) {
777 omod_op
= RC_OMOD_DIV_8
;
782 writer_list
= rc_variable_list_get_writers_one_reader(var_list
,
783 RC_INSTRUCTION_NORMAL
, &inst_mul
->U
.I
.SrcReg
[temp_index
]);
789 cb_data
.Clobbered
= 0;
790 cb_data
.Writer
= &inst_mul
->U
.I
.DstReg
;
791 for (var
= writer_list
->Item
; var
; var
= var
->Friend
) {
792 struct rc_instruction
* inst
;
793 const struct rc_opcode_info
* info
= rc_get_opcode_info(
794 var
->Inst
->U
.I
.Opcode
);
795 if (info
->HasTexture
) {
798 if (var
->Inst
->U
.I
.SaturateMode
!= RC_SATURATE_NONE
) {
801 for (inst
= inst_mul
->Prev
; inst
!= var
->Inst
;
803 rc_for_all_reads_mask(inst
, omod_filter_reader_cb
,
805 rc_for_all_writes_mask(inst
, omod_filter_writer_cb
,
807 if (cb_data
.Clobbered
) {
813 if (cb_data
.Clobbered
) {
817 /* Rewrite the instructions */
818 for (var
= writer_list
->Item
; var
; var
= var
->Friend
) {
819 struct rc_variable
* writer
= writer_list
->Item
;
820 unsigned conversion_swizzle
= rc_make_conversion_swizzle(
821 writer
->Inst
->U
.I
.DstReg
.WriteMask
,
822 inst_mul
->U
.I
.DstReg
.WriteMask
);
823 writer
->Inst
->U
.I
.Omod
= omod_op
;
824 writer
->Inst
->U
.I
.DstReg
.File
= inst_mul
->U
.I
.DstReg
.File
;
825 writer
->Inst
->U
.I
.DstReg
.Index
= inst_mul
->U
.I
.DstReg
.Index
;
826 rc_normal_rewrite_writemask(writer
->Inst
, conversion_swizzle
);
827 writer
->Inst
->U
.I
.SaturateMode
= inst_mul
->U
.I
.SaturateMode
;
830 rc_remove_instruction(inst_mul
);
837 * 0 if inst is still part of the program.
838 * 1 if inst is no longer part of the program.
840 static int peephole(struct radeon_compiler
* c
, struct rc_instruction
* inst
)
842 switch(inst
->U
.I
.Opcode
){
845 if(peephole_add_presub_inv(c
, inst
))
847 if(peephole_add_presub_add(c
, inst
))
857 void rc_optimize(struct radeon_compiler
* c
, void *user
)
859 struct rc_instruction
* inst
= c
->Program
.Instructions
.Next
;
860 struct rc_list
* var_list
;
861 while(inst
!= &c
->Program
.Instructions
) {
862 struct rc_instruction
* cur
= inst
;
865 constant_folding(c
, cur
);
870 if (cur
->U
.I
.Opcode
== RC_OPCODE_MOV
) {
871 copy_propagate(c
, cur
);
872 /* cur may no longer be part of the program */
880 inst
= c
->Program
.Instructions
.Next
;
881 while(inst
!= &c
->Program
.Instructions
) {
882 struct rc_instruction
* cur
= inst
;
884 if (cur
->U
.I
.Opcode
== RC_OPCODE_MUL
) {
885 var_list
= rc_get_variables(c
);
886 peephole_mul_omod(c
, cur
, var_list
);