/*
 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "radeon_compiler_util.h"

#include <assert.h>
#include <string.h>

#include "radeon_compiler.h"
#include "radeon_dataflow.h"
38 unsigned int rc_swizzle_to_writemask(unsigned int swz
)
40 unsigned int mask
= 0;
43 for(i
= 0; i
< 4; i
++) {
44 mask
|= 1 << GET_SWZ(swz
, i
);
51 rc_swizzle
get_swz(unsigned int swz
, rc_swizzle idx
)
55 return GET_SWZ(swz
, idx
);
59 * The purpose of this function is to standardize the number channels used by
60 * swizzles. All swizzles regardless of what instruction they are a part of
61 * should have 4 channels initialized with values.
62 * @param channels The number of channels in initial_value that have a
64 * @return An initialized swizzle that has all of the unused channels set to
67 unsigned int rc_init_swizzle(unsigned int initial_value
, unsigned int channels
)
70 for (i
= channels
; i
< 4; i
++) {
71 SET_SWZ(initial_value
, i
, RC_SWIZZLE_UNUSED
);
76 unsigned int combine_swizzles4(unsigned int src
,
77 rc_swizzle swz_x
, rc_swizzle swz_y
, rc_swizzle swz_z
, rc_swizzle swz_w
)
81 ret
|= get_swz(src
, swz_x
);
82 ret
|= get_swz(src
, swz_y
) << 3;
83 ret
|= get_swz(src
, swz_z
) << 6;
84 ret
|= get_swz(src
, swz_w
) << 9;
89 unsigned int combine_swizzles(unsigned int src
, unsigned int swz
)
93 ret
|= get_swz(src
, GET_SWZ(swz
, RC_SWIZZLE_X
));
94 ret
|= get_swz(src
, GET_SWZ(swz
, RC_SWIZZLE_Y
)) << 3;
95 ret
|= get_swz(src
, GET_SWZ(swz
, RC_SWIZZLE_Z
)) << 6;
96 ret
|= get_swz(src
, GET_SWZ(swz
, RC_SWIZZLE_W
)) << 9;
102 * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W
104 rc_swizzle
rc_mask_to_swizzle(unsigned int mask
)
107 case RC_MASK_X
: return RC_SWIZZLE_X
;
108 case RC_MASK_Y
: return RC_SWIZZLE_Y
;
109 case RC_MASK_Z
: return RC_SWIZZLE_Z
;
110 case RC_MASK_W
: return RC_SWIZZLE_W
;
112 return RC_SWIZZLE_UNUSED
;
/**
 * Reorder mask bits according to swizzle.
 *
 * @param swizzle Four selectors; channel chan of the result takes the mask
 * bit of the component that channel chan selects.
 * @param mask The component mask to reorder.
 * @return The reordered mask.  Channels whose selector does not name one of
 * the four components contribute no bit.
 */
unsigned swizzle_mask(unsigned swizzle, unsigned mask)
{
	unsigned ret = 0;

	for (unsigned chan = 0; chan < 4; ++chan) {
		unsigned swz = GET_SWZ(swizzle, chan);

		/* Only selectors 0-3 correspond to a bit of the mask. */
		if (swz < 4)
			ret |= GET_BIT(mask, swz) << chan;
	}
	return ret;
}
/*
 * Helper consulted by the writemask rewriting functions before they adjust
 * source swizzles.  It first tests info->HasTexture and then switches on
 * info->Opcode.
 *
 * NOTE(review): the return statements and every case label of the switch
 * are missing from this copy of the file, so which opcodes are exempt from
 * rewriting cannot be determined here -- restore them from the original
 * source before building.
 */
127 static unsigned int srcs_need_rewrite(const struct rc_opcode_info
* info
)
129 if (info
->HasTexture
) {
132 switch (info
->Opcode
) {
145 * @return A swizzle the results from converting old_swizzle using
148 unsigned int rc_adjust_channels(
149 unsigned int old_swizzle
,
150 unsigned int conversion_swizzle
)
153 unsigned int new_swizzle
= rc_init_swizzle(RC_SWIZZLE_UNUSED
, 0);
154 for (i
= 0; i
< 4; i
++) {
155 unsigned int new_chan
= get_swz(conversion_swizzle
, i
);
156 if (new_chan
== RC_SWIZZLE_UNUSED
) {
159 SET_SWZ(new_swizzle
, new_chan
, GET_SWZ(old_swizzle
, i
));
164 static unsigned int rewrite_writemask(
165 unsigned int old_mask
,
166 unsigned int conversion_swizzle
)
168 unsigned int new_mask
= 0;
171 for (i
= 0; i
< 4; i
++) {
172 if (!GET_BIT(old_mask
, i
)
173 || GET_SWZ(conversion_swizzle
, i
) == RC_SWIZZLE_UNUSED
) {
176 new_mask
|= (1 << GET_SWZ(conversion_swizzle
, i
));
183 * This function rewrites the writemask of sub and adjusts the swizzles
184 * of all its source registers based on the conversion_swizzle.
185 * conversion_swizzle represents a mapping of the old writemask to the
186 * new writemask. For a detailed description of how conversion swizzles
187 * work see rc_rewrite_swizzle().
189 void rc_pair_rewrite_writemask(
190 struct rc_pair_sub_instruction
* sub
,
191 unsigned int conversion_swizzle
)
193 const struct rc_opcode_info
* info
= rc_get_opcode_info(sub
->Opcode
);
196 sub
->WriteMask
= rewrite_writemask(sub
->WriteMask
, conversion_swizzle
);
198 if (!srcs_need_rewrite(info
)) {
202 for (i
= 0; i
< info
->NumSrcRegs
; i
++) {
203 sub
->Arg
[i
].Swizzle
=
204 rc_adjust_channels(sub
->Arg
[i
].Swizzle
,
209 static void normal_rewrite_writemask_cb(
211 struct rc_instruction
* inst
,
212 struct rc_src_register
* src
)
214 unsigned int * conversion_swizzle
= (unsigned int *)userdata
;
215 src
->Swizzle
= rc_adjust_channels(src
->Swizzle
, *conversion_swizzle
);
219 * This function is the same as rc_pair_rewrite_writemask() except it
220 * operates on normal instructions.
222 void rc_normal_rewrite_writemask(
223 struct rc_instruction
* inst
,
224 unsigned int conversion_swizzle
)
226 struct rc_sub_instruction
* sub
= &inst
->U
.I
;
227 const struct rc_opcode_info
* info
= rc_get_opcode_info(sub
->Opcode
);
228 sub
->DstReg
.WriteMask
=
229 rewrite_writemask(sub
->DstReg
.WriteMask
, conversion_swizzle
);
231 if (info
->HasTexture
) {
233 assert(sub
->TexSwizzle
== RC_SWIZZLE_XYZW
);
234 for (i
= 0; i
< 4; i
++) {
235 unsigned int swz
= GET_SWZ(conversion_swizzle
, i
);
238 SET_SWZ(sub
->TexSwizzle
, swz
, i
);
242 if (!srcs_need_rewrite(info
)) {
246 rc_for_all_reads_src(inst
, normal_rewrite_writemask_cb
,
247 &conversion_swizzle
);
251 * This function replaces each value 'swz' in swizzle with the value of
252 * GET_SWZ(conversion_swizzle, swz). So, if you want to change all the X's
253 * in swizzle to Y, then conversion_swizzle should be Y___ (0xff9). If you want
254 * to change all the Y's in swizzle to X, then conversion_swizzle should be
255 * _X__ (0xfc7). If you want to change the Y's to X and the X's to Y, then
256 * conversion swizzle should be YX__ (0xfc1).
257 * @param swizzle The swizzle to change
258 * @param conversion_swizzle Describes the conversion to perform on the swizzle
259 * @return A converted swizzle
261 unsigned int rc_rewrite_swizzle(
262 unsigned int swizzle
,
263 unsigned int conversion_swizzle
)
266 unsigned int out_swizzle
= swizzle
;
268 for (chan
= 0; chan
< 4; chan
++) {
269 unsigned int swz
= GET_SWZ(swizzle
, chan
);
270 unsigned int new_swz
;
272 SET_SWZ(out_swizzle
, chan
, swz
);
274 new_swz
= GET_SWZ(conversion_swizzle
, swz
);
275 if (new_swz
!= RC_SWIZZLE_UNUSED
) {
276 SET_SWZ(out_swizzle
, chan
, new_swz
);
278 SET_SWZ(out_swizzle
, chan
, swz
);
286 * Left multiplication of a register with a swizzle
288 struct rc_src_register
lmul_swizzle(unsigned int swizzle
, struct rc_src_register srcreg
)
290 struct rc_src_register tmp
= srcreg
;
294 for(i
= 0; i
< 4; ++i
) {
295 rc_swizzle swz
= GET_SWZ(swizzle
, i
);
297 tmp
.Swizzle
|= GET_SWZ(srcreg
.Swizzle
, swz
) << (i
*3);
298 tmp
.Negate
|= GET_BIT(srcreg
.Negate
, swz
) << i
;
300 tmp
.Swizzle
|= swz
<< (i
*3);
306 void reset_srcreg(struct rc_src_register
* reg
)
308 memset(reg
, 0, sizeof(struct rc_src_register
));
309 reg
->Swizzle
= RC_SWIZZLE_XYZW
;
312 unsigned int rc_src_reads_dst_mask(
313 rc_register_file src_file
,
314 unsigned int src_idx
,
315 unsigned int src_swz
,
316 rc_register_file dst_file
,
317 unsigned int dst_idx
,
318 unsigned int dst_mask
)
320 if (src_file
!= dst_file
|| src_idx
!= dst_idx
) {
323 return dst_mask
& rc_swizzle_to_writemask(src_swz
);
327 * @return A bit mask specifying whether this swizzle will select from an RGB
328 * source, an Alpha source, or both.
330 unsigned int rc_source_type_swz(unsigned int swizzle
)
333 unsigned int swz
= RC_SWIZZLE_UNUSED
;
334 unsigned int ret
= RC_SOURCE_NONE
;
336 for(chan
= 0; chan
< 4; chan
++) {
337 swz
= GET_SWZ(swizzle
, chan
);
338 if (swz
== RC_SWIZZLE_W
) {
339 ret
|= RC_SOURCE_ALPHA
;
340 } else if (swz
== RC_SWIZZLE_X
|| swz
== RC_SWIZZLE_Y
341 || swz
== RC_SWIZZLE_Z
) {
342 ret
|= RC_SOURCE_RGB
;
348 unsigned int rc_source_type_mask(unsigned int mask
)
350 unsigned int ret
= RC_SOURCE_NONE
;
352 if (mask
& RC_MASK_XYZ
)
353 ret
|= RC_SOURCE_RGB
;
355 if (mask
& RC_MASK_W
)
356 ret
|= RC_SOURCE_ALPHA
;
362 rc_register_file File
;
364 unsigned int SrcType
;
367 struct can_use_presub_data
{
368 struct src_select Selects
[5];
369 unsigned int SelectCount
;
370 const struct rc_src_register
* ReplaceReg
;
371 unsigned int ReplaceRemoved
;
374 static void can_use_presub_data_add_select(
375 struct can_use_presub_data
* data
,
376 rc_register_file file
,
378 unsigned int src_type
)
380 struct src_select
* select
;
382 select
= &data
->Selects
[data
->SelectCount
++];
384 select
->Index
= index
;
385 select
->SrcType
= src_type
;
389 * This callback function counts the number of sources in inst that are
390 * different from the sources in can_use_presub_data->RemoveSrcs.
392 static void can_use_presub_read_cb(
394 struct rc_instruction
* inst
,
395 struct rc_src_register
* src
)
397 struct can_use_presub_data
* d
= userdata
;
399 if (!d
->ReplaceRemoved
&& src
== d
->ReplaceReg
) {
400 d
->ReplaceRemoved
= 1;
404 if (src
->File
== RC_FILE_NONE
)
407 can_use_presub_data_add_select(d
, src
->File
, src
->Index
,
408 rc_source_type_swz(src
->Swizzle
));
411 unsigned int rc_inst_can_use_presub(
412 struct rc_instruction
* inst
,
413 rc_presubtract_op presub_op
,
414 unsigned int presub_writemask
,
415 const struct rc_src_register
* replace_reg
,
416 const struct rc_src_register
* presub_src0
,
417 const struct rc_src_register
* presub_src1
)
419 struct can_use_presub_data d
;
420 unsigned int num_presub_srcs
;
422 const struct rc_opcode_info
* info
=
423 rc_get_opcode_info(inst
->U
.I
.Opcode
);
424 int rgb_count
= 0, alpha_count
= 0;
425 unsigned int src_type0
, src_type1
;
427 if (presub_op
== RC_PRESUB_NONE
) {
431 if (info
->HasTexture
) {
435 /* We can't use more than one presubtract value in an
436 * instruction, unless the two prsubtract operations
437 * are the same and read from the same registers.
438 * XXX For now we will limit instructions to only one presubtract
440 if (inst
->U
.I
.PreSub
.Opcode
!= RC_PRESUB_NONE
) {
444 memset(&d
, 0, sizeof(d
));
445 d
.ReplaceReg
= replace_reg
;
447 rc_for_all_reads_src(inst
, can_use_presub_read_cb
, &d
);
449 num_presub_srcs
= rc_presubtract_src_reg_count(presub_op
);
451 src_type0
= rc_source_type_swz(presub_src0
->Swizzle
);
452 can_use_presub_data_add_select(&d
,
457 if (num_presub_srcs
> 1) {
458 src_type1
= rc_source_type_swz(presub_src1
->Swizzle
);
459 can_use_presub_data_add_select(&d
,
464 /* Even if both of the presub sources read from the same
465 * register, we still need to use 2 different source selects
466 * for them, so we need to increment the count to compensate.
468 if (presub_src0
->File
== presub_src1
->File
469 && presub_src0
->Index
== presub_src1
->Index
) {
470 if (src_type0
& src_type1
& RC_SOURCE_RGB
) {
473 if (src_type0
& src_type1
& RC_SOURCE_ALPHA
) {
479 /* Count the number of source selects for Alpha and RGB. If we
480 * encounter two of the same source selects then we can ignore the
482 for (i
= 0; i
< d
.SelectCount
; i
++) {
484 unsigned int src_type
= d
.Selects
[i
].SrcType
;
485 for (j
= i
+ 1; j
< d
.SelectCount
; j
++) {
486 if (d
.Selects
[i
].File
== d
.Selects
[j
].File
487 && d
.Selects
[i
].Index
== d
.Selects
[j
].Index
) {
488 src_type
&= ~d
.Selects
[j
].SrcType
;
491 if (src_type
& RC_SOURCE_RGB
) {
495 if (src_type
& RC_SOURCE_ALPHA
) {
500 if (rgb_count
> 3 || alpha_count
> 3) {
509 unsigned int HasFileType
;
510 rc_register_file File
;
513 static void max_callback(
515 struct rc_instruction
* inst
,
516 rc_register_file file
,
520 struct max_data
* d
= (struct max_data
*)userdata
;
521 if (file
== d
->File
&& (!d
->HasFileType
|| index
> d
->Max
)) {
528 * @return The maximum index of the specified register file used by the
531 int rc_get_max_index(
532 struct radeon_compiler
* c
,
533 rc_register_file file
)
535 struct max_data data
;
536 struct rc_instruction
* inst
;
538 data
.HasFileType
= 0;
540 for (inst
= c
->Program
.Instructions
.Next
;
541 inst
!= &c
->Program
.Instructions
;
543 rc_for_all_reads_mask(inst
, max_callback
, &data
);
544 rc_for_all_writes_mask(inst
, max_callback
, &data
);
546 if (!data
.HasFileType
) {
553 static unsigned int get_source_readmask(
554 struct rc_pair_sub_instruction
* sub
,
556 unsigned int src_type
)
559 unsigned int readmask
= 0;
560 const struct rc_opcode_info
* info
= rc_get_opcode_info(sub
->Opcode
);
562 for (i
= 0; i
< info
->NumSrcRegs
; i
++) {
563 if (sub
->Arg
[i
].Source
!= source
564 || src_type
!= rc_source_type_swz(sub
->Arg
[i
].Swizzle
)) {
567 readmask
|= rc_swizzle_to_writemask(sub
->Arg
[i
].Swizzle
);
573 * This function attempts to remove a source from a pair instructions.
575 * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd
576 * @param source The index of the source to remove
577 * @param new_readmask A mask representing the components that are read by
578 * the source that is intended to replace the one you are removing. If you
579 * want to remove a source only and not replace it, this parameter should be
581 * @return 1 if the source was successfully removed, 0 if it was not
583 unsigned int rc_pair_remove_src(
584 struct rc_instruction
* inst
,
585 unsigned int src_type
,
587 unsigned int new_readmask
)
589 unsigned int readmask
= 0;
591 readmask
|= get_source_readmask(&inst
->U
.P
.RGB
, source
, src_type
);
592 readmask
|= get_source_readmask(&inst
->U
.P
.Alpha
, source
, src_type
);
594 if ((new_readmask
& readmask
) != readmask
)
597 if (src_type
& RC_SOURCE_RGB
) {
598 memset(&inst
->U
.P
.RGB
.Src
[source
], 0,
599 sizeof(struct rc_pair_instruction_source
));
602 if (src_type
& RC_SOURCE_ALPHA
) {
603 memset(&inst
->U
.P
.Alpha
.Src
[source
], 0,
604 sizeof(struct rc_pair_instruction_source
));
611 * @return RC_OPCODE_NOOP if inst is not a flow control instruction.
612 * @return The opcode of inst if it is a flow control instruction.
614 rc_opcode
rc_get_flow_control_inst(struct rc_instruction
* inst
)
616 const struct rc_opcode_info
* info
;
617 if (inst
->Type
== RC_INSTRUCTION_NORMAL
) {
618 info
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
620 info
= rc_get_opcode_info(inst
->U
.P
.RGB
.Opcode
);
621 /*A flow control instruction shouldn't have an alpha
623 assert(!info
->IsFlowControl
||
624 inst
->U
.P
.Alpha
.Opcode
== RC_OPCODE_NOP
);
627 if (info
->IsFlowControl
)
630 return RC_OPCODE_NOP
;
635 * @return The BGNLOOP instruction that starts the loop ended by endloop.
637 struct rc_instruction
* rc_match_endloop(struct rc_instruction
* endloop
)
639 unsigned int endloop_count
= 0;
640 struct rc_instruction
* inst
;
641 for (inst
= endloop
->Prev
; inst
!= endloop
; inst
= inst
->Prev
) {
642 rc_opcode op
= rc_get_flow_control_inst(inst
);
643 if (op
== RC_OPCODE_ENDLOOP
) {
645 } else if (op
== RC_OPCODE_BGNLOOP
) {
646 if (endloop_count
== 0) {
657 * @return The ENDLOOP instruction that ends the loop started by bgnloop.
659 struct rc_instruction
* rc_match_bgnloop(struct rc_instruction
* bgnloop
)
661 unsigned int bgnloop_count
= 0;
662 struct rc_instruction
* inst
;
663 for (inst
= bgnloop
->Next
; inst
!=bgnloop
; inst
= inst
->Next
) {
664 rc_opcode op
= rc_get_flow_control_inst(inst
);
665 if (op
== RC_OPCODE_BGNLOOP
) {
667 } else if (op
== RC_OPCODE_ENDLOOP
) {
668 if (bgnloop_count
== 0) {
679 * @return A conversion swizzle for converting from old_mask->new_mask
681 unsigned int rc_make_conversion_swizzle(
682 unsigned int old_mask
,
683 unsigned int new_mask
)
685 unsigned int conversion_swizzle
= rc_init_swizzle(RC_SWIZZLE_UNUSED
, 0);
686 unsigned int old_idx
;
687 unsigned int new_idx
= 0;
688 for (old_idx
= 0; old_idx
< 4; old_idx
++) {
689 if (!GET_BIT(old_mask
, old_idx
))
691 for ( ; new_idx
< 4; new_idx
++) {
692 if (GET_BIT(new_mask
, new_idx
)) {
693 SET_SWZ(conversion_swizzle
, old_idx
, new_idx
);
699 return conversion_swizzle
;
703 * @return 1 if the register contains an immediate value, 0 otherwise.
705 unsigned int rc_src_reg_is_immediate(
706 struct radeon_compiler
* c
,
710 return file
== RC_FILE_CONSTANT
&&
711 c
->Program
.Constants
.Constants
[index
].Type
== RC_CONSTANT_IMMEDIATE
;
715 * @return The immediate value in the specified register.
717 float rc_get_constant_value(
718 struct radeon_compiler
* c
,
720 unsigned int swizzle
,
725 int swz
= GET_SWZ(swizzle
, chan
);
726 if(swz
>= 4 || index
>= c
->Program
.Constants
.Count
){
727 rc_error(c
, "get_constant_value: Can't find a value.\n");
730 if(GET_BIT(negate
, chan
)){
734 c
->Program
.Constants
.Constants
[index
].u
.Immediate
[swz
];
738 * This function returns the component value (RC_SWIZZLE_*) of the first used
739 * channel in the swizzle. This is only useful for scalar instructions that are
740 * known to use only one channel of the swizzle.
742 unsigned int rc_get_scalar_src_swz(unsigned int swizzle
)
744 unsigned int swz
, chan
;
745 for (chan
= 0; chan
< 4; chan
++) {
746 swz
= GET_SWZ(swizzle
, chan
);
747 if (swz
!= RC_SWIZZLE_UNUSED
) {
751 assert(swz
!= RC_SWIZZLE_UNUSED
);