/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Author: Tom Stellard <thomas.stellard@amd.com>
 */

26 #include "radeon_compiler.h"
27 #include "radeon_compiler_util.h"
28 #include "radeon_dataflow.h"
29 #include "radeon_program.h"
30 #include "radeon_program_constants.h"
32 struct vert_fc_state
{
33 struct radeon_compiler
*C
;
36 unsigned LoopsReserved
;
37 int PredStack
[R500_PVS_MAX_LOOP_DEPTH
];
42 static void build_pred_src(
43 struct rc_src_register
* src
,
44 struct vert_fc_state
* fc_state
)
46 src
->Swizzle
= RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED
, RC_SWIZZLE_UNUSED
,
47 RC_SWIZZLE_UNUSED
, RC_SWIZZLE_W
);
48 src
->File
= RC_FILE_TEMPORARY
;
49 src
->Index
= fc_state
->PredicateReg
;
52 static void build_pred_dst(
53 struct rc_dst_register
* dst
,
54 struct vert_fc_state
* fc_state
)
56 dst
->WriteMask
= RC_MASK_W
;
57 dst
->File
= RC_FILE_TEMPORARY
;
58 dst
->Index
= fc_state
->PredicateReg
;
61 static void mark_write(void * userdata
, struct rc_instruction
* inst
,
62 rc_register_file file
, unsigned int index
, unsigned int mask
)
64 unsigned int * writemasks
= userdata
;
66 if (file
!= RC_FILE_TEMPORARY
)
69 if (index
>= R300_VS_MAX_TEMPS
)
72 writemasks
[index
] |= mask
;
75 static int reserve_predicate_reg(struct vert_fc_state
* fc_state
)
78 unsigned int writemasks
[RC_REGISTER_MAX_INDEX
];
79 struct rc_instruction
* inst
;
80 memset(writemasks
, 0, sizeof(writemasks
));
81 for(inst
= fc_state
->C
->Program
.Instructions
.Next
;
82 inst
!= &fc_state
->C
->Program
.Instructions
;
84 rc_for_all_writes_mask(inst
, mark_write
, writemasks
);
87 for(i
= 0; i
< fc_state
->C
->max_temp_regs
; i
++) {
88 /* Most of the control flow instructions only write the
89 * W component of the Predicate Register, but
90 * the docs say that ME_PRED_SET_CLR and
91 * ME_PRED_SET_RESTORE write all components of the
92 * register, so we must reserve a register that has
93 * all its components free. */
95 fc_state
->PredicateReg
= i
;
99 if (i
== fc_state
->C
->max_temp_regs
) {
100 rc_error(fc_state
->C
, "No free temporary to use for"
101 " predicate stack counter.\n");
107 static void lower_bgnloop(
108 struct rc_instruction
* inst
,
109 struct vert_fc_state
* fc_state
)
111 struct rc_instruction
* new_inst
=
112 rc_insert_new_instruction(fc_state
->C
, inst
->Prev
);
114 if ((!fc_state
->C
->is_r500
115 && fc_state
->LoopsReserved
>= R300_VS_MAX_LOOP_DEPTH
)
116 || fc_state
->LoopsReserved
>= R500_PVS_MAX_LOOP_DEPTH
) {
117 rc_error(fc_state
->C
, "Loops are nested too deep.");
121 if (fc_state
->LoopDepth
== 0 && fc_state
->BranchDepth
== 0) {
122 if (fc_state
->PredicateReg
== -1) {
123 if (reserve_predicate_reg(fc_state
) == -1) {
128 /* Initialize the predicate bit to true. */
129 new_inst
->U
.I
.Opcode
= RC_ME_PRED_SEQ
;
130 build_pred_dst(&new_inst
->U
.I
.DstReg
, fc_state
);
131 new_inst
->U
.I
.SrcReg
[0].Index
= 0;
132 new_inst
->U
.I
.SrcReg
[0].File
= RC_FILE_NONE
;
133 new_inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
135 fc_state
->PredStack
[fc_state
->LoopDepth
] =
136 fc_state
->PredicateReg
;
137 /* Copy the the current predicate value to this loop's
138 * predicate register */
140 /* Use the old predicate value for src0 */
141 build_pred_src(&new_inst
->U
.I
.SrcReg
[0], fc_state
);
143 /* Reserve this loop's predicate register */
144 if (reserve_predicate_reg(fc_state
) == -1) {
148 /* Copy the old predicate value to the new register */
149 new_inst
->U
.I
.Opcode
= RC_OPCODE_ADD
;
150 build_pred_dst(&new_inst
->U
.I
.DstReg
, fc_state
);
151 new_inst
->U
.I
.SrcReg
[1].Index
= 0;
152 new_inst
->U
.I
.SrcReg
[1].File
= RC_FILE_NONE
;
153 new_inst
->U
.I
.SrcReg
[1].Swizzle
= RC_SWIZZLE_0000
;
158 static void lower_brk(
159 struct rc_instruction
* inst
,
160 struct vert_fc_state
* fc_state
)
162 if (fc_state
->LoopDepth
== 1) {
163 inst
->U
.I
.Opcode
= RC_OPCODE_RCP
;
164 inst
->U
.I
.DstReg
.Pred
= RC_PRED_INV
;
165 inst
->U
.I
.SrcReg
[0].Index
= 0;
166 inst
->U
.I
.SrcReg
[0].File
= RC_FILE_NONE
;
167 inst
->U
.I
.SrcReg
[0].Swizzle
= RC_SWIZZLE_0000
;
169 inst
->U
.I
.Opcode
= RC_ME_PRED_SET_CLR
;
170 inst
->U
.I
.DstReg
.Pred
= RC_PRED_SET
;
173 build_pred_dst(&inst
->U
.I
.DstReg
, fc_state
);
176 static void lower_endloop(
177 struct rc_instruction
* inst
,
178 struct vert_fc_state
* fc_state
)
180 struct rc_instruction
* new_inst
=
181 rc_insert_new_instruction(fc_state
->C
, inst
);
183 new_inst
->U
.I
.Opcode
= RC_ME_PRED_SET_RESTORE
;
184 build_pred_dst(&new_inst
->U
.I
.DstReg
, fc_state
);
185 /* Restore the previous predicate register. */
186 fc_state
->PredicateReg
= fc_state
->PredStack
[fc_state
->LoopDepth
- 1];
187 build_pred_src(&new_inst
->U
.I
.SrcReg
[0], fc_state
);
190 static void lower_if(
191 struct rc_instruction
* inst
,
192 struct vert_fc_state
* fc_state
)
194 /* Reserve a temporary to use as our predicate stack counter, if we
195 * don't already have one. */
196 if (fc_state
->PredicateReg
== -1) {
197 /* If we are inside a loop, the Predicate Register should
198 * have already been defined. */
199 assert(fc_state
->LoopDepth
== 0);
201 if (reserve_predicate_reg(fc_state
) == -1) {
206 if (inst
->Next
->U
.I
.Opcode
== RC_OPCODE_BRK
) {
207 fc_state
->InCFBreak
= 1;
209 if ((fc_state
->BranchDepth
== 0 && fc_state
->LoopDepth
== 0)
210 || (fc_state
->LoopDepth
== 1 && fc_state
->InCFBreak
)) {
211 if (fc_state
->InCFBreak
) {
212 inst
->U
.I
.Opcode
= RC_ME_PRED_SEQ
;
213 inst
->U
.I
.DstReg
.Pred
= RC_PRED_SET
;
215 inst
->U
.I
.Opcode
= RC_ME_PRED_SNEQ
;
219 inst
->U
.I
.Opcode
= RC_VE_PRED_SNEQ_PUSH
;
220 memcpy(&inst
->U
.I
.SrcReg
[1], &inst
->U
.I
.SrcReg
[0],
221 sizeof(inst
->U
.I
.SrcReg
[1]));
222 swz
= rc_get_scalar_src_swz(inst
->U
.I
.SrcReg
[1].Swizzle
);
223 /* VE_PRED_SNEQ_PUSH needs to the branch condition to be in the
225 inst
->U
.I
.SrcReg
[1].Swizzle
= RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED
,
226 RC_SWIZZLE_UNUSED
, RC_SWIZZLE_UNUSED
, swz
);
227 build_pred_src(&inst
->U
.I
.SrcReg
[0], fc_state
);
229 build_pred_dst(&inst
->U
.I
.DstReg
, fc_state
);
232 void rc_vert_fc(struct radeon_compiler
*c
, void *user
)
234 struct rc_instruction
* inst
;
235 struct vert_fc_state fc_state
;
237 memset(&fc_state
, 0, sizeof(fc_state
));
238 fc_state
.PredicateReg
= -1;
241 for(inst
= c
->Program
.Instructions
.Next
;
242 inst
!= &c
->Program
.Instructions
;
245 switch (inst
->U
.I
.Opcode
) {
247 case RC_OPCODE_BGNLOOP
:
248 lower_bgnloop(inst
, &fc_state
);
249 fc_state
.LoopDepth
++;
253 lower_brk(inst
, &fc_state
);
256 case RC_OPCODE_ENDLOOP
:
257 if (fc_state
.BranchDepth
!= 0
258 || fc_state
.LoopDepth
!= 1) {
259 lower_endloop(inst
, &fc_state
);
261 fc_state
.LoopDepth
--;
262 /* Skip PRED_RESTORE */
266 lower_if(inst
, &fc_state
);
267 fc_state
.BranchDepth
++;
271 inst
->U
.I
.Opcode
= RC_ME_PRED_SET_INV
;
272 build_pred_dst(&inst
->U
.I
.DstReg
, &fc_state
);
273 build_pred_src(&inst
->U
.I
.SrcReg
[0], &fc_state
);
276 case RC_OPCODE_ENDIF
:
277 if (fc_state
.LoopDepth
== 1 && fc_state
.InCFBreak
) {
278 struct rc_instruction
* to_delete
= inst
;
280 rc_remove_instruction(to_delete
);
281 /* XXX: Delete the endif instruction */
283 inst
->U
.I
.Opcode
= RC_ME_PRED_SET_POP
;
284 build_pred_dst(&inst
->U
.I
.DstReg
, &fc_state
);
285 build_pred_src(&inst
->U
.I
.SrcReg
[0], &fc_state
);
287 fc_state
.InCFBreak
= 0;
288 fc_state
.BranchDepth
--;
292 if (fc_state
.BranchDepth
|| fc_state
.LoopDepth
) {
293 inst
->U
.I
.DstReg
.Pred
= RC_PRED_SET
;