2 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 #include "radeon_emulate_loops.h"
35 #include "radeon_compiler.h"
36 #include "radeon_compiler_util.h"
37 #include "radeon_dataflow.h"
41 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
44 struct radeon_compiler
* C
;
45 struct rc_src_register
* Src
;
51 struct radeon_compiler
* C
;
59 static unsigned int loop_max_possible_iterations(struct radeon_compiler
*c
,
60 struct loop_info
* loop
)
62 unsigned int total_i
= rc_recompute_ips(c
);
63 unsigned int loop_i
= (loop
->EndLoop
->IP
- loop
->BeginLoop
->IP
) - 1;
64 /* +1 because the program already has one iteration of the loop. */
65 return 1 + ((c
->max_alu_insts
- total_i
) / loop_i
);
68 static void unroll_loop(struct radeon_compiler
* c
, struct loop_info
* loop
,
69 unsigned int iterations
)
72 struct rc_instruction
* ptr
;
73 struct rc_instruction
* first
= loop
->BeginLoop
->Next
;
74 struct rc_instruction
* last
= loop
->EndLoop
->Prev
;
75 struct rc_instruction
* append_to
= last
;
76 rc_remove_instruction(loop
->BeginLoop
);
77 rc_remove_instruction(loop
->EndLoop
);
78 for( i
= 1; i
< iterations
; i
++){
79 for(ptr
= first
; ptr
!= last
->Next
; ptr
= ptr
->Next
){
80 struct rc_instruction
*new = rc_alloc_instruction(c
);
81 memcpy(new, ptr
, sizeof(struct rc_instruction
));
82 rc_insert_instruction(append_to
, new);
89 static void update_const_value(void * data
, struct rc_instruction
* inst
,
90 rc_register_file file
, unsigned int index
, unsigned int mask
)
92 struct const_value
* value
= data
;
93 if(value
->Src
->File
!= file
||
94 value
->Src
->Index
!= index
||
95 !(1 << GET_SWZ(value
->Src
->Swizzle
, 0) & mask
)){
98 switch(inst
->U
.I
.Opcode
){
100 if(!rc_src_reg_is_immediate(value
->C
, inst
->U
.I
.SrcReg
[0].File
,
101 inst
->U
.I
.SrcReg
[0].Index
)){
106 rc_get_constant_value(value
->C
,
107 inst
->U
.I
.SrcReg
[0].Index
,
108 inst
->U
.I
.SrcReg
[0].Swizzle
,
109 inst
->U
.I
.SrcReg
[0].Negate
, 0);
114 static void get_incr_amount(void * data
, struct rc_instruction
* inst
,
115 rc_register_file file
, unsigned int index
, unsigned int mask
)
117 struct count_inst
* count_inst
= data
;
119 const struct rc_opcode_info
* opcode
;
122 if(file
!= RC_FILE_TEMPORARY
||
123 count_inst
->Index
!= index
||
124 (1 << GET_SWZ(count_inst
->Swz
,0) != mask
)){
128 /* XXX: Give up if the counter is modified within an IF block. We
129 * could handle this case with better analysis. */
130 if (count_inst
->BranchDepth
> 0) {
131 count_inst
->Unknown
= 1;
135 /* Find the index of the counter register. */
136 opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
137 if(opcode
->NumSrcRegs
!= 2){
138 count_inst
->Unknown
= 1;
141 if(inst
->U
.I
.SrcReg
[0].File
== RC_FILE_TEMPORARY
&&
142 inst
->U
.I
.SrcReg
[0].Index
== count_inst
->Index
&&
143 inst
->U
.I
.SrcReg
[0].Swizzle
== count_inst
->Swz
){
145 } else if( inst
->U
.I
.SrcReg
[1].File
== RC_FILE_TEMPORARY
&&
146 inst
->U
.I
.SrcReg
[1].Index
== count_inst
->Index
&&
147 inst
->U
.I
.SrcReg
[1].Swizzle
== count_inst
->Swz
){
151 count_inst
->Unknown
= 1;
154 if(rc_src_reg_is_immediate(count_inst
->C
,
155 inst
->U
.I
.SrcReg
[amnt_src_index
].File
,
156 inst
->U
.I
.SrcReg
[amnt_src_index
].Index
)){
157 amount
= rc_get_constant_value(count_inst
->C
,
158 inst
->U
.I
.SrcReg
[amnt_src_index
].Index
,
159 inst
->U
.I
.SrcReg
[amnt_src_index
].Swizzle
,
160 inst
->U
.I
.SrcReg
[amnt_src_index
].Negate
, 0);
163 count_inst
->Unknown
= 1 ;
166 switch(inst
->U
.I
.Opcode
){
168 count_inst
->Amount
+= amount
;
171 if(amnt_src_index
== 0){
172 count_inst
->Unknown
= 0;
175 count_inst
->Amount
-= amount
;
178 count_inst
->Unknown
= 1;
184 * If c->max_alu_inst is -1, then all eligible loops will be unrolled regardless
185 * of how many iterations they have.
187 static int try_unroll_loop(struct radeon_compiler
* c
, struct loop_info
* loop
)
191 struct count_inst count_inst
;
193 struct rc_src_register
* counter
;
194 struct rc_src_register
* limit
;
195 struct const_value counter_value
;
196 struct rc_instruction
* inst
;
198 /* Find the counter and the upper limit */
200 if(rc_src_reg_is_immediate(c
, loop
->Cond
->U
.I
.SrcReg
[0].File
,
201 loop
->Cond
->U
.I
.SrcReg
[0].Index
)){
202 limit
= &loop
->Cond
->U
.I
.SrcReg
[0];
203 counter
= &loop
->Cond
->U
.I
.SrcReg
[1];
205 else if(rc_src_reg_is_immediate(c
, loop
->Cond
->U
.I
.SrcReg
[1].File
,
206 loop
->Cond
->U
.I
.SrcReg
[1].Index
)){
207 limit
= &loop
->Cond
->U
.I
.SrcReg
[1];
208 counter
= &loop
->Cond
->U
.I
.SrcReg
[0];
211 DBG("No constant limit.\n");
215 /* Find the initial value of the counter */
216 counter_value
.Src
= counter
;
217 counter_value
.Value
= 0.0f
;
218 counter_value
.HasValue
= 0;
220 for(inst
= c
->Program
.Instructions
.Next
; inst
!= loop
->BeginLoop
;
222 rc_for_all_writes_mask(inst
, update_const_value
, &counter_value
);
224 if(!counter_value
.HasValue
){
225 DBG("Initial counter value cannot be determined.\n");
228 DBG("Initial counter value is %f\n", counter_value
.Value
);
229 /* Determine how the counter is modified each loop */
231 count_inst
.Index
= counter
->Index
;
232 count_inst
.Swz
= counter
->Swizzle
;
233 count_inst
.Amount
= 0.0f
;
234 count_inst
.Unknown
= 0;
235 count_inst
.BranchDepth
= 0;
237 for(inst
= loop
->BeginLoop
->Next
; end_loops
> 0; inst
= inst
->Next
){
238 switch(inst
->U
.I
.Opcode
){
239 /* XXX In the future we might want to try to unroll nested
241 case RC_OPCODE_BGNLOOP
:
244 case RC_OPCODE_ENDLOOP
:
245 loop
->EndLoop
= inst
;
249 /* Don't unroll loops if it has a BRK instruction
250 * other one used when testing the main conditional
253 /* Make sure we haven't entered a nested loops. */
254 if(inst
!= loop
->Brk
&& end_loops
== 1) {
259 count_inst
.BranchDepth
++;
261 case RC_OPCODE_ENDIF
:
262 count_inst
.BranchDepth
--;
265 rc_for_all_writes_mask(inst
, get_incr_amount
, &count_inst
);
266 if(count_inst
.Unknown
){
273 if(count_inst
.Amount
== 0.0f
){
276 DBG("Counter is increased by %f each iteration.\n", count_inst
.Amount
);
277 /* Calculate the number of iterations of this loop. Keeping this
278 * simple, since we only support increment and decrement loops.
280 limit_value
= rc_get_constant_value(c
, limit
->Index
, limit
->Swizzle
,
282 DBG("Limit is %f.\n", limit_value
);
283 /* The iteration calculations are opposite of what you would expect.
284 * In a normal loop, if the condition is met, then loop continues, but
285 * with our loops, if the condition is met, the is exited. */
286 switch(loop
->Cond
->U
.I
.Opcode
){
289 iterations
= (int) ceilf((limit_value
- counter_value
.Value
) /
295 iterations
= (int) floorf((limit_value
- counter_value
.Value
) /
296 count_inst
.Amount
) + 1;
302 if (c
->max_alu_insts
> 0
303 && iterations
> loop_max_possible_iterations(c
, loop
)) {
307 DBG("Loop will have %d iterations.\n", iterations
);
309 /* Prepare loop for unrolling */
310 rc_remove_instruction(loop
->Cond
);
311 rc_remove_instruction(loop
->If
);
312 rc_remove_instruction(loop
->Brk
);
313 rc_remove_instruction(loop
->EndIf
);
315 unroll_loop(c
, loop
, iterations
);
316 loop
->EndLoop
= NULL
;
323 * @param inst A pointer to a BGNLOOP instruction.
324 * @return 1 if all of the members of loop where set.
325 * @return 0 if there was an error and some members of loop are still NULL.
327 static int build_loop_info(struct radeon_compiler
* c
, struct loop_info
* loop
,
328 struct rc_instruction
* inst
)
330 struct rc_instruction
* ptr
;
332 if(inst
->U
.I
.Opcode
!= RC_OPCODE_BGNLOOP
){
333 rc_error(c
, "%s: expected BGNLOOP", __FUNCTION__
);
337 memset(loop
, 0, sizeof(struct loop_info
));
339 loop
->BeginLoop
= inst
;
341 for(ptr
= loop
->BeginLoop
->Next
; !loop
->EndLoop
; ptr
= ptr
->Next
) {
343 if (ptr
== &c
->Program
.Instructions
) {
344 rc_error(c
, "%s: BGNLOOP without an ENDLOOOP.\n",
349 switch(ptr
->U
.I
.Opcode
){
350 case RC_OPCODE_BGNLOOP
:
352 /* Nested loop, skip ahead to the end. */
353 unsigned int loop_depth
= 1;
354 for(ptr
= ptr
->Next
; ptr
!= &c
->Program
.Instructions
;
356 if (ptr
->U
.I
.Opcode
== RC_OPCODE_BGNLOOP
) {
358 } else if (ptr
->U
.I
.Opcode
== RC_OPCODE_ENDLOOP
) {
364 if (ptr
== &c
->Program
.Instructions
) {
365 rc_error(c
, "%s: BGNLOOP without an ENDLOOOP\n",
373 struct rc_src_register
*src
;
374 if(ptr
->Next
->U
.I
.Opcode
!= RC_OPCODE_ENDIF
375 || ptr
->Prev
->U
.I
.Opcode
!= RC_OPCODE_IF
380 loop
->If
= ptr
->Prev
;
381 loop
->EndIf
= ptr
->Next
;
382 src
= &loop
->If
->U
.I
.SrcReg
[0];
384 for (loop
->Cond
= loop
->If
->Prev
;
385 loop
->Cond
->U
.I
.Opcode
!= RC_OPCODE_BGNLOOP
;
386 loop
->Cond
= loop
->Cond
->Prev
) {
388 const struct rc_dst_register
*dst
= &loop
->Cond
->U
.I
.DstReg
;
389 if (dst
->File
== src
->File
&&
390 dst
->Index
== src
->Index
&&
391 dst
->WriteMask
& (rc_swizzle_to_writemask(src
->Swizzle
))) {
392 if (loop
->Cond
->U
.I
.Opcode
== RC_OPCODE_CMP
) {
393 src
= &loop
->Cond
->U
.I
.SrcReg
[0];
400 if (loop
->Cond
->U
.I
.Opcode
== RC_OPCODE_BGNLOOP
) {
401 rc_error(c
, "%s: Cannot find condition for if\n", __FUNCTION__
);
406 case RC_OPCODE_ENDLOOP
:
412 if (loop
->BeginLoop
&& loop
->Brk
&& loop
->If
&& loop
->EndIf
413 && loop
->Cond
&& loop
->EndLoop
) {
420 * This function prepares a loop to be unrolled by converting it into an if
421 * statement. Here is an outline of the conversion process:
422 * BGNLOOP; -> BGNLOOP;
423 * <Additional conditional code> -> <Additional conditional code>
424 * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
425 * IF temp[0]; -> IF temp[0];
427 * ENDIF; -> <Loop Body>
428 * <Loop Body> -> ENDIF;
429 * ENDLOOP; -> ENDLOOP
431 * @param inst A pointer to a BGNLOOP instruction.
432 * @return 1 for success, 0 for failure
434 static int transform_loop(struct emulate_loop_state
* s
,
435 struct rc_instruction
* inst
)
437 struct loop_info
* loop
;
439 memory_pool_array_reserve(&s
->C
->Pool
, struct loop_info
,
440 s
->Loops
, s
->LoopCount
, s
->LoopReserved
, 1);
442 loop
= &s
->Loops
[s
->LoopCount
++];
444 if (!build_loop_info(s
->C
, loop
, inst
)) {
445 rc_error(s
->C
, "Failed to build loop info\n");
449 if(try_unroll_loop(s
->C
, loop
)){
453 /* Reverse the conditional instruction */
454 switch(loop
->Cond
->U
.I
.Opcode
){
456 loop
->Cond
->U
.I
.Opcode
= RC_OPCODE_SLT
;
459 loop
->Cond
->U
.I
.Opcode
= RC_OPCODE_SGE
;
462 loop
->Cond
->U
.I
.Opcode
= RC_OPCODE_SGT
;
465 loop
->Cond
->U
.I
.Opcode
= RC_OPCODE_SLE
;
468 loop
->Cond
->U
.I
.Opcode
= RC_OPCODE_SNE
;
471 loop
->Cond
->U
.I
.Opcode
= RC_OPCODE_SEQ
;
474 rc_error(s
->C
, "loop->Cond is not a conditional.\n");
478 /* Prepare the loop to be emulated */
479 rc_remove_instruction(loop
->Brk
);
480 rc_remove_instruction(loop
->EndIf
);
481 rc_insert_instruction(loop
->EndLoop
->Prev
, loop
->EndIf
);
485 void rc_transform_loops(struct radeon_compiler
*c
, void *user
)
487 struct emulate_loop_state
* s
= &c
->loop_state
;
488 struct rc_instruction
* ptr
;
490 memset(s
, 0, sizeof(struct emulate_loop_state
));
492 for(ptr
= s
->C
->Program
.Instructions
.Next
;
493 ptr
!= &s
->C
->Program
.Instructions
; ptr
= ptr
->Next
) {
494 if(ptr
->Type
== RC_INSTRUCTION_NORMAL
&&
495 ptr
->U
.I
.Opcode
== RC_OPCODE_BGNLOOP
){
496 if (!transform_loop(s
, ptr
))
502 void rc_unroll_loops(struct radeon_compiler
*c
, void *user
)
504 struct rc_instruction
* inst
;
505 struct loop_info loop
;
507 for(inst
= c
->Program
.Instructions
.Next
;
508 inst
!= &c
->Program
.Instructions
; inst
= inst
->Next
) {
510 if (inst
->U
.I
.Opcode
== RC_OPCODE_BGNLOOP
) {
511 if (build_loop_info(c
, &loop
, inst
)) {
512 try_unroll_loop(c
, &loop
);
518 void rc_emulate_loops(struct radeon_compiler
*c
, void *user
)
520 struct emulate_loop_state
* s
= &c
->loop_state
;
522 /* Iterate backwards of the list of loops so that loops that nested
523 * loops are unrolled first.
525 for( i
= s
->LoopCount
- 1; i
>= 0; i
-- ){
526 unsigned int iterations
;
528 if(!s
->Loops
[i
].EndLoop
){
531 iterations
= loop_max_possible_iterations(s
->C
, &s
->Loops
[i
]);
532 unroll_loop(s
->C
, &s
->Loops
[i
], iterations
);