2 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32 #include "radeon_emulate_loops.h"
34 #include "radeon_compiler.h"
35 #include "radeon_dataflow.h"
39 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
43 struct radeon_compiler
* C
;
44 struct rc_src_register
* Src
;
50 struct radeon_compiler
* C
;
57 static float get_constant_value(struct radeon_compiler
* c
,
58 struct rc_src_register
* src
,
62 int swz
= GET_SWZ(src
->Swizzle
, chan
);
63 if(swz
>= 4 || src
->Index
>= c
->Program
.Constants
.Count
){
64 rc_error(c
, "get_constant_value: Can't find a value.\n");
67 if(GET_BIT(src
->Negate
, chan
)){
71 c
->Program
.Constants
.Constants
[src
->Index
].u
.Immediate
[swz
];
74 static int src_reg_is_immediate(struct rc_src_register
* src
,
75 struct radeon_compiler
* c
)
77 return src
->File
== RC_FILE_CONSTANT
&&
78 c
->Program
.Constants
.Constants
[src
->Index
].Type
==RC_CONSTANT_IMMEDIATE
;
81 static unsigned int loop_calc_iterations(struct emulate_loop_state
*s
,
82 struct loop_info
* loop
, unsigned int max_instructions
)
84 unsigned int total_i
= rc_recompute_ips(s
->C
);
85 unsigned int loop_i
= (loop
->EndLoop
->IP
- loop
->BeginLoop
->IP
) - 1;
86 /* +1 because the program already has one iteration of the loop. */
87 return 1 + ((max_instructions
- total_i
) / (s
->LoopCount
* loop_i
));
90 static void loop_unroll(struct emulate_loop_state
* s
,
91 struct loop_info
*loop
, unsigned int iterations
)
94 struct rc_instruction
* ptr
;
95 struct rc_instruction
* first
= loop
->BeginLoop
->Next
;
96 struct rc_instruction
* last
= loop
->EndLoop
->Prev
;
97 struct rc_instruction
* append_to
= last
;
98 rc_remove_instruction(loop
->BeginLoop
);
99 rc_remove_instruction(loop
->EndLoop
);
100 for( i
= 1; i
< iterations
; i
++){
101 for(ptr
= first
; ptr
!= last
->Next
; ptr
= ptr
->Next
){
102 struct rc_instruction
*new = rc_alloc_instruction(s
->C
);
103 memcpy(new, ptr
, sizeof(struct rc_instruction
));
104 rc_insert_instruction(append_to
, new);
111 static void update_const_value(void * data
, struct rc_instruction
* inst
,
112 rc_register_file file
, unsigned int index
, unsigned int mask
)
114 struct const_value
* value
= data
;
115 if(value
->Src
->File
!= file
||
116 value
->Src
->Index
!= index
||
117 !(1 << GET_SWZ(value
->Src
->Swizzle
, 0) & mask
)){
120 switch(inst
->U
.I
.Opcode
){
122 if(!src_reg_is_immediate(&inst
->U
.I
.SrcReg
[0], value
->C
)){
127 get_constant_value(value
->C
, &inst
->U
.I
.SrcReg
[0], 0);
132 static void get_incr_amount(void * data
, struct rc_instruction
* inst
,
133 rc_register_file file
, unsigned int index
, unsigned int mask
)
135 struct count_inst
* count_inst
= data
;
137 const struct rc_opcode_info
* opcode
;
140 if(file
!= RC_FILE_TEMPORARY
||
141 count_inst
->Index
!= index
||
142 (1 << GET_SWZ(count_inst
->Swz
,0) != mask
)){
145 /* Find the index of the counter register. */
146 opcode
= rc_get_opcode_info(inst
->U
.I
.Opcode
);
147 if(opcode
->NumSrcRegs
!= 2){
148 count_inst
->Unknown
= 1;
151 if(inst
->U
.I
.SrcReg
[0].File
== RC_FILE_TEMPORARY
&&
152 inst
->U
.I
.SrcReg
[0].Index
== count_inst
->Index
&&
153 inst
->U
.I
.SrcReg
[0].Swizzle
== count_inst
->Swz
){
155 } else if( inst
->U
.I
.SrcReg
[1].File
== RC_FILE_TEMPORARY
&&
156 inst
->U
.I
.SrcReg
[1].Index
== count_inst
->Index
&&
157 inst
->U
.I
.SrcReg
[1].Swizzle
== count_inst
->Swz
){
161 count_inst
->Unknown
= 1;
164 if(src_reg_is_immediate(&inst
->U
.I
.SrcReg
[amnt_src_index
],
166 amount
= get_constant_value(count_inst
->C
,
167 &inst
->U
.I
.SrcReg
[amnt_src_index
], 0);
170 count_inst
->Unknown
= 1 ;
173 switch(inst
->U
.I
.Opcode
){
175 count_inst
->Amount
+= amount
;
178 if(amnt_src_index
== 0){
179 count_inst
->Unknown
= 0;
182 count_inst
->Amount
-= amount
;
185 count_inst
->Unknown
= 1;
191 static int transform_const_loop(struct emulate_loop_state
* s
,
192 struct loop_info
* loop
)
196 struct count_inst count_inst
;
198 struct rc_src_register
* counter
;
199 struct rc_src_register
* limit
;
200 struct const_value counter_value
;
201 struct rc_instruction
* inst
;
203 /* Find the counter and the upper limit */
205 if(src_reg_is_immediate(&loop
->Cond
->U
.I
.SrcReg
[0], s
->C
)){
206 limit
= &loop
->Cond
->U
.I
.SrcReg
[0];
207 counter
= &loop
->Cond
->U
.I
.SrcReg
[1];
209 else if(src_reg_is_immediate(&loop
->Cond
->U
.I
.SrcReg
[1], s
->C
)){
210 limit
= &loop
->Cond
->U
.I
.SrcReg
[1];
211 counter
= &loop
->Cond
->U
.I
.SrcReg
[0];
214 DBG("No constant limit.\n");
218 /* Find the initial value of the counter */
219 counter_value
.Src
= counter
;
220 counter_value
.Value
= 0.0f
;
221 counter_value
.HasValue
= 0;
222 counter_value
.C
= s
->C
;
223 for(inst
= s
->C
->Program
.Instructions
.Next
; inst
!= loop
->BeginLoop
;
225 rc_for_all_writes_mask(inst
, update_const_value
, &counter_value
);
227 if(!counter_value
.HasValue
){
228 DBG("Initial counter value cannot be determined.\n");
231 DBG("Initial counter value is %f\n", counter_value
.Value
);
232 /* Determine how the counter is modified each loop */
234 count_inst
.Index
= counter
->Index
;
235 count_inst
.Swz
= counter
->Swizzle
;
236 count_inst
.Amount
= 0.0f
;
237 count_inst
.Unknown
= 0;
238 for(inst
= loop
->BeginLoop
->Next
; end_loops
> 0; inst
= inst
->Next
){
239 switch(inst
->U
.I
.Opcode
){
240 /* XXX In the future we might want to try to unroll nested
242 case RC_OPCODE_BGNLOOP
:
245 case RC_OPCODE_ENDLOOP
:
246 loop
->EndLoop
= inst
;
249 /* XXX Check if the counter is modified within an if statement.
254 rc_for_all_writes_mask(inst
, get_incr_amount
, &count_inst
);
255 if(count_inst
.Unknown
){
262 if(count_inst
.Amount
== 0.0f
){
265 DBG("Counter is increased by %f each iteration.\n", count_inst
.Amount
);
266 /* Calculate the number of iterations of this loop. Keeping this
267 * simple, since we only support increment and decrement loops.
269 limit_value
= get_constant_value(s
->C
, limit
, 0);
270 DBG("Limit is %f.\n", limit_value
);
271 switch(loop
->Cond
->U
.I
.Opcode
){
274 iterations
= (int) ceilf((limit_value
- counter_value
.Value
) /
280 iterations
= (int) floorf((limit_value
- counter_value
.Value
) /
281 count_inst
.Amount
) + 1;
287 DBG("Loop will have %d iterations.\n", iterations
);
289 /* Prepare loop for unrolling */
290 rc_remove_instruction(loop
->Cond
);
291 rc_remove_instruction(loop
->If
);
292 rc_remove_instruction(loop
->Brk
);
293 rc_remove_instruction(loop
->EndIf
);
295 loop_unroll(s
, loop
, iterations
);
296 loop
->EndLoop
= NULL
;
301 * This function prepares a loop to be unrolled by converting it into an if
302 * statement. Here is an outline of the conversion process:
303 * BGNLOOP; -> BGNLOOP;
304 * <Additional conditional code> -> <Additional conditional code>
305 * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
306 * IF temp[0]; -> IF temp[0];
308 * ENDIF; -> <Loop Body>
309 * <Loop Body> -> ENDIF;
310 * ENDLOOP; -> ENDLOOP
312 * @param inst A pointer to a BGNLOOP instruction.
313 * @return If the loop can be unrolled, a pointer to the first instruction of
315 * Otherwise, a pointer to the ENDLOOP instruction.
316 * NULL if there is an error.
318 static struct rc_instruction
* transform_loop(struct emulate_loop_state
* s
,
319 struct rc_instruction
* inst
)
321 struct loop_info
*loop
;
322 struct rc_instruction
* ptr
;
324 memory_pool_array_reserve(&s
->C
->Pool
, struct loop_info
,
325 s
->Loops
, s
->LoopCount
, s
->LoopReserved
, 1);
327 loop
= &s
->Loops
[s
->LoopCount
++];
328 memset(loop
, 0, sizeof(struct loop_info
));
329 if(inst
->U
.I
.Opcode
!= RC_OPCODE_BGNLOOP
){
330 rc_error(s
->C
, "expected BGNLOOP\n", __FUNCTION__
);
333 loop
->BeginLoop
= inst
;
335 for(ptr
= loop
->BeginLoop
->Next
; !loop
->EndLoop
; ptr
= ptr
->Next
){
336 switch(ptr
->U
.I
.Opcode
){
337 case RC_OPCODE_BGNLOOP
:
339 ptr
= transform_loop(s
, ptr
);
346 if(ptr
->Next
->U
.I
.Opcode
!= RC_OPCODE_ENDIF
){
348 "%s: expected ENDIF\n",__FUNCTION__
);
351 loop
->EndIf
= ptr
->Next
;
352 if(ptr
->Prev
->U
.I
.Opcode
!= RC_OPCODE_IF
){
354 "%s: expected IF\n", __FUNCTION__
);
357 loop
->If
= ptr
->Prev
;
358 switch(loop
->If
->Prev
->U
.I
.Opcode
){
367 rc_error(s
->C
, "%s expected conditional\n",
371 loop
->Cond
= loop
->If
->Prev
;
374 case RC_OPCODE_ENDLOOP
:
379 /* Reverse the conditional instruction */
380 switch(loop
->Cond
->U
.I
.Opcode
){
382 loop
->Cond
->U
.I
.Opcode
= RC_OPCODE_SLT
;
385 loop
->Cond
->U
.I
.Opcode
= RC_OPCODE_SGE
;
388 loop
->Cond
->U
.I
.Opcode
= RC_OPCODE_SGT
;
391 loop
->Cond
->U
.I
.Opcode
= RC_OPCODE_SLE
;
394 loop
->Cond
->U
.I
.Opcode
= RC_OPCODE_SNE
;
397 loop
->Cond
->U
.I
.Opcode
= RC_OPCODE_SEQ
;
400 rc_error(s
->C
, "loop->Cond is not a conditional.\n");
404 /* Check if the number of loops is known at compile time. */
405 if(transform_const_loop(s
, loop
)){
406 return loop
->BeginLoop
->Next
;
409 /* Prepare the loop to be unrolled */
410 rc_remove_instruction(loop
->Brk
);
411 rc_remove_instruction(loop
->EndIf
);
412 rc_insert_instruction(loop
->EndLoop
->Prev
, loop
->EndIf
);
413 return loop
->EndLoop
;
416 void rc_transform_unroll_loops(struct radeon_compiler
*c
,
417 struct emulate_loop_state
* s
)
419 struct rc_instruction
* ptr
;
421 memset(s
, 0, sizeof(struct emulate_loop_state
));
423 ptr
= s
->C
->Program
.Instructions
.Next
;
424 while(ptr
!= &s
->C
->Program
.Instructions
) {
425 if(ptr
->Type
== RC_INSTRUCTION_NORMAL
&&
426 ptr
->U
.I
.Opcode
== RC_OPCODE_BGNLOOP
){
427 ptr
= transform_loop(s
, ptr
);
436 void rc_emulate_loops(struct emulate_loop_state
*s
,
437 unsigned int max_instructions
)
440 /* Iterate backwards over the list of loops so that nested
441 * loops are unrolled first.
443 for( i
= s
->LoopCount
- 1; i
>= 0; i
-- ){
444 if(!s
->Loops
[i
].EndLoop
){
447 unsigned int iterations
= loop_calc_iterations(s
, &s
->Loops
[i
],
449 loop_unroll(s
, &s
->Loops
[i
], iterations
);