2 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32 #include "radeon_emulate_loops.h"
34 #include "radeon_compiler.h"
35 #include "radeon_dataflow.h"
37 struct emulate_loop_state
{
38 struct radeon_compiler
* C
;
39 struct loop_info
* Loops
;
40 unsigned int LoopCount
;
41 unsigned int LoopReserved
;
45 struct rc_instruction
* BeginLoop
;
46 struct rc_instruction
* EndLoop
;
51 struct radeon_compiler
* C
;
52 struct rc_src_register
* Src
;
58 struct radeon_compiler
* C
;
65 static float get_constant_value(struct radeon_compiler
* c
,
66 struct rc_src_register
* src
,
70 int swz
= GET_SWZ(src
->Swizzle
, chan
);
71 if(swz
>= 4 || src
->Index
>= c
->Program
.Constants
.Count
){
72 rc_error(c
, "get_constant_value: Can't find a value.\n");
75 if(GET_BIT(src
->Negate
, chan
)){
79 c
->Program
.Constants
.Constants
[src
->Index
].u
.Immediate
[swz
];
82 static int src_reg_is_immediate(struct rc_src_register
* src
,
83 struct radeon_compiler
* c
)
85 return src
->File
== RC_FILE_CONSTANT
&&
86 c
->Program
.Constants
.Constants
[src
->Index
].Type
==RC_CONSTANT_IMMEDIATE
;
89 static unsigned int loop_count_instructions(struct loop_info
* loop
)
91 unsigned int count
= 0;
92 struct rc_instruction
* inst
= loop
->BeginLoop
->Next
;
93 while(inst
!= loop
->EndLoop
){
100 static unsigned int loop_calc_iterations(struct loop_info
* loop
,
101 unsigned int loop_count
, unsigned int max_instructions
)
103 unsigned int icount
= loop_count_instructions(loop
);
104 return max_instructions
/ (loop_count
* icount
);
107 static void loop_unroll(struct emulate_loop_state
* s
,
108 struct loop_info
*loop
, unsigned int iterations
)
111 struct rc_instruction
* ptr
;
112 struct rc_instruction
* first
= loop
->BeginLoop
->Next
;
113 struct rc_instruction
* last
= loop
->EndLoop
->Prev
;
114 struct rc_instruction
* append_to
= last
;
115 rc_remove_instruction(loop
->BeginLoop
);
116 rc_remove_instruction(loop
->EndLoop
);
117 for( i
= 1; i
< iterations
; i
++){
118 for(ptr
= first
; ptr
!= last
->Next
; ptr
= ptr
->Next
){
119 struct rc_instruction
*new = rc_alloc_instruction(s
->C
);
120 memcpy(new, ptr
, sizeof(struct rc_instruction
));
121 rc_insert_instruction(append_to
, new);
128 static void update_const_value(void * data
, struct rc_instruction
* inst
,
129 rc_register_file file
, unsigned int index
, unsigned int mask
)
131 struct const_value
* value
= data
;
132 if(value
->Src
->File
!= file
||
133 value
->Src
->Index
!= index
||
134 !(1 << GET_SWZ(value
->Src
->Swizzle
, 0) & mask
)){
137 switch(inst
->U
.I
.Opcode
){
139 if(!src_reg_is_immediate(&inst
->U
.I
.SrcReg
[0], value
->C
)){
144 get_constant_value(value
->C
, &inst
->U
.I
.SrcReg
[0], 0);
149 static void get_incr_amount(void * data
, struct rc_instruction
* inst
,
150 rc_register_file file
, unsigned int index
, unsigned int mask
)
152 struct count_inst
* count_inst
= data
;
153 if(file
!= RC_FILE_TEMPORARY
||
154 count_inst
->Index
!= index
||
155 (1 << GET_SWZ(count_inst
->Swz
,0) != mask
)){
158 switch(inst
->U
.I
.Opcode
){
162 if(inst
->U
.I
.SrcReg
[0].File
== RC_FILE_TEMPORARY
&&
163 inst
->U
.I
.SrcReg
[0].Index
== count_inst
->Index
&&
164 inst
->U
.I
.SrcReg
[0].Swizzle
== count_inst
->Swz
){
166 } else if( inst
->U
.I
.SrcReg
[1].File
== RC_FILE_TEMPORARY
&&
167 inst
->U
.I
.SrcReg
[1].Index
== count_inst
->Index
&&
168 inst
->U
.I
.SrcReg
[1].Swizzle
== count_inst
->Swz
){
172 count_inst
->Unknown
= 1;
175 if(src_reg_is_immediate(&inst
->U
.I
.SrcReg
[incr_reg
],
177 count_inst
->Amount
= get_constant_value(count_inst
->C
,
178 &inst
->U
.I
.SrcReg
[incr_reg
], 0);
181 count_inst
->Unknown
= 1 ;
187 count_inst
->Unknown
= 1;
193 static int transform_const_loop(struct emulate_loop_state
* s
,
194 struct loop_info
* loop
,
195 struct rc_instruction
* slt
)
199 struct count_inst count_inst
;
201 struct rc_src_register
* counter
;
202 struct rc_src_register
* limit
;
203 struct const_value counter_value
;
204 struct rc_instruction
* inst
;
206 /* Find the counter and the upper limit */
208 /* limit < counter */
209 if(src_reg_is_immediate(&slt
->U
.I
.SrcReg
[0], s
->C
)){
210 limit
= &slt
->U
.I
.SrcReg
[0];
211 counter
= &slt
->U
.I
.SrcReg
[1];
213 /* counter < limit */
214 else if(src_reg_is_immediate(&slt
->U
.I
.SrcReg
[1], s
->C
)){
215 limit
= &slt
->U
.I
.SrcReg
[1];
216 counter
= &slt
->U
.I
.SrcReg
[0];
222 /* Find the initial value of the counter */
223 counter_value
.Src
= counter
;
224 counter_value
.Value
= 0.0f
;
225 counter_value
.HasValue
= 0;
226 counter_value
.C
= s
->C
;
227 for(inst
= s
->C
->Program
.Instructions
.Next
; inst
!= loop
->BeginLoop
;
229 rc_for_all_writes_mask(inst
, update_const_value
, &counter_value
);
231 if(!counter_value
.HasValue
){
235 /* Determine how the counter is modified each loop */
237 count_inst
.Index
= counter
->Index
;
238 count_inst
.Swz
= counter
->Swizzle
;
239 count_inst
.Amount
= 0.0f
;
240 count_inst
.Unknown
= 0;
241 for(inst
= loop
->BeginLoop
->Next
; end_loops
> 0; inst
= inst
->Next
){
242 switch(inst
->U
.I
.Opcode
){
243 /* XXX In the future we might want to try to unroll nested
245 case RC_OPCODE_BGNLOOP
:
248 case RC_OPCODE_ENDLOOP
:
249 loop
->EndLoop
= inst
;
253 rc_for_all_writes_mask(inst
, get_incr_amount
, &count_inst
);
254 if(count_inst
.Unknown
){
260 if(count_inst
.Amount
== 0.0f
){
264 /* Calculate the number of iterations of this loop */
265 limit_value
= get_constant_value(s
->C
, limit
, 0);
266 iterations
= (int) ((limit_value
- counter_value
.Value
) /
269 /* Prepare loop for unrolling */
270 /* Remove the first 4 instructions inside the loop, which are part
271 * of the conditional and no longer needed(SGE, IF, BRK, ENDIF).
273 rc_remove_instruction(loop
->BeginLoop
->Next
);
274 rc_remove_instruction(loop
->BeginLoop
->Next
);
275 rc_remove_instruction(loop
->BeginLoop
->Next
);
276 rc_remove_instruction(loop
->BeginLoop
->Next
);
278 loop_unroll(s
, loop
, iterations
);
279 loop
->EndLoop
= NULL
;
284 * This function prepares a loop to be unrolled by converting it into an if
285 * statement. Here is an outline of the conversion process:
286 * BGNLOOP; -> BGNLOOP;
287 * SGE temp[0], temp[1], temp[2]; -> SLT temp[0], temp[1], temp[2];
288 * IF temp[0]; -> IF temp[0];
290 * ENDIF; -> <Loop Body>
291 * <Loop Body> -> ENDIF;
292 * ENDLOOP; -> ENDLOOP
294 * @param inst A pointer to a BGNLOOP instruction.
295 * @return A pointer to the ENDLOOP instruction.
297 static struct rc_instruction
* transform_loop(struct emulate_loop_state
* s
,
298 struct rc_instruction
* inst
)
300 struct loop_info
*loop
;
301 struct rc_instruction
* ptr
;
303 memory_pool_array_reserve(&s
->C
->Pool
, struct loop_info
,
304 s
->Loops
, s
->LoopCount
, s
->LoopReserved
, 1);
306 loop
= &s
->Loops
[s
->LoopCount
++];
307 memset(loop
, 0, sizeof(struct loop_info
));
308 loop
->BeginLoop
= inst
;
310 /* Reverse the SGE instruction */
312 ptr
->U
.I
.Opcode
= RC_OPCODE_SLT
;
314 /* Check if the number of loops is known at compile time. */
315 if(transform_const_loop(s
, loop
, ptr
)){
316 return loop
->BeginLoop
->Next
;
319 while(!loop
->EndLoop
){
320 struct rc_instruction
* endif
;
321 if(ptr
->Type
== RC_INSTRUCTION_NORMAL
){
323 switch(ptr
->U
.I
.Opcode
){
324 case RC_OPCODE_BGNLOOP
:
326 ptr
= transform_loop(s
, ptr
);
329 /* The BRK instruction should always be followed by
330 * an ENDIF. This ENDIF will eventually replace the
331 * ENDLOOP insruction. */
333 rc_remove_instruction(ptr
);
334 rc_remove_instruction(endif
);
336 case RC_OPCODE_ENDLOOP
:
337 /* Insert the ENDIF before ENDLOOP. */
338 rc_insert_instruction(ptr
->Prev
, endif
);
347 static void rc_transform_loops(struct emulate_loop_state
* s
)
349 struct rc_instruction
* ptr
= s
->C
->Program
.Instructions
.Next
;
350 while(ptr
!= &s
->C
->Program
.Instructions
) {
351 if(ptr
->Type
== RC_INSTRUCTION_NORMAL
&&
352 ptr
->U
.I
.Opcode
== RC_OPCODE_BGNLOOP
){
353 ptr
= transform_loop(s
, ptr
);
359 static void rc_unroll_loops(struct emulate_loop_state
*s
,
360 unsigned int max_instructions
)
363 /* Iterate backwards of the list of loops so that loops that nested
364 * loops are unrolled first.
366 for( i
= s
->LoopCount
- 1; i
>= 0; i
-- ){
367 if(!s
->Loops
[i
].EndLoop
){
370 unsigned int iterations
= loop_calc_iterations(&s
->Loops
[i
],
371 s
->LoopCount
, max_instructions
);
372 loop_unroll(s
, &s
->Loops
[i
], iterations
);
376 void rc_emulate_loops(struct radeon_compiler
*c
, unsigned int max_instructions
)
378 struct emulate_loop_state s
;
380 memset(&s
, 0, sizeof(struct emulate_loop_state
));
383 /* We may need to move these two operations to r3xx_(vert|frag)prog.c
384 * and run the optimization passes between them in order to increase
385 * the number of unrolls we can do for each loop.
387 rc_transform_loops(&s
);
389 rc_unroll_loops(&s
, max_instructions
);