r300/compiler: Unroll loops that have a constant number of iterations.
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_emulate_loops.c
1 /*
2 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 */
31
32 #include "radeon_emulate_loops.h"
33
34 #include "radeon_compiler.h"
35 #include "radeon_dataflow.h"
36
37 struct emulate_loop_state {
38 struct radeon_compiler * C;
39 struct loop_info * Loops;
40 unsigned int LoopCount;
41 unsigned int LoopReserved;
42 };
43
44 struct loop_info {
45 struct rc_instruction * BeginLoop;
46 struct rc_instruction * EndLoop;
47 };
48
49 struct const_value {
50
51 struct radeon_compiler * C;
52 struct rc_src_register * Src;
53 float Value;
54 int HasValue;
55 };
56
57 struct count_inst {
58 struct radeon_compiler * C;
59 int Index;
60 rc_swizzle Swz;
61 float Amount;
62 int Unknown;
63 };
64
65 static float get_constant_value(struct radeon_compiler * c,
66 struct rc_src_register * src,
67 int chan)
68 {
69 float base = 1.0f;
70 int swz = GET_SWZ(src->Swizzle, chan);
71 if(swz >= 4 || src->Index >= c->Program.Constants.Count ){
72 rc_error(c, "get_constant_value: Can't find a value.\n");
73 return 0.0f;
74 }
75 if(GET_BIT(src->Negate, chan)){
76 base = -1.0f;
77 }
78 return base *
79 c->Program.Constants.Constants[src->Index].u.Immediate[swz];
80 }
81
82 static int src_reg_is_immediate(struct rc_src_register * src,
83 struct radeon_compiler * c)
84 {
85 return src->File == RC_FILE_CONSTANT &&
86 c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE;
87 }
88
89 static unsigned int loop_count_instructions(struct loop_info * loop)
90 {
91 unsigned int count = 0;
92 struct rc_instruction * inst = loop->BeginLoop->Next;
93 while(inst != loop->EndLoop){
94 count++;
95 inst = inst->Next;
96 }
97 return count;
98 }
99
100 static unsigned int loop_calc_iterations(struct loop_info * loop,
101 unsigned int loop_count, unsigned int max_instructions)
102 {
103 unsigned int icount = loop_count_instructions(loop);
104 return max_instructions / (loop_count * icount);
105 }
106
107 static void loop_unroll(struct emulate_loop_state * s,
108 struct loop_info *loop, unsigned int iterations)
109 {
110 unsigned int i;
111 struct rc_instruction * ptr;
112 struct rc_instruction * first = loop->BeginLoop->Next;
113 struct rc_instruction * last = loop->EndLoop->Prev;
114 struct rc_instruction * append_to = last;
115 rc_remove_instruction(loop->BeginLoop);
116 rc_remove_instruction(loop->EndLoop);
117 for( i = 1; i < iterations; i++){
118 for(ptr = first; ptr != last->Next; ptr = ptr->Next){
119 struct rc_instruction *new = rc_alloc_instruction(s->C);
120 memcpy(new, ptr, sizeof(struct rc_instruction));
121 rc_insert_instruction(append_to, new);
122 append_to = new;
123 }
124 }
125 }
126
127
128 static void update_const_value(void * data, struct rc_instruction * inst,
129 rc_register_file file, unsigned int index, unsigned int mask)
130 {
131 struct const_value * value = data;
132 if(value->Src->File != file ||
133 value->Src->Index != index ||
134 !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){
135 return;
136 }
137 switch(inst->U.I.Opcode){
138 case RC_OPCODE_MOV:
139 if(!src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){
140 return;
141 }
142 value->HasValue = 1;
143 value->Value =
144 get_constant_value(value->C, &inst->U.I.SrcReg[0], 0);
145 break;
146 }
147 }
148
149 static void get_incr_amount(void * data, struct rc_instruction * inst,
150 rc_register_file file, unsigned int index, unsigned int mask)
151 {
152 struct count_inst * count_inst = data;
153 if(file != RC_FILE_TEMPORARY ||
154 count_inst->Index != index ||
155 (1 << GET_SWZ(count_inst->Swz,0) != mask)){
156 return;
157 }
158 switch(inst->U.I.Opcode){
159 int incr_reg;
160 case RC_OPCODE_ADD:
161 {
162 if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
163 inst->U.I.SrcReg[0].Index == count_inst->Index &&
164 inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){
165 incr_reg = 1;
166 } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
167 inst->U.I.SrcReg[1].Index == count_inst->Index &&
168 inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){
169 incr_reg = 0;
170 }
171 else{
172 count_inst->Unknown = 1;
173 return;
174 }
175 if(src_reg_is_immediate(&inst->U.I.SrcReg[incr_reg],
176 count_inst->C)){
177 count_inst->Amount = get_constant_value(count_inst->C,
178 &inst->U.I.SrcReg[incr_reg], 0);
179 }
180 else{
181 count_inst->Unknown = 1 ;
182 return;
183 }
184 break;
185 }
186 default:
187 count_inst->Unknown = 1;
188 return;
189 }
190
191 }
192
193 static int transform_const_loop(struct emulate_loop_state * s,
194 struct loop_info * loop,
195 struct rc_instruction * slt)
196 {
197 int end_loops = 1;
198 int iterations;
199 struct count_inst count_inst;
200 float limit_value;
201 struct rc_src_register * counter;
202 struct rc_src_register * limit;
203 struct const_value counter_value;
204 struct rc_instruction * inst;
205
206 /* Find the counter and the upper limit */
207
208 /* limit < counter */
209 if(src_reg_is_immediate(&slt->U.I.SrcReg[0], s->C)){
210 limit = &slt->U.I.SrcReg[0];
211 counter = &slt->U.I.SrcReg[1];
212 }
213 /* counter < limit */
214 else if(src_reg_is_immediate(&slt->U.I.SrcReg[1], s->C)){
215 limit = &slt->U.I.SrcReg[1];
216 counter = &slt->U.I.SrcReg[0];
217 }
218 else{
219 return 0;
220 }
221
222 /* Find the initial value of the counter */
223 counter_value.Src = counter;
224 counter_value.Value = 0.0f;
225 counter_value.HasValue = 0;
226 counter_value.C = s->C;
227 for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop;
228 inst = inst->Next){
229 rc_for_all_writes_mask(inst, update_const_value, &counter_value);
230 }
231 if(!counter_value.HasValue){
232 return 0;
233 }
234
235 /* Determine how the counter is modified each loop */
236 count_inst.C = s->C;
237 count_inst.Index = counter->Index;
238 count_inst.Swz = counter->Swizzle;
239 count_inst.Amount = 0.0f;
240 count_inst.Unknown = 0;
241 for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
242 switch(inst->U.I.Opcode){
243 /* XXX In the future we might want to try to unroll nested
244 * loops here.*/
245 case RC_OPCODE_BGNLOOP:
246 end_loops++;
247 break;
248 case RC_OPCODE_ENDLOOP:
249 loop->EndLoop = inst;
250 end_loops--;
251 break;
252 default:
253 rc_for_all_writes_mask(inst, get_incr_amount, &count_inst);
254 if(count_inst.Unknown){
255 return 0;
256 }
257 break;
258 }
259 }
260 if(count_inst.Amount == 0.0f){
261 return 0;
262 }
263
264 /* Calculate the number of iterations of this loop */
265 limit_value = get_constant_value(s->C, limit, 0);
266 iterations = (int) ((limit_value - counter_value.Value) /
267 count_inst.Amount);
268
269 /* Prepare loop for unrolling */
270 /* Remove the first 4 instructions inside the loop, which are part
271 * of the conditional and no longer needed(SGE, IF, BRK, ENDIF).
272 */
273 rc_remove_instruction(loop->BeginLoop->Next);
274 rc_remove_instruction(loop->BeginLoop->Next);
275 rc_remove_instruction(loop->BeginLoop->Next);
276 rc_remove_instruction(loop->BeginLoop->Next);
277
278 loop_unroll(s, loop, iterations);
279 loop->EndLoop = NULL;
280 return 1;
281 }
282
283 /**
284 * This function prepares a loop to be unrolled by converting it into an if
285 * statement. Here is an outline of the conversion process:
286 * BGNLOOP; -> BGNLOOP;
287 * SGE temp[0], temp[1], temp[2]; -> SLT temp[0], temp[1], temp[2];
288 * IF temp[0]; -> IF temp[0];
289 * BRK; ->
290 * ENDIF; -> <Loop Body>
291 * <Loop Body> -> ENDIF;
292 * ENDLOOP; -> ENDLOOP
293 *
294 * @param inst A pointer to a BGNLOOP instruction.
295 * @return A pointer to the ENDLOOP instruction.
296 */
297 static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
298 struct rc_instruction * inst)
299 {
300 struct loop_info *loop;
301 struct rc_instruction * ptr;
302
303 memory_pool_array_reserve(&s->C->Pool, struct loop_info,
304 s->Loops, s->LoopCount, s->LoopReserved, 1);
305
306 loop = &s->Loops[s->LoopCount++];
307 memset(loop, 0, sizeof(struct loop_info));
308 loop->BeginLoop = inst;
309
310 /* Reverse the SGE instruction */
311 ptr = inst->Next;
312 ptr->U.I.Opcode = RC_OPCODE_SLT;
313
314 /* Check if the number of loops is known at compile time. */
315 if(transform_const_loop(s, loop, ptr)){
316 return loop->BeginLoop->Next;
317 }
318
319 while(!loop->EndLoop){
320 struct rc_instruction * endif;
321 if(ptr->Type == RC_INSTRUCTION_NORMAL){
322 }
323 switch(ptr->U.I.Opcode){
324 case RC_OPCODE_BGNLOOP:
325 /* Nested loop */
326 ptr = transform_loop(s, ptr);
327 break;
328 case RC_OPCODE_BRK:
329 /* The BRK instruction should always be followed by
330 * an ENDIF. This ENDIF will eventually replace the
331 * ENDLOOP insruction. */
332 endif = ptr->Next;
333 rc_remove_instruction(ptr);
334 rc_remove_instruction(endif);
335 break;
336 case RC_OPCODE_ENDLOOP:
337 /* Insert the ENDIF before ENDLOOP. */
338 rc_insert_instruction(ptr->Prev, endif);
339 loop->EndLoop = ptr;
340 break;
341 }
342 ptr = ptr->Next;
343 }
344 return ptr;
345 }
346
347 static void rc_transform_loops(struct emulate_loop_state * s)
348 {
349 struct rc_instruction * ptr = s->C->Program.Instructions.Next;
350 while(ptr != &s->C->Program.Instructions) {
351 if(ptr->Type == RC_INSTRUCTION_NORMAL &&
352 ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
353 ptr = transform_loop(s, ptr);
354 }
355 ptr = ptr->Next;
356 }
357 }
358
359 static void rc_unroll_loops(struct emulate_loop_state *s,
360 unsigned int max_instructions)
361 {
362 int i;
363 /* Iterate backwards of the list of loops so that loops that nested
364 * loops are unrolled first.
365 */
366 for( i = s->LoopCount - 1; i >= 0; i-- ){
367 if(!s->Loops[i].EndLoop){
368 continue;
369 }
370 unsigned int iterations = loop_calc_iterations(&s->Loops[i],
371 s->LoopCount, max_instructions);
372 loop_unroll(s, &s->Loops[i], iterations);
373 }
374 }
375
376 void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions)
377 {
378 struct emulate_loop_state s;
379
380 memset(&s, 0, sizeof(struct emulate_loop_state));
381 s.C = c;
382
383 /* We may need to move these two operations to r3xx_(vert|frag)prog.c
384 * and run the optimization passes between them in order to increase
385 * the number of unrolls we can do for each loop.
386 */
387 rc_transform_loops(&s);
388
389 rc_unroll_loops(&s, max_instructions);
390 }