r300/compiler: Don't unroll loops with continue or break.
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_emulate_loops.c
1 /*
2 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 */
31
32 #include "radeon_emulate_loops.h"
33
34 #include "radeon_compiler.h"
35 #include "radeon_dataflow.h"
36
37 #define VERBOSE 0
38
39 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
40
41 struct const_value {
42
43 struct radeon_compiler * C;
44 struct rc_src_register * Src;
45 float Value;
46 int HasValue;
47 };
48
49 struct count_inst {
50 struct radeon_compiler * C;
51 int Index;
52 rc_swizzle Swz;
53 float Amount;
54 int Unknown;
55 };
56
57 static float get_constant_value(struct radeon_compiler * c,
58 struct rc_src_register * src,
59 int chan)
60 {
61 float base = 1.0f;
62 int swz = GET_SWZ(src->Swizzle, chan);
63 if(swz >= 4 || src->Index >= c->Program.Constants.Count ){
64 rc_error(c, "get_constant_value: Can't find a value.\n");
65 return 0.0f;
66 }
67 if(GET_BIT(src->Negate, chan)){
68 base = -1.0f;
69 }
70 return base *
71 c->Program.Constants.Constants[src->Index].u.Immediate[swz];
72 }
73
74 static int src_reg_is_immediate(struct rc_src_register * src,
75 struct radeon_compiler * c)
76 {
77 return src->File == RC_FILE_CONSTANT &&
78 c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE;
79 }
80
81 static unsigned int loop_calc_iterations(struct emulate_loop_state *s,
82 struct loop_info * loop, unsigned int max_instructions)
83 {
84 unsigned int total_i = rc_recompute_ips(s->C);
85 unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;
86 /* +1 because the program already has one iteration of the loop. */
87 return 1 + ((max_instructions - total_i) / (s->LoopCount * loop_i));
88 }
89
90 static void loop_unroll(struct emulate_loop_state * s,
91 struct loop_info *loop, unsigned int iterations)
92 {
93 unsigned int i;
94 struct rc_instruction * ptr;
95 struct rc_instruction * first = loop->BeginLoop->Next;
96 struct rc_instruction * last = loop->EndLoop->Prev;
97 struct rc_instruction * append_to = last;
98 rc_remove_instruction(loop->BeginLoop);
99 rc_remove_instruction(loop->EndLoop);
100 for( i = 1; i < iterations; i++){
101 for(ptr = first; ptr != last->Next; ptr = ptr->Next){
102 struct rc_instruction *new = rc_alloc_instruction(s->C);
103 memcpy(new, ptr, sizeof(struct rc_instruction));
104 rc_insert_instruction(append_to, new);
105 append_to = new;
106 }
107 }
108 }
109
110
111 static void update_const_value(void * data, struct rc_instruction * inst,
112 rc_register_file file, unsigned int index, unsigned int mask)
113 {
114 struct const_value * value = data;
115 if(value->Src->File != file ||
116 value->Src->Index != index ||
117 !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){
118 return;
119 }
120 switch(inst->U.I.Opcode){
121 case RC_OPCODE_MOV:
122 if(!src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){
123 return;
124 }
125 value->HasValue = 1;
126 value->Value =
127 get_constant_value(value->C, &inst->U.I.SrcReg[0], 0);
128 break;
129 }
130 }
131
132 static void get_incr_amount(void * data, struct rc_instruction * inst,
133 rc_register_file file, unsigned int index, unsigned int mask)
134 {
135 struct count_inst * count_inst = data;
136 int amnt_src_index;
137 const struct rc_opcode_info * opcode;
138 float amount;
139
140 if(file != RC_FILE_TEMPORARY ||
141 count_inst->Index != index ||
142 (1 << GET_SWZ(count_inst->Swz,0) != mask)){
143 return;
144 }
145 /* Find the index of the counter register. */
146 opcode = rc_get_opcode_info(inst->U.I.Opcode);
147 if(opcode->NumSrcRegs != 2){
148 count_inst->Unknown = 1;
149 return;
150 }
151 if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
152 inst->U.I.SrcReg[0].Index == count_inst->Index &&
153 inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){
154 amnt_src_index = 1;
155 } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
156 inst->U.I.SrcReg[1].Index == count_inst->Index &&
157 inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){
158 amnt_src_index = 0;
159 }
160 else{
161 count_inst->Unknown = 1;
162 return;
163 }
164 if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index],
165 count_inst->C)){
166 amount = get_constant_value(count_inst->C,
167 &inst->U.I.SrcReg[amnt_src_index], 0);
168 }
169 else{
170 count_inst->Unknown = 1 ;
171 return;
172 }
173 switch(inst->U.I.Opcode){
174 case RC_OPCODE_ADD:
175 count_inst->Amount += amount;
176 break;
177 case RC_OPCODE_SUB:
178 if(amnt_src_index == 0){
179 count_inst->Unknown = 0;
180 return;
181 }
182 count_inst->Amount -= amount;
183 break;
184 default:
185 count_inst->Unknown = 1;
186 return;
187 }
188
189 }
190
191 static int transform_const_loop(struct emulate_loop_state * s,
192 struct loop_info * loop)
193 {
194 int end_loops;
195 int iterations;
196 struct count_inst count_inst;
197 float limit_value;
198 struct rc_src_register * counter;
199 struct rc_src_register * limit;
200 struct const_value counter_value;
201 struct rc_instruction * inst;
202
203 /* Find the counter and the upper limit */
204
205 if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], s->C)){
206 limit = &loop->Cond->U.I.SrcReg[0];
207 counter = &loop->Cond->U.I.SrcReg[1];
208 }
209 else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], s->C)){
210 limit = &loop->Cond->U.I.SrcReg[1];
211 counter = &loop->Cond->U.I.SrcReg[0];
212 }
213 else{
214 DBG("No constant limit.\n");
215 return 0;
216 }
217
218 /* Find the initial value of the counter */
219 counter_value.Src = counter;
220 counter_value.Value = 0.0f;
221 counter_value.HasValue = 0;
222 counter_value.C = s->C;
223 for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop;
224 inst = inst->Next){
225 rc_for_all_writes_mask(inst, update_const_value, &counter_value);
226 }
227 if(!counter_value.HasValue){
228 DBG("Initial counter value cannot be determined.\n");
229 return 0;
230 }
231 DBG("Initial counter value is %f\n", counter_value.Value);
232 /* Determine how the counter is modified each loop */
233 count_inst.C = s->C;
234 count_inst.Index = counter->Index;
235 count_inst.Swz = counter->Swizzle;
236 count_inst.Amount = 0.0f;
237 count_inst.Unknown = 0;
238 end_loops = 1;
239 for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
240 switch(inst->U.I.Opcode){
241 /* XXX In the future we might want to try to unroll nested
242 * loops here.*/
243 case RC_OPCODE_BGNLOOP:
244 end_loops++;
245 break;
246 case RC_OPCODE_ENDLOOP:
247 loop->EndLoop = inst;
248 end_loops--;
249 break;
250 case RC_OPCODE_BRK:
251 /* Don't unroll loops if it has a BRK instruction
252 * other one used when testing the main conditional
253 * of the loop. */
254
255 /* Make sure we haven't entered a nested loops. */
256 if(inst != loop->Brk && end_loops == 1) {
257 return 0;
258 }
259 break;
260 /* XXX Check if the counter is modified within an if statement.
261 */
262 case RC_OPCODE_IF:
263 break;
264 default:
265 rc_for_all_writes_mask(inst, get_incr_amount, &count_inst);
266 if(count_inst.Unknown){
267 return 0;
268 }
269 break;
270 }
271 }
272 /* Infinite loop */
273 if(count_inst.Amount == 0.0f){
274 return 0;
275 }
276 DBG("Counter is increased by %f each iteration.\n", count_inst.Amount);
277 /* Calculate the number of iterations of this loop. Keeping this
278 * simple, since we only support increment and decrement loops.
279 */
280 limit_value = get_constant_value(s->C, limit, 0);
281 DBG("Limit is %f.\n", limit_value);
282 switch(loop->Cond->U.I.Opcode){
283 case RC_OPCODE_SGT:
284 case RC_OPCODE_SLT:
285 iterations = (int) ceilf((limit_value - counter_value.Value) /
286 count_inst.Amount);
287 break;
288
289 case RC_OPCODE_SLE:
290 case RC_OPCODE_SGE:
291 iterations = (int) floorf((limit_value - counter_value.Value) /
292 count_inst.Amount) + 1;
293 break;
294 default:
295 return 0;
296 }
297
298 DBG("Loop will have %d iterations.\n", iterations);
299
300 /* Prepare loop for unrolling */
301 rc_remove_instruction(loop->Cond);
302 rc_remove_instruction(loop->If);
303 rc_remove_instruction(loop->Brk);
304 rc_remove_instruction(loop->EndIf);
305
306 loop_unroll(s, loop, iterations);
307 loop->EndLoop = NULL;
308 return 1;
309 }
310
311 /**
312 * This function prepares a loop to be unrolled by converting it into an if
313 * statement. Here is an outline of the conversion process:
314 * BGNLOOP; -> BGNLOOP;
315 * <Additional conditional code> -> <Additional conditional code>
316 * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
317 * IF temp[0]; -> IF temp[0];
318 * BRK; ->
319 * ENDIF; -> <Loop Body>
320 * <Loop Body> -> ENDIF;
321 * ENDLOOP; -> ENDLOOP
322 *
323 * @param inst A pointer to a BGNLOOP instruction.
324 * @return If the loop can be unrolled, a pointer to the first instruction of
325 * the unrolled loop.
326 * Otherwise, A pointer to the ENDLOOP instruction.
327 * Null if there is an error.
328 */
329 static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
330 struct rc_instruction * inst)
331 {
332 struct loop_info *loop;
333 struct rc_instruction * ptr;
334
335 memory_pool_array_reserve(&s->C->Pool, struct loop_info,
336 s->Loops, s->LoopCount, s->LoopReserved, 1);
337
338 loop = &s->Loops[s->LoopCount++];
339 memset(loop, 0, sizeof(struct loop_info));
340 if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
341 rc_error(s->C, "expected BGNLOOP\n", __FUNCTION__);
342 return NULL;
343 }
344 loop->BeginLoop = inst;
345
346 for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){
347 switch(ptr->U.I.Opcode){
348 case RC_OPCODE_BGNLOOP:
349 /* Nested loop */
350 ptr = transform_loop(s, ptr);
351 if(!ptr){
352 return NULL;
353 }
354 break;
355 case RC_OPCODE_BRK:
356 loop->Brk = ptr;
357 if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){
358 rc_error(s->C,
359 "%s: expected ENDIF\n",__FUNCTION__);
360 return NULL;
361 }
362 loop->EndIf = ptr->Next;
363 if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){
364 rc_error(s->C,
365 "%s: expected IF\n", __FUNCTION__);
366 return NULL;
367 }
368 loop->If = ptr->Prev;
369 switch(loop->If->Prev->U.I.Opcode){
370 case RC_OPCODE_SLT:
371 case RC_OPCODE_SGE:
372 case RC_OPCODE_SGT:
373 case RC_OPCODE_SLE:
374 case RC_OPCODE_SEQ:
375 case RC_OPCODE_SNE:
376 break;
377 default:
378 rc_error(s->C, "%s expected conditional\n",
379 __FUNCTION__);
380 return NULL;
381 }
382 loop->Cond = loop->If->Prev;
383 ptr = loop->EndIf;
384 break;
385 case RC_OPCODE_ENDLOOP:
386 loop->EndLoop = ptr;
387 break;
388 }
389 }
390 /* Reverse the conditional instruction */
391 switch(loop->Cond->U.I.Opcode){
392 case RC_OPCODE_SGE:
393 loop->Cond->U.I.Opcode = RC_OPCODE_SLT;
394 break;
395 case RC_OPCODE_SLT:
396 loop->Cond->U.I.Opcode = RC_OPCODE_SGE;
397 break;
398 case RC_OPCODE_SLE:
399 loop->Cond->U.I.Opcode = RC_OPCODE_SGT;
400 break;
401 case RC_OPCODE_SGT:
402 loop->Cond->U.I.Opcode = RC_OPCODE_SLE;
403 break;
404 case RC_OPCODE_SEQ:
405 loop->Cond->U.I.Opcode = RC_OPCODE_SNE;
406 break;
407 case RC_OPCODE_SNE:
408 loop->Cond->U.I.Opcode = RC_OPCODE_SEQ;
409 break;
410 default:
411 rc_error(s->C, "loop->Cond is not a conditional.\n");
412 return NULL;
413 }
414
415 /* Check if the number of loops is known at compile time. */
416 if(transform_const_loop(s, loop)){
417 return loop->BeginLoop->Next;
418 }
419
420 /* Prepare the loop to be unrolled */
421 rc_remove_instruction(loop->Brk);
422 rc_remove_instruction(loop->EndIf);
423 rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
424 return loop->EndLoop;
425 }
426
427 void rc_transform_unroll_loops(struct radeon_compiler *c,
428 struct emulate_loop_state * s)
429 {
430 struct rc_instruction * ptr;
431
432 memset(s, 0, sizeof(struct emulate_loop_state));
433 s->C = c;
434 ptr = s->C->Program.Instructions.Next;
435 while(ptr != &s->C->Program.Instructions) {
436 if(ptr->Type == RC_INSTRUCTION_NORMAL &&
437 ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
438 ptr = transform_loop(s, ptr);
439 if(!ptr){
440 return;
441 }
442 }
443 ptr = ptr->Next;
444 }
445 }
446
447 void rc_emulate_loops(struct emulate_loop_state *s,
448 unsigned int max_instructions)
449 {
450 int i;
451 /* Iterate backwards of the list of loops so that loops that nested
452 * loops are unrolled first.
453 */
454 for( i = s->LoopCount - 1; i >= 0; i-- ){
455 if(!s->Loops[i].EndLoop){
456 continue;
457 }
458 unsigned int iterations = loop_calc_iterations(s, &s->Loops[i],
459 max_instructions);
460 loop_unroll(s, &s->Loops[i], iterations);
461 }
462 }