gallium: Add capability for ARB_robust_buffer_access_behavior.
[mesa.git] / src / gallium / drivers / r300 / compiler / radeon_emulate_loops.c
1 /*
2 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 */
31
32 #include <stdio.h>
33 #include "c99_math.h"
34 #include "radeon_emulate_loops.h"
35 #include "radeon_compiler.h"
36 #include "radeon_compiler_util.h"
37 #include "radeon_dataflow.h"
38
39 #define VERBOSE 0
40
41 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
42
43 struct const_value {
44 struct radeon_compiler * C;
45 struct rc_src_register * Src;
46 float Value;
47 int HasValue;
48 };
49
50 struct count_inst {
51 struct radeon_compiler * C;
52 int Index;
53 rc_swizzle Swz;
54 float Amount;
55 int Unknown;
56 unsigned BranchDepth;
57 };
58
59 static unsigned int loop_max_possible_iterations(struct radeon_compiler *c,
60 struct loop_info * loop)
61 {
62 unsigned int total_i = rc_recompute_ips(c);
63 unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;
64 /* +1 because the program already has one iteration of the loop. */
65 return 1 + ((c->max_alu_insts - total_i) / loop_i);
66 }
67
68 static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
69 unsigned int iterations)
70 {
71 unsigned int i;
72 struct rc_instruction * ptr;
73 struct rc_instruction * first = loop->BeginLoop->Next;
74 struct rc_instruction * last = loop->EndLoop->Prev;
75 struct rc_instruction * append_to = last;
76 rc_remove_instruction(loop->BeginLoop);
77 rc_remove_instruction(loop->EndLoop);
78 for( i = 1; i < iterations; i++){
79 for(ptr = first; ptr != last->Next; ptr = ptr->Next){
80 struct rc_instruction *new = rc_alloc_instruction(c);
81 memcpy(new, ptr, sizeof(struct rc_instruction));
82 rc_insert_instruction(append_to, new);
83 append_to = new;
84 }
85 }
86 }
87
88
89 static void update_const_value(void * data, struct rc_instruction * inst,
90 rc_register_file file, unsigned int index, unsigned int mask)
91 {
92 struct const_value * value = data;
93 if(value->Src->File != file ||
94 value->Src->Index != index ||
95 !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){
96 return;
97 }
98 switch(inst->U.I.Opcode){
99 case RC_OPCODE_MOV:
100 if(!rc_src_reg_is_immediate(value->C, inst->U.I.SrcReg[0].File,
101 inst->U.I.SrcReg[0].Index)){
102 return;
103 }
104 value->HasValue = 1;
105 value->Value =
106 rc_get_constant_value(value->C,
107 inst->U.I.SrcReg[0].Index,
108 inst->U.I.SrcReg[0].Swizzle,
109 inst->U.I.SrcReg[0].Negate, 0);
110 break;
111 }
112 }
113
114 static void get_incr_amount(void * data, struct rc_instruction * inst,
115 rc_register_file file, unsigned int index, unsigned int mask)
116 {
117 struct count_inst * count_inst = data;
118 int amnt_src_index;
119 const struct rc_opcode_info * opcode;
120 float amount;
121
122 if(file != RC_FILE_TEMPORARY ||
123 count_inst->Index != index ||
124 (1 << GET_SWZ(count_inst->Swz,0) != mask)){
125 return;
126 }
127
128 /* XXX: Give up if the counter is modified within an IF block. We
129 * could handle this case with better analysis. */
130 if (count_inst->BranchDepth > 0) {
131 count_inst->Unknown = 1;
132 return;
133 }
134
135 /* Find the index of the counter register. */
136 opcode = rc_get_opcode_info(inst->U.I.Opcode);
137 if(opcode->NumSrcRegs != 2){
138 count_inst->Unknown = 1;
139 return;
140 }
141 if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
142 inst->U.I.SrcReg[0].Index == count_inst->Index &&
143 inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){
144 amnt_src_index = 1;
145 } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
146 inst->U.I.SrcReg[1].Index == count_inst->Index &&
147 inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){
148 amnt_src_index = 0;
149 }
150 else{
151 count_inst->Unknown = 1;
152 return;
153 }
154 if(rc_src_reg_is_immediate(count_inst->C,
155 inst->U.I.SrcReg[amnt_src_index].File,
156 inst->U.I.SrcReg[amnt_src_index].Index)){
157 amount = rc_get_constant_value(count_inst->C,
158 inst->U.I.SrcReg[amnt_src_index].Index,
159 inst->U.I.SrcReg[amnt_src_index].Swizzle,
160 inst->U.I.SrcReg[amnt_src_index].Negate, 0);
161 }
162 else{
163 count_inst->Unknown = 1 ;
164 return;
165 }
166 switch(inst->U.I.Opcode){
167 case RC_OPCODE_ADD:
168 count_inst->Amount += amount;
169 break;
170 case RC_OPCODE_SUB:
171 if(amnt_src_index == 0){
172 count_inst->Unknown = 0;
173 return;
174 }
175 count_inst->Amount -= amount;
176 break;
177 default:
178 count_inst->Unknown = 1;
179 return;
180 }
181 }
182
183 /**
184 * If c->max_alu_inst is -1, then all eligible loops will be unrolled regardless
185 * of how many iterations they have.
186 */
187 static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop)
188 {
189 int end_loops;
190 int iterations;
191 struct count_inst count_inst;
192 float limit_value;
193 struct rc_src_register * counter;
194 struct rc_src_register * limit;
195 struct const_value counter_value;
196 struct rc_instruction * inst;
197
198 /* Find the counter and the upper limit */
199
200 if(rc_src_reg_is_immediate(c, loop->Cond->U.I.SrcReg[0].File,
201 loop->Cond->U.I.SrcReg[0].Index)){
202 limit = &loop->Cond->U.I.SrcReg[0];
203 counter = &loop->Cond->U.I.SrcReg[1];
204 }
205 else if(rc_src_reg_is_immediate(c, loop->Cond->U.I.SrcReg[1].File,
206 loop->Cond->U.I.SrcReg[1].Index)){
207 limit = &loop->Cond->U.I.SrcReg[1];
208 counter = &loop->Cond->U.I.SrcReg[0];
209 }
210 else{
211 DBG("No constant limit.\n");
212 return 0;
213 }
214
215 /* Find the initial value of the counter */
216 counter_value.Src = counter;
217 counter_value.Value = 0.0f;
218 counter_value.HasValue = 0;
219 counter_value.C = c;
220 for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop;
221 inst = inst->Next){
222 rc_for_all_writes_mask(inst, update_const_value, &counter_value);
223 }
224 if(!counter_value.HasValue){
225 DBG("Initial counter value cannot be determined.\n");
226 return 0;
227 }
228 DBG("Initial counter value is %f\n", counter_value.Value);
229 /* Determine how the counter is modified each loop */
230 count_inst.C = c;
231 count_inst.Index = counter->Index;
232 count_inst.Swz = counter->Swizzle;
233 count_inst.Amount = 0.0f;
234 count_inst.Unknown = 0;
235 count_inst.BranchDepth = 0;
236 end_loops = 1;
237 for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
238 switch(inst->U.I.Opcode){
239 /* XXX In the future we might want to try to unroll nested
240 * loops here.*/
241 case RC_OPCODE_BGNLOOP:
242 end_loops++;
243 break;
244 case RC_OPCODE_ENDLOOP:
245 loop->EndLoop = inst;
246 end_loops--;
247 break;
248 case RC_OPCODE_BRK:
249 /* Don't unroll loops if it has a BRK instruction
250 * other one used when testing the main conditional
251 * of the loop. */
252
253 /* Make sure we haven't entered a nested loops. */
254 if(inst != loop->Brk && end_loops == 1) {
255 return 0;
256 }
257 break;
258 case RC_OPCODE_IF:
259 count_inst.BranchDepth++;
260 break;
261 case RC_OPCODE_ENDIF:
262 count_inst.BranchDepth--;
263 break;
264 default:
265 rc_for_all_writes_mask(inst, get_incr_amount, &count_inst);
266 if(count_inst.Unknown){
267 return 0;
268 }
269 break;
270 }
271 }
272 /* Infinite loop */
273 if(count_inst.Amount == 0.0f){
274 return 0;
275 }
276 DBG("Counter is increased by %f each iteration.\n", count_inst.Amount);
277 /* Calculate the number of iterations of this loop. Keeping this
278 * simple, since we only support increment and decrement loops.
279 */
280 limit_value = rc_get_constant_value(c, limit->Index, limit->Swizzle,
281 limit->Negate, 0);
282 DBG("Limit is %f.\n", limit_value);
283 /* The iteration calculations are opposite of what you would expect.
284 * In a normal loop, if the condition is met, then loop continues, but
285 * with our loops, if the condition is met, the is exited. */
286 switch(loop->Cond->U.I.Opcode){
287 case RC_OPCODE_SGE:
288 case RC_OPCODE_SLE:
289 iterations = (int) ceilf((limit_value - counter_value.Value) /
290 count_inst.Amount);
291 break;
292
293 case RC_OPCODE_SGT:
294 case RC_OPCODE_SLT:
295 iterations = (int) floorf((limit_value - counter_value.Value) /
296 count_inst.Amount) + 1;
297 break;
298 default:
299 return 0;
300 }
301
302 if (c->max_alu_insts > 0
303 && iterations > loop_max_possible_iterations(c, loop)) {
304 return 0;
305 }
306
307 DBG("Loop will have %d iterations.\n", iterations);
308
309 /* Prepare loop for unrolling */
310 rc_remove_instruction(loop->Cond);
311 rc_remove_instruction(loop->If);
312 rc_remove_instruction(loop->Brk);
313 rc_remove_instruction(loop->EndIf);
314
315 unroll_loop(c, loop, iterations);
316 loop->EndLoop = NULL;
317 return 1;
318 }
319
320 /**
321 * @param c
322 * @param loop
323 * @param inst A pointer to a BGNLOOP instruction.
324 * @return 1 if all of the members of loop where set.
325 * @return 0 if there was an error and some members of loop are still NULL.
326 */
327 static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,
328 struct rc_instruction * inst)
329 {
330 struct rc_instruction * ptr;
331
332 if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
333 rc_error(c, "%s: expected BGNLOOP", __FUNCTION__);
334 return 0;
335 }
336
337 memset(loop, 0, sizeof(struct loop_info));
338
339 loop->BeginLoop = inst;
340
341 for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) {
342
343 if (ptr == &c->Program.Instructions) {
344 rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n",
345 __FUNCTION__);
346 return 0;
347 }
348
349 switch(ptr->U.I.Opcode){
350 case RC_OPCODE_BGNLOOP:
351 {
352 /* Nested loop, skip ahead to the end. */
353 unsigned int loop_depth = 1;
354 for(ptr = ptr->Next; ptr != &c->Program.Instructions;
355 ptr = ptr->Next){
356 if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
357 loop_depth++;
358 } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
359 if (!--loop_depth) {
360 break;
361 }
362 }
363 }
364 if (ptr == &c->Program.Instructions) {
365 rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n",
366 __FUNCTION__);
367 return 0;
368 }
369 break;
370 }
371 case RC_OPCODE_BRK:
372 {
373 struct rc_src_register *src;
374 if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF
375 || ptr->Prev->U.I.Opcode != RC_OPCODE_IF
376 || loop->Brk){
377 continue;
378 }
379 loop->Brk = ptr;
380 loop->If = ptr->Prev;
381 loop->EndIf = ptr->Next;
382 src = &loop->If->U.I.SrcReg[0];
383
384 for (loop->Cond = loop->If->Prev;
385 loop->Cond->U.I.Opcode != RC_OPCODE_BGNLOOP;
386 loop->Cond = loop->Cond->Prev) {
387
388 const struct rc_dst_register *dst = &loop->Cond->U.I.DstReg;
389 if (dst->File == src->File &&
390 dst->Index == src->Index &&
391 dst->WriteMask & (rc_swizzle_to_writemask(src->Swizzle))) {
392 if (loop->Cond->U.I.Opcode == RC_OPCODE_CMP) {
393 src = &loop->Cond->U.I.SrcReg[0];
394 continue;
395 }
396 break;
397 }
398 }
399
400 if (loop->Cond->U.I.Opcode == RC_OPCODE_BGNLOOP) {
401 rc_error(c, "%s: Cannot find condition for if\n", __FUNCTION__);
402 return 0;
403 }
404 break;
405 }
406 case RC_OPCODE_ENDLOOP:
407 loop->EndLoop = ptr;
408 break;
409 }
410 }
411
412 if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf
413 && loop->Cond && loop->EndLoop) {
414 return 1;
415 }
416 return 0;
417 }
418
419 /**
420 * This function prepares a loop to be unrolled by converting it into an if
421 * statement. Here is an outline of the conversion process:
422 * BGNLOOP; -> BGNLOOP;
423 * <Additional conditional code> -> <Additional conditional code>
424 * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
425 * IF temp[0]; -> IF temp[0];
426 * BRK; ->
427 * ENDIF; -> <Loop Body>
428 * <Loop Body> -> ENDIF;
429 * ENDLOOP; -> ENDLOOP
430 *
431 * @param inst A pointer to a BGNLOOP instruction.
432 * @return 1 for success, 0 for failure
433 */
434 static int transform_loop(struct emulate_loop_state * s,
435 struct rc_instruction * inst)
436 {
437 struct loop_info * loop;
438
439 memory_pool_array_reserve(&s->C->Pool, struct loop_info,
440 s->Loops, s->LoopCount, s->LoopReserved, 1);
441
442 loop = &s->Loops[s->LoopCount++];
443
444 if (!build_loop_info(s->C, loop, inst)) {
445 rc_error(s->C, "Failed to build loop info\n");
446 return 0;
447 }
448
449 if(try_unroll_loop(s->C, loop)){
450 return 1;
451 }
452
453 /* Reverse the conditional instruction */
454 switch(loop->Cond->U.I.Opcode){
455 case RC_OPCODE_SGE:
456 loop->Cond->U.I.Opcode = RC_OPCODE_SLT;
457 break;
458 case RC_OPCODE_SLT:
459 loop->Cond->U.I.Opcode = RC_OPCODE_SGE;
460 break;
461 case RC_OPCODE_SLE:
462 loop->Cond->U.I.Opcode = RC_OPCODE_SGT;
463 break;
464 case RC_OPCODE_SGT:
465 loop->Cond->U.I.Opcode = RC_OPCODE_SLE;
466 break;
467 case RC_OPCODE_SEQ:
468 loop->Cond->U.I.Opcode = RC_OPCODE_SNE;
469 break;
470 case RC_OPCODE_SNE:
471 loop->Cond->U.I.Opcode = RC_OPCODE_SEQ;
472 break;
473 default:
474 rc_error(s->C, "loop->Cond is not a conditional.\n");
475 return 0;
476 }
477
478 /* Prepare the loop to be emulated */
479 rc_remove_instruction(loop->Brk);
480 rc_remove_instruction(loop->EndIf);
481 rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
482 return 1;
483 }
484
485 void rc_transform_loops(struct radeon_compiler *c, void *user)
486 {
487 struct emulate_loop_state * s = &c->loop_state;
488 struct rc_instruction * ptr;
489
490 memset(s, 0, sizeof(struct emulate_loop_state));
491 s->C = c;
492 for(ptr = s->C->Program.Instructions.Next;
493 ptr != &s->C->Program.Instructions; ptr = ptr->Next) {
494 if(ptr->Type == RC_INSTRUCTION_NORMAL &&
495 ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
496 if (!transform_loop(s, ptr))
497 return;
498 }
499 }
500 }
501
502 void rc_unroll_loops(struct radeon_compiler *c, void *user)
503 {
504 struct rc_instruction * inst;
505 struct loop_info loop;
506
507 for(inst = c->Program.Instructions.Next;
508 inst != &c->Program.Instructions; inst = inst->Next) {
509
510 if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
511 if (build_loop_info(c, &loop, inst)) {
512 try_unroll_loop(c, &loop);
513 }
514 }
515 }
516 }
517
518 void rc_emulate_loops(struct radeon_compiler *c, void *user)
519 {
520 struct emulate_loop_state * s = &c->loop_state;
521 int i;
522 /* Iterate backwards of the list of loops so that loops that nested
523 * loops are unrolled first.
524 */
525 for( i = s->LoopCount - 1; i >= 0; i-- ){
526 unsigned int iterations;
527
528 if(!s->Loops[i].EndLoop){
529 continue;
530 }
531 iterations = loop_max_possible_iterations(s->C, &s->Loops[i]);
532 unroll_loop(s->C, &s->Loops[i], iterations);
533 }
534 }