r300/compiler: Fix loop unrolling
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_emulate_loops.c
1 /*
2 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 */
31
32 #include "radeon_emulate_loops.h"
33
34 #include "radeon_compiler.h"
35 #include "radeon_dataflow.h"
36
37 #define VERBOSE 0
38
39 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
40
41 struct const_value {
42
43 struct radeon_compiler * C;
44 struct rc_src_register * Src;
45 float Value;
46 int HasValue;
47 };
48
49 struct count_inst {
50 struct radeon_compiler * C;
51 int Index;
52 rc_swizzle Swz;
53 float Amount;
54 int Unknown;
55 };
56
57 static float get_constant_value(struct radeon_compiler * c,
58 struct rc_src_register * src,
59 int chan)
60 {
61 float base = 1.0f;
62 int swz = GET_SWZ(src->Swizzle, chan);
63 if(swz >= 4 || src->Index >= c->Program.Constants.Count ){
64 rc_error(c, "get_constant_value: Can't find a value.\n");
65 return 0.0f;
66 }
67 if(GET_BIT(src->Negate, chan)){
68 base = -1.0f;
69 }
70 return base *
71 c->Program.Constants.Constants[src->Index].u.Immediate[swz];
72 }
73
74 static int src_reg_is_immediate(struct rc_src_register * src,
75 struct radeon_compiler * c)
76 {
77 return src->File == RC_FILE_CONSTANT &&
78 c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE;
79 }
80
81 static unsigned int loop_calc_iterations(struct emulate_loop_state *s,
82 struct loop_info * loop, unsigned int max_instructions)
83 {
84 unsigned int total_i = rc_recompute_ips(s->C);
85 unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;
86 /* +1 because the program already has one iteration of the loop. */
87 return 1 + ((max_instructions - total_i) / (s->LoopCount * loop_i));
88 }
89
90 static void loop_unroll(struct emulate_loop_state * s,
91 struct loop_info *loop, unsigned int iterations)
92 {
93 unsigned int i;
94 struct rc_instruction * ptr;
95 struct rc_instruction * first = loop->BeginLoop->Next;
96 struct rc_instruction * last = loop->EndLoop->Prev;
97 struct rc_instruction * append_to = last;
98 rc_remove_instruction(loop->BeginLoop);
99 rc_remove_instruction(loop->EndLoop);
100 for( i = 1; i < iterations; i++){
101 for(ptr = first; ptr != last->Next; ptr = ptr->Next){
102 struct rc_instruction *new = rc_alloc_instruction(s->C);
103 memcpy(new, ptr, sizeof(struct rc_instruction));
104 rc_insert_instruction(append_to, new);
105 append_to = new;
106 }
107 }
108 }
109
110
111 static void update_const_value(void * data, struct rc_instruction * inst,
112 rc_register_file file, unsigned int index, unsigned int mask)
113 {
114 struct const_value * value = data;
115 if(value->Src->File != file ||
116 value->Src->Index != index ||
117 !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){
118 return;
119 }
120 switch(inst->U.I.Opcode){
121 case RC_OPCODE_MOV:
122 if(!src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){
123 return;
124 }
125 value->HasValue = 1;
126 value->Value =
127 get_constant_value(value->C, &inst->U.I.SrcReg[0], 0);
128 break;
129 }
130 }
131
132 static void get_incr_amount(void * data, struct rc_instruction * inst,
133 rc_register_file file, unsigned int index, unsigned int mask)
134 {
135 struct count_inst * count_inst = data;
136 int amnt_src_index;
137 const struct rc_opcode_info * opcode;
138 float amount;
139
140 if(file != RC_FILE_TEMPORARY ||
141 count_inst->Index != index ||
142 (1 << GET_SWZ(count_inst->Swz,0) != mask)){
143 return;
144 }
145 /* Find the index of the counter register. */
146 opcode = rc_get_opcode_info(inst->U.I.Opcode);
147 if(opcode->NumSrcRegs != 2){
148 count_inst->Unknown = 1;
149 return;
150 }
151 if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
152 inst->U.I.SrcReg[0].Index == count_inst->Index &&
153 inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){
154 amnt_src_index = 1;
155 } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
156 inst->U.I.SrcReg[1].Index == count_inst->Index &&
157 inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){
158 amnt_src_index = 0;
159 }
160 else{
161 count_inst->Unknown = 1;
162 return;
163 }
164 if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index],
165 count_inst->C)){
166 amount = get_constant_value(count_inst->C,
167 &inst->U.I.SrcReg[amnt_src_index], 0);
168 }
169 else{
170 count_inst->Unknown = 1 ;
171 return;
172 }
173 switch(inst->U.I.Opcode){
174 case RC_OPCODE_ADD:
175 count_inst->Amount += amount;
176 break;
177 case RC_OPCODE_SUB:
178 if(amnt_src_index == 0){
179 count_inst->Unknown = 0;
180 return;
181 }
182 count_inst->Amount -= amount;
183 break;
184 default:
185 count_inst->Unknown = 1;
186 return;
187 }
188
189 }
190
191 static int transform_const_loop(struct emulate_loop_state * s,
192 struct loop_info * loop)
193 {
194 int end_loops = 1;
195 int iterations;
196 struct count_inst count_inst;
197 float limit_value;
198 struct rc_src_register * counter;
199 struct rc_src_register * limit;
200 struct const_value counter_value;
201 struct rc_instruction * inst;
202
203 /* Find the counter and the upper limit */
204
205 if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], s->C)){
206 limit = &loop->Cond->U.I.SrcReg[0];
207 counter = &loop->Cond->U.I.SrcReg[1];
208 }
209 else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], s->C)){
210 limit = &loop->Cond->U.I.SrcReg[1];
211 counter = &loop->Cond->U.I.SrcReg[0];
212 }
213 else{
214 DBG("No constant limit.\n");
215 return 0;
216 }
217
218 /* Find the initial value of the counter */
219 counter_value.Src = counter;
220 counter_value.Value = 0.0f;
221 counter_value.HasValue = 0;
222 counter_value.C = s->C;
223 for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop;
224 inst = inst->Next){
225 rc_for_all_writes_mask(inst, update_const_value, &counter_value);
226 }
227 if(!counter_value.HasValue){
228 DBG("Initial counter value cannot be determined.\n");
229 return 0;
230 }
231 DBG("Initial counter value is %f\n", counter_value.Value);
232 /* Determine how the counter is modified each loop */
233 count_inst.C = s->C;
234 count_inst.Index = counter->Index;
235 count_inst.Swz = counter->Swizzle;
236 count_inst.Amount = 0.0f;
237 count_inst.Unknown = 0;
238 for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
239 switch(inst->U.I.Opcode){
240 /* XXX In the future we might want to try to unroll nested
241 * loops here.*/
242 case RC_OPCODE_BGNLOOP:
243 end_loops++;
244 break;
245 case RC_OPCODE_ENDLOOP:
246 loop->EndLoop = inst;
247 end_loops--;
248 break;
249 /* XXX Check if the counter is modified within an if statement.
250 */
251 case RC_OPCODE_IF:
252 break;
253 default:
254 rc_for_all_writes_mask(inst, get_incr_amount, &count_inst);
255 if(count_inst.Unknown){
256 return 0;
257 }
258 break;
259 }
260 }
261 /* Infinite loop */
262 if(count_inst.Amount == 0.0f){
263 return 0;
264 }
265 DBG("Counter is increased by %f each iteration.\n", count_inst.Amount);
266 /* Calculate the number of iterations of this loop. Keeping this
267 * simple, since we only support increment and decrement loops.
268 */
269 limit_value = get_constant_value(s->C, limit, 0);
270 DBG("Limit is %f.\n", limit_value);
271 switch(loop->Cond->U.I.Opcode){
272 case RC_OPCODE_SGT:
273 case RC_OPCODE_SLT:
274 iterations = (int) ceilf((limit_value - counter_value.Value) /
275 count_inst.Amount);
276 break;
277
278 case RC_OPCODE_SLE:
279 case RC_OPCODE_SGE:
280 iterations = (int) floorf((limit_value - counter_value.Value) /
281 count_inst.Amount) + 1;
282 break;
283 default:
284 return 0;
285 }
286
287 DBG("Loop will have %d iterations.\n", iterations);
288
289 /* Prepare loop for unrolling */
290 rc_remove_instruction(loop->Cond);
291 rc_remove_instruction(loop->If);
292 rc_remove_instruction(loop->Brk);
293 rc_remove_instruction(loop->EndIf);
294
295 loop_unroll(s, loop, iterations);
296 loop->EndLoop = NULL;
297 return 1;
298 }
299
300 /**
301 * This function prepares a loop to be unrolled by converting it into an if
302 * statement. Here is an outline of the conversion process:
303 * BGNLOOP; -> BGNLOOP;
304 * <Additional conditional code> -> <Additional conditional code>
305 * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
306 * IF temp[0]; -> IF temp[0];
307 * BRK; ->
308 * ENDIF; -> <Loop Body>
309 * <Loop Body> -> ENDIF;
310 * ENDLOOP; -> ENDLOOP
311 *
312 * @param inst A pointer to a BGNLOOP instruction.
313 * @return If the loop can be unrolled, a pointer to the first instruction of
314 * the unrolled loop.
315 * Otherwise, A pointer to the ENDLOOP instruction.
316 * Null if there is an error.
317 */
318 static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
319 struct rc_instruction * inst)
320 {
321 struct loop_info *loop;
322 struct rc_instruction * ptr;
323
324 memory_pool_array_reserve(&s->C->Pool, struct loop_info,
325 s->Loops, s->LoopCount, s->LoopReserved, 1);
326
327 loop = &s->Loops[s->LoopCount++];
328 memset(loop, 0, sizeof(struct loop_info));
329 if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
330 rc_error(s->C, "expected BGNLOOP\n", __FUNCTION__);
331 return NULL;
332 }
333 loop->BeginLoop = inst;
334
335 for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){
336 switch(ptr->U.I.Opcode){
337 case RC_OPCODE_BGNLOOP:
338 /* Nested loop */
339 ptr = transform_loop(s, ptr);
340 if(!ptr){
341 return NULL;
342 }
343 break;
344 case RC_OPCODE_BRK:
345 loop->Brk = ptr;
346 if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){
347 rc_error(s->C,
348 "%s: expected ENDIF\n",__FUNCTION__);
349 return NULL;
350 }
351 loop->EndIf = ptr->Next;
352 if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){
353 rc_error(s->C,
354 "%s: expected IF\n", __FUNCTION__);
355 return NULL;
356 }
357 loop->If = ptr->Prev;
358 switch(loop->If->Prev->U.I.Opcode){
359 case RC_OPCODE_SLT:
360 case RC_OPCODE_SGE:
361 case RC_OPCODE_SGT:
362 case RC_OPCODE_SLE:
363 case RC_OPCODE_SEQ:
364 case RC_OPCODE_SNE:
365 break;
366 default:
367 rc_error(s->C, "%s expected conditional\n",
368 __FUNCTION__);
369 return NULL;
370 }
371 loop->Cond = loop->If->Prev;
372 ptr = loop->EndIf;
373 break;
374 case RC_OPCODE_ENDLOOP:
375 loop->EndLoop = ptr;
376 break;
377 }
378 }
379 /* Reverse the conditional instruction */
380 switch(loop->Cond->U.I.Opcode){
381 case RC_OPCODE_SGE:
382 loop->Cond->U.I.Opcode = RC_OPCODE_SLT;
383 break;
384 case RC_OPCODE_SLT:
385 loop->Cond->U.I.Opcode = RC_OPCODE_SGE;
386 break;
387 case RC_OPCODE_SLE:
388 loop->Cond->U.I.Opcode = RC_OPCODE_SGT;
389 break;
390 case RC_OPCODE_SGT:
391 loop->Cond->U.I.Opcode = RC_OPCODE_SLE;
392 break;
393 case RC_OPCODE_SEQ:
394 loop->Cond->U.I.Opcode = RC_OPCODE_SNE;
395 break;
396 case RC_OPCODE_SNE:
397 loop->Cond->U.I.Opcode = RC_OPCODE_SEQ;
398 break;
399 default:
400 rc_error(s->C, "loop->Cond is not a conditional.\n");
401 return NULL;
402 }
403
404 /* Check if the number of loops is known at compile time. */
405 if(transform_const_loop(s, loop)){
406 return loop->BeginLoop->Next;
407 }
408
409 /* Prepare the loop to be unrolled */
410 rc_remove_instruction(loop->Brk);
411 rc_remove_instruction(loop->EndIf);
412 rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
413 return loop->EndLoop;
414 }
415
416 void rc_transform_unroll_loops(struct radeon_compiler *c,
417 struct emulate_loop_state * s)
418 {
419 struct rc_instruction * ptr;
420
421 memset(s, 0, sizeof(struct emulate_loop_state));
422 s->C = c;
423 ptr = s->C->Program.Instructions.Next;
424 while(ptr != &s->C->Program.Instructions) {
425 if(ptr->Type == RC_INSTRUCTION_NORMAL &&
426 ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
427 ptr = transform_loop(s, ptr);
428 if(!ptr){
429 return;
430 }
431 }
432 ptr = ptr->Next;
433 }
434 }
435
436 void rc_emulate_loops(struct emulate_loop_state *s,
437 unsigned int max_instructions)
438 {
439 int i;
440 /* Iterate backwards of the list of loops so that loops that nested
441 * loops are unrolled first.
442 */
443 for( i = s->LoopCount - 1; i >= 0; i-- ){
444 if(!s->Loops[i].EndLoop){
445 continue;
446 }
447 unsigned int iterations = loop_calc_iterations(s, &s->Loops[i],
448 max_instructions);
449 loop_unroll(s, &s->Loops[i], iterations);
450 }
451 }