r300/compiler: Handle SGT and SLE at the beginning of loops.
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_emulate_loops.c
1 /*
2 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 */
31
32 #include "radeon_emulate_loops.h"
33
34 #include "radeon_compiler.h"
35 #include "radeon_dataflow.h"
36
/* Compile-time switch: set to a non-zero value to enable DBG() output. */
#define VERBOSE 0

/* printf-style trace written to stderr; emits nothing while VERBOSE is 0. */
#define DBG(...) \
	do { \
		if (VERBOSE) \
			fprintf(stderr, __VA_ARGS__); \
	} while(0)
40
/** Bookkeeping shared by the loop transformation and unrolling passes. */
struct emulate_loop_state {
	/** Compiler instance whose program is being rewritten. */
	struct radeon_compiler *C;
	/** Array of the loops recorded by transform_loop(). */
	struct loop_info *Loops;
	/** Number of entries of Loops that are in use. */
	unsigned int LoopCount;
	/** Capacity counter handed to memory_pool_array_reserve() for Loops. */
	unsigned int LoopReserved;
};
47
/** The pair of instructions that delimit one loop. */
struct loop_info {
	/** The BGNLOOP instruction that opens the loop. */
	struct rc_instruction *BeginLoop;
	/** The matching ENDLOOP; NULL while unknown or once already unrolled. */
	struct rc_instruction *EndLoop;
};
52
/** State used while searching for the constant value held by a register. */
struct const_value {
	/** Compiler instance, needed to look up immediate constants. */
	struct radeon_compiler *C;
	/** The source register whose value is being tracked. */
	struct rc_src_register *Src;
	/** Last constant value found for Src; meaningful only if HasValue. */
	float Value;
	/** Non-zero once a constant value has been recorded in Value. */
	int HasValue;
};
60
61 struct count_inst {
62 struct radeon_compiler * C;
63 int Index;
64 rc_swizzle Swz;
65 float Amount;
66 int Unknown;
67 };
68
69 static float get_constant_value(struct radeon_compiler * c,
70 struct rc_src_register * src,
71 int chan)
72 {
73 float base = 1.0f;
74 int swz = GET_SWZ(src->Swizzle, chan);
75 if(swz >= 4 || src->Index >= c->Program.Constants.Count ){
76 rc_error(c, "get_constant_value: Can't find a value.\n");
77 return 0.0f;
78 }
79 if(GET_BIT(src->Negate, chan)){
80 base = -1.0f;
81 }
82 return base *
83 c->Program.Constants.Constants[src->Index].u.Immediate[swz];
84 }
85
86 static int src_reg_is_immediate(struct rc_src_register * src,
87 struct radeon_compiler * c)
88 {
89 return src->File == RC_FILE_CONSTANT &&
90 c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE;
91 }
92
93 static unsigned int loop_count_instructions(struct loop_info * loop)
94 {
95 unsigned int count = 0;
96 struct rc_instruction * inst = loop->BeginLoop->Next;
97 while(inst != loop->EndLoop){
98 count++;
99 inst = inst->Next;
100 }
101 return count;
102 }
103
/**
 * Work out how many copies of a loop body fit in the instruction budget
 * when that budget is shared between all loops.
 *
 * @param loop             Loop whose body is measured.
 * @param loop_count       Number of loops sharing the budget.
 * @param max_instructions Total instruction budget.
 * @return max_instructions / (loop_count * body size), or 0 when the loop
 *         body is empty or loop_count is 0 (the original divided by zero
 *         in those cases).
 */
static unsigned int loop_calc_iterations(struct loop_info * loop,
	unsigned int loop_count, unsigned int max_instructions)
{
	unsigned int icount = loop_count_instructions(loop);

	if (loop_count == 0 || icount == 0) {
		return 0;
	}
	/* Dividing in two steps yields the same quotient as dividing by the
	 * product, but cannot wrap around to a zero divisor. */
	return max_instructions / loop_count / icount;
}
110
111 static void loop_unroll(struct emulate_loop_state * s,
112 struct loop_info *loop, unsigned int iterations)
113 {
114 unsigned int i;
115 struct rc_instruction * ptr;
116 struct rc_instruction * first = loop->BeginLoop->Next;
117 struct rc_instruction * last = loop->EndLoop->Prev;
118 struct rc_instruction * append_to = last;
119 rc_remove_instruction(loop->BeginLoop);
120 rc_remove_instruction(loop->EndLoop);
121 for( i = 1; i < iterations; i++){
122 for(ptr = first; ptr != last->Next; ptr = ptr->Next){
123 struct rc_instruction *new = rc_alloc_instruction(s->C);
124 memcpy(new, ptr, sizeof(struct rc_instruction));
125 rc_insert_instruction(append_to, new);
126 append_to = new;
127 }
128 }
129 }
130
131
132 static void update_const_value(void * data, struct rc_instruction * inst,
133 rc_register_file file, unsigned int index, unsigned int mask)
134 {
135 struct const_value * value = data;
136 if(value->Src->File != file ||
137 value->Src->Index != index ||
138 !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){
139 return;
140 }
141 switch(inst->U.I.Opcode){
142 case RC_OPCODE_MOV:
143 if(!src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){
144 return;
145 }
146 value->HasValue = 1;
147 value->Value =
148 get_constant_value(value->C, &inst->U.I.SrcReg[0], 0);
149 break;
150 }
151 }
152
153 static void get_incr_amount(void * data, struct rc_instruction * inst,
154 rc_register_file file, unsigned int index, unsigned int mask)
155 {
156 struct count_inst * count_inst = data;
157 int amnt_src_index;
158 struct rc_opcode_info * opcode;
159 float amount;
160
161 if(file != RC_FILE_TEMPORARY ||
162 count_inst->Index != index ||
163 (1 << GET_SWZ(count_inst->Swz,0) != mask)){
164 return;
165 }
166 /* Find the index of the counter register. */
167 opcode = rc_get_opcode_info(inst->U.I.Opcode);
168 if(opcode->NumSrcRegs != 2){
169 count_inst->Unknown = 1;
170 return;
171 }
172 if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
173 inst->U.I.SrcReg[0].Index == count_inst->Index &&
174 inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){
175 amnt_src_index = 1;
176 } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
177 inst->U.I.SrcReg[1].Index == count_inst->Index &&
178 inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){
179 amnt_src_index = 0;
180 }
181 else{
182 count_inst->Unknown = 1;
183 return;
184 }
185 if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index],
186 count_inst->C)){
187 amount = get_constant_value(count_inst->C,
188 &inst->U.I.SrcReg[amnt_src_index], 0);
189 }
190 else{
191 count_inst->Unknown = 1 ;
192 return;
193 }
194 switch(inst->U.I.Opcode){
195 case RC_OPCODE_ADD:
196 count_inst->Amount += amount;
197 break;
198 case RC_OPCODE_SUB:
199 if(amnt_src_index == 0){
200 count_inst->Unknown = 0;
201 return;
202 }
203 count_inst->Amount -= amount;
204 break;
205 default:
206 count_inst->Unknown = 1;
207 return;
208 }
209
210 }
211
212 static int transform_const_loop(struct emulate_loop_state * s,
213 struct loop_info * loop,
214 struct rc_instruction * cond)
215 {
216 int end_loops = 1;
217 int iterations;
218 struct count_inst count_inst;
219 float limit_value;
220 struct rc_src_register * counter;
221 struct rc_src_register * limit;
222 struct const_value counter_value;
223 struct rc_instruction * inst;
224
225 /* Find the counter and the upper limit */
226
227 if(src_reg_is_immediate(&cond->U.I.SrcReg[0], s->C)){
228 limit = &cond->U.I.SrcReg[0];
229 counter = &cond->U.I.SrcReg[1];
230 }
231 else if(src_reg_is_immediate(&cond->U.I.SrcReg[1], s->C)){
232 limit = &cond->U.I.SrcReg[1];
233 counter = &cond->U.I.SrcReg[0];
234 }
235 else{
236 DBG("No constant limit.\n");
237 return 0;
238 }
239
240 /* Find the initial value of the counter */
241 counter_value.Src = counter;
242 counter_value.Value = 0.0f;
243 counter_value.HasValue = 0;
244 counter_value.C = s->C;
245 for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop;
246 inst = inst->Next){
247 rc_for_all_writes_mask(inst, update_const_value, &counter_value);
248 }
249 if(!counter_value.HasValue){
250 DBG("Initial counter value cannot be determined.\n");
251 return 0;
252 }
253 DBG("Initial counter value is %f\n", counter_value.Value);
254 /* Determine how the counter is modified each loop */
255 count_inst.C = s->C;
256 count_inst.Index = counter->Index;
257 count_inst.Swz = counter->Swizzle;
258 count_inst.Amount = 0.0f;
259 count_inst.Unknown = 0;
260 for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
261 switch(inst->U.I.Opcode){
262 /* XXX In the future we might want to try to unroll nested
263 * loops here.*/
264 case RC_OPCODE_BGNLOOP:
265 end_loops++;
266 break;
267 case RC_OPCODE_ENDLOOP:
268 loop->EndLoop = inst;
269 end_loops--;
270 break;
271 /* XXX Check if the counter is modified within an if statement.
272 */
273 case RC_OPCODE_IF:
274 break;
275 default:
276 rc_for_all_writes_mask(inst, get_incr_amount, &count_inst);
277 if(count_inst.Unknown){
278 return 0;
279 }
280 break;
281 }
282 }
283 /* Infinite loop */
284 if(count_inst.Amount == 0.0f){
285 return 0;
286 }
287 DBG("Counter is increased by %f each iteration.\n", count_inst.Amount);
288 /* Calculate the number of iterations of this loop. Keeping this
289 * simple, since we only support increment and decrement loops.
290 */
291 limit_value = get_constant_value(s->C, limit, 0);
292 iterations = (int) ((limit_value - counter_value.Value) /
293 count_inst.Amount);
294
295 DBG("Loop will have %d iterations.\n", iterations);
296 /* Prepare loop for unrolling */
297 /* Remove the first 4 instructions inside the loop, which are part
298 * of the conditional and no longer needed.
299 */
300 /* SLT/SGE/SGT/SLE */
301 if(loop->BeginLoop->Next->U.I.Opcode != RC_OPCODE_SLT &&
302 loop->BeginLoop->Next->U.I.Opcode != RC_OPCODE_SGE &&
303 loop->BeginLoop->Next->U.I.Opcode != RC_OPCODE_SGT &&
304 loop->BeginLoop->Next->U.I.Opcode != RC_OPCODE_SLE){
305 rc_error(s->C,"Unexpected instruction, expected LT,GT,LE,GE\n");
306 return 0;
307 }
308 /* IF */
309 rc_remove_instruction(loop->BeginLoop->Next);
310 if(loop->BeginLoop->Next->U.I.Opcode != RC_OPCODE_IF){
311 rc_error(s->C,"Unexpected instruction, expected IF\n");
312 return 0;
313 }
314 rc_remove_instruction(loop->BeginLoop->Next);
315 /* BRK */
316 if(loop->BeginLoop->Next->U.I.Opcode != RC_OPCODE_BRK){
317 rc_error(s->C,"Unexpected instruction, expected BRK\n");
318 return 0;
319 }
320 rc_remove_instruction(loop->BeginLoop->Next);
321 /* ENDIF */
322 if(loop->BeginLoop->Next->U.I.Opcode != RC_OPCODE_ENDIF){
323 rc_error(s->C,"Unexpected instruction, expected ENDIF\n");
324 return 0;
325 }
326 rc_remove_instruction(loop->BeginLoop->Next);
327
328 loop_unroll(s, loop, iterations);
329 loop->EndLoop = NULL;
330 return 1;
331 }
332
333 /**
334 * This function prepares a loop to be unrolled by converting it into an if
335 * statement. Here is an outline of the conversion process:
336 * BGNLOOP; -> BGNLOOP;
337 * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
338 * IF temp[0]; -> IF temp[0];
339 * BRK; ->
340 * ENDIF; -> <Loop Body>
341 * <Loop Body> -> ENDIF;
342 * ENDLOOP; -> ENDLOOP
343 *
344 * @param inst A pointer to a BGNLOOP instruction.
345 * @return A pointer to the ENDLOOP instruction.
346 */
347 static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
348 struct rc_instruction * inst)
349 {
350 struct loop_info *loop;
351 struct rc_instruction * ptr;
352
353 memory_pool_array_reserve(&s->C->Pool, struct loop_info,
354 s->Loops, s->LoopCount, s->LoopReserved, 1);
355
356 loop = &s->Loops[s->LoopCount++];
357 memset(loop, 0, sizeof(struct loop_info));
358 loop->BeginLoop = inst;
359
360 /* Reverse the conditional instruction */
361 ptr = inst->Next;
362 switch(ptr->U.I.Opcode){
363 case RC_OPCODE_SGE:
364 ptr->U.I.Opcode = RC_OPCODE_SLT;
365 break;
366 case RC_OPCODE_SLT:
367 ptr->U.I.Opcode = RC_OPCODE_SGE;
368 break;
369 case RC_OPCODE_SLE:
370 ptr->U.I.Opcode = RC_OPCODE_SGT;
371 break;
372 case RC_OPCODE_SGT:
373 ptr->U.I.Opcode = RC_OPCODE_SLE;
374 break;
375 default:
376 rc_error(s->C,
377 "Loop does not start with a conditional instruction.");
378 break;
379 }
380
381 /* Check if the number of loops is known at compile time. */
382 if(transform_const_loop(s, loop, ptr)){
383 return loop->BeginLoop->Next;
384 }
385
386 while(!loop->EndLoop){
387 struct rc_instruction * endif;
388 if(ptr->Type == RC_INSTRUCTION_NORMAL){
389 }
390 switch(ptr->U.I.Opcode){
391 case RC_OPCODE_BGNLOOP:
392 /* Nested loop */
393 ptr = transform_loop(s, ptr);
394 break;
395 case RC_OPCODE_BRK:
396 /* The BRK instruction should always be followed by
397 * an ENDIF. This ENDIF will eventually replace the
398 * ENDLOOP insruction. */
399 if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){
400 rc_error(s->C,
401 "transform_loop: expected ENDIF\n");
402 }
403 endif = ptr->Next;
404 rc_remove_instruction(ptr);
405 rc_remove_instruction(endif);
406 break;
407 case RC_OPCODE_ENDLOOP:
408 /* Insert the ENDIF before ENDLOOP. */
409 rc_insert_instruction(ptr->Prev, endif);
410 loop->EndLoop = ptr;
411 break;
412 }
413 ptr = ptr->Next;
414 }
415 return ptr;
416 }
417
418 static void rc_transform_loops(struct emulate_loop_state * s)
419 {
420 struct rc_instruction * ptr = s->C->Program.Instructions.Next;
421 while(ptr != &s->C->Program.Instructions) {
422 if(ptr->Type == RC_INSTRUCTION_NORMAL &&
423 ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
424 ptr = transform_loop(s, ptr);
425 }
426 ptr = ptr->Next;
427 }
428 }
429
430 static void rc_unroll_loops(struct emulate_loop_state *s,
431 unsigned int max_instructions)
432 {
433 int i;
434 /* Iterate backwards of the list of loops so that loops that nested
435 * loops are unrolled first.
436 */
437 for( i = s->LoopCount - 1; i >= 0; i-- ){
438 if(!s->Loops[i].EndLoop){
439 continue;
440 }
441 unsigned int iterations = loop_calc_iterations(&s->Loops[i],
442 s->LoopCount, max_instructions);
443 loop_unroll(s, &s->Loops[i], iterations);
444 }
445 }
446
447 void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions)
448 {
449 struct emulate_loop_state s;
450
451 memset(&s, 0, sizeof(struct emulate_loop_state));
452 s.C = c;
453
454 /* We may need to move these two operations to r3xx_(vert|frag)prog.c
455 * and run the optimization passes between them in order to increase
456 * the number of unrolls we can do for each loop.
457 */
458 rc_transform_loops(&s);
459
460 rc_unroll_loops(&s, max_instructions);
461 }