#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
-struct emulate_loop_state {
- struct radeon_compiler * C;
- struct loop_info * Loops;
- unsigned int LoopCount;
- unsigned int LoopReserved;
-};
-
-struct loop_info {
- struct rc_instruction * BeginLoop;
- struct rc_instruction * EndLoop;
-};
-
struct const_value {
-
struct radeon_compiler * C;
struct rc_src_register * Src;
float Value;
c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE;
}
-static unsigned int loop_count_instructions(struct loop_info * loop)
-{
- unsigned int count = 0;
- struct rc_instruction * inst = loop->BeginLoop->Next;
- while(inst != loop->EndLoop){
- count++;
- inst = inst->Next;
- }
- return count;
-}
-
-static unsigned int loop_calc_iterations(struct loop_info * loop,
- unsigned int loop_count, unsigned int max_instructions)
+static unsigned int loop_max_possible_iterations(struct radeon_compiler *c,
+ struct loop_info * loop)
{
- unsigned int icount = loop_count_instructions(loop);
- return max_instructions / (loop_count * icount);
+ unsigned int total_i = rc_recompute_ips(c); /* presumably the program's current instruction count - confirm against rc_recompute_ips() */
+ unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1; /* IP distance from BGNLOOP to ENDLOOP, minus one */
+ /* Returns how many copies of the loop body fit into what is left of
+  * c->max_alu_insts once total_i has been subtracted.
+  * +1 because the program already has one iteration of the loop.
+  * NOTE(review): loop_i is 0 when ENDLOOP's IP is BGNLOOP's IP + 1, which
+  * makes the division below divide by zero; and because total_i is
+  * unsigned, the subtraction may wrap if total_i exceeds max_alu_insts.
+  * Confirm that callers rule both cases out. */
+ return 1 + ((c->max_alu_insts - total_i) / loop_i);
}
-static void loop_unroll(struct emulate_loop_state * s,
- struct loop_info *loop, unsigned int iterations)
+static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
+ unsigned int iterations)
{
unsigned int i;
struct rc_instruction * ptr;
rc_remove_instruction(loop->EndLoop);
for( i = 1; i < iterations; i++){
for(ptr = first; ptr != last->Next; ptr = ptr->Next){
- struct rc_instruction *new = rc_alloc_instruction(s->C);
+ struct rc_instruction *new = rc_alloc_instruction(c);
memcpy(new, ptr, sizeof(struct rc_instruction));
rc_insert_instruction(append_to, new);
append_to = new;
if(value->Src->File != file ||
value->Src->Index != index ||
!(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){
- return;
+ return;
}
switch(inst->U.I.Opcode){
case RC_OPCODE_MOV:
{
struct count_inst * count_inst = data;
int amnt_src_index;
- struct rc_opcode_info * opcode;
+ const struct rc_opcode_info * opcode;
float amount;
if(file != RC_FILE_TEMPORARY ||
count_inst->Index != index ||
(1 << GET_SWZ(count_inst->Swz,0) != mask)){
- return;
+ return;
}
/* Find the index of the counter register. */
opcode = rc_get_opcode_info(inst->U.I.Opcode);
count_inst->Unknown = 1;
return;
}
-
}
-static int transform_const_loop(struct emulate_loop_state * s,
- struct loop_info * loop,
- struct rc_instruction * cond)
+/**
+ * If c->max_alu_insts is not positive (e.g. -1), then all eligible loops will
+ * be unrolled regardless of how many iterations they have.
+ */
+static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop)
{
- int end_loops = 1;
+ int end_loops;
int iterations;
struct count_inst count_inst;
float limit_value;
struct rc_instruction * inst;
/* Find the counter and the upper limit */
-
- if(src_reg_is_immediate(&cond->U.I.SrcReg[0], s->C)){
- limit = &cond->U.I.SrcReg[0];
- counter = &cond->U.I.SrcReg[1];
+
+ if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){
+ limit = &loop->Cond->U.I.SrcReg[0];
+ counter = &loop->Cond->U.I.SrcReg[1];
}
- else if(src_reg_is_immediate(&cond->U.I.SrcReg[1], s->C)){
- limit = &cond->U.I.SrcReg[1];
- counter = &cond->U.I.SrcReg[0];
+ else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){
+ limit = &loop->Cond->U.I.SrcReg[1];
+ counter = &loop->Cond->U.I.SrcReg[0];
}
else{
DBG("No constant limit.\n");
return 0;
}
-
+
/* Find the initial value of the counter */
counter_value.Src = counter;
counter_value.Value = 0.0f;
counter_value.HasValue = 0;
- counter_value.C = s->C;
- for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop;
+ counter_value.C = c;
+ for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop;
inst = inst->Next){
rc_for_all_writes_mask(inst, update_const_value, &counter_value);
}
}
DBG("Initial counter value is %f\n", counter_value.Value);
/* Determine how the counter is modified each loop */
- count_inst.C = s->C;
+ count_inst.C = c;
count_inst.Index = counter->Index;
count_inst.Swz = counter->Swizzle;
count_inst.Amount = 0.0f;
count_inst.Unknown = 0;
+ end_loops = 1;
for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
switch(inst->U.I.Opcode){
/* XXX In the future we might want to try to unroll nested
loop->EndLoop = inst;
end_loops--;
break;
+ case RC_OPCODE_BRK:
+ /* Don't unroll the loop if it has a BRK instruction
+ * other than the one used when testing the main
+ * conditional of the loop. */
+
+ /* Make sure we haven't entered a nested loop. */
+ if(inst != loop->Brk && end_loops == 1) {
+ return 0;
+ }
+ break;
/* XXX Check if the counter is modified within an if statement.
*/
case RC_OPCODE_IF:
/* Calculate the number of iterations of this loop. Keeping this
* simple, since we only support increment and decrement loops.
*/
- limit_value = get_constant_value(s->C, limit, 0);
- iterations = (int) ((limit_value - counter_value.Value) /
+ limit_value = get_constant_value(c, limit, 0);
+ DBG("Limit is %f.\n", limit_value);
+ /* The iteration calculations are opposite of what you would expect.
+ * In a normal loop, if the condition is met, then the loop continues, but
+ * with our loops, if the condition is met, the loop is exited. */
+ switch(loop->Cond->U.I.Opcode){
+ case RC_OPCODE_SGE:
+ case RC_OPCODE_SLE:
+ iterations = (int) ceilf((limit_value - counter_value.Value) /
count_inst.Amount);
+ break;
- DBG("Loop will have %d iterations.\n", iterations);
- /* Prepare loop for unrolling */
- /* Remove the first 4 instructions inside the loop, which are part
- * of the conditional and no longer needed.
- */
- /* SLT/SGE/SGT/SLE */
- if(loop->BeginLoop->Next->U.I.Opcode != RC_OPCODE_SLT &&
- loop->BeginLoop->Next->U.I.Opcode != RC_OPCODE_SGE &&
- loop->BeginLoop->Next->U.I.Opcode != RC_OPCODE_SGT &&
- loop->BeginLoop->Next->U.I.Opcode != RC_OPCODE_SLE){
- rc_error(s->C,"Unexpected instruction, expected LT,GT,LE,GE\n");
+ case RC_OPCODE_SGT:
+ case RC_OPCODE_SLT:
+ iterations = (int) floorf((limit_value - counter_value.Value) /
+ count_inst.Amount) + 1;
+ break;
+ default:
return 0;
}
- /* IF */
- rc_remove_instruction(loop->BeginLoop->Next);
- if(loop->BeginLoop->Next->U.I.Opcode != RC_OPCODE_IF){
- rc_error(s->C,"Unexpected instruction, expected IF\n");
+
+ if (c->max_alu_insts > 0
+ && iterations > loop_max_possible_iterations(c, loop)) {
return 0;
}
- rc_remove_instruction(loop->BeginLoop->Next);
- /* BRK */
- if(loop->BeginLoop->Next->U.I.Opcode != RC_OPCODE_BRK){
- rc_error(s->C,"Unexpected instruction, expected BRK\n");
+
+ DBG("Loop will have %d iterations.\n", iterations);
+
+ /* Prepare loop for unrolling */
+ rc_remove_instruction(loop->Cond);
+ rc_remove_instruction(loop->If);
+ rc_remove_instruction(loop->Brk);
+ rc_remove_instruction(loop->EndIf);
+
+ unroll_loop(c, loop, iterations);
+ loop->EndLoop = NULL;
+ return 1;
+}
+
+/**
+ * Scan forward from a BGNLOOP instruction and record in loop the
+ * instructions that delimit it: BeginLoop, Cond, If, Brk, EndIf and EndLoop.
+ * Instructions inside nested BGNLOOP/ENDLOOP pairs are skipped. Only the
+ * first BRK that sits directly between an IF and an ENDIF is recorded; the
+ * instruction before that IF must be SLT, SGE, SGT, SLE, SEQ or SNE and
+ * becomes Cond.
+ *
+ * @param c The compiler; used for rc_error() and to detect the end of the
+ * instruction list.
+ * @param loop Output. Zeroed and then filled in. If inst is not a BGNLOOP
+ * the function returns before touching it.
+ * @param inst A pointer to a BGNLOOP instruction.
+ * @return 1 if all of the members of loop were set.
+ * @return 0 if there was an error and some members of loop are still NULL.
+ * rc_error() is only raised when inst is not a BGNLOOP or when an ENDLOOP is
+ * missing; an unsupported comparison before the IF, or a loop without a
+ * matching IF/BRK/ENDIF, returns 0 silently.
+ */
+static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,
+ struct rc_instruction * inst)
+{
+ struct rc_instruction * ptr;
+
+ if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
+ rc_error(c, "%s: expected BGNLOOP", __FUNCTION__);
return 0;
}
- rc_remove_instruction(loop->BeginLoop->Next);
- /* ENDIF */
- if(loop->BeginLoop->Next->U.I.Opcode != RC_OPCODE_ENDIF){
- rc_error(s->C,"Unexpected instruction, expected ENDIF\n");
- return 0;
+
+ memset(loop, 0, sizeof(struct loop_info)); /* every member starts out NULL; the final check relies on this */
+
+ loop->BeginLoop = inst;
+
+ for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) { /* stops once the matching ENDLOOP has been recorded */
+
+ if (ptr == &c->Program.Instructions) { /* reached the end of the instruction list */
+ rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n",
+ __FUNCTION__);
+ return 0;
+ }
+
+ switch(ptr->U.I.Opcode){
+ case RC_OPCODE_BGNLOOP:
+ {
+ /* Nested loop, skip ahead to the end. loop_depth counts the
+  * BGNLOOPs that are still open, so deeper nesting is skipped
+  * as well. */
+ unsigned int loop_depth = 1;
+ for(ptr = ptr->Next; ptr != &c->Program.Instructions;
+ ptr = ptr->Next){
+ if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ loop_depth++;
+ } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+ if (!--loop_depth) {
+ break; /* ptr is now the nested loop's own ENDLOOP */
+ }
+ }
+ }
+ if (ptr == &c->Program.Instructions) {
+ rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n",
+ __FUNCTION__);
+ return 0;
+ }
+ break;
+ }
+ case RC_OPCODE_BRK:
+ if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF
+ || ptr->Prev->U.I.Opcode != RC_OPCODE_IF
+ || loop->Brk){
+ continue; /* not an IF; BRK; ENDIF sequence, or a BRK was already recorded: keep scanning */
+ }
+ loop->Brk = ptr;
+ loop->If = ptr->Prev;
+ loop->EndIf = ptr->Next;
+ switch(loop->If->Prev->U.I.Opcode){
+ case RC_OPCODE_SLT:
+ case RC_OPCODE_SGE:
+ case RC_OPCODE_SGT:
+ case RC_OPCODE_SLE:
+ case RC_OPCODE_SEQ:
+ case RC_OPCODE_SNE:
+ break;
+ default:
+ return 0; /* the IF is not preceded by a supported comparison; no rc_error() is raised here */
+ }
+ loop->Cond = loop->If->Prev;
+ break;
+
+ case RC_OPCODE_ENDLOOP:
+ loop->EndLoop = ptr;
+ break;
+ }
}
- rc_remove_instruction(loop->BeginLoop->Next);
-
- loop_unroll(s, loop, iterations);
- loop->EndLoop = NULL;
- return 1;
+
+ if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf
+ && loop->Cond && loop->EndLoop) { /* fails when no qualifying BRK was found inside the loop */
+ return 1;
+ }
+ return 0;
}
-/**
+/**
* This function prepares a loop to be unrolled by converting it into an if
* statement. Here is an outline of the conversion process:
* BGNLOOP; -> BGNLOOP;
+ * <Additional conditional code> -> <Additional conditional code>
* SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
* IF temp[0]; -> IF temp[0];
* BRK; ->
* ENDLOOP; -> ENDLOOP
*
* @param inst A pointer to a BGNLOOP instruction.
- * @return A pointer to the ENDLOOP instruction.
+ * @return 1 for success: the loop was either fully unrolled by
+ * try_unroll_loop() or converted as outlined above. 0 for failure:
+ * build_loop_info() failed or the loop condition is not one of
+ * SGE/SLT/SLE/SGT/SEQ/SNE; rc_error() has been called in both cases.
*/
-static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
+static int transform_loop(struct emulate_loop_state * s,
struct rc_instruction * inst)
{
- struct loop_info *loop;
- struct rc_instruction * ptr;
+ struct loop_info * loop; /* slot in s->Loops reserved for this loop */
memory_pool_array_reserve(&s->C->Pool, struct loop_info,
s->Loops, s->LoopCount, s->LoopReserved, 1);
loop = &s->Loops[s->LoopCount++];
- memset(loop, 0, sizeof(struct loop_info));
- loop->BeginLoop = inst;
-
+
+ if (!build_loop_info(s->C, loop, inst)) { /* NOTE(review): the slot stays counted in s->LoopCount even when this fails */
+ rc_error(s->C, "Failed to build loop info\n");
+ return 0;
+ }
+
+ if(try_unroll_loop(s->C, loop)){ /* nothing left to emulate when the loop could be fully unrolled */
+ return 1;
+ }
+
/* Reverse the conditional instruction */
- ptr = inst->Next;
- switch(ptr->U.I.Opcode){
+ switch(loop->Cond->U.I.Opcode){
case RC_OPCODE_SGE:
- ptr->U.I.Opcode = RC_OPCODE_SLT;
+ loop->Cond->U.I.Opcode = RC_OPCODE_SLT;
break;
case RC_OPCODE_SLT:
- ptr->U.I.Opcode = RC_OPCODE_SGE;
+ loop->Cond->U.I.Opcode = RC_OPCODE_SGE;
break;
case RC_OPCODE_SLE:
- ptr->U.I.Opcode = RC_OPCODE_SGT;
+ loop->Cond->U.I.Opcode = RC_OPCODE_SGT;
break;
case RC_OPCODE_SGT:
- ptr->U.I.Opcode = RC_OPCODE_SLE;
+ loop->Cond->U.I.Opcode = RC_OPCODE_SLE;
break;
- default:
- rc_error(s->C,
- "Loop does not start with a conditional instruction.");
+ case RC_OPCODE_SEQ:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SNE;
break;
- }
-
- /* Check if the number of loops is known at compile time. */
- if(transform_const_loop(s, loop, ptr)){
- return loop->BeginLoop->Next;
+ case RC_OPCODE_SNE:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SEQ;
+ break;
+ default:
+ rc_error(s->C, "loop->Cond is not a conditional.\n");
+ return 0;
}
- while(!loop->EndLoop){
- struct rc_instruction * endif;
- if(ptr->Type == RC_INSTRUCTION_NORMAL){
- }
- switch(ptr->U.I.Opcode){
- case RC_OPCODE_BGNLOOP:
- /* Nested loop */
- ptr = transform_loop(s, ptr);
- break;
- case RC_OPCODE_BRK:
- /* The BRK instruction should always be followed by
- * an ENDIF. This ENDIF will eventually replace the
- * ENDLOOP insruction. */
- if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){
- rc_error(s->C,
- "transform_loop: expected ENDIF\n");
- }
- endif = ptr->Next;
- rc_remove_instruction(ptr);
- rc_remove_instruction(endif);
- break;
- case RC_OPCODE_ENDLOOP:
- /* Insert the ENDIF before ENDLOOP. */
- rc_insert_instruction(ptr->Prev, endif);
- loop->EndLoop = ptr;
- break;
- }
- ptr = ptr->Next;
- }
- return ptr;
+ /* Prepare the loop to be emulated: drop the BRK and move the ENDIF so
+  * that it sits directly before ENDLOOP, which puts the rest of the loop
+  * body inside the IF. */
+ rc_remove_instruction(loop->Brk);
+ rc_remove_instruction(loop->EndIf);
+ rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
+ return 1;
}
-static void rc_transform_loops(struct emulate_loop_state * s)
+void rc_transform_loops(struct radeon_compiler *c, void *user) /* runs transform_loop() on every BGNLOOP; user is not referenced here */
{
- struct rc_instruction * ptr = s->C->Program.Instructions.Next;
- while(ptr != &s->C->Program.Instructions) {
+ struct emulate_loop_state * s = &c->loop_state; /* kept in the compiler; rc_emulate_loops() reads the same state */
+ struct rc_instruction * ptr;
+
+ memset(s, 0, sizeof(struct emulate_loop_state)); /* start from an empty loop list */
+ s->C = c;
+ for(ptr = s->C->Program.Instructions.Next;
+ ptr != &s->C->Program.Instructions; ptr = ptr->Next) { /* walk the whole instruction list */
if(ptr->Type == RC_INSTRUCTION_NORMAL &&
ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
- ptr = transform_loop(s, ptr);
+ if (!transform_loop(s, ptr)) /* unrolls the loop or rewrites it for emulation */
+ return; /* transform_loop() already reported the failure through rc_error() */
+ }
+ }
+}
+
+void rc_unroll_loops(struct radeon_compiler *c, void *user) /* tries to unroll every BGNLOOP found in the program; user is not referenced here */
+{
+ struct rc_instruction * inst;
+ struct loop_info loop; /* scratch; refilled by build_loop_info() for each BGNLOOP */
+
+ for(inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions; inst = inst->Next) {
+
+ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { /* NOTE(review): no inst->Type check here, unlike rc_transform_loops() - confirm that is intended */
+ if (build_loop_info(c, &loop, inst)) {
+ try_unroll_loop(c, &loop); /* result ignored: a loop that cannot be unrolled is left in place */
+ }
}
- ptr = ptr->Next;
}
}
-static void rc_unroll_loops(struct emulate_loop_state *s,
- unsigned int max_instructions)
+void rc_emulate_loops(struct radeon_compiler *c, void *user) /* unrolls the loops recorded in c->loop_state as far as the instruction limit allows; user is not referenced here */
{
+ struct emulate_loop_state * s = &c->loop_state; /* presumably filled in by an earlier rc_transform_loops() call - confirm pass order */
int i;
/* Iterate backwards of the list of loops so that loops that nested
* loops are unrolled first.
*/
for( i = s->LoopCount - 1; i >= 0; i-- ){
+ unsigned int iterations; /* declared at the top of the block rather than after the statement below */
+
if(!s->Loops[i].EndLoop){
continue;
}
- unsigned int iterations = loop_calc_iterations(&s->Loops[i],
- s->LoopCount, max_instructions);
- loop_unroll(s, &s->Loops[i], iterations);
+ iterations = loop_max_possible_iterations(s->C, &s->Loops[i]); /* as many copies as still fit under c->max_alu_insts */
+ unroll_loop(s->C, &s->Loops[i], iterations); /* only reached for loops whose EndLoop is still set */
}
}
-
-void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions)
-{
- struct emulate_loop_state s;
-
- memset(&s, 0, sizeof(struct emulate_loop_state));
- s.C = c;
-
- /* We may need to move these two operations to r3xx_(vert|frag)prog.c
- * and run the optimization passes between them in order to increase
- * the number of unrolls we can do for each loop.
- */
- rc_transform_loops(&s);
-
- rc_unroll_loops(&s, max_instructions);
-}