};
struct loop_info {
- int LoopStart;
+ int BgnLoop; /* Index in Code->inst of the instruction emitted for BGNLOOP; CONTINUE jumps to it and ENDLOOP patches its inst3. */
+
+ int BranchDepth; /* Value of CurrentBranchDepth when BGNLOOP was emitted; BRK/CONTINUE pop (current depth - this) branch levels. */
+ int * Brks; /* Indices in Code->inst of the BRK instructions emitted for this loop; their jump addresses are filled in at ENDLOOP. */
+ int BrkCount; /* Number of entries stored in Brks. */
+ int BrkReserved; /* Passed to memory_pool_array_reserve() with Brks/BrkCount; presumably the allocated capacity of Brks - TODO confirm. */
};
struct emit_state {
unsigned int newip = ++s->Code->inst_end;
+ /* Currently all loops use the same integer constant to initialize
+ * the loop variables. */
+ if(!s->Code->int_constants[0]) {
+ s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
+ s->Code->int_constant_count = 1;
+ }
s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
switch(inst->U.I.Opcode){
s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
loop = &s->Loops[s->CurrentLoopDepth++];
-
- /* We don't emit an instruction for BGNLOOP, so we need to
- * decrement the instruction counter, but first we need to
- * set LoopStart to the current value of inst_end, which
- * will end up being the first real instruction in the loop.*/
- loop->LoopStart = s->Code->inst_end--;
+ memset(loop, 0, sizeof(struct loop_info));
+ loop->BranchDepth = s->CurrentBranchDepth;
+ loop->BgnLoop = newip;
+
+ s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
+ | R500_FC_JUMP_FUNC(0x00)
+ | R500_FC_IGNORE_UNCOVERED
+ ;
break;
-
case RC_OPCODE_BRK:
- /* Don't emit an instruction for BRK */
- s->Code->inst_end--;
+ loop = &s->Loops[s->CurrentLoopDepth - 1];
+ memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
+ loop->BrkCount, loop->BrkReserved, 1);
+
+ loop->Brks[loop->BrkCount++] = newip;
+ s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
+ | R500_FC_JUMP_FUNC(0xff)
+ | R500_FC_B_OP1_DECR
+ | R500_FC_B_POP_CNT(
+ s->CurrentBranchDepth - loop->BranchDepth)
+ | R500_FC_IGNORE_UNCOVERED
+ ;
break;
case RC_OPCODE_CONTINUE:
loop = &s->Loops[s->CurrentLoopDepth - 1];
- s->Code->inst[newip].inst2 = R500_FC_OP_JUMP |
- R500_FC_JUMP_FUNC(0xff);
- s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->LoopStart);
+ s->Code->inst[newip].inst2 = R500_FC_OP_JUMP
+ | R500_FC_JUMP_FUNC(0xff)
+ | R500_FC_B_OP1_DECR
+ | R500_FC_B_POP_CNT(
+ s->CurrentBranchDepth - loop->BranchDepth)
+ ;
+ s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->BgnLoop);
break;
case RC_OPCODE_ENDLOOP:
- /* Don't emit an instruction for ENDLOOP */
- s->Code->inst_end--;
+ {
+ unsigned int i;
+ loop = &s->Loops[s->CurrentLoopDepth - 1];
+ /* Emit ENDLOOP */
+ s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
+ | R500_FC_JUMP_FUNC(0xff)
+ | R500_FC_JUMP_ANY
+ | R500_FC_IGNORE_UNCOVERED
+ ;
+ /* The constant integer at index 0 is used by all loops. */
+ s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
+ | R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
+ ;
+
+ /* Set jump address and int constant for BGNLOOP */
+ s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
+ | R500_FC_JUMP_ADDR(newip)
+ ;
+
+ /* Set jump address for the BRK instructions. */
+ while(loop->BrkCount--) {
+ s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
+ R500_FC_JUMP_ADDR(newip + 1);
+ }
s->CurrentLoopDepth--;
break;
-
+ }
case RC_OPCODE_IF:
if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) {
rc_error(s->C, "Branch depth exceeds hardware limit");
}
branch = &s->Branches[s->CurrentBranchDepth - 1];
-
- if(inst->Prev->U.I.Opcode == RC_OPCODE_BRK){
- branch->Endif = --s->Code->inst_end;
- s->Code->inst[branch->Endif].inst2 |=
- R500_FC_B_OP0_DECR;
- }
- else{
- branch->Endif = newip;
-
- s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
- | R500_FC_A_OP_NONE /* no address stack */
- | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
- | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
- | R500_FC_B_OP1_NONE /* no branch counter if stay */
- | R500_FC_B_POP_CNT(1)
+ branch->Endif = newip;
+
+ s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
+ | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
+ | R500_FC_B_OP1_NONE /* no branch counter if stay */
+ | R500_FC_B_POP_CNT(1)
;
- s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
- }
+ s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
| R500_FC_A_OP_NONE /* no address stack */
| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
}
- /* Use FULL flow control mode if branches are nested deep enough.
- * We don not need to enable FULL flow control mode for loops, becasue
- * we aren't using the hardware loop instructions.
- */
- if (s.MaxBranchDepth >= 4) {
+ /* Enable full flow control mode if we are using loops or have if
+ * statements nested at least four deep. */
+ if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
if (code->max_temp_idx < 1)
code->max_temp_idx = 1;
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
struct const_value {
-
struct radeon_compiler * C;
struct rc_src_register * Src;
float Value;
c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE;
}
-static unsigned int loop_calc_iterations(struct emulate_loop_state *s,
- struct loop_info * loop, unsigned int max_instructions)
+static unsigned int loop_max_possible_iterations(struct radeon_compiler *c,
+ struct loop_info * loop, unsigned int prog_inst_limit)
{
- unsigned int total_i = rc_recompute_ips(s->C);
+ unsigned int total_i = rc_recompute_ips(c);
unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;
/* +1 because the program already has one iteration of the loop. */
- return 1 + ((max_instructions - total_i) / (s->LoopCount * loop_i));
+ return 1 + ((prog_inst_limit - total_i) / loop_i);
}
-static void loop_unroll(struct emulate_loop_state * s,
- struct loop_info *loop, unsigned int iterations)
+static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
+ unsigned int iterations)
{
unsigned int i;
struct rc_instruction * ptr;
rc_remove_instruction(loop->EndLoop);
for( i = 1; i < iterations; i++){
for(ptr = first; ptr != last->Next; ptr = ptr->Next){
- struct rc_instruction *new = rc_alloc_instruction(s->C);
+ struct rc_instruction *new = rc_alloc_instruction(c);
memcpy(new, ptr, sizeof(struct rc_instruction));
rc_insert_instruction(append_to, new);
append_to = new;
if(value->Src->File != file ||
value->Src->Index != index ||
!(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){
- return;
+ return;
}
switch(inst->U.I.Opcode){
case RC_OPCODE_MOV:
if(file != RC_FILE_TEMPORARY ||
count_inst->Index != index ||
(1 << GET_SWZ(count_inst->Swz,0) != mask)){
- return;
+ return;
}
/* Find the index of the counter register. */
opcode = rc_get_opcode_info(inst->U.I.Opcode);
count_inst->Unknown = 1;
return;
}
-
}
-static int transform_const_loop(struct emulate_loop_state * s,
- struct loop_info * loop)
+static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
+ unsigned int prog_inst_limit)
{
int end_loops;
int iterations;
struct rc_instruction * inst;
/* Find the counter and the upper limit */
-
- if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], s->C)){
+
+ if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){
limit = &loop->Cond->U.I.SrcReg[0];
counter = &loop->Cond->U.I.SrcReg[1];
}
- else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], s->C)){
+ else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){
limit = &loop->Cond->U.I.SrcReg[1];
counter = &loop->Cond->U.I.SrcReg[0];
}
DBG("No constant limit.\n");
return 0;
}
-
+
/* Find the initial value of the counter */
counter_value.Src = counter;
counter_value.Value = 0.0f;
counter_value.HasValue = 0;
- counter_value.C = s->C;
- for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop;
+ counter_value.C = c;
+ for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop;
inst = inst->Next){
rc_for_all_writes_mask(inst, update_const_value, &counter_value);
}
}
DBG("Initial counter value is %f\n", counter_value.Value);
/* Determine how the counter is modified each loop */
- count_inst.C = s->C;
+ count_inst.C = c;
count_inst.Index = counter->Index;
count_inst.Swz = counter->Swizzle;
count_inst.Amount = 0.0f;
/* Calculate the number of iterations of this loop. Keeping this
* simple, since we only support increment and decrement loops.
*/
- limit_value = get_constant_value(s->C, limit, 0);
+ limit_value = get_constant_value(c, limit, 0);
DBG("Limit is %f.\n", limit_value);
+ /* The iteration calculations are the opposite of what you would expect.
+ * In a normal loop, if the condition is met, then the loop continues, but
+ * with our loops, if the condition is met, the loop is exited. */
switch(loop->Cond->U.I.Opcode){
- case RC_OPCODE_SGT:
- case RC_OPCODE_SLT:
+ case RC_OPCODE_SGE:
+ case RC_OPCODE_SLE:
iterations = (int) ceilf((limit_value - counter_value.Value) /
count_inst.Amount);
break;
- case RC_OPCODE_SLE:
- case RC_OPCODE_SGE:
+ case RC_OPCODE_SGT:
+ case RC_OPCODE_SLT:
iterations = (int) floorf((limit_value - counter_value.Value) /
count_inst.Amount) + 1;
break;
return 0;
}
+ if (iterations > loop_max_possible_iterations(c, loop,
+ prog_inst_limit)) {
+ return 0;
+ }
+
DBG("Loop will have %d iterations.\n", iterations);
-
+
/* Prepare loop for unrolling */
rc_remove_instruction(loop->Cond);
rc_remove_instruction(loop->If);
rc_remove_instruction(loop->Brk);
rc_remove_instruction(loop->EndIf);
-
- loop_unroll(s, loop, iterations);
+
+ unroll_loop(c, loop, iterations);
loop->EndLoop = NULL;
return 1;
}
-/**
- * This function prepares a loop to be unrolled by converting it into an if
- * statement. Here is an outline of the conversion process:
- * BGNLOOP; -> BGNLOOP;
- * <Additional conditional code> -> <Additional conditional code>
- * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
- * IF temp[0]; -> IF temp[0];
- * BRK; ->
- * ENDIF; -> <Loop Body>
- * <Loop Body> -> ENDIF;
- * ENDLOOP; -> ENDLOOP
- *
+/**
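+ * @param c The compiler context; used to report errors and to detect the
+ * end of the program's instruction list.
+ * @param loop Output structure; it is zeroed and then filled in with the
+ * instructions that make up the loop.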
* @param inst A pointer to a BGNLOOP instruction.
- * @return If the loop can be unrolled, a pointer to the first instruction of
- * the unrolled loop.
- * Otherwise, A pointer to the ENDLOOP instruction.
- * Null if there is an error.
+ * @return 1 if all of the members of loop were set.
+ * @return 0 if there was an error and some members of loop are still NULL.
*/
-static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
+static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,
struct rc_instruction * inst)
{
- struct loop_info *loop;
struct rc_instruction * ptr;
- memory_pool_array_reserve(&s->C->Pool, struct loop_info,
- s->Loops, s->LoopCount, s->LoopReserved, 1);
-
- loop = &s->Loops[s->LoopCount++];
- memset(loop, 0, sizeof(struct loop_info));
if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
- rc_error(s->C, "expected BGNLOOP\n", __FUNCTION__);
- return NULL;
+ rc_error(c, "%s: expected BGNLOOP", __FUNCTION__);
+ return 0;
}
+
+ memset(loop, 0, sizeof(struct loop_info));
+
loop->BeginLoop = inst;
- for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){
+ for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) {
+
+ if (ptr == &c->Program.Instructions) {
+ rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n",
+ __FUNCTION__);
+ return 0;
+ }
+
switch(ptr->U.I.Opcode){
case RC_OPCODE_BGNLOOP:
- /* Nested loop */
- ptr = transform_loop(s, ptr);
- if(!ptr){
- return NULL;
+ {
+ /* Nested loop, skip ahead to the end. */
+ unsigned int loop_depth = 1;
+ for(ptr = ptr->Next; ptr != &c->Program.Instructions;
+ ptr = ptr->Next){
+ if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ loop_depth++;
+ } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+ if (!--loop_depth) {
+ break;
+ }
+ }
+ }
+ if (ptr == &c->Program.Instructions) {
+ rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n",
+ __FUNCTION__);
+ return 0;
}
break;
+ }
case RC_OPCODE_BRK:
- loop->Brk = ptr;
- if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){
- rc_error(s->C,
- "%s: expected ENDIF\n",__FUNCTION__);
- return NULL;
- }
- loop->EndIf = ptr->Next;
- if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){
- rc_error(s->C,
- "%s: expected IF\n", __FUNCTION__);
- return NULL;
+ if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF
+ || ptr->Prev->U.I.Opcode != RC_OPCODE_IF
+ || loop->Brk){
+ continue;
}
+ loop->Brk = ptr;
loop->If = ptr->Prev;
+ loop->EndIf = ptr->Next;
switch(loop->If->Prev->U.I.Opcode){
case RC_OPCODE_SLT:
case RC_OPCODE_SGE:
case RC_OPCODE_SNE:
break;
default:
- rc_error(s->C, "%s expected conditional\n",
+ rc_error(c, "%s: expected conditional",
__FUNCTION__);
- return NULL;
+ return 0;
}
loop->Cond = loop->If->Prev;
- ptr = loop->EndIf;
break;
+
case RC_OPCODE_ENDLOOP:
loop->EndLoop = ptr;
break;
}
}
+
+ if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf
+ && loop->Cond && loop->EndLoop) {
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * This function prepares a loop to be unrolled by converting it into an if
+ * statement. Here is an outline of the conversion process:
+ * BGNLOOP; -> BGNLOOP;
+ * <Additional conditional code> -> <Additional conditional code>
+ * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
+ * IF temp[0]; -> IF temp[0];
+ * BRK; ->
+ * ENDIF; -> <Loop Body>
+ * <Loop Body> -> ENDIF;
+ * ENDLOOP; -> ENDLOOP
+ *
+ * @param inst A pointer to a BGNLOOP instruction.
+ * @return If the loop can be unrolled, a pointer to the first instruction of
+ * the unrolled loop.
+ * Otherwise, A pointer to the ENDLOOP instruction.
+ * Null if there is an error.
+ */
+static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
+ struct rc_instruction * inst,
+ int prog_inst_limit)
+{
+ struct loop_info * loop;
+
+ memory_pool_array_reserve(&s->C->Pool, struct loop_info,
+ s->Loops, s->LoopCount, s->LoopReserved, 1);
+
+ loop = &s->Loops[s->LoopCount++];
+
+ if (!build_loop_info(s->C, loop, inst))
+ return NULL;
+
+ if(try_unroll_loop(s->C, loop, prog_inst_limit)){
+ return loop->BeginLoop->Next;
+ }
+
/* Reverse the conditional instruction */
switch(loop->Cond->U.I.Opcode){
case RC_OPCODE_SGE:
rc_error(s->C, "loop->Cond is not a conditional.\n");
return NULL;
}
-
- /* Check if the number of loops is known at compile time. */
- if(transform_const_loop(s, loop)){
- return loop->BeginLoop->Next;
- }
- /* Prepare the loop to be unrolled */
+ /* Prepare the loop to be emulated */
rc_remove_instruction(loop->Brk);
rc_remove_instruction(loop->EndIf);
rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
return loop->EndLoop;
}
-void rc_transform_unroll_loops(struct radeon_compiler *c,
- struct emulate_loop_state * s)
+void rc_transform_loops(struct radeon_compiler *c,
+ struct emulate_loop_state * s,
+ int prog_inst_limit)
{
struct rc_instruction * ptr;
-
+
memset(s, 0, sizeof(struct emulate_loop_state));
s->C = c;
ptr = s->C->Program.Instructions.Next;
while(ptr != &s->C->Program.Instructions) {
if(ptr->Type == RC_INSTRUCTION_NORMAL &&
ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
- ptr = transform_loop(s, ptr);
+ ptr = transform_loop(s, ptr, prog_inst_limit);
if(!ptr){
return;
}
}
}
-void rc_emulate_loops(struct emulate_loop_state *s,
- unsigned int max_instructions)
+void rc_unroll_loops(struct radeon_compiler *c, int prog_inst_limit) /* Try to unroll every loop found in the program of c. */
+{
+ struct rc_instruction * inst;
+ struct loop_info loop; /* Scratch description, rebuilt by build_loop_info() for each BGNLOOP. */
+
+ for(inst = c->Program.Instructions.Next; /* Walk the instruction list until it wraps back to the list head. */
+ inst != &c->Program.Instructions; inst = inst->Next) {
+
+ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { /* NOTE(review): rc_transform_loops also checks inst->Type == RC_INSTRUCTION_NORMAL first - confirm it is not needed here. */
+ if (build_loop_info(c, &loop, inst)) { /* Only attempt the unroll when every member of loop was found. */
+ try_unroll_loop(c, &loop, prog_inst_limit); /* Return value ignored: a loop that cannot be unrolled is left as it is. */
+ }
+ }
+ }
+}
+
+void rc_emulate_loops(struct emulate_loop_state *s, int prog_inst_limit)
{
int i;
/* Iterate backwards of the list of loops so that loops that nested
if(!s->Loops[i].EndLoop){
continue;
}
- unsigned int iterations = loop_calc_iterations(s, &s->Loops[i],
- max_instructions);
- loop_unroll(s, &s->Loops[i], iterations);
+ unsigned int iterations = loop_max_possible_iterations(
+ s->C, &s->Loops[i], prog_inst_limit);
+ unroll_loop(s->C, &s->Loops[i], iterations);
}
}