Merge branch 'llvm-cliptest-viewport'
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r500_fragprog_emit.c
index 0bd8f0a239f610b855f238e2c25bccab8cc1ccee..6f101c68eb68630f872a2d2b681b9cf0283b8693 100644 (file)
@@ -63,8 +63,17 @@ struct branch_info {
        int Endif;
 };
 
-struct loop_info {
-       int LoopStart;
+struct r500_loop_info {
+       int BgnLoop;
+
+       int BranchDepth;
+       int * Brks;
+       int BrkCount;
+       int BrkReserved;
+
+       int * Conts;
+       int ContCount;
+       int ContReserved;
 };
 
 struct emit_state {
@@ -75,7 +84,7 @@ struct emit_state {
        unsigned int CurrentBranchDepth;
        unsigned int BranchesReserved;
 
-       struct loop_info * Loops;
+       struct r500_loop_info * Loops;
        unsigned int CurrentLoopDepth;
        unsigned int LoopsReserved;
 
@@ -189,7 +198,7 @@ static void use_temporary(struct r500_fragment_program_code* code, unsigned int
                code->max_temp_idx = index;
 }
 
-static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
+static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
 {
        if (src.File == RC_FILE_CONSTANT) {
                return src.Index | 0x100;
@@ -218,14 +227,15 @@ static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
  */
 static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
 {
+       int ip;
        PROG_CODE;
 
-       if (code->inst_end >= 511) {
+       if (code->inst_end >= c->Base.max_alu_insts-1) {
                error("emit_alu: Too many instructions");
                return;
        }
 
-       int ip = ++code->inst_end;
+       ip = ++code->inst_end;
 
        /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
        if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
@@ -241,7 +251,7 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
        if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
                code->inst[ip].inst0 = R500_INST_TYPE_OUT;
                if (inst->WriteALUResult) {
-                       error("%s: cannot write output and ALU result at the same time");
+                       error("Cannot write output and ALU result at the same time");
                        return;
                }
        } else {
@@ -251,6 +261,9 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
 
        code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
        code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
+       if (inst->Nop) {
+               code->inst[ip].inst0 |= R500_INST_NOP;
+       }
        if (inst->Alpha.DepthWriteMask) {
                code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
                c->code->writes_depth = 1;
@@ -266,6 +279,40 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
        if (inst->Alpha.Saturate)
                code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
 
+       /* Set the presubtract operation. */
+       switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
+               case RC_PRESUB_BIAS:
+                       code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
+                       break;
+               case RC_PRESUB_SUB:
+                       code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
+                       break;
+               case RC_PRESUB_ADD:
+                       code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
+                       break;
+               case RC_PRESUB_INV:
+                       code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
+                       break;
+               default:
+                       break;
+       }
+       switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
+               case RC_PRESUB_BIAS:
+                       code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
+                       break;
+               case RC_PRESUB_SUB:
+                       code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
+                       break;
+               case RC_PRESUB_ADD:
+                       code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
+                       break;
+               case RC_PRESUB_INV:
+                       code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
+                       break;
+               default:
+                       break;
+       }
+
        code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
        code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
        code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
@@ -311,14 +358,15 @@ static unsigned int translate_strq_swizzle(unsigned int swizzle)
  */
 static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
 {
+       int ip;
        PROG_CODE;
 
-       if (code->inst_end >= 511) {
+       if (code->inst_end >= c->Base.max_alu_insts-1) {
                error("emit_tex: Too many instructions");
                return 0;
        }
 
-       int ip = ++code->inst_end;
+       ip = ++code->inst_end;
 
        code->inst[ip].inst0 = R500_INST_TYPE_TEX
                | (inst->DstReg.WriteMask << 11)
@@ -361,49 +409,102 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst
 
 static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
 {
-       if (s->Code->inst_end >= 511) {
+       unsigned int newip;
+
+       if (s->Code->inst_end >= s->C->max_alu_insts-1) {
                rc_error(s->C, "emit_tex: Too many instructions");
                return;
        }
 
-       unsigned int newip = ++s->Code->inst_end;
+       newip = ++s->Code->inst_end;
 
+       /* Currently all loops use the same integer constant to intialize
+        * the loop variables. */
+       if(!s->Code->int_constants[0]) {
+               s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
+               s->Code->int_constant_count = 1;
+       }
        s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
 
        switch(inst->U.I.Opcode){
        struct branch_info * branch;
-       struct loop_info * loop;
+       struct r500_loop_info * loop;
        case RC_OPCODE_BGNLOOP:
-               memory_pool_array_reserve(&s->C->Pool, struct loop_info,
+               memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
                        s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
 
                loop = &s->Loops[s->CurrentLoopDepth++];
-               
-               /* We don't emit an instruction for BGNLOOP, so we need to
-                * decrement the instruction counter, but first we need to
-                * set LoopStart to the current value of inst_end, which
-                * will end up being the first real instruction in the loop.*/
-               loop->LoopStart = s->Code->inst_end--;
+               memset(loop, 0, sizeof(struct r500_loop_info));
+               loop->BranchDepth = s->CurrentBranchDepth;
+               loop->BgnLoop = newip;
+
+               s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
+                       | R500_FC_JUMP_FUNC(0x00)
+                       | R500_FC_IGNORE_UNCOVERED
+                       ;
                break;
-       
        case RC_OPCODE_BRK:
-               /* Don't emit an instruction for BRK */
-               s->Code->inst_end--;
+               loop = &s->Loops[s->CurrentLoopDepth - 1];
+               memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
+                                       loop->BrkCount, loop->BrkReserved, 1);
+
+               loop->Brks[loop->BrkCount++] = newip;
+               s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
+                       | R500_FC_JUMP_FUNC(0xff)
+                       | R500_FC_B_OP1_DECR
+                       | R500_FC_B_POP_CNT(
+                               s->CurrentBranchDepth - loop->BranchDepth)
+                       | R500_FC_IGNORE_UNCOVERED
+                       ;
                break;
 
-       case RC_OPCODE_CONTINUE:
+       case RC_OPCODE_CONT:
                loop = &s->Loops[s->CurrentLoopDepth - 1];
-               s->Code->inst[newip].inst2 = R500_FC_OP_JUMP |
-                       R500_FC_JUMP_FUNC(0xff);
-               s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->LoopStart);
+               memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
+                                       loop->ContCount, loop->ContReserved, 1);
+               loop->Conts[loop->ContCount++] = newip;
+               s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
+                       | R500_FC_JUMP_FUNC(0xff)
+                       | R500_FC_B_OP1_DECR
+                       | R500_FC_B_POP_CNT(
+                               s->CurrentBranchDepth - loop->BranchDepth)
+                       | R500_FC_IGNORE_UNCOVERED
+                       ;
                break;
 
        case RC_OPCODE_ENDLOOP:
-               /* Don't emit an instruction for ENDLOOP */
-               s->Code->inst_end--;
+       {
+               loop = &s->Loops[s->CurrentLoopDepth - 1];
+               /* Emit ENDLOOP */
+               s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
+                       | R500_FC_JUMP_FUNC(0xff)
+                       | R500_FC_JUMP_ANY
+                       | R500_FC_IGNORE_UNCOVERED
+                       ;
+               /* The constant integer at index 0 is used by all loops. */
+               s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
+                       | R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
+                       ;
+
+               /* Set jump address and int constant for BGNLOOP */
+               s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
+                       | R500_FC_JUMP_ADDR(newip)
+                       ;
+
+               /* Set jump address for the BRK instructions. */
+               while(loop->BrkCount--) {
+                       s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
+                                               R500_FC_JUMP_ADDR(newip + 1);
+               }
+
+               /* Set jump address for CONT instructions. */
+               while(loop->ContCount--) {
+                       s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
+                                               R500_FC_JUMP_ADDR(newip);
+               }
                s->CurrentLoopDepth--;
                break;
-
+       }
        case RC_OPCODE_IF:
                if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) {
                        rc_error(s->C, "Branch depth exceeds hardware limit");
@@ -442,24 +543,16 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
                }
 
                branch = &s->Branches[s->CurrentBranchDepth - 1];
-               
-               if(inst->Prev->U.I.Opcode == RC_OPCODE_BRK){
-                       branch->Endif = --s->Code->inst_end;
-                       s->Code->inst[branch->Endif].inst2 |=
-                               R500_FC_B_OP0_DECR;
-               }
-               else{
-                       branch->Endif = newip;
-               
-                       s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
-                               | R500_FC_A_OP_NONE /* no address stack */
-                               | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
-                               | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
-                               | R500_FC_B_OP1_NONE /* no branch counter if stay */
-                               | R500_FC_B_POP_CNT(1)
+               branch->Endif = newip;
+
+               s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
+                       | R500_FC_A_OP_NONE /* no address stack */
+                       | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
+                       | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
+                       | R500_FC_B_OP1_NONE /* no branch counter if stay */
+                       | R500_FC_B_POP_CNT(1)
                        ;
-                       s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
-               }
+               s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
                s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
                        | R500_FC_A_OP_NONE /* no address stack */
                        | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
@@ -494,8 +587,9 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
        }
 }
 
-void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
+void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
 {
+       struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
        struct emit_state s;
        struct r500_fragment_program_code *code = &compiler->code->code.r500;
 
@@ -525,7 +619,7 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
                }
        }
 
-       if (code->max_temp_idx >= 128)
+       if (code->max_temp_idx >= compiler->Base.max_temp_regs)
                rc_error(&compiler->Base, "Too many hardware temporaries used");
 
        if (compiler->Base.Error)
@@ -533,22 +627,22 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
 
        if (code->inst_end == -1 ||
            (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
+               int ip;
+
                /* This may happen when dead-code elimination is disabled or
                 * when most of the fragment program logic is leading to a KIL */
-               if (code->inst_end >= 511) {
+               if (code->inst_end >= compiler->Base.max_alu_insts-1) {
                        rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
                        return;
                }
 
-               int ip = ++code->inst_end;
+               ip = ++code->inst_end;
                code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
        }
 
-       /* Use FULL flow control mode if branches are nested deep enough.
-        * We don not need to enable FULL flow control mode for loops, becasue
-        * we aren't using the hardware loop instructions.
-        */
-       if (s.MaxBranchDepth >= 4) {
+       /* Enable full flow control mode if we are using loops or have if
+        * statements nested at least four deep. */
+       if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
                if (code->max_temp_idx < 1)
                        code->max_temp_idx = 1;