X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fr300%2Fcompiler%2Fr500_fragprog_emit.c;h=6f101c68eb68630f872a2d2b681b9cf0283b8693;hb=cd6a31cd4a9ea6deef4778c2eaef2d47240c3a6e;hp=10c5e2349e99e8377d6690333a3d6a463d14e1b4;hpb=64644ec3b21884d4a974fa29087fa98c4ed9e112;p=mesa.git diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 10c5e2349e9..6f101c68eb6 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -45,6 +45,8 @@ #include "radeon_program_pair.h" +#define MAX_BRANCH_DEPTH_FULL 32 +#define MAX_BRANCH_DEPTH_PARTIAL 4 #define PROG_CODE \ struct r500_fragment_program_code *code = &c->code->code.r500 @@ -61,6 +63,19 @@ struct branch_info { int Endif; }; +struct r500_loop_info { + int BgnLoop; + + int BranchDepth; + int * Brks; + int BrkCount; + int BrkReserved; + + int * Conts; + int ContCount; + int ContReserved; +}; + struct emit_state { struct radeon_compiler * C; struct r500_fragment_program_code * Code; @@ -69,7 +84,12 @@ struct emit_state { unsigned int CurrentBranchDepth; unsigned int BranchesReserved; + struct r500_loop_info * Loops; + unsigned int CurrentLoopDepth; + unsigned int LoopsReserved; + unsigned int MaxBranchDepth; + }; static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) @@ -82,7 +102,7 @@ static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, r case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; default: - error("translate_rgb_op(%d): unknown opcode\n", opcode); + error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); /* fall through */ case RC_OPCODE_NOP: /* fall through */ @@ -106,7 +126,7 @@ static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC; case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2; default: - error("translate_alpha_op(%d): unknown opcode\n", opcode); + error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name); /* fall through */ case RC_OPCODE_NOP: /* fall through */ @@ -178,7 +198,7 @@ static void use_temporary(struct r500_fragment_program_code* code, unsigned int code->max_temp_idx = index; } -static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src) +static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src) { if (src.File == RC_FILE_CONSTANT) { return src.Index | 0x100; @@ -207,14 +227,15 @@ static void alu_nop(struct r300_fragment_program_compiler *c, int ip) */ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst) { + int ip; PROG_CODE; - if (code->inst_end >= 511) { + if (code->inst_end >= c->Base.max_alu_insts-1) { error("emit_alu: Too many instructions"); return; } - int ip = ++code->inst_end; + ip = ++code->inst_end; /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */ if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX || @@ -230,7 +251,7 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) { code->inst[ip].inst0 = R500_INST_TYPE_OUT; if (inst->WriteALUResult) { - error("%s: cannot write output and ALU result at the same time"); + error("Cannot write output and ALU result at the same time"); return; } } else { @@ -240,6 +261,9 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14); code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); + if (inst->Nop) { + code->inst[ip].inst0 |= R500_INST_NOP; + } if (inst->Alpha.DepthWriteMask) { code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; c->code->writes_depth = 1; @@ -255,6 +279,40 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair if (inst->Alpha.Saturate) code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; + /* Set the presubtract operation. */ + switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0; + break; + case RC_PRESUB_SUB: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; + break; + case RC_PRESUB_ADD: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0; + break; + case RC_PRESUB_INV: + code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0; + break; + default: + break; + } + switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) { + case RC_PRESUB_BIAS: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0; + break; + case RC_PRESUB_SUB: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0; + break; + case RC_PRESUB_ADD: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0; + break; + case RC_PRESUB_INV: + code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0; + break; + default: + break; + } + code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); @@ -300,14 +358,15 @@ static unsigned int translate_strq_swizzle(unsigned int swizzle) */ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) { + int ip; PROG_CODE; - if (code->inst_end >= 511) { + if (code->inst_end >= c->Base.max_alu_insts-1) { error("emit_tex: Too many instructions"); return 0; } - int ip = ++code->inst_end; + ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_TEX | (inst->DstReg.WriteMask << 11) @@ -332,7 +391,7 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst code->inst[ip].inst1 |= R500_TEX_INST_PROJ; break; default: - error("emit_tex can't handle opcode %x\n", inst->Opcode); + error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name); } use_temporary(code, inst->SrcReg[0].Index); @@ -350,25 +409,111 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) { - if (s->Code->inst_end >= 511) { + unsigned int newip; + + if (s->Code->inst_end >= s->C->max_alu_insts-1) { rc_error(s->C, "emit_tex: Too many instructions"); return; } - unsigned int newip = ++s->Code->inst_end; + newip = ++s->Code->inst_end; + /* Currently all loops use the same integer constant to intialize + * the loop variables. */ + if(!s->Code->int_constants[0]) { + s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff); + s->Code->int_constant_count = 1; + } s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; - if (inst->U.I.Opcode == RC_OPCODE_IF) { - if (s->CurrentBranchDepth >= 32) { + switch(inst->U.I.Opcode){ + struct branch_info * branch; + struct r500_loop_info * loop; + case RC_OPCODE_BGNLOOP: + memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info, + s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1); + + loop = &s->Loops[s->CurrentLoopDepth++]; + memset(loop, 0, sizeof(struct r500_loop_info)); + loop->BranchDepth = s->CurrentBranchDepth; + loop->BgnLoop = newip; + + s->Code->inst[newip].inst2 = R500_FC_OP_LOOP + | R500_FC_JUMP_FUNC(0x00) + | R500_FC_IGNORE_UNCOVERED + ; + break; + case RC_OPCODE_BRK: + loop = &s->Loops[s->CurrentLoopDepth - 1]; + memory_pool_array_reserve(&s->C->Pool, int, loop->Brks, + loop->BrkCount, loop->BrkReserved, 1); + + loop->Brks[loop->BrkCount++] = newip; + s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED + ; + break; + + case RC_OPCODE_CONT: + loop = &s->Loops[s->CurrentLoopDepth - 1]; + memory_pool_array_reserve(&s->C->Pool, int, loop->Conts, + loop->ContCount, loop->ContReserved, 1); + loop->Conts[loop->ContCount++] = newip; + s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED + ; + break; + + case RC_OPCODE_ENDLOOP: + { + loop = &s->Loops[s->CurrentLoopDepth - 1]; + /* Emit ENDLOOP */ + s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_JUMP_ANY + | R500_FC_IGNORE_UNCOVERED + ; + /* The constant integer at index 0 is used by all loops. */ + s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(loop->BgnLoop + 1) + ; + + /* Set jump address and int constant for BGNLOOP */ + s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(newip) + ; + + /* Set jump address for the BRK instructions. */ + while(loop->BrkCount--) { + s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = + R500_FC_JUMP_ADDR(newip + 1); + } + + /* Set jump address for CONT instructions. */ + while(loop->ContCount--) { + s->Code->inst[loop->Conts[loop->ContCount]].inst3 = + R500_FC_JUMP_ADDR(newip); + } + s->CurrentLoopDepth--; + break; + } + case RC_OPCODE_IF: + if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) { rc_error(s->C, "Branch depth exceeds hardware limit"); return; } - memory_pool_array_reserve(&s->C->Pool, struct branch_info, s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1); - struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++]; + branch = &s->Branches[s->CurrentBranchDepth++]; branch->If = newip; branch->Else = -1; branch->Endif = -1; @@ -377,29 +522,42 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst s->MaxBranchDepth = s->CurrentBranchDepth; /* actual instruction is filled in at ENDIF time */ - } else if (inst->U.I.Opcode == RC_OPCODE_ELSE) { + break; + + case RC_OPCODE_ELSE: if (!s->CurrentBranchDepth) { rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); return; } - struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch = &s->Branches[s->CurrentBranchDepth - 1]; branch->Else = newip; /* actual instruction is filled in at ENDIF time */ - } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + break; + + case RC_OPCODE_ENDIF: if (!s->CurrentBranchDepth) { rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); return; } - struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch = &s->Branches[s->CurrentBranchDepth - 1]; branch->Endif = newip; + s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ + | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ + | R500_FC_B_OP1_NONE /* no branch counter if stay */ + | R500_FC_B_POP_CNT(1) + ; + s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP | R500_FC_A_OP_NONE /* no address stack */ | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ | R500_FC_B_OP0_INCR /* increment branch counter if stay */ + | R500_FC_IGNORE_UNCOVERED ; if (branch->Else >= 0) { @@ -421,23 +579,17 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); } - s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP - | R500_FC_A_OP_NONE /* no address stack */ - | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ - | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ - | R500_FC_B_OP1_NONE /* no branch counter if stay */ - | R500_FC_B_POP_CNT(1) - ; - s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); s->CurrentBranchDepth--; - } else { - rc_error(s->C, "%s: unknown opcode %i\n", __FUNCTION__, inst->U.I.Opcode); + break; + default: + rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name); } } -void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) +void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) { + struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; struct emit_state s; struct r500_fragment_program_code *code = &compiler->code->code.r500; @@ -467,7 +619,7 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi } } - if (code->max_temp_idx >= 128) + if (code->max_temp_idx >= compiler->Base.max_temp_regs) rc_error(&compiler->Base, "Too many hardware temporaries used"); if (compiler->Base.Error) @@ -475,18 +627,22 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi if (code->inst_end == -1 || (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { + int ip; + /* This may happen when dead-code elimination is disabled or * when most of the fragment program logic is leading to a KIL */ - if (code->inst_end >= 511) { + if (code->inst_end >= compiler->Base.max_alu_insts-1) { rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); return; } - int ip = ++code->inst_end; + ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; } - if (s.MaxBranchDepth >= 4) { + /* Enable full flow control mode if we are using loops or have if + * statements nested at least four deep. */ + if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) { if (code->max_temp_idx < 1) code->max_temp_idx = 1;