From 476248befe2bd04558ce53e937230c1a400a51b6 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 19 May 2008 11:01:00 -0700 Subject: [PATCH] r5xx: Fixup emit_tex, add debugging info, enable temp temps. emit_tex now chases itself with an OUT if needed. Added airlied's dump_program, with some fixes. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 170 +++++++++++++--------- 1 file changed, 105 insertions(+), 65 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 7f8139a533b..cc042a00abf 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -127,28 +127,8 @@ static inline GLuint make_strq_swizzle(struct prog_src_register src) { return swiz; } -static int get_temp(struct r500_fragment_program *fp, int slot) { - - COMPILE_STATE; - - int r = slot; - - while (cs->inputs[r].refcount != 0) { - /* Crap, taken. */ - r++; - } - - fp->temp_reg_offset = r - slot; - - if (r >= R500_US_NUM_TEMP_REGS) { - ERROR("Out of hardware temps!\n"); - return 0; - } - - if (r > fp->max_temp_idx) - fp->max_temp_idx = r; - - return r; +static int get_temp(struct r500_fragment_program *fp) { + return fp->max_temp_idx + 1; } /* Borrowed verbatim from r300_fragprog since it hasn't changed. */ @@ -239,6 +219,12 @@ static void emit_tex(struct r500_fragment_program *fp, mask = fpi->DstReg.WriteMask << 11; hwsrc = make_src(fp, fpi->SrcReg[0]); + if (fpi->DstReg.File == PROGRAM_OUTPUT) { + hwdest = get_temp(fp); + } else { + hwdest = dest; + } + fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask | R500_INST_TEX_SEM_WAIT; @@ -269,15 +255,35 @@ static void emit_tex(struct r500_fragment_program *fp, /* | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) */ | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A - | R500_TEX_DST_ADDR(dest) + | R500_TEX_DST_ADDR(hwdest) | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - - fp->inst[counter].inst3 = 0x0; fp->inst[counter].inst4 = 0x0; - fp->inst[counter].inst5 = 0x0; + fp->inst[counter].inst5 = 0x0; + + if (fpi->DstReg.File == PROGRAM_OUTPUT) { + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_OUT + | R500_INST_TEX_SEM_WAIT | (mask << 4); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp)); + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_OMOD_DISABLE; + fp->inst[counter].inst4 = R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A) + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A) + | R500_ALPHA_OMOD_DISABLE; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + } } static void dumb_shader(struct r500_fragment_program *fp) @@ -727,8 +733,8 @@ static GLboolean parse_program(struct r500_fragment_program *fp) | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | R500_ALU_RGBA_ADDRD(dest); + /* Put 0 into B,A (z,w) channels. counter++; - /* Put 0 into B,A (z,w) channels. */ fpi->DstReg.WriteMask = 0xC; emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); @@ -744,7 +750,36 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_ADDRD(dest) | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); */ + break; + case OPCODE_SGE: + /* We use SRCP, so as a precaution we're + * going to set NOP in previous inst, if possible. */ + /* This inst's selects need to be swapped as follows: + * 0 -> C ; 1 -> B ; 2 -> A */ + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]) + | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]) + | R500_ALPHA_SRCP_OP_A1_MINUS_A0; + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) + | R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); + fp->inst[counter].inst4 = R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRCP + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRCP + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])); break; case OPCODE_SIN: src[0] = make_src(fp, fpi->SrcReg[0]); @@ -759,6 +794,35 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP | R500_ALU_RGBA_ADDRD(dest); break; + case OPCODE_SLT: + /* We use SRCP, so as a precaution we're + * going to set NOP in previous inst, if possible. */ + /* This inst's selects need to be swapped as follows: + * 0 -> C ; 1 -> B ; 2 -> A */ + src[0] = make_src(fp, fpi->SrcReg[0]); + src[1] = make_src(fp, fpi->SrcReg[1]); + emit_alu(fp, counter, fpi); + fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]) + | R500_RGB_SRCP_OP_RGB1_MINUS_RGB0; + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]) + | R500_ALPHA_SRCP_OP_A1_MINUS_A0; + fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO) + | R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); + fp->inst[counter].inst4 = R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRCP + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRCP + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])); + break; case OPCODE_SUB: src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = make_src(fp, fpi->SrcReg[1]); @@ -791,12 +855,18 @@ static GLboolean parse_program(struct r500_fragment_program *fp) break; case OPCODE_TEX: emit_tex(fp, fpi, OPCODE_TEX, dest, counter); + if (fpi->DstReg.File == PROGRAM_OUTPUT) + counter++; break; case OPCODE_TXB: emit_tex(fp, fpi, OPCODE_TXB, dest, counter); + if (fpi->DstReg.File == PROGRAM_OUTPUT) + counter++; break; case OPCODE_TXP: emit_tex(fp, fpi, OPCODE_TXP, dest, counter); + if (fpi->DstReg.File == PROGRAM_OUTPUT) + counter++; break; default: ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); @@ -822,24 +892,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) /* We still need to put an output inst, right? */ WARN_ONCE("Final FP instruction is not an OUT.\n"); #if 0 - fp->inst[counter].inst0 = R500_INST_TYPE_OUT - | R500_INST_TEX_SEM_WAIT | R500_INST_LAST | - output_mask; - fp->inst[counter].inst1 = R500_RGB_ADDR0(dest); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(dest); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(0) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SEL_B_SRC0 - | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_1; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(0) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; + #endif } @@ -946,20 +999,6 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) return; } -#if 0 - for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { - int idx; - for (i = 0; i < 3; i++) { - idx = fpi->SrcReg[i].Index; - if (fpi->SrcReg[i].File == PROGRAM_INPUT) { - cs->inputs[idx].refcount++; - if (fp->max_temp_idx < idx) - fp->max_temp_idx = idx; - } - } - } -#endif - fp->max_temp_idx = fp->temp_reg_offset + 1; cs->temp_in_use = temps_used; @@ -1005,6 +1044,7 @@ void r500TranslateFragmentShader(r300ContextPtr r300, if (RADEON_DEBUG & DEBUG_PIXEL) dump_program(fp); + r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); } @@ -1094,7 +1134,7 @@ static char *to_mask(int val) } return str; } - + static void dump_program(struct r500_fragment_program *fp) { int pc = 0; @@ -1142,9 +1182,9 @@ static void dump_program(struct r500_fragment_program *fp) fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3); inst = fp->inst[n].inst3; fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n", - (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), + (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), (inst >> 11) & 0x3, - (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), + (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3); @@ -1159,7 +1199,7 @@ static void dump_program(struct r500_fragment_program *fp) inst = fp->inst[n].inst5; fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", - (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), (inst >> 23) & 0x3, (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); break; @@ -1173,5 +1213,5 @@ static void dump_program(struct r500_fragment_program *fp) } fprintf(stderr,"\n"); } - + } -- 2.30.2