From 03abd021f2fa1d043682c9f1bbb1c080fba6b033 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 5 Jul 2008 22:35:46 +0200 Subject: [PATCH] r300: Translate fragment program LRP in radeon_program_alu.c --- .../drivers/dri/r300/r300_fragprog_emit.c | 17 ------------ .../drivers/dri/r300/r500_fragprog_emit.c | 27 ------------------- .../drivers/dri/r300/radeon_program_alu.c | 16 ++++++++++- 3 files changed, 15 insertions(+), 45 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c index d95008edc06..446517405ba 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c @@ -1590,23 +1590,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst emit_arith(cs, PFS_OP_LG2, dest, mask, src[0], undef, undef, flags); break; - case OPCODE_LRP: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - src[2] = t_src(cs, fpi->SrcReg[2]); - /* result = tmp0tmp1 + (1 - tmp0)tmp2 - * = tmp0tmp1 + tmp2 + (-tmp0)tmp2 - * MAD temp, -tmp0, tmp2, tmp2 - * MAD result, tmp0, tmp1, temp - */ - temp[0] = get_temp_reg(cs); - emit_arith(cs, PFS_OP_MAD, temp[0], mask, - negate(keep(src[0])), keep(src[2]), src[2], - 0); - emit_arith(cs, PFS_OP_MAD, dest, mask, - src[0], src[1], temp[0], flags); - free_temp(cs, temp[0]); - break; case OPCODE_MAD: src[0] = t_src(cs, fpi->SrcReg[0]); src[1] = t_src(cs, fpi->SrcReg[1]); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c index c79bff96bd4..5b4d06ecf36 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c @@ -751,33 +751,6 @@ static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction * src[0] = make_src(cs, fpi->SrcReg[0]); emit_sop(cs, R500_ALPHA_OP_LN2, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0])); break; - case OPCODE_LRP: - /* result = src0*src1 + (1-src0)*src2 - * = src0*src1 + src2 + (-src0)*src2 - * - * Note: LRP without swizzling (or with only limited - * swizzling) could be done more efficiently using the - * presubtract hardware. - */ - dest = get_temp(cs, 0); - ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, dest, WRITEMASK_XYZW); - set_src0(cs, ip, fpi->SrcReg[0]); - set_src1(cs, ip, fpi->SrcReg[1]); - set_src2(cs, ip, fpi->SrcReg[2]); - set_argA_reg(cs, ip, 0, fpi->SrcReg[0]); - set_argB_reg(cs, ip, 1, fpi->SrcReg[1]); - set_argC_reg(cs, ip, 2, fpi->SrcReg[2]); - - ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg); - set_src0(cs, ip, fpi->SrcReg[0]); - set_src1(cs, ip, fpi->SrcReg[2]); - set_src2_direct(cs, ip, dest); - set_argA(cs, ip, 0, - make_rgb_swizzle(fpi->SrcReg[0]) ^ (R500_SWIZ_MOD_NEG<<9), - make_alpha_swizzle(fpi->SrcReg[0]) ^ (R500_SWIZ_MOD_NEG<<3)); - set_argB_reg(cs, ip, 1, fpi->SrcReg[2]); - set_argC(cs, ip, 2, R500_SWIZ_RGB_RGB, SWIZZLE_W); - break; case OPCODE_MAD: ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg); set_src0(cs, ip, fpi->SrcReg[0]); diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c index 85ea810523f..483dfa2cdc3 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/radeon_program_alu.c @@ -296,6 +296,19 @@ static void transform_LIT(struct radeon_transform_context* t, emit1(t->Program, OPCODE_MOV, inst->DstReg, srctemp); } +static void transform_LRP(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_ADD, + dstreg(PROGRAM_TEMPORARY, tempreg), + inst->SrcReg[1], negate(inst->SrcReg[2])); + emit3(t->Program, OPCODE_MAD, + inst->DstReg, + inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]); +} + static void transform_POW(struct radeon_transform_context* t, struct prog_instruction* inst) { @@ -360,7 +373,7 @@ static void transform_XPD(struct radeon_transform_context* t, * no userData necessary. * * Eliminates the following ALU instructions: - * ABS, DPH, FLR, LIT, POW, SGE, SLT, SUB, SWZ, XPD + * ABS, DPH, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD * using: * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP * @@ -375,6 +388,7 @@ GLboolean radeonTransformALU(struct radeon_transform_context* t, case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE; case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE; case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE; + case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE; case OPCODE_POW: transform_POW(t, inst); return GL_TRUE; case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE; case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE; -- 2.30.2