r300: Translate fragment program LRP in radeon_program_alu.c
author Nicolai Haehnle <nhaehnle@gmail.com>
Sat, 5 Jul 2008 20:35:46 +0000 (22:35 +0200)
committer Nicolai Haehnle <nhaehnle@gmail.com>
Sun, 6 Jul 2008 08:00:35 +0000 (10:00 +0200)
src/mesa/drivers/dri/r300/r300_fragprog_emit.c
src/mesa/drivers/dri/r300/r500_fragprog_emit.c
src/mesa/drivers/dri/r300/radeon_program_alu.c

index d95008edc0612408afed9f0e2fd4d6a1d1bbff35..446517405ba0300e7d704898ea731ef29dfa1a13 100644 (file)
@@ -1590,23 +1590,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst
                emit_arith(cs, PFS_OP_LG2, dest, mask,
                                src[0], undef, undef, flags);
                break;
-       case OPCODE_LRP:
-               src[0] = t_src(cs, fpi->SrcReg[0]);
-               src[1] = t_src(cs, fpi->SrcReg[1]);
-               src[2] = t_src(cs, fpi->SrcReg[2]);
-               /* result = tmp0tmp1 + (1 - tmp0)tmp2
-                       *        = tmp0tmp1 + tmp2 + (-tmp0)tmp2
-                       *     MAD temp, -tmp0, tmp2, tmp2
-                       *     MAD result, tmp0, tmp1, temp
-                       */
-               temp[0] = get_temp_reg(cs);
-               emit_arith(cs, PFS_OP_MAD, temp[0], mask,
-                               negate(keep(src[0])), keep(src[2]), src[2],
-                               0);
-               emit_arith(cs, PFS_OP_MAD, dest, mask,
-                               src[0], src[1], temp[0], flags);
-               free_temp(cs, temp[0]);
-               break;
        case OPCODE_MAD:
                src[0] = t_src(cs, fpi->SrcReg[0]);
                src[1] = t_src(cs, fpi->SrcReg[1]);
index c79bff96bd4df2870bf886b976676724f7fbcfde..5b4d06ecf362161cd969ce9a18070324b28b5716 100644 (file)
@@ -751,33 +751,6 @@ static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *
                        src[0] = make_src(cs, fpi->SrcReg[0]);
                        emit_sop(cs, R500_ALPHA_OP_LN2, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0]));
                        break;
-               case OPCODE_LRP:
-                       /* result = src0*src1 + (1-src0)*src2
-                        *        = src0*src1 + src2 + (-src0)*src2
-                        *
-                        * Note: LRP without swizzling (or with only limited
-                        * swizzling) could be done more efficiently using the
-                        * presubtract hardware.
-                        */
-                       dest = get_temp(cs, 0);
-                       ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, dest, WRITEMASK_XYZW);
-                       set_src0(cs, ip, fpi->SrcReg[0]);
-                       set_src1(cs, ip, fpi->SrcReg[1]);
-                       set_src2(cs, ip, fpi->SrcReg[2]);
-                       set_argA_reg(cs, ip, 0, fpi->SrcReg[0]);
-                       set_argB_reg(cs, ip, 1, fpi->SrcReg[1]);
-                       set_argC_reg(cs, ip, 2, fpi->SrcReg[2]);
-
-                       ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg);
-                       set_src0(cs, ip, fpi->SrcReg[0]);
-                       set_src1(cs, ip, fpi->SrcReg[2]);
-                       set_src2_direct(cs, ip, dest);
-                       set_argA(cs, ip, 0,
-                               make_rgb_swizzle(fpi->SrcReg[0]) ^ (R500_SWIZ_MOD_NEG<<9),
-                               make_alpha_swizzle(fpi->SrcReg[0]) ^ (R500_SWIZ_MOD_NEG<<3));
-                       set_argB_reg(cs, ip, 1, fpi->SrcReg[2]);
-                       set_argC(cs, ip, 2, R500_SWIZ_RGB_RGB, SWIZZLE_W);
-                       break;
                case OPCODE_MAD:
                        ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg);
                        set_src0(cs, ip, fpi->SrcReg[0]);
index 85ea810523f4381cea521fb309fed6a4027ee038..483dfa2cdc3e7a5061e0606c3183d56cfaf26e7c 100644 (file)
@@ -296,6 +296,19 @@ static void transform_LIT(struct radeon_transform_context* t,
                emit1(t->Program, OPCODE_MOV, inst->DstReg, srctemp);
 }
 
+static void transform_LRP(struct radeon_transform_context* t,
+       struct prog_instruction* inst)
+{ /* Lowers LRP to ADD + MAD; assuming negate() flips the operand's sign: dst = src0*(src1 - src2) + src2 */
+       int tempreg = radeonFindFreeTemporary(t); /* index of the temporary that receives the ADD result */
+
+       emit2(t->Program, OPCODE_ADD, /* temp = src1 + negate(src2) */
+               dstreg(PROGRAM_TEMPORARY, tempreg),
+               inst->SrcReg[1], negate(inst->SrcReg[2]));
+       emit3(t->Program, OPCODE_MAD, /* DstReg = src0 * temp + src2 */
+               inst->DstReg,
+               inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]);
+}
+
 static void transform_POW(struct radeon_transform_context* t,
        struct prog_instruction* inst)
 {
@@ -360,7 +373,7 @@ static void transform_XPD(struct radeon_transform_context* t,
  * no userData necessary.
  *
  * Eliminates the following ALU instructions:
- *  ABS, DPH, FLR, LIT, POW, SGE, SLT, SUB, SWZ, XPD
+ *  ABS, DPH, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD
  * using:
  *  MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
  *
@@ -375,6 +388,7 @@ GLboolean radeonTransformALU(struct radeon_transform_context* t,
        case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
        case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
        case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE;
+       case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE;
        case OPCODE_POW: transform_POW(t, inst); return GL_TRUE;
        case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE;
        case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE;