r500_fragprog: Transform trigonometric functions in first pass
author Nicolai Haehnle <nhaehnle@gmail.com>
Sun, 6 Jul 2008 14:58:51 +0000 (16:58 +0200)
committer Nicolai Haehnle <nhaehnle@gmail.com>
Sat, 12 Jul 2008 07:36:02 +0000 (09:36 +0200)
src/mesa/drivers/dri/r300/r500_fragprog.c
src/mesa/drivers/dri/r300/r500_fragprog_emit.c
src/mesa/drivers/dri/r300/radeon_program_alu.c
src/mesa/drivers/dri/r300/radeon_program_alu.h

index 1cdb065354be61f4ddde8e7c3b05a2f09272faeb..9bb92d3ba419d9576f80bc9684f89647f8bdc0fa 100644 (file)
@@ -318,12 +318,13 @@ void r500TranslateFragmentShader(r300ContextPtr r300,
 
                insert_WPOS_trailer(&compiler);
 
-               struct radeon_program_transformation transformations[2] = {
+               struct radeon_program_transformation transformations[3] = {
                        { &transform_TEX, &compiler },
-                       { &radeonTransformALU, 0 }
+                       { &radeonTransformALU, 0 },
+                       { &radeonTransformTrigScale, 0 }
                };
                radeonLocalTransform(r300->radeon.glCtx, compiler.program,
-                       2, transformations);
+                       3, transformations);
 
                if (RADEON_DEBUG & DEBUG_PIXEL) {
                        _mesa_printf("Compiler: after all transformations:\n");
index 8c900941c4d997ad5c1e5b82e3d70b4d3f5683bb..4f658039536e38d000a6e0d7e38819345ff24408 100644 (file)
@@ -156,17 +156,6 @@ struct r500_pfs_compile_state {
 #define R500_WRITEMASK_AB 0xC
 #define R500_WRITEMASK_ARGB 0xF
 
-/* 1/(2pi), needed for quick modulus in trig insts
- * Thanks to glisse for pointing out how to do it! */
-static const GLfloat RCP_2PI[] = {0.15915494309189535,
-       0.15915494309189535,
-       0.15915494309189535,
-       0.15915494309189535};
-
-static const GLfloat LIT[] = {127.999999,
-       127.999999,
-       127.999999,
-       -127.999999};
 
 static const struct prog_dst_register dstreg_template = {
        .File = PROGRAM_TEMPORARY,
@@ -476,12 +465,6 @@ static int emit_alu(struct r500_pfs_compile_state *cs, GLuint rgbop, GLuint alph
        return _helper_emit_alu(cs, rgbop, alphaop, dst.File, dst.Index, dst.WriteMask);
 }
 
-static int emit_alu_temp(struct r500_pfs_compile_state *cs, GLuint rgbop, GLuint alphaop, int dst, int writemask)
-{
-       return _helper_emit_alu(cs, rgbop, alphaop,
-               PROGRAM_TEMPORARY, dst - cs->compiler->code->temp_reg_offset, writemask);
-}
-
 /**
  * Set an instruction's source 0 (both RGB and ALPHA) to the given hardware index.
  */
@@ -612,56 +595,6 @@ static int emit_sop(struct r500_pfs_compile_state *cs,
 }
 
 
-/**
- * Emit trigonometric function COS, SIN, SCS
- */
-static void emit_trig(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi)
-{
-       int ip;
-       struct prog_dst_register temp = dstreg_template;
-       temp.Index = get_temp(cs, 0);
-       temp.WriteMask = WRITEMASK_W;
-
-       struct prog_src_register srcreg;
-       GLuint constant_swizzle;
-
-       srcreg.File = PROGRAM_CONSTANT;
-       srcreg.Index = _mesa_add_unnamed_constant(cs->compiler->program->Parameters,
-               RCP_2PI, 4, &constant_swizzle);
-       srcreg.Swizzle = constant_swizzle;
-
-       /* temp = Input*(1/2pi) */
-       ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, temp);
-       set_src0(cs, ip, fpi->SrcReg[0]);
-       set_src1(cs, ip, srcreg);
-       set_argA(cs, ip, 0, R500_SWIZ_RGB_ZERO, make_sop_swizzle(fpi->SrcReg[0]));
-       set_argB(cs, ip, 1, R500_SWIZ_RGB_ZERO, make_alpha_swizzle(srcreg));
-       set_argC(cs, ip, 0, R500_SWIZ_RGB_ZERO, R500_SWIZZLE_ZERO);
-
-       /* temp = frac(dst) */
-       ip = emit_alu(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, temp);
-       set_src0_direct(cs, ip, temp.Index);
-       set_argA(cs, ip, 0, R500_SWIZ_RGB_RGB, SWIZZLE_W);
-
-       /* Dest = trig(temp) */
-       if (fpi->Opcode == OPCODE_COS) {
-               emit_sop(cs, R500_ALPHA_OP_COS, fpi->DstReg, temp.Index, SWIZZLE_W);
-       } else if (fpi->Opcode == OPCODE_SIN) {
-               emit_sop(cs, R500_ALPHA_OP_SIN, fpi->DstReg, temp.Index, SWIZZLE_W);
-       } else if (fpi->Opcode == OPCODE_SCS) {
-               struct prog_dst_register moddst = fpi->DstReg;
-
-               if (fpi->DstReg.WriteMask & WRITEMASK_X) {
-                       moddst.WriteMask = WRITEMASK_X;
-                       emit_sop(cs, R500_ALPHA_OP_COS, fpi->DstReg, temp.Index, SWIZZLE_W);
-               }
-               if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
-                       moddst.WriteMask = WRITEMASK_Y;
-                       emit_sop(cs, R500_ALPHA_OP_SIN, fpi->DstReg, temp.Index, SWIZZLE_W);
-               }
-       }
-}
-
 static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi) {
        PROG_CODE;
        GLuint src[3], dest = 0;
@@ -693,7 +626,8 @@ static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *
                        set_argC_reg(cs, ip, 0, fpi->SrcReg[0]);
                        break;
                case OPCODE_COS:
-                       emit_trig(cs, fpi);
+                       src[0] = make_src(cs, fpi->SrcReg[0]);
+                       emit_sop(cs, R500_ALPHA_OP_COS, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0]));
                        break;
                case OPCODE_DP3:
                        ip = emit_alu(cs, R500_ALU_RGBA_OP_DP3, R500_ALPHA_OP_DP, fpi->DstReg);
@@ -713,21 +647,6 @@ static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *
                        src[0] = make_src(cs, fpi->SrcReg[0]);
                        emit_sop(cs, R500_ALPHA_OP_EX2, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0]));
                        break;
-               case OPCODE_FLR:
-                       dest = get_temp(cs, 0);
-                       ip = emit_alu_temp(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, dest, WRITEMASK_XYZW);
-                       set_src0(cs, ip, fpi->SrcReg[0]);
-                       set_argA_reg(cs, ip, 0, fpi->SrcReg[0]);
-
-                       ip = emit_alu(cs, R500_ALU_RGBA_OP_MAD, R500_ALPHA_OP_MAD, fpi->DstReg);
-                       set_src0(cs, ip, fpi->SrcReg[0]);
-                       set_src1_direct(cs, ip, dest);
-                       set_argA_reg(cs, ip, 0, fpi->SrcReg[1]);
-                       set_argB(cs, ip, 0, R500_SWIZ_RGB_ONE, R500_SWIZZLE_ONE);
-                       set_argC(cs, ip, 1,
-                               R500_SWIZ_RGB_RGB|(R500_SWIZ_MOD_NEG<<9),
-                               SWIZZLE_W|(R500_SWIZ_MOD_NEG<<3));
-                       break;
                case OPCODE_FRC:
                        ip = emit_alu(cs, R500_ALU_RGBA_OP_FRC, R500_ALPHA_OP_FRC, fpi->DstReg);
                        set_src0(cs, ip, fpi->SrcReg[0]);
@@ -787,11 +706,9 @@ static void do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *
                        emit_sop(cs, R500_ALPHA_OP_RSQ, fpi->DstReg, src[0],
                                (make_sop_swizzle(fpi->SrcReg[0]) | (R500_SWIZ_MOD_ABS<<3)) & ~(R500_SWIZ_MOD_NEG<<3));
                        break;
-               case OPCODE_SCS:
-                       emit_trig(cs, fpi);
-                       break;
                case OPCODE_SIN:
-                       emit_trig(cs, fpi);
+                       src[0] = make_src(cs, fpi->SrcReg[0]);
+                       emit_sop(cs, R500_ALPHA_OP_SIN, fpi->DstReg, src[0], make_sop_swizzle(fpi->SrcReg[0]));
                        break;
                case OPCODE_KIL:
                case OPCODE_TEX:
index fa6a67f0c1965e0bd3117708c776ca00d7295441..8daa94c7268e47867da2ef43460944eb3564c9e6 100644 (file)
@@ -556,3 +556,55 @@ GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
 
        return GL_TRUE;
 }
+
+
+/**
+ * Rewrite COS, SIN and SCS so the operand is pre-scaled by 1/(2*PI) and
+ * reduced to its fractional part, i.e. COS/SIN always see input in [0,1).
+ * The source is read through an X,X,X,X swizzle. SCS becomes one COS (into X)
+ * and one SIN (into Y), each emitted only if its write-mask bit is set.
+ * @return GL_FALSE (nothing emitted) for any other opcode, else GL_TRUE.
+ * @warning This transformation implicitly changes the semantics of SIN and COS!
+ */
+GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
+       struct prog_instruction* inst,
+       void* unused) /* not referenced anywhere in the body */
+{
+       if (inst->Opcode != OPCODE_COS &&
+           inst->Opcode != OPCODE_SIN &&
+           inst->Opcode != OPCODE_SCS)
+               return GL_FALSE; /* not COS/SIN/SCS: nothing to do here */
+
+       static const GLfloat RCP_2PI[] = { 0.15915494309189535 }; /* 1/(2*PI) */
+       GLuint temp;
+       GLuint constant;
+       GLuint constant_swizzle;
+
+       temp = radeonFindFreeTemporary(t); /* scratch register; only its W channel is written below */
+       constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle);
+
+       emit2(t->Program, OPCODE_MUL, dstregtmpmask(temp, WRITEMASK_W), /* temp.w = source * 1/(2*PI) */
+               swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+               srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle));
+       emit1(t->Program, OPCODE_FRC, dstregtmpmask(temp, WRITEMASK_W), /* fractional part, again into temp.w */
+               srcreg(PROGRAM_TEMPORARY, temp));
+
+       if (inst->Opcode == OPCODE_COS) {
+               emit1(t->Program, OPCODE_COS, inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+       } else if (inst->Opcode == OPCODE_SIN) {
+               emit1(t->Program, OPCODE_SIN, inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+       } else if (inst->Opcode == OPCODE_SCS) {
+               struct prog_dst_register moddst = inst->DstReg; /* destination copy whose write mask is narrowed per component */
+
+               if (inst->DstReg.WriteMask & WRITEMASK_X) {
+                       moddst.WriteMask = WRITEMASK_X; /* cosine goes to X only */
+                       emit1(t->Program, OPCODE_COS, moddst, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+               }
+               if (inst->DstReg.WriteMask & WRITEMASK_Y) {
+                       moddst.WriteMask = WRITEMASK_Y; /* sine goes to Y only */
+                       emit1(t->Program, OPCODE_SIN, moddst, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+               }
+       }
+
+       return GL_TRUE;
+}
index 3fe6153fd820f3ec85a2f3200778ae23748d618d..ea9d5bb669c91796648250bacc39c64e2364dcd9 100644 (file)
@@ -40,4 +40,9 @@ GLboolean radeonTransformTrigSimple(
        struct prog_instruction*,
        void*);
 
+GLboolean radeonTransformTrigScale(
+       struct radeon_transform_context *t,
+       struct prog_instruction*,
+       void*);
+
 #endif /* __RADEON_PROGRAM_ALU_H_ */