From 2b2cb566563b9f1f9739327ef9874143af838850 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 5 Jul 2008 23:54:31 +0200 Subject: [PATCH] r300_fragprog: Emulate trigonometric functions in radeon_program_alu --- src/mesa/drivers/dri/r300/r300_fragprog.c | 5 +- .../drivers/dri/r300/r300_fragprog_emit.c | 206 +----------------- .../drivers/dri/r300/radeon_program_alu.c | 144 ++++++++++++ .../drivers/dri/r300/radeon_program_alu.h | 5 + 4 files changed, 153 insertions(+), 207 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 57987f5d0fc..8a1d690ae4e 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -408,12 +408,13 @@ void r300TranslateFragmentShader(r300ContextPtr r300, struct radeon_program_transformation transformations[] = { { &transform_TEX, &compiler }, - { &radeonTransformALU, 0 } + { &radeonTransformALU, 0 }, + { &radeonTransformTrigSimple, 0 } }; radeonLocalTransform( r300->radeon.glCtx, compiler.program, - 2, transformations); + 3, transformations); if (RADEON_DEBUG & DEBUG_PIXEL) { _mesa_printf("Fragment Program: After transformations:\n"); diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c index 30f513b5a31..4786b4554df 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c @@ -1423,40 +1423,11 @@ static void emit_arith(struct r300_pfs_compile_state *cs, return; } -static GLfloat SinCosConsts[2][4] = { - { - 1.273239545, // 4/PI - -0.405284735, // -4/(PI*PI) - 3.141592654, // PI - 0.2225 // weight - }, - { - 0.75, - 0.0, - 0.159154943, // 1/(2*PI) - 6.283185307 // 2*PI - } -}; - -static GLuint emit_sincosconsts(struct r300_pfs_compile_state *cs, int i) -{ - struct prog_src_register srcreg; - GLuint constant_swizzle; - - srcreg.File = PROGRAM_CONSTANT; - srcreg.Index = 
_mesa_add_unnamed_constant(cs->compiler->program->Parameters, - SinCosConsts[i], 4, &constant_swizzle); - srcreg.Swizzle = constant_swizzle; - - return emit_const4fv(cs, srcreg); -} - static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_instruction *fpi) { COMPILE_STATE; - GLuint src[3], dest, temp[2]; + GLuint src[3], dest; int flags, mask = 0; - int const_sin[2]; if (fpi->SaturateMode == SATURATE_ZERO_ONE) flags = PFS_FLAG_SAT; @@ -1485,60 +1456,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst emit_arith(cs, PFS_OP_CMP, dest, mask, src[2], src[1], src[0], flags); break; - case OPCODE_COS: - /* - * cos using a parabola (see SIN): - * cos(x): - * x = (x/(2*PI))+0.75 - * x = frac(x) - * x = (x*2*PI)-PI - * result = sin(x) - */ - temp[0] = get_temp_reg(cs); - const_sin[0] = emit_sincosconsts(cs, 0); - const_sin[1] = emit_sincosconsts(cs, 1); - src[0] = t_scalar_src(cs, fpi->SrcReg[0]); - - /* add 0.5*PI and do range reduction */ - - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, - swizzle(src[0], X, X, X, X), - swizzle(const_sin[1], Z, Z, Z, Z), - swizzle(const_sin[1], X, X, X, X), 0); - - emit_arith(cs, PFS_OP_FRC, temp[0], WRITEMASK_X, - swizzle(temp[0], X, X, X, X), - undef, undef, 0); - - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI - negate(swizzle(const_sin[0], Z, Z, Z, Z)), //-PI - 0); - - /* SIN */ - - emit_arith(cs, PFS_OP_MAD, temp[0], - WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], - Z, Z, Z, - Z), - const_sin[0], pfs_zero, 0); - - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, - swizzle(temp[0], Y, Y, Y, Y), - absolute(swizzle(temp[0], Z, Z, Z, Z)), - swizzle(temp[0], X, X, X, X), 0); - - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Y, - swizzle(temp[0], X, X, X, X), - absolute(swizzle(temp[0], X, X, X, X)), - negate(swizzle(temp[0], X, X, X, X)), 0); - - emit_arith(cs, PFS_OP_MAD, dest, mask, - swizzle(temp[0], Y, Y, 
Y, Y), - swizzle(const_sin[0], W, W, W, W), - swizzle(temp[0], X, X, X, X), flags); - - free_temp(cs, temp[0]); - break; case OPCODE_DP3: src[0] = t_src(cs, fpi->SrcReg[0]); src[1] = t_src(cs, fpi->SrcReg[1]); @@ -1609,127 +1526,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst emit_arith(cs, PFS_OP_RSQ, dest, mask, absolute(src[0]), pfs_zero, pfs_zero, flags); break; - case OPCODE_SCS: - /* - * scs using a parabola : - * scs(x): - * result.x = sin(-abs(x)+0.5*PI) (cos) - * result.y = sin(x) (sin) - * - */ - temp[0] = get_temp_reg(cs); - temp[1] = get_temp_reg(cs); - const_sin[0] = emit_sincosconsts(cs, 0); - const_sin[1] = emit_sincosconsts(cs, 1); - src[0] = t_scalar_src(cs, fpi->SrcReg[0]); - - /* x = -abs(x)+0.5*PI */ - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z), //PI - pfs_half, - negate(abs - (swizzle(keep(src[0]), X, X, X, X))), - 0); - - /* C*x (sin) */ - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_W, - swizzle(const_sin[0], Y, Y, Y, Y), - swizzle(keep(src[0]), X, X, X, X), - pfs_zero, 0); - - /* B*x, C*x (cos) */ - emit_arith(cs, PFS_OP_MAD, temp[0], - WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], - Z, Z, Z, - Z), - const_sin[0], pfs_zero, 0); - - /* B*x (sin) */ - emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_W, - swizzle(const_sin[0], X, X, X, X), - keep(src[0]), pfs_zero, 0); - - /* y = B*x + C*x*abs(x) (sin) */ - emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_Z, - absolute(src[0]), - swizzle(temp[0], W, W, W, W), - swizzle(temp[1], W, W, W, W), 0); - - /* y = B*x + C*x*abs(x) (cos) */ - emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_W, - swizzle(temp[0], Y, Y, Y, Y), - absolute(swizzle(temp[0], Z, Z, Z, Z)), - swizzle(temp[0], X, X, X, X), 0); - - /* y*abs(y) - y (cos), y*abs(y) - y (sin) */ - emit_arith(cs, PFS_OP_MAD, temp[0], - WRITEMASK_X | WRITEMASK_Y, swizzle(temp[1], - W, Z, Y, - X), - absolute(swizzle(temp[1], W, Z, Y, X)), - negate(swizzle(temp[1], W, Z, Y, X)), 0); - - 
/* dest.xy = mad(temp.xy, P, temp2.wz) */ - emit_arith(cs, PFS_OP_MAD, dest, - mask & (WRITEMASK_X | WRITEMASK_Y), temp[0], - swizzle(const_sin[0], W, W, W, W), - swizzle(temp[1], W, Z, Y, X), flags); - - free_temp(cs, temp[0]); - free_temp(cs, temp[1]); - break; - case OPCODE_SIN: - /* - * using a parabola: - * sin(x) = 4/pi * x + -4/(pi*pi) * x * abs(x) - * extra precision is obtained by weighting against - * itself squared. - */ - - temp[0] = get_temp_reg(cs); - const_sin[0] = emit_sincosconsts(cs, 0); - const_sin[1] = emit_sincosconsts(cs, 1); - src[0] = t_scalar_src(cs, fpi->SrcReg[0]); - - /* do range reduction */ - - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, - swizzle(keep(src[0]), X, X, X, X), - swizzle(const_sin[1], Z, Z, Z, Z), - pfs_half, 0); - - emit_arith(cs, PFS_OP_FRC, temp[0], WRITEMASK_X, - swizzle(temp[0], X, X, X, X), - undef, undef, 0); - - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI - negate(swizzle(const_sin[0], Z, Z, Z, Z)), //PI - 0); - - /* SIN */ - - emit_arith(cs, PFS_OP_MAD, temp[0], - WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], - Z, Z, Z, - Z), - const_sin[0], pfs_zero, 0); - - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, - swizzle(temp[0], Y, Y, Y, Y), - absolute(swizzle(temp[0], Z, Z, Z, Z)), - swizzle(temp[0], X, X, X, X), 0); - - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Y, - swizzle(temp[0], X, X, X, X), - absolute(swizzle(temp[0], X, X, X, X)), - negate(swizzle(temp[0], X, X, X, X)), 0); - - emit_arith(cs, PFS_OP_MAD, dest, mask, - swizzle(temp[0], Y, Y, Y, Y), - swizzle(const_sin[0], W, W, W, W), - swizzle(temp[0], X, X, X, X), flags); - - free_temp(cs, temp[0]); - break; case OPCODE_TEX: emit_tex(cs, fpi, R300_TEX_OP_LD); break; diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c index 4a40d3e44d8..fa6a67f0c19 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.c +++ 
b/src/mesa/drivers/dri/r300/radeon_program_alu.c @@ -149,6 +149,14 @@ static struct prog_src_register srcregswz(int file, int index, int swz) return src; } +static struct prog_src_register absolute(struct prog_src_register reg) +{ + struct prog_src_register newreg = reg; + newreg.Abs = 1; + newreg.NegateAbs = 0; + return newreg; +} + static struct prog_src_register negate(struct prog_src_register reg) { struct prog_src_register newreg = reg; @@ -412,3 +420,139 @@ GLboolean radeonTransformALU(struct radeon_transform_context* t, return GL_FALSE; } } + + +static void sincos_constants(struct radeon_transform_context* t, GLuint *constants) +{ + static const GLfloat SinCosConsts[2][4] = { + { + 1.273239545, // 4/PI + -0.405284735, // -4/(PI*PI) + 3.141592654, // PI + 0.2225 // weight + }, + { + 0.75, // COS bias added before FRC + 0.5, // SIN bias added before FRC + 0.159154943, // 1/(2*PI) + 6.283185307 // 2*PI + } + }; + int i; + + for(i = 0; i < 2; ++i) { + GLuint swz; + constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz); + ASSERT(swz == SWIZZLE_NOOP); + } +} + +/** + * Approximate sin(x), where x has already been range-reduced by the caller to [-PI, PI). 
+ * + * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } + * MAD tmp.x, tmp.y, |src|, tmp.x + * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x + * MAD dest, tmp.y, weight, tmp.x + */ +static void sin_approx(struct radeon_transform_context* t, + struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants) +{ + GLuint tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_MUL, dstregtmpmask(tempreg, WRITEMASK_XY), + swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + srcreg(PROGRAM_CONSTANT, constants[0])); + emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_X), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); + emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_Y), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X))); + emit3(t->Program, OPCODE_MAD, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); +} + +/** + * Translate the trigonometric functions COS, SIN, and SCS + * using only the basic instructions + * MUL, MAD, FRC + */ +GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + if (inst->Opcode != OPCODE_COS && + inst->Opcode != OPCODE_SIN && + inst->Opcode != OPCODE_SCS) + return GL_FALSE; + + GLuint constants[2]; + GLuint tempreg = radeonFindFreeTemporary(t); + + 
sincos_constants(t, constants); + + if (inst->Opcode == OPCODE_COS) { + // MAD tmp.w, src.x, 1/(2*PI), 0.75 + // FRC tmp.w, tmp.w + // MAD tmp.w, tmp.w, 2*PI, -PI + emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); + emit1(t->Program, OPCODE_FRC, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + sin_approx(t, inst->DstReg, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + constants); + } else if (inst->Opcode == OPCODE_SIN) { /* same range reduction as COS, but biased by 0.5 instead of 0.75 */ + emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); + emit1(t->Program, OPCODE_FRC, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), 
SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + sin_approx(t, inst->DstReg, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + constants); + } else { /* SCS: tmp.x uses the 0.75 bias (cosine -> dst.x), tmp.y the 0.5 bias (sine -> dst.y) */ + emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_XY), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)); + emit1(t->Program, OPCODE_FRC, dstregtmpmask(tempreg, WRITEMASK_XY), + srcreg(PROGRAM_TEMPORARY, tempreg)); + emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_XY), + srcreg(PROGRAM_TEMPORARY, tempreg), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + struct prog_dst_register dst = inst->DstReg; + + dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X; + sin_approx(t, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + constants); + + dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y; + sin_approx(t, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + constants); + } + + return GL_TRUE; +} diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.h b/src/mesa/drivers/dri/r300/radeon_program_alu.h index 858c5ed0b8c..3fe6153fd82 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.h +++ b/src/mesa/drivers/dri/r300/radeon_program_alu.h @@ -35,4 +35,9 @@ GLboolean radeonTransformALU( struct prog_instruction*, void*); +GLboolean radeonTransformTrigSimple( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + #endif /* __RADEON_PROGRAM_ALU_H_ */ -- 2.30.2