rc_remove_instruction(inst);
}
+static void transform_CEIL(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* Assuming:
+ * ceil(x) = -floor(-x)
+ *
+ * After inlining floor:
+ * ceil(x) = -(-x-frac(-x))
+ *
+ * After simplification:
+ * ceil(x) = x+frac(-x)
+ */
+
+ int tempreg = rc_find_free_temporary(c);
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg));
+ rc_remove_instruction(inst);
+}
+
static void transform_DP3(struct radeon_compiler* c,
struct rc_instruction* inst)
{
temp = inst->U.I.DstReg.Index;
srctemp = srcreg(RC_FILE_TEMPORARY, temp);
- // tmp.x = max(0.0, Src.x);
- // tmp.y = max(0.0, Src.y);
- // tmp.w = clamp(Src.z, -128+eps, 128-eps);
+ /* tmp.x = max(0.0, Src.x); */
+ /* tmp.y = max(0.0, Src.y); */
+ /* tmp.w = clamp(Src.z, -128+eps, 128-eps); */
emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
dstregtmpmask(temp, RC_MASK_XYW),
inst->U.I.SrcReg[0],
swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)));
- // tmp.w = Pow(tmp.y, tmp.w)
+ /* tmp.w = Pow(tmp.y, tmp.w) */
emit1(c, inst->Prev, RC_OPCODE_LG2, 0,
dstregtmpmask(temp, RC_MASK_W),
swizzle(srctemp, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y));
dstregtmpmask(temp, RC_MASK_W),
swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W));
- // tmp.z = (tmp.x > 0) ? tmp.w : 0.0
+ /* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode,
dstregtmpmask(temp, RC_MASK_Z),
negate(swizzle(srctemp, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)),
swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
builtin_zero);
- // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
+ /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */
emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode,
dstregtmpmask(temp, RC_MASK_XYW),
swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE));
* no userData necessary.
*
* Eliminates the following ALU instructions:
- * ABS, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD
+ * ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD
* using:
* MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
*
{
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
+ case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_DST: transform_DST(c, inst); return 1;
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
}
+static void transform_r300_vertex_CMP(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* There is no decent CMP available, so let's rig one up.
+ * CMP is defined as dst = src0 < 0.0 ? src1 : src2
+ * The following sequence consumes two temps and two extra slots
+ * (the second temp and the second slot is consumed by transform_LRP),
+ * but should be equivalent:
+ *
+ * SLT tmp0, src0, 0.0
+ * LRP dst, tmp0, src1, src2
+ *
+ * Yes, I know, I'm a mad scientist. ~ C. & M. */
+ int tempreg0 = rc_find_free_temporary(c);
+
+ /* SLT tmp0, src0, 0.0 */
+ emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+ dstreg(RC_FILE_TEMPORARY, tempreg0),
+ inst->U.I.SrcReg[0], builtin_zero);
+
+ /* LRP dst, tmp0, src1, src2 */
+ transform_LRP(c,
+ emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
+ inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]));
+
+ rc_remove_instruction(inst);
+}
+
/**
* For use with radeonLocalTransform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
{
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
+ case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+ case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
case RC_OPCODE_DP3: transform_DP3(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
{
static const float SinCosConsts[2][4] = {
{
- 1.273239545, // 4/PI
- -0.405284735, // -4/(PI*PI)
- 3.141592654, // PI
- 0.2225 // weight
+ 1.273239545, /* 4/PI */
+ -0.405284735, /* -4/(PI*PI) */
+ 3.141592654, /* PI */
+ 0.2225 /* weight */
},
{
0.75,
0.5,
- 0.159154943, // 1/(2*PI)
- 6.283185307 // 2*PI
+ 0.159154943, /* 1/(2*PI) */
+ 6.283185307 /* 2*PI */
}
};
int i;
sincos_constants(c, constants);
if (inst->U.I.Opcode == RC_OPCODE_COS) {
- // MAD tmp.x, src, 1/(2*PI), 0.75
- // FRC tmp.x, tmp.x
- // MAD tmp.z, tmp.x, 2*PI, -PI
+ /* MAD tmp.x, src, 1/(2*PI), 0.75 */
+ /* FRC tmp.x, tmp.x */
+ /* MAD tmp.z, tmp.x, 2*PI, -PI */
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z),