return swizzled;
}
-static struct rc_src_register scalar(struct rc_src_register reg)
+static struct rc_src_register swizzle_smear(struct rc_src_register reg,
+ rc_swizzle x)
{
- return swizzle(reg, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X);
+ return swizzle(reg, x, x, x, x);
+}
+
+static struct rc_src_register swizzle_xxxx(struct rc_src_register reg)
+{
+ return swizzle_smear(reg, RC_SWIZZLE_X);
+}
+
+static struct rc_src_register swizzle_yyyy(struct rc_src_register reg)
+{
+ return swizzle_smear(reg, RC_SWIZZLE_Y);
+}
+
+static struct rc_src_register swizzle_zzzz(struct rc_src_register reg)
+{
+ return swizzle_smear(reg, RC_SWIZZLE_Z);
+}
+
+static struct rc_src_register swizzle_wwww(struct rc_src_register reg)
+{
+ return swizzle_smear(reg, RC_SWIZZLE_W);
}
static void transform_ABS(struct radeon_compiler* c,
RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3));
emit2(c, inst->Prev, RC_OPCODE_MIN, 0,
dstregtmpmask(temp, RC_MASK_Z),
- swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ swizzle_wwww(srctemp),
negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)));
/* tmp.w = Pow(tmp.y, tmp.w) */
emit1(c, inst->Prev, RC_OPCODE_LG2, 0,
dstregtmpmask(temp, RC_MASK_W),
- swizzle(srctemp, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y));
+ swizzle_yyyy(srctemp));
emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
dstregtmpmask(temp, RC_MASK_W),
- swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
- swizzle(srctemp, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z));
+ swizzle_wwww(srctemp),
+ swizzle_zzzz(srctemp));
emit1(c, inst->Prev, RC_OPCODE_EX2, 0,
dstregtmpmask(temp, RC_MASK_W),
- swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W));
+ swizzle_wwww(srctemp));
/* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode,
dstregtmpmask(temp, RC_MASK_Z),
- negate(swizzle(srctemp, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)),
- swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ negate(swizzle_xxxx(srctemp)),
+ swizzle_wwww(srctemp),
builtin_zero);
/* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */
tempdst.WriteMask = RC_MASK_W;
tempsrc.Swizzle = RC_SWIZZLE_WWWW;
- emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, scalar(inst->U.I.SrcReg[0]));
- emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->U.I.SrcReg[1]));
+ emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0]));
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1]));
emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc);
rc_remove_instruction(inst);
rc_remove_instruction(inst);
}
+static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+ unsigned constant_swizzle;
+ int constant = rc_constants_add_immediate_scalar(&c->Program.Constants,
+ 0.0000000000000000001,
+ &constant_swizzle);
+
+ /* MOV dst, src */
+ emit1(c, inst->Prev, RC_OPCODE_MOV, 0,
+ dstreg(RC_FILE_TEMPORARY, tempreg),
+ inst->U.I.SrcReg[0]);
+
+ /* MAX dst.z, src, 0.00...001 */
+ emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+ dstregtmpmask(tempreg, RC_MASK_Y),
+ srcreg(RC_FILE_TEMPORARY, tempreg),
+ srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
+
+ inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg);
+}
+
+static void transform_r300_vertex_SEQ(struct radeon_compiler *c,
+ struct rc_instruction *inst)
+{
+ /* x = y <==> x >= y && y >= x */
+ int tmp = rc_find_free_temporary(c);
+
+ /* x <= y */
+ emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+ dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
+ inst->U.I.SrcReg[0],
+ inst->U.I.SrcReg[1]);
+
+ /* y <= x */
+ emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+ inst->U.I.DstReg,
+ inst->U.I.SrcReg[1],
+ inst->U.I.SrcReg[0]);
+
+ /* x && y = x * y */
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+ inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tmp),
+ srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_r300_vertex_SNE(struct radeon_compiler *c,
+ struct rc_instruction *inst)
+{
+ /* x != y <==> x < y || y < x */
+ int tmp = rc_find_free_temporary(c);
+
+ /* x < y */
+ emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+ dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
+ inst->U.I.SrcReg[0],
+ inst->U.I.SrcReg[1]);
+
+ /* y < x */
+ emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+ inst->U.I.DstReg,
+ inst->U.I.SrcReg[1],
+ inst->U.I.SrcReg[0]);
+
+ /* x || y = max(x, y) */
+ emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+ inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tmp),
+ srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_r300_vertex_SGT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* x > y <==> -x < -y */
+ inst->U.I.Opcode = RC_OPCODE_SLT;
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+}
+
+static void transform_r300_vertex_SLE(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* x <= y <==> -x >= -y */
+ inst->U.I.Opcode = RC_OPCODE_SGE;
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+}
+
/**
* For use with radeonLocalTransform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
case RC_OPCODE_DP3: transform_DP3(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
+ case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1;
case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
+ case RC_OPCODE_SEQ:
+ if (!c->is_r500) {
+ transform_r300_vertex_SEQ(c, inst);
+ return 1;
+ }
+ return 0;
+ case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
+ case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1;
+ case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1;
+ case RC_OPCODE_SNE:
+ if (!c->is_r500) {
+ transform_r300_vertex_SNE(c, inst);
+ return 1;
+ }
+ return 0;
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
unsigned int tempreg = rc_find_free_temporary(c);
emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY),
- swizzle(src, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
+ swizzle_xxxx(src),
srcreg(RC_FILE_CONSTANT, constants[0]));
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X),
- swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y),
- absolute(swizzle(src, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)),
- swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X));
+ swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ absolute(swizzle_xxxx(src)),
+ swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y),
- swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
- absolute(swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)),
- negate(swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)));
+ swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))),
+ negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))));
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst,
- swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y),
- swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
- swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X));
+ swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])),
+ swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
}
/**
/* FRC tmp.x, tmp.x */
/* MAD tmp.z, tmp.x, 2*PI, -PI */
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
- swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
- swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z),
- swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X));
+ swizzle_xxxx(inst->U.I.SrcReg[0]),
+ swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
+ swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1])));
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
- swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W));
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
- swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
- swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
- negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z)));
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+ negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
sin_approx(c, inst, inst->U.I.DstReg,
- swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
constants);
} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
- swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
- swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z),
- swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y));
+ swizzle_xxxx(inst->U.I.SrcReg[0]),
+ swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
+ swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1])));
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
- swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W));
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
- swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
- swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
- negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z)));
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+ negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
sin_approx(c, inst, inst->U.I.DstReg,
- swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
constants);
} else {
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
- swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
- swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z),
+ swizzle_xxxx(inst->U.I.SrcReg[0]),
+ swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W));
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY),
srcreg(RC_FILE_TEMPORARY, tempreg));
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
srcreg(RC_FILE_TEMPORARY, tempreg),
- swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
- negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z)));
+ swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+ negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
struct rc_dst_register dst = inst->U.I.DstReg;
dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X;
sin_approx(c, inst, dst,
- swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
+ swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
constants);
dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y;
sin_approx(c, inst, dst,
- swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y),
+ swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
constants);
}
return 1;
}
+static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c,
+ struct rc_instruction *inst,
+ unsigned srctmp)
+{
+ if (inst->U.I.Opcode == RC_OPCODE_COS) {
+ emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+ } else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
+ emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
+ inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+ } else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
+ struct rc_dst_register moddst = inst->U.I.DstReg;
+
+ if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
+ moddst.WriteMask = RC_MASK_X;
+ emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
+ srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+ }
+ if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
+ moddst.WriteMask = RC_MASK_Y;
+ emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
+ srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+ }
+ }
+
+ rc_remove_instruction(inst);
+}
+
/**
* Transform the trigonometric functions COS, SIN, and SCS
constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle);
emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W),
- swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
+ swizzle_xxxx(inst->U.I.SrcReg[0]),
srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
srcreg(RC_FILE_TEMPORARY, temp));
- if (inst->U.I.Opcode == RC_OPCODE_COS) {
- emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
- srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
- } else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
- emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
- inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
- } else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
- struct rc_dst_register moddst = inst->U.I.DstReg;
+ r300_transform_SIN_COS_SCS(c, inst, temp);
+ return 1;
+}
- if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
- moddst.WriteMask = RC_MASK_X;
- emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
- srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
- }
- if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
- moddst.WriteMask = RC_MASK_Y;
- emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
- srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
- }
- }
+/**
+ * Transform the trigonometric functions COS, SIN, and SCS
+ * so that the input to COS and SIN is always in the range [-PI, PI].
+ * SCS is replaced by one COS and one SIN instruction.
+ */
+int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
+ struct rc_instruction *inst,
+ void *unused)
+{
+ if (inst->U.I.Opcode != RC_OPCODE_COS &&
+ inst->U.I.Opcode != RC_OPCODE_SIN &&
+ inst->U.I.Opcode != RC_OPCODE_SCS)
+ return 0;
- rc_remove_instruction(inst);
+ /* Repeat x in the range [-PI, PI]:
+ *
+ * repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI
+ */
+
+ static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
+ unsigned int temp;
+ unsigned int constant;
+ temp = rc_find_free_temporary(c);
+ constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons);
+
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
+ swizzle_xxxx(inst->U.I.SrcReg[0]),
+ srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX),
+ srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
+ srcreg(RC_FILE_TEMPORARY, temp));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
+ srcreg(RC_FILE_TEMPORARY, temp),
+ srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ),
+ srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW));
+
+ r300_transform_SIN_COS_SCS(c, inst, temp);
return 1;
}
if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY)
return 0;
- inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE);
+ inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111;
inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
return 1;
}
+
+/**
+ * IF Temp[0].x -\
+ * KILP - > KIL -abs(Temp[0].x)
+ * ENDIF -/
+ *
+ * This needs to be done in its own pass, because it modifies the instructions
+ * before and after KILP.
+ */
+void radeonTransformKILP(struct radeon_compiler * c)
+{
+ struct rc_instruction * inst;
+ for (inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions; inst = inst->Next) {
+
+ if (inst->U.I.Opcode != RC_OPCODE_KILP
+ || inst->Prev->U.I.Opcode != RC_OPCODE_IF
+ || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
+ continue;
+ }
+ inst->U.I.Opcode = RC_OPCODE_KIL;
+ inst->U.I.SrcReg[0] = negate(absolute(inst->Prev->U.I.SrcReg[0]));
+
+ /* Remove IF */
+ rc_remove_instruction(inst->Prev);
+ /* Remove ENDIF */
+ rc_remove_instruction(inst->Next);
+ }
+}