static struct rc_instruction *emit1(
struct radeon_compiler * c, struct rc_instruction * after,
- rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
- struct rc_src_register SrcReg)
+ rc_opcode Opcode, struct rc_sub_instruction * base,
+ struct rc_dst_register DstReg, struct rc_src_register SrcReg)
{
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+ if (base) {
+ memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
+ }
+
fpi->U.I.Opcode = Opcode;
- fpi->U.I.SaturateMode = Saturate;
fpi->U.I.DstReg = DstReg;
fpi->U.I.SrcReg[0] = SrcReg;
return fpi;
static struct rc_instruction *emit2(
struct radeon_compiler * c, struct rc_instruction * after,
- rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+ rc_opcode Opcode, struct rc_sub_instruction * base,
+ struct rc_dst_register DstReg,
struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
{
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+ if (base) {
+ memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
+ }
+
fpi->U.I.Opcode = Opcode;
- fpi->U.I.SaturateMode = Saturate;
fpi->U.I.DstReg = DstReg;
fpi->U.I.SrcReg[0] = SrcReg0;
fpi->U.I.SrcReg[1] = SrcReg1;
static struct rc_instruction *emit3(
struct radeon_compiler * c, struct rc_instruction * after,
- rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+ rc_opcode Opcode, struct rc_sub_instruction * base,
+ struct rc_dst_register DstReg,
struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
struct rc_src_register SrcReg2)
{
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+ if (base) {
+ memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
+ }
+
fpi->U.I.Opcode = Opcode;
- fpi->U.I.SaturateMode = Saturate;
fpi->U.I.DstReg = DstReg;
fpi->U.I.SrcReg[0] = SrcReg0;
fpi->U.I.SrcReg[1] = SrcReg1;
struct rc_src_register src = inst->U.I.SrcReg[0];
src.Abs = 1;
src.Negate = RC_MASK_NONE;
- emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src);
+ emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, inst->U.I.DstReg, src);
rc_remove_instruction(inst);
}
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0]));
- emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg,
inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index));
rc_remove_instruction(inst);
}
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst,
inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]);
- emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit2(c, inst->Prev, RC_OPCODE_MAX, &inst->U.I, inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]);
rc_remove_instruction(inst);
}
src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
src1.Swizzle &= ~(63 << (3 * 2));
src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
- emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+ emit2(c, inst->Prev, RC_OPCODE_DP3, &inst->U.I, inst->U.I.DstReg, src0, src1);
rc_remove_instruction(inst);
}
src0.Negate &= ~RC_MASK_W;
src0.Swizzle &= ~(7 << (3 * 3));
src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3);
- emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
+ emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
rc_remove_instruction(inst);
}
static void transform_DST(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit2(c, inst->Prev, RC_OPCODE_MUL, &inst->U.I, inst->U.I.DstReg,
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE),
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W));
rc_remove_instruction(inst);
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]);
- emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg,
inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
rc_remove_instruction(inst);
}
+static void transform_TRUNC(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* Definition of trunc:
+ * trunc(x) = (abs(x) - fract(abs(x))) * sgn(x)
+ *
+ * The multiplication by sgn(x) can be simplified using CMP:
+ * y * sgn(x) = (x < 0 ? -y : y)
+ */
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, absolute(inst->U.I.SrcReg[0]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, absolute(inst->U.I.SrcReg[0]),
+ negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, inst->U.I.SrcReg[0],
+ negate(srcreg(RC_FILE_TEMPORARY, dst.Index)), srcreg(RC_FILE_TEMPORARY, dst.Index));
+ rc_remove_instruction(inst);
+}
+
/**
* Definition of LIT (from ARB_fragment_program):
*
swizzle_wwww(srctemp));
/* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */
- emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode,
+ emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I,
dstregtmpmask(temp, RC_MASK_Z),
negate(swizzle_xxxx(srctemp)),
swizzle_wwww(srctemp),
builtin_zero);
/* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */
- emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode,
+ emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I,
dstregtmpmask(temp, RC_MASK_XYW),
swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE));
emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
dst,
inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2]));
- emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
+ emit3(c, inst->Prev, RC_OPCODE_MAD, &inst->U.I,
inst->U.I.DstReg,
inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]);
emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0]));
emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1]));
- emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc);
+ emit1(c, inst->Prev, RC_OPCODE_EX2, &inst->U.I, inst->U.I.DstReg, tempsrc);
rc_remove_instruction(inst);
}
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
- emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one);
rc_remove_instruction(inst);
static void transform_SFL(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero);
+ emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, inst->U.I.DstReg, builtin_zero);
rc_remove_instruction(inst);
}
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
- emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
rc_remove_instruction(inst);
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
- emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
rc_remove_instruction(inst);
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
- emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
rc_remove_instruction(inst);
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
- emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
rc_remove_instruction(inst);
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
- emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero);
rc_remove_instruction(inst);
emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst,
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
- emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit3(c, inst->Prev, RC_OPCODE_MAD, &inst->U.I, inst->U.I.DstReg,
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
case RC_OPCODE_SSG: transform_SSG(c, inst); return 1;
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+ case RC_OPCODE_TRUNC: transform_TRUNC(c, inst); return 1;
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
default:
return 0;
src1.Negate &= ~RC_MASK_W;
src1.Swizzle &= ~(7 << (3 * 3));
src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
- emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+ emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, src1);
rc_remove_instruction(inst);
}
rc_remove_instruction(inst);
}
+static void transform_vertex_TRUNC(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ struct rc_instruction *next = inst->Next;
+
+ /* next->Prev is removed after each transformation and replaced
+ * by a new instruction. */
+ transform_TRUNC(c, next->Prev);
+ transform_r300_vertex_CMP(c, next->Prev);
+}
+
/**
* For use with rc_local_transform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1;
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+ case RC_OPCODE_TRUNC: transform_vertex_TRUNC(c, inst); return 1;
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
default:
return 0;
unsigned srctmp)
{
if (inst->U.I.Opcode == RC_OPCODE_COS) {
- emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit1(c, inst->Prev, RC_OPCODE_COS, &inst->U.I, inst->U.I.DstReg,
srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
- emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
+ emit1(c, inst->Prev, RC_OPCODE_SIN, &inst->U.I,
inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
} else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
struct rc_dst_register moddst = inst->U.I.DstReg;
if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
moddst.WriteMask = RC_MASK_X;
- emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
+ emit1(c, inst->Prev, RC_OPCODE_COS, &inst->U.I, moddst,
srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
}
if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
moddst.WriteMask = RC_MASK_Y;
- emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
+ emit1(c, inst->Prev, RC_OPCODE_SIN, &inst->U.I, moddst,
srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
}
}
/**
* IF Temp[0].x -> IF Temp[0].x
* ... -> ...
- * KILP -> KIL -abs(Temp[0].x)
+ * KILL -> KIL -abs(Temp[0].x)
* ... -> ...
* ENDIF -> ENDIF
*
* === OR ===
*
* IF Temp[0].x -\
- * KILP - > KIL -abs(Temp[0].x)
+ * KILL - > KIL -abs(Temp[0].x)
* ENDIF -/
*
* === OR ===
* ... -> ...
* ELSE -> ELSE
* ... -> ...
- * KILP -> KIL -abs(Temp[0].x)
+ * KILL -> KIL -abs(Temp[0].x)
* ... -> ...
* ENDIF -> ENDIF
*
* === OR ===
*
- * KILP -> KIL -none.1111
+ * KILL -> KIL -none.1111
*
* This needs to be done in its own pass, because it might modify the
- * instructions before and after KILP.
+ * instructions before and after KILL.
*/
-void rc_transform_KILP(struct radeon_compiler * c, void *user)
+void rc_transform_KILL(struct radeon_compiler * c, void *user)
{
struct rc_instruction * inst;
for (inst = c->Program.Instructions.Next;
}
}
}
+
+int rc_force_output_alpha_to_one(struct radeon_compiler *c,
+ struct rc_instruction *inst, void *data)
+{
+ struct r300_fragment_program_compiler *fragc = (struct r300_fragment_program_compiler*)c;
+ const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned tmp;
+
+ if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT ||
+ inst->U.I.DstReg.Index == fragc->OutputDepth)
+ return 1;
+
+ tmp = rc_find_free_temporary(c);
+
+ /* Insert MOV after inst, set alpha to 1. */
+ emit1(c, inst, RC_OPCODE_MOV, 0, inst->U.I.DstReg,
+ srcregswz(RC_FILE_TEMPORARY, tmp, RC_SWIZZLE_XYZ1));
+
+ /* Re-route the destination of inst to the source of mov. */
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = tmp;
+
+ /* Move the saturate output modifier to the MOV instruction
+ * (for better copy propagation). */
+ inst->Next->U.I.SaturateMode = inst->U.I.SaturateMode;
+ inst->U.I.SaturateMode = RC_SATURATE_NONE;
+ return 1;
+}