static struct rc_instruction *emit1(
struct radeon_compiler * c, struct rc_instruction * after,
- rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
- struct rc_src_register SrcReg)
+ rc_opcode Opcode, struct rc_sub_instruction * base,
+ struct rc_dst_register DstReg, struct rc_src_register SrcReg)
{
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+ if (base) {
+ memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
+ }
+
fpi->U.I.Opcode = Opcode;
- fpi->U.I.SaturateMode = Saturate;
fpi->U.I.DstReg = DstReg;
fpi->U.I.SrcReg[0] = SrcReg;
return fpi;
static struct rc_instruction *emit2(
struct radeon_compiler * c, struct rc_instruction * after,
- rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+ rc_opcode Opcode, struct rc_sub_instruction * base,
+ struct rc_dst_register DstReg,
struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
{
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+ if (base) {
+ memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
+ }
+
fpi->U.I.Opcode = Opcode;
- fpi->U.I.SaturateMode = Saturate;
fpi->U.I.DstReg = DstReg;
fpi->U.I.SrcReg[0] = SrcReg0;
fpi->U.I.SrcReg[1] = SrcReg1;
static struct rc_instruction *emit3(
struct radeon_compiler * c, struct rc_instruction * after,
- rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+ rc_opcode Opcode, struct rc_sub_instruction * base,
+ struct rc_dst_register DstReg,
struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
struct rc_src_register SrcReg2)
{
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+ if (base) {
+ memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
+ }
+
fpi->U.I.Opcode = Opcode;
- fpi->U.I.SaturateMode = Saturate;
fpi->U.I.DstReg = DstReg;
fpi->U.I.SrcReg[0] = SrcReg0;
fpi->U.I.SrcReg[1] = SrcReg1;
static struct rc_dst_register dstregtmpmask(int index, int mask)
{
- struct rc_dst_register dst = {0};
+ struct rc_dst_register dst = {0, 0, 0};
dst.File = RC_FILE_TEMPORARY;
dst.Index = index;
dst.WriteMask = mask;
.Index = 0,
.Swizzle = RC_SWIZZLE_1111
};
+
+/* Source operand with no register file (RC_FILE_NONE) and the
+ * RC_SWIZZLE_HHHH swizzle. transform_ROUND() uses it as the ".5" addend,
+ * so the swizzle presumably selects the constant 0.5 in every channel --
+ * TODO confirm against the RC_SWIZZLE_* definitions. */
+static const struct rc_src_register builtin_half = {
+ .File = RC_FILE_NONE,
+ .Index = 0,
+ .Swizzle = RC_SWIZZLE_HHHH
+};
+
static const struct rc_src_register srcreg_undefined = {
.File = RC_FILE_NONE,
.Index = 0,
struct rc_src_register src = inst->U.I.SrcReg[0];
src.Abs = 1;
src.Negate = RC_MASK_NONE;
- emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src);
+ emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, inst->U.I.DstReg, src);
rc_remove_instruction(inst);
}
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0]));
- emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg,
inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index));
rc_remove_instruction(inst);
}
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst,
inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]);
- emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit2(c, inst->Prev, RC_OPCODE_MAX, &inst->U.I, inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]);
rc_remove_instruction(inst);
}
src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
src1.Swizzle &= ~(63 << (3 * 2));
src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
- emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+ emit2(c, inst->Prev, RC_OPCODE_DP3, &inst->U.I, inst->U.I.DstReg, src0, src1);
rc_remove_instruction(inst);
}
src0.Negate &= ~RC_MASK_W;
src0.Swizzle &= ~(7 << (3 * 3));
src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3);
- emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
+ emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
rc_remove_instruction(inst);
}
static void transform_DST(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit2(c, inst->Prev, RC_OPCODE_MUL, &inst->U.I, inst->U.I.DstReg,
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE),
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W));
rc_remove_instruction(inst);
{
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]);
- emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg,
inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
rc_remove_instruction(inst);
}
+/**
+ * Lower TRUNC to the sequence FRC, ADD, CMP.
+ *
+ * Definition of trunc:
+ *   trunc(x) = (abs(x) - fract(abs(x))) * sgn(x)
+ *
+ * The multiplication by sgn(x) is expressed with CMP:
+ *   y * sgn(x) = (x < 0 ? -y : y)
+ *
+ * The closing CMP is emitted with the original sub-instruction as its base;
+ * the original instruction is removed afterwards.
+ */
+static void transform_TRUNC(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_dst_register tmp_dst = try_to_reuse_dst(c, inst);
+	struct rc_src_register tmp_src = srcreg(RC_FILE_TEMPORARY, tmp_dst.Index);
+	struct rc_src_register x = inst->U.I.SrcReg[0];
+	struct rc_src_register abs_x = absolute(x);
+
+	/* tmp = fract(abs(x)) */
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, tmp_dst, abs_x);
+
+	/* tmp = abs(x) - tmp */
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, tmp_dst, abs_x,
+		negate(tmp_src));
+
+	/* dst = (x < 0) ? -tmp : tmp */
+	emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg, x,
+		negate(tmp_src), tmp_src);
+
+	rc_remove_instruction(inst);
+}
+
/**
* Definition of LIT (from ARB_fragment_program):
*
swizzle_wwww(srctemp));
/* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */
- emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode,
+ emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I,
dstregtmpmask(temp, RC_MASK_Z),
negate(swizzle_xxxx(srctemp)),
swizzle_wwww(srctemp),
builtin_zero);
/* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */
- emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode,
+ emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I,
dstregtmpmask(temp, RC_MASK_XYW),
swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE));
emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
dst,
inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2]));
- emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
+ emit3(c, inst->Prev, RC_OPCODE_MAD, &inst->U.I,
inst->U.I.DstReg,
inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]);
emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0]));
emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1]));
- emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc);
+ emit1(c, inst->Prev, RC_OPCODE_EX2, &inst->U.I, inst->U.I.DstReg, tempsrc);
+
+ rc_remove_instruction(inst);
+}
+/* dst = ROUND(src) :
+ * add = src + .5
+ * frac = FRC(add)
+ * dst = add - frac
+ *
+ * According to the GLSL spec, the implementor can decide which way to round
+ * when the fraction is .5. With FRC(x) = x - floor(x) (the
+ * ARB_fragment_program definition), add - frac is floor(src + .5), so a
+ * fraction of exactly .5 is rounded up, towards positive infinity.
+ *
+ * The two helper instructions write fresh temporaries. The final ADD takes
+ * the original sub-instruction as its base, so that fields such as the
+ * saturate mode carry over to the instruction that writes the result, as
+ * in the other transformations in this file.
+ */
+static void transform_ROUND(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	unsigned int mask = inst->U.I.DstReg.WriteMask;
+	unsigned int frac_index, add_index;
+	struct rc_dst_register frac_dst, add_dst;
+	struct rc_src_register frac_src, add_src;
+
+	/* add = src + .5 */
+	add_index = rc_find_free_temporary(c);
+	add_dst = dstregtmpmask(add_index, mask);
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, add_dst, inst->U.I.SrcReg[0],
+		builtin_half);
+	add_src = srcreg(RC_FILE_TEMPORARY, add_dst.Index);
+
+	/* frac = FRC(add)
+	 * The second temporary is looked up only after the first ADD has
+	 * been emitted, as in the original statement order. */
+	frac_index = rc_find_free_temporary(c);
+	frac_dst = dstregtmpmask(frac_index, mask);
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, frac_dst, add_src);
+	frac_src = srcreg(RC_FILE_TEMPORARY, frac_dst.Index);
+
+	/* dst = add - frac */
+	emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg,
+		add_src, negate(frac_src));
rc_remove_instruction(inst);
}
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
- emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one);
rc_remove_instruction(inst);
+/**
+ * Replace SFL with a MOV of builtin_zero into the original destination.
+ * The original sub-instruction is handed to emit1() as the base for the
+ * MOV, and the original instruction is removed afterwards.
+ */
static void transform_SFL(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero);
+ emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, inst->U.I.DstReg, builtin_zero);
rc_remove_instruction(inst);
}
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
- emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
rc_remove_instruction(inst);
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
- emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
rc_remove_instruction(inst);
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
- emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
rc_remove_instruction(inst);
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
- emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
rc_remove_instruction(inst);
struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
- emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero);
rc_remove_instruction(inst);
emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst,
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
- emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit3(c, inst->Prev, RC_OPCODE_MAD, &inst->U.I, inst->U.I.DstReg,
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
case RC_OPCODE_LIT: transform_LIT(c, inst); return 1;
case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
case RC_OPCODE_POW: transform_POW(c, inst); return 1;
+ case RC_OPCODE_ROUND: transform_ROUND(c, inst); return 1;
case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1;
case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1;
case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
case RC_OPCODE_SSG: transform_SSG(c, inst); return 1;
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+ case RC_OPCODE_TRUNC: transform_TRUNC(c, inst); return 1;
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
default:
return 0;
src1.Negate &= ~RC_MASK_W;
src1.Swizzle &= ~(7 << (3 * 3));
src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
- emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+ emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, src1);
rc_remove_instruction(inst);
}
rc_remove_instruction(inst);
}
+/**
+ * Vertex-program lowering of TRUNC: run the generic transform_TRUNC() and
+ * then hand the instruction it leaves in place of the original to
+ * transform_r300_vertex_CMP().
+ */
+static void transform_vertex_TRUNC(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* Each transformation removes the instruction it is given and puts
+	 * its replacement directly in front of this fixed successor, so the
+	 * instruction to process is always successor->Prev. */
+	struct rc_instruction *successor = inst->Next;
+	struct rc_instruction *current;
+
+	current = successor->Prev;
+	transform_TRUNC(c, current);
+
+	current = successor->Prev;
+	transform_r300_vertex_CMP(c, current);
+}
+
/**
* For use with rc_local_transform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1;
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+ case RC_OPCODE_TRUNC: transform_vertex_TRUNC(c, inst); return 1;
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
default:
return 0;
unsigned srctmp)
{
if (inst->U.I.Opcode == RC_OPCODE_COS) {
- emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ emit1(c, inst->Prev, RC_OPCODE_COS, &inst->U.I, inst->U.I.DstReg,
srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
- emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
+ emit1(c, inst->Prev, RC_OPCODE_SIN, &inst->U.I,
inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
} else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
struct rc_dst_register moddst = inst->U.I.DstReg;
if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
moddst.WriteMask = RC_MASK_X;
- emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
+ emit1(c, inst->Prev, RC_OPCODE_COS, &inst->U.I, moddst,
srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
}
if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
moddst.WriteMask = RC_MASK_Y;
- emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
+ emit1(c, inst->Prev, RC_OPCODE_SIN, &inst->U.I, moddst,
srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
}
}
}
/**
+ * IF Temp[0].x -> IF Temp[0].x
+ * ... -> ...
+ * KILP -> KIL -abs(Temp[0].x)
+ * ... -> ...
+ * ENDIF -> ENDIF
+ *
+ * === OR ===
+ *
* IF Temp[0].x -\
* KILP - > KIL -abs(Temp[0].x)
* ENDIF -/
*
- * This needs to be done in its own pass, because it modifies the instructions
- * before and after KILP.
+ * === OR ===
+ *
+ * IF Temp[0].x -> IF Temp[0].x
+ * ... -> ...
+ * ELSE -> ELSE
+ * ... -> ...
+ * KILP -> KIL -abs(Temp[0].x)
+ * ... -> ...
+ * ENDIF -> ENDIF
+ *
+ * === OR ===
+ *
+ * KILP -> KIL -none.1111
+ *
+ * This needs to be done in its own pass, because it might modify the
+ * instructions before and after KILP.
*/
void rc_transform_KILP(struct radeon_compiler * c, void *user)
{
struct rc_instruction * inst;
for (inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions; inst = inst->Next) {
+		struct rc_instruction * if_inst;
+		unsigned in_if = 0;
+		/* ENDIFs passed during the backward walk whose matching IF
+		 * has not been reached yet. */
+		unsigned closed_ifs = 0;
if (inst->U.I.Opcode != RC_OPCODE_KILP)
continue;
+		/* Search backwards for the IF that encloses this KILP. An IF
+		 * that pairs with an ENDIF already passed on the way back
+		 * belongs to a block that ended before the KILP and must not
+		 * be taken as the enclosing one. */
+		for (if_inst = inst->Prev; if_inst != &c->Program.Instructions;
+						if_inst = if_inst->Prev) {
+
+			if (if_inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+				closed_ifs++;
+			} else if (if_inst->U.I.Opcode == RC_OPCODE_IF) {
+				if (closed_ifs == 0) {
+					in_if = 1;
+					break;
+				}
+				closed_ifs--;
+			}
+		}
+
inst->U.I.Opcode = RC_OPCODE_KIL;
- if (inst->Prev->U.I.Opcode != RC_OPCODE_IF
- || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
+		/* Outside of any IF the kill is unconditional. */
+		if (!in_if) {
inst->U.I.SrcReg[0] = negate(builtin_one);
} else {
-
+			/* This should work even if the KILP is inside the ELSE
+			 * block, because -0.0 is considered negative. */
inst->U.I.SrcReg[0] =
- negate(absolute(inst->Prev->U.I.SrcReg[0]));
- /* Remove IF */
- rc_remove_instruction(inst->Prev);
- /* Remove ENDIF */
- rc_remove_instruction(inst->Next);
+				negate(absolute(if_inst->U.I.SrcReg[0]));
+
+			/* Only when the KILP is the sole content of the IF
+			 * block may the surrounding IF/ENDIF be dropped; any
+			 * other neighbours are unrelated instructions and
+			 * must be left alone. */
+			if (inst->Prev->U.I.Opcode == RC_OPCODE_IF
+			    && inst->Next->U.I.Opcode == RC_OPCODE_ENDIF) {
+
+				/* Optimize the special case:
+				 * IF Temp[0].x
+				 * KILP
+				 * ENDIF
+				 */
+
+				/* Remove IF */
+				rc_remove_instruction(inst->Prev);
+				/* Remove ENDIF */
+				rc_remove_instruction(inst->Next);
+			}
}
}
}