boolean reg_is_itemp = FALSE;
uint sign_op;
- assert(swizzle >= 0);
- assert(swizzle <= 3);
+ assert(swizzle >= TGSI_SWIZZLE_X);
+ assert(swizzle <= TGSI_EXTSWIZZLE_ONE);
channel = swizzle;
break;
case TGSI_FILE_INPUT:
{
- /* offset is measured in quadwords, not bytes */
- int offset = src->SrcRegister.Index * 4 + channel;
- reg = get_itemp(gen);
- reg_is_itemp = TRUE;
- /* Load: reg = memory[(machine_reg) + offset] */
- spe_lqd(gen->f, reg, gen->inputs_reg, offset);
+ if(channel == TGSI_EXTSWIZZLE_ONE)
+ {
+ /* Load const one float and early out */
+ reg = get_const_one_reg(gen);
+ return reg;
+ }
+ else if(channel == TGSI_EXTSWIZZLE_ZERO)
+ {
+ /* Load const zero float and early out */
+ reg = get_itemp(gen);
+ spe_xor(gen->f, reg, reg, reg);
+ return reg;
+ }
+ else
+ {
+ /* offset is measured in quadwords, not bytes */
+ int offset = src->SrcRegister.Index * 4 + channel;
+ reg = get_itemp(gen);
+ reg_is_itemp = TRUE;
+ /* Load: reg = memory[(machine_reg) + offset] */
+ spe_lqd(gen->f, reg, gen->inputs_reg, offset);
+ }
}
break;
case TGSI_FILE_IMMEDIATE:
return true;
}
-
-
/**
* Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
* becomes (up to) four SPU "fa" instructions because we're doing SOA
int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
- int d_reg = get_dst_reg(gen, CHAN_X, &inst->FullDstRegisters[0]);
- /* d = x * x */
- spe_fm(gen->f, d_reg, s1_reg, s2_reg);
+ int tmp_reg = get_itemp(gen);
+ /* t = x0 * x1 */
+ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
- /* d = y * y + d */
- spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg);
+ /* t = y0 * y1 + t */
+ spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
- /* d = z * z + d */
- spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg);
+ /* t = z0 * z1 + t */
+ spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
}
}
emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
{
int ch;
- spe_comment(gen->f, -4, "DP3:");
+ spe_comment(gen->f, -4, "DP4:");
int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
- int d_reg = get_dst_reg(gen, CHAN_X, &inst->FullDstRegisters[0]);
- /* d = x * x */
- spe_fm(gen->f, d_reg, s1_reg, s2_reg);
+ int tmp_reg = get_itemp(gen);
+ /* t = x0 * x1 */
+ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
- /* d = y * y + d */
- spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg);
+ /* t = y0 * y1 + t */
+ spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
- /* d = z * z + d */
- spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg);
+ /* t = z0 * z1 + t */
+ spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
s1_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]);
s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
- /* d = w * w + d */
- spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg);
+ /* t = w0 * w1 + t */
+ spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
}
}
int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
- int d_reg = get_dst_reg(gen, CHAN_X, &inst->FullDstRegisters[0]);
- /* d = x * x */
- spe_fm(gen->f, d_reg, s1_reg, s2_reg);
+ int tmp_reg = get_itemp(gen);
+
+ /* t = x0 * x1 */
+ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
- /* d = y * y + d */
- spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg);
+ /* t = y0 * y1 + t */
+ spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
- /* d = z * z + d */
- spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg);
+ /* t = z0 * z1 + t */
+ spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
- /* d = w + d */
- spe_fa(gen->f, d_reg, s2_reg, d_reg);
+ /* t = w1 + t */
+ spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg);
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
- store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
}
}
return true;
}
+/**
+ * Emit cross product. See emit_ADD for comments.
+ *
+ * Emits SPU code for the three components of src0 x src1:
+ *   dst.x = y0 * z1 - z0 * y1
+ *   dst.y = z0 * x1 - x0 * z1
+ *   dst.z = x0 * y1 - y0 * x1
+ * where x0/y0/z0 are channels of FullSrcRegisters[0] and x1/y1/z1 are
+ * channels of FullSrcRegisters[1]. A component is stored only when its
+ * bit is set in the destination WriteMask; nothing is stored for the W
+ * channel by this function.
+ *
+ * A single temporary (tmp_reg) is reused for all three components: it
+ * first holds the product to be subtracted, then the final difference.
+ *
+ * NOTE(review): each component is stored before the operands of the next
+ * component are fetched. If the destination can be the same register as
+ * one of the sources, later components would presumably read a channel
+ * that was already overwritten -- confirm against store_dest_reg() and
+ * get_src_reg().
+ *
+ * gen:  code generation state (instruction buffer is gen->f)
+ * inst: the TGSI instruction being translated
+ * Returns true; there is no failure path in this function.
+ */
+static boolean
+emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ spe_comment(gen->f, -4, "XPD:");
+
+ int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+ int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+ int tmp_reg = get_itemp(gen);
+
+ /* X component, subtrahend first:
+  * t = z0 * y1 */
+ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
+
+ s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+ /* t = y0 * z1 - t
+  * (tmp_reg now holds the X component of the cross product) */
+ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
+
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) {
+ store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]);
+ }
+
+ s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+ /* Y component, subtrahend first:
+  * t = x0 * z1 */
+ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
+
+ s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+ /* t = z0 * x1 - t
+  * (tmp_reg now holds the Y component of the cross product) */
+ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
+
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) {
+ store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]);
+ }
+
+ s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+ /* Z component, subtrahend first:
+  * t = y0 * x1 */
+ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
+
+ s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
+ s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+ /* t = x0 * y1 - t
+  * (tmp_reg now holds the Z component of the cross product) */
+ spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
+
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) {
+ store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]);
+ }
+
+ free_itemps(gen); /* done with the temporaries obtained for this instruction */
+ return true;
+}
+
/**
* Emit set-if-greater-than.
* Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
return true;
}
+/**
+ * Emit trunc.
+ * Convert float to signed int
+ * Convert signed int to float
+ *
+ * For every channel enabled in the destination WriteMask, the source
+ * channel is converted to a signed integer and back to float in the
+ * destination register, and the result is then stored with
+ * store_dest_reg(). Channels whose WriteMask bit is clear are skipped
+ * entirely (no source fetch, no code emitted).
+ *
+ * NOTE(review): the round trip goes through a signed integer, so source
+ * values outside the signed 32-bit range presumably do not come back
+ * unchanged -- confirm against the SPU cflts/csflt definitions.
+ *
+ * gen:  code generation state (instruction buffer is gen->f)
+ * inst: the TGSI instruction being translated
+ * Returns true; there is no failure path in this function.
+ */
+static boolean
+emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ int ch;
+
+ spe_comment(gen->f, -4, "TRUNC:");
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+
+ /* Convert float to int
+  * (last argument 0: presumably the scale operand, i.e. no scaling) */
+ spe_cflts(gen->f, d_reg, s1_reg, 0);
+
+ /* Convert int to float
+  * (in place on d_reg, same 0 last argument as above) */
+ spe_csflt(gen->f, d_reg, d_reg, 0);
+
+ store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen); /* called once per written channel, before the next iteration */
+ }
+ }
+
+ return true;
+}
+
/**
* Emit floor.
* If negative int subtract one
spe_comment(gen->f, -4, "FLR:");
+ int zero_reg = get_itemp(gen);
+ spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
+
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
int tmp_reg = get_itemp(gen);
/* If negative, subtract 1.0 */
- spe_xor(gen->f, tmp_reg, tmp_reg, tmp_reg);
- spe_fcgt(gen->f, d_reg, tmp_reg, s1_reg);
- spe_selb(gen->f, tmp_reg, tmp_reg, get_const_one_reg(gen), d_reg);
+ spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
+ spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), d_reg);
spe_fs(gen->f, d_reg, s1_reg, tmp_reg);
/* Convert float to int */
spe_comment(gen->f, -4, "FLR:");
+ int zero_reg = get_itemp(gen);
+ spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
+
for (ch = 0; ch < 4; ch++) {
if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
int tmp_reg = get_itemp(gen);
/* If negative, subtract 1.0 */
- spe_xor(gen->f, tmp_reg, tmp_reg, tmp_reg);
- spe_fcgt(gen->f, d_reg, tmp_reg, s1_reg);
- spe_selb(gen->f, tmp_reg, tmp_reg, get_const_one_reg(gen), d_reg);
+ spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
+ spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), d_reg);
spe_fs(gen->f, d_reg, s1_reg, tmp_reg);
/* Convert float to int */
{
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_MOV:
+ case TGSI_OPCODE_SWZ:
return emit_MOV(gen, inst);
case TGSI_OPCODE_MUL:
return emit_MUL(gen, inst);
return emit_DP4(gen, inst);
case TGSI_OPCODE_DPH:
return emit_DPH(gen, inst);
+ case TGSI_OPCODE_XPD:
+ return emit_XPD(gen, inst);
case TGSI_OPCODE_RCP:
return emit_RCP(gen, inst);
case TGSI_OPCODE_RSQ:
return emit_MAX(gen, inst);
case TGSI_OPCODE_MIN:
return emit_MIN(gen, inst);
+ case TGSI_OPCODE_TRUNC:
+ return emit_TRUNC(gen, inst);
case TGSI_OPCODE_FLR:
return emit_FLR(gen, inst);
case TGSI_OPCODE_FRC: