+static void
+exec_scs(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
+ union tgsi_exec_channel arg;
+ union tgsi_exec_channel result;
+
+ fetch_source(mach, &arg, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ micro_cos(&result, &arg);
+ store_dest(mach, &result, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ micro_sin(&result, &arg);
+ store_dest(mach, &result, &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ store_dest(mach, &ZeroVec, &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+
+static void
+exec_x2d(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[4];
+ union tgsi_exec_channel d[2];
+
+ fetch_source(mach, &r[0], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &r[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XZ) {
+ fetch_source(mach, &r[2], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[2], &r[2], &r[0]);
+ fetch_source(mach, &r[3], &inst->Src[2], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[3], &r[3], &r[1]);
+ micro_add(&r[2], &r[2], &r[3]);
+ fetch_source(mach, &r[3], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_add(&d[0], &r[2], &r[3]);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YW) {
+ fetch_source(mach, &r[2], &inst->Src[2], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[2], &r[2], &r[0]);
+ fetch_source(mach, &r[3], &inst->Src[2], CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[3], &r[3], &r[1]);
+ micro_add(&r[2], &r[2], &r[3]);
+ fetch_source(mach, &r[3], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ micro_add(&d[1], &r[2], &r[3]);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ store_dest(mach, &d[0], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ store_dest(mach, &d[1], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ store_dest(mach, &d[0], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &d[1], &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+
+static void
+exec_rfl(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[9];
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
+ /* r0 = dp3(src0, src0) */
+ fetch_source(mach, &r[2], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[0], &r[2], &r[2]);
+ fetch_source(mach, &r[4], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[8], &r[4], &r[4]);
+ micro_add(&r[0], &r[0], &r[8]);
+ fetch_source(mach, &r[6], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[8], &r[6], &r[6]);
+ micro_add(&r[0], &r[0], &r[8]);
+
+ /* r1 = dp3(src0, src1) */
+ fetch_source(mach, &r[3], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[1], &r[2], &r[3]);
+ fetch_source(mach, &r[5], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[8], &r[4], &r[5]);
+ micro_add(&r[1], &r[1], &r[8]);
+ fetch_source(mach, &r[7], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&r[8], &r[6], &r[7]);
+ micro_add(&r[1], &r[1], &r[8]);
+
+ /* r1 = 2 * r1 / r0 */
+ micro_add(&r[1], &r[1], &r[1]);
+ micro_div(&r[1], &r[1], &r[0]);
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ micro_mul(&r[2], &r[2], &r[1]);
+ micro_sub(&r[2], &r[2], &r[3]);
+ store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ micro_mul(&r[4], &r[4], &r[1]);
+ micro_sub(&r[4], &r[4], &r[5]);
+ store_dest(mach, &r[4], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ micro_mul(&r[6], &r[6], &r[1]);
+ micro_sub(&r[6], &r[6], &r[7]);
+ store_dest(mach, &r[6], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+
+static void
+exec_xpd(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[6];
+ union tgsi_exec_channel d[3];
+
+ fetch_source(mach, &r[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &r[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+
+ micro_mul(&r[2], &r[0], &r[1]);
+
+ fetch_source(mach, &r[3], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &r[4], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+
+ micro_mul(&r[5], &r[3], &r[4] );
+ micro_sub(&d[CHAN_X], &r[2], &r[5]);
+
+ fetch_source(mach, &r[2], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+
+ micro_mul(&r[3], &r[3], &r[2]);
+
+ fetch_source(mach, &r[5], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+
+ micro_mul(&r[1], &r[1], &r[5]);
+ micro_sub(&d[CHAN_Y], &r[3], &r[1]);
+
+ micro_mul(&r[5], &r[5], &r[4]);
+ micro_mul(&r[0], &r[0], &r[2]);
+ micro_sub(&d[CHAN_Z], &r[5], &r[0]);
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ store_dest(mach, &d[CHAN_X], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+
+static void
+exec_dst(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[2];
+ union tgsi_exec_channel d[4];
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ fetch_source(mach, &r[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &r[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&d[CHAN_Y], &r[0], &r[1]);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ fetch_source(mach, &d[CHAN_Z], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ fetch_source(mach, &d[CHAN_W], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &d[CHAN_W], &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+
+static void
+exec_log(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[3];
+
+ fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */
+ micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */
+ micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ store_dest(mach, &r[0], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */
+ micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
+ store_dest(mach, &r[0], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ store_dest(mach, &r[1], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+
+static void
+exec_exp(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[3];
+
+ fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */
+ store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
+ store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */
+ store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+
+static void
+exec_lit(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ union tgsi_exec_channel r[3];
+ union tgsi_exec_channel d[3];
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
+ fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+ micro_max(&d[CHAN_Y], &r[0], &ZeroVec);
+ store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ }
+
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
+ fetch_source(mach, &r[1], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+ micro_max(&r[1], &r[1], &ZeroVec);
+
+ fetch_source(mach, &r[2], &inst->Src[0], CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ micro_min(&r[2], &r[2], &P128Vec);
+ micro_max(&r[2], &r[2], &M128Vec);
+ micro_pow(&r[1], &r[1], &r[2]);
+ micro_lt(&d[CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
+ store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+