dst->f[3] = ceilf(src->f[3]);
}
+/**
+ * Per-lane clamp: dst = src0 limited to the range [src1, src2].
+ * The lower bound is tested first, so it takes precedence whenever
+ * src0 lies below src1, regardless of how src1 and src2 compare.
+ */
+static void
+micro_clamp(union tgsi_exec_channel *dst,
+            const union tgsi_exec_channel *src0,
+            const union tgsi_exec_channel *src1,
+            const union tgsi_exec_channel *src2)
+{
+   uint lane;
+
+   for (lane = 0; lane < QUAD_SIZE; lane++) {
+      const float value = src0->f[lane];
+      const float lower = src1->f[lane];
+      const float upper = src2->f[lane];
+
+      if (value < lower) {
+         dst->f[lane] = lower;
+      } else if (value > upper) {
+         dst->f[lane] = upper;
+      } else {
+         dst->f[lane] = value;
+      }
+   }
+}
+
+/**
+ * Per-lane select on sign: dst = (src0 < 0) ? src1 : src2.
+ */
+static void
+micro_cmp(union tgsi_exec_channel *dst,
+          const union tgsi_exec_channel *src0,
+          const union tgsi_exec_channel *src1,
+          const union tgsi_exec_channel *src2)
+{
+   uint lane;
+
+   for (lane = 0; lane < QUAD_SIZE; lane++) {
+      const float picked = (src0->f[lane] < 0.0f) ? src1->f[lane]
+                                                  : src2->f[lane];
+
+      dst->f[lane] = picked;
+   }
+}
+
+/**
+ * Per-lane conditional: dst = (src2 > 0.5) ? src0 : src1.
+ * Note that the selector is the third operand, not the first.
+ */
+static void
+micro_cnd(union tgsi_exec_channel *dst,
+          const union tgsi_exec_channel *src0,
+          const union tgsi_exec_channel *src1,
+          const union tgsi_exec_channel *src2)
+{
+   uint lane;
+
+   for (lane = 0; lane < QUAD_SIZE; lane++) {
+      const float picked = (src2->f[lane] > 0.5f) ? src0->f[lane]
+                                                  : src1->f[lane];
+
+      dst->f[lane] = picked;
+   }
+}
+
static void
micro_cos(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
static void
micro_lrp(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1,
+ const union tgsi_exec_channel *src2)
{
- dst->f[0] = src[0].f[0] * (src[1].f[0] - src[2].f[0]) + src[2].f[0];
- dst->f[1] = src[0].f[1] * (src[1].f[1] - src[2].f[1]) + src[2].f[1];
- dst->f[2] = src[0].f[2] * (src[1].f[2] - src[2].f[2]) + src[2].f[2];
- dst->f[3] = src[0].f[3] * (src[1].f[3] - src[2].f[3]) + src[2].f[3];
+ dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
+ dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
+ dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
+ dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
}
static void
micro_mad(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1,
+ const union tgsi_exec_channel *src2)
{
- dst->f[0] = src[0].f[0] * src[1].f[0] + src[2].f[0];
- dst->f[1] = src[0].f[1] * src[1].f[1] + src[2].f[1];
- dst->f[2] = src[0].f[2] * src[1].f[2] + src[2].f[2];
- dst->f[3] = src[0].f[3] * src[1].f[3] + src[2].f[3];
+ dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
+ dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
+ dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
+ dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
}
static void
micro_rcp(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
{
+#if 0 /* for debugging */
+ assert(src->f[0] != 0.0f);
+ assert(src->f[1] != 0.0f);
+ assert(src->f[2] != 0.0f);
+ assert(src->f[3] != 0.0f);
+#endif
dst->f[0] = 1.0f / src->f[0];
dst->f[1] = 1.0f / src->f[1];
dst->f[2] = 1.0f / src->f[2];
micro_rsq(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
{
+#if 0 /* for debugging */
+ assert(src->f[0] != 0.0f);
+ assert(src->f[1] != 0.0f);
+ assert(src->f[2] != 0.0f);
+ assert(src->f[3] != 0.0f);
+#endif
dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
static void
micro_seq(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->f[0] = src[0].f[0] == src[1].f[0] ? 1.0f : 0.0f;
- dst->f[1] = src[0].f[1] == src[1].f[1] ? 1.0f : 0.0f;
- dst->f[2] = src[0].f[2] == src[1].f[2] ? 1.0f : 0.0f;
- dst->f[3] = src[0].f[3] == src[1].f[3] ? 1.0f : 0.0f;
+ dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
}
static void
micro_sge(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->f[0] = src[0].f[0] >= src[1].f[0] ? 1.0f : 0.0f;
- dst->f[1] = src[0].f[1] >= src[1].f[1] ? 1.0f : 0.0f;
- dst->f[2] = src[0].f[2] >= src[1].f[2] ? 1.0f : 0.0f;
- dst->f[3] = src[0].f[3] >= src[1].f[3] ? 1.0f : 0.0f;
+ dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
}
static void
static void
micro_sgt(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->f[0] = src[0].f[0] > src[1].f[0] ? 1.0f : 0.0f;
- dst->f[1] = src[0].f[1] > src[1].f[1] ? 1.0f : 0.0f;
- dst->f[2] = src[0].f[2] > src[1].f[2] ? 1.0f : 0.0f;
- dst->f[3] = src[0].f[3] > src[1].f[3] ? 1.0f : 0.0f;
+ dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
}
static void
static void
micro_sle(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->f[0] = src[0].f[0] <= src[1].f[0] ? 1.0f : 0.0f;
- dst->f[1] = src[0].f[1] <= src[1].f[1] ? 1.0f : 0.0f;
- dst->f[2] = src[0].f[2] <= src[1].f[2] ? 1.0f : 0.0f;
- dst->f[3] = src[0].f[3] <= src[1].f[3] ? 1.0f : 0.0f;
+ dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
}
static void
micro_slt(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->f[0] = src[0].f[0] < src[1].f[0] ? 1.0f : 0.0f;
- dst->f[1] = src[0].f[1] < src[1].f[1] ? 1.0f : 0.0f;
- dst->f[2] = src[0].f[2] < src[1].f[2] ? 1.0f : 0.0f;
- dst->f[3] = src[0].f[3] < src[1].f[3] ? 1.0f : 0.0f;
+ dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
}
static void
micro_sne(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->f[0] = src[0].f[0] != src[1].f[0] ? 1.0f : 0.0f;
- dst->f[1] = src[0].f[1] != src[1].f[1] ? 1.0f : 0.0f;
- dst->f[2] = src[0].f[2] != src[1].f[2] ? 1.0f : 0.0f;
- dst->f[3] = src[0].f[3] != src[1].f[3] ? 1.0f : 0.0f;
+ dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
+ dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
+ dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
+ dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
}
static void
static const union tgsi_exec_channel ZeroVec =
{ { 0.0, 0.0, 0.0, 0.0 } };
+/* Channel whose four floats are all 1.0; exec_nrm3() stores it as the W result. */
+static const union tgsi_exec_channel OneVec = {
+ {1.0f, 1.0f, 1.0f, 1.0f}
+};
-#define CHECK_INF_OR_NAN(chan) do {\
- assert(!util_is_inf_or_nan((chan)->f[0]));\
- assert(!util_is_inf_or_nan((chan)->f[1]));\
- assert(!util_is_inf_or_nan((chan)->f[2]));\
- assert(!util_is_inf_or_nan((chan)->f[3]));\
- } while (0)
+
+/**
+ * Assert that none of the four float values in 'chan' is infinite or NaN.
+ *
+ * NaN and Inf may legitimately occur during program execution and must
+ * not lead to crashes, so this is a debugging aid only: the call site
+ * visible in this file guards it with a constant-false condition, and it
+ * has to be enabled by hand when hunting for bad values.
+ *
+ * \param chan  channel whose f[0]..f[3] values are checked; not modified
+ */
+static INLINE void
+check_inf_or_nan(const union tgsi_exec_channel *chan)
+{
+ assert(!util_is_inf_or_nan((chan)->f[0]));
+ assert(!util_is_inf_or_nan((chan)->f[1]));
+ assert(!util_is_inf_or_nan((chan)->f[2]));
+ assert(!util_is_inf_or_nan((chan)->f[3]));
+}
#ifdef DEBUG
}
static void
-micro_add(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
+micro_add(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
dst->f[0] = src0->f[0] + src1->f[0];
dst->f[1] = src0->f[1] + src1->f[1];
}
static void
-micro_max(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
+micro_max(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
}
static void
-micro_min(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
+micro_min(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
}
static void
-micro_mul(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
+micro_mul(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
dst->f[0] = src0->f[0] * src1->f[0];
dst->f[1] = src0->f[1] * src1->f[1];
}
static void
-micro_sqrt( union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = sqrtf( src->f[0] );
- dst->f[1] = sqrtf( src->f[1] );
- dst->f[2] = sqrtf( src->f[2] );
- dst->f[3] = sqrtf( src->f[3] );
-}
-
-static void
-micro_sub(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
+micro_sub(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
dst->f[0] = src0->f[0] - src1->f[0];
dst->f[1] = src0->f[1] - src1->f[1];
}
static void
-fetch_src_file_channel(
- const struct tgsi_exec_machine *mach,
- const uint file,
- const uint swizzle,
- const union tgsi_exec_channel *index,
- union tgsi_exec_channel *chan )
-{
- switch( swizzle ) {
- case TGSI_SWIZZLE_X:
- case TGSI_SWIZZLE_Y:
- case TGSI_SWIZZLE_Z:
- case TGSI_SWIZZLE_W:
- switch( file ) {
- case TGSI_FILE_CONSTANT:
- assert(mach->Consts);
- if (index->i[0] < 0)
- chan->f[0] = 0.0f;
- else
- chan->f[0] = mach->Consts[index->i[0]][swizzle];
- if (index->i[1] < 0)
- chan->f[1] = 0.0f;
- else
- chan->f[1] = mach->Consts[index->i[1]][swizzle];
- if (index->i[2] < 0)
- chan->f[2] = 0.0f;
- else
- chan->f[2] = mach->Consts[index->i[2]][swizzle];
- if (index->i[3] < 0)
- chan->f[3] = 0.0f;
- else
- chan->f[3] = mach->Consts[index->i[3]][swizzle];
- break;
+fetch_src_file_channel(const struct tgsi_exec_machine *mach,
+ const uint file,
+ const uint swizzle,
+ const union tgsi_exec_channel *index,
+ const union tgsi_exec_channel *index2D,
+ union tgsi_exec_channel *chan)
+{
+ uint i;
- case TGSI_FILE_INPUT:
- case TGSI_FILE_SYSTEM_VALUE:
- chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
- chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
- chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
- chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
- break;
+ switch (file) {
+ case TGSI_FILE_CONSTANT:
+ for (i = 0; i < QUAD_SIZE; i++) {
+ assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS);
+ assert(mach->Consts[index2D->i[i]]);
- case TGSI_FILE_TEMPORARY:
- assert(index->i[0] < TGSI_EXEC_NUM_TEMPS);
- chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
- chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
- chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
- chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
- break;
+ if (index->i[i] < 0) {
+ chan->u[i] = 0;
+ } else {
+ const uint *p = (const uint *)mach->Consts[index2D->i[i]];
- case TGSI_FILE_IMMEDIATE:
- assert( index->i[0] < (int) mach->ImmLimit );
- chan->f[0] = mach->Imms[index->i[0]][swizzle];
- assert( index->i[1] < (int) mach->ImmLimit );
- chan->f[1] = mach->Imms[index->i[1]][swizzle];
- assert( index->i[2] < (int) mach->ImmLimit );
- chan->f[2] = mach->Imms[index->i[2]][swizzle];
- assert( index->i[3] < (int) mach->ImmLimit );
- chan->f[3] = mach->Imms[index->i[3]][swizzle];
- break;
+ chan->u[i] = p[index->i[i] * 4 + swizzle];
+ }
+ }
+ break;
- case TGSI_FILE_ADDRESS:
- chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
- chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
- chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
- chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
- break;
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_SYSTEM_VALUE:
+ for (i = 0; i < QUAD_SIZE; i++) {
+ /* XXX: 2D indexing */
+ chan->u[i] = mach->Inputs[index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]].xyzw[swizzle].u[i];
+ }
+ break;
- case TGSI_FILE_PREDICATE:
- assert(index->i[0] < TGSI_EXEC_NUM_PREDS);
- assert(index->i[1] < TGSI_EXEC_NUM_PREDS);
- assert(index->i[2] < TGSI_EXEC_NUM_PREDS);
- assert(index->i[3] < TGSI_EXEC_NUM_PREDS);
- chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0];
- chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1];
- chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2];
- chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3];
- break;
+ case TGSI_FILE_TEMPORARY:
+ for (i = 0; i < QUAD_SIZE; i++) {
+ assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
+ assert(index2D->i[i] == 0);
- case TGSI_FILE_OUTPUT:
- /* vertex/fragment output vars can be read too */
- chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
- chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
- chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
- chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
- break;
+ chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
+ }
+ break;
- default:
- assert( 0 );
- chan->u[0] = 0;
- chan->u[1] = 0;
- chan->u[2] = 0;
- chan->u[3] = 0;
+ case TGSI_FILE_IMMEDIATE:
+ for (i = 0; i < QUAD_SIZE; i++) {
+ assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
+ assert(index2D->i[i] == 0);
+
+ chan->f[i] = mach->Imms[index->i[i]][swizzle];
+ }
+ break;
+
+ case TGSI_FILE_ADDRESS:
+ for (i = 0; i < QUAD_SIZE; i++) {
+ assert(index->i[i] >= 0);
+ assert(index2D->i[i] == 0);
+
+ chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
+ }
+ break;
+
+ case TGSI_FILE_PREDICATE:
+ for (i = 0; i < QUAD_SIZE; i++) {
+ assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS);
+ assert(index2D->i[i] == 0);
+
+ chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i];
+ }
+ break;
+
+ case TGSI_FILE_OUTPUT:
+ /* vertex/fragment output vars can be read too */
+ for (i = 0; i < QUAD_SIZE; i++) {
+ assert(index->i[i] >= 0);
+ assert(index2D->i[i] == 0);
+
+ chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
}
break;
default:
- assert( 0 );
- chan->u[0] = 0;
- chan->u[1] = 0;
- chan->u[2] = 0;
- chan->u[3] = 0;
+ assert(0);
+ for (i = 0; i < QUAD_SIZE; i++) {
+ chan->u[i] = 0;
+ }
}
}
enum tgsi_exec_datatype src_datatype)
{
union tgsi_exec_channel index;
+ union tgsi_exec_channel index2D;
uint swizzle;
/* We start with a direct index into a register file.
/* get current value of address register[swizzle] */
swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
- fetch_src_file_channel(
- mach,
- reg->Indirect.File,
- swizzle,
- &index2,
- &indir_index );
+ fetch_src_file_channel(mach,
+ reg->Indirect.File,
+ swizzle,
+ &index2,
+ &ZeroVec,
+ &indir_index);
/* add value of address register to the offset */
index.i[0] += indir_index.i[0];
* subscript to a register file. Effectively it means that
* the register file is actually a 2D array of registers.
*
- * file[3][1] == file[3*sizeof(file[1])+1],
+ * file[3][1],
* where:
* [3] = Dimension.Index
*/
if (reg->Register.Dimension) {
- int array_size;
- union tgsi_exec_channel dim_index;
-
- /* The size of the first-order array depends on the register file type.
- * We need to multiply the index to the first array to get an effective,
- * "flat" index that points to the beginning of the second-order array.
- */
- switch (reg->Register.File) {
- case TGSI_FILE_INPUT:
- case TGSI_FILE_SYSTEM_VALUE:
- array_size = TGSI_EXEC_MAX_INPUT_ATTRIBS;
- break;
- case TGSI_FILE_CONSTANT:
- array_size = TGSI_EXEC_MAX_CONST_BUFFER;
- break;
- default:
- assert( 0 );
- array_size = 0;
- }
-
- dim_index.i[0] =
- dim_index.i[1] =
- dim_index.i[2] =
- dim_index.i[3] = reg->Dimension.Index;
+ index2D.i[0] =
+ index2D.i[1] =
+ index2D.i[2] =
+ index2D.i[3] = reg->Dimension.Index;
/* Again, the second subscript index can be addressed indirectly
* identically to the first one.
index2.i[3] = reg->DimIndirect.Index;
swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
- fetch_src_file_channel(
- mach,
- reg->DimIndirect.File,
- swizzle,
- &index2,
- &indir_index );
-
- dim_index.i[0] += indir_index.i[0];
- dim_index.i[1] += indir_index.i[1];
- dim_index.i[2] += indir_index.i[2];
- dim_index.i[3] += indir_index.i[3];
+ fetch_src_file_channel(mach,
+ reg->DimIndirect.File,
+ swizzle,
+ &index2,
+ &ZeroVec,
+ &indir_index);
+
+ index2D.i[0] += indir_index.i[0];
+ index2D.i[1] += indir_index.i[1];
+ index2D.i[2] += indir_index.i[2];
+ index2D.i[3] += indir_index.i[3];
/* for disabled execution channels, zero-out the index to
* avoid using a potential garbage value.
*/
for (i = 0; i < QUAD_SIZE; i++) {
- if ((execmask & (1 << i)) == 0)
- dim_index.i[i] = 0;
+ if ((execmask & (1 << i)) == 0) {
+ index2D.i[i] = 0;
+ }
}
}
- index.i[0] += dim_index.i[0] * array_size;
- index.i[1] += dim_index.i[1] * array_size;
- index.i[2] += dim_index.i[2] * array_size;
- index.i[3] += dim_index.i[3] * array_size;
-
/* If by any chance there was a need for a 3D array of register
* files, we would have to check whether Dimension is followed
* by a dimension register and continue the saga.
*/
+ } else {
+ index2D.i[0] =
+ index2D.i[1] =
+ index2D.i[2] =
+ index2D.i[3] = 0;
}
swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
- fetch_src_file_channel(
- mach,
- reg->Register.File,
- swizzle,
- &index,
- chan );
+ fetch_src_file_channel(mach,
+ reg->Register.File,
+ swizzle,
+ &index,
+ &index2D,
+ chan);
if (reg->Register.Absolute) {
if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
int offset = 0; /* indirection offset */
int index;
- if (dst_datatype == TGSI_EXEC_DATA_FLOAT) {
- CHECK_INF_OR_NAN(chan);
+ /* for debugging */
+ if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
+ check_inf_or_nan(chan);
}
/* There is an extra source register that indirectly subscripts
swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
/* fetch values from the address/indirection register */
- fetch_src_file_channel(
- mach,
- reg->Indirect.File,
- swizzle,
- &index,
- &indir_index );
+ fetch_src_file_channel(mach,
+ reg->Indirect.File,
+ swizzle,
+ &index,
+ &ZeroVec,
+ &indir_index);
/* save indirection offset */
offset = indir_index.i[0];
}
/*
- * Fetch a four texture samples using STR texture coordinates.
+ * Fetch four texture samples using STR texture coordinates.
*/
static void
fetch_texel( struct tgsi_sampler *sampler,
last = decl->Range.Last;
mask = decl->Declaration.UsageMask;
- if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
- assert(decl->Semantic.Index == 0);
- assert(first == last);
- assert(mask == TGSI_WRITEMASK_XYZW);
-
- mach->Inputs[first] = mach->QuadPos;
- } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
+ /* XXX we could remove this special-case code since
+ * mach->InterpCoefs[first].a0 should already have the
+ * front/back-face value. But we should first update the
+ * ureg code to emit the right UsageMask value (WRITEMASK_X).
+ * Then, we could remove the tgsi_exec_machine::Face field.
+ */
+ if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
uint i;
assert(decl->Semantic.Index == 0);
}
}
-typedef void (* micro_op)(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src);
+typedef void (* micro_unary_op)(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src);
static void
exec_scalar_unary(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst,
- micro_op op,
+ micro_unary_op op,
enum tgsi_exec_datatype dst_datatype,
enum tgsi_exec_datatype src_datatype)
{
static void
exec_vector_unary(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst,
- micro_op op,
+ micro_unary_op op,
enum tgsi_exec_datatype dst_datatype,
enum tgsi_exec_datatype src_datatype)
{
}
}
+/**
+ * Signature of a micro op taking two source channels.
+ * exec_vector_binary() calls it with one destination channel and the
+ * two source channels it has just fetched.
+ */
+typedef void (* micro_binary_op)(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1);
+
static void
exec_vector_binary(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst,
- micro_op op,
+ micro_binary_op op,
enum tgsi_exec_datatype dst_datatype,
enum tgsi_exec_datatype src_datatype)
{
fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
- op(&dst.xyzw[chan], src);
+ op(&dst.xyzw[chan], &src[0], &src[1]);
}
}
for (chan = 0; chan < NUM_CHANNELS; chan++) {
}
}
+/**
+ * Signature of a micro op taking three source channels.
+ * exec_vector_trinary() calls it with one destination channel and the
+ * three source channels it has just fetched.
+ */
+typedef void (* micro_trinary_op)(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1,
+ const union tgsi_exec_channel *src2);
+
static void
exec_vector_trinary(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst,
- micro_op op,
+ micro_trinary_op op,
enum tgsi_exec_datatype dst_datatype,
enum tgsi_exec_datatype src_datatype)
{
fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
- op(&dst.xyzw[chan], src);
+ op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
}
}
for (chan = 0; chan < NUM_CHANNELS; chan++) {
for (chan = CHAN_Y; chan <= CHAN_Z; chan++) {
fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
- micro_mad(&arg[2], arg);
+ micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
}
for (chan = 0; chan < NUM_CHANNELS; chan++) {
for (chan = CHAN_Y; chan <= CHAN_W; chan++) {
fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
- micro_mad(&arg[2], arg);
+ micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
}
for (chan = 0; chan < NUM_CHANNELS; chan++) {
fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
- micro_mad(&arg[0], arg);
+ micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
fetch_source(mach, &arg[1], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT);
micro_add(&arg[0], &arg[0], &arg[1]);
fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
- micro_mad(&arg[2], arg);
+ micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT);
- micro_mad(&arg[0], arg);
+ micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
fetch_source(mach, &arg[1], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT);
micro_add(&arg[0], &arg[0], &arg[1]);
fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT);
- micro_mad(&arg[2], arg);
+ micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
for (chan = 0; chan < NUM_CHANNELS; chan++) {
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
}
}
+/**
+ * Normalize a 4-component vector.
+ *
+ * Computes scale = 1 / sqrt(|x*x + y*y + z*z + w*w|) from Src[0] and
+ * stores src * scale to every destination channel enabled in the
+ * write mask.  All four source channels are fetched regardless of the
+ * write mask because each one contributes to the length.  A zero-length
+ * input is not special-cased.
+ *
+ * \param mach  interpreter state passed through to fetch_source()/store_dest()
+ * \param inst  instruction supplying Src[0] and Dst[0]
+ */
+static void
+exec_nrm4(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ unsigned int chan;
+ union tgsi_exec_channel arg[4];
+ union tgsi_exec_channel scale;
+
+ /* Seed the sum of squares with x*x. */
+ fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&scale, &arg[0], &arg[0]);
+
+ /* Accumulate y*y, z*z and w*w; the fetched values are kept in arg[]. */
+ for (chan = CHAN_Y; chan <= CHAN_W; chan++) {
+ union tgsi_exec_channel product;
+
+ fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&product, &arg[chan], &arg[chan]);
+ micro_add(&scale, &scale, &product);
+ }
+
+ /* Turn the squared length into its reciprocal square root, in place. */
+ micro_rsq(&scale, &scale);
+
+ /* Scale and store only the channels the instruction writes. */
+ for (chan = CHAN_X; chan <= CHAN_W; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ micro_mul(&arg[chan], &arg[chan], &scale);
+ store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+}
+
+/**
+ * Normalize the XYZ part of a vector.
+ *
+ * If any of X, Y or Z is write-enabled, computes
+ * scale = 1 / sqrt(|x*x + y*y + z*z|) from Src[0] and stores src * scale
+ * to each enabled channel among X, Y and Z.  If W is write-enabled it
+ * receives 1.0 (OneVec); the source W is never read.  A zero-length
+ * input is not special-cased.
+ *
+ * \param mach  interpreter state passed through to fetch_source()/store_dest()
+ * \param inst  instruction supplying Src[0] and Dst[0]
+ */
+static void
+exec_nrm3(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ /* Skip the length computation entirely when only W is written. */
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
+ unsigned int chan;
+ union tgsi_exec_channel arg[3];
+ union tgsi_exec_channel scale;
+
+ /* Seed the sum of squares with x*x. */
+ fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&scale, &arg[0], &arg[0]);
+
+ /* Accumulate y*y and z*z; the fetched values are kept in arg[]. */
+ for (chan = CHAN_Y; chan <= CHAN_Z; chan++) {
+ union tgsi_exec_channel product;
+
+ fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
+ micro_mul(&product, &arg[chan], &arg[chan]);
+ micro_add(&scale, &scale, &product);
+ }
+
+ /* Turn the squared length into its reciprocal square root, in place. */
+ micro_rsq(&scale, &scale);
+
+ /* Scale and store only the channels the instruction writes. */
+ for (chan = CHAN_X; chan <= CHAN_Z; chan++) {
+ if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ micro_mul(&arg[chan], &arg[chan], &scale);
+ store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+ }
+ }
+ }
+
+ /* W is defined as the constant 1.0, independent of the source. */
+ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
+ store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT);
+ }
+}
+
static void
exec_break(struct tgsi_exec_machine *mach)
{
static void
micro_shl(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->u[0] = src[0].u[0] << src[1].u[0];
- dst->u[1] = src[0].u[1] << src[1].u[1];
- dst->u[2] = src[0].u[2] << src[1].u[2];
- dst->u[3] = src[0].u[3] << src[1].u[3];
+ dst->u[0] = src0->u[0] << src1->u[0];
+ dst->u[1] = src0->u[1] << src1->u[1];
+ dst->u[2] = src0->u[2] << src1->u[2];
+ dst->u[3] = src0->u[3] << src1->u[3];
}
static void
micro_and(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->u[0] = src[0].u[0] & src[1].u[0];
- dst->u[1] = src[0].u[1] & src[1].u[1];
- dst->u[2] = src[0].u[2] & src[1].u[2];
- dst->u[3] = src[0].u[3] & src[1].u[3];
+ dst->u[0] = src0->u[0] & src1->u[0];
+ dst->u[1] = src0->u[1] & src1->u[1];
+ dst->u[2] = src0->u[2] & src1->u[2];
+ dst->u[3] = src0->u[3] & src1->u[3];
}
static void
micro_or(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->u[0] = src[0].u[0] | src[1].u[0];
- dst->u[1] = src[0].u[1] | src[1].u[1];
- dst->u[2] = src[0].u[2] | src[1].u[2];
- dst->u[3] = src[0].u[3] | src[1].u[3];
+ dst->u[0] = src0->u[0] | src1->u[0];
+ dst->u[1] = src0->u[1] | src1->u[1];
+ dst->u[2] = src0->u[2] | src1->u[2];
+ dst->u[3] = src0->u[3] | src1->u[3];
}
static void
micro_xor(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->u[0] = src[0].u[0] ^ src[1].u[0];
- dst->u[1] = src[0].u[1] ^ src[1].u[1];
- dst->u[2] = src[0].u[2] ^ src[1].u[2];
- dst->u[3] = src[0].u[3] ^ src[1].u[3];
+ dst->u[0] = src0->u[0] ^ src1->u[0];
+ dst->u[1] = src0->u[1] ^ src1->u[1];
+ dst->u[2] = src0->u[2] ^ src1->u[2];
+ dst->u[3] = src0->u[3] ^ src1->u[3];
}
static void
static void
micro_idiv(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->i[0] = src[0].i[0] / src[1].i[0];
- dst->i[1] = src[0].i[1] / src[1].i[1];
- dst->i[2] = src[0].i[2] / src[1].i[2];
- dst->i[3] = src[0].i[3] / src[1].i[3];
+ dst->i[0] = src0->i[0] / src1->i[0];
+ dst->i[1] = src0->i[1] / src1->i[1];
+ dst->i[2] = src0->i[2] / src1->i[2];
+ dst->i[3] = src0->i[3] / src1->i[3];
}
static void
micro_imax(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->i[0] = src[0].i[0] > src[1].i[0] ? src[0].i[0] : src[1].i[0];
- dst->i[1] = src[0].i[1] > src[1].i[1] ? src[0].i[1] : src[1].i[1];
- dst->i[2] = src[0].i[2] > src[1].i[2] ? src[0].i[2] : src[1].i[2];
- dst->i[3] = src[0].i[3] > src[1].i[3] ? src[0].i[3] : src[1].i[3];
+ dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
+ dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
+ dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
+ dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
}
static void
micro_imin(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->i[0] = src[0].i[0] < src[1].i[0] ? src[0].i[0] : src[1].i[0];
- dst->i[1] = src[0].i[1] < src[1].i[1] ? src[0].i[1] : src[1].i[1];
- dst->i[2] = src[0].i[2] < src[1].i[2] ? src[0].i[2] : src[1].i[2];
- dst->i[3] = src[0].i[3] < src[1].i[3] ? src[0].i[3] : src[1].i[3];
+ dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
+ dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
+ dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
+ dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
}
static void
micro_isge(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->i[0] = src[0].i[0] >= src[1].i[0] ? -1 : 0;
- dst->i[1] = src[0].i[1] >= src[1].i[1] ? -1 : 0;
- dst->i[2] = src[0].i[2] >= src[1].i[2] ? -1 : 0;
- dst->i[3] = src[0].i[3] >= src[1].i[3] ? -1 : 0;
+ dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;
+ dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;
+ dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;
+ dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;
}
static void
micro_ishr(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->i[0] = src[0].i[0] >> src[1].i[0];
- dst->i[1] = src[0].i[1] >> src[1].i[1];
- dst->i[2] = src[0].i[2] >> src[1].i[2];
- dst->i[3] = src[0].i[3] >> src[1].i[3];
+ dst->i[0] = src0->i[0] >> src1->i[0];
+ dst->i[1] = src0->i[1] >> src1->i[1];
+ dst->i[2] = src0->i[2] >> src1->i[2];
+ dst->i[3] = src0->i[3] >> src1->i[3];
}
static void
micro_islt(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->i[0] = src[0].i[0] < src[1].i[0] ? -1 : 0;
- dst->i[1] = src[0].i[1] < src[1].i[1] ? -1 : 0;
- dst->i[2] = src[0].i[2] < src[1].i[2] ? -1 : 0;
- dst->i[3] = src[0].i[3] < src[1].i[3] ? -1 : 0;
+ dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;
+ dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;
+ dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;
+ dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;
}
static void
static void
micro_uadd(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->u[0] = src[0].u[0] + src[1].u[0];
- dst->u[1] = src[0].u[1] + src[1].u[1];
- dst->u[2] = src[0].u[2] + src[1].u[2];
- dst->u[3] = src[0].u[3] + src[1].u[3];
+ dst->u[0] = src0->u[0] + src1->u[0];
+ dst->u[1] = src0->u[1] + src1->u[1];
+ dst->u[2] = src0->u[2] + src1->u[2];
+ dst->u[3] = src0->u[3] + src1->u[3];
}
+ /**
+  * Unsigned division on each of the four channels:
+  * dst.u[c] = src0.u[c] / src1.u[c].
+  *
+  * A channel whose divisor is zero yields all bits set (~0u) instead of
+  * performing the division: dividing by zero is undefined behaviour in C
+  * and the divisor is taken unchecked from src1.
+  */
static void
micro_udiv(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->u[0] = src[0].u[0] / src[1].u[0];
- dst->u[1] = src[0].u[1] / src[1].u[1];
- dst->u[2] = src[0].u[2] / src[1].u[2];
- dst->u[3] = src[0].u[3] / src[1].u[3];
+ dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u;
+ dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u;
+ dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u;
+ dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u;
}
+ /**
+  * Unsigned multiply-add on each of the four channels:
+  * dst.u[c] = src0.u[c] * src1.u[c] + src2.u[c].
+  */
static void
micro_umad(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1,
+ const union tgsi_exec_channel *src2)
{
- dst->u[0] = src[0].u[0] * src[1].u[0] + src[2].u[0];
- dst->u[1] = src[0].u[1] * src[1].u[1] + src[2].u[1];
- dst->u[2] = src[0].u[2] * src[1].u[2] + src[2].u[2];
- dst->u[3] = src[0].u[3] * src[1].u[3] + src[2].u[3];
+ unsigned chan;
+
+ for (chan = 0; chan < 4; chan++) {
+    /* addend written first; unsigned addition is commutative */
+    dst->u[chan] = src2->u[chan] + src0->u[chan] * src1->u[chan];
+ }
}
+ /**
+  * Unsigned maximum on each of the four channels.
+  * dst.u[c] receives src0.u[c] when it is strictly greater than src1.u[c],
+  * otherwise src1.u[c].
+  */
static void
micro_umax(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->u[0] = src[0].u[0] > src[1].u[0] ? src[0].u[0] : src[1].u[0];
- dst->u[1] = src[0].u[1] > src[1].u[1] ? src[0].u[1] : src[1].u[1];
- dst->u[2] = src[0].u[2] > src[1].u[2] ? src[0].u[2] : src[1].u[2];
- dst->u[3] = src[0].u[3] > src[1].u[3] ? src[0].u[3] : src[1].u[3];
+ unsigned chan;
+
+ for (chan = 0; chan < 4; chan++) {
+    if (src0->u[chan] > src1->u[chan]) {
+       dst->u[chan] = src0->u[chan];
+    } else {
+       dst->u[chan] = src1->u[chan];
+    }
+ }
}
+ /**
+  * Unsigned minimum on each of the four channels.
+  * dst.u[c] receives src0.u[c] when it is strictly less than src1.u[c],
+  * otherwise src1.u[c].
+  */
static void
micro_umin(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->u[0] = src[0].u[0] < src[1].u[0] ? src[0].u[0] : src[1].u[0];
- dst->u[1] = src[0].u[1] < src[1].u[1] ? src[0].u[1] : src[1].u[1];
- dst->u[2] = src[0].u[2] < src[1].u[2] ? src[0].u[2] : src[1].u[2];
- dst->u[3] = src[0].u[3] < src[1].u[3] ? src[0].u[3] : src[1].u[3];
+ unsigned chan;
+
+ for (chan = 0; chan < 4; chan++) {
+    /* inverted test: src1 is chosen unless src0 is strictly smaller */
+    dst->u[chan] = src0->u[chan] >= src1->u[chan] ? src1->u[chan]
+                                                  : src0->u[chan];
+ }
}
+ /**
+  * Unsigned remainder on each of the four channels:
+  * dst.u[c] = src0.u[c] % src1.u[c].
+  *
+  * A channel whose divisor is zero yields all bits set (~0u) instead of
+  * evaluating the remainder: the % operator with a zero right operand is
+  * undefined behaviour in C and the divisor is taken unchecked from src1.
+  */
static void
micro_umod(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->u[0] = src[0].u[0] % src[1].u[0];
- dst->u[1] = src[0].u[1] % src[1].u[1];
- dst->u[2] = src[0].u[2] % src[1].u[2];
- dst->u[3] = src[0].u[3] % src[1].u[3];
+ dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u;
+ dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u;
+ dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u;
+ dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u;
}
+ /**
+  * Unsigned multiplication on each of the four channels:
+  * dst.u[c] = src0.u[c] * src1.u[c].
+  */
static void
micro_umul(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->u[0] = src[0].u[0] * src[1].u[0];
- dst->u[1] = src[0].u[1] * src[1].u[1];
- dst->u[2] = src[0].u[2] * src[1].u[2];
- dst->u[3] = src[0].u[3] * src[1].u[3];
+ unsigned chan;
+
+ for (chan = 0; chan < 4; chan++) {
+    dst->u[chan] = src0->u[chan] * src1->u[chan];
+ }
}
+ /**
+  * Unsigned equality test on each of the four channels.
+  * A channel of dst.u is set to ~0 (all bits set) when src0.u equals
+  * src1.u, otherwise to 0.
+  */
static void
micro_useq(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->u[0] = src[0].u[0] == src[1].u[0] ? ~0 : 0;
- dst->u[1] = src[0].u[1] == src[1].u[1] ? ~0 : 0;
- dst->u[2] = src[0].u[2] == src[1].u[2] ? ~0 : 0;
- dst->u[3] = src[0].u[3] == src[1].u[3] ? ~0 : 0;
+ unsigned chan;
+
+ for (chan = 0; chan < 4; chan++) {
+    if (src0->u[chan] == src1->u[chan]) {
+       dst->u[chan] = ~0;
+    } else {
+       dst->u[chan] = 0;
+    }
+ }
}
+ /**
+  * Unsigned "greater than or equal" test on each of the four channels.
+  * A channel of dst.u is set to ~0 (all bits set) when src0.u >= src1.u,
+  * otherwise to 0.
+  */
static void
micro_usge(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->u[0] = src[0].u[0] >= src[1].u[0] ? ~0 : 0;
- dst->u[1] = src[0].u[1] >= src[1].u[1] ? ~0 : 0;
- dst->u[2] = src[0].u[2] >= src[1].u[2] ? ~0 : 0;
- dst->u[3] = src[0].u[3] >= src[1].u[3] ? ~0 : 0;
+ unsigned chan;
+
+ for (chan = 0; chan < 4; chan++) {
+    /* inverted test: strictly-less selects 0, everything else ~0 */
+    dst->u[chan] = src0->u[chan] < src1->u[chan] ? 0 : ~0;
+ }
}
+ /**
+  * Unsigned (logical) shift right, applied to each of the four channels:
+  * dst.u[c] = src0.u[c] >> (src1.u[c] & 0x1f).
+  *
+  * The count is reduced to its low five bits before shifting.  In C a shift
+  * by at least the width of the operand is undefined, and the count here is
+  * taken straight from src1, so it has to be bounded before it reaches the
+  * >> operator.
+  * NOTE(review): the 0x1f mask assumes the u[] channels are 32 bits wide;
+  * confirm against the declaration of union tgsi_exec_channel.
+  */
static void
micro_ushr(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->u[0] = src[0].u[0] >> src[1].u[0];
- dst->u[1] = src[0].u[1] >> src[1].u[1];
- dst->u[2] = src[0].u[2] >> src[1].u[2];
- dst->u[3] = src[0].u[3] >> src[1].u[3];
+ dst->u[0] = src0->u[0] >> (src1->u[0] & 0x1f);
+ dst->u[1] = src0->u[1] >> (src1->u[1] & 0x1f);
+ dst->u[2] = src0->u[2] >> (src1->u[2] & 0x1f);
+ dst->u[3] = src0->u[3] >> (src1->u[3] & 0x1f);
}
+ /**
+  * Unsigned "less than" test on each of the four channels.
+  * A channel of dst.u is set to ~0 (all bits set) when src0.u < src1.u,
+  * otherwise to 0.
+  */
static void
micro_uslt(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->u[0] = src[0].u[0] < src[1].u[0] ? ~0 : 0;
- dst->u[1] = src[0].u[1] < src[1].u[1] ? ~0 : 0;
- dst->u[2] = src[0].u[2] < src[1].u[2] ? ~0 : 0;
- dst->u[3] = src[0].u[3] < src[1].u[3] ? ~0 : 0;
+ unsigned chan;
+
+ for (chan = 0; chan < 4; chan++) {
+    if (src0->u[chan] < src1->u[chan]) {
+       dst->u[chan] = ~0;
+    } else {
+       dst->u[chan] = 0;
+    }
+ }
}
+ /**
+  * Unsigned inequality test on each of the four channels.
+  * A channel of dst.u is set to ~0 (all bits set) when src0.u differs from
+  * src1.u, otherwise to 0.
+  */
static void
micro_usne(union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1)
{
- dst->u[0] = src[0].u[0] != src[1].u[0] ? ~0 : 0;
- dst->u[1] = src[0].u[1] != src[1].u[1] ? ~0 : 0;
- dst->u[2] = src[0].u[2] != src[1].u[2] ? ~0 : 0;
- dst->u[3] = src[0].u[3] != src[1].u[3] ? ~0 : 0;
+ unsigned chan;
+
+ for (chan = 0; chan < 4; chan++) {
+    /* inverted test: equal channels select 0, everything else ~0 */
+    dst->u[chan] = src0->u[chan] == src1->u[chan] ? 0 : ~0;
+ }
}
static void
break;
case TGSI_OPCODE_MUL:
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
- micro_mul(&d[chan_index], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_ADD:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_add(&d[chan_index], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_DP3:
break;
case TGSI_OPCODE_MIN:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
-
- /* XXX use micro_min()?? */
- micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_MAX:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
-
- /* XXX use micro_max()?? */
- micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] );
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_SLT:
break;
case TGSI_OPCODE_SUB:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
- micro_sub(&d[chan_index], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_LRP:
break;
case TGSI_OPCODE_CND:
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
- FETCH(&r[2], 2, chan_index);
- micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_DP2A:
break;
case TGSI_OPCODE_CLAMP:
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
- micro_max(&r[0], &r[0], &r[1]);
- FETCH(&r[1], 2, chan_index);
- micro_min(&d[chan_index], &r[0], &r[1]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_FLR:
break;
case TGSI_OPCODE_CMP:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
- FETCH(&r[2], 2, chan_index);
- micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]);
- }
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- STORE(&d[chan_index], 0, chan_index);
- }
+ exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
case TGSI_OPCODE_SCS:
break;
case TGSI_OPCODE_NRM:
- /* 3-component vector normalize */
- if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
- IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
- IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
- /* r3 = sqrt(dp3(src0, src0)) */
- FETCH(&r[0], 0, CHAN_X);
- micro_mul(&r[3], &r[0], &r[0]);
- FETCH(&r[1], 0, CHAN_Y);
- micro_mul(&r[4], &r[1], &r[1]);
- micro_add(&r[3], &r[3], &r[4]);
- FETCH(&r[2], 0, CHAN_Z);
- micro_mul(&r[4], &r[2], &r[2]);
- micro_add(&r[3], &r[3], &r[4]);
- micro_sqrt(&r[3], &r[3]);
-
- if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
- micro_div(&r[0], &r[0], &r[3]);
- STORE(&r[0], 0, CHAN_X);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
- micro_div(&r[1], &r[1], &r[3]);
- STORE(&r[1], 0, CHAN_Y);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
- micro_div(&r[2], &r[2], &r[3]);
- STORE(&r[2], 0, CHAN_Z);
- }
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
- STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
- }
+ exec_nrm3(mach, inst);
break;
case TGSI_OPCODE_NRM4:
- /* 4-component vector normalize */
- {
- union tgsi_exec_channel tmp, dot;
-
- /* tmp = dp4(src0, src0): */
- FETCH( &r[0], 0, CHAN_X );
- micro_mul( &tmp, &r[0], &r[0] );
-
- FETCH( &r[1], 0, CHAN_Y );
- micro_mul( &dot, &r[1], &r[1] );
- micro_add( &tmp, &tmp, &dot );
-
- FETCH( &r[2], 0, CHAN_Z );
- micro_mul( &dot, &r[2], &r[2] );
- micro_add( &tmp, &tmp, &dot );
-
- FETCH( &r[3], 0, CHAN_W );
- micro_mul( &dot, &r[3], &r[3] );
- micro_add( &tmp, &tmp, &dot );
-
- /* tmp = 1 / sqrt(tmp) */
- micro_sqrt( &tmp, &tmp );
- micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- /* chan = chan * tmp */
- micro_mul( &r[chan_index], &tmp, &r[chan_index] );
- STORE( &r[chan_index], 0, chan_index );
- }
- }
+ exec_nrm4(mach, inst);
break;
case TGSI_OPCODE_DIV: