#include "pipe/p_compiler.h"
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_exec.h"
#define UPDATE_EXEC_MASK(MACH) \
MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
+
+static const union tgsi_exec_channel ZeroVec =
+ { { 0.0, 0.0, 0.0, 0.0 } };
+
+
+#ifdef DEBUG
+static void
+check_inf_or_nan(const union tgsi_exec_channel *chan)
+{
+ assert(!util_is_inf_or_nan(chan->f[0]));
+ assert(!util_is_inf_or_nan(chan->f[1]));
+ assert(!util_is_inf_or_nan(chan->f[2]));
+ assert(!util_is_inf_or_nan(chan->f[3]));
+}
+#endif
+
+
+#ifdef DEBUG
+static void
+print_chan(const char *msg, const union tgsi_exec_channel *chan)
+{
+ debug_printf("%s = {%f, %f, %f, %f}\n",
+ msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]);
+}
+#endif
+
+
+#ifdef DEBUG
+static void
+print_temp(const struct tgsi_exec_machine *mach, uint index)
+{
+ const struct tgsi_exec_vector *tmp = &mach->Temps[index];
+ int i;
+ debug_printf("Temp[%u] =\n", index);
+ for (i = 0; i < 4; i++) {
+ debug_printf(" %c: { %f, %f, %f, %f }\n",
+ "XYZW"[i],
+ tmp->xyzw[i].f[0],
+ tmp->xyzw[i].f[1],
+ tmp->xyzw[i].f[2],
+ tmp->xyzw[i].f[3]);
+ }
+}
+#endif
+
+
+/**
+ * Check if there's a potential src/dst register data dependency when
+ * using SOA execution.
+ * Example:
+ * MOV T, T.yxwz;
+ * This would expand into:
+ * MOV t0, t1;
+ * MOV t1, t0;
+ * MOV t2, t3;
+ * MOV t3, t2;
+ * The second instruction will have the wrong value for t0 if executed as-is.
+ */
+static boolean
+tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
+{
+ uint i, chan;
+
+ uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ if (writemask == TGSI_WRITEMASK_X ||
+ writemask == TGSI_WRITEMASK_Y ||
+ writemask == TGSI_WRITEMASK_Z ||
+ writemask == TGSI_WRITEMASK_W ||
+ writemask == TGSI_WRITEMASK_NONE) {
+ /* no chance of data dependency */
+ return FALSE;
+ }
+
+ /* loop over src regs */
+ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ if ((inst->FullSrcRegisters[i].SrcRegister.File ==
+ inst->FullDstRegisters[0].DstRegister.File) &&
+ (inst->FullSrcRegisters[i].SrcRegister.Index ==
+ inst->FullDstRegisters[0].DstRegister.Index)) {
+ /* loop over dest channels */
+ uint channelsWritten = 0x0;
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
+ /* check if we're reading a channel that's been written */
+ uint swizzle = tgsi_util_get_full_src_register_extswizzle(&inst->FullSrcRegisters[i], chan);
+ if (swizzle <= TGSI_SWIZZLE_W &&
+ (channelsWritten & (1 << swizzle))) {
+ return TRUE;
+ }
+
+ channelsWritten |= (1 << chan);
+ }
+ }
+ }
+ return FALSE;
+}
+
+
/**
* Initialize machine state by expanding tokens to full instructions,
* allocating temporary storage, setting up constants, etc.
struct tgsi_exec_machine *mach,
const struct tgsi_token *tokens,
uint numSamplers,
- struct tgsi_sampler *samplers)
+ struct tgsi_sampler **samplers)
{
uint k;
struct tgsi_parse_context parse;
case TGSI_TOKEN_TYPE_IMMEDIATE:
{
- uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
- assert( size % 4 == 0 );
- assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
+ uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+ assert( size <= 4 );
+ assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
for( i = 0; i < size; i++ ) {
- mach->Imms[mach->ImmLimit + i / 4][i % 4] =
- parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+ mach->Imms[mach->ImmLimit][i] =
+ parse.FullToken.FullImmediate.u[i].Float;
}
- mach->ImmLimit += size / 4;
+ mach->ImmLimit += 1;
}
break;
memcpy(instructions + numInstructions,
&parse.FullToken.FullInstruction,
sizeof(instructions[0]));
+
+#if 0
+ if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) {
+ debug_printf("SOA dependency in instruction:\n");
+ tgsi_dump_instruction(&parse.FullToken.FullInstruction,
+ numInstructions);
+ }
+#else
+ (void) tgsi_check_soa_dependencies;
+#endif
+
numInstructions++;
break;
}
-void
-tgsi_exec_machine_init(
- struct tgsi_exec_machine *mach )
+struct tgsi_exec_machine *
+tgsi_exec_machine_create( void )
{
+ struct tgsi_exec_machine *mach;
uint i;
- mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
+ mach = align_malloc( sizeof *mach, 16 );
+ if (!mach)
+ goto fail;
+
mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
+ mach->Samplers = NULL;
+ mach->Consts = NULL;
+ mach->Tokens = NULL;
+ mach->Primitives = NULL;
+ mach->InterpCoefs = NULL;
+ mach->Instructions = NULL;
+ mach->Declarations = NULL;
+
/* Setup constants. */
for( i = 0; i < 4; i++ ) {
mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
}
+
+#ifdef DEBUG
+ /* silence warnings */
+ (void) print_chan;
+ (void) print_temp;
+#endif
+
+ return mach;
+
+fail:
+ align_free(mach);
+ return NULL;
}
void
-tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
+tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
{
- if (mach->Instructions) {
+ if (mach) {
FREE(mach->Instructions);
- mach->Instructions = NULL;
- mach->NumInstructions = 0;
- }
- if (mach->Declarations) {
FREE(mach->Declarations);
- mach->Declarations = NULL;
- mach->NumDeclarations = 0;
}
+
+ align_free(mach);
}
dst->f[3] = src0->f[3] + src1->f[3];
}
+#if 0
static void
micro_iadd(
union tgsi_exec_channel *dst,
dst->i[2] = src0->i[2] + src1->i[2];
dst->i[3] = src0->i[3] + src1->i[3];
}
+#endif
static void
micro_and(
}
}
+#if 0
static void
micro_udiv(
union tgsi_exec_channel *dst,
dst->u[2] = src0->u[2] / src1->u[2];
dst->u[3] = src0->u[3] / src1->u[3];
}
+#endif
static void
micro_eq(
dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
}
+#if 0
static void
micro_ieq(
union tgsi_exec_channel *dst,
dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
}
+#endif
static void
micro_exp2(
#endif
}
-static void
-micro_f2it(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->i[0] = (int) src->f[0];
- dst->i[1] = (int) src->f[1];
- dst->i[2] = (int) src->f[2];
- dst->i[3] = (int) src->f[3];
-}
-
+#if 0
static void
micro_f2ut(
union tgsi_exec_channel *dst,
dst->u[2] = (uint) src->f[2];
dst->u[3] = (uint) src->f[3];
}
+#endif
+
+static void
+micro_float_clamp(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ uint i;
+
+ for (i = 0; i < 4; i++) {
+ if (src->f[i] > 0.0f) {
+ if (src->f[i] > 1.884467e+019f)
+ dst->f[i] = 1.884467e+019f;
+ else if (src->f[i] < 5.42101e-020f)
+ dst->f[i] = 5.42101e-020f;
+ else
+ dst->f[i] = src->f[i];
+ }
+ else {
+ if (src->f[i] < -1.884467e+019f)
+ dst->f[i] = -1.884467e+019f;
+ else if (src->f[i] > -5.42101e-020f)
+ dst->f[i] = -5.42101e-020f;
+ else
+ dst->f[i] = src->f[i];
+ }
+ }
+}
static void
micro_flr(
dst->f[3] = src->f[3] - floorf( src->f[3] );
}
-static void
-micro_ge(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
- dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
- dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
- dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
-}
-
static void
micro_i2f(
union tgsi_exec_channel *dst,
dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
}
+#if 0
static void
micro_ilt(
union tgsi_exec_channel *dst,
dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
}
+#endif
+#if 0
static void
micro_ult(
union tgsi_exec_channel *dst,
dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
}
+#endif
static void
micro_max(
dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
}
+#if 0
static void
micro_imax(
union tgsi_exec_channel *dst,
dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
}
+#endif
+#if 0
static void
micro_umax(
union tgsi_exec_channel *dst,
dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
}
+#endif
static void
micro_min(
dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
}
+#if 0
static void
micro_imin(
union tgsi_exec_channel *dst,
dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
}
+#endif
+#if 0
static void
micro_umin(
union tgsi_exec_channel *dst,
dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
}
+#endif
+#if 0
static void
micro_umod(
union tgsi_exec_channel *dst,
dst->u[2] = src0->u[2] % src1->u[2];
dst->u[3] = src0->u[3] % src1->u[3];
}
+#endif
static void
micro_mul(
dst->f[3] = src0->f[3] * src1->f[3];
}
+#if 0
static void
micro_imul(
union tgsi_exec_channel *dst,
dst->i[2] = src0->i[2] * src1->i[2];
dst->i[3] = src0->i[3] * src1->i[3];
}
+#endif
+#if 0
static void
micro_imul64(
union tgsi_exec_channel *dst0,
dst0->i[2] = 0;
dst0->i[3] = 0;
}
+#endif
+#if 0
static void
micro_umul64(
union tgsi_exec_channel *dst0,
dst0->u[2] = 0;
dst0->u[3] = 0;
}
+#endif
+
+#if 0
static void
micro_movc(
union tgsi_exec_channel *dst,
dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
}
+#endif
static void
micro_neg(
dst->f[3] = -src->f[3];
}
+#if 0
static void
micro_ineg(
union tgsi_exec_channel *dst,
dst->i[2] = -src->i[2];
dst->i[3] = -src->i[3];
}
+#endif
static void
micro_not(
dst->f[3] = floorf( src->f[3] + 0.5f );
}
+static void
+micro_sgn(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
+ dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
+ dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
+ dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
+}
+
static void
micro_shl(
union tgsi_exec_channel *dst,
dst->f[3] = (float) (int) src0->f[3];
}
+#if 0
static void
micro_ushr(
union tgsi_exec_channel *dst,
dst->u[2] = src0->u[2] >> src1->u[2];
dst->u[3] = src0->u[3] >> src1->u[3];
}
+#endif
static void
micro_sin(
dst->f[3] = src0->f[3] - src1->f[3];
}
+#if 0
static void
micro_u2f(
union tgsi_exec_channel *dst,
dst->f[2] = (float) src->u[2];
dst->f[3] = (float) src->u[3];
}
+#endif
static void
micro_xor(
switch( file ) {
case TGSI_FILE_CONSTANT:
assert(mach->Consts);
- assert(index->i[0] >= 0);
- assert(index->i[1] >= 0);
- assert(index->i[2] >= 0);
- assert(index->i[3] >= 0);
- chan->f[0] = mach->Consts[index->i[0]][swizzle];
- chan->f[1] = mach->Consts[index->i[1]][swizzle];
- chan->f[2] = mach->Consts[index->i[2]][swizzle];
- chan->f[3] = mach->Consts[index->i[3]][swizzle];
+ if (index->i[0] < 0)
+ chan->f[0] = 0.0f;
+ else
+ chan->f[0] = mach->Consts[index->i[0]][swizzle];
+ if (index->i[1] < 0)
+ chan->f[1] = 0.0f;
+ else
+ chan->f[1] = mach->Consts[index->i[1]][swizzle];
+ if (index->i[2] < 0)
+ chan->f[2] = 0.0f;
+ else
+ chan->f[2] = mach->Consts[index->i[2]][swizzle];
+ if (index->i[3] < 0)
+ chan->f[3] = 0.0f;
+ else
+ chan->f[3] = mach->Consts[index->i[3]][swizzle];
break;
case TGSI_FILE_INPUT:
union tgsi_exec_channel index;
uint swizzle;
+ /* We start with a direct index into a register file.
+ *
+ * file[1],
+ * where:
+ * file = SrcRegister.File
+ * [1] = SrcRegister.Index
+ */
index.i[0] =
index.i[1] =
index.i[2] =
index.i[3] = reg->SrcRegister.Index;
+ /* There is an extra source register that indirectly subscripts
+ * a register file. The direct index now becomes an offset
+ * that is being added to the indirect register.
+ *
+ * file[ind[2].x+1],
+ * where:
+ * ind = SrcRegisterInd.File
+ * [2] = SrcRegisterInd.Index
+ * .x = SrcRegisterInd.SwizzleX
+ */
if (reg->SrcRegister.Indirect) {
union tgsi_exec_channel index2;
union tgsi_exec_channel indir_index;
&indir_index );
/* add value of address register to the offset */
- index.i[0] += indir_index.i[0];
- index.i[1] += indir_index.i[1];
- index.i[2] += indir_index.i[2];
- index.i[3] += indir_index.i[3];
+ index.i[0] += (int) indir_index.f[0];
+ index.i[1] += (int) indir_index.f[1];
+ index.i[2] += (int) indir_index.f[2];
+ index.i[3] += (int) indir_index.f[3];
/* for disabled execution channels, zero-out the index to
* avoid using a potential garbage value.
}
}
- if( reg->SrcRegister.Dimension ) {
- switch( reg->SrcRegister.File ) {
+ /* There is an extra source register that is a second
+ * subscript to a register file. Effectively it means that
+ * the register file is actually a 2D array of registers.
+ *
+ * file[1][3] == file[1*sizeof(file[1])+3],
+ * where:
+ * [3] = SrcRegisterDim.Index
+ */
+ if (reg->SrcRegister.Dimension) {
+ /* The size of the first-order array depends on the register file type.
+ * We need to multiply the index to the first array to get an effective,
+ * "flat" index that points to the beginning of the second-order array.
+ */
+ switch (reg->SrcRegister.File) {
case TGSI_FILE_INPUT:
- index.i[0] *= 17;
- index.i[1] *= 17;
- index.i[2] *= 17;
- index.i[3] *= 17;
+ index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
+ index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
break;
case TGSI_FILE_CONSTANT:
- index.i[0] *= 4096;
- index.i[1] *= 4096;
- index.i[2] *= 4096;
- index.i[3] *= 4096;
+ index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER;
+ index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER;
break;
default:
assert( 0 );
index.i[2] += reg->SrcRegisterDim.Index;
index.i[3] += reg->SrcRegisterDim.Index;
+ /* Again, the second subscript index can be addressed indirectly
+ * identically to the first one.
+ * Nothing stops us from indirectly addressing the indirect register,
+ * but there is no need for that, so we won't exercise it.
+ *
+ * file[1][ind[4].y+3],
+ * where:
+ * ind = SrcRegisterDimInd.File
+ * [4] = SrcRegisterDimInd.Index
+ * .y = SrcRegisterDimInd.SwizzleX
+ */
if (reg->SrcRegisterDim.Indirect) {
union tgsi_exec_channel index2;
union tgsi_exec_channel indir_index;
&index2,
&indir_index );
- index.i[0] += indir_index.i[0];
- index.i[1] += indir_index.i[1];
- index.i[2] += indir_index.i[2];
- index.i[3] += indir_index.i[3];
+ index.i[0] += (int) indir_index.f[0];
+ index.i[1] += (int) indir_index.f[1];
+ index.i[2] += (int) indir_index.f[2];
+ index.i[3] += (int) indir_index.f[3];
/* for disabled execution channels, zero-out the index to
* avoid using a potential garbage value.
index.i[i] = 0;
}
}
+
+ /* If by any chance there was a need for a 3D array of register
+ * files, we would have to check whether SrcRegisterDim is followed
+ * by a dimension register and continue the saga.
+ */
}
swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
union tgsi_exec_channel null;
union tgsi_exec_channel *dst;
uint execmask = mach->ExecMask;
+ int offset = 0; /* indirection offset */
+ int index;
+
+#ifdef DEBUG
+ check_inf_or_nan(chan);
+#endif
+
+ /* There is an extra source register that indirectly subscripts
+ * a register file. The direct index now becomes an offset
+ * that is being added to the indirect register.
+ *
+ * file[ind[2].x+1],
+ * where:
+ * ind = DstRegisterInd.File
+ * [2] = DstRegisterInd.Index
+ * .x = DstRegisterInd.SwizzleX
+ */
+ if (reg->DstRegister.Indirect) {
+ union tgsi_exec_channel index;
+ union tgsi_exec_channel indir_index;
+ uint swizzle;
+
+ /* which address register (always zero for now) */
+ index.i[0] =
+ index.i[1] =
+ index.i[2] =
+ index.i[3] = reg->DstRegisterInd.Index;
+
+ /* get current value of address register[swizzle] */
+ swizzle = tgsi_util_get_src_register_swizzle( ®->DstRegisterInd, CHAN_X );
+
+ /* fetch values from the address/indirection register */
+ fetch_src_file_channel(
+ mach,
+ reg->DstRegisterInd.File,
+ swizzle,
+ &index,
+ &indir_index );
+
+ /* save indirection offset */
+ offset = (int) indir_index.f[0];
+ }
switch (reg->DstRegister.File) {
case TGSI_FILE_NULL:
break;
case TGSI_FILE_OUTPUT:
- dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
- + reg->DstRegister.Index].xyzw[chan_index];
+ index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
+ + reg->DstRegister.Index;
+ dst = &mach->Outputs[offset + index].xyzw[chan_index];
break;
case TGSI_FILE_TEMPORARY:
- assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
- dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
+ index = reg->DstRegister.Index;
+ assert( index < TGSI_EXEC_NUM_TEMPS );
+ dst = &mach->Temps[offset + index].xyzw[chan_index];
break;
case TGSI_FILE_ADDRESS:
- dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
+ index = reg->DstRegister.Index;
+ dst = &mach->Addrs[index].xyzw[chan_index];
break;
default:
/*
- * Fetch a texel using STR texture coordinates.
+ * Fetch a four texture samples using STR texture coordinates.
*/
static void
fetch_texel( struct tgsi_sampler *sampler,
boolean projected)
{
const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
- union tgsi_exec_channel r[8];
+ union tgsi_exec_channel r[4];
uint chan_index;
float lodBias;
switch (inst->InstructionExtTexture.Texture) {
case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
FETCH(&r[0], 0, CHAN_X);
else
lodBias = 0.0;
- fetch_texel(&mach->Samplers[unit],
- &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
+ fetch_texel(mach->Samplers[unit],
+ &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */
&r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
break;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
FETCH(&r[0], 0, CHAN_X);
FETCH(&r[1], 0, CHAN_Y);
else
lodBias = 0.0;
- fetch_texel(&mach->Samplers[unit],
+ fetch_texel(mach->Samplers[unit],
&r[0], &r[1], &r[2], lodBias, /* inputs */
&r[0], &r[1], &r[2], &r[3]); /* outputs */
break;
else
lodBias = 0.0;
- fetch_texel(&mach->Samplers[unit],
+ fetch_texel(mach->Samplers[unit],
&r[0], &r[1], &r[2], lodBias,
&r[0], &r[1], &r[2], &r[3]);
break;
int *pc )
{
uint chan_index;
- union tgsi_exec_channel r[8];
+ union tgsi_exec_channel r[10];
(*pc)++;
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
+ case TGSI_OPCODE_FLR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_f2it( &r[0], &r[0] );
+ micro_flr( &r[0], &r[0] );
STORE( &r[0], 0, chan_index );
}
break;
case TGSI_OPCODE_LIT:
if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[0], 0, CHAN_X );
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, CHAN_Y );
- }
+ FETCH( &r[0], 0, CHAN_X );
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+ micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+ STORE( &r[0], 0, CHAN_Y );
+ }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[1], 0, CHAN_Y );
- micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ FETCH( &r[1], 0, CHAN_Y );
+ micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- FETCH( &r[2], 0, CHAN_W );
- micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
- micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
- micro_pow( &r[1], &r[1], &r[2] );
- micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, CHAN_Z );
- }
+ FETCH( &r[2], 0, CHAN_W );
+ micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
+ micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
+ micro_pow( &r[1], &r[1], &r[2] );
+ micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+ STORE( &r[0], 0, CHAN_Z );
+ }
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
}
break;
FETCH( &r[0], 0, CHAN_X );
micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
}
break;
case TGSI_OPCODE_RSQ:
/* TGSI_OPCODE_RECIPSQRT */
FETCH( &r[0], 0, CHAN_X );
+ micro_abs( &r[0], &r[0] );
micro_sqrt( &r[0], &r[0] );
micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
}
break;
micro_add( &r[0], &r[0], &r[1] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
}
break;
case TGSI_OPCODE_DST:
if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- FETCH( &r[0], 0, CHAN_Y );
- FETCH( &r[1], 1, CHAN_Y);
- micro_mul( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, CHAN_Y );
+ FETCH( &r[0], 0, CHAN_Y );
+ FETCH( &r[1], 1, CHAN_Y);
+ micro_mul( &r[0], &r[0], &r[1] );
+ STORE( &r[0], 0, CHAN_Y );
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[0], 0, CHAN_Z );
- STORE( &r[0], 0, CHAN_Z );
+ FETCH( &r[0], 0, CHAN_Z );
+ STORE( &r[0], 0, CHAN_Z );
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- FETCH( &r[0], 1, CHAN_W );
- STORE( &r[0], 0, CHAN_W );
+ FETCH( &r[0], 1, CHAN_W );
+ STORE( &r[0], 0, CHAN_W );
}
break;
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+ micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
STORE( &r[0], 0, chan_index );
}
break;
}
break;
- case TGSI_OPCODE_LERP:
- /* TGSI_OPCODE_LRP */
+ case TGSI_OPCODE_LRP:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
break;
case TGSI_OPCODE_CND:
- assert (0);
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+ FETCH(&r[2], 2, chan_index);
+ micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
+ STORE(&r[0], 0, chan_index);
+ }
break;
case TGSI_OPCODE_CND0:
- assert (0);
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+ FETCH(&r[2], 2, chan_index);
+ micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]);
+ STORE(&r[0], 0, chan_index);
+ }
break;
- case TGSI_OPCODE_DOT2ADD:
- /* TGSI_OPCODE_DP2A */
- assert (0);
- break;
+ case TGSI_OPCODE_DP2A:
+ FETCH( &r[0], 0, CHAN_X );
+ FETCH( &r[1], 1, CHAN_X );
+ micro_mul( &r[0], &r[0], &r[1] );
- case TGSI_OPCODE_INDEX:
- assert (0);
- break;
+ FETCH( &r[1], 0, CHAN_Y );
+ FETCH( &r[2], 1, CHAN_Y );
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
- case TGSI_OPCODE_NEGATE:
- assert (0);
+ FETCH( &r[2], 2, CHAN_X );
+ micro_add( &r[0], &r[0], &r[2] );
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
break;
- case TGSI_OPCODE_FRAC:
- /* TGSI_OPCODE_FRC */
+ case TGSI_OPCODE_FRC:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
micro_frc( &r[0], &r[0] );
break;
case TGSI_OPCODE_CLAMP:
- assert (0);
- break;
-
- case TGSI_OPCODE_FLOOR:
- /* TGSI_OPCODE_FLR */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_flr( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+ micro_max(&r[0], &r[0], &r[1]);
+ FETCH(&r[1], 2, chan_index);
+ micro_min(&r[0], &r[0], &r[1]);
+ STORE(&r[0], 0, chan_index);
}
break;
case TGSI_OPCODE_ROUND:
+ case TGSI_OPCODE_ARR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
micro_rnd( &r[0], &r[0] );
}
break;
- case TGSI_OPCODE_EXPBASE2:
- /* TGSI_OPCODE_EX2 */
+ case TGSI_OPCODE_EX2:
FETCH(&r[0], 0, CHAN_X);
#if FAST_MATH
#endif
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
}
break;
- case TGSI_OPCODE_LOGBASE2:
- /* TGSI_OPCODE_LG2 */
+ case TGSI_OPCODE_LG2:
FETCH( &r[0], 0, CHAN_X );
micro_lg2( &r[0], &r[0] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
}
break;
- case TGSI_OPCODE_POWER:
- /* TGSI_OPCODE_POW */
+ case TGSI_OPCODE_POW:
FETCH(&r[0], 0, CHAN_X);
FETCH(&r[1], 1, CHAN_X);
micro_pow( &r[0], &r[0], &r[1] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
}
break;
- case TGSI_OPCODE_CROSSPRODUCT:
- /* TGSI_OPCODE_XPD */
+ case TGSI_OPCODE_XPD:
FETCH(&r[0], 0, CHAN_Y);
FETCH(&r[1], 1, CHAN_Z);
}
break;
- case TGSI_OPCODE_MULTIPLYMATRIX:
- assert (0);
- break;
-
case TGSI_OPCODE_ABS:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
break;
case TGSI_OPCODE_RCC:
- assert (0);
+ FETCH(&r[0], 0, CHAN_X);
+ micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
+ micro_float_clamp(&r[0], &r[0]);
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&r[0], 0, chan_index);
+ }
break;
case TGSI_OPCODE_DPH:
micro_add( &r[0], &r[0], &r[1] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
}
break;
micro_cos( &r[0], &r[0] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
}
break;
break;
case TGSI_OPCODE_RFL:
- assert (0);
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
+ IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
+ IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ /* r0 = dp3(src0, src0) */
+ FETCH(&r[2], 0, CHAN_X);
+ micro_mul(&r[0], &r[2], &r[2]);
+ FETCH(&r[4], 0, CHAN_Y);
+ micro_mul(&r[8], &r[4], &r[4]);
+ micro_add(&r[0], &r[0], &r[8]);
+ FETCH(&r[6], 0, CHAN_Z);
+ micro_mul(&r[8], &r[6], &r[6]);
+ micro_add(&r[0], &r[0], &r[8]);
+
+ /* r1 = dp3(src0, src1) */
+ FETCH(&r[3], 1, CHAN_X);
+ micro_mul(&r[1], &r[2], &r[3]);
+ FETCH(&r[5], 1, CHAN_Y);
+ micro_mul(&r[8], &r[4], &r[5]);
+ micro_add(&r[1], &r[1], &r[8]);
+ FETCH(&r[7], 1, CHAN_Z);
+ micro_mul(&r[8], &r[6], &r[7]);
+ micro_add(&r[1], &r[1], &r[8]);
+
+ /* r1 = 2 * r1 / r0 */
+ micro_add(&r[1], &r[1], &r[1]);
+ micro_div(&r[1], &r[1], &r[0]);
+
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ micro_mul(&r[2], &r[2], &r[1]);
+ micro_sub(&r[2], &r[2], &r[3]);
+ STORE(&r[2], 0, CHAN_X);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ micro_mul(&r[4], &r[4], &r[1]);
+ micro_sub(&r[4], &r[4], &r[5]);
+ STORE(&r[4], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ micro_mul(&r[6], &r[6], &r[1]);
+ micro_sub(&r[6], &r[6], &r[7]);
+ STORE(&r[6], 0, CHAN_Z);
+ }
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
+ }
break;
case TGSI_OPCODE_SEQ:
break;
case TGSI_OPCODE_SFL:
- assert (0);
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
+ }
break;
case TGSI_OPCODE_SGT:
break;
case TGSI_OPCODE_STR:
- assert (0);
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
+ }
break;
case TGSI_OPCODE_TEX:
break;
case TGSI_OPCODE_X2D:
- assert (0);
+ FETCH(&r[0], 1, CHAN_X);
+ FETCH(&r[1], 1, CHAN_Y);
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
+ IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ FETCH(&r[2], 2, CHAN_X);
+ micro_mul(&r[2], &r[2], &r[0]);
+ FETCH(&r[3], 2, CHAN_Y);
+ micro_mul(&r[3], &r[3], &r[1]);
+ micro_add(&r[2], &r[2], &r[3]);
+ FETCH(&r[3], 0, CHAN_X);
+ micro_add(&r[2], &r[2], &r[3]);
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ STORE(&r[2], 0, CHAN_X);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ STORE(&r[2], 0, CHAN_Z);
+ }
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
+ IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ FETCH(&r[2], 2, CHAN_Z);
+ micro_mul(&r[2], &r[2], &r[0]);
+ FETCH(&r[3], 2, CHAN_W);
+ micro_mul(&r[3], &r[3], &r[1]);
+ micro_add(&r[2], &r[2], &r[3]);
+ FETCH(&r[3], 0, CHAN_Y);
+ micro_add(&r[2], &r[2], &r[3]);
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ STORE(&r[2], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ STORE(&r[2], 0, CHAN_W);
+ }
+ }
break;
case TGSI_OPCODE_ARA:
assert (0);
break;
- case TGSI_OPCODE_ARR:
- assert (0);
- break;
-
case TGSI_OPCODE_BRA:
assert (0);
break;
break;
case TGSI_OPCODE_SSG:
- assert (0);
+ /* TGSI_OPCODE_SGN */
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ micro_sgn( &r[0], &r[0] );
+ STORE( &r[0], 0, chan_index );
+ }
break;
case TGSI_OPCODE_CMP:
case TGSI_OPCODE_SCS:
if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
FETCH( &r[0], 0, CHAN_X );
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
- micro_cos( &r[1], &r[0] );
- STORE( &r[1], 0, CHAN_X );
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
- micro_sin( &r[1], &r[0] );
- STORE( &r[1], 0, CHAN_Y );
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ micro_cos(&r[1], &r[0]);
+ STORE(&r[1], 0, CHAN_X);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ micro_sin(&r[1], &r[0]);
+ STORE(&r[1], 0, CHAN_Y);
+ }
}
if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
break;
case TGSI_OPCODE_NRM:
- assert (0);
+ /* 3-component vector normalize */
+ if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
+ IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
+ IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ /* r3 = sqrt(dp3(src0, src0)) */
+ FETCH(&r[0], 0, CHAN_X);
+ micro_mul(&r[3], &r[0], &r[0]);
+ FETCH(&r[1], 0, CHAN_Y);
+ micro_mul(&r[4], &r[1], &r[1]);
+ micro_add(&r[3], &r[3], &r[4]);
+ FETCH(&r[2], 0, CHAN_Z);
+ micro_mul(&r[4], &r[2], &r[2]);
+ micro_add(&r[3], &r[3], &r[4]);
+ micro_sqrt(&r[3], &r[3]);
+
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ micro_div(&r[0], &r[0], &r[3]);
+ STORE(&r[0], 0, CHAN_X);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ micro_div(&r[1], &r[1], &r[3]);
+ STORE(&r[1], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ micro_div(&r[2], &r[2], &r[3]);
+ STORE(&r[2], 0, CHAN_Z);
+ }
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
+ }
+ break;
+
+ case TGSI_OPCODE_NRM4:
+ /* 4-component vector normalize */
+ {
+ union tgsi_exec_channel tmp, dot;
+
+ /* tmp = dp4(src0, src0): */
+ FETCH( &r[0], 0, CHAN_X );
+ micro_mul( &tmp, &r[0], &r[0] );
+
+ FETCH( &r[1], 0, CHAN_Y );
+ micro_mul( &dot, &r[1], &r[1] );
+ micro_add( &tmp, &tmp, &dot );
+
+ FETCH( &r[2], 0, CHAN_Z );
+ micro_mul( &dot, &r[2], &r[2] );
+ micro_add( &tmp, &tmp, &dot );
+
+ FETCH( &r[3], 0, CHAN_W );
+ micro_mul( &dot, &r[3], &r[3] );
+ micro_add( &tmp, &tmp, &dot );
+
+ /* tmp = 1 / sqrt(tmp) */
+ micro_sqrt( &tmp, &tmp );
+ micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ /* chan = chan * tmp */
+ micro_mul( &r[chan_index], &tmp, &r[chan_index] );
+ STORE( &r[chan_index], 0, chan_index );
+ }
+ }
break;
case TGSI_OPCODE_DIV:
return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
}
-
-