#include "pipe/p_compiler.h"
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_exec.h"
#define FAST_MATH 1
+/** for tgsi_full_instruction::Flags */
+#define SOA_DEPENDENCY_FLAG 0x1
+
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0 TGSI_EXEC_TEMP_R0
+#define TEMP_P0 TGSI_EXEC_TEMP_P0
#define IS_CHANNEL_ENABLED(INST, CHAN)\
((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
#endif
+/**
+ * Check if there's a potential src/dst register data dependency when
+ * using SOA execution.
+ * Example:
+ * MOV T, T.yxwz;
+ * This would expand into:
+ * MOV t0, t1;
+ * MOV t1, t0;
+ * MOV t2, t3;
+ * MOV t3, t2;
+ * The second instruction will have the wrong value for t0 if executed as-is.
+ */
+boolean
+tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
+{
+ uint i, chan;
+
+ uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+ if (writemask == TGSI_WRITEMASK_X ||
+ writemask == TGSI_WRITEMASK_Y ||
+ writemask == TGSI_WRITEMASK_Z ||
+ writemask == TGSI_WRITEMASK_W ||
+ writemask == TGSI_WRITEMASK_NONE) {
+ /* no chance of data dependency */
+ return FALSE;
+ }
+
+ /* loop over src regs */
+ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ if ((inst->FullSrcRegisters[i].SrcRegister.File ==
+ inst->FullDstRegisters[0].DstRegister.File) &&
+ (inst->FullSrcRegisters[i].SrcRegister.Index ==
+ inst->FullDstRegisters[0].DstRegister.Index)) {
+ /* loop over dest channels */
+ uint channelsWritten = 0x0;
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
+ /* check if we're reading a channel that's been written */
+ uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->FullSrcRegisters[i], chan);
+ if (channelsWritten & (1 << swizzle)) {
+ return TRUE;
+ }
+
+ channelsWritten |= (1 << chan);
+ }
+ }
+ }
+ return FALSE;
+}
+
/**
* Initialize machine state by expanding tokens to full instructions,
case TGSI_TOKEN_TYPE_IMMEDIATE:
{
uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
- assert( size % 4 == 0 );
- assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
+ assert( size <= 4 );
+ assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
for( i = 0; i < size; i++ ) {
- mach->Imms[mach->ImmLimit + i / 4][i % 4] =
- parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+ mach->Imms[mach->ImmLimit][i] =
+ parse.FullToken.FullImmediate.u[i].Float;
}
- mach->ImmLimit += size / 4;
+ mach->ImmLimit += 1;
}
break;
* sizeof(struct tgsi_full_instruction));
maxInstructions += 10;
}
+
+ if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) {
+ uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
+ parse.FullToken.FullInstruction.Flags = SOA_DEPENDENCY_FLAG;
+ /* XXX we only handle SOA dependencies properly for MOV/SWZ
+ * at this time!
+ */
+ if (opcode != TGSI_OPCODE_MOV) {
+ debug_printf("Warning: SOA dependency in instruction"
+ " is not handled:\n");
+ tgsi_dump_instruction(&parse.FullToken.FullInstruction,
+ numInstructions);
+ }
+ }
+
memcpy(instructions + numInstructions,
&parse.FullToken.FullInstruction,
sizeof(instructions[0]));
+
numInstructions++;
break;
}
-void
-tgsi_exec_machine_init(
- struct tgsi_exec_machine *mach )
+struct tgsi_exec_machine *
+tgsi_exec_machine_create( void )
{
+ struct tgsi_exec_machine *mach;
uint i;
- mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
+ mach = align_malloc( sizeof *mach, 16 );
+ if (!mach)
+ goto fail;
+
+ memset(mach, 0, sizeof(*mach));
+
mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
/* Setup constants. */
(void) print_chan;
(void) print_temp;
#endif
+
+ return mach;
+
+fail:
+ align_free(mach);
+ return NULL;
}
void
-tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
+tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
{
- if (mach->Instructions) {
+ if (mach) {
FREE(mach->Instructions);
- mach->Instructions = NULL;
- mach->NumInstructions = 0;
- }
- if (mach->Declarations) {
FREE(mach->Declarations);
- mach->Declarations = NULL;
- mach->NumDeclarations = 0;
}
+
+ align_free(mach);
}
union tgsi_exec_channel *chan )
{
switch( swizzle ) {
- case TGSI_EXTSWIZZLE_X:
- case TGSI_EXTSWIZZLE_Y:
- case TGSI_EXTSWIZZLE_Z:
- case TGSI_EXTSWIZZLE_W:
+ case TGSI_SWIZZLE_X:
+ case TGSI_SWIZZLE_Y:
+ case TGSI_SWIZZLE_Z:
+ case TGSI_SWIZZLE_W:
switch( file ) {
case TGSI_FILE_CONSTANT:
assert(mach->Consts);
chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
break;
+ case TGSI_FILE_PREDICATE:
+ assert(index->i[0] < TGSI_EXEC_NUM_PREDS);
+ assert(index->i[1] < TGSI_EXEC_NUM_PREDS);
+ assert(index->i[2] < TGSI_EXEC_NUM_PREDS);
+ assert(index->i[3] < TGSI_EXEC_NUM_PREDS);
+ chan->u[0] = mach->Addrs[0].xyzw[swizzle].u[0];
+ chan->u[1] = mach->Addrs[0].xyzw[swizzle].u[1];
+ chan->u[2] = mach->Addrs[0].xyzw[swizzle].u[2];
+ chan->u[3] = mach->Addrs[0].xyzw[swizzle].u[3];
+ break;
+
case TGSI_FILE_OUTPUT:
/* vertex/fragment output vars can be read too */
chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
}
break;
- case TGSI_EXTSWIZZLE_ZERO:
- *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
- break;
-
default:
assert( 0 );
}
*/
}
- swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+ swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
fetch_src_file_channel(
mach,
reg->SrcRegister.File,
union tgsi_exec_channel null;
union tgsi_exec_channel *dst;
uint execmask = mach->ExecMask;
+ int offset = 0; /* indirection offset */
+ int index;
#ifdef DEBUG
check_inf_or_nan(chan);
#endif
+ /* There is an extra source register that indirectly subscripts
+ * a register file. The direct index now becomes an offset
+ * that is being added to the indirect register.
+ *
+ * file[ind[2].x+1],
+ * where:
+ * ind = DstRegisterInd.File
+ * [2] = DstRegisterInd.Index
+ * .x = DstRegisterInd.SwizzleX
+ */
+ if (reg->DstRegister.Indirect) {
+ union tgsi_exec_channel index;
+ union tgsi_exec_channel indir_index;
+ uint swizzle;
+
+ /* which address register (always zero for now) */
+ index.i[0] =
+ index.i[1] =
+ index.i[2] =
+ index.i[3] = reg->DstRegisterInd.Index;
+
+ /* get current value of address register[swizzle] */
+ swizzle = tgsi_util_get_src_register_swizzle( ®->DstRegisterInd, CHAN_X );
+
+ /* fetch values from the address/indirection register */
+ fetch_src_file_channel(
+ mach,
+ reg->DstRegisterInd.File,
+ swizzle,
+ &index,
+ &indir_index );
+
+ /* save indirection offset */
+ offset = (int) indir_index.f[0];
+ }
+
switch (reg->DstRegister.File) {
case TGSI_FILE_NULL:
dst = &null;
break;
case TGSI_FILE_OUTPUT:
- dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
- + reg->DstRegister.Index].xyzw[chan_index];
+ index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
+ + reg->DstRegister.Index;
+ dst = &mach->Outputs[offset + index].xyzw[chan_index];
break;
case TGSI_FILE_TEMPORARY:
- assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
- dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
+ index = reg->DstRegister.Index;
+ assert( index < TGSI_EXEC_NUM_TEMPS );
+ dst = &mach->Temps[offset + index].xyzw[chan_index];
break;
case TGSI_FILE_ADDRESS:
- dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
+ index = reg->DstRegister.Index;
+ dst = &mach->Addrs[index].xyzw[chan_index];
+ break;
+
+ case TGSI_FILE_PREDICATE:
+ index = reg->DstRegister.Index;
+ assert(index < TGSI_EXEC_NUM_PREDS);
+ dst = &mach->Addrs[index].xyzw[chan_index];
break;
default:
return;
}
- if (inst->InstructionExtNv.CondFlowEnable) {
- union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
- uint swizzle;
- uint shift;
- uint mask;
- uint test;
-
- /* Only CC0 supported.
- */
- assert( inst->InstructionExtNv.CondFlowIndex < 1 );
-
- switch (chan_index) {
- case CHAN_X:
- swizzle = inst->InstructionExtNv.CondSwizzleX;
- break;
- case CHAN_Y:
- swizzle = inst->InstructionExtNv.CondSwizzleY;
- break;
- case CHAN_Z:
- swizzle = inst->InstructionExtNv.CondSwizzleZ;
- break;
- case CHAN_W:
- swizzle = inst->InstructionExtNv.CondSwizzleW;
- break;
- default:
- assert( 0 );
- return;
- }
-
- switch (swizzle) {
- case TGSI_SWIZZLE_X:
- shift = TGSI_EXEC_CC_X_SHIFT;
- mask = TGSI_EXEC_CC_X_MASK;
- break;
- case TGSI_SWIZZLE_Y:
- shift = TGSI_EXEC_CC_Y_SHIFT;
- mask = TGSI_EXEC_CC_Y_MASK;
- break;
- case TGSI_SWIZZLE_Z:
- shift = TGSI_EXEC_CC_Z_SHIFT;
- mask = TGSI_EXEC_CC_Z_MASK;
- break;
- case TGSI_SWIZZLE_W:
- shift = TGSI_EXEC_CC_W_SHIFT;
- mask = TGSI_EXEC_CC_W_MASK;
- break;
- default:
- assert( 0 );
- return;
- }
-
- switch (inst->InstructionExtNv.CondMask) {
- case TGSI_CC_GT:
- test = ~(TGSI_EXEC_CC_GT << shift) & mask;
- for (i = 0; i < QUAD_SIZE; i++)
- if (cc->u[i] & test)
- execmask &= ~(1 << i);
- break;
-
- case TGSI_CC_EQ:
- test = ~(TGSI_EXEC_CC_EQ << shift) & mask;
- for (i = 0; i < QUAD_SIZE; i++)
- if (cc->u[i] & test)
- execmask &= ~(1 << i);
- break;
-
- case TGSI_CC_LT:
- test = ~(TGSI_EXEC_CC_LT << shift) & mask;
- for (i = 0; i < QUAD_SIZE; i++)
- if (cc->u[i] & test)
- execmask &= ~(1 << i);
- break;
-
- case TGSI_CC_GE:
- test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask;
- for (i = 0; i < QUAD_SIZE; i++)
- if (cc->u[i] & test)
- execmask &= ~(1 << i);
- break;
-
- case TGSI_CC_LE:
- test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask;
- for (i = 0; i < QUAD_SIZE; i++)
- if (cc->u[i] & test)
- execmask &= ~(1 << i);
- break;
-
- case TGSI_CC_NE:
- test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask;
- for (i = 0; i < QUAD_SIZE; i++)
- if (cc->u[i] & test)
- execmask &= ~(1 << i);
- break;
-
- case TGSI_CC_TR:
- break;
-
- case TGSI_CC_FL:
- for (i = 0; i < QUAD_SIZE; i++)
- execmask &= ~(1 << i);
- break;
-
- default:
- assert( 0 );
- return;
- }
- }
-
switch (inst->Instruction.Saturate) {
case TGSI_SAT_NONE:
for (i = 0; i < QUAD_SIZE; i++)
default:
assert( 0 );
}
-
- if (inst->InstructionExtNv.CondDstUpdate) {
- union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
- uint shift;
- uint mask;
-
- /* Only CC0 supported.
- */
- assert( inst->InstructionExtNv.CondDstIndex < 1 );
-
- switch (chan_index) {
- case CHAN_X:
- shift = TGSI_EXEC_CC_X_SHIFT;
- mask = ~TGSI_EXEC_CC_X_MASK;
- break;
- case CHAN_Y:
- shift = TGSI_EXEC_CC_Y_SHIFT;
- mask = ~TGSI_EXEC_CC_Y_MASK;
- break;
- case CHAN_Z:
- shift = TGSI_EXEC_CC_Z_SHIFT;
- mask = ~TGSI_EXEC_CC_Z_MASK;
- break;
- case CHAN_W:
- shift = TGSI_EXEC_CC_W_SHIFT;
- mask = ~TGSI_EXEC_CC_W_MASK;
- break;
- default:
- assert( 0 );
- return;
- }
-
- for (i = 0; i < QUAD_SIZE; i++)
- if (execmask & (1 << i)) {
- cc->u[i] &= mask;
- if (dst->f[i] < 0.0f)
- cc->u[i] |= TGSI_EXEC_CC_LT << shift;
- else if (dst->f[i] > 0.0f)
- cc->u[i] |= TGSI_EXEC_CC_GT << shift;
- else if (dst->f[i] == 0.0f)
- cc->u[i] |= TGSI_EXEC_CC_EQ << shift;
- else
- cc->u[i] |= TGSI_EXEC_CC_UN << shift;
- }
- }
}
#define FETCH(VAL,INDEX,CHAN)\
uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
union tgsi_exec_channel r[1];
- /* This mask stores component bits that were already tested. Note that
- * we test if the value is less than zero, so 1.0 and 0.0 need not to be
- * tested. */
- uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
+ /* This mask stores component bits that were already tested. */
+ uniquemask = 0;
for (chan_index = 0; chan_index < 4; chan_index++)
{
uint i;
/* unswizzle channel */
- swizzle = tgsi_util_get_full_src_register_extswizzle (
+ swizzle = tgsi_util_get_full_src_register_swizzle (
&inst->FullSrcRegisters[0],
chan_index);
{
uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
- if (inst->InstructionExtNv.CondFlowEnable) {
- uint swizzle[4];
- uint chan_index;
-
- kilmask = 0x0;
-
- swizzle[0] = inst->InstructionExtNv.CondSwizzleX;
- swizzle[1] = inst->InstructionExtNv.CondSwizzleY;
- swizzle[2] = inst->InstructionExtNv.CondSwizzleZ;
- swizzle[3] = inst->InstructionExtNv.CondSwizzleW;
-
- for (chan_index = 0; chan_index < 4; chan_index++)
- {
- uint i;
-
- for (i = 0; i < 4; i++) {
- /* TODO: evaluate the condition code */
- if (0)
- kilmask |= 1 << i;
- }
- }
- }
- else {
- /* "unconditional" kil */
- kilmask = mach->ExecMask;
- }
+ /* "unconditional" kil */
+ kilmask = mach->ExecMask;
mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
}
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
- case TGSI_OPCODE_FLOOR:
- /* TGSI_OPCODE_FLR */
+ case TGSI_OPCODE_FLR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
micro_flr( &r[0], &r[0] );
break;
case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SWZ:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- STORE( &r[0], 0, chan_index );
+ if (inst->Flags & SOA_DEPENDENCY_FLAG) {
+ /* Do all fetches into temp regs, then do all stores to avoid
+ * intermediate/accidental clobbering. This could be done all the
+ * time for MOV but for other instructions we'll need more temps...
+ */
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[chan_index], 0, chan_index );
+ }
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[chan_index], 0, chan_index );
+ }
+ }
+ else {
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
+ }
}
break;
}
break;
- case TGSI_OPCODE_LERP:
- /* TGSI_OPCODE_LRP */
+ case TGSI_OPCODE_LRP:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
}
break;
- case TGSI_OPCODE_CND0:
- FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
- FETCH(&r[2], 2, chan_index);
- micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]);
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_DOT2ADD:
- /* TGSI_OPCODE_DP2A */
+ case TGSI_OPCODE_DP2A:
FETCH( &r[0], 0, CHAN_X );
FETCH( &r[1], 1, CHAN_X );
micro_mul( &r[0], &r[0], &r[1] );
}
break;
- case TGSI_OPCODE_INDEX:
- /* XXX: considered for removal */
- assert (0);
- break;
-
- case TGSI_OPCODE_NEGATE:
- /* XXX: considered for removal */
- assert (0);
- break;
-
- case TGSI_OPCODE_FRAC:
- /* TGSI_OPCODE_FRC */
+ case TGSI_OPCODE_FRC:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
micro_frc( &r[0], &r[0] );
}
break;
- case TGSI_OPCODE_EXPBASE2:
- /* TGSI_OPCODE_EX2 */
+ case TGSI_OPCODE_EX2:
FETCH(&r[0], 0, CHAN_X);
#if FAST_MATH
}
break;
- case TGSI_OPCODE_LOGBASE2:
- /* TGSI_OPCODE_LG2 */
+ case TGSI_OPCODE_LG2:
FETCH( &r[0], 0, CHAN_X );
micro_lg2( &r[0], &r[0] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
}
break;
- case TGSI_OPCODE_POWER:
- /* TGSI_OPCODE_POW */
+ case TGSI_OPCODE_POW:
FETCH(&r[0], 0, CHAN_X);
FETCH(&r[1], 1, CHAN_X);
}
break;
- case TGSI_OPCODE_CROSSPRODUCT:
- /* TGSI_OPCODE_XPD */
+ case TGSI_OPCODE_XPD:
FETCH(&r[0], 0, CHAN_Y);
FETCH(&r[1], 1, CHAN_Z);
}
break;
- case TGSI_OPCODE_MULTIPLYMATRIX:
- /* XXX: considered for removal */
- assert (0);
- break;
-
case TGSI_OPCODE_ABS:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
if (mach->ExecMask) {
/* do the call */
- /* push the Cond, Loop, Cont stacks */
+ /* First, record the depths of the execution stacks.
+ * This is important for deeply nested/looped return statements.
+ * We have to unwind the stacks by the correct amount. For a
+ * real code generator, we could determine the number of entries
+ * to pop off each stack with simple static analysis and avoid
+ * implementing this data structure at run time.
+ */
+ mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
+ mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
+ mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
+ /* note that PC was already incremented above */
+ mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
+
+ mach->CallStackTop++;
+
+ /* Second, push the Cond, Loop, Cont, Func stacks */
assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
mach->CondStack[mach->CondStackTop++] = mach->CondMask;
assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->ContStack[mach->ContStackTop++] = mach->ContMask;
-
assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
- /* note that PC was already incremented above */
- mach->CallStack[mach->CallStackTop++] = *pc;
+ /* Finally, jump to the subroutine */
*pc = inst->InstructionExtLabel.Label;
}
break;
*pc = -1;
return;
}
- *pc = mach->CallStack[--mach->CallStackTop];
- /* pop the Cond, Loop, Cont stacks */
- assert(mach->CondStackTop > 0);
- mach->CondMask = mach->CondStack[--mach->CondStackTop];
- assert(mach->LoopStackTop > 0);
- mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
- assert(mach->ContStackTop > 0);
- mach->ContMask = mach->ContStack[--mach->ContStackTop];
+ assert(mach->CallStackTop > 0);
+ mach->CallStackTop--;
+
+ mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
+ mach->CondMask = mach->CondStack[mach->CondStackTop];
+
+ mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
+ mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
+
+ mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
+ mach->ContMask = mach->ContStack[mach->ContStackTop];
+
assert(mach->FuncStackTop > 0);
mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
+ *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
+
UPDATE_EXEC_MASK(mach);
}
break;
mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
break;
- case TGSI_OPCODE_LOOP:
+ case TGSI_OPCODE_BGNFOR:
+ assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ for (chan_index = 0; chan_index < 3; chan_index++) {
+ FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
+ }
+ STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
+ ++mach->LoopCounterStackTop;
/* fall-through (for now) */
- case TGSI_OPCODE_BGNLOOP2:
+ case TGSI_OPCODE_BGNLOOP:
/* push LoopMask and ContMasks */
assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->ContStack[mach->ContStackTop++] = mach->ContMask;
+ assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
+ break;
+
+ case TGSI_OPCODE_ENDFOR:
+ assert(mach->LoopCounterStackTop > 0);
+ micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
+ &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+ /* update LoopMask */
+ if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) {
+ mach->LoopMask &= ~0x1;
+ }
+ if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) {
+ mach->LoopMask &= ~0x2;
+ }
+ if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) {
+ mach->LoopMask &= ~0x4;
+ }
+ if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) {
+ mach->LoopMask &= ~0x8;
+ }
+ micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
+ assert(mach->LoopLabelStackTop > 0);
+ inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
+ STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
+ /* Restore ContMask, but don't pop */
+ assert(mach->ContStackTop > 0);
+ mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
+ UPDATE_EXEC_MASK(mach);
+ if (mach->ExecMask) {
+ /* repeat loop: jump to instruction just past BGNLOOP */
+ assert(mach->LoopLabelStackTop > 0);
+ *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
+ }
+ else {
+ /* exit loop: pop LoopMask */
+ assert(mach->LoopStackTop > 0);
+ mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
+ /* pop ContMask */
+ assert(mach->ContStackTop > 0);
+ mach->ContMask = mach->ContStack[--mach->ContStackTop];
+ assert(mach->LoopLabelStackTop > 0);
+ --mach->LoopLabelStackTop;
+ assert(mach->LoopCounterStackTop > 0);
+ --mach->LoopCounterStackTop;
+ }
+ UPDATE_EXEC_MASK(mach);
break;
-
+
case TGSI_OPCODE_ENDLOOP:
- /* fall-through (for now at least) */
- case TGSI_OPCODE_ENDLOOP2:
/* Restore ContMask, but don't pop */
assert(mach->ContStackTop > 0);
mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
UPDATE_EXEC_MASK(mach);
if (mach->ExecMask) {
/* repeat loop: jump to instruction just past BGNLOOP */
- *pc = inst->InstructionExtLabel.Label + 1;
+ assert(mach->LoopLabelStackTop > 0);
+ *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
}
else {
/* exit loop: pop LoopMask */
/* pop ContMask */
assert(mach->ContStackTop > 0);
mach->ContMask = mach->ContStack[--mach->ContStackTop];
+ assert(mach->LoopLabelStackTop > 0);
+ --mach->LoopLabelStackTop;
}
UPDATE_EXEC_MASK(mach);
break;
/* no-op */
break;
- case TGSI_OPCODE_NOISE1:
- assert( 0 );
- break;
-
- case TGSI_OPCODE_NOISE2:
- assert( 0 );
- break;
-
- case TGSI_OPCODE_NOISE3:
- assert( 0 );
- break;
-
- case TGSI_OPCODE_NOISE4:
- assert( 0 );
- break;
-
case TGSI_OPCODE_NOP:
break;
mach->FuncMask = 0xf;
mach->ExecMask = 0xf;
- mach->CondStackTop = 0; /* temporarily subvert this assertion */
assert(mach->CondStackTop == 0);
assert(mach->LoopStackTop == 0);
assert(mach->ContStackTop == 0);