X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Ftgsi%2Ftgsi_exec.c;h=712e8aca794c4252804f8bb76f799bde68b3e60f;hb=636d01bd61cac83e13c3c64874e7e34e828ca93a;hp=11045e4ba2f7de98a74649678e5b084e4a1cddbf;hpb=64644ec3b21884d4a974fa29087fa98c4ed9e112;p=mesa.git diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 11045e4ba2f..712e8aca794 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -428,6 +428,24 @@ micro_sne(union tgsi_exec_channel *dst, dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f; } +static void +micro_sfl(union tgsi_exec_channel *dst) +{ + dst->f[0] = 0.0f; + dst->f[1] = 0.0f; + dst->f[2] = 0.0f; + dst->f[3] = 0.0f; +} + +static void +micro_str(union tgsi_exec_channel *dst) +{ + dst->f[0] = 1.0f; + dst->f[1] = 1.0f; + dst->f[2] = 1.0f; + dst->f[3] = 1.0f; +} + static void micro_trunc(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) @@ -453,50 +471,12 @@ enum tgsi_exec_datatype { /* * Shorthand locations of various utility registers (_I = Index, _C = Channel) */ -#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I -#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C -#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I -#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C -#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I -#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C -#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I -#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C -#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I -#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C -#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I -#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C -#define TEMP_128_I TGSI_EXEC_TEMP_128_I -#define TEMP_128_C TGSI_EXEC_TEMP_128_C -#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I -#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C #define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I #define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C #define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C -#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I -#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C -#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I -#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C -#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I -#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C -#define TEMP_R0 TGSI_EXEC_TEMP_R0 -#define TEMP_P0 TGSI_EXEC_TEMP_P0 - -#define IS_CHANNEL_ENABLED(INST, CHAN)\ - ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) - -#define IS_CHANNEL_ENABLED2(INST, CHAN)\ - ((INST).Dst[1].Register.WriteMask & (1 << (CHAN))) - -#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ - for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ - if (IS_CHANNEL_ENABLED( INST, CHAN )) - -#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ - for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ - if (IS_CHANNEL_ENABLED2( INST, CHAN )) /** The execution mask depends on the conditional mask and the loop mask */ @@ -511,6 +491,14 @@ static const union tgsi_exec_channel OneVec = { {1.0f, 1.0f, 1.0f, 1.0f} }; +static const union tgsi_exec_channel P128Vec = { + {128.0f, 128.0f, 128.0f, 128.0f} +}; + +static const union tgsi_exec_channel M128Vec = { + {-128.0f, -128.0f, -128.0f, -128.0f} +}; + /** * Assert that none of the float values in 'chan' are infinite or NaN. @@ -557,6 +545,21 @@ print_temp(const struct tgsi_exec_machine *mach, uint index) #endif +void +tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, + unsigned num_bufs, + const void **bufs, + const unsigned *buf_sizes) +{ + unsigned i; + + for (i = 0; i < num_bufs; i++) { + mach->Consts[i] = bufs[i]; + mach->ConstsSize[i] = buf_sizes[i]; + } +} + + /** * Check if there's a potential src/dst register data dependency when * using SOA execution. @@ -588,18 +591,22 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { if ((inst->Src[i].Register.File == inst->Dst[0].Register.File) && - (inst->Src[i].Register.Index == - inst->Dst[0].Register.Index)) { + ((inst->Src[i].Register.Index == + inst->Dst[0].Register.Index) || + inst->Src[i].Register.Indirect || + inst->Dst[0].Register.Indirect)) { /* loop over dest channels */ uint channelsWritten = 0x0; - FOR_EACH_ENABLED_CHANNEL(*inst, chan) { - /* check if we're reading a channel that's been written */ - uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); - if (channelsWritten & (1 << swizzle)) { - return TRUE; - } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + /* check if we're reading a channel that's been written */ + uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); + if (channelsWritten & (1 << swizzle)) { + return TRUE; + } - channelsWritten |= (1 << chan); + channelsWritten |= (1 << chan); + } } } } @@ -621,12 +628,10 @@ tgsi_exec_machine_bind_shader( { uint k; struct tgsi_parse_context parse; - struct tgsi_exec_labels *labels = &mach->Labels; struct tgsi_full_instruction *instructions; struct tgsi_full_declaration *declarations; uint maxInstructions = 10, numInstructions = 0; uint maxDeclarations = 10, numDeclarations = 0; - uint instno = 0; #if 0 tgsi_dump(tokens, 0); @@ -634,9 +639,30 @@ tgsi_exec_machine_bind_shader( util_init_math(); + if (numSamplers) { + assert(samplers); + } + mach->Tokens = tokens; mach->Samplers = samplers; + if (!tokens) { + /* unbind and free all */ + if (mach->Declarations) { + FREE( mach->Declarations ); + } + mach->Declarations = NULL; + mach->NumDeclarations = 0; + + if (mach->Instructions) { + FREE( mach->Instructions ); + } + mach->Instructions = NULL; + mach->NumInstructions = 0; + + return; + } + k = tgsi_parse_init (&parse, mach->Tokens); if (k != TGSI_PARSE_OK) { debug_printf( "Problem parsing!\n" ); @@ -645,7 +671,31 @@ tgsi_exec_machine_bind_shader( mach->Processor = parse.FullHeader.Processor.Processor; mach->ImmLimit = 0; - labels->count = 0; + + if (mach->Processor == TGSI_PROCESSOR_GEOMETRY && + !mach->UsedGeometryShader) { + struct tgsi_exec_vector *inputs = + align_malloc(sizeof(struct tgsi_exec_vector) * + TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS, + 16); + struct tgsi_exec_vector *outputs = + align_malloc(sizeof(struct tgsi_exec_vector) * + TGSI_MAX_TOTAL_VERTICES, 16); + + if (!inputs) + return; + if (!outputs) { + align_free(inputs); + return; + } + + align_free(mach->Inputs); + align_free(mach->Outputs); + + mach->Inputs = inputs; + mach->Outputs = outputs; + mach->UsedGeometryShader = TRUE; + } declarations = (struct tgsi_full_declaration *) MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); @@ -663,7 +713,6 @@ tgsi_exec_machine_bind_shader( } while( !tgsi_parse_end_of_tokens( &parse ) ) { - uint pointer = parse.Position; uint i; tgsi_parse_token( &parse ); @@ -686,6 +735,19 @@ tgsi_exec_machine_bind_shader( ++mach->NumOutputs; } } + if (parse.FullToken.FullDeclaration.Declaration.File == + TGSI_FILE_IMMEDIATE_ARRAY) { + unsigned reg; + struct tgsi_full_declaration *decl = + &parse.FullToken.FullDeclaration; + debug_assert(decl->Range.Last < TGSI_EXEC_NUM_IMMEDIATES); + for (reg = decl->Range.First; reg <= decl->Range.Last; ++reg) { + for( i = 0; i < 4; i++ ) { + int idx = reg * 4 + i; + mach->ImmArray[reg][i] = decl->ImmediateData.u[idx].Float; + } + } + } memcpy(declarations + numDeclarations, &parse.FullToken.FullDeclaration, sizeof(declarations[0])); @@ -707,11 +769,6 @@ tgsi_exec_machine_bind_shader( break; case TGSI_TOKEN_TYPE_INSTRUCTION: - assert( labels->count < MAX_LABELS ); - - labels->labels[labels->count][0] = instno; - labels->labels[labels->count][1] = pointer; - labels->count++; /* save expanded instruction */ if (numInstructions == maxInstructions) { @@ -769,18 +826,23 @@ tgsi_exec_machine_create( void ) mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; - /* Setup constants. */ + mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16); + mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16); + if (!mach->Inputs || !mach->Outputs) + goto fail; + + /* Setup constants needed by the SSE2 executor. */ for( i = 0; i < 4; i++ ) { - mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; - mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; - mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; - mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; - mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; - mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; - mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; - mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; - mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; - mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; + mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000; + mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF; + mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000; + mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */ + mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f; + mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */ + mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f; + mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f; + mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f; + mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f; } #ifdef DEBUG @@ -792,7 +854,11 @@ tgsi_exec_machine_create( void ) return mach; fail: - align_free(mach); + if (mach) { + align_free(mach->Inputs); + align_free(mach->Outputs); + align_free(mach); + } return NULL; } @@ -801,11 +867,16 @@ void tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) { if (mach) { - FREE(mach->Instructions); - FREE(mach->Declarations); - } + if (mach->Instructions) + FREE(mach->Instructions); + if (mach->Declarations) + FREE(mach->Declarations); - align_free(mach); + align_free(mach->Inputs); + align_free(mach->Outputs); + + align_free(mach); + } } static void @@ -840,27 +911,35 @@ micro_div( } static void -micro_float_clamp(union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src) +micro_rcc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) { uint i; for (i = 0; i < 4; i++) { - if (src->f[i] > 0.0f) { - if (src->f[i] > 1.884467e+019f) + float recip = 1.0f / src->f[i]; + + if (recip > 0.0f) { + if (recip > 1.884467e+019f) { dst->f[i] = 1.884467e+019f; - else if (src->f[i] < 5.42101e-020f) + } + else if (recip < 5.42101e-020f) { dst->f[i] = 5.42101e-020f; - else - dst->f[i] = src->f[i]; + } + else { + dst->f[i] = recip; + } } else { - if (src->f[i] < -1.884467e+019f) + if (recip < -1.884467e+019f) { dst->f[i] = -1.884467e+019f; - else if (src->f[i] > -5.42101e-020f) + } + else if (recip > -5.42101e-020f) { dst->f[i] = -5.42101e-020f; - else - dst->f[i] = src->f[i]; + } + else { + dst->f[i] = recip; + } } } } @@ -912,60 +991,6 @@ micro_mul(union tgsi_exec_channel *dst, dst->f[3] = src0->f[3] * src1->f[3]; } -#if 0 -static void -micro_imul64( - union tgsi_exec_channel *dst0, - union tgsi_exec_channel *dst1, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst1->i[0] = src0->i[0] * src1->i[0]; - dst1->i[1] = src0->i[1] * src1->i[1]; - dst1->i[2] = src0->i[2] * src1->i[2]; - dst1->i[3] = src0->i[3] * src1->i[3]; - dst0->i[0] = 0; - dst0->i[1] = 0; - dst0->i[2] = 0; - dst0->i[3] = 0; -} -#endif - -#if 0 -static void -micro_umul64( - union tgsi_exec_channel *dst0, - union tgsi_exec_channel *dst1, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst1->u[0] = src0->u[0] * src1->u[0]; - dst1->u[1] = src0->u[1] * src1->u[1]; - dst1->u[2] = src0->u[2] * src1->u[2]; - dst1->u[3] = src0->u[3] * src1->u[3]; - dst0->u[0] = 0; - dst0->u[1] = 0; - dst0->u[2] = 0; - dst0->u[3] = 0; -} -#endif - - -#if 0 -static void -micro_movc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2 ) -{ - dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; - dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; - dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; - dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; -} -#endif - static void micro_neg( union tgsi_exec_channel *dst, @@ -1017,6 +1042,8 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, { uint i; + assert(swizzle < 4); + switch (file) { case TGSI_FILE_CONSTANT: for (i = 0; i < QUAD_SIZE; i++) { @@ -1026,18 +1053,48 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, if (index->i[i] < 0) { chan->u[i] = 0; } else { - const uint *p = (const uint *)mach->Consts[index2D->i[i]]; - - chan->u[i] = p[index->i[i] * 4 + swizzle]; + /* NOTE: copying the const value as a uint instead of float */ + const uint constbuf = index2D->i[i]; + const uint *buf = (const uint *)mach->Consts[constbuf]; + const int pos = index->i[i] * 4 + swizzle; + /* const buffer bounds check */ + if (pos < 0 || pos >= mach->ConstsSize[constbuf]) { + if (0) { + /* Debug: print warning */ + static int count = 0; + if (count++ < 100) + debug_printf("TGSI Exec: const buffer index %d" + " out of bounds\n", pos); + } + chan->u[i] = 0; + } + else + chan->u[i] = buf[pos]; } } break; case TGSI_FILE_INPUT: + for (i = 0; i < QUAD_SIZE; i++) { + /* + if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { + debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n", + index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], + index2D->i[i], index->i[i]); + }*/ + int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]; + assert(pos >= 0); + assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS); + chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i]; + } + break; + case TGSI_FILE_SYSTEM_VALUE: + /* XXX no swizzling at this point. Will be needed if we put + * gl_FragCoord, for example, in a sys value register. + */ for (i = 0; i < QUAD_SIZE; i++) { - /* XXX: 2D indexing */ - chan->u[i] = mach->Inputs[index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]].xyzw[swizzle].u[i]; + chan->f[i] = mach->SystemValue[index->i[i]][0]; } break; @@ -1050,6 +1107,16 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, } break; + case TGSI_FILE_TEMPORARY_ARRAY: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); + assert(index2D->i[i] < TGSI_EXEC_NUM_TEMP_ARRAYS); + + chan->u[i] = + mach->TempArray[index2D->i[i]][index->i[i]].xyzw[swizzle].u[i]; + } + break; + case TGSI_FILE_IMMEDIATE: for (i = 0; i < QUAD_SIZE; i++) { assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); @@ -1059,6 +1126,14 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, } break; + case TGSI_FILE_IMMEDIATE_ARRAY: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index2D->i[i] == 0); + + chan->f[i] = mach->ImmArray[index->i[i]][swizzle]; + } + break; + case TGSI_FILE_ADDRESS: for (i = 0; i < QUAD_SIZE; i++) { assert(index->i[i] >= 0); @@ -1139,7 +1214,7 @@ fetch_source(const struct tgsi_exec_machine *mach, index2.i[1] = index2.i[2] = index2.i[3] = reg->Indirect.Index; - + assert(reg->Indirect.File == TGSI_FILE_ADDRESS); /* get current value of address register[swizzle] */ swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); fetch_src_file_channel(mach, @@ -1270,6 +1345,7 @@ store_dest(struct tgsi_exec_machine *mach, uint i; union tgsi_exec_channel null; union tgsi_exec_channel *dst; + union tgsi_exec_channel index2D; uint execmask = mach->ExecMask; int offset = 0; /* indirection offset */ int index; @@ -1315,6 +1391,77 @@ store_dest(struct tgsi_exec_machine *mach, offset = indir_index.i[0]; } + /* There is an extra source register that is a second + * subscript to a register file. Effectively it means that + * the register file is actually a 2D array of registers. + * + * file[3][1], + * where: + * [3] = Dimension.Index + */ + if (reg->Register.Dimension) { + index2D.i[0] = + index2D.i[1] = + index2D.i[2] = + index2D.i[3] = reg->Dimension.Index; + + /* Again, the second subscript index can be addressed indirectly + * identically to the first one. + * Nothing stops us from indirectly addressing the indirect register, + * but there is no need for that, so we won't exercise it. + * + * file[ind[4].y+3][1], + * where: + * ind = DimIndirect.File + * [4] = DimIndirect.Index + * .y = DimIndirect.SwizzleX + */ + if (reg->Dimension.Indirect) { + union tgsi_exec_channel index2; + union tgsi_exec_channel indir_index; + const uint execmask = mach->ExecMask; + unsigned swizzle; + uint i; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->DimIndirect.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); + fetch_src_file_channel(mach, + reg->DimIndirect.File, + swizzle, + &index2, + &ZeroVec, + &indir_index); + + index2D.i[0] += indir_index.i[0]; + index2D.i[1] += indir_index.i[1]; + index2D.i[2] += indir_index.i[2]; + index2D.i[3] += indir_index.i[3]; + + /* for disabled execution channels, zero-out the index to + * avoid using a potential garbage value. + */ + for (i = 0; i < QUAD_SIZE; i++) { + if ((execmask & (1 << i)) == 0) { + index2D.i[i] = 0; + } + } + } + + /* If by any chance there was a need for a 3D array of register + * files, we would have to check whether Dimension is followed + * by a dimension register and continue the saga. + */ + } else { + index2D.i[0] = + index2D.i[1] = + index2D.i[2] = + index2D.i[3] = 0; + } + switch (reg->Register.File) { case TGSI_FILE_NULL: dst = &null; @@ -1341,16 +1488,19 @@ store_dest(struct tgsi_exec_machine *mach, dst = &mach->Temps[offset + index].xyzw[chan_index]; break; - case TGSI_FILE_ADDRESS: + case TGSI_FILE_TEMPORARY_ARRAY: index = reg->Register.Index; - dst = &mach->Addrs[index].xyzw[chan_index]; + assert( index < TGSI_EXEC_NUM_TEMPS ); + assert( index2D.i[0] < TGSI_EXEC_NUM_TEMP_ARRAYS ); + /* XXX we use index2D.i[0] here but somehow we might + * end up with someone trying to store indirectly in + * different buffers */ + dst = &mach->TempArray[index2D.i[0]][offset + index].xyzw[chan_index]; break; - case TGSI_FILE_LOOP: - assert(reg->Register.Index == 0); - assert(mach->LoopCounterStackTop > 0); - assert(chan_index == CHAN_X); - dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index]; + case TGSI_FILE_ADDRESS: + index = reg->Register.Index; + dst = &mach->Addrs[index].xyzw[chan_index]; break; case TGSI_FILE_PREDICATE: @@ -1444,9 +1594,6 @@ store_dest(struct tgsi_exec_machine *mach, #define FETCH(VAL,INDEX,CHAN)\ fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) -#define STORE(VAL,INDEX,CHAN)\ - store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT) - /** * Execute ARB-style KIL which is predicated by a src register. @@ -1533,6 +1680,19 @@ emit_primitive(struct tgsi_exec_machine *mach) } } +static void +conditional_emit_primitive(struct tgsi_exec_machine *mach) +{ + if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { + int emitted_verts = + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]; + if (emitted_verts) { + emit_primitive(mach); + } + } +} + + /* * Fetch four texture samples using STR texture coordinates. */ @@ -1577,7 +1737,7 @@ exec_tex(struct tgsi_exec_machine *mach, union tgsi_exec_channel r[4]; const union tgsi_exec_channel *lod = &ZeroVec; enum tgsi_sampler_control control; - uint chan_index; + uint chan; if (modifier != TEX_MODIFIER_NONE) { FETCH(&r[3], 0, CHAN_W); @@ -1627,6 +1787,36 @@ exec_tex(struct tgsi_exec_machine *mach, &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; + case TGSI_TEXTURE_1D_ARRAY: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + } + + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_2D_ARRAY: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + } + + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: FETCH(&r[0], 0, CHAN_X); @@ -1649,8 +1839,21 @@ exec_tex(struct tgsi_exec_machine *mach, assert(0); } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&r[chan_index], 0, chan_index); +#if 0 + debug_printf("fetch r: %g %g %g %g\n", + r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]); + debug_printf("fetch g: %g %g %g %g\n", + r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]); + debug_printf("fetch b: %g %g %g %g\n", + r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]); + debug_printf("fetch a: %g %g %g %g\n", + r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]); +#endif + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } } } @@ -1660,7 +1863,7 @@ exec_txd(struct tgsi_exec_machine *mach, { const uint unit = inst->Src[3].Register.Index; union tgsi_exec_channel r[4]; - uint chan_index; + uint chan; /* * XXX: This is fake TXD -- the derivatives are not taken into account, yet. @@ -1710,8 +1913,168 @@ exec_txd(struct tgsi_exec_machine *mach, assert(0); } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&r[chan_index], 0, chan_index); + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + + + +static void +exec_sample(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + uint modifier) +{ + const uint resource_unit = inst->Src[1].Register.Index; + const uint sampler_unit = inst->Src[2].Register.Index; + union tgsi_exec_channel r[4]; + const union tgsi_exec_channel *lod = &ZeroVec; + enum tgsi_sampler_control control; + uint chan; + + if (modifier != TEX_MODIFIER_NONE) { + if (modifier == TEX_MODIFIER_LOD_BIAS) + FETCH(&r[3], 3, CHAN_X); + else /*TEX_MODIFIER_LOD*/ + FETCH(&r[3], 0, CHAN_W); + + if (modifier != TEX_MODIFIER_PROJECTED) { + lod = &r[3]; + } + } + + if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { + control = tgsi_sampler_lod_explicit; + } else { + control = tgsi_sampler_lod_bias; + } + + switch (mach->Resources[resource_unit].Resource) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + FETCH(&r[0], 0, CHAN_X); + + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + } + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + micro_div(&r[2], &r[2], &r[3]); + } + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + micro_div(&r[2], &r[2], &r[3]); + } + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &r[1], &r[2], lod, + control, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert(0); + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void +exec_sample_d(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + const uint resource_unit = inst->Src[1].Register.Index; + const uint sampler_unit = inst->Src[2].Register.Index; + union tgsi_exec_channel r[4]; + uint chan; + /* + * XXX: This is fake SAMPLE_D -- the derivatives are not taken into account, yet. + */ + + switch (mach->Resources[resource_unit].Resource) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + + FETCH(&r[0], 0, CHAN_X); + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */ + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &r[1], &r[2], &ZeroVec, /* inputs */ + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + fetch_texel(mach->Samplers[sampler_unit], + &r[0], &r[1], &r[2], &ZeroVec, + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert(0); + } + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } } } @@ -1787,9 +2150,13 @@ static void exec_declaration(struct tgsi_exec_machine *mach, const struct tgsi_full_declaration *decl) { + if (decl->Declaration.File == TGSI_FILE_RESOURCE) { + mach->Resources[decl->Range.First] = decl->Resource; + return; + } + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { - if (decl->Declaration.File == TGSI_FILE_INPUT || - decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { + if (decl->Declaration.File == TGSI_FILE_INPUT) { uint first, last, mask; first = decl->Range.First; @@ -1802,6 +2169,7 @@ exec_declaration(struct tgsi_exec_machine *mach, * ureg code to emit the right UsageMask value (WRITEMASK_X). * Then, we could remove the tgsi_exec_machine::Face field. */ + /* XXX make FACE a system value */ if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { uint i; @@ -1843,6 +2211,31 @@ exec_declaration(struct tgsi_exec_machine *mach, } } } + + if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { + mach->SysSemanticToIndex[decl->Declaration.Semantic] = decl->Range.First; + } +} + + +typedef void (* micro_op)(union tgsi_exec_channel *dst); + +static void +exec_vector(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_op op, + enum tgsi_exec_datatype dst_datatype) +{ + unsigned int chan; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel dst; + + op(&dst); + store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); + } + } } typedef void (* micro_unary_op)(union tgsi_exec_channel *dst, @@ -1897,6 +2290,27 @@ typedef void (* micro_binary_op)(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1); +static void +exec_scalar_binary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_binary_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + union tgsi_exec_channel src[2]; + union tgsi_exec_channel dst; + + fetch_source(mach, &src[0], &inst->Src[0], CHAN_X, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], CHAN_Y, src_datatype); + op(&dst, &src[0], &src[1]); + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); + } + } +} + static void exec_vector_binary(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, @@ -2143,6 +2557,289 @@ exec_nrm3(struct tgsi_exec_machine *mach, } } +static void +exec_scs(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { + union tgsi_exec_channel arg; + union tgsi_exec_channel result; + + fetch_source(mach, &arg, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + micro_cos(&result, &arg); + store_dest(mach, &result, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + micro_sin(&result, &arg); + store_dest(mach, &result, &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + store_dest(mach, &ZeroVec, &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); + } +} + +static void +exec_x2d(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[4]; + union tgsi_exec_channel d[2]; + + fetch_source(mach, &r[0], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &r[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XZ) { + fetch_source(mach, &r[2], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[2], &r[2], &r[0]); + fetch_source(mach, &r[3], &inst->Src[2], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[3], &r[3], &r[1]); + micro_add(&r[2], &r[2], &r[3]); + fetch_source(mach, &r[3], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_add(&d[0], &r[2], &r[3]); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YW) { + fetch_source(mach, &r[2], &inst->Src[2], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[2], &r[2], &r[0]); + fetch_source(mach, &r[3], &inst->Src[2], CHAN_W, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[3], &r[3], &r[1]); + micro_add(&r[2], &r[2], &r[3]); + fetch_source(mach, &r[3], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_add(&d[1], &r[2], &r[3]); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + store_dest(mach, &d[0], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + store_dest(mach, &d[1], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + store_dest(mach, &d[0], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + store_dest(mach, &d[1], &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); + } +} + +static void +exec_rfl(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[9]; + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { + /* r0 = dp3(src0, src0) */ + fetch_source(mach, &r[2], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[0], &r[2], &r[2]); + fetch_source(mach, &r[4], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[8], &r[4], &r[4]); + micro_add(&r[0], &r[0], &r[8]); + fetch_source(mach, &r[6], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[8], &r[6], &r[6]); + micro_add(&r[0], &r[0], &r[8]); + + /* r1 = dp3(src0, src1) */ + fetch_source(mach, &r[3], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[1], &r[2], &r[3]); + fetch_source(mach, &r[5], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[8], &r[4], &r[5]); + micro_add(&r[1], &r[1], &r[8]); + fetch_source(mach, &r[7], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + micro_mul(&r[8], &r[6], &r[7]); + micro_add(&r[1], &r[1], &r[8]); + + /* r1 = 2 * r1 / r0 */ + micro_add(&r[1], &r[1], &r[1]); + micro_div(&r[1], &r[1], &r[0]); + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + micro_mul(&r[2], &r[2], &r[1]); + micro_sub(&r[2], &r[2], &r[3]); + store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + micro_mul(&r[4], &r[4], &r[1]); + micro_sub(&r[4], &r[4], &r[5]); + store_dest(mach, &r[4], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + micro_mul(&r[6], &r[6], &r[1]); + micro_sub(&r[6], &r[6], &r[7]); + store_dest(mach, &r[6], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); + } +} + +static void +exec_xpd(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[6]; + union tgsi_exec_channel d[3]; + + fetch_source(mach, &r[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &r[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + + micro_mul(&r[2], &r[0], &r[1]); + + fetch_source(mach, &r[3], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &r[4], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + + micro_mul(&r[5], &r[3], &r[4] ); + micro_sub(&d[CHAN_X], &r[2], &r[5]); + + fetch_source(mach, &r[2], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + + micro_mul(&r[3], &r[3], &r[2]); + + fetch_source(mach, &r[5], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + + micro_mul(&r[1], &r[1], &r[5]); + micro_sub(&d[CHAN_Y], &r[3], &r[1]); + + micro_mul(&r[5], &r[5], &r[4]); + micro_mul(&r[0], &r[0], &r[2]); + micro_sub(&d[CHAN_Z], &r[5], &r[0]); + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + store_dest(mach, &d[CHAN_X], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); + } +} + +static void +exec_dst(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[2]; + union tgsi_exec_channel d[4]; + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + fetch_source(mach, &r[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &r[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mul(&d[CHAN_Y], &r[0], &r[1]); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + fetch_source(mach, &d[CHAN_Z], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + fetch_source(mach, &d[CHAN_W], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT); + } + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + store_dest(mach, &d[CHAN_W], &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); + } +} + +static void +exec_log(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[3]; + + fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */ + micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */ + micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + store_dest(mach, &r[0], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */ + micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */ + store_dest(mach, &r[0], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + store_dest(mach, &r[1], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); + } +} + +static void +exec_exp(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[3]; + + fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */ + store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */ + store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */ + store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); + } +} + +static void +exec_lit(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + union tgsi_exec_channel r[3]; + union tgsi_exec_channel d[3]; + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { + store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) { + fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { + micro_max(&d[CHAN_Y], &r[0], &ZeroVec); + store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); + } + + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { + fetch_source(mach, &r[1], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_max(&r[1], &r[1], &ZeroVec); + + fetch_source(mach, &r[2], &inst->Src[0], CHAN_W, TGSI_EXEC_DATA_FLOAT); + micro_min(&r[2], &r[2], &P128Vec); + micro_max(&r[2], &r[2], &M128Vec); + micro_pow(&r[1], &r[1], &r[2]); + micro_lt(&d[CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec); + store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); + } + } + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); + } +} + static void exec_break(struct tgsi_exec_machine *mach) { @@ -2526,9 +3223,7 @@ exec_instruction( const struct tgsi_full_instruction *inst, int *pc ) { - uint chan_index; union tgsi_exec_channel r[10]; - union tgsi_exec_channel d[8]; (*pc)++; @@ -2542,36 +3237,7 @@ exec_instruction( break; case TGSI_OPCODE_LIT: - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_X ); - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[1], 0, CHAN_Y ); - micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - - FETCH( &r[2], 0, CHAN_W ); - micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); - micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); - micro_pow( &r[1], &r[1], &r[2] ); - micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - STORE(&d[CHAN_Y], 0, CHAN_Y); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - STORE(&d[CHAN_Z], 0, CHAN_Z); - } - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } + exec_lit(mach, inst); break; case TGSI_OPCODE_RCP: @@ -2583,44 +3249,11 @@ exec_instruction( break; case TGSI_OPCODE_EXP: - FETCH( &r[0], 0, CHAN_X ); - micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ - STORE( &r[2], 0, CHAN_X ); /* store r2 */ - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ - STORE( &r[2], 0, CHAN_Y ); /* store r2 */ - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ - STORE( &r[2], 0, CHAN_Z ); /* store r2 */ - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } + exec_exp(mach, inst); break; case TGSI_OPCODE_LOG: - FETCH( &r[0], 0, CHAN_X ); - micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ - micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ - micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &r[0], 0, CHAN_X ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ - micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ - STORE( &r[0], 0, CHAN_Y ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( &r[1], 0, CHAN_Z ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } + exec_log(mach, inst); break; case TGSI_OPCODE_MUL: @@ -2640,30 +3273,7 @@ exec_instruction( break; case TGSI_OPCODE_DST: - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - FETCH( &r[0], 0, CHAN_Y ); - FETCH( &r[1], 1, CHAN_Y); - micro_mul(&d[CHAN_Y], &r[0], &r[1]); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH(&d[CHAN_Z], 0, CHAN_Z); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - FETCH(&d[CHAN_W], 1, CHAN_W); - } - - if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { - STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - STORE(&d[CHAN_Y], 0, CHAN_Y); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - STORE(&d[CHAN_Z], 0, CHAN_Z); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { - STORE(&d[CHAN_W], 0, CHAN_W); - } + exec_dst(mach, inst); break; case TGSI_OPCODE_MIN: @@ -2727,53 +3337,11 @@ exec_instruction( break; case TGSI_OPCODE_POW: - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_pow( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_XPD: - FETCH(&r[0], 0, CHAN_Y); - FETCH(&r[1], 1, CHAN_Z); - - micro_mul( &r[2], &r[0], &r[1] ); - - FETCH(&r[3], 0, CHAN_Z); - FETCH(&r[4], 1, CHAN_Y); - - micro_mul( &r[5], &r[3], &r[4] ); - micro_sub(&d[CHAN_X], &r[2], &r[5]); - - FETCH(&r[2], 1, CHAN_X); - - micro_mul( &r[3], &r[3], &r[2] ); - - FETCH(&r[5], 0, CHAN_X); - - micro_mul( &r[1], &r[1], &r[5] ); - micro_sub(&d[CHAN_Y], &r[3], &r[1]); - - micro_mul( &r[5], &r[5], &r[4] ); - micro_mul( &r[0], &r[0], &r[2] ); - micro_sub(&d[CHAN_Z], &r[5], &r[0]); - - if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { - STORE(&d[CHAN_X], 0, CHAN_X); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - STORE(&d[CHAN_Y], 0, CHAN_Y); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - STORE(&d[CHAN_Z], 0, CHAN_Z); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } + exec_xpd(mach, inst); break; case TGSI_OPCODE_ABS: @@ -2781,12 +3349,7 @@ exec_instruction( break; case TGSI_OPCODE_RCC: - FETCH(&r[0], 0, CHAN_X); - micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); - micro_float_clamp(&r[0], &r[0]); - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&r[0], 0, chan_index); - } + exec_scalar_unary(mach, inst, micro_rcc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DPH: @@ -2830,52 +3393,7 @@ exec_instruction( break; case TGSI_OPCODE_RFL: - if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || - IS_CHANNEL_ENABLED(*inst, CHAN_Y) || - IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - /* r0 = dp3(src0, src0) */ - FETCH(&r[2], 0, CHAN_X); - micro_mul(&r[0], &r[2], &r[2]); - FETCH(&r[4], 0, CHAN_Y); - micro_mul(&r[8], &r[4], &r[4]); - micro_add(&r[0], &r[0], &r[8]); - FETCH(&r[6], 0, CHAN_Z); - micro_mul(&r[8], &r[6], &r[6]); - micro_add(&r[0], &r[0], &r[8]); - - /* r1 = dp3(src0, src1) */ - FETCH(&r[3], 1, CHAN_X); - micro_mul(&r[1], &r[2], &r[3]); - FETCH(&r[5], 1, CHAN_Y); - micro_mul(&r[8], &r[4], &r[5]); - micro_add(&r[1], &r[1], &r[8]); - FETCH(&r[7], 1, CHAN_Z); - micro_mul(&r[8], &r[6], &r[7]); - micro_add(&r[1], &r[1], &r[8]); - - /* r1 = 2 * r1 / r0 */ - micro_add(&r[1], &r[1], &r[1]); - micro_div(&r[1], &r[1], &r[0]); - - if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { - micro_mul(&r[2], &r[2], &r[1]); - micro_sub(&r[2], &r[2], &r[3]); - STORE(&r[2], 0, CHAN_X); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - micro_mul(&r[4], &r[4], &r[1]); - micro_sub(&r[4], &r[4], &r[5]); - STORE(&r[4], 0, CHAN_Y); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - micro_mul(&r[6], &r[6], &r[1]); - micro_sub(&r[6], &r[6], &r[7]); - STORE(&r[6], 0, CHAN_Z); - } - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { - STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); - } + exec_rfl(mach, inst); break; case TGSI_OPCODE_SEQ: @@ -2883,9 +3401,7 @@ exec_instruction( break; case TGSI_OPCODE_SFL: - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); - } + exec_vector(mach, inst, micro_sfl, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SGT: @@ -2905,9 +3421,7 @@ exec_instruction( break; case TGSI_OPCODE_STR: - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); - } + exec_vector(mach, inst, micro_str, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_TEX: @@ -2964,42 +3478,7 @@ exec_instruction( break; case TGSI_OPCODE_X2D: - FETCH(&r[0], 1, CHAN_X); - FETCH(&r[1], 1, CHAN_Y); - if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || - IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - FETCH(&r[2], 2, CHAN_X); - micro_mul(&r[2], &r[2], &r[0]); - FETCH(&r[3], 2, CHAN_Y); - micro_mul(&r[3], &r[3], &r[1]); - micro_add(&r[2], &r[2], &r[3]); - FETCH(&r[3], 0, CHAN_X); - micro_add(&d[CHAN_X], &r[2], &r[3]); - - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || - IS_CHANNEL_ENABLED(*inst, CHAN_W)) { - FETCH(&r[2], 2, CHAN_Z); - micro_mul(&r[2], &r[2], &r[0]); - FETCH(&r[3], 2, CHAN_W); - micro_mul(&r[3], &r[3], &r[1]); - micro_add(&r[2], &r[2], &r[3]); - FETCH(&r[3], 0, CHAN_Y); - micro_add(&d[CHAN_Y], &r[2], &r[3]); - - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { - STORE(&d[CHAN_X], 0, CHAN_X); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - STORE(&d[CHAN_Y], 0, CHAN_Y); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - STORE(&d[CHAN_X], 0, CHAN_Z); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { - STORE(&d[CHAN_Y], 0, CHAN_W); - } + exec_x2d(mach, inst); break; case TGSI_OPCODE_ARA: @@ -3065,6 +3544,8 @@ exec_instruction( if (mach->CallStackTop == 0) { /* returning from main() */ + mach->CondStackTop = 0; + mach->LoopStackTop = 0; *pc = -1; return; } @@ -3105,23 +3586,7 @@ exec_instruction( break; case TGSI_OPCODE_SCS: - if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( &r[0], 0, CHAN_X ); - if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { - micro_cos(&r[1], &r[0]); - STORE(&r[1], 0, CHAN_X); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - micro_sin(&r[1], &r[0]); - STORE(&r[1], 0, CHAN_Y); - } - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } + exec_scs(mach, inst); break; case TGSI_OPCODE_NRM: @@ -3133,7 +3598,7 @@ exec_instruction( break; case TGSI_OPCODE_DIV: - assert( 0 ); + exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DP2: @@ -3182,18 +3647,13 @@ exec_instruction( break; case TGSI_OPCODE_END: + /* make sure we end primitives which haven't + * been explicitly emitted */ + conditional_emit_primitive(mach); /* halt execution */ *pc = -1; break; - case TGSI_OPCODE_REP: - assert (0); - break; - - case TGSI_OPCODE_ENDREP: - assert (0); - break; - case TGSI_OPCODE_PUSHA: assert (0); break; @@ -3258,29 +3718,6 @@ exec_instruction( emit_primitive(mach); break; - case TGSI_OPCODE_BGNFOR: - assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - for (chan_index = 0; chan_index < 3; chan_index++) { - FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); - } - ++mach->LoopCounterStackTop; - STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X); - /* update LoopMask */ - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { - mach->LoopMask &= ~0x1; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { - mach->LoopMask &= ~0x2; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { - mach->LoopMask &= ~0x4; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { - mach->LoopMask &= ~0x8; - } - /* TODO: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); @@ -3295,56 +3732,6 @@ exec_instruction( mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; break; - case TGSI_OPCODE_ENDFOR: - assert(mach->LoopCounterStackTop > 0); - micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], - &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - /* update LoopMask */ - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { - mach->LoopMask &= ~0x1; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { - mach->LoopMask &= ~0x2; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { - mach->LoopMask &= ~0x4; - } - if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { - mach->LoopMask &= ~0x8; - } - micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], - &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); - assert(mach->LoopLabelStackTop > 0); - inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; - STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X); - /* Restore ContMask, but don't pop */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; - UPDATE_EXEC_MASK(mach); - if (mach->ExecMask) { - /* repeat loop: jump to instruction just past BGNLOOP */ - assert(mach->LoopLabelStackTop > 0); - *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; - } - else { - /* exit loop: pop LoopMask */ - assert(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - /* pop ContMask */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; - assert(mach->LoopLabelStackTop > 0); - --mach->LoopLabelStackTop; - assert(mach->LoopCounterStackTop > 0); - --mach->LoopCounterStackTop; - - mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; - } - UPDATE_EXEC_MASK(mach); - break; - case TGSI_OPCODE_ENDLOOP: /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); @@ -3542,6 +3929,54 @@ exec_instruction( exec_endswitch(mach); break; + case TGSI_OPCODE_LOAD: + assert(0); + break; + + case TGSI_OPCODE_LOAD_MS: + assert(0); + break; + + case TGSI_OPCODE_SAMPLE: + exec_sample(mach, inst, TEX_MODIFIER_NONE); + break; + + case TGSI_OPCODE_SAMPLE_B: + exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS); + break; + + case TGSI_OPCODE_SAMPLE_C: + exec_sample(mach, inst, TEX_MODIFIER_NONE); + break; + + case TGSI_OPCODE_SAMPLE_C_LZ: + exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS); + break; + + case TGSI_OPCODE_SAMPLE_D: + exec_sample_d(mach, inst); + break; + + case TGSI_OPCODE_SAMPLE_L: + exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD); + break; + + case TGSI_OPCODE_GATHER4: + assert(0); + break; + + case TGSI_OPCODE_RESINFO: + assert(0); + break; + + case TGSI_OPCODE_SAMPLE_POS: + assert(0); + break; + + case TGSI_OPCODE_SAMPLE_INFO: + assert(0); + break; + default: assert( 0 ); } @@ -3584,14 +4019,6 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) mach->Primitives[0] = 0; } - for (i = 0; i < QUAD_SIZE; i++) { - mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = - (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | - (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | - (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | - (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); - } - /* execute declarations (interpolants) */ for (i = 0; i < mach->NumDeclarations; i++) { exec_declaration( mach, mach->Declarations+i ); @@ -3671,6 +4098,9 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) } #endif + /* Strictly speaking, these assertions aren't really needed but they + * can potentially catch some bugs in the control flow code. + */ assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0);