X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Ftgsi%2Ftgsi_exec.c;h=11045e4ba2f7de98a74649678e5b084e4a1cddbf;hb=ccd13da0fc1f1813b55fc0d2181a6cb0d3b42b0d;hp=22984c32320e8531895d8a55cb437f28266b21b5;hpb=eaa3a025da18a0b7f25460f86bb4afcd08017dfe;p=mesa.git diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 22984c32320..11045e4ba2f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2,6 +2,7 @@ * * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009-2010 VMware, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -60,6 +61,7 @@ #include "util/u_memory.h" #include "util/u_math.h" + #define FAST_MATH 1 #define TILE_TOP_LEFT 0 @@ -67,11 +69,387 @@ #define TILE_BOTTOM_LEFT 2 #define TILE_BOTTOM_RIGHT 3 +static void +micro_abs(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = fabsf(src->f[0]); + dst->f[1] = fabsf(src->f[1]); + dst->f[2] = fabsf(src->f[2]); + dst->f[3] = fabsf(src->f[3]); +} + +static void +micro_arl(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)floorf(src->f[0]); + dst->i[1] = (int)floorf(src->f[1]); + dst->i[2] = (int)floorf(src->f[2]); + dst->i[3] = (int)floorf(src->f[3]); +} + +static void +micro_arr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)floorf(src->f[0] + 0.5f); + dst->i[1] = (int)floorf(src->f[1] + 0.5f); + dst->i[2] = (int)floorf(src->f[2] + 0.5f); + dst->i[3] = (int)floorf(src->f[3] + 0.5f); +} + +static void +micro_ceil(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = ceilf(src->f[0]); + dst->f[1] = ceilf(src->f[1]); + dst->f[2] = ceilf(src->f[2]); + dst->f[3] = ceilf(src->f[3]); +} + +static void +micro_clamp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0]; + dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1]; + dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2]; + dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3]; +} + +static void +micro_cmp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0]; + dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1]; + dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2]; + dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3]; +} + +static void +micro_cnd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0]; + dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1]; + dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2]; + dst->f[3] = src2->f[3] > 0.5f ? src0->f[3] : src1->f[3]; +} + +static void +micro_cos(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = cosf(src->f[0]); + dst->f[1] = cosf(src->f[1]); + dst->f[2] = cosf(src->f[2]); + dst->f[3] = cosf(src->f[3]); +} + +static void +micro_ddx(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; +} + +static void +micro_ddy(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; +} + +static void +micro_exp2(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ +#if FAST_MATH + dst->f[0] = util_fast_exp2(src->f[0]); + dst->f[1] = util_fast_exp2(src->f[1]); + dst->f[2] = util_fast_exp2(src->f[2]); + dst->f[3] = util_fast_exp2(src->f[3]); +#else +#if DEBUG + /* Inf is okay for this instruction, so clamp it to silence assertions. */ + uint i; + union tgsi_exec_channel clamped; + + for (i = 0; i < 4; i++) { + if (src->f[i] > 127.99999f) { + clamped.f[i] = 127.99999f; + } else if (src->f[i] < -126.99999f) { + clamped.f[i] = -126.99999f; + } else { + clamped.f[i] = src->f[i]; + } + } + src = &clamped; +#endif /* DEBUG */ + + dst->f[0] = powf(2.0f, src->f[0]); + dst->f[1] = powf(2.0f, src->f[1]); + dst->f[2] = powf(2.0f, src->f[2]); + dst->f[3] = powf(2.0f, src->f[3]); +#endif /* FAST_MATH */ +} + +static void +micro_flr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = floorf(src->f[0]); + dst->f[1] = floorf(src->f[1]); + dst->f[2] = floorf(src->f[2]); + dst->f[3] = floorf(src->f[3]); +} + +static void +micro_frc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src->f[0] - floorf(src->f[0]); + dst->f[1] = src->f[1] - floorf(src->f[1]); + dst->f[2] = src->f[2] - floorf(src->f[2]); + dst->f[3] = src->f[3] - floorf(src->f[3]); +} + +static void +micro_iabs(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; + dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; + dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; + dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; +} + +static void +micro_ineg(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = -src->i[0]; + dst->i[1] = -src->i[1]; + dst->i[2] = -src->i[2]; + dst->i[3] = -src->i[3]; +} + +static void +micro_lg2(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ +#if FAST_MATH + dst->f[0] = util_fast_log2(src->f[0]); + dst->f[1] = util_fast_log2(src->f[1]); + dst->f[2] = util_fast_log2(src->f[2]); + dst->f[3] = util_fast_log2(src->f[3]); +#else + dst->f[0] = logf(src->f[0]) * 1.442695f; + dst->f[1] = logf(src->f[1]) * 1.442695f; + dst->f[2] = logf(src->f[2]) * 1.442695f; + dst->f[3] = logf(src->f[3]) * 1.442695f; +#endif +} + +static void +micro_lrp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0]; + dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1]; + dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2]; + dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3]; +} + +static void +micro_mad(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0]; + dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1]; + dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2]; + dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3]; +} + +static void +micro_mov(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = src->u[0]; + dst->u[1] = src->u[1]; + dst->u[2] = src->u[2]; + dst->u[3] = src->u[3]; +} + +static void +micro_rcp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ +#if 0 /* for debugging */ + assert(src->f[0] != 0.0f); + assert(src->f[1] != 0.0f); + assert(src->f[2] != 0.0f); + assert(src->f[3] != 0.0f); +#endif + dst->f[0] = 1.0f / src->f[0]; + dst->f[1] = 1.0f / src->f[1]; + dst->f[2] = 1.0f / src->f[2]; + dst->f[3] = 1.0f / src->f[3]; +} + +static void +micro_rnd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = floorf(src->f[0] + 0.5f); + dst->f[1] = floorf(src->f[1] + 0.5f); + dst->f[2] = floorf(src->f[2] + 0.5f); + dst->f[3] = floorf(src->f[3] + 0.5f); +} + +static void +micro_rsq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ +#if 0 /* for debugging */ + assert(src->f[0] != 0.0f); + assert(src->f[1] != 0.0f); + assert(src->f[2] != 0.0f); + assert(src->f[3] != 0.0f); +#endif + dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0])); + dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1])); + dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2])); + dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3])); +} + +static void +micro_seq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f; + dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f; + dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f; + dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f; +} + +static void +micro_sge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f; + dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f; + dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f; + dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f; +} + +static void +micro_sgn(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; + dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; + dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; + dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; +} + +static void +micro_sgt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f; + dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f; + dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f; + dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f; +} + +static void +micro_sin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = sinf(src->f[0]); + dst->f[1] = sinf(src->f[1]); + dst->f[2] = sinf(src->f[2]); + dst->f[3] = sinf(src->f[3]); +} + +static void +micro_sle(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f; + dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f; + dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f; + dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f; +} + +static void +micro_slt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f; + dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f; + dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f; + dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f; +} + +static void +micro_sne(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f; + dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f; + dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f; + dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f; +} + +static void +micro_trunc(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)(int)src->f[0]; + dst->f[1] = (float)(int)src->f[1]; + dst->f[2] = (float)(int)src->f[2]; + dst->f[3] = (float)(int)src->f[3]; +} + + #define CHAN_X 0 #define CHAN_Y 1 #define CHAN_Z 2 #define CHAN_W 3 +enum tgsi_exec_datatype { + TGSI_EXEC_DATA_FLOAT, + TGSI_EXEC_DATA_INT, + TGSI_EXEC_DATA_UINT +}; + /* * Shorthand locations of various utility registers (_I = Index, _C = Channel) */ @@ -123,23 +501,31 @@ /** The execution mask depends on the conditional mask and the loop mask */ #define UPDATE_EXEC_MASK(MACH) \ - MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask static const union tgsi_exec_channel ZeroVec = { { 0.0, 0.0, 0.0, 0.0 } }; +static const union tgsi_exec_channel OneVec = { + {1.0f, 1.0f, 1.0f, 1.0f} +}; -#ifdef DEBUG -static void + +/** + * Assert that none of the float values in 'chan' are infinite or NaN. + * NaN and Inf may occur normally during program execution and should + * not lead to crashes, etc. But when debugging, it's helpful to catch + * them. + */ +static INLINE void check_inf_or_nan(const union tgsi_exec_channel *chan) { - assert(!util_is_inf_or_nan(chan->f[0])); - assert(!util_is_inf_or_nan(chan->f[1])); - assert(!util_is_inf_or_nan(chan->f[2])); - assert(!util_is_inf_or_nan(chan->f[3])); + assert(!util_is_inf_or_nan((chan)->f[0])); + assert(!util_is_inf_or_nan((chan)->f[1])); + assert(!util_is_inf_or_nan((chan)->f[2])); + assert(!util_is_inf_or_nan((chan)->f[3])); } -#endif #ifdef DEBUG @@ -292,6 +678,14 @@ tgsi_exec_machine_bind_shader( * sizeof(struct tgsi_full_declaration)); maxDeclarations += 10; } + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { + unsigned reg; + for (reg = parse.FullToken.FullDeclaration.Range.First; + reg <= parse.FullToken.FullDeclaration.Range.Last; + ++reg) { + ++mach->NumOutputs; + } + } memcpy(declarations + numDeclarations, &parse.FullToken.FullDeclaration, sizeof(declarations[0])); @@ -336,6 +730,9 @@ tgsi_exec_machine_bind_shader( numInstructions++; break; + case TGSI_TOKEN_TYPE_PROPERTY: + break; + default: assert( 0 ); } @@ -369,6 +766,7 @@ tgsi_exec_machine_create( void ) memset(mach, 0, sizeof(*mach)); mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; + mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; /* Setup constants. */ @@ -410,23 +808,10 @@ tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) align_free(mach); } - -static void -micro_abs( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = fabsf( src->f[0] ); - dst->f[1] = fabsf( src->f[1] ); - dst->f[2] = fabsf( src->f[2] ); - dst->f[3] = fabsf( src->f[3] ); -} - static void -micro_add( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) +micro_add(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) { dst->f[0] = src0->f[0] + src1->f[0]; dst->f[1] = src0->f[1] + src1->f[1]; @@ -434,1518 +819,1705 @@ micro_add( dst->f[3] = src0->f[3] + src1->f[3]; } -#if 0 static void -micro_iadd( +micro_div( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { - dst->i[0] = src0->i[0] + src1->i[0]; - dst->i[1] = src0->i[1] + src1->i[1]; - dst->i[2] = src0->i[2] + src1->i[2]; - dst->i[3] = src0->i[3] + src1->i[3]; + if (src1->f[0] != 0) { + dst->f[0] = src0->f[0] / src1->f[0]; + } + if (src1->f[1] != 0) { + dst->f[1] = src0->f[1] / src1->f[1]; + } + if (src1->f[2] != 0) { + dst->f[2] = src0->f[2] / src1->f[2]; + } + if (src1->f[3] != 0) { + dst->f[3] = src0->f[3] / src1->f[3]; + } } -#endif static void -micro_and( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) +micro_float_clamp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) { - dst->u[0] = src0->u[0] & src1->u[0]; - dst->u[1] = src0->u[1] & src1->u[1]; - dst->u[2] = src0->u[2] & src1->u[2]; - dst->u[3] = src0->u[3] & src1->u[3]; + uint i; + + for (i = 0; i < 4; i++) { + if (src->f[i] > 0.0f) { + if (src->f[i] > 1.884467e+019f) + dst->f[i] = 1.884467e+019f; + else if (src->f[i] < 5.42101e-020f) + dst->f[i] = 5.42101e-020f; + else + dst->f[i] = src->f[i]; + } + else { + if (src->f[i] < -1.884467e+019f) + dst->f[i] = -1.884467e+019f; + else if (src->f[i] > -5.42101e-020f) + dst->f[i] = -5.42101e-020f; + else + dst->f[i] = src->f[i]; + } + } } static void -micro_ceil( +micro_lt( union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) { - dst->f[0] = ceilf( src->f[0] ); - dst->f[1] = ceilf( src->f[1] ); - dst->f[2] = ceilf( src->f[2] ); - dst->f[3] = ceilf( src->f[3] ); + dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; } static void -micro_cos( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) +micro_max(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) { - dst->f[0] = cosf( src->f[0] ); - dst->f[1] = cosf( src->f[1] ); - dst->f[2] = cosf( src->f[2] ); - dst->f[3] = cosf( src->f[3] ); + dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; + dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; + dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; + dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; } static void -micro_ddx( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) +micro_min(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) { - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; + dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; + dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; + dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; + dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; } static void -micro_ddy( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) +micro_mul(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) { - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; + dst->f[0] = src0->f[0] * src1->f[0]; + dst->f[1] = src0->f[1] * src1->f[1]; + dst->f[2] = src0->f[2] * src1->f[2]; + dst->f[3] = src0->f[3] * src1->f[3]; } +#if 0 static void -micro_div( - union tgsi_exec_channel *dst, +micro_imul64( + union tgsi_exec_channel *dst0, + union tgsi_exec_channel *dst1, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { - if (src1->f[0] != 0) { - dst->f[0] = src0->f[0] / src1->f[0]; - } - if (src1->f[1] != 0) { - dst->f[1] = src0->f[1] / src1->f[1]; - } - if (src1->f[2] != 0) { - dst->f[2] = src0->f[2] / src1->f[2]; - } - if (src1->f[3] != 0) { - dst->f[3] = src0->f[3] / src1->f[3]; - } + dst1->i[0] = src0->i[0] * src1->i[0]; + dst1->i[1] = src0->i[1] * src1->i[1]; + dst1->i[2] = src0->i[2] * src1->i[2]; + dst1->i[3] = src0->i[3] * src1->i[3]; + dst0->i[0] = 0; + dst0->i[1] = 0; + dst0->i[2] = 0; + dst0->i[3] = 0; } +#endif #if 0 static void -micro_udiv( - union tgsi_exec_channel *dst, +micro_umul64( + union tgsi_exec_channel *dst0, + union tgsi_exec_channel *dst1, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { - dst->u[0] = src0->u[0] / src1->u[0]; - dst->u[1] = src0->u[1] / src1->u[1]; - dst->u[2] = src0->u[2] / src1->u[2]; - dst->u[3] = src0->u[3] / src1->u[3]; + dst1->u[0] = src0->u[0] * src1->u[0]; + dst1->u[1] = src0->u[1] * src1->u[1]; + dst1->u[2] = src0->u[2] * src1->u[2]; + dst1->u[3] = src0->u[3] * src1->u[3]; + dst0->u[0] = 0; + dst0->u[1] = 0; + dst0->u[2] = 0; + dst0->u[3] = 0; } #endif + +#if 0 static void -micro_eq( +micro_movc( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) + const union tgsi_exec_channel *src2 ) { - dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; + dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; + dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; + dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; + dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; } +#endif -#if 0 static void -micro_ieq( +micro_neg( union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) + const union tgsi_exec_channel *src ) { - dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; + dst->f[0] = -src->f[0]; + dst->f[1] = -src->f[1]; + dst->f[2] = -src->f[2]; + dst->f[3] = -src->f[3]; } -#endif static void -micro_exp2( +micro_pow( union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src) + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) { #if FAST_MATH - dst->f[0] = util_fast_exp2( src->f[0] ); - dst->f[1] = util_fast_exp2( src->f[1] ); - dst->f[2] = util_fast_exp2( src->f[2] ); - dst->f[3] = util_fast_exp2( src->f[3] ); + dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); + dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); + dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); + dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); #else - -#if DEBUG - /* Inf is okay for this instruction, so clamp it to silence assertions. */ - uint i; - union tgsi_exec_channel clamped; - - for (i = 0; i < 4; i++) { - if (src->f[i] > 127.99999f) { - clamped.f[i] = 127.99999f; - } else if (src->f[i] < -126.99999f) { - clamped.f[i] = -126.99999f; - } else { - clamped.f[i] = src->f[i]; - } - } - src = &clamped; -#endif - - dst->f[0] = powf( 2.0f, src->f[0] ); - dst->f[1] = powf( 2.0f, src->f[1] ); - dst->f[2] = powf( 2.0f, src->f[2] ); - dst->f[3] = powf( 2.0f, src->f[3] ); + dst->f[0] = powf( src0->f[0], src1->f[0] ); + dst->f[1] = powf( src0->f[1], src1->f[1] ); + dst->f[2] = powf( src0->f[2], src1->f[2] ); + dst->f[3] = powf( src0->f[3], src1->f[3] ); #endif } -#if 0 static void -micro_f2ut( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) +micro_sub(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) { - dst->u[0] = (uint) src->f[0]; - dst->u[1] = (uint) src->f[1]; - dst->u[2] = (uint) src->f[2]; - dst->u[3] = (uint) src->f[3]; + dst->f[0] = src0->f[0] - src1->f[0]; + dst->f[1] = src0->f[1] - src1->f[1]; + dst->f[2] = src0->f[2] - src1->f[2]; + dst->f[3] = src0->f[3] - src1->f[3]; } -#endif static void -micro_float_clamp(union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src) +fetch_src_file_channel(const struct tgsi_exec_machine *mach, + const uint file, + const uint swizzle, + const union tgsi_exec_channel *index, + const union tgsi_exec_channel *index2D, + union tgsi_exec_channel *chan) { uint i; - for (i = 0; i < 4; i++) { - if (src->f[i] > 0.0f) { - if (src->f[i] > 1.884467e+019f) - dst->f[i] = 1.884467e+019f; - else if (src->f[i] < 5.42101e-020f) - dst->f[i] = 5.42101e-020f; - else - dst->f[i] = src->f[i]; + switch (file) { + case TGSI_FILE_CONSTANT: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS); + assert(mach->Consts[index2D->i[i]]); + + if (index->i[i] < 0) { + chan->u[i] = 0; + } else { + const uint *p = (const uint *)mach->Consts[index2D->i[i]]; + + chan->u[i] = p[index->i[i] * 4 + swizzle]; + } } - else { - if (src->f[i] < -1.884467e+019f) - dst->f[i] = -1.884467e+019f; - else if (src->f[i] > -5.42101e-020f) - dst->f[i] = -5.42101e-020f; - else - dst->f[i] = src->f[i]; + break; + + case TGSI_FILE_INPUT: + case TGSI_FILE_SYSTEM_VALUE: + for (i = 0; i < QUAD_SIZE; i++) { + /* XXX: 2D indexing */ + chan->u[i] = mach->Inputs[index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]].xyzw[swizzle].u[i]; } - } -} + break; -static void -micro_flr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] ); - dst->f[1] = floorf( src->f[1] ); - dst->f[2] = floorf( src->f[2] ); - dst->f[3] = floorf( src->f[3] ); -} + case TGSI_FILE_TEMPORARY: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); + assert(index2D->i[i] == 0); -static void -micro_frc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = src->f[0] - floorf( src->f[0] ); - dst->f[1] = src->f[1] - floorf( src->f[1] ); - dst->f[2] = src->f[2] - floorf( src->f[2] ); - dst->f[3] = src->f[3] - floorf( src->f[3] ); -} + chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i]; + } + break; -static void -micro_i2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = (float) src->i[0]; - dst->f[1] = (float) src->i[1]; - dst->f[2] = (float) src->i[2]; - dst->f[3] = (float) src->i[3]; -} + case TGSI_FILE_IMMEDIATE: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); + assert(index2D->i[i] == 0); -static void -micro_lg2( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ -#if FAST_MATH - dst->f[0] = util_fast_log2( src->f[0] ); - dst->f[1] = util_fast_log2( src->f[1] ); - dst->f[2] = util_fast_log2( src->f[2] ); - dst->f[3] = util_fast_log2( src->f[3] ); -#else - dst->f[0] = logf( src->f[0] ) * 1.442695f; - dst->f[1] = logf( src->f[1] ) * 1.442695f; - dst->f[2] = logf( src->f[2] ) * 1.442695f; - dst->f[3] = logf( src->f[3] ) * 1.442695f; -#endif -} + chan->f[i] = mach->Imms[index->i[i]][swizzle]; + } + break; -static void -micro_le( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; -} + case TGSI_FILE_ADDRESS: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] >= 0); + assert(index2D->i[i] == 0); -static void -micro_lt( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; -} + chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i]; + } + break; -#if 0 -static void -micro_ilt( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; -} -#endif + case TGSI_FILE_PREDICATE: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS); + assert(index2D->i[i] == 0); -#if 0 -static void -micro_ult( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; -} -#endif + chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i]; + } + break; -static void -micro_max( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; - dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; - dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; - dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; -} + case TGSI_FILE_OUTPUT: + /* vertex/fragment output vars can be read too */ + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] >= 0); + assert(index2D->i[i] == 0); -#if 0 -static void -micro_imax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; + chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i]; + } + break; + + default: + assert(0); + for (i = 0; i < QUAD_SIZE; i++) { + chan->u[i] = 0; + } + } } -#endif -#if 0 static void -micro_umax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) +fetch_source(const struct tgsi_exec_machine *mach, + union tgsi_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index, + enum tgsi_exec_datatype src_datatype) { - dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; -} -#endif + union tgsi_exec_channel index; + union tgsi_exec_channel index2D; + uint swizzle; -static void -micro_min( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; - dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; - dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; - dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; -} + /* We start with a direct index into a register file. + * + * file[1], + * where: + * file = Register.File + * [1] = Register.Index + */ + index.i[0] = + index.i[1] = + index.i[2] = + index.i[3] = reg->Register.Index; -#if 0 -static void -micro_imin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; -} -#endif + /* There is an extra source register that indirectly subscripts + * a register file. The direct index now becomes an offset + * that is being added to the indirect register. + * + * file[ind[2].x+1], + * where: + * ind = Indirect.File + * [2] = Indirect.Index + * .x = Indirect.SwizzleX + */ + if (reg->Register.Indirect) { + union tgsi_exec_channel index2; + union tgsi_exec_channel indir_index; + const uint execmask = mach->ExecMask; + uint i; -#if 0 -static void -micro_umin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; -} -#endif + /* which address register (always zero now) */ + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->Indirect.Index; -#if 0 -static void -micro_umod( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] % src1->u[0]; - dst->u[1] = src0->u[1] % src1->u[1]; - dst->u[2] = src0->u[2] % src1->u[2]; - dst->u[3] = src0->u[3] % src1->u[3]; -} -#endif + /* get current value of address register[swizzle] */ + swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); + fetch_src_file_channel(mach, + reg->Indirect.File, + swizzle, + &index2, + &ZeroVec, + &indir_index); -static void -micro_mul( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->f[0] = src0->f[0] * src1->f[0]; - dst->f[1] = src0->f[1] * src1->f[1]; - dst->f[2] = src0->f[2] * src1->f[2]; - dst->f[3] = src0->f[3] * src1->f[3]; -} + /* add value of address register to the offset */ + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; -#if 0 -static void -micro_imul( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] * src1->i[0]; - dst->i[1] = src0->i[1] * src1->i[1]; - dst->i[2] = src0->i[2] * src1->i[2]; - dst->i[3] = src0->i[3] * src1->i[3]; -} -#endif + /* for disabled execution channels, zero-out the index to + * avoid using a potential garbage value. + */ + for (i = 0; i < QUAD_SIZE; i++) { + if ((execmask & (1 << i)) == 0) + index.i[i] = 0; + } + } -#if 0 -static void -micro_imul64( - union tgsi_exec_channel *dst0, - union tgsi_exec_channel *dst1, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst1->i[0] = src0->i[0] * src1->i[0]; - dst1->i[1] = src0->i[1] * src1->i[1]; - dst1->i[2] = src0->i[2] * src1->i[2]; - dst1->i[3] = src0->i[3] * src1->i[3]; - dst0->i[0] = 0; - dst0->i[1] = 0; - dst0->i[2] = 0; - dst0->i[3] = 0; -} -#endif + /* There is an extra source register that is a second + * subscript to a register file. Effectively it means that + * the register file is actually a 2D array of registers. + * + * file[3][1], + * where: + * [3] = Dimension.Index + */ + if (reg->Register.Dimension) { + index2D.i[0] = + index2D.i[1] = + index2D.i[2] = + index2D.i[3] = reg->Dimension.Index; -#if 0 -static void -micro_umul64( - union tgsi_exec_channel *dst0, - union tgsi_exec_channel *dst1, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst1->u[0] = src0->u[0] * src1->u[0]; - dst1->u[1] = src0->u[1] * src1->u[1]; - dst1->u[2] = src0->u[2] * src1->u[2]; - dst1->u[3] = src0->u[3] * src1->u[3]; - dst0->u[0] = 0; - dst0->u[1] = 0; - dst0->u[2] = 0; - dst0->u[3] = 0; -} -#endif + /* Again, the second subscript index can be addressed indirectly + * identically to the first one. + * Nothing stops us from indirectly addressing the indirect register, + * but there is no need for that, so we won't exercise it. + * + * file[ind[4].y+3][1], + * where: + * ind = DimIndirect.File + * [4] = DimIndirect.Index + * .y = DimIndirect.SwizzleX + */ + if (reg->Dimension.Indirect) { + union tgsi_exec_channel index2; + union tgsi_exec_channel indir_index; + const uint execmask = mach->ExecMask; + uint i; + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->DimIndirect.Index; -#if 0 -static void -micro_movc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2 ) -{ - dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; - dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; - dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; - dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; + swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); + fetch_src_file_channel(mach, + reg->DimIndirect.File, + swizzle, + &index2, + &ZeroVec, + &indir_index); + + index2D.i[0] += indir_index.i[0]; + index2D.i[1] += indir_index.i[1]; + index2D.i[2] += indir_index.i[2]; + index2D.i[3] += indir_index.i[3]; + + /* for disabled execution channels, zero-out the index to + * avoid using a potential garbage value. + */ + for (i = 0; i < QUAD_SIZE; i++) { + if ((execmask & (1 << i)) == 0) { + index2D.i[i] = 0; + } + } + } + + /* If by any chance there was a need for a 3D array of register + * files, we would have to check whether Dimension is followed + * by a dimension register and continue the saga. + */ + } else { + index2D.i[0] = + index2D.i[1] = + index2D.i[2] = + index2D.i[3] = 0; + } + + swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); + fetch_src_file_channel(mach, + reg->Register.File, + swizzle, + &index, + &index2D, + chan); + + if (reg->Register.Absolute) { + if (src_datatype == TGSI_EXEC_DATA_FLOAT) { + micro_abs(chan, chan); + } else { + micro_iabs(chan, chan); + } + } + + if (reg->Register.Negate) { + if (src_datatype == TGSI_EXEC_DATA_FLOAT) { + micro_neg(chan, chan); + } else { + micro_ineg(chan, chan); + } + } } -#endif static void -micro_neg( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) +store_dest(struct tgsi_exec_machine *mach, + const union tgsi_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index, + enum tgsi_exec_datatype dst_datatype) { - dst->f[0] = -src->f[0]; - dst->f[1] = -src->f[1]; - dst->f[2] = -src->f[2]; - dst->f[3] = -src->f[3]; -} + uint i; + union tgsi_exec_channel null; + union tgsi_exec_channel *dst; + uint execmask = mach->ExecMask; + int offset = 0; /* indirection offset */ + int index; + /* for debugging */ + if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) { + check_inf_or_nan(chan); + } + + /* There is an extra source register that indirectly subscripts + * a register file. The direct index now becomes an offset + * that is being added to the indirect register. + * + * file[ind[2].x+1], + * where: + * ind = Indirect.File + * [2] = Indirect.Index + * .x = Indirect.SwizzleX + */ + if (reg->Register.Indirect) { + union tgsi_exec_channel index; + union tgsi_exec_channel indir_index; + uint swizzle; + + /* which address register (always zero for now) */ + index.i[0] = + index.i[1] = + index.i[2] = + index.i[3] = reg->Indirect.Index; + + /* get current value of address register[swizzle] */ + swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); + + /* fetch values from the address/indirection register */ + fetch_src_file_channel(mach, + reg->Indirect.File, + swizzle, + &index, + &ZeroVec, + &indir_index); + + /* save indirection offset */ + offset = indir_index.i[0]; + } + + switch (reg->Register.File) { + case TGSI_FILE_NULL: + dst = &null; + break; + + case TGSI_FILE_OUTPUT: + index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + + reg->Register.Index; + dst = &mach->Outputs[offset + index].xyzw[chan_index]; #if 0 -static void -micro_ineg( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->i[0] = -src->i[0]; - dst->i[1] = -src->i[1]; - dst->i[2] = -src->i[2]; - dst->i[3] = -src->i[3]; -} + if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { + fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask); + for (i = 0; i < QUAD_SIZE; i++) + if (execmask & (1 << i)) + fprintf(stderr, "%f, ", chan->f[i]); + fprintf(stderr, ")\n"); + } #endif + break; -static void -micro_not( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->u[0] = ~src->u[0]; - dst->u[1] = ~src->u[1]; - dst->u[2] = ~src->u[2]; - dst->u[3] = ~src->u[3]; -} + case TGSI_FILE_TEMPORARY: + index = reg->Register.Index; + assert( index < TGSI_EXEC_NUM_TEMPS ); + dst = &mach->Temps[offset + index].xyzw[chan_index]; + break; -static void -micro_or( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] | src1->u[0]; - dst->u[1] = src0->u[1] | src1->u[1]; - dst->u[2] = src0->u[2] | src1->u[2]; - dst->u[3] = src0->u[3] | src1->u[3]; -} + case TGSI_FILE_ADDRESS: + index = reg->Register.Index; + dst = &mach->Addrs[index].xyzw[chan_index]; + break; -static void -micro_pow( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ -#if FAST_MATH - dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); - dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); - dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); - dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); -#else - dst->f[0] = powf( src0->f[0], src1->f[0] ); - dst->f[1] = powf( src0->f[1], src1->f[1] ); - dst->f[2] = powf( src0->f[2], src1->f[2] ); - dst->f[3] = powf( src0->f[3], src1->f[3] ); -#endif -} + case TGSI_FILE_LOOP: + assert(reg->Register.Index == 0); + assert(mach->LoopCounterStackTop > 0); + assert(chan_index == CHAN_X); + dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index]; + break; -static void -micro_rnd( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] + 0.5f ); - dst->f[1] = floorf( src->f[1] + 0.5f ); - dst->f[2] = floorf( src->f[2] + 0.5f ); - dst->f[3] = floorf( src->f[3] + 0.5f ); -} + case TGSI_FILE_PREDICATE: + index = reg->Register.Index; + assert(index < TGSI_EXEC_NUM_PREDS); + dst = &mach->Predicates[index].xyzw[chan_index]; + break; -static void -micro_sgn( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; - dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; - dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; - dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; -} + default: + assert( 0 ); + return; + } -static void -micro_shl( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] << src1->i[0]; - dst->i[1] = src0->i[1] << src1->i[1]; - dst->i[2] = src0->i[2] << src1->i[2]; - dst->i[3] = src0->i[3] << src1->i[3]; -} + if (inst->Instruction.Predicate) { + uint swizzle; + union tgsi_exec_channel *pred; -static void -micro_ishr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] >> src1->i[0]; - dst->i[1] = src0->i[1] >> src1->i[1]; - dst->i[2] = src0->i[2] >> src1->i[2]; - dst->i[3] = src0->i[3] >> src1->i[3]; -} + switch (chan_index) { + case CHAN_X: + swizzle = inst->Predicate.SwizzleX; + break; + case CHAN_Y: + swizzle = inst->Predicate.SwizzleY; + break; + case CHAN_Z: + swizzle = inst->Predicate.SwizzleZ; + break; + case CHAN_W: + swizzle = inst->Predicate.SwizzleW; + break; + default: + assert(0); + return; + } -static void -micro_trunc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0 ) -{ - dst->f[0] = (float) (int) src0->f[0]; - dst->f[1] = (float) (int) src0->f[1]; - dst->f[2] = (float) (int) src0->f[2]; - dst->f[3] = (float) (int) src0->f[3]; -} + assert(inst->Predicate.Index == 0); -#if 0 -static void -micro_ushr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] >> src1->u[0]; - dst->u[1] = src0->u[1] >> src1->u[1]; - dst->u[2] = src0->u[2] >> src1->u[2]; - dst->u[3] = src0->u[3] >> src1->u[3]; + pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; + + if (inst->Predicate.Negate) { + for (i = 0; i < QUAD_SIZE; i++) { + if (pred->u[i]) { + execmask &= ~(1 << i); + } + } + } else { + for (i = 0; i < QUAD_SIZE; i++) { + if (!pred->u[i]) { + execmask &= ~(1 << i); + } + } + } + } + + switch (inst->Instruction.Saturate) { + case TGSI_SAT_NONE: + for (i = 0; i < QUAD_SIZE; i++) + if (execmask & (1 << i)) + dst->i[i] = chan->i[i]; + break; + + case TGSI_SAT_ZERO_ONE: + for (i = 0; i < QUAD_SIZE; i++) + if (execmask & (1 << i)) { + if (chan->f[i] < 0.0f) + dst->f[i] = 0.0f; + else if (chan->f[i] > 1.0f) + dst->f[i] = 1.0f; + else + dst->i[i] = chan->i[i]; + } + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + for (i = 0; i < QUAD_SIZE; i++) + if (execmask & (1 << i)) { + if (chan->f[i] < -1.0f) + dst->f[i] = -1.0f; + else if (chan->f[i] > 1.0f) + dst->f[i] = 1.0f; + else + dst->i[i] = chan->i[i]; + } + break; + + default: + assert( 0 ); + } } -#endif +#define FETCH(VAL,INDEX,CHAN)\ + fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) + +#define STORE(VAL,INDEX,CHAN)\ + store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT) + + +/** + * Execute ARB-style KIL which is predicated by a src register. + * Kill fragment if any of the four values is less than zero. + */ static void -micro_sin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) +exec_kil(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { - dst->f[0] = sinf( src->f[0] ); - dst->f[1] = sinf( src->f[1] ); - dst->f[2] = sinf( src->f[2] ); - dst->f[3] = sinf( src->f[3] ); -} + uint uniquemask; + uint chan_index; + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + union tgsi_exec_channel r[1]; -static void -micro_sqrt( union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = sqrtf( src->f[0] ); - dst->f[1] = sqrtf( src->f[1] ); - dst->f[2] = sqrtf( src->f[2] ); - dst->f[3] = sqrtf( src->f[3] ); + /* This mask stores component bits that were already tested. */ + uniquemask = 0; + + for (chan_index = 0; chan_index < 4; chan_index++) + { + uint swizzle; + uint i; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_swizzle ( + &inst->Src[0], + chan_index); + + /* check if the component has not been already tested */ + if (uniquemask & (1 << swizzle)) + continue; + uniquemask |= 1 << swizzle; + + FETCH(&r[0], 0, chan_index); + for (i = 0; i < 4; i++) + if (r[0].f[i] < 0.0f) + kilmask |= 1 << i; + } + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; } +/** + * Execute NVIDIA-style KIL which is predicated by a condition code. + * Kill fragment if the condition code is TRUE. + */ static void -micro_sub( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) +exec_kilp(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { - dst->f[0] = src0->f[0] - src1->f[0]; - dst->f[1] = src0->f[1] - src1->f[1]; - dst->f[2] = src0->f[2] - src1->f[2]; - dst->f[3] = src0->f[3] - src1->f[3]; + uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + + /* "unconditional" kil */ + kilmask = mach->ExecMask; + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; } -#if 0 static void -micro_u2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) +emit_vertex(struct tgsi_exec_machine *mach) { - dst->f[0] = (float) src->u[0]; - dst->f[1] = (float) src->u[1]; - dst->f[2] = (float) src->u[2]; - dst->f[3] = (float) src->u[3]; + /* FIXME: check for exec mask correctly + unsigned i; + for (i = 0; i < QUAD_SIZE; ++i) { + if ((mach->ExecMask & (1 << i))) + */ + if (mach->ExecMask) { + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + } } -#endif static void -micro_xor( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) +emit_primitive(struct tgsi_exec_machine *mach) { - dst->u[0] = src0->u[0] ^ src1->u[0]; - dst->u[1] = src0->u[1] ^ src1->u[1]; - dst->u[2] = src0->u[2] ^ src1->u[2]; - dst->u[3] = src0->u[3] ^ src1->u[3]; + unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]; + /* FIXME: check for exec mask correctly + unsigned i; + for (i = 0; i < QUAD_SIZE; ++i) { + if ((mach->ExecMask & (1 << i))) + */ + if (mach->ExecMask) { + ++(*prim_count); + debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs); + mach->Primitives[*prim_count] = 0; + } } +/* + * Fetch four texture samples using STR texture coordinates. + */ static void -fetch_src_file_channel( - const struct tgsi_exec_machine *mach, - const uint file, - const uint swizzle, - const union tgsi_exec_channel *index, - union tgsi_exec_channel *chan ) -{ - switch( swizzle ) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - switch( file ) { - case TGSI_FILE_CONSTANT: - assert(mach->Consts); - if (index->i[0] < 0) - chan->f[0] = 0.0f; - else - chan->f[0] = mach->Consts[index->i[0]][swizzle]; - if (index->i[1] < 0) - chan->f[1] = 0.0f; - else - chan->f[1] = mach->Consts[index->i[1]][swizzle]; - if (index->i[2] < 0) - chan->f[2] = 0.0f; - else - chan->f[2] = mach->Consts[index->i[2]][swizzle]; - if (index->i[3] < 0) - chan->f[3] = 0.0f; - else - chan->f[3] = mach->Consts[index->i[3]][swizzle]; - break; +fetch_texel( struct tgsi_sampler *sampler, + const union tgsi_exec_channel *s, + const union tgsi_exec_channel *t, + const union tgsi_exec_channel *p, + const union tgsi_exec_channel *c0, + enum tgsi_sampler_control control, + union tgsi_exec_channel *r, + union tgsi_exec_channel *g, + union tgsi_exec_channel *b, + union tgsi_exec_channel *a ) +{ + uint j; + float rgba[NUM_CHANNELS][QUAD_SIZE]; - case TGSI_FILE_INPUT: - chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; - break; + sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, control, rgba); - case TGSI_FILE_TEMPORARY: - assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); - chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; - break; + for (j = 0; j < 4; j++) { + r->f[j] = rgba[0][j]; + g->f[j] = rgba[1][j]; + b->f[j] = rgba[2][j]; + a->f[j] = rgba[3][j]; + } +} - case TGSI_FILE_IMMEDIATE: - assert( index->i[0] < (int) mach->ImmLimit ); - chan->f[0] = mach->Imms[index->i[0]][swizzle]; - assert( index->i[1] < (int) mach->ImmLimit ); - chan->f[1] = mach->Imms[index->i[1]][swizzle]; - assert( index->i[2] < (int) mach->ImmLimit ); - chan->f[2] = mach->Imms[index->i[2]][swizzle]; - assert( index->i[3] < (int) mach->ImmLimit ); - chan->f[3] = mach->Imms[index->i[3]][swizzle]; - break; - case TGSI_FILE_ADDRESS: - chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; - break; +#define TEX_MODIFIER_NONE 0 +#define TEX_MODIFIER_PROJECTED 1 +#define TEX_MODIFIER_LOD_BIAS 2 +#define TEX_MODIFIER_EXPLICIT_LOD 3 - case TGSI_FILE_PREDICATE: - assert(index->i[0] < TGSI_EXEC_NUM_PREDS); - assert(index->i[1] < TGSI_EXEC_NUM_PREDS); - assert(index->i[2] < TGSI_EXEC_NUM_PREDS); - assert(index->i[3] < TGSI_EXEC_NUM_PREDS); - chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0]; - chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1]; - chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2]; - chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3]; - break; - case TGSI_FILE_OUTPUT: - /* vertex/fragment output vars can be read too */ - chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; - break; +static void +exec_tex(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + uint modifier) +{ + const uint unit = inst->Src[1].Register.Index; + union tgsi_exec_channel r[4]; + const union tgsi_exec_channel *lod = &ZeroVec; + enum tgsi_sampler_control control; + uint chan_index; - default: - assert( 0 ); + if (modifier != TEX_MODIFIER_NONE) { + FETCH(&r[3], 0, CHAN_W); + if (modifier != TEX_MODIFIER_PROJECTED) { + lod = &r[3]; } - break; + } - default: - assert( 0 ); + if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { + control = tgsi_sampler_lod_explicit; + } else { + control = tgsi_sampler_lod_bias; } -} -static void -fetch_source( - const struct tgsi_exec_machine *mach, - union tgsi_exec_channel *chan, - const struct tgsi_full_src_register *reg, - const uint chan_index ) -{ - union tgsi_exec_channel index; - uint swizzle; + switch (inst->Texture.Texture) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + FETCH(&r[0], 0, CHAN_X); - /* We start with a direct index into a register file. - * - * file[1], - * where: - * file = Register.File - * [1] = Register.Index - */ - index.i[0] = - index.i[1] = - index.i[2] = - index.i[3] = reg->Register.Index; + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + } - /* There is an extra source register that indirectly subscripts - * a register file. The direct index now becomes an offset - * that is being added to the indirect register. - * - * file[ind[2].x+1], - * where: - * ind = Indirect.File - * [2] = Indirect.Index - * .x = Indirect.SwizzleX - */ - if (reg->Register.Indirect) { - union tgsi_exec_channel index2; - union tgsi_exec_channel indir_index; - const uint execmask = mach->ExecMask; - uint i; + fetch_texel(mach->Samplers[unit], + &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; - /* which address register (always zero now) */ - index2.i[0] = - index2.i[1] = - index2.i[2] = - index2.i[3] = reg->Indirect.Index; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); - /* get current value of address register[swizzle] */ - swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); - fetch_src_file_channel( - mach, - reg->Indirect.File, - swizzle, - &index2, - &indir_index ); + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + micro_div(&r[2], &r[2], &r[3]); + } - /* add value of address register to the offset */ - index.i[0] += (int) indir_index.f[0]; - index.i[1] += (int) indir_index.f[1]; - index.i[2] += (int) indir_index.f[2]; - index.i[3] += (int) indir_index.f[3]; + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ + control, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; - /* for disabled execution channels, zero-out the index to - * avoid using a potential garbage value. - */ - for (i = 0; i < QUAD_SIZE; i++) { - if ((execmask & (1 << i)) == 0) - index.i[i] = 0; - } - } + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); - /* There is an extra source register that is a second - * subscript to a register file. Effectively it means that - * the register file is actually a 2D array of registers. - * - * file[1][3] == file[1*sizeof(file[1])+3], - * where: - * [3] = Dimension.Index - */ - if (reg->Register.Dimension) { - /* The size of the first-order array depends on the register file type. - * We need to multiply the index to the first array to get an effective, - * "flat" index that points to the beginning of the second-order array. - */ - switch (reg->Register.File) { - case TGSI_FILE_INPUT: - index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; - break; - case TGSI_FILE_CONSTANT: - index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; - index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; - index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; - index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; - break; - default: - assert( 0 ); + if (modifier == TEX_MODIFIER_PROJECTED) { + micro_div(&r[0], &r[0], &r[3]); + micro_div(&r[1], &r[1], &r[3]); + micro_div(&r[2], &r[2], &r[3]); } - index.i[0] += reg->Dimension.Index; - index.i[1] += reg->Dimension.Index; - index.i[2] += reg->Dimension.Index; - index.i[3] += reg->Dimension.Index; + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], lod, + control, + &r[0], &r[1], &r[2], &r[3]); + break; - /* Again, the second subscript index can be addressed indirectly - * identically to the first one. - * Nothing stops us from indirectly addressing the indirect register, - * but there is no need for that, so we won't exercise it. - * - * file[1][ind[4].y+3], - * where: - * ind = DimIndirect.File - * [4] = DimIndirect.Index - * .y = DimIndirect.SwizzleX - */ - if (reg->Dimension.Indirect) { - union tgsi_exec_channel index2; - union tgsi_exec_channel indir_index; - const uint execmask = mach->ExecMask; - uint i; + default: + assert(0); + } - index2.i[0] = - index2.i[1] = - index2.i[2] = - index2.i[3] = reg->DimIndirect.Index; + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&r[chan_index], 0, chan_index); + } +} - swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); - fetch_src_file_channel( - mach, - reg->DimIndirect.File, - swizzle, - &index2, - &indir_index ); - - index.i[0] += (int) indir_index.f[0]; - index.i[1] += (int) indir_index.f[1]; - index.i[2] += (int) indir_index.f[2]; - index.i[3] += (int) indir_index.f[3]; +static void +exec_txd(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + const uint unit = inst->Src[3].Register.Index; + union tgsi_exec_channel r[4]; + uint chan_index; - /* for disabled execution channels, zero-out the index to - * avoid using a potential garbage value. - */ - for (i = 0; i < QUAD_SIZE; i++) { - if ((execmask & (1 << i)) == 0) - index.i[i] = 0; - } - } + /* + * XXX: This is fake TXD -- the derivatives are not taken into account, yet. + */ - /* If by any chance there was a need for a 3D array of register - * files, we would have to check whether Dimension is followed - * by a dimension register and continue the saga. - */ - } + switch (inst->Texture.Texture) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: - swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); - fetch_src_file_channel( - mach, - reg->Register.File, - swizzle, - &index, - chan ); + FETCH(&r[0], 0, CHAN_X); - switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { - case TGSI_UTIL_SIGN_CLEAR: - micro_abs( chan, chan ); + fetch_texel(mach->Samplers[unit], + &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */ + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; - case TGSI_UTIL_SIGN_SET: - micro_abs( chan, chan ); - micro_neg( chan, chan ); - break; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); - case TGSI_UTIL_SIGN_TOGGLE: - micro_neg( chan, chan ); + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], &ZeroVec, /* inputs */ + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; - case TGSI_UTIL_SIGN_KEEP: + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + fetch_texel(mach->Samplers[unit], + &r[0], &r[1], &r[2], &ZeroVec, + tgsi_sampler_lod_bias, + &r[0], &r[1], &r[2], &r[3]); break; + + default: + assert(0); + } + + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&r[chan_index], 0, chan_index); } } + +/** + * Evaluate a constant-valued coefficient at the position of the + * current quad. + */ static void -store_dest( +eval_constant_coef( struct tgsi_exec_machine *mach, - const union tgsi_exec_channel *chan, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - uint chan_index ) + unsigned attrib, + unsigned chan ) { - uint i; - union tgsi_exec_channel null; - union tgsi_exec_channel *dst; - uint execmask = mach->ExecMask; - int offset = 0; /* indirection offset */ - int index; + unsigned i; -#ifdef DEBUG - check_inf_or_nan(chan); -#endif + for( i = 0; i < QUAD_SIZE; i++ ) { + mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; + } +} - /* There is an extra source register that indirectly subscripts - * a register file. The direct index now becomes an offset - * that is being added to the indirect register. - * - * file[ind[2].x+1], - * where: - * ind = Indirect.File - * [2] = Indirect.Index - * .x = Indirect.SwizzleX - */ - if (reg->Register.Indirect) { - union tgsi_exec_channel index; - union tgsi_exec_channel indir_index; - uint swizzle; +/** + * Evaluate a linear-valued coefficient at the position of the + * current quad. + */ +static void +eval_linear_coef( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + mach->Inputs[attrib].xyzw[chan].f[0] = a0; + mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; + mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; + mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; +} - /* which address register (always zero for now) */ - index.i[0] = - index.i[1] = - index.i[2] = - index.i[3] = reg->Indirect.Index; +/** + * Evaluate a perspective-valued coefficient at the position of the + * current quad. + */ +static void +eval_perspective_coef( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + const float *w = mach->QuadPos.xyzw[3].f; + /* divide by W here */ + mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; + mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; + mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; + mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; +} - /* get current value of address register[swizzle] */ - swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); - /* fetch values from the address/indirection register */ - fetch_src_file_channel( - mach, - reg->Indirect.File, - swizzle, - &index, - &indir_index ); +typedef void (* eval_coef_func)( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ); - /* save indirection offset */ - offset = (int) indir_index.f[0]; - } +static void +exec_declaration(struct tgsi_exec_machine *mach, + const struct tgsi_full_declaration *decl) +{ + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + if (decl->Declaration.File == TGSI_FILE_INPUT || + decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { + uint first, last, mask; - switch (reg->Register.File) { - case TGSI_FILE_NULL: - dst = &null; - break; + first = decl->Range.First; + last = decl->Range.Last; + mask = decl->Declaration.UsageMask; - case TGSI_FILE_OUTPUT: - index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] - + reg->Register.Index; - dst = &mach->Outputs[offset + index].xyzw[chan_index]; - break; + /* XXX we could remove this special-case code since + * mach->InterpCoefs[first].a0 should already have the + * front/back-face value. But we should first update the + * ureg code to emit the right UsageMask value (WRITEMASK_X). + * Then, we could remove the tgsi_exec_machine::Face field. + */ + if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { + uint i; - case TGSI_FILE_TEMPORARY: - index = reg->Register.Index; - assert( index < TGSI_EXEC_NUM_TEMPS ); - dst = &mach->Temps[offset + index].xyzw[chan_index]; - break; + assert(decl->Semantic.Index == 0); + assert(first == last); - case TGSI_FILE_ADDRESS: - index = reg->Register.Index; - dst = &mach->Addrs[index].xyzw[chan_index]; - break; + for (i = 0; i < QUAD_SIZE; i++) { + mach->Inputs[first].xyzw[0].f[i] = mach->Face; + } + } else { + eval_coef_func eval; + uint i, j; - case TGSI_FILE_LOOP: - assert(reg->Register.Index == 0); - assert(mach->LoopCounterStackTop > 0); - assert(chan_index == CHAN_X); - dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index]; - break; + switch (decl->Declaration.Interpolate) { + case TGSI_INTERPOLATE_CONSTANT: + eval = eval_constant_coef; + break; - case TGSI_FILE_PREDICATE: - index = reg->Register.Index; - assert(index < TGSI_EXEC_NUM_PREDS); - dst = &mach->Predicates[index].xyzw[chan_index]; - break; + case TGSI_INTERPOLATE_LINEAR: + eval = eval_linear_coef; + break; - default: - assert( 0 ); - return; - } + case TGSI_INTERPOLATE_PERSPECTIVE: + eval = eval_perspective_coef; + break; - if (inst->Instruction.Predicate) { - uint swizzle; - union tgsi_exec_channel *pred; + default: + assert(0); + return; + } - switch (chan_index) { - case CHAN_X: - swizzle = inst->Predicate.SwizzleX; - break; - case CHAN_Y: - swizzle = inst->Predicate.SwizzleY; - break; - case CHAN_Z: - swizzle = inst->Predicate.SwizzleZ; - break; - case CHAN_W: - swizzle = inst->Predicate.SwizzleW; - break; - default: - assert(0); - return; + for (j = 0; j < NUM_CHANNELS; j++) { + if (mask & (1 << j)) { + for (i = first; i <= last; i++) { + eval(mach, i, j); + } + } + } + } } + } +} - assert(inst->Predicate.Index == 0); +typedef void (* micro_unary_op)(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src); - pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; +static void +exec_scalar_unary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_unary_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + union tgsi_exec_channel src; + union tgsi_exec_channel dst; - if (inst->Predicate.Negate) { - for (i = 0; i < QUAD_SIZE; i++) { - if (pred->u[i]) { - execmask &= ~(1 << i); - } - } - } else { - for (i = 0; i < QUAD_SIZE; i++) { - if (!pred->u[i]) { - execmask &= ~(1 << i); - } - } + fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype); + op(&dst, &src); + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); } } +} - switch (inst->Instruction.Saturate) { - case TGSI_SAT_NONE: - for (i = 0; i < QUAD_SIZE; i++) - if (execmask & (1 << i)) - dst->i[i] = chan->i[i]; - break; +static void +exec_vector_unary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_unary_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; - case TGSI_SAT_ZERO_ONE: - for (i = 0; i < QUAD_SIZE; i++) - if (execmask & (1 << i)) { - if (chan->f[i] < 0.0f) - dst->f[i] = 0.0f; - else if (chan->f[i] > 1.0f) - dst->f[i] = 1.0f; - else - dst->i[i] = chan->i[i]; - } - break; + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src; + + fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); + op(&dst.xyzw[chan], &src); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} - case TGSI_SAT_MINUS_PLUS_ONE: - for (i = 0; i < QUAD_SIZE; i++) - if (execmask & (1 << i)) { - if (chan->f[i] < -1.0f) - dst->f[i] = -1.0f; - else if (chan->f[i] > 1.0f) - dst->f[i] = 1.0f; - else - dst->i[i] = chan->i[i]; - } - break; +typedef void (* micro_binary_op)(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1); - default: - assert( 0 ); +static void +exec_vector_binary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_binary_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[2]; + + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); + op(&dst.xyzw[chan], &src[0], &src[1]); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } } } -#define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) +typedef void (* micro_trinary_op)(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2); -#define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) +static void +exec_vector_trinary(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + micro_trinary_op op, + enum tgsi_exec_datatype dst_datatype, + enum tgsi_exec_datatype src_datatype) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[3]; + + fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); + fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); + fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); + op(&dst.xyzw[chan], &src[0], &src[1], &src[2]); + } + } + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); + } + } +} -/** - * Execute ARB-style KIL which is predicated by a src register. - * Kill fragment if any of the four values is less than zero. - */ static void -exec_kil(struct tgsi_exec_machine *mach, +exec_dp3(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { - uint uniquemask; - uint chan_index; - uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ - union tgsi_exec_channel r[1]; + unsigned int chan; + union tgsi_exec_channel arg[3]; - /* This mask stores component bits that were already tested. */ - uniquemask = 0; + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); - for (chan_index = 0; chan_index < 4; chan_index++) - { - uint swizzle; - uint i; + for (chan = CHAN_Y; chan <= CHAN_Z; chan++) { + fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); + } - /* unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_swizzle ( - &inst->Src[0], - chan_index); + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} - /* check if the component has not been already tested */ - if (uniquemask & (1 << swizzle)) - continue; - uniquemask |= 1 << swizzle; +static void +exec_dp4(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[3]; - FETCH(&r[0], 0, chan_index); - for (i = 0; i < 4; i++) - if (r[0].f[i] < 0.0f) - kilmask |= 1 << i; + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + + for (chan = CHAN_Y; chan <= CHAN_W; chan++) { + fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); } - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } } -/** - * Execute NVIDIA-style KIL which is predicated by a condition code. - * Kill fragment if the condition code is TRUE. - */ static void -exec_kilp(struct tgsi_exec_machine *mach, +exec_dp2a(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { - uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + unsigned int chan; + union tgsi_exec_channel arg[3]; - /* "unconditional" kil */ - kilmask = mach->ExecMask; - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; -} + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); + + fetch_source(mach, &arg[1], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_add(&arg[0], &arg[0], &arg[1]); + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} -/* - * Fetch a four texture samples using STR texture coordinates. - */ static void -fetch_texel( struct tgsi_sampler *sampler, - const union tgsi_exec_channel *s, - const union tgsi_exec_channel *t, - const union tgsi_exec_channel *p, - float lodbias, /* XXX should be float[4] */ - union tgsi_exec_channel *r, - union tgsi_exec_channel *g, - union tgsi_exec_channel *b, - union tgsi_exec_channel *a ) +exec_dph(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { - uint j; - float rgba[NUM_CHANNELS][QUAD_SIZE]; + unsigned int chan; + union tgsi_exec_channel arg[3]; - sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); - for (j = 0; j < 4; j++) { - r->f[j] = rgba[0][j]; - g->f[j] = rgba[1][j]; - b->f[j] = rgba[2][j]; - a->f[j] = rgba[3][j]; + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); + + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); + + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT); + micro_add(&arg[0], &arg[0], &arg[1]); + + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } } } - static void -exec_tex(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst, - boolean biasLod, - boolean projected) +exec_dp2(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { - const uint unit = inst->Src[1].Register.Index; - union tgsi_exec_channel r[4]; - uint chan_index; - float lodBias; - - /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ + unsigned int chan; + union tgsi_exec_channel arg[3]; - switch (inst->Texture.Texture) { - case TGSI_TEXTURE_1D: - case TGSI_TEXTURE_SHADOW1D: + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&arg[2], &arg[0], &arg[1]); - FETCH(&r[0], 0, CHAN_X); + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); + micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); - if (projected) { - FETCH(&r[1], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[1] ); + for (chan = 0; chan < NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); } + } +} - if (biasLod) { - FETCH(&r[1], 0, CHAN_W); - lodBias = r[2].f[0]; - } - else - lodBias = 0.0; +static void +exec_nrm4(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + union tgsi_exec_channel arg[4]; + union tgsi_exec_channel scale; - fetch_texel(mach->Samplers[unit], - &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */ - &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ - break; + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&scale, &arg[0], &arg[0]); - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_SHADOWRECT: + for (chan = CHAN_Y; chan <= CHAN_W; chan++) { + union tgsi_exec_channel product; - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 0, CHAN_Z); + fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); + micro_mul(&product, &arg[chan], &arg[chan]); + micro_add(&scale, &scale, &product); + } - if (projected) { - FETCH(&r[3], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[3] ); - micro_div( &r[1], &r[1], &r[3] ); - micro_div( &r[2], &r[2], &r[3] ); - } + micro_rsq(&scale, &scale); - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; + for (chan = CHAN_X; chan <= CHAN_W; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + micro_mul(&arg[chan], &arg[chan], &scale); + store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); } - else - lodBias = 0.0; + } +} - fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, /* inputs */ - &r[0], &r[1], &r[2], &r[3]); /* outputs */ - break; +static void +exec_nrm3(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { + unsigned int chan; + union tgsi_exec_channel arg[3]; + union tgsi_exec_channel scale; - case TGSI_TEXTURE_3D: - case TGSI_TEXTURE_CUBE: + fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); + micro_mul(&scale, &arg[0], &arg[0]); - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 0, CHAN_Z); + for (chan = CHAN_Y; chan <= CHAN_Z; chan++) { + union tgsi_exec_channel product; - if (projected) { - FETCH(&r[3], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[3] ); - micro_div( &r[1], &r[1], &r[3] ); - micro_div( &r[2], &r[2], &r[3] ); + fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); + micro_mul(&product, &arg[chan], &arg[chan]); + micro_add(&scale, &scale, &product); } - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; + micro_rsq(&scale, &scale); + + for (chan = CHAN_X; chan <= CHAN_Z; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + micro_mul(&arg[chan], &arg[chan], &scale); + store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } } - else - lodBias = 0.0; + } - fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, - &r[0], &r[1], &r[2], &r[3]); - break; + if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { + store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); + } +} - default: - assert (0); +static void +exec_break(struct tgsi_exec_machine *mach) +{ + if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + } else { + assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); + + mach->Switch.mask = 0x0; + + UPDATE_EXEC_MASK(mach); } +} + +static void +exec_switch(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); + + mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; + fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); + mach->Switch.mask = 0x0; + mach->Switch.defaultMask = 0x0; + + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; + mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; + + UPDATE_EXEC_MASK(mach); +} + +static void +exec_case(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; + union tgsi_exec_channel src; + uint mask = 0; + + fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[chan_index], 0, chan_index ); + if (mach->Switch.selector.u[0] == src.u[0]) { + mask |= 0x1; } + if (mach->Switch.selector.u[1] == src.u[1]) { + mask |= 0x2; + } + if (mach->Switch.selector.u[2] == src.u[2]) { + mask |= 0x4; + } + if (mach->Switch.selector.u[3] == src.u[3]) { + mask |= 0x8; + } + + mach->Switch.defaultMask |= mask; + + mach->Switch.mask |= mask & prevMask; + + UPDATE_EXEC_MASK(mach); } static void -exec_txd(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_default(struct tgsi_exec_machine *mach) { - const uint unit = inst->Src[3].Register.Index; - union tgsi_exec_channel r[4]; - uint chan_index; + uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; - /* - * XXX: This is fake TXD -- the derivatives are not taken into account, yet. - */ + mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; - switch (inst->Texture.Texture) { - case TGSI_TEXTURE_1D: - case TGSI_TEXTURE_SHADOW1D: + UPDATE_EXEC_MASK(mach); +} - FETCH(&r[0], 0, CHAN_X); +static void +exec_endswitch(struct tgsi_exec_machine *mach) +{ + mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; - fetch_texel(mach->Samplers[unit], - &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */ - &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ - break; + UPDATE_EXEC_MASK(mach); +} - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_SHADOWRECT: +static void +micro_i2f(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)src->i[0]; + dst->f[1] = (float)src->i[1]; + dst->f[2] = (float)src->i[2]; + dst->f[3] = (float)src->i[3]; +} - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 0, CHAN_Z); +static void +micro_not(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = ~src->u[0]; + dst->u[1] = ~src->u[1]; + dst->u[2] = ~src->u[2]; + dst->u[3] = ~src->u[3]; +} - fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], 0.0f, /* inputs */ - &r[0], &r[1], &r[2], &r[3]); /* outputs */ - break; +static void +micro_shl(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->u[0] = src0->u[0] << src1->u[0]; + dst->u[1] = src0->u[1] << src1->u[1]; + dst->u[2] = src0->u[2] << src1->u[2]; + dst->u[3] = src0->u[3] << src1->u[3]; +} - case TGSI_TEXTURE_3D: - case TGSI_TEXTURE_CUBE: +static void +micro_and(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->u[0] = src0->u[0] & src1->u[0]; + dst->u[1] = src0->u[1] & src1->u[1]; + dst->u[2] = src0->u[2] & src1->u[2]; + dst->u[3] = src0->u[3] & src1->u[3]; +} - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 0, CHAN_Z); +static void +micro_or(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->u[0] = src0->u[0] | src1->u[0]; + dst->u[1] = src0->u[1] | src1->u[1]; + dst->u[2] = src0->u[2] | src1->u[2]; + dst->u[3] = src0->u[3] | src1->u[3]; +} - fetch_texel(mach->Samplers[unit], - &r[0], &r[1], &r[2], 0.0f, - &r[0], &r[1], &r[2], &r[3]); - break; +static void +micro_xor(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->u[0] = src0->u[0] ^ src1->u[0]; + dst->u[1] = src0->u[1] ^ src1->u[1]; + dst->u[2] = src0->u[2] ^ src1->u[2]; + dst->u[3] = src0->u[3] ^ src1->u[3]; +} - default: - assert(0); - } +static void +micro_f2i(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->i[0] = (int)src->f[0]; + dst->i[1] = (int)src->f[1]; + dst->i[2] = (int)src->f[2]; + dst->i[3] = (int)src->f[3]; +} - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&r[chan_index], 0, chan_index); - } +static void +micro_idiv(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->i[0] = src0->i[0] / src1->i[0]; + dst->i[1] = src0->i[1] / src1->i[1]; + dst->i[2] = src0->i[2] / src1->i[2]; + dst->i[3] = src0->i[3] / src1->i[3]; } +static void +micro_imax(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; + dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; + dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; + dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; +} -/** - * Evaluate a constant-valued coefficient at the position of the - * current quad. - */ static void -eval_constant_coef( - struct tgsi_exec_machine *mach, - unsigned attrib, - unsigned chan ) +micro_imin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) { - unsigned i; + dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; + dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; + dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; + dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; +} - for( i = 0; i < QUAD_SIZE; i++ ) { - mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; - } +static void +micro_isge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0; + dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0; + dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0; + dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0; } -/** - * Evaluate a linear-valued coefficient at the position of the - * current quad. - */ static void -eval_linear_coef( - struct tgsi_exec_machine *mach, - unsigned attrib, - unsigned chan ) +micro_ishr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) { - const float x = mach->QuadPos.xyzw[0].f[0]; - const float y = mach->QuadPos.xyzw[1].f[0]; - const float dadx = mach->InterpCoefs[attrib].dadx[chan]; - const float dady = mach->InterpCoefs[attrib].dady[chan]; - const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - mach->Inputs[attrib].xyzw[chan].f[0] = a0; - mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; - mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; - mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; + dst->i[0] = src0->i[0] >> src1->i[0]; + dst->i[1] = src0->i[1] >> src1->i[1]; + dst->i[2] = src0->i[2] >> src1->i[2]; + dst->i[3] = src0->i[3] >> src1->i[3]; } -/** - * Evaluate a perspective-valued coefficient at the position of the - * current quad. - */ static void -eval_perspective_coef( - struct tgsi_exec_machine *mach, - unsigned attrib, - unsigned chan ) +micro_islt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) { - const float x = mach->QuadPos.xyzw[0].f[0]; - const float y = mach->QuadPos.xyzw[1].f[0]; - const float dadx = mach->InterpCoefs[attrib].dadx[chan]; - const float dady = mach->InterpCoefs[attrib].dady[chan]; - const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - const float *w = mach->QuadPos.xyzw[3].f; - /* divide by W here */ - mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; - mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; - mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; - mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; + dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0; + dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0; + dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0; + dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0; } +static void +micro_f2u(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->u[0] = (uint)src->f[0]; + dst->u[1] = (uint)src->f[1]; + dst->u[2] = (uint)src->f[2]; + dst->u[3] = (uint)src->f[3]; +} -typedef void (* eval_coef_func)( - struct tgsi_exec_machine *mach, - unsigned attrib, - unsigned chan ); +static void +micro_u2f(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float)src->u[0]; + dst->f[1] = (float)src->u[1]; + dst->f[2] = (float)src->u[2]; + dst->f[3] = (float)src->u[3]; +} static void -exec_declaration(struct tgsi_exec_machine *mach, - const struct tgsi_full_declaration *decl) +micro_uadd(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) { - if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { - if (decl->Declaration.File == TGSI_FILE_INPUT) { - uint first, last, mask; + dst->u[0] = src0->u[0] + src1->u[0]; + dst->u[1] = src0->u[1] + src1->u[1]; + dst->u[2] = src0->u[2] + src1->u[2]; + dst->u[3] = src0->u[3] + src1->u[3]; +} - first = decl->Range.First; - last = decl->Range.Last; - mask = decl->Declaration.UsageMask; +static void +micro_udiv(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->u[0] = src0->u[0] / src1->u[0]; + dst->u[1] = src0->u[1] / src1->u[1]; + dst->u[2] = src0->u[2] / src1->u[2]; + dst->u[3] = src0->u[3] / src1->u[3]; +} - if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { - assert(decl->Semantic.Index == 0); - assert(first == last); - assert(mask = TGSI_WRITEMASK_XYZW); +static void +micro_umad(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0]; + dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1]; + dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2]; + dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3]; +} - mach->Inputs[first] = mach->QuadPos; - } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { - uint i; +static void +micro_umax(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; + dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; + dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; + dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; +} - assert(decl->Semantic.Index == 0); - assert(first == last); +static void +micro_umin(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; + dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; + dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; + dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; +} - for (i = 0; i < QUAD_SIZE; i++) { - mach->Inputs[first].xyzw[0].f[i] = mach->Face; - } - } else { - eval_coef_func eval; - uint i, j; +static void +micro_umod(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->u[0] = src0->u[0] % src1->u[0]; + dst->u[1] = src0->u[1] % src1->u[1]; + dst->u[2] = src0->u[2] % src1->u[2]; + dst->u[3] = src0->u[3] % src1->u[3]; +} - switch (decl->Declaration.Interpolate) { - case TGSI_INTERPOLATE_CONSTANT: - eval = eval_constant_coef; - break; +static void +micro_umul(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->u[0] = src0->u[0] * src1->u[0]; + dst->u[1] = src0->u[1] * src1->u[1]; + dst->u[2] = src0->u[2] * src1->u[2]; + dst->u[3] = src0->u[3] * src1->u[3]; +} - case TGSI_INTERPOLATE_LINEAR: - eval = eval_linear_coef; - break; +static void +micro_useq(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0; + dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0; + dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0; + dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0; +} - case TGSI_INTERPOLATE_PERSPECTIVE: - eval = eval_perspective_coef; - break; +static void +micro_usge(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0; + dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0; + dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0; + dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0; +} - default: - assert(0); - return; - } +static void +micro_ushr(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->u[0] = src0->u[0] >> src1->u[0]; + dst->u[1] = src0->u[1] >> src1->u[1]; + dst->u[2] = src0->u[2] >> src1->u[2]; + dst->u[3] = src0->u[3] >> src1->u[3]; +} - for (j = 0; j < NUM_CHANNELS; j++) { - if (mask & (1 << j)) { - for (i = first; i <= last; i++) { - eval(mach, i, j); - } - } - } - } - } - } +static void +micro_uslt(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0; + dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0; + dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0; + dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0; +} + +static void +micro_usne(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1) +{ + dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0; + dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0; + dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0; + dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0; } static void @@ -1962,23 +2534,11 @@ exec_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: - case TGSI_OPCODE_FLR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_flr(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_MOV: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&d[chan_index], 0, chan_index); - } - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_LIT: @@ -2015,23 +2575,11 @@ exec_instruction( break; case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ - FETCH( &r[0], 0, CHAN_X ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ - FETCH( &r[0], 0, CHAN_X ); - micro_abs( &r[0], &r[0] ); - micro_sqrt( &r[0], &r[0] ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_EXP: @@ -2076,76 +2624,19 @@ exec_instruction( break; case TGSI_OPCODE_MUL: - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - micro_mul(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_ADD: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_add(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Z ); - FETCH( &r[2], 1, CHAN_Z ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dp3(mach, inst); break; - case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_W); - FETCH(&r[2], 1, CHAN_W); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + case TGSI_OPCODE_DP4: + exec_dp4(mach, inst); break; case TGSI_OPCODE_DST: @@ -2176,174 +2667,63 @@ exec_instruction( break; case TGSI_OPCODE_MIN: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - /* XXX use micro_min()?? */ - micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_MAX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - /* XXX use micro_max()?? */ - micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] ); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_mul( &r[0], &r[0], &r[1] ); - FETCH( &r[1], 2, chan_index ); - micro_add(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SUB: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - micro_sub(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_LRP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - micro_sub( &r[1], &r[1], &r[2] ); - micro_mul( &r[0], &r[0], &r[1] ); - micro_add(&d[chan_index], &r[0], &r[2]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CND: - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DP2A: - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH( &r[2], 2, CHAN_X ); - micro_add( &r[0], &r[0], &r[2] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dp2a(mach, inst); break; case TGSI_OPCODE_FRC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_frc(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CLAMP: - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - micro_max(&r[0], &r[0], &r[1]); - FETCH(&r[1], 2, chan_index); - micro_min(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_FLR: + exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_ROUND: - case TGSI_OPCODE_ARR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_rnd(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_EX2: - FETCH(&r[0], 0, CHAN_X); - - micro_exp2( &r[0], &r[0] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_LG2: - FETCH( &r[0], 0, CHAN_X ); - micro_lg2( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_POW: @@ -2396,15 +2776,9 @@ exec_instruction( } break; - case TGSI_OPCODE_ABS: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - micro_abs(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } - break; + case TGSI_OPCODE_ABS: + exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; case TGSI_OPCODE_RCC: FETCH(&r[0], 0, CHAN_X); @@ -2416,60 +2790,19 @@ exec_instruction( break; case TGSI_OPCODE_DPH: - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 1, CHAN_W); - - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dph(mach, inst); break; case TGSI_OPCODE_COS: - FETCH(&r[0], 0, CHAN_X); - - micro_cos( &r[0], &r[0] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DDX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddx(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_DDY: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddy(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_KILP: @@ -2546,14 +2879,7 @@ exec_instruction( break; case TGSI_OPCODE_SEQ: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SFL: @@ -2563,44 +2889,19 @@ exec_instruction( break; case TGSI_OPCODE_SGT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SIN: - FETCH( &r[0], 0, CHAN_X ); - micro_sin( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SLE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SNE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_STR: @@ -2613,14 +2914,14 @@ exec_instruction( /* simple texture lookup */ /* src[0] = texcoord */ /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE, FALSE); + exec_tex(mach, inst, TEX_MODIFIER_NONE); break; case TGSI_OPCODE_TXB: /* Texture lookup with lod bias */ /* src[0] = texcoord (src[0].w = LOD bias) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); + exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS); break; case TGSI_OPCODE_TXD: @@ -2636,14 +2937,14 @@ exec_instruction( /* Texture lookup with explit LOD */ /* src[0] = texcoord (src[0].w = LOD) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); + exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD); break; case TGSI_OPCODE_TXP: /* Texture lookup with projection */ /* src[0] = texcoord (src[0].w = projection) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE, TRUE); + exec_tex(mach, inst, TEX_MODIFIER_PROJECTED); break; case TGSI_OPCODE_UP2H: @@ -2705,6 +3006,10 @@ exec_instruction( assert (0); break; + case TGSI_OPCODE_ARR: + exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_BRA: assert (0); break; @@ -2724,6 +3029,8 @@ exec_instruction( mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; + mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; + mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; /* note that PC was already incremented above */ mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; @@ -2731,12 +3038,17 @@ exec_instruction( /* Second, push the Cond, Loop, Cont, Func stacks */ assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; + assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; /* Finally, jump to the subroutine */ @@ -2769,6 +3081,12 @@ exec_instruction( mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; mach->ContMask = mach->ContStack[mach->ContStackTop]; + mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; + mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; + + mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; + mach->BreakType = mach->BreakStack[mach->BreakStackTop]; + assert(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; @@ -2779,26 +3097,11 @@ exec_instruction( break; case TGSI_OPCODE_SSG: - /* TGSI_OPCODE_SGN */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_sgn(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_CMP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SCS: @@ -2822,70 +3125,11 @@ exec_instruction( break; case TGSI_OPCODE_NRM: - /* 3-component vector normalize */ - if(IS_CHANNEL_ENABLED(*inst, CHAN_X) || - IS_CHANNEL_ENABLED(*inst, CHAN_Y) || - IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - /* r3 = sqrt(dp3(src0, src0)) */ - FETCH(&r[0], 0, CHAN_X); - micro_mul(&r[3], &r[0], &r[0]); - FETCH(&r[1], 0, CHAN_Y); - micro_mul(&r[4], &r[1], &r[1]); - micro_add(&r[3], &r[3], &r[4]); - FETCH(&r[2], 0, CHAN_Z); - micro_mul(&r[4], &r[2], &r[2]); - micro_add(&r[3], &r[3], &r[4]); - micro_sqrt(&r[3], &r[3]); - - if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { - micro_div(&r[0], &r[0], &r[3]); - STORE(&r[0], 0, CHAN_X); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { - micro_div(&r[1], &r[1], &r[3]); - STORE(&r[1], 0, CHAN_Y); - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { - micro_div(&r[2], &r[2], &r[3]); - STORE(&r[2], 0, CHAN_Z); - } - } - if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { - STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); - } + exec_nrm3(mach, inst); break; case TGSI_OPCODE_NRM4: - /* 4-component vector normalize */ - { - union tgsi_exec_channel tmp, dot; - - /* tmp = dp4(src0, src0): */ - FETCH( &r[0], 0, CHAN_X ); - micro_mul( &tmp, &r[0], &r[0] ); - - FETCH( &r[1], 0, CHAN_Y ); - micro_mul( &dot, &r[1], &r[1] ); - micro_add( &tmp, &tmp, &dot ); - - FETCH( &r[2], 0, CHAN_Z ); - micro_mul( &dot, &r[2], &r[2] ); - micro_add( &tmp, &tmp, &dot ); - - FETCH( &r[3], 0, CHAN_W ); - micro_mul( &dot, &r[3], &r[3] ); - micro_add( &tmp, &tmp, &dot ); - - /* tmp = 1 / sqrt(tmp) */ - micro_sqrt( &tmp, &tmp ); - micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - /* chan = chan * tmp */ - micro_mul( &r[chan_index], &tmp, &r[chan_index] ); - STORE( &r[chan_index], 0, chan_index ); - } - } + exec_nrm4(mach, inst); break; case TGSI_OPCODE_DIV: @@ -2893,18 +3137,7 @@ exec_instruction( break; case TGSI_OPCODE_DP2: - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } + exec_dp2(mach, inst); break; case TGSI_OPCODE_IF: @@ -2970,87 +3203,31 @@ exec_instruction( break; case TGSI_OPCODE_CEIL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ceil(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_I2F: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_i2f(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); break; case TGSI_OPCODE_NOT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_not(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_TRUNC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_trunc(&d[chan_index], &r[0]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; case TGSI_OPCODE_SHL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_shl(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } - break; - - case TGSI_OPCODE_SHR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_ishr(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_AND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_and(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_OR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_or(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_MOD: @@ -3058,14 +3235,7 @@ exec_instruction( break; case TGSI_OPCODE_XOR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_xor(&d[chan_index], &r[0], &r[1]); - } - FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { - STORE(&d[chan_index], 0, chan_index); - } + exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); break; case TGSI_OPCODE_SAD: @@ -3081,13 +3251,11 @@ exec_instruction( break; case TGSI_OPCODE_EMIT: - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + emit_vertex(mach); break; case TGSI_OPCODE_ENDPRIM: - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; + emit_primitive(mach); break; case TGSI_OPCODE_BGNFOR: @@ -3116,11 +3284,15 @@ exec_instruction( case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); + + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + mach->ContStack[mach->ContStackTop++] = mach->ContMask; mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; + mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; + mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; break; case TGSI_OPCODE_ENDFOR: @@ -3167,6 +3339,8 @@ exec_instruction( --mach->LoopLabelStackTop; assert(mach->LoopCounterStackTop > 0); --mach->LoopCounterStackTop; + + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; } UPDATE_EXEC_MASK(mach); break; @@ -3190,15 +3364,14 @@ exec_instruction( mach->ContMask = mach->ContStack[--mach->ContStackTop]; assert(mach->LoopLabelStackTop > 0); --mach->LoopLabelStackTop; + + mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; } UPDATE_EXEC_MASK(mach); break; case TGSI_OPCODE_BRK: - /* turn off loop channels for each enabled exec channel */ - mach->LoopMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); + exec_break(mach); break; case TGSI_OPCODE_CONT: @@ -3229,6 +3402,12 @@ exec_instruction( mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; mach->ContMask = mach->ContStack[mach->ContStackTop]; + mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; + mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; + + mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; + mach->BreakType = mach->BreakStack[mach->BreakStackTop]; + assert(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; @@ -3240,11 +3419,135 @@ exec_instruction( case TGSI_OPCODE_NOP: break; + case TGSI_OPCODE_BREAKC: + FETCH(&r[0], 0, CHAN_X); + /* update CondMask */ + if (r[0].u[0] && (mach->ExecMask & 0x1)) { + mach->LoopMask &= ~0x1; + } + if (r[0].u[1] && (mach->ExecMask & 0x2)) { + mach->LoopMask &= ~0x2; + } + if (r[0].u[2] && (mach->ExecMask & 0x4)) { + mach->LoopMask &= ~0x4; + } + if (r[0].u[3] && (mach->ExecMask & 0x8)) { + mach->LoopMask &= ~0x8; + } + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_F2I: + exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_IDIV: + exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_IMAX: + exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_IMIN: + exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_INEG: + exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISGE: + exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISHR: + exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_ISLT: + exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); + break; + + case TGSI_OPCODE_F2U: + exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); + break; + + case TGSI_OPCODE_U2F: + exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UADD: + exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UDIV: + exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMAD: + exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMAX: + exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMIN: + exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMOD: + exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_UMUL: + exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USEQ: + exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USGE: + exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USHR: + exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USLT: + exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_USNE: + exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + break; + + case TGSI_OPCODE_SWITCH: + exec_switch(mach, inst); + break; + + case TGSI_OPCODE_CASE: + exec_case(mach, inst); + break; + + case TGSI_OPCODE_DEFAULT: + exec_default(mach); + break; + + case TGSI_OPCODE_ENDSWITCH: + exec_endswitch(mach); + break; + default: assert( 0 ); } } + #define DEBUG_EXECUTION 0 @@ -3264,9 +3567,13 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) mach->FuncMask = 0xf; mach->ExecMask = 0xf; + mach->Switch.mask = 0xf; + assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); + assert(mach->SwitchStackTop == 0); + assert(mach->BreakStackTop == 0); assert(mach->CallStackTop == 0); mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; @@ -3323,11 +3630,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) if (j > 0) { debug_printf(" "); } - debug_printf("(%6f, %6f, %6f, %6f)\n", - temps[i].xyzw[0].f[j], - temps[i].xyzw[1].f[j], - temps[i].xyzw[2].f[j], - temps[i].xyzw[3].f[j]); + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], + temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], + temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], + temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); } } } @@ -3341,11 +3648,11 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) if (j > 0) { debug_printf(" "); } - debug_printf("{%6f, %6f, %6f, %6f}\n", - outputs[i].xyzw[0].f[j], - outputs[i].xyzw[1].f[j], - outputs[i].xyzw[2].f[j], - outputs[i].xyzw[3].f[j]); + debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", + outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], + outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], + outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], + outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); } } } @@ -3367,6 +3674,8 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) assert(mach->CondStackTop == 0); assert(mach->LoopStackTop == 0); assert(mach->ContStackTop == 0); + assert(mach->SwitchStackTop == 0); + assert(mach->BreakStackTop == 0); assert(mach->CallStackTop == 0); return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];