X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fcell%2Fspu%2Fspu_exec.c;h=0eaae2e451bc12258b213931fb5302cfa3988250;hb=ba002eb1965ccdb09d7c3657d32e22e71ed4096a;hp=cf81bee8fde54b4b76a32c4af40887a7bb6a5194;hpb=2d1f086c12b6d64f5c3fb80474f26775aeb71370;p=mesa.git diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index cf81bee8fde..0eaae2e451b 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -50,8 +50,6 @@ * Brian Paul */ -#include -#include #include #include #include @@ -65,10 +63,9 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" #include "spu_exec.h" #include "spu_main.h" #include "spu_vertex_shader.h" @@ -151,6 +148,7 @@ spu_exec_machine_init(struct spu_exec_machine *mach, const qword zero = si_il(0); const qword not_zero = si_il(~0); + (void) numSamplers; mach->Samplers = samplers; mach->Processor = processor; mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; @@ -384,10 +382,10 @@ fetch_src_file_channel( break; case TGSI_FILE_IMMEDIATE: - assert( index->i[0] < (int) mach->ImmLimit ); - assert( index->i[1] < (int) mach->ImmLimit ); - assert( index->i[2] < (int) mach->ImmLimit ); - assert( index->i[3] < (int) mach->ImmLimit ); + ASSERT( index->i[0] < (int) mach->ImmLimit ); + ASSERT( index->i[1] < (int) mach->ImmLimit ); + ASSERT( index->i[2] < (int) mach->ImmLimit ); + ASSERT( index->i[3] < (int) mach->ImmLimit ); chan->f[0] = mach->Imms[index->i[0]][swizzle]; chan->f[1] = mach->Imms[index->i[1]][swizzle]; @@ -411,7 +409,7 @@ fetch_src_file_channel( break; default: - assert( 0 ); + ASSERT( 0 ); } break; @@ -424,7 +422,7 @@ fetch_src_file_channel( break; default: - assert( 0 ); + ASSERT( 0 ); } } @@ -473,7 +471,7 @@ fetch_source( index.q = si_shli(index.q, 12); break; default: - assert( 0 ); + ASSERT( 0 ); } index.i[0] += reg->SrcRegisterDim.Index; @@ -560,7 +558,7 @@ store_dest( break; default: - assert( 0 ); + ASSERT( 0 ); return; } @@ -584,11 +582,11 @@ store_dest( break; case TGSI_SAT_MINUS_PLUS_ONE: - assert( 0 ); + ASSERT( 0 ); break; default: - assert( 0 ); + ASSERT( 0 ); } } @@ -604,8 +602,8 @@ store_dest( * Kill fragment if any of the four values is less than zero. */ static void -exec_kilp(struct spu_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_kil(struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst) { uint uniquemask; uint chan_index; @@ -641,6 +639,20 @@ exec_kilp(struct spu_exec_machine *mach, mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; } +/** + * Execute NVIDIA-style KIL which is predicated by a condition code. + * Kill fragment if the condition code is TRUE. + */ +static void +exec_kilp(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + + /* TODO: build kilmask from CC mask */ + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} /* * Fetch a texel using STR texture coordinates. @@ -659,9 +671,10 @@ fetch_texel( struct spu_sampler *sampler, qword rgba[4]; qword out[4]; - sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, (float *) rgba); + sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, + (float (*)[4]) rgba); - _transpose_matrix4x4(out, rgba); + _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba); r->q = out[0]; g->q = out[1]; b->q = out[2]; @@ -672,7 +685,7 @@ fetch_texel( struct spu_sampler *sampler, static void exec_tex(struct spu_exec_machine *mach, const struct tgsi_full_instruction *inst, - boolean biasLod) + boolean biasLod, boolean projected) { const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; union spu_exec_channel r[8]; @@ -686,17 +699,9 @@ exec_tex(struct spu_exec_machine *mach, FETCH(&r[0], 0, CHAN_X); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[1], 0, CHAN_W); r[0].q = micro_div(r[0].q, r[1].q); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -718,19 +723,11 @@ exec_tex(struct spu_exec_machine *mach, FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[3], 0, CHAN_W); r[0].q = micro_div(r[0].q, r[3].q); r[1].q = micro_div(r[1].q, r[3].q); r[2].q = micro_div(r[2].q, r[3].q); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -752,19 +749,11 @@ exec_tex(struct spu_exec_machine *mach, FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[3], 0, CHAN_W); r[0].q = micro_div(r[0].q, r[3].q); r[1].q = micro_div(r[1].q, r[3].q); r[2].q = micro_div(r[2].q, r[3].q); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -780,7 +769,7 @@ exec_tex(struct spu_exec_machine *mach, break; default: - assert (0); + ASSERT (0); } FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { @@ -854,13 +843,11 @@ exec_declaration(struct spu_exec_machine *mach, unsigned first, last, mask; interpolation_func interp; - assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); - - first = decl->u.DeclarationRange.First; - last = decl->u.DeclarationRange.Last; + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; mask = decl->Declaration.UsageMask; - switch( decl->Interpolation.Interpolate ) { + switch( decl->Declaration.Interpolate ) { case TGSI_INTERPOLATE_CONSTANT: interp = constant_interpolation; break; @@ -874,7 +861,7 @@ exec_declaration(struct spu_exec_machine *mach, break; default: - assert( 0 ); + ASSERT( 0 ); } if( mask == TGSI_WRITEMASK_XYZW ) { @@ -922,7 +909,7 @@ exec_instruction( break; case TGSI_OPCODE_MOV: - /* TGSI_OPCODE_SWZ */ + case TGSI_OPCODE_SWZ: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); STORE( &r[0], 0, chan_index ); @@ -965,7 +952,6 @@ exec_instruction( break; case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ FETCH( &r[0], 0, CHAN_X ); r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { @@ -974,7 +960,6 @@ exec_instruction( break; case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ FETCH( &r[0], 0, CHAN_X ); r[0].q = micro_sqrt(r[0].q); r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); @@ -984,11 +969,11 @@ exec_instruction( break; case TGSI_OPCODE_EXP: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_LOG: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_MUL: @@ -1128,7 +1113,6 @@ exec_instruction( break; case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); @@ -1149,8 +1133,7 @@ exec_instruction( } break; - case TGSI_OPCODE_LERP: - /* TGSI_OPCODE_LRP */ + case TGSI_OPCODE_LRP: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); FETCH(&r[1], 1, chan_index); @@ -1164,28 +1147,14 @@ exec_instruction( break; case TGSI_OPCODE_CND: - assert (0); - break; - - case TGSI_OPCODE_CND0: - assert (0); - break; - - case TGSI_OPCODE_DOT2ADD: - /* TGSI_OPCODE_DP2A */ - assert (0); - break; - - case TGSI_OPCODE_INDEX: - assert (0); + ASSERT (0); break; - case TGSI_OPCODE_NEGATE: - assert (0); + case TGSI_OPCODE_DP2A: + ASSERT (0); break; - case TGSI_OPCODE_FRAC: - /* TGSI_OPCODE_FRC */ + case TGSI_OPCODE_FRC: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); r[0].q = micro_frc(r[0].q); @@ -1194,11 +1163,10 @@ exec_instruction( break; case TGSI_OPCODE_CLAMP: - assert (0); + ASSERT (0); break; - case TGSI_OPCODE_FLOOR: - /* TGSI_OPCODE_FLR */ + case TGSI_OPCODE_FLR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); r[0].q = micro_flr(r[0].q); @@ -1214,8 +1182,7 @@ exec_instruction( } break; - case TGSI_OPCODE_EXPBASE2: - /* TGSI_OPCODE_EX2 */ + case TGSI_OPCODE_EX2: FETCH(&r[0], 0, CHAN_X); r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q); @@ -1225,8 +1192,7 @@ exec_instruction( } break; - case TGSI_OPCODE_LOGBASE2: - /* TGSI_OPCODE_LG2 */ + case TGSI_OPCODE_LG2: FETCH( &r[0], 0, CHAN_X ); r[0].q = micro_lg2(r[0].q); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { @@ -1234,8 +1200,7 @@ exec_instruction( } break; - case TGSI_OPCODE_POWER: - /* TGSI_OPCODE_POW */ + case TGSI_OPCODE_POW: FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 1, CHAN_X); @@ -1246,7 +1211,7 @@ exec_instruction( } break; - case TGSI_OPCODE_CROSSPRODUCT: + case TGSI_OPCODE_XPD: /* TGSI_OPCODE_XPD */ FETCH(&r[0], 0, CHAN_Y); FETCH(&r[1], 1, CHAN_Z); @@ -1288,10 +1253,6 @@ exec_instruction( } break; - case TGSI_OPCODE_MULTIPLYMATRIX: - assert (0); - break; - case TGSI_OPCODE_ABS: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH(&r[0], 0, chan_index); @@ -1303,7 +1264,7 @@ exec_instruction( break; case TGSI_OPCODE_RCC: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_DPH: @@ -1362,28 +1323,27 @@ exec_instruction( break; case TGSI_OPCODE_KIL: - /* for enabled ExecMask bits, set the killed bit */ - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask; + exec_kil (mach, inst); break; case TGSI_OPCODE_PK2H: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_PK2US: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_PK4B: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_PK4UB: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_RFL: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_SEQ: @@ -1398,7 +1358,7 @@ exec_instruction( break; case TGSI_OPCODE_SFL: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_SGT: @@ -1443,21 +1403,21 @@ exec_instruction( break; case TGSI_OPCODE_STR: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_TEX: /* simple texture lookup */ /* src[0] = texcoord */ /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE); + exec_tex(mach, inst, FALSE, FALSE); break; case TGSI_OPCODE_TXB: /* Texture lookup with lod bias */ /* src[0] = texcoord (src[0].w = load bias) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE); + exec_tex(mach, inst, TRUE, FALSE); break; case TGSI_OPCODE_TXD: @@ -1466,46 +1426,53 @@ exec_instruction( /* src[1] = d[strq]/dx */ /* src[2] = d[strq]/dy */ /* src[3] = sampler unit */ - assert (0); + ASSERT (0); break; case TGSI_OPCODE_TXL: /* Texture lookup with explit LOD */ /* src[0] = texcoord (src[0].w = load bias) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE); + exec_tex(mach, inst, TRUE, FALSE); + break; + + case TGSI_OPCODE_TXP: + /* Texture lookup with projection */ + /* src[0] = texcoord (src[0].w = projection) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE, TRUE); break; case TGSI_OPCODE_UP2H: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_UP2US: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_UP4B: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_UP4UB: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_X2D: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_ARA: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_ARR: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_BRA: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_CAL: @@ -1514,14 +1481,14 @@ exec_instruction( /* do the call */ /* push the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); mach->CondStack[mach->CondStackTop++] = mach->CondMask; - assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->ContStack[mach->ContStackTop++] = mach->ContMask; - assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; /* note that PC was already incremented above */ @@ -1545,13 +1512,13 @@ exec_instruction( *pc = mach->CallStack[--mach->CallStackTop]; /* pop the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop > 0); + ASSERT(mach->CondStackTop > 0); mach->CondMask = mach->CondStack[--mach->CondStackTop]; - assert(mach->LoopStackTop > 0); + ASSERT(mach->LoopStackTop > 0); mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - assert(mach->ContStackTop > 0); + ASSERT(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[--mach->ContStackTop]; - assert(mach->FuncStackTop > 0); + ASSERT(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; UPDATE_EXEC_MASK(mach); @@ -1559,7 +1526,7 @@ exec_instruction( break; case TGSI_OPCODE_SSG: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_CMP: @@ -1599,11 +1566,11 @@ exec_instruction( break; case TGSI_OPCODE_NRM: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_DIV: - assert( 0 ); + ASSERT( 0 ); break; case TGSI_OPCODE_DP2: @@ -1622,7 +1589,7 @@ exec_instruction( case TGSI_OPCODE_IF: /* push CondMask */ - assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); mach->CondStack[mach->CondStackTop++] = mach->CondMask; FETCH( &r[0], 0, CHAN_X ); /* update CondMask */ @@ -1646,7 +1613,7 @@ exec_instruction( /* invert CondMask wrt previous mask */ { uint prevMask; - assert(mach->CondStackTop > 0); + ASSERT(mach->CondStackTop > 0); prevMask = mach->CondStack[mach->CondStackTop - 1]; mach->CondMask = ~mach->CondMask & prevMask; UPDATE_EXEC_MASK(mach); @@ -1656,7 +1623,7 @@ exec_instruction( case TGSI_OPCODE_ENDIF: /* pop CondMask */ - assert(mach->CondStackTop > 0); + ASSERT(mach->CondStackTop > 0); mach->CondMask = mach->CondStack[--mach->CondStackTop]; UPDATE_EXEC_MASK(mach); break; @@ -1667,19 +1634,19 @@ exec_instruction( break; case TGSI_OPCODE_REP: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_ENDREP: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_PUSHA: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_POPA: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_CEIL: @@ -1753,7 +1720,7 @@ exec_instruction( break; case TGSI_OPCODE_MOD: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_XOR: @@ -1766,15 +1733,15 @@ exec_instruction( break; case TGSI_OPCODE_SAD: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_TXF: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_TXQ: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_EMIT: @@ -1787,21 +1754,21 @@ exec_instruction( mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; break; - case TGSI_OPCODE_LOOP: + case TGSI_OPCODE_BGNFOR: /* fall-through (for now) */ - case TGSI_OPCODE_BGNLOOP2: + case TGSI_OPCODE_BGNLOOP: /* push LoopMask and ContMasks */ - assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->ContStack[mach->ContStackTop++] = mach->ContMask; break; - case TGSI_OPCODE_ENDLOOP: + case TGSI_OPCODE_ENDFOR: /* fall-through (for now at least) */ - case TGSI_OPCODE_ENDLOOP2: + case TGSI_OPCODE_ENDLOOP: /* Restore ContMask, but don't pop */ - assert(mach->ContStackTop > 0); + ASSERT(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; if (mach->LoopMask) { /* repeat loop: jump to instruction just past BGNLOOP */ @@ -1809,10 +1776,10 @@ exec_instruction( } else { /* exit loop: pop LoopMask */ - assert(mach->LoopStackTop > 0); + ASSERT(mach->LoopStackTop > 0); mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; /* pop ContMask */ - assert(mach->ContStackTop > 0); + ASSERT(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[--mach->ContStackTop]; } UPDATE_EXEC_MASK(mach); @@ -1841,26 +1808,26 @@ exec_instruction( break; case TGSI_OPCODE_NOISE1: - assert( 0 ); + ASSERT( 0 ); break; case TGSI_OPCODE_NOISE2: - assert( 0 ); + ASSERT( 0 ); break; case TGSI_OPCODE_NOISE3: - assert( 0 ); + ASSERT( 0 ); break; case TGSI_OPCODE_NOISE4: - assert( 0 ); + ASSERT( 0 ); break; case TGSI_OPCODE_NOP: break; default: - assert( 0 ); + ASSERT( 0 ); } } @@ -1881,11 +1848,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) mach->FuncMask = 0xf; mach->ExecMask = 0xf; - mach->CondStackTop = 0; /* temporarily subvert this assertion */ - assert(mach->CondStackTop == 0); - assert(mach->LoopStackTop == 0); - assert(mach->ContStackTop == 0); - assert(mach->CallStackTop == 0); + mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */ + ASSERT(mach->CondStackTop == 0); + ASSERT(mach->LoopStackTop == 0); + ASSERT(mach->ContStackTop == 0); + ASSERT(mach->CallStackTop == 0); mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;