X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Ftgsi%2Ftgsi_sse2.c;h=5f6b83b2362acbf503300c8fb2558361bd05e147;hb=c9f7a23ef05adfd2ebae56ee9f1b19897a589831;hp=1e719940ec52c21e18a67d8d18c77959316efde4;hpb=7549a8397b310acf672f97a08c8e7d866cdf492c;p=mesa.git diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 1e719940ec5..5f6b83b2362 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -39,8 +39,9 @@ #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" -#include "tgsi_exec.h" -#include "tgsi_sse2.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_sse2.h" #include "rtasm/rtasm_x86sse.h" @@ -1360,6 +1361,32 @@ emit_store( const struct tgsi_full_instruction *inst, unsigned chan_index ) { + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + + case TGSI_SAT_ZERO_ONE: + sse_maxps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ) ); + + sse_minps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ) ); + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + } + + switch( reg->DstRegister.File ) { case TGSI_FILE_OUTPUT: emit_output( @@ -1388,19 +1415,6 @@ emit_store( default: assert( 0 ); } - - switch( inst->Instruction.Saturate ) { - case TGSI_SAT_NONE: - break; - - case TGSI_SAT_ZERO_ONE: - /* assert( 0 ); */ - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - assert( 0 ); - break; - } } #define STORE( FUNC, INST, XMM, INDEX, CHAN )\ @@ -1747,14 +1761,6 @@ emit_instruction( if (indirect_temp_reference(inst)) return FALSE; - /* we don't handle saturation/clamping yet */ - if (inst->Instruction.Saturate != TGSI_SAT_NONE) - return FALSE; - - /* need to use extra temps to fix SOA dependencies : */ - if (tgsi_check_soa_dependencies(inst)) - return FALSE; - switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { @@ -1768,8 +1774,10 @@ emit_instruction( case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - STORE( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 4 + chan_index, 0, chan_index ); + } + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 4 + chan_index, 0, chan_index ); } break; @@ -1847,7 +1855,6 @@ emit_instruction( break; case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ FETCH( func, *inst, 0, 0, CHAN_X ); emit_rcp( func, 0, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { @@ -1856,7 +1863,6 @@ emit_instruction( break; case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ FETCH( func, *inst, 0, 0, CHAN_X ); emit_abs( func, 0 ); emit_rsqrt( func, 1, 0 ); @@ -1954,7 +1960,6 @@ emit_instruction( break; case TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ FETCH( func, *inst, 0, 0, CHAN_X ); FETCH( func, *inst, 1, 1, CHAN_X ); emit_mul( func, 0, 1 ); @@ -1972,7 +1977,6 @@ emit_instruction( break; case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ FETCH( func, *inst, 0, 0, CHAN_X ); FETCH( func, *inst, 1, 1, CHAN_X ); emit_mul( func, 0, 1 ); @@ -2043,17 +2047,14 @@ emit_instruction( break; case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ emit_setcc( func, inst, cc_LessThan ); break; case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ emit_setcc( func, inst, cc_NotLessThan ); break; case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); FETCH( func, *inst, 1, 1, chan_index ); @@ -2283,7 +2284,7 @@ emit_instruction( break; case TGSI_OPCODE_SEQ: - return 0; + emit_setcc( func, inst, cc_Equal ); break; case TGSI_OPCODE_SFL: @@ -2291,7 +2292,7 @@ emit_instruction( break; case TGSI_OPCODE_SGT: - return 0; + emit_setcc( func, inst, cc_NotLessThanEqual ); break; case TGSI_OPCODE_SIN: @@ -2303,11 +2304,11 @@ emit_instruction( break; case TGSI_OPCODE_SLE: - return 0; + emit_setcc( func, inst, cc_LessThanEqual ); break; case TGSI_OPCODE_SNE: - return 0; + emit_setcc( func, inst, cc_NotEqual ); break; case TGSI_OPCODE_STR: @@ -2371,7 +2372,6 @@ emit_instruction( break; case TGSI_OPCODE_SSG: - /* TGSI_OPCODE_SGN */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); emit_sgn( func, 0, 0 ); @@ -2929,6 +2929,22 @@ tgsi_emit_sse2( parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? "vertex shader" : "fragment shader"); } + + if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) { + uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; + + /* XXX: we only handle src/dst aliasing in a few opcodes + * currently. Need to use an additional temporay to hold + * the result in the cases where the code is too opaque to + * fix. + */ + if (opcode != TGSI_OPCODE_MOV && + opcode != TGSI_OPCODE_SWZ) { + debug_printf("Warning: src/dst aliasing in instruction" + " is not handled:\n"); + tgsi_dump_instruction(&parse.FullToken.FullInstruction, 1); + } + } break; case TGSI_TOKEN_TYPE_IMMEDIATE: