From 90e9a4d4f99e722d8f0f2050e134a3c69863541b Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jos=C3=A9=20Fonseca?= Date: Wed, 12 Aug 2009 02:36:35 +0100 Subject: [PATCH] llvmpipe: Migrate more SSE2 codegen to LLVM IR. --- .../drivers/llvmpipe/lp_bld_tgsi_soa.c | 536 ++++++++---------- 1 file changed, 251 insertions(+), 285 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index 01aa3b8f20a..78adeab0ae4 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -82,30 +82,6 @@ struct lp_build_tgsi_soa_context }; -/** - * Function call helpers. - */ - -/** - * NOTE: In gcc, if the destination uses the SSE intrinsics, then it must be - * defined with __attribute__((force_align_arg_pointer)), as we do not guarantee - * that the stack pointer is 16 byte aligned, as expected. - */ -static void -emit_func_call( - struct lp_build_tgsi_soa_context *bld, - const LLVMValueRef *args, - unsigned nr_args, - void (PIPE_CDECL *code)() ) -{ -#if 0 - LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global, - void* Addr); -#endif - -} - - /** * Register fetch. */ @@ -495,7 +471,9 @@ emit_instruction( struct tgsi_full_instruction *inst ) { unsigned chan_index; - LLVMValueRef tmp; + LLVMValueRef src0, src1, src2; + LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + LLVMValueRef dst0; /* we can't handle indirect addressing into temp register file yet */ if (indirect_temp_reference(inst)) @@ -505,10 +483,10 @@ emit_instruction( #if 0 case TGSI_OPCODE_ARL: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( bld, *inst, 0, 0, chan_index ); + tmp0 = FETCH( bld, *inst, 0, chan_index ); emit_flr(bld, 0, 0); emit_f2it( bld, 0 ); - STORE( bld, *inst, 0, 0, chan_index ); + STORE( bld, *inst, 0, chan_index, tmp0); } break; #endif @@ -516,7 +494,8 @@ emit_instruction( case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( bld, *inst, 0, chan_index, FETCH( bld, *inst, 0, chan_index ) ); + tmp0 = FETCH( bld, *inst, 0, chan_index ); + STORE( bld, *inst, 0, chan_index, tmp0); } break; @@ -524,33 +503,24 @@ emit_instruction( case TGSI_OPCODE_LIT: if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { - emit_tempf( - bld, - 0, - TEMP_ONE_I, - TEMP_ONE_C); + tmp0 = bld->base.one; if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) { - STORE( bld, *inst, 0, 0, CHAN_X ); + STORE( bld, *inst, 0, CHAN_X, tmp0); } if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { - STORE( bld, *inst, 0, 0, CHAN_W ); + STORE( bld, *inst, 0, CHAN_W, tmp0); } } if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - tmp = FETCH( bld, *inst, 0, 0, CHAN_X ); - sse_maxps( - bld, - make_xmm( 0 ), - get_temp( - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ) ); - STORE( bld, *inst, 0, 0, CHAN_Y ); + tmp0 = FETCH( bld, *inst, 0, CHAN_X ); + tmp0 = lp_build_max( &bld->base, tmp0, bld->base.one); + STORE( bld, *inst, 0, CHAN_Y, tmp0); } if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { /* XMM[1] = SrcReg[0].yyyy */ - FETCH( bld, *inst, 1, 0, CHAN_Y ); + tmp1 = FETCH( bld, *inst, 0, CHAN_Y ); /* XMM[1] = max(XMM[1], 0) */ sse_maxps( bld, @@ -559,7 +529,7 @@ emit_instruction( TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C ) ); /* XMM[2] = SrcReg[0].wwww */ - FETCH( bld, *inst, 2, 0, CHAN_W ); + tmp2 = FETCH( bld, *inst, 0, CHAN_W ); /* XMM[2] = min(XMM[2], 128.0) */ sse_minps( bld, @@ -574,8 +544,8 @@ emit_instruction( get_temp( TGSI_EXEC_TEMP_MINUS_128_I, TGSI_EXEC_TEMP_MINUS_128_C ) ); - emit_pow( bld, 3, 1, 1, 2 ); - FETCH( bld, *inst, 0, 0, CHAN_X ); + tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); + tmp0 = FETCH( bld, *inst, 0, CHAN_X ); sse_xorps( bld, make_xmm( 2 ), @@ -589,7 +559,7 @@ emit_instruction( bld, make_xmm( 2 ), make_xmm( 1 ) ); - STORE( bld, *inst, 2, 0, CHAN_Z ); + STORE( bld, *inst, 0, CHAN_Z, tmp2); } } break; @@ -597,20 +567,20 @@ emit_instruction( case TGSI_OPCODE_RCP: /* TGSI_OPCODE_RECIP */ - tmp = FETCH( bld, *inst, 0, CHAN_X ); - tmp = lp_build_rcp(&bld->base, tmp); + src0 = FETCH( bld, *inst, 0, CHAN_X ); + dst0 = lp_build_rcp(&bld->base, src0); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( bld, *inst, 0, chan_index, tmp ); + STORE( bld, *inst, 0, chan_index, dst0 ); } break; case TGSI_OPCODE_RSQ: /* TGSI_OPCODE_RECIPSQRT */ - tmp = FETCH( bld, *inst, 0, CHAN_X ); - tmp = lp_build_abs(&bld->base, tmp); - tmp = lp_build_rsqrt(&bld->base, tmp); + src0 = FETCH( bld, *inst, 0, CHAN_X ); + src0 = lp_build_abs(&bld->base, src0); + dst0 = lp_build_rsqrt(&bld->base, src0); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( bld, *inst, 0, chan_index, tmp ); + STORE( bld, *inst, 0, chan_index, dst0 ); } break; @@ -619,34 +589,34 @@ emit_instruction( if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( bld, *inst, 0, 0, CHAN_X ); + tmp0 = FETCH( bld, *inst, 0, CHAN_X ); if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { - emit_MOV( bld, 1, 0 ); + tmp1 = tmp0; emit_flr( bld, 2, 1 ); /* dst.x = ex2(floor(src.x)) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { - emit_MOV( bld, 2, 1 ); - emit_ex2( bld, 3, 2 ); - STORE( bld, *inst, 2, 0, CHAN_X ); + tmp2 = tmp1; + tmp2 = lp_build_exp2( &bld->base, tmp2); + STORE( bld, *inst, 0, CHAN_X, tmp2); } /* dst.y = src.x - floor(src.x) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { - emit_MOV( bld, 2, 0 ); - emit_sub( bld, 2, 1 ); - STORE( bld, *inst, 2, 0, CHAN_Y ); + tmp2 = tmp0; + tmp2 = lp_build_sub( &bld->base, tmp2, tmp1); + STORE( bld, *inst, 0, CHAN_Y, tmp2); } } /* dst.z = ex2(src.x) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { - emit_ex2( bld, 3, 0 ); - STORE( bld, *inst, 0, 0, CHAN_Z ); + tmp0 = lp_build_exp2( &bld->base, tmp0); + STORE( bld, *inst, 0, CHAN_Z, tmp0); } } /* dst.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) { - emit_tempf( bld, 0, TEMP_ONE_I, TEMP_ONE_C ); - STORE( bld, *inst, 0, 0, CHAN_W ); + tmp0 = bld->base.one; + STORE( bld, *inst, 0, CHAN_W, tmp0); } break; #endif @@ -656,97 +626,97 @@ emit_instruction( if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( bld, *inst, 0, 0, CHAN_X ); - emit_abs( bld, 0 ); - emit_MOV( bld, 1, 0 ); - emit_lg2( bld, 2, 1 ); + tmp0 = FETCH( bld, *inst, 0, CHAN_X ); + tmp0 = lp_build_abs( &bld->base, tmp0 ); + tmp1 = tmp0; + tmp1 = lp_build_log2( &bld->base, tmp1); /* dst.z = lg2(abs(src.x)) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( bld, *inst, 1, 0, CHAN_Z ); + STORE( bld, *inst, 0, CHAN_Z, tmp1); } if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { emit_flr( bld, 2, 1 ); /* dst.x = floor(lg2(abs(src.x))) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( bld, *inst, 1, 0, CHAN_X ); + STORE( bld, *inst, 0, CHAN_X, tmp1); } /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { - emit_ex2( bld, 2, 1 ); + tmp1 = lp_build_exp2( &bld->base, tmp1); emit_rcp( bld, 1, 1 ); - emit_mul( bld, 0, 1 ); - STORE( bld, *inst, 0, 0, CHAN_Y ); + tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); + STORE( bld, *inst, 0, CHAN_Y, tmp0); } } } /* dst.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) { emit_tempf( bld, 0, TEMP_ONE_I, TEMP_ONE_C ); - STORE( bld, *inst, 0, 0, CHAN_W ); + STORE( bld, *inst, 0, CHAN_W, tmp0); } break; #endif case TGSI_OPCODE_MUL: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - LLVMValueRef a = FETCH( bld, *inst, 0, chan_index ); - LLVMValueRef b = FETCH( bld, *inst, 1, chan_index ); - tmp = lp_build_mul(&bld->base, a, b); - STORE( bld, *inst, 0, chan_index, tmp ); + src0 = FETCH( bld, *inst, 0, chan_index ); + src1 = FETCH( bld, *inst, 1, chan_index ); + dst0 = lp_build_mul(&bld->base, src0, src1); + STORE( bld, *inst, 0, chan_index, dst0); } break; case TGSI_OPCODE_ADD: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - LLVMValueRef a = FETCH( bld, *inst, 0, chan_index ); - LLVMValueRef b = FETCH( bld, *inst, 1, chan_index ); - tmp = lp_build_add(&bld->base, a, b); - STORE( bld, *inst, 0, chan_index, tmp ); + src0 = FETCH( bld, *inst, 0, chan_index ); + src1 = FETCH( bld, *inst, 1, chan_index ); + dst0 = lp_build_add(&bld->base, src0, src1); + STORE( bld, *inst, 0, chan_index, dst0); } break; -#if 0 case TGSI_OPCODE_DP3: /* TGSI_OPCODE_DOT3 */ - FETCH( bld, *inst, 0, 0, CHAN_X ); - FETCH( bld, *inst, 1, 1, CHAN_X ); - emit_mul( bld, 0, 1 ); - FETCH( bld, *inst, 1, 0, CHAN_Y ); - FETCH( bld, *inst, 2, 1, CHAN_Y ); - emit_mul( bld, 1, 2 ); - emit_add( bld, 0, 1 ); - FETCH( bld, *inst, 1, 0, CHAN_Z ); - FETCH( bld, *inst, 2, 1, CHAN_Z ); - emit_mul( bld, 1, 2 ); - emit_add( bld, 0, 1 ); + tmp0 = FETCH( bld, *inst, 0, CHAN_X ); + tmp1 = FETCH( bld, *inst, 1, CHAN_X ); + tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); + tmp1 = FETCH( bld, *inst, 0, CHAN_Y ); + tmp2 = FETCH( bld, *inst, 1, CHAN_Y ); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); + tmp1 = FETCH( bld, *inst, 0, CHAN_Z ); + tmp2 = FETCH( bld, *inst, 1, CHAN_Z ); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( bld, *inst, 0, 0, chan_index ); + STORE( bld, *inst, 0, chan_index, tmp0); } break; case TGSI_OPCODE_DP4: /* TGSI_OPCODE_DOT4 */ - FETCH( bld, *inst, 0, 0, CHAN_X ); - FETCH( bld, *inst, 1, 1, CHAN_X ); - emit_mul( bld, 0, 1 ); - FETCH( bld, *inst, 1, 0, CHAN_Y ); - FETCH( bld, *inst, 2, 1, CHAN_Y ); - emit_mul( bld, 1, 2 ); - emit_add( bld, 0, 1 ); - FETCH( bld, *inst, 1, 0, CHAN_Z ); - FETCH( bld, *inst, 2, 1, CHAN_Z ); - emit_mul(bld, 1, 2 ); - emit_add(bld, 0, 1 ); - FETCH( bld, *inst, 1, 0, CHAN_W ); - FETCH( bld, *inst, 2, 1, CHAN_W ); - emit_mul( bld, 1, 2 ); - emit_add( bld, 0, 1 ); + tmp0 = FETCH( bld, *inst, 0, CHAN_X ); + tmp1 = FETCH( bld, *inst, 1, CHAN_X ); + tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); + tmp1 = FETCH( bld, *inst, 0, CHAN_Y ); + tmp2 = FETCH( bld, *inst, 1, CHAN_Y ); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); + tmp1 = FETCH( bld, *inst, 0, CHAN_Z ); + tmp2 = FETCH( bld, *inst, 1, CHAN_Z ); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); + tmp1 = FETCH( bld, *inst, 0, CHAN_W ); + tmp2 = FETCH( bld, *inst, 1, CHAN_W ); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( bld, *inst, 0, 0, chan_index ); + STORE( bld, *inst, 0, chan_index, tmp0); } break; +#if 0 case TGSI_OPCODE_DST: IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { emit_tempf( @@ -754,48 +724,44 @@ emit_instruction( 0, TEMP_ONE_I, TEMP_ONE_C ); - STORE( bld, *inst, 0, 0, CHAN_X ); + STORE( bld, *inst, 0, CHAN_X, tmp0); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - FETCH( bld, *inst, 0, 0, CHAN_Y ); - FETCH( bld, *inst, 1, 1, CHAN_Y ); - emit_mul( bld, 0, 1 ); - STORE( bld, *inst, 0, 0, CHAN_Y ); + tmp0 = FETCH( bld, *inst, 0, CHAN_Y ); + tmp1 = FETCH( bld, *inst, 1, CHAN_Y ); + tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); + STORE( bld, *inst, 0, CHAN_Y, tmp0); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { - FETCH( bld, *inst, 0, 0, CHAN_Z ); - STORE( bld, *inst, 0, 0, CHAN_Z ); + tmp0 = FETCH( bld, *inst, 0, CHAN_Z ); + STORE( bld, *inst, 0, CHAN_Z, tmp0); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { - FETCH( bld, *inst, 0, 1, CHAN_W ); - STORE( bld, *inst, 0, 0, CHAN_W ); + tmp0 = FETCH( bld, *inst, 1, CHAN_W ); + STORE( bld, *inst, 0, CHAN_W, tmp0); } break; +#endif case TGSI_OPCODE_MIN: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( bld, *inst, 0, 0, chan_index ); - FETCH( bld, *inst, 1, 1, chan_index ); - sse_minps( - bld, - make_xmm( 0 ), - make_xmm( 1 ) ); - STORE( bld, *inst, 0, 0, chan_index ); + src0 = FETCH( bld, *inst, 0, chan_index ); + src1 = FETCH( bld, *inst, 1, chan_index ); + dst0 = lp_build_min( &bld->base, src0, src1 ); + STORE( bld, *inst, 0, chan_index, dst0); } break; case TGSI_OPCODE_MAX: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( bld, *inst, 0, 0, chan_index ); - FETCH( bld, *inst, 1, 1, chan_index ); - sse_maxps( - bld, - make_xmm( 0 ), - make_xmm( 1 ) ); - STORE( bld, *inst, 0, 0, chan_index ); + src0 = FETCH( bld, *inst, 0, chan_index ); + src1 = FETCH( bld, *inst, 1, chan_index ); + dst0 = lp_build_max( &bld->base, src0, src1 ); + STORE( bld, *inst, 0, chan_index, dst0); } break; +#if 0 case TGSI_OPCODE_SLT: /* TGSI_OPCODE_SETLT */ emit_setcc( bld, inst, cc_LessThan ); @@ -809,33 +775,34 @@ emit_instruction( case TGSI_OPCODE_MAD: /* TGSI_OPCODE_MADD */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( bld, *inst, 0, 0, chan_index ); - FETCH( bld, *inst, 1, 1, chan_index ); - FETCH( bld, *inst, 2, 2, chan_index ); - emit_mul( bld, 0, 1 ); - emit_add( bld, 0, 2 ); - STORE( bld, *inst, 0, 0, chan_index ); + tmp0 = FETCH( bld, *inst, 0, chan_index ); + tmp1 = FETCH( bld, *inst, 1, chan_index ); + tmp2 = FETCH( bld, *inst, 2, chan_index ); + tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); + tmp0 = lp_build_add( &bld->base, tmp0, tmp2); + STORE( bld, *inst, 0, chan_index, tmp0); } break; case TGSI_OPCODE_SUB: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( bld, *inst, 0, 0, chan_index ); - FETCH( bld, *inst, 1, 1, chan_index ); - emit_sub( bld, 0, 1 ); - STORE( bld, *inst, 0, 0, chan_index ); + tmp0 = FETCH( bld, *inst, 0, chan_index ); + tmp1 = FETCH( bld, *inst, 1, chan_index ); + tmp0 = lp_build_sub( &bld->base, tmp0, tmp1); + STORE( bld, *inst, 0, chan_index, tmp0); } break; +#endif case TGSI_OPCODE_LRP: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( bld, *inst, 0, 0, chan_index ); - FETCH( bld, *inst, 1, 1, chan_index ); - FETCH( bld, *inst, 2, 2, chan_index ); - emit_sub( bld, 1, 2 ); - emit_mul( bld, 0, 1 ); - emit_add( bld, 0, 2 ); - STORE( bld, *inst, 0, 0, chan_index ); + src0 = FETCH( bld, *inst, 0, chan_index ); + src1 = FETCH( bld, *inst, 1, chan_index ); + src2 = FETCH( bld, *inst, 2, chan_index ); + tmp0 = lp_build_sub( &bld->base, src1, src2 ); + tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); + dst0 = lp_build_add( &bld->base, tmp0, src2 ); + STORE( bld, *inst, 0, chan_index, dst0 ); } break; @@ -848,25 +815,26 @@ emit_instruction( break; case TGSI_OPCODE_DP2A: - FETCH( bld, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */ - FETCH( bld, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */ - emit_mul( bld, 0, 1 ); /* xmm0 = xmm0 * xmm1 */ - FETCH( bld, *inst, 1, 0, CHAN_Y ); /* xmm1 = src[0].y */ - FETCH( bld, *inst, 2, 1, CHAN_Y ); /* xmm2 = src[1].y */ - emit_mul( bld, 1, 2 ); /* xmm1 = xmm1 * xmm2 */ - emit_add( bld, 0, 1 ); /* xmm0 = xmm0 + xmm1 */ - FETCH( bld, *inst, 1, 2, CHAN_X ); /* xmm1 = src[2].x */ - emit_add( bld, 0, 1 ); /* xmm0 = xmm0 + xmm1 */ + tmp0 = FETCH( bld, *inst, 0, CHAN_X ); /* xmm0 = src[0].x */ + tmp1 = FETCH( bld, *inst, 1, CHAN_X ); /* xmm1 = src[1].x */ + tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ + tmp1 = FETCH( bld, *inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ + tmp2 = FETCH( bld, *inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ + tmp1 = FETCH( bld, *inst, 2, CHAN_X ); /* xmm1 = src[2].x */ + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( bld, *inst, 0, 0, chan_index ); /* dest[ch] = xmm0 */ + STORE( bld, *inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */ } break; +#if 0 case TGSI_OPCODE_FRC: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( bld, *inst, 0, 0, chan_index ); + tmp0 = FETCH( bld, *inst, 0, chan_index ); emit_frc( bld, 0, 0 ); - STORE( bld, *inst, 0, 0, chan_index ); + STORE( bld, *inst, 0, chan_index, tmp0); } break; @@ -876,97 +844,94 @@ emit_instruction( case TGSI_OPCODE_FLR: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( bld, *inst, 0, 0, chan_index ); + tmp0 = FETCH( bld, *inst, 0, chan_index ); emit_flr( bld, 0, 0 ); - STORE( bld, *inst, 0, 0, chan_index ); + STORE( bld, *inst, 0, chan_index, tmp0); } break; case TGSI_OPCODE_ROUND: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( bld, *inst, 0, 0, chan_index ); + tmp0 = FETCH( bld, *inst, 0, chan_index ); emit_rnd( bld, 0, 0 ); - STORE( bld, *inst, 0, 0, chan_index ); + STORE( bld, *inst, 0, chan_index, tmp0); } break; +#endif - case TGSI_OPCODE_EX2: - FETCH( bld, *inst, 0, 0, CHAN_X ); - emit_ex2( bld, 0, 0 ); + case TGSI_OPCODE_EX2: { + tmp0 = FETCH( bld, *inst, 0, CHAN_X ); + tmp0 = lp_build_exp2( &bld->base, tmp0); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( bld, *inst, 0, 0, chan_index ); + STORE( bld, *inst, 0, chan_index, tmp0); } break; + } case TGSI_OPCODE_LG2: - FETCH( bld, *inst, 0, 0, CHAN_X ); - emit_lg2( bld, 0, 0 ); + tmp0 = FETCH( bld, *inst, 0, CHAN_X ); + tmp0 = lp_build_log2( &bld->base, tmp0); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( bld, *inst, 0, 0, chan_index ); + STORE( bld, *inst, 0, chan_index, tmp0); } break; case TGSI_OPCODE_POW: - FETCH( bld, *inst, 0, 0, CHAN_X ); - FETCH( bld, *inst, 1, 1, CHAN_X ); - emit_pow( bld, 0, 0, 0, 1 ); + src0 = FETCH( bld, *inst, 0, CHAN_X ); + src1 = FETCH( bld, *inst, 1, CHAN_X ); + dst0 = lp_build_pow( &bld->base, src0, src1 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( bld, *inst, 0, 0, chan_index ); + STORE( bld, *inst, 0, chan_index, dst0 ); } break; case TGSI_OPCODE_XPD: if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( bld, *inst, 1, 1, CHAN_Z ); - FETCH( bld, *inst, 3, 0, CHAN_Z ); + tmp1 = FETCH( bld, *inst, 1, CHAN_Z ); + tmp3 = FETCH( bld, *inst, 0, CHAN_Z ); } if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - FETCH( bld, *inst, 0, 0, CHAN_Y ); - FETCH( bld, *inst, 4, 1, CHAN_Y ); + tmp0 = FETCH( bld, *inst, 0, CHAN_Y ); + tmp4 = FETCH( bld, *inst, 1, CHAN_Y ); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { - emit_MOV( bld, 2, 0 ); - emit_mul( bld, 2, 1 ); - emit_MOV( bld, 5, 3 ); - emit_mul( bld, 5, 4 ); - emit_sub( bld, 2, 5 ); - STORE( bld, *inst, 2, 0, CHAN_X ); + tmp2 = tmp0; + tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); + tmp5 = tmp3; + tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); + tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); + STORE( bld, *inst, 0, CHAN_X, tmp2); } if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - FETCH( bld, *inst, 2, 1, CHAN_X ); - FETCH( bld, *inst, 5, 0, CHAN_X ); + tmp2 = FETCH( bld, *inst, 1, CHAN_X ); + tmp5 = FETCH( bld, *inst, 0, CHAN_X ); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - emit_mul( bld, 3, 2 ); - emit_mul( bld, 1, 5 ); - emit_sub( bld, 3, 1 ); - STORE( bld, *inst, 3, 0, CHAN_Y ); + tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); + tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); + STORE( bld, *inst, 0, CHAN_Y, tmp3); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { - emit_mul( bld, 5, 4 ); - emit_mul( bld, 0, 2 ); - emit_sub( bld, 5, 0 ); - STORE( bld, *inst, 5, 0, CHAN_Z ); + tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); + tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); + tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); + STORE( bld, *inst, 0, CHAN_Z, tmp5); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { - emit_tempf( - bld, - 0, - TEMP_ONE_I, - TEMP_ONE_C ); - STORE( bld, *inst, 0, 0, CHAN_W ); + tmp0 = bld->base.one; + STORE( bld, *inst, 0, CHAN_W, tmp0); } break; case TGSI_OPCODE_ABS: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( bld, *inst, 0, 0, chan_index ); - emit_abs( bld, 0) ; - - STORE( bld, *inst, 0, 0, chan_index ); + tmp0 = FETCH( bld, *inst, 0, chan_index ); + tmp0 = lp_build_abs( &bld->base, tmp0 ) ; + STORE( bld, *inst, 0, chan_index, tmp0); } break; @@ -975,29 +940,29 @@ emit_instruction( break; case TGSI_OPCODE_DPH: - FETCH( bld, *inst, 0, 0, CHAN_X ); - FETCH( bld, *inst, 1, 1, CHAN_X ); - emit_mul( bld, 0, 1 ); - FETCH( bld, *inst, 1, 0, CHAN_Y ); - FETCH( bld, *inst, 2, 1, CHAN_Y ); - emit_mul( bld, 1, 2 ); - emit_add( bld, 0, 1 ); - FETCH( bld, *inst, 1, 0, CHAN_Z ); - FETCH( bld, *inst, 2, 1, CHAN_Z ); - emit_mul( bld, 1, 2 ); - emit_add( bld, 0, 1 ); - FETCH( bld, *inst, 1, 1, CHAN_W ); - emit_add( bld, 0, 1 ); + tmp0 = FETCH( bld, *inst, 0, CHAN_X ); + tmp1 = FETCH( bld, *inst, 1, CHAN_X ); + tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); + tmp1 = FETCH( bld, *inst, 0, CHAN_Y ); + tmp2 = FETCH( bld, *inst, 1, CHAN_Y ); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); + tmp1 = FETCH( bld, *inst, 0, CHAN_Z ); + tmp2 = FETCH( bld, *inst, 1, CHAN_Z ); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); + tmp1 = FETCH( bld, *inst, 1, CHAN_W ); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( bld, *inst, 0, 0, chan_index ); + STORE( bld, *inst, 0, chan_index, tmp0); } break; case TGSI_OPCODE_COS: - FETCH( bld, *inst, 0, 0, CHAN_X ); - emit_cos( bld, 0, 0 ); + tmp0 = FETCH( bld, *inst, 0, CHAN_X ); + tmp0 = lp_build_cos( &bld->base, tmp0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( bld, *inst, 0, 0, chan_index ); + STORE( bld, *inst, 0, chan_index, tmp0); } break; @@ -1009,6 +974,7 @@ emit_instruction( return 0; break; +#if 0 case TGSI_OPCODE_KILP: /* predicated kill */ emit_kilp( bld ); @@ -1019,6 +985,7 @@ emit_instruction( /* conditional kill */ emit_kil( bld, &inst->FullSrcRegisters[0] ); break; +#endif case TGSI_OPCODE_PK2H: return 0; @@ -1053,10 +1020,10 @@ emit_instruction( break; case TGSI_OPCODE_SIN: - FETCH( bld, *inst, 0, 0, CHAN_X ); - emit_sin( bld, 0, 0 ); + tmp0 = FETCH( bld, *inst, 0, CHAN_X ); + tmp0 = lp_build_sin( &bld->base, tmp0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( bld, *inst, 0, 0, chan_index ); + STORE( bld, *inst, 0, chan_index, tmp0); } break; @@ -1071,13 +1038,11 @@ emit_instruction( case TGSI_OPCODE_STR: return 0; break; -#endif case TGSI_OPCODE_TEX: emit_tex( bld, inst, FALSE, FALSE ); break; -#if 0 case TGSI_OPCODE_TXD: return 0; break; @@ -1106,14 +1071,16 @@ emit_instruction( return 0; break; +#if 0 case TGSI_OPCODE_ARR: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( bld, *inst, 0, 0, chan_index ); + tmp0 = FETCH( bld, *inst, 0, chan_index ); emit_rnd( bld, 0, 0 ); emit_f2it( bld, 0 ); - STORE( bld, *inst, 0, 0, chan_index ); + STORE( bld, *inst, 0, chan_index, tmp0); } break; +#endif case TGSI_OPCODE_BRA: return 0; @@ -1123,6 +1090,7 @@ emit_instruction( return 0; break; +#if 0 case TGSI_OPCODE_RET: emit_ret( bld ); break; @@ -1135,9 +1103,9 @@ emit_instruction( case TGSI_OPCODE_SSG: /* TGSI_OPCODE_SGN */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( bld, *inst, 0, 0, chan_index ); + tmp0 = FETCH( bld, *inst, 0, chan_index ); emit_sgn( bld, 0, 0 ); - STORE( bld, *inst, 0, 0, chan_index ); + STORE( bld, *inst, 0, chan_index, tmp0); } break; @@ -1147,14 +1115,14 @@ emit_instruction( case TGSI_OPCODE_SCS: IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { - FETCH( bld, *inst, 0, 0, CHAN_X ); - emit_cos( bld, 0, 0 ); - STORE( bld, *inst, 0, 0, CHAN_X ); + tmp0 = FETCH( bld, *inst, 0, CHAN_X ); + tmp0 = lp_build_cos( &bld->base, tmp0 ); + STORE( bld, *inst, 0, CHAN_X, tmp0); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - FETCH( bld, *inst, 0, 0, CHAN_X ); - emit_sin( bld, 0, 0 ); - STORE( bld, *inst, 0, 0, CHAN_Y ); + tmp0 = FETCH( bld, *inst, 0, CHAN_X ); + tmp0 = lp_build_sin( &bld->base, tmp0 ); + STORE( bld, *inst, 0, CHAN_Y, tmp0); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { emit_tempf( @@ -1162,7 +1130,7 @@ emit_instruction( 0, TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C ); - STORE( bld, *inst, 0, 0, CHAN_Z ); + STORE( bld, *inst, 0, CHAN_Z, tmp0); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { emit_tempf( @@ -1170,7 +1138,7 @@ emit_instruction( 0, TEMP_ONE_I, TEMP_ONE_C ); - STORE( bld, *inst, 0, 0, CHAN_W ); + STORE( bld, *inst, 0, CHAN_W, tmp0); } break; #endif @@ -1179,7 +1147,6 @@ emit_instruction( emit_tex( bld, inst, TRUE, FALSE ); break; -#if 0 case TGSI_OPCODE_NRM: /* fall-through */ case TGSI_OPCODE_NRM4: @@ -1196,73 +1163,73 @@ emit_instruction( /* xmm4 = src.x */ /* xmm0 = src.x * src.x */ - FETCH(bld, *inst, 0, 0, CHAN_X); + tmp0 = FETCH(bld, *inst, 0, CHAN_X); if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { - emit_MOV(bld, 4, 0); + tmp4 = tmp0; } - emit_mul(bld, 0, 0); + tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); /* xmm5 = src.y */ /* xmm0 = xmm0 + src.y * src.y */ - FETCH(bld, *inst, 1, 0, CHAN_Y); + tmp1 = FETCH(bld, *inst, 0, CHAN_Y); if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { - emit_MOV(bld, 5, 1); + tmp5 = tmp1; } - emit_mul(bld, 1, 1); - emit_add(bld, 0, 1); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm6 = src.z */ /* xmm0 = xmm0 + src.z * src.z */ - FETCH(bld, *inst, 1, 0, CHAN_Z); + tmp1 = FETCH(bld, *inst, 0, CHAN_Z); if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { - emit_MOV(bld, 6, 1); + tmp6 = tmp1; } - emit_mul(bld, 1, 1); - emit_add(bld, 0, 1); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); if (dims == 4) { /* xmm7 = src.w */ /* xmm0 = xmm0 + src.w * src.w */ - FETCH(bld, *inst, 1, 0, CHAN_W); + tmp1 = FETCH(bld, *inst, 0, CHAN_W); if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { - emit_MOV(bld, 7, 1); + tmp7 = tmp1; } - emit_mul(bld, 1, 1); - emit_add(bld, 0, 1); + tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); } /* xmm1 = 1 / sqrt(xmm0) */ - emit_rsqrt(bld, 1, 0); + tmp1 = lp_build_rsqrt( &bld->base, tmp0); /* dst.x = xmm1 * src.x */ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { - emit_mul(bld, 4, 1); - STORE(bld, *inst, 4, 0, CHAN_X); + tmp4 = lp_build_mul( &bld->base, tmp4, tmp1); + STORE(bld, *inst, 0, CHAN_X, tmp4); } /* dst.y = xmm1 * src.y */ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { - emit_mul(bld, 5, 1); - STORE(bld, *inst, 5, 0, CHAN_Y); + tmp5 = lp_build_mul( &bld->base, tmp5, tmp1); + STORE(bld, *inst, 0, CHAN_Y, tmp5); } /* dst.z = xmm1 * src.z */ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { - emit_mul(bld, 6, 1); - STORE(bld, *inst, 6, 0, CHAN_Z); + tmp6 = lp_build_mul( &bld->base, tmp6, tmp1); + STORE(bld, *inst, 0, CHAN_Z, tmp6); } /* dst.w = xmm1 * src.w */ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) && dims == 4) { - emit_mul(bld, 7, 1); - STORE(bld, *inst, 7, 0, CHAN_W); + tmp7 = lp_build_mul( &bld->base, tmp7, tmp1); + STORE(bld, *inst, 0, CHAN_W, tmp7); } } /* dst0.w = 1.0 */ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 3) { - emit_tempf(bld, 0, TEMP_ONE_I, TEMP_ONE_C); - STORE(bld, *inst, 0, 0, CHAN_W); + tmp0 = bld->base.one; + STORE(bld, *inst, 0, CHAN_W, tmp0); } } break; @@ -1272,18 +1239,17 @@ emit_instruction( break; case TGSI_OPCODE_DP2: - FETCH( bld, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */ - FETCH( bld, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */ - emit_mul( bld, 0, 1 ); /* xmm0 = xmm0 * xmm1 */ - FETCH( bld, *inst, 1, 0, CHAN_Y ); /* xmm1 = src[0].y */ - FETCH( bld, *inst, 2, 1, CHAN_Y ); /* xmm2 = src[1].y */ - emit_mul( bld, 1, 2 ); /* xmm1 = xmm1 * xmm2 */ - emit_add( bld, 0, 1 ); /* xmm0 = xmm0 + xmm1 */ + tmp0 = FETCH( bld, *inst, 0, CHAN_X ); /* xmm0 = src[0].x */ + tmp1 = FETCH( bld, *inst, 1, CHAN_X ); /* xmm1 = src[1].x */ + tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ + tmp1 = FETCH( bld, *inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ + tmp2 = FETCH( bld, *inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ + tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ + tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( bld, *inst, 0, 0, chan_index ); /* dest[ch] = xmm0 */ + STORE( bld, *inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */ } break; -#endif case TGSI_OPCODE_TXL: emit_tex( bld, inst, TRUE, FALSE ); @@ -1293,7 +1259,6 @@ emit_instruction( emit_tex( bld, inst, FALSE, TRUE ); break; -#if 0 case TGSI_OPCODE_BRK: return 0; break; @@ -1346,14 +1311,16 @@ emit_instruction( return 0; break; +#if 0 case TGSI_OPCODE_TRUNC: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( bld, *inst, 0, 0, chan_index ); + tmp0 = FETCH( bld, *inst, 0, chan_index ); emit_f2it( bld, 0 ); emit_i2f( bld, 0 ); - STORE( bld, *inst, 0, 0, chan_index ); + STORE( bld, *inst, 0, chan_index, tmp0); } break; +#endif case TGSI_OPCODE_SHL: return 0; @@ -1402,7 +1369,6 @@ emit_instruction( case TGSI_OPCODE_ENDPRIM: return 0; break; -#endif default: return 0; -- 2.30.2