From: Michal Krol Date: Tue, 3 Jun 2008 08:59:46 +0000 (+0200) Subject: draw: Fix fetch_src(). Resurrect SSE version of DP3. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=183d490ab139483c88d0b0f541714919de86235c;p=mesa.git draw: Fix fetch_src(). Resurrect SSE version of DP3. --- diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 5bfcd96ac3d..891f8c211aa 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -532,10 +532,11 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, if (swz != SSE_SWIZZLE_NOOP || negs != 0 || abs != 0) { struct x86_reg dst = aos_get_xmm_reg(cp); - if (swz != SSE_SWIZZLE_NOOP) { + if (swz != SSE_SWIZZLE_NOOP) emit_pshufd(cp, dst, arg0, swz); - arg0 = dst; - } + else + sse_movaps(cp->func, dst, arg0); + arg0 = dst; if (negs && negs != 0xf) { struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ); @@ -550,15 +551,13 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, (negs & 2) ? 1 : 0, (negs & 4) ? 1 : 0, (negs & 8) ? 1 : 0)); - sse_mulps(cp->func, dst, arg0); + sse_mulps(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); - arg0 = dst; } else if (negs) { struct x86_reg imm_negs = aos_get_internal_xmm(cp, IMM_NEGS); sse_mulps(cp->func, dst, imm_negs); - arg0 = dst; } @@ -571,10 +570,9 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, sse_movaps(cp->func, tmp, arg0); sse_mulps(cp->func, tmp, neg); - sse_maxps(cp->func, dst, arg0); + sse_maxps(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); - arg0 = dst; } } @@ -962,33 +960,6 @@ static boolean emit_COS( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } -#if 1 - -/* The x87 version. - */ -static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) -{ - struct x86_reg st1 = x86_make_reg( file_x87, 1 ); - - x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); - x87_fld_src( cp, &op->FullSrcRegisters[1], 0 ); - x87_fmulp( cp->func, st1 ); - x87_fld_src( cp, &op->FullSrcRegisters[0], 1 ); - x87_fld_src( cp, &op->FullSrcRegisters[1], 1 ); - x87_fmulp( cp->func, st1 ); - x87_faddp( cp->func, st1 ); - x87_fld_src( cp, &op->FullSrcRegisters[0], 2 ); - x87_fld_src( cp, &op->FullSrcRegisters[1], 2 ); - x87_fmulp( cp->func, st1 ); - x87_faddp( cp->func, st1 ); - - x87_fstp_dest4( cp, &op->FullDstRegisters[0] ); - - return TRUE; -} - -#else - /* The dotproduct instructions don't really do that well in sse: * XXX: produces wrong results -- disabled. */ @@ -1012,8 +983,6 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } -#endif - static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);