From b9de2089b1ffafd7d072d78f716c9e39bab06627 Mon Sep 17 00:00:00 2001 From: Alan Hourihane Date: Mon, 16 Feb 2009 20:20:55 +0000 Subject: [PATCH] gallium: fix glean's vertProg1 RSQ test 2 (reciprocal square toot of negative value) --- src/gallium/auxiliary/draw/draw_vs_aos.c | 28 +++++++++++++++--------- src/gallium/auxiliary/tgsi/tgsi_exec.c | 1 + src/gallium/auxiliary/tgsi/tgsi_sse2.c | 1 + 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 0c693a4a65c..f4c6705bae8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1559,7 +1559,6 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst */ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - if (0) { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg r = aos_get_xmm_reg(cp); @@ -1568,21 +1567,30 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } else { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg r = aos_get_xmm_reg(cp); + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg r = aos_get_xmm_reg(cp); struct x86_reg neg_half = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ ); struct x86_reg one_point_five = x86_make_disp( neg_half, 4 ); struct x86_reg src = get_xmm_writable( cp, arg0 ); - - sse_rsqrtss( cp->func, r, src ); /* rsqrtss(a) */ - sse_mulss( cp->func, src, neg_half ); /* -.5 * a */ - sse_mulss( cp->func, src, r ); /* -.5 * a * r */ - sse_mulss( cp->func, src, r ); /* -.5 * a * r * r */ - sse_addss( cp->func, src, one_point_five ); /* 1.5 - .5 * a * r * r */ - sse_mulss( cp->func, r, src ); /* r * (1.5 - .5 * a * r * r) */ + struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); + struct x86_reg tmp = aos_get_xmm_reg(cp); + + sse_movaps(cp->func, tmp, src); + sse_mulps(cp->func, tmp, neg); + sse_maxps(cp->func, tmp, src); + + sse_rsqrtss( cp->func, r, tmp ); /* rsqrtss(a) */ + sse_mulss( cp->func, tmp, neg_half ); /* -.5 * a */ + sse_mulss( cp->func, tmp, r ); /* -.5 * a * r */ + sse_mulss( cp->func, tmp, r ); /* -.5 * a * r * r */ + sse_addss( cp->func, tmp, one_point_five ); /* 1.5 - .5 * a * r * r */ + sse_mulss( cp->func, r, tmp ); /* r * (1.5 - .5 * a * r * r) */ store_scalar_dest(cp, &op->FullDstRegisters[0], r); + + aos_release_xmm_reg(cp, tmp.idx); + return TRUE; } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index ab641efb603..5c5d8d25500 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1889,6 +1889,7 @@ exec_instruction( case TGSI_OPCODE_RSQ: /* TGSI_OPCODE_RECIPSQRT */ FETCH( &r[0], 0, CHAN_X ); + micro_abs( &r[0], &r[0] ); micro_sqrt( &r[0], &r[0] ); micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 481ba89c5e7..a183603aeab 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1575,6 +1575,7 @@ emit_instruction( case TGSI_OPCODE_RSQ: /* TGSI_OPCODE_RECIPSQRT */ FETCH( func, *inst, 0, 0, CHAN_X ); + emit_abs( func, 0 ); emit_rsqrt( func, 1, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 1, 0, chan_index ); -- 2.30.2